/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2015, 2016 The FreeBSD Foundation
 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Portions of this software were developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_umtx_profiling.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/syscallsubr.h>
#include <sys/taskqueue.h>
#include <sys/time.h>
#include <sys/eventhandler.h>
#include <sys/umtx.h>
#include <sys/umtxvar.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

#include <machine/atomic.h>
#include <machine/cpu.h>

#include <compat/freebsd32/freebsd32.h>
#ifdef COMPAT_FREEBSD32
#include <compat/freebsd32/freebsd32_proto.h>
#endif

#define	_UMUTEX_TRY	1
#define	_UMUTEX_WAIT	2

#ifdef UMTX_PROFILING
#define	UPROF_PERC_BIGGER(w, f, sw, sf)					\
	(((w) > (sw)) || ((w) == (sw) && (f) > (sf)))
#endif

#define	UMTXQ_LOCKED_ASSERT(uc)	mtx_assert(&(uc)->uc_lock, MA_OWNED)
#ifdef INVARIANTS
#define	UMTXQ_ASSERT_LOCKED_BUSY(key) do {				\
	struct umtxq_chain *uc;						\
									\
	uc = umtxq_getchain(key);					\
	mtx_assert(&uc->uc_lock, MA_OWNED);				\
	KASSERT(uc->uc_busy != 0, ("umtx chain is not busy"));		\
} while (0)
#else
#define	UMTXQ_ASSERT_LOCKED_BUSY(key) do {} while (0)
#endif

/*
 * Don't propagate time-sharing priority; there is a security reason:
 * a user could simply introduce a PI-mutex, let thread A lock it, and
 * let another thread B block on it.  Because B is sleeping, its
 * priority is boosted, and priority propagation boosts A's priority in
 * turn.  A's priority would then never be lowered, even while it is
 * using 100% CPU, which is unfair to other processes.
 */

#define	UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&	\
			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?	\
			 PRI_MAX_TIMESHARE : (td)->td_user_pri)
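/*
 * Added note (not from the original source): UPRI() clamps every
 * priority inside the time-sharing band to PRI_MAX_TIMESHARE, the least
 * favorable time-sharing priority, so all time-sharing threads look
 * equally unimportant to the propagation code; priorities outside that
 * band (e.g. real-time) pass through unchanged.  This is what
 * implements the policy described in the comment above: a thread can
 * never use a boosted time-sharing priority to obtain a lasting lend.
 */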
#define	GOLDEN_RATIO_PRIME	2654404609U
#ifndef	UMTX_CHAINS
#define	UMTX_CHAINS		512
#endif
#define	UMTX_SHIFTS		(__WORD_BIT - 9)

#define	GET_SHARE(flags)	\
    (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)

#define	BUSY_SPINS		200

struct umtx_copyops {
	int	(*copyin_timeout)(const void *uaddr, struct timespec *tsp);
	int	(*copyin_umtx_time)(const void *uaddr, size_t size,
	    struct _umtx_time *tp);
	int	(*copyin_robust_lists)(const void *uaddr, size_t size,
	    struct umtx_robust_lists_params *rbp);
	int	(*copyout_timeout)(void *uaddr, size_t size,
	    struct timespec *tsp);
	const size_t	timespec_sz;
	const size_t	umtx_time_sz;
	const bool	compat32;
};

_Static_assert(sizeof(struct umutex) == sizeof(struct umutex32), "umutex32");
_Static_assert(__offsetof(struct umutex, m_spare[0]) ==
    __offsetof(struct umutex32, m_spare[0]), "m_spare32");
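/*
 * Illustrative sketch (not part of the original file): the table above
 * lets one syscall implementation serve both the native and the 32-bit
 * compat ABIs.  A hypothetical native instance could be filled in as
 *
 *	static const struct umtx_copyops hypothetical_native_ops = {
 *		.copyin_timeout = <native timeout copyin helper>,
 *		.timespec_sz = sizeof(struct timespec),
 *		.umtx_time_sz = sizeof(struct _umtx_time),
 *		.compat32 = false,
 *	};
 *
 * while a compat32 instance would point at freebsd32 helpers, use the
 * 32-bit structure sizes, and set .compat32 = true.
 */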
int umtx_shm_vnobj_persistent = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_vnode_persistent, CTLFLAG_RWTUN,
    &umtx_shm_vnobj_persistent, 0,
    "False forces destruction of umtx attached to file, on last close");
static int umtx_max_rb = 1000;
SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_max_robust, CTLFLAG_RWTUN,
    &umtx_max_rb, 0,
    "Maximum number of robust mutexes allowed for each thread");

static uma_zone_t	umtx_pi_zone;
static struct umtxq_chain umtxq_chains[2][UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
static int		umtx_pi_allocated;

static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
    &umtx_pi_allocated, 0, "Allocated umtx_pi");
static int umtx_verbose_rb = 1;
SYSCTL_INT(_debug_umtx, OID_AUTO, robust_faults_verbose, CTLFLAG_RWTUN,
    &umtx_verbose_rb, 0,
    "");

#ifdef UMTX_PROFILING
static long max_length;
SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0,
    "max_length");
static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD | CTLFLAG_MPSAFE,
    0, "umtx chain stats");
#endif

static inline void umtx_abs_timeout_init2(struct umtx_abs_timeout *timo,
    const struct _umtx_time *umtxtime);

static void umtx_shm_init(void);
static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    bool rb);
static void umtx_thread_cleanup(struct thread *td);
SYSINIT(umtx, SI_SUB_EVENTHANDLER + 1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);

#define	umtxq_signal(key, nwake)	\
	umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)

static struct mtx umtx_lock;

#ifdef UMTX_PROFILING
static void
umtx_init_profiling(void)
{
	struct sysctl_oid *chain_oid;
	char chain_name[10];
	int i;

	for (i = 0; i < UMTX_CHAINS; ++i) {
		snprintf(chain_name, sizeof(chain_name), "%d", i);
		chain_oid = SYSCTL_ADD_NODE(NULL,
		    SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO,
		    chain_name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
		    "umtx hash stats");
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length0", CTLFLAG_RD,
		    &umtxq_chains[0][i].max_length, 0, NULL);
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length1", CTLFLAG_RD,
		    &umtxq_chains[1][i].max_length, 0, NULL);
	}
}

static int
sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS)
{
	char buf[512];
	struct sbuf sb;
	struct umtxq_chain *uc;
	u_int fract, i, j, tot, whole;
	u_int sf0, sf1, sf2, sf3, sf4;
	u_int si0, si1, si2, si3, si4;
	u_int sw0, sw1, sw2, sw3, sw4;

	sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
	for (i = 0; i < 2; i++) {
		tot = 0;
		for (j = 0; j < UMTX_CHAINS; ++j) {
			uc = &umtxq_chains[i][j];
			mtx_lock(&uc->uc_lock);
			tot += uc->max_length;
			mtx_unlock(&uc->uc_lock);
		}
		if (tot == 0)
			sbuf_printf(&sb, "%u) Empty ", i);
		else {
			sf0 = sf1 = sf2 = sf3 = sf4 = 0;
			si0 = si1 = si2 = si3 = si4 = 0;
			sw0 = sw1 = sw2 = sw3 = sw4 = 0;
			for (j = 0; j < UMTX_CHAINS; j++) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				whole = uc->max_length * 100;
				mtx_unlock(&uc->uc_lock);
				fract = (whole % tot) * 100;
				if (UPROF_PERC_BIGGER(whole, fract, sw0,
				    sf0)) {
					sf0 = fract;
					si0 = j;
					sw0 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw1,
				    sf1)) {
					sf1 = fract;
					si1 = j;
					sw1 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw2,
				    sf2)) {
					sf2 = fract;
					si2 = j;
					sw2 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw3,
				    sf3)) {
					sf3 = fract;
					si3 = j;
					sw3 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw4,
				    sf4)) {
					sf4 = fract;
					si4 = j;
					sw4 = whole;
				}
			}
			sbuf_printf(&sb, "queue %u:\n", i);
			sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot,
			    sf0 / tot, si0);
			sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot,
			    sf1 / tot, si1);
			sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot,
			    sf2 / tot, si2);
			sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot,
			    sf3 / tot, si3);
			sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot,
			    sf4 / tot, si4);
		}
	}
	sbuf_trim(&sb);
	sbuf_finish(&sb);
	sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
	sbuf_delete(&sb);
	return (0);
}

static int
sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS)
{
	struct umtxq_chain *uc;
	u_int i, j;
	int clear, error;

	clear = 0;
	error = sysctl_handle_int(oidp, &clear, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);

	if (clear != 0) {
		for (i = 0; i < 2; ++i) {
			for (j = 0; j < UMTX_CHAINS; ++j) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				uc->length = 0;
				uc->max_length = 0;
				mtx_unlock(&uc->uc_lock);
			}
		}
	}
	return (0);
}

SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_clear, "I",
    "Clear umtx chains statistics");
SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_peaks, "A",
    "Highest peaks in chains max length");
#endif
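/*
 * Worked example for the fixed-point percentage math in
 * sysctl_debug_umtx_chains_peaks() above (added note, not from the
 * original source): with tot = 300 and a chain whose max_length is 25,
 * whole = 2500 and fract = (2500 % 300) * 100 = 10000, so the handler
 * prints whole / tot = 8 and fract / tot = 33, i.e. "8.33%", matching
 * 25 / 300 = 8.33%.  UPROF_PERC_BIGGER() compares the (whole, fract)
 * pairs lexicographically, ranking the top five chains without any
 * floating-point arithmetic.
 */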
static void
umtxq_sysinit(void *arg __unused)
{
	int i, j;

	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	for (i = 0; i < 2; ++i) {
		for (j = 0; j < UMTX_CHAINS; ++j) {
			mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
			    MTX_DEF | MTX_DUPOK);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
			LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
			TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
			umtxq_chains[i][j].uc_busy = 0;
			umtxq_chains[i][j].uc_waiters = 0;
#ifdef UMTX_PROFILING
			umtxq_chains[i][j].length = 0;
			umtxq_chains[i][j].max_length = 0;
#endif
		}
	}
#ifdef UMTX_PROFILING
	umtx_init_profiling();
#endif
	mtx_init(&umtx_lock, "umtx lock", NULL, MTX_DEF);
	umtx_shm_init();
}

struct umtx_q *
umtxq_alloc(void)
{
	struct umtx_q *uq;

	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
	uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX,
	    M_WAITOK | M_ZERO);
	TAILQ_INIT(&uq->uq_spare_queue->head);
	TAILQ_INIT(&uq->uq_pi_contested);
	uq->uq_inherited_pri = PRI_MAX;
	return (uq);
}

void
umtxq_free(struct umtx_q *uq)
{

	MPASS(uq->uq_spare_queue != NULL);
	free(uq->uq_spare_queue, M_UMTX);
	free(uq, M_UMTX);
}

static inline void
umtxq_hash(struct umtx_key *key)
{
	unsigned n;

	n = (uintptr_t)key->info.both.a + key->info.both.b;
	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
}

struct umtxq_chain *
umtxq_getchain(struct umtx_key *key)
{

	if (key->type <= TYPE_SEM)
		return (&umtxq_chains[1][key->hash]);
	return (&umtxq_chains[0][key->hash]);
}

/*
 * Set the chain to the busy state when the following operation
 * may block (a kernel mutex cannot be used).
 */
void
umtxq_busy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	if (uc->uc_busy) {
#ifdef SMP
		if (smp_cpus > 1) {
			int count = BUSY_SPINS;

			if (count > 0) {
				umtxq_unlock(key);
				while (uc->uc_busy && --count > 0)
					cpu_spinwait();
				umtxq_lock(key);
			}
		}
#endif
		while (uc->uc_busy) {
			uc->uc_waiters++;
			msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
			uc->uc_waiters--;
		}
	}
	uc->uc_busy = 1;
}
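/*
 * Illustrative usage sketch for the busy protocol (added note, not from
 * the original source).  A caller that must perform a faulting or
 * sleeping operation while logically holding a chain typically does:
 *
 *	umtxq_lock(key);
 *	umtxq_busy(key);	marks the chain busy, may sleep
 *	umtxq_unlock(key);
 *	... touch userspace memory, which may fault ...
 *	umtxq_unbusy_unlocked(key);
 *
 * The short cpu_spinwait() loop above is an optimization: on SMP the
 * busy period is usually brief, so spinning up to BUSY_SPINS iterations
 * often avoids the cost of an msleep()/wakeup_one() round trip.
 */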
/*
 * Unbusy a chain.
 */
void
umtxq_unbusy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	KASSERT(uc->uc_busy != 0, ("not busy"));
	uc->uc_busy = 0;
	if (uc->uc_waiters)
		wakeup_one(uc);
}

void
umtxq_unbusy_unlocked(struct umtx_key *key)
{

	umtxq_lock(key);
	umtxq_unbusy(key);
	umtxq_unlock(key);
}

static struct umtxq_queue *
umtxq_queue_lookup(struct umtx_key *key, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	LIST_FOREACH(uh, &uc->uc_queue[q], link) {
		if (umtx_key_match(&uh->key, key))
			return (uh);
	}

	return (NULL);
}

void
umtxq_insert_queue(struct umtx_q *uq, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT((uq->uq_flags & UQF_UMTXQ) == 0,
	    ("umtx_q is already on queue"));
	uh = umtxq_queue_lookup(&uq->uq_key, q);
	if (uh != NULL) {
		LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue,
		    link);
	} else {
		uh = uq->uq_spare_queue;
		uh->key = uq->uq_key;
		LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
#ifdef UMTX_PROFILING
		uc->length++;
		if (uc->length > uc->max_length) {
			uc->max_length = uc->length;
			if (uc->max_length > max_length)
				max_length = uc->max_length;
		}
#endif
	}
	uq->uq_spare_queue = NULL;

	TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
	uh->length++;
	uq->uq_flags |= UQF_UMTXQ;
	uq->uq_cur_queue = uh;
	return;
}

void
umtxq_remove_queue(struct umtx_q *uq, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (uq->uq_flags & UQF_UMTXQ) {
		uh = uq->uq_cur_queue;
		TAILQ_REMOVE(&uh->head, uq, uq_link);
		uh->length--;
		uq->uq_flags &= ~UQF_UMTXQ;
		if (TAILQ_EMPTY(&uh->head)) {
			KASSERT(uh->length == 0,
			    ("inconsistent umtxq_queue length"));
#ifdef UMTX_PROFILING
			uc->length--;
#endif
			LIST_REMOVE(uh, link);
		} else {
			uh = LIST_FIRST(&uc->uc_spare_queue);
			KASSERT(uh != NULL, ("uc_spare_queue is empty"));
			LIST_REMOVE(uh, link);
		}
		uq->uq_spare_queue = uh;
		uq->uq_cur_queue = NULL;
	}
}
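/*
 * Added note (not from the original source) on the spare-queue trick
 * used by the two functions above: every umtx_q owns exactly one
 * umtxq_queue header.  On insert, if a header for the key already
 * exists, the thread's spare header is parked on uc_spare_queue;
 * otherwise the spare becomes the live header for the key.  On remove,
 * the thread takes back either the now-empty live header or any parked
 * spare.  The invariant "one header per sleeping thread" means queue
 * headers never need to be allocated or freed while the chain lock is
 * held.
 */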
/*
 * Check if there are multiple waiters.
 */
int
umtxq_count(struct umtx_key *key)
{
	struct umtxq_queue *uh;

	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL)
		return (uh->length);
	return (0);
}

/*
 * Check if there are multiple PI waiters and return the first waiter.
 */
static int
umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
{
	struct umtxq_queue *uh;

	*first = NULL;
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL) {
		*first = TAILQ_FIRST(&uh->head);
		return (uh->length);
	}
	return (0);
}

/*
 * Wake up threads waiting on a userland object, filtered by a bit mask.
 */
int
umtxq_signal_mask(struct umtx_key *key, int n_wake, u_int bitset)
{
	struct umtxq_queue *uh;
	struct umtx_q *uq, *uq_temp;
	int ret;

	ret = 0;
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh == NULL)
		return (0);
	TAILQ_FOREACH_SAFE(uq, &uh->head, uq_link, uq_temp) {
		if ((uq->uq_bitset & bitset) == 0)
			continue;
		umtxq_remove_queue(uq, UMTX_SHARED_QUEUE);
		wakeup_one(uq);
		if (++ret >= n_wake)
			break;
	}
	return (ret);
}

/*
 * Wake up threads waiting on a userland object.
 */
static int
umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
{
	struct umtxq_queue *uh;
	struct umtx_q *uq;
	int ret;

	ret = 0;
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
	uh = umtxq_queue_lookup(key, q);
	if (uh != NULL) {
		while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
			umtxq_remove_queue(uq, q);
			wakeup(uq);
			if (++ret >= n_wake)
				return (ret);
		}
	}
	return (ret);
}

/*
 * Wake up the specified thread.
 */
static inline void
umtxq_signal_thread(struct umtx_q *uq)
{

	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key));
	umtxq_remove(uq);
	wakeup(uq);
}

/*
 * Wake up a maximum of n_wake threads that are waiting on a userland
 * object identified by key.  The remaining threads are removed from the
 * queue identified by key and added to the queue identified by key2
 * (requeued).  n_requeue specifies an upper limit on the number of
 * threads that are requeued to the second queue.
 */
int
umtxq_requeue(struct umtx_key *key, int n_wake, struct umtx_key *key2,
    int n_requeue)
{
	struct umtxq_queue *uh;
	struct umtx_q *uq, *uq_temp;
	int ret;

	ret = 0;
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key2));
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh == NULL)
		return (0);
	TAILQ_FOREACH_SAFE(uq, &uh->head, uq_link, uq_temp) {
		if (++ret <= n_wake) {
			umtxq_remove(uq);
			wakeup_one(uq);
		} else {
			umtxq_remove(uq);
			uq->uq_key = *key2;
			umtxq_insert(uq);
			if (ret - n_wake == n_requeue)
				break;
		}
	}
	return (ret);
}
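/*
 * Added note (not from the original source): the wake-some /
 * requeue-the-rest split implemented above is the classic cure for the
 * "thundering herd" on a condition-variable broadcast.  A broadcast can
 * wake one thread on the CV queue and move the remaining waiters onto
 * the mutex queue, so they are released one at a time as the mutex is
 * handed over instead of all stampeding for it at once.
 */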
static inline int
tstohz(const struct timespec *tsp)
{
	struct timeval tv;

	TIMESPEC_TO_TIMEVAL(&tv, tsp);
	return (tvtohz(&tv));
}

void
umtx_abs_timeout_init(struct umtx_abs_timeout *timo, int clockid,
    int absolute, const struct timespec *timeout)
{

	timo->clockid = clockid;
	if (!absolute) {
		timo->is_abs_real = false;
		kern_clock_gettime(curthread, timo->clockid, &timo->cur);
		timespecadd(&timo->cur, timeout, &timo->end);
	} else {
		timo->end = *timeout;
		timo->is_abs_real = clockid == CLOCK_REALTIME ||
		    clockid == CLOCK_REALTIME_FAST ||
		    clockid == CLOCK_REALTIME_PRECISE ||
		    clockid == CLOCK_SECOND;
	}
}

static void
umtx_abs_timeout_init2(struct umtx_abs_timeout *timo,
    const struct _umtx_time *umtxtime)
{

	umtx_abs_timeout_init(timo, umtxtime->_clockid,
	    (umtxtime->_flags & UMTX_ABSTIME) != 0, &umtxtime->_timeout);
}

static void
umtx_abs_timeout_enforce_min(sbintime_t *sbt)
{
	sbintime_t when, mint;

	mint = curproc->p_umtx_min_timeout;
	if (__predict_false(mint != 0)) {
		when = sbinuptime() + mint;
		if (*sbt < when)
			*sbt = when;
	}
}

static int
umtx_abs_timeout_getsbt(struct umtx_abs_timeout *timo, sbintime_t *sbt,
    int *flags)
{
	struct bintime bt, bbt;
	struct timespec tts;
	sbintime_t rem;

	switch (timo->clockid) {
	/* Clocks that can be converted into absolute time. */
	case CLOCK_REALTIME:
	case CLOCK_REALTIME_PRECISE:
	case CLOCK_REALTIME_FAST:
	case CLOCK_MONOTONIC:
	case CLOCK_MONOTONIC_PRECISE:
	case CLOCK_MONOTONIC_FAST:
	case CLOCK_UPTIME:
	case CLOCK_UPTIME_PRECISE:
	case CLOCK_UPTIME_FAST:
	case CLOCK_SECOND:
		timespec2bintime(&timo->end, &bt);
		switch (timo->clockid) {
		case CLOCK_REALTIME:
		case CLOCK_REALTIME_PRECISE:
		case CLOCK_REALTIME_FAST:
		case CLOCK_SECOND:
			getboottimebin(&bbt);
			bintime_sub(&bt, &bbt);
			break;
		}
		if (bt.sec < 0)
			return (ETIMEDOUT);
		if (bt.sec >= (SBT_MAX >> 32)) {
			*sbt = 0;
			*flags = 0;
			return (0);
		}
		*sbt = bttosbt(bt);
		umtx_abs_timeout_enforce_min(sbt);

		/*
		 * Check if the absolute time should be aligned to
		 * avoid firing multiple timer events in non-periodic
		 * timer mode.
		 */
		switch (timo->clockid) {
		case CLOCK_REALTIME_FAST:
		case CLOCK_MONOTONIC_FAST:
		case CLOCK_UPTIME_FAST:
			rem = *sbt % tc_tick_sbt;
			if (__predict_true(rem != 0))
				*sbt += tc_tick_sbt - rem;
			break;
		case CLOCK_SECOND:
			rem = *sbt % SBT_1S;
			if (__predict_true(rem != 0))
				*sbt += SBT_1S - rem;
			break;
		}
		*flags = C_ABSOLUTE;
		return (0);

	/* Clocks that have to be periodically polled. */
	case CLOCK_VIRTUAL:
	case CLOCK_PROF:
	case CLOCK_THREAD_CPUTIME_ID:
	case CLOCK_PROCESS_CPUTIME_ID:
	default:
		kern_clock_gettime(curthread, timo->clockid, &timo->cur);
		if (timespeccmp(&timo->end, &timo->cur, <=))
			return (ETIMEDOUT);
		timespecsub(&timo->end, &timo->cur, &tts);
		*sbt = tick_sbt * tstohz(&tts);
		*flags = C_HARDCLOCK;
		return (0);
	}
}

static uint32_t
umtx_unlock_val(uint32_t flags, bool rb)
{

	if (rb)
		return (UMUTEX_RB_OWNERDEAD);
	else if ((flags & UMUTEX_NONCONSISTENT) != 0)
		return (UMUTEX_RB_NOTRECOV);
	else
		return (UMUTEX_UNOWNED);
}

/*
 * Put the thread into a sleep state; before sleeping, check whether the
 * thread was removed from the umtx queue.
 */
int
umtxq_sleep(struct umtx_q *uq, const char *wmesg,
    struct umtx_abs_timeout *timo)
{
	struct umtxq_chain *uc;
	sbintime_t sbt = 0;
	int error, flags = 0;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	for (;;) {
		if (!(uq->uq_flags & UQF_UMTXQ)) {
			error = 0;
			break;
		}
		if (timo != NULL) {
			if (timo->is_abs_real)
				curthread->td_rtcgen =
				    atomic_load_acq_int(&rtc_generation);
			error = umtx_abs_timeout_getsbt(timo, &sbt, &flags);
			if (error != 0)
				break;
		}
		error = msleep_sbt(uq, &uc->uc_lock, PCATCH | PDROP, wmesg,
		    sbt, 0, flags);
		uc = umtxq_getchain(&uq->uq_key);
		mtx_lock(&uc->uc_lock);
		if (error == EINTR || error == ERESTART)
			break;
		if (error == EWOULDBLOCK && (flags & C_ABSOLUTE) != 0) {
			error = ETIMEDOUT;
			break;
		}
	}

	curthread->td_rtcgen = 0;
	return (error);
}
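/*
 * Added note (not from the original source): the td_rtcgen handling
 * above exists for steps of the real-time clock.  For an absolute
 * CLOCK_REALTIME-family sleep, the thread records the current
 * rtc_generation before computing its wakeup time; if the wall clock is
 * stepped (e.g. by settimeofday()), the generation changes and such
 * sleepers are awakened so the loop recomputes the sbintime deadline
 * against the new clock.  Resetting td_rtcgen to zero on return opts
 * the thread back out of that machinery.
 */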
/*
 * Convert a userspace address into a unique logical address (key).
 */
int
umtx_key_get(const void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else {
		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
		    &entry, &key->info.shared.object, &pindex, &prot,
		    &wired) != KERN_SUCCESS) {
			return (EFAULT);
		}

		if ((share == PROCESS_SHARE) ||
		    (share == AUTO_SHARE &&
		     VM_INHERIT_SHARE == entry->inheritance)) {
			key->shared = 1;
			key->info.shared.offset = (vm_offset_t)addr -
			    entry->start + entry->offset;
			vm_object_reference(key->info.shared.object);
		} else {
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		vm_map_lookup_done(map, entry);
	}

	umtxq_hash(key);
	return (0);
}

/*
 * Release a key.
 */
void
umtx_key_release(struct umtx_key *key)
{

	if (key->shared)
		vm_object_deallocate(key->info.shared.object);
}
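/*
 * Added note (not from the original source): key identity is what makes
 * cross-process synchronization work.  A private key is the pair
 * (vmspace, virtual address), so it can only ever match within one
 * process.  A shared key is the pair (backing VM object, offset), so
 * the same shared page mapped at different virtual addresses in two
 * processes still hashes to the same chain and matches the same key,
 * which is exactly what a process-shared pthread mutex requires.
 */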
#ifdef COMPAT_FREEBSD10
/*
 * Lock a umtx object.
 */
static int
do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
    const struct timespec *timeout)
{
	struct umtx_abs_timeout timo;
	struct umtx_q *uq;
	u_long owner;
	u_long old;
	int error = 0;

	uq = td->td_umtxq;
	if (timeout != NULL)
		umtx_abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout);

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in
		 * userland.
		 */
		owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMTX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested, try to acquire it. */
		if (owner == UMTX_CONTESTED) {
			owner = casuword(&umtx->u_owner,
			    UMTX_CONTESTED, id | UMTX_CONTESTED);

			if (owner == UMTX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			error = thread_check_susp(td, false);
			if (error != 0)
				break;

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
		    AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timeout == NULL ?
			    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);

		if (error == 0)
			error = thread_check_susp(td, false);
	}

	if (timeout == NULL) {
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		/* Timed locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a umtx object.
 */
static int
do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
{
	struct umtx_key key;
	u_long owner;
	u_long old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMTX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland. */
	if ((owner & UMTX_CONTESTED) == 0) {
		old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks. */
	if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there are zero or one threads waiting for it.  Otherwise,
	 * it must be marked as contested.
	 */
	old = casuword(&umtx->u_owner, owner,
	    count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
#ifdef COMPAT_FREEBSD32

/*
 * Lock a umtx object.
 */
static int
do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id,
    const struct timespec *timeout)
{
	struct umtx_abs_timeout timo;
	struct umtx_q *uq;
	uint32_t owner;
	uint32_t old;
	int error = 0;

	uq = td->td_umtxq;

	if (timeout != NULL)
		umtx_abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout);

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in
		 * userland.
		 */
		owner = casuword32(m, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested, try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(m,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
			if (owner == UMUTEX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			error = thread_check_susp(td, false);
			if (error != 0)
				break;

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
		    AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(m, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timeout == NULL ?
			    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);

		if (error == 0)
			error = thread_check_susp(td, false);
	}

	if (timeout == NULL) {
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		/* Timed locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}
/*
 * Unlock a umtx object.
 */
static int
do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(m);
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland. */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(m, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks. */
	if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there are zero or one threads waiting for it.  Otherwise,
	 * it must be marked as contested.
	 */
	old = casuword32(m, owner,
	    count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
#endif /* COMPAT_FREEBSD32 */
#endif /* COMPAT_FREEBSD10 */

/*
 * Fetch and compare a value; sleep on the address if the value has not
 * changed.
 */
static int
do_wait(struct thread *td, void *addr, u_long id,
    struct _umtx_time *timeout, int compat32, int is_private)
{
	struct umtx_abs_timeout timo;
	struct umtx_q *uq;
	u_long tmp;
	uint32_t tmp32;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		umtx_abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	if (compat32 == 0) {
		error = fueword(addr, &tmp);
		if (error != 0)
			error = EFAULT;
	} else {
		error = fueword32(addr, &tmp32);
		if (error == 0)
			tmp = tmp32;
		else
			error = EFAULT;
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		if (tmp == id)
			error = umtxq_sleep(uq, "uwait", timeout == NULL ?
			    NULL : &timo);
		if ((uq->uq_flags & UQF_UMTXQ) == 0)
			error = 0;
		else
			umtxq_remove(uq);
	} else if ((uq->uq_flags & UQF_UMTXQ) != 0) {
		umtxq_remove(uq);
	}
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

/*
 * Wake up threads sleeping on the specified address.
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
		return (ret);
	umtxq_lock(&key);
	umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}
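/*
 * Added note (not from the original source) on the userspace-access
 * return conventions used throughout the lock implementations below:
 * fueword()/fueword32() return 0 on success and -1 on fault, storing
 * the fetched value through their out pointer.  casueword32() returns
 * -1 on fault, 0 when the comparison succeeded and the new value was
 * stored, and 1 when the comparison failed (the observed value is
 * returned through the out pointer).  The rv == 1 paths therefore mean
 * "lost a race, re-read and retry", usually after thread_check_susp()
 * so that suspension requests are not starved by the retry loop.
 */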
/*
 * Lock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int mode)
{
	struct umtx_abs_timeout timo;
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	error = 0;
	if (timeout != NULL)
		umtx_abs_timeout_init2(&timo, timeout);

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */
	for (;;) {
		rv = fueword32(&m->m_owner, &owner);
		if (rv == -1)
			return (EFAULT);
		if (mode == _UMUTEX_WAIT) {
			if (owner == UMUTEX_UNOWNED ||
			    owner == UMUTEX_CONTESTED ||
			    owner == UMUTEX_RB_OWNERDEAD ||
			    owner == UMUTEX_RB_NOTRECOV)
				return (0);
		} else {
			/*
			 * Robust mutex terminated.  Kernel duty is to
			 * return EOWNERDEAD to the userspace.  The
			 * umutex.m_flags UMUTEX_NONCONSISTENT is set
			 * by the common userspace code.
			 */
			if (owner == UMUTEX_RB_OWNERDEAD) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_RB_OWNERDEAD, &owner,
				    id | UMUTEX_CONTESTED);
				if (rv == -1)
					return (EFAULT);
				if (rv == 0) {
					MPASS(owner == UMUTEX_RB_OWNERDEAD);
					return (EOWNERDEAD); /* success */
				}
				MPASS(rv == 1);
				rv = thread_check_susp(td, false);
				if (rv != 0)
					return (rv);
				continue;
			}
			if (owner == UMUTEX_RB_NOTRECOV)
				return (ENOTRECOVERABLE);

			/*
			 * Try the uncontested case.  This should be
			 * done in userland.
			 */
			rv = casueword32(&m->m_owner, UMUTEX_UNOWNED,
			    &owner, id);
			/* The address was invalid. */
			if (rv == -1)
				return (EFAULT);

			/* The acquire succeeded. */
			if (rv == 0) {
				MPASS(owner == UMUTEX_UNOWNED);
				return (0);
			}

			/*
			 * If no one owns it but it is contested, try
			 * to acquire it.
			 */
			MPASS(rv == 1);
			if (owner == UMUTEX_CONTESTED) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_CONTESTED, &owner,
				    id | UMUTEX_CONTESTED);
				/* The address was invalid. */
				if (rv == -1)
					return (EFAULT);
				if (rv == 0) {
					MPASS(owner == UMUTEX_CONTESTED);
					return (0);
				}
				if (rv == 1) {
					rv = thread_check_susp(td, false);
					if (rv != 0)
						return (rv);
				}

				/*
				 * If this failed the lock has
				 * changed, restart.
				 */
				continue;
			}

			/* rv == 1 but not contested, likely store failure */
			rv = thread_check_susp(td, false);
			if (rv != 0)
				return (rv);
		}

		if (mode == _UMUTEX_TRY)
			return (EBUSY);

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old,
		    owner | UMUTEX_CONTESTED);

		/* The address was invalid or casueword failed to store. */
		if (rv == -1 || rv == 1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			if (rv == -1)
				return (EFAULT);
			if (rv == 1) {
				rv = thread_check_susp(td, false);
				if (rv != 0)
					return (rv);
			}
			continue;
		}

		/*
		 * We set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		MPASS(old == owner);
		error = umtxq_sleep(uq, "umtxn", timeout == NULL ?
		    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);

		if (error == 0)
			error = thread_check_susp(td, false);
	}

	return (0);
}
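/*
 * Illustrative sketch (not the libthr source): the userland fast path
 * that pairs with do_lock_normal() above is a single atomic
 * compare-and-set, entering the kernel only on contention, e.g.:
 *
 *	id = thread_id();
 *	if (atomic_cmpset_acq_32(&m->m_owner, UMUTEX_UNOWNED, id))
 *		return (0);		uncontested acquire
 *	return (syscall slow path, e.g. _umtx_op(...));
 *
 * This is why several comments in this file say "this should be done in
 * userland": the kernel retries the same CAS only because it cannot
 * trust userspace to have performed it.
 */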
/*
 * Unlock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	uint32_t owner, old, id, newlock;
	int error, count;

	id = td->td_tid;

again:
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	newlock = umtx_unlock_val(flags, rb);
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, newlock);
		if (error == -1)
			return (EFAULT);
		if (error == 1) {
			error = thread_check_susp(td, false);
			if (error != 0)
				return (error);
			goto again;
		}
		MPASS(old == owner);
		return (0);
	}

	/* We should only ever be in here for contested locks. */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there are zero or one threads waiting for it.  Otherwise,
	 * it must be marked as contested.
	 */
	if (count > 1)
		newlock |= UMUTEX_CONTESTED;
	error = casueword32(&m->m_owner, owner, &old, newlock);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (error == 1) {
		if (old != owner)
			return (EINVAL);
		error = thread_check_susp(td, false);
		if (error != 0)
			return (error);
		goto again;
	}
	return (0);
}
/*
 * Check if the mutex is available and wake up a waiter;
 * this is done only for a simple mutex.
 */
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

again:
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != 0 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV)
		return (0);

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	/* We should only ever be in here for contested locks. */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	if (count <= 1 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV) {
		error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    UMUTEX_UNOWNED);
		if (error == -1) {
			error = EFAULT;
		} else if (error == 1) {
			umtxq_lock(&key);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			error = thread_check_susp(td, false);
			if (error != 0)
				return (error);
			goto again;
		}
	}

	umtxq_lock(&key);
	if (error == 0 && count != 0) {
		MPASS((owner & ~UMUTEX_CONTESTED) == 0 ||
		    owner == UMUTEX_RB_OWNERDEAD ||
		    owner == UMUTEX_RB_NOTRECOV);
		umtxq_signal(&key, 1);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

/*
 * Check if the mutex has waiters and try to fix the contention bit.
 */
static int
do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old;
	int type;
	int error;
	int count;

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT |
	    UMUTEX_ROBUST)) {
	case 0:
	case UMUTEX_ROBUST:
		type = TYPE_NORMAL_UMUTEX;
		break;
	case UMUTEX_PRIO_INHERIT:
		type = TYPE_PI_UMUTEX;
		break;
	case (UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST):
		type = TYPE_PI_ROBUST_UMUTEX;
		break;
	case UMUTEX_PRIO_PROTECT:
		type = TYPE_PP_UMUTEX;
		break;
	case (UMUTEX_PRIO_PROTECT | UMUTEX_ROBUST):
		type = TYPE_PP_ROBUST_UMUTEX;
		break;
	default:
		return (EINVAL);
	}
	if ((error = umtx_key_get(m, type, GET_SHARE(flags), &key)) != 0)
		return (error);

	owner = 0;
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		error = EFAULT;

	/*
	 * Only repair the contention bit if there is a waiter; this means
	 * the mutex is still being referenced by userland code.
	 * Otherwise, don't update any memory.
	 */
	while (error == 0 && (owner & UMUTEX_CONTESTED) == 0 &&
	    (count > 1 || (count == 1 && (owner & ~UMUTEX_CONTESTED) != 0))) {
		error = casueword32(&m->m_owner, owner, &old,
		    owner | UMUTEX_CONTESTED);
		if (error == -1) {
			error = EFAULT;
			break;
		}
		if (error == 0) {
			MPASS(old == owner);
			break;
		}
		owner = old;
		error = thread_check_susp(td, false);
	}

	umtxq_lock(&key);
	if (error == EFAULT) {
		umtxq_signal(&key, INT_MAX);
	} else if (count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 ||
	    owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV))
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

struct umtx_pi *
umtx_pi_alloc(int flags)
{
	struct umtx_pi *pi;

	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
	TAILQ_INIT(&pi->pi_blocked);
	atomic_add_int(&umtx_pi_allocated, 1);
	return (pi);
}

void
umtx_pi_free(struct umtx_pi *pi)
{

	uma_zfree(umtx_pi_zone, pi);
	atomic_add_int(&umtx_pi_allocated, -1);
}

/*
 * Adjust the thread's position on a pi_state after its priority has
 * been changed.
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread or higher than the next thread.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}

static struct umtx_pi *
umtx_pi_next(struct umtx_pi *pi)
{
	struct umtx_q *uq_owner;

	if (pi->pi_owner == NULL)
		return (NULL);
	uq_owner = pi->pi_owner->td_umtxq;
	if (uq_owner == NULL)
		return (NULL);
	return (uq_owner->uq_pi_blocked);
}

/*
 * Floyd's Cycle-Finding Algorithm.
 */
static bool
umtx_pi_check_loop(struct umtx_pi *pi)
{
	struct umtx_pi *pi1;	/* fast iterator */

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (false);
	pi1 = pi;
	for (;;) {
		pi = umtx_pi_next(pi);
		if (pi == NULL)
			break;
		pi1 = umtx_pi_next(pi1);
		if (pi1 == NULL)
			break;
		pi1 = umtx_pi_next(pi1);
		if (pi1 == NULL)
			break;
		if (pi == pi1)
			return (true);
	}
	return (false);
}
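/*
 * Added note (not from the original source): umtx_pi_check_loop() above
 * is the classic tortoise-and-hare cycle detector.  Per iteration, pi
 * (the slow pointer) follows one owner-to-blocked-on edge while pi1
 * (the fast pointer) follows two; the pointers can only meet again if
 * the ownership chain is circular.  Such a cycle arises from a userland
 * deadlock, e.g. thread A owns M1 and blocks on M2 while thread B owns
 * M2 and blocks on M1; detecting it keeps the priority-propagation
 * walks below from iterating forever.
 */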
/*
 * Propagate priority when a thread is blocked on a POSIX PI mutex.
 */
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;
	if (umtx_pi_check_loop(pi))
		return;

	for (;;) {
		td = pi->pi_owner;
		if (td == NULL || td == curthread)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		thread_lock(td);
		if (td->td_lend_user_pri > pri)
			sched_lend_user_prio(td, pri);
		else {
			thread_unlock(td);
			break;
		}
		thread_unlock(td);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		if (pi == NULL)
			break;
		/* Resort td on the list if needed. */
		umtx_pi_adjust_thread(pi, td);
	}
}

/*
 * Unpropagate priority for a PI mutex when a thread blocked on
 * it is interrupted by a signal or resumed by other threads.
 */
static void
umtx_repropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);

	if (umtx_pi_check_loop(pi))
		return;
	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		thread_lock(pi->pi_owner);
		sched_lend_user_prio(pi->pi_owner, pri);
		thread_unlock(pi->pi_owner);
		if ((pi = uq_owner->uq_pi_blocked) != NULL)
			umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
	}
}
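/*
 * Worked example for the two walks above (added note, not from the
 * original source).  Suppose thread T1 with UPRI(T1) = 90 blocks on a
 * PI mutex owned by T2, whose lent priority is currently 140 (lower
 * numeric value means better priority): propagation lends T2 priority
 * 90.  If T2 is itself blocked on another PI mutex owned by T3, the
 * loop continues and T3 is lent 90 as well.  When T1 later gives up,
 * umtx_repropagate_priority() recomputes each owner's lent priority
 * from scratch as the best value among the first waiters of all PI
 * mutexes it still owns and its own uq_inherited_pri, so the boost
 * decays to exactly what the remaining waiters justify.
 */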
/*
 * Insert a PI mutex into the owned list.
 */
static void
umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_assert(&umtx_lock, MA_OWNED);
	MPASS(pi->pi_owner == NULL);
	pi->pi_owner = owner;
	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
}

/*
 * Disown a PI mutex, and remove it from the owned list.
 */
static void
umtx_pi_disown(struct umtx_pi *pi)
{

	mtx_assert(&umtx_lock, MA_OWNED);
	TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, pi, pi_link);
	pi->pi_owner = NULL;
}

/*
 * Claim ownership of a PI mutex.
 */
int
umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq;
	int pri;

	mtx_lock(&umtx_lock);
	if (pi->pi_owner == owner) {
		mtx_unlock(&umtx_lock);
		return (0);
	}

	if (pi->pi_owner != NULL) {
		/*
		 * userland may have already messed the mutex, sigh.
		 */
		mtx_unlock(&umtx_lock);
		return (EPERM);
	}
	umtx_pi_setowner(pi, owner);
	uq = TAILQ_FIRST(&pi->pi_blocked);
	if (uq != NULL) {
		pri = UPRI(uq->uq_thread);
		thread_lock(owner);
		if (pri < UPRI(owner))
			sched_lend_user_prio(owner, pri);
		thread_unlock(owner);
	}
	mtx_unlock(&umtx_lock);
	return (0);
}

/*
 * Adjust a thread's position in the queue of the PI mutex it is blocked
 * on; this may trigger a new round of priority propagation.
 */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	mtx_lock(&umtx_lock);
	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	if (pi != NULL) {
		umtx_pi_adjust_thread(pi, td);
		umtx_repropagate_priority(pi);
	}
	mtx_unlock(&umtx_lock);
}

/*
 * Sleep on a PI mutex.
 */
int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, uint32_t owner,
    const char *wmesg, struct umtx_abs_timeout *timo, bool shared)
{
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int error, pri;
#ifdef INVARIANTS
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
#endif
	error = 0;
	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key));
	KASSERT(uc->uc_busy != 0, ("umtx chain is not busy"));
	umtxq_insert(uq);
	mtx_lock(&umtx_lock);
	if (pi->pi_owner == NULL) {
		mtx_unlock(&umtx_lock);
		td1 = tdfind(owner, shared ? -1 : td->td_proc->p_pid);
		mtx_lock(&umtx_lock);
		if (td1 != NULL) {
			if (pi->pi_owner == NULL)
				umtx_pi_setowner(pi, td1);
			PROC_UNLOCK(td1->td_proc);
		}
	}

	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	thread_lock(td);
	td->td_flags |= TDF_UPIBLOCKED;
	thread_unlock(td);
	umtx_propagate_priority(td);
	mtx_unlock(&umtx_lock);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, wmesg, timo);
	umtxq_remove(uq);

	mtx_lock(&umtx_lock);
	uq->uq_pi_blocked = NULL;
	thread_lock(td);
	td->td_flags &= ~TDF_UPIBLOCKED;
	thread_unlock(td);
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_repropagate_priority(pi);
	mtx_unlock(&umtx_lock);
	umtxq_unlock(&uq->uq_key);

	return (error);
}

/*
 * Add a reference count for a PI mutex.
 */
void
umtx_pi_ref(struct umtx_pi *pi)
{

	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&pi->pi_key));
	pi->pi_refcount++;
}
/*
 * Decrease the reference count for a PI mutex; if the counter drops
 * to zero, its memory is freed.
 */
void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		mtx_lock(&umtx_lock);
		if (pi->pi_owner != NULL)
			umtx_pi_disown(pi);
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
		    ("blocked queue not empty"));
		mtx_unlock(&umtx_lock);
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		umtx_pi_free(pi);
	}
}

/*
 * Find a PI mutex in the hash table.
 */
struct umtx_pi *
umtx_pi_lookup(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtx_pi *pi;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);

	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
		if (umtx_key_match(&pi->pi_key, key)) {
			return (pi);
		}
	}
	return (NULL);
}

/*
 * Insert a PI mutex into the hash table.
 */
void
umtx_pi_insert(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
}

/*
 * Drop a PI mutex and wake up a top waiter.
 */
int
umtx_pi_drop(struct thread *td, struct umtx_key *key, bool rb, int *count)
{
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	int pri;

	UMTXQ_ASSERT_LOCKED_BUSY(key);
	*count = umtxq_count_pi(key, &uq_first);
	if (uq_first != NULL) {
		mtx_lock(&umtx_lock);
		pi = uq_first->uq_pi_blocked;
		KASSERT(pi != NULL, ("pi == NULL?"));
		if (pi->pi_owner != td && !(rb && pi->pi_owner == NULL)) {
			mtx_unlock(&umtx_lock);
			/* userland messed the mutex */
			return (EPERM);
		}
		uq_me = td->td_umtxq;
		if (pi->pi_owner == td)
			umtx_pi_disown(pi);
		/* Get the highest-priority thread that is still sleeping. */
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		while (uq_first != NULL &&
		    (uq_first->uq_flags & UQF_UMTXQ) == 0) {
			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
		}
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
		if (uq_first)
			umtxq_signal_thread(uq_first);
	} else {
		pi = umtx_pi_lookup(key);
		/*
		 * A umtx_pi can exist if a signal or timeout removed the
		 * last waiter from the umtxq, but there is still
		 * a thread in do_lock_pi() holding the umtx_pi.
		 */
		if (pi != NULL) {
			/*
			 * The umtx_pi can be unowned, such as when a thread
			 * has just entered do_lock_pi(), allocated the
			 * umtx_pi, and unlocked the umtxq.
			 * If the current thread owns it, it must disown it.
			 */
			mtx_lock(&umtx_lock);
			if (pi->pi_owner == td)
				umtx_pi_disown(pi);
			mtx_unlock(&umtx_lock);
		}
	}
	return (0);
}
 */
static int
do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct umtx_abs_timeout timo;
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, old_owner, owner, old;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		umtx_abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	pi = umtx_pi_lookup(&uq->uq_key);
	if (pi == NULL) {
		new_pi = umtx_pi_alloc(M_NOWAIT);
		if (new_pi == NULL) {
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc(M_WAITOK);
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL) {
				umtx_pi_free(new_pi);
				new_pi = NULL;
			}
		}
		if (new_pi != NULL) {
			new_pi->pi_key = uq->uq_key;
			umtx_pi_insert(new_pi);
			pi = new_pi;
		}
	}
	umtx_pi_ref(pi);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Care must be exercised when dealing with the umtx structure;
	 * any access can fault.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}
		/* The acquire succeeded. */
		if (rv == 0) {
			MPASS(owner == UMUTEX_UNOWNED);
			error = 0;
			break;
		}

		if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
			break;
		}

		/*
		 * Nobody owns it, but the acquire failed.  This can happen
		 * with ll/sc atomics.
		 */
		if (owner == UMUTEX_UNOWNED) {
			error = thread_check_susp(td, true);
			if (error != 0)
				break;
			continue;
		}

		/*
		 * Avoid overwriting a possible error from sleep due
		 * to a pending signal with the suspension check result.
		 */
		if (error == 0) {
			error = thread_check_susp(td, true);
			if (error != 0)
				break;
		}

		/* If no one owns it but it is contested, try to acquire it. */
		if (owner == UMUTEX_CONTESTED || owner == UMUTEX_RB_OWNERDEAD) {
			old_owner = owner;
			rv = casueword32(&m->m_owner, owner, &owner,
			    id | UMUTEX_CONTESTED);
			/* The address was invalid. */
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (rv == 1) {
				if (error == 0) {
					error = thread_check_susp(td, true);
					if (error != 0)
						break;
				}

				/*
				 * If this failed, the lock could have
				 * changed; restart.
				 */
				continue;
			}

			MPASS(rv == 0);
			MPASS(owner == old_owner);
			umtxq_lock(&uq->uq_key);
			umtxq_busy(&uq->uq_key);
			error = umtx_pi_claim(pi, td);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			if (error != 0) {
				/*
				 * Since we're going to return an
				 * error, restore the m_owner to its
				 * previous, unowned state to avoid
				 * compounding the problem.
				 */
				(void)casuword32(&m->m_owner,
				    id | UMUTEX_CONTESTED, old_owner);
			}
			if (error == 0 && old_owner == UMUTEX_RB_OWNERDEAD)
				error = EOWNERDEAD;
			break;
		}

		if ((owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails,
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old, owner |
		    UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		if (rv == 1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = thread_check_susp(td, true);
			if (error != 0)
				break;

			/*
			 * The lock changed and we need to retry, or we
			 * lost a race to the thread unlocking the
			 * umtx.  Note that the UMUTEX_RB_OWNERDEAD
			 * value for owner is impossible there.
			 */
			continue;
		}

		umtxq_lock(&uq->uq_key);

		/* We set the contested bit, sleep. */
		MPASS(old == owner);
		error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
		    "umtxpi", timeout == NULL ? NULL : &timo,
		    (flags & USYNC_PROCESS_SHARED) != 0);
		if (error != 0)
			continue;

		error = thread_check_susp(td, false);
		if (error != 0)
			break;
	}

	umtxq_lock(&uq->uq_key);
	umtx_pi_unref(pi);
	umtxq_unlock(&uq->uq_key);

	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Unlock a PI mutex.
 */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	uint32_t id, new_owner, old, owner;
	int count, error;

	id = td->td_tid;

usrloop:
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	new_owner = umtx_unlock_val(flags, rb);

	/* This should be done in userland. */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, new_owner);
		if (error == -1)
			return (EFAULT);
		if (error == 1) {
			error = thread_check_susp(td, true);
			if (error != 0)
				return (error);
			goto usrloop;
		}
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks. */
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	error = umtx_pi_drop(td, &key, rb, &count);
	if (error != 0) {
		umtxq_unbusy(&key);
		umtxq_unlock(&key);
		umtx_key_release(&key);
		/* userland messed the mutex */
		return (error);
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * zero or one thread is waiting for it.
	 * Otherwise, it must be marked as contested.
	 */

	if (count > 1)
		new_owner |= UMUTEX_CONTESTED;
again:
	error = casueword32(&m->m_owner, owner, &old, new_owner);
	if (error == 1) {
		error = thread_check_susp(td, false);
		if (error == 0)
			goto again;
	}
	umtxq_unbusy_unlocked(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (error == 0 && old != owner)
		return (EINVAL);
	return (error);
}

/*
 * Lock a PP mutex.
 */
static int
do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct umtx_abs_timeout timo;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t ceiling;
	uint32_t owner, id;
	int error, pri, old_inherited_pri, su, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		umtx_abs_timeout_init2(&timo, timeout);

	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
	for (;;) {
		old_inherited_pri = uq->uq_inherited_pri;
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		rv = fueword32(&m->m_ceilings[0], &ceiling);
		if (rv == -1) {
			error = EFAULT;
			goto out;
		}
		ceiling = RTP_PRIO_MAX - ceiling;
		if (ceiling > RTP_PRIO_MAX) {
			error = EINVAL;
			goto out;
		}

		mtx_lock(&umtx_lock);
		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
			mtx_unlock(&umtx_lock);
			error = EINVAL;
			goto out;
		}
		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
			thread_lock(td);
			if (uq->uq_inherited_pri < UPRI(td))
				sched_lend_user_prio(td, uq->uq_inherited_pri);
			thread_unlock(td);
		}
		mtx_unlock(&umtx_lock);

		rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    id | UMUTEX_CONTESTED);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}
		if (rv == 0) {
			MPASS(owner == UMUTEX_CONTESTED);
			error = 0;
			break;
		}
		/* rv == 1 */
		if (owner == UMUTEX_RB_OWNERDEAD) {
			rv = casueword32(&m->m_owner, UMUTEX_RB_OWNERDEAD,
			    &owner, id | UMUTEX_CONTESTED);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (rv == 0) {
				MPASS(owner == UMUTEX_RB_OWNERDEAD);
				error = EOWNERDEAD; /* success */
				break;
			}

			/*
			 * rv == 1, so only check for suspension if we
			 * did not already catch a signal.  If we get an
			 * error from the check, the same condition is
			 * checked by the umtxq_sleep() call below, so we
			 * should clear the error so as not to skip the
			 * last loop iteration.
			 */
			if (error == 0) {
				error = thread_check_susp(td, false);
				if (error == 0) {
					if (try != 0)
						error = EBUSY;
					else
						continue;
				}
				error = 0;
			}
		} else if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
		}

		if (try != 0)
			error = EBUSY;

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", timeout == NULL ?
		    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);

		mtx_lock(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}

	if (error != 0 && error != EOWNERDEAD) {
		mtx_lock(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}

out:
	umtxq_unbusy_unlocked(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Unlock a PP mutex.
 */
static int
do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t id, owner, rceiling;
	int error, pri, new_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);

	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
	if (error != 0)
		return (error);

	if (rceiling == -1)
		new_inherited_pri = PRI_MAX;
	else {
		rceiling = RTP_PRIO_MAX - rceiling;
		if (rceiling > RTP_PRIO_MAX)
			return (EINVAL);
		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
	}

	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_unlock(&key);
	/*
	 * For a priority protected mutex, always set the unlocked state
	 * to UMUTEX_CONTESTED so that userland always enters the kernel
	 * to lock the mutex.  This is necessary because the thread
	 * priority has to be adjusted for such a mutex.
2730 */ 2731 error = suword32(&m->m_owner, umtx_unlock_val(flags, rb) | 2732 UMUTEX_CONTESTED); 2733 2734 umtxq_lock(&key); 2735 if (error == 0) 2736 umtxq_signal(&key, 1); 2737 umtxq_unbusy(&key); 2738 umtxq_unlock(&key); 2739 2740 if (error == -1) 2741 error = EFAULT; 2742 else { 2743 mtx_lock(&umtx_lock); 2744 if (su != 0) 2745 uq->uq_inherited_pri = new_inherited_pri; 2746 pri = PRI_MAX; 2747 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2748 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2749 if (uq2 != NULL) { 2750 if (pri > UPRI(uq2->uq_thread)) 2751 pri = UPRI(uq2->uq_thread); 2752 } 2753 } 2754 if (pri > uq->uq_inherited_pri) 2755 pri = uq->uq_inherited_pri; 2756 thread_lock(td); 2757 sched_lend_user_prio(td, pri); 2758 thread_unlock(td); 2759 mtx_unlock(&umtx_lock); 2760 } 2761 umtx_key_release(&key); 2762 return (error); 2763 } 2764 2765 static int 2766 do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling, 2767 uint32_t *old_ceiling) 2768 { 2769 struct umtx_q *uq; 2770 uint32_t flags, id, owner, save_ceiling; 2771 int error, rv, rv1; 2772 2773 error = fueword32(&m->m_flags, &flags); 2774 if (error == -1) 2775 return (EFAULT); 2776 if ((flags & UMUTEX_PRIO_PROTECT) == 0) 2777 return (EINVAL); 2778 if (ceiling > RTP_PRIO_MAX) 2779 return (EINVAL); 2780 id = td->td_tid; 2781 uq = td->td_umtxq; 2782 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2783 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2784 &uq->uq_key)) != 0) 2785 return (error); 2786 for (;;) { 2787 umtxq_lock(&uq->uq_key); 2788 umtxq_busy(&uq->uq_key); 2789 umtxq_unlock(&uq->uq_key); 2790 2791 rv = fueword32(&m->m_ceilings[0], &save_ceiling); 2792 if (rv == -1) { 2793 error = EFAULT; 2794 break; 2795 } 2796 2797 rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, 2798 id | UMUTEX_CONTESTED); 2799 if (rv == -1) { 2800 error = EFAULT; 2801 break; 2802 } 2803 2804 if (rv == 0) { 2805 MPASS(owner == UMUTEX_CONTESTED); 2806 rv = suword32(&m->m_ceilings[0], ceiling); 2807 rv1 = suword32(&m->m_owner, UMUTEX_CONTESTED); 2808 error = (rv == 0 && rv1 == 0) ? 0: EFAULT; 2809 break; 2810 } 2811 2812 if ((owner & ~UMUTEX_CONTESTED) == id) { 2813 rv = suword32(&m->m_ceilings[0], ceiling); 2814 error = rv == 0 ? 0 : EFAULT; 2815 break; 2816 } 2817 2818 if (owner == UMUTEX_RB_OWNERDEAD) { 2819 error = EOWNERDEAD; 2820 break; 2821 } else if (owner == UMUTEX_RB_NOTRECOV) { 2822 error = ENOTRECOVERABLE; 2823 break; 2824 } 2825 2826 /* 2827 * If we caught a signal, we have retried and now 2828 * exit immediately. 2829 */ 2830 if (error != 0) 2831 break; 2832 2833 /* 2834 * We set the contested bit, sleep. Otherwise the lock changed 2835 * and we need to retry or we lost a race to the thread 2836 * unlocking the umtx. 2837 */ 2838 umtxq_lock(&uq->uq_key); 2839 umtxq_insert(uq); 2840 umtxq_unbusy(&uq->uq_key); 2841 error = umtxq_sleep(uq, "umtxpp", NULL); 2842 umtxq_remove(uq); 2843 umtxq_unlock(&uq->uq_key); 2844 } 2845 umtxq_lock(&uq->uq_key); 2846 if (error == 0) 2847 umtxq_signal(&uq->uq_key, INT_MAX); 2848 umtxq_unbusy(&uq->uq_key); 2849 umtxq_unlock(&uq->uq_key); 2850 umtx_key_release(&uq->uq_key); 2851 if (error == 0 && old_ceiling != NULL) { 2852 rv = suword32(old_ceiling, save_ceiling); 2853 error = rv == 0 ? 0 : EFAULT; 2854 } 2855 return (error); 2856 } 2857 2858 /* 2859 * Lock a userland POSIX mutex. 
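 *
 * The protocol variant is selected by the m_flags word.  A hedged
 * sketch of how userland would set up and lock a PI mutex (names from
 * <sys/umtx.h>; illustrative only, not the libthr implementation):
 *
 *	struct umutex m = { .m_owner = UMUTEX_UNOWNED,
 *	    .m_flags = UMUTEX_PRIO_INHERIT };
 *	_umtx_op(&m, UMTX_OP_MUTEX_LOCK, 0, NULL, NULL);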
2860 */ 2861 static int 2862 do_lock_umutex(struct thread *td, struct umutex *m, 2863 struct _umtx_time *timeout, int mode) 2864 { 2865 uint32_t flags; 2866 int error; 2867 2868 error = fueword32(&m->m_flags, &flags); 2869 if (error == -1) 2870 return (EFAULT); 2871 2872 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2873 case 0: 2874 error = do_lock_normal(td, m, flags, timeout, mode); 2875 break; 2876 case UMUTEX_PRIO_INHERIT: 2877 error = do_lock_pi(td, m, flags, timeout, mode); 2878 break; 2879 case UMUTEX_PRIO_PROTECT: 2880 error = do_lock_pp(td, m, flags, timeout, mode); 2881 break; 2882 default: 2883 return (EINVAL); 2884 } 2885 if (timeout == NULL) { 2886 if (error == EINTR && mode != _UMUTEX_WAIT) 2887 error = ERESTART; 2888 } else { 2889 /* Timed-locking is not restarted. */ 2890 if (error == ERESTART) 2891 error = EINTR; 2892 } 2893 return (error); 2894 } 2895 2896 /* 2897 * Unlock a userland POSIX mutex. 2898 */ 2899 static int 2900 do_unlock_umutex(struct thread *td, struct umutex *m, bool rb) 2901 { 2902 uint32_t flags; 2903 int error; 2904 2905 error = fueword32(&m->m_flags, &flags); 2906 if (error == -1) 2907 return (EFAULT); 2908 2909 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2910 case 0: 2911 return (do_unlock_normal(td, m, flags, rb)); 2912 case UMUTEX_PRIO_INHERIT: 2913 return (do_unlock_pi(td, m, flags, rb)); 2914 case UMUTEX_PRIO_PROTECT: 2915 return (do_unlock_pp(td, m, flags, rb)); 2916 } 2917 2918 return (EINVAL); 2919 } 2920 2921 static int 2922 do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m, 2923 struct timespec *timeout, u_long wflags) 2924 { 2925 struct umtx_abs_timeout timo; 2926 struct umtx_q *uq; 2927 uint32_t flags, clockid, hasw; 2928 int error; 2929 2930 uq = td->td_umtxq; 2931 error = fueword32(&cv->c_flags, &flags); 2932 if (error == -1) 2933 return (EFAULT); 2934 error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key); 2935 if (error != 0) 2936 return (error); 2937 2938 if ((wflags & CVWAIT_CLOCKID) != 0) { 2939 error = fueword32(&cv->c_clockid, &clockid); 2940 if (error == -1) { 2941 umtx_key_release(&uq->uq_key); 2942 return (EFAULT); 2943 } 2944 if (clockid < CLOCK_REALTIME || 2945 clockid >= CLOCK_THREAD_CPUTIME_ID) { 2946 /* hmm, only HW clock id will work. */ 2947 umtx_key_release(&uq->uq_key); 2948 return (EINVAL); 2949 } 2950 } else { 2951 clockid = CLOCK_REALTIME; 2952 } 2953 2954 umtxq_lock(&uq->uq_key); 2955 umtxq_busy(&uq->uq_key); 2956 umtxq_insert(uq); 2957 umtxq_unlock(&uq->uq_key); 2958 2959 /* 2960 * Set c_has_waiters to 1 before releasing user mutex, also 2961 * don't modify cache line when unnecessary. 2962 */ 2963 error = fueword32(&cv->c_has_waiters, &hasw); 2964 if (error == 0 && hasw == 0) 2965 suword32(&cv->c_has_waiters, 1); 2966 2967 umtxq_unbusy_unlocked(&uq->uq_key); 2968 2969 error = do_unlock_umutex(td, m, false); 2970 2971 if (timeout != NULL) 2972 umtx_abs_timeout_init(&timo, clockid, 2973 (wflags & CVWAIT_ABSTIME) != 0, timeout); 2974 2975 umtxq_lock(&uq->uq_key); 2976 if (error == 0) { 2977 error = umtxq_sleep(uq, "ucond", timeout == NULL ? 2978 NULL : &timo); 2979 } 2980 2981 if ((uq->uq_flags & UQF_UMTXQ) == 0) 2982 error = 0; 2983 else { 2984 /* 2985 * This must be timeout,interrupted by signal or 2986 * surprious wakeup, clear c_has_waiter flag when 2987 * necessary. 
2988 */ 2989 umtxq_busy(&uq->uq_key); 2990 if ((uq->uq_flags & UQF_UMTXQ) != 0) { 2991 int oldlen = uq->uq_cur_queue->length; 2992 umtxq_remove(uq); 2993 if (oldlen == 1) { 2994 umtxq_unlock(&uq->uq_key); 2995 suword32(&cv->c_has_waiters, 0); 2996 umtxq_lock(&uq->uq_key); 2997 } 2998 } 2999 umtxq_unbusy(&uq->uq_key); 3000 if (error == ERESTART) 3001 error = EINTR; 3002 } 3003 3004 umtxq_unlock(&uq->uq_key); 3005 umtx_key_release(&uq->uq_key); 3006 return (error); 3007 } 3008 3009 /* 3010 * Signal a userland condition variable. 3011 */ 3012 static int 3013 do_cv_signal(struct thread *td, struct ucond *cv) 3014 { 3015 struct umtx_key key; 3016 int error, cnt, nwake; 3017 uint32_t flags; 3018 3019 error = fueword32(&cv->c_flags, &flags); 3020 if (error == -1) 3021 return (EFAULT); 3022 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 3023 return (error); 3024 umtxq_lock(&key); 3025 umtxq_busy(&key); 3026 cnt = umtxq_count(&key); 3027 nwake = umtxq_signal(&key, 1); 3028 if (cnt <= nwake) { 3029 umtxq_unlock(&key); 3030 error = suword32(&cv->c_has_waiters, 0); 3031 if (error == -1) 3032 error = EFAULT; 3033 umtxq_lock(&key); 3034 } 3035 umtxq_unbusy(&key); 3036 umtxq_unlock(&key); 3037 umtx_key_release(&key); 3038 return (error); 3039 } 3040 3041 static int 3042 do_cv_broadcast(struct thread *td, struct ucond *cv) 3043 { 3044 struct umtx_key key; 3045 int error; 3046 uint32_t flags; 3047 3048 error = fueword32(&cv->c_flags, &flags); 3049 if (error == -1) 3050 return (EFAULT); 3051 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 3052 return (error); 3053 3054 umtxq_lock(&key); 3055 umtxq_busy(&key); 3056 umtxq_signal(&key, INT_MAX); 3057 umtxq_unlock(&key); 3058 3059 error = suword32(&cv->c_has_waiters, 0); 3060 if (error == -1) 3061 error = EFAULT; 3062 3063 umtxq_unbusy_unlocked(&key); 3064 3065 umtx_key_release(&key); 3066 return (error); 3067 } 3068 3069 static int 3070 do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, 3071 struct _umtx_time *timeout) 3072 { 3073 struct umtx_abs_timeout timo; 3074 struct umtx_q *uq; 3075 uint32_t flags, wrflags; 3076 int32_t state, oldstate; 3077 int32_t blocked_readers; 3078 int error, error1, rv; 3079 3080 uq = td->td_umtxq; 3081 error = fueword32(&rwlock->rw_flags, &flags); 3082 if (error == -1) 3083 return (EFAULT); 3084 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3085 if (error != 0) 3086 return (error); 3087 3088 if (timeout != NULL) 3089 umtx_abs_timeout_init2(&timo, timeout); 3090 3091 wrflags = URWLOCK_WRITE_OWNER; 3092 if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER)) 3093 wrflags |= URWLOCK_WRITE_WAITERS; 3094 3095 for (;;) { 3096 rv = fueword32(&rwlock->rw_state, &state); 3097 if (rv == -1) { 3098 umtx_key_release(&uq->uq_key); 3099 return (EFAULT); 3100 } 3101 3102 /* try to lock it */ 3103 while (!(state & wrflags)) { 3104 if (__predict_false(URWLOCK_READER_COUNT(state) == 3105 URWLOCK_MAX_READERS)) { 3106 umtx_key_release(&uq->uq_key); 3107 return (EAGAIN); 3108 } 3109 rv = casueword32(&rwlock->rw_state, state, 3110 &oldstate, state + 1); 3111 if (rv == -1) { 3112 umtx_key_release(&uq->uq_key); 3113 return (EFAULT); 3114 } 3115 if (rv == 0) { 3116 MPASS(oldstate == state); 3117 umtx_key_release(&uq->uq_key); 3118 return (0); 3119 } 3120 error = thread_check_susp(td, true); 3121 if (error != 0) 3122 break; 3123 state = oldstate; 3124 } 3125 3126 if (error) 3127 break; 3128 3129 /* grab monitor lock */ 3130 umtxq_lock(&uq->uq_key); 
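	/*
	 * The chain "busy" flag taken below acts as the monitor: it is
	 * held across the re-read of rw_state and the setting of
	 * URWLOCK_READ_WAITERS, so a concurrent unlock cannot slip in
	 * between the state check and the sleep (the classic
	 * lost-wakeup guard).
	 */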
3131 umtxq_busy(&uq->uq_key); 3132 umtxq_unlock(&uq->uq_key); 3133 3134 /* 3135 * re-read the state, in case it changed between the try-lock above 3136 * and the check below 3137 */ 3138 rv = fueword32(&rwlock->rw_state, &state); 3139 if (rv == -1) 3140 error = EFAULT; 3141 3142 /* set read contention bit */ 3143 while (error == 0 && (state & wrflags) && 3144 !(state & URWLOCK_READ_WAITERS)) { 3145 rv = casueword32(&rwlock->rw_state, state, 3146 &oldstate, state | URWLOCK_READ_WAITERS); 3147 if (rv == -1) { 3148 error = EFAULT; 3149 break; 3150 } 3151 if (rv == 0) { 3152 MPASS(oldstate == state); 3153 goto sleep; 3154 } 3155 state = oldstate; 3156 error = thread_check_susp(td, false); 3157 if (error != 0) 3158 break; 3159 } 3160 if (error != 0) { 3161 umtxq_unbusy_unlocked(&uq->uq_key); 3162 break; 3163 } 3164 3165 /* state is changed while setting flags, restart */ 3166 if (!(state & wrflags)) { 3167 umtxq_unbusy_unlocked(&uq->uq_key); 3168 error = thread_check_susp(td, true); 3169 if (error != 0) 3170 break; 3171 continue; 3172 } 3173 3174 sleep: 3175 /* 3176 * Contention bit is set, before sleeping, increase 3177 * read waiter count. 3178 */ 3179 rv = fueword32(&rwlock->rw_blocked_readers, 3180 &blocked_readers); 3181 if (rv == -1) { 3182 umtxq_unbusy_unlocked(&uq->uq_key); 3183 error = EFAULT; 3184 break; 3185 } 3186 suword32(&rwlock->rw_blocked_readers, blocked_readers+1); 3187 3188 while (state & wrflags) { 3189 umtxq_lock(&uq->uq_key); 3190 umtxq_insert(uq); 3191 umtxq_unbusy(&uq->uq_key); 3192 3193 error = umtxq_sleep(uq, "urdlck", timeout == NULL ? 3194 NULL : &timo); 3195 3196 umtxq_busy(&uq->uq_key); 3197 umtxq_remove(uq); 3198 umtxq_unlock(&uq->uq_key); 3199 if (error) 3200 break; 3201 rv = fueword32(&rwlock->rw_state, &state); 3202 if (rv == -1) { 3203 error = EFAULT; 3204 break; 3205 } 3206 } 3207 3208 /* decrease read waiter count, and may clear read contention bit */ 3209 rv = fueword32(&rwlock->rw_blocked_readers, 3210 &blocked_readers); 3211 if (rv == -1) { 3212 umtxq_unbusy_unlocked(&uq->uq_key); 3213 error = EFAULT; 3214 break; 3215 } 3216 suword32(&rwlock->rw_blocked_readers, blocked_readers-1); 3217 if (blocked_readers == 1) { 3218 rv = fueword32(&rwlock->rw_state, &state); 3219 if (rv == -1) { 3220 umtxq_unbusy_unlocked(&uq->uq_key); 3221 error = EFAULT; 3222 break; 3223 } 3224 for (;;) { 3225 rv = casueword32(&rwlock->rw_state, state, 3226 &oldstate, state & ~URWLOCK_READ_WAITERS); 3227 if (rv == -1) { 3228 error = EFAULT; 3229 break; 3230 } 3231 if (rv == 0) { 3232 MPASS(oldstate == state); 3233 break; 3234 } 3235 state = oldstate; 3236 error1 = thread_check_susp(td, false); 3237 if (error1 != 0) { 3238 if (error == 0) 3239 error = error1; 3240 break; 3241 } 3242 } 3243 } 3244 3245 umtxq_unbusy_unlocked(&uq->uq_key); 3246 if (error != 0) 3247 break; 3248 } 3249 umtx_key_release(&uq->uq_key); 3250 if (error == ERESTART) 3251 error = EINTR; 3252 return (error); 3253 } 3254 3255 static int 3256 do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout) 3257 { 3258 struct umtx_abs_timeout timo; 3259 struct umtx_q *uq; 3260 uint32_t flags; 3261 int32_t state, oldstate; 3262 int32_t blocked_writers; 3263 int32_t blocked_readers; 3264 int error, error1, rv; 3265 3266 uq = td->td_umtxq; 3267 error = fueword32(&rwlock->rw_flags, &flags); 3268 if (error == -1) 3269 return (EFAULT); 3270 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3271 if (error != 0) 3272 return (error); 3273 3274 if (timeout != NULL) 3275 
umtx_abs_timeout_init2(&timo, timeout); 3276 3277 blocked_readers = 0; 3278 for (;;) { 3279 rv = fueword32(&rwlock->rw_state, &state); 3280 if (rv == -1) { 3281 umtx_key_release(&uq->uq_key); 3282 return (EFAULT); 3283 } 3284 while ((state & URWLOCK_WRITE_OWNER) == 0 && 3285 URWLOCK_READER_COUNT(state) == 0) { 3286 rv = casueword32(&rwlock->rw_state, state, 3287 &oldstate, state | URWLOCK_WRITE_OWNER); 3288 if (rv == -1) { 3289 umtx_key_release(&uq->uq_key); 3290 return (EFAULT); 3291 } 3292 if (rv == 0) { 3293 MPASS(oldstate == state); 3294 umtx_key_release(&uq->uq_key); 3295 return (0); 3296 } 3297 state = oldstate; 3298 error = thread_check_susp(td, true); 3299 if (error != 0) 3300 break; 3301 } 3302 3303 if (error) { 3304 if ((state & (URWLOCK_WRITE_OWNER | 3305 URWLOCK_WRITE_WAITERS)) == 0 && 3306 blocked_readers != 0) { 3307 umtxq_lock(&uq->uq_key); 3308 umtxq_busy(&uq->uq_key); 3309 umtxq_signal_queue(&uq->uq_key, INT_MAX, 3310 UMTX_SHARED_QUEUE); 3311 umtxq_unbusy(&uq->uq_key); 3312 umtxq_unlock(&uq->uq_key); 3313 } 3314 3315 break; 3316 } 3317 3318 /* grab monitor lock */ 3319 umtxq_lock(&uq->uq_key); 3320 umtxq_busy(&uq->uq_key); 3321 umtxq_unlock(&uq->uq_key); 3322 3323 /* 3324 * Re-read the state, in case it changed between the 3325 * try-lock above and the check below. 3326 */ 3327 rv = fueword32(&rwlock->rw_state, &state); 3328 if (rv == -1) 3329 error = EFAULT; 3330 3331 while (error == 0 && ((state & URWLOCK_WRITE_OWNER) || 3332 URWLOCK_READER_COUNT(state) != 0) && 3333 (state & URWLOCK_WRITE_WAITERS) == 0) { 3334 rv = casueword32(&rwlock->rw_state, state, 3335 &oldstate, state | URWLOCK_WRITE_WAITERS); 3336 if (rv == -1) { 3337 error = EFAULT; 3338 break; 3339 } 3340 if (rv == 0) { 3341 MPASS(oldstate == state); 3342 goto sleep; 3343 } 3344 state = oldstate; 3345 error = thread_check_susp(td, false); 3346 if (error != 0) 3347 break; 3348 } 3349 if (error != 0) { 3350 umtxq_unbusy_unlocked(&uq->uq_key); 3351 break; 3352 } 3353 3354 if ((state & URWLOCK_WRITE_OWNER) == 0 && 3355 URWLOCK_READER_COUNT(state) == 0) { 3356 umtxq_unbusy_unlocked(&uq->uq_key); 3357 error = thread_check_susp(td, false); 3358 if (error != 0) 3359 break; 3360 continue; 3361 } 3362 sleep: 3363 rv = fueword32(&rwlock->rw_blocked_writers, 3364 &blocked_writers); 3365 if (rv == -1) { 3366 umtxq_unbusy_unlocked(&uq->uq_key); 3367 error = EFAULT; 3368 break; 3369 } 3370 suword32(&rwlock->rw_blocked_writers, blocked_writers + 1); 3371 3372 while ((state & URWLOCK_WRITE_OWNER) || 3373 URWLOCK_READER_COUNT(state) != 0) { 3374 umtxq_lock(&uq->uq_key); 3375 umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE); 3376 umtxq_unbusy(&uq->uq_key); 3377 3378 error = umtxq_sleep(uq, "uwrlck", timeout == NULL ? 
3379 NULL : &timo); 3380 3381 umtxq_busy(&uq->uq_key); 3382 umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE); 3383 umtxq_unlock(&uq->uq_key); 3384 if (error) 3385 break; 3386 rv = fueword32(&rwlock->rw_state, &state); 3387 if (rv == -1) { 3388 error = EFAULT; 3389 break; 3390 } 3391 } 3392 3393 rv = fueword32(&rwlock->rw_blocked_writers, 3394 &blocked_writers); 3395 if (rv == -1) { 3396 umtxq_unbusy_unlocked(&uq->uq_key); 3397 error = EFAULT; 3398 break; 3399 } 3400 suword32(&rwlock->rw_blocked_writers, blocked_writers-1); 3401 if (blocked_writers == 1) { 3402 rv = fueword32(&rwlock->rw_state, &state); 3403 if (rv == -1) { 3404 umtxq_unbusy_unlocked(&uq->uq_key); 3405 error = EFAULT; 3406 break; 3407 } 3408 for (;;) { 3409 rv = casueword32(&rwlock->rw_state, state, 3410 &oldstate, state & ~URWLOCK_WRITE_WAITERS); 3411 if (rv == -1) { 3412 error = EFAULT; 3413 break; 3414 } 3415 if (rv == 0) { 3416 MPASS(oldstate == state); 3417 break; 3418 } 3419 state = oldstate; 3420 error1 = thread_check_susp(td, false); 3421 /* 3422 * We are leaving the URWLOCK_WRITE_WAITERS 3423 * behind, but this should not harm the 3424 * correctness. 3425 */ 3426 if (error1 != 0) { 3427 if (error == 0) 3428 error = error1; 3429 break; 3430 } 3431 } 3432 rv = fueword32(&rwlock->rw_blocked_readers, 3433 &blocked_readers); 3434 if (rv == -1) { 3435 umtxq_unbusy_unlocked(&uq->uq_key); 3436 error = EFAULT; 3437 break; 3438 } 3439 } else 3440 blocked_readers = 0; 3441 3442 umtxq_unbusy_unlocked(&uq->uq_key); 3443 } 3444 3445 umtx_key_release(&uq->uq_key); 3446 if (error == ERESTART) 3447 error = EINTR; 3448 return (error); 3449 } 3450 3451 static int 3452 do_rw_unlock(struct thread *td, struct urwlock *rwlock) 3453 { 3454 struct umtx_q *uq; 3455 uint32_t flags; 3456 int32_t state, oldstate; 3457 int error, rv, q, count; 3458 3459 uq = td->td_umtxq; 3460 error = fueword32(&rwlock->rw_flags, &flags); 3461 if (error == -1) 3462 return (EFAULT); 3463 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3464 if (error != 0) 3465 return (error); 3466 3467 error = fueword32(&rwlock->rw_state, &state); 3468 if (error == -1) { 3469 error = EFAULT; 3470 goto out; 3471 } 3472 if (state & URWLOCK_WRITE_OWNER) { 3473 for (;;) { 3474 rv = casueword32(&rwlock->rw_state, state, 3475 &oldstate, state & ~URWLOCK_WRITE_OWNER); 3476 if (rv == -1) { 3477 error = EFAULT; 3478 goto out; 3479 } 3480 if (rv == 1) { 3481 state = oldstate; 3482 if (!(oldstate & URWLOCK_WRITE_OWNER)) { 3483 error = EPERM; 3484 goto out; 3485 } 3486 error = thread_check_susp(td, true); 3487 if (error != 0) 3488 goto out; 3489 } else 3490 break; 3491 } 3492 } else if (URWLOCK_READER_COUNT(state) != 0) { 3493 for (;;) { 3494 rv = casueword32(&rwlock->rw_state, state, 3495 &oldstate, state - 1); 3496 if (rv == -1) { 3497 error = EFAULT; 3498 goto out; 3499 } 3500 if (rv == 1) { 3501 state = oldstate; 3502 if (URWLOCK_READER_COUNT(oldstate) == 0) { 3503 error = EPERM; 3504 goto out; 3505 } 3506 error = thread_check_susp(td, true); 3507 if (error != 0) 3508 goto out; 3509 } else 3510 break; 3511 } 3512 } else { 3513 error = EPERM; 3514 goto out; 3515 } 3516 3517 count = 0; 3518 3519 if (!(flags & URWLOCK_PREFER_READER)) { 3520 if (state & URWLOCK_WRITE_WAITERS) { 3521 count = 1; 3522 q = UMTX_EXCLUSIVE_QUEUE; 3523 } else if (state & URWLOCK_READ_WAITERS) { 3524 count = INT_MAX; 3525 q = UMTX_SHARED_QUEUE; 3526 } 3527 } else { 3528 if (state & URWLOCK_READ_WAITERS) { 3529 count = INT_MAX; 3530 q = UMTX_SHARED_QUEUE; 3531 } else if (state & 
		    URWLOCK_WRITE_WAITERS) {
			count = 1;
			q = UMTX_EXCLUSIVE_QUEUE;
		}
	}

	if (count) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_signal_queue(&uq->uq_key, count, q);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
	}
out:
	umtx_key_release(&uq->uq_key);
	return (error);
}

#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
static int
do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout)
{
	struct umtx_abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags, count, count1;
	int error, rv, rv1;

	uq = td->td_umtxq;
	error = fueword32(&sem->_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if (timeout != NULL)
		umtx_abs_timeout_init2(&timo, timeout);

again:
	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	rv = casueword32(&sem->_has_waiters, 0, &count1, 1);
	if (rv != -1)
		rv1 = fueword32(&sem->_count, &count);
	if (rv == -1 || rv1 == -1 || count != 0 || (rv == 1 && count1 == 0)) {
		if (rv == 0)
			suword32(&sem->_has_waiters, 0);
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		if (rv == -1 || rv1 == -1) {
			error = EFAULT;
			goto out;
		}
		if (count != 0) {
			error = 0;
			goto out;
		}
		MPASS(rv == 1 && count1 == 0);
		rv = thread_check_susp(td, true);
		if (rv == 0)
			goto again;
		error = rv;
		goto out;
	}
	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo);

	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		error = 0;
	else {
		umtxq_remove(uq);
		/* A relative timeout cannot be restarted. */
		if (error == ERESTART && timeout != NULL &&
		    (timeout->_flags & UMTX_ABSTIME) == 0)
			error = EINTR;
	}
	umtxq_unlock(&uq->uq_key);
out:
	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Signal a userland semaphore.
 */
static int
do_sem_wake(struct thread *td, struct _usem *sem)
{
	struct umtx_key key;
	int error, cnt;
	uint32_t flags;

	error = fueword32(&sem->_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	if (cnt > 0) {
		/*
		 * A count greater than zero means the memory is still
		 * being referenced by user code, so it is safe to
		 * update the _has_waiters flag.
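		 * The flag is cleared only when a single waiter
		 * remains (cnt == 1 below), so userland keeps entering
		 * the kernel on sem_post() until the queue has drained.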
3642 */ 3643 if (cnt == 1) { 3644 umtxq_unlock(&key); 3645 error = suword32(&sem->_has_waiters, 0); 3646 umtxq_lock(&key); 3647 if (error == -1) 3648 error = EFAULT; 3649 } 3650 umtxq_signal(&key, 1); 3651 } 3652 umtxq_unbusy(&key); 3653 umtxq_unlock(&key); 3654 umtx_key_release(&key); 3655 return (error); 3656 } 3657 #endif 3658 3659 static int 3660 do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout) 3661 { 3662 struct umtx_abs_timeout timo; 3663 struct umtx_q *uq; 3664 uint32_t count, flags; 3665 int error, rv; 3666 3667 uq = td->td_umtxq; 3668 flags = fuword32(&sem->_flags); 3669 if (timeout != NULL) 3670 umtx_abs_timeout_init2(&timo, timeout); 3671 3672 again: 3673 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3674 if (error != 0) 3675 return (error); 3676 umtxq_lock(&uq->uq_key); 3677 umtxq_busy(&uq->uq_key); 3678 umtxq_insert(uq); 3679 umtxq_unlock(&uq->uq_key); 3680 rv = fueword32(&sem->_count, &count); 3681 if (rv == -1) { 3682 umtxq_lock(&uq->uq_key); 3683 umtxq_unbusy(&uq->uq_key); 3684 umtxq_remove(uq); 3685 umtxq_unlock(&uq->uq_key); 3686 umtx_key_release(&uq->uq_key); 3687 return (EFAULT); 3688 } 3689 for (;;) { 3690 if (USEM_COUNT(count) != 0) { 3691 umtxq_lock(&uq->uq_key); 3692 umtxq_unbusy(&uq->uq_key); 3693 umtxq_remove(uq); 3694 umtxq_unlock(&uq->uq_key); 3695 umtx_key_release(&uq->uq_key); 3696 return (0); 3697 } 3698 if (count == USEM_HAS_WAITERS) 3699 break; 3700 rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS); 3701 if (rv == 0) 3702 break; 3703 umtxq_lock(&uq->uq_key); 3704 umtxq_unbusy(&uq->uq_key); 3705 umtxq_remove(uq); 3706 umtxq_unlock(&uq->uq_key); 3707 umtx_key_release(&uq->uq_key); 3708 if (rv == -1) 3709 return (EFAULT); 3710 rv = thread_check_susp(td, true); 3711 if (rv != 0) 3712 return (rv); 3713 goto again; 3714 } 3715 umtxq_lock(&uq->uq_key); 3716 umtxq_unbusy(&uq->uq_key); 3717 3718 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 3719 3720 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3721 error = 0; 3722 else { 3723 umtxq_remove(uq); 3724 if (timeout != NULL && (timeout->_flags & UMTX_ABSTIME) == 0) { 3725 /* A relative timeout cannot be restarted. */ 3726 if (error == ERESTART) 3727 error = EINTR; 3728 if (error == EINTR) { 3729 kern_clock_gettime(curthread, timo.clockid, 3730 &timo.cur); 3731 timespecsub(&timo.end, &timo.cur, 3732 &timeout->_timeout); 3733 } 3734 } 3735 } 3736 umtxq_unlock(&uq->uq_key); 3737 umtx_key_release(&uq->uq_key); 3738 return (error); 3739 } 3740 3741 /* 3742 * Signal a userland semaphore. 3743 */ 3744 static int 3745 do_sem2_wake(struct thread *td, struct _usem2 *sem) 3746 { 3747 struct umtx_key key; 3748 int error, cnt, rv; 3749 uint32_t count, flags; 3750 3751 rv = fueword32(&sem->_flags, &flags); 3752 if (rv == -1) 3753 return (EFAULT); 3754 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3755 return (error); 3756 umtxq_lock(&key); 3757 umtxq_busy(&key); 3758 cnt = umtxq_count(&key); 3759 if (cnt > 0) { 3760 /* 3761 * If this was the last sleeping thread, clear the waiters 3762 * flag in _count. 
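 * The CAS loop below preserves the USEM_COUNT() bits while stripping
 * USEM_HAS_WAITERS, retrying when userland modifies _count
 * concurrently.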
3763 */ 3764 if (cnt == 1) { 3765 umtxq_unlock(&key); 3766 rv = fueword32(&sem->_count, &count); 3767 while (rv != -1 && count & USEM_HAS_WAITERS) { 3768 rv = casueword32(&sem->_count, count, &count, 3769 count & ~USEM_HAS_WAITERS); 3770 if (rv == 1) { 3771 rv = thread_check_susp(td, true); 3772 if (rv != 0) 3773 break; 3774 } 3775 } 3776 if (rv == -1) 3777 error = EFAULT; 3778 else if (rv > 0) { 3779 error = rv; 3780 } 3781 umtxq_lock(&key); 3782 } 3783 3784 umtxq_signal(&key, 1); 3785 } 3786 umtxq_unbusy(&key); 3787 umtxq_unlock(&key); 3788 umtx_key_release(&key); 3789 return (error); 3790 } 3791 3792 #ifdef COMPAT_FREEBSD10 3793 int 3794 freebsd10__umtx_lock(struct thread *td, struct freebsd10__umtx_lock_args *uap) 3795 { 3796 return (do_lock_umtx(td, uap->umtx, td->td_tid, 0)); 3797 } 3798 3799 int 3800 freebsd10__umtx_unlock(struct thread *td, 3801 struct freebsd10__umtx_unlock_args *uap) 3802 { 3803 return (do_unlock_umtx(td, uap->umtx, td->td_tid)); 3804 } 3805 #endif 3806 3807 inline int 3808 umtx_copyin_timeout(const void *uaddr, struct timespec *tsp) 3809 { 3810 int error; 3811 3812 error = copyin(uaddr, tsp, sizeof(*tsp)); 3813 if (error == 0) { 3814 if (!timespecvalid_interval(tsp)) 3815 error = EINVAL; 3816 } 3817 return (error); 3818 } 3819 3820 static inline int 3821 umtx_copyin_umtx_time(const void *uaddr, size_t size, struct _umtx_time *tp) 3822 { 3823 int error; 3824 3825 if (size <= sizeof(tp->_timeout)) { 3826 tp->_clockid = CLOCK_REALTIME; 3827 tp->_flags = 0; 3828 error = copyin(uaddr, &tp->_timeout, sizeof(tp->_timeout)); 3829 } else 3830 error = copyin(uaddr, tp, sizeof(*tp)); 3831 if (error != 0) 3832 return (error); 3833 if (!timespecvalid_interval(&tp->_timeout)) 3834 return (EINVAL); 3835 return (0); 3836 } 3837 3838 static int 3839 umtx_copyin_robust_lists(const void *uaddr, size_t size, 3840 struct umtx_robust_lists_params *rb) 3841 { 3842 3843 if (size > sizeof(*rb)) 3844 return (EINVAL); 3845 return (copyin(uaddr, rb, size)); 3846 } 3847 3848 static int 3849 umtx_copyout_timeout(void *uaddr, size_t sz, struct timespec *tsp) 3850 { 3851 3852 /* 3853 * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time) 3854 * and we're only called if sz >= sizeof(timespec) as supplied in the 3855 * copyops. 3856 */ 3857 KASSERT(sz >= sizeof(*tsp), 3858 ("umtx_copyops specifies incorrect sizes")); 3859 3860 return (copyout(tsp, uaddr, sizeof(*tsp))); 3861 } 3862 3863 #ifdef COMPAT_FREEBSD10 3864 static int 3865 __umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap, 3866 const struct umtx_copyops *ops) 3867 { 3868 struct timespec *ts, timeout; 3869 int error; 3870 3871 /* Allow a null timespec (wait forever). 
*/ 3872 if (uap->uaddr2 == NULL) 3873 ts = NULL; 3874 else { 3875 error = ops->copyin_timeout(uap->uaddr2, &timeout); 3876 if (error != 0) 3877 return (error); 3878 ts = &timeout; 3879 } 3880 #ifdef COMPAT_FREEBSD32 3881 if (ops->compat32) 3882 return (do_lock_umtx32(td, uap->obj, uap->val, ts)); 3883 #endif 3884 return (do_lock_umtx(td, uap->obj, uap->val, ts)); 3885 } 3886 3887 static int 3888 __umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap, 3889 const struct umtx_copyops *ops) 3890 { 3891 #ifdef COMPAT_FREEBSD32 3892 if (ops->compat32) 3893 return (do_unlock_umtx32(td, uap->obj, uap->val)); 3894 #endif 3895 return (do_unlock_umtx(td, uap->obj, uap->val)); 3896 } 3897 #endif /* COMPAT_FREEBSD10 */ 3898 3899 #if !defined(COMPAT_FREEBSD10) 3900 static int 3901 __umtx_op_unimpl(struct thread *td __unused, struct _umtx_op_args *uap __unused, 3902 const struct umtx_copyops *ops __unused) 3903 { 3904 return (EOPNOTSUPP); 3905 } 3906 #endif /* COMPAT_FREEBSD10 */ 3907 3908 static int 3909 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap, 3910 const struct umtx_copyops *ops) 3911 { 3912 struct _umtx_time timeout, *tm_p; 3913 int error; 3914 3915 if (uap->uaddr2 == NULL) 3916 tm_p = NULL; 3917 else { 3918 error = ops->copyin_umtx_time( 3919 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3920 if (error != 0) 3921 return (error); 3922 tm_p = &timeout; 3923 } 3924 return (do_wait(td, uap->obj, uap->val, tm_p, ops->compat32, 0)); 3925 } 3926 3927 static int 3928 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap, 3929 const struct umtx_copyops *ops) 3930 { 3931 struct _umtx_time timeout, *tm_p; 3932 int error; 3933 3934 if (uap->uaddr2 == NULL) 3935 tm_p = NULL; 3936 else { 3937 error = ops->copyin_umtx_time( 3938 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3939 if (error != 0) 3940 return (error); 3941 tm_p = &timeout; 3942 } 3943 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); 3944 } 3945 3946 static int 3947 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap, 3948 const struct umtx_copyops *ops) 3949 { 3950 struct _umtx_time *tm_p, timeout; 3951 int error; 3952 3953 if (uap->uaddr2 == NULL) 3954 tm_p = NULL; 3955 else { 3956 error = ops->copyin_umtx_time( 3957 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3958 if (error != 0) 3959 return (error); 3960 tm_p = &timeout; 3961 } 3962 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1)); 3963 } 3964 3965 static int 3966 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap, 3967 const struct umtx_copyops *ops __unused) 3968 { 3969 3970 return (kern_umtx_wake(td, uap->obj, uap->val, 0)); 3971 } 3972 3973 #define BATCH_SIZE 128 3974 static int 3975 __umtx_op_nwake_private_native(struct thread *td, struct _umtx_op_args *uap) 3976 { 3977 char *uaddrs[BATCH_SIZE], **upp; 3978 int count, error, i, pos, tocopy; 3979 3980 upp = (char **)uap->obj; 3981 error = 0; 3982 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 3983 pos += tocopy) { 3984 tocopy = MIN(count, BATCH_SIZE); 3985 error = copyin(upp + pos, uaddrs, tocopy * sizeof(char *)); 3986 if (error != 0) 3987 break; 3988 for (i = 0; i < tocopy; ++i) { 3989 kern_umtx_wake(td, uaddrs[i], INT_MAX, 1); 3990 } 3991 maybe_yield(); 3992 } 3993 return (error); 3994 } 3995 3996 static int 3997 __umtx_op_nwake_private_compat32(struct thread *td, struct _umtx_op_args *uap) 3998 { 3999 uint32_t uaddrs[BATCH_SIZE], *upp; 4000 int count, error, i, pos, tocopy; 4001 4002 upp = (uint32_t *)uap->obj; 4003 error = 0; 4004 for (count = 
uap->val, pos = 0; count > 0; count -= tocopy, 4005 pos += tocopy) { 4006 tocopy = MIN(count, BATCH_SIZE); 4007 error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t)); 4008 if (error != 0) 4009 break; 4010 for (i = 0; i < tocopy; ++i) { 4011 kern_umtx_wake(td, (void *)(uintptr_t)uaddrs[i], 4012 INT_MAX, 1); 4013 } 4014 maybe_yield(); 4015 } 4016 return (error); 4017 } 4018 4019 static int 4020 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap, 4021 const struct umtx_copyops *ops) 4022 { 4023 4024 if (ops->compat32) 4025 return (__umtx_op_nwake_private_compat32(td, uap)); 4026 return (__umtx_op_nwake_private_native(td, uap)); 4027 } 4028 4029 static int 4030 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap, 4031 const struct umtx_copyops *ops __unused) 4032 { 4033 4034 return (kern_umtx_wake(td, uap->obj, uap->val, 1)); 4035 } 4036 4037 static int 4038 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap, 4039 const struct umtx_copyops *ops) 4040 { 4041 struct _umtx_time *tm_p, timeout; 4042 int error; 4043 4044 /* Allow a null timespec (wait forever). */ 4045 if (uap->uaddr2 == NULL) 4046 tm_p = NULL; 4047 else { 4048 error = ops->copyin_umtx_time( 4049 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 4050 if (error != 0) 4051 return (error); 4052 tm_p = &timeout; 4053 } 4054 return (do_lock_umutex(td, uap->obj, tm_p, 0)); 4055 } 4056 4057 static int 4058 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap, 4059 const struct umtx_copyops *ops __unused) 4060 { 4061 4062 return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY)); 4063 } 4064 4065 static int 4066 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap, 4067 const struct umtx_copyops *ops) 4068 { 4069 struct _umtx_time *tm_p, timeout; 4070 int error; 4071 4072 /* Allow a null timespec (wait forever). */ 4073 if (uap->uaddr2 == NULL) 4074 tm_p = NULL; 4075 else { 4076 error = ops->copyin_umtx_time( 4077 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 4078 if (error != 0) 4079 return (error); 4080 tm_p = &timeout; 4081 } 4082 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT)); 4083 } 4084 4085 static int 4086 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap, 4087 const struct umtx_copyops *ops __unused) 4088 { 4089 4090 return (do_wake_umutex(td, uap->obj)); 4091 } 4092 4093 static int 4094 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap, 4095 const struct umtx_copyops *ops __unused) 4096 { 4097 4098 return (do_unlock_umutex(td, uap->obj, false)); 4099 } 4100 4101 static int 4102 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap, 4103 const struct umtx_copyops *ops __unused) 4104 { 4105 4106 return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1)); 4107 } 4108 4109 static int 4110 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap, 4111 const struct umtx_copyops *ops) 4112 { 4113 struct timespec *ts, timeout; 4114 int error; 4115 4116 /* Allow a null timespec (wait forever). 
*/ 4117 if (uap->uaddr2 == NULL) 4118 ts = NULL; 4119 else { 4120 error = ops->copyin_timeout(uap->uaddr2, &timeout); 4121 if (error != 0) 4122 return (error); 4123 ts = &timeout; 4124 } 4125 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 4126 } 4127 4128 static int 4129 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap, 4130 const struct umtx_copyops *ops __unused) 4131 { 4132 4133 return (do_cv_signal(td, uap->obj)); 4134 } 4135 4136 static int 4137 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap, 4138 const struct umtx_copyops *ops __unused) 4139 { 4140 4141 return (do_cv_broadcast(td, uap->obj)); 4142 } 4143 4144 static int 4145 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap, 4146 const struct umtx_copyops *ops) 4147 { 4148 struct _umtx_time timeout; 4149 int error; 4150 4151 /* Allow a null timespec (wait forever). */ 4152 if (uap->uaddr2 == NULL) { 4153 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 4154 } else { 4155 error = ops->copyin_umtx_time(uap->uaddr2, 4156 (size_t)uap->uaddr1, &timeout); 4157 if (error != 0) 4158 return (error); 4159 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 4160 } 4161 return (error); 4162 } 4163 4164 static int 4165 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap, 4166 const struct umtx_copyops *ops) 4167 { 4168 struct _umtx_time timeout; 4169 int error; 4170 4171 /* Allow a null timespec (wait forever). */ 4172 if (uap->uaddr2 == NULL) { 4173 error = do_rw_wrlock(td, uap->obj, 0); 4174 } else { 4175 error = ops->copyin_umtx_time(uap->uaddr2, 4176 (size_t)uap->uaddr1, &timeout); 4177 if (error != 0) 4178 return (error); 4179 4180 error = do_rw_wrlock(td, uap->obj, &timeout); 4181 } 4182 return (error); 4183 } 4184 4185 static int 4186 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap, 4187 const struct umtx_copyops *ops __unused) 4188 { 4189 4190 return (do_rw_unlock(td, uap->obj)); 4191 } 4192 4193 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4194 static int 4195 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap, 4196 const struct umtx_copyops *ops) 4197 { 4198 struct _umtx_time *tm_p, timeout; 4199 int error; 4200 4201 /* Allow a null timespec (wait forever). */ 4202 if (uap->uaddr2 == NULL) 4203 tm_p = NULL; 4204 else { 4205 error = ops->copyin_umtx_time( 4206 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 4207 if (error != 0) 4208 return (error); 4209 tm_p = &timeout; 4210 } 4211 return (do_sem_wait(td, uap->obj, tm_p)); 4212 } 4213 4214 static int 4215 __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap, 4216 const struct umtx_copyops *ops __unused) 4217 { 4218 4219 return (do_sem_wake(td, uap->obj)); 4220 } 4221 #endif 4222 4223 static int 4224 __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap, 4225 const struct umtx_copyops *ops __unused) 4226 { 4227 4228 return (do_wake2_umutex(td, uap->obj, uap->val)); 4229 } 4230 4231 static int 4232 __umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap, 4233 const struct umtx_copyops *ops) 4234 { 4235 struct _umtx_time *tm_p, timeout; 4236 size_t uasize; 4237 int error; 4238 4239 /* Allow a null timespec (wait forever). 
*/ 4240 if (uap->uaddr2 == NULL) { 4241 uasize = 0; 4242 tm_p = NULL; 4243 } else { 4244 uasize = (size_t)uap->uaddr1; 4245 error = ops->copyin_umtx_time(uap->uaddr2, uasize, &timeout); 4246 if (error != 0) 4247 return (error); 4248 tm_p = &timeout; 4249 } 4250 error = do_sem2_wait(td, uap->obj, tm_p); 4251 if (error == EINTR && uap->uaddr2 != NULL && 4252 (timeout._flags & UMTX_ABSTIME) == 0 && 4253 uasize >= ops->umtx_time_sz + ops->timespec_sz) { 4254 error = ops->copyout_timeout( 4255 (void *)((uintptr_t)uap->uaddr2 + ops->umtx_time_sz), 4256 uasize - ops->umtx_time_sz, &timeout._timeout); 4257 if (error == 0) { 4258 error = EINTR; 4259 } 4260 } 4261 4262 return (error); 4263 } 4264 4265 static int 4266 __umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap, 4267 const struct umtx_copyops *ops __unused) 4268 { 4269 4270 return (do_sem2_wake(td, uap->obj)); 4271 } 4272 4273 #define USHM_OBJ_UMTX(o) \ 4274 ((struct umtx_shm_obj_list *)(&(o)->umtx_data)) 4275 4276 #define USHMF_REG_LINKED 0x0001 4277 #define USHMF_OBJ_LINKED 0x0002 4278 struct umtx_shm_reg { 4279 TAILQ_ENTRY(umtx_shm_reg) ushm_reg_link; 4280 LIST_ENTRY(umtx_shm_reg) ushm_obj_link; 4281 struct umtx_key ushm_key; 4282 struct ucred *ushm_cred; 4283 struct shmfd *ushm_obj; 4284 u_int ushm_refcnt; 4285 u_int ushm_flags; 4286 }; 4287 4288 LIST_HEAD(umtx_shm_obj_list, umtx_shm_reg); 4289 TAILQ_HEAD(umtx_shm_reg_head, umtx_shm_reg); 4290 4291 static uma_zone_t umtx_shm_reg_zone; 4292 static struct umtx_shm_reg_head umtx_shm_registry[UMTX_CHAINS]; 4293 static struct mtx umtx_shm_lock; 4294 static struct umtx_shm_reg_head umtx_shm_reg_delfree = 4295 TAILQ_HEAD_INITIALIZER(umtx_shm_reg_delfree); 4296 4297 static void umtx_shm_free_reg(struct umtx_shm_reg *reg); 4298 4299 static void 4300 umtx_shm_reg_delfree_tq(void *context __unused, int pending __unused) 4301 { 4302 struct umtx_shm_reg_head d; 4303 struct umtx_shm_reg *reg, *reg1; 4304 4305 TAILQ_INIT(&d); 4306 mtx_lock(&umtx_shm_lock); 4307 TAILQ_CONCAT(&d, &umtx_shm_reg_delfree, ushm_reg_link); 4308 mtx_unlock(&umtx_shm_lock); 4309 TAILQ_FOREACH_SAFE(reg, &d, ushm_reg_link, reg1) { 4310 TAILQ_REMOVE(&d, reg, ushm_reg_link); 4311 umtx_shm_free_reg(reg); 4312 } 4313 } 4314 4315 static struct task umtx_shm_reg_delfree_task = 4316 TASK_INITIALIZER(0, umtx_shm_reg_delfree_tq, NULL); 4317 4318 static struct umtx_shm_reg * 4319 umtx_shm_find_reg_locked(const struct umtx_key *key) 4320 { 4321 struct umtx_shm_reg *reg; 4322 struct umtx_shm_reg_head *reg_head; 4323 4324 KASSERT(key->shared, ("umtx_p_find_rg: private key")); 4325 mtx_assert(&umtx_shm_lock, MA_OWNED); 4326 reg_head = &umtx_shm_registry[key->hash]; 4327 TAILQ_FOREACH(reg, reg_head, ushm_reg_link) { 4328 KASSERT(reg->ushm_key.shared, 4329 ("non-shared key on reg %p %d", reg, reg->ushm_key.shared)); 4330 if (reg->ushm_key.info.shared.object == 4331 key->info.shared.object && 4332 reg->ushm_key.info.shared.offset == 4333 key->info.shared.offset) { 4334 KASSERT(reg->ushm_key.type == TYPE_SHM, ("TYPE_USHM")); 4335 KASSERT(reg->ushm_refcnt > 0, 4336 ("reg %p refcnt 0 onlist", reg)); 4337 KASSERT((reg->ushm_flags & USHMF_REG_LINKED) != 0, 4338 ("reg %p not linked", reg)); 4339 reg->ushm_refcnt++; 4340 return (reg); 4341 } 4342 } 4343 return (NULL); 4344 } 4345 4346 static struct umtx_shm_reg * 4347 umtx_shm_find_reg(const struct umtx_key *key) 4348 { 4349 struct umtx_shm_reg *reg; 4350 4351 mtx_lock(&umtx_shm_lock); 4352 reg = umtx_shm_find_reg_locked(key); 4353 mtx_unlock(&umtx_shm_lock); 4354 return (reg); 4355 } 4356 
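
/*
 * A hedged sketch of the userland view of this registry (illustrative
 * only): UMTX_SHM_CREAT hands back a POSIX-shm file descriptor whose
 * single page is keyed by the given address, so unrelated processes
 * can rendezvous on it:
 *
 *	fd = _umtx_op(NULL, UMTX_OP_SHM, UMTX_SHM_CREAT, addr, NULL);
 *	p = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
 *	    fd, 0);
 */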
static void
umtx_shm_free_reg(struct umtx_shm_reg *reg)
{

	chgumtxcnt(reg->ushm_cred->cr_ruidinfo, -1, 0);
	crfree(reg->ushm_cred);
	shm_drop(reg->ushm_obj);
	uma_zfree(umtx_shm_reg_zone, reg);
}

static bool
umtx_shm_unref_reg_locked(struct umtx_shm_reg *reg, bool force)
{
	bool res;

	mtx_assert(&umtx_shm_lock, MA_OWNED);
	KASSERT(reg->ushm_refcnt > 0, ("ushm_reg %p refcnt 0", reg));
	reg->ushm_refcnt--;
	res = reg->ushm_refcnt == 0;
	if (res || force) {
		if ((reg->ushm_flags & USHMF_REG_LINKED) != 0) {
			TAILQ_REMOVE(&umtx_shm_registry[reg->ushm_key.hash],
			    reg, ushm_reg_link);
			reg->ushm_flags &= ~USHMF_REG_LINKED;
		}
		if ((reg->ushm_flags & USHMF_OBJ_LINKED) != 0) {
			LIST_REMOVE(reg, ushm_obj_link);
			reg->ushm_flags &= ~USHMF_OBJ_LINKED;
		}
	}
	return (res);
}

static void
umtx_shm_unref_reg(struct umtx_shm_reg *reg, bool force)
{
	vm_object_t object;
	bool dofree;

	if (force) {
		object = reg->ushm_obj->shm_object;
		VM_OBJECT_WLOCK(object);
		vm_object_set_flag(object, OBJ_UMTXDEAD);
		VM_OBJECT_WUNLOCK(object);
	}
	mtx_lock(&umtx_shm_lock);
	dofree = umtx_shm_unref_reg_locked(reg, force);
	mtx_unlock(&umtx_shm_lock);
	if (dofree)
		umtx_shm_free_reg(reg);
}

void
umtx_shm_object_init(vm_object_t object)
{

	LIST_INIT(USHM_OBJ_UMTX(object));
}

void
umtx_shm_object_terminated(vm_object_t object)
{
	struct umtx_shm_reg *reg, *reg1;
	bool dofree;

	if (LIST_EMPTY(USHM_OBJ_UMTX(object)))
		return;

	dofree = false;
	mtx_lock(&umtx_shm_lock);
	LIST_FOREACH_SAFE(reg, USHM_OBJ_UMTX(object), ushm_obj_link, reg1) {
		if (umtx_shm_unref_reg_locked(reg, true)) {
			TAILQ_INSERT_TAIL(&umtx_shm_reg_delfree, reg,
			    ushm_reg_link);
			dofree = true;
		}
	}
	mtx_unlock(&umtx_shm_lock);
	if (dofree)
		taskqueue_enqueue(taskqueue_thread, &umtx_shm_reg_delfree_task);
}

static int
umtx_shm_create_reg(struct thread *td, const struct umtx_key *key,
    struct umtx_shm_reg **res)
{
	struct umtx_shm_reg *reg, *reg1;
	struct ucred *cred;
	int error;

	reg = umtx_shm_find_reg(key);
	if (reg != NULL) {
		*res = reg;
		return (0);
	}
	cred = td->td_ucred;
	if (!chgumtxcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_UMTXP)))
		return (ENOMEM);
	reg = uma_zalloc(umtx_shm_reg_zone, M_WAITOK | M_ZERO);
	reg->ushm_refcnt = 1;
	bcopy(key, &reg->ushm_key, sizeof(*key));
	reg->ushm_obj = shm_alloc(td->td_ucred, O_RDWR, false);
	reg->ushm_cred = crhold(cred);
	error = shm_dotruncate(reg->ushm_obj, PAGE_SIZE);
	if (error != 0) {
		umtx_shm_free_reg(reg);
		return (error);
	}
	mtx_lock(&umtx_shm_lock);
	reg1 = umtx_shm_find_reg_locked(key);
	if (reg1 != NULL) {
		mtx_unlock(&umtx_shm_lock);
		umtx_shm_free_reg(reg);
		*res = reg1;
		return (0);
	}
	reg->ushm_refcnt++;
	TAILQ_INSERT_TAIL(&umtx_shm_registry[key->hash], reg, ushm_reg_link);
	LIST_INSERT_HEAD(USHM_OBJ_UMTX(key->info.shared.object), reg,
	    ushm_obj_link);
	reg->ushm_flags = USHMF_REG_LINKED | USHMF_OBJ_LINKED;
	mtx_unlock(&umtx_shm_lock);
	*res = reg;
	return (0);
}

static int
umtx_shm_alive(struct thread *td, void *addr)
{
	vm_map_t map;
	vm_map_entry_t entry;
	vm_object_t object;
	vm_pindex_t pindex;
	vm_prot_t prot;
	int res, ret;
	boolean_t wired;

	map = &td->td_proc->p_vmspace->vm_map;
	res = vm_map_lookup(&map, (uintptr_t)addr, VM_PROT_READ, &entry,
	    &object, &pindex, &prot, &wired);
	if (res != KERN_SUCCESS)
		return (EFAULT);
	if (object == NULL)
		ret = EINVAL;
	else
		ret = (object->flags & OBJ_UMTXDEAD) != 0 ? ENOTTY : 0;
	vm_map_lookup_done(map, entry);
	return (ret);
}

static void
umtx_shm_init(void)
{
	int i;

	umtx_shm_reg_zone = uma_zcreate("umtx_shm", sizeof(struct umtx_shm_reg),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	mtx_init(&umtx_shm_lock, "umtxshm", NULL, MTX_DEF);
	for (i = 0; i < nitems(umtx_shm_registry); i++)
		TAILQ_INIT(&umtx_shm_registry[i]);
}

static int
umtx_shm(struct thread *td, void *addr, u_int flags)
{
	struct umtx_key key;
	struct umtx_shm_reg *reg;
	struct file *fp;
	int error, fd;

	if (__bitcount(flags & (UMTX_SHM_CREAT | UMTX_SHM_LOOKUP |
	    UMTX_SHM_DESTROY | UMTX_SHM_ALIVE)) != 1)
		return (EINVAL);
	if ((flags & UMTX_SHM_ALIVE) != 0)
		return (umtx_shm_alive(td, addr));
	error = umtx_key_get(addr, TYPE_SHM, PROCESS_SHARE, &key);
	if (error != 0)
		return (error);
	KASSERT(key.shared == 1, ("non-shared key"));
	if ((flags & UMTX_SHM_CREAT) != 0) {
		error = umtx_shm_create_reg(td, &key, &reg);
	} else {
		reg = umtx_shm_find_reg(&key);
		if (reg == NULL)
			error = ESRCH;
	}
	umtx_key_release(&key);
	if (error != 0)
		return (error);
	KASSERT(reg != NULL, ("no reg"));
	if ((flags & UMTX_SHM_DESTROY) != 0) {
		umtx_shm_unref_reg(reg, true);
	} else {
#if 0
#ifdef MAC
		error = mac_posixshm_check_open(td->td_ucred,
		    reg->ushm_obj, FFLAGS(O_RDWR));
		if (error == 0)
#endif
			error = shm_access(reg->ushm_obj, td->td_ucred,
			    FFLAGS(O_RDWR));
		if (error == 0)
#endif
		error = falloc_caps(td, &fp, &fd, O_CLOEXEC, NULL);
		if (error == 0) {
			shm_hold(reg->ushm_obj);
			finit(fp, FFLAGS(O_RDWR), DTYPE_SHM, reg->ushm_obj,
			    &shm_ops);
			td->td_retval[0] = fd;
			fdrop(fp, td);
		}
	}
	umtx_shm_unref_reg(reg, false);
	return (error);
}

static int
__umtx_op_shm(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *ops __unused)
{

	return (umtx_shm(td, uap->uaddr1, uap->val));
}

static int
__umtx_op_robust_lists(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *ops)
{
	struct umtx_robust_lists_params rb;
	int error;

	if (ops->compat32) {
		if ((td->td_pflags2 & TDP2_COMPAT32RB) == 0 &&
		    (td->td_rb_list != 0 || td->td_rbp_list != 0 ||
		    td->td_rb_inact != 0))
			return (EBUSY);
	} else if ((td->td_pflags2 & TDP2_COMPAT32RB) != 0) {
		return (EBUSY);
	}

	bzero(&rb, sizeof(rb));
	error = ops->copyin_robust_lists(uap->uaddr1, uap->val, &rb);
	if (error != 0)
		return (error);

	if (ops->compat32)
		td->td_pflags2 |= TDP2_COMPAT32RB;

	td->td_rb_list = rb.robust_list_offset;
	td->td_rbp_list = rb.robust_priv_list_offset;
	td->td_rb_inact = rb.robust_inact_offset;
	return (0);
}

static int
__umtx_op_get_min_timeout(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *ops)
{
	long val;
	int error, val1;

	val = sbttons(td->td_proc->p_umtx_min_timeout);
	if (ops->compat32) {
		val1 = (int)val;
		error = copyout(&val1, uap->uaddr1, sizeof(val1));
	} else {
		error = copyout(&val, uap->uaddr1, sizeof(val));
	}
	return (error);
}

static int
__umtx_op_set_min_timeout(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *ops)
{
	if (uap->val < 0)
		return (EINVAL);
	td->td_proc->p_umtx_min_timeout = nstosbt(uap->val);
	return (0);
}

#if defined(__i386__) || defined(__amd64__)
/*
 * Provide the standard 32-bit definitions for x86, since native/compat32 use a
 * 32-bit time_t there.  Other architectures just need the i386 definitions
 * along with their standard compat32.
 */
struct timespecx32 {
	int64_t		tv_sec;
	int32_t		tv_nsec;
};

struct umtx_timex32 {
	struct	timespecx32	_timeout;
	uint32_t		_flags;
	uint32_t		_clockid;
};

#ifndef __i386__
#define	timespeci386	timespec32
#define	umtx_timei386	umtx_time32
#endif
#else /* !__i386__ && !__amd64__ */
/* 32-bit architectures can emulate i386, so define these almost everywhere. */
struct timespeci386 {
	int32_t		tv_sec;
	int32_t		tv_nsec;
};

struct umtx_timei386 {
	struct	timespeci386	_timeout;
	uint32_t		_flags;
	uint32_t		_clockid;
};

#if defined(__LP64__)
#define	timespecx32	timespec32
#define	umtx_timex32	umtx_time32
#endif
#endif

static int
umtx_copyin_robust_lists32(const void *uaddr, size_t size,
    struct umtx_robust_lists_params *rbp)
{
	struct umtx_robust_lists_params_compat32 rb32;
	int error;

	if (size > sizeof(rb32))
		return (EINVAL);
	bzero(&rb32, sizeof(rb32));
	error = copyin(uaddr, &rb32, size);
	if (error != 0)
		return (error);
	CP(rb32, *rbp, robust_list_offset);
	CP(rb32, *rbp, robust_priv_list_offset);
	CP(rb32, *rbp, robust_inact_offset);
	return (0);
}

#ifndef __i386__
static inline int
umtx_copyin_timeouti386(const void *uaddr, struct timespec *tsp)
{
	struct timespeci386 ts32;
	int error;

	error = copyin(uaddr, &ts32, sizeof(ts32));
	if (error == 0) {
		if (!timespecvalid_interval(&ts32))
			error = EINVAL;
		else {
			CP(ts32, *tsp, tv_sec);
			CP(ts32, *tsp, tv_nsec);
		}
	}
	return (error);
}

static inline int
umtx_copyin_umtx_timei386(const void *uaddr, size_t size, struct _umtx_time *tp)
{
	struct umtx_timei386 t32;
	int error;

	t32._clockid = CLOCK_REALTIME;
	t32._flags = 0;
	if (size <= sizeof(t32._timeout))
		error = copyin(uaddr, &t32._timeout, sizeof(t32._timeout));
	else
		error = copyin(uaddr, &t32, sizeof(t32));
	if (error != 0)
		return (error);
	if (!timespecvalid_interval(&t32._timeout))
		return (EINVAL);
	TS_CP(t32, *tp, _timeout);
	CP(t32, *tp, _flags);
	CP(t32, *tp, _clockid);
	return (0);
}

static int
umtx_copyout_timeouti386(void *uaddr, size_t sz, struct timespec *tsp)
{
	struct timespeci386 remain32 = {
		.tv_sec = tsp->tv_sec,
		.tv_nsec = tsp->tv_nsec,
	};

	/*
	 * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time)
	 * and we're only called if sz >= sizeof(timespec) as supplied in the
	 * copyops.
	 */
	KASSERT(sz >= sizeof(remain32),
	    ("umtx_copyops specifies incorrect sizes"));

	return (copyout(&remain32, uaddr, sizeof(remain32)));
}
#endif /* !__i386__ */

#if defined(__i386__) || defined(__LP64__)
static inline int
umtx_copyin_timeoutx32(const void *uaddr, struct timespec *tsp)
{
	struct timespecx32 ts32;
	int error;

	error = copyin(uaddr, &ts32, sizeof(ts32));
	if (error == 0) {
		if (!timespecvalid_interval(&ts32))
			error = EINVAL;
		else {
			CP(ts32, *tsp, tv_sec);
			CP(ts32, *tsp, tv_nsec);
		}
	}
	return (error);
}

static inline int
umtx_copyin_umtx_timex32(const void *uaddr, size_t size, struct _umtx_time *tp)
{
	struct umtx_timex32 t32;
	int error;

	t32._clockid = CLOCK_REALTIME;
	t32._flags = 0;
	if (size <= sizeof(t32._timeout))
		error = copyin(uaddr, &t32._timeout, sizeof(t32._timeout));
	else
		error = copyin(uaddr, &t32, sizeof(t32));
	if (error != 0)
		return (error);
	if (!timespecvalid_interval(&t32._timeout))
		return (EINVAL);
	TS_CP(t32, *tp, _timeout);
	CP(t32, *tp, _flags);
	CP(t32, *tp, _clockid);
	return (0);
}

static int
umtx_copyout_timeoutx32(void *uaddr, size_t sz, struct timespec *tsp)
{
	struct timespecx32 remain32 = {
		.tv_sec = tsp->tv_sec,
		.tv_nsec = tsp->tv_nsec,
	};

	/*
	 * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time)
	 * and we're only called if sz >= sizeof(timespec) as supplied in the
	 * copyops.
	 */
	KASSERT(sz >= sizeof(remain32),
	    ("umtx_copyops specifies incorrect sizes"));

	return (copyout(&remain32, uaddr, sizeof(remain32)));
}
#endif /* __i386__ || __LP64__ */

typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *umtx_ops);

static const _umtx_op_func op_table[] = {
#ifdef COMPAT_FREEBSD10
	[UMTX_OP_LOCK]		= __umtx_op_lock_umtx,
	[UMTX_OP_UNLOCK]	= __umtx_op_unlock_umtx,
#else
	[UMTX_OP_LOCK]		= __umtx_op_unimpl,
	[UMTX_OP_UNLOCK]	= __umtx_op_unimpl,
#endif
	[UMTX_OP_WAIT]		= __umtx_op_wait,
	[UMTX_OP_WAKE]		= __umtx_op_wake,
	[UMTX_OP_MUTEX_TRYLOCK]	= __umtx_op_trylock_umutex,
	[UMTX_OP_MUTEX_LOCK]	= __umtx_op_lock_umutex,
	[UMTX_OP_MUTEX_UNLOCK]	= __umtx_op_unlock_umutex,
	[UMTX_OP_SET_CEILING]	= __umtx_op_set_ceiling,
	[UMTX_OP_CV_WAIT]	= __umtx_op_cv_wait,
	[UMTX_OP_CV_SIGNAL]	= __umtx_op_cv_signal,
	[UMTX_OP_CV_BROADCAST]	= __umtx_op_cv_broadcast,
	[UMTX_OP_WAIT_UINT]	= __umtx_op_wait_uint,
	[UMTX_OP_RW_RDLOCK]	= __umtx_op_rw_rdlock,
	[UMTX_OP_RW_WRLOCK]	= __umtx_op_rw_wrlock,
	[UMTX_OP_RW_UNLOCK]	= __umtx_op_rw_unlock,
	[UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private,
	[UMTX_OP_WAKE_PRIVATE]	= __umtx_op_wake_private,
	[UMTX_OP_MUTEX_WAIT]	= __umtx_op_wait_umutex,
	[UMTX_OP_MUTEX_WAKE]	= __umtx_op_wake_umutex,
#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
	[UMTX_OP_SEM_WAIT]	= __umtx_op_sem_wait,
	[UMTX_OP_SEM_WAKE]	= __umtx_op_sem_wake,
#else
	[UMTX_OP_SEM_WAIT]	= __umtx_op_unimpl,
	[UMTX_OP_SEM_WAKE]	= __umtx_op_unimpl,
#endif
	[UMTX_OP_NWAKE_PRIVATE]	= __umtx_op_nwake_private,
	[UMTX_OP_MUTEX_WAKE2]	= __umtx_op_wake2_umutex,
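	/*
	 * The sem2 ops take a _umtx_time argument, which lets an
	 * interrupted wait copy the remaining time back out to
	 * userspace (see __umtx_op_sem2_wait() above).
	 */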
	[UMTX_OP_SEM2_WAIT]	= __umtx_op_sem2_wait,
	[UMTX_OP_SEM2_WAKE]	= __umtx_op_sem2_wake,
	[UMTX_OP_SHM]		= __umtx_op_shm,
	[UMTX_OP_ROBUST_LISTS]	= __umtx_op_robust_lists,
	[UMTX_OP_GET_MIN_TIMEOUT] = __umtx_op_get_min_timeout,
	[UMTX_OP_SET_MIN_TIMEOUT] = __umtx_op_set_min_timeout,
};

static const struct umtx_copyops umtx_native_ops = {
	.copyin_timeout = umtx_copyin_timeout,
	.copyin_umtx_time = umtx_copyin_umtx_time,
	.copyin_robust_lists = umtx_copyin_robust_lists,
	.copyout_timeout = umtx_copyout_timeout,
	.timespec_sz = sizeof(struct timespec),
	.umtx_time_sz = sizeof(struct _umtx_time),
};

#ifndef __i386__
static const struct umtx_copyops umtx_native_opsi386 = {
	.copyin_timeout = umtx_copyin_timeouti386,
	.copyin_umtx_time = umtx_copyin_umtx_timei386,
	.copyin_robust_lists = umtx_copyin_robust_lists32,
	.copyout_timeout = umtx_copyout_timeouti386,
	.timespec_sz = sizeof(struct timespeci386),
	.umtx_time_sz = sizeof(struct umtx_timei386),
	.compat32 = true,
};
#endif

#if defined(__i386__) || defined(__LP64__)
/* i386 can emulate other 32-bit archs, too! */
static const struct umtx_copyops umtx_native_opsx32 = {
	.copyin_timeout = umtx_copyin_timeoutx32,
	.copyin_umtx_time = umtx_copyin_umtx_timex32,
	.copyin_robust_lists = umtx_copyin_robust_lists32,
	.copyout_timeout = umtx_copyout_timeoutx32,
	.timespec_sz = sizeof(struct timespecx32),
	.umtx_time_sz = sizeof(struct umtx_timex32),
	.compat32 = true,
};

#ifdef COMPAT_FREEBSD32
#ifdef __amd64__
#define	umtx_native_ops32	umtx_native_opsi386
#else
#define	umtx_native_ops32	umtx_native_opsx32
#endif
#endif /* COMPAT_FREEBSD32 */
#endif /* __i386__ || __LP64__ */

#define	UMTX_OP__FLAGS	(UMTX_OP__32BIT | UMTX_OP__I386)

static int
kern__umtx_op(struct thread *td, void *obj, int op, unsigned long val,
    void *uaddr1, void *uaddr2, const struct umtx_copyops *ops)
{
	struct _umtx_op_args uap = {
		.obj = obj,
		.op = op & ~UMTX_OP__FLAGS,
		.val = val,
		.uaddr1 = uaddr1,
		.uaddr2 = uaddr2
	};

	if (uap.op >= nitems(op_table))
		return (EINVAL);
	return ((*op_table[uap.op])(td, &uap, ops));
}

int
sys__umtx_op(struct thread *td, struct _umtx_op_args *uap)
{
	const struct umtx_copyops *umtx_ops;

	umtx_ops = &umtx_native_ops;
#ifdef __LP64__
	if ((uap->op & (UMTX_OP__32BIT | UMTX_OP__I386)) != 0) {
		if ((uap->op & UMTX_OP__I386) != 0)
			umtx_ops = &umtx_native_opsi386;
		else
			umtx_ops = &umtx_native_opsx32;
	}
#elif !defined(__i386__)
	/* We consider UMTX_OP__32BIT a nop on !i386 ILP32. */
	if ((uap->op & UMTX_OP__I386) != 0)
		umtx_ops = &umtx_native_opsi386;
#else
	/* Likewise, UMTX_OP__I386 is a nop on i386. */
	if ((uap->op & UMTX_OP__32BIT) != 0)
		umtx_ops = &umtx_native_opsx32;
#endif
	return (kern__umtx_op(td, uap->obj, uap->op, uap->val, uap->uaddr1,
	    uap->uaddr2, umtx_ops));
}

#ifdef COMPAT_FREEBSD32
#ifdef COMPAT_FREEBSD10
int
freebsd10_freebsd32__umtx_lock(struct thread *td,
    struct freebsd10_freebsd32__umtx_lock_args *uap)
{
	return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
}

int
freebsd10_freebsd32__umtx_unlock(struct thread *td,
    struct freebsd10_freebsd32__umtx_unlock_args *uap)
{
	return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
}
#endif /* COMPAT_FREEBSD10 */

int
freebsd32__umtx_op(struct thread *td, struct freebsd32__umtx_op_args *uap)
{

	return (kern__umtx_op(td, uap->obj, uap->op, uap->val, uap->uaddr1,
	    uap->uaddr2, &umtx_native_ops32));
}
#endif /* COMPAT_FREEBSD32 */

void
umtx_thread_init(struct thread *td)
{

	td->td_umtxq = umtxq_alloc();
	td->td_umtxq->uq_thread = td;
}

void
umtx_thread_fini(struct thread *td)
{

	umtxq_free(td->td_umtxq);
}

/*
 * Called when a new thread is created, e.g. by fork().
 */
void
umtx_thread_alloc(struct thread *td)
{
	struct umtx_q *uq;

	uq = td->td_umtxq;
	uq->uq_inherited_pri = PRI_MAX;

	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
}

/*
 * exec() hook.
 *
 * Clear the robust lists for all of the process' threads, without
 * delaying the cleanup to thread exit, since the relevant address
 * space is being destroyed right now.
 */
void
umtx_exec(struct proc *p)
{
	struct thread *td;

	KASSERT(p == curproc, ("need curproc"));
	KASSERT((p->p_flag & P_HADTHREADS) == 0 ||
	    (p->p_flag & P_STOPPED_SINGLE) != 0,
	    ("curproc must be single-threaded"));
	/*
	 * There is no need to lock the list as only this thread can be
	 * running.
	 */
	FOREACH_THREAD_IN_PROC(p, td) {
		KASSERT(td == curthread ||
		    ((td->td_flags & TDF_BOUNDARY) != 0 && TD_IS_SUSPENDED(td)),
		    ("running thread %p %p", p, td));
		umtx_thread_cleanup(td);
		td->td_rb_list = td->td_rbp_list = td->td_rb_inact = 0;
	}

	p->p_umtx_min_timeout = 0;
}

/*
 * Thread exit hook.
 */
void
umtx_thread_exit(struct thread *td)
{

	umtx_thread_cleanup(td);
}

static int
umtx_read_uptr(struct thread *td, uintptr_t ptr, uintptr_t *res, bool compat32)
{
	u_long res1;
	uint32_t res32;
	int error;

	if (compat32) {
		error = fueword32((void *)ptr, &res32);
		if (error == 0)
			res1 = res32;
	} else {
		error = fueword((void *)ptr, &res1);
	}
	if (error == 0)
		*res = res1;
	else
		error = EFAULT;
	return (error);
}

static void
umtx_read_rb_list(struct thread *td, struct umutex *m, uintptr_t *rb_list,
    bool compat32)
{
	struct umutex32 m32;

	if (compat32) {
		memcpy(&m32, m, sizeof(m32));
		*rb_list = m32.m_rb_lnk;
	} else {
		*rb_list = m->m_rb_lnk;
	}
}

static int
umtx_handle_rb(struct thread *td, uintptr_t rbp, uintptr_t *rb_list, bool inact,
    bool compat32)
{
	struct umutex m;
	int error;

	KASSERT(td->td_proc == curproc, ("need current vmspace"));
	error = copyin((void *)rbp, &m, sizeof(m));
	if (error != 0)
		return (error);
	if (rb_list != NULL)
		umtx_read_rb_list(td, &m, rb_list, compat32);
	if ((m.m_flags & UMUTEX_ROBUST) == 0)
		return (EINVAL);
	if ((m.m_owner & ~UMUTEX_CONTESTED) != td->td_tid)
		/* inact is cleared after unlock, allow the inconsistency */
		return (inact ? 0 : EINVAL);
	return (do_unlock_umutex(td, (struct umutex *)rbp, true));
}

static void
umtx_cleanup_rb_list(struct thread *td, uintptr_t rb_list, uintptr_t *rb_inact,
    const char *name, bool compat32)
{
	int error, i;
	uintptr_t rbp;
	bool inact;

	if (rb_list == 0)
		return;
	error = umtx_read_uptr(td, rb_list, &rbp, compat32);
	for (i = 0; error == 0 && rbp != 0 && i < umtx_max_rb; i++) {
		if (rbp == *rb_inact) {
			inact = true;
			*rb_inact = 0;
		} else
			inact = false;
		error = umtx_handle_rb(td, rbp, &rbp, inact, compat32);
	}
	if (i == umtx_max_rb && umtx_verbose_rb) {
		uprintf("comm %s pid %d: reached umtx %smax rb %d\n",
		    td->td_proc->p_comm, td->td_proc->p_pid, name, umtx_max_rb);
	}
	if (error != 0 && umtx_verbose_rb) {
		uprintf("comm %s pid %d: handling %srb error %d\n",
		    td->td_proc->p_comm, td->td_proc->p_pid, name, error);
	}
}

/*
 * Clean up umtx data.
 */
static void
umtx_thread_cleanup(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	uintptr_t rb_inact;
	bool compat32;

	/*
	 * Disown pi mutexes.
	 */
	uq = td->td_umtxq;
	if (uq != NULL) {
		if (uq->uq_inherited_pri != PRI_MAX ||
		    !TAILQ_EMPTY(&uq->uq_pi_contested)) {
			mtx_lock(&umtx_lock);
			uq->uq_inherited_pri = PRI_MAX;
			while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
				pi->pi_owner = NULL;
				TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
			}
			mtx_unlock(&umtx_lock);
		}
		sched_lend_user_prio_cond(td, PRI_MAX);
	}

	compat32 = (td->td_pflags2 & TDP2_COMPAT32RB) != 0;
	td->td_pflags2 &= ~TDP2_COMPAT32RB;

	if (td->td_rb_inact == 0 && td->td_rb_list == 0 && td->td_rbp_list == 0)
		return;

	/*
	 * Handle terminated robust mutexes.  Must be done after
	 * robust pi disown, otherwise unlock could see unowned
	 * entries.
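	 *
	 * The walks in umtx_cleanup_rb_list() are bounded by
	 * umtx_max_rb, so a corrupted or cyclic user-supplied list
	 * cannot stall thread exit indefinitely.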
5174 */ 5175 rb_inact = td->td_rb_inact; 5176 if (rb_inact != 0) 5177 (void)umtx_read_uptr(td, rb_inact, &rb_inact, compat32); 5178 umtx_cleanup_rb_list(td, td->td_rb_list, &rb_inact, "", compat32); 5179 umtx_cleanup_rb_list(td, td->td_rbp_list, &rb_inact, "priv ", compat32); 5180 if (rb_inact != 0) 5181 (void)umtx_handle_rb(td, rb_inact, NULL, true, compat32); 5182 } 5183