1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2015, 2016 The FreeBSD Foundation 5 * Copyright (c) 2004, David Xu <davidxu@freebsd.org> 6 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org> 7 * All rights reserved. 8 * 9 * Portions of this software were developed by Konstantin Belousov 10 * under sponsorship from the FreeBSD Foundation. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice unmodified, this list of conditions, and the following 17 * disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 23 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 24 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 25 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 27 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 31 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <sys/cdefs.h> 35 #include "opt_umtx_profiling.h" 36 37 #include <sys/param.h> 38 #include <sys/kernel.h> 39 #include <sys/fcntl.h> 40 #include <sys/file.h> 41 #include <sys/filedesc.h> 42 #include <sys/limits.h> 43 #include <sys/lock.h> 44 #include <sys/malloc.h> 45 #include <sys/mman.h> 46 #include <sys/mutex.h> 47 #include <sys/priv.h> 48 #include <sys/proc.h> 49 #include <sys/resource.h> 50 #include <sys/resourcevar.h> 51 #include <sys/rwlock.h> 52 #include <sys/sbuf.h> 53 #include <sys/sched.h> 54 #include <sys/smp.h> 55 #include <sys/sysctl.h> 56 #include <sys/systm.h> 57 #include <sys/sysproto.h> 58 #include <sys/syscallsubr.h> 59 #include <sys/taskqueue.h> 60 #include <sys/time.h> 61 #include <sys/eventhandler.h> 62 #include <sys/umtx.h> 63 #include <sys/umtxvar.h> 64 65 #include <security/mac/mac_framework.h> 66 67 #include <vm/vm.h> 68 #include <vm/vm_param.h> 69 #include <vm/pmap.h> 70 #include <vm/uma.h> 71 #include <vm/vm_map.h> 72 #include <vm/vm_object.h> 73 74 #include <machine/atomic.h> 75 #include <machine/cpu.h> 76 77 #include <compat/freebsd32/freebsd32.h> 78 #ifdef COMPAT_FREEBSD32 79 #include <compat/freebsd32/freebsd32_proto.h> 80 #endif 81 82 #define _UMUTEX_TRY 1 83 #define _UMUTEX_WAIT 2 84 85 #ifdef UMTX_PROFILING 86 #define UPROF_PERC_BIGGER(w, f, sw, sf) \ 87 (((w) > (sw)) || ((w) == (sw) && (f) > (sf))) 88 #endif 89 90 #define UMTXQ_LOCKED_ASSERT(uc) mtx_assert(&(uc)->uc_lock, MA_OWNED) 91 #ifdef INVARIANTS 92 #define UMTXQ_ASSERT_LOCKED_BUSY(key) do { \ 93 struct umtxq_chain *uc; \ 94 \ 95 uc = umtxq_getchain(key); \ 96 mtx_assert(&uc->uc_lock, MA_OWNED); \ 97 KASSERT(uc->uc_busy != 0, ("umtx chain is not busy")); \ 98 } while (0) 99 #else 100 #define UMTXQ_ASSERT_LOCKED_BUSY(key) do {} while (0) 101 #endif 102 103 /* 104 * Don't propagate time-sharing priority, there is a security reason, 105 * a user can 
simply introduce a PI mutex, let thread A lock the mutex,
 * and let another thread B block on it.  Because B is sleeping, its
 * priority is boosted, and A's priority is boosted as well through
 * priority propagation; A's priority is then never lowered, even if A
 * uses 100% CPU, which is unfair to other processes.
 */

#define UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
			 PRI_MAX_TIMESHARE : (td)->td_user_pri)

#define	GOLDEN_RATIO_PRIME	2654404609U
#ifndef	UMTX_CHAINS
#define	UMTX_CHAINS		512
#endif
#define	UMTX_SHIFTS		(__WORD_BIT - 9)

#define	GET_SHARE(flags)	\
    (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)

#define	BUSY_SPINS		200

struct umtx_copyops {
	int	(*copyin_timeout)(const void *uaddr, struct timespec *tsp);
	int	(*copyin_umtx_time)(const void *uaddr, size_t size,
	    struct _umtx_time *tp);
	int	(*copyin_robust_lists)(const void *uaddr, size_t size,
	    struct umtx_robust_lists_params *rbp);
	int	(*copyout_timeout)(void *uaddr, size_t size,
	    struct timespec *tsp);
	const size_t	timespec_sz;
	const size_t	umtx_time_sz;
	const bool	compat32;
};

_Static_assert(sizeof(struct umutex) == sizeof(struct umutex32), "umutex32");
_Static_assert(__offsetof(struct umutex, m_spare[0]) ==
    __offsetof(struct umutex32, m_spare[0]), "m_spare32");

int umtx_shm_vnobj_persistent = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_vnode_persistent, CTLFLAG_RWTUN,
    &umtx_shm_vnobj_persistent, 0,
    "False forces destruction of umtx attached to file, on last close");
static int umtx_max_rb = 1000;
SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_max_robust, CTLFLAG_RWTUN,
    &umtx_max_rb, 0,
    "Maximum number of robust mutexes allowed for each thread");

static uma_zone_t		umtx_pi_zone;
static struct umtxq_chain	umtxq_chains[2][UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
static int			umtx_pi_allocated;

static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
    &umtx_pi_allocated, 0, "Allocated umtx_pi");
static int umtx_verbose_rb = 1;
SYSCTL_INT(_debug_umtx, OID_AUTO, robust_faults_verbose, CTLFLAG_RWTUN,
    &umtx_verbose_rb, 0,
    "");

#ifdef UMTX_PROFILING
static long max_length;
SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0, "max_length");
static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "umtx chain stats");
#endif

static inline void umtx_abs_timeout_init2(struct umtx_abs_timeout *timo,
    const struct _umtx_time *umtxtime);

static void umtx_shm_init(void);
static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    bool rb);
static void umtx_thread_cleanup(struct thread *td);
SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);

#define umtxq_signal(key, nwake)	umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)

static struct mtx umtx_lock;

#ifdef UMTX_PROFILING
static void
umtx_init_profiling(void)
{
	struct sysctl_oid *chain_oid;
	char chain_name[10];
	int i;

	for (i = 0; i < UMTX_CHAINS; ++i) {
		snprintf(chain_name,
sizeof(chain_name), "%d", i); 199 chain_oid = SYSCTL_ADD_NODE(NULL, 200 SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO, 201 chain_name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 202 "umtx hash stats"); 203 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, 204 "max_length0", CTLFLAG_RD, &umtxq_chains[0][i].max_length, 0, NULL); 205 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, 206 "max_length1", CTLFLAG_RD, &umtxq_chains[1][i].max_length, 0, NULL); 207 } 208 } 209 210 static int 211 sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS) 212 { 213 char buf[512]; 214 struct sbuf sb; 215 struct umtxq_chain *uc; 216 u_int fract, i, j, tot, whole; 217 u_int sf0, sf1, sf2, sf3, sf4; 218 u_int si0, si1, si2, si3, si4; 219 u_int sw0, sw1, sw2, sw3, sw4; 220 221 sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN); 222 for (i = 0; i < 2; i++) { 223 tot = 0; 224 for (j = 0; j < UMTX_CHAINS; ++j) { 225 uc = &umtxq_chains[i][j]; 226 mtx_lock(&uc->uc_lock); 227 tot += uc->max_length; 228 mtx_unlock(&uc->uc_lock); 229 } 230 if (tot == 0) 231 sbuf_printf(&sb, "%u) Empty ", i); 232 else { 233 sf0 = sf1 = sf2 = sf3 = sf4 = 0; 234 si0 = si1 = si2 = si3 = si4 = 0; 235 sw0 = sw1 = sw2 = sw3 = sw4 = 0; 236 for (j = 0; j < UMTX_CHAINS; j++) { 237 uc = &umtxq_chains[i][j]; 238 mtx_lock(&uc->uc_lock); 239 whole = uc->max_length * 100; 240 mtx_unlock(&uc->uc_lock); 241 fract = (whole % tot) * 100; 242 if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) { 243 sf0 = fract; 244 si0 = j; 245 sw0 = whole; 246 } else if (UPROF_PERC_BIGGER(whole, fract, sw1, 247 sf1)) { 248 sf1 = fract; 249 si1 = j; 250 sw1 = whole; 251 } else if (UPROF_PERC_BIGGER(whole, fract, sw2, 252 sf2)) { 253 sf2 = fract; 254 si2 = j; 255 sw2 = whole; 256 } else if (UPROF_PERC_BIGGER(whole, fract, sw3, 257 sf3)) { 258 sf3 = fract; 259 si3 = j; 260 sw3 = whole; 261 } else if (UPROF_PERC_BIGGER(whole, fract, sw4, 262 sf4)) { 263 sf4 = fract; 264 si4 = j; 265 sw4 = whole; 266 } 267 } 268 sbuf_printf(&sb, "queue %u:\n", i); 269 sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot, 270 sf0 / tot, si0); 271 sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot, 272 sf1 / tot, si1); 273 sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot, 274 sf2 / tot, si2); 275 sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot, 276 sf3 / tot, si3); 277 sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot, 278 sf4 / tot, si4); 279 } 280 } 281 sbuf_trim(&sb); 282 sbuf_finish(&sb); 283 sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req); 284 sbuf_delete(&sb); 285 return (0); 286 } 287 288 static int 289 sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS) 290 { 291 struct umtxq_chain *uc; 292 u_int i, j; 293 int clear, error; 294 295 clear = 0; 296 error = sysctl_handle_int(oidp, &clear, 0, req); 297 if (error != 0 || req->newptr == NULL) 298 return (error); 299 300 if (clear != 0) { 301 for (i = 0; i < 2; ++i) { 302 for (j = 0; j < UMTX_CHAINS; ++j) { 303 uc = &umtxq_chains[i][j]; 304 mtx_lock(&uc->uc_lock); 305 uc->length = 0; 306 uc->max_length = 0; 307 mtx_unlock(&uc->uc_lock); 308 } 309 } 310 } 311 return (0); 312 } 313 314 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear, 315 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, 316 sysctl_debug_umtx_chains_clear, "I", 317 "Clear umtx chains statistics"); 318 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks, 319 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0, 320 sysctl_debug_umtx_chains_peaks, "A", 321 "Highest peaks in chains max length"); 322 #endif 323 324 static void 325 umtxq_sysinit(void *arg 
__unused) 326 { 327 int i, j; 328 329 umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi), 330 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 331 for (i = 0; i < 2; ++i) { 332 for (j = 0; j < UMTX_CHAINS; ++j) { 333 mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL, 334 MTX_DEF | MTX_DUPOK); 335 LIST_INIT(&umtxq_chains[i][j].uc_queue[0]); 336 LIST_INIT(&umtxq_chains[i][j].uc_queue[1]); 337 LIST_INIT(&umtxq_chains[i][j].uc_spare_queue); 338 TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list); 339 umtxq_chains[i][j].uc_busy = 0; 340 umtxq_chains[i][j].uc_waiters = 0; 341 #ifdef UMTX_PROFILING 342 umtxq_chains[i][j].length = 0; 343 umtxq_chains[i][j].max_length = 0; 344 #endif 345 } 346 } 347 #ifdef UMTX_PROFILING 348 umtx_init_profiling(); 349 #endif 350 mtx_init(&umtx_lock, "umtx lock", NULL, MTX_DEF); 351 umtx_shm_init(); 352 } 353 354 struct umtx_q * 355 umtxq_alloc(void) 356 { 357 struct umtx_q *uq; 358 359 uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO); 360 uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX, 361 M_WAITOK | M_ZERO); 362 TAILQ_INIT(&uq->uq_spare_queue->head); 363 TAILQ_INIT(&uq->uq_pi_contested); 364 uq->uq_inherited_pri = PRI_MAX; 365 return (uq); 366 } 367 368 void 369 umtxq_free(struct umtx_q *uq) 370 { 371 372 MPASS(uq->uq_spare_queue != NULL); 373 free(uq->uq_spare_queue, M_UMTX); 374 free(uq, M_UMTX); 375 } 376 377 static inline void 378 umtxq_hash(struct umtx_key *key) 379 { 380 unsigned n; 381 382 n = (uintptr_t)key->info.both.a + key->info.both.b; 383 key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS; 384 } 385 386 struct umtxq_chain * 387 umtxq_getchain(struct umtx_key *key) 388 { 389 390 if (key->type <= TYPE_SEM) 391 return (&umtxq_chains[1][key->hash]); 392 return (&umtxq_chains[0][key->hash]); 393 } 394 395 /* 396 * Set chain to busy state when following operation 397 * may be blocked (kernel mutex can not be used). 398 */ 399 void 400 umtxq_busy(struct umtx_key *key) 401 { 402 struct umtxq_chain *uc; 403 404 uc = umtxq_getchain(key); 405 mtx_assert(&uc->uc_lock, MA_OWNED); 406 if (uc->uc_busy) { 407 #ifdef SMP 408 if (smp_cpus > 1) { 409 int count = BUSY_SPINS; 410 if (count > 0) { 411 umtxq_unlock(key); 412 while (uc->uc_busy && --count > 0) 413 cpu_spinwait(); 414 umtxq_lock(key); 415 } 416 } 417 #endif 418 while (uc->uc_busy) { 419 uc->uc_waiters++; 420 msleep(uc, &uc->uc_lock, 0, "umtxqb", 0); 421 uc->uc_waiters--; 422 } 423 } 424 uc->uc_busy = 1; 425 } 426 427 /* 428 * Unbusy a chain. 
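 * Clears the chain's busy flag and wakes up one thread, if any,
 * that is sleeping in umtxq_busy() waiting for the chain.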
429 */ 430 void 431 umtxq_unbusy(struct umtx_key *key) 432 { 433 struct umtxq_chain *uc; 434 435 uc = umtxq_getchain(key); 436 mtx_assert(&uc->uc_lock, MA_OWNED); 437 KASSERT(uc->uc_busy != 0, ("not busy")); 438 uc->uc_busy = 0; 439 if (uc->uc_waiters) 440 wakeup_one(uc); 441 } 442 443 void 444 umtxq_busy_unlocked(struct umtx_key *key) 445 { 446 umtxq_lock(key); 447 umtxq_busy(key); 448 umtxq_unlock(key); 449 } 450 451 void 452 umtxq_unbusy_unlocked(struct umtx_key *key) 453 { 454 umtxq_lock(key); 455 umtxq_unbusy(key); 456 umtxq_unlock(key); 457 } 458 459 static struct umtxq_queue * 460 umtxq_queue_lookup(struct umtx_key *key, int q) 461 { 462 struct umtxq_queue *uh; 463 struct umtxq_chain *uc; 464 465 uc = umtxq_getchain(key); 466 UMTXQ_LOCKED_ASSERT(uc); 467 LIST_FOREACH(uh, &uc->uc_queue[q], link) { 468 if (umtx_key_match(&uh->key, key)) 469 return (uh); 470 } 471 472 return (NULL); 473 } 474 475 void 476 umtxq_insert_queue(struct umtx_q *uq, int q) 477 { 478 struct umtxq_queue *uh; 479 struct umtxq_chain *uc; 480 481 uc = umtxq_getchain(&uq->uq_key); 482 UMTXQ_LOCKED_ASSERT(uc); 483 KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue")); 484 uh = umtxq_queue_lookup(&uq->uq_key, q); 485 if (uh != NULL) { 486 LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link); 487 } else { 488 uh = uq->uq_spare_queue; 489 uh->key = uq->uq_key; 490 LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link); 491 #ifdef UMTX_PROFILING 492 uc->length++; 493 if (uc->length > uc->max_length) { 494 uc->max_length = uc->length; 495 if (uc->max_length > max_length) 496 max_length = uc->max_length; 497 } 498 #endif 499 } 500 uq->uq_spare_queue = NULL; 501 502 TAILQ_INSERT_TAIL(&uh->head, uq, uq_link); 503 uh->length++; 504 uq->uq_flags |= UQF_UMTXQ; 505 uq->uq_cur_queue = uh; 506 return; 507 } 508 509 void 510 umtxq_remove_queue(struct umtx_q *uq, int q) 511 { 512 struct umtxq_chain *uc; 513 struct umtxq_queue *uh; 514 515 uc = umtxq_getchain(&uq->uq_key); 516 UMTXQ_LOCKED_ASSERT(uc); 517 if (uq->uq_flags & UQF_UMTXQ) { 518 uh = uq->uq_cur_queue; 519 TAILQ_REMOVE(&uh->head, uq, uq_link); 520 uh->length--; 521 uq->uq_flags &= ~UQF_UMTXQ; 522 if (TAILQ_EMPTY(&uh->head)) { 523 KASSERT(uh->length == 0, 524 ("inconsistent umtxq_queue length")); 525 #ifdef UMTX_PROFILING 526 uc->length--; 527 #endif 528 LIST_REMOVE(uh, link); 529 } else { 530 uh = LIST_FIRST(&uc->uc_spare_queue); 531 KASSERT(uh != NULL, ("uc_spare_queue is empty")); 532 LIST_REMOVE(uh, link); 533 } 534 uq->uq_spare_queue = uh; 535 uq->uq_cur_queue = NULL; 536 } 537 } 538 539 /* 540 * Check if there are multiple waiters 541 */ 542 int 543 umtxq_count(struct umtx_key *key) 544 { 545 struct umtxq_queue *uh; 546 547 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 548 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 549 if (uh != NULL) 550 return (uh->length); 551 return (0); 552 } 553 554 /* 555 * Check if there are multiple PI waiters and returns first 556 * waiter. 557 */ 558 static int 559 umtxq_count_pi(struct umtx_key *key, struct umtx_q **first) 560 { 561 struct umtxq_queue *uh; 562 563 *first = NULL; 564 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 565 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 566 if (uh != NULL) { 567 *first = TAILQ_FIRST(&uh->head); 568 return (uh->length); 569 } 570 return (0); 571 } 572 573 /* 574 * Wake up threads waiting on an userland object by a bit mask. 
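 * Only waiters whose uq_bitset intersects the given bitset are
 * considered; at most n_wake of them are removed from the shared
 * queue and woken up.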
575 */ 576 int 577 umtxq_signal_mask(struct umtx_key *key, int n_wake, u_int bitset) 578 { 579 struct umtxq_queue *uh; 580 struct umtx_q *uq, *uq_temp; 581 int ret; 582 583 ret = 0; 584 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 585 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 586 if (uh == NULL) 587 return (0); 588 TAILQ_FOREACH_SAFE(uq, &uh->head, uq_link, uq_temp) { 589 if ((uq->uq_bitset & bitset) == 0) 590 continue; 591 umtxq_remove_queue(uq, UMTX_SHARED_QUEUE); 592 wakeup_one(uq); 593 if (++ret >= n_wake) 594 break; 595 } 596 return (ret); 597 } 598 599 /* 600 * Wake up threads waiting on an userland object. 601 */ 602 603 static int 604 umtxq_signal_queue(struct umtx_key *key, int n_wake, int q) 605 { 606 struct umtxq_queue *uh; 607 struct umtx_q *uq; 608 int ret; 609 610 ret = 0; 611 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 612 uh = umtxq_queue_lookup(key, q); 613 if (uh != NULL) { 614 while ((uq = TAILQ_FIRST(&uh->head)) != NULL) { 615 umtxq_remove_queue(uq, q); 616 wakeup(uq); 617 if (++ret >= n_wake) 618 return (ret); 619 } 620 } 621 return (ret); 622 } 623 624 /* 625 * Wake up specified thread. 626 */ 627 static inline void 628 umtxq_signal_thread(struct umtx_q *uq) 629 { 630 631 UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key)); 632 umtxq_remove(uq); 633 wakeup(uq); 634 } 635 636 /* 637 * Wake up a maximum of n_wake threads that are waiting on an userland 638 * object identified by key. The remaining threads are removed from queue 639 * identified by key and added to the queue identified by key2 (requeued). 640 * The n_requeue specifies an upper limit on the number of threads that 641 * are requeued to the second queue. 642 */ 643 int 644 umtxq_requeue(struct umtx_key *key, int n_wake, struct umtx_key *key2, 645 int n_requeue) 646 { 647 struct umtxq_queue *uh; 648 struct umtx_q *uq, *uq_temp; 649 int ret; 650 651 ret = 0; 652 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 653 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key2)); 654 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 655 if (uh == NULL) 656 return (0); 657 TAILQ_FOREACH_SAFE(uq, &uh->head, uq_link, uq_temp) { 658 if (++ret <= n_wake) { 659 umtxq_remove(uq); 660 wakeup_one(uq); 661 } else { 662 umtxq_remove(uq); 663 uq->uq_key = *key2; 664 umtxq_insert(uq); 665 if (ret - n_wake == n_requeue) 666 break; 667 } 668 } 669 return (ret); 670 } 671 672 static inline int 673 tstohz(const struct timespec *tsp) 674 { 675 struct timeval tv; 676 677 TIMESPEC_TO_TIMEVAL(&tv, tsp); 678 return tvtohz(&tv); 679 } 680 681 void 682 umtx_abs_timeout_init(struct umtx_abs_timeout *timo, int clockid, 683 int absolute, const struct timespec *timeout) 684 { 685 686 timo->clockid = clockid; 687 if (!absolute) { 688 timo->is_abs_real = false; 689 kern_clock_gettime(curthread, timo->clockid, &timo->cur); 690 timespecadd(&timo->cur, timeout, &timo->end); 691 } else { 692 timo->end = *timeout; 693 timo->is_abs_real = clockid == CLOCK_REALTIME || 694 clockid == CLOCK_REALTIME_FAST || 695 clockid == CLOCK_REALTIME_PRECISE || 696 clockid == CLOCK_SECOND; 697 } 698 } 699 700 static void 701 umtx_abs_timeout_init2(struct umtx_abs_timeout *timo, 702 const struct _umtx_time *umtxtime) 703 { 704 705 umtx_abs_timeout_init(timo, umtxtime->_clockid, 706 (umtxtime->_flags & UMTX_ABSTIME) != 0, &umtxtime->_timeout); 707 } 708 709 static void 710 umtx_abs_timeout_enforce_min(sbintime_t *sbt) 711 { 712 sbintime_t when, mint; 713 714 mint = curproc->p_umtx_min_timeout; 715 if (__predict_false(mint != 0)) { 716 when = sbinuptime() + mint; 717 if (*sbt < when) 718 *sbt = 
when; 719 } 720 } 721 722 static int 723 umtx_abs_timeout_getsbt(struct umtx_abs_timeout *timo, sbintime_t *sbt, 724 int *flags) 725 { 726 struct bintime bt, bbt; 727 struct timespec tts; 728 sbintime_t rem; 729 730 switch (timo->clockid) { 731 732 /* Clocks that can be converted into absolute time. */ 733 case CLOCK_REALTIME: 734 case CLOCK_REALTIME_PRECISE: 735 case CLOCK_REALTIME_FAST: 736 case CLOCK_MONOTONIC: 737 case CLOCK_MONOTONIC_PRECISE: 738 case CLOCK_MONOTONIC_FAST: 739 case CLOCK_UPTIME: 740 case CLOCK_UPTIME_PRECISE: 741 case CLOCK_UPTIME_FAST: 742 case CLOCK_SECOND: 743 timespec2bintime(&timo->end, &bt); 744 switch (timo->clockid) { 745 case CLOCK_REALTIME: 746 case CLOCK_REALTIME_PRECISE: 747 case CLOCK_REALTIME_FAST: 748 case CLOCK_SECOND: 749 getboottimebin(&bbt); 750 bintime_sub(&bt, &bbt); 751 break; 752 } 753 if (bt.sec < 0) 754 return (ETIMEDOUT); 755 if (bt.sec >= (SBT_MAX >> 32)) { 756 *sbt = 0; 757 *flags = 0; 758 return (0); 759 } 760 *sbt = bttosbt(bt); 761 umtx_abs_timeout_enforce_min(sbt); 762 763 /* 764 * Check if the absolute time should be aligned to 765 * avoid firing multiple timer events in non-periodic 766 * timer mode. 767 */ 768 switch (timo->clockid) { 769 case CLOCK_REALTIME_FAST: 770 case CLOCK_MONOTONIC_FAST: 771 case CLOCK_UPTIME_FAST: 772 rem = *sbt % tc_tick_sbt; 773 if (__predict_true(rem != 0)) 774 *sbt += tc_tick_sbt - rem; 775 break; 776 case CLOCK_SECOND: 777 rem = *sbt % SBT_1S; 778 if (__predict_true(rem != 0)) 779 *sbt += SBT_1S - rem; 780 break; 781 } 782 *flags = C_ABSOLUTE; 783 return (0); 784 785 /* Clocks that has to be periodically polled. */ 786 case CLOCK_VIRTUAL: 787 case CLOCK_PROF: 788 case CLOCK_THREAD_CPUTIME_ID: 789 case CLOCK_PROCESS_CPUTIME_ID: 790 default: 791 kern_clock_gettime(curthread, timo->clockid, &timo->cur); 792 if (timespeccmp(&timo->end, &timo->cur, <=)) 793 return (ETIMEDOUT); 794 timespecsub(&timo->end, &timo->cur, &tts); 795 *sbt = tick_sbt * tstohz(&tts); 796 *flags = C_HARDCLOCK; 797 return (0); 798 } 799 } 800 801 static uint32_t 802 umtx_unlock_val(uint32_t flags, bool rb) 803 { 804 805 if (rb) 806 return (UMUTEX_RB_OWNERDEAD); 807 else if ((flags & UMUTEX_NONCONSISTENT) != 0) 808 return (UMUTEX_RB_NOTRECOV); 809 else 810 return (UMUTEX_UNOWNED); 811 812 } 813 814 /* 815 * Put thread into sleep state, before sleeping, check if 816 * thread was removed from umtx queue. 817 */ 818 int 819 umtxq_sleep(struct umtx_q *uq, const char *wmesg, 820 struct umtx_abs_timeout *timo) 821 { 822 struct umtxq_chain *uc; 823 sbintime_t sbt = 0; 824 int error, flags = 0; 825 826 uc = umtxq_getchain(&uq->uq_key); 827 UMTXQ_LOCKED_ASSERT(uc); 828 for (;;) { 829 if (!(uq->uq_flags & UQF_UMTXQ)) { 830 error = 0; 831 break; 832 } 833 if (timo != NULL) { 834 if (timo->is_abs_real) 835 curthread->td_rtcgen = 836 atomic_load_acq_int(&rtc_generation); 837 error = umtx_abs_timeout_getsbt(timo, &sbt, &flags); 838 if (error != 0) 839 break; 840 } 841 error = msleep_sbt(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, 842 sbt, 0, flags); 843 uc = umtxq_getchain(&uq->uq_key); 844 mtx_lock(&uc->uc_lock); 845 if (error == EINTR || error == ERESTART) 846 break; 847 if (error == EWOULDBLOCK && (flags & C_ABSOLUTE) != 0) { 848 error = ETIMEDOUT; 849 break; 850 } 851 } 852 853 curthread->td_rtcgen = 0; 854 return (error); 855 } 856 857 /* 858 * Convert userspace address into unique logical address. 
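 * Process-shared objects are keyed by the backing VM object and the
 * offset within it, while process-private objects are keyed by the
 * owning vmspace and the user virtual address.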
859 */ 860 int 861 umtx_key_get(const void *addr, int type, int share, struct umtx_key *key) 862 { 863 struct thread *td = curthread; 864 vm_map_t map; 865 vm_map_entry_t entry; 866 vm_pindex_t pindex; 867 vm_prot_t prot; 868 boolean_t wired; 869 870 key->type = type; 871 if (share == THREAD_SHARE) { 872 key->shared = 0; 873 key->info.private.vs = td->td_proc->p_vmspace; 874 key->info.private.addr = (uintptr_t)addr; 875 } else { 876 MPASS(share == PROCESS_SHARE || share == AUTO_SHARE); 877 map = &td->td_proc->p_vmspace->vm_map; 878 if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE, 879 &entry, &key->info.shared.object, &pindex, &prot, 880 &wired) != KERN_SUCCESS) { 881 return (EFAULT); 882 } 883 884 if ((share == PROCESS_SHARE) || 885 (share == AUTO_SHARE && 886 VM_INHERIT_SHARE == entry->inheritance)) { 887 key->shared = 1; 888 key->info.shared.offset = (vm_offset_t)addr - 889 entry->start + entry->offset; 890 vm_object_reference(key->info.shared.object); 891 } else { 892 key->shared = 0; 893 key->info.private.vs = td->td_proc->p_vmspace; 894 key->info.private.addr = (uintptr_t)addr; 895 } 896 vm_map_lookup_done(map, entry); 897 } 898 899 umtxq_hash(key); 900 return (0); 901 } 902 903 /* 904 * Release key. 905 */ 906 void 907 umtx_key_release(struct umtx_key *key) 908 { 909 if (key->shared) 910 vm_object_deallocate(key->info.shared.object); 911 } 912 913 #ifdef COMPAT_FREEBSD10 914 /* 915 * Lock a umtx object. 916 */ 917 static int 918 do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, 919 const struct timespec *timeout) 920 { 921 struct umtx_abs_timeout timo; 922 struct umtx_q *uq; 923 u_long owner; 924 u_long old; 925 int error = 0; 926 927 uq = td->td_umtxq; 928 if (timeout != NULL) 929 umtx_abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout); 930 931 /* 932 * Care must be exercised when dealing with umtx structure. It 933 * can fault on any access. 934 */ 935 for (;;) { 936 /* 937 * Try the uncontested case. This should be done in userland. 938 */ 939 owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id); 940 941 /* The acquire succeeded. */ 942 if (owner == UMTX_UNOWNED) 943 return (0); 944 945 /* The address was invalid. */ 946 if (owner == -1) 947 return (EFAULT); 948 949 /* If no one owns it but it is contested try to acquire it. */ 950 if (owner == UMTX_CONTESTED) { 951 owner = casuword(&umtx->u_owner, 952 UMTX_CONTESTED, id | UMTX_CONTESTED); 953 954 if (owner == UMTX_CONTESTED) 955 return (0); 956 957 /* The address was invalid. */ 958 if (owner == -1) 959 return (EFAULT); 960 961 error = thread_check_susp(td, false); 962 if (error != 0) 963 break; 964 965 /* If this failed the lock has changed, restart. */ 966 continue; 967 } 968 969 /* 970 * If we caught a signal, we have retried and now 971 * exit immediately. 972 */ 973 if (error != 0) 974 break; 975 976 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, 977 AUTO_SHARE, &uq->uq_key)) != 0) 978 return (error); 979 980 umtxq_lock(&uq->uq_key); 981 umtxq_busy(&uq->uq_key); 982 umtxq_insert(uq); 983 umtxq_unbusy(&uq->uq_key); 984 umtxq_unlock(&uq->uq_key); 985 986 /* 987 * Set the contested bit so that a release in user space 988 * knows to use the system call for unlock. If this fails 989 * either some one else has acquired the lock or it has been 990 * released. 991 */ 992 old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED); 993 994 /* The address was invalid. 
*/ 995 if (old == -1) { 996 umtxq_lock(&uq->uq_key); 997 umtxq_remove(uq); 998 umtxq_unlock(&uq->uq_key); 999 umtx_key_release(&uq->uq_key); 1000 return (EFAULT); 1001 } 1002 1003 /* 1004 * We set the contested bit, sleep. Otherwise the lock changed 1005 * and we need to retry or we lost a race to the thread 1006 * unlocking the umtx. 1007 */ 1008 umtxq_lock(&uq->uq_key); 1009 if (old == owner) 1010 error = umtxq_sleep(uq, "umtx", timeout == NULL ? NULL : 1011 &timo); 1012 umtxq_remove(uq); 1013 umtxq_unlock(&uq->uq_key); 1014 umtx_key_release(&uq->uq_key); 1015 1016 if (error == 0) 1017 error = thread_check_susp(td, false); 1018 } 1019 1020 if (timeout == NULL) { 1021 /* Mutex locking is restarted if it is interrupted. */ 1022 if (error == EINTR) 1023 error = ERESTART; 1024 } else { 1025 /* Timed-locking is not restarted. */ 1026 if (error == ERESTART) 1027 error = EINTR; 1028 } 1029 return (error); 1030 } 1031 1032 /* 1033 * Unlock a umtx object. 1034 */ 1035 static int 1036 do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id) 1037 { 1038 struct umtx_key key; 1039 u_long owner; 1040 u_long old; 1041 int error; 1042 int count; 1043 1044 /* 1045 * Make sure we own this mtx. 1046 */ 1047 owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner)); 1048 if (owner == -1) 1049 return (EFAULT); 1050 1051 if ((owner & ~UMTX_CONTESTED) != id) 1052 return (EPERM); 1053 1054 /* This should be done in userland */ 1055 if ((owner & UMTX_CONTESTED) == 0) { 1056 old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED); 1057 if (old == -1) 1058 return (EFAULT); 1059 if (old == owner) 1060 return (0); 1061 owner = old; 1062 } 1063 1064 /* We should only ever be in here for contested locks */ 1065 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE, 1066 &key)) != 0) 1067 return (error); 1068 1069 umtxq_lock(&key); 1070 umtxq_busy(&key); 1071 count = umtxq_count(&key); 1072 umtxq_unlock(&key); 1073 1074 /* 1075 * When unlocking the umtx, it must be marked as unowned if 1076 * there is zero or one thread only waiting for it. 1077 * Otherwise, it must be marked as contested. 1078 */ 1079 old = casuword(&umtx->u_owner, owner, 1080 count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED); 1081 umtxq_lock(&key); 1082 umtxq_signal(&key,1); 1083 umtxq_unbusy(&key); 1084 umtxq_unlock(&key); 1085 umtx_key_release(&key); 1086 if (old == -1) 1087 return (EFAULT); 1088 if (old != owner) 1089 return (EINVAL); 1090 return (0); 1091 } 1092 1093 #ifdef COMPAT_FREEBSD32 1094 1095 /* 1096 * Lock a umtx object. 1097 */ 1098 static int 1099 do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, 1100 const struct timespec *timeout) 1101 { 1102 struct umtx_abs_timeout timo; 1103 struct umtx_q *uq; 1104 uint32_t owner; 1105 uint32_t old; 1106 int error = 0; 1107 1108 uq = td->td_umtxq; 1109 1110 if (timeout != NULL) 1111 umtx_abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout); 1112 1113 /* 1114 * Care must be exercised when dealing with umtx structure. It 1115 * can fault on any access. 1116 */ 1117 for (;;) { 1118 /* 1119 * Try the uncontested case. This should be done in userland. 1120 */ 1121 owner = casuword32(m, UMUTEX_UNOWNED, id); 1122 1123 /* The acquire succeeded. */ 1124 if (owner == UMUTEX_UNOWNED) 1125 return (0); 1126 1127 /* The address was invalid. */ 1128 if (owner == -1) 1129 return (EFAULT); 1130 1131 /* If no one owns it but it is contested try to acquire it. 
*/ 1132 if (owner == UMUTEX_CONTESTED) { 1133 owner = casuword32(m, 1134 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); 1135 if (owner == UMUTEX_CONTESTED) 1136 return (0); 1137 1138 /* The address was invalid. */ 1139 if (owner == -1) 1140 return (EFAULT); 1141 1142 error = thread_check_susp(td, false); 1143 if (error != 0) 1144 break; 1145 1146 /* If this failed the lock has changed, restart. */ 1147 continue; 1148 } 1149 1150 /* 1151 * If we caught a signal, we have retried and now 1152 * exit immediately. 1153 */ 1154 if (error != 0) 1155 return (error); 1156 1157 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, 1158 AUTO_SHARE, &uq->uq_key)) != 0) 1159 return (error); 1160 1161 umtxq_lock(&uq->uq_key); 1162 umtxq_busy(&uq->uq_key); 1163 umtxq_insert(uq); 1164 umtxq_unbusy(&uq->uq_key); 1165 umtxq_unlock(&uq->uq_key); 1166 1167 /* 1168 * Set the contested bit so that a release in user space 1169 * knows to use the system call for unlock. If this fails 1170 * either some one else has acquired the lock or it has been 1171 * released. 1172 */ 1173 old = casuword32(m, owner, owner | UMUTEX_CONTESTED); 1174 1175 /* The address was invalid. */ 1176 if (old == -1) { 1177 umtxq_lock(&uq->uq_key); 1178 umtxq_remove(uq); 1179 umtxq_unlock(&uq->uq_key); 1180 umtx_key_release(&uq->uq_key); 1181 return (EFAULT); 1182 } 1183 1184 /* 1185 * We set the contested bit, sleep. Otherwise the lock changed 1186 * and we need to retry or we lost a race to the thread 1187 * unlocking the umtx. 1188 */ 1189 umtxq_lock(&uq->uq_key); 1190 if (old == owner) 1191 error = umtxq_sleep(uq, "umtx", timeout == NULL ? 1192 NULL : &timo); 1193 umtxq_remove(uq); 1194 umtxq_unlock(&uq->uq_key); 1195 umtx_key_release(&uq->uq_key); 1196 1197 if (error == 0) 1198 error = thread_check_susp(td, false); 1199 } 1200 1201 if (timeout == NULL) { 1202 /* Mutex locking is restarted if it is interrupted. */ 1203 if (error == EINTR) 1204 error = ERESTART; 1205 } else { 1206 /* Timed-locking is not restarted. */ 1207 if (error == ERESTART) 1208 error = EINTR; 1209 } 1210 return (error); 1211 } 1212 1213 /* 1214 * Unlock a umtx object. 1215 */ 1216 static int 1217 do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id) 1218 { 1219 struct umtx_key key; 1220 uint32_t owner; 1221 uint32_t old; 1222 int error; 1223 int count; 1224 1225 /* 1226 * Make sure we own this mtx. 1227 */ 1228 owner = fuword32(m); 1229 if (owner == -1) 1230 return (EFAULT); 1231 1232 if ((owner & ~UMUTEX_CONTESTED) != id) 1233 return (EPERM); 1234 1235 /* This should be done in userland */ 1236 if ((owner & UMUTEX_CONTESTED) == 0) { 1237 old = casuword32(m, owner, UMUTEX_UNOWNED); 1238 if (old == -1) 1239 return (EFAULT); 1240 if (old == owner) 1241 return (0); 1242 owner = old; 1243 } 1244 1245 /* We should only ever be in here for contested locks */ 1246 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE, 1247 &key)) != 0) 1248 return (error); 1249 1250 umtxq_lock(&key); 1251 umtxq_busy(&key); 1252 count = umtxq_count(&key); 1253 umtxq_unlock(&key); 1254 1255 /* 1256 * When unlocking the umtx, it must be marked as unowned if 1257 * there is zero or one thread only waiting for it. 1258 * Otherwise, it must be marked as contested. 1259 */ 1260 old = casuword32(m, owner, 1261 count <= 1 ? 
UMUTEX_UNOWNED : UMUTEX_CONTESTED); 1262 umtxq_lock(&key); 1263 umtxq_signal(&key,1); 1264 umtxq_unbusy(&key); 1265 umtxq_unlock(&key); 1266 umtx_key_release(&key); 1267 if (old == -1) 1268 return (EFAULT); 1269 if (old != owner) 1270 return (EINVAL); 1271 return (0); 1272 } 1273 #endif /* COMPAT_FREEBSD32 */ 1274 #endif /* COMPAT_FREEBSD10 */ 1275 1276 /* 1277 * Fetch and compare value, sleep on the address if value is not changed. 1278 */ 1279 static int 1280 do_wait(struct thread *td, void *addr, u_long id, 1281 struct _umtx_time *timeout, int compat32, int is_private) 1282 { 1283 struct umtx_abs_timeout timo; 1284 struct umtx_q *uq; 1285 u_long tmp; 1286 uint32_t tmp32; 1287 int error = 0; 1288 1289 uq = td->td_umtxq; 1290 if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT, 1291 is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0) 1292 return (error); 1293 1294 if (timeout != NULL) 1295 umtx_abs_timeout_init2(&timo, timeout); 1296 1297 umtxq_lock(&uq->uq_key); 1298 umtxq_insert(uq); 1299 umtxq_unlock(&uq->uq_key); 1300 if (compat32 == 0) { 1301 error = fueword(addr, &tmp); 1302 if (error != 0) 1303 error = EFAULT; 1304 } else { 1305 error = fueword32(addr, &tmp32); 1306 if (error == 0) 1307 tmp = tmp32; 1308 else 1309 error = EFAULT; 1310 } 1311 umtxq_lock(&uq->uq_key); 1312 if (error == 0) { 1313 if (tmp == id) 1314 error = umtxq_sleep(uq, "uwait", timeout == NULL ? 1315 NULL : &timo); 1316 if ((uq->uq_flags & UQF_UMTXQ) == 0) 1317 error = 0; 1318 else 1319 umtxq_remove(uq); 1320 } else if ((uq->uq_flags & UQF_UMTXQ) != 0) { 1321 umtxq_remove(uq); 1322 } 1323 umtxq_unlock(&uq->uq_key); 1324 umtx_key_release(&uq->uq_key); 1325 if (error == ERESTART) 1326 error = EINTR; 1327 return (error); 1328 } 1329 1330 /* 1331 * Wake up threads sleeping on the specified address. 1332 */ 1333 int 1334 kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private) 1335 { 1336 struct umtx_key key; 1337 int ret; 1338 1339 if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, 1340 is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0) 1341 return (ret); 1342 umtxq_lock(&key); 1343 umtxq_signal(&key, n_wake); 1344 umtxq_unlock(&key); 1345 umtx_key_release(&key); 1346 return (0); 1347 } 1348 1349 /* 1350 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex. 1351 */ 1352 static int 1353 do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, 1354 struct _umtx_time *timeout, int mode) 1355 { 1356 struct umtx_abs_timeout timo; 1357 struct umtx_q *uq; 1358 uint32_t owner, old, id; 1359 int error, rv; 1360 1361 id = td->td_tid; 1362 uq = td->td_umtxq; 1363 error = 0; 1364 if (timeout != NULL) 1365 umtx_abs_timeout_init2(&timo, timeout); 1366 1367 /* 1368 * Care must be exercised when dealing with umtx structure. It 1369 * can fault on any access. 1370 */ 1371 for (;;) { 1372 rv = fueword32(&m->m_owner, &owner); 1373 if (rv == -1) 1374 return (EFAULT); 1375 if (mode == _UMUTEX_WAIT) { 1376 if (owner == UMUTEX_UNOWNED || 1377 owner == UMUTEX_CONTESTED || 1378 owner == UMUTEX_RB_OWNERDEAD || 1379 owner == UMUTEX_RB_NOTRECOV) 1380 return (0); 1381 } else { 1382 /* 1383 * Robust mutex terminated. Kernel duty is to 1384 * return EOWNERDEAD to the userspace. The 1385 * umutex.m_flags UMUTEX_NONCONSISTENT is set 1386 * by the common userspace code. 
1387 */ 1388 if (owner == UMUTEX_RB_OWNERDEAD) { 1389 rv = casueword32(&m->m_owner, 1390 UMUTEX_RB_OWNERDEAD, &owner, 1391 id | UMUTEX_CONTESTED); 1392 if (rv == -1) 1393 return (EFAULT); 1394 if (rv == 0) { 1395 MPASS(owner == UMUTEX_RB_OWNERDEAD); 1396 return (EOWNERDEAD); /* success */ 1397 } 1398 MPASS(rv == 1); 1399 rv = thread_check_susp(td, false); 1400 if (rv != 0) 1401 return (rv); 1402 continue; 1403 } 1404 if (owner == UMUTEX_RB_NOTRECOV) 1405 return (ENOTRECOVERABLE); 1406 1407 /* 1408 * Try the uncontested case. This should be 1409 * done in userland. 1410 */ 1411 rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, 1412 &owner, id); 1413 /* The address was invalid. */ 1414 if (rv == -1) 1415 return (EFAULT); 1416 1417 /* The acquire succeeded. */ 1418 if (rv == 0) { 1419 MPASS(owner == UMUTEX_UNOWNED); 1420 return (0); 1421 } 1422 1423 /* 1424 * If no one owns it but it is contested try 1425 * to acquire it. 1426 */ 1427 MPASS(rv == 1); 1428 if (owner == UMUTEX_CONTESTED) { 1429 rv = casueword32(&m->m_owner, 1430 UMUTEX_CONTESTED, &owner, 1431 id | UMUTEX_CONTESTED); 1432 /* The address was invalid. */ 1433 if (rv == -1) 1434 return (EFAULT); 1435 if (rv == 0) { 1436 MPASS(owner == UMUTEX_CONTESTED); 1437 return (0); 1438 } 1439 if (rv == 1) { 1440 rv = thread_check_susp(td, false); 1441 if (rv != 0) 1442 return (rv); 1443 } 1444 1445 /* 1446 * If this failed the lock has 1447 * changed, restart. 1448 */ 1449 continue; 1450 } 1451 1452 /* rv == 1 but not contested, likely store failure */ 1453 rv = thread_check_susp(td, false); 1454 if (rv != 0) 1455 return (rv); 1456 } 1457 1458 if (mode == _UMUTEX_TRY) 1459 return (EBUSY); 1460 1461 /* 1462 * If we caught a signal, we have retried and now 1463 * exit immediately. 1464 */ 1465 if (error != 0) 1466 return (error); 1467 1468 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, 1469 GET_SHARE(flags), &uq->uq_key)) != 0) 1470 return (error); 1471 1472 umtxq_lock(&uq->uq_key); 1473 umtxq_busy(&uq->uq_key); 1474 umtxq_insert(uq); 1475 umtxq_unlock(&uq->uq_key); 1476 1477 /* 1478 * Set the contested bit so that a release in user space 1479 * knows to use the system call for unlock. If this fails 1480 * either some one else has acquired the lock or it has been 1481 * released. 1482 */ 1483 rv = casueword32(&m->m_owner, owner, &old, 1484 owner | UMUTEX_CONTESTED); 1485 1486 /* The address was invalid or casueword failed to store. */ 1487 if (rv == -1 || rv == 1) { 1488 umtxq_lock(&uq->uq_key); 1489 umtxq_remove(uq); 1490 umtxq_unbusy(&uq->uq_key); 1491 umtxq_unlock(&uq->uq_key); 1492 umtx_key_release(&uq->uq_key); 1493 if (rv == -1) 1494 return (EFAULT); 1495 if (rv == 1) { 1496 rv = thread_check_susp(td, false); 1497 if (rv != 0) 1498 return (rv); 1499 } 1500 continue; 1501 } 1502 1503 /* 1504 * We set the contested bit, sleep. Otherwise the lock changed 1505 * and we need to retry or we lost a race to the thread 1506 * unlocking the umtx. 1507 */ 1508 umtxq_lock(&uq->uq_key); 1509 umtxq_unbusy(&uq->uq_key); 1510 MPASS(old == owner); 1511 error = umtxq_sleep(uq, "umtxn", timeout == NULL ? 1512 NULL : &timo); 1513 umtxq_remove(uq); 1514 umtxq_unlock(&uq->uq_key); 1515 umtx_key_release(&uq->uq_key); 1516 1517 if (error == 0) 1518 error = thread_check_susp(td, false); 1519 } 1520 1521 return (0); 1522 } 1523 1524 /* 1525 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex. 
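 * An uncontested mutex is released with a single CAS on m_owner.
 * The contested path below wakes one waiter and keeps the contested
 * bit set while more than one waiter remains queued.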
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	uint32_t owner, old, id, newlock;
	int error, count;

	id = td->td_tid;

again:
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	newlock = umtx_unlock_val(flags, rb);
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, newlock);
		if (error == -1)
			return (EFAULT);
		if (error == 1) {
			error = thread_check_susp(td, false);
			if (error != 0)
				return (error);
			goto again;
		}
		MPASS(old == owner);
		return (0);
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * zero or one thread is waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	if (count > 1)
		newlock |= UMUTEX_CONTESTED;
	error = casueword32(&m->m_owner, owner, &old, newlock);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (error == 1) {
		if (old != owner)
			return (EINVAL);
		error = thread_check_susp(td, false);
		if (error != 0)
			return (error);
		goto again;
	}
	return (0);
}

/*
 * Check if the mutex is available and wake up a waiter;
 * this is done only for simple mutexes.
 */
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

again:
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != 0 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV)
		return (0);

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	if (count <= 1 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV) {
		error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    UMUTEX_UNOWNED);
		if (error == -1) {
			error = EFAULT;
		} else if (error == 1) {
			umtxq_lock(&key);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			error = thread_check_susp(td, false);
			if (error != 0)
				return (error);
			goto again;
		}
	}

	umtxq_lock(&key);
	if (error == 0 && count != 0) {
		MPASS((owner & ~UMUTEX_CONTESTED) == 0 ||
		    owner == UMUTEX_RB_OWNERDEAD ||
		    owner == UMUTEX_RB_NOTRECOV);
		umtxq_signal(&key, 1);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

/*
 * Check if the mutex has waiters and try to fix the contention bit.
 */
static int
do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old;
	int type;
	int error;
	int count;

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT |
	    UMUTEX_ROBUST)) {
	case 0:
	case UMUTEX_ROBUST:
		type = TYPE_NORMAL_UMUTEX;
		break;
	case UMUTEX_PRIO_INHERIT:
		type = TYPE_PI_UMUTEX;
		break;
	case (UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST):
		type = TYPE_PI_ROBUST_UMUTEX;
		break;
	case UMUTEX_PRIO_PROTECT:
		type = TYPE_PP_UMUTEX;
		break;
	case (UMUTEX_PRIO_PROTECT | UMUTEX_ROBUST):
		type = TYPE_PP_ROBUST_UMUTEX;
		break;
	default:
		return (EINVAL);
	}
	if ((error = umtx_key_get(m, type, GET_SHARE(flags), &key)) != 0)
		return (error);

	owner = 0;
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		error = EFAULT;

	/*
	 * Only repair the contention bit if there is a waiter; that means
	 * the mutex is still being referenced by userland code.
	 * Otherwise, do not update any memory.
1715 */ 1716 while (error == 0 && (owner & UMUTEX_CONTESTED) == 0 && 1717 (count > 1 || (count == 1 && (owner & ~UMUTEX_CONTESTED) != 0))) { 1718 error = casueword32(&m->m_owner, owner, &old, 1719 owner | UMUTEX_CONTESTED); 1720 if (error == -1) { 1721 error = EFAULT; 1722 break; 1723 } 1724 if (error == 0) { 1725 MPASS(old == owner); 1726 break; 1727 } 1728 owner = old; 1729 error = thread_check_susp(td, false); 1730 } 1731 1732 umtxq_lock(&key); 1733 if (error == EFAULT) { 1734 umtxq_signal(&key, INT_MAX); 1735 } else if (count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 || 1736 owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV)) 1737 umtxq_signal(&key, 1); 1738 umtxq_unbusy(&key); 1739 umtxq_unlock(&key); 1740 umtx_key_release(&key); 1741 return (error); 1742 } 1743 1744 struct umtx_pi * 1745 umtx_pi_alloc(int flags) 1746 { 1747 struct umtx_pi *pi; 1748 1749 pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags); 1750 if (pi == NULL) 1751 return (NULL); 1752 1753 TAILQ_INIT(&pi->pi_blocked); 1754 atomic_add_int(&umtx_pi_allocated, 1); 1755 return (pi); 1756 } 1757 1758 void 1759 umtx_pi_free(struct umtx_pi *pi) 1760 { 1761 uma_zfree(umtx_pi_zone, pi); 1762 atomic_add_int(&umtx_pi_allocated, -1); 1763 } 1764 1765 /* 1766 * Adjust the thread's position on a pi_state after its priority has been 1767 * changed. 1768 */ 1769 static int 1770 umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td) 1771 { 1772 struct umtx_q *uq, *uq1, *uq2; 1773 struct thread *td1; 1774 1775 mtx_assert(&umtx_lock, MA_OWNED); 1776 if (pi == NULL) 1777 return (0); 1778 1779 uq = td->td_umtxq; 1780 1781 /* 1782 * Check if the thread needs to be moved on the blocked chain. 1783 * It needs to be moved if either its priority is lower than 1784 * the previous thread or higher than the next thread. 1785 */ 1786 uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq); 1787 uq2 = TAILQ_NEXT(uq, uq_lockq); 1788 if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) || 1789 (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) { 1790 /* 1791 * Remove thread from blocked chain and determine where 1792 * it should be moved to. 1793 */ 1794 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq); 1795 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) { 1796 td1 = uq1->uq_thread; 1797 MPASS(td1->td_proc->p_magic == P_MAGIC); 1798 if (UPRI(td1) > UPRI(td)) 1799 break; 1800 } 1801 1802 if (uq1 == NULL) 1803 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq); 1804 else 1805 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq); 1806 } 1807 return (1); 1808 } 1809 1810 static struct umtx_pi * 1811 umtx_pi_next(struct umtx_pi *pi) 1812 { 1813 struct umtx_q *uq_owner; 1814 1815 if (pi->pi_owner == NULL) 1816 return (NULL); 1817 uq_owner = pi->pi_owner->td_umtxq; 1818 if (uq_owner == NULL) 1819 return (NULL); 1820 return (uq_owner->uq_pi_blocked); 1821 } 1822 1823 /* 1824 * Floyd's Cycle-Finding Algorithm. 1825 */ 1826 static bool 1827 umtx_pi_check_loop(struct umtx_pi *pi) 1828 { 1829 struct umtx_pi *pi1; /* fast iterator */ 1830 1831 mtx_assert(&umtx_lock, MA_OWNED); 1832 if (pi == NULL) 1833 return (false); 1834 pi1 = pi; 1835 for (;;) { 1836 pi = umtx_pi_next(pi); 1837 if (pi == NULL) 1838 break; 1839 pi1 = umtx_pi_next(pi1); 1840 if (pi1 == NULL) 1841 break; 1842 pi1 = umtx_pi_next(pi1); 1843 if (pi1 == NULL) 1844 break; 1845 if (pi == pi1) 1846 return (true); 1847 } 1848 return (false); 1849 } 1850 1851 /* 1852 * Propagate priority when a thread is blocked on POSIX 1853 * PI mutex. 
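 * Walks the chain of PI mutex owners starting from the mutex td is
 * blocked on, lending td's user priority to each owner whose current
 * lent priority is weaker, and stops at an owner that is not itself
 * blocked on a PI mutex.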
1854 */ 1855 static void 1856 umtx_propagate_priority(struct thread *td) 1857 { 1858 struct umtx_q *uq; 1859 struct umtx_pi *pi; 1860 int pri; 1861 1862 mtx_assert(&umtx_lock, MA_OWNED); 1863 pri = UPRI(td); 1864 uq = td->td_umtxq; 1865 pi = uq->uq_pi_blocked; 1866 if (pi == NULL) 1867 return; 1868 if (umtx_pi_check_loop(pi)) 1869 return; 1870 1871 for (;;) { 1872 td = pi->pi_owner; 1873 if (td == NULL || td == curthread) 1874 return; 1875 1876 MPASS(td->td_proc != NULL); 1877 MPASS(td->td_proc->p_magic == P_MAGIC); 1878 1879 thread_lock(td); 1880 if (td->td_lend_user_pri > pri) 1881 sched_lend_user_prio(td, pri); 1882 else { 1883 thread_unlock(td); 1884 break; 1885 } 1886 thread_unlock(td); 1887 1888 /* 1889 * Pick up the lock that td is blocked on. 1890 */ 1891 uq = td->td_umtxq; 1892 pi = uq->uq_pi_blocked; 1893 if (pi == NULL) 1894 break; 1895 /* Resort td on the list if needed. */ 1896 umtx_pi_adjust_thread(pi, td); 1897 } 1898 } 1899 1900 /* 1901 * Unpropagate priority for a PI mutex when a thread blocked on 1902 * it is interrupted by signal or resumed by others. 1903 */ 1904 static void 1905 umtx_repropagate_priority(struct umtx_pi *pi) 1906 { 1907 struct umtx_q *uq, *uq_owner; 1908 struct umtx_pi *pi2; 1909 int pri; 1910 1911 mtx_assert(&umtx_lock, MA_OWNED); 1912 1913 if (umtx_pi_check_loop(pi)) 1914 return; 1915 while (pi != NULL && pi->pi_owner != NULL) { 1916 pri = PRI_MAX; 1917 uq_owner = pi->pi_owner->td_umtxq; 1918 1919 TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) { 1920 uq = TAILQ_FIRST(&pi2->pi_blocked); 1921 if (uq != NULL) { 1922 if (pri > UPRI(uq->uq_thread)) 1923 pri = UPRI(uq->uq_thread); 1924 } 1925 } 1926 1927 if (pri > uq_owner->uq_inherited_pri) 1928 pri = uq_owner->uq_inherited_pri; 1929 thread_lock(pi->pi_owner); 1930 sched_lend_user_prio(pi->pi_owner, pri); 1931 thread_unlock(pi->pi_owner); 1932 if ((pi = uq_owner->uq_pi_blocked) != NULL) 1933 umtx_pi_adjust_thread(pi, uq_owner->uq_thread); 1934 } 1935 } 1936 1937 /* 1938 * Insert a PI mutex into owned list. 1939 */ 1940 static void 1941 umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner) 1942 { 1943 struct umtx_q *uq_owner; 1944 1945 uq_owner = owner->td_umtxq; 1946 mtx_assert(&umtx_lock, MA_OWNED); 1947 MPASS(pi->pi_owner == NULL); 1948 pi->pi_owner = owner; 1949 TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link); 1950 } 1951 1952 /* 1953 * Disown a PI mutex, and remove it from the owned list. 1954 */ 1955 static void 1956 umtx_pi_disown(struct umtx_pi *pi) 1957 { 1958 1959 mtx_assert(&umtx_lock, MA_OWNED); 1960 TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, pi, pi_link); 1961 pi->pi_owner = NULL; 1962 } 1963 1964 /* 1965 * Claim ownership of a PI mutex. 1966 */ 1967 int 1968 umtx_pi_claim(struct umtx_pi *pi, struct thread *owner) 1969 { 1970 struct umtx_q *uq; 1971 int pri; 1972 1973 mtx_lock(&umtx_lock); 1974 if (pi->pi_owner == owner) { 1975 mtx_unlock(&umtx_lock); 1976 return (0); 1977 } 1978 1979 if (pi->pi_owner != NULL) { 1980 /* 1981 * userland may have already messed the mutex, sigh. 
		 */
		mtx_unlock(&umtx_lock);
		return (EPERM);
	}
	umtx_pi_setowner(pi, owner);
	uq = TAILQ_FIRST(&pi->pi_blocked);
	if (uq != NULL) {
		pri = UPRI(uq->uq_thread);
		thread_lock(owner);
		if (pri < UPRI(owner))
			sched_lend_user_prio(owner, pri);
		thread_unlock(owner);
	}
	mtx_unlock(&umtx_lock);
	return (0);
}

/*
 * Adjust a thread's position on the blocked list of the PI mutex it is
 * blocked on; this may trigger a new round of priority propagation.
 */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	mtx_lock(&umtx_lock);
	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	if (pi != NULL) {
		umtx_pi_adjust_thread(pi, td);
		umtx_repropagate_priority(pi);
	}
	mtx_unlock(&umtx_lock);
}

/*
 * Sleep on a PI mutex.
 */
int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, uint32_t owner,
    const char *wmesg, struct umtx_abs_timeout *timo, bool shared)
{
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int error, pri;
#ifdef INVARIANTS
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
#endif
	error = 0;
	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key));
	KASSERT(uc->uc_busy != 0, ("umtx chain is not busy"));
	umtxq_insert(uq);
	mtx_lock(&umtx_lock);
	if (pi->pi_owner == NULL) {
		mtx_unlock(&umtx_lock);
		td1 = tdfind(owner, shared ? -1 : td->td_proc->p_pid);
		mtx_lock(&umtx_lock);
		if (td1 != NULL) {
			if (pi->pi_owner == NULL)
				umtx_pi_setowner(pi, td1);
			PROC_UNLOCK(td1->td_proc);
		}
	}

	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	thread_lock(td);
	td->td_flags |= TDF_UPIBLOCKED;
	thread_unlock(td);
	umtx_propagate_priority(td);
	mtx_unlock(&umtx_lock);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, wmesg, timo);
	umtxq_remove(uq);

	mtx_lock(&umtx_lock);
	uq->uq_pi_blocked = NULL;
	thread_lock(td);
	td->td_flags &= ~TDF_UPIBLOCKED;
	thread_unlock(td);
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_repropagate_priority(pi);
	mtx_unlock(&umtx_lock);
	umtxq_unlock(&uq->uq_key);

	return (error);
}

/*
 * Increase the reference count of a PI mutex.
 */
void
umtx_pi_ref(struct umtx_pi *pi)
{

	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&pi->pi_key));
	pi->pi_refcount++;
}

/*
 * Decrease the reference count of a PI mutex; when the count
 * drops to zero, its memory is freed.
2104 */ 2105 void 2106 umtx_pi_unref(struct umtx_pi *pi) 2107 { 2108 struct umtxq_chain *uc; 2109 2110 uc = umtxq_getchain(&pi->pi_key); 2111 UMTXQ_LOCKED_ASSERT(uc); 2112 KASSERT(pi->pi_refcount > 0, ("invalid reference count")); 2113 if (--pi->pi_refcount == 0) { 2114 mtx_lock(&umtx_lock); 2115 if (pi->pi_owner != NULL) 2116 umtx_pi_disown(pi); 2117 KASSERT(TAILQ_EMPTY(&pi->pi_blocked), 2118 ("blocked queue not empty")); 2119 mtx_unlock(&umtx_lock); 2120 TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink); 2121 umtx_pi_free(pi); 2122 } 2123 } 2124 2125 /* 2126 * Find a PI mutex in hash table. 2127 */ 2128 struct umtx_pi * 2129 umtx_pi_lookup(struct umtx_key *key) 2130 { 2131 struct umtxq_chain *uc; 2132 struct umtx_pi *pi; 2133 2134 uc = umtxq_getchain(key); 2135 UMTXQ_LOCKED_ASSERT(uc); 2136 2137 TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) { 2138 if (umtx_key_match(&pi->pi_key, key)) { 2139 return (pi); 2140 } 2141 } 2142 return (NULL); 2143 } 2144 2145 /* 2146 * Insert a PI mutex into hash table. 2147 */ 2148 void 2149 umtx_pi_insert(struct umtx_pi *pi) 2150 { 2151 struct umtxq_chain *uc; 2152 2153 uc = umtxq_getchain(&pi->pi_key); 2154 UMTXQ_LOCKED_ASSERT(uc); 2155 TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink); 2156 } 2157 2158 /* 2159 * Drop a PI mutex and wakeup a top waiter. 2160 */ 2161 int 2162 umtx_pi_drop(struct thread *td, struct umtx_key *key, bool rb, int *count) 2163 { 2164 struct umtx_q *uq_first, *uq_first2, *uq_me; 2165 struct umtx_pi *pi, *pi2; 2166 int pri; 2167 2168 UMTXQ_ASSERT_LOCKED_BUSY(key); 2169 *count = umtxq_count_pi(key, &uq_first); 2170 if (uq_first != NULL) { 2171 mtx_lock(&umtx_lock); 2172 pi = uq_first->uq_pi_blocked; 2173 KASSERT(pi != NULL, ("pi == NULL?")); 2174 if (pi->pi_owner != td && !(rb && pi->pi_owner == NULL)) { 2175 mtx_unlock(&umtx_lock); 2176 /* userland messed the mutex */ 2177 return (EPERM); 2178 } 2179 uq_me = td->td_umtxq; 2180 if (pi->pi_owner == td) 2181 umtx_pi_disown(pi); 2182 /* get highest priority thread which is still sleeping. */ 2183 uq_first = TAILQ_FIRST(&pi->pi_blocked); 2184 while (uq_first != NULL && 2185 (uq_first->uq_flags & UQF_UMTXQ) == 0) { 2186 uq_first = TAILQ_NEXT(uq_first, uq_lockq); 2187 } 2188 pri = PRI_MAX; 2189 TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) { 2190 uq_first2 = TAILQ_FIRST(&pi2->pi_blocked); 2191 if (uq_first2 != NULL) { 2192 if (pri > UPRI(uq_first2->uq_thread)) 2193 pri = UPRI(uq_first2->uq_thread); 2194 } 2195 } 2196 thread_lock(td); 2197 sched_lend_user_prio(td, pri); 2198 thread_unlock(td); 2199 mtx_unlock(&umtx_lock); 2200 if (uq_first) 2201 umtxq_signal_thread(uq_first); 2202 } else { 2203 pi = umtx_pi_lookup(key); 2204 /* 2205 * A umtx_pi can exist if a signal or timeout removed the 2206 * last waiter from the umtxq, but there is still 2207 * a thread in do_lock_pi() holding the umtx_pi. 2208 */ 2209 if (pi != NULL) { 2210 /* 2211 * The umtx_pi can be unowned, such as when a thread 2212 * has just entered do_lock_pi(), allocated the 2213 * umtx_pi, and unlocked the umtxq. 2214 * If the current thread owns it, it must disown it. 2215 */ 2216 mtx_lock(&umtx_lock); 2217 if (pi->pi_owner == td) 2218 umtx_pi_disown(pi); 2219 mtx_unlock(&umtx_lock); 2220 } 2221 } 2222 return (0); 2223 } 2224 2225 /* 2226 * Lock a PI mutex. 
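 * The fast path CASes m_owner from UMUTEX_UNOWNED to the caller's tid.
 * On contention the contested bit is set, so that the unlocking thread
 * enters the kernel, and the caller sleeps on the umtx_pi so its
 * priority can be lent to the current owner.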
2227 */ 2228 static int 2229 do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, 2230 struct _umtx_time *timeout, int try) 2231 { 2232 struct umtx_abs_timeout timo; 2233 struct umtx_q *uq; 2234 struct umtx_pi *pi, *new_pi; 2235 uint32_t id, old_owner, owner, old; 2236 int error, rv; 2237 2238 id = td->td_tid; 2239 uq = td->td_umtxq; 2240 2241 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2242 TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags), 2243 &uq->uq_key)) != 0) 2244 return (error); 2245 2246 if (timeout != NULL) 2247 umtx_abs_timeout_init2(&timo, timeout); 2248 2249 umtxq_lock(&uq->uq_key); 2250 pi = umtx_pi_lookup(&uq->uq_key); 2251 if (pi == NULL) { 2252 new_pi = umtx_pi_alloc(M_NOWAIT); 2253 if (new_pi == NULL) { 2254 umtxq_unlock(&uq->uq_key); 2255 new_pi = umtx_pi_alloc(M_WAITOK); 2256 umtxq_lock(&uq->uq_key); 2257 pi = umtx_pi_lookup(&uq->uq_key); 2258 if (pi != NULL) { 2259 umtx_pi_free(new_pi); 2260 new_pi = NULL; 2261 } 2262 } 2263 if (new_pi != NULL) { 2264 new_pi->pi_key = uq->uq_key; 2265 umtx_pi_insert(new_pi); 2266 pi = new_pi; 2267 } 2268 } 2269 umtx_pi_ref(pi); 2270 umtxq_unlock(&uq->uq_key); 2271 2272 /* 2273 * Care must be exercised when dealing with umtx structure. It 2274 * can fault on any access. 2275 */ 2276 for (;;) { 2277 /* 2278 * Try the uncontested case. This should be done in userland. 2279 */ 2280 rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id); 2281 /* The address was invalid. */ 2282 if (rv == -1) { 2283 error = EFAULT; 2284 break; 2285 } 2286 /* The acquire succeeded. */ 2287 if (rv == 0) { 2288 MPASS(owner == UMUTEX_UNOWNED); 2289 error = 0; 2290 break; 2291 } 2292 2293 if (owner == UMUTEX_RB_NOTRECOV) { 2294 error = ENOTRECOVERABLE; 2295 break; 2296 } 2297 2298 /* 2299 * Nobody owns it, but the acquire failed. This can happen 2300 * with ll/sc atomics. 2301 */ 2302 if (owner == UMUTEX_UNOWNED) { 2303 error = thread_check_susp(td, true); 2304 if (error != 0) 2305 break; 2306 continue; 2307 } 2308 2309 /* 2310 * Avoid overwriting a possible error from sleep due 2311 * to the pending signal with suspension check result. 2312 */ 2313 if (error == 0) { 2314 error = thread_check_susp(td, true); 2315 if (error != 0) 2316 break; 2317 } 2318 2319 /* If no one owns it but it is contested try to acquire it. */ 2320 if (owner == UMUTEX_CONTESTED || owner == UMUTEX_RB_OWNERDEAD) { 2321 old_owner = owner; 2322 rv = casueword32(&m->m_owner, owner, &owner, 2323 id | UMUTEX_CONTESTED); 2324 /* The address was invalid. */ 2325 if (rv == -1) { 2326 error = EFAULT; 2327 break; 2328 } 2329 if (rv == 1) { 2330 if (error == 0) { 2331 error = thread_check_susp(td, true); 2332 if (error != 0) 2333 break; 2334 } 2335 2336 /* 2337 * If this failed the lock could 2338 * changed, restart. 2339 */ 2340 continue; 2341 } 2342 2343 MPASS(rv == 0); 2344 MPASS(owner == old_owner); 2345 umtxq_lock(&uq->uq_key); 2346 umtxq_busy(&uq->uq_key); 2347 error = umtx_pi_claim(pi, td); 2348 umtxq_unbusy(&uq->uq_key); 2349 umtxq_unlock(&uq->uq_key); 2350 if (error != 0) { 2351 /* 2352 * Since we're going to return an 2353 * error, restore the m_owner to its 2354 * previous, unowned state to avoid 2355 * compounding the problem. 
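 * The store below is best-effort; its result is deliberately
 * ignored, since the error already being returned describes the
 * failure.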
2356 */ 2357 (void)casuword32(&m->m_owner, 2358 id | UMUTEX_CONTESTED, old_owner); 2359 } 2360 if (error == 0 && old_owner == UMUTEX_RB_OWNERDEAD) 2361 error = EOWNERDEAD; 2362 break; 2363 } 2364 2365 if ((owner & ~UMUTEX_CONTESTED) == id) { 2366 error = EDEADLK; 2367 break; 2368 } 2369 2370 if (try != 0) { 2371 error = EBUSY; 2372 break; 2373 } 2374 2375 /* 2376 * If we caught a signal, we have retried and now 2377 * exit immediately. 2378 */ 2379 if (error != 0) 2380 break; 2381 2382 umtxq_busy_unlocked(&uq->uq_key); 2383 2384 /* 2385 * Set the contested bit so that a release in user space 2386 * knows to use the system call for unlock. If this fails 2387 * either some one else has acquired the lock or it has been 2388 * released. 2389 */ 2390 rv = casueword32(&m->m_owner, owner, &old, owner | 2391 UMUTEX_CONTESTED); 2392 2393 /* The address was invalid. */ 2394 if (rv == -1) { 2395 umtxq_unbusy_unlocked(&uq->uq_key); 2396 error = EFAULT; 2397 break; 2398 } 2399 if (rv == 1) { 2400 umtxq_unbusy_unlocked(&uq->uq_key); 2401 error = thread_check_susp(td, true); 2402 if (error != 0) 2403 break; 2404 2405 /* 2406 * The lock changed and we need to retry or we 2407 * lost a race to the thread unlocking the 2408 * umtx. Note that the UMUTEX_RB_OWNERDEAD 2409 * value for owner is impossible there. 2410 */ 2411 continue; 2412 } 2413 2414 umtxq_lock(&uq->uq_key); 2415 2416 /* We set the contested bit, sleep. */ 2417 MPASS(old == owner); 2418 error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED, 2419 "umtxpi", timeout == NULL ? NULL : &timo, 2420 (flags & USYNC_PROCESS_SHARED) != 0); 2421 if (error != 0) 2422 continue; 2423 2424 error = thread_check_susp(td, false); 2425 if (error != 0) 2426 break; 2427 } 2428 2429 umtxq_lock(&uq->uq_key); 2430 umtx_pi_unref(pi); 2431 umtxq_unlock(&uq->uq_key); 2432 2433 umtx_key_release(&uq->uq_key); 2434 return (error); 2435 } 2436 2437 /* 2438 * Unlock a PI mutex. 2439 */ 2440 static int 2441 do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags, bool rb) 2442 { 2443 struct umtx_key key; 2444 uint32_t id, new_owner, old, owner; 2445 int count, error; 2446 2447 id = td->td_tid; 2448 2449 usrloop: 2450 /* 2451 * Make sure we own this mtx. 2452 */ 2453 error = fueword32(&m->m_owner, &owner); 2454 if (error == -1) 2455 return (EFAULT); 2456 2457 if ((owner & ~UMUTEX_CONTESTED) != id) 2458 return (EPERM); 2459 2460 new_owner = umtx_unlock_val(flags, rb); 2461 2462 /* This should be done in userland */ 2463 if ((owner & UMUTEX_CONTESTED) == 0) { 2464 error = casueword32(&m->m_owner, owner, &old, new_owner); 2465 if (error == -1) 2466 return (EFAULT); 2467 if (error == 1) { 2468 error = thread_check_susp(td, true); 2469 if (error != 0) 2470 return (error); 2471 goto usrloop; 2472 } 2473 if (old == owner) 2474 return (0); 2475 owner = old; 2476 } 2477 2478 /* We should only ever be in here for contested locks */ 2479 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2480 TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags), 2481 &key)) != 0) 2482 return (error); 2483 2484 umtxq_lock(&key); 2485 umtxq_busy(&key); 2486 error = umtx_pi_drop(td, &key, rb, &count); 2487 if (error != 0) { 2488 umtxq_unbusy(&key); 2489 umtxq_unlock(&key); 2490 umtx_key_release(&key); 2491 /* userland messed the mutex */ 2492 return (error); 2493 } 2494 umtxq_unlock(&key); 2495 2496 /* 2497 * When unlocking the umtx, it must be marked as unowned if 2498 * there is zero or one thread only waiting for it. 2499 * Otherwise, it must be marked as contested. 
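 * Keeping UMUTEX_CONTESTED set while more than one waiter remains
 * ensures that the next unlock also enters the kernel, so the
 * remaining waiters are woken and priorities recomputed.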
2500 */ 2501 2502 if (count > 1) 2503 new_owner |= UMUTEX_CONTESTED; 2504 again: 2505 error = casueword32(&m->m_owner, owner, &old, new_owner); 2506 if (error == 1) { 2507 error = thread_check_susp(td, false); 2508 if (error == 0) 2509 goto again; 2510 } 2511 umtxq_unbusy_unlocked(&key); 2512 umtx_key_release(&key); 2513 if (error == -1) 2514 return (EFAULT); 2515 if (error == 0 && old != owner) 2516 return (EINVAL); 2517 return (error); 2518 } 2519 2520 /* 2521 * Lock a PP mutex. 2522 */ 2523 static int 2524 do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, 2525 struct _umtx_time *timeout, int try) 2526 { 2527 struct umtx_abs_timeout timo; 2528 struct umtx_q *uq, *uq2; 2529 struct umtx_pi *pi; 2530 uint32_t ceiling; 2531 uint32_t owner, id; 2532 int error, pri, old_inherited_pri, new_pri, rv; 2533 bool su; 2534 2535 id = td->td_tid; 2536 uq = td->td_umtxq; 2537 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2538 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2539 &uq->uq_key)) != 0) 2540 return (error); 2541 2542 if (timeout != NULL) 2543 umtx_abs_timeout_init2(&timo, timeout); 2544 2545 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); 2546 for (;;) { 2547 old_inherited_pri = uq->uq_inherited_pri; 2548 umtxq_busy_unlocked(&uq->uq_key); 2549 2550 rv = fueword32(&m->m_ceilings[0], &ceiling); 2551 if (rv == -1) { 2552 error = EFAULT; 2553 goto out; 2554 } 2555 ceiling = RTP_PRIO_MAX - ceiling; 2556 if (ceiling > RTP_PRIO_MAX) { 2557 error = EINVAL; 2558 goto out; 2559 } 2560 new_pri = PRI_MIN_REALTIME + ceiling; 2561 2562 if (td->td_base_user_pri < new_pri) { 2563 error = EINVAL; 2564 goto out; 2565 } 2566 if (su) { 2567 mtx_lock(&umtx_lock); 2568 if (new_pri < uq->uq_inherited_pri) { 2569 uq->uq_inherited_pri = new_pri; 2570 thread_lock(td); 2571 if (new_pri < UPRI(td)) 2572 sched_lend_user_prio(td, new_pri); 2573 thread_unlock(td); 2574 } 2575 mtx_unlock(&umtx_lock); 2576 } 2577 2578 rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, 2579 id | UMUTEX_CONTESTED); 2580 /* The address was invalid. */ 2581 if (rv == -1) { 2582 error = EFAULT; 2583 break; 2584 } 2585 if (rv == 0) { 2586 MPASS(owner == UMUTEX_CONTESTED); 2587 error = 0; 2588 break; 2589 } 2590 /* rv == 1 */ 2591 if (owner == UMUTEX_RB_OWNERDEAD) { 2592 rv = casueword32(&m->m_owner, UMUTEX_RB_OWNERDEAD, 2593 &owner, id | UMUTEX_CONTESTED); 2594 if (rv == -1) { 2595 error = EFAULT; 2596 break; 2597 } 2598 if (rv == 0) { 2599 MPASS(owner == UMUTEX_RB_OWNERDEAD); 2600 error = EOWNERDEAD; /* success */ 2601 break; 2602 } 2603 2604 /* 2605 * rv == 1, only check for suspension if we 2606 * did not already catched a signal. If we 2607 * get an error from the check, the same 2608 * condition is checked by the umtxq_sleep() 2609 * call below, so we should obliterate the 2610 * error to not skip the last loop iteration. 2611 */ 2612 if (error == 0) { 2613 error = thread_check_susp(td, false); 2614 if (error == 0 && try == 0) { 2615 umtxq_unbusy_unlocked(&uq->uq_key); 2616 continue; 2617 } 2618 error = 0; 2619 } 2620 } else if (owner == UMUTEX_RB_NOTRECOV) { 2621 error = ENOTRECOVERABLE; 2622 } else if (owner == UMUTEX_CONTESTED) { 2623 /* Spurious failure, retry. */ 2624 umtxq_unbusy_unlocked(&uq->uq_key); 2625 continue; 2626 } 2627 2628 if (try != 0) 2629 error = EBUSY; 2630 2631 /* 2632 * If we caught a signal, we have retried and now 2633 * exit immediately. 
2634 */ 2635 if (error != 0) 2636 break; 2637 2638 umtxq_lock(&uq->uq_key); 2639 umtxq_insert(uq); 2640 umtxq_unbusy(&uq->uq_key); 2641 error = umtxq_sleep(uq, "umtxpp", timeout == NULL ? 2642 NULL : &timo); 2643 umtxq_remove(uq); 2644 umtxq_unlock(&uq->uq_key); 2645 2646 mtx_lock(&umtx_lock); 2647 uq->uq_inherited_pri = old_inherited_pri; 2648 pri = PRI_MAX; 2649 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2650 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2651 if (uq2 != NULL) { 2652 if (pri > UPRI(uq2->uq_thread)) 2653 pri = UPRI(uq2->uq_thread); 2654 } 2655 } 2656 if (pri > uq->uq_inherited_pri) 2657 pri = uq->uq_inherited_pri; 2658 thread_lock(td); 2659 sched_lend_user_prio(td, pri); 2660 thread_unlock(td); 2661 mtx_unlock(&umtx_lock); 2662 } 2663 2664 if (error != 0 && error != EOWNERDEAD) { 2665 mtx_lock(&umtx_lock); 2666 uq->uq_inherited_pri = old_inherited_pri; 2667 pri = PRI_MAX; 2668 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2669 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2670 if (uq2 != NULL) { 2671 if (pri > UPRI(uq2->uq_thread)) 2672 pri = UPRI(uq2->uq_thread); 2673 } 2674 } 2675 if (pri > uq->uq_inherited_pri) 2676 pri = uq->uq_inherited_pri; 2677 thread_lock(td); 2678 sched_lend_user_prio(td, pri); 2679 thread_unlock(td); 2680 mtx_unlock(&umtx_lock); 2681 } 2682 2683 out: 2684 umtxq_unbusy_unlocked(&uq->uq_key); 2685 umtx_key_release(&uq->uq_key); 2686 return (error); 2687 } 2688 2689 /* 2690 * Unlock a PP mutex. 2691 */ 2692 static int 2693 do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, bool rb) 2694 { 2695 struct umtx_key key; 2696 struct umtx_q *uq, *uq2; 2697 struct umtx_pi *pi; 2698 uint32_t id, owner, rceiling; 2699 int error, pri, new_inherited_pri; 2700 bool su; 2701 2702 id = td->td_tid; 2703 uq = td->td_umtxq; 2704 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); 2705 2706 /* 2707 * Make sure we own this mtx. 2708 */ 2709 error = fueword32(&m->m_owner, &owner); 2710 if (error == -1) 2711 return (EFAULT); 2712 2713 if ((owner & ~UMUTEX_CONTESTED) != id) 2714 return (EPERM); 2715 2716 error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t)); 2717 if (error != 0) 2718 return (error); 2719 2720 if (rceiling == -1) 2721 new_inherited_pri = PRI_MAX; 2722 else { 2723 rceiling = RTP_PRIO_MAX - rceiling; 2724 if (rceiling > RTP_PRIO_MAX) 2725 return (EINVAL); 2726 new_inherited_pri = PRI_MIN_REALTIME + rceiling; 2727 } 2728 2729 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2730 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2731 &key)) != 0) 2732 return (error); 2733 umtxq_busy_unlocked(&key); 2734 2735 /* 2736 * For priority protected mutex, always set unlocked state 2737 * to UMUTEX_CONTESTED, so that userland always enters kernel 2738 * to lock the mutex, it is necessary because thread priority 2739 * has to be adjusted for such mutex. 
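 *
 * The protocol ceiling stored in m_ceilings[0] is expressed in
 * rtprio terms; do_lock_pp() translates it to a kernel priority
 * roughly as
 *
 *	new_pri = PRI_MIN_REALTIME + (RTP_PRIO_MAX - ceiling);
 *
 * and lends that priority to the owner while the mutex is held.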
2740 */ 2741 error = suword32(&m->m_owner, umtx_unlock_val(flags, rb) | 2742 UMUTEX_CONTESTED); 2743 2744 umtxq_lock(&key); 2745 if (error == 0) 2746 umtxq_signal(&key, 1); 2747 umtxq_unbusy(&key); 2748 umtxq_unlock(&key); 2749 2750 if (error == -1) 2751 error = EFAULT; 2752 else { 2753 mtx_lock(&umtx_lock); 2754 if (su || new_inherited_pri == PRI_MAX) 2755 uq->uq_inherited_pri = new_inherited_pri; 2756 pri = PRI_MAX; 2757 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2758 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2759 if (uq2 != NULL) { 2760 if (pri > UPRI(uq2->uq_thread)) 2761 pri = UPRI(uq2->uq_thread); 2762 } 2763 } 2764 if (pri > uq->uq_inherited_pri) 2765 pri = uq->uq_inherited_pri; 2766 thread_lock(td); 2767 sched_lend_user_prio(td, pri); 2768 thread_unlock(td); 2769 mtx_unlock(&umtx_lock); 2770 } 2771 umtx_key_release(&key); 2772 return (error); 2773 } 2774 2775 static int 2776 do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling, 2777 uint32_t *old_ceiling) 2778 { 2779 struct umtx_q *uq; 2780 uint32_t flags, id, owner, save_ceiling; 2781 int error, rv, rv1; 2782 2783 error = fueword32(&m->m_flags, &flags); 2784 if (error == -1) 2785 return (EFAULT); 2786 if ((flags & UMUTEX_PRIO_PROTECT) == 0) 2787 return (EINVAL); 2788 if (ceiling > RTP_PRIO_MAX) 2789 return (EINVAL); 2790 id = td->td_tid; 2791 uq = td->td_umtxq; 2792 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2793 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2794 &uq->uq_key)) != 0) 2795 return (error); 2796 for (;;) { 2797 umtxq_busy_unlocked(&uq->uq_key); 2798 2799 rv = fueword32(&m->m_ceilings[0], &save_ceiling); 2800 if (rv == -1) { 2801 error = EFAULT; 2802 break; 2803 } 2804 2805 rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, 2806 id | UMUTEX_CONTESTED); 2807 if (rv == -1) { 2808 error = EFAULT; 2809 break; 2810 } 2811 2812 if (rv == 0) { 2813 MPASS(owner == UMUTEX_CONTESTED); 2814 rv = suword32(&m->m_ceilings[0], ceiling); 2815 rv1 = suword32(&m->m_owner, UMUTEX_CONTESTED); 2816 error = (rv == 0 && rv1 == 0) ? 0: EFAULT; 2817 break; 2818 } 2819 2820 if ((owner & ~UMUTEX_CONTESTED) == id) { 2821 rv = suword32(&m->m_ceilings[0], ceiling); 2822 error = rv == 0 ? 0 : EFAULT; 2823 break; 2824 } 2825 2826 if (owner == UMUTEX_RB_OWNERDEAD) { 2827 error = EOWNERDEAD; 2828 break; 2829 } else if (owner == UMUTEX_RB_NOTRECOV) { 2830 error = ENOTRECOVERABLE; 2831 break; 2832 } else if (owner == UMUTEX_CONTESTED) { 2833 /* Spurious failure, retry. */ 2834 umtxq_unbusy_unlocked(&uq->uq_key); 2835 continue; 2836 } 2837 2838 /* 2839 * If we caught a signal, we have retried and now 2840 * exit immediately. 2841 */ 2842 if (error != 0) 2843 break; 2844 2845 /* 2846 * We set the contested bit, sleep. Otherwise the lock changed 2847 * and we need to retry or we lost a race to the thread 2848 * unlocking the umtx. 2849 */ 2850 umtxq_lock(&uq->uq_key); 2851 umtxq_insert(uq); 2852 umtxq_unbusy(&uq->uq_key); 2853 error = umtxq_sleep(uq, "umtxpp", NULL); 2854 umtxq_remove(uq); 2855 umtxq_unlock(&uq->uq_key); 2856 } 2857 umtxq_lock(&uq->uq_key); 2858 if (error == 0) 2859 umtxq_signal(&uq->uq_key, INT_MAX); 2860 umtxq_unbusy(&uq->uq_key); 2861 umtxq_unlock(&uq->uq_key); 2862 umtx_key_release(&uq->uq_key); 2863 if (error == 0 && old_ceiling != NULL) { 2864 rv = suword32(old_ceiling, save_ceiling); 2865 error = rv == 0 ? 0 : EFAULT; 2866 } 2867 return (error); 2868 } 2869 2870 /* 2871 * Lock a userland POSIX mutex. 
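 *
 * The locking protocol is selected by m_flags: plain mutexes go
 * through do_lock_normal(), UMUTEX_PRIO_INHERIT through do_lock_pi()
 * and UMUTEX_PRIO_PROTECT through do_lock_pp(); specifying both
 * protocol flags at once is rejected with EINVAL.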
2872 */ 2873 static int 2874 do_lock_umutex(struct thread *td, struct umutex *m, 2875 struct _umtx_time *timeout, int mode) 2876 { 2877 uint32_t flags; 2878 int error; 2879 2880 error = fueword32(&m->m_flags, &flags); 2881 if (error == -1) 2882 return (EFAULT); 2883 2884 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2885 case 0: 2886 error = do_lock_normal(td, m, flags, timeout, mode); 2887 break; 2888 case UMUTEX_PRIO_INHERIT: 2889 error = do_lock_pi(td, m, flags, timeout, mode); 2890 break; 2891 case UMUTEX_PRIO_PROTECT: 2892 error = do_lock_pp(td, m, flags, timeout, mode); 2893 break; 2894 default: 2895 return (EINVAL); 2896 } 2897 if (timeout == NULL) { 2898 if (error == EINTR && mode != _UMUTEX_WAIT) 2899 error = ERESTART; 2900 } else { 2901 /* Timed-locking is not restarted. */ 2902 if (error == ERESTART) 2903 error = EINTR; 2904 } 2905 return (error); 2906 } 2907 2908 /* 2909 * Unlock a userland POSIX mutex. 2910 */ 2911 static int 2912 do_unlock_umutex(struct thread *td, struct umutex *m, bool rb) 2913 { 2914 uint32_t flags; 2915 int error; 2916 2917 error = fueword32(&m->m_flags, &flags); 2918 if (error == -1) 2919 return (EFAULT); 2920 2921 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2922 case 0: 2923 return (do_unlock_normal(td, m, flags, rb)); 2924 case UMUTEX_PRIO_INHERIT: 2925 return (do_unlock_pi(td, m, flags, rb)); 2926 case UMUTEX_PRIO_PROTECT: 2927 return (do_unlock_pp(td, m, flags, rb)); 2928 } 2929 2930 return (EINVAL); 2931 } 2932 2933 static int 2934 do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m, 2935 struct timespec *timeout, u_long wflags) 2936 { 2937 struct umtx_abs_timeout timo; 2938 struct umtx_q *uq; 2939 uint32_t flags, clockid, hasw; 2940 int error; 2941 2942 uq = td->td_umtxq; 2943 error = fueword32(&cv->c_flags, &flags); 2944 if (error == -1) 2945 return (EFAULT); 2946 error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key); 2947 if (error != 0) 2948 return (error); 2949 2950 if ((wflags & CVWAIT_CLOCKID) != 0) { 2951 error = fueword32(&cv->c_clockid, &clockid); 2952 if (error == -1) { 2953 umtx_key_release(&uq->uq_key); 2954 return (EFAULT); 2955 } 2956 if (clockid < CLOCK_REALTIME || 2957 clockid >= CLOCK_THREAD_CPUTIME_ID) { 2958 /* hmm, only HW clock id will work. */ 2959 umtx_key_release(&uq->uq_key); 2960 return (EINVAL); 2961 } 2962 } else { 2963 clockid = CLOCK_REALTIME; 2964 } 2965 2966 umtxq_lock(&uq->uq_key); 2967 umtxq_busy(&uq->uq_key); 2968 umtxq_insert(uq); 2969 umtxq_unlock(&uq->uq_key); 2970 2971 /* 2972 * Set c_has_waiters to 1 before releasing user mutex, also 2973 * don't modify cache line when unnecessary. 2974 */ 2975 error = fueword32(&cv->c_has_waiters, &hasw); 2976 if (error == 0 && hasw == 0) 2977 error = suword32(&cv->c_has_waiters, 1); 2978 if (error != 0) { 2979 umtxq_lock(&uq->uq_key); 2980 umtxq_remove(uq); 2981 umtxq_unbusy(&uq->uq_key); 2982 error = EFAULT; 2983 goto out; 2984 } 2985 2986 umtxq_unbusy_unlocked(&uq->uq_key); 2987 2988 error = do_unlock_umutex(td, m, false); 2989 2990 if (timeout != NULL) 2991 umtx_abs_timeout_init(&timo, clockid, 2992 (wflags & CVWAIT_ABSTIME) != 0, timeout); 2993 2994 umtxq_lock(&uq->uq_key); 2995 if (error == 0) { 2996 error = umtxq_sleep(uq, "ucond", timeout == NULL ? 2997 NULL : &timo); 2998 } 2999 3000 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3001 error = 0; 3002 else { 3003 /* 3004 * This must be timeout,interrupted by signal or 3005 * surprious wakeup, clear c_has_waiter flag when 3006 * necessary. 
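 * c_has_waiters is cleared only when this thread was the last one
 * left on the sleep queue (queue length 1 before the removal), so
 * that userland again sees the condition variable as having no
 * waiters.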
3007 */ 3008 umtxq_busy(&uq->uq_key); 3009 if ((uq->uq_flags & UQF_UMTXQ) != 0) { 3010 int oldlen = uq->uq_cur_queue->length; 3011 umtxq_remove(uq); 3012 if (oldlen == 1) { 3013 umtxq_unlock(&uq->uq_key); 3014 if (suword32(&cv->c_has_waiters, 0) != 0 && 3015 error == 0) 3016 error = EFAULT; 3017 umtxq_lock(&uq->uq_key); 3018 } 3019 } 3020 umtxq_unbusy(&uq->uq_key); 3021 if (error == ERESTART) 3022 error = EINTR; 3023 } 3024 out: 3025 umtxq_unlock(&uq->uq_key); 3026 umtx_key_release(&uq->uq_key); 3027 return (error); 3028 } 3029 3030 /* 3031 * Signal a userland condition variable. 3032 */ 3033 static int 3034 do_cv_signal(struct thread *td, struct ucond *cv) 3035 { 3036 struct umtx_key key; 3037 int error, cnt, nwake; 3038 uint32_t flags; 3039 3040 error = fueword32(&cv->c_flags, &flags); 3041 if (error == -1) 3042 return (EFAULT); 3043 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 3044 return (error); 3045 umtxq_lock(&key); 3046 umtxq_busy(&key); 3047 cnt = umtxq_count(&key); 3048 nwake = umtxq_signal(&key, 1); 3049 if (cnt <= nwake) { 3050 umtxq_unlock(&key); 3051 error = suword32(&cv->c_has_waiters, 0); 3052 if (error == -1) 3053 error = EFAULT; 3054 umtxq_lock(&key); 3055 } 3056 umtxq_unbusy(&key); 3057 umtxq_unlock(&key); 3058 umtx_key_release(&key); 3059 return (error); 3060 } 3061 3062 static int 3063 do_cv_broadcast(struct thread *td, struct ucond *cv) 3064 { 3065 struct umtx_key key; 3066 int error; 3067 uint32_t flags; 3068 3069 error = fueword32(&cv->c_flags, &flags); 3070 if (error == -1) 3071 return (EFAULT); 3072 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 3073 return (error); 3074 3075 umtxq_lock(&key); 3076 umtxq_busy(&key); 3077 umtxq_signal(&key, INT_MAX); 3078 umtxq_unlock(&key); 3079 3080 error = suword32(&cv->c_has_waiters, 0); 3081 if (error == -1) 3082 error = EFAULT; 3083 3084 umtxq_unbusy_unlocked(&key); 3085 3086 umtx_key_release(&key); 3087 return (error); 3088 } 3089 3090 static int 3091 do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, 3092 struct _umtx_time *timeout) 3093 { 3094 struct umtx_abs_timeout timo; 3095 struct umtx_q *uq; 3096 uint32_t flags, wrflags; 3097 int32_t state, oldstate; 3098 int32_t blocked_readers; 3099 int error, error1, rv; 3100 3101 uq = td->td_umtxq; 3102 error = fueword32(&rwlock->rw_flags, &flags); 3103 if (error == -1) 3104 return (EFAULT); 3105 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3106 if (error != 0) 3107 return (error); 3108 3109 if (timeout != NULL) 3110 umtx_abs_timeout_init2(&timo, timeout); 3111 3112 wrflags = URWLOCK_WRITE_OWNER; 3113 if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER)) 3114 wrflags |= URWLOCK_WRITE_WAITERS; 3115 3116 for (;;) { 3117 rv = fueword32(&rwlock->rw_state, &state); 3118 if (rv == -1) { 3119 umtx_key_release(&uq->uq_key); 3120 return (EFAULT); 3121 } 3122 3123 /* try to lock it */ 3124 while (!(state & wrflags)) { 3125 if (__predict_false(URWLOCK_READER_COUNT(state) == 3126 URWLOCK_MAX_READERS)) { 3127 umtx_key_release(&uq->uq_key); 3128 return (EAGAIN); 3129 } 3130 rv = casueword32(&rwlock->rw_state, state, 3131 &oldstate, state + 1); 3132 if (rv == -1) { 3133 umtx_key_release(&uq->uq_key); 3134 return (EFAULT); 3135 } 3136 if (rv == 0) { 3137 MPASS(oldstate == state); 3138 umtx_key_release(&uq->uq_key); 3139 return (0); 3140 } 3141 error = thread_check_susp(td, true); 3142 if (error != 0) 3143 break; 3144 state = oldstate; 3145 } 3146 3147 if (error) 3148 break; 3149 3150 
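		/*
		 * The read fast path above did not succeed.  rw_state
		 * packs the reader count in its low bits together with
		 * the URWLOCK_WRITE_OWNER, URWLOCK_WRITE_WAITERS and
		 * URWLOCK_READ_WAITERS bits; the busy state acquired
		 * below is the "monitor lock" that serializes updates
		 * of the waiter bits and of rw_blocked_readers against
		 * other sleepers and wakers on this chain.
		 */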
/* grab monitor lock */ 3151 umtxq_busy_unlocked(&uq->uq_key); 3152 3153 /* 3154 * re-read the state, in case it changed between the try-lock above 3155 * and the check below 3156 */ 3157 rv = fueword32(&rwlock->rw_state, &state); 3158 if (rv == -1) 3159 error = EFAULT; 3160 3161 /* set read contention bit */ 3162 while (error == 0 && (state & wrflags) && 3163 !(state & URWLOCK_READ_WAITERS)) { 3164 rv = casueword32(&rwlock->rw_state, state, 3165 &oldstate, state | URWLOCK_READ_WAITERS); 3166 if (rv == -1) { 3167 error = EFAULT; 3168 break; 3169 } 3170 if (rv == 0) { 3171 MPASS(oldstate == state); 3172 goto sleep; 3173 } 3174 state = oldstate; 3175 error = thread_check_susp(td, false); 3176 if (error != 0) 3177 break; 3178 } 3179 if (error != 0) { 3180 umtxq_unbusy_unlocked(&uq->uq_key); 3181 break; 3182 } 3183 3184 /* state is changed while setting flags, restart */ 3185 if (!(state & wrflags)) { 3186 umtxq_unbusy_unlocked(&uq->uq_key); 3187 error = thread_check_susp(td, true); 3188 if (error != 0) 3189 break; 3190 continue; 3191 } 3192 3193 sleep: 3194 /* 3195 * Contention bit is set, before sleeping, increase 3196 * read waiter count. 3197 */ 3198 rv = fueword32(&rwlock->rw_blocked_readers, 3199 &blocked_readers); 3200 if (rv == 0) 3201 rv = suword32(&rwlock->rw_blocked_readers, 3202 blocked_readers + 1); 3203 if (rv == -1) { 3204 umtxq_unbusy_unlocked(&uq->uq_key); 3205 error = EFAULT; 3206 break; 3207 } 3208 3209 while (state & wrflags) { 3210 umtxq_lock(&uq->uq_key); 3211 umtxq_insert(uq); 3212 umtxq_unbusy(&uq->uq_key); 3213 3214 error = umtxq_sleep(uq, "urdlck", timeout == NULL ? 3215 NULL : &timo); 3216 3217 umtxq_busy(&uq->uq_key); 3218 umtxq_remove(uq); 3219 umtxq_unlock(&uq->uq_key); 3220 if (error) 3221 break; 3222 rv = fueword32(&rwlock->rw_state, &state); 3223 if (rv == -1) { 3224 error = EFAULT; 3225 break; 3226 } 3227 } 3228 3229 /* decrease read waiter count, and may clear read contention bit */ 3230 rv = fueword32(&rwlock->rw_blocked_readers, 3231 &blocked_readers); 3232 if (rv == 0) 3233 rv = suword32(&rwlock->rw_blocked_readers, 3234 blocked_readers - 1); 3235 if (rv == -1) { 3236 umtxq_unbusy_unlocked(&uq->uq_key); 3237 error = EFAULT; 3238 break; 3239 } 3240 if (blocked_readers == 1) { 3241 rv = fueword32(&rwlock->rw_state, &state); 3242 if (rv == -1) { 3243 umtxq_unbusy_unlocked(&uq->uq_key); 3244 error = EFAULT; 3245 break; 3246 } 3247 for (;;) { 3248 rv = casueword32(&rwlock->rw_state, state, 3249 &oldstate, state & ~URWLOCK_READ_WAITERS); 3250 if (rv == -1) { 3251 error = EFAULT; 3252 break; 3253 } 3254 if (rv == 0) { 3255 MPASS(oldstate == state); 3256 break; 3257 } 3258 state = oldstate; 3259 error1 = thread_check_susp(td, false); 3260 if (error1 != 0) { 3261 if (error == 0) 3262 error = error1; 3263 break; 3264 } 3265 } 3266 } 3267 3268 umtxq_unbusy_unlocked(&uq->uq_key); 3269 if (error != 0) 3270 break; 3271 } 3272 umtx_key_release(&uq->uq_key); 3273 if (error == ERESTART) 3274 error = EINTR; 3275 return (error); 3276 } 3277 3278 static int 3279 do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout) 3280 { 3281 struct umtx_abs_timeout timo; 3282 struct umtx_q *uq; 3283 uint32_t flags; 3284 int32_t state, oldstate; 3285 int32_t blocked_writers; 3286 int32_t blocked_readers; 3287 int error, error1, rv; 3288 3289 uq = td->td_umtxq; 3290 error = fueword32(&rwlock->rw_flags, &flags); 3291 if (error == -1) 3292 return (EFAULT); 3293 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3294 if (error != 0) 3295 
return (error); 3296 3297 if (timeout != NULL) 3298 umtx_abs_timeout_init2(&timo, timeout); 3299 3300 blocked_readers = 0; 3301 for (;;) { 3302 rv = fueword32(&rwlock->rw_state, &state); 3303 if (rv == -1) { 3304 umtx_key_release(&uq->uq_key); 3305 return (EFAULT); 3306 } 3307 while ((state & URWLOCK_WRITE_OWNER) == 0 && 3308 URWLOCK_READER_COUNT(state) == 0) { 3309 rv = casueword32(&rwlock->rw_state, state, 3310 &oldstate, state | URWLOCK_WRITE_OWNER); 3311 if (rv == -1) { 3312 umtx_key_release(&uq->uq_key); 3313 return (EFAULT); 3314 } 3315 if (rv == 0) { 3316 MPASS(oldstate == state); 3317 umtx_key_release(&uq->uq_key); 3318 return (0); 3319 } 3320 state = oldstate; 3321 error = thread_check_susp(td, true); 3322 if (error != 0) 3323 break; 3324 } 3325 3326 if (error) { 3327 if ((state & (URWLOCK_WRITE_OWNER | 3328 URWLOCK_WRITE_WAITERS)) == 0 && 3329 blocked_readers != 0) { 3330 umtxq_lock(&uq->uq_key); 3331 umtxq_busy(&uq->uq_key); 3332 umtxq_signal_queue(&uq->uq_key, INT_MAX, 3333 UMTX_SHARED_QUEUE); 3334 umtxq_unbusy(&uq->uq_key); 3335 umtxq_unlock(&uq->uq_key); 3336 } 3337 3338 break; 3339 } 3340 3341 /* grab monitor lock */ 3342 umtxq_busy_unlocked(&uq->uq_key); 3343 3344 /* 3345 * Re-read the state, in case it changed between the 3346 * try-lock above and the check below. 3347 */ 3348 rv = fueword32(&rwlock->rw_state, &state); 3349 if (rv == -1) 3350 error = EFAULT; 3351 3352 while (error == 0 && ((state & URWLOCK_WRITE_OWNER) || 3353 URWLOCK_READER_COUNT(state) != 0) && 3354 (state & URWLOCK_WRITE_WAITERS) == 0) { 3355 rv = casueword32(&rwlock->rw_state, state, 3356 &oldstate, state | URWLOCK_WRITE_WAITERS); 3357 if (rv == -1) { 3358 error = EFAULT; 3359 break; 3360 } 3361 if (rv == 0) { 3362 MPASS(oldstate == state); 3363 goto sleep; 3364 } 3365 state = oldstate; 3366 error = thread_check_susp(td, false); 3367 if (error != 0) 3368 break; 3369 } 3370 if (error != 0) { 3371 umtxq_unbusy_unlocked(&uq->uq_key); 3372 break; 3373 } 3374 3375 if ((state & URWLOCK_WRITE_OWNER) == 0 && 3376 URWLOCK_READER_COUNT(state) == 0) { 3377 umtxq_unbusy_unlocked(&uq->uq_key); 3378 error = thread_check_susp(td, false); 3379 if (error != 0) 3380 break; 3381 continue; 3382 } 3383 sleep: 3384 rv = fueword32(&rwlock->rw_blocked_writers, 3385 &blocked_writers); 3386 if (rv == 0) 3387 rv = suword32(&rwlock->rw_blocked_writers, 3388 blocked_writers + 1); 3389 if (rv == -1) { 3390 umtxq_unbusy_unlocked(&uq->uq_key); 3391 error = EFAULT; 3392 break; 3393 } 3394 3395 while ((state & URWLOCK_WRITE_OWNER) || 3396 URWLOCK_READER_COUNT(state) != 0) { 3397 umtxq_lock(&uq->uq_key); 3398 umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE); 3399 umtxq_unbusy(&uq->uq_key); 3400 3401 error = umtxq_sleep(uq, "uwrlck", timeout == NULL ? 
3402 NULL : &timo); 3403 3404 umtxq_busy(&uq->uq_key); 3405 umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE); 3406 umtxq_unlock(&uq->uq_key); 3407 if (error) 3408 break; 3409 rv = fueword32(&rwlock->rw_state, &state); 3410 if (rv == -1) { 3411 error = EFAULT; 3412 break; 3413 } 3414 } 3415 3416 rv = fueword32(&rwlock->rw_blocked_writers, 3417 &blocked_writers); 3418 if (rv == 0) 3419 rv = suword32(&rwlock->rw_blocked_writers, 3420 blocked_writers - 1); 3421 if (rv == -1) { 3422 umtxq_unbusy_unlocked(&uq->uq_key); 3423 error = EFAULT; 3424 break; 3425 } 3426 if (blocked_writers == 1) { 3427 rv = fueword32(&rwlock->rw_state, &state); 3428 if (rv == -1) { 3429 umtxq_unbusy_unlocked(&uq->uq_key); 3430 error = EFAULT; 3431 break; 3432 } 3433 for (;;) { 3434 rv = casueword32(&rwlock->rw_state, state, 3435 &oldstate, state & ~URWLOCK_WRITE_WAITERS); 3436 if (rv == -1) { 3437 error = EFAULT; 3438 break; 3439 } 3440 if (rv == 0) { 3441 MPASS(oldstate == state); 3442 break; 3443 } 3444 state = oldstate; 3445 error1 = thread_check_susp(td, false); 3446 /* 3447 * We are leaving the URWLOCK_WRITE_WAITERS 3448 * behind, but this should not harm the 3449 * correctness. 3450 */ 3451 if (error1 != 0) { 3452 if (error == 0) 3453 error = error1; 3454 break; 3455 } 3456 } 3457 rv = fueword32(&rwlock->rw_blocked_readers, 3458 &blocked_readers); 3459 if (rv == -1) { 3460 umtxq_unbusy_unlocked(&uq->uq_key); 3461 error = EFAULT; 3462 break; 3463 } 3464 } else 3465 blocked_readers = 0; 3466 3467 umtxq_unbusy_unlocked(&uq->uq_key); 3468 } 3469 3470 umtx_key_release(&uq->uq_key); 3471 if (error == ERESTART) 3472 error = EINTR; 3473 return (error); 3474 } 3475 3476 static int 3477 do_rw_unlock(struct thread *td, struct urwlock *rwlock) 3478 { 3479 struct umtx_q *uq; 3480 uint32_t flags; 3481 int32_t state, oldstate; 3482 int error, rv, q, count; 3483 3484 uq = td->td_umtxq; 3485 error = fueword32(&rwlock->rw_flags, &flags); 3486 if (error == -1) 3487 return (EFAULT); 3488 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3489 if (error != 0) 3490 return (error); 3491 3492 error = fueword32(&rwlock->rw_state, &state); 3493 if (error == -1) { 3494 error = EFAULT; 3495 goto out; 3496 } 3497 if (state & URWLOCK_WRITE_OWNER) { 3498 for (;;) { 3499 rv = casueword32(&rwlock->rw_state, state, 3500 &oldstate, state & ~URWLOCK_WRITE_OWNER); 3501 if (rv == -1) { 3502 error = EFAULT; 3503 goto out; 3504 } 3505 if (rv == 1) { 3506 state = oldstate; 3507 if (!(oldstate & URWLOCK_WRITE_OWNER)) { 3508 error = EPERM; 3509 goto out; 3510 } 3511 error = thread_check_susp(td, true); 3512 if (error != 0) 3513 goto out; 3514 } else 3515 break; 3516 } 3517 } else if (URWLOCK_READER_COUNT(state) != 0) { 3518 for (;;) { 3519 rv = casueword32(&rwlock->rw_state, state, 3520 &oldstate, state - 1); 3521 if (rv == -1) { 3522 error = EFAULT; 3523 goto out; 3524 } 3525 if (rv == 1) { 3526 state = oldstate; 3527 if (URWLOCK_READER_COUNT(oldstate) == 0) { 3528 error = EPERM; 3529 goto out; 3530 } 3531 error = thread_check_susp(td, true); 3532 if (error != 0) 3533 goto out; 3534 } else 3535 break; 3536 } 3537 } else { 3538 error = EPERM; 3539 goto out; 3540 } 3541 3542 count = 0; 3543 3544 if (!(flags & URWLOCK_PREFER_READER)) { 3545 if (state & URWLOCK_WRITE_WAITERS) { 3546 count = 1; 3547 q = UMTX_EXCLUSIVE_QUEUE; 3548 } else if (state & URWLOCK_READ_WAITERS) { 3549 count = INT_MAX; 3550 q = UMTX_SHARED_QUEUE; 3551 } 3552 } else { 3553 if (state & URWLOCK_READ_WAITERS) { 3554 count = INT_MAX; 3555 q = UMTX_SHARED_QUEUE; 3556 } 
else if (state & URWLOCK_WRITE_WAITERS) { 3557 count = 1; 3558 q = UMTX_EXCLUSIVE_QUEUE; 3559 } 3560 } 3561 3562 if (count) { 3563 umtxq_lock(&uq->uq_key); 3564 umtxq_busy(&uq->uq_key); 3565 umtxq_signal_queue(&uq->uq_key, count, q); 3566 umtxq_unbusy(&uq->uq_key); 3567 umtxq_unlock(&uq->uq_key); 3568 } 3569 out: 3570 umtx_key_release(&uq->uq_key); 3571 return (error); 3572 } 3573 3574 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3575 static int 3576 do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout) 3577 { 3578 struct umtx_abs_timeout timo; 3579 struct umtx_q *uq; 3580 uint32_t flags, count, count1; 3581 int error, rv, rv1; 3582 3583 uq = td->td_umtxq; 3584 error = fueword32(&sem->_flags, &flags); 3585 if (error == -1) 3586 return (EFAULT); 3587 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3588 if (error != 0) 3589 return (error); 3590 3591 if (timeout != NULL) 3592 umtx_abs_timeout_init2(&timo, timeout); 3593 3594 again: 3595 umtxq_lock(&uq->uq_key); 3596 umtxq_busy(&uq->uq_key); 3597 umtxq_insert(uq); 3598 umtxq_unlock(&uq->uq_key); 3599 rv = casueword32(&sem->_has_waiters, 0, &count1, 1); 3600 if (rv != -1) 3601 rv1 = fueword32(&sem->_count, &count); 3602 if (rv == -1 || rv1 == -1 || count != 0 || (rv == 1 && count1 == 0)) { 3603 if (rv == 0) 3604 rv = suword32(&sem->_has_waiters, 0); 3605 umtxq_lock(&uq->uq_key); 3606 umtxq_unbusy(&uq->uq_key); 3607 umtxq_remove(uq); 3608 umtxq_unlock(&uq->uq_key); 3609 if (rv == -1 || rv1 == -1) { 3610 error = EFAULT; 3611 goto out; 3612 } 3613 if (count != 0) { 3614 error = 0; 3615 goto out; 3616 } 3617 MPASS(rv == 1 && count1 == 0); 3618 rv = thread_check_susp(td, true); 3619 if (rv == 0) 3620 goto again; 3621 error = rv; 3622 goto out; 3623 } 3624 umtxq_lock(&uq->uq_key); 3625 umtxq_unbusy(&uq->uq_key); 3626 3627 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 3628 3629 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3630 error = 0; 3631 else { 3632 umtxq_remove(uq); 3633 /* A relative timeout cannot be restarted. */ 3634 if (error == ERESTART && timeout != NULL && 3635 (timeout->_flags & UMTX_ABSTIME) == 0) 3636 error = EINTR; 3637 } 3638 umtxq_unlock(&uq->uq_key); 3639 out: 3640 umtx_key_release(&uq->uq_key); 3641 return (error); 3642 } 3643 3644 /* 3645 * Signal a userland semaphore. 3646 */ 3647 static int 3648 do_sem_wake(struct thread *td, struct _usem *sem) 3649 { 3650 struct umtx_key key; 3651 int error, cnt; 3652 uint32_t flags; 3653 3654 error = fueword32(&sem->_flags, &flags); 3655 if (error == -1) 3656 return (EFAULT); 3657 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3658 return (error); 3659 umtxq_lock(&key); 3660 umtxq_busy(&key); 3661 cnt = umtxq_count(&key); 3662 if (cnt > 0) { 3663 /* 3664 * Check if count is greater than 0, this means the memory is 3665 * still being referenced by user code, so we can safely 3666 * update _has_waiters flag. 
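 * When the thread being woken is the last waiter (cnt == 1), the
 * _has_waiters word is cleared before the wakeup so that later posts
 * from userland do not keep entering the kernel needlessly.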
3667 */ 3668 if (cnt == 1) { 3669 umtxq_unlock(&key); 3670 error = suword32(&sem->_has_waiters, 0); 3671 umtxq_lock(&key); 3672 if (error == -1) 3673 error = EFAULT; 3674 } 3675 umtxq_signal(&key, 1); 3676 } 3677 umtxq_unbusy(&key); 3678 umtxq_unlock(&key); 3679 umtx_key_release(&key); 3680 return (error); 3681 } 3682 #endif 3683 3684 static int 3685 do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout) 3686 { 3687 struct umtx_abs_timeout timo; 3688 struct umtx_q *uq; 3689 uint32_t count, flags; 3690 int error, rv; 3691 3692 uq = td->td_umtxq; 3693 flags = fuword32(&sem->_flags); 3694 if (timeout != NULL) 3695 umtx_abs_timeout_init2(&timo, timeout); 3696 3697 again: 3698 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3699 if (error != 0) 3700 return (error); 3701 umtxq_lock(&uq->uq_key); 3702 umtxq_busy(&uq->uq_key); 3703 umtxq_insert(uq); 3704 umtxq_unlock(&uq->uq_key); 3705 rv = fueword32(&sem->_count, &count); 3706 if (rv == -1) { 3707 umtxq_lock(&uq->uq_key); 3708 umtxq_unbusy(&uq->uq_key); 3709 umtxq_remove(uq); 3710 umtxq_unlock(&uq->uq_key); 3711 umtx_key_release(&uq->uq_key); 3712 return (EFAULT); 3713 } 3714 for (;;) { 3715 if (USEM_COUNT(count) != 0) { 3716 umtxq_lock(&uq->uq_key); 3717 umtxq_unbusy(&uq->uq_key); 3718 umtxq_remove(uq); 3719 umtxq_unlock(&uq->uq_key); 3720 umtx_key_release(&uq->uq_key); 3721 return (0); 3722 } 3723 if (count == USEM_HAS_WAITERS) 3724 break; 3725 rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS); 3726 if (rv == 0) 3727 break; 3728 umtxq_lock(&uq->uq_key); 3729 umtxq_unbusy(&uq->uq_key); 3730 umtxq_remove(uq); 3731 umtxq_unlock(&uq->uq_key); 3732 umtx_key_release(&uq->uq_key); 3733 if (rv == -1) 3734 return (EFAULT); 3735 rv = thread_check_susp(td, true); 3736 if (rv != 0) 3737 return (rv); 3738 goto again; 3739 } 3740 umtxq_lock(&uq->uq_key); 3741 umtxq_unbusy(&uq->uq_key); 3742 3743 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 3744 3745 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3746 error = 0; 3747 else { 3748 umtxq_remove(uq); 3749 if (timeout != NULL && (timeout->_flags & UMTX_ABSTIME) == 0) { 3750 /* A relative timeout cannot be restarted. */ 3751 if (error == ERESTART) 3752 error = EINTR; 3753 if (error == EINTR) { 3754 kern_clock_gettime(curthread, timo.clockid, 3755 &timo.cur); 3756 timespecsub(&timo.end, &timo.cur, 3757 &timeout->_timeout); 3758 } 3759 } 3760 } 3761 umtxq_unlock(&uq->uq_key); 3762 umtx_key_release(&uq->uq_key); 3763 return (error); 3764 } 3765 3766 /* 3767 * Signal a userland semaphore. 3768 */ 3769 static int 3770 do_sem2_wake(struct thread *td, struct _usem2 *sem) 3771 { 3772 struct umtx_key key; 3773 int error, cnt, rv; 3774 uint32_t count, flags; 3775 3776 rv = fueword32(&sem->_flags, &flags); 3777 if (rv == -1) 3778 return (EFAULT); 3779 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3780 return (error); 3781 umtxq_lock(&key); 3782 umtxq_busy(&key); 3783 cnt = umtxq_count(&key); 3784 if (cnt > 0) { 3785 /* 3786 * If this was the last sleeping thread, clear the waiters 3787 * flag in _count. 
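 * Unlike the old _usem, the waiters indication is a bit inside _count
 * itself (USEM_HAS_WAITERS), so it is cleared with a CAS loop that
 * retries on spurious failure while preserving the counter value.
 *
 * Illustrative userland post fast path only (not copied from libc):
 *
 *	count = atomic_fetchadd_32(&sem->_count, 1);
 *	if ((count & USEM_HAS_WAITERS) != 0)
 *		_umtx_op(sem, UMTX_OP_SEM2_WAKE, 0, NULL, NULL);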
3788 */ 3789 if (cnt == 1) { 3790 umtxq_unlock(&key); 3791 rv = fueword32(&sem->_count, &count); 3792 while (rv != -1 && count & USEM_HAS_WAITERS) { 3793 rv = casueword32(&sem->_count, count, &count, 3794 count & ~USEM_HAS_WAITERS); 3795 if (rv == 1) { 3796 rv = thread_check_susp(td, false); 3797 if (rv != 0) 3798 break; 3799 } 3800 } 3801 if (rv == -1) 3802 error = EFAULT; 3803 else if (rv > 0) { 3804 error = rv; 3805 } 3806 umtxq_lock(&key); 3807 } 3808 3809 umtxq_signal(&key, 1); 3810 } 3811 umtxq_unbusy(&key); 3812 umtxq_unlock(&key); 3813 umtx_key_release(&key); 3814 return (error); 3815 } 3816 3817 #ifdef COMPAT_FREEBSD10 3818 int 3819 freebsd10__umtx_lock(struct thread *td, struct freebsd10__umtx_lock_args *uap) 3820 { 3821 return (do_lock_umtx(td, uap->umtx, td->td_tid, 0)); 3822 } 3823 3824 int 3825 freebsd10__umtx_unlock(struct thread *td, 3826 struct freebsd10__umtx_unlock_args *uap) 3827 { 3828 return (do_unlock_umtx(td, uap->umtx, td->td_tid)); 3829 } 3830 #endif 3831 3832 inline int 3833 umtx_copyin_timeout(const void *uaddr, struct timespec *tsp) 3834 { 3835 int error; 3836 3837 error = copyin(uaddr, tsp, sizeof(*tsp)); 3838 if (error == 0) { 3839 if (!timespecvalid_interval(tsp)) 3840 error = EINVAL; 3841 } 3842 return (error); 3843 } 3844 3845 static inline int 3846 umtx_copyin_umtx_time(const void *uaddr, size_t size, struct _umtx_time *tp) 3847 { 3848 int error; 3849 3850 if (size <= sizeof(tp->_timeout)) { 3851 tp->_clockid = CLOCK_REALTIME; 3852 tp->_flags = 0; 3853 error = copyin(uaddr, &tp->_timeout, sizeof(tp->_timeout)); 3854 } else 3855 error = copyin(uaddr, tp, sizeof(*tp)); 3856 if (error != 0) 3857 return (error); 3858 if (!timespecvalid_interval(&tp->_timeout)) 3859 return (EINVAL); 3860 return (0); 3861 } 3862 3863 static int 3864 umtx_copyin_robust_lists(const void *uaddr, size_t size, 3865 struct umtx_robust_lists_params *rb) 3866 { 3867 3868 if (size > sizeof(*rb)) 3869 return (EINVAL); 3870 return (copyin(uaddr, rb, size)); 3871 } 3872 3873 static int 3874 umtx_copyout_timeout(void *uaddr, size_t sz, struct timespec *tsp) 3875 { 3876 3877 /* 3878 * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time) 3879 * and we're only called if sz >= sizeof(timespec) as supplied in the 3880 * copyops. 3881 */ 3882 KASSERT(sz >= sizeof(*tsp), 3883 ("umtx_copyops specifies incorrect sizes")); 3884 3885 return (copyout(tsp, uaddr, sizeof(*tsp))); 3886 } 3887 3888 #ifdef COMPAT_FREEBSD10 3889 static int 3890 __umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap, 3891 const struct umtx_copyops *ops) 3892 { 3893 struct timespec *ts, timeout; 3894 int error; 3895 3896 /* Allow a null timespec (wait forever). 
*/ 3897 if (uap->uaddr2 == NULL) 3898 ts = NULL; 3899 else { 3900 error = ops->copyin_timeout(uap->uaddr2, &timeout); 3901 if (error != 0) 3902 return (error); 3903 ts = &timeout; 3904 } 3905 #ifdef COMPAT_FREEBSD32 3906 if (ops->compat32) 3907 return (do_lock_umtx32(td, uap->obj, uap->val, ts)); 3908 #endif 3909 return (do_lock_umtx(td, uap->obj, uap->val, ts)); 3910 } 3911 3912 static int 3913 __umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap, 3914 const struct umtx_copyops *ops) 3915 { 3916 #ifdef COMPAT_FREEBSD32 3917 if (ops->compat32) 3918 return (do_unlock_umtx32(td, uap->obj, uap->val)); 3919 #endif 3920 return (do_unlock_umtx(td, uap->obj, uap->val)); 3921 } 3922 #endif /* COMPAT_FREEBSD10 */ 3923 3924 #if !defined(COMPAT_FREEBSD10) 3925 static int 3926 __umtx_op_unimpl(struct thread *td __unused, struct _umtx_op_args *uap __unused, 3927 const struct umtx_copyops *ops __unused) 3928 { 3929 return (EOPNOTSUPP); 3930 } 3931 #endif /* COMPAT_FREEBSD10 */ 3932 3933 static int 3934 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap, 3935 const struct umtx_copyops *ops) 3936 { 3937 struct _umtx_time timeout, *tm_p; 3938 int error; 3939 3940 if (uap->uaddr2 == NULL) 3941 tm_p = NULL; 3942 else { 3943 error = ops->copyin_umtx_time( 3944 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3945 if (error != 0) 3946 return (error); 3947 tm_p = &timeout; 3948 } 3949 return (do_wait(td, uap->obj, uap->val, tm_p, ops->compat32, 0)); 3950 } 3951 3952 static int 3953 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap, 3954 const struct umtx_copyops *ops) 3955 { 3956 struct _umtx_time timeout, *tm_p; 3957 int error; 3958 3959 if (uap->uaddr2 == NULL) 3960 tm_p = NULL; 3961 else { 3962 error = ops->copyin_umtx_time( 3963 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3964 if (error != 0) 3965 return (error); 3966 tm_p = &timeout; 3967 } 3968 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); 3969 } 3970 3971 static int 3972 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap, 3973 const struct umtx_copyops *ops) 3974 { 3975 struct _umtx_time *tm_p, timeout; 3976 int error; 3977 3978 if (uap->uaddr2 == NULL) 3979 tm_p = NULL; 3980 else { 3981 error = ops->copyin_umtx_time( 3982 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3983 if (error != 0) 3984 return (error); 3985 tm_p = &timeout; 3986 } 3987 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1)); 3988 } 3989 3990 static int 3991 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap, 3992 const struct umtx_copyops *ops __unused) 3993 { 3994 3995 return (kern_umtx_wake(td, uap->obj, uap->val, 0)); 3996 } 3997 3998 #define BATCH_SIZE 128 3999 static int 4000 __umtx_op_nwake_private_native(struct thread *td, struct _umtx_op_args *uap) 4001 { 4002 char *uaddrs[BATCH_SIZE], **upp; 4003 int count, error, i, pos, tocopy; 4004 4005 upp = (char **)uap->obj; 4006 error = 0; 4007 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 4008 pos += tocopy) { 4009 tocopy = MIN(count, BATCH_SIZE); 4010 error = copyin(upp + pos, uaddrs, tocopy * sizeof(char *)); 4011 if (error != 0) 4012 break; 4013 for (i = 0; i < tocopy; ++i) { 4014 kern_umtx_wake(td, uaddrs[i], INT_MAX, 1); 4015 } 4016 maybe_yield(); 4017 } 4018 return (error); 4019 } 4020 4021 static int 4022 __umtx_op_nwake_private_compat32(struct thread *td, struct _umtx_op_args *uap) 4023 { 4024 uint32_t uaddrs[BATCH_SIZE], *upp; 4025 int count, error, i, pos, tocopy; 4026 4027 upp = (uint32_t *)uap->obj; 4028 error = 0; 4029 for (count = 
uap->val, pos = 0; count > 0; count -= tocopy, 4030 pos += tocopy) { 4031 tocopy = MIN(count, BATCH_SIZE); 4032 error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t)); 4033 if (error != 0) 4034 break; 4035 for (i = 0; i < tocopy; ++i) { 4036 kern_umtx_wake(td, (void *)(uintptr_t)uaddrs[i], 4037 INT_MAX, 1); 4038 } 4039 maybe_yield(); 4040 } 4041 return (error); 4042 } 4043 4044 static int 4045 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap, 4046 const struct umtx_copyops *ops) 4047 { 4048 4049 if (ops->compat32) 4050 return (__umtx_op_nwake_private_compat32(td, uap)); 4051 return (__umtx_op_nwake_private_native(td, uap)); 4052 } 4053 4054 static int 4055 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap, 4056 const struct umtx_copyops *ops __unused) 4057 { 4058 4059 return (kern_umtx_wake(td, uap->obj, uap->val, 1)); 4060 } 4061 4062 static int 4063 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap, 4064 const struct umtx_copyops *ops) 4065 { 4066 struct _umtx_time *tm_p, timeout; 4067 int error; 4068 4069 /* Allow a null timespec (wait forever). */ 4070 if (uap->uaddr2 == NULL) 4071 tm_p = NULL; 4072 else { 4073 error = ops->copyin_umtx_time( 4074 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 4075 if (error != 0) 4076 return (error); 4077 tm_p = &timeout; 4078 } 4079 return (do_lock_umutex(td, uap->obj, tm_p, 0)); 4080 } 4081 4082 static int 4083 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap, 4084 const struct umtx_copyops *ops __unused) 4085 { 4086 4087 return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY)); 4088 } 4089 4090 static int 4091 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap, 4092 const struct umtx_copyops *ops) 4093 { 4094 struct _umtx_time *tm_p, timeout; 4095 int error; 4096 4097 /* Allow a null timespec (wait forever). */ 4098 if (uap->uaddr2 == NULL) 4099 tm_p = NULL; 4100 else { 4101 error = ops->copyin_umtx_time( 4102 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 4103 if (error != 0) 4104 return (error); 4105 tm_p = &timeout; 4106 } 4107 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT)); 4108 } 4109 4110 static int 4111 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap, 4112 const struct umtx_copyops *ops __unused) 4113 { 4114 4115 return (do_wake_umutex(td, uap->obj)); 4116 } 4117 4118 static int 4119 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap, 4120 const struct umtx_copyops *ops __unused) 4121 { 4122 4123 return (do_unlock_umutex(td, uap->obj, false)); 4124 } 4125 4126 static int 4127 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap, 4128 const struct umtx_copyops *ops __unused) 4129 { 4130 4131 return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1)); 4132 } 4133 4134 static int 4135 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap, 4136 const struct umtx_copyops *ops) 4137 { 4138 struct timespec *ts, timeout; 4139 int error; 4140 4141 /* Allow a null timespec (wait forever). 
*/ 4142 if (uap->uaddr2 == NULL) 4143 ts = NULL; 4144 else { 4145 error = ops->copyin_timeout(uap->uaddr2, &timeout); 4146 if (error != 0) 4147 return (error); 4148 ts = &timeout; 4149 } 4150 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 4151 } 4152 4153 static int 4154 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap, 4155 const struct umtx_copyops *ops __unused) 4156 { 4157 4158 return (do_cv_signal(td, uap->obj)); 4159 } 4160 4161 static int 4162 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap, 4163 const struct umtx_copyops *ops __unused) 4164 { 4165 4166 return (do_cv_broadcast(td, uap->obj)); 4167 } 4168 4169 static int 4170 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap, 4171 const struct umtx_copyops *ops) 4172 { 4173 struct _umtx_time timeout; 4174 int error; 4175 4176 /* Allow a null timespec (wait forever). */ 4177 if (uap->uaddr2 == NULL) { 4178 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 4179 } else { 4180 error = ops->copyin_umtx_time(uap->uaddr2, 4181 (size_t)uap->uaddr1, &timeout); 4182 if (error != 0) 4183 return (error); 4184 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 4185 } 4186 return (error); 4187 } 4188 4189 static int 4190 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap, 4191 const struct umtx_copyops *ops) 4192 { 4193 struct _umtx_time timeout; 4194 int error; 4195 4196 /* Allow a null timespec (wait forever). */ 4197 if (uap->uaddr2 == NULL) { 4198 error = do_rw_wrlock(td, uap->obj, 0); 4199 } else { 4200 error = ops->copyin_umtx_time(uap->uaddr2, 4201 (size_t)uap->uaddr1, &timeout); 4202 if (error != 0) 4203 return (error); 4204 4205 error = do_rw_wrlock(td, uap->obj, &timeout); 4206 } 4207 return (error); 4208 } 4209 4210 static int 4211 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap, 4212 const struct umtx_copyops *ops __unused) 4213 { 4214 4215 return (do_rw_unlock(td, uap->obj)); 4216 } 4217 4218 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4219 static int 4220 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap, 4221 const struct umtx_copyops *ops) 4222 { 4223 struct _umtx_time *tm_p, timeout; 4224 int error; 4225 4226 /* Allow a null timespec (wait forever). */ 4227 if (uap->uaddr2 == NULL) 4228 tm_p = NULL; 4229 else { 4230 error = ops->copyin_umtx_time( 4231 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 4232 if (error != 0) 4233 return (error); 4234 tm_p = &timeout; 4235 } 4236 return (do_sem_wait(td, uap->obj, tm_p)); 4237 } 4238 4239 static int 4240 __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap, 4241 const struct umtx_copyops *ops __unused) 4242 { 4243 4244 return (do_sem_wake(td, uap->obj)); 4245 } 4246 #endif 4247 4248 static int 4249 __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap, 4250 const struct umtx_copyops *ops __unused) 4251 { 4252 4253 return (do_wake2_umutex(td, uap->obj, uap->val)); 4254 } 4255 4256 static int 4257 __umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap, 4258 const struct umtx_copyops *ops) 4259 { 4260 struct _umtx_time *tm_p, timeout; 4261 size_t uasize; 4262 int error; 4263 4264 /* Allow a null timespec (wait forever). 
*/ 4265 if (uap->uaddr2 == NULL) { 4266 uasize = 0; 4267 tm_p = NULL; 4268 } else { 4269 uasize = (size_t)uap->uaddr1; 4270 error = ops->copyin_umtx_time(uap->uaddr2, uasize, &timeout); 4271 if (error != 0) 4272 return (error); 4273 tm_p = &timeout; 4274 } 4275 error = do_sem2_wait(td, uap->obj, tm_p); 4276 if (error == EINTR && uap->uaddr2 != NULL && 4277 (timeout._flags & UMTX_ABSTIME) == 0 && 4278 uasize >= ops->umtx_time_sz + ops->timespec_sz) { 4279 error = ops->copyout_timeout( 4280 (void *)((uintptr_t)uap->uaddr2 + ops->umtx_time_sz), 4281 uasize - ops->umtx_time_sz, &timeout._timeout); 4282 if (error == 0) { 4283 error = EINTR; 4284 } 4285 } 4286 4287 return (error); 4288 } 4289 4290 static int 4291 __umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap, 4292 const struct umtx_copyops *ops __unused) 4293 { 4294 4295 return (do_sem2_wake(td, uap->obj)); 4296 } 4297 4298 #define USHM_OBJ_UMTX(o) \ 4299 ((struct umtx_shm_obj_list *)(&(o)->umtx_data)) 4300 4301 #define USHMF_LINKED 0x0001 4302 struct umtx_shm_reg { 4303 TAILQ_ENTRY(umtx_shm_reg) ushm_reg_link; 4304 LIST_ENTRY(umtx_shm_reg) ushm_obj_link; 4305 struct umtx_key ushm_key; 4306 struct ucred *ushm_cred; 4307 struct shmfd *ushm_obj; 4308 u_int ushm_refcnt; 4309 u_int ushm_flags; 4310 }; 4311 4312 LIST_HEAD(umtx_shm_obj_list, umtx_shm_reg); 4313 TAILQ_HEAD(umtx_shm_reg_head, umtx_shm_reg); 4314 4315 static uma_zone_t umtx_shm_reg_zone; 4316 static struct umtx_shm_reg_head umtx_shm_registry[UMTX_CHAINS]; 4317 static struct mtx umtx_shm_lock; 4318 static struct umtx_shm_reg_head umtx_shm_reg_delfree = 4319 TAILQ_HEAD_INITIALIZER(umtx_shm_reg_delfree); 4320 4321 static void umtx_shm_free_reg(struct umtx_shm_reg *reg); 4322 4323 static void 4324 umtx_shm_reg_delfree_tq(void *context __unused, int pending __unused) 4325 { 4326 struct umtx_shm_reg_head d; 4327 struct umtx_shm_reg *reg, *reg1; 4328 4329 TAILQ_INIT(&d); 4330 mtx_lock(&umtx_shm_lock); 4331 TAILQ_CONCAT(&d, &umtx_shm_reg_delfree, ushm_reg_link); 4332 mtx_unlock(&umtx_shm_lock); 4333 TAILQ_FOREACH_SAFE(reg, &d, ushm_reg_link, reg1) { 4334 TAILQ_REMOVE(&d, reg, ushm_reg_link); 4335 umtx_shm_free_reg(reg); 4336 } 4337 } 4338 4339 static struct task umtx_shm_reg_delfree_task = 4340 TASK_INITIALIZER(0, umtx_shm_reg_delfree_tq, NULL); 4341 4342 /* 4343 * Returns 0 if a SHM with the passed key is found in the registry, in which 4344 * case it is returned through 'oreg'. Otherwise, returns an error among ESRCH 4345 * (no corresponding SHM; ESRCH was chosen for compatibility, ENOENT would have 4346 * been preferable) or EOVERFLOW (there is a corresponding SHM, but reference 4347 * count would overflow, so can't return it), in which case '*oreg' is left 4348 * unchanged. 
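 * On success the registration's reference count has already been
 * bumped on behalf of the caller, which is responsible for dropping
 * it later with umtx_shm_unref_reg().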
4349 */ 4350 static int 4351 umtx_shm_find_reg_locked(const struct umtx_key *key, 4352 struct umtx_shm_reg **const oreg) 4353 { 4354 struct umtx_shm_reg *reg; 4355 struct umtx_shm_reg_head *reg_head; 4356 4357 KASSERT(key->shared, ("umtx_p_find_rg: private key")); 4358 mtx_assert(&umtx_shm_lock, MA_OWNED); 4359 reg_head = &umtx_shm_registry[key->hash]; 4360 TAILQ_FOREACH(reg, reg_head, ushm_reg_link) { 4361 KASSERT(reg->ushm_key.shared, 4362 ("non-shared key on reg %p %d", reg, reg->ushm_key.shared)); 4363 if (reg->ushm_key.info.shared.object == 4364 key->info.shared.object && 4365 reg->ushm_key.info.shared.offset == 4366 key->info.shared.offset) { 4367 KASSERT(reg->ushm_key.type == TYPE_SHM, ("TYPE_USHM")); 4368 KASSERT(reg->ushm_refcnt != 0, 4369 ("reg %p refcnt 0 onlist", reg)); 4370 KASSERT((reg->ushm_flags & USHMF_LINKED) != 0, 4371 ("reg %p not linked", reg)); 4372 /* 4373 * Don't let overflow happen, just deny a new reference 4374 * (this is additional protection against some reference 4375 * count leak, which is known not to be the case at the 4376 * time of this writing). 4377 */ 4378 if (__predict_false(reg->ushm_refcnt == UINT_MAX)) 4379 return (EOVERFLOW); 4380 reg->ushm_refcnt++; 4381 *oreg = reg; 4382 return (0); 4383 } 4384 } 4385 return (ESRCH); 4386 } 4387 4388 /* 4389 * Calls umtx_shm_find_reg_unlocked() under the 'umtx_shm_lock'. 4390 */ 4391 static int 4392 umtx_shm_find_reg(const struct umtx_key *key, struct umtx_shm_reg **const oreg) 4393 { 4394 int error; 4395 4396 mtx_lock(&umtx_shm_lock); 4397 error = umtx_shm_find_reg_locked(key, oreg); 4398 mtx_unlock(&umtx_shm_lock); 4399 return (error); 4400 } 4401 4402 static void 4403 umtx_shm_free_reg(struct umtx_shm_reg *reg) 4404 { 4405 4406 chgumtxcnt(reg->ushm_cred->cr_ruidinfo, -1, 0); 4407 crfree(reg->ushm_cred); 4408 shm_drop(reg->ushm_obj); 4409 uma_zfree(umtx_shm_reg_zone, reg); 4410 } 4411 4412 static bool 4413 umtx_shm_unref_reg_locked(struct umtx_shm_reg *reg, bool linked_ref) 4414 { 4415 mtx_assert(&umtx_shm_lock, MA_OWNED); 4416 KASSERT(reg->ushm_refcnt != 0, ("ushm_reg %p refcnt 0", reg)); 4417 4418 if (linked_ref) { 4419 if ((reg->ushm_flags & USHMF_LINKED) == 0) 4420 /* 4421 * The reference tied to USHMF_LINKED has already been 4422 * released concurrently. 4423 */ 4424 return (false); 4425 4426 TAILQ_REMOVE(&umtx_shm_registry[reg->ushm_key.hash], reg, 4427 ushm_reg_link); 4428 LIST_REMOVE(reg, ushm_obj_link); 4429 reg->ushm_flags &= ~USHMF_LINKED; 4430 } 4431 4432 reg->ushm_refcnt--; 4433 return (reg->ushm_refcnt == 0); 4434 } 4435 4436 static void 4437 umtx_shm_unref_reg(struct umtx_shm_reg *reg, bool linked_ref) 4438 { 4439 vm_object_t object; 4440 bool dofree; 4441 4442 if (linked_ref) { 4443 /* 4444 * Note: This may be executed multiple times on the same 4445 * shared-memory VM object in presence of concurrent callers 4446 * because 'umtx_shm_lock' is not held all along in umtx_shm() 4447 * and here. 
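 * Setting OBJ_UMTXDEAD is idempotent; the flag is what
 * umtx_shm_alive() checks in order to report that the backing
 * registration has gone away.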
4436 static void 4437 umtx_shm_unref_reg(struct umtx_shm_reg *reg, bool linked_ref) 4438 { 4439 vm_object_t object; 4440 bool dofree; 4441 4442 if (linked_ref) { 4443 /* 4444 * Note: This may be executed multiple times on the same 4445 * shared-memory VM object in the presence of concurrent callers 4446 * because 'umtx_shm_lock' is not held all along in umtx_shm() 4447 * and here. 4448 */ 4449 object = reg->ushm_obj->shm_object; 4450 VM_OBJECT_WLOCK(object); 4451 vm_object_set_flag(object, OBJ_UMTXDEAD); 4452 VM_OBJECT_WUNLOCK(object); 4453 } 4454 mtx_lock(&umtx_shm_lock); 4455 dofree = umtx_shm_unref_reg_locked(reg, linked_ref); 4456 mtx_unlock(&umtx_shm_lock); 4457 if (dofree) 4458 umtx_shm_free_reg(reg); 4459 } 4460 4461 void 4462 umtx_shm_object_init(vm_object_t object) 4463 { 4464 4465 LIST_INIT(USHM_OBJ_UMTX(object)); 4466 } 4467 4468 void 4469 umtx_shm_object_terminated(vm_object_t object) 4470 { 4471 struct umtx_shm_reg *reg, *reg1; 4472 bool dofree; 4473 4474 if (LIST_EMPTY(USHM_OBJ_UMTX(object))) 4475 return; 4476 4477 dofree = false; 4478 mtx_lock(&umtx_shm_lock); 4479 LIST_FOREACH_SAFE(reg, USHM_OBJ_UMTX(object), ushm_obj_link, reg1) { 4480 if (umtx_shm_unref_reg_locked(reg, true)) { 4481 TAILQ_INSERT_TAIL(&umtx_shm_reg_delfree, reg, 4482 ushm_reg_link); 4483 dofree = true; 4484 } 4485 } 4486 mtx_unlock(&umtx_shm_lock); 4487 if (dofree) 4488 taskqueue_enqueue(taskqueue_thread, &umtx_shm_reg_delfree_task); 4489 } 4490
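/*
 * The registration machinery below backs the UMTX_OP_SHM sub-operations
 * (UMTX_SHM_CREAT, UMTX_SHM_LOOKUP, UMTX_SHM_DESTROY and UMTX_SHM_ALIVE)
 * dispatched from umtx_shm().  A rough userland sketch, assuming the
 * documented _umtx_op(2) prototype ('lockword' is a hypothetical lock word in
 * a MAP_SHARED page; error handling omitted):
 *
 *	int fd;
 *
 *	fd = _umtx_op(NULL, UMTX_OP_SHM, UMTX_SHM_CREAT, lockword, NULL);
 *		(creates, or attaches to, the anonymous shm object keyed by
 *		the page backing 'lockword' and returns an fd for it)
 *	fd = _umtx_op(NULL, UMTX_OP_SHM, UMTX_SHM_LOOKUP, lockword, NULL);
 *		(any process mapping the same page obtains an fd for the
 *		same object)
 */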
4491 static int 4492 umtx_shm_create_reg(struct thread *td, const struct umtx_key *key, 4493 struct umtx_shm_reg **res) 4494 { 4495 struct shmfd *shm; 4496 struct umtx_shm_reg *reg, *reg1; 4497 struct ucred *cred; 4498 int error; 4499 4500 error = umtx_shm_find_reg(key, res); 4501 if (error != ESRCH) { 4502 /* 4503 * Either no error occurred, and '*res' was filled, or EOVERFLOW 4504 * was returned, indicating a reference count limit, and we 4505 * won't create a duplicate registration. In both cases, we are 4506 * done. 4507 */ 4508 return (error); 4509 } 4510 /* No entry, we will create one. */ 4511 4512 cred = td->td_ucred; 4513 if (!chgumtxcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_UMTXP))) 4514 return (ENOMEM); 4515 shm = shm_alloc(td->td_ucred, O_RDWR, false); 4516 if (shm == NULL) { 4517 chgumtxcnt(cred->cr_ruidinfo, -1, 0); 4518 return (ENOMEM); 4519 } 4520 reg = uma_zalloc(umtx_shm_reg_zone, M_WAITOK | M_ZERO); 4521 bcopy(key, &reg->ushm_key, sizeof(*key)); 4522 reg->ushm_obj = shm; 4523 reg->ushm_cred = crhold(cred); 4524 error = shm_dotruncate(reg->ushm_obj, PAGE_SIZE); 4525 if (error != 0) { 4526 umtx_shm_free_reg(reg); 4527 return (error); 4528 } 4529 mtx_lock(&umtx_shm_lock); 4530 /* Re-lookup as 'umtx_shm_lock' has been temporarily released. */ 4531 error = umtx_shm_find_reg_locked(key, &reg1); 4532 switch (error) { 4533 case 0: 4534 mtx_unlock(&umtx_shm_lock); 4535 umtx_shm_free_reg(reg); 4536 *res = reg1; 4537 return (0); 4538 case ESRCH: 4539 break; 4540 default: 4541 mtx_unlock(&umtx_shm_lock); 4542 umtx_shm_free_reg(reg); 4543 return (error); 4544 } 4545 TAILQ_INSERT_TAIL(&umtx_shm_registry[key->hash], reg, ushm_reg_link); 4546 LIST_INSERT_HEAD(USHM_OBJ_UMTX(key->info.shared.object), reg, 4547 ushm_obj_link); 4548 reg->ushm_flags = USHMF_LINKED; 4549 /* 4550 * This is one reference for the registry and the list of shared 4551 * mutexes referenced by the VM object containing the lock pointer, and 4552 * another for the caller, which it will free after use. So, one of 4553 * these is tied to the presence of USHMF_LINKED. 4554 */ 4555 reg->ushm_refcnt = 2; 4556 mtx_unlock(&umtx_shm_lock); 4557 *res = reg; 4558 return (0); 4559 } 4560 4561 static int 4562 umtx_shm_alive(struct thread *td, void *addr) 4563 { 4564 vm_map_t map; 4565 vm_map_entry_t entry; 4566 vm_object_t object; 4567 vm_pindex_t pindex; 4568 vm_prot_t prot; 4569 int res, ret; 4570 boolean_t wired; 4571 4572 map = &td->td_proc->p_vmspace->vm_map; 4573 res = vm_map_lookup(&map, (uintptr_t)addr, VM_PROT_READ, &entry, 4574 &object, &pindex, &prot, &wired); 4575 if (res != KERN_SUCCESS) 4576 return (EFAULT); 4577 if (object == NULL) 4578 ret = EINVAL; 4579 else 4580 ret = (object->flags & OBJ_UMTXDEAD) != 0 ? ENOTTY : 0; 4581 vm_map_lookup_done(map, entry); 4582 return (ret); 4583 } 4584 4585 static void 4586 umtx_shm_init(void) 4587 { 4588 int i; 4589 4590 umtx_shm_reg_zone = uma_zcreate("umtx_shm", sizeof(struct umtx_shm_reg), 4591 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 4592 mtx_init(&umtx_shm_lock, "umtxshm", NULL, MTX_DEF); 4593 for (i = 0; i < nitems(umtx_shm_registry); i++) 4594 TAILQ_INIT(&umtx_shm_registry[i]); 4595 } 4596 4597 static int 4598 umtx_shm(struct thread *td, void *addr, u_int flags) 4599 { 4600 struct umtx_key key; 4601 struct umtx_shm_reg *reg; 4602 struct file *fp; 4603 int error, fd; 4604 4605 if (__bitcount(flags & (UMTX_SHM_CREAT | UMTX_SHM_LOOKUP | 4606 UMTX_SHM_DESTROY | UMTX_SHM_ALIVE)) != 1) 4607 return (EINVAL); 4608 if ((flags & UMTX_SHM_ALIVE) != 0) 4609 return (umtx_shm_alive(td, addr)); 4610 error = umtx_key_get(addr, TYPE_SHM, PROCESS_SHARE, &key); 4611 if (error != 0) 4612 return (error); 4613 KASSERT(key.shared == 1, ("non-shared key")); 4614 error = (flags & UMTX_SHM_CREAT) != 0 ? 4615 umtx_shm_create_reg(td, &key, &reg) : 4616 umtx_shm_find_reg(&key, &reg); 4617 umtx_key_release(&key); 4618 if (error != 0) 4619 return (error); 4620 KASSERT(reg != NULL, ("no reg")); 4621 if ((flags & UMTX_SHM_DESTROY) != 0) { 4622 umtx_shm_unref_reg(reg, true); 4623 } else { 4624 #if 0 4625 #ifdef MAC 4626 error = mac_posixshm_check_open(td->td_ucred, 4627 reg->ushm_obj, FFLAGS(O_RDWR)); 4628 if (error == 0) 4629 #endif 4630 error = shm_access(reg->ushm_obj, td->td_ucred, 4631 FFLAGS(O_RDWR)); 4632 if (error == 0) 4633 #endif 4634 error = falloc_caps(td, &fp, &fd, O_CLOEXEC, NULL); 4635 if (error == 0) { 4636 shm_hold(reg->ushm_obj); 4637 finit(fp, FFLAGS(O_RDWR), DTYPE_SHM, reg->ushm_obj, 4638 &shm_ops); 4639 td->td_retval[0] = fd; 4640 fdrop(fp, td); 4641 } 4642 } 4643 umtx_shm_unref_reg(reg, false); 4644 return (error); 4645 } 4646 4647 static int 4648 __umtx_op_shm(struct thread *td, struct _umtx_op_args *uap, 4649 const struct umtx_copyops *ops __unused) 4650 { 4651 4652 return (umtx_shm(td, uap->uaddr1, uap->val)); 4653 } 4654 4655 static int 4656 __umtx_op_robust_lists(struct thread *td, struct _umtx_op_args *uap, 4657 const struct umtx_copyops *ops) 4658 { 4659 struct umtx_robust_lists_params rb; 4660 int error; 4661 4662 if (ops->compat32) { 4663 if ((td->td_pflags2 & TDP2_COMPAT32RB) == 0 && 4664 (td->td_rb_list != 0 || td->td_rbp_list != 0 || 4665 td->td_rb_inact != 0)) 4666 return (EBUSY); 4667 } else if ((td->td_pflags2 & TDP2_COMPAT32RB) != 0) { 4668 return (EBUSY); 4669 } 4670 4671 bzero(&rb, sizeof(rb)); 4672 error = ops->copyin_robust_lists(uap->uaddr1, uap->val, &rb); 4673 if (error != 0) 4674 return (error); 4675 4676 if (ops->compat32) 4677 td->td_pflags2 |= TDP2_COMPAT32RB; 4678 4679 td->td_rb_list = rb.robust_list_offset; 4680 td->td_rbp_list = rb.robust_priv_list_offset; 4681 td->td_rb_inact =
rb.robust_inact_offset; 4682 return (0); 4683 } 4684 4685 static int 4686 __umtx_op_get_min_timeout(struct thread *td, struct _umtx_op_args *uap, 4687 const struct umtx_copyops *ops) 4688 { 4689 long val; 4690 int error, val1; 4691 4692 val = sbttons(td->td_proc->p_umtx_min_timeout); 4693 if (ops->compat32) { 4694 val1 = (int)val; 4695 error = copyout(&val1, uap->uaddr1, sizeof(val1)); 4696 } else { 4697 error = copyout(&val, uap->uaddr1, sizeof(val)); 4698 } 4699 return (error); 4700 } 4701 4702 static int 4703 __umtx_op_set_min_timeout(struct thread *td, struct _umtx_op_args *uap, 4704 const struct umtx_copyops *ops) 4705 { 4706 if (uap->val < 0) 4707 return (EINVAL); 4708 td->td_proc->p_umtx_min_timeout = nstosbt(uap->val); 4709 return (0); 4710 } 4711 4712 #if defined(__i386__) || defined(__amd64__) 4713 /* 4714 * Provide the standard 32-bit definitions for x86, since native/compat32 use a 4715 * 32-bit time_t there. Other architectures just need the i386 definitions 4716 * along with their standard compat32. 4717 */ 4718 struct timespecx32 { 4719 int64_t tv_sec; 4720 int32_t tv_nsec; 4721 }; 4722 4723 struct umtx_timex32 { 4724 struct timespecx32 _timeout; 4725 uint32_t _flags; 4726 uint32_t _clockid; 4727 }; 4728 4729 #ifndef __i386__ 4730 #define timespeci386 timespec32 4731 #define umtx_timei386 umtx_time32 4732 #endif 4733 #else /* !__i386__ && !__amd64__ */ 4734 /* 32-bit architectures can emulate i386, so define these almost everywhere. */ 4735 struct timespeci386 { 4736 int32_t tv_sec; 4737 int32_t tv_nsec; 4738 }; 4739 4740 struct umtx_timei386 { 4741 struct timespeci386 _timeout; 4742 uint32_t _flags; 4743 uint32_t _clockid; 4744 }; 4745 4746 #if defined(__LP64__) 4747 #define timespecx32 timespec32 4748 #define umtx_timex32 umtx_time32 4749 #endif 4750 #endif 4751 4752 static int 4753 umtx_copyin_robust_lists32(const void *uaddr, size_t size, 4754 struct umtx_robust_lists_params *rbp) 4755 { 4756 struct umtx_robust_lists_params_compat32 rb32; 4757 int error; 4758 4759 if (size > sizeof(rb32)) 4760 return (EINVAL); 4761 bzero(&rb32, sizeof(rb32)); 4762 error = copyin(uaddr, &rb32, size); 4763 if (error != 0) 4764 return (error); 4765 CP(rb32, *rbp, robust_list_offset); 4766 CP(rb32, *rbp, robust_priv_list_offset); 4767 CP(rb32, *rbp, robust_inact_offset); 4768 return (0); 4769 } 4770 4771 #ifndef __i386__ 4772 static inline int 4773 umtx_copyin_timeouti386(const void *uaddr, struct timespec *tsp) 4774 { 4775 struct timespeci386 ts32; 4776 int error; 4777 4778 error = copyin(uaddr, &ts32, sizeof(ts32)); 4779 if (error == 0) { 4780 if (!timespecvalid_interval(&ts32)) 4781 error = EINVAL; 4782 else { 4783 CP(ts32, *tsp, tv_sec); 4784 CP(ts32, *tsp, tv_nsec); 4785 } 4786 } 4787 return (error); 4788 } 4789 4790 static inline int 4791 umtx_copyin_umtx_timei386(const void *uaddr, size_t size, struct _umtx_time *tp) 4792 { 4793 struct umtx_timei386 t32; 4794 int error; 4795 4796 t32._clockid = CLOCK_REALTIME; 4797 t32._flags = 0; 4798 if (size <= sizeof(t32._timeout)) 4799 error = copyin(uaddr, &t32._timeout, sizeof(t32._timeout)); 4800 else 4801 error = copyin(uaddr, &t32, sizeof(t32)); 4802 if (error != 0) 4803 return (error); 4804 if (!timespecvalid_interval(&t32._timeout)) 4805 return (EINVAL); 4806 TS_CP(t32, *tp, _timeout); 4807 CP(t32, *tp, _flags); 4808 CP(t32, *tp, _clockid); 4809 return (0); 4810 } 4811 4812 static int 4813 umtx_copyout_timeouti386(void *uaddr, size_t sz, struct timespec *tsp) 4814 { 4815 struct timespeci386 remain32 = { 4816 .tv_sec = tsp->tv_sec, 4817 
.tv_nsec = tsp->tv_nsec, 4818 }; 4819 4820 /* 4821 * Should be guaranteed by the caller: sz == uaddr1 - sizeof(_umtx_time), 4822 * and we are only called if sz >= sizeof(timespec), as supplied in the 4823 * copyops. 4824 */ 4825 KASSERT(sz >= sizeof(remain32), 4826 ("umtx_copyops specifies incorrect sizes")); 4827 4828 return (copyout(&remain32, uaddr, sizeof(remain32))); 4829 } 4830 #endif /* !__i386__ */ 4831 4832 #if defined(__i386__) || defined(__LP64__) 4833 static inline int 4834 umtx_copyin_timeoutx32(const void *uaddr, struct timespec *tsp) 4835 { 4836 struct timespecx32 ts32; 4837 int error; 4838 4839 error = copyin(uaddr, &ts32, sizeof(ts32)); 4840 if (error == 0) { 4841 if (!timespecvalid_interval(&ts32)) 4842 error = EINVAL; 4843 else { 4844 CP(ts32, *tsp, tv_sec); 4845 CP(ts32, *tsp, tv_nsec); 4846 } 4847 } 4848 return (error); 4849 } 4850 4851 static inline int 4852 umtx_copyin_umtx_timex32(const void *uaddr, size_t size, struct _umtx_time *tp) 4853 { 4854 struct umtx_timex32 t32; 4855 int error; 4856 4857 t32._clockid = CLOCK_REALTIME; 4858 t32._flags = 0; 4859 if (size <= sizeof(t32._timeout)) 4860 error = copyin(uaddr, &t32._timeout, sizeof(t32._timeout)); 4861 else 4862 error = copyin(uaddr, &t32, sizeof(t32)); 4863 if (error != 0) 4864 return (error); 4865 if (!timespecvalid_interval(&t32._timeout)) 4866 return (EINVAL); 4867 TS_CP(t32, *tp, _timeout); 4868 CP(t32, *tp, _flags); 4869 CP(t32, *tp, _clockid); 4870 return (0); 4871 } 4872 4873 static int 4874 umtx_copyout_timeoutx32(void *uaddr, size_t sz, struct timespec *tsp) 4875 { 4876 struct timespecx32 remain32 = { 4877 .tv_sec = tsp->tv_sec, 4878 .tv_nsec = tsp->tv_nsec, 4879 }; 4880 4881 /* 4882 * Should be guaranteed by the caller: sz == uaddr1 - sizeof(_umtx_time), 4883 * and we are only called if sz >= sizeof(timespec), as supplied in the 4884 * copyops.
4885 */ 4886 KASSERT(sz >= sizeof(remain32), 4887 ("umtx_copyops specifies incorrect sizes")); 4888 4889 return (copyout(&remain32, uaddr, sizeof(remain32))); 4890 } 4891 #endif /* __i386__ || __LP64__ */ 4892 4893 typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap, 4894 const struct umtx_copyops *umtx_ops); 4895 4896 static const _umtx_op_func op_table[] = { 4897 #ifdef COMPAT_FREEBSD10 4898 [UMTX_OP_LOCK] = __umtx_op_lock_umtx, 4899 [UMTX_OP_UNLOCK] = __umtx_op_unlock_umtx, 4900 #else 4901 [UMTX_OP_LOCK] = __umtx_op_unimpl, 4902 [UMTX_OP_UNLOCK] = __umtx_op_unimpl, 4903 #endif 4904 [UMTX_OP_WAIT] = __umtx_op_wait, 4905 [UMTX_OP_WAKE] = __umtx_op_wake, 4906 [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex, 4907 [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex, 4908 [UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex, 4909 [UMTX_OP_SET_CEILING] = __umtx_op_set_ceiling, 4910 [UMTX_OP_CV_WAIT] = __umtx_op_cv_wait, 4911 [UMTX_OP_CV_SIGNAL] = __umtx_op_cv_signal, 4912 [UMTX_OP_CV_BROADCAST] = __umtx_op_cv_broadcast, 4913 [UMTX_OP_WAIT_UINT] = __umtx_op_wait_uint, 4914 [UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock, 4915 [UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock, 4916 [UMTX_OP_RW_UNLOCK] = __umtx_op_rw_unlock, 4917 [UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private, 4918 [UMTX_OP_WAKE_PRIVATE] = __umtx_op_wake_private, 4919 [UMTX_OP_MUTEX_WAIT] = __umtx_op_wait_umutex, 4920 [UMTX_OP_MUTEX_WAKE] = __umtx_op_wake_umutex, 4921 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4922 [UMTX_OP_SEM_WAIT] = __umtx_op_sem_wait, 4923 [UMTX_OP_SEM_WAKE] = __umtx_op_sem_wake, 4924 #else 4925 [UMTX_OP_SEM_WAIT] = __umtx_op_unimpl, 4926 [UMTX_OP_SEM_WAKE] = __umtx_op_unimpl, 4927 #endif 4928 [UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private, 4929 [UMTX_OP_MUTEX_WAKE2] = __umtx_op_wake2_umutex, 4930 [UMTX_OP_SEM2_WAIT] = __umtx_op_sem2_wait, 4931 [UMTX_OP_SEM2_WAKE] = __umtx_op_sem2_wake, 4932 [UMTX_OP_SHM] = __umtx_op_shm, 4933 [UMTX_OP_ROBUST_LISTS] = __umtx_op_robust_lists, 4934 [UMTX_OP_GET_MIN_TIMEOUT] = __umtx_op_get_min_timeout, 4935 [UMTX_OP_SET_MIN_TIMEOUT] = __umtx_op_set_min_timeout, 4936 }; 4937 4938 static const struct umtx_copyops umtx_native_ops = { 4939 .copyin_timeout = umtx_copyin_timeout, 4940 .copyin_umtx_time = umtx_copyin_umtx_time, 4941 .copyin_robust_lists = umtx_copyin_robust_lists, 4942 .copyout_timeout = umtx_copyout_timeout, 4943 .timespec_sz = sizeof(struct timespec), 4944 .umtx_time_sz = sizeof(struct _umtx_time), 4945 }; 4946 4947 #ifndef __i386__ 4948 static const struct umtx_copyops umtx_native_opsi386 = { 4949 .copyin_timeout = umtx_copyin_timeouti386, 4950 .copyin_umtx_time = umtx_copyin_umtx_timei386, 4951 .copyin_robust_lists = umtx_copyin_robust_lists32, 4952 .copyout_timeout = umtx_copyout_timeouti386, 4953 .timespec_sz = sizeof(struct timespeci386), 4954 .umtx_time_sz = sizeof(struct umtx_timei386), 4955 .compat32 = true, 4956 }; 4957 #endif 4958 4959 #if defined(__i386__) || defined(__LP64__) 4960 /* i386 can emulate other 32-bit archs, too! 
*/ 4961 static const struct umtx_copyops umtx_native_opsx32 = { 4962 .copyin_timeout = umtx_copyin_timeoutx32, 4963 .copyin_umtx_time = umtx_copyin_umtx_timex32, 4964 .copyin_robust_lists = umtx_copyin_robust_lists32, 4965 .copyout_timeout = umtx_copyout_timeoutx32, 4966 .timespec_sz = sizeof(struct timespecx32), 4967 .umtx_time_sz = sizeof(struct umtx_timex32), 4968 .compat32 = true, 4969 }; 4970 4971 #ifdef COMPAT_FREEBSD32 4972 #ifdef __amd64__ 4973 #define umtx_native_ops32 umtx_native_opsi386 4974 #else 4975 #define umtx_native_ops32 umtx_native_opsx32 4976 #endif 4977 #endif /* COMPAT_FREEBSD32 */ 4978 #endif /* __i386__ || __LP64__ */ 4979 4980 #define UMTX_OP__FLAGS (UMTX_OP__32BIT | UMTX_OP__I386) 4981 4982 static int 4983 kern__umtx_op(struct thread *td, void *obj, int op, unsigned long val, 4984 void *uaddr1, void *uaddr2, const struct umtx_copyops *ops) 4985 { 4986 struct _umtx_op_args uap = { 4987 .obj = obj, 4988 .op = op & ~UMTX_OP__FLAGS, 4989 .val = val, 4990 .uaddr1 = uaddr1, 4991 .uaddr2 = uaddr2 4992 }; 4993 4994 if (uap.op >= nitems(op_table)) 4995 return (EINVAL); 4996 return ((*op_table[uap.op])(td, &uap, ops)); 4997 } 4998 4999 int 5000 sys__umtx_op(struct thread *td, struct _umtx_op_args *uap) 5001 { 5002 const struct umtx_copyops *umtx_ops; 5003 5004 umtx_ops = &umtx_native_ops; 5005 #ifdef __LP64__ 5006 if ((uap->op & (UMTX_OP__32BIT | UMTX_OP__I386)) != 0) { 5007 if ((uap->op & UMTX_OP__I386) != 0) 5008 umtx_ops = &umtx_native_opsi386; 5009 else 5010 umtx_ops = &umtx_native_opsx32; 5011 } 5012 #elif !defined(__i386__) 5013 /* We consider UMTX_OP__32BIT a nop on !i386 ILP32. */ 5014 if ((uap->op & UMTX_OP__I386) != 0) 5015 umtx_ops = &umtx_native_opsi386; 5016 #else 5017 /* Likewise, UMTX_OP__I386 is a nop on i386. */ 5018 if ((uap->op & UMTX_OP__32BIT) != 0) 5019 umtx_ops = &umtx_native_opsx32; 5020 #endif 5021 return (kern__umtx_op(td, uap->obj, uap->op, uap->val, uap->uaddr1, 5022 uap->uaddr2, umtx_ops)); 5023 } 5024 5025 #ifdef COMPAT_FREEBSD32 5026 #ifdef COMPAT_FREEBSD10 5027 int 5028 freebsd10_freebsd32__umtx_lock(struct thread *td, 5029 struct freebsd10_freebsd32__umtx_lock_args *uap) 5030 { 5031 return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL)); 5032 } 5033 5034 int 5035 freebsd10_freebsd32__umtx_unlock(struct thread *td, 5036 struct freebsd10_freebsd32__umtx_unlock_args *uap) 5037 { 5038 return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid)); 5039 } 5040 #endif /* COMPAT_FREEBSD10 */ 5041 5042 int 5043 freebsd32__umtx_op(struct thread *td, struct freebsd32__umtx_op_args *uap) 5044 { 5045 5046 return (kern__umtx_op(td, uap->obj, uap->op, uap->val, uap->uaddr1, 5047 uap->uaddr2, &umtx_native_ops32)); 5048 } 5049 #endif /* COMPAT_FREEBSD32 */ 5050 5051 void 5052 umtx_thread_init(struct thread *td) 5053 { 5054 5055 td->td_umtxq = umtxq_alloc(); 5056 td->td_umtxq->uq_thread = td; 5057 } 5058 5059 void 5060 umtx_thread_fini(struct thread *td) 5061 { 5062 5063 umtxq_free(td->td_umtxq); 5064 } 5065 5066 /* 5067 * Called when a new thread is created, e.g. by fork(). 5068 */ 5069 void 5070 umtx_thread_alloc(struct thread *td) 5071 { 5072 struct umtx_q *uq; 5073 5074 uq = td->td_umtxq; 5075 uq->uq_inherited_pri = PRI_MAX; 5076 5077 KASSERT(uq->uq_flags == 0, ("uq_flags != 0")); 5078 KASSERT(uq->uq_thread == td, ("uq_thread != td")); 5079 KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL")); 5080 KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty")); 5081 } 5082
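/*
 * The exec and exit hooks below consume the per-thread robust list pointers
 * registered through UMTX_OP_ROBUST_LISTS (handled by __umtx_op_robust_lists()
 * above).  A rough sketch of the expected userland registration, roughly what
 * libthr performs at thread start; the userland variable names ('thr' and its
 * members) are illustrative only:
 *
 *	struct umtx_robust_lists_params rb = {
 *		.robust_list_offset = (uintptr_t)&thr->robust_list,
 *		.robust_priv_list_offset = (uintptr_t)&thr->priv_robust_list,
 *		.robust_inact_offset = (uintptr_t)&thr->inact_mtx,
 *	};
 *	_umtx_op(NULL, UMTX_OP_ROBUST_LISTS, sizeof(rb), &rb, NULL);
 */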
5083 /* 5084 * exec() hook. 5085 * 5086 * Clear the robust lists for all of the process's threads, not delaying the 5087 * cleanup to thread exit, since the relevant address space is 5088 * destroyed right now. 5089 */ 5090 void 5091 umtx_exec(struct proc *p) 5092 { 5093 struct thread *td; 5094 5095 KASSERT(p == curproc, ("need curproc")); 5096 KASSERT((p->p_flag & P_HADTHREADS) == 0 || 5097 (p->p_flag & P_STOPPED_SINGLE) != 0, 5098 ("curproc must be single-threaded")); 5099 /* 5100 * There is no need to lock the list as only this thread can be 5101 * running. 5102 */ 5103 FOREACH_THREAD_IN_PROC(p, td) { 5104 KASSERT(td == curthread || 5105 ((td->td_flags & TDF_BOUNDARY) != 0 && TD_IS_SUSPENDED(td)), 5106 ("running thread %p %p", p, td)); 5107 umtx_thread_cleanup(td); 5108 td->td_rb_list = td->td_rbp_list = td->td_rb_inact = 0; 5109 } 5110 5111 p->p_umtx_min_timeout = 0; 5112 } 5113 5114 /* 5115 * Thread exit hook. 5116 */ 5117 void 5118 umtx_thread_exit(struct thread *td) 5119 { 5120 5121 umtx_thread_cleanup(td); 5122 } 5123 5124 static int 5125 umtx_read_uptr(struct thread *td, uintptr_t ptr, uintptr_t *res, bool compat32) 5126 { 5127 u_long res1; 5128 uint32_t res32; 5129 int error; 5130 5131 if (compat32) { 5132 error = fueword32((void *)ptr, &res32); 5133 if (error == 0) 5134 res1 = res32; 5135 } else { 5136 error = fueword((void *)ptr, &res1); 5137 } 5138 if (error == 0) 5139 *res = res1; 5140 else 5141 error = EFAULT; 5142 return (error); 5143 } 5144 5145 static void 5146 umtx_read_rb_list(struct thread *td, struct umutex *m, uintptr_t *rb_list, 5147 bool compat32) 5148 { 5149 struct umutex32 m32; 5150 5151 if (compat32) { 5152 memcpy(&m32, m, sizeof(m32)); 5153 *rb_list = m32.m_rb_lnk; 5154 } else { 5155 *rb_list = m->m_rb_lnk; 5156 } 5157 } 5158 5159 static int 5160 umtx_handle_rb(struct thread *td, uintptr_t rbp, uintptr_t *rb_list, bool inact, 5161 bool compat32) 5162 { 5163 struct umutex m; 5164 int error; 5165 5166 KASSERT(td->td_proc == curproc, ("need current vmspace")); 5167 error = copyin((void *)rbp, &m, sizeof(m)); 5168 if (error != 0) 5169 return (error); 5170 if (rb_list != NULL) 5171 umtx_read_rb_list(td, &m, rb_list, compat32); 5172 if ((m.m_flags & UMUTEX_ROBUST) == 0) 5173 return (EINVAL); 5174 if ((m.m_owner & ~UMUTEX_CONTESTED) != td->td_tid) 5175 /* inact is cleared after unlock; allow the inconsistency. */ 5176 return (inact ? 0 : EINVAL); 5177 return (do_unlock_umutex(td, (struct umutex *)rbp, true)); 5178 } 5179 5180 static void 5181 umtx_cleanup_rb_list(struct thread *td, uintptr_t rb_list, uintptr_t *rb_inact, 5182 const char *name, bool compat32) 5183 { 5184 int error, i; 5185 uintptr_t rbp; 5186 bool inact; 5187 5188 if (rb_list == 0) 5189 return; 5190 error = umtx_read_uptr(td, rb_list, &rbp, compat32); 5191 for (i = 0; error == 0 && rbp != 0 && i < umtx_max_rb; i++) { 5192 if (rbp == *rb_inact) { 5193 inact = true; 5194 *rb_inact = 0; 5195 } else 5196 inact = false; 5197 error = umtx_handle_rb(td, rbp, &rbp, inact, compat32); 5198 } 5199 if (i == umtx_max_rb && umtx_verbose_rb) { 5200 uprintf("comm %s pid %d: reached umtx %smax rb %d\n", 5201 td->td_proc->p_comm, td->td_proc->p_pid, name, umtx_max_rb); 5202 } 5203 if (error != 0 && umtx_verbose_rb) { 5204 uprintf("comm %s pid %d: handling %srb error %d\n", 5205 td->td_proc->p_comm, td->td_proc->p_pid, name, error); 5206 } 5207 } 5208 5209 /* 5210 * Clean up umtx data.
5211 */ 5212 static void 5213 umtx_thread_cleanup(struct thread *td) 5214 { 5215 struct umtx_q *uq; 5216 struct umtx_pi *pi; 5217 uintptr_t rb_inact; 5218 bool compat32; 5219 5220 /* 5221 * Disown pi mutexes. 5222 */ 5223 uq = td->td_umtxq; 5224 if (uq != NULL) { 5225 if (uq->uq_inherited_pri != PRI_MAX || 5226 !TAILQ_EMPTY(&uq->uq_pi_contested)) { 5227 mtx_lock(&umtx_lock); 5228 uq->uq_inherited_pri = PRI_MAX; 5229 while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) { 5230 pi->pi_owner = NULL; 5231 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link); 5232 } 5233 mtx_unlock(&umtx_lock); 5234 } 5235 sched_lend_user_prio_cond(td, PRI_MAX); 5236 } 5237 5238 compat32 = (td->td_pflags2 & TDP2_COMPAT32RB) != 0; 5239 td->td_pflags2 &= ~TDP2_COMPAT32RB; 5240 5241 if (td->td_rb_inact == 0 && td->td_rb_list == 0 && td->td_rbp_list == 0) 5242 return; 5243 5244 /* 5245 * Handle terminated robust mutexes. Must be done after 5246 * robust pi disown, otherwise unlock could see unowned 5247 * entries. 5248 */ 5249 rb_inact = td->td_rb_inact; 5250 if (rb_inact != 0) 5251 (void)umtx_read_uptr(td, rb_inact, &rb_inact, compat32); 5252 umtx_cleanup_rb_list(td, td->td_rb_list, &rb_inact, "", compat32); 5253 umtx_cleanup_rb_list(td, td->td_rbp_list, &rb_inact, "priv ", compat32); 5254 if (rb_inact != 0) 5255 (void)umtx_handle_rb(td, rb_inact, NULL, true, compat32); 5256 } 5257
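/*
 * Rough picture of the userland data walked by umtx_cleanup_rb_list() above
 * (the exact layout is defined by the threading library, not by this file;
 * this is only an illustration):
 *
 *	td_rb_list  --> [word holding the address of the first robust umutex]
 *	                      |
 *	                      v
 *	                struct umutex A --m_rb_lnk--> struct umutex B --> ... --> 0
 *
 *	td_rb_inact --> [word holding the address of the umutex currently being
 *	                 locked or unlocked, or 0]
 *
 * At most umtx_max_rb links are followed, and each UMUTEX_ROBUST mutex still
 * owned by the exiting (or exec'ing) thread is unlocked and marked owner-dead
 * through do_unlock_umutex(..., true).
 */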