1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2015, 2016 The FreeBSD Foundation 5 * Copyright (c) 2004, David Xu <davidxu@freebsd.org> 6 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org> 7 * All rights reserved. 8 * 9 * Portions of this software were developed by Konstantin Belousov 10 * under sponsorship from the FreeBSD Foundation. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice unmodified, this list of conditions, and the following 17 * disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 23 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 24 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 25 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 27 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 31 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <sys/cdefs.h> 35 #include "opt_umtx_profiling.h" 36 37 #include <sys/param.h> 38 #include <sys/kernel.h> 39 #include <sys/fcntl.h> 40 #include <sys/file.h> 41 #include <sys/filedesc.h> 42 #include <sys/limits.h> 43 #include <sys/lock.h> 44 #include <sys/malloc.h> 45 #include <sys/mman.h> 46 #include <sys/mutex.h> 47 #include <sys/priv.h> 48 #include <sys/proc.h> 49 #include <sys/resource.h> 50 #include <sys/resourcevar.h> 51 #include <sys/rwlock.h> 52 #include <sys/sbuf.h> 53 #include <sys/sched.h> 54 #include <sys/smp.h> 55 #include <sys/sysctl.h> 56 #include <sys/systm.h> 57 #include <sys/sysproto.h> 58 #include <sys/syscallsubr.h> 59 #include <sys/taskqueue.h> 60 #include <sys/time.h> 61 #include <sys/eventhandler.h> 62 #include <sys/umtx.h> 63 #include <sys/umtxvar.h> 64 65 #include <security/mac/mac_framework.h> 66 67 #include <vm/vm.h> 68 #include <vm/vm_param.h> 69 #include <vm/pmap.h> 70 #include <vm/uma.h> 71 #include <vm/vm_map.h> 72 #include <vm/vm_object.h> 73 74 #include <machine/atomic.h> 75 #include <machine/cpu.h> 76 77 #include <compat/freebsd32/freebsd32.h> 78 #ifdef COMPAT_FREEBSD32 79 #include <compat/freebsd32/freebsd32_proto.h> 80 #endif 81 82 #define _UMUTEX_TRY 1 83 #define _UMUTEX_WAIT 2 84 85 #ifdef UMTX_PROFILING 86 #define UPROF_PERC_BIGGER(w, f, sw, sf) \ 87 (((w) > (sw)) || ((w) == (sw) && (f) > (sf))) 88 #endif 89 90 #define UMTXQ_LOCKED_ASSERT(uc) mtx_assert(&(uc)->uc_lock, MA_OWNED) 91 #ifdef INVARIANTS 92 #define UMTXQ_ASSERT_LOCKED_BUSY(key) do { \ 93 struct umtxq_chain *uc; \ 94 \ 95 uc = umtxq_getchain(key); \ 96 mtx_assert(&uc->uc_lock, MA_OWNED); \ 97 KASSERT(uc->uc_busy != 0, ("umtx chain is not busy")); \ 98 } while (0) 99 #else 100 #define UMTXQ_ASSERT_LOCKED_BUSY(key) do {} while (0) 101 #endif 102 103 /* 104 * Don't propagate time-sharing priority, there is a security reason, 105 * a user can 
simply introduce PI-mutex, let thread A lock the mutex, 106 * and let another thread B block on the mutex, because B is 107 * sleeping, its priority will be boosted, this causes A's priority to 108 * be boosted via priority propagating too and will never be lowered even 109 * if it is using 100%CPU, this is unfair to other processes. 110 */ 111 112 #define UPRI(td) (((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\ 113 (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\ 114 PRI_MAX_TIMESHARE : (td)->td_user_pri) 115 116 #define GOLDEN_RATIO_PRIME 2654404609U 117 #ifndef UMTX_CHAINS 118 #define UMTX_CHAINS 512 119 #endif 120 #define UMTX_SHIFTS (__WORD_BIT - 9) 121 122 #define GET_SHARE(flags) \ 123 (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE) 124 125 #define BUSY_SPINS 200 126 127 struct umtx_copyops { 128 int (*copyin_timeout)(const void *uaddr, struct timespec *tsp); 129 int (*copyin_umtx_time)(const void *uaddr, size_t size, 130 struct _umtx_time *tp); 131 int (*copyin_robust_lists)(const void *uaddr, size_t size, 132 struct umtx_robust_lists_params *rbp); 133 int (*copyout_timeout)(void *uaddr, size_t size, 134 struct timespec *tsp); 135 const size_t timespec_sz; 136 const size_t umtx_time_sz; 137 const bool compat32; 138 }; 139 140 _Static_assert(sizeof(struct umutex) == sizeof(struct umutex32), "umutex32"); 141 _Static_assert(__offsetof(struct umutex, m_spare[0]) == 142 __offsetof(struct umutex32, m_spare[0]), "m_spare32"); 143 144 int umtx_shm_vnobj_persistent = 0; 145 SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_vnode_persistent, CTLFLAG_RWTUN, 146 &umtx_shm_vnobj_persistent, 0, 147 "False forces destruction of umtx attached to file, on last close"); 148 static int umtx_max_rb = 1000; 149 SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_max_robust, CTLFLAG_RWTUN, 150 &umtx_max_rb, 0, 151 "Maximum number of robust mutexes allowed for each thread"); 152 153 static uma_zone_t umtx_pi_zone; 154 static struct umtxq_chain umtxq_chains[2][UMTX_CHAINS]; 155 static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory"); 156 static int umtx_pi_allocated; 157 158 static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 159 "umtx debug"); 160 SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD, 161 &umtx_pi_allocated, 0, "Allocated umtx_pi"); 162 static int umtx_verbose_rb = 1; 163 SYSCTL_INT(_debug_umtx, OID_AUTO, robust_faults_verbose, CTLFLAG_RWTUN, 164 &umtx_verbose_rb, 0, 165 ""); 166 167 #ifdef UMTX_PROFILING 168 static long max_length; 169 SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0, "max_length"); 170 static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 171 "umtx chain stats"); 172 #endif 173 174 static inline void umtx_abs_timeout_init2(struct umtx_abs_timeout *timo, 175 const struct _umtx_time *umtxtime); 176 177 static void umtx_shm_init(void); 178 static void umtxq_sysinit(void *); 179 static void umtxq_hash(struct umtx_key *key); 180 static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, 181 bool rb); 182 static void umtx_thread_cleanup(struct thread *td); 183 SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL); 184 185 #define umtxq_signal(key, nwake) umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE) 186 187 static struct mtx umtx_lock; 188 189 #ifdef UMTX_PROFILING 190 static void 191 umtx_init_profiling(void) 192 { 193 struct sysctl_oid *chain_oid; 194 char chain_name[10]; 195 int i; 196 197 for (i = 0; i < UMTX_CHAINS; ++i) { 198 snprintf(chain_name, 
sizeof(chain_name), "%d", i); 199 chain_oid = SYSCTL_ADD_NODE(NULL, 200 SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO, 201 chain_name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 202 "umtx hash stats"); 203 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, 204 "max_length0", CTLFLAG_RD, &umtxq_chains[0][i].max_length, 0, NULL); 205 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, 206 "max_length1", CTLFLAG_RD, &umtxq_chains[1][i].max_length, 0, NULL); 207 } 208 } 209 210 static int 211 sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS) 212 { 213 char buf[512]; 214 struct sbuf sb; 215 struct umtxq_chain *uc; 216 u_int fract, i, j, tot, whole; 217 u_int sf0, sf1, sf2, sf3, sf4; 218 u_int si0, si1, si2, si3, si4; 219 u_int sw0, sw1, sw2, sw3, sw4; 220 221 sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN); 222 for (i = 0; i < 2; i++) { 223 tot = 0; 224 for (j = 0; j < UMTX_CHAINS; ++j) { 225 uc = &umtxq_chains[i][j]; 226 mtx_lock(&uc->uc_lock); 227 tot += uc->max_length; 228 mtx_unlock(&uc->uc_lock); 229 } 230 if (tot == 0) 231 sbuf_printf(&sb, "%u) Empty ", i); 232 else { 233 sf0 = sf1 = sf2 = sf3 = sf4 = 0; 234 si0 = si1 = si2 = si3 = si4 = 0; 235 sw0 = sw1 = sw2 = sw3 = sw4 = 0; 236 for (j = 0; j < UMTX_CHAINS; j++) { 237 uc = &umtxq_chains[i][j]; 238 mtx_lock(&uc->uc_lock); 239 whole = uc->max_length * 100; 240 mtx_unlock(&uc->uc_lock); 241 fract = (whole % tot) * 100; 242 if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) { 243 sf0 = fract; 244 si0 = j; 245 sw0 = whole; 246 } else if (UPROF_PERC_BIGGER(whole, fract, sw1, 247 sf1)) { 248 sf1 = fract; 249 si1 = j; 250 sw1 = whole; 251 } else if (UPROF_PERC_BIGGER(whole, fract, sw2, 252 sf2)) { 253 sf2 = fract; 254 si2 = j; 255 sw2 = whole; 256 } else if (UPROF_PERC_BIGGER(whole, fract, sw3, 257 sf3)) { 258 sf3 = fract; 259 si3 = j; 260 sw3 = whole; 261 } else if (UPROF_PERC_BIGGER(whole, fract, sw4, 262 sf4)) { 263 sf4 = fract; 264 si4 = j; 265 sw4 = whole; 266 } 267 } 268 sbuf_printf(&sb, "queue %u:\n", i); 269 sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot, 270 sf0 / tot, si0); 271 sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot, 272 sf1 / tot, si1); 273 sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot, 274 sf2 / tot, si2); 275 sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot, 276 sf3 / tot, si3); 277 sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot, 278 sf4 / tot, si4); 279 } 280 } 281 sbuf_trim(&sb); 282 sbuf_finish(&sb); 283 sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req); 284 sbuf_delete(&sb); 285 return (0); 286 } 287 288 static int 289 sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS) 290 { 291 struct umtxq_chain *uc; 292 u_int i, j; 293 int clear, error; 294 295 clear = 0; 296 error = sysctl_handle_int(oidp, &clear, 0, req); 297 if (error != 0 || req->newptr == NULL) 298 return (error); 299 300 if (clear != 0) { 301 for (i = 0; i < 2; ++i) { 302 for (j = 0; j < UMTX_CHAINS; ++j) { 303 uc = &umtxq_chains[i][j]; 304 mtx_lock(&uc->uc_lock); 305 uc->length = 0; 306 uc->max_length = 0; 307 mtx_unlock(&uc->uc_lock); 308 } 309 } 310 } 311 return (0); 312 } 313 314 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear, 315 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, 316 sysctl_debug_umtx_chains_clear, "I", 317 "Clear umtx chains statistics"); 318 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks, 319 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0, 320 sysctl_debug_umtx_chains_peaks, "A", 321 "Highest peaks in chains max length"); 322 #endif 323 324 static void 325 umtxq_sysinit(void *arg 
__unused) 326 { 327 int i, j; 328 329 umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi), 330 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 331 for (i = 0; i < 2; ++i) { 332 for (j = 0; j < UMTX_CHAINS; ++j) { 333 mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL, 334 MTX_DEF | MTX_DUPOK); 335 LIST_INIT(&umtxq_chains[i][j].uc_queue[0]); 336 LIST_INIT(&umtxq_chains[i][j].uc_queue[1]); 337 LIST_INIT(&umtxq_chains[i][j].uc_spare_queue); 338 TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list); 339 umtxq_chains[i][j].uc_busy = 0; 340 umtxq_chains[i][j].uc_waiters = 0; 341 #ifdef UMTX_PROFILING 342 umtxq_chains[i][j].length = 0; 343 umtxq_chains[i][j].max_length = 0; 344 #endif 345 } 346 } 347 #ifdef UMTX_PROFILING 348 umtx_init_profiling(); 349 #endif 350 mtx_init(&umtx_lock, "umtx lock", NULL, MTX_DEF); 351 umtx_shm_init(); 352 } 353 354 struct umtx_q * 355 umtxq_alloc(void) 356 { 357 struct umtx_q *uq; 358 359 uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO); 360 uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX, 361 M_WAITOK | M_ZERO); 362 TAILQ_INIT(&uq->uq_spare_queue->head); 363 TAILQ_INIT(&uq->uq_pi_contested); 364 uq->uq_inherited_pri = PRI_MAX; 365 return (uq); 366 } 367 368 void 369 umtxq_free(struct umtx_q *uq) 370 { 371 372 MPASS(uq->uq_spare_queue != NULL); 373 free(uq->uq_spare_queue, M_UMTX); 374 free(uq, M_UMTX); 375 } 376 377 static inline void 378 umtxq_hash(struct umtx_key *key) 379 { 380 unsigned n; 381 382 n = (uintptr_t)key->info.both.a + key->info.both.b; 383 key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS; 384 } 385 386 struct umtxq_chain * 387 umtxq_getchain(struct umtx_key *key) 388 { 389 390 if (key->type <= TYPE_SEM) 391 return (&umtxq_chains[1][key->hash]); 392 return (&umtxq_chains[0][key->hash]); 393 } 394 395 /* 396 * Set chain to busy state when following operation 397 * may be blocked (kernel mutex can not be used). 398 */ 399 void 400 umtxq_busy(struct umtx_key *key) 401 { 402 struct umtxq_chain *uc; 403 404 uc = umtxq_getchain(key); 405 mtx_assert(&uc->uc_lock, MA_OWNED); 406 if (uc->uc_busy) { 407 #ifdef SMP 408 if (smp_cpus > 1) { 409 int count = BUSY_SPINS; 410 if (count > 0) { 411 umtxq_unlock(key); 412 while (uc->uc_busy && --count > 0) 413 cpu_spinwait(); 414 umtxq_lock(key); 415 } 416 } 417 #endif 418 while (uc->uc_busy) { 419 uc->uc_waiters++; 420 msleep(uc, &uc->uc_lock, 0, "umtxqb", 0); 421 uc->uc_waiters--; 422 } 423 } 424 uc->uc_busy = 1; 425 } 426 427 /* 428 * Unbusy a chain. 
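 *
 * Illustrative sketch (not a verbatim code path from this file) of the
 * typical pairing: a caller marks the chain busy before an operation that
 * may fault on user memory or otherwise block, and unbusies it afterwards
 * so that other threads spinning or sleeping in umtxq_busy() can proceed:
 *
 *      umtxq_lock(&key);
 *      umtxq_busy(&key);
 *      umtxq_unlock(&key);
 *      (access user memory here; this may fault and sleep)
 *      umtxq_lock(&key);
 *      umtxq_unbusy(&key);
 *      umtxq_unlock(&key);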
429 */ 430 void 431 umtxq_unbusy(struct umtx_key *key) 432 { 433 struct umtxq_chain *uc; 434 435 uc = umtxq_getchain(key); 436 mtx_assert(&uc->uc_lock, MA_OWNED); 437 KASSERT(uc->uc_busy != 0, ("not busy")); 438 uc->uc_busy = 0; 439 if (uc->uc_waiters) 440 wakeup_one(uc); 441 } 442 443 void 444 umtxq_unbusy_unlocked(struct umtx_key *key) 445 { 446 447 umtxq_lock(key); 448 umtxq_unbusy(key); 449 umtxq_unlock(key); 450 } 451 452 static struct umtxq_queue * 453 umtxq_queue_lookup(struct umtx_key *key, int q) 454 { 455 struct umtxq_queue *uh; 456 struct umtxq_chain *uc; 457 458 uc = umtxq_getchain(key); 459 UMTXQ_LOCKED_ASSERT(uc); 460 LIST_FOREACH(uh, &uc->uc_queue[q], link) { 461 if (umtx_key_match(&uh->key, key)) 462 return (uh); 463 } 464 465 return (NULL); 466 } 467 468 void 469 umtxq_insert_queue(struct umtx_q *uq, int q) 470 { 471 struct umtxq_queue *uh; 472 struct umtxq_chain *uc; 473 474 uc = umtxq_getchain(&uq->uq_key); 475 UMTXQ_LOCKED_ASSERT(uc); 476 KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue")); 477 uh = umtxq_queue_lookup(&uq->uq_key, q); 478 if (uh != NULL) { 479 LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link); 480 } else { 481 uh = uq->uq_spare_queue; 482 uh->key = uq->uq_key; 483 LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link); 484 #ifdef UMTX_PROFILING 485 uc->length++; 486 if (uc->length > uc->max_length) { 487 uc->max_length = uc->length; 488 if (uc->max_length > max_length) 489 max_length = uc->max_length; 490 } 491 #endif 492 } 493 uq->uq_spare_queue = NULL; 494 495 TAILQ_INSERT_TAIL(&uh->head, uq, uq_link); 496 uh->length++; 497 uq->uq_flags |= UQF_UMTXQ; 498 uq->uq_cur_queue = uh; 499 return; 500 } 501 502 void 503 umtxq_remove_queue(struct umtx_q *uq, int q) 504 { 505 struct umtxq_chain *uc; 506 struct umtxq_queue *uh; 507 508 uc = umtxq_getchain(&uq->uq_key); 509 UMTXQ_LOCKED_ASSERT(uc); 510 if (uq->uq_flags & UQF_UMTXQ) { 511 uh = uq->uq_cur_queue; 512 TAILQ_REMOVE(&uh->head, uq, uq_link); 513 uh->length--; 514 uq->uq_flags &= ~UQF_UMTXQ; 515 if (TAILQ_EMPTY(&uh->head)) { 516 KASSERT(uh->length == 0, 517 ("inconsistent umtxq_queue length")); 518 #ifdef UMTX_PROFILING 519 uc->length--; 520 #endif 521 LIST_REMOVE(uh, link); 522 } else { 523 uh = LIST_FIRST(&uc->uc_spare_queue); 524 KASSERT(uh != NULL, ("uc_spare_queue is empty")); 525 LIST_REMOVE(uh, link); 526 } 527 uq->uq_spare_queue = uh; 528 uq->uq_cur_queue = NULL; 529 } 530 } 531 532 /* 533 * Check if there are multiple waiters 534 */ 535 int 536 umtxq_count(struct umtx_key *key) 537 { 538 struct umtxq_queue *uh; 539 540 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 541 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 542 if (uh != NULL) 543 return (uh->length); 544 return (0); 545 } 546 547 /* 548 * Check if there are multiple PI waiters and returns first 549 * waiter. 550 */ 551 static int 552 umtxq_count_pi(struct umtx_key *key, struct umtx_q **first) 553 { 554 struct umtxq_queue *uh; 555 556 *first = NULL; 557 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 558 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 559 if (uh != NULL) { 560 *first = TAILQ_FIRST(&uh->head); 561 return (uh->length); 562 } 563 return (0); 564 } 565 566 /* 567 * Wake up threads waiting on an userland object by a bit mask. 
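 *
 * Worked example with hypothetical values (not taken from a caller in
 * this file): a thread that went to sleep with uq_bitset = 0x1 is skipped
 * by a wake with bitset 0x2, but is removed from the queue and awakened
 * by a wake with bitset 0x3, because only waiters whose stored bitset
 * intersects the wake bitset are signaled.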
568 */ 569 int 570 umtxq_signal_mask(struct umtx_key *key, int n_wake, u_int bitset) 571 { 572 struct umtxq_queue *uh; 573 struct umtx_q *uq, *uq_temp; 574 int ret; 575 576 ret = 0; 577 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 578 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 579 if (uh == NULL) 580 return (0); 581 TAILQ_FOREACH_SAFE(uq, &uh->head, uq_link, uq_temp) { 582 if ((uq->uq_bitset & bitset) == 0) 583 continue; 584 umtxq_remove_queue(uq, UMTX_SHARED_QUEUE); 585 wakeup_one(uq); 586 if (++ret >= n_wake) 587 break; 588 } 589 return (ret); 590 } 591 592 /* 593 * Wake up threads waiting on an userland object. 594 */ 595 596 static int 597 umtxq_signal_queue(struct umtx_key *key, int n_wake, int q) 598 { 599 struct umtxq_queue *uh; 600 struct umtx_q *uq; 601 int ret; 602 603 ret = 0; 604 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 605 uh = umtxq_queue_lookup(key, q); 606 if (uh != NULL) { 607 while ((uq = TAILQ_FIRST(&uh->head)) != NULL) { 608 umtxq_remove_queue(uq, q); 609 wakeup(uq); 610 if (++ret >= n_wake) 611 return (ret); 612 } 613 } 614 return (ret); 615 } 616 617 /* 618 * Wake up specified thread. 619 */ 620 static inline void 621 umtxq_signal_thread(struct umtx_q *uq) 622 { 623 624 UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key)); 625 umtxq_remove(uq); 626 wakeup(uq); 627 } 628 629 /* 630 * Wake up a maximum of n_wake threads that are waiting on an userland 631 * object identified by key. The remaining threads are removed from queue 632 * identified by key and added to the queue identified by key2 (requeued). 633 * The n_requeue specifies an upper limit on the number of threads that 634 * are requeued to the second queue. 635 */ 636 int 637 umtxq_requeue(struct umtx_key *key, int n_wake, struct umtx_key *key2, 638 int n_requeue) 639 { 640 struct umtxq_queue *uh; 641 struct umtx_q *uq, *uq_temp; 642 int ret; 643 644 ret = 0; 645 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 646 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key2)); 647 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 648 if (uh == NULL) 649 return (0); 650 TAILQ_FOREACH_SAFE(uq, &uh->head, uq_link, uq_temp) { 651 if (++ret <= n_wake) { 652 umtxq_remove(uq); 653 wakeup_one(uq); 654 } else { 655 umtxq_remove(uq); 656 uq->uq_key = *key2; 657 umtxq_insert(uq); 658 if (ret - n_wake == n_requeue) 659 break; 660 } 661 } 662 return (ret); 663 } 664 665 static inline int 666 tstohz(const struct timespec *tsp) 667 { 668 struct timeval tv; 669 670 TIMESPEC_TO_TIMEVAL(&tv, tsp); 671 return tvtohz(&tv); 672 } 673 674 void 675 umtx_abs_timeout_init(struct umtx_abs_timeout *timo, int clockid, 676 int absolute, const struct timespec *timeout) 677 { 678 679 timo->clockid = clockid; 680 if (!absolute) { 681 timo->is_abs_real = false; 682 kern_clock_gettime(curthread, timo->clockid, &timo->cur); 683 timespecadd(&timo->cur, timeout, &timo->end); 684 } else { 685 timo->end = *timeout; 686 timo->is_abs_real = clockid == CLOCK_REALTIME || 687 clockid == CLOCK_REALTIME_FAST || 688 clockid == CLOCK_REALTIME_PRECISE || 689 clockid == CLOCK_SECOND; 690 } 691 } 692 693 static void 694 umtx_abs_timeout_init2(struct umtx_abs_timeout *timo, 695 const struct _umtx_time *umtxtime) 696 { 697 698 umtx_abs_timeout_init(timo, umtxtime->_clockid, 699 (umtxtime->_flags & UMTX_ABSTIME) != 0, &umtxtime->_timeout); 700 } 701 702 static void 703 umtx_abs_timeout_enforce_min(sbintime_t *sbt) 704 { 705 sbintime_t when, mint; 706 707 mint = curproc->p_umtx_min_timeout; 708 if (__predict_false(mint != 0)) { 709 when = sbinuptime() + mint; 710 if (*sbt < when) 711 *sbt = 
when; 712 } 713 } 714 715 static int 716 umtx_abs_timeout_getsbt(struct umtx_abs_timeout *timo, sbintime_t *sbt, 717 int *flags) 718 { 719 struct bintime bt, bbt; 720 struct timespec tts; 721 sbintime_t rem; 722 723 switch (timo->clockid) { 724 725 /* Clocks that can be converted into absolute time. */ 726 case CLOCK_REALTIME: 727 case CLOCK_REALTIME_PRECISE: 728 case CLOCK_REALTIME_FAST: 729 case CLOCK_MONOTONIC: 730 case CLOCK_MONOTONIC_PRECISE: 731 case CLOCK_MONOTONIC_FAST: 732 case CLOCK_UPTIME: 733 case CLOCK_UPTIME_PRECISE: 734 case CLOCK_UPTIME_FAST: 735 case CLOCK_SECOND: 736 timespec2bintime(&timo->end, &bt); 737 switch (timo->clockid) { 738 case CLOCK_REALTIME: 739 case CLOCK_REALTIME_PRECISE: 740 case CLOCK_REALTIME_FAST: 741 case CLOCK_SECOND: 742 getboottimebin(&bbt); 743 bintime_sub(&bt, &bbt); 744 break; 745 } 746 if (bt.sec < 0) 747 return (ETIMEDOUT); 748 if (bt.sec >= (SBT_MAX >> 32)) { 749 *sbt = 0; 750 *flags = 0; 751 return (0); 752 } 753 *sbt = bttosbt(bt); 754 umtx_abs_timeout_enforce_min(sbt); 755 756 /* 757 * Check if the absolute time should be aligned to 758 * avoid firing multiple timer events in non-periodic 759 * timer mode. 760 */ 761 switch (timo->clockid) { 762 case CLOCK_REALTIME_FAST: 763 case CLOCK_MONOTONIC_FAST: 764 case CLOCK_UPTIME_FAST: 765 rem = *sbt % tc_tick_sbt; 766 if (__predict_true(rem != 0)) 767 *sbt += tc_tick_sbt - rem; 768 break; 769 case CLOCK_SECOND: 770 rem = *sbt % SBT_1S; 771 if (__predict_true(rem != 0)) 772 *sbt += SBT_1S - rem; 773 break; 774 } 775 *flags = C_ABSOLUTE; 776 return (0); 777 778 /* Clocks that has to be periodically polled. */ 779 case CLOCK_VIRTUAL: 780 case CLOCK_PROF: 781 case CLOCK_THREAD_CPUTIME_ID: 782 case CLOCK_PROCESS_CPUTIME_ID: 783 default: 784 kern_clock_gettime(curthread, timo->clockid, &timo->cur); 785 if (timespeccmp(&timo->end, &timo->cur, <=)) 786 return (ETIMEDOUT); 787 timespecsub(&timo->end, &timo->cur, &tts); 788 *sbt = tick_sbt * tstohz(&tts); 789 *flags = C_HARDCLOCK; 790 return (0); 791 } 792 } 793 794 static uint32_t 795 umtx_unlock_val(uint32_t flags, bool rb) 796 { 797 798 if (rb) 799 return (UMUTEX_RB_OWNERDEAD); 800 else if ((flags & UMUTEX_NONCONSISTENT) != 0) 801 return (UMUTEX_RB_NOTRECOV); 802 else 803 return (UMUTEX_UNOWNED); 804 805 } 806 807 /* 808 * Put thread into sleep state, before sleeping, check if 809 * thread was removed from umtx queue. 810 */ 811 int 812 umtxq_sleep(struct umtx_q *uq, const char *wmesg, 813 struct umtx_abs_timeout *timo) 814 { 815 struct umtxq_chain *uc; 816 sbintime_t sbt = 0; 817 int error, flags = 0; 818 819 uc = umtxq_getchain(&uq->uq_key); 820 UMTXQ_LOCKED_ASSERT(uc); 821 for (;;) { 822 if (!(uq->uq_flags & UQF_UMTXQ)) { 823 error = 0; 824 break; 825 } 826 if (timo != NULL) { 827 if (timo->is_abs_real) 828 curthread->td_rtcgen = 829 atomic_load_acq_int(&rtc_generation); 830 error = umtx_abs_timeout_getsbt(timo, &sbt, &flags); 831 if (error != 0) 832 break; 833 } 834 error = msleep_sbt(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, 835 sbt, 0, flags); 836 uc = umtxq_getchain(&uq->uq_key); 837 mtx_lock(&uc->uc_lock); 838 if (error == EINTR || error == ERESTART) 839 break; 840 if (error == EWOULDBLOCK && (flags & C_ABSOLUTE) != 0) { 841 error = ETIMEDOUT; 842 break; 843 } 844 } 845 846 curthread->td_rtcgen = 0; 847 return (error); 848 } 849 850 /* 851 * Convert userspace address into unique logical address. 
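 *
 * A minimal usage sketch, modeled on kern_umtx_wake() below: resolve the
 * user address into a key, operate on the queue chain selected by that
 * key, and release the key (which drops the shared VM object reference,
 * if one was taken) when done.
 *
 *      struct umtx_key key;
 *      int error;
 *
 *      error = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, AUTO_SHARE, &key);
 *      if (error != 0)
 *              return (error);
 *      umtxq_lock(&key);
 *      umtxq_signal(&key, n_wake);
 *      umtxq_unlock(&key);
 *      umtx_key_release(&key);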
852 */ 853 int 854 umtx_key_get(const void *addr, int type, int share, struct umtx_key *key) 855 { 856 struct thread *td = curthread; 857 vm_map_t map; 858 vm_map_entry_t entry; 859 vm_pindex_t pindex; 860 vm_prot_t prot; 861 boolean_t wired; 862 863 key->type = type; 864 if (share == THREAD_SHARE) { 865 key->shared = 0; 866 key->info.private.vs = td->td_proc->p_vmspace; 867 key->info.private.addr = (uintptr_t)addr; 868 } else { 869 MPASS(share == PROCESS_SHARE || share == AUTO_SHARE); 870 map = &td->td_proc->p_vmspace->vm_map; 871 if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE, 872 &entry, &key->info.shared.object, &pindex, &prot, 873 &wired) != KERN_SUCCESS) { 874 return (EFAULT); 875 } 876 877 if ((share == PROCESS_SHARE) || 878 (share == AUTO_SHARE && 879 VM_INHERIT_SHARE == entry->inheritance)) { 880 key->shared = 1; 881 key->info.shared.offset = (vm_offset_t)addr - 882 entry->start + entry->offset; 883 vm_object_reference(key->info.shared.object); 884 } else { 885 key->shared = 0; 886 key->info.private.vs = td->td_proc->p_vmspace; 887 key->info.private.addr = (uintptr_t)addr; 888 } 889 vm_map_lookup_done(map, entry); 890 } 891 892 umtxq_hash(key); 893 return (0); 894 } 895 896 /* 897 * Release key. 898 */ 899 void 900 umtx_key_release(struct umtx_key *key) 901 { 902 if (key->shared) 903 vm_object_deallocate(key->info.shared.object); 904 } 905 906 #ifdef COMPAT_FREEBSD10 907 /* 908 * Lock a umtx object. 909 */ 910 static int 911 do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, 912 const struct timespec *timeout) 913 { 914 struct umtx_abs_timeout timo; 915 struct umtx_q *uq; 916 u_long owner; 917 u_long old; 918 int error = 0; 919 920 uq = td->td_umtxq; 921 if (timeout != NULL) 922 umtx_abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout); 923 924 /* 925 * Care must be exercised when dealing with umtx structure. It 926 * can fault on any access. 927 */ 928 for (;;) { 929 /* 930 * Try the uncontested case. This should be done in userland. 931 */ 932 owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id); 933 934 /* The acquire succeeded. */ 935 if (owner == UMTX_UNOWNED) 936 return (0); 937 938 /* The address was invalid. */ 939 if (owner == -1) 940 return (EFAULT); 941 942 /* If no one owns it but it is contested try to acquire it. */ 943 if (owner == UMTX_CONTESTED) { 944 owner = casuword(&umtx->u_owner, 945 UMTX_CONTESTED, id | UMTX_CONTESTED); 946 947 if (owner == UMTX_CONTESTED) 948 return (0); 949 950 /* The address was invalid. */ 951 if (owner == -1) 952 return (EFAULT); 953 954 error = thread_check_susp(td, false); 955 if (error != 0) 956 break; 957 958 /* If this failed the lock has changed, restart. */ 959 continue; 960 } 961 962 /* 963 * If we caught a signal, we have retried and now 964 * exit immediately. 965 */ 966 if (error != 0) 967 break; 968 969 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, 970 AUTO_SHARE, &uq->uq_key)) != 0) 971 return (error); 972 973 umtxq_lock(&uq->uq_key); 974 umtxq_busy(&uq->uq_key); 975 umtxq_insert(uq); 976 umtxq_unbusy(&uq->uq_key); 977 umtxq_unlock(&uq->uq_key); 978 979 /* 980 * Set the contested bit so that a release in user space 981 * knows to use the system call for unlock. If this fails 982 * either some one else has acquired the lock or it has been 983 * released. 984 */ 985 old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED); 986 987 /* The address was invalid. 
*/ 988 if (old == -1) { 989 umtxq_lock(&uq->uq_key); 990 umtxq_remove(uq); 991 umtxq_unlock(&uq->uq_key); 992 umtx_key_release(&uq->uq_key); 993 return (EFAULT); 994 } 995 996 /* 997 * We set the contested bit, sleep. Otherwise the lock changed 998 * and we need to retry or we lost a race to the thread 999 * unlocking the umtx. 1000 */ 1001 umtxq_lock(&uq->uq_key); 1002 if (old == owner) 1003 error = umtxq_sleep(uq, "umtx", timeout == NULL ? NULL : 1004 &timo); 1005 umtxq_remove(uq); 1006 umtxq_unlock(&uq->uq_key); 1007 umtx_key_release(&uq->uq_key); 1008 1009 if (error == 0) 1010 error = thread_check_susp(td, false); 1011 } 1012 1013 if (timeout == NULL) { 1014 /* Mutex locking is restarted if it is interrupted. */ 1015 if (error == EINTR) 1016 error = ERESTART; 1017 } else { 1018 /* Timed-locking is not restarted. */ 1019 if (error == ERESTART) 1020 error = EINTR; 1021 } 1022 return (error); 1023 } 1024 1025 /* 1026 * Unlock a umtx object. 1027 */ 1028 static int 1029 do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id) 1030 { 1031 struct umtx_key key; 1032 u_long owner; 1033 u_long old; 1034 int error; 1035 int count; 1036 1037 /* 1038 * Make sure we own this mtx. 1039 */ 1040 owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner)); 1041 if (owner == -1) 1042 return (EFAULT); 1043 1044 if ((owner & ~UMTX_CONTESTED) != id) 1045 return (EPERM); 1046 1047 /* This should be done in userland */ 1048 if ((owner & UMTX_CONTESTED) == 0) { 1049 old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED); 1050 if (old == -1) 1051 return (EFAULT); 1052 if (old == owner) 1053 return (0); 1054 owner = old; 1055 } 1056 1057 /* We should only ever be in here for contested locks */ 1058 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE, 1059 &key)) != 0) 1060 return (error); 1061 1062 umtxq_lock(&key); 1063 umtxq_busy(&key); 1064 count = umtxq_count(&key); 1065 umtxq_unlock(&key); 1066 1067 /* 1068 * When unlocking the umtx, it must be marked as unowned if 1069 * there is zero or one thread only waiting for it. 1070 * Otherwise, it must be marked as contested. 1071 */ 1072 old = casuword(&umtx->u_owner, owner, 1073 count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED); 1074 umtxq_lock(&key); 1075 umtxq_signal(&key,1); 1076 umtxq_unbusy(&key); 1077 umtxq_unlock(&key); 1078 umtx_key_release(&key); 1079 if (old == -1) 1080 return (EFAULT); 1081 if (old != owner) 1082 return (EINVAL); 1083 return (0); 1084 } 1085 1086 #ifdef COMPAT_FREEBSD32 1087 1088 /* 1089 * Lock a umtx object. 1090 */ 1091 static int 1092 do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, 1093 const struct timespec *timeout) 1094 { 1095 struct umtx_abs_timeout timo; 1096 struct umtx_q *uq; 1097 uint32_t owner; 1098 uint32_t old; 1099 int error = 0; 1100 1101 uq = td->td_umtxq; 1102 1103 if (timeout != NULL) 1104 umtx_abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout); 1105 1106 /* 1107 * Care must be exercised when dealing with umtx structure. It 1108 * can fault on any access. 1109 */ 1110 for (;;) { 1111 /* 1112 * Try the uncontested case. This should be done in userland. 1113 */ 1114 owner = casuword32(m, UMUTEX_UNOWNED, id); 1115 1116 /* The acquire succeeded. */ 1117 if (owner == UMUTEX_UNOWNED) 1118 return (0); 1119 1120 /* The address was invalid. */ 1121 if (owner == -1) 1122 return (EFAULT); 1123 1124 /* If no one owns it but it is contested try to acquire it. 
*/ 1125 if (owner == UMUTEX_CONTESTED) { 1126 owner = casuword32(m, 1127 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); 1128 if (owner == UMUTEX_CONTESTED) 1129 return (0); 1130 1131 /* The address was invalid. */ 1132 if (owner == -1) 1133 return (EFAULT); 1134 1135 error = thread_check_susp(td, false); 1136 if (error != 0) 1137 break; 1138 1139 /* If this failed the lock has changed, restart. */ 1140 continue; 1141 } 1142 1143 /* 1144 * If we caught a signal, we have retried and now 1145 * exit immediately. 1146 */ 1147 if (error != 0) 1148 return (error); 1149 1150 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, 1151 AUTO_SHARE, &uq->uq_key)) != 0) 1152 return (error); 1153 1154 umtxq_lock(&uq->uq_key); 1155 umtxq_busy(&uq->uq_key); 1156 umtxq_insert(uq); 1157 umtxq_unbusy(&uq->uq_key); 1158 umtxq_unlock(&uq->uq_key); 1159 1160 /* 1161 * Set the contested bit so that a release in user space 1162 * knows to use the system call for unlock. If this fails 1163 * either some one else has acquired the lock or it has been 1164 * released. 1165 */ 1166 old = casuword32(m, owner, owner | UMUTEX_CONTESTED); 1167 1168 /* The address was invalid. */ 1169 if (old == -1) { 1170 umtxq_lock(&uq->uq_key); 1171 umtxq_remove(uq); 1172 umtxq_unlock(&uq->uq_key); 1173 umtx_key_release(&uq->uq_key); 1174 return (EFAULT); 1175 } 1176 1177 /* 1178 * We set the contested bit, sleep. Otherwise the lock changed 1179 * and we need to retry or we lost a race to the thread 1180 * unlocking the umtx. 1181 */ 1182 umtxq_lock(&uq->uq_key); 1183 if (old == owner) 1184 error = umtxq_sleep(uq, "umtx", timeout == NULL ? 1185 NULL : &timo); 1186 umtxq_remove(uq); 1187 umtxq_unlock(&uq->uq_key); 1188 umtx_key_release(&uq->uq_key); 1189 1190 if (error == 0) 1191 error = thread_check_susp(td, false); 1192 } 1193 1194 if (timeout == NULL) { 1195 /* Mutex locking is restarted if it is interrupted. */ 1196 if (error == EINTR) 1197 error = ERESTART; 1198 } else { 1199 /* Timed-locking is not restarted. */ 1200 if (error == ERESTART) 1201 error = EINTR; 1202 } 1203 return (error); 1204 } 1205 1206 /* 1207 * Unlock a umtx object. 1208 */ 1209 static int 1210 do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id) 1211 { 1212 struct umtx_key key; 1213 uint32_t owner; 1214 uint32_t old; 1215 int error; 1216 int count; 1217 1218 /* 1219 * Make sure we own this mtx. 1220 */ 1221 owner = fuword32(m); 1222 if (owner == -1) 1223 return (EFAULT); 1224 1225 if ((owner & ~UMUTEX_CONTESTED) != id) 1226 return (EPERM); 1227 1228 /* This should be done in userland */ 1229 if ((owner & UMUTEX_CONTESTED) == 0) { 1230 old = casuword32(m, owner, UMUTEX_UNOWNED); 1231 if (old == -1) 1232 return (EFAULT); 1233 if (old == owner) 1234 return (0); 1235 owner = old; 1236 } 1237 1238 /* We should only ever be in here for contested locks */ 1239 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE, 1240 &key)) != 0) 1241 return (error); 1242 1243 umtxq_lock(&key); 1244 umtxq_busy(&key); 1245 count = umtxq_count(&key); 1246 umtxq_unlock(&key); 1247 1248 /* 1249 * When unlocking the umtx, it must be marked as unowned if 1250 * there is zero or one thread only waiting for it. 1251 * Otherwise, it must be marked as contested. 1252 */ 1253 old = casuword32(m, owner, 1254 count <= 1 ? 
UMUTEX_UNOWNED : UMUTEX_CONTESTED); 1255 umtxq_lock(&key); 1256 umtxq_signal(&key,1); 1257 umtxq_unbusy(&key); 1258 umtxq_unlock(&key); 1259 umtx_key_release(&key); 1260 if (old == -1) 1261 return (EFAULT); 1262 if (old != owner) 1263 return (EINVAL); 1264 return (0); 1265 } 1266 #endif /* COMPAT_FREEBSD32 */ 1267 #endif /* COMPAT_FREEBSD10 */ 1268 1269 /* 1270 * Fetch and compare value, sleep on the address if value is not changed. 1271 */ 1272 static int 1273 do_wait(struct thread *td, void *addr, u_long id, 1274 struct _umtx_time *timeout, int compat32, int is_private) 1275 { 1276 struct umtx_abs_timeout timo; 1277 struct umtx_q *uq; 1278 u_long tmp; 1279 uint32_t tmp32; 1280 int error = 0; 1281 1282 uq = td->td_umtxq; 1283 if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT, 1284 is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0) 1285 return (error); 1286 1287 if (timeout != NULL) 1288 umtx_abs_timeout_init2(&timo, timeout); 1289 1290 umtxq_lock(&uq->uq_key); 1291 umtxq_insert(uq); 1292 umtxq_unlock(&uq->uq_key); 1293 if (compat32 == 0) { 1294 error = fueword(addr, &tmp); 1295 if (error != 0) 1296 error = EFAULT; 1297 } else { 1298 error = fueword32(addr, &tmp32); 1299 if (error == 0) 1300 tmp = tmp32; 1301 else 1302 error = EFAULT; 1303 } 1304 umtxq_lock(&uq->uq_key); 1305 if (error == 0) { 1306 if (tmp == id) 1307 error = umtxq_sleep(uq, "uwait", timeout == NULL ? 1308 NULL : &timo); 1309 if ((uq->uq_flags & UQF_UMTXQ) == 0) 1310 error = 0; 1311 else 1312 umtxq_remove(uq); 1313 } else if ((uq->uq_flags & UQF_UMTXQ) != 0) { 1314 umtxq_remove(uq); 1315 } 1316 umtxq_unlock(&uq->uq_key); 1317 umtx_key_release(&uq->uq_key); 1318 if (error == ERESTART) 1319 error = EINTR; 1320 return (error); 1321 } 1322 1323 /* 1324 * Wake up threads sleeping on the specified address. 1325 */ 1326 int 1327 kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private) 1328 { 1329 struct umtx_key key; 1330 int ret; 1331 1332 if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, 1333 is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0) 1334 return (ret); 1335 umtxq_lock(&key); 1336 umtxq_signal(&key, n_wake); 1337 umtxq_unlock(&key); 1338 umtx_key_release(&key); 1339 return (0); 1340 } 1341 1342 /* 1343 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex. 1344 */ 1345 static int 1346 do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, 1347 struct _umtx_time *timeout, int mode) 1348 { 1349 struct umtx_abs_timeout timo; 1350 struct umtx_q *uq; 1351 uint32_t owner, old, id; 1352 int error, rv; 1353 1354 id = td->td_tid; 1355 uq = td->td_umtxq; 1356 error = 0; 1357 if (timeout != NULL) 1358 umtx_abs_timeout_init2(&timo, timeout); 1359 1360 /* 1361 * Care must be exercised when dealing with umtx structure. It 1362 * can fault on any access. 1363 */ 1364 for (;;) { 1365 rv = fueword32(&m->m_owner, &owner); 1366 if (rv == -1) 1367 return (EFAULT); 1368 if (mode == _UMUTEX_WAIT) { 1369 if (owner == UMUTEX_UNOWNED || 1370 owner == UMUTEX_CONTESTED || 1371 owner == UMUTEX_RB_OWNERDEAD || 1372 owner == UMUTEX_RB_NOTRECOV) 1373 return (0); 1374 } else { 1375 /* 1376 * Robust mutex terminated. Kernel duty is to 1377 * return EOWNERDEAD to the userspace. The 1378 * umutex.m_flags UMUTEX_NONCONSISTENT is set 1379 * by the common userspace code. 
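 *
 * Illustrative sequence (the constants are real, the scenario is
 * hypothetical): the previous owner exits while holding the lock, so
 * the owner word reads UMUTEX_RB_OWNERDEAD; the CAS below installs
 * (id | UMUTEX_CONTESTED) for the new owner and the lock operation
 * returns EOWNERDEAD, after which userland either makes the state
 * consistent again or flags the mutex UMUTEX_NONCONSISTENT so that a
 * later unlock marks it UMUTEX_RB_NOTRECOV.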
1380 */ 1381 if (owner == UMUTEX_RB_OWNERDEAD) { 1382 rv = casueword32(&m->m_owner, 1383 UMUTEX_RB_OWNERDEAD, &owner, 1384 id | UMUTEX_CONTESTED); 1385 if (rv == -1) 1386 return (EFAULT); 1387 if (rv == 0) { 1388 MPASS(owner == UMUTEX_RB_OWNERDEAD); 1389 return (EOWNERDEAD); /* success */ 1390 } 1391 MPASS(rv == 1); 1392 rv = thread_check_susp(td, false); 1393 if (rv != 0) 1394 return (rv); 1395 continue; 1396 } 1397 if (owner == UMUTEX_RB_NOTRECOV) 1398 return (ENOTRECOVERABLE); 1399 1400 /* 1401 * Try the uncontested case. This should be 1402 * done in userland. 1403 */ 1404 rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, 1405 &owner, id); 1406 /* The address was invalid. */ 1407 if (rv == -1) 1408 return (EFAULT); 1409 1410 /* The acquire succeeded. */ 1411 if (rv == 0) { 1412 MPASS(owner == UMUTEX_UNOWNED); 1413 return (0); 1414 } 1415 1416 /* 1417 * If no one owns it but it is contested try 1418 * to acquire it. 1419 */ 1420 MPASS(rv == 1); 1421 if (owner == UMUTEX_CONTESTED) { 1422 rv = casueword32(&m->m_owner, 1423 UMUTEX_CONTESTED, &owner, 1424 id | UMUTEX_CONTESTED); 1425 /* The address was invalid. */ 1426 if (rv == -1) 1427 return (EFAULT); 1428 if (rv == 0) { 1429 MPASS(owner == UMUTEX_CONTESTED); 1430 return (0); 1431 } 1432 if (rv == 1) { 1433 rv = thread_check_susp(td, false); 1434 if (rv != 0) 1435 return (rv); 1436 } 1437 1438 /* 1439 * If this failed the lock has 1440 * changed, restart. 1441 */ 1442 continue; 1443 } 1444 1445 /* rv == 1 but not contested, likely store failure */ 1446 rv = thread_check_susp(td, false); 1447 if (rv != 0) 1448 return (rv); 1449 } 1450 1451 if (mode == _UMUTEX_TRY) 1452 return (EBUSY); 1453 1454 /* 1455 * If we caught a signal, we have retried and now 1456 * exit immediately. 1457 */ 1458 if (error != 0) 1459 return (error); 1460 1461 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, 1462 GET_SHARE(flags), &uq->uq_key)) != 0) 1463 return (error); 1464 1465 umtxq_lock(&uq->uq_key); 1466 umtxq_busy(&uq->uq_key); 1467 umtxq_insert(uq); 1468 umtxq_unlock(&uq->uq_key); 1469 1470 /* 1471 * Set the contested bit so that a release in user space 1472 * knows to use the system call for unlock. If this fails 1473 * either some one else has acquired the lock or it has been 1474 * released. 1475 */ 1476 rv = casueword32(&m->m_owner, owner, &old, 1477 owner | UMUTEX_CONTESTED); 1478 1479 /* The address was invalid or casueword failed to store. */ 1480 if (rv == -1 || rv == 1) { 1481 umtxq_lock(&uq->uq_key); 1482 umtxq_remove(uq); 1483 umtxq_unbusy(&uq->uq_key); 1484 umtxq_unlock(&uq->uq_key); 1485 umtx_key_release(&uq->uq_key); 1486 if (rv == -1) 1487 return (EFAULT); 1488 if (rv == 1) { 1489 rv = thread_check_susp(td, false); 1490 if (rv != 0) 1491 return (rv); 1492 } 1493 continue; 1494 } 1495 1496 /* 1497 * We set the contested bit, sleep. Otherwise the lock changed 1498 * and we need to retry or we lost a race to the thread 1499 * unlocking the umtx. 1500 */ 1501 umtxq_lock(&uq->uq_key); 1502 umtxq_unbusy(&uq->uq_key); 1503 MPASS(old == owner); 1504 error = umtxq_sleep(uq, "umtxn", timeout == NULL ? 1505 NULL : &timo); 1506 umtxq_remove(uq); 1507 umtxq_unlock(&uq->uq_key); 1508 umtx_key_release(&uq->uq_key); 1509 1510 if (error == 0) 1511 error = thread_check_susp(td, false); 1512 } 1513 1514 return (0); 1515 } 1516 1517 /* 1518 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex. 
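 *
 * For context, a sketch of the userland fast path that this function
 * backs up (illustrative only, not the actual libthr source): an
 * uncontested release is a single compare-and-swap on m_owner, and only
 * a word carrying UMUTEX_CONTESTED falls back to the kernel:
 *
 *      if (atomic_cmpset_rel_32(&m->m_owner, id, UMUTEX_UNOWNED) == 0)
 *              (void)_umtx_op(m, UMTX_OP_MUTEX_UNLOCK, 0, NULL, NULL);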
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
        struct umtx_key key;
        uint32_t owner, old, id, newlock;
        int error, count;

        id = td->td_tid;

again:
        /*
         * Make sure we own this mtx.
         */
        error = fueword32(&m->m_owner, &owner);
        if (error == -1)
                return (EFAULT);

        if ((owner & ~UMUTEX_CONTESTED) != id)
                return (EPERM);

        newlock = umtx_unlock_val(flags, rb);
        if ((owner & UMUTEX_CONTESTED) == 0) {
                error = casueword32(&m->m_owner, owner, &old, newlock);
                if (error == -1)
                        return (EFAULT);
                if (error == 1) {
                        error = thread_check_susp(td, false);
                        if (error != 0)
                                return (error);
                        goto again;
                }
                MPASS(old == owner);
                return (0);
        }

        /* We should only ever be in here for contested locks */
        if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
            &key)) != 0)
                return (error);

        umtxq_lock(&key);
        umtxq_busy(&key);
        count = umtxq_count(&key);
        umtxq_unlock(&key);

        /*
         * When unlocking the umtx, it must be marked as unowned if
         * no more than one thread is waiting for it.
         * Otherwise, it must be marked as contested.
         */
        if (count > 1)
                newlock |= UMUTEX_CONTESTED;
        error = casueword32(&m->m_owner, owner, &old, newlock);
        umtxq_lock(&key);
        umtxq_signal(&key, 1);
        umtxq_unbusy(&key);
        umtxq_unlock(&key);
        umtx_key_release(&key);
        if (error == -1)
                return (EFAULT);
        if (error == 1) {
                if (old != owner)
                        return (EINVAL);
                error = thread_check_susp(td, false);
                if (error != 0)
                        return (error);
                goto again;
        }
        return (0);
}

/*
 * Check if the mutex is available and wake up a waiter,
 * only for a simple mutex.
1594 */ 1595 static int 1596 do_wake_umutex(struct thread *td, struct umutex *m) 1597 { 1598 struct umtx_key key; 1599 uint32_t owner; 1600 uint32_t flags; 1601 int error; 1602 int count; 1603 1604 again: 1605 error = fueword32(&m->m_owner, &owner); 1606 if (error == -1) 1607 return (EFAULT); 1608 1609 if ((owner & ~UMUTEX_CONTESTED) != 0 && owner != UMUTEX_RB_OWNERDEAD && 1610 owner != UMUTEX_RB_NOTRECOV) 1611 return (0); 1612 1613 error = fueword32(&m->m_flags, &flags); 1614 if (error == -1) 1615 return (EFAULT); 1616 1617 /* We should only ever be in here for contested locks */ 1618 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags), 1619 &key)) != 0) 1620 return (error); 1621 1622 umtxq_lock(&key); 1623 umtxq_busy(&key); 1624 count = umtxq_count(&key); 1625 umtxq_unlock(&key); 1626 1627 if (count <= 1 && owner != UMUTEX_RB_OWNERDEAD && 1628 owner != UMUTEX_RB_NOTRECOV) { 1629 error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, 1630 UMUTEX_UNOWNED); 1631 if (error == -1) { 1632 error = EFAULT; 1633 } else if (error == 1) { 1634 umtxq_lock(&key); 1635 umtxq_unbusy(&key); 1636 umtxq_unlock(&key); 1637 umtx_key_release(&key); 1638 error = thread_check_susp(td, false); 1639 if (error != 0) 1640 return (error); 1641 goto again; 1642 } 1643 } 1644 1645 umtxq_lock(&key); 1646 if (error == 0 && count != 0) { 1647 MPASS((owner & ~UMUTEX_CONTESTED) == 0 || 1648 owner == UMUTEX_RB_OWNERDEAD || 1649 owner == UMUTEX_RB_NOTRECOV); 1650 umtxq_signal(&key, 1); 1651 } 1652 umtxq_unbusy(&key); 1653 umtxq_unlock(&key); 1654 umtx_key_release(&key); 1655 return (error); 1656 } 1657 1658 /* 1659 * Check if the mutex has waiters and tries to fix contention bit. 1660 */ 1661 static int 1662 do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags) 1663 { 1664 struct umtx_key key; 1665 uint32_t owner, old; 1666 int type; 1667 int error; 1668 int count; 1669 1670 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT | 1671 UMUTEX_ROBUST)) { 1672 case 0: 1673 case UMUTEX_ROBUST: 1674 type = TYPE_NORMAL_UMUTEX; 1675 break; 1676 case UMUTEX_PRIO_INHERIT: 1677 type = TYPE_PI_UMUTEX; 1678 break; 1679 case (UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST): 1680 type = TYPE_PI_ROBUST_UMUTEX; 1681 break; 1682 case UMUTEX_PRIO_PROTECT: 1683 type = TYPE_PP_UMUTEX; 1684 break; 1685 case (UMUTEX_PRIO_PROTECT | UMUTEX_ROBUST): 1686 type = TYPE_PP_ROBUST_UMUTEX; 1687 break; 1688 default: 1689 return (EINVAL); 1690 } 1691 if ((error = umtx_key_get(m, type, GET_SHARE(flags), &key)) != 0) 1692 return (error); 1693 1694 owner = 0; 1695 umtxq_lock(&key); 1696 umtxq_busy(&key); 1697 count = umtxq_count(&key); 1698 umtxq_unlock(&key); 1699 1700 error = fueword32(&m->m_owner, &owner); 1701 if (error == -1) 1702 error = EFAULT; 1703 1704 /* 1705 * Only repair contention bit if there is a waiter, this means 1706 * the mutex is still being referenced by userland code, 1707 * otherwise don't update any memory. 
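 *
 * Concrete (hypothetical) case the loop below repairs: one thread holds
 * the mutex, so the owner word is its tid with UMUTEX_CONTESTED clear,
 * while a single waiter sits on the queue (count == 1).  Without setting
 * UMUTEX_CONTESTED here, the holder's userland unlock fast path would
 * simply clear the word, never enter the kernel, and the waiter would
 * never be woken.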
1708 */ 1709 while (error == 0 && (owner & UMUTEX_CONTESTED) == 0 && 1710 (count > 1 || (count == 1 && (owner & ~UMUTEX_CONTESTED) != 0))) { 1711 error = casueword32(&m->m_owner, owner, &old, 1712 owner | UMUTEX_CONTESTED); 1713 if (error == -1) { 1714 error = EFAULT; 1715 break; 1716 } 1717 if (error == 0) { 1718 MPASS(old == owner); 1719 break; 1720 } 1721 owner = old; 1722 error = thread_check_susp(td, false); 1723 } 1724 1725 umtxq_lock(&key); 1726 if (error == EFAULT) { 1727 umtxq_signal(&key, INT_MAX); 1728 } else if (count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 || 1729 owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV)) 1730 umtxq_signal(&key, 1); 1731 umtxq_unbusy(&key); 1732 umtxq_unlock(&key); 1733 umtx_key_release(&key); 1734 return (error); 1735 } 1736 1737 struct umtx_pi * 1738 umtx_pi_alloc(int flags) 1739 { 1740 struct umtx_pi *pi; 1741 1742 pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags); 1743 if (pi == NULL) 1744 return (NULL); 1745 1746 TAILQ_INIT(&pi->pi_blocked); 1747 atomic_add_int(&umtx_pi_allocated, 1); 1748 return (pi); 1749 } 1750 1751 void 1752 umtx_pi_free(struct umtx_pi *pi) 1753 { 1754 uma_zfree(umtx_pi_zone, pi); 1755 atomic_add_int(&umtx_pi_allocated, -1); 1756 } 1757 1758 /* 1759 * Adjust the thread's position on a pi_state after its priority has been 1760 * changed. 1761 */ 1762 static int 1763 umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td) 1764 { 1765 struct umtx_q *uq, *uq1, *uq2; 1766 struct thread *td1; 1767 1768 mtx_assert(&umtx_lock, MA_OWNED); 1769 if (pi == NULL) 1770 return (0); 1771 1772 uq = td->td_umtxq; 1773 1774 /* 1775 * Check if the thread needs to be moved on the blocked chain. 1776 * It needs to be moved if either its priority is lower than 1777 * the previous thread or higher than the next thread. 1778 */ 1779 uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq); 1780 uq2 = TAILQ_NEXT(uq, uq_lockq); 1781 if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) || 1782 (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) { 1783 /* 1784 * Remove thread from blocked chain and determine where 1785 * it should be moved to. 1786 */ 1787 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq); 1788 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) { 1789 td1 = uq1->uq_thread; 1790 MPASS(td1->td_proc->p_magic == P_MAGIC); 1791 if (UPRI(td1) > UPRI(td)) 1792 break; 1793 } 1794 1795 if (uq1 == NULL) 1796 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq); 1797 else 1798 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq); 1799 } 1800 return (1); 1801 } 1802 1803 static struct umtx_pi * 1804 umtx_pi_next(struct umtx_pi *pi) 1805 { 1806 struct umtx_q *uq_owner; 1807 1808 if (pi->pi_owner == NULL) 1809 return (NULL); 1810 uq_owner = pi->pi_owner->td_umtxq; 1811 if (uq_owner == NULL) 1812 return (NULL); 1813 return (uq_owner->uq_pi_blocked); 1814 } 1815 1816 /* 1817 * Floyd's Cycle-Finding Algorithm. 1818 */ 1819 static bool 1820 umtx_pi_check_loop(struct umtx_pi *pi) 1821 { 1822 struct umtx_pi *pi1; /* fast iterator */ 1823 1824 mtx_assert(&umtx_lock, MA_OWNED); 1825 if (pi == NULL) 1826 return (false); 1827 pi1 = pi; 1828 for (;;) { 1829 pi = umtx_pi_next(pi); 1830 if (pi == NULL) 1831 break; 1832 pi1 = umtx_pi_next(pi1); 1833 if (pi1 == NULL) 1834 break; 1835 pi1 = umtx_pi_next(pi1); 1836 if (pi1 == NULL) 1837 break; 1838 if (pi == pi1) 1839 return (true); 1840 } 1841 return (false); 1842 } 1843 1844 /* 1845 * Propagate priority when a thread is blocked on POSIX 1846 * PI mutex. 
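 *
 * Worked example with hypothetical threads and priorities (lower numeric
 * value means higher priority): thread A at priority 100 blocks on a PI
 * mutex owned by B (priority 150), and B is itself blocked on another PI
 * mutex owned by C (priority 200).  Walking the pi_owner links below
 * lends priority 100 first to B and then to C, and the loan stays in
 * place until umtx_repropagate_priority() recomputes it on unlock or
 * when a waiter leaves.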
1847 */ 1848 static void 1849 umtx_propagate_priority(struct thread *td) 1850 { 1851 struct umtx_q *uq; 1852 struct umtx_pi *pi; 1853 int pri; 1854 1855 mtx_assert(&umtx_lock, MA_OWNED); 1856 pri = UPRI(td); 1857 uq = td->td_umtxq; 1858 pi = uq->uq_pi_blocked; 1859 if (pi == NULL) 1860 return; 1861 if (umtx_pi_check_loop(pi)) 1862 return; 1863 1864 for (;;) { 1865 td = pi->pi_owner; 1866 if (td == NULL || td == curthread) 1867 return; 1868 1869 MPASS(td->td_proc != NULL); 1870 MPASS(td->td_proc->p_magic == P_MAGIC); 1871 1872 thread_lock(td); 1873 if (td->td_lend_user_pri > pri) 1874 sched_lend_user_prio(td, pri); 1875 else { 1876 thread_unlock(td); 1877 break; 1878 } 1879 thread_unlock(td); 1880 1881 /* 1882 * Pick up the lock that td is blocked on. 1883 */ 1884 uq = td->td_umtxq; 1885 pi = uq->uq_pi_blocked; 1886 if (pi == NULL) 1887 break; 1888 /* Resort td on the list if needed. */ 1889 umtx_pi_adjust_thread(pi, td); 1890 } 1891 } 1892 1893 /* 1894 * Unpropagate priority for a PI mutex when a thread blocked on 1895 * it is interrupted by signal or resumed by others. 1896 */ 1897 static void 1898 umtx_repropagate_priority(struct umtx_pi *pi) 1899 { 1900 struct umtx_q *uq, *uq_owner; 1901 struct umtx_pi *pi2; 1902 int pri; 1903 1904 mtx_assert(&umtx_lock, MA_OWNED); 1905 1906 if (umtx_pi_check_loop(pi)) 1907 return; 1908 while (pi != NULL && pi->pi_owner != NULL) { 1909 pri = PRI_MAX; 1910 uq_owner = pi->pi_owner->td_umtxq; 1911 1912 TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) { 1913 uq = TAILQ_FIRST(&pi2->pi_blocked); 1914 if (uq != NULL) { 1915 if (pri > UPRI(uq->uq_thread)) 1916 pri = UPRI(uq->uq_thread); 1917 } 1918 } 1919 1920 if (pri > uq_owner->uq_inherited_pri) 1921 pri = uq_owner->uq_inherited_pri; 1922 thread_lock(pi->pi_owner); 1923 sched_lend_user_prio(pi->pi_owner, pri); 1924 thread_unlock(pi->pi_owner); 1925 if ((pi = uq_owner->uq_pi_blocked) != NULL) 1926 umtx_pi_adjust_thread(pi, uq_owner->uq_thread); 1927 } 1928 } 1929 1930 /* 1931 * Insert a PI mutex into owned list. 1932 */ 1933 static void 1934 umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner) 1935 { 1936 struct umtx_q *uq_owner; 1937 1938 uq_owner = owner->td_umtxq; 1939 mtx_assert(&umtx_lock, MA_OWNED); 1940 MPASS(pi->pi_owner == NULL); 1941 pi->pi_owner = owner; 1942 TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link); 1943 } 1944 1945 /* 1946 * Disown a PI mutex, and remove it from the owned list. 1947 */ 1948 static void 1949 umtx_pi_disown(struct umtx_pi *pi) 1950 { 1951 1952 mtx_assert(&umtx_lock, MA_OWNED); 1953 TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, pi, pi_link); 1954 pi->pi_owner = NULL; 1955 } 1956 1957 /* 1958 * Claim ownership of a PI mutex. 1959 */ 1960 int 1961 umtx_pi_claim(struct umtx_pi *pi, struct thread *owner) 1962 { 1963 struct umtx_q *uq; 1964 int pri; 1965 1966 mtx_lock(&umtx_lock); 1967 if (pi->pi_owner == owner) { 1968 mtx_unlock(&umtx_lock); 1969 return (0); 1970 } 1971 1972 if (pi->pi_owner != NULL) { 1973 /* 1974 * userland may have already messed the mutex, sigh. 
1975 */ 1976 mtx_unlock(&umtx_lock); 1977 return (EPERM); 1978 } 1979 umtx_pi_setowner(pi, owner); 1980 uq = TAILQ_FIRST(&pi->pi_blocked); 1981 if (uq != NULL) { 1982 pri = UPRI(uq->uq_thread); 1983 thread_lock(owner); 1984 if (pri < UPRI(owner)) 1985 sched_lend_user_prio(owner, pri); 1986 thread_unlock(owner); 1987 } 1988 mtx_unlock(&umtx_lock); 1989 return (0); 1990 } 1991 1992 /* 1993 * Adjust a thread's order position in its blocked PI mutex, 1994 * this may result new priority propagating process. 1995 */ 1996 void 1997 umtx_pi_adjust(struct thread *td, u_char oldpri) 1998 { 1999 struct umtx_q *uq; 2000 struct umtx_pi *pi; 2001 2002 uq = td->td_umtxq; 2003 mtx_lock(&umtx_lock); 2004 /* 2005 * Pick up the lock that td is blocked on. 2006 */ 2007 pi = uq->uq_pi_blocked; 2008 if (pi != NULL) { 2009 umtx_pi_adjust_thread(pi, td); 2010 umtx_repropagate_priority(pi); 2011 } 2012 mtx_unlock(&umtx_lock); 2013 } 2014 2015 /* 2016 * Sleep on a PI mutex. 2017 */ 2018 int 2019 umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, uint32_t owner, 2020 const char *wmesg, struct umtx_abs_timeout *timo, bool shared) 2021 { 2022 struct thread *td, *td1; 2023 struct umtx_q *uq1; 2024 int error, pri; 2025 #ifdef INVARIANTS 2026 struct umtxq_chain *uc; 2027 2028 uc = umtxq_getchain(&pi->pi_key); 2029 #endif 2030 error = 0; 2031 td = uq->uq_thread; 2032 KASSERT(td == curthread, ("inconsistent uq_thread")); 2033 UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key)); 2034 KASSERT(uc->uc_busy != 0, ("umtx chain is not busy")); 2035 umtxq_insert(uq); 2036 mtx_lock(&umtx_lock); 2037 if (pi->pi_owner == NULL) { 2038 mtx_unlock(&umtx_lock); 2039 td1 = tdfind(owner, shared ? -1 : td->td_proc->p_pid); 2040 mtx_lock(&umtx_lock); 2041 if (td1 != NULL) { 2042 if (pi->pi_owner == NULL) 2043 umtx_pi_setowner(pi, td1); 2044 PROC_UNLOCK(td1->td_proc); 2045 } 2046 } 2047 2048 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) { 2049 pri = UPRI(uq1->uq_thread); 2050 if (pri > UPRI(td)) 2051 break; 2052 } 2053 2054 if (uq1 != NULL) 2055 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq); 2056 else 2057 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq); 2058 2059 uq->uq_pi_blocked = pi; 2060 thread_lock(td); 2061 td->td_flags |= TDF_UPIBLOCKED; 2062 thread_unlock(td); 2063 umtx_propagate_priority(td); 2064 mtx_unlock(&umtx_lock); 2065 umtxq_unbusy(&uq->uq_key); 2066 2067 error = umtxq_sleep(uq, wmesg, timo); 2068 umtxq_remove(uq); 2069 2070 mtx_lock(&umtx_lock); 2071 uq->uq_pi_blocked = NULL; 2072 thread_lock(td); 2073 td->td_flags &= ~TDF_UPIBLOCKED; 2074 thread_unlock(td); 2075 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq); 2076 umtx_repropagate_priority(pi); 2077 mtx_unlock(&umtx_lock); 2078 umtxq_unlock(&uq->uq_key); 2079 2080 return (error); 2081 } 2082 2083 /* 2084 * Add reference count for a PI mutex. 2085 */ 2086 void 2087 umtx_pi_ref(struct umtx_pi *pi) 2088 { 2089 2090 UMTXQ_LOCKED_ASSERT(umtxq_getchain(&pi->pi_key)); 2091 pi->pi_refcount++; 2092 } 2093 2094 /* 2095 * Decrease reference count for a PI mutex, if the counter 2096 * is decreased to zero, its memory space is freed. 
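 *
 * Sketch of the pairing used by do_lock_pi() below, trimmed to the
 * reference-counting steps:
 *
 *      umtxq_lock(&uq->uq_key);
 *      pi = umtx_pi_lookup(&uq->uq_key);
 *      ...
 *      umtx_pi_ref(pi);
 *      umtxq_unlock(&uq->uq_key);
 *      (attempt the lock, possibly sleeping in umtxq_sleep_pi())
 *      umtxq_lock(&uq->uq_key);
 *      umtx_pi_unref(pi);
 *      umtxq_unlock(&uq->uq_key);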
2097 */ 2098 void 2099 umtx_pi_unref(struct umtx_pi *pi) 2100 { 2101 struct umtxq_chain *uc; 2102 2103 uc = umtxq_getchain(&pi->pi_key); 2104 UMTXQ_LOCKED_ASSERT(uc); 2105 KASSERT(pi->pi_refcount > 0, ("invalid reference count")); 2106 if (--pi->pi_refcount == 0) { 2107 mtx_lock(&umtx_lock); 2108 if (pi->pi_owner != NULL) 2109 umtx_pi_disown(pi); 2110 KASSERT(TAILQ_EMPTY(&pi->pi_blocked), 2111 ("blocked queue not empty")); 2112 mtx_unlock(&umtx_lock); 2113 TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink); 2114 umtx_pi_free(pi); 2115 } 2116 } 2117 2118 /* 2119 * Find a PI mutex in hash table. 2120 */ 2121 struct umtx_pi * 2122 umtx_pi_lookup(struct umtx_key *key) 2123 { 2124 struct umtxq_chain *uc; 2125 struct umtx_pi *pi; 2126 2127 uc = umtxq_getchain(key); 2128 UMTXQ_LOCKED_ASSERT(uc); 2129 2130 TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) { 2131 if (umtx_key_match(&pi->pi_key, key)) { 2132 return (pi); 2133 } 2134 } 2135 return (NULL); 2136 } 2137 2138 /* 2139 * Insert a PI mutex into hash table. 2140 */ 2141 void 2142 umtx_pi_insert(struct umtx_pi *pi) 2143 { 2144 struct umtxq_chain *uc; 2145 2146 uc = umtxq_getchain(&pi->pi_key); 2147 UMTXQ_LOCKED_ASSERT(uc); 2148 TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink); 2149 } 2150 2151 /* 2152 * Drop a PI mutex and wakeup a top waiter. 2153 */ 2154 int 2155 umtx_pi_drop(struct thread *td, struct umtx_key *key, bool rb, int *count) 2156 { 2157 struct umtx_q *uq_first, *uq_first2, *uq_me; 2158 struct umtx_pi *pi, *pi2; 2159 int pri; 2160 2161 UMTXQ_ASSERT_LOCKED_BUSY(key); 2162 *count = umtxq_count_pi(key, &uq_first); 2163 if (uq_first != NULL) { 2164 mtx_lock(&umtx_lock); 2165 pi = uq_first->uq_pi_blocked; 2166 KASSERT(pi != NULL, ("pi == NULL?")); 2167 if (pi->pi_owner != td && !(rb && pi->pi_owner == NULL)) { 2168 mtx_unlock(&umtx_lock); 2169 /* userland messed the mutex */ 2170 return (EPERM); 2171 } 2172 uq_me = td->td_umtxq; 2173 if (pi->pi_owner == td) 2174 umtx_pi_disown(pi); 2175 /* get highest priority thread which is still sleeping. */ 2176 uq_first = TAILQ_FIRST(&pi->pi_blocked); 2177 while (uq_first != NULL && 2178 (uq_first->uq_flags & UQF_UMTXQ) == 0) { 2179 uq_first = TAILQ_NEXT(uq_first, uq_lockq); 2180 } 2181 pri = PRI_MAX; 2182 TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) { 2183 uq_first2 = TAILQ_FIRST(&pi2->pi_blocked); 2184 if (uq_first2 != NULL) { 2185 if (pri > UPRI(uq_first2->uq_thread)) 2186 pri = UPRI(uq_first2->uq_thread); 2187 } 2188 } 2189 thread_lock(td); 2190 sched_lend_user_prio(td, pri); 2191 thread_unlock(td); 2192 mtx_unlock(&umtx_lock); 2193 if (uq_first) 2194 umtxq_signal_thread(uq_first); 2195 } else { 2196 pi = umtx_pi_lookup(key); 2197 /* 2198 * A umtx_pi can exist if a signal or timeout removed the 2199 * last waiter from the umtxq, but there is still 2200 * a thread in do_lock_pi() holding the umtx_pi. 2201 */ 2202 if (pi != NULL) { 2203 /* 2204 * The umtx_pi can be unowned, such as when a thread 2205 * has just entered do_lock_pi(), allocated the 2206 * umtx_pi, and unlocked the umtxq. 2207 * If the current thread owns it, it must disown it. 2208 */ 2209 mtx_lock(&umtx_lock); 2210 if (pi->pi_owner == td) 2211 umtx_pi_disown(pi); 2212 mtx_unlock(&umtx_lock); 2213 } 2214 } 2215 return (0); 2216 } 2217 2218 /* 2219 * Lock a PI mutex. 
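 *
 * For reference, the m_owner values that the retry loop below
 * distinguishes (the constants are shared with userland via sys/umtx.h;
 * the ordering of events is illustrative):
 *
 *      UMUTEX_UNOWNED          free; CAS in the caller's tid
 *      UMUTEX_CONTESTED        free but waiters exist; CAS in
 *                              (id | UMUTEX_CONTESTED)
 *      some thread's tid       held; if the holder is the caller itself,
 *                              fail with EDEADLK, otherwise set
 *                              UMUTEX_CONTESTED and sleep in
 *                              umtxq_sleep_pi()
 *      UMUTEX_RB_OWNERDEAD     robust owner died; take the lock over and
 *                              return EOWNERDEAD
 *      UMUTEX_RB_NOTRECOV      robust mutex is unrecoverable; return
 *                              ENOTRECOVERABLE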
2220 */
2221 static int
2222 do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags,
2223 struct _umtx_time *timeout, int try)
2224 {
2225 struct umtx_abs_timeout timo;
2226 struct umtx_q *uq;
2227 struct umtx_pi *pi, *new_pi;
2228 uint32_t id, old_owner, owner, old;
2229 int error, rv;
2230 
2231 id = td->td_tid;
2232 uq = td->td_umtxq;
2233 
2234 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
2235 TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
2236 &uq->uq_key)) != 0)
2237 return (error);
2238 
2239 if (timeout != NULL)
2240 umtx_abs_timeout_init2(&timo, timeout);
2241 
2242 umtxq_lock(&uq->uq_key);
2243 pi = umtx_pi_lookup(&uq->uq_key);
2244 if (pi == NULL) {
2245 new_pi = umtx_pi_alloc(M_NOWAIT);
2246 if (new_pi == NULL) {
2247 umtxq_unlock(&uq->uq_key);
2248 new_pi = umtx_pi_alloc(M_WAITOK);
2249 umtxq_lock(&uq->uq_key);
2250 pi = umtx_pi_lookup(&uq->uq_key);
2251 if (pi != NULL) {
2252 umtx_pi_free(new_pi);
2253 new_pi = NULL;
2254 }
2255 }
2256 if (new_pi != NULL) {
2257 new_pi->pi_key = uq->uq_key;
2258 umtx_pi_insert(new_pi);
2259 pi = new_pi;
2260 }
2261 }
2262 umtx_pi_ref(pi);
2263 umtxq_unlock(&uq->uq_key);
2264 
2265 /*
2266 * Care must be exercised when dealing with the umtx structure. It
2267 * can fault on any access.
2268 */
2269 for (;;) {
2270 /*
2271 * Try the uncontested case. This should be done in userland.
2272 */
2273 rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id);
2274 /* The address was invalid. */
2275 if (rv == -1) {
2276 error = EFAULT;
2277 break;
2278 }
2279 /* The acquire succeeded. */
2280 if (rv == 0) {
2281 MPASS(owner == UMUTEX_UNOWNED);
2282 error = 0;
2283 break;
2284 }
2285 
2286 if (owner == UMUTEX_RB_NOTRECOV) {
2287 error = ENOTRECOVERABLE;
2288 break;
2289 }
2290 
2291 /*
2292 * Nobody owns it, but the acquire failed. This can happen
2293 * with ll/sc atomics.
2294 */
2295 if (owner == UMUTEX_UNOWNED) {
2296 error = thread_check_susp(td, true);
2297 if (error != 0)
2298 break;
2299 continue;
2300 }
2301 
2302 /*
2303 * Avoid overwriting a possible error from sleep due
2304 * to a pending signal with the suspension check result.
2305 */
2306 if (error == 0) {
2307 error = thread_check_susp(td, true);
2308 if (error != 0)
2309 break;
2310 }
2311 
2312 /* If no one owns it but it is contested, try to acquire it. */
2313 if (owner == UMUTEX_CONTESTED || owner == UMUTEX_RB_OWNERDEAD) {
2314 old_owner = owner;
2315 rv = casueword32(&m->m_owner, owner, &owner,
2316 id | UMUTEX_CONTESTED);
2317 /* The address was invalid. */
2318 if (rv == -1) {
2319 error = EFAULT;
2320 break;
2321 }
2322 if (rv == 1) {
2323 if (error == 0) {
2324 error = thread_check_susp(td, true);
2325 if (error != 0)
2326 break;
2327 }
2328 
2329 /*
2330 * If this failed, the lock could have
2331 * changed; restart.
2332 */
2333 continue;
2334 }
2335 
2336 MPASS(rv == 0);
2337 MPASS(owner == old_owner);
2338 umtxq_lock(&uq->uq_key);
2339 umtxq_busy(&uq->uq_key);
2340 error = umtx_pi_claim(pi, td);
2341 umtxq_unbusy(&uq->uq_key);
2342 umtxq_unlock(&uq->uq_key);
2343 if (error != 0) {
2344 /*
2345 * Since we're going to return an
2346 * error, restore the m_owner to its
2347 * previous, unowned state to avoid
2348 * compounding the problem.
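 * The result of the store-back below is deliberately ignored; if
 * the word changed yet again in the meantime there is nothing more
 * the kernel can usefully do with it here.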
2349 */ 2350 (void)casuword32(&m->m_owner, 2351 id | UMUTEX_CONTESTED, old_owner); 2352 } 2353 if (error == 0 && old_owner == UMUTEX_RB_OWNERDEAD) 2354 error = EOWNERDEAD; 2355 break; 2356 } 2357 2358 if ((owner & ~UMUTEX_CONTESTED) == id) { 2359 error = EDEADLK; 2360 break; 2361 } 2362 2363 if (try != 0) { 2364 error = EBUSY; 2365 break; 2366 } 2367 2368 /* 2369 * If we caught a signal, we have retried and now 2370 * exit immediately. 2371 */ 2372 if (error != 0) 2373 break; 2374 2375 umtxq_lock(&uq->uq_key); 2376 umtxq_busy(&uq->uq_key); 2377 umtxq_unlock(&uq->uq_key); 2378 2379 /* 2380 * Set the contested bit so that a release in user space 2381 * knows to use the system call for unlock. If this fails 2382 * either some one else has acquired the lock or it has been 2383 * released. 2384 */ 2385 rv = casueword32(&m->m_owner, owner, &old, owner | 2386 UMUTEX_CONTESTED); 2387 2388 /* The address was invalid. */ 2389 if (rv == -1) { 2390 umtxq_unbusy_unlocked(&uq->uq_key); 2391 error = EFAULT; 2392 break; 2393 } 2394 if (rv == 1) { 2395 umtxq_unbusy_unlocked(&uq->uq_key); 2396 error = thread_check_susp(td, true); 2397 if (error != 0) 2398 break; 2399 2400 /* 2401 * The lock changed and we need to retry or we 2402 * lost a race to the thread unlocking the 2403 * umtx. Note that the UMUTEX_RB_OWNERDEAD 2404 * value for owner is impossible there. 2405 */ 2406 continue; 2407 } 2408 2409 umtxq_lock(&uq->uq_key); 2410 2411 /* We set the contested bit, sleep. */ 2412 MPASS(old == owner); 2413 error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED, 2414 "umtxpi", timeout == NULL ? NULL : &timo, 2415 (flags & USYNC_PROCESS_SHARED) != 0); 2416 if (error != 0) 2417 continue; 2418 2419 error = thread_check_susp(td, false); 2420 if (error != 0) 2421 break; 2422 } 2423 2424 umtxq_lock(&uq->uq_key); 2425 umtx_pi_unref(pi); 2426 umtxq_unlock(&uq->uq_key); 2427 2428 umtx_key_release(&uq->uq_key); 2429 return (error); 2430 } 2431 2432 /* 2433 * Unlock a PI mutex. 2434 */ 2435 static int 2436 do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags, bool rb) 2437 { 2438 struct umtx_key key; 2439 uint32_t id, new_owner, old, owner; 2440 int count, error; 2441 2442 id = td->td_tid; 2443 2444 usrloop: 2445 /* 2446 * Make sure we own this mtx. 2447 */ 2448 error = fueword32(&m->m_owner, &owner); 2449 if (error == -1) 2450 return (EFAULT); 2451 2452 if ((owner & ~UMUTEX_CONTESTED) != id) 2453 return (EPERM); 2454 2455 new_owner = umtx_unlock_val(flags, rb); 2456 2457 /* This should be done in userland */ 2458 if ((owner & UMUTEX_CONTESTED) == 0) { 2459 error = casueword32(&m->m_owner, owner, &old, new_owner); 2460 if (error == -1) 2461 return (EFAULT); 2462 if (error == 1) { 2463 error = thread_check_susp(td, true); 2464 if (error != 0) 2465 return (error); 2466 goto usrloop; 2467 } 2468 if (old == owner) 2469 return (0); 2470 owner = old; 2471 } 2472 2473 /* We should only ever be in here for contested locks */ 2474 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2475 TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags), 2476 &key)) != 0) 2477 return (error); 2478 2479 umtxq_lock(&key); 2480 umtxq_busy(&key); 2481 error = umtx_pi_drop(td, &key, rb, &count); 2482 if (error != 0) { 2483 umtxq_unbusy(&key); 2484 umtxq_unlock(&key); 2485 umtx_key_release(&key); 2486 /* userland messed the mutex */ 2487 return (error); 2488 } 2489 umtxq_unlock(&key); 2490 2491 /* 2492 * When unlocking the umtx, it must be marked as unowned if 2493 * there is zero or one thread only waiting for it. 
2494 * Otherwise, it must be marked as contested.
2495 */
2496 
2497 if (count > 1)
2498 new_owner |= UMUTEX_CONTESTED;
2499 again:
2500 error = casueword32(&m->m_owner, owner, &old, new_owner);
2501 if (error == 1) {
2502 error = thread_check_susp(td, false);
2503 if (error == 0)
2504 goto again;
2505 }
2506 umtxq_unbusy_unlocked(&key);
2507 umtx_key_release(&key);
2508 if (error == -1)
2509 return (EFAULT);
2510 if (error == 0 && old != owner)
2511 return (EINVAL);
2512 return (error);
2513 }
2514 
2515 /*
2516 * Lock a PP mutex.
2517 */
2518 static int
2519 do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags,
2520 struct _umtx_time *timeout, int try)
2521 {
2522 struct umtx_abs_timeout timo;
2523 struct umtx_q *uq, *uq2;
2524 struct umtx_pi *pi;
2525 uint32_t ceiling;
2526 uint32_t owner, id;
2527 int error, pri, old_inherited_pri, new_pri, rv;
2528 bool su;
2529 
2530 id = td->td_tid;
2531 uq = td->td_umtxq;
2532 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
2533 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
2534 &uq->uq_key)) != 0)
2535 return (error);
2536 
2537 if (timeout != NULL)
2538 umtx_abs_timeout_init2(&timo, timeout);
2539 
2540 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
2541 for (;;) {
2542 old_inherited_pri = uq->uq_inherited_pri;
2543 umtxq_lock(&uq->uq_key);
2544 umtxq_busy(&uq->uq_key);
2545 umtxq_unlock(&uq->uq_key);
2546 
2547 rv = fueword32(&m->m_ceilings[0], &ceiling);
2548 if (rv == -1) {
2549 error = EFAULT;
2550 goto out;
2551 }
2552 ceiling = RTP_PRIO_MAX - ceiling;
2553 if (ceiling > RTP_PRIO_MAX) {
2554 error = EINVAL;
2555 goto out;
2556 }
2557 new_pri = PRI_MIN_REALTIME + ceiling;
2558 
2559 if (td->td_base_user_pri < new_pri) {
2560 error = EINVAL;
2561 goto out;
2562 }
2563 if (su) {
2564 mtx_lock(&umtx_lock);
2565 if (new_pri < uq->uq_inherited_pri) {
2566 uq->uq_inherited_pri = new_pri;
2567 thread_lock(td);
2568 if (new_pri < UPRI(td))
2569 sched_lend_user_prio(td, new_pri);
2570 thread_unlock(td);
2571 }
2572 mtx_unlock(&umtx_lock);
2573 }
2574 
2575 rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
2576 id | UMUTEX_CONTESTED);
2577 /* The address was invalid. */
2578 if (rv == -1) {
2579 error = EFAULT;
2580 break;
2581 }
2582 if (rv == 0) {
2583 MPASS(owner == UMUTEX_CONTESTED);
2584 error = 0;
2585 break;
2586 }
2587 /* rv == 1 */
2588 if (owner == UMUTEX_RB_OWNERDEAD) {
2589 rv = casueword32(&m->m_owner, UMUTEX_RB_OWNERDEAD,
2590 &owner, id | UMUTEX_CONTESTED);
2591 if (rv == -1) {
2592 error = EFAULT;
2593 break;
2594 }
2595 if (rv == 0) {
2596 MPASS(owner == UMUTEX_RB_OWNERDEAD);
2597 error = EOWNERDEAD; /* success */
2598 break;
2599 }
2600 
2601 /*
2602 * rv == 1: only check for suspension if we
2603 * have not already caught a signal. If the
2604 * check returns an error, the same condition
2605 * is re-checked by the umtxq_sleep() call
2606 * below, so clear the error here to avoid
2607 * skipping the last loop iteration.
2608 */
2609 if (error == 0) {
2610 error = thread_check_susp(td, false);
2611 if (error == 0 && try == 0) {
2612 umtxq_unbusy_unlocked(&uq->uq_key);
2613 continue;
2614 }
2615 error = 0;
2616 }
2617 } else if (owner == UMUTEX_RB_NOTRECOV) {
2618 error = ENOTRECOVERABLE;
2619 }
2620 
2621 if (try != 0)
2622 error = EBUSY;
2623 
2624 /*
2625 * If we caught a signal, we have retried and now
2626 * exit immediately.
2627 */ 2628 if (error != 0) 2629 break; 2630 2631 umtxq_lock(&uq->uq_key); 2632 umtxq_insert(uq); 2633 umtxq_unbusy(&uq->uq_key); 2634 error = umtxq_sleep(uq, "umtxpp", timeout == NULL ? 2635 NULL : &timo); 2636 umtxq_remove(uq); 2637 umtxq_unlock(&uq->uq_key); 2638 2639 mtx_lock(&umtx_lock); 2640 uq->uq_inherited_pri = old_inherited_pri; 2641 pri = PRI_MAX; 2642 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2643 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2644 if (uq2 != NULL) { 2645 if (pri > UPRI(uq2->uq_thread)) 2646 pri = UPRI(uq2->uq_thread); 2647 } 2648 } 2649 if (pri > uq->uq_inherited_pri) 2650 pri = uq->uq_inherited_pri; 2651 thread_lock(td); 2652 sched_lend_user_prio(td, pri); 2653 thread_unlock(td); 2654 mtx_unlock(&umtx_lock); 2655 } 2656 2657 if (error != 0 && error != EOWNERDEAD) { 2658 mtx_lock(&umtx_lock); 2659 uq->uq_inherited_pri = old_inherited_pri; 2660 pri = PRI_MAX; 2661 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2662 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2663 if (uq2 != NULL) { 2664 if (pri > UPRI(uq2->uq_thread)) 2665 pri = UPRI(uq2->uq_thread); 2666 } 2667 } 2668 if (pri > uq->uq_inherited_pri) 2669 pri = uq->uq_inherited_pri; 2670 thread_lock(td); 2671 sched_lend_user_prio(td, pri); 2672 thread_unlock(td); 2673 mtx_unlock(&umtx_lock); 2674 } 2675 2676 out: 2677 umtxq_unbusy_unlocked(&uq->uq_key); 2678 umtx_key_release(&uq->uq_key); 2679 return (error); 2680 } 2681 2682 /* 2683 * Unlock a PP mutex. 2684 */ 2685 static int 2686 do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, bool rb) 2687 { 2688 struct umtx_key key; 2689 struct umtx_q *uq, *uq2; 2690 struct umtx_pi *pi; 2691 uint32_t id, owner, rceiling; 2692 int error, pri, new_inherited_pri; 2693 bool su; 2694 2695 id = td->td_tid; 2696 uq = td->td_umtxq; 2697 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); 2698 2699 /* 2700 * Make sure we own this mtx. 2701 */ 2702 error = fueword32(&m->m_owner, &owner); 2703 if (error == -1) 2704 return (EFAULT); 2705 2706 if ((owner & ~UMUTEX_CONTESTED) != id) 2707 return (EPERM); 2708 2709 error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t)); 2710 if (error != 0) 2711 return (error); 2712 2713 if (rceiling == -1) 2714 new_inherited_pri = PRI_MAX; 2715 else { 2716 rceiling = RTP_PRIO_MAX - rceiling; 2717 if (rceiling > RTP_PRIO_MAX) 2718 return (EINVAL); 2719 new_inherited_pri = PRI_MIN_REALTIME + rceiling; 2720 } 2721 2722 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2723 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2724 &key)) != 0) 2725 return (error); 2726 umtxq_lock(&key); 2727 umtxq_busy(&key); 2728 umtxq_unlock(&key); 2729 /* 2730 * For priority protected mutex, always set unlocked state 2731 * to UMUTEX_CONTESTED, so that userland always enters kernel 2732 * to lock the mutex, it is necessary because thread priority 2733 * has to be adjusted for such mutex. 
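 * In practice this means a PP umutex never takes the kernel-free fast
 * path that ordinary umutexes enjoy; every acquisition traps into the
 * kernel so the ceiling priority can be applied.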
2734 */ 2735 error = suword32(&m->m_owner, umtx_unlock_val(flags, rb) | 2736 UMUTEX_CONTESTED); 2737 2738 umtxq_lock(&key); 2739 if (error == 0) 2740 umtxq_signal(&key, 1); 2741 umtxq_unbusy(&key); 2742 umtxq_unlock(&key); 2743 2744 if (error == -1) 2745 error = EFAULT; 2746 else { 2747 mtx_lock(&umtx_lock); 2748 if (su || new_inherited_pri == PRI_MAX) 2749 uq->uq_inherited_pri = new_inherited_pri; 2750 pri = PRI_MAX; 2751 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2752 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2753 if (uq2 != NULL) { 2754 if (pri > UPRI(uq2->uq_thread)) 2755 pri = UPRI(uq2->uq_thread); 2756 } 2757 } 2758 if (pri > uq->uq_inherited_pri) 2759 pri = uq->uq_inherited_pri; 2760 thread_lock(td); 2761 sched_lend_user_prio(td, pri); 2762 thread_unlock(td); 2763 mtx_unlock(&umtx_lock); 2764 } 2765 umtx_key_release(&key); 2766 return (error); 2767 } 2768 2769 static int 2770 do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling, 2771 uint32_t *old_ceiling) 2772 { 2773 struct umtx_q *uq; 2774 uint32_t flags, id, owner, save_ceiling; 2775 int error, rv, rv1; 2776 2777 error = fueword32(&m->m_flags, &flags); 2778 if (error == -1) 2779 return (EFAULT); 2780 if ((flags & UMUTEX_PRIO_PROTECT) == 0) 2781 return (EINVAL); 2782 if (ceiling > RTP_PRIO_MAX) 2783 return (EINVAL); 2784 id = td->td_tid; 2785 uq = td->td_umtxq; 2786 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2787 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2788 &uq->uq_key)) != 0) 2789 return (error); 2790 for (;;) { 2791 umtxq_lock(&uq->uq_key); 2792 umtxq_busy(&uq->uq_key); 2793 umtxq_unlock(&uq->uq_key); 2794 2795 rv = fueword32(&m->m_ceilings[0], &save_ceiling); 2796 if (rv == -1) { 2797 error = EFAULT; 2798 break; 2799 } 2800 2801 rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, 2802 id | UMUTEX_CONTESTED); 2803 if (rv == -1) { 2804 error = EFAULT; 2805 break; 2806 } 2807 2808 if (rv == 0) { 2809 MPASS(owner == UMUTEX_CONTESTED); 2810 rv = suword32(&m->m_ceilings[0], ceiling); 2811 rv1 = suword32(&m->m_owner, UMUTEX_CONTESTED); 2812 error = (rv == 0 && rv1 == 0) ? 0: EFAULT; 2813 break; 2814 } 2815 2816 if ((owner & ~UMUTEX_CONTESTED) == id) { 2817 rv = suword32(&m->m_ceilings[0], ceiling); 2818 error = rv == 0 ? 0 : EFAULT; 2819 break; 2820 } 2821 2822 if (owner == UMUTEX_RB_OWNERDEAD) { 2823 error = EOWNERDEAD; 2824 break; 2825 } else if (owner == UMUTEX_RB_NOTRECOV) { 2826 error = ENOTRECOVERABLE; 2827 break; 2828 } 2829 2830 /* 2831 * If we caught a signal, we have retried and now 2832 * exit immediately. 2833 */ 2834 if (error != 0) 2835 break; 2836 2837 /* 2838 * We set the contested bit, sleep. Otherwise the lock changed 2839 * and we need to retry or we lost a race to the thread 2840 * unlocking the umtx. 2841 */ 2842 umtxq_lock(&uq->uq_key); 2843 umtxq_insert(uq); 2844 umtxq_unbusy(&uq->uq_key); 2845 error = umtxq_sleep(uq, "umtxpp", NULL); 2846 umtxq_remove(uq); 2847 umtxq_unlock(&uq->uq_key); 2848 } 2849 umtxq_lock(&uq->uq_key); 2850 if (error == 0) 2851 umtxq_signal(&uq->uq_key, INT_MAX); 2852 umtxq_unbusy(&uq->uq_key); 2853 umtxq_unlock(&uq->uq_key); 2854 umtx_key_release(&uq->uq_key); 2855 if (error == 0 && old_ceiling != NULL) { 2856 rv = suword32(old_ceiling, save_ceiling); 2857 error = rv == 0 ? 0 : EFAULT; 2858 } 2859 return (error); 2860 } 2861 2862 /* 2863 * Lock a userland POSIX mutex. 
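 * The locking protocol is selected by the UMUTEX_PRIO_INHERIT and
 * UMUTEX_PRIO_PROTECT bits of m_flags and dispatched to do_lock_normal(),
 * do_lock_pi() or do_lock_pp().  Untimed sleeps interrupted by a signal
 * are restarted (except in _UMUTEX_WAIT mode); timed sleeps are not
 * restarted and report EINTR instead.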
2864 */
2865 static int
2866 do_lock_umutex(struct thread *td, struct umutex *m,
2867 struct _umtx_time *timeout, int mode)
2868 {
2869 uint32_t flags;
2870 int error;
2871 
2872 error = fueword32(&m->m_flags, &flags);
2873 if (error == -1)
2874 return (EFAULT);
2875 
2876 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2877 case 0:
2878 error = do_lock_normal(td, m, flags, timeout, mode);
2879 break;
2880 case UMUTEX_PRIO_INHERIT:
2881 error = do_lock_pi(td, m, flags, timeout, mode);
2882 break;
2883 case UMUTEX_PRIO_PROTECT:
2884 error = do_lock_pp(td, m, flags, timeout, mode);
2885 break;
2886 default:
2887 return (EINVAL);
2888 }
2889 if (timeout == NULL) {
2890 if (error == EINTR && mode != _UMUTEX_WAIT)
2891 error = ERESTART;
2892 } else {
2893 /* Timed-locking is not restarted. */
2894 if (error == ERESTART)
2895 error = EINTR;
2896 }
2897 return (error);
2898 }
2899 
2900 /*
2901 * Unlock a userland POSIX mutex.
2902 */
2903 static int
2904 do_unlock_umutex(struct thread *td, struct umutex *m, bool rb)
2905 {
2906 uint32_t flags;
2907 int error;
2908 
2909 error = fueword32(&m->m_flags, &flags);
2910 if (error == -1)
2911 return (EFAULT);
2912 
2913 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2914 case 0:
2915 return (do_unlock_normal(td, m, flags, rb));
2916 case UMUTEX_PRIO_INHERIT:
2917 return (do_unlock_pi(td, m, flags, rb));
2918 case UMUTEX_PRIO_PROTECT:
2919 return (do_unlock_pp(td, m, flags, rb));
2920 }
2921 
2922 return (EINVAL);
2923 }
2924 
2925 static int
2926 do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
2927 struct timespec *timeout, u_long wflags)
2928 {
2929 struct umtx_abs_timeout timo;
2930 struct umtx_q *uq;
2931 uint32_t flags, clockid, hasw;
2932 int error;
2933 
2934 uq = td->td_umtxq;
2935 error = fueword32(&cv->c_flags, &flags);
2936 if (error == -1)
2937 return (EFAULT);
2938 error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
2939 if (error != 0)
2940 return (error);
2941 
2942 if ((wflags & CVWAIT_CLOCKID) != 0) {
2943 error = fueword32(&cv->c_clockid, &clockid);
2944 if (error == -1) {
2945 umtx_key_release(&uq->uq_key);
2946 return (EFAULT);
2947 }
2948 if (clockid < CLOCK_REALTIME ||
2949 clockid >= CLOCK_THREAD_CPUTIME_ID) {
2950 /* Only the predefined clock ids are accepted here. */
2951 umtx_key_release(&uq->uq_key);
2952 return (EINVAL);
2953 }
2954 } else {
2955 clockid = CLOCK_REALTIME;
2956 }
2957 
2958 umtxq_lock(&uq->uq_key);
2959 umtxq_busy(&uq->uq_key);
2960 umtxq_insert(uq);
2961 umtxq_unlock(&uq->uq_key);
2962 
2963 /*
2964 * Set c_has_waiters to 1 before releasing the user mutex, but
2965 * avoid dirtying the cache line when it is already set.
2966 */
2967 error = fueword32(&cv->c_has_waiters, &hasw);
2968 if (error == 0 && hasw == 0)
2969 error = suword32(&cv->c_has_waiters, 1);
2970 if (error != 0) {
2971 umtxq_lock(&uq->uq_key);
2972 umtxq_remove(uq);
2973 umtxq_unbusy(&uq->uq_key);
2974 error = EFAULT;
2975 goto out;
2976 }
2977 
2978 umtxq_unbusy_unlocked(&uq->uq_key);
2979 
2980 error = do_unlock_umutex(td, m, false);
2981 
2982 if (timeout != NULL)
2983 umtx_abs_timeout_init(&timo, clockid,
2984 (wflags & CVWAIT_ABSTIME) != 0, timeout);
2985 
2986 umtxq_lock(&uq->uq_key);
2987 if (error == 0) {
2988 error = umtxq_sleep(uq, "ucond", timeout == NULL ?
2989 NULL : &timo);
2990 }
2991 
2992 if ((uq->uq_flags & UQF_UMTXQ) == 0)
2993 error = 0;
2994 else {
2995 /*
2996 * This must be a timeout, an interruption by a signal,
2997 * or a spurious wakeup; clear the c_has_waiters flag
2998 * when necessary.
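 * The flag is cleared only when we were the last waiter on the queue
 * (its length was 1 before the removal below); otherwise the remaining
 * waiters still rely on it being set.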
2999 */ 3000 umtxq_busy(&uq->uq_key); 3001 if ((uq->uq_flags & UQF_UMTXQ) != 0) { 3002 int oldlen = uq->uq_cur_queue->length; 3003 umtxq_remove(uq); 3004 if (oldlen == 1) { 3005 umtxq_unlock(&uq->uq_key); 3006 if (suword32(&cv->c_has_waiters, 0) != 0 && 3007 error == 0) 3008 error = EFAULT; 3009 umtxq_lock(&uq->uq_key); 3010 } 3011 } 3012 umtxq_unbusy(&uq->uq_key); 3013 if (error == ERESTART) 3014 error = EINTR; 3015 } 3016 out: 3017 umtxq_unlock(&uq->uq_key); 3018 umtx_key_release(&uq->uq_key); 3019 return (error); 3020 } 3021 3022 /* 3023 * Signal a userland condition variable. 3024 */ 3025 static int 3026 do_cv_signal(struct thread *td, struct ucond *cv) 3027 { 3028 struct umtx_key key; 3029 int error, cnt, nwake; 3030 uint32_t flags; 3031 3032 error = fueword32(&cv->c_flags, &flags); 3033 if (error == -1) 3034 return (EFAULT); 3035 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 3036 return (error); 3037 umtxq_lock(&key); 3038 umtxq_busy(&key); 3039 cnt = umtxq_count(&key); 3040 nwake = umtxq_signal(&key, 1); 3041 if (cnt <= nwake) { 3042 umtxq_unlock(&key); 3043 error = suword32(&cv->c_has_waiters, 0); 3044 if (error == -1) 3045 error = EFAULT; 3046 umtxq_lock(&key); 3047 } 3048 umtxq_unbusy(&key); 3049 umtxq_unlock(&key); 3050 umtx_key_release(&key); 3051 return (error); 3052 } 3053 3054 static int 3055 do_cv_broadcast(struct thread *td, struct ucond *cv) 3056 { 3057 struct umtx_key key; 3058 int error; 3059 uint32_t flags; 3060 3061 error = fueword32(&cv->c_flags, &flags); 3062 if (error == -1) 3063 return (EFAULT); 3064 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 3065 return (error); 3066 3067 umtxq_lock(&key); 3068 umtxq_busy(&key); 3069 umtxq_signal(&key, INT_MAX); 3070 umtxq_unlock(&key); 3071 3072 error = suword32(&cv->c_has_waiters, 0); 3073 if (error == -1) 3074 error = EFAULT; 3075 3076 umtxq_unbusy_unlocked(&key); 3077 3078 umtx_key_release(&key); 3079 return (error); 3080 } 3081 3082 static int 3083 do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, 3084 struct _umtx_time *timeout) 3085 { 3086 struct umtx_abs_timeout timo; 3087 struct umtx_q *uq; 3088 uint32_t flags, wrflags; 3089 int32_t state, oldstate; 3090 int32_t blocked_readers; 3091 int error, error1, rv; 3092 3093 uq = td->td_umtxq; 3094 error = fueword32(&rwlock->rw_flags, &flags); 3095 if (error == -1) 3096 return (EFAULT); 3097 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3098 if (error != 0) 3099 return (error); 3100 3101 if (timeout != NULL) 3102 umtx_abs_timeout_init2(&timo, timeout); 3103 3104 wrflags = URWLOCK_WRITE_OWNER; 3105 if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER)) 3106 wrflags |= URWLOCK_WRITE_WAITERS; 3107 3108 for (;;) { 3109 rv = fueword32(&rwlock->rw_state, &state); 3110 if (rv == -1) { 3111 umtx_key_release(&uq->uq_key); 3112 return (EFAULT); 3113 } 3114 3115 /* try to lock it */ 3116 while (!(state & wrflags)) { 3117 if (__predict_false(URWLOCK_READER_COUNT(state) == 3118 URWLOCK_MAX_READERS)) { 3119 umtx_key_release(&uq->uq_key); 3120 return (EAGAIN); 3121 } 3122 rv = casueword32(&rwlock->rw_state, state, 3123 &oldstate, state + 1); 3124 if (rv == -1) { 3125 umtx_key_release(&uq->uq_key); 3126 return (EFAULT); 3127 } 3128 if (rv == 0) { 3129 MPASS(oldstate == state); 3130 umtx_key_release(&uq->uq_key); 3131 return (0); 3132 } 3133 error = thread_check_susp(td, true); 3134 if (error != 0) 3135 break; 3136 state = oldstate; 3137 } 3138 3139 if (error) 3140 break; 3141 3142 
/* grab monitor lock */ 3143 umtxq_lock(&uq->uq_key); 3144 umtxq_busy(&uq->uq_key); 3145 umtxq_unlock(&uq->uq_key); 3146 3147 /* 3148 * re-read the state, in case it changed between the try-lock above 3149 * and the check below 3150 */ 3151 rv = fueword32(&rwlock->rw_state, &state); 3152 if (rv == -1) 3153 error = EFAULT; 3154 3155 /* set read contention bit */ 3156 while (error == 0 && (state & wrflags) && 3157 !(state & URWLOCK_READ_WAITERS)) { 3158 rv = casueword32(&rwlock->rw_state, state, 3159 &oldstate, state | URWLOCK_READ_WAITERS); 3160 if (rv == -1) { 3161 error = EFAULT; 3162 break; 3163 } 3164 if (rv == 0) { 3165 MPASS(oldstate == state); 3166 goto sleep; 3167 } 3168 state = oldstate; 3169 error = thread_check_susp(td, false); 3170 if (error != 0) 3171 break; 3172 } 3173 if (error != 0) { 3174 umtxq_unbusy_unlocked(&uq->uq_key); 3175 break; 3176 } 3177 3178 /* state is changed while setting flags, restart */ 3179 if (!(state & wrflags)) { 3180 umtxq_unbusy_unlocked(&uq->uq_key); 3181 error = thread_check_susp(td, true); 3182 if (error != 0) 3183 break; 3184 continue; 3185 } 3186 3187 sleep: 3188 /* 3189 * Contention bit is set, before sleeping, increase 3190 * read waiter count. 3191 */ 3192 rv = fueword32(&rwlock->rw_blocked_readers, 3193 &blocked_readers); 3194 if (rv == 0) 3195 rv = suword32(&rwlock->rw_blocked_readers, 3196 blocked_readers + 1); 3197 if (rv == -1) { 3198 umtxq_unbusy_unlocked(&uq->uq_key); 3199 error = EFAULT; 3200 break; 3201 } 3202 3203 while (state & wrflags) { 3204 umtxq_lock(&uq->uq_key); 3205 umtxq_insert(uq); 3206 umtxq_unbusy(&uq->uq_key); 3207 3208 error = umtxq_sleep(uq, "urdlck", timeout == NULL ? 3209 NULL : &timo); 3210 3211 umtxq_busy(&uq->uq_key); 3212 umtxq_remove(uq); 3213 umtxq_unlock(&uq->uq_key); 3214 if (error) 3215 break; 3216 rv = fueword32(&rwlock->rw_state, &state); 3217 if (rv == -1) { 3218 error = EFAULT; 3219 break; 3220 } 3221 } 3222 3223 /* decrease read waiter count, and may clear read contention bit */ 3224 rv = fueword32(&rwlock->rw_blocked_readers, 3225 &blocked_readers); 3226 if (rv == 0) 3227 rv = suword32(&rwlock->rw_blocked_readers, 3228 blocked_readers - 1); 3229 if (rv == -1) { 3230 umtxq_unbusy_unlocked(&uq->uq_key); 3231 error = EFAULT; 3232 break; 3233 } 3234 if (blocked_readers == 1) { 3235 rv = fueword32(&rwlock->rw_state, &state); 3236 if (rv == -1) { 3237 umtxq_unbusy_unlocked(&uq->uq_key); 3238 error = EFAULT; 3239 break; 3240 } 3241 for (;;) { 3242 rv = casueword32(&rwlock->rw_state, state, 3243 &oldstate, state & ~URWLOCK_READ_WAITERS); 3244 if (rv == -1) { 3245 error = EFAULT; 3246 break; 3247 } 3248 if (rv == 0) { 3249 MPASS(oldstate == state); 3250 break; 3251 } 3252 state = oldstate; 3253 error1 = thread_check_susp(td, false); 3254 if (error1 != 0) { 3255 if (error == 0) 3256 error = error1; 3257 break; 3258 } 3259 } 3260 } 3261 3262 umtxq_unbusy_unlocked(&uq->uq_key); 3263 if (error != 0) 3264 break; 3265 } 3266 umtx_key_release(&uq->uq_key); 3267 if (error == ERESTART) 3268 error = EINTR; 3269 return (error); 3270 } 3271 3272 static int 3273 do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout) 3274 { 3275 struct umtx_abs_timeout timo; 3276 struct umtx_q *uq; 3277 uint32_t flags; 3278 int32_t state, oldstate; 3279 int32_t blocked_writers; 3280 int32_t blocked_readers; 3281 int error, error1, rv; 3282 3283 uq = td->td_umtxq; 3284 error = fueword32(&rwlock->rw_flags, &flags); 3285 if (error == -1) 3286 return (EFAULT); 3287 error = umtx_key_get(rwlock, TYPE_RWLOCK, 
GET_SHARE(flags), &uq->uq_key); 3288 if (error != 0) 3289 return (error); 3290 3291 if (timeout != NULL) 3292 umtx_abs_timeout_init2(&timo, timeout); 3293 3294 blocked_readers = 0; 3295 for (;;) { 3296 rv = fueword32(&rwlock->rw_state, &state); 3297 if (rv == -1) { 3298 umtx_key_release(&uq->uq_key); 3299 return (EFAULT); 3300 } 3301 while ((state & URWLOCK_WRITE_OWNER) == 0 && 3302 URWLOCK_READER_COUNT(state) == 0) { 3303 rv = casueword32(&rwlock->rw_state, state, 3304 &oldstate, state | URWLOCK_WRITE_OWNER); 3305 if (rv == -1) { 3306 umtx_key_release(&uq->uq_key); 3307 return (EFAULT); 3308 } 3309 if (rv == 0) { 3310 MPASS(oldstate == state); 3311 umtx_key_release(&uq->uq_key); 3312 return (0); 3313 } 3314 state = oldstate; 3315 error = thread_check_susp(td, true); 3316 if (error != 0) 3317 break; 3318 } 3319 3320 if (error) { 3321 if ((state & (URWLOCK_WRITE_OWNER | 3322 URWLOCK_WRITE_WAITERS)) == 0 && 3323 blocked_readers != 0) { 3324 umtxq_lock(&uq->uq_key); 3325 umtxq_busy(&uq->uq_key); 3326 umtxq_signal_queue(&uq->uq_key, INT_MAX, 3327 UMTX_SHARED_QUEUE); 3328 umtxq_unbusy(&uq->uq_key); 3329 umtxq_unlock(&uq->uq_key); 3330 } 3331 3332 break; 3333 } 3334 3335 /* grab monitor lock */ 3336 umtxq_lock(&uq->uq_key); 3337 umtxq_busy(&uq->uq_key); 3338 umtxq_unlock(&uq->uq_key); 3339 3340 /* 3341 * Re-read the state, in case it changed between the 3342 * try-lock above and the check below. 3343 */ 3344 rv = fueword32(&rwlock->rw_state, &state); 3345 if (rv == -1) 3346 error = EFAULT; 3347 3348 while (error == 0 && ((state & URWLOCK_WRITE_OWNER) || 3349 URWLOCK_READER_COUNT(state) != 0) && 3350 (state & URWLOCK_WRITE_WAITERS) == 0) { 3351 rv = casueword32(&rwlock->rw_state, state, 3352 &oldstate, state | URWLOCK_WRITE_WAITERS); 3353 if (rv == -1) { 3354 error = EFAULT; 3355 break; 3356 } 3357 if (rv == 0) { 3358 MPASS(oldstate == state); 3359 goto sleep; 3360 } 3361 state = oldstate; 3362 error = thread_check_susp(td, false); 3363 if (error != 0) 3364 break; 3365 } 3366 if (error != 0) { 3367 umtxq_unbusy_unlocked(&uq->uq_key); 3368 break; 3369 } 3370 3371 if ((state & URWLOCK_WRITE_OWNER) == 0 && 3372 URWLOCK_READER_COUNT(state) == 0) { 3373 umtxq_unbusy_unlocked(&uq->uq_key); 3374 error = thread_check_susp(td, false); 3375 if (error != 0) 3376 break; 3377 continue; 3378 } 3379 sleep: 3380 rv = fueword32(&rwlock->rw_blocked_writers, 3381 &blocked_writers); 3382 if (rv == 0) 3383 rv = suword32(&rwlock->rw_blocked_writers, 3384 blocked_writers + 1); 3385 if (rv == -1) { 3386 umtxq_unbusy_unlocked(&uq->uq_key); 3387 error = EFAULT; 3388 break; 3389 } 3390 3391 while ((state & URWLOCK_WRITE_OWNER) || 3392 URWLOCK_READER_COUNT(state) != 0) { 3393 umtxq_lock(&uq->uq_key); 3394 umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE); 3395 umtxq_unbusy(&uq->uq_key); 3396 3397 error = umtxq_sleep(uq, "uwrlck", timeout == NULL ? 
3398 NULL : &timo); 3399 3400 umtxq_busy(&uq->uq_key); 3401 umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE); 3402 umtxq_unlock(&uq->uq_key); 3403 if (error) 3404 break; 3405 rv = fueword32(&rwlock->rw_state, &state); 3406 if (rv == -1) { 3407 error = EFAULT; 3408 break; 3409 } 3410 } 3411 3412 rv = fueword32(&rwlock->rw_blocked_writers, 3413 &blocked_writers); 3414 if (rv == 0) 3415 rv = suword32(&rwlock->rw_blocked_writers, 3416 blocked_writers - 1); 3417 if (rv == -1) { 3418 umtxq_unbusy_unlocked(&uq->uq_key); 3419 error = EFAULT; 3420 break; 3421 } 3422 if (blocked_writers == 1) { 3423 rv = fueword32(&rwlock->rw_state, &state); 3424 if (rv == -1) { 3425 umtxq_unbusy_unlocked(&uq->uq_key); 3426 error = EFAULT; 3427 break; 3428 } 3429 for (;;) { 3430 rv = casueword32(&rwlock->rw_state, state, 3431 &oldstate, state & ~URWLOCK_WRITE_WAITERS); 3432 if (rv == -1) { 3433 error = EFAULT; 3434 break; 3435 } 3436 if (rv == 0) { 3437 MPASS(oldstate == state); 3438 break; 3439 } 3440 state = oldstate; 3441 error1 = thread_check_susp(td, false); 3442 /* 3443 * We are leaving the URWLOCK_WRITE_WAITERS 3444 * behind, but this should not harm the 3445 * correctness. 3446 */ 3447 if (error1 != 0) { 3448 if (error == 0) 3449 error = error1; 3450 break; 3451 } 3452 } 3453 rv = fueword32(&rwlock->rw_blocked_readers, 3454 &blocked_readers); 3455 if (rv == -1) { 3456 umtxq_unbusy_unlocked(&uq->uq_key); 3457 error = EFAULT; 3458 break; 3459 } 3460 } else 3461 blocked_readers = 0; 3462 3463 umtxq_unbusy_unlocked(&uq->uq_key); 3464 } 3465 3466 umtx_key_release(&uq->uq_key); 3467 if (error == ERESTART) 3468 error = EINTR; 3469 return (error); 3470 } 3471 3472 static int 3473 do_rw_unlock(struct thread *td, struct urwlock *rwlock) 3474 { 3475 struct umtx_q *uq; 3476 uint32_t flags; 3477 int32_t state, oldstate; 3478 int error, rv, q, count; 3479 3480 uq = td->td_umtxq; 3481 error = fueword32(&rwlock->rw_flags, &flags); 3482 if (error == -1) 3483 return (EFAULT); 3484 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3485 if (error != 0) 3486 return (error); 3487 3488 error = fueword32(&rwlock->rw_state, &state); 3489 if (error == -1) { 3490 error = EFAULT; 3491 goto out; 3492 } 3493 if (state & URWLOCK_WRITE_OWNER) { 3494 for (;;) { 3495 rv = casueword32(&rwlock->rw_state, state, 3496 &oldstate, state & ~URWLOCK_WRITE_OWNER); 3497 if (rv == -1) { 3498 error = EFAULT; 3499 goto out; 3500 } 3501 if (rv == 1) { 3502 state = oldstate; 3503 if (!(oldstate & URWLOCK_WRITE_OWNER)) { 3504 error = EPERM; 3505 goto out; 3506 } 3507 error = thread_check_susp(td, true); 3508 if (error != 0) 3509 goto out; 3510 } else 3511 break; 3512 } 3513 } else if (URWLOCK_READER_COUNT(state) != 0) { 3514 for (;;) { 3515 rv = casueword32(&rwlock->rw_state, state, 3516 &oldstate, state - 1); 3517 if (rv == -1) { 3518 error = EFAULT; 3519 goto out; 3520 } 3521 if (rv == 1) { 3522 state = oldstate; 3523 if (URWLOCK_READER_COUNT(oldstate) == 0) { 3524 error = EPERM; 3525 goto out; 3526 } 3527 error = thread_check_susp(td, true); 3528 if (error != 0) 3529 goto out; 3530 } else 3531 break; 3532 } 3533 } else { 3534 error = EPERM; 3535 goto out; 3536 } 3537 3538 count = 0; 3539 3540 if (!(flags & URWLOCK_PREFER_READER)) { 3541 if (state & URWLOCK_WRITE_WAITERS) { 3542 count = 1; 3543 q = UMTX_EXCLUSIVE_QUEUE; 3544 } else if (state & URWLOCK_READ_WAITERS) { 3545 count = INT_MAX; 3546 q = UMTX_SHARED_QUEUE; 3547 } 3548 } else { 3549 if (state & URWLOCK_READ_WAITERS) { 3550 count = INT_MAX; 3551 q = UMTX_SHARED_QUEUE; 3552 } 
else if (state & URWLOCK_WRITE_WAITERS) { 3553 count = 1; 3554 q = UMTX_EXCLUSIVE_QUEUE; 3555 } 3556 } 3557 3558 if (count) { 3559 umtxq_lock(&uq->uq_key); 3560 umtxq_busy(&uq->uq_key); 3561 umtxq_signal_queue(&uq->uq_key, count, q); 3562 umtxq_unbusy(&uq->uq_key); 3563 umtxq_unlock(&uq->uq_key); 3564 } 3565 out: 3566 umtx_key_release(&uq->uq_key); 3567 return (error); 3568 } 3569 3570 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3571 static int 3572 do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout) 3573 { 3574 struct umtx_abs_timeout timo; 3575 struct umtx_q *uq; 3576 uint32_t flags, count, count1; 3577 int error, rv, rv1; 3578 3579 uq = td->td_umtxq; 3580 error = fueword32(&sem->_flags, &flags); 3581 if (error == -1) 3582 return (EFAULT); 3583 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3584 if (error != 0) 3585 return (error); 3586 3587 if (timeout != NULL) 3588 umtx_abs_timeout_init2(&timo, timeout); 3589 3590 again: 3591 umtxq_lock(&uq->uq_key); 3592 umtxq_busy(&uq->uq_key); 3593 umtxq_insert(uq); 3594 umtxq_unlock(&uq->uq_key); 3595 rv = casueword32(&sem->_has_waiters, 0, &count1, 1); 3596 if (rv != -1) 3597 rv1 = fueword32(&sem->_count, &count); 3598 if (rv == -1 || rv1 == -1 || count != 0 || (rv == 1 && count1 == 0)) { 3599 if (rv == 0) 3600 rv = suword32(&sem->_has_waiters, 0); 3601 umtxq_lock(&uq->uq_key); 3602 umtxq_unbusy(&uq->uq_key); 3603 umtxq_remove(uq); 3604 umtxq_unlock(&uq->uq_key); 3605 if (rv == -1 || rv1 == -1) { 3606 error = EFAULT; 3607 goto out; 3608 } 3609 if (count != 0) { 3610 error = 0; 3611 goto out; 3612 } 3613 MPASS(rv == 1 && count1 == 0); 3614 rv = thread_check_susp(td, true); 3615 if (rv == 0) 3616 goto again; 3617 error = rv; 3618 goto out; 3619 } 3620 umtxq_lock(&uq->uq_key); 3621 umtxq_unbusy(&uq->uq_key); 3622 3623 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 3624 3625 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3626 error = 0; 3627 else { 3628 umtxq_remove(uq); 3629 /* A relative timeout cannot be restarted. */ 3630 if (error == ERESTART && timeout != NULL && 3631 (timeout->_flags & UMTX_ABSTIME) == 0) 3632 error = EINTR; 3633 } 3634 umtxq_unlock(&uq->uq_key); 3635 out: 3636 umtx_key_release(&uq->uq_key); 3637 return (error); 3638 } 3639 3640 /* 3641 * Signal a userland semaphore. 3642 */ 3643 static int 3644 do_sem_wake(struct thread *td, struct _usem *sem) 3645 { 3646 struct umtx_key key; 3647 int error, cnt; 3648 uint32_t flags; 3649 3650 error = fueword32(&sem->_flags, &flags); 3651 if (error == -1) 3652 return (EFAULT); 3653 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3654 return (error); 3655 umtxq_lock(&key); 3656 umtxq_busy(&key); 3657 cnt = umtxq_count(&key); 3658 if (cnt > 0) { 3659 /* 3660 * Check if count is greater than 0, this means the memory is 3661 * still being referenced by user code, so we can safely 3662 * update _has_waiters flag. 
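 * The flag is cleared only when this wakeup will empty the queue
 * (cnt == 1 below), and a single waiter is woken per call.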
3663 */ 3664 if (cnt == 1) { 3665 umtxq_unlock(&key); 3666 error = suword32(&sem->_has_waiters, 0); 3667 umtxq_lock(&key); 3668 if (error == -1) 3669 error = EFAULT; 3670 } 3671 umtxq_signal(&key, 1); 3672 } 3673 umtxq_unbusy(&key); 3674 umtxq_unlock(&key); 3675 umtx_key_release(&key); 3676 return (error); 3677 } 3678 #endif 3679 3680 static int 3681 do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout) 3682 { 3683 struct umtx_abs_timeout timo; 3684 struct umtx_q *uq; 3685 uint32_t count, flags; 3686 int error, rv; 3687 3688 uq = td->td_umtxq; 3689 flags = fuword32(&sem->_flags); 3690 if (timeout != NULL) 3691 umtx_abs_timeout_init2(&timo, timeout); 3692 3693 again: 3694 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3695 if (error != 0) 3696 return (error); 3697 umtxq_lock(&uq->uq_key); 3698 umtxq_busy(&uq->uq_key); 3699 umtxq_insert(uq); 3700 umtxq_unlock(&uq->uq_key); 3701 rv = fueword32(&sem->_count, &count); 3702 if (rv == -1) { 3703 umtxq_lock(&uq->uq_key); 3704 umtxq_unbusy(&uq->uq_key); 3705 umtxq_remove(uq); 3706 umtxq_unlock(&uq->uq_key); 3707 umtx_key_release(&uq->uq_key); 3708 return (EFAULT); 3709 } 3710 for (;;) { 3711 if (USEM_COUNT(count) != 0) { 3712 umtxq_lock(&uq->uq_key); 3713 umtxq_unbusy(&uq->uq_key); 3714 umtxq_remove(uq); 3715 umtxq_unlock(&uq->uq_key); 3716 umtx_key_release(&uq->uq_key); 3717 return (0); 3718 } 3719 if (count == USEM_HAS_WAITERS) 3720 break; 3721 rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS); 3722 if (rv == 0) 3723 break; 3724 umtxq_lock(&uq->uq_key); 3725 umtxq_unbusy(&uq->uq_key); 3726 umtxq_remove(uq); 3727 umtxq_unlock(&uq->uq_key); 3728 umtx_key_release(&uq->uq_key); 3729 if (rv == -1) 3730 return (EFAULT); 3731 rv = thread_check_susp(td, true); 3732 if (rv != 0) 3733 return (rv); 3734 goto again; 3735 } 3736 umtxq_lock(&uq->uq_key); 3737 umtxq_unbusy(&uq->uq_key); 3738 3739 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 3740 3741 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3742 error = 0; 3743 else { 3744 umtxq_remove(uq); 3745 if (timeout != NULL && (timeout->_flags & UMTX_ABSTIME) == 0) { 3746 /* A relative timeout cannot be restarted. */ 3747 if (error == ERESTART) 3748 error = EINTR; 3749 if (error == EINTR) { 3750 kern_clock_gettime(curthread, timo.clockid, 3751 &timo.cur); 3752 timespecsub(&timo.end, &timo.cur, 3753 &timeout->_timeout); 3754 } 3755 } 3756 } 3757 umtxq_unlock(&uq->uq_key); 3758 umtx_key_release(&uq->uq_key); 3759 return (error); 3760 } 3761 3762 /* 3763 * Signal a userland semaphore. 3764 */ 3765 static int 3766 do_sem2_wake(struct thread *td, struct _usem2 *sem) 3767 { 3768 struct umtx_key key; 3769 int error, cnt, rv; 3770 uint32_t count, flags; 3771 3772 rv = fueword32(&sem->_flags, &flags); 3773 if (rv == -1) 3774 return (EFAULT); 3775 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3776 return (error); 3777 umtxq_lock(&key); 3778 umtxq_busy(&key); 3779 cnt = umtxq_count(&key); 3780 if (cnt > 0) { 3781 /* 3782 * If this was the last sleeping thread, clear the waiters 3783 * flag in _count. 
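 * The clearing below is done with a CAS loop so that the count bits
 * sharing the word are preserved.  A sketch of the expected userland
 * pairing (an assumption, not something enforced here): sem_post()
 * increments _count and issues UMTX_OP_SEM2_WAKE only when it observed
 * the USEM_HAS_WAITERS bit set.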
3784 */ 3785 if (cnt == 1) { 3786 umtxq_unlock(&key); 3787 rv = fueword32(&sem->_count, &count); 3788 while (rv != -1 && count & USEM_HAS_WAITERS) { 3789 rv = casueword32(&sem->_count, count, &count, 3790 count & ~USEM_HAS_WAITERS); 3791 if (rv == 1) { 3792 rv = thread_check_susp(td, true); 3793 if (rv != 0) 3794 break; 3795 } 3796 } 3797 if (rv == -1) 3798 error = EFAULT; 3799 else if (rv > 0) { 3800 error = rv; 3801 } 3802 umtxq_lock(&key); 3803 } 3804 3805 umtxq_signal(&key, 1); 3806 } 3807 umtxq_unbusy(&key); 3808 umtxq_unlock(&key); 3809 umtx_key_release(&key); 3810 return (error); 3811 } 3812 3813 #ifdef COMPAT_FREEBSD10 3814 int 3815 freebsd10__umtx_lock(struct thread *td, struct freebsd10__umtx_lock_args *uap) 3816 { 3817 return (do_lock_umtx(td, uap->umtx, td->td_tid, 0)); 3818 } 3819 3820 int 3821 freebsd10__umtx_unlock(struct thread *td, 3822 struct freebsd10__umtx_unlock_args *uap) 3823 { 3824 return (do_unlock_umtx(td, uap->umtx, td->td_tid)); 3825 } 3826 #endif 3827 3828 inline int 3829 umtx_copyin_timeout(const void *uaddr, struct timespec *tsp) 3830 { 3831 int error; 3832 3833 error = copyin(uaddr, tsp, sizeof(*tsp)); 3834 if (error == 0) { 3835 if (!timespecvalid_interval(tsp)) 3836 error = EINVAL; 3837 } 3838 return (error); 3839 } 3840 3841 static inline int 3842 umtx_copyin_umtx_time(const void *uaddr, size_t size, struct _umtx_time *tp) 3843 { 3844 int error; 3845 3846 if (size <= sizeof(tp->_timeout)) { 3847 tp->_clockid = CLOCK_REALTIME; 3848 tp->_flags = 0; 3849 error = copyin(uaddr, &tp->_timeout, sizeof(tp->_timeout)); 3850 } else 3851 error = copyin(uaddr, tp, sizeof(*tp)); 3852 if (error != 0) 3853 return (error); 3854 if (!timespecvalid_interval(&tp->_timeout)) 3855 return (EINVAL); 3856 return (0); 3857 } 3858 3859 static int 3860 umtx_copyin_robust_lists(const void *uaddr, size_t size, 3861 struct umtx_robust_lists_params *rb) 3862 { 3863 3864 if (size > sizeof(*rb)) 3865 return (EINVAL); 3866 return (copyin(uaddr, rb, size)); 3867 } 3868 3869 static int 3870 umtx_copyout_timeout(void *uaddr, size_t sz, struct timespec *tsp) 3871 { 3872 3873 /* 3874 * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time) 3875 * and we're only called if sz >= sizeof(timespec) as supplied in the 3876 * copyops. 3877 */ 3878 KASSERT(sz >= sizeof(*tsp), 3879 ("umtx_copyops specifies incorrect sizes")); 3880 3881 return (copyout(tsp, uaddr, sizeof(*tsp))); 3882 } 3883 3884 #ifdef COMPAT_FREEBSD10 3885 static int 3886 __umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap, 3887 const struct umtx_copyops *ops) 3888 { 3889 struct timespec *ts, timeout; 3890 int error; 3891 3892 /* Allow a null timespec (wait forever). 
*/ 3893 if (uap->uaddr2 == NULL) 3894 ts = NULL; 3895 else { 3896 error = ops->copyin_timeout(uap->uaddr2, &timeout); 3897 if (error != 0) 3898 return (error); 3899 ts = &timeout; 3900 } 3901 #ifdef COMPAT_FREEBSD32 3902 if (ops->compat32) 3903 return (do_lock_umtx32(td, uap->obj, uap->val, ts)); 3904 #endif 3905 return (do_lock_umtx(td, uap->obj, uap->val, ts)); 3906 } 3907 3908 static int 3909 __umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap, 3910 const struct umtx_copyops *ops) 3911 { 3912 #ifdef COMPAT_FREEBSD32 3913 if (ops->compat32) 3914 return (do_unlock_umtx32(td, uap->obj, uap->val)); 3915 #endif 3916 return (do_unlock_umtx(td, uap->obj, uap->val)); 3917 } 3918 #endif /* COMPAT_FREEBSD10 */ 3919 3920 #if !defined(COMPAT_FREEBSD10) 3921 static int 3922 __umtx_op_unimpl(struct thread *td __unused, struct _umtx_op_args *uap __unused, 3923 const struct umtx_copyops *ops __unused) 3924 { 3925 return (EOPNOTSUPP); 3926 } 3927 #endif /* COMPAT_FREEBSD10 */ 3928 3929 static int 3930 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap, 3931 const struct umtx_copyops *ops) 3932 { 3933 struct _umtx_time timeout, *tm_p; 3934 int error; 3935 3936 if (uap->uaddr2 == NULL) 3937 tm_p = NULL; 3938 else { 3939 error = ops->copyin_umtx_time( 3940 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3941 if (error != 0) 3942 return (error); 3943 tm_p = &timeout; 3944 } 3945 return (do_wait(td, uap->obj, uap->val, tm_p, ops->compat32, 0)); 3946 } 3947 3948 static int 3949 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap, 3950 const struct umtx_copyops *ops) 3951 { 3952 struct _umtx_time timeout, *tm_p; 3953 int error; 3954 3955 if (uap->uaddr2 == NULL) 3956 tm_p = NULL; 3957 else { 3958 error = ops->copyin_umtx_time( 3959 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3960 if (error != 0) 3961 return (error); 3962 tm_p = &timeout; 3963 } 3964 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); 3965 } 3966 3967 static int 3968 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap, 3969 const struct umtx_copyops *ops) 3970 { 3971 struct _umtx_time *tm_p, timeout; 3972 int error; 3973 3974 if (uap->uaddr2 == NULL) 3975 tm_p = NULL; 3976 else { 3977 error = ops->copyin_umtx_time( 3978 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3979 if (error != 0) 3980 return (error); 3981 tm_p = &timeout; 3982 } 3983 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1)); 3984 } 3985 3986 static int 3987 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap, 3988 const struct umtx_copyops *ops __unused) 3989 { 3990 3991 return (kern_umtx_wake(td, uap->obj, uap->val, 0)); 3992 } 3993 3994 #define BATCH_SIZE 128 3995 static int 3996 __umtx_op_nwake_private_native(struct thread *td, struct _umtx_op_args *uap) 3997 { 3998 char *uaddrs[BATCH_SIZE], **upp; 3999 int count, error, i, pos, tocopy; 4000 4001 upp = (char **)uap->obj; 4002 error = 0; 4003 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 4004 pos += tocopy) { 4005 tocopy = MIN(count, BATCH_SIZE); 4006 error = copyin(upp + pos, uaddrs, tocopy * sizeof(char *)); 4007 if (error != 0) 4008 break; 4009 for (i = 0; i < tocopy; ++i) { 4010 kern_umtx_wake(td, uaddrs[i], INT_MAX, 1); 4011 } 4012 maybe_yield(); 4013 } 4014 return (error); 4015 } 4016 4017 static int 4018 __umtx_op_nwake_private_compat32(struct thread *td, struct _umtx_op_args *uap) 4019 { 4020 uint32_t uaddrs[BATCH_SIZE], *upp; 4021 int count, error, i, pos, tocopy; 4022 4023 upp = (uint32_t *)uap->obj; 4024 error = 0; 4025 for (count = 
uap->val, pos = 0; count > 0; count -= tocopy, 4026 pos += tocopy) { 4027 tocopy = MIN(count, BATCH_SIZE); 4028 error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t)); 4029 if (error != 0) 4030 break; 4031 for (i = 0; i < tocopy; ++i) { 4032 kern_umtx_wake(td, (void *)(uintptr_t)uaddrs[i], 4033 INT_MAX, 1); 4034 } 4035 maybe_yield(); 4036 } 4037 return (error); 4038 } 4039 4040 static int 4041 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap, 4042 const struct umtx_copyops *ops) 4043 { 4044 4045 if (ops->compat32) 4046 return (__umtx_op_nwake_private_compat32(td, uap)); 4047 return (__umtx_op_nwake_private_native(td, uap)); 4048 } 4049 4050 static int 4051 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap, 4052 const struct umtx_copyops *ops __unused) 4053 { 4054 4055 return (kern_umtx_wake(td, uap->obj, uap->val, 1)); 4056 } 4057 4058 static int 4059 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap, 4060 const struct umtx_copyops *ops) 4061 { 4062 struct _umtx_time *tm_p, timeout; 4063 int error; 4064 4065 /* Allow a null timespec (wait forever). */ 4066 if (uap->uaddr2 == NULL) 4067 tm_p = NULL; 4068 else { 4069 error = ops->copyin_umtx_time( 4070 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 4071 if (error != 0) 4072 return (error); 4073 tm_p = &timeout; 4074 } 4075 return (do_lock_umutex(td, uap->obj, tm_p, 0)); 4076 } 4077 4078 static int 4079 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap, 4080 const struct umtx_copyops *ops __unused) 4081 { 4082 4083 return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY)); 4084 } 4085 4086 static int 4087 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap, 4088 const struct umtx_copyops *ops) 4089 { 4090 struct _umtx_time *tm_p, timeout; 4091 int error; 4092 4093 /* Allow a null timespec (wait forever). */ 4094 if (uap->uaddr2 == NULL) 4095 tm_p = NULL; 4096 else { 4097 error = ops->copyin_umtx_time( 4098 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 4099 if (error != 0) 4100 return (error); 4101 tm_p = &timeout; 4102 } 4103 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT)); 4104 } 4105 4106 static int 4107 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap, 4108 const struct umtx_copyops *ops __unused) 4109 { 4110 4111 return (do_wake_umutex(td, uap->obj)); 4112 } 4113 4114 static int 4115 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap, 4116 const struct umtx_copyops *ops __unused) 4117 { 4118 4119 return (do_unlock_umutex(td, uap->obj, false)); 4120 } 4121 4122 static int 4123 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap, 4124 const struct umtx_copyops *ops __unused) 4125 { 4126 4127 return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1)); 4128 } 4129 4130 static int 4131 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap, 4132 const struct umtx_copyops *ops) 4133 { 4134 struct timespec *ts, timeout; 4135 int error; 4136 4137 /* Allow a null timespec (wait forever). 
*/ 4138 if (uap->uaddr2 == NULL) 4139 ts = NULL; 4140 else { 4141 error = ops->copyin_timeout(uap->uaddr2, &timeout); 4142 if (error != 0) 4143 return (error); 4144 ts = &timeout; 4145 } 4146 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 4147 } 4148 4149 static int 4150 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap, 4151 const struct umtx_copyops *ops __unused) 4152 { 4153 4154 return (do_cv_signal(td, uap->obj)); 4155 } 4156 4157 static int 4158 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap, 4159 const struct umtx_copyops *ops __unused) 4160 { 4161 4162 return (do_cv_broadcast(td, uap->obj)); 4163 } 4164 4165 static int 4166 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap, 4167 const struct umtx_copyops *ops) 4168 { 4169 struct _umtx_time timeout; 4170 int error; 4171 4172 /* Allow a null timespec (wait forever). */ 4173 if (uap->uaddr2 == NULL) { 4174 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 4175 } else { 4176 error = ops->copyin_umtx_time(uap->uaddr2, 4177 (size_t)uap->uaddr1, &timeout); 4178 if (error != 0) 4179 return (error); 4180 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 4181 } 4182 return (error); 4183 } 4184 4185 static int 4186 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap, 4187 const struct umtx_copyops *ops) 4188 { 4189 struct _umtx_time timeout; 4190 int error; 4191 4192 /* Allow a null timespec (wait forever). */ 4193 if (uap->uaddr2 == NULL) { 4194 error = do_rw_wrlock(td, uap->obj, 0); 4195 } else { 4196 error = ops->copyin_umtx_time(uap->uaddr2, 4197 (size_t)uap->uaddr1, &timeout); 4198 if (error != 0) 4199 return (error); 4200 4201 error = do_rw_wrlock(td, uap->obj, &timeout); 4202 } 4203 return (error); 4204 } 4205 4206 static int 4207 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap, 4208 const struct umtx_copyops *ops __unused) 4209 { 4210 4211 return (do_rw_unlock(td, uap->obj)); 4212 } 4213 4214 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4215 static int 4216 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap, 4217 const struct umtx_copyops *ops) 4218 { 4219 struct _umtx_time *tm_p, timeout; 4220 int error; 4221 4222 /* Allow a null timespec (wait forever). */ 4223 if (uap->uaddr2 == NULL) 4224 tm_p = NULL; 4225 else { 4226 error = ops->copyin_umtx_time( 4227 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 4228 if (error != 0) 4229 return (error); 4230 tm_p = &timeout; 4231 } 4232 return (do_sem_wait(td, uap->obj, tm_p)); 4233 } 4234 4235 static int 4236 __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap, 4237 const struct umtx_copyops *ops __unused) 4238 { 4239 4240 return (do_sem_wake(td, uap->obj)); 4241 } 4242 #endif 4243 4244 static int 4245 __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap, 4246 const struct umtx_copyops *ops __unused) 4247 { 4248 4249 return (do_wake2_umutex(td, uap->obj, uap->val)); 4250 } 4251 4252 static int 4253 __umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap, 4254 const struct umtx_copyops *ops) 4255 { 4256 struct _umtx_time *tm_p, timeout; 4257 size_t uasize; 4258 int error; 4259 4260 /* Allow a null timespec (wait forever). 
*/ 4261 if (uap->uaddr2 == NULL) { 4262 uasize = 0; 4263 tm_p = NULL; 4264 } else { 4265 uasize = (size_t)uap->uaddr1; 4266 error = ops->copyin_umtx_time(uap->uaddr2, uasize, &timeout); 4267 if (error != 0) 4268 return (error); 4269 tm_p = &timeout; 4270 } 4271 error = do_sem2_wait(td, uap->obj, tm_p); 4272 if (error == EINTR && uap->uaddr2 != NULL && 4273 (timeout._flags & UMTX_ABSTIME) == 0 && 4274 uasize >= ops->umtx_time_sz + ops->timespec_sz) { 4275 error = ops->copyout_timeout( 4276 (void *)((uintptr_t)uap->uaddr2 + ops->umtx_time_sz), 4277 uasize - ops->umtx_time_sz, &timeout._timeout); 4278 if (error == 0) { 4279 error = EINTR; 4280 } 4281 } 4282 4283 return (error); 4284 } 4285 4286 static int 4287 __umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap, 4288 const struct umtx_copyops *ops __unused) 4289 { 4290 4291 return (do_sem2_wake(td, uap->obj)); 4292 } 4293 4294 #define USHM_OBJ_UMTX(o) \ 4295 ((struct umtx_shm_obj_list *)(&(o)->umtx_data)) 4296 4297 #define USHMF_LINKED 0x0001 4298 struct umtx_shm_reg { 4299 TAILQ_ENTRY(umtx_shm_reg) ushm_reg_link; 4300 LIST_ENTRY(umtx_shm_reg) ushm_obj_link; 4301 struct umtx_key ushm_key; 4302 struct ucred *ushm_cred; 4303 struct shmfd *ushm_obj; 4304 u_int ushm_refcnt; 4305 u_int ushm_flags; 4306 }; 4307 4308 LIST_HEAD(umtx_shm_obj_list, umtx_shm_reg); 4309 TAILQ_HEAD(umtx_shm_reg_head, umtx_shm_reg); 4310 4311 static uma_zone_t umtx_shm_reg_zone; 4312 static struct umtx_shm_reg_head umtx_shm_registry[UMTX_CHAINS]; 4313 static struct mtx umtx_shm_lock; 4314 static struct umtx_shm_reg_head umtx_shm_reg_delfree = 4315 TAILQ_HEAD_INITIALIZER(umtx_shm_reg_delfree); 4316 4317 static void umtx_shm_free_reg(struct umtx_shm_reg *reg); 4318 4319 static void 4320 umtx_shm_reg_delfree_tq(void *context __unused, int pending __unused) 4321 { 4322 struct umtx_shm_reg_head d; 4323 struct umtx_shm_reg *reg, *reg1; 4324 4325 TAILQ_INIT(&d); 4326 mtx_lock(&umtx_shm_lock); 4327 TAILQ_CONCAT(&d, &umtx_shm_reg_delfree, ushm_reg_link); 4328 mtx_unlock(&umtx_shm_lock); 4329 TAILQ_FOREACH_SAFE(reg, &d, ushm_reg_link, reg1) { 4330 TAILQ_REMOVE(&d, reg, ushm_reg_link); 4331 umtx_shm_free_reg(reg); 4332 } 4333 } 4334 4335 static struct task umtx_shm_reg_delfree_task = 4336 TASK_INITIALIZER(0, umtx_shm_reg_delfree_tq, NULL); 4337 4338 /* 4339 * Returns 0 if a SHM with the passed key is found in the registry, in which 4340 * case it is returned through 'oreg'. Otherwise, returns an error among ESRCH 4341 * (no corresponding SHM; ESRCH was chosen for compatibility, ENOENT would have 4342 * been preferable) or EOVERFLOW (there is a corresponding SHM, but reference 4343 * count would overflow, so can't return it), in which case '*oreg' is left 4344 * unchanged. 
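 * On success the reference count of the matched registration has been
 * bumped for the caller, who must eventually drop it with
 * umtx_shm_unref_reg(reg, false).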
4345 */ 4346 static int 4347 umtx_shm_find_reg_locked(const struct umtx_key *key, 4348 struct umtx_shm_reg **const oreg) 4349 { 4350 struct umtx_shm_reg *reg; 4351 struct umtx_shm_reg_head *reg_head; 4352 4353 KASSERT(key->shared, ("umtx_p_find_rg: private key")); 4354 mtx_assert(&umtx_shm_lock, MA_OWNED); 4355 reg_head = &umtx_shm_registry[key->hash]; 4356 TAILQ_FOREACH(reg, reg_head, ushm_reg_link) { 4357 KASSERT(reg->ushm_key.shared, 4358 ("non-shared key on reg %p %d", reg, reg->ushm_key.shared)); 4359 if (reg->ushm_key.info.shared.object == 4360 key->info.shared.object && 4361 reg->ushm_key.info.shared.offset == 4362 key->info.shared.offset) { 4363 KASSERT(reg->ushm_key.type == TYPE_SHM, ("TYPE_USHM")); 4364 KASSERT(reg->ushm_refcnt != 0, 4365 ("reg %p refcnt 0 onlist", reg)); 4366 KASSERT((reg->ushm_flags & USHMF_LINKED) != 0, 4367 ("reg %p not linked", reg)); 4368 /* 4369 * Don't let overflow happen, just deny a new reference 4370 * (this is additional protection against some reference 4371 * count leak, which is known not to be the case at the 4372 * time of this writing). 4373 */ 4374 if (__predict_false(reg->ushm_refcnt == UINT_MAX)) 4375 return (EOVERFLOW); 4376 reg->ushm_refcnt++; 4377 *oreg = reg; 4378 return (0); 4379 } 4380 } 4381 return (ESRCH); 4382 } 4383 4384 /* 4385 * Calls umtx_shm_find_reg_unlocked() under the 'umtx_shm_lock'. 4386 */ 4387 static int 4388 umtx_shm_find_reg(const struct umtx_key *key, struct umtx_shm_reg **const oreg) 4389 { 4390 int error; 4391 4392 mtx_lock(&umtx_shm_lock); 4393 error = umtx_shm_find_reg_locked(key, oreg); 4394 mtx_unlock(&umtx_shm_lock); 4395 return (error); 4396 } 4397 4398 static void 4399 umtx_shm_free_reg(struct umtx_shm_reg *reg) 4400 { 4401 4402 chgumtxcnt(reg->ushm_cred->cr_ruidinfo, -1, 0); 4403 crfree(reg->ushm_cred); 4404 shm_drop(reg->ushm_obj); 4405 uma_zfree(umtx_shm_reg_zone, reg); 4406 } 4407 4408 static bool 4409 umtx_shm_unref_reg_locked(struct umtx_shm_reg *reg, bool linked_ref) 4410 { 4411 mtx_assert(&umtx_shm_lock, MA_OWNED); 4412 KASSERT(reg->ushm_refcnt != 0, ("ushm_reg %p refcnt 0", reg)); 4413 4414 if (linked_ref) { 4415 if ((reg->ushm_flags & USHMF_LINKED) == 0) 4416 /* 4417 * The reference tied to USHMF_LINKED has already been 4418 * released concurrently. 4419 */ 4420 return (false); 4421 4422 TAILQ_REMOVE(&umtx_shm_registry[reg->ushm_key.hash], reg, 4423 ushm_reg_link); 4424 LIST_REMOVE(reg, ushm_obj_link); 4425 reg->ushm_flags &= ~USHMF_LINKED; 4426 } 4427 4428 reg->ushm_refcnt--; 4429 return (reg->ushm_refcnt == 0); 4430 } 4431 4432 static void 4433 umtx_shm_unref_reg(struct umtx_shm_reg *reg, bool linked_ref) 4434 { 4435 vm_object_t object; 4436 bool dofree; 4437 4438 if (linked_ref) { 4439 /* 4440 * Note: This may be executed multiple times on the same 4441 * shared-memory VM object in presence of concurrent callers 4442 * because 'umtx_shm_lock' is not held all along in umtx_shm() 4443 * and here. 
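 * Setting OBJ_UMTXDEAD more than once is harmless: umtx_shm_alive()
 * only tests whether the flag is present.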
4444 */ 4445 object = reg->ushm_obj->shm_object; 4446 VM_OBJECT_WLOCK(object); 4447 vm_object_set_flag(object, OBJ_UMTXDEAD); 4448 VM_OBJECT_WUNLOCK(object); 4449 } 4450 mtx_lock(&umtx_shm_lock); 4451 dofree = umtx_shm_unref_reg_locked(reg, linked_ref); 4452 mtx_unlock(&umtx_shm_lock); 4453 if (dofree) 4454 umtx_shm_free_reg(reg); 4455 } 4456
4457 void 4458 umtx_shm_object_init(vm_object_t object) 4459 { 4460 4461 LIST_INIT(USHM_OBJ_UMTX(object)); 4462 } 4463
4464 void 4465 umtx_shm_object_terminated(vm_object_t object) 4466 { 4467 struct umtx_shm_reg *reg, *reg1; 4468 bool dofree; 4469 4470 if (LIST_EMPTY(USHM_OBJ_UMTX(object))) 4471 return; 4472 4473 dofree = false; 4474 mtx_lock(&umtx_shm_lock); 4475 LIST_FOREACH_SAFE(reg, USHM_OBJ_UMTX(object), ushm_obj_link, reg1) { 4476 if (umtx_shm_unref_reg_locked(reg, true)) { 4477 TAILQ_INSERT_TAIL(&umtx_shm_reg_delfree, reg, 4478 ushm_reg_link); 4479 dofree = true; 4480 } 4481 } 4482 mtx_unlock(&umtx_shm_lock); 4483 if (dofree) 4484 taskqueue_enqueue(taskqueue_thread, &umtx_shm_reg_delfree_task); 4485 } 4486
4487 static int 4488 umtx_shm_create_reg(struct thread *td, const struct umtx_key *key, 4489 struct umtx_shm_reg **res) 4490 { 4491 struct shmfd *shm; 4492 struct umtx_shm_reg *reg, *reg1; 4493 struct ucred *cred; 4494 int error; 4495 4496 error = umtx_shm_find_reg(key, res); 4497 if (error != ESRCH) { 4498 /* 4499 * Either no error occurred, and '*res' was filled, or EOVERFLOW 4500 * was returned, indicating a reference count limit, and we 4501 * won't create a duplicate registration. In both cases, we are 4502 * done. 4503 */ 4504 return (error); 4505 } 4506 /* No entry, we will create one. */ 4507 4508 cred = td->td_ucred; 4509 if (!chgumtxcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_UMTXP))) 4510 return (ENOMEM); 4511 shm = shm_alloc(td->td_ucred, O_RDWR, false); 4512 if (shm == NULL) { 4513 chgumtxcnt(cred->cr_ruidinfo, -1, 0); 4514 return (ENOMEM); 4515 } 4516 reg = uma_zalloc(umtx_shm_reg_zone, M_WAITOK | M_ZERO); 4517 bcopy(key, &reg->ushm_key, sizeof(*key)); 4518 reg->ushm_obj = shm; 4519 reg->ushm_cred = crhold(cred); 4520 error = shm_dotruncate(reg->ushm_obj, PAGE_SIZE); 4521 if (error != 0) { 4522 umtx_shm_free_reg(reg); 4523 return (error); 4524 } 4525 mtx_lock(&umtx_shm_lock); 4526 /* Re-lookup as 'umtx_shm_lock' has been temporarily released. */ 4527 error = umtx_shm_find_reg_locked(key, &reg1); 4528 switch (error) { 4529 case 0: 4530 mtx_unlock(&umtx_shm_lock); 4531 umtx_shm_free_reg(reg); 4532 *res = reg1; 4533 return (0); 4534 case ESRCH: 4535 break; 4536 default: 4537 mtx_unlock(&umtx_shm_lock); 4538 umtx_shm_free_reg(reg); 4539 return (error); 4540 } 4541 TAILQ_INSERT_TAIL(&umtx_shm_registry[key->hash], reg, ushm_reg_link); 4542 LIST_INSERT_HEAD(USHM_OBJ_UMTX(key->info.shared.object), reg, 4543 ushm_obj_link); 4544 reg->ushm_flags = USHMF_LINKED; 4545 /* 4546 * This is one reference for the registry and the list of shared 4547 * mutexes referenced by the VM object containing the lock pointer, and 4548 * another for the caller, which it will free after use. So, one of 4549 * these is tied to the presence of USHMF_LINKED.
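 *
 * The USHMF_LINKED reference is dropped either by an explicit
 * UMTX_SHM_DESTROY request or by umtx_shm_object_terminated() when the
 * backing VM object is destroyed; the caller's reference is dropped by
 * umtx_shm() once the file descriptor, if any, has been installed.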
4550 */ 4551 reg->ushm_refcnt = 2; 4552 mtx_unlock(&umtx_shm_lock); 4553 *res = reg; 4554 return (0); 4555 } 4556
4557 static int 4558 umtx_shm_alive(struct thread *td, void *addr) 4559 { 4560 vm_map_t map; 4561 vm_map_entry_t entry; 4562 vm_object_t object; 4563 vm_pindex_t pindex; 4564 vm_prot_t prot; 4565 int res, ret; 4566 boolean_t wired; 4567 4568 map = &td->td_proc->p_vmspace->vm_map; 4569 res = vm_map_lookup(&map, (uintptr_t)addr, VM_PROT_READ, &entry, 4570 &object, &pindex, &prot, &wired); 4571 if (res != KERN_SUCCESS) 4572 return (EFAULT); 4573 if (object == NULL) 4574 ret = EINVAL; 4575 else 4576 ret = (object->flags & OBJ_UMTXDEAD) != 0 ? ENOTTY : 0; 4577 vm_map_lookup_done(map, entry); 4578 return (ret); 4579 } 4580
4581 static void 4582 umtx_shm_init(void) 4583 { 4584 int i; 4585 4586 umtx_shm_reg_zone = uma_zcreate("umtx_shm", sizeof(struct umtx_shm_reg), 4587 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 4588 mtx_init(&umtx_shm_lock, "umtxshm", NULL, MTX_DEF); 4589 for (i = 0; i < nitems(umtx_shm_registry); i++) 4590 TAILQ_INIT(&umtx_shm_registry[i]); 4591 } 4592
4593 static int 4594 umtx_shm(struct thread *td, void *addr, u_int flags) 4595 { 4596 struct umtx_key key; 4597 struct umtx_shm_reg *reg; 4598 struct file *fp; 4599 int error, fd; 4600 4601 if (__bitcount(flags & (UMTX_SHM_CREAT | UMTX_SHM_LOOKUP | 4602 UMTX_SHM_DESTROY | UMTX_SHM_ALIVE)) != 1) 4603 return (EINVAL); 4604 if ((flags & UMTX_SHM_ALIVE) != 0) 4605 return (umtx_shm_alive(td, addr)); 4606 error = umtx_key_get(addr, TYPE_SHM, PROCESS_SHARE, &key); 4607 if (error != 0) 4608 return (error); 4609 KASSERT(key.shared == 1, ("non-shared key")); 4610 error = (flags & UMTX_SHM_CREAT) != 0 ? 4611 umtx_shm_create_reg(td, &key, &reg) : 4612 umtx_shm_find_reg(&key, &reg); 4613 umtx_key_release(&key); 4614 if (error != 0) 4615 return (error); 4616 KASSERT(reg != NULL, ("no reg")); 4617 if ((flags & UMTX_SHM_DESTROY) != 0) { 4618 umtx_shm_unref_reg(reg, true); 4619 } else { 4620 #if 0 4621 #ifdef MAC 4622 error = mac_posixshm_check_open(td->td_ucred, 4623 reg->ushm_obj, FFLAGS(O_RDWR)); 4624 if (error == 0) 4625 #endif 4626 error = shm_access(reg->ushm_obj, td->td_ucred, 4627 FFLAGS(O_RDWR)); 4628 if (error == 0) 4629 #endif 4630 error = falloc_caps(td, &fp, &fd, O_CLOEXEC, NULL); 4631 if (error == 0) { 4632 shm_hold(reg->ushm_obj); 4633 finit(fp, FFLAGS(O_RDWR), DTYPE_SHM, reg->ushm_obj, 4634 &shm_ops); 4635 td->td_retval[0] = fd; 4636 fdrop(fp, td); 4637 } 4638 } 4639 umtx_shm_unref_reg(reg, false); 4640 return (error); 4641 } 4642
4643 static int 4644 __umtx_op_shm(struct thread *td, struct _umtx_op_args *uap, 4645 const struct umtx_copyops *ops __unused) 4646 { 4647 4648 return (umtx_shm(td, uap->uaddr1, uap->val)); 4649 } 4650
4651 static int 4652 __umtx_op_robust_lists(struct thread *td, struct _umtx_op_args *uap, 4653 const struct umtx_copyops *ops) 4654 { 4655 struct umtx_robust_lists_params rb; 4656 int error; 4657 4658 if (ops->compat32) { 4659 if ((td->td_pflags2 & TDP2_COMPAT32RB) == 0 && 4660 (td->td_rb_list != 0 || td->td_rbp_list != 0 || 4661 td->td_rb_inact != 0)) 4662 return (EBUSY); 4663 } else if ((td->td_pflags2 & TDP2_COMPAT32RB) != 0) { 4664 return (EBUSY); 4665 } 4666 4667 bzero(&rb, sizeof(rb)); 4668 error = ops->copyin_robust_lists(uap->uaddr1, uap->val, &rb); 4669 if (error != 0) 4670 return (error); 4671 4672 if (ops->compat32) 4673 td->td_pflags2 |= TDP2_COMPAT32RB; 4674 4675 td->td_rb_list = rb.robust_list_offset; 4676 td->td_rbp_list = rb.robust_priv_list_offset; 4677 td->td_rb_inact =
rb.robust_inact_offset; 4678 return (0); 4679 } 4680 4681 static int 4682 __umtx_op_get_min_timeout(struct thread *td, struct _umtx_op_args *uap, 4683 const struct umtx_copyops *ops) 4684 { 4685 long val; 4686 int error, val1; 4687 4688 val = sbttons(td->td_proc->p_umtx_min_timeout); 4689 if (ops->compat32) { 4690 val1 = (int)val; 4691 error = copyout(&val1, uap->uaddr1, sizeof(val1)); 4692 } else { 4693 error = copyout(&val, uap->uaddr1, sizeof(val)); 4694 } 4695 return (error); 4696 } 4697 4698 static int 4699 __umtx_op_set_min_timeout(struct thread *td, struct _umtx_op_args *uap, 4700 const struct umtx_copyops *ops) 4701 { 4702 if (uap->val < 0) 4703 return (EINVAL); 4704 td->td_proc->p_umtx_min_timeout = nstosbt(uap->val); 4705 return (0); 4706 } 4707 4708 #if defined(__i386__) || defined(__amd64__) 4709 /* 4710 * Provide the standard 32-bit definitions for x86, since native/compat32 use a 4711 * 32-bit time_t there. Other architectures just need the i386 definitions 4712 * along with their standard compat32. 4713 */ 4714 struct timespecx32 { 4715 int64_t tv_sec; 4716 int32_t tv_nsec; 4717 }; 4718 4719 struct umtx_timex32 { 4720 struct timespecx32 _timeout; 4721 uint32_t _flags; 4722 uint32_t _clockid; 4723 }; 4724 4725 #ifndef __i386__ 4726 #define timespeci386 timespec32 4727 #define umtx_timei386 umtx_time32 4728 #endif 4729 #else /* !__i386__ && !__amd64__ */ 4730 /* 32-bit architectures can emulate i386, so define these almost everywhere. */ 4731 struct timespeci386 { 4732 int32_t tv_sec; 4733 int32_t tv_nsec; 4734 }; 4735 4736 struct umtx_timei386 { 4737 struct timespeci386 _timeout; 4738 uint32_t _flags; 4739 uint32_t _clockid; 4740 }; 4741 4742 #if defined(__LP64__) 4743 #define timespecx32 timespec32 4744 #define umtx_timex32 umtx_time32 4745 #endif 4746 #endif 4747 4748 static int 4749 umtx_copyin_robust_lists32(const void *uaddr, size_t size, 4750 struct umtx_robust_lists_params *rbp) 4751 { 4752 struct umtx_robust_lists_params_compat32 rb32; 4753 int error; 4754 4755 if (size > sizeof(rb32)) 4756 return (EINVAL); 4757 bzero(&rb32, sizeof(rb32)); 4758 error = copyin(uaddr, &rb32, size); 4759 if (error != 0) 4760 return (error); 4761 CP(rb32, *rbp, robust_list_offset); 4762 CP(rb32, *rbp, robust_priv_list_offset); 4763 CP(rb32, *rbp, robust_inact_offset); 4764 return (0); 4765 } 4766 4767 #ifndef __i386__ 4768 static inline int 4769 umtx_copyin_timeouti386(const void *uaddr, struct timespec *tsp) 4770 { 4771 struct timespeci386 ts32; 4772 int error; 4773 4774 error = copyin(uaddr, &ts32, sizeof(ts32)); 4775 if (error == 0) { 4776 if (!timespecvalid_interval(&ts32)) 4777 error = EINVAL; 4778 else { 4779 CP(ts32, *tsp, tv_sec); 4780 CP(ts32, *tsp, tv_nsec); 4781 } 4782 } 4783 return (error); 4784 } 4785 4786 static inline int 4787 umtx_copyin_umtx_timei386(const void *uaddr, size_t size, struct _umtx_time *tp) 4788 { 4789 struct umtx_timei386 t32; 4790 int error; 4791 4792 t32._clockid = CLOCK_REALTIME; 4793 t32._flags = 0; 4794 if (size <= sizeof(t32._timeout)) 4795 error = copyin(uaddr, &t32._timeout, sizeof(t32._timeout)); 4796 else 4797 error = copyin(uaddr, &t32, sizeof(t32)); 4798 if (error != 0) 4799 return (error); 4800 if (!timespecvalid_interval(&t32._timeout)) 4801 return (EINVAL); 4802 TS_CP(t32, *tp, _timeout); 4803 CP(t32, *tp, _flags); 4804 CP(t32, *tp, _clockid); 4805 return (0); 4806 } 4807 4808 static int 4809 umtx_copyout_timeouti386(void *uaddr, size_t sz, struct timespec *tsp) 4810 { 4811 struct timespeci386 remain32 = { 4812 .tv_sec = tsp->tv_sec, 4813 
.tv_nsec = tsp->tv_nsec, 4814 }; 4815 4816 /* 4817 * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time) 4818 * and we're only called if sz >= sizeof(timespec) as supplied in the 4819 * copyops. 4820 */ 4821 KASSERT(sz >= sizeof(remain32), 4822 ("umtx_copyops specifies incorrect sizes")); 4823 4824 return (copyout(&remain32, uaddr, sizeof(remain32))); 4825 } 4826 #endif /* !__i386__ */ 4827 4828 #if defined(__i386__) || defined(__LP64__) 4829 static inline int 4830 umtx_copyin_timeoutx32(const void *uaddr, struct timespec *tsp) 4831 { 4832 struct timespecx32 ts32; 4833 int error; 4834 4835 error = copyin(uaddr, &ts32, sizeof(ts32)); 4836 if (error == 0) { 4837 if (!timespecvalid_interval(&ts32)) 4838 error = EINVAL; 4839 else { 4840 CP(ts32, *tsp, tv_sec); 4841 CP(ts32, *tsp, tv_nsec); 4842 } 4843 } 4844 return (error); 4845 } 4846 4847 static inline int 4848 umtx_copyin_umtx_timex32(const void *uaddr, size_t size, struct _umtx_time *tp) 4849 { 4850 struct umtx_timex32 t32; 4851 int error; 4852 4853 t32._clockid = CLOCK_REALTIME; 4854 t32._flags = 0; 4855 if (size <= sizeof(t32._timeout)) 4856 error = copyin(uaddr, &t32._timeout, sizeof(t32._timeout)); 4857 else 4858 error = copyin(uaddr, &t32, sizeof(t32)); 4859 if (error != 0) 4860 return (error); 4861 if (!timespecvalid_interval(&t32._timeout)) 4862 return (EINVAL); 4863 TS_CP(t32, *tp, _timeout); 4864 CP(t32, *tp, _flags); 4865 CP(t32, *tp, _clockid); 4866 return (0); 4867 } 4868 4869 static int 4870 umtx_copyout_timeoutx32(void *uaddr, size_t sz, struct timespec *tsp) 4871 { 4872 struct timespecx32 remain32 = { 4873 .tv_sec = tsp->tv_sec, 4874 .tv_nsec = tsp->tv_nsec, 4875 }; 4876 4877 /* 4878 * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time) 4879 * and we're only called if sz >= sizeof(timespec) as supplied in the 4880 * copyops. 
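 * (See __umtx_op_sem2_wait(), which passes 'uasize - ops->umtx_time_sz'
 * and only calls copyout_timeout() when uasize >= umtx_time_sz +
 * timespec_sz.)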
4881 */ 4882 KASSERT(sz >= sizeof(remain32), 4883 ("umtx_copyops specifies incorrect sizes")); 4884 4885 return (copyout(&remain32, uaddr, sizeof(remain32))); 4886 } 4887 #endif /* __i386__ || __LP64__ */ 4888 4889 typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap, 4890 const struct umtx_copyops *umtx_ops); 4891 4892 static const _umtx_op_func op_table[] = { 4893 #ifdef COMPAT_FREEBSD10 4894 [UMTX_OP_LOCK] = __umtx_op_lock_umtx, 4895 [UMTX_OP_UNLOCK] = __umtx_op_unlock_umtx, 4896 #else 4897 [UMTX_OP_LOCK] = __umtx_op_unimpl, 4898 [UMTX_OP_UNLOCK] = __umtx_op_unimpl, 4899 #endif 4900 [UMTX_OP_WAIT] = __umtx_op_wait, 4901 [UMTX_OP_WAKE] = __umtx_op_wake, 4902 [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex, 4903 [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex, 4904 [UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex, 4905 [UMTX_OP_SET_CEILING] = __umtx_op_set_ceiling, 4906 [UMTX_OP_CV_WAIT] = __umtx_op_cv_wait, 4907 [UMTX_OP_CV_SIGNAL] = __umtx_op_cv_signal, 4908 [UMTX_OP_CV_BROADCAST] = __umtx_op_cv_broadcast, 4909 [UMTX_OP_WAIT_UINT] = __umtx_op_wait_uint, 4910 [UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock, 4911 [UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock, 4912 [UMTX_OP_RW_UNLOCK] = __umtx_op_rw_unlock, 4913 [UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private, 4914 [UMTX_OP_WAKE_PRIVATE] = __umtx_op_wake_private, 4915 [UMTX_OP_MUTEX_WAIT] = __umtx_op_wait_umutex, 4916 [UMTX_OP_MUTEX_WAKE] = __umtx_op_wake_umutex, 4917 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4918 [UMTX_OP_SEM_WAIT] = __umtx_op_sem_wait, 4919 [UMTX_OP_SEM_WAKE] = __umtx_op_sem_wake, 4920 #else 4921 [UMTX_OP_SEM_WAIT] = __umtx_op_unimpl, 4922 [UMTX_OP_SEM_WAKE] = __umtx_op_unimpl, 4923 #endif 4924 [UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private, 4925 [UMTX_OP_MUTEX_WAKE2] = __umtx_op_wake2_umutex, 4926 [UMTX_OP_SEM2_WAIT] = __umtx_op_sem2_wait, 4927 [UMTX_OP_SEM2_WAKE] = __umtx_op_sem2_wake, 4928 [UMTX_OP_SHM] = __umtx_op_shm, 4929 [UMTX_OP_ROBUST_LISTS] = __umtx_op_robust_lists, 4930 [UMTX_OP_GET_MIN_TIMEOUT] = __umtx_op_get_min_timeout, 4931 [UMTX_OP_SET_MIN_TIMEOUT] = __umtx_op_set_min_timeout, 4932 }; 4933 4934 static const struct umtx_copyops umtx_native_ops = { 4935 .copyin_timeout = umtx_copyin_timeout, 4936 .copyin_umtx_time = umtx_copyin_umtx_time, 4937 .copyin_robust_lists = umtx_copyin_robust_lists, 4938 .copyout_timeout = umtx_copyout_timeout, 4939 .timespec_sz = sizeof(struct timespec), 4940 .umtx_time_sz = sizeof(struct _umtx_time), 4941 }; 4942 4943 #ifndef __i386__ 4944 static const struct umtx_copyops umtx_native_opsi386 = { 4945 .copyin_timeout = umtx_copyin_timeouti386, 4946 .copyin_umtx_time = umtx_copyin_umtx_timei386, 4947 .copyin_robust_lists = umtx_copyin_robust_lists32, 4948 .copyout_timeout = umtx_copyout_timeouti386, 4949 .timespec_sz = sizeof(struct timespeci386), 4950 .umtx_time_sz = sizeof(struct umtx_timei386), 4951 .compat32 = true, 4952 }; 4953 #endif 4954 4955 #if defined(__i386__) || defined(__LP64__) 4956 /* i386 can emulate other 32-bit archs, too! 
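 * These ops use the 64-bit time_t layout of struct timespecx32 defined
 * above, unlike the 32-bit time_t of the i386 variants.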
*/ 4957 static const struct umtx_copyops umtx_native_opsx32 = { 4958 .copyin_timeout = umtx_copyin_timeoutx32, 4959 .copyin_umtx_time = umtx_copyin_umtx_timex32, 4960 .copyin_robust_lists = umtx_copyin_robust_lists32, 4961 .copyout_timeout = umtx_copyout_timeoutx32, 4962 .timespec_sz = sizeof(struct timespecx32), 4963 .umtx_time_sz = sizeof(struct umtx_timex32), 4964 .compat32 = true, 4965 }; 4966
4967 #ifdef COMPAT_FREEBSD32 4968 #ifdef __amd64__ 4969 #define umtx_native_ops32 umtx_native_opsi386 4970 #else 4971 #define umtx_native_ops32 umtx_native_opsx32 4972 #endif 4973 #endif /* COMPAT_FREEBSD32 */ 4974 #endif /* __i386__ || __LP64__ */ 4975 4976 #define UMTX_OP__FLAGS (UMTX_OP__32BIT | UMTX_OP__I386) 4977
4978 static int 4979 kern__umtx_op(struct thread *td, void *obj, int op, unsigned long val, 4980 void *uaddr1, void *uaddr2, const struct umtx_copyops *ops) 4981 { 4982 struct _umtx_op_args uap = { 4983 .obj = obj, 4984 .op = op & ~UMTX_OP__FLAGS, 4985 .val = val, 4986 .uaddr1 = uaddr1, 4987 .uaddr2 = uaddr2 4988 }; 4989 4990 if (uap.op >= nitems(op_table)) 4991 return (EINVAL); 4992 return ((*op_table[uap.op])(td, &uap, ops)); 4993 } 4994
4995 int 4996 sys__umtx_op(struct thread *td, struct _umtx_op_args *uap) 4997 { 4998 const struct umtx_copyops *umtx_ops; 4999 5000 umtx_ops = &umtx_native_ops; 5001 #ifdef __LP64__ 5002 if ((uap->op & (UMTX_OP__32BIT | UMTX_OP__I386)) != 0) { 5003 if ((uap->op & UMTX_OP__I386) != 0) 5004 umtx_ops = &umtx_native_opsi386; 5005 else 5006 umtx_ops = &umtx_native_opsx32; 5007 } 5008 #elif !defined(__i386__) 5009 /* We consider UMTX_OP__32BIT a nop on !i386 ILP32. */ 5010 if ((uap->op & UMTX_OP__I386) != 0) 5011 umtx_ops = &umtx_native_opsi386; 5012 #else 5013 /* Likewise, UMTX_OP__I386 is a nop on i386. */ 5014 if ((uap->op & UMTX_OP__32BIT) != 0) 5015 umtx_ops = &umtx_native_opsx32; 5016 #endif 5017 return (kern__umtx_op(td, uap->obj, uap->op, uap->val, uap->uaddr1, 5018 uap->uaddr2, umtx_ops)); 5019 } 5020
5021 #ifdef COMPAT_FREEBSD32 5022 #ifdef COMPAT_FREEBSD10 5023 int 5024 freebsd10_freebsd32__umtx_lock(struct thread *td, 5025 struct freebsd10_freebsd32__umtx_lock_args *uap) 5026 { 5027 return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL)); 5028 } 5029 5030 int 5031 freebsd10_freebsd32__umtx_unlock(struct thread *td, 5032 struct freebsd10_freebsd32__umtx_unlock_args *uap) 5033 { 5034 return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid)); 5035 } 5036 #endif /* COMPAT_FREEBSD10 */ 5037
5038 int 5039 freebsd32__umtx_op(struct thread *td, struct freebsd32__umtx_op_args *uap) 5040 { 5041 5042 return (kern__umtx_op(td, uap->obj, uap->op, uap->val, uap->uaddr1, 5043 uap->uaddr2, &umtx_native_ops32)); 5044 } 5045 #endif /* COMPAT_FREEBSD32 */ 5046
5047 void 5048 umtx_thread_init(struct thread *td) 5049 { 5050 5051 td->td_umtxq = umtxq_alloc(); 5052 td->td_umtxq->uq_thread = td; 5053 } 5054 5055 void 5056 umtx_thread_fini(struct thread *td) 5057 { 5058 5059 umtxq_free(td->td_umtxq); 5060 } 5061
5062 /* 5063 * Called when a new thread is created, e.g. by fork(). 5064 */ 5065 void 5066 umtx_thread_alloc(struct thread *td) 5067 { 5068 struct umtx_q *uq; 5069 5070 uq = td->td_umtxq; 5071 uq->uq_inherited_pri = PRI_MAX; 5072 5073 KASSERT(uq->uq_flags == 0, ("uq_flags != 0")); 5074 KASSERT(uq->uq_thread == td, ("uq_thread != td")); 5075 KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL")); 5076 KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty")); 5077 } 5078 5079 /* 5080 * exec() hook.
5081 * 5082 * Clear robust lists for all process' threads, not delaying the 5083 * cleanup to thread exit, since the relevant address space is 5084 * destroyed right now. 5085 */ 5086 void 5087 umtx_exec(struct proc *p) 5088 { 5089 struct thread *td; 5090 5091 KASSERT(p == curproc, ("need curproc")); 5092 KASSERT((p->p_flag & P_HADTHREADS) == 0 || 5093 (p->p_flag & P_STOPPED_SINGLE) != 0, 5094 ("curproc must be single-threaded")); 5095 /* 5096 * There is no need to lock the list as only this thread can be 5097 * running. 5098 */ 5099 FOREACH_THREAD_IN_PROC(p, td) { 5100 KASSERT(td == curthread || 5101 ((td->td_flags & TDF_BOUNDARY) != 0 && TD_IS_SUSPENDED(td)), 5102 ("running thread %p %p", p, td)); 5103 umtx_thread_cleanup(td); 5104 td->td_rb_list = td->td_rbp_list = td->td_rb_inact = 0; 5105 } 5106 5107 p->p_umtx_min_timeout = 0; 5108 } 5109 5110 /* 5111 * thread exit hook. 5112 */ 5113 void 5114 umtx_thread_exit(struct thread *td) 5115 { 5116 5117 umtx_thread_cleanup(td); 5118 } 5119 5120 static int 5121 umtx_read_uptr(struct thread *td, uintptr_t ptr, uintptr_t *res, bool compat32) 5122 { 5123 u_long res1; 5124 uint32_t res32; 5125 int error; 5126 5127 if (compat32) { 5128 error = fueword32((void *)ptr, &res32); 5129 if (error == 0) 5130 res1 = res32; 5131 } else { 5132 error = fueword((void *)ptr, &res1); 5133 } 5134 if (error == 0) 5135 *res = res1; 5136 else 5137 error = EFAULT; 5138 return (error); 5139 } 5140 5141 static void 5142 umtx_read_rb_list(struct thread *td, struct umutex *m, uintptr_t *rb_list, 5143 bool compat32) 5144 { 5145 struct umutex32 m32; 5146 5147 if (compat32) { 5148 memcpy(&m32, m, sizeof(m32)); 5149 *rb_list = m32.m_rb_lnk; 5150 } else { 5151 *rb_list = m->m_rb_lnk; 5152 } 5153 } 5154 5155 static int 5156 umtx_handle_rb(struct thread *td, uintptr_t rbp, uintptr_t *rb_list, bool inact, 5157 bool compat32) 5158 { 5159 struct umutex m; 5160 int error; 5161 5162 KASSERT(td->td_proc == curproc, ("need current vmspace")); 5163 error = copyin((void *)rbp, &m, sizeof(m)); 5164 if (error != 0) 5165 return (error); 5166 if (rb_list != NULL) 5167 umtx_read_rb_list(td, &m, rb_list, compat32); 5168 if ((m.m_flags & UMUTEX_ROBUST) == 0) 5169 return (EINVAL); 5170 if ((m.m_owner & ~UMUTEX_CONTESTED) != td->td_tid) 5171 /* inact is cleared after unlock, allow the inconsistency */ 5172 return (inact ? 0 : EINVAL); 5173 return (do_unlock_umutex(td, (struct umutex *)rbp, true)); 5174 } 5175 5176 static void 5177 umtx_cleanup_rb_list(struct thread *td, uintptr_t rb_list, uintptr_t *rb_inact, 5178 const char *name, bool compat32) 5179 { 5180 int error, i; 5181 uintptr_t rbp; 5182 bool inact; 5183 5184 if (rb_list == 0) 5185 return; 5186 error = umtx_read_uptr(td, rb_list, &rbp, compat32); 5187 for (i = 0; error == 0 && rbp != 0 && i < umtx_max_rb; i++) { 5188 if (rbp == *rb_inact) { 5189 inact = true; 5190 *rb_inact = 0; 5191 } else 5192 inact = false; 5193 error = umtx_handle_rb(td, rbp, &rbp, inact, compat32); 5194 } 5195 if (i == umtx_max_rb && umtx_verbose_rb) { 5196 uprintf("comm %s pid %d: reached umtx %smax rb %d\n", 5197 td->td_proc->p_comm, td->td_proc->p_pid, name, umtx_max_rb); 5198 } 5199 if (error != 0 && umtx_verbose_rb) { 5200 uprintf("comm %s pid %d: handling %srb error %d\n", 5201 td->td_proc->p_comm, td->td_proc->p_pid, name, error); 5202 } 5203 } 5204 5205 /* 5206 * Clean up umtx data. 
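 *
 * Called from umtx_thread_exit() at thread exit and from umtx_exec() for
 * every thread of an exec()ing process: disowns priority-inheritance
 * mutexes and walks the thread's robust mutex lists.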
5207 */ 5208 static void 5209 umtx_thread_cleanup(struct thread *td) 5210 { 5211 struct umtx_q *uq; 5212 struct umtx_pi *pi; 5213 uintptr_t rb_inact; 5214 bool compat32; 5215 5216 /* 5217 * Disown pi mutexes. 5218 */ 5219 uq = td->td_umtxq; 5220 if (uq != NULL) { 5221 if (uq->uq_inherited_pri != PRI_MAX || 5222 !TAILQ_EMPTY(&uq->uq_pi_contested)) { 5223 mtx_lock(&umtx_lock); 5224 uq->uq_inherited_pri = PRI_MAX; 5225 while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) { 5226 pi->pi_owner = NULL; 5227 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link); 5228 } 5229 mtx_unlock(&umtx_lock); 5230 } 5231 sched_lend_user_prio_cond(td, PRI_MAX); 5232 } 5233 5234 compat32 = (td->td_pflags2 & TDP2_COMPAT32RB) != 0; 5235 td->td_pflags2 &= ~TDP2_COMPAT32RB; 5236 5237 if (td->td_rb_inact == 0 && td->td_rb_list == 0 && td->td_rbp_list == 0) 5238 return; 5239 5240 /* 5241 * Handle terminated robust mutexes. Must be done after 5242 * robust pi disown, otherwise unlock could see unowned 5243 * entries. 5244 */ 5245 rb_inact = td->td_rb_inact; 5246 if (rb_inact != 0) 5247 (void)umtx_read_uptr(td, rb_inact, &rb_inact, compat32); 5248 umtx_cleanup_rb_list(td, td->td_rb_list, &rb_inact, "", compat32); 5249 umtx_cleanup_rb_list(td, td->td_rbp_list, &rb_inact, "priv ", compat32); 5250 if (rb_inact != 0) 5251 (void)umtx_handle_rb(td, rb_inact, NULL, true, compat32); 5252 } 5253
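/*
 * Illustrative sketch (not part of the original source): a userland caller
 * reaches the UMTX_OP_SHM handler above through the _umtx_op(2) system call.
 * The constants come from <sys/umtx.h>; the wrapper function below is
 * hypothetical and only shows the argument mapping used by __umtx_op_shm():
 * 'val' carries the UMTX_SHM_* flag and 'uaddr1' the key address, while the
 * created shm file descriptor is returned via td_retval[0].
 *
 *	#include <sys/types.h>
 *	#include <sys/umtx.h>
 *
 *	static int
 *	umtx_shm_create_fd(void *key_addr)
 *	{
 *		return (_umtx_op(NULL, UMTX_OP_SHM, UMTX_SHM_CREAT, key_addr,
 *		    NULL));
 *	}
 */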