1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2015, 2016 The FreeBSD Foundation 5 * Copyright (c) 2004, David Xu <davidxu@freebsd.org> 6 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org> 7 * All rights reserved. 8 * 9 * Portions of this software were developed by Konstantin Belousov 10 * under sponsorship from the FreeBSD Foundation. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice unmodified, this list of conditions, and the following 17 * disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 23 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 24 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 25 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 27 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 31 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <sys/cdefs.h> 35 #include "opt_umtx_profiling.h" 36 37 #include <sys/param.h> 38 #include <sys/kernel.h> 39 #include <sys/fcntl.h> 40 #include <sys/file.h> 41 #include <sys/filedesc.h> 42 #include <sys/limits.h> 43 #include <sys/lock.h> 44 #include <sys/malloc.h> 45 #include <sys/mman.h> 46 #include <sys/mutex.h> 47 #include <sys/priv.h> 48 #include <sys/proc.h> 49 #include <sys/resource.h> 50 #include <sys/resourcevar.h> 51 #include <sys/rwlock.h> 52 #include <sys/sbuf.h> 53 #include <sys/sched.h> 54 #include <sys/smp.h> 55 #include <sys/sysctl.h> 56 #include <sys/systm.h> 57 #include <sys/sysproto.h> 58 #include <sys/syscallsubr.h> 59 #include <sys/taskqueue.h> 60 #include <sys/time.h> 61 #include <sys/eventhandler.h> 62 #include <sys/umtx.h> 63 #include <sys/umtxvar.h> 64 65 #include <security/mac/mac_framework.h> 66 67 #include <vm/vm.h> 68 #include <vm/vm_param.h> 69 #include <vm/pmap.h> 70 #include <vm/uma.h> 71 #include <vm/vm_map.h> 72 #include <vm/vm_object.h> 73 74 #include <machine/atomic.h> 75 #include <machine/cpu.h> 76 77 #include <compat/freebsd32/freebsd32.h> 78 #ifdef COMPAT_FREEBSD32 79 #include <compat/freebsd32/freebsd32_proto.h> 80 #endif 81 82 #define _UMUTEX_TRY 1 83 #define _UMUTEX_WAIT 2 84 85 #ifdef UMTX_PROFILING 86 #define UPROF_PERC_BIGGER(w, f, sw, sf) \ 87 (((w) > (sw)) || ((w) == (sw) && (f) > (sf))) 88 #endif 89 90 #define UMTXQ_LOCKED_ASSERT(uc) mtx_assert(&(uc)->uc_lock, MA_OWNED) 91 #ifdef INVARIANTS 92 #define UMTXQ_ASSERT_LOCKED_BUSY(key) do { \ 93 struct umtxq_chain *uc; \ 94 \ 95 uc = umtxq_getchain(key); \ 96 mtx_assert(&uc->uc_lock, MA_OWNED); \ 97 KASSERT(uc->uc_busy != 0, ("umtx chain is not busy")); \ 98 } while (0) 99 #else 100 #define UMTXQ_ASSERT_LOCKED_BUSY(key) do {} while (0) 101 #endif 102 103 /* 104 * Don't propagate time-sharing priority, there is a security reason, 105 * a user can 
simply introduce PI-mutex, let thread A lock the mutex, 106 * and let another thread B block on the mutex, because B is 107 * sleeping, its priority will be boosted, this causes A's priority to 108 * be boosted via priority propagating too and will never be lowered even 109 * if it is using 100%CPU, this is unfair to other processes. 110 */ 111 112 #define UPRI(td) (((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\ 113 (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\ 114 PRI_MAX_TIMESHARE : (td)->td_user_pri) 115 116 #define GOLDEN_RATIO_PRIME 2654404609U 117 #ifndef UMTX_CHAINS 118 #define UMTX_CHAINS 512 119 #endif 120 #define UMTX_SHIFTS (__WORD_BIT - 9) 121 122 #define GET_SHARE(flags) \ 123 (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE) 124 125 #define BUSY_SPINS 200 126 127 struct umtx_copyops { 128 int (*copyin_timeout)(const void *uaddr, struct timespec *tsp); 129 int (*copyin_umtx_time)(const void *uaddr, size_t size, 130 struct _umtx_time *tp); 131 int (*copyin_robust_lists)(const void *uaddr, size_t size, 132 struct umtx_robust_lists_params *rbp); 133 int (*copyout_timeout)(void *uaddr, size_t size, 134 struct timespec *tsp); 135 const size_t timespec_sz; 136 const size_t umtx_time_sz; 137 const bool compat32; 138 }; 139 140 _Static_assert(sizeof(struct umutex) == sizeof(struct umutex32), "umutex32"); 141 _Static_assert(__offsetof(struct umutex, m_spare[0]) == 142 __offsetof(struct umutex32, m_spare[0]), "m_spare32"); 143 144 int umtx_shm_vnobj_persistent = 0; 145 SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_vnode_persistent, CTLFLAG_RWTUN, 146 &umtx_shm_vnobj_persistent, 0, 147 "False forces destruction of umtx attached to file, on last close"); 148 static int umtx_max_rb = 1000; 149 SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_max_robust, CTLFLAG_RWTUN, 150 &umtx_max_rb, 0, 151 "Maximum number of robust mutexes allowed for each thread"); 152 153 static uma_zone_t umtx_pi_zone; 154 static struct umtxq_chain umtxq_chains[2][UMTX_CHAINS]; 155 static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory"); 156 static int umtx_pi_allocated; 157 158 static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 159 "umtx debug"); 160 SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD, 161 &umtx_pi_allocated, 0, "Allocated umtx_pi"); 162 static int umtx_verbose_rb = 1; 163 SYSCTL_INT(_debug_umtx, OID_AUTO, robust_faults_verbose, CTLFLAG_RWTUN, 164 &umtx_verbose_rb, 0, 165 ""); 166 167 #ifdef UMTX_PROFILING 168 static long max_length; 169 SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0, "max_length"); 170 static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 171 "umtx chain stats"); 172 #endif 173 174 static inline void umtx_abs_timeout_init2(struct umtx_abs_timeout *timo, 175 const struct _umtx_time *umtxtime); 176 177 static void umtx_shm_init(void); 178 static void umtxq_sysinit(void *); 179 static void umtxq_hash(struct umtx_key *key); 180 static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, 181 bool rb); 182 static void umtx_thread_cleanup(struct thread *td); 183 SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL); 184 185 #define umtxq_signal(key, nwake) umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE) 186 187 static struct mtx umtx_lock; 188 189 #ifdef UMTX_PROFILING 190 static void 191 umtx_init_profiling(void) 192 { 193 struct sysctl_oid *chain_oid; 194 char chain_name[10]; 195 int i; 196 197 for (i = 0; i < UMTX_CHAINS; ++i) { 198 snprintf(chain_name, 
sizeof(chain_name), "%d", i); 199 chain_oid = SYSCTL_ADD_NODE(NULL, 200 SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO, 201 chain_name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 202 "umtx hash stats"); 203 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, 204 "max_length0", CTLFLAG_RD, &umtxq_chains[0][i].max_length, 0, NULL); 205 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, 206 "max_length1", CTLFLAG_RD, &umtxq_chains[1][i].max_length, 0, NULL); 207 } 208 } 209 210 static int 211 sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS) 212 { 213 char buf[512]; 214 struct sbuf sb; 215 struct umtxq_chain *uc; 216 u_int fract, i, j, tot, whole; 217 u_int sf0, sf1, sf2, sf3, sf4; 218 u_int si0, si1, si2, si3, si4; 219 u_int sw0, sw1, sw2, sw3, sw4; 220 221 sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN); 222 for (i = 0; i < 2; i++) { 223 tot = 0; 224 for (j = 0; j < UMTX_CHAINS; ++j) { 225 uc = &umtxq_chains[i][j]; 226 mtx_lock(&uc->uc_lock); 227 tot += uc->max_length; 228 mtx_unlock(&uc->uc_lock); 229 } 230 if (tot == 0) 231 sbuf_printf(&sb, "%u) Empty ", i); 232 else { 233 sf0 = sf1 = sf2 = sf3 = sf4 = 0; 234 si0 = si1 = si2 = si3 = si4 = 0; 235 sw0 = sw1 = sw2 = sw3 = sw4 = 0; 236 for (j = 0; j < UMTX_CHAINS; j++) { 237 uc = &umtxq_chains[i][j]; 238 mtx_lock(&uc->uc_lock); 239 whole = uc->max_length * 100; 240 mtx_unlock(&uc->uc_lock); 241 fract = (whole % tot) * 100; 242 if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) { 243 sf0 = fract; 244 si0 = j; 245 sw0 = whole; 246 } else if (UPROF_PERC_BIGGER(whole, fract, sw1, 247 sf1)) { 248 sf1 = fract; 249 si1 = j; 250 sw1 = whole; 251 } else if (UPROF_PERC_BIGGER(whole, fract, sw2, 252 sf2)) { 253 sf2 = fract; 254 si2 = j; 255 sw2 = whole; 256 } else if (UPROF_PERC_BIGGER(whole, fract, sw3, 257 sf3)) { 258 sf3 = fract; 259 si3 = j; 260 sw3 = whole; 261 } else if (UPROF_PERC_BIGGER(whole, fract, sw4, 262 sf4)) { 263 sf4 = fract; 264 si4 = j; 265 sw4 = whole; 266 } 267 } 268 sbuf_printf(&sb, "queue %u:\n", i); 269 sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot, 270 sf0 / tot, si0); 271 sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot, 272 sf1 / tot, si1); 273 sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot, 274 sf2 / tot, si2); 275 sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot, 276 sf3 / tot, si3); 277 sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot, 278 sf4 / tot, si4); 279 } 280 } 281 sbuf_trim(&sb); 282 sbuf_finish(&sb); 283 sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req); 284 sbuf_delete(&sb); 285 return (0); 286 } 287 288 static int 289 sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS) 290 { 291 struct umtxq_chain *uc; 292 u_int i, j; 293 int clear, error; 294 295 clear = 0; 296 error = sysctl_handle_int(oidp, &clear, 0, req); 297 if (error != 0 || req->newptr == NULL) 298 return (error); 299 300 if (clear != 0) { 301 for (i = 0; i < 2; ++i) { 302 for (j = 0; j < UMTX_CHAINS; ++j) { 303 uc = &umtxq_chains[i][j]; 304 mtx_lock(&uc->uc_lock); 305 uc->length = 0; 306 uc->max_length = 0; 307 mtx_unlock(&uc->uc_lock); 308 } 309 } 310 } 311 return (0); 312 } 313 314 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear, 315 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, 316 sysctl_debug_umtx_chains_clear, "I", 317 "Clear umtx chains statistics"); 318 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks, 319 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0, 320 sysctl_debug_umtx_chains_peaks, "A", 321 "Highest peaks in chains max length"); 322 #endif 323 324 static void 325 umtxq_sysinit(void *arg 
__unused) 326 { 327 int i, j; 328 329 umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi), 330 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 331 for (i = 0; i < 2; ++i) { 332 for (j = 0; j < UMTX_CHAINS; ++j) { 333 mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL, 334 MTX_DEF | MTX_DUPOK); 335 LIST_INIT(&umtxq_chains[i][j].uc_queue[0]); 336 LIST_INIT(&umtxq_chains[i][j].uc_queue[1]); 337 LIST_INIT(&umtxq_chains[i][j].uc_spare_queue); 338 TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list); 339 umtxq_chains[i][j].uc_busy = 0; 340 umtxq_chains[i][j].uc_waiters = 0; 341 #ifdef UMTX_PROFILING 342 umtxq_chains[i][j].length = 0; 343 umtxq_chains[i][j].max_length = 0; 344 #endif 345 } 346 } 347 #ifdef UMTX_PROFILING 348 umtx_init_profiling(); 349 #endif 350 mtx_init(&umtx_lock, "umtx lock", NULL, MTX_DEF); 351 umtx_shm_init(); 352 } 353 354 struct umtx_q * 355 umtxq_alloc(void) 356 { 357 struct umtx_q *uq; 358 359 uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO); 360 uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX, 361 M_WAITOK | M_ZERO); 362 TAILQ_INIT(&uq->uq_spare_queue->head); 363 TAILQ_INIT(&uq->uq_pi_contested); 364 uq->uq_inherited_pri = PRI_MAX; 365 return (uq); 366 } 367 368 void 369 umtxq_free(struct umtx_q *uq) 370 { 371 372 MPASS(uq->uq_spare_queue != NULL); 373 free(uq->uq_spare_queue, M_UMTX); 374 free(uq, M_UMTX); 375 } 376 377 static inline void 378 umtxq_hash(struct umtx_key *key) 379 { 380 unsigned n; 381 382 n = (uintptr_t)key->info.both.a + key->info.both.b; 383 key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS; 384 } 385 386 struct umtxq_chain * 387 umtxq_getchain(struct umtx_key *key) 388 { 389 390 if (key->type <= TYPE_SEM) 391 return (&umtxq_chains[1][key->hash]); 392 return (&umtxq_chains[0][key->hash]); 393 } 394 395 /* 396 * Set chain to busy state when following operation 397 * may be blocked (kernel mutex can not be used). 398 */ 399 void 400 umtxq_busy(struct umtx_key *key) 401 { 402 struct umtxq_chain *uc; 403 404 uc = umtxq_getchain(key); 405 mtx_assert(&uc->uc_lock, MA_OWNED); 406 if (uc->uc_busy) { 407 #ifdef SMP 408 if (smp_cpus > 1) { 409 int count = BUSY_SPINS; 410 if (count > 0) { 411 umtxq_unlock(key); 412 while (uc->uc_busy && --count > 0) 413 cpu_spinwait(); 414 umtxq_lock(key); 415 } 416 } 417 #endif 418 while (uc->uc_busy) { 419 uc->uc_waiters++; 420 msleep(uc, &uc->uc_lock, 0, "umtxqb", 0); 421 uc->uc_waiters--; 422 } 423 } 424 uc->uc_busy = 1; 425 } 426 427 /* 428 * Unbusy a chain. 
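 * The busy bit acts as the sleepable per-chain lock: callers mark the
 * chain busy before an operation that may fault on userspace or sleep
 * (where the chain mutex itself cannot be held), and unbusy it when
 * done.  A minimal sketch of the convention used by callers elsewhere
 * in this file (illustrative only):
 *
 *	umtxq_lock(&key);
 *	umtxq_busy(&key);
 *	umtxq_unlock(&key);
 *	... access the userspace words, possibly faulting or sleeping ...
 *	umtxq_lock(&key);
 *	umtxq_unbusy(&key);
 *	umtxq_unlock(&key);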
429 */ 430 void 431 umtxq_unbusy(struct umtx_key *key) 432 { 433 struct umtxq_chain *uc; 434 435 uc = umtxq_getchain(key); 436 mtx_assert(&uc->uc_lock, MA_OWNED); 437 KASSERT(uc->uc_busy != 0, ("not busy")); 438 uc->uc_busy = 0; 439 if (uc->uc_waiters) 440 wakeup_one(uc); 441 } 442 443 void 444 umtxq_unbusy_unlocked(struct umtx_key *key) 445 { 446 447 umtxq_lock(key); 448 umtxq_unbusy(key); 449 umtxq_unlock(key); 450 } 451 452 static struct umtxq_queue * 453 umtxq_queue_lookup(struct umtx_key *key, int q) 454 { 455 struct umtxq_queue *uh; 456 struct umtxq_chain *uc; 457 458 uc = umtxq_getchain(key); 459 UMTXQ_LOCKED_ASSERT(uc); 460 LIST_FOREACH(uh, &uc->uc_queue[q], link) { 461 if (umtx_key_match(&uh->key, key)) 462 return (uh); 463 } 464 465 return (NULL); 466 } 467 468 void 469 umtxq_insert_queue(struct umtx_q *uq, int q) 470 { 471 struct umtxq_queue *uh; 472 struct umtxq_chain *uc; 473 474 uc = umtxq_getchain(&uq->uq_key); 475 UMTXQ_LOCKED_ASSERT(uc); 476 KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue")); 477 uh = umtxq_queue_lookup(&uq->uq_key, q); 478 if (uh != NULL) { 479 LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link); 480 } else { 481 uh = uq->uq_spare_queue; 482 uh->key = uq->uq_key; 483 LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link); 484 #ifdef UMTX_PROFILING 485 uc->length++; 486 if (uc->length > uc->max_length) { 487 uc->max_length = uc->length; 488 if (uc->max_length > max_length) 489 max_length = uc->max_length; 490 } 491 #endif 492 } 493 uq->uq_spare_queue = NULL; 494 495 TAILQ_INSERT_TAIL(&uh->head, uq, uq_link); 496 uh->length++; 497 uq->uq_flags |= UQF_UMTXQ; 498 uq->uq_cur_queue = uh; 499 return; 500 } 501 502 void 503 umtxq_remove_queue(struct umtx_q *uq, int q) 504 { 505 struct umtxq_chain *uc; 506 struct umtxq_queue *uh; 507 508 uc = umtxq_getchain(&uq->uq_key); 509 UMTXQ_LOCKED_ASSERT(uc); 510 if (uq->uq_flags & UQF_UMTXQ) { 511 uh = uq->uq_cur_queue; 512 TAILQ_REMOVE(&uh->head, uq, uq_link); 513 uh->length--; 514 uq->uq_flags &= ~UQF_UMTXQ; 515 if (TAILQ_EMPTY(&uh->head)) { 516 KASSERT(uh->length == 0, 517 ("inconsistent umtxq_queue length")); 518 #ifdef UMTX_PROFILING 519 uc->length--; 520 #endif 521 LIST_REMOVE(uh, link); 522 } else { 523 uh = LIST_FIRST(&uc->uc_spare_queue); 524 KASSERT(uh != NULL, ("uc_spare_queue is empty")); 525 LIST_REMOVE(uh, link); 526 } 527 uq->uq_spare_queue = uh; 528 uq->uq_cur_queue = NULL; 529 } 530 } 531 532 /* 533 * Check if there are multiple waiters 534 */ 535 int 536 umtxq_count(struct umtx_key *key) 537 { 538 struct umtxq_queue *uh; 539 540 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 541 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 542 if (uh != NULL) 543 return (uh->length); 544 return (0); 545 } 546 547 /* 548 * Check if there are multiple PI waiters and returns first 549 * waiter. 550 */ 551 static int 552 umtxq_count_pi(struct umtx_key *key, struct umtx_q **first) 553 { 554 struct umtxq_queue *uh; 555 556 *first = NULL; 557 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 558 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 559 if (uh != NULL) { 560 *first = TAILQ_FIRST(&uh->head); 561 return (uh->length); 562 } 563 return (0); 564 } 565 566 /* 567 * Wake up threads waiting on an userland object by a bit mask. 
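 * Only waiters whose uq_bitset intersects the given mask are eligible,
 * and at most n_wake of them are woken.  Illustrative caller sketch
 * (the mask value is arbitrary; the chain must already be locked, as
 * asserted below):
 *
 *	umtxq_lock(&key);
 *	woken = umtxq_signal_mask(&key, INT_MAX, 0x1);
 *	umtxq_unlock(&key);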
568 */ 569 int 570 umtxq_signal_mask(struct umtx_key *key, int n_wake, u_int bitset) 571 { 572 struct umtxq_queue *uh; 573 struct umtx_q *uq, *uq_temp; 574 int ret; 575 576 ret = 0; 577 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 578 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 579 if (uh == NULL) 580 return (0); 581 TAILQ_FOREACH_SAFE(uq, &uh->head, uq_link, uq_temp) { 582 if ((uq->uq_bitset & bitset) == 0) 583 continue; 584 umtxq_remove_queue(uq, UMTX_SHARED_QUEUE); 585 wakeup_one(uq); 586 if (++ret >= n_wake) 587 break; 588 } 589 return (ret); 590 } 591 592 /* 593 * Wake up threads waiting on an userland object. 594 */ 595 596 static int 597 umtxq_signal_queue(struct umtx_key *key, int n_wake, int q) 598 { 599 struct umtxq_queue *uh; 600 struct umtx_q *uq; 601 int ret; 602 603 ret = 0; 604 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 605 uh = umtxq_queue_lookup(key, q); 606 if (uh != NULL) { 607 while ((uq = TAILQ_FIRST(&uh->head)) != NULL) { 608 umtxq_remove_queue(uq, q); 609 wakeup(uq); 610 if (++ret >= n_wake) 611 return (ret); 612 } 613 } 614 return (ret); 615 } 616 617 /* 618 * Wake up specified thread. 619 */ 620 static inline void 621 umtxq_signal_thread(struct umtx_q *uq) 622 { 623 624 UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key)); 625 umtxq_remove(uq); 626 wakeup(uq); 627 } 628 629 /* 630 * Wake up a maximum of n_wake threads that are waiting on an userland 631 * object identified by key. The remaining threads are removed from queue 632 * identified by key and added to the queue identified by key2 (requeued). 633 * The n_requeue specifies an upper limit on the number of threads that 634 * are requeued to the second queue. 635 */ 636 int 637 umtxq_requeue(struct umtx_key *key, int n_wake, struct umtx_key *key2, 638 int n_requeue) 639 { 640 struct umtxq_queue *uh; 641 struct umtx_q *uq, *uq_temp; 642 int ret; 643 644 ret = 0; 645 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 646 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key2)); 647 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 648 if (uh == NULL) 649 return (0); 650 TAILQ_FOREACH_SAFE(uq, &uh->head, uq_link, uq_temp) { 651 if (++ret <= n_wake) { 652 umtxq_remove(uq); 653 wakeup_one(uq); 654 } else { 655 umtxq_remove(uq); 656 uq->uq_key = *key2; 657 umtxq_insert(uq); 658 if (ret - n_wake == n_requeue) 659 break; 660 } 661 } 662 return (ret); 663 } 664 665 static inline int 666 tstohz(const struct timespec *tsp) 667 { 668 struct timeval tv; 669 670 TIMESPEC_TO_TIMEVAL(&tv, tsp); 671 return tvtohz(&tv); 672 } 673 674 void 675 umtx_abs_timeout_init(struct umtx_abs_timeout *timo, int clockid, 676 int absolute, const struct timespec *timeout) 677 { 678 679 timo->clockid = clockid; 680 if (!absolute) { 681 timo->is_abs_real = false; 682 kern_clock_gettime(curthread, timo->clockid, &timo->cur); 683 timespecadd(&timo->cur, timeout, &timo->end); 684 } else { 685 timo->end = *timeout; 686 timo->is_abs_real = clockid == CLOCK_REALTIME || 687 clockid == CLOCK_REALTIME_FAST || 688 clockid == CLOCK_REALTIME_PRECISE || 689 clockid == CLOCK_SECOND; 690 } 691 } 692 693 static void 694 umtx_abs_timeout_init2(struct umtx_abs_timeout *timo, 695 const struct _umtx_time *umtxtime) 696 { 697 698 umtx_abs_timeout_init(timo, umtxtime->_clockid, 699 (umtxtime->_flags & UMTX_ABSTIME) != 0, &umtxtime->_timeout); 700 } 701 702 static void 703 umtx_abs_timeout_enforce_min(sbintime_t *sbt) 704 { 705 sbintime_t when, mint; 706 707 mint = curproc->p_umtx_min_timeout; 708 if (__predict_false(mint != 0)) { 709 when = sbinuptime() + mint; 710 if (*sbt < when) 711 *sbt = 
when; 712 } 713 } 714 715 static int 716 umtx_abs_timeout_getsbt(struct umtx_abs_timeout *timo, sbintime_t *sbt, 717 int *flags) 718 { 719 struct bintime bt, bbt; 720 struct timespec tts; 721 sbintime_t rem; 722 723 switch (timo->clockid) { 724 725 /* Clocks that can be converted into absolute time. */ 726 case CLOCK_REALTIME: 727 case CLOCK_REALTIME_PRECISE: 728 case CLOCK_REALTIME_FAST: 729 case CLOCK_MONOTONIC: 730 case CLOCK_MONOTONIC_PRECISE: 731 case CLOCK_MONOTONIC_FAST: 732 case CLOCK_UPTIME: 733 case CLOCK_UPTIME_PRECISE: 734 case CLOCK_UPTIME_FAST: 735 case CLOCK_SECOND: 736 timespec2bintime(&timo->end, &bt); 737 switch (timo->clockid) { 738 case CLOCK_REALTIME: 739 case CLOCK_REALTIME_PRECISE: 740 case CLOCK_REALTIME_FAST: 741 case CLOCK_SECOND: 742 getboottimebin(&bbt); 743 bintime_sub(&bt, &bbt); 744 break; 745 } 746 if (bt.sec < 0) 747 return (ETIMEDOUT); 748 if (bt.sec >= (SBT_MAX >> 32)) { 749 *sbt = 0; 750 *flags = 0; 751 return (0); 752 } 753 *sbt = bttosbt(bt); 754 umtx_abs_timeout_enforce_min(sbt); 755 756 /* 757 * Check if the absolute time should be aligned to 758 * avoid firing multiple timer events in non-periodic 759 * timer mode. 760 */ 761 switch (timo->clockid) { 762 case CLOCK_REALTIME_FAST: 763 case CLOCK_MONOTONIC_FAST: 764 case CLOCK_UPTIME_FAST: 765 rem = *sbt % tc_tick_sbt; 766 if (__predict_true(rem != 0)) 767 *sbt += tc_tick_sbt - rem; 768 break; 769 case CLOCK_SECOND: 770 rem = *sbt % SBT_1S; 771 if (__predict_true(rem != 0)) 772 *sbt += SBT_1S - rem; 773 break; 774 } 775 *flags = C_ABSOLUTE; 776 return (0); 777 778 /* Clocks that has to be periodically polled. */ 779 case CLOCK_VIRTUAL: 780 case CLOCK_PROF: 781 case CLOCK_THREAD_CPUTIME_ID: 782 case CLOCK_PROCESS_CPUTIME_ID: 783 default: 784 kern_clock_gettime(curthread, timo->clockid, &timo->cur); 785 if (timespeccmp(&timo->end, &timo->cur, <=)) 786 return (ETIMEDOUT); 787 timespecsub(&timo->end, &timo->cur, &tts); 788 *sbt = tick_sbt * tstohz(&tts); 789 *flags = C_HARDCLOCK; 790 return (0); 791 } 792 } 793 794 static uint32_t 795 umtx_unlock_val(uint32_t flags, bool rb) 796 { 797 798 if (rb) 799 return (UMUTEX_RB_OWNERDEAD); 800 else if ((flags & UMUTEX_NONCONSISTENT) != 0) 801 return (UMUTEX_RB_NOTRECOV); 802 else 803 return (UMUTEX_UNOWNED); 804 805 } 806 807 /* 808 * Put thread into sleep state, before sleeping, check if 809 * thread was removed from umtx queue. 810 */ 811 int 812 umtxq_sleep(struct umtx_q *uq, const char *wmesg, 813 struct umtx_abs_timeout *timo) 814 { 815 struct umtxq_chain *uc; 816 sbintime_t sbt = 0; 817 int error, flags = 0; 818 819 uc = umtxq_getchain(&uq->uq_key); 820 UMTXQ_LOCKED_ASSERT(uc); 821 for (;;) { 822 if (!(uq->uq_flags & UQF_UMTXQ)) { 823 error = 0; 824 break; 825 } 826 if (timo != NULL) { 827 if (timo->is_abs_real) 828 curthread->td_rtcgen = 829 atomic_load_acq_int(&rtc_generation); 830 error = umtx_abs_timeout_getsbt(timo, &sbt, &flags); 831 if (error != 0) 832 break; 833 } 834 error = msleep_sbt(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, 835 sbt, 0, flags); 836 uc = umtxq_getchain(&uq->uq_key); 837 mtx_lock(&uc->uc_lock); 838 if (error == EINTR || error == ERESTART) 839 break; 840 if (error == EWOULDBLOCK && (flags & C_ABSOLUTE) != 0) { 841 error = ETIMEDOUT; 842 break; 843 } 844 } 845 846 curthread->td_rtcgen = 0; 847 return (error); 848 } 849 850 /* 851 * Convert userspace address into unique logical address. 
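 * For process-private objects the key is the (vmspace, virtual address)
 * pair; for shared objects it is the backing VM object plus offset, so
 * the same mapping reached from different processes hashes to the same
 * chain.  Typical use, mirroring callers elsewhere in this file
 * (illustrative only):
 *
 *	struct umtx_key key;
 *	int error;
 *
 *	error = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, AUTO_SHARE, &key);
 *	if (error != 0)
 *		return (error);
 *	umtxq_lock(&key);
 *	...
 *	umtxq_unlock(&key);
 *	umtx_key_release(&key);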
852 */ 853 int 854 umtx_key_get(const void *addr, int type, int share, struct umtx_key *key) 855 { 856 struct thread *td = curthread; 857 vm_map_t map; 858 vm_map_entry_t entry; 859 vm_pindex_t pindex; 860 vm_prot_t prot; 861 boolean_t wired; 862 863 key->type = type; 864 if (share == THREAD_SHARE) { 865 key->shared = 0; 866 key->info.private.vs = td->td_proc->p_vmspace; 867 key->info.private.addr = (uintptr_t)addr; 868 } else { 869 MPASS(share == PROCESS_SHARE || share == AUTO_SHARE); 870 map = &td->td_proc->p_vmspace->vm_map; 871 if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE, 872 &entry, &key->info.shared.object, &pindex, &prot, 873 &wired) != KERN_SUCCESS) { 874 return (EFAULT); 875 } 876 877 if ((share == PROCESS_SHARE) || 878 (share == AUTO_SHARE && 879 VM_INHERIT_SHARE == entry->inheritance)) { 880 key->shared = 1; 881 key->info.shared.offset = (vm_offset_t)addr - 882 entry->start + entry->offset; 883 vm_object_reference(key->info.shared.object); 884 } else { 885 key->shared = 0; 886 key->info.private.vs = td->td_proc->p_vmspace; 887 key->info.private.addr = (uintptr_t)addr; 888 } 889 vm_map_lookup_done(map, entry); 890 } 891 892 umtxq_hash(key); 893 return (0); 894 } 895 896 /* 897 * Release key. 898 */ 899 void 900 umtx_key_release(struct umtx_key *key) 901 { 902 if (key->shared) 903 vm_object_deallocate(key->info.shared.object); 904 } 905 906 #ifdef COMPAT_FREEBSD10 907 /* 908 * Lock a umtx object. 909 */ 910 static int 911 do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, 912 const struct timespec *timeout) 913 { 914 struct umtx_abs_timeout timo; 915 struct umtx_q *uq; 916 u_long owner; 917 u_long old; 918 int error = 0; 919 920 uq = td->td_umtxq; 921 if (timeout != NULL) 922 umtx_abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout); 923 924 /* 925 * Care must be exercised when dealing with umtx structure. It 926 * can fault on any access. 927 */ 928 for (;;) { 929 /* 930 * Try the uncontested case. This should be done in userland. 931 */ 932 owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id); 933 934 /* The acquire succeeded. */ 935 if (owner == UMTX_UNOWNED) 936 return (0); 937 938 /* The address was invalid. */ 939 if (owner == -1) 940 return (EFAULT); 941 942 /* If no one owns it but it is contested try to acquire it. */ 943 if (owner == UMTX_CONTESTED) { 944 owner = casuword(&umtx->u_owner, 945 UMTX_CONTESTED, id | UMTX_CONTESTED); 946 947 if (owner == UMTX_CONTESTED) 948 return (0); 949 950 /* The address was invalid. */ 951 if (owner == -1) 952 return (EFAULT); 953 954 error = thread_check_susp(td, false); 955 if (error != 0) 956 break; 957 958 /* If this failed the lock has changed, restart. */ 959 continue; 960 } 961 962 /* 963 * If we caught a signal, we have retried and now 964 * exit immediately. 965 */ 966 if (error != 0) 967 break; 968 969 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, 970 AUTO_SHARE, &uq->uq_key)) != 0) 971 return (error); 972 973 umtxq_lock(&uq->uq_key); 974 umtxq_busy(&uq->uq_key); 975 umtxq_insert(uq); 976 umtxq_unbusy(&uq->uq_key); 977 umtxq_unlock(&uq->uq_key); 978 979 /* 980 * Set the contested bit so that a release in user space 981 * knows to use the system call for unlock. If this fails 982 * either some one else has acquired the lock or it has been 983 * released. 984 */ 985 old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED); 986 987 /* The address was invalid. 
*/ 988 if (old == -1) { 989 umtxq_lock(&uq->uq_key); 990 umtxq_remove(uq); 991 umtxq_unlock(&uq->uq_key); 992 umtx_key_release(&uq->uq_key); 993 return (EFAULT); 994 } 995 996 /* 997 * We set the contested bit, sleep. Otherwise the lock changed 998 * and we need to retry or we lost a race to the thread 999 * unlocking the umtx. 1000 */ 1001 umtxq_lock(&uq->uq_key); 1002 if (old == owner) 1003 error = umtxq_sleep(uq, "umtx", timeout == NULL ? NULL : 1004 &timo); 1005 umtxq_remove(uq); 1006 umtxq_unlock(&uq->uq_key); 1007 umtx_key_release(&uq->uq_key); 1008 1009 if (error == 0) 1010 error = thread_check_susp(td, false); 1011 } 1012 1013 if (timeout == NULL) { 1014 /* Mutex locking is restarted if it is interrupted. */ 1015 if (error == EINTR) 1016 error = ERESTART; 1017 } else { 1018 /* Timed-locking is not restarted. */ 1019 if (error == ERESTART) 1020 error = EINTR; 1021 } 1022 return (error); 1023 } 1024 1025 /* 1026 * Unlock a umtx object. 1027 */ 1028 static int 1029 do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id) 1030 { 1031 struct umtx_key key; 1032 u_long owner; 1033 u_long old; 1034 int error; 1035 int count; 1036 1037 /* 1038 * Make sure we own this mtx. 1039 */ 1040 owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner)); 1041 if (owner == -1) 1042 return (EFAULT); 1043 1044 if ((owner & ~UMTX_CONTESTED) != id) 1045 return (EPERM); 1046 1047 /* This should be done in userland */ 1048 if ((owner & UMTX_CONTESTED) == 0) { 1049 old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED); 1050 if (old == -1) 1051 return (EFAULT); 1052 if (old == owner) 1053 return (0); 1054 owner = old; 1055 } 1056 1057 /* We should only ever be in here for contested locks */ 1058 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE, 1059 &key)) != 0) 1060 return (error); 1061 1062 umtxq_lock(&key); 1063 umtxq_busy(&key); 1064 count = umtxq_count(&key); 1065 umtxq_unlock(&key); 1066 1067 /* 1068 * When unlocking the umtx, it must be marked as unowned if 1069 * there is zero or one thread only waiting for it. 1070 * Otherwise, it must be marked as contested. 1071 */ 1072 old = casuword(&umtx->u_owner, owner, 1073 count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED); 1074 umtxq_lock(&key); 1075 umtxq_signal(&key,1); 1076 umtxq_unbusy(&key); 1077 umtxq_unlock(&key); 1078 umtx_key_release(&key); 1079 if (old == -1) 1080 return (EFAULT); 1081 if (old != owner) 1082 return (EINVAL); 1083 return (0); 1084 } 1085 1086 #ifdef COMPAT_FREEBSD32 1087 1088 /* 1089 * Lock a umtx object. 1090 */ 1091 static int 1092 do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, 1093 const struct timespec *timeout) 1094 { 1095 struct umtx_abs_timeout timo; 1096 struct umtx_q *uq; 1097 uint32_t owner; 1098 uint32_t old; 1099 int error = 0; 1100 1101 uq = td->td_umtxq; 1102 1103 if (timeout != NULL) 1104 umtx_abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout); 1105 1106 /* 1107 * Care must be exercised when dealing with umtx structure. It 1108 * can fault on any access. 1109 */ 1110 for (;;) { 1111 /* 1112 * Try the uncontested case. This should be done in userland. 1113 */ 1114 owner = casuword32(m, UMUTEX_UNOWNED, id); 1115 1116 /* The acquire succeeded. */ 1117 if (owner == UMUTEX_UNOWNED) 1118 return (0); 1119 1120 /* The address was invalid. */ 1121 if (owner == -1) 1122 return (EFAULT); 1123 1124 /* If no one owns it but it is contested try to acquire it. 
*/ 1125 if (owner == UMUTEX_CONTESTED) { 1126 owner = casuword32(m, 1127 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); 1128 if (owner == UMUTEX_CONTESTED) 1129 return (0); 1130 1131 /* The address was invalid. */ 1132 if (owner == -1) 1133 return (EFAULT); 1134 1135 error = thread_check_susp(td, false); 1136 if (error != 0) 1137 break; 1138 1139 /* If this failed the lock has changed, restart. */ 1140 continue; 1141 } 1142 1143 /* 1144 * If we caught a signal, we have retried and now 1145 * exit immediately. 1146 */ 1147 if (error != 0) 1148 return (error); 1149 1150 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, 1151 AUTO_SHARE, &uq->uq_key)) != 0) 1152 return (error); 1153 1154 umtxq_lock(&uq->uq_key); 1155 umtxq_busy(&uq->uq_key); 1156 umtxq_insert(uq); 1157 umtxq_unbusy(&uq->uq_key); 1158 umtxq_unlock(&uq->uq_key); 1159 1160 /* 1161 * Set the contested bit so that a release in user space 1162 * knows to use the system call for unlock. If this fails 1163 * either some one else has acquired the lock or it has been 1164 * released. 1165 */ 1166 old = casuword32(m, owner, owner | UMUTEX_CONTESTED); 1167 1168 /* The address was invalid. */ 1169 if (old == -1) { 1170 umtxq_lock(&uq->uq_key); 1171 umtxq_remove(uq); 1172 umtxq_unlock(&uq->uq_key); 1173 umtx_key_release(&uq->uq_key); 1174 return (EFAULT); 1175 } 1176 1177 /* 1178 * We set the contested bit, sleep. Otherwise the lock changed 1179 * and we need to retry or we lost a race to the thread 1180 * unlocking the umtx. 1181 */ 1182 umtxq_lock(&uq->uq_key); 1183 if (old == owner) 1184 error = umtxq_sleep(uq, "umtx", timeout == NULL ? 1185 NULL : &timo); 1186 umtxq_remove(uq); 1187 umtxq_unlock(&uq->uq_key); 1188 umtx_key_release(&uq->uq_key); 1189 1190 if (error == 0) 1191 error = thread_check_susp(td, false); 1192 } 1193 1194 if (timeout == NULL) { 1195 /* Mutex locking is restarted if it is interrupted. */ 1196 if (error == EINTR) 1197 error = ERESTART; 1198 } else { 1199 /* Timed-locking is not restarted. */ 1200 if (error == ERESTART) 1201 error = EINTR; 1202 } 1203 return (error); 1204 } 1205 1206 /* 1207 * Unlock a umtx object. 1208 */ 1209 static int 1210 do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id) 1211 { 1212 struct umtx_key key; 1213 uint32_t owner; 1214 uint32_t old; 1215 int error; 1216 int count; 1217 1218 /* 1219 * Make sure we own this mtx. 1220 */ 1221 owner = fuword32(m); 1222 if (owner == -1) 1223 return (EFAULT); 1224 1225 if ((owner & ~UMUTEX_CONTESTED) != id) 1226 return (EPERM); 1227 1228 /* This should be done in userland */ 1229 if ((owner & UMUTEX_CONTESTED) == 0) { 1230 old = casuword32(m, owner, UMUTEX_UNOWNED); 1231 if (old == -1) 1232 return (EFAULT); 1233 if (old == owner) 1234 return (0); 1235 owner = old; 1236 } 1237 1238 /* We should only ever be in here for contested locks */ 1239 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE, 1240 &key)) != 0) 1241 return (error); 1242 1243 umtxq_lock(&key); 1244 umtxq_busy(&key); 1245 count = umtxq_count(&key); 1246 umtxq_unlock(&key); 1247 1248 /* 1249 * When unlocking the umtx, it must be marked as unowned if 1250 * there is zero or one thread only waiting for it. 1251 * Otherwise, it must be marked as contested. 1252 */ 1253 old = casuword32(m, owner, 1254 count <= 1 ? 
UMUTEX_UNOWNED : UMUTEX_CONTESTED); 1255 umtxq_lock(&key); 1256 umtxq_signal(&key,1); 1257 umtxq_unbusy(&key); 1258 umtxq_unlock(&key); 1259 umtx_key_release(&key); 1260 if (old == -1) 1261 return (EFAULT); 1262 if (old != owner) 1263 return (EINVAL); 1264 return (0); 1265 } 1266 #endif /* COMPAT_FREEBSD32 */ 1267 #endif /* COMPAT_FREEBSD10 */ 1268 1269 /* 1270 * Fetch and compare value, sleep on the address if value is not changed. 1271 */ 1272 static int 1273 do_wait(struct thread *td, void *addr, u_long id, 1274 struct _umtx_time *timeout, int compat32, int is_private) 1275 { 1276 struct umtx_abs_timeout timo; 1277 struct umtx_q *uq; 1278 u_long tmp; 1279 uint32_t tmp32; 1280 int error = 0; 1281 1282 uq = td->td_umtxq; 1283 if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT, 1284 is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0) 1285 return (error); 1286 1287 if (timeout != NULL) 1288 umtx_abs_timeout_init2(&timo, timeout); 1289 1290 umtxq_lock(&uq->uq_key); 1291 umtxq_insert(uq); 1292 umtxq_unlock(&uq->uq_key); 1293 if (compat32 == 0) { 1294 error = fueword(addr, &tmp); 1295 if (error != 0) 1296 error = EFAULT; 1297 } else { 1298 error = fueword32(addr, &tmp32); 1299 if (error == 0) 1300 tmp = tmp32; 1301 else 1302 error = EFAULT; 1303 } 1304 umtxq_lock(&uq->uq_key); 1305 if (error == 0) { 1306 if (tmp == id) 1307 error = umtxq_sleep(uq, "uwait", timeout == NULL ? 1308 NULL : &timo); 1309 if ((uq->uq_flags & UQF_UMTXQ) == 0) 1310 error = 0; 1311 else 1312 umtxq_remove(uq); 1313 } else if ((uq->uq_flags & UQF_UMTXQ) != 0) { 1314 umtxq_remove(uq); 1315 } 1316 umtxq_unlock(&uq->uq_key); 1317 umtx_key_release(&uq->uq_key); 1318 if (error == ERESTART) 1319 error = EINTR; 1320 return (error); 1321 } 1322 1323 /* 1324 * Wake up threads sleeping on the specified address. 1325 */ 1326 int 1327 kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private) 1328 { 1329 struct umtx_key key; 1330 int ret; 1331 1332 if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, 1333 is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0) 1334 return (ret); 1335 umtxq_lock(&key); 1336 umtxq_signal(&key, n_wake); 1337 umtxq_unlock(&key); 1338 umtx_key_release(&key); 1339 return (0); 1340 } 1341 1342 /* 1343 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex. 1344 */ 1345 static int 1346 do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, 1347 struct _umtx_time *timeout, int mode) 1348 { 1349 struct umtx_abs_timeout timo; 1350 struct umtx_q *uq; 1351 uint32_t owner, old, id; 1352 int error, rv; 1353 1354 id = td->td_tid; 1355 uq = td->td_umtxq; 1356 error = 0; 1357 if (timeout != NULL) 1358 umtx_abs_timeout_init2(&timo, timeout); 1359 1360 /* 1361 * Care must be exercised when dealing with umtx structure. It 1362 * can fault on any access. 1363 */ 1364 for (;;) { 1365 rv = fueword32(&m->m_owner, &owner); 1366 if (rv == -1) 1367 return (EFAULT); 1368 if (mode == _UMUTEX_WAIT) { 1369 if (owner == UMUTEX_UNOWNED || 1370 owner == UMUTEX_CONTESTED || 1371 owner == UMUTEX_RB_OWNERDEAD || 1372 owner == UMUTEX_RB_NOTRECOV) 1373 return (0); 1374 } else { 1375 /* 1376 * Robust mutex terminated. Kernel duty is to 1377 * return EOWNERDEAD to the userspace. The 1378 * umutex.m_flags UMUTEX_NONCONSISTENT is set 1379 * by the common userspace code. 
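 * Userland-side sketch of the robust recovery protocol (standard POSIX
 * semantics, not code from this file; repair_state() is a placeholder
 * for application recovery):
 *
 *	if (pthread_mutex_lock(&m) == EOWNERDEAD) {
 *		repair_state();
 *		pthread_mutex_consistent(&m);
 *	}
 *
 * Unlocking without pthread_mutex_consistent() leaves the mutex
 * unrecoverable; subsequent lock attempts then fail with
 * ENOTRECOVERABLE (UMUTEX_RB_NOTRECOV handled below).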
1380 */ 1381 if (owner == UMUTEX_RB_OWNERDEAD) { 1382 rv = casueword32(&m->m_owner, 1383 UMUTEX_RB_OWNERDEAD, &owner, 1384 id | UMUTEX_CONTESTED); 1385 if (rv == -1) 1386 return (EFAULT); 1387 if (rv == 0) { 1388 MPASS(owner == UMUTEX_RB_OWNERDEAD); 1389 return (EOWNERDEAD); /* success */ 1390 } 1391 MPASS(rv == 1); 1392 rv = thread_check_susp(td, false); 1393 if (rv != 0) 1394 return (rv); 1395 continue; 1396 } 1397 if (owner == UMUTEX_RB_NOTRECOV) 1398 return (ENOTRECOVERABLE); 1399 1400 /* 1401 * Try the uncontested case. This should be 1402 * done in userland. 1403 */ 1404 rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, 1405 &owner, id); 1406 /* The address was invalid. */ 1407 if (rv == -1) 1408 return (EFAULT); 1409 1410 /* The acquire succeeded. */ 1411 if (rv == 0) { 1412 MPASS(owner == UMUTEX_UNOWNED); 1413 return (0); 1414 } 1415 1416 /* 1417 * If no one owns it but it is contested try 1418 * to acquire it. 1419 */ 1420 MPASS(rv == 1); 1421 if (owner == UMUTEX_CONTESTED) { 1422 rv = casueword32(&m->m_owner, 1423 UMUTEX_CONTESTED, &owner, 1424 id | UMUTEX_CONTESTED); 1425 /* The address was invalid. */ 1426 if (rv == -1) 1427 return (EFAULT); 1428 if (rv == 0) { 1429 MPASS(owner == UMUTEX_CONTESTED); 1430 return (0); 1431 } 1432 if (rv == 1) { 1433 rv = thread_check_susp(td, false); 1434 if (rv != 0) 1435 return (rv); 1436 } 1437 1438 /* 1439 * If this failed the lock has 1440 * changed, restart. 1441 */ 1442 continue; 1443 } 1444 1445 /* rv == 1 but not contested, likely store failure */ 1446 rv = thread_check_susp(td, false); 1447 if (rv != 0) 1448 return (rv); 1449 } 1450 1451 if (mode == _UMUTEX_TRY) 1452 return (EBUSY); 1453 1454 /* 1455 * If we caught a signal, we have retried and now 1456 * exit immediately. 1457 */ 1458 if (error != 0) 1459 return (error); 1460 1461 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, 1462 GET_SHARE(flags), &uq->uq_key)) != 0) 1463 return (error); 1464 1465 umtxq_lock(&uq->uq_key); 1466 umtxq_busy(&uq->uq_key); 1467 umtxq_insert(uq); 1468 umtxq_unlock(&uq->uq_key); 1469 1470 /* 1471 * Set the contested bit so that a release in user space 1472 * knows to use the system call for unlock. If this fails 1473 * either some one else has acquired the lock or it has been 1474 * released. 1475 */ 1476 rv = casueword32(&m->m_owner, owner, &old, 1477 owner | UMUTEX_CONTESTED); 1478 1479 /* The address was invalid or casueword failed to store. */ 1480 if (rv == -1 || rv == 1) { 1481 umtxq_lock(&uq->uq_key); 1482 umtxq_remove(uq); 1483 umtxq_unbusy(&uq->uq_key); 1484 umtxq_unlock(&uq->uq_key); 1485 umtx_key_release(&uq->uq_key); 1486 if (rv == -1) 1487 return (EFAULT); 1488 if (rv == 1) { 1489 rv = thread_check_susp(td, false); 1490 if (rv != 0) 1491 return (rv); 1492 } 1493 continue; 1494 } 1495 1496 /* 1497 * We set the contested bit, sleep. Otherwise the lock changed 1498 * and we need to retry or we lost a race to the thread 1499 * unlocking the umtx. 1500 */ 1501 umtxq_lock(&uq->uq_key); 1502 umtxq_unbusy(&uq->uq_key); 1503 MPASS(old == owner); 1504 error = umtxq_sleep(uq, "umtxn", timeout == NULL ? 1505 NULL : &timo); 1506 umtxq_remove(uq); 1507 umtxq_unlock(&uq->uq_key); 1508 umtx_key_release(&uq->uq_key); 1509 1510 if (error == 0) 1511 error = thread_check_susp(td, false); 1512 } 1513 1514 return (0); 1515 } 1516 1517 /* 1518 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex. 
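 * The value stored back into m_owner comes from umtx_unlock_val()
 * above, or'ed with UMUTEX_CONTESTED when more than one waiter
 * remains:
 *
 *	rb	UMUTEX_NONCONSISTENT	stored value
 *	true	(any)			UMUTEX_RB_OWNERDEAD
 *	false	set			UMUTEX_RB_NOTRECOV
 *	false	clear			UMUTEX_UNOWNED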
1519 */ 1520 static int 1521 do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags, bool rb) 1522 { 1523 struct umtx_key key; 1524 uint32_t owner, old, id, newlock; 1525 int error, count; 1526 1527 id = td->td_tid; 1528 1529 again: 1530 /* 1531 * Make sure we own this mtx. 1532 */ 1533 error = fueword32(&m->m_owner, &owner); 1534 if (error == -1) 1535 return (EFAULT); 1536 1537 if ((owner & ~UMUTEX_CONTESTED) != id) 1538 return (EPERM); 1539 1540 newlock = umtx_unlock_val(flags, rb); 1541 if ((owner & UMUTEX_CONTESTED) == 0) { 1542 error = casueword32(&m->m_owner, owner, &old, newlock); 1543 if (error == -1) 1544 return (EFAULT); 1545 if (error == 1) { 1546 error = thread_check_susp(td, false); 1547 if (error != 0) 1548 return (error); 1549 goto again; 1550 } 1551 MPASS(old == owner); 1552 return (0); 1553 } 1554 1555 /* We should only ever be in here for contested locks */ 1556 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags), 1557 &key)) != 0) 1558 return (error); 1559 1560 umtxq_lock(&key); 1561 umtxq_busy(&key); 1562 count = umtxq_count(&key); 1563 umtxq_unlock(&key); 1564 1565 /* 1566 * When unlocking the umtx, it must be marked as unowned if 1567 * there is zero or one thread only waiting for it. 1568 * Otherwise, it must be marked as contested. 1569 */ 1570 if (count > 1) 1571 newlock |= UMUTEX_CONTESTED; 1572 error = casueword32(&m->m_owner, owner, &old, newlock); 1573 umtxq_lock(&key); 1574 umtxq_signal(&key, 1); 1575 umtxq_unbusy(&key); 1576 umtxq_unlock(&key); 1577 umtx_key_release(&key); 1578 if (error == -1) 1579 return (EFAULT); 1580 if (error == 1) { 1581 if (old != owner) 1582 return (EINVAL); 1583 error = thread_check_susp(td, false); 1584 if (error != 0) 1585 return (error); 1586 goto again; 1587 } 1588 return (0); 1589 } 1590 1591 /* 1592 * Check if the mutex is available and wake up a waiter, 1593 * only for simple mutex. 
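 * If at most one waiter is queued, the kernel also tries to reset
 * m_owner from UMUTEX_CONTESTED back to UMUTEX_UNOWNED before waking,
 * so the next locker can take the uncontested fast path in userland.
 * (Presumably this backs the UMTX_OP_MUTEX_WAKE request of
 * _umtx_op(2); the op dispatch table is not shown in this part of the
 * file.)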
1594 */ 1595 static int 1596 do_wake_umutex(struct thread *td, struct umutex *m) 1597 { 1598 struct umtx_key key; 1599 uint32_t owner; 1600 uint32_t flags; 1601 int error; 1602 int count; 1603 1604 again: 1605 error = fueword32(&m->m_owner, &owner); 1606 if (error == -1) 1607 return (EFAULT); 1608 1609 if ((owner & ~UMUTEX_CONTESTED) != 0 && owner != UMUTEX_RB_OWNERDEAD && 1610 owner != UMUTEX_RB_NOTRECOV) 1611 return (0); 1612 1613 error = fueword32(&m->m_flags, &flags); 1614 if (error == -1) 1615 return (EFAULT); 1616 1617 /* We should only ever be in here for contested locks */ 1618 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags), 1619 &key)) != 0) 1620 return (error); 1621 1622 umtxq_lock(&key); 1623 umtxq_busy(&key); 1624 count = umtxq_count(&key); 1625 umtxq_unlock(&key); 1626 1627 if (count <= 1 && owner != UMUTEX_RB_OWNERDEAD && 1628 owner != UMUTEX_RB_NOTRECOV) { 1629 error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, 1630 UMUTEX_UNOWNED); 1631 if (error == -1) { 1632 error = EFAULT; 1633 } else if (error == 1) { 1634 umtxq_lock(&key); 1635 umtxq_unbusy(&key); 1636 umtxq_unlock(&key); 1637 umtx_key_release(&key); 1638 error = thread_check_susp(td, false); 1639 if (error != 0) 1640 return (error); 1641 goto again; 1642 } 1643 } 1644 1645 umtxq_lock(&key); 1646 if (error == 0 && count != 0) { 1647 MPASS((owner & ~UMUTEX_CONTESTED) == 0 || 1648 owner == UMUTEX_RB_OWNERDEAD || 1649 owner == UMUTEX_RB_NOTRECOV); 1650 umtxq_signal(&key, 1); 1651 } 1652 umtxq_unbusy(&key); 1653 umtxq_unlock(&key); 1654 umtx_key_release(&key); 1655 return (error); 1656 } 1657 1658 /* 1659 * Check if the mutex has waiters and tries to fix contention bit. 1660 */ 1661 static int 1662 do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags) 1663 { 1664 struct umtx_key key; 1665 uint32_t owner, old; 1666 int type; 1667 int error; 1668 int count; 1669 1670 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT | 1671 UMUTEX_ROBUST)) { 1672 case 0: 1673 case UMUTEX_ROBUST: 1674 type = TYPE_NORMAL_UMUTEX; 1675 break; 1676 case UMUTEX_PRIO_INHERIT: 1677 type = TYPE_PI_UMUTEX; 1678 break; 1679 case (UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST): 1680 type = TYPE_PI_ROBUST_UMUTEX; 1681 break; 1682 case UMUTEX_PRIO_PROTECT: 1683 type = TYPE_PP_UMUTEX; 1684 break; 1685 case (UMUTEX_PRIO_PROTECT | UMUTEX_ROBUST): 1686 type = TYPE_PP_ROBUST_UMUTEX; 1687 break; 1688 default: 1689 return (EINVAL); 1690 } 1691 if ((error = umtx_key_get(m, type, GET_SHARE(flags), &key)) != 0) 1692 return (error); 1693 1694 owner = 0; 1695 umtxq_lock(&key); 1696 umtxq_busy(&key); 1697 count = umtxq_count(&key); 1698 umtxq_unlock(&key); 1699 1700 error = fueword32(&m->m_owner, &owner); 1701 if (error == -1) 1702 error = EFAULT; 1703 1704 /* 1705 * Only repair contention bit if there is a waiter, this means 1706 * the mutex is still being referenced by userland code, 1707 * otherwise don't update any memory. 
1708 */ 1709 while (error == 0 && (owner & UMUTEX_CONTESTED) == 0 && 1710 (count > 1 || (count == 1 && (owner & ~UMUTEX_CONTESTED) != 0))) { 1711 error = casueword32(&m->m_owner, owner, &old, 1712 owner | UMUTEX_CONTESTED); 1713 if (error == -1) { 1714 error = EFAULT; 1715 break; 1716 } 1717 if (error == 0) { 1718 MPASS(old == owner); 1719 break; 1720 } 1721 owner = old; 1722 error = thread_check_susp(td, false); 1723 } 1724 1725 umtxq_lock(&key); 1726 if (error == EFAULT) { 1727 umtxq_signal(&key, INT_MAX); 1728 } else if (count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 || 1729 owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV)) 1730 umtxq_signal(&key, 1); 1731 umtxq_unbusy(&key); 1732 umtxq_unlock(&key); 1733 umtx_key_release(&key); 1734 return (error); 1735 } 1736 1737 struct umtx_pi * 1738 umtx_pi_alloc(int flags) 1739 { 1740 struct umtx_pi *pi; 1741 1742 pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags); 1743 TAILQ_INIT(&pi->pi_blocked); 1744 atomic_add_int(&umtx_pi_allocated, 1); 1745 return (pi); 1746 } 1747 1748 void 1749 umtx_pi_free(struct umtx_pi *pi) 1750 { 1751 uma_zfree(umtx_pi_zone, pi); 1752 atomic_add_int(&umtx_pi_allocated, -1); 1753 } 1754 1755 /* 1756 * Adjust the thread's position on a pi_state after its priority has been 1757 * changed. 1758 */ 1759 static int 1760 umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td) 1761 { 1762 struct umtx_q *uq, *uq1, *uq2; 1763 struct thread *td1; 1764 1765 mtx_assert(&umtx_lock, MA_OWNED); 1766 if (pi == NULL) 1767 return (0); 1768 1769 uq = td->td_umtxq; 1770 1771 /* 1772 * Check if the thread needs to be moved on the blocked chain. 1773 * It needs to be moved if either its priority is lower than 1774 * the previous thread or higher than the next thread. 1775 */ 1776 uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq); 1777 uq2 = TAILQ_NEXT(uq, uq_lockq); 1778 if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) || 1779 (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) { 1780 /* 1781 * Remove thread from blocked chain and determine where 1782 * it should be moved to. 1783 */ 1784 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq); 1785 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) { 1786 td1 = uq1->uq_thread; 1787 MPASS(td1->td_proc->p_magic == P_MAGIC); 1788 if (UPRI(td1) > UPRI(td)) 1789 break; 1790 } 1791 1792 if (uq1 == NULL) 1793 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq); 1794 else 1795 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq); 1796 } 1797 return (1); 1798 } 1799 1800 static struct umtx_pi * 1801 umtx_pi_next(struct umtx_pi *pi) 1802 { 1803 struct umtx_q *uq_owner; 1804 1805 if (pi->pi_owner == NULL) 1806 return (NULL); 1807 uq_owner = pi->pi_owner->td_umtxq; 1808 if (uq_owner == NULL) 1809 return (NULL); 1810 return (uq_owner->uq_pi_blocked); 1811 } 1812 1813 /* 1814 * Floyd's Cycle-Finding Algorithm. 1815 */ 1816 static bool 1817 umtx_pi_check_loop(struct umtx_pi *pi) 1818 { 1819 struct umtx_pi *pi1; /* fast iterator */ 1820 1821 mtx_assert(&umtx_lock, MA_OWNED); 1822 if (pi == NULL) 1823 return (false); 1824 pi1 = pi; 1825 for (;;) { 1826 pi = umtx_pi_next(pi); 1827 if (pi == NULL) 1828 break; 1829 pi1 = umtx_pi_next(pi1); 1830 if (pi1 == NULL) 1831 break; 1832 pi1 = umtx_pi_next(pi1); 1833 if (pi1 == NULL) 1834 break; 1835 if (pi == pi1) 1836 return (true); 1837 } 1838 return (false); 1839 } 1840 1841 /* 1842 * Propagate priority when a thread is blocked on POSIX 1843 * PI mutex. 
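 * Worked example (a numerically smaller value is a better priority):
 * if thread T3 at priority 100 blocks on a PI mutex owned by T1 at
 * priority 120, T1 is lent priority 100; if T1 is in turn blocked on
 * another PI mutex owned by T2 at priority 140, the loop below follows
 * T1's uq_pi_blocked link and lends priority 100 to T2 as well, and so
 * on along the ownership chain.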
1844 */ 1845 static void 1846 umtx_propagate_priority(struct thread *td) 1847 { 1848 struct umtx_q *uq; 1849 struct umtx_pi *pi; 1850 int pri; 1851 1852 mtx_assert(&umtx_lock, MA_OWNED); 1853 pri = UPRI(td); 1854 uq = td->td_umtxq; 1855 pi = uq->uq_pi_blocked; 1856 if (pi == NULL) 1857 return; 1858 if (umtx_pi_check_loop(pi)) 1859 return; 1860 1861 for (;;) { 1862 td = pi->pi_owner; 1863 if (td == NULL || td == curthread) 1864 return; 1865 1866 MPASS(td->td_proc != NULL); 1867 MPASS(td->td_proc->p_magic == P_MAGIC); 1868 1869 thread_lock(td); 1870 if (td->td_lend_user_pri > pri) 1871 sched_lend_user_prio(td, pri); 1872 else { 1873 thread_unlock(td); 1874 break; 1875 } 1876 thread_unlock(td); 1877 1878 /* 1879 * Pick up the lock that td is blocked on. 1880 */ 1881 uq = td->td_umtxq; 1882 pi = uq->uq_pi_blocked; 1883 if (pi == NULL) 1884 break; 1885 /* Resort td on the list if needed. */ 1886 umtx_pi_adjust_thread(pi, td); 1887 } 1888 } 1889 1890 /* 1891 * Unpropagate priority for a PI mutex when a thread blocked on 1892 * it is interrupted by signal or resumed by others. 1893 */ 1894 static void 1895 umtx_repropagate_priority(struct umtx_pi *pi) 1896 { 1897 struct umtx_q *uq, *uq_owner; 1898 struct umtx_pi *pi2; 1899 int pri; 1900 1901 mtx_assert(&umtx_lock, MA_OWNED); 1902 1903 if (umtx_pi_check_loop(pi)) 1904 return; 1905 while (pi != NULL && pi->pi_owner != NULL) { 1906 pri = PRI_MAX; 1907 uq_owner = pi->pi_owner->td_umtxq; 1908 1909 TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) { 1910 uq = TAILQ_FIRST(&pi2->pi_blocked); 1911 if (uq != NULL) { 1912 if (pri > UPRI(uq->uq_thread)) 1913 pri = UPRI(uq->uq_thread); 1914 } 1915 } 1916 1917 if (pri > uq_owner->uq_inherited_pri) 1918 pri = uq_owner->uq_inherited_pri; 1919 thread_lock(pi->pi_owner); 1920 sched_lend_user_prio(pi->pi_owner, pri); 1921 thread_unlock(pi->pi_owner); 1922 if ((pi = uq_owner->uq_pi_blocked) != NULL) 1923 umtx_pi_adjust_thread(pi, uq_owner->uq_thread); 1924 } 1925 } 1926 1927 /* 1928 * Insert a PI mutex into owned list. 1929 */ 1930 static void 1931 umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner) 1932 { 1933 struct umtx_q *uq_owner; 1934 1935 uq_owner = owner->td_umtxq; 1936 mtx_assert(&umtx_lock, MA_OWNED); 1937 MPASS(pi->pi_owner == NULL); 1938 pi->pi_owner = owner; 1939 TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link); 1940 } 1941 1942 /* 1943 * Disown a PI mutex, and remove it from the owned list. 1944 */ 1945 static void 1946 umtx_pi_disown(struct umtx_pi *pi) 1947 { 1948 1949 mtx_assert(&umtx_lock, MA_OWNED); 1950 TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, pi, pi_link); 1951 pi->pi_owner = NULL; 1952 } 1953 1954 /* 1955 * Claim ownership of a PI mutex. 1956 */ 1957 int 1958 umtx_pi_claim(struct umtx_pi *pi, struct thread *owner) 1959 { 1960 struct umtx_q *uq; 1961 int pri; 1962 1963 mtx_lock(&umtx_lock); 1964 if (pi->pi_owner == owner) { 1965 mtx_unlock(&umtx_lock); 1966 return (0); 1967 } 1968 1969 if (pi->pi_owner != NULL) { 1970 /* 1971 * userland may have already messed the mutex, sigh. 
1972 */ 1973 mtx_unlock(&umtx_lock); 1974 return (EPERM); 1975 } 1976 umtx_pi_setowner(pi, owner); 1977 uq = TAILQ_FIRST(&pi->pi_blocked); 1978 if (uq != NULL) { 1979 pri = UPRI(uq->uq_thread); 1980 thread_lock(owner); 1981 if (pri < UPRI(owner)) 1982 sched_lend_user_prio(owner, pri); 1983 thread_unlock(owner); 1984 } 1985 mtx_unlock(&umtx_lock); 1986 return (0); 1987 } 1988 1989 /* 1990 * Adjust a thread's order position in its blocked PI mutex, 1991 * this may result new priority propagating process. 1992 */ 1993 void 1994 umtx_pi_adjust(struct thread *td, u_char oldpri) 1995 { 1996 struct umtx_q *uq; 1997 struct umtx_pi *pi; 1998 1999 uq = td->td_umtxq; 2000 mtx_lock(&umtx_lock); 2001 /* 2002 * Pick up the lock that td is blocked on. 2003 */ 2004 pi = uq->uq_pi_blocked; 2005 if (pi != NULL) { 2006 umtx_pi_adjust_thread(pi, td); 2007 umtx_repropagate_priority(pi); 2008 } 2009 mtx_unlock(&umtx_lock); 2010 } 2011 2012 /* 2013 * Sleep on a PI mutex. 2014 */ 2015 int 2016 umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, uint32_t owner, 2017 const char *wmesg, struct umtx_abs_timeout *timo, bool shared) 2018 { 2019 struct thread *td, *td1; 2020 struct umtx_q *uq1; 2021 int error, pri; 2022 #ifdef INVARIANTS 2023 struct umtxq_chain *uc; 2024 2025 uc = umtxq_getchain(&pi->pi_key); 2026 #endif 2027 error = 0; 2028 td = uq->uq_thread; 2029 KASSERT(td == curthread, ("inconsistent uq_thread")); 2030 UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key)); 2031 KASSERT(uc->uc_busy != 0, ("umtx chain is not busy")); 2032 umtxq_insert(uq); 2033 mtx_lock(&umtx_lock); 2034 if (pi->pi_owner == NULL) { 2035 mtx_unlock(&umtx_lock); 2036 td1 = tdfind(owner, shared ? -1 : td->td_proc->p_pid); 2037 mtx_lock(&umtx_lock); 2038 if (td1 != NULL) { 2039 if (pi->pi_owner == NULL) 2040 umtx_pi_setowner(pi, td1); 2041 PROC_UNLOCK(td1->td_proc); 2042 } 2043 } 2044 2045 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) { 2046 pri = UPRI(uq1->uq_thread); 2047 if (pri > UPRI(td)) 2048 break; 2049 } 2050 2051 if (uq1 != NULL) 2052 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq); 2053 else 2054 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq); 2055 2056 uq->uq_pi_blocked = pi; 2057 thread_lock(td); 2058 td->td_flags |= TDF_UPIBLOCKED; 2059 thread_unlock(td); 2060 umtx_propagate_priority(td); 2061 mtx_unlock(&umtx_lock); 2062 umtxq_unbusy(&uq->uq_key); 2063 2064 error = umtxq_sleep(uq, wmesg, timo); 2065 umtxq_remove(uq); 2066 2067 mtx_lock(&umtx_lock); 2068 uq->uq_pi_blocked = NULL; 2069 thread_lock(td); 2070 td->td_flags &= ~TDF_UPIBLOCKED; 2071 thread_unlock(td); 2072 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq); 2073 umtx_repropagate_priority(pi); 2074 mtx_unlock(&umtx_lock); 2075 umtxq_unlock(&uq->uq_key); 2076 2077 return (error); 2078 } 2079 2080 /* 2081 * Add reference count for a PI mutex. 2082 */ 2083 void 2084 umtx_pi_ref(struct umtx_pi *pi) 2085 { 2086 2087 UMTXQ_LOCKED_ASSERT(umtxq_getchain(&pi->pi_key)); 2088 pi->pi_refcount++; 2089 } 2090 2091 /* 2092 * Decrease reference count for a PI mutex, if the counter 2093 * is decreased to zero, its memory space is freed. 
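 * References pair with umtx_pi_ref() under the same chain lock.
 * Sketch of the usual pattern, as in do_lock_pi() below (illustrative;
 * assumes the umtx_pi already exists on the chain):
 *
 *	umtxq_lock(&key);
 *	pi = umtx_pi_lookup(&key);
 *	umtx_pi_ref(pi);
 *	umtxq_unlock(&key);
 *	...
 *	umtxq_lock(&key);
 *	umtx_pi_unref(pi);
 *	umtxq_unlock(&key);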
2094 */ 2095 void 2096 umtx_pi_unref(struct umtx_pi *pi) 2097 { 2098 struct umtxq_chain *uc; 2099 2100 uc = umtxq_getchain(&pi->pi_key); 2101 UMTXQ_LOCKED_ASSERT(uc); 2102 KASSERT(pi->pi_refcount > 0, ("invalid reference count")); 2103 if (--pi->pi_refcount == 0) { 2104 mtx_lock(&umtx_lock); 2105 if (pi->pi_owner != NULL) 2106 umtx_pi_disown(pi); 2107 KASSERT(TAILQ_EMPTY(&pi->pi_blocked), 2108 ("blocked queue not empty")); 2109 mtx_unlock(&umtx_lock); 2110 TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink); 2111 umtx_pi_free(pi); 2112 } 2113 } 2114 2115 /* 2116 * Find a PI mutex in hash table. 2117 */ 2118 struct umtx_pi * 2119 umtx_pi_lookup(struct umtx_key *key) 2120 { 2121 struct umtxq_chain *uc; 2122 struct umtx_pi *pi; 2123 2124 uc = umtxq_getchain(key); 2125 UMTXQ_LOCKED_ASSERT(uc); 2126 2127 TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) { 2128 if (umtx_key_match(&pi->pi_key, key)) { 2129 return (pi); 2130 } 2131 } 2132 return (NULL); 2133 } 2134 2135 /* 2136 * Insert a PI mutex into hash table. 2137 */ 2138 void 2139 umtx_pi_insert(struct umtx_pi *pi) 2140 { 2141 struct umtxq_chain *uc; 2142 2143 uc = umtxq_getchain(&pi->pi_key); 2144 UMTXQ_LOCKED_ASSERT(uc); 2145 TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink); 2146 } 2147 2148 /* 2149 * Drop a PI mutex and wakeup a top waiter. 2150 */ 2151 int 2152 umtx_pi_drop(struct thread *td, struct umtx_key *key, bool rb, int *count) 2153 { 2154 struct umtx_q *uq_first, *uq_first2, *uq_me; 2155 struct umtx_pi *pi, *pi2; 2156 int pri; 2157 2158 UMTXQ_ASSERT_LOCKED_BUSY(key); 2159 *count = umtxq_count_pi(key, &uq_first); 2160 if (uq_first != NULL) { 2161 mtx_lock(&umtx_lock); 2162 pi = uq_first->uq_pi_blocked; 2163 KASSERT(pi != NULL, ("pi == NULL?")); 2164 if (pi->pi_owner != td && !(rb && pi->pi_owner == NULL)) { 2165 mtx_unlock(&umtx_lock); 2166 /* userland messed the mutex */ 2167 return (EPERM); 2168 } 2169 uq_me = td->td_umtxq; 2170 if (pi->pi_owner == td) 2171 umtx_pi_disown(pi); 2172 /* get highest priority thread which is still sleeping. */ 2173 uq_first = TAILQ_FIRST(&pi->pi_blocked); 2174 while (uq_first != NULL && 2175 (uq_first->uq_flags & UQF_UMTXQ) == 0) { 2176 uq_first = TAILQ_NEXT(uq_first, uq_lockq); 2177 } 2178 pri = PRI_MAX; 2179 TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) { 2180 uq_first2 = TAILQ_FIRST(&pi2->pi_blocked); 2181 if (uq_first2 != NULL) { 2182 if (pri > UPRI(uq_first2->uq_thread)) 2183 pri = UPRI(uq_first2->uq_thread); 2184 } 2185 } 2186 thread_lock(td); 2187 sched_lend_user_prio(td, pri); 2188 thread_unlock(td); 2189 mtx_unlock(&umtx_lock); 2190 if (uq_first) 2191 umtxq_signal_thread(uq_first); 2192 } else { 2193 pi = umtx_pi_lookup(key); 2194 /* 2195 * A umtx_pi can exist if a signal or timeout removed the 2196 * last waiter from the umtxq, but there is still 2197 * a thread in do_lock_pi() holding the umtx_pi. 2198 */ 2199 if (pi != NULL) { 2200 /* 2201 * The umtx_pi can be unowned, such as when a thread 2202 * has just entered do_lock_pi(), allocated the 2203 * umtx_pi, and unlocked the umtxq. 2204 * If the current thread owns it, it must disown it. 2205 */ 2206 mtx_lock(&umtx_lock); 2207 if (pi->pi_owner == td) 2208 umtx_pi_disown(pi); 2209 mtx_unlock(&umtx_lock); 2210 } 2211 } 2212 return (0); 2213 } 2214 2215 /* 2216 * Lock a PI mutex. 
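 * Userland-side sketch (illustrative; assumes the usual libthr mapping
 * of PTHREAD_PRIO_INHERIT onto UMUTEX_PRIO_INHERIT, under which a
 * contended lock ends up here):
 *
 *	pthread_mutexattr_t attr;
 *	pthread_mutex_t m;
 *
 *	pthread_mutexattr_init(&attr);
 *	pthread_mutexattr_setprotocol(&attr, PTHREAD_PRIO_INHERIT);
 *	pthread_mutex_init(&m, &attr);
 *	pthread_mutex_lock(&m);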
2217 */ 2218 static int 2219 do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, 2220 struct _umtx_time *timeout, int try) 2221 { 2222 struct umtx_abs_timeout timo; 2223 struct umtx_q *uq; 2224 struct umtx_pi *pi, *new_pi; 2225 uint32_t id, old_owner, owner, old; 2226 int error, rv; 2227 2228 id = td->td_tid; 2229 uq = td->td_umtxq; 2230 2231 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2232 TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags), 2233 &uq->uq_key)) != 0) 2234 return (error); 2235 2236 if (timeout != NULL) 2237 umtx_abs_timeout_init2(&timo, timeout); 2238 2239 umtxq_lock(&uq->uq_key); 2240 pi = umtx_pi_lookup(&uq->uq_key); 2241 if (pi == NULL) { 2242 new_pi = umtx_pi_alloc(M_NOWAIT); 2243 if (new_pi == NULL) { 2244 umtxq_unlock(&uq->uq_key); 2245 new_pi = umtx_pi_alloc(M_WAITOK); 2246 umtxq_lock(&uq->uq_key); 2247 pi = umtx_pi_lookup(&uq->uq_key); 2248 if (pi != NULL) { 2249 umtx_pi_free(new_pi); 2250 new_pi = NULL; 2251 } 2252 } 2253 if (new_pi != NULL) { 2254 new_pi->pi_key = uq->uq_key; 2255 umtx_pi_insert(new_pi); 2256 pi = new_pi; 2257 } 2258 } 2259 umtx_pi_ref(pi); 2260 umtxq_unlock(&uq->uq_key); 2261 2262 /* 2263 * Care must be exercised when dealing with umtx structure. It 2264 * can fault on any access. 2265 */ 2266 for (;;) { 2267 /* 2268 * Try the uncontested case. This should be done in userland. 2269 */ 2270 rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id); 2271 /* The address was invalid. */ 2272 if (rv == -1) { 2273 error = EFAULT; 2274 break; 2275 } 2276 /* The acquire succeeded. */ 2277 if (rv == 0) { 2278 MPASS(owner == UMUTEX_UNOWNED); 2279 error = 0; 2280 break; 2281 } 2282 2283 if (owner == UMUTEX_RB_NOTRECOV) { 2284 error = ENOTRECOVERABLE; 2285 break; 2286 } 2287 2288 /* 2289 * Nobody owns it, but the acquire failed. This can happen 2290 * with ll/sc atomics. 2291 */ 2292 if (owner == UMUTEX_UNOWNED) { 2293 error = thread_check_susp(td, true); 2294 if (error != 0) 2295 break; 2296 continue; 2297 } 2298 2299 /* 2300 * Avoid overwriting a possible error from sleep due 2301 * to the pending signal with suspension check result. 2302 */ 2303 if (error == 0) { 2304 error = thread_check_susp(td, true); 2305 if (error != 0) 2306 break; 2307 } 2308 2309 /* If no one owns it but it is contested try to acquire it. */ 2310 if (owner == UMUTEX_CONTESTED || owner == UMUTEX_RB_OWNERDEAD) { 2311 old_owner = owner; 2312 rv = casueword32(&m->m_owner, owner, &owner, 2313 id | UMUTEX_CONTESTED); 2314 /* The address was invalid. */ 2315 if (rv == -1) { 2316 error = EFAULT; 2317 break; 2318 } 2319 if (rv == 1) { 2320 if (error == 0) { 2321 error = thread_check_susp(td, true); 2322 if (error != 0) 2323 break; 2324 } 2325 2326 /* 2327 * If this failed the lock could 2328 * changed, restart. 2329 */ 2330 continue; 2331 } 2332 2333 MPASS(rv == 0); 2334 MPASS(owner == old_owner); 2335 umtxq_lock(&uq->uq_key); 2336 umtxq_busy(&uq->uq_key); 2337 error = umtx_pi_claim(pi, td); 2338 umtxq_unbusy(&uq->uq_key); 2339 umtxq_unlock(&uq->uq_key); 2340 if (error != 0) { 2341 /* 2342 * Since we're going to return an 2343 * error, restore the m_owner to its 2344 * previous, unowned state to avoid 2345 * compounding the problem. 
2346 */ 2347 (void)casuword32(&m->m_owner, 2348 id | UMUTEX_CONTESTED, old_owner); 2349 } 2350 if (error == 0 && old_owner == UMUTEX_RB_OWNERDEAD) 2351 error = EOWNERDEAD; 2352 break; 2353 } 2354 2355 if ((owner & ~UMUTEX_CONTESTED) == id) { 2356 error = EDEADLK; 2357 break; 2358 } 2359 2360 if (try != 0) { 2361 error = EBUSY; 2362 break; 2363 } 2364 2365 /* 2366 * If we caught a signal, we have retried and now 2367 * exit immediately. 2368 */ 2369 if (error != 0) 2370 break; 2371 2372 umtxq_lock(&uq->uq_key); 2373 umtxq_busy(&uq->uq_key); 2374 umtxq_unlock(&uq->uq_key); 2375 2376 /* 2377 * Set the contested bit so that a release in user space 2378 * knows to use the system call for unlock. If this fails 2379 * either some one else has acquired the lock or it has been 2380 * released. 2381 */ 2382 rv = casueword32(&m->m_owner, owner, &old, owner | 2383 UMUTEX_CONTESTED); 2384 2385 /* The address was invalid. */ 2386 if (rv == -1) { 2387 umtxq_unbusy_unlocked(&uq->uq_key); 2388 error = EFAULT; 2389 break; 2390 } 2391 if (rv == 1) { 2392 umtxq_unbusy_unlocked(&uq->uq_key); 2393 error = thread_check_susp(td, true); 2394 if (error != 0) 2395 break; 2396 2397 /* 2398 * The lock changed and we need to retry or we 2399 * lost a race to the thread unlocking the 2400 * umtx. Note that the UMUTEX_RB_OWNERDEAD 2401 * value for owner is impossible there. 2402 */ 2403 continue; 2404 } 2405 2406 umtxq_lock(&uq->uq_key); 2407 2408 /* We set the contested bit, sleep. */ 2409 MPASS(old == owner); 2410 error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED, 2411 "umtxpi", timeout == NULL ? NULL : &timo, 2412 (flags & USYNC_PROCESS_SHARED) != 0); 2413 if (error != 0) 2414 continue; 2415 2416 error = thread_check_susp(td, false); 2417 if (error != 0) 2418 break; 2419 } 2420 2421 umtxq_lock(&uq->uq_key); 2422 umtx_pi_unref(pi); 2423 umtxq_unlock(&uq->uq_key); 2424 2425 umtx_key_release(&uq->uq_key); 2426 return (error); 2427 } 2428 2429 /* 2430 * Unlock a PI mutex. 2431 */ 2432 static int 2433 do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags, bool rb) 2434 { 2435 struct umtx_key key; 2436 uint32_t id, new_owner, old, owner; 2437 int count, error; 2438 2439 id = td->td_tid; 2440 2441 usrloop: 2442 /* 2443 * Make sure we own this mtx. 2444 */ 2445 error = fueword32(&m->m_owner, &owner); 2446 if (error == -1) 2447 return (EFAULT); 2448 2449 if ((owner & ~UMUTEX_CONTESTED) != id) 2450 return (EPERM); 2451 2452 new_owner = umtx_unlock_val(flags, rb); 2453 2454 /* This should be done in userland */ 2455 if ((owner & UMUTEX_CONTESTED) == 0) { 2456 error = casueword32(&m->m_owner, owner, &old, new_owner); 2457 if (error == -1) 2458 return (EFAULT); 2459 if (error == 1) { 2460 error = thread_check_susp(td, true); 2461 if (error != 0) 2462 return (error); 2463 goto usrloop; 2464 } 2465 if (old == owner) 2466 return (0); 2467 owner = old; 2468 } 2469 2470 /* We should only ever be in here for contested locks */ 2471 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2472 TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags), 2473 &key)) != 0) 2474 return (error); 2475 2476 umtxq_lock(&key); 2477 umtxq_busy(&key); 2478 error = umtx_pi_drop(td, &key, rb, &count); 2479 if (error != 0) { 2480 umtxq_unbusy(&key); 2481 umtxq_unlock(&key); 2482 umtx_key_release(&key); 2483 /* userland messed the mutex */ 2484 return (error); 2485 } 2486 umtxq_unlock(&key); 2487 2488 /* 2489 * When unlocking the umtx, it must be marked as unowned if 2490 * there is zero or one thread only waiting for it. 
2491 * Otherwise, it must be marked as contested. 2492 */ 2493 2494 if (count > 1) 2495 new_owner |= UMUTEX_CONTESTED; 2496 again: 2497 error = casueword32(&m->m_owner, owner, &old, new_owner); 2498 if (error == 1) { 2499 error = thread_check_susp(td, false); 2500 if (error == 0) 2501 goto again; 2502 } 2503 umtxq_unbusy_unlocked(&key); 2504 umtx_key_release(&key); 2505 if (error == -1) 2506 return (EFAULT); 2507 if (error == 0 && old != owner) 2508 return (EINVAL); 2509 return (error); 2510 } 2511 2512 /* 2513 * Lock a PP mutex. 2514 */ 2515 static int 2516 do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, 2517 struct _umtx_time *timeout, int try) 2518 { 2519 struct umtx_abs_timeout timo; 2520 struct umtx_q *uq, *uq2; 2521 struct umtx_pi *pi; 2522 uint32_t ceiling; 2523 uint32_t owner, id; 2524 int error, pri, old_inherited_pri, new_pri, rv; 2525 bool su; 2526 2527 id = td->td_tid; 2528 uq = td->td_umtxq; 2529 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2530 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2531 &uq->uq_key)) != 0) 2532 return (error); 2533 2534 if (timeout != NULL) 2535 umtx_abs_timeout_init2(&timo, timeout); 2536 2537 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); 2538 for (;;) { 2539 old_inherited_pri = uq->uq_inherited_pri; 2540 umtxq_lock(&uq->uq_key); 2541 umtxq_busy(&uq->uq_key); 2542 umtxq_unlock(&uq->uq_key); 2543 2544 rv = fueword32(&m->m_ceilings[0], &ceiling); 2545 if (rv == -1) { 2546 error = EFAULT; 2547 goto out; 2548 } 2549 ceiling = RTP_PRIO_MAX - ceiling; 2550 if (ceiling > RTP_PRIO_MAX) { 2551 error = EINVAL; 2552 goto out; 2553 } 2554 new_pri = PRI_MIN_REALTIME + ceiling; 2555 2556 if (td->td_base_user_pri < new_pri) { 2557 error = EINVAL; 2558 goto out; 2559 } 2560 if (su) { 2561 mtx_lock(&umtx_lock); 2562 if (new_pri < uq->uq_inherited_pri) { 2563 uq->uq_inherited_pri = new_pri; 2564 thread_lock(td); 2565 if (new_pri < UPRI(td)) 2566 sched_lend_user_prio(td, new_pri); 2567 thread_unlock(td); 2568 } 2569 mtx_unlock(&umtx_lock); 2570 } 2571 2572 rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, 2573 id | UMUTEX_CONTESTED); 2574 /* The address was invalid. */ 2575 if (rv == -1) { 2576 error = EFAULT; 2577 break; 2578 } 2579 if (rv == 0) { 2580 MPASS(owner == UMUTEX_CONTESTED); 2581 error = 0; 2582 break; 2583 } 2584 /* rv == 1 */ 2585 if (owner == UMUTEX_RB_OWNERDEAD) { 2586 rv = casueword32(&m->m_owner, UMUTEX_RB_OWNERDEAD, 2587 &owner, id | UMUTEX_CONTESTED); 2588 if (rv == -1) { 2589 error = EFAULT; 2590 break; 2591 } 2592 if (rv == 0) { 2593 MPASS(owner == UMUTEX_RB_OWNERDEAD); 2594 error = EOWNERDEAD; /* success */ 2595 break; 2596 } 2597 2598 /* 2599 * rv == 1, only check for suspension if we 2600 * did not already catched a signal. If we 2601 * get an error from the check, the same 2602 * condition is checked by the umtxq_sleep() 2603 * call below, so we should obliterate the 2604 * error to not skip the last loop iteration. 2605 */ 2606 if (error == 0) { 2607 error = thread_check_susp(td, false); 2608 if (error == 0 && try == 0) { 2609 umtxq_unbusy_unlocked(&uq->uq_key); 2610 continue; 2611 } 2612 error = 0; 2613 } 2614 } else if (owner == UMUTEX_RB_NOTRECOV) { 2615 error = ENOTRECOVERABLE; 2616 } 2617 2618 if (try != 0) 2619 error = EBUSY; 2620 2621 /* 2622 * If we caught a signal, we have retried and now 2623 * exit immediately. 
2624 */ 2625 if (error != 0) 2626 break; 2627 2628 umtxq_lock(&uq->uq_key); 2629 umtxq_insert(uq); 2630 umtxq_unbusy(&uq->uq_key); 2631 error = umtxq_sleep(uq, "umtxpp", timeout == NULL ? 2632 NULL : &timo); 2633 umtxq_remove(uq); 2634 umtxq_unlock(&uq->uq_key); 2635 2636 mtx_lock(&umtx_lock); 2637 uq->uq_inherited_pri = old_inherited_pri; 2638 pri = PRI_MAX; 2639 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2640 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2641 if (uq2 != NULL) { 2642 if (pri > UPRI(uq2->uq_thread)) 2643 pri = UPRI(uq2->uq_thread); 2644 } 2645 } 2646 if (pri > uq->uq_inherited_pri) 2647 pri = uq->uq_inherited_pri; 2648 thread_lock(td); 2649 sched_lend_user_prio(td, pri); 2650 thread_unlock(td); 2651 mtx_unlock(&umtx_lock); 2652 } 2653 2654 if (error != 0 && error != EOWNERDEAD) { 2655 mtx_lock(&umtx_lock); 2656 uq->uq_inherited_pri = old_inherited_pri; 2657 pri = PRI_MAX; 2658 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2659 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2660 if (uq2 != NULL) { 2661 if (pri > UPRI(uq2->uq_thread)) 2662 pri = UPRI(uq2->uq_thread); 2663 } 2664 } 2665 if (pri > uq->uq_inherited_pri) 2666 pri = uq->uq_inherited_pri; 2667 thread_lock(td); 2668 sched_lend_user_prio(td, pri); 2669 thread_unlock(td); 2670 mtx_unlock(&umtx_lock); 2671 } 2672 2673 out: 2674 umtxq_unbusy_unlocked(&uq->uq_key); 2675 umtx_key_release(&uq->uq_key); 2676 return (error); 2677 } 2678 2679 /* 2680 * Unlock a PP mutex. 2681 */ 2682 static int 2683 do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, bool rb) 2684 { 2685 struct umtx_key key; 2686 struct umtx_q *uq, *uq2; 2687 struct umtx_pi *pi; 2688 uint32_t id, owner, rceiling; 2689 int error, pri, new_inherited_pri; 2690 bool su; 2691 2692 id = td->td_tid; 2693 uq = td->td_umtxq; 2694 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); 2695 2696 /* 2697 * Make sure we own this mtx. 2698 */ 2699 error = fueword32(&m->m_owner, &owner); 2700 if (error == -1) 2701 return (EFAULT); 2702 2703 if ((owner & ~UMUTEX_CONTESTED) != id) 2704 return (EPERM); 2705 2706 error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t)); 2707 if (error != 0) 2708 return (error); 2709 2710 if (rceiling == -1) 2711 new_inherited_pri = PRI_MAX; 2712 else { 2713 rceiling = RTP_PRIO_MAX - rceiling; 2714 if (rceiling > RTP_PRIO_MAX) 2715 return (EINVAL); 2716 new_inherited_pri = PRI_MIN_REALTIME + rceiling; 2717 } 2718 2719 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2720 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2721 &key)) != 0) 2722 return (error); 2723 umtxq_lock(&key); 2724 umtxq_busy(&key); 2725 umtxq_unlock(&key); 2726 /* 2727 * For priority protected mutex, always set unlocked state 2728 * to UMUTEX_CONTESTED, so that userland always enters kernel 2729 * to lock the mutex, it is necessary because thread priority 2730 * has to be adjusted for such mutex. 
2731 */ 2732 error = suword32(&m->m_owner, umtx_unlock_val(flags, rb) | 2733 UMUTEX_CONTESTED); 2734 2735 umtxq_lock(&key); 2736 if (error == 0) 2737 umtxq_signal(&key, 1); 2738 umtxq_unbusy(&key); 2739 umtxq_unlock(&key); 2740 2741 if (error == -1) 2742 error = EFAULT; 2743 else { 2744 mtx_lock(&umtx_lock); 2745 if (su || new_inherited_pri == PRI_MAX) 2746 uq->uq_inherited_pri = new_inherited_pri; 2747 pri = PRI_MAX; 2748 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2749 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2750 if (uq2 != NULL) { 2751 if (pri > UPRI(uq2->uq_thread)) 2752 pri = UPRI(uq2->uq_thread); 2753 } 2754 } 2755 if (pri > uq->uq_inherited_pri) 2756 pri = uq->uq_inherited_pri; 2757 thread_lock(td); 2758 sched_lend_user_prio(td, pri); 2759 thread_unlock(td); 2760 mtx_unlock(&umtx_lock); 2761 } 2762 umtx_key_release(&key); 2763 return (error); 2764 } 2765 2766 static int 2767 do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling, 2768 uint32_t *old_ceiling) 2769 { 2770 struct umtx_q *uq; 2771 uint32_t flags, id, owner, save_ceiling; 2772 int error, rv, rv1; 2773 2774 error = fueword32(&m->m_flags, &flags); 2775 if (error == -1) 2776 return (EFAULT); 2777 if ((flags & UMUTEX_PRIO_PROTECT) == 0) 2778 return (EINVAL); 2779 if (ceiling > RTP_PRIO_MAX) 2780 return (EINVAL); 2781 id = td->td_tid; 2782 uq = td->td_umtxq; 2783 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2784 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2785 &uq->uq_key)) != 0) 2786 return (error); 2787 for (;;) { 2788 umtxq_lock(&uq->uq_key); 2789 umtxq_busy(&uq->uq_key); 2790 umtxq_unlock(&uq->uq_key); 2791 2792 rv = fueword32(&m->m_ceilings[0], &save_ceiling); 2793 if (rv == -1) { 2794 error = EFAULT; 2795 break; 2796 } 2797 2798 rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, 2799 id | UMUTEX_CONTESTED); 2800 if (rv == -1) { 2801 error = EFAULT; 2802 break; 2803 } 2804 2805 if (rv == 0) { 2806 MPASS(owner == UMUTEX_CONTESTED); 2807 rv = suword32(&m->m_ceilings[0], ceiling); 2808 rv1 = suword32(&m->m_owner, UMUTEX_CONTESTED); 2809 error = (rv == 0 && rv1 == 0) ? 0: EFAULT; 2810 break; 2811 } 2812 2813 if ((owner & ~UMUTEX_CONTESTED) == id) { 2814 rv = suword32(&m->m_ceilings[0], ceiling); 2815 error = rv == 0 ? 0 : EFAULT; 2816 break; 2817 } 2818 2819 if (owner == UMUTEX_RB_OWNERDEAD) { 2820 error = EOWNERDEAD; 2821 break; 2822 } else if (owner == UMUTEX_RB_NOTRECOV) { 2823 error = ENOTRECOVERABLE; 2824 break; 2825 } 2826 2827 /* 2828 * If we caught a signal, we have retried and now 2829 * exit immediately. 2830 */ 2831 if (error != 0) 2832 break; 2833 2834 /* 2835 * We set the contested bit, sleep. Otherwise the lock changed 2836 * and we need to retry or we lost a race to the thread 2837 * unlocking the umtx. 2838 */ 2839 umtxq_lock(&uq->uq_key); 2840 umtxq_insert(uq); 2841 umtxq_unbusy(&uq->uq_key); 2842 error = umtxq_sleep(uq, "umtxpp", NULL); 2843 umtxq_remove(uq); 2844 umtxq_unlock(&uq->uq_key); 2845 } 2846 umtxq_lock(&uq->uq_key); 2847 if (error == 0) 2848 umtxq_signal(&uq->uq_key, INT_MAX); 2849 umtxq_unbusy(&uq->uq_key); 2850 umtxq_unlock(&uq->uq_key); 2851 umtx_key_release(&uq->uq_key); 2852 if (error == 0 && old_ceiling != NULL) { 2853 rv = suword32(old_ceiling, save_ceiling); 2854 error = rv == 0 ? 0 : EFAULT; 2855 } 2856 return (error); 2857 } 2858 2859 /* 2860 * Lock a userland POSIX mutex. 
2861 */ 2862 static int 2863 do_lock_umutex(struct thread *td, struct umutex *m, 2864 struct _umtx_time *timeout, int mode) 2865 { 2866 uint32_t flags; 2867 int error; 2868 2869 error = fueword32(&m->m_flags, &flags); 2870 if (error == -1) 2871 return (EFAULT); 2872 2873 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2874 case 0: 2875 error = do_lock_normal(td, m, flags, timeout, mode); 2876 break; 2877 case UMUTEX_PRIO_INHERIT: 2878 error = do_lock_pi(td, m, flags, timeout, mode); 2879 break; 2880 case UMUTEX_PRIO_PROTECT: 2881 error = do_lock_pp(td, m, flags, timeout, mode); 2882 break; 2883 default: 2884 return (EINVAL); 2885 } 2886 if (timeout == NULL) { 2887 if (error == EINTR && mode != _UMUTEX_WAIT) 2888 error = ERESTART; 2889 } else { 2890 /* Timed-locking is not restarted. */ 2891 if (error == ERESTART) 2892 error = EINTR; 2893 } 2894 return (error); 2895 } 2896 2897 /* 2898 * Unlock a userland POSIX mutex. 2899 */ 2900 static int 2901 do_unlock_umutex(struct thread *td, struct umutex *m, bool rb) 2902 { 2903 uint32_t flags; 2904 int error; 2905 2906 error = fueword32(&m->m_flags, &flags); 2907 if (error == -1) 2908 return (EFAULT); 2909 2910 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2911 case 0: 2912 return (do_unlock_normal(td, m, flags, rb)); 2913 case UMUTEX_PRIO_INHERIT: 2914 return (do_unlock_pi(td, m, flags, rb)); 2915 case UMUTEX_PRIO_PROTECT: 2916 return (do_unlock_pp(td, m, flags, rb)); 2917 } 2918 2919 return (EINVAL); 2920 } 2921 2922 static int 2923 do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m, 2924 struct timespec *timeout, u_long wflags) 2925 { 2926 struct umtx_abs_timeout timo; 2927 struct umtx_q *uq; 2928 uint32_t flags, clockid, hasw; 2929 int error; 2930 2931 uq = td->td_umtxq; 2932 error = fueword32(&cv->c_flags, &flags); 2933 if (error == -1) 2934 return (EFAULT); 2935 error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key); 2936 if (error != 0) 2937 return (error); 2938 2939 if ((wflags & CVWAIT_CLOCKID) != 0) { 2940 error = fueword32(&cv->c_clockid, &clockid); 2941 if (error == -1) { 2942 umtx_key_release(&uq->uq_key); 2943 return (EFAULT); 2944 } 2945 if (clockid < CLOCK_REALTIME || 2946 clockid >= CLOCK_THREAD_CPUTIME_ID) { 2947 /* hmm, only HW clock id will work. */ 2948 umtx_key_release(&uq->uq_key); 2949 return (EINVAL); 2950 } 2951 } else { 2952 clockid = CLOCK_REALTIME; 2953 } 2954 2955 umtxq_lock(&uq->uq_key); 2956 umtxq_busy(&uq->uq_key); 2957 umtxq_insert(uq); 2958 umtxq_unlock(&uq->uq_key); 2959 2960 /* 2961 * Set c_has_waiters to 1 before releasing user mutex, also 2962 * don't modify cache line when unnecessary. 2963 */ 2964 error = fueword32(&cv->c_has_waiters, &hasw); 2965 if (error == 0 && hasw == 0) 2966 error = suword32(&cv->c_has_waiters, 1); 2967 if (error != 0) { 2968 umtxq_lock(&uq->uq_key); 2969 umtxq_remove(uq); 2970 umtxq_unbusy(&uq->uq_key); 2971 error = EFAULT; 2972 goto out; 2973 } 2974 2975 umtxq_unbusy_unlocked(&uq->uq_key); 2976 2977 error = do_unlock_umutex(td, m, false); 2978 2979 if (timeout != NULL) 2980 umtx_abs_timeout_init(&timo, clockid, 2981 (wflags & CVWAIT_ABSTIME) != 0, timeout); 2982 2983 umtxq_lock(&uq->uq_key); 2984 if (error == 0) { 2985 error = umtxq_sleep(uq, "ucond", timeout == NULL ? 2986 NULL : &timo); 2987 } 2988 2989 if ((uq->uq_flags & UQF_UMTXQ) == 0) 2990 error = 0; 2991 else { 2992 /* 2993 * This must be timeout,interrupted by signal or 2994 * surprious wakeup, clear c_has_waiter flag when 2995 * necessary. 
2996 */ 2997 umtxq_busy(&uq->uq_key); 2998 if ((uq->uq_flags & UQF_UMTXQ) != 0) { 2999 int oldlen = uq->uq_cur_queue->length; 3000 umtxq_remove(uq); 3001 if (oldlen == 1) { 3002 umtxq_unlock(&uq->uq_key); 3003 if (suword32(&cv->c_has_waiters, 0) != 0 && 3004 error == 0) 3005 error = EFAULT; 3006 umtxq_lock(&uq->uq_key); 3007 } 3008 } 3009 umtxq_unbusy(&uq->uq_key); 3010 if (error == ERESTART) 3011 error = EINTR; 3012 } 3013 out: 3014 umtxq_unlock(&uq->uq_key); 3015 umtx_key_release(&uq->uq_key); 3016 return (error); 3017 } 3018 3019 /* 3020 * Signal a userland condition variable. 3021 */ 3022 static int 3023 do_cv_signal(struct thread *td, struct ucond *cv) 3024 { 3025 struct umtx_key key; 3026 int error, cnt, nwake; 3027 uint32_t flags; 3028 3029 error = fueword32(&cv->c_flags, &flags); 3030 if (error == -1) 3031 return (EFAULT); 3032 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 3033 return (error); 3034 umtxq_lock(&key); 3035 umtxq_busy(&key); 3036 cnt = umtxq_count(&key); 3037 nwake = umtxq_signal(&key, 1); 3038 if (cnt <= nwake) { 3039 umtxq_unlock(&key); 3040 error = suword32(&cv->c_has_waiters, 0); 3041 if (error == -1) 3042 error = EFAULT; 3043 umtxq_lock(&key); 3044 } 3045 umtxq_unbusy(&key); 3046 umtxq_unlock(&key); 3047 umtx_key_release(&key); 3048 return (error); 3049 } 3050 3051 static int 3052 do_cv_broadcast(struct thread *td, struct ucond *cv) 3053 { 3054 struct umtx_key key; 3055 int error; 3056 uint32_t flags; 3057 3058 error = fueword32(&cv->c_flags, &flags); 3059 if (error == -1) 3060 return (EFAULT); 3061 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 3062 return (error); 3063 3064 umtxq_lock(&key); 3065 umtxq_busy(&key); 3066 umtxq_signal(&key, INT_MAX); 3067 umtxq_unlock(&key); 3068 3069 error = suword32(&cv->c_has_waiters, 0); 3070 if (error == -1) 3071 error = EFAULT; 3072 3073 umtxq_unbusy_unlocked(&key); 3074 3075 umtx_key_release(&key); 3076 return (error); 3077 } 3078 3079 static int 3080 do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, 3081 struct _umtx_time *timeout) 3082 { 3083 struct umtx_abs_timeout timo; 3084 struct umtx_q *uq; 3085 uint32_t flags, wrflags; 3086 int32_t state, oldstate; 3087 int32_t blocked_readers; 3088 int error, error1, rv; 3089 3090 uq = td->td_umtxq; 3091 error = fueword32(&rwlock->rw_flags, &flags); 3092 if (error == -1) 3093 return (EFAULT); 3094 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3095 if (error != 0) 3096 return (error); 3097 3098 if (timeout != NULL) 3099 umtx_abs_timeout_init2(&timo, timeout); 3100 3101 wrflags = URWLOCK_WRITE_OWNER; 3102 if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER)) 3103 wrflags |= URWLOCK_WRITE_WAITERS; 3104 3105 for (;;) { 3106 rv = fueword32(&rwlock->rw_state, &state); 3107 if (rv == -1) { 3108 umtx_key_release(&uq->uq_key); 3109 return (EFAULT); 3110 } 3111 3112 /* try to lock it */ 3113 while (!(state & wrflags)) { 3114 if (__predict_false(URWLOCK_READER_COUNT(state) == 3115 URWLOCK_MAX_READERS)) { 3116 umtx_key_release(&uq->uq_key); 3117 return (EAGAIN); 3118 } 3119 rv = casueword32(&rwlock->rw_state, state, 3120 &oldstate, state + 1); 3121 if (rv == -1) { 3122 umtx_key_release(&uq->uq_key); 3123 return (EFAULT); 3124 } 3125 if (rv == 0) { 3126 MPASS(oldstate == state); 3127 umtx_key_release(&uq->uq_key); 3128 return (0); 3129 } 3130 error = thread_check_susp(td, true); 3131 if (error != 0) 3132 break; 3133 state = oldstate; 3134 } 3135 3136 if (error) 3137 break; 3138 3139 
/* grab monitor lock */ 3140 umtxq_lock(&uq->uq_key); 3141 umtxq_busy(&uq->uq_key); 3142 umtxq_unlock(&uq->uq_key); 3143 3144 /* 3145 * re-read the state, in case it changed between the try-lock above 3146 * and the check below 3147 */ 3148 rv = fueword32(&rwlock->rw_state, &state); 3149 if (rv == -1) 3150 error = EFAULT; 3151 3152 /* set read contention bit */ 3153 while (error == 0 && (state & wrflags) && 3154 !(state & URWLOCK_READ_WAITERS)) { 3155 rv = casueword32(&rwlock->rw_state, state, 3156 &oldstate, state | URWLOCK_READ_WAITERS); 3157 if (rv == -1) { 3158 error = EFAULT; 3159 break; 3160 } 3161 if (rv == 0) { 3162 MPASS(oldstate == state); 3163 goto sleep; 3164 } 3165 state = oldstate; 3166 error = thread_check_susp(td, false); 3167 if (error != 0) 3168 break; 3169 } 3170 if (error != 0) { 3171 umtxq_unbusy_unlocked(&uq->uq_key); 3172 break; 3173 } 3174 3175 /* state is changed while setting flags, restart */ 3176 if (!(state & wrflags)) { 3177 umtxq_unbusy_unlocked(&uq->uq_key); 3178 error = thread_check_susp(td, true); 3179 if (error != 0) 3180 break; 3181 continue; 3182 } 3183 3184 sleep: 3185 /* 3186 * Contention bit is set, before sleeping, increase 3187 * read waiter count. 3188 */ 3189 rv = fueword32(&rwlock->rw_blocked_readers, 3190 &blocked_readers); 3191 if (rv == 0) 3192 rv = suword32(&rwlock->rw_blocked_readers, 3193 blocked_readers + 1); 3194 if (rv == -1) { 3195 umtxq_unbusy_unlocked(&uq->uq_key); 3196 error = EFAULT; 3197 break; 3198 } 3199 3200 while (state & wrflags) { 3201 umtxq_lock(&uq->uq_key); 3202 umtxq_insert(uq); 3203 umtxq_unbusy(&uq->uq_key); 3204 3205 error = umtxq_sleep(uq, "urdlck", timeout == NULL ? 3206 NULL : &timo); 3207 3208 umtxq_busy(&uq->uq_key); 3209 umtxq_remove(uq); 3210 umtxq_unlock(&uq->uq_key); 3211 if (error) 3212 break; 3213 rv = fueword32(&rwlock->rw_state, &state); 3214 if (rv == -1) { 3215 error = EFAULT; 3216 break; 3217 } 3218 } 3219 3220 /* decrease read waiter count, and may clear read contention bit */ 3221 rv = fueword32(&rwlock->rw_blocked_readers, 3222 &blocked_readers); 3223 if (rv == 0) 3224 rv = suword32(&rwlock->rw_blocked_readers, 3225 blocked_readers - 1); 3226 if (rv == -1) { 3227 umtxq_unbusy_unlocked(&uq->uq_key); 3228 error = EFAULT; 3229 break; 3230 } 3231 if (blocked_readers == 1) { 3232 rv = fueword32(&rwlock->rw_state, &state); 3233 if (rv == -1) { 3234 umtxq_unbusy_unlocked(&uq->uq_key); 3235 error = EFAULT; 3236 break; 3237 } 3238 for (;;) { 3239 rv = casueword32(&rwlock->rw_state, state, 3240 &oldstate, state & ~URWLOCK_READ_WAITERS); 3241 if (rv == -1) { 3242 error = EFAULT; 3243 break; 3244 } 3245 if (rv == 0) { 3246 MPASS(oldstate == state); 3247 break; 3248 } 3249 state = oldstate; 3250 error1 = thread_check_susp(td, false); 3251 if (error1 != 0) { 3252 if (error == 0) 3253 error = error1; 3254 break; 3255 } 3256 } 3257 } 3258 3259 umtxq_unbusy_unlocked(&uq->uq_key); 3260 if (error != 0) 3261 break; 3262 } 3263 umtx_key_release(&uq->uq_key); 3264 if (error == ERESTART) 3265 error = EINTR; 3266 return (error); 3267 } 3268 3269 static int 3270 do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout) 3271 { 3272 struct umtx_abs_timeout timo; 3273 struct umtx_q *uq; 3274 uint32_t flags; 3275 int32_t state, oldstate; 3276 int32_t blocked_writers; 3277 int32_t blocked_readers; 3278 int error, error1, rv; 3279 3280 uq = td->td_umtxq; 3281 error = fueword32(&rwlock->rw_flags, &flags); 3282 if (error == -1) 3283 return (EFAULT); 3284 error = umtx_key_get(rwlock, TYPE_RWLOCK, 
GET_SHARE(flags), &uq->uq_key); 3285 if (error != 0) 3286 return (error); 3287 3288 if (timeout != NULL) 3289 umtx_abs_timeout_init2(&timo, timeout); 3290 3291 blocked_readers = 0; 3292 for (;;) { 3293 rv = fueword32(&rwlock->rw_state, &state); 3294 if (rv == -1) { 3295 umtx_key_release(&uq->uq_key); 3296 return (EFAULT); 3297 } 3298 while ((state & URWLOCK_WRITE_OWNER) == 0 && 3299 URWLOCK_READER_COUNT(state) == 0) { 3300 rv = casueword32(&rwlock->rw_state, state, 3301 &oldstate, state | URWLOCK_WRITE_OWNER); 3302 if (rv == -1) { 3303 umtx_key_release(&uq->uq_key); 3304 return (EFAULT); 3305 } 3306 if (rv == 0) { 3307 MPASS(oldstate == state); 3308 umtx_key_release(&uq->uq_key); 3309 return (0); 3310 } 3311 state = oldstate; 3312 error = thread_check_susp(td, true); 3313 if (error != 0) 3314 break; 3315 } 3316 3317 if (error) { 3318 if ((state & (URWLOCK_WRITE_OWNER | 3319 URWLOCK_WRITE_WAITERS)) == 0 && 3320 blocked_readers != 0) { 3321 umtxq_lock(&uq->uq_key); 3322 umtxq_busy(&uq->uq_key); 3323 umtxq_signal_queue(&uq->uq_key, INT_MAX, 3324 UMTX_SHARED_QUEUE); 3325 umtxq_unbusy(&uq->uq_key); 3326 umtxq_unlock(&uq->uq_key); 3327 } 3328 3329 break; 3330 } 3331 3332 /* grab monitor lock */ 3333 umtxq_lock(&uq->uq_key); 3334 umtxq_busy(&uq->uq_key); 3335 umtxq_unlock(&uq->uq_key); 3336 3337 /* 3338 * Re-read the state, in case it changed between the 3339 * try-lock above and the check below. 3340 */ 3341 rv = fueword32(&rwlock->rw_state, &state); 3342 if (rv == -1) 3343 error = EFAULT; 3344 3345 while (error == 0 && ((state & URWLOCK_WRITE_OWNER) || 3346 URWLOCK_READER_COUNT(state) != 0) && 3347 (state & URWLOCK_WRITE_WAITERS) == 0) { 3348 rv = casueword32(&rwlock->rw_state, state, 3349 &oldstate, state | URWLOCK_WRITE_WAITERS); 3350 if (rv == -1) { 3351 error = EFAULT; 3352 break; 3353 } 3354 if (rv == 0) { 3355 MPASS(oldstate == state); 3356 goto sleep; 3357 } 3358 state = oldstate; 3359 error = thread_check_susp(td, false); 3360 if (error != 0) 3361 break; 3362 } 3363 if (error != 0) { 3364 umtxq_unbusy_unlocked(&uq->uq_key); 3365 break; 3366 } 3367 3368 if ((state & URWLOCK_WRITE_OWNER) == 0 && 3369 URWLOCK_READER_COUNT(state) == 0) { 3370 umtxq_unbusy_unlocked(&uq->uq_key); 3371 error = thread_check_susp(td, false); 3372 if (error != 0) 3373 break; 3374 continue; 3375 } 3376 sleep: 3377 rv = fueword32(&rwlock->rw_blocked_writers, 3378 &blocked_writers); 3379 if (rv == 0) 3380 rv = suword32(&rwlock->rw_blocked_writers, 3381 blocked_writers + 1); 3382 if (rv == -1) { 3383 umtxq_unbusy_unlocked(&uq->uq_key); 3384 error = EFAULT; 3385 break; 3386 } 3387 3388 while ((state & URWLOCK_WRITE_OWNER) || 3389 URWLOCK_READER_COUNT(state) != 0) { 3390 umtxq_lock(&uq->uq_key); 3391 umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE); 3392 umtxq_unbusy(&uq->uq_key); 3393 3394 error = umtxq_sleep(uq, "uwrlck", timeout == NULL ? 
3395 NULL : &timo); 3396 3397 umtxq_busy(&uq->uq_key); 3398 umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE); 3399 umtxq_unlock(&uq->uq_key); 3400 if (error) 3401 break; 3402 rv = fueword32(&rwlock->rw_state, &state); 3403 if (rv == -1) { 3404 error = EFAULT; 3405 break; 3406 } 3407 } 3408 3409 rv = fueword32(&rwlock->rw_blocked_writers, 3410 &blocked_writers); 3411 if (rv == 0) 3412 rv = suword32(&rwlock->rw_blocked_writers, 3413 blocked_writers - 1); 3414 if (rv == -1) { 3415 umtxq_unbusy_unlocked(&uq->uq_key); 3416 error = EFAULT; 3417 break; 3418 } 3419 if (blocked_writers == 1) { 3420 rv = fueword32(&rwlock->rw_state, &state); 3421 if (rv == -1) { 3422 umtxq_unbusy_unlocked(&uq->uq_key); 3423 error = EFAULT; 3424 break; 3425 } 3426 for (;;) { 3427 rv = casueword32(&rwlock->rw_state, state, 3428 &oldstate, state & ~URWLOCK_WRITE_WAITERS); 3429 if (rv == -1) { 3430 error = EFAULT; 3431 break; 3432 } 3433 if (rv == 0) { 3434 MPASS(oldstate == state); 3435 break; 3436 } 3437 state = oldstate; 3438 error1 = thread_check_susp(td, false); 3439 /* 3440 * We are leaving the URWLOCK_WRITE_WAITERS 3441 * behind, but this should not harm the 3442 * correctness. 3443 */ 3444 if (error1 != 0) { 3445 if (error == 0) 3446 error = error1; 3447 break; 3448 } 3449 } 3450 rv = fueword32(&rwlock->rw_blocked_readers, 3451 &blocked_readers); 3452 if (rv == -1) { 3453 umtxq_unbusy_unlocked(&uq->uq_key); 3454 error = EFAULT; 3455 break; 3456 } 3457 } else 3458 blocked_readers = 0; 3459 3460 umtxq_unbusy_unlocked(&uq->uq_key); 3461 } 3462 3463 umtx_key_release(&uq->uq_key); 3464 if (error == ERESTART) 3465 error = EINTR; 3466 return (error); 3467 } 3468 3469 static int 3470 do_rw_unlock(struct thread *td, struct urwlock *rwlock) 3471 { 3472 struct umtx_q *uq; 3473 uint32_t flags; 3474 int32_t state, oldstate; 3475 int error, rv, q, count; 3476 3477 uq = td->td_umtxq; 3478 error = fueword32(&rwlock->rw_flags, &flags); 3479 if (error == -1) 3480 return (EFAULT); 3481 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3482 if (error != 0) 3483 return (error); 3484 3485 error = fueword32(&rwlock->rw_state, &state); 3486 if (error == -1) { 3487 error = EFAULT; 3488 goto out; 3489 } 3490 if (state & URWLOCK_WRITE_OWNER) { 3491 for (;;) { 3492 rv = casueword32(&rwlock->rw_state, state, 3493 &oldstate, state & ~URWLOCK_WRITE_OWNER); 3494 if (rv == -1) { 3495 error = EFAULT; 3496 goto out; 3497 } 3498 if (rv == 1) { 3499 state = oldstate; 3500 if (!(oldstate & URWLOCK_WRITE_OWNER)) { 3501 error = EPERM; 3502 goto out; 3503 } 3504 error = thread_check_susp(td, true); 3505 if (error != 0) 3506 goto out; 3507 } else 3508 break; 3509 } 3510 } else if (URWLOCK_READER_COUNT(state) != 0) { 3511 for (;;) { 3512 rv = casueword32(&rwlock->rw_state, state, 3513 &oldstate, state - 1); 3514 if (rv == -1) { 3515 error = EFAULT; 3516 goto out; 3517 } 3518 if (rv == 1) { 3519 state = oldstate; 3520 if (URWLOCK_READER_COUNT(oldstate) == 0) { 3521 error = EPERM; 3522 goto out; 3523 } 3524 error = thread_check_susp(td, true); 3525 if (error != 0) 3526 goto out; 3527 } else 3528 break; 3529 } 3530 } else { 3531 error = EPERM; 3532 goto out; 3533 } 3534 3535 count = 0; 3536 3537 if (!(flags & URWLOCK_PREFER_READER)) { 3538 if (state & URWLOCK_WRITE_WAITERS) { 3539 count = 1; 3540 q = UMTX_EXCLUSIVE_QUEUE; 3541 } else if (state & URWLOCK_READ_WAITERS) { 3542 count = INT_MAX; 3543 q = UMTX_SHARED_QUEUE; 3544 } 3545 } else { 3546 if (state & URWLOCK_READ_WAITERS) { 3547 count = INT_MAX; 3548 q = UMTX_SHARED_QUEUE; 3549 } 
else if (state & URWLOCK_WRITE_WAITERS) { 3550 count = 1; 3551 q = UMTX_EXCLUSIVE_QUEUE; 3552 } 3553 } 3554 3555 if (count) { 3556 umtxq_lock(&uq->uq_key); 3557 umtxq_busy(&uq->uq_key); 3558 umtxq_signal_queue(&uq->uq_key, count, q); 3559 umtxq_unbusy(&uq->uq_key); 3560 umtxq_unlock(&uq->uq_key); 3561 } 3562 out: 3563 umtx_key_release(&uq->uq_key); 3564 return (error); 3565 } 3566 3567 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3568 static int 3569 do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout) 3570 { 3571 struct umtx_abs_timeout timo; 3572 struct umtx_q *uq; 3573 uint32_t flags, count, count1; 3574 int error, rv, rv1; 3575 3576 uq = td->td_umtxq; 3577 error = fueword32(&sem->_flags, &flags); 3578 if (error == -1) 3579 return (EFAULT); 3580 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3581 if (error != 0) 3582 return (error); 3583 3584 if (timeout != NULL) 3585 umtx_abs_timeout_init2(&timo, timeout); 3586 3587 again: 3588 umtxq_lock(&uq->uq_key); 3589 umtxq_busy(&uq->uq_key); 3590 umtxq_insert(uq); 3591 umtxq_unlock(&uq->uq_key); 3592 rv = casueword32(&sem->_has_waiters, 0, &count1, 1); 3593 if (rv != -1) 3594 rv1 = fueword32(&sem->_count, &count); 3595 if (rv == -1 || rv1 == -1 || count != 0 || (rv == 1 && count1 == 0)) { 3596 if (rv == 0) 3597 rv = suword32(&sem->_has_waiters, 0); 3598 umtxq_lock(&uq->uq_key); 3599 umtxq_unbusy(&uq->uq_key); 3600 umtxq_remove(uq); 3601 umtxq_unlock(&uq->uq_key); 3602 if (rv == -1 || rv1 == -1) { 3603 error = EFAULT; 3604 goto out; 3605 } 3606 if (count != 0) { 3607 error = 0; 3608 goto out; 3609 } 3610 MPASS(rv == 1 && count1 == 0); 3611 rv = thread_check_susp(td, true); 3612 if (rv == 0) 3613 goto again; 3614 error = rv; 3615 goto out; 3616 } 3617 umtxq_lock(&uq->uq_key); 3618 umtxq_unbusy(&uq->uq_key); 3619 3620 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 3621 3622 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3623 error = 0; 3624 else { 3625 umtxq_remove(uq); 3626 /* A relative timeout cannot be restarted. */ 3627 if (error == ERESTART && timeout != NULL && 3628 (timeout->_flags & UMTX_ABSTIME) == 0) 3629 error = EINTR; 3630 } 3631 umtxq_unlock(&uq->uq_key); 3632 out: 3633 umtx_key_release(&uq->uq_key); 3634 return (error); 3635 } 3636 3637 /* 3638 * Signal a userland semaphore. 3639 */ 3640 static int 3641 do_sem_wake(struct thread *td, struct _usem *sem) 3642 { 3643 struct umtx_key key; 3644 int error, cnt; 3645 uint32_t flags; 3646 3647 error = fueword32(&sem->_flags, &flags); 3648 if (error == -1) 3649 return (EFAULT); 3650 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3651 return (error); 3652 umtxq_lock(&key); 3653 umtxq_busy(&key); 3654 cnt = umtxq_count(&key); 3655 if (cnt > 0) { 3656 /* 3657 * Check if count is greater than 0, this means the memory is 3658 * still being referenced by user code, so we can safely 3659 * update _has_waiters flag. 
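 * When the last waiter is being woken (cnt == 1), _has_waiters is
 * cleared before the wakeup, so that a later sem_post() from userland
 * can see that no sleepers are left and skip entering the kernel.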
3660 */ 3661 if (cnt == 1) { 3662 umtxq_unlock(&key); 3663 error = suword32(&sem->_has_waiters, 0); 3664 umtxq_lock(&key); 3665 if (error == -1) 3666 error = EFAULT; 3667 } 3668 umtxq_signal(&key, 1); 3669 } 3670 umtxq_unbusy(&key); 3671 umtxq_unlock(&key); 3672 umtx_key_release(&key); 3673 return (error); 3674 } 3675 #endif 3676 3677 static int 3678 do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout) 3679 { 3680 struct umtx_abs_timeout timo; 3681 struct umtx_q *uq; 3682 uint32_t count, flags; 3683 int error, rv; 3684 3685 uq = td->td_umtxq; 3686 flags = fuword32(&sem->_flags); 3687 if (timeout != NULL) 3688 umtx_abs_timeout_init2(&timo, timeout); 3689 3690 again: 3691 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3692 if (error != 0) 3693 return (error); 3694 umtxq_lock(&uq->uq_key); 3695 umtxq_busy(&uq->uq_key); 3696 umtxq_insert(uq); 3697 umtxq_unlock(&uq->uq_key); 3698 rv = fueword32(&sem->_count, &count); 3699 if (rv == -1) { 3700 umtxq_lock(&uq->uq_key); 3701 umtxq_unbusy(&uq->uq_key); 3702 umtxq_remove(uq); 3703 umtxq_unlock(&uq->uq_key); 3704 umtx_key_release(&uq->uq_key); 3705 return (EFAULT); 3706 } 3707 for (;;) { 3708 if (USEM_COUNT(count) != 0) { 3709 umtxq_lock(&uq->uq_key); 3710 umtxq_unbusy(&uq->uq_key); 3711 umtxq_remove(uq); 3712 umtxq_unlock(&uq->uq_key); 3713 umtx_key_release(&uq->uq_key); 3714 return (0); 3715 } 3716 if (count == USEM_HAS_WAITERS) 3717 break; 3718 rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS); 3719 if (rv == 0) 3720 break; 3721 umtxq_lock(&uq->uq_key); 3722 umtxq_unbusy(&uq->uq_key); 3723 umtxq_remove(uq); 3724 umtxq_unlock(&uq->uq_key); 3725 umtx_key_release(&uq->uq_key); 3726 if (rv == -1) 3727 return (EFAULT); 3728 rv = thread_check_susp(td, true); 3729 if (rv != 0) 3730 return (rv); 3731 goto again; 3732 } 3733 umtxq_lock(&uq->uq_key); 3734 umtxq_unbusy(&uq->uq_key); 3735 3736 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 3737 3738 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3739 error = 0; 3740 else { 3741 umtxq_remove(uq); 3742 if (timeout != NULL && (timeout->_flags & UMTX_ABSTIME) == 0) { 3743 /* A relative timeout cannot be restarted. */ 3744 if (error == ERESTART) 3745 error = EINTR; 3746 if (error == EINTR) { 3747 kern_clock_gettime(curthread, timo.clockid, 3748 &timo.cur); 3749 timespecsub(&timo.end, &timo.cur, 3750 &timeout->_timeout); 3751 } 3752 } 3753 } 3754 umtxq_unlock(&uq->uq_key); 3755 umtx_key_release(&uq->uq_key); 3756 return (error); 3757 } 3758 3759 /* 3760 * Signal a userland semaphore. 3761 */ 3762 static int 3763 do_sem2_wake(struct thread *td, struct _usem2 *sem) 3764 { 3765 struct umtx_key key; 3766 int error, cnt, rv; 3767 uint32_t count, flags; 3768 3769 rv = fueword32(&sem->_flags, &flags); 3770 if (rv == -1) 3771 return (EFAULT); 3772 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3773 return (error); 3774 umtxq_lock(&key); 3775 umtxq_busy(&key); 3776 cnt = umtxq_count(&key); 3777 if (cnt > 0) { 3778 /* 3779 * If this was the last sleeping thread, clear the waiters 3780 * flag in _count. 
3781 */ 3782 if (cnt == 1) { 3783 umtxq_unlock(&key); 3784 rv = fueword32(&sem->_count, &count); 3785 while (rv != -1 && count & USEM_HAS_WAITERS) { 3786 rv = casueword32(&sem->_count, count, &count, 3787 count & ~USEM_HAS_WAITERS); 3788 if (rv == 1) { 3789 rv = thread_check_susp(td, true); 3790 if (rv != 0) 3791 break; 3792 } 3793 } 3794 if (rv == -1) 3795 error = EFAULT; 3796 else if (rv > 0) { 3797 error = rv; 3798 } 3799 umtxq_lock(&key); 3800 } 3801 3802 umtxq_signal(&key, 1); 3803 } 3804 umtxq_unbusy(&key); 3805 umtxq_unlock(&key); 3806 umtx_key_release(&key); 3807 return (error); 3808 } 3809 3810 #ifdef COMPAT_FREEBSD10 3811 int 3812 freebsd10__umtx_lock(struct thread *td, struct freebsd10__umtx_lock_args *uap) 3813 { 3814 return (do_lock_umtx(td, uap->umtx, td->td_tid, 0)); 3815 } 3816 3817 int 3818 freebsd10__umtx_unlock(struct thread *td, 3819 struct freebsd10__umtx_unlock_args *uap) 3820 { 3821 return (do_unlock_umtx(td, uap->umtx, td->td_tid)); 3822 } 3823 #endif 3824 3825 inline int 3826 umtx_copyin_timeout(const void *uaddr, struct timespec *tsp) 3827 { 3828 int error; 3829 3830 error = copyin(uaddr, tsp, sizeof(*tsp)); 3831 if (error == 0) { 3832 if (!timespecvalid_interval(tsp)) 3833 error = EINVAL; 3834 } 3835 return (error); 3836 } 3837 3838 static inline int 3839 umtx_copyin_umtx_time(const void *uaddr, size_t size, struct _umtx_time *tp) 3840 { 3841 int error; 3842 3843 if (size <= sizeof(tp->_timeout)) { 3844 tp->_clockid = CLOCK_REALTIME; 3845 tp->_flags = 0; 3846 error = copyin(uaddr, &tp->_timeout, sizeof(tp->_timeout)); 3847 } else 3848 error = copyin(uaddr, tp, sizeof(*tp)); 3849 if (error != 0) 3850 return (error); 3851 if (!timespecvalid_interval(&tp->_timeout)) 3852 return (EINVAL); 3853 return (0); 3854 } 3855 3856 static int 3857 umtx_copyin_robust_lists(const void *uaddr, size_t size, 3858 struct umtx_robust_lists_params *rb) 3859 { 3860 3861 if (size > sizeof(*rb)) 3862 return (EINVAL); 3863 return (copyin(uaddr, rb, size)); 3864 } 3865 3866 static int 3867 umtx_copyout_timeout(void *uaddr, size_t sz, struct timespec *tsp) 3868 { 3869 3870 /* 3871 * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time) 3872 * and we're only called if sz >= sizeof(timespec) as supplied in the 3873 * copyops. 3874 */ 3875 KASSERT(sz >= sizeof(*tsp), 3876 ("umtx_copyops specifies incorrect sizes")); 3877 3878 return (copyout(tsp, uaddr, sizeof(*tsp))); 3879 } 3880 3881 #ifdef COMPAT_FREEBSD10 3882 static int 3883 __umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap, 3884 const struct umtx_copyops *ops) 3885 { 3886 struct timespec *ts, timeout; 3887 int error; 3888 3889 /* Allow a null timespec (wait forever). 
*/ 3890 if (uap->uaddr2 == NULL) 3891 ts = NULL; 3892 else { 3893 error = ops->copyin_timeout(uap->uaddr2, &timeout); 3894 if (error != 0) 3895 return (error); 3896 ts = &timeout; 3897 } 3898 #ifdef COMPAT_FREEBSD32 3899 if (ops->compat32) 3900 return (do_lock_umtx32(td, uap->obj, uap->val, ts)); 3901 #endif 3902 return (do_lock_umtx(td, uap->obj, uap->val, ts)); 3903 } 3904 3905 static int 3906 __umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap, 3907 const struct umtx_copyops *ops) 3908 { 3909 #ifdef COMPAT_FREEBSD32 3910 if (ops->compat32) 3911 return (do_unlock_umtx32(td, uap->obj, uap->val)); 3912 #endif 3913 return (do_unlock_umtx(td, uap->obj, uap->val)); 3914 } 3915 #endif /* COMPAT_FREEBSD10 */ 3916 3917 #if !defined(COMPAT_FREEBSD10) 3918 static int 3919 __umtx_op_unimpl(struct thread *td __unused, struct _umtx_op_args *uap __unused, 3920 const struct umtx_copyops *ops __unused) 3921 { 3922 return (EOPNOTSUPP); 3923 } 3924 #endif /* COMPAT_FREEBSD10 */ 3925 3926 static int 3927 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap, 3928 const struct umtx_copyops *ops) 3929 { 3930 struct _umtx_time timeout, *tm_p; 3931 int error; 3932 3933 if (uap->uaddr2 == NULL) 3934 tm_p = NULL; 3935 else { 3936 error = ops->copyin_umtx_time( 3937 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3938 if (error != 0) 3939 return (error); 3940 tm_p = &timeout; 3941 } 3942 return (do_wait(td, uap->obj, uap->val, tm_p, ops->compat32, 0)); 3943 } 3944 3945 static int 3946 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap, 3947 const struct umtx_copyops *ops) 3948 { 3949 struct _umtx_time timeout, *tm_p; 3950 int error; 3951 3952 if (uap->uaddr2 == NULL) 3953 tm_p = NULL; 3954 else { 3955 error = ops->copyin_umtx_time( 3956 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3957 if (error != 0) 3958 return (error); 3959 tm_p = &timeout; 3960 } 3961 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); 3962 } 3963 3964 static int 3965 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap, 3966 const struct umtx_copyops *ops) 3967 { 3968 struct _umtx_time *tm_p, timeout; 3969 int error; 3970 3971 if (uap->uaddr2 == NULL) 3972 tm_p = NULL; 3973 else { 3974 error = ops->copyin_umtx_time( 3975 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3976 if (error != 0) 3977 return (error); 3978 tm_p = &timeout; 3979 } 3980 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1)); 3981 } 3982 3983 static int 3984 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap, 3985 const struct umtx_copyops *ops __unused) 3986 { 3987 3988 return (kern_umtx_wake(td, uap->obj, uap->val, 0)); 3989 } 3990 3991 #define BATCH_SIZE 128 3992 static int 3993 __umtx_op_nwake_private_native(struct thread *td, struct _umtx_op_args *uap) 3994 { 3995 char *uaddrs[BATCH_SIZE], **upp; 3996 int count, error, i, pos, tocopy; 3997 3998 upp = (char **)uap->obj; 3999 error = 0; 4000 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 4001 pos += tocopy) { 4002 tocopy = MIN(count, BATCH_SIZE); 4003 error = copyin(upp + pos, uaddrs, tocopy * sizeof(char *)); 4004 if (error != 0) 4005 break; 4006 for (i = 0; i < tocopy; ++i) { 4007 kern_umtx_wake(td, uaddrs[i], INT_MAX, 1); 4008 } 4009 maybe_yield(); 4010 } 4011 return (error); 4012 } 4013 4014 static int 4015 __umtx_op_nwake_private_compat32(struct thread *td, struct _umtx_op_args *uap) 4016 { 4017 uint32_t uaddrs[BATCH_SIZE], *upp; 4018 int count, error, i, pos, tocopy; 4019 4020 upp = (uint32_t *)uap->obj; 4021 error = 0; 4022 for (count = 
uap->val, pos = 0; count > 0; count -= tocopy, 4023 pos += tocopy) { 4024 tocopy = MIN(count, BATCH_SIZE); 4025 error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t)); 4026 if (error != 0) 4027 break; 4028 for (i = 0; i < tocopy; ++i) { 4029 kern_umtx_wake(td, (void *)(uintptr_t)uaddrs[i], 4030 INT_MAX, 1); 4031 } 4032 maybe_yield(); 4033 } 4034 return (error); 4035 } 4036 4037 static int 4038 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap, 4039 const struct umtx_copyops *ops) 4040 { 4041 4042 if (ops->compat32) 4043 return (__umtx_op_nwake_private_compat32(td, uap)); 4044 return (__umtx_op_nwake_private_native(td, uap)); 4045 } 4046 4047 static int 4048 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap, 4049 const struct umtx_copyops *ops __unused) 4050 { 4051 4052 return (kern_umtx_wake(td, uap->obj, uap->val, 1)); 4053 } 4054 4055 static int 4056 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap, 4057 const struct umtx_copyops *ops) 4058 { 4059 struct _umtx_time *tm_p, timeout; 4060 int error; 4061 4062 /* Allow a null timespec (wait forever). */ 4063 if (uap->uaddr2 == NULL) 4064 tm_p = NULL; 4065 else { 4066 error = ops->copyin_umtx_time( 4067 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 4068 if (error != 0) 4069 return (error); 4070 tm_p = &timeout; 4071 } 4072 return (do_lock_umutex(td, uap->obj, tm_p, 0)); 4073 } 4074 4075 static int 4076 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap, 4077 const struct umtx_copyops *ops __unused) 4078 { 4079 4080 return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY)); 4081 } 4082 4083 static int 4084 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap, 4085 const struct umtx_copyops *ops) 4086 { 4087 struct _umtx_time *tm_p, timeout; 4088 int error; 4089 4090 /* Allow a null timespec (wait forever). */ 4091 if (uap->uaddr2 == NULL) 4092 tm_p = NULL; 4093 else { 4094 error = ops->copyin_umtx_time( 4095 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 4096 if (error != 0) 4097 return (error); 4098 tm_p = &timeout; 4099 } 4100 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT)); 4101 } 4102 4103 static int 4104 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap, 4105 const struct umtx_copyops *ops __unused) 4106 { 4107 4108 return (do_wake_umutex(td, uap->obj)); 4109 } 4110 4111 static int 4112 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap, 4113 const struct umtx_copyops *ops __unused) 4114 { 4115 4116 return (do_unlock_umutex(td, uap->obj, false)); 4117 } 4118 4119 static int 4120 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap, 4121 const struct umtx_copyops *ops __unused) 4122 { 4123 4124 return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1)); 4125 } 4126 4127 static int 4128 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap, 4129 const struct umtx_copyops *ops) 4130 { 4131 struct timespec *ts, timeout; 4132 int error; 4133 4134 /* Allow a null timespec (wait forever). 
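 * Otherwise uaddr2 points to a struct timespec; uap->val carries the
 * CVWAIT_* flags (CVWAIT_ABSTIME, CVWAIT_CLOCKID) that do_cv_wait()
 * uses to interpret it.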
*/ 4135 if (uap->uaddr2 == NULL) 4136 ts = NULL; 4137 else { 4138 error = ops->copyin_timeout(uap->uaddr2, &timeout); 4139 if (error != 0) 4140 return (error); 4141 ts = &timeout; 4142 } 4143 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 4144 } 4145 4146 static int 4147 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap, 4148 const struct umtx_copyops *ops __unused) 4149 { 4150 4151 return (do_cv_signal(td, uap->obj)); 4152 } 4153 4154 static int 4155 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap, 4156 const struct umtx_copyops *ops __unused) 4157 { 4158 4159 return (do_cv_broadcast(td, uap->obj)); 4160 } 4161 4162 static int 4163 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap, 4164 const struct umtx_copyops *ops) 4165 { 4166 struct _umtx_time timeout; 4167 int error; 4168 4169 /* Allow a null timespec (wait forever). */ 4170 if (uap->uaddr2 == NULL) { 4171 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 4172 } else { 4173 error = ops->copyin_umtx_time(uap->uaddr2, 4174 (size_t)uap->uaddr1, &timeout); 4175 if (error != 0) 4176 return (error); 4177 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 4178 } 4179 return (error); 4180 } 4181 4182 static int 4183 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap, 4184 const struct umtx_copyops *ops) 4185 { 4186 struct _umtx_time timeout; 4187 int error; 4188 4189 /* Allow a null timespec (wait forever). */ 4190 if (uap->uaddr2 == NULL) { 4191 error = do_rw_wrlock(td, uap->obj, 0); 4192 } else { 4193 error = ops->copyin_umtx_time(uap->uaddr2, 4194 (size_t)uap->uaddr1, &timeout); 4195 if (error != 0) 4196 return (error); 4197 4198 error = do_rw_wrlock(td, uap->obj, &timeout); 4199 } 4200 return (error); 4201 } 4202 4203 static int 4204 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap, 4205 const struct umtx_copyops *ops __unused) 4206 { 4207 4208 return (do_rw_unlock(td, uap->obj)); 4209 } 4210 4211 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4212 static int 4213 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap, 4214 const struct umtx_copyops *ops) 4215 { 4216 struct _umtx_time *tm_p, timeout; 4217 int error; 4218 4219 /* Allow a null timespec (wait forever). */ 4220 if (uap->uaddr2 == NULL) 4221 tm_p = NULL; 4222 else { 4223 error = ops->copyin_umtx_time( 4224 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 4225 if (error != 0) 4226 return (error); 4227 tm_p = &timeout; 4228 } 4229 return (do_sem_wait(td, uap->obj, tm_p)); 4230 } 4231 4232 static int 4233 __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap, 4234 const struct umtx_copyops *ops __unused) 4235 { 4236 4237 return (do_sem_wake(td, uap->obj)); 4238 } 4239 #endif 4240 4241 static int 4242 __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap, 4243 const struct umtx_copyops *ops __unused) 4244 { 4245 4246 return (do_wake2_umutex(td, uap->obj, uap->val)); 4247 } 4248 4249 static int 4250 __umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap, 4251 const struct umtx_copyops *ops) 4252 { 4253 struct _umtx_time *tm_p, timeout; 4254 size_t uasize; 4255 int error; 4256 4257 /* Allow a null timespec (wait forever). 
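 * Otherwise uaddr1 holds the size of the buffer at uaddr2.  If the
 * wait is interrupted and the timeout is relative, the remaining time
 * is copied back to that buffer below, provided it is large enough.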
*/ 4258 if (uap->uaddr2 == NULL) { 4259 uasize = 0; 4260 tm_p = NULL; 4261 } else { 4262 uasize = (size_t)uap->uaddr1; 4263 error = ops->copyin_umtx_time(uap->uaddr2, uasize, &timeout); 4264 if (error != 0) 4265 return (error); 4266 tm_p = &timeout; 4267 } 4268 error = do_sem2_wait(td, uap->obj, tm_p); 4269 if (error == EINTR && uap->uaddr2 != NULL && 4270 (timeout._flags & UMTX_ABSTIME) == 0 && 4271 uasize >= ops->umtx_time_sz + ops->timespec_sz) { 4272 error = ops->copyout_timeout( 4273 (void *)((uintptr_t)uap->uaddr2 + ops->umtx_time_sz), 4274 uasize - ops->umtx_time_sz, &timeout._timeout); 4275 if (error == 0) { 4276 error = EINTR; 4277 } 4278 } 4279 4280 return (error); 4281 } 4282 4283 static int 4284 __umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap, 4285 const struct umtx_copyops *ops __unused) 4286 { 4287 4288 return (do_sem2_wake(td, uap->obj)); 4289 } 4290 4291 #define USHM_OBJ_UMTX(o) \ 4292 ((struct umtx_shm_obj_list *)(&(o)->umtx_data)) 4293 4294 #define USHMF_LINKED 0x0001 4295 struct umtx_shm_reg { 4296 TAILQ_ENTRY(umtx_shm_reg) ushm_reg_link; 4297 LIST_ENTRY(umtx_shm_reg) ushm_obj_link; 4298 struct umtx_key ushm_key; 4299 struct ucred *ushm_cred; 4300 struct shmfd *ushm_obj; 4301 u_int ushm_refcnt; 4302 u_int ushm_flags; 4303 }; 4304 4305 LIST_HEAD(umtx_shm_obj_list, umtx_shm_reg); 4306 TAILQ_HEAD(umtx_shm_reg_head, umtx_shm_reg); 4307 4308 static uma_zone_t umtx_shm_reg_zone; 4309 static struct umtx_shm_reg_head umtx_shm_registry[UMTX_CHAINS]; 4310 static struct mtx umtx_shm_lock; 4311 static struct umtx_shm_reg_head umtx_shm_reg_delfree = 4312 TAILQ_HEAD_INITIALIZER(umtx_shm_reg_delfree); 4313 4314 static void umtx_shm_free_reg(struct umtx_shm_reg *reg); 4315 4316 static void 4317 umtx_shm_reg_delfree_tq(void *context __unused, int pending __unused) 4318 { 4319 struct umtx_shm_reg_head d; 4320 struct umtx_shm_reg *reg, *reg1; 4321 4322 TAILQ_INIT(&d); 4323 mtx_lock(&umtx_shm_lock); 4324 TAILQ_CONCAT(&d, &umtx_shm_reg_delfree, ushm_reg_link); 4325 mtx_unlock(&umtx_shm_lock); 4326 TAILQ_FOREACH_SAFE(reg, &d, ushm_reg_link, reg1) { 4327 TAILQ_REMOVE(&d, reg, ushm_reg_link); 4328 umtx_shm_free_reg(reg); 4329 } 4330 } 4331 4332 static struct task umtx_shm_reg_delfree_task = 4333 TASK_INITIALIZER(0, umtx_shm_reg_delfree_tq, NULL); 4334 4335 /* 4336 * Returns 0 if a SHM with the passed key is found in the registry, in which 4337 * case it is returned through 'oreg'. Otherwise, returns an error among ESRCH 4338 * (no corresponding SHM; ESRCH was chosen for compatibility, ENOENT would have 4339 * been preferable) or EOVERFLOW (there is a corresponding SHM, but reference 4340 * count would overflow, so can't return it), in which case '*oreg' is left 4341 * unchanged. 
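 *
 * A sketch of the expected caller pattern (this is what
 * umtx_shm_create_reg() below does):
 *
 *	error = umtx_shm_find_reg(key, &reg);
 *	if (error == 0)
 *		use 'reg' (a reference has already been taken);
 *	else if (error == ESRCH)
 *		no registration exists yet, create a new one;
 *	else
 *		fail, e.g. EOVERFLOW when the reference count is saturated.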
 */
static int
umtx_shm_find_reg_locked(const struct umtx_key *key,
    struct umtx_shm_reg **const oreg)
{
        struct umtx_shm_reg *reg;
        struct umtx_shm_reg_head *reg_head;

        KASSERT(key->shared, ("umtx_shm_find_reg_locked: private key"));
        mtx_assert(&umtx_shm_lock, MA_OWNED);
        reg_head = &umtx_shm_registry[key->hash];
        TAILQ_FOREACH(reg, reg_head, ushm_reg_link) {
                KASSERT(reg->ushm_key.shared,
                    ("non-shared key on reg %p %d", reg, reg->ushm_key.shared));
                if (reg->ushm_key.info.shared.object ==
                    key->info.shared.object &&
                    reg->ushm_key.info.shared.offset ==
                    key->info.shared.offset) {
                        KASSERT(reg->ushm_key.type == TYPE_SHM, ("TYPE_USHM"));
                        KASSERT(reg->ushm_refcnt != 0,
                            ("reg %p refcnt 0 onlist", reg));
                        KASSERT((reg->ushm_flags & USHMF_LINKED) != 0,
                            ("reg %p not linked", reg));
                        /*
                         * Don't let overflow happen, just deny a new reference
                         * (this is additional protection against a reference
                         * count leak, which is not known to occur at the time
                         * of this writing).
                         */
                        if (__predict_false(reg->ushm_refcnt == UINT_MAX))
                                return (EOVERFLOW);
                        reg->ushm_refcnt++;
                        *oreg = reg;
                        return (0);
                }
        }
        return (ESRCH);
}

/*
 * Calls umtx_shm_find_reg_locked() with 'umtx_shm_lock' held.
 */
static int
umtx_shm_find_reg(const struct umtx_key *key, struct umtx_shm_reg **const oreg)
{
        int error;

        mtx_lock(&umtx_shm_lock);
        error = umtx_shm_find_reg_locked(key, oreg);
        mtx_unlock(&umtx_shm_lock);
        return (error);
}

static void
umtx_shm_free_reg(struct umtx_shm_reg *reg)
{

        chgumtxcnt(reg->ushm_cred->cr_ruidinfo, -1, 0);
        crfree(reg->ushm_cred);
        shm_drop(reg->ushm_obj);
        uma_zfree(umtx_shm_reg_zone, reg);
}

static bool
umtx_shm_unref_reg_locked(struct umtx_shm_reg *reg, bool linked_ref)
{
        mtx_assert(&umtx_shm_lock, MA_OWNED);
        KASSERT(reg->ushm_refcnt != 0, ("ushm_reg %p refcnt 0", reg));

        if (linked_ref) {
                if ((reg->ushm_flags & USHMF_LINKED) == 0)
                        /*
                         * The reference tied to USHMF_LINKED has already been
                         * released concurrently.
                         */
                        return (false);

                TAILQ_REMOVE(&umtx_shm_registry[reg->ushm_key.hash], reg,
                    ushm_reg_link);
                LIST_REMOVE(reg, ushm_obj_link);
                reg->ushm_flags &= ~USHMF_LINKED;
        }

        reg->ushm_refcnt--;
        return (reg->ushm_refcnt == 0);
}

static void
umtx_shm_unref_reg(struct umtx_shm_reg *reg, bool linked_ref)
{
        vm_object_t object;
        bool dofree;

        if (linked_ref) {
                /*
                 * Note: This may be executed multiple times on the same
                 * shared-memory VM object in presence of concurrent callers
                 * because 'umtx_shm_lock' is not held all along in umtx_shm()
                 * and here.
                 */
                object = reg->ushm_obj->shm_object;
                VM_OBJECT_WLOCK(object);
                vm_object_set_flag(object, OBJ_UMTXDEAD);
                VM_OBJECT_WUNLOCK(object);
        }
        mtx_lock(&umtx_shm_lock);
        dofree = umtx_shm_unref_reg_locked(reg, linked_ref);
        mtx_unlock(&umtx_shm_lock);
        if (dofree)
                umtx_shm_free_reg(reg);
}

void
umtx_shm_object_init(vm_object_t object)
{

        LIST_INIT(USHM_OBJ_UMTX(object));
}

void
umtx_shm_object_terminated(vm_object_t object)
{
        struct umtx_shm_reg *reg, *reg1;
        bool dofree;

        if (LIST_EMPTY(USHM_OBJ_UMTX(object)))
                return;

        dofree = false;
        mtx_lock(&umtx_shm_lock);
        LIST_FOREACH_SAFE(reg, USHM_OBJ_UMTX(object), ushm_obj_link, reg1) {
                if (umtx_shm_unref_reg_locked(reg, true)) {
                        TAILQ_INSERT_TAIL(&umtx_shm_reg_delfree, reg,
                            ushm_reg_link);
                        dofree = true;
                }
        }
        mtx_unlock(&umtx_shm_lock);
        if (dofree)
                taskqueue_enqueue(taskqueue_thread, &umtx_shm_reg_delfree_task);
}

static int
umtx_shm_create_reg(struct thread *td, const struct umtx_key *key,
    struct umtx_shm_reg **res)
{
        struct shmfd *shm;
        struct umtx_shm_reg *reg, *reg1;
        struct ucred *cred;
        int error;

        error = umtx_shm_find_reg(key, res);
        if (error != ESRCH) {
                /*
                 * Either no error occurred, and '*res' was filled, or EOVERFLOW
                 * was returned, indicating a reference count limit, and we
                 * won't create a duplicate registration.  In both cases, we are
                 * done.
                 */
                return (error);
        }
        /* No entry, we will create one. */

        cred = td->td_ucred;
        if (!chgumtxcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_UMTXP)))
                return (ENOMEM);
        shm = shm_alloc(td->td_ucred, O_RDWR, false);
        if (shm == NULL) {
                chgumtxcnt(cred->cr_ruidinfo, -1, 0);
                return (ENOMEM);
        }
        reg = uma_zalloc(umtx_shm_reg_zone, M_WAITOK | M_ZERO);
        bcopy(key, &reg->ushm_key, sizeof(*key));
        reg->ushm_obj = shm;
        reg->ushm_cred = crhold(cred);
        error = shm_dotruncate(reg->ushm_obj, PAGE_SIZE);
        if (error != 0) {
                umtx_shm_free_reg(reg);
                return (error);
        }
        mtx_lock(&umtx_shm_lock);
        /* Re-lookup as 'umtx_shm_lock' has been temporarily released. */
        error = umtx_shm_find_reg_locked(key, &reg1);
        switch (error) {
        case 0:
                mtx_unlock(&umtx_shm_lock);
                umtx_shm_free_reg(reg);
                *res = reg1;
                return (0);
        case ESRCH:
                break;
        default:
                mtx_unlock(&umtx_shm_lock);
                umtx_shm_free_reg(reg);
                return (error);
        }
        TAILQ_INSERT_TAIL(&umtx_shm_registry[key->hash], reg, ushm_reg_link);
        LIST_INSERT_HEAD(USHM_OBJ_UMTX(key->info.shared.object), reg,
            ushm_obj_link);
        reg->ushm_flags = USHMF_LINKED;
        /*
         * These are one reference for the registry and the list of shared
         * mutexes referenced by the VM object containing the lock pointer, and
         * another for the caller, which it will free after use.  The former is
         * tied to the presence of USHMF_LINKED.
         */
        reg->ushm_refcnt = 2;
        mtx_unlock(&umtx_shm_lock);
        *res = reg;
        return (0);
}

static int
umtx_shm_alive(struct thread *td, void *addr)
{
        vm_map_t map;
        vm_map_entry_t entry;
        vm_object_t object;
        vm_pindex_t pindex;
        vm_prot_t prot;
        int res, ret;
        boolean_t wired;

        map = &td->td_proc->p_vmspace->vm_map;
        res = vm_map_lookup(&map, (uintptr_t)addr, VM_PROT_READ, &entry,
            &object, &pindex, &prot, &wired);
        if (res != KERN_SUCCESS)
                return (EFAULT);
        if (object == NULL)
                ret = EINVAL;
        else
                ret = (object->flags & OBJ_UMTXDEAD) != 0 ? ENOTTY : 0;
        vm_map_lookup_done(map, entry);
        return (ret);
}

static void
umtx_shm_init(void)
{
        int i;

        umtx_shm_reg_zone = uma_zcreate("umtx_shm", sizeof(struct umtx_shm_reg),
            NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
        mtx_init(&umtx_shm_lock, "umtxshm", NULL, MTX_DEF);
        for (i = 0; i < nitems(umtx_shm_registry); i++)
                TAILQ_INIT(&umtx_shm_registry[i]);
}

static int
umtx_shm(struct thread *td, void *addr, u_int flags)
{
        struct umtx_key key;
        struct umtx_shm_reg *reg;
        struct file *fp;
        int error, fd;

        if (__bitcount(flags & (UMTX_SHM_CREAT | UMTX_SHM_LOOKUP |
            UMTX_SHM_DESTROY | UMTX_SHM_ALIVE)) != 1)
                return (EINVAL);
        if ((flags & UMTX_SHM_ALIVE) != 0)
                return (umtx_shm_alive(td, addr));
        error = umtx_key_get(addr, TYPE_SHM, PROCESS_SHARE, &key);
        if (error != 0)
                return (error);
        KASSERT(key.shared == 1, ("non-shared key"));
        error = (flags & UMTX_SHM_CREAT) != 0 ?
            umtx_shm_create_reg(td, &key, &reg) :
            umtx_shm_find_reg(&key, &reg);
        umtx_key_release(&key);
        if (error != 0)
                return (error);
        KASSERT(reg != NULL, ("no reg"));
        if ((flags & UMTX_SHM_DESTROY) != 0) {
                umtx_shm_unref_reg(reg, true);
        } else {
#if 0
#ifdef MAC
                error = mac_posixshm_check_open(td->td_ucred,
                    reg->ushm_obj, FFLAGS(O_RDWR));
                if (error == 0)
#endif
                        error = shm_access(reg->ushm_obj, td->td_ucred,
                            FFLAGS(O_RDWR));
                if (error == 0)
#endif
                        error = falloc_caps(td, &fp, &fd, O_CLOEXEC, NULL);
                if (error == 0) {
                        shm_hold(reg->ushm_obj);
                        finit(fp, FFLAGS(O_RDWR), DTYPE_SHM, reg->ushm_obj,
                            &shm_ops);
                        td->td_retval[0] = fd;
                        fdrop(fp, td);
                }
        }
        umtx_shm_unref_reg(reg, false);
        return (error);
}

static int
__umtx_op_shm(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *ops __unused)
{

        return (umtx_shm(td, uap->uaddr1, uap->val));
}

static int
__umtx_op_robust_lists(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *ops)
{
        struct umtx_robust_lists_params rb;
        int error;

        if (ops->compat32) {
                if ((td->td_pflags2 & TDP2_COMPAT32RB) == 0 &&
                    (td->td_rb_list != 0 || td->td_rbp_list != 0 ||
                    td->td_rb_inact != 0))
                        return (EBUSY);
        } else if ((td->td_pflags2 & TDP2_COMPAT32RB) != 0) {
                return (EBUSY);
        }

        bzero(&rb, sizeof(rb));
        error = ops->copyin_robust_lists(uap->uaddr1, uap->val, &rb);
        if (error != 0)
                return (error);

        if (ops->compat32)
                td->td_pflags2 |= TDP2_COMPAT32RB;

        td->td_rb_list = rb.robust_list_offset;
        td->td_rbp_list = rb.robust_priv_list_offset;
        td->td_rb_inact =
rb.robust_inact_offset; 4675 return (0); 4676 } 4677 4678 static int 4679 __umtx_op_get_min_timeout(struct thread *td, struct _umtx_op_args *uap, 4680 const struct umtx_copyops *ops) 4681 { 4682 long val; 4683 int error, val1; 4684 4685 val = sbttons(td->td_proc->p_umtx_min_timeout); 4686 if (ops->compat32) { 4687 val1 = (int)val; 4688 error = copyout(&val1, uap->uaddr1, sizeof(val1)); 4689 } else { 4690 error = copyout(&val, uap->uaddr1, sizeof(val)); 4691 } 4692 return (error); 4693 } 4694 4695 static int 4696 __umtx_op_set_min_timeout(struct thread *td, struct _umtx_op_args *uap, 4697 const struct umtx_copyops *ops) 4698 { 4699 if (uap->val < 0) 4700 return (EINVAL); 4701 td->td_proc->p_umtx_min_timeout = nstosbt(uap->val); 4702 return (0); 4703 } 4704 4705 #if defined(__i386__) || defined(__amd64__) 4706 /* 4707 * Provide the standard 32-bit definitions for x86, since native/compat32 use a 4708 * 32-bit time_t there. Other architectures just need the i386 definitions 4709 * along with their standard compat32. 4710 */ 4711 struct timespecx32 { 4712 int64_t tv_sec; 4713 int32_t tv_nsec; 4714 }; 4715 4716 struct umtx_timex32 { 4717 struct timespecx32 _timeout; 4718 uint32_t _flags; 4719 uint32_t _clockid; 4720 }; 4721 4722 #ifndef __i386__ 4723 #define timespeci386 timespec32 4724 #define umtx_timei386 umtx_time32 4725 #endif 4726 #else /* !__i386__ && !__amd64__ */ 4727 /* 32-bit architectures can emulate i386, so define these almost everywhere. */ 4728 struct timespeci386 { 4729 int32_t tv_sec; 4730 int32_t tv_nsec; 4731 }; 4732 4733 struct umtx_timei386 { 4734 struct timespeci386 _timeout; 4735 uint32_t _flags; 4736 uint32_t _clockid; 4737 }; 4738 4739 #if defined(__LP64__) 4740 #define timespecx32 timespec32 4741 #define umtx_timex32 umtx_time32 4742 #endif 4743 #endif 4744 4745 static int 4746 umtx_copyin_robust_lists32(const void *uaddr, size_t size, 4747 struct umtx_robust_lists_params *rbp) 4748 { 4749 struct umtx_robust_lists_params_compat32 rb32; 4750 int error; 4751 4752 if (size > sizeof(rb32)) 4753 return (EINVAL); 4754 bzero(&rb32, sizeof(rb32)); 4755 error = copyin(uaddr, &rb32, size); 4756 if (error != 0) 4757 return (error); 4758 CP(rb32, *rbp, robust_list_offset); 4759 CP(rb32, *rbp, robust_priv_list_offset); 4760 CP(rb32, *rbp, robust_inact_offset); 4761 return (0); 4762 } 4763 4764 #ifndef __i386__ 4765 static inline int 4766 umtx_copyin_timeouti386(const void *uaddr, struct timespec *tsp) 4767 { 4768 struct timespeci386 ts32; 4769 int error; 4770 4771 error = copyin(uaddr, &ts32, sizeof(ts32)); 4772 if (error == 0) { 4773 if (!timespecvalid_interval(&ts32)) 4774 error = EINVAL; 4775 else { 4776 CP(ts32, *tsp, tv_sec); 4777 CP(ts32, *tsp, tv_nsec); 4778 } 4779 } 4780 return (error); 4781 } 4782 4783 static inline int 4784 umtx_copyin_umtx_timei386(const void *uaddr, size_t size, struct _umtx_time *tp) 4785 { 4786 struct umtx_timei386 t32; 4787 int error; 4788 4789 t32._clockid = CLOCK_REALTIME; 4790 t32._flags = 0; 4791 if (size <= sizeof(t32._timeout)) 4792 error = copyin(uaddr, &t32._timeout, sizeof(t32._timeout)); 4793 else 4794 error = copyin(uaddr, &t32, sizeof(t32)); 4795 if (error != 0) 4796 return (error); 4797 if (!timespecvalid_interval(&t32._timeout)) 4798 return (EINVAL); 4799 TS_CP(t32, *tp, _timeout); 4800 CP(t32, *tp, _flags); 4801 CP(t32, *tp, _clockid); 4802 return (0); 4803 } 4804 4805 static int 4806 umtx_copyout_timeouti386(void *uaddr, size_t sz, struct timespec *tsp) 4807 { 4808 struct timespeci386 remain32 = { 4809 .tv_sec = tsp->tv_sec, 4810 
.tv_nsec = tsp->tv_nsec, 4811 }; 4812 4813 /* 4814 * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time) 4815 * and we're only called if sz >= sizeof(timespec) as supplied in the 4816 * copyops. 4817 */ 4818 KASSERT(sz >= sizeof(remain32), 4819 ("umtx_copyops specifies incorrect sizes")); 4820 4821 return (copyout(&remain32, uaddr, sizeof(remain32))); 4822 } 4823 #endif /* !__i386__ */ 4824 4825 #if defined(__i386__) || defined(__LP64__) 4826 static inline int 4827 umtx_copyin_timeoutx32(const void *uaddr, struct timespec *tsp) 4828 { 4829 struct timespecx32 ts32; 4830 int error; 4831 4832 error = copyin(uaddr, &ts32, sizeof(ts32)); 4833 if (error == 0) { 4834 if (!timespecvalid_interval(&ts32)) 4835 error = EINVAL; 4836 else { 4837 CP(ts32, *tsp, tv_sec); 4838 CP(ts32, *tsp, tv_nsec); 4839 } 4840 } 4841 return (error); 4842 } 4843 4844 static inline int 4845 umtx_copyin_umtx_timex32(const void *uaddr, size_t size, struct _umtx_time *tp) 4846 { 4847 struct umtx_timex32 t32; 4848 int error; 4849 4850 t32._clockid = CLOCK_REALTIME; 4851 t32._flags = 0; 4852 if (size <= sizeof(t32._timeout)) 4853 error = copyin(uaddr, &t32._timeout, sizeof(t32._timeout)); 4854 else 4855 error = copyin(uaddr, &t32, sizeof(t32)); 4856 if (error != 0) 4857 return (error); 4858 if (!timespecvalid_interval(&t32._timeout)) 4859 return (EINVAL); 4860 TS_CP(t32, *tp, _timeout); 4861 CP(t32, *tp, _flags); 4862 CP(t32, *tp, _clockid); 4863 return (0); 4864 } 4865 4866 static int 4867 umtx_copyout_timeoutx32(void *uaddr, size_t sz, struct timespec *tsp) 4868 { 4869 struct timespecx32 remain32 = { 4870 .tv_sec = tsp->tv_sec, 4871 .tv_nsec = tsp->tv_nsec, 4872 }; 4873 4874 /* 4875 * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time) 4876 * and we're only called if sz >= sizeof(timespec) as supplied in the 4877 * copyops. 
4878 */ 4879 KASSERT(sz >= sizeof(remain32), 4880 ("umtx_copyops specifies incorrect sizes")); 4881 4882 return (copyout(&remain32, uaddr, sizeof(remain32))); 4883 } 4884 #endif /* __i386__ || __LP64__ */ 4885 4886 typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap, 4887 const struct umtx_copyops *umtx_ops); 4888 4889 static const _umtx_op_func op_table[] = { 4890 #ifdef COMPAT_FREEBSD10 4891 [UMTX_OP_LOCK] = __umtx_op_lock_umtx, 4892 [UMTX_OP_UNLOCK] = __umtx_op_unlock_umtx, 4893 #else 4894 [UMTX_OP_LOCK] = __umtx_op_unimpl, 4895 [UMTX_OP_UNLOCK] = __umtx_op_unimpl, 4896 #endif 4897 [UMTX_OP_WAIT] = __umtx_op_wait, 4898 [UMTX_OP_WAKE] = __umtx_op_wake, 4899 [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex, 4900 [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex, 4901 [UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex, 4902 [UMTX_OP_SET_CEILING] = __umtx_op_set_ceiling, 4903 [UMTX_OP_CV_WAIT] = __umtx_op_cv_wait, 4904 [UMTX_OP_CV_SIGNAL] = __umtx_op_cv_signal, 4905 [UMTX_OP_CV_BROADCAST] = __umtx_op_cv_broadcast, 4906 [UMTX_OP_WAIT_UINT] = __umtx_op_wait_uint, 4907 [UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock, 4908 [UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock, 4909 [UMTX_OP_RW_UNLOCK] = __umtx_op_rw_unlock, 4910 [UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private, 4911 [UMTX_OP_WAKE_PRIVATE] = __umtx_op_wake_private, 4912 [UMTX_OP_MUTEX_WAIT] = __umtx_op_wait_umutex, 4913 [UMTX_OP_MUTEX_WAKE] = __umtx_op_wake_umutex, 4914 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4915 [UMTX_OP_SEM_WAIT] = __umtx_op_sem_wait, 4916 [UMTX_OP_SEM_WAKE] = __umtx_op_sem_wake, 4917 #else 4918 [UMTX_OP_SEM_WAIT] = __umtx_op_unimpl, 4919 [UMTX_OP_SEM_WAKE] = __umtx_op_unimpl, 4920 #endif 4921 [UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private, 4922 [UMTX_OP_MUTEX_WAKE2] = __umtx_op_wake2_umutex, 4923 [UMTX_OP_SEM2_WAIT] = __umtx_op_sem2_wait, 4924 [UMTX_OP_SEM2_WAKE] = __umtx_op_sem2_wake, 4925 [UMTX_OP_SHM] = __umtx_op_shm, 4926 [UMTX_OP_ROBUST_LISTS] = __umtx_op_robust_lists, 4927 [UMTX_OP_GET_MIN_TIMEOUT] = __umtx_op_get_min_timeout, 4928 [UMTX_OP_SET_MIN_TIMEOUT] = __umtx_op_set_min_timeout, 4929 }; 4930 4931 static const struct umtx_copyops umtx_native_ops = { 4932 .copyin_timeout = umtx_copyin_timeout, 4933 .copyin_umtx_time = umtx_copyin_umtx_time, 4934 .copyin_robust_lists = umtx_copyin_robust_lists, 4935 .copyout_timeout = umtx_copyout_timeout, 4936 .timespec_sz = sizeof(struct timespec), 4937 .umtx_time_sz = sizeof(struct _umtx_time), 4938 }; 4939 4940 #ifndef __i386__ 4941 static const struct umtx_copyops umtx_native_opsi386 = { 4942 .copyin_timeout = umtx_copyin_timeouti386, 4943 .copyin_umtx_time = umtx_copyin_umtx_timei386, 4944 .copyin_robust_lists = umtx_copyin_robust_lists32, 4945 .copyout_timeout = umtx_copyout_timeouti386, 4946 .timespec_sz = sizeof(struct timespeci386), 4947 .umtx_time_sz = sizeof(struct umtx_timei386), 4948 .compat32 = true, 4949 }; 4950 #endif 4951 4952 #if defined(__i386__) || defined(__LP64__) 4953 /* i386 can emulate other 32-bit archs, too! 
 */
static const struct umtx_copyops umtx_native_opsx32 = {
        .copyin_timeout = umtx_copyin_timeoutx32,
        .copyin_umtx_time = umtx_copyin_umtx_timex32,
        .copyin_robust_lists = umtx_copyin_robust_lists32,
        .copyout_timeout = umtx_copyout_timeoutx32,
        .timespec_sz = sizeof(struct timespecx32),
        .umtx_time_sz = sizeof(struct umtx_timex32),
        .compat32 = true,
};

#ifdef COMPAT_FREEBSD32
#ifdef __amd64__
#define umtx_native_ops32       umtx_native_opsi386
#else
#define umtx_native_ops32       umtx_native_opsx32
#endif
#endif /* COMPAT_FREEBSD32 */
#endif /* __i386__ || __LP64__ */

#define UMTX_OP__FLAGS  (UMTX_OP__32BIT | UMTX_OP__I386)

static int
kern__umtx_op(struct thread *td, void *obj, int op, unsigned long val,
    void *uaddr1, void *uaddr2, const struct umtx_copyops *ops)
{
        struct _umtx_op_args uap = {
                .obj = obj,
                .op = op & ~UMTX_OP__FLAGS,
                .val = val,
                .uaddr1 = uaddr1,
                .uaddr2 = uaddr2
        };

        if (uap.op >= nitems(op_table))
                return (EINVAL);
        return ((*op_table[uap.op])(td, &uap, ops));
}

int
sys__umtx_op(struct thread *td, struct _umtx_op_args *uap)
{
        const struct umtx_copyops *umtx_ops;

        umtx_ops = &umtx_native_ops;
#ifdef __LP64__
        if ((uap->op & (UMTX_OP__32BIT | UMTX_OP__I386)) != 0) {
                if ((uap->op & UMTX_OP__I386) != 0)
                        umtx_ops = &umtx_native_opsi386;
                else
                        umtx_ops = &umtx_native_opsx32;
        }
#elif !defined(__i386__)
        /* We consider UMTX_OP__32BIT a nop on !i386 ILP32. */
        if ((uap->op & UMTX_OP__I386) != 0)
                umtx_ops = &umtx_native_opsi386;
#else
        /* Likewise, UMTX_OP__I386 is a nop on i386. */
        if ((uap->op & UMTX_OP__32BIT) != 0)
                umtx_ops = &umtx_native_opsx32;
#endif
        return (kern__umtx_op(td, uap->obj, uap->op, uap->val, uap->uaddr1,
            uap->uaddr2, umtx_ops));
}

#ifdef COMPAT_FREEBSD32
#ifdef COMPAT_FREEBSD10
int
freebsd10_freebsd32__umtx_lock(struct thread *td,
    struct freebsd10_freebsd32__umtx_lock_args *uap)
{
        return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
}

int
freebsd10_freebsd32__umtx_unlock(struct thread *td,
    struct freebsd10_freebsd32__umtx_unlock_args *uap)
{
        return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
}
#endif /* COMPAT_FREEBSD10 */

int
freebsd32__umtx_op(struct thread *td, struct freebsd32__umtx_op_args *uap)
{

        return (kern__umtx_op(td, uap->obj, uap->op, uap->val, uap->uaddr1,
            uap->uaddr2, &umtx_native_ops32));
}
#endif /* COMPAT_FREEBSD32 */

void
umtx_thread_init(struct thread *td)
{

        td->td_umtxq = umtxq_alloc();
        td->td_umtxq->uq_thread = td;
}

void
umtx_thread_fini(struct thread *td)
{

        umtxq_free(td->td_umtxq);
}

/*
 * It is called when a new thread is created, e.g. during fork().
 */
void
umtx_thread_alloc(struct thread *td)
{
        struct umtx_q *uq;

        uq = td->td_umtxq;
        uq->uq_inherited_pri = PRI_MAX;

        KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
        KASSERT(uq->uq_thread == td, ("uq_thread != td"));
        KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
        KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
}

/*
 * exec() hook.
5078 * 5079 * Clear robust lists for all process' threads, not delaying the 5080 * cleanup to thread exit, since the relevant address space is 5081 * destroyed right now. 5082 */ 5083 void 5084 umtx_exec(struct proc *p) 5085 { 5086 struct thread *td; 5087 5088 KASSERT(p == curproc, ("need curproc")); 5089 KASSERT((p->p_flag & P_HADTHREADS) == 0 || 5090 (p->p_flag & P_STOPPED_SINGLE) != 0, 5091 ("curproc must be single-threaded")); 5092 /* 5093 * There is no need to lock the list as only this thread can be 5094 * running. 5095 */ 5096 FOREACH_THREAD_IN_PROC(p, td) { 5097 KASSERT(td == curthread || 5098 ((td->td_flags & TDF_BOUNDARY) != 0 && TD_IS_SUSPENDED(td)), 5099 ("running thread %p %p", p, td)); 5100 umtx_thread_cleanup(td); 5101 td->td_rb_list = td->td_rbp_list = td->td_rb_inact = 0; 5102 } 5103 5104 p->p_umtx_min_timeout = 0; 5105 } 5106 5107 /* 5108 * thread exit hook. 5109 */ 5110 void 5111 umtx_thread_exit(struct thread *td) 5112 { 5113 5114 umtx_thread_cleanup(td); 5115 } 5116 5117 static int 5118 umtx_read_uptr(struct thread *td, uintptr_t ptr, uintptr_t *res, bool compat32) 5119 { 5120 u_long res1; 5121 uint32_t res32; 5122 int error; 5123 5124 if (compat32) { 5125 error = fueword32((void *)ptr, &res32); 5126 if (error == 0) 5127 res1 = res32; 5128 } else { 5129 error = fueword((void *)ptr, &res1); 5130 } 5131 if (error == 0) 5132 *res = res1; 5133 else 5134 error = EFAULT; 5135 return (error); 5136 } 5137 5138 static void 5139 umtx_read_rb_list(struct thread *td, struct umutex *m, uintptr_t *rb_list, 5140 bool compat32) 5141 { 5142 struct umutex32 m32; 5143 5144 if (compat32) { 5145 memcpy(&m32, m, sizeof(m32)); 5146 *rb_list = m32.m_rb_lnk; 5147 } else { 5148 *rb_list = m->m_rb_lnk; 5149 } 5150 } 5151 5152 static int 5153 umtx_handle_rb(struct thread *td, uintptr_t rbp, uintptr_t *rb_list, bool inact, 5154 bool compat32) 5155 { 5156 struct umutex m; 5157 int error; 5158 5159 KASSERT(td->td_proc == curproc, ("need current vmspace")); 5160 error = copyin((void *)rbp, &m, sizeof(m)); 5161 if (error != 0) 5162 return (error); 5163 if (rb_list != NULL) 5164 umtx_read_rb_list(td, &m, rb_list, compat32); 5165 if ((m.m_flags & UMUTEX_ROBUST) == 0) 5166 return (EINVAL); 5167 if ((m.m_owner & ~UMUTEX_CONTESTED) != td->td_tid) 5168 /* inact is cleared after unlock, allow the inconsistency */ 5169 return (inact ? 0 : EINVAL); 5170 return (do_unlock_umutex(td, (struct umutex *)rbp, true)); 5171 } 5172 5173 static void 5174 umtx_cleanup_rb_list(struct thread *td, uintptr_t rb_list, uintptr_t *rb_inact, 5175 const char *name, bool compat32) 5176 { 5177 int error, i; 5178 uintptr_t rbp; 5179 bool inact; 5180 5181 if (rb_list == 0) 5182 return; 5183 error = umtx_read_uptr(td, rb_list, &rbp, compat32); 5184 for (i = 0; error == 0 && rbp != 0 && i < umtx_max_rb; i++) { 5185 if (rbp == *rb_inact) { 5186 inact = true; 5187 *rb_inact = 0; 5188 } else 5189 inact = false; 5190 error = umtx_handle_rb(td, rbp, &rbp, inact, compat32); 5191 } 5192 if (i == umtx_max_rb && umtx_verbose_rb) { 5193 uprintf("comm %s pid %d: reached umtx %smax rb %d\n", 5194 td->td_proc->p_comm, td->td_proc->p_pid, name, umtx_max_rb); 5195 } 5196 if (error != 0 && umtx_verbose_rb) { 5197 uprintf("comm %s pid %d: handling %srb error %d\n", 5198 td->td_proc->p_comm, td->td_proc->p_pid, name, error); 5199 } 5200 } 5201 5202 /* 5203 * Clean up umtx data. 
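 *
 * Called both at thread exit and at exec time: disown any PI umutexes
 * still owned by the thread and reset its lent priority, then walk the
 * registered robust mutex lists (shared and private) and unlock any
 * robust umutexes the thread still holds, handing them over in the
 * owner-died state so the next locker can observe it.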
5204 */ 5205 static void 5206 umtx_thread_cleanup(struct thread *td) 5207 { 5208 struct umtx_q *uq; 5209 struct umtx_pi *pi; 5210 uintptr_t rb_inact; 5211 bool compat32; 5212 5213 /* 5214 * Disown pi mutexes. 5215 */ 5216 uq = td->td_umtxq; 5217 if (uq != NULL) { 5218 if (uq->uq_inherited_pri != PRI_MAX || 5219 !TAILQ_EMPTY(&uq->uq_pi_contested)) { 5220 mtx_lock(&umtx_lock); 5221 uq->uq_inherited_pri = PRI_MAX; 5222 while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) { 5223 pi->pi_owner = NULL; 5224 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link); 5225 } 5226 mtx_unlock(&umtx_lock); 5227 } 5228 sched_lend_user_prio_cond(td, PRI_MAX); 5229 } 5230 5231 compat32 = (td->td_pflags2 & TDP2_COMPAT32RB) != 0; 5232 td->td_pflags2 &= ~TDP2_COMPAT32RB; 5233 5234 if (td->td_rb_inact == 0 && td->td_rb_list == 0 && td->td_rbp_list == 0) 5235 return; 5236 5237 /* 5238 * Handle terminated robust mutexes. Must be done after 5239 * robust pi disown, otherwise unlock could see unowned 5240 * entries. 5241 */ 5242 rb_inact = td->td_rb_inact; 5243 if (rb_inact != 0) 5244 (void)umtx_read_uptr(td, rb_inact, &rb_inact, compat32); 5245 umtx_cleanup_rb_list(td, td->td_rb_list, &rb_inact, "", compat32); 5246 umtx_cleanup_rb_list(td, td->td_rbp_list, &rb_inact, "priv ", compat32); 5247 if (rb_inact != 0) 5248 (void)umtx_handle_rb(td, rb_inact, NULL, true, compat32); 5249 } 5250
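
/*
 * Illustrative sketch (not part of the kernel code in this file): one way a
 * userspace threads library might register its robust mutex list heads with
 * UMTX_OP_ROBUST_LISTS, so that the robust-list walk in umtx_thread_cleanup()
 * above has something to process when the thread exits.  The per-thread
 * variable and function names below are hypothetical; the operation and the
 * parameter structure are assumed to be visible to userspace via <sys/umtx.h>,
 * with the _umtx_op(2) prototype.
 */
#if 0
#include <sys/types.h>
#include <sys/umtx.h>
#include <stdint.h>

/*
 * The kernel records the *addresses* registered below (td_rb_list,
 * td_rbp_list, td_rb_inact) and dereferences them from
 * umtx_cleanup_rb_list() when the thread exits.
 */
static _Thread_local uintptr_t robust_list_head;      /* shared robust umutexes */
static _Thread_local uintptr_t robust_priv_list_head; /* private robust umutexes */
static _Thread_local uintptr_t robust_inact;          /* umutex being (un)locked */

static void
register_robust_lists(void)
{
        struct umtx_robust_lists_params rb = {
                .robust_list_offset = (uintptr_t)&robust_list_head,
                .robust_priv_list_offset = (uintptr_t)&robust_priv_list_head,
                .robust_inact_offset = (uintptr_t)&robust_inact,
        };

        /*
         * uaddr1 carries the parameter block, val its size; mixing 32-bit and
         * native layouts is rejected with EBUSY (see __umtx_op_robust_lists()).
         */
        (void)_umtx_op(NULL, UMTX_OP_ROBUST_LISTS, sizeof(rb), &rb, NULL);
}
#endif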