/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2015, 2016 The FreeBSD Foundation
 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Portions of this software were developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include "opt_umtx_profiling.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/syscallsubr.h>
#include <sys/taskqueue.h>
#include <sys/time.h>
#include <sys/eventhandler.h>
#include <sys/umtx.h>
#include <sys/umtxvar.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

#include <machine/atomic.h>
#include <machine/cpu.h>

#include <compat/freebsd32/freebsd32.h>
#ifdef COMPAT_FREEBSD32
#include <compat/freebsd32/freebsd32_proto.h>
#endif

#define	_UMUTEX_TRY		1
#define	_UMUTEX_WAIT		2

#ifdef UMTX_PROFILING
#define	UPROF_PERC_BIGGER(w, f, sw, sf)					\
	(((w) > (sw)) || ((w) == (sw) && (f) > (sf)))
#endif

#define	UMTXQ_LOCKED_ASSERT(uc)		mtx_assert(&(uc)->uc_lock, MA_OWNED)
#ifdef INVARIANTS
#define	UMTXQ_ASSERT_LOCKED_BUSY(key) do {				\
	struct umtxq_chain *uc;						\
									\
	uc = umtxq_getchain(key);					\
	mtx_assert(&uc->uc_lock, MA_OWNED);				\
	KASSERT(uc->uc_busy != 0, ("umtx chain is not busy"));		\
} while (0)
#else
#define	UMTXQ_ASSERT_LOCKED_BUSY(key) do {} while (0)
#endif

/*
 * Don't propagate time-sharing priority; there is a security reason.
 * A user can simply create a PI mutex, let thread A lock it, and let
 * another thread B block on it.  Because B is sleeping, its priority
 * would be boosted, which would in turn boost A's priority through
 * priority propagation, and A's priority would never be lowered even
 * if it used 100% of a CPU, which would be unfair to other processes.
 */
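
/*
 * Illustration (not part of the algorithm): with the macro below, any
 * thread whose user priority lies inside the time-sharing band is
 * reported as PRI_MAX_TIMESHARE, so PI boosting can never raise one
 * time-sharing thread above another; only priorities outside that
 * band are propagated as-is.
 */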

#define	UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&	\
			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?	\
			 PRI_MAX_TIMESHARE : (td)->td_user_pri)

#define	GOLDEN_RATIO_PRIME	2654404609U
#ifndef	UMTX_CHAINS
#define	UMTX_CHAINS		512
#endif
#define	UMTX_SHIFTS		(__WORD_BIT - 9)

#define	GET_SHARE(flags)	\
	(((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)

#define	BUSY_SPINS		200

struct umtx_copyops {
	int	(*copyin_timeout)(const void *uaddr, struct timespec *tsp);
	int	(*copyin_umtx_time)(const void *uaddr, size_t size,
	    struct _umtx_time *tp);
	int	(*copyin_robust_lists)(const void *uaddr, size_t size,
	    struct umtx_robust_lists_params *rbp);
	int	(*copyout_timeout)(void *uaddr, size_t size,
	    struct timespec *tsp);
	const size_t	timespec_sz;
	const size_t	umtx_time_sz;
	const bool	compat32;
};

_Static_assert(sizeof(struct umutex) == sizeof(struct umutex32), "umutex32");
_Static_assert(__offsetof(struct umutex, m_spare[0]) ==
    __offsetof(struct umutex32, m_spare[0]), "m_spare32");

int umtx_shm_vnobj_persistent = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_vnode_persistent, CTLFLAG_RWTUN,
    &umtx_shm_vnobj_persistent, 0,
    "False forces destruction of umtx attached to file, on last close");
static int umtx_max_rb = 1000;
SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_max_robust, CTLFLAG_RWTUN,
    &umtx_max_rb, 0,
    "Maximum number of robust mutexes allowed for each thread");

static uma_zone_t	umtx_pi_zone;
static struct umtxq_chain umtxq_chains[2][UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
static int		umtx_pi_allocated;

static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
    &umtx_pi_allocated, 0, "Allocated umtx_pi");
static int umtx_verbose_rb = 1;
SYSCTL_INT(_debug_umtx, OID_AUTO, robust_faults_verbose, CTLFLAG_RWTUN,
    &umtx_verbose_rb, 0,
    "");

#ifdef UMTX_PROFILING
static long max_length;
SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0,
    "max_length");
static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD | CTLFLAG_MPSAFE,
    0, "umtx chain stats");
#endif

static inline void umtx_abs_timeout_init2(struct umtx_abs_timeout *timo,
    const struct _umtx_time *umtxtime);

static void umtx_shm_init(void);
static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    bool rb);
static void umtx_thread_cleanup(struct thread *td);
SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);

#define	umtxq_signal(key, nwake)	umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)

static struct mtx umtx_lock;

#ifdef UMTX_PROFILING
static void
umtx_init_profiling(void)
{
	struct sysctl_oid *chain_oid;
	char chain_name[10];
	int i;

	for (i = 0; i < UMTX_CHAINS; ++i) {
		snprintf(chain_name, sizeof(chain_name), "%d", i);
"%d", i); 198 chain_oid = SYSCTL_ADD_NODE(NULL, 199 SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO, 200 chain_name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 201 "umtx hash stats"); 202 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, 203 "max_length0", CTLFLAG_RD, &umtxq_chains[0][i].max_length, 0, NULL); 204 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, 205 "max_length1", CTLFLAG_RD, &umtxq_chains[1][i].max_length, 0, NULL); 206 } 207 } 208 209 static int 210 sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS) 211 { 212 char buf[512]; 213 struct sbuf sb; 214 struct umtxq_chain *uc; 215 u_int fract, i, j, tot, whole; 216 u_int sf0, sf1, sf2, sf3, sf4; 217 u_int si0, si1, si2, si3, si4; 218 u_int sw0, sw1, sw2, sw3, sw4; 219 220 sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN); 221 for (i = 0; i < 2; i++) { 222 tot = 0; 223 for (j = 0; j < UMTX_CHAINS; ++j) { 224 uc = &umtxq_chains[i][j]; 225 mtx_lock(&uc->uc_lock); 226 tot += uc->max_length; 227 mtx_unlock(&uc->uc_lock); 228 } 229 if (tot == 0) 230 sbuf_printf(&sb, "%u) Empty ", i); 231 else { 232 sf0 = sf1 = sf2 = sf3 = sf4 = 0; 233 si0 = si1 = si2 = si3 = si4 = 0; 234 sw0 = sw1 = sw2 = sw3 = sw4 = 0; 235 for (j = 0; j < UMTX_CHAINS; j++) { 236 uc = &umtxq_chains[i][j]; 237 mtx_lock(&uc->uc_lock); 238 whole = uc->max_length * 100; 239 mtx_unlock(&uc->uc_lock); 240 fract = (whole % tot) * 100; 241 if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) { 242 sf0 = fract; 243 si0 = j; 244 sw0 = whole; 245 } else if (UPROF_PERC_BIGGER(whole, fract, sw1, 246 sf1)) { 247 sf1 = fract; 248 si1 = j; 249 sw1 = whole; 250 } else if (UPROF_PERC_BIGGER(whole, fract, sw2, 251 sf2)) { 252 sf2 = fract; 253 si2 = j; 254 sw2 = whole; 255 } else if (UPROF_PERC_BIGGER(whole, fract, sw3, 256 sf3)) { 257 sf3 = fract; 258 si3 = j; 259 sw3 = whole; 260 } else if (UPROF_PERC_BIGGER(whole, fract, sw4, 261 sf4)) { 262 sf4 = fract; 263 si4 = j; 264 sw4 = whole; 265 } 266 } 267 sbuf_printf(&sb, "queue %u:\n", i); 268 sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot, 269 sf0 / tot, si0); 270 sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot, 271 sf1 / tot, si1); 272 sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot, 273 sf2 / tot, si2); 274 sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot, 275 sf3 / tot, si3); 276 sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot, 277 sf4 / tot, si4); 278 } 279 } 280 sbuf_trim(&sb); 281 sbuf_finish(&sb); 282 sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req); 283 sbuf_delete(&sb); 284 return (0); 285 } 286 287 static int 288 sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS) 289 { 290 struct umtxq_chain *uc; 291 u_int i, j; 292 int clear, error; 293 294 clear = 0; 295 error = sysctl_handle_int(oidp, &clear, 0, req); 296 if (error != 0 || req->newptr == NULL) 297 return (error); 298 299 if (clear != 0) { 300 for (i = 0; i < 2; ++i) { 301 for (j = 0; j < UMTX_CHAINS; ++j) { 302 uc = &umtxq_chains[i][j]; 303 mtx_lock(&uc->uc_lock); 304 uc->length = 0; 305 uc->max_length = 0; 306 mtx_unlock(&uc->uc_lock); 307 } 308 } 309 } 310 return (0); 311 } 312 313 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear, 314 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, 315 sysctl_debug_umtx_chains_clear, "I", 316 "Clear umtx chains statistics"); 317 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks, 318 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0, 319 sysctl_debug_umtx_chains_peaks, "A", 320 "Highest peaks in chains max length"); 321 #endif 322 323 static void 324 umtxq_sysinit(void *arg __unused) 325 { 326 

static void
umtxq_sysinit(void *arg __unused)
{
	int i, j;

	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	for (i = 0; i < 2; ++i) {
		for (j = 0; j < UMTX_CHAINS; ++j) {
			mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
			    MTX_DEF | MTX_DUPOK);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
			LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
			TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
			umtxq_chains[i][j].uc_busy = 0;
			umtxq_chains[i][j].uc_waiters = 0;
#ifdef UMTX_PROFILING
			umtxq_chains[i][j].length = 0;
			umtxq_chains[i][j].max_length = 0;
#endif
		}
	}
#ifdef UMTX_PROFILING
	umtx_init_profiling();
#endif
	mtx_init(&umtx_lock, "umtx lock", NULL, MTX_DEF);
	umtx_shm_init();
}

struct umtx_q *
umtxq_alloc(void)
{
	struct umtx_q *uq;

	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
	uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX,
	    M_WAITOK | M_ZERO);
	TAILQ_INIT(&uq->uq_spare_queue->head);
	TAILQ_INIT(&uq->uq_pi_contested);
	uq->uq_inherited_pri = PRI_MAX;
	return (uq);
}

void
umtxq_free(struct umtx_q *uq)
{

	MPASS(uq->uq_spare_queue != NULL);
	free(uq->uq_spare_queue, M_UMTX);
	free(uq, M_UMTX);
}

static inline void
umtxq_hash(struct umtx_key *key)
{
	unsigned n;

	n = (uintptr_t)key->info.both.a + key->info.both.b;
	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
}

struct umtxq_chain *
umtxq_getchain(struct umtx_key *key)
{

	if (key->type <= TYPE_SEM)
		return (&umtxq_chains[1][key->hash]);
	return (&umtxq_chains[0][key->hash]);
}

/*
 * Set the chain to the busy state when a following operation
 * may block (a kernel mutex cannot be used).
 */
void
umtxq_busy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	if (uc->uc_busy) {
#ifdef SMP
		if (smp_cpus > 1) {
			int count = BUSY_SPINS;

			if (count > 0) {
				umtxq_unlock(key);
				while (uc->uc_busy && --count > 0)
					cpu_spinwait();
				umtxq_lock(key);
			}
		}
#endif
		while (uc->uc_busy) {
			uc->uc_waiters++;
			msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
			uc->uc_waiters--;
		}
	}
	uc->uc_busy = 1;
}
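
/*
 * Typical calling pattern for the busy protocol above (a sketch of how
 * existing callers in this file use it, not a new interface): a caller
 * marks the chain busy before dropping the chain lock around work that
 * may block or fault, e.g.
 *
 *	umtxq_lock(key);
 *	umtxq_busy(key);
 *	umtxq_unlock(key);
 *	... operation that may sleep or fault on user memory ...
 *	umtxq_lock(key);
 *	umtxq_unbusy(key);
 *	umtxq_unlock(key);
 */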

/*
 * Unbusy a chain.
 */
void
umtxq_unbusy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	KASSERT(uc->uc_busy != 0, ("not busy"));
	uc->uc_busy = 0;
	if (uc->uc_waiters)
		wakeup_one(uc);
}

void
umtxq_unbusy_unlocked(struct umtx_key *key)
{

	umtxq_lock(key);
	umtxq_unbusy(key);
	umtxq_unlock(key);
}

static struct umtxq_queue *
umtxq_queue_lookup(struct umtx_key *key, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	LIST_FOREACH(uh, &uc->uc_queue[q], link) {
		if (umtx_key_match(&uh->key, key))
			return (uh);
	}

	return (NULL);
}

void
umtxq_insert_queue(struct umtx_q *uq, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT((uq->uq_flags & UQF_UMTXQ) == 0,
	    ("umtx_q is already on queue"));
	uh = umtxq_queue_lookup(&uq->uq_key, q);
	if (uh != NULL) {
		LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue,
		    link);
	} else {
		uh = uq->uq_spare_queue;
		uh->key = uq->uq_key;
		LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
#ifdef UMTX_PROFILING
		uc->length++;
		if (uc->length > uc->max_length) {
			uc->max_length = uc->length;
			if (uc->max_length > max_length)
				max_length = uc->max_length;
		}
#endif
	}
	uq->uq_spare_queue = NULL;

	TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
	uh->length++;
	uq->uq_flags |= UQF_UMTXQ;
	uq->uq_cur_queue = uh;
	return;
}

void
umtxq_remove_queue(struct umtx_q *uq, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (uq->uq_flags & UQF_UMTXQ) {
		uh = uq->uq_cur_queue;
		TAILQ_REMOVE(&uh->head, uq, uq_link);
		uh->length--;
		uq->uq_flags &= ~UQF_UMTXQ;
		if (TAILQ_EMPTY(&uh->head)) {
			KASSERT(uh->length == 0,
			    ("inconsistent umtxq_queue length"));
#ifdef UMTX_PROFILING
			uc->length--;
#endif
			LIST_REMOVE(uh, link);
		} else {
			uh = LIST_FIRST(&uc->uc_spare_queue);
			KASSERT(uh != NULL, ("uc_spare_queue is empty"));
			LIST_REMOVE(uh, link);
		}
		uq->uq_spare_queue = uh;
		uq->uq_cur_queue = NULL;
	}
}

/*
 * Check if there are multiple waiters.
 */
int
umtxq_count(struct umtx_key *key)
{
	struct umtxq_queue *uh;

	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL)
		return (uh->length);
	return (0);
}

/*
 * Check if there are multiple PI waiters and return the first waiter.
 */
static int
umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
{
	struct umtxq_queue *uh;

	*first = NULL;
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL) {
		*first = TAILQ_FIRST(&uh->head);
		return (uh->length);
	}
	return (0);
}
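
/*
 * Note on the queue bookkeeping above: every umtx_q carries one spare
 * umtxq_queue, so umtxq_insert_queue() never has to allocate.  The
 * first waiter donates its spare as the per-key list head; later
 * waiters park their spares on uc_spare_queue, and
 * umtxq_remove_queue() hands a spare back to each departing thread.
 */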

/*
 * Wake up threads waiting on a userland object by a bit mask.
 */
int
umtxq_signal_mask(struct umtx_key *key, int n_wake, u_int bitset)
{
	struct umtxq_queue *uh;
	struct umtx_q *uq, *uq_temp;
	int ret;

	ret = 0;
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh == NULL)
		return (0);
	TAILQ_FOREACH_SAFE(uq, &uh->head, uq_link, uq_temp) {
		if ((uq->uq_bitset & bitset) == 0)
			continue;
		umtxq_remove_queue(uq, UMTX_SHARED_QUEUE);
		wakeup_one(uq);
		if (++ret >= n_wake)
			break;
	}
	return (ret);
}

/*
 * Wake up threads waiting on a userland object.
 */
static int
umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
{
	struct umtxq_queue *uh;
	struct umtx_q *uq;
	int ret;

	ret = 0;
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
	uh = umtxq_queue_lookup(key, q);
	if (uh != NULL) {
		while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
			umtxq_remove_queue(uq, q);
			wakeup(uq);
			if (++ret >= n_wake)
				return (ret);
		}
	}
	return (ret);
}

/*
 * Wake up the specified thread.
 */
static inline void
umtxq_signal_thread(struct umtx_q *uq)
{

	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key));
	umtxq_remove(uq);
	wakeup(uq);
}

/*
 * Wake up a maximum of n_wake threads that are waiting on a userland
 * object identified by key.  The remaining threads are removed from the
 * queue identified by key and added to the queue identified by key2
 * (requeued).  The n_requeue specifies an upper limit on the number of
 * threads that are requeued to the second queue.
 */
int
umtxq_requeue(struct umtx_key *key, int n_wake, struct umtx_key *key2,
    int n_requeue)
{
	struct umtxq_queue *uh;
	struct umtx_q *uq, *uq_temp;
	int ret;

	ret = 0;
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key2));
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh == NULL)
		return (0);
	TAILQ_FOREACH_SAFE(uq, &uh->head, uq_link, uq_temp) {
		if (++ret <= n_wake) {
			umtxq_remove(uq);
			wakeup_one(uq);
		} else {
			umtxq_remove(uq);
			uq->uq_key = *key2;
			umtxq_insert(uq);
			if (ret - n_wake == n_requeue)
				break;
		}
	}
	return (ret);
}

static inline int
tstohz(const struct timespec *tsp)
{
	struct timeval tv;

	TIMESPEC_TO_TIMEVAL(&tv, tsp);
	return (tvtohz(&tv));
}

void
umtx_abs_timeout_init(struct umtx_abs_timeout *timo, int clockid,
    int absolute, const struct timespec *timeout)
{

	timo->clockid = clockid;
	if (!absolute) {
		timo->is_abs_real = false;
		kern_clock_gettime(curthread, timo->clockid, &timo->cur);
		timespecadd(&timo->cur, timeout, &timo->end);
	} else {
		timo->end = *timeout;
		timo->is_abs_real = clockid == CLOCK_REALTIME ||
		    clockid == CLOCK_REALTIME_FAST ||
		    clockid == CLOCK_REALTIME_PRECISE ||
		    clockid == CLOCK_SECOND;
	}
}

static void
umtx_abs_timeout_init2(struct umtx_abs_timeout *timo,
    const struct _umtx_time *umtxtime)
{

	umtx_abs_timeout_init(timo, umtxtime->_clockid,
	    (umtxtime->_flags & UMTX_ABSTIME) != 0, &umtxtime->_timeout);
}
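
/*
 * Example of the two initialization modes above (illustrative values):
 * a relative 1.5 s wait computes end = cur + 1.5 s on the requested
 * clock, while an absolute wait stores the caller's timespec directly
 * and, for the CLOCK_REALTIME family, flags the sleep (is_abs_real) so
 * that it is re-evaluated when the wall clock is stepped.
 */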

static void
umtx_abs_timeout_enforce_min(sbintime_t *sbt)
{
	sbintime_t when, mint;

	mint = curproc->p_umtx_min_timeout;
	if (__predict_false(mint != 0)) {
		when = sbinuptime() + mint;
		if (*sbt < when)
			*sbt = when;
	}
}

static int
umtx_abs_timeout_getsbt(struct umtx_abs_timeout *timo, sbintime_t *sbt,
    int *flags)
{
	struct bintime bt, bbt;
	struct timespec tts;
	sbintime_t rem;

	switch (timo->clockid) {

	/* Clocks that can be converted into absolute time. */
	case CLOCK_REALTIME:
	case CLOCK_REALTIME_PRECISE:
	case CLOCK_REALTIME_FAST:
	case CLOCK_MONOTONIC:
	case CLOCK_MONOTONIC_PRECISE:
	case CLOCK_MONOTONIC_FAST:
	case CLOCK_UPTIME:
	case CLOCK_UPTIME_PRECISE:
	case CLOCK_UPTIME_FAST:
	case CLOCK_SECOND:
		timespec2bintime(&timo->end, &bt);
		switch (timo->clockid) {
		case CLOCK_REALTIME:
		case CLOCK_REALTIME_PRECISE:
		case CLOCK_REALTIME_FAST:
		case CLOCK_SECOND:
			getboottimebin(&bbt);
			bintime_sub(&bt, &bbt);
			break;
		}
		if (bt.sec < 0)
			return (ETIMEDOUT);
		if (bt.sec >= (SBT_MAX >> 32)) {
			*sbt = 0;
			*flags = 0;
			return (0);
		}
		*sbt = bttosbt(bt);
		umtx_abs_timeout_enforce_min(sbt);

		/*
		 * Check if the absolute time should be aligned to
		 * avoid firing multiple timer events in non-periodic
		 * timer mode.
		 */
		switch (timo->clockid) {
		case CLOCK_REALTIME_FAST:
		case CLOCK_MONOTONIC_FAST:
		case CLOCK_UPTIME_FAST:
			rem = *sbt % tc_tick_sbt;
			if (__predict_true(rem != 0))
				*sbt += tc_tick_sbt - rem;
			break;
		case CLOCK_SECOND:
			rem = *sbt % SBT_1S;
			if (__predict_true(rem != 0))
				*sbt += SBT_1S - rem;
			break;
		}
		*flags = C_ABSOLUTE;
		return (0);

	/* Clocks that have to be polled periodically. */
	case CLOCK_VIRTUAL:
	case CLOCK_PROF:
	case CLOCK_THREAD_CPUTIME_ID:
	case CLOCK_PROCESS_CPUTIME_ID:
	default:
		kern_clock_gettime(curthread, timo->clockid, &timo->cur);
		if (timespeccmp(&timo->end, &timo->cur, <=))
			return (ETIMEDOUT);
		timespecsub(&timo->end, &timo->cur, &tts);
		*sbt = tick_sbt * tstohz(&tts);
		*flags = C_HARDCLOCK;
		return (0);
	}
}

static uint32_t
umtx_unlock_val(uint32_t flags, bool rb)
{

	if (rb)
		return (UMUTEX_RB_OWNERDEAD);
	else if ((flags & UMUTEX_NONCONSISTENT) != 0)
		return (UMUTEX_RB_NOTRECOV);
	else
		return (UMUTEX_UNOWNED);
}

/*
 * Put the thread into a sleep state; before sleeping, check whether
 * the thread was removed from the umtx queue.
 */
int
umtxq_sleep(struct umtx_q *uq, const char *wmesg,
    struct umtx_abs_timeout *timo)
{
	struct umtxq_chain *uc;
	sbintime_t sbt = 0;
	int error, flags = 0;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	for (;;) {
		if (!(uq->uq_flags & UQF_UMTXQ)) {
			error = 0;
			break;
		}
		if (timo != NULL) {
			if (timo->is_abs_real)
				curthread->td_rtcgen =
				    atomic_load_acq_int(&rtc_generation);
			error = umtx_abs_timeout_getsbt(timo, &sbt, &flags);
			if (error != 0)
				break;
		}
		error = msleep_sbt(uq, &uc->uc_lock, PCATCH | PDROP, wmesg,
		    sbt, 0, flags);
		uc = umtxq_getchain(&uq->uq_key);
		mtx_lock(&uc->uc_lock);
		if (error == EINTR || error == ERESTART)
			break;
		if (error == EWOULDBLOCK && (flags & C_ABSOLUTE) != 0) {
			error = ETIMEDOUT;
			break;
		}
	}

	curthread->td_rtcgen = 0;
	return (error);
}
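
/*
 * Example of the key mapping implemented below: a THREAD_SHARE key is
 * the pair (vmspace, virtual address), while a PROCESS_SHARE key is
 * the pair (backing VM object, offset), so two processes mapping the
 * same shared page at different addresses still derive equal keys.
 */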

/*
 * Convert a userspace address into a unique logical address.
 */
int
umtx_key_get(const void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else {
		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
		    &entry, &key->info.shared.object, &pindex, &prot,
		    &wired) != KERN_SUCCESS) {
			return (EFAULT);
		}

		if ((share == PROCESS_SHARE) ||
		    (share == AUTO_SHARE &&
		     VM_INHERIT_SHARE == entry->inheritance)) {
			key->shared = 1;
			key->info.shared.offset = (vm_offset_t)addr -
			    entry->start + entry->offset;
			vm_object_reference(key->info.shared.object);
		} else {
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		vm_map_lookup_done(map, entry);
	}

	umtxq_hash(key);
	return (0);
}

/*
 * Release a key.
 */
void
umtx_key_release(struct umtx_key *key)
{

	if (key->shared)
		vm_object_deallocate(key->info.shared.object);
}
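
/*
 * A note on the fetch/CAS contract used by the lock loops below:
 * casueword32() and friends return -1 if the user address faulted,
 * 0 if the comparison matched and the store was performed, and 1 if
 * the old value did not match (or the store failed on ll/sc
 * architectures), in which case the loops re-read and retry.  The
 * older casuword()/casuword32() used by the COMPAT_FREEBSD10 code
 * instead return the old value, with -1 doubling as the fault
 * indicator.
 */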

#ifdef COMPAT_FREEBSD10
/*
 * Lock a umtx object.
 */
static int
do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
    const struct timespec *timeout)
{
	struct umtx_abs_timeout timo;
	struct umtx_q *uq;
	u_long owner;
	u_long old;
	int error = 0;

	uq = td->td_umtxq;
	if (timeout != NULL)
		umtx_abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout);

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMTX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it, but it is contested, try to acquire it. */
		if (owner == UMTX_CONTESTED) {
			owner = casuword(&umtx->u_owner,
			    UMTX_CONTESTED, id | UMTX_CONTESTED);

			if (owner == UMTX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			error = thread_check_susp(td, false);
			if (error != 0)
				break;

			/* If this failed, the lock has changed; restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
		    AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timeout == NULL ?
			    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);

		if (error == 0)
			error = thread_check_susp(td, false);
	}

	if (timeout == NULL) {
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		/* Timed locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a umtx object.
 */
static int
do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
{
	struct umtx_key key;
	u_long owner;
	u_long old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMTX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland. */
	if ((owner & UMTX_CONTESTED) == 0) {
		old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks. */
	if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword(&umtx->u_owner, owner,
	    count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

#ifdef COMPAT_FREEBSD32

/*
 * Lock a umtx object.
 */
static int
do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id,
    const struct timespec *timeout)
{
	struct umtx_abs_timeout timo;
	struct umtx_q *uq;
	uint32_t owner;
	uint32_t old;
	int error = 0;

	uq = td->td_umtxq;

	if (timeout != NULL)
		umtx_abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout);

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword32(m, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it, but it is contested, try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(m,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
			if (owner == UMUTEX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			error = thread_check_susp(td, false);
			if (error != 0)
				break;

			/* If this failed, the lock has changed; restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
		    AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(m, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timeout == NULL ?
			    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);

		if (error == 0)
			error = thread_check_susp(td, false);
	}

	if (timeout == NULL) {
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		/* Timed locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a umtx object.
 */
static int
do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(m);
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland. */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(m, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks. */
	if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(m, owner,
	    count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
#endif	/* COMPAT_FREEBSD32 */
#endif	/* COMPAT_FREEBSD10 */

/*
 * Fetch and compare value; sleep on the address if the value has not changed.
 */
static int
do_wait(struct thread *td, void *addr, u_long id,
    struct _umtx_time *timeout, int compat32, int is_private)
{
	struct umtx_abs_timeout timo;
	struct umtx_q *uq;
	u_long tmp;
	uint32_t tmp32;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		umtx_abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	if (compat32 == 0) {
		error = fueword(addr, &tmp);
		if (error != 0)
			error = EFAULT;
	} else {
		error = fueword32(addr, &tmp32);
		if (error == 0)
			tmp = tmp32;
		else
			error = EFAULT;
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		if (tmp == id)
			error = umtxq_sleep(uq, "uwait", timeout == NULL ?
			    NULL : &timo);
		if ((uq->uq_flags & UQF_UMTXQ) == 0)
			error = 0;
		else
			umtxq_remove(uq);
	} else if ((uq->uq_flags & UQF_UMTXQ) != 0) {
		umtxq_remove(uq);
	}
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

/*
 * Wake up threads sleeping on the specified address.
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
		return (ret);
	umtxq_lock(&key);
	umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}

/*
 * Lock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int mode)
{
	struct umtx_abs_timeout timo;
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	error = 0;
	if (timeout != NULL)
		umtx_abs_timeout_init2(&timo, timeout);

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */
	for (;;) {
		rv = fueword32(&m->m_owner, &owner);
		if (rv == -1)
			return (EFAULT);
		if (mode == _UMUTEX_WAIT) {
			if (owner == UMUTEX_UNOWNED ||
			    owner == UMUTEX_CONTESTED ||
			    owner == UMUTEX_RB_OWNERDEAD ||
			    owner == UMUTEX_RB_NOTRECOV)
				return (0);
		} else {
			/*
			 * Robust mutex terminated.  The kernel's duty is to
			 * return EOWNERDEAD to userspace.  The
			 * UMUTEX_NONCONSISTENT bit in umutex.m_flags is set
			 * by the common userspace code.
			 */
			if (owner == UMUTEX_RB_OWNERDEAD) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_RB_OWNERDEAD, &owner,
				    id | UMUTEX_CONTESTED);
				if (rv == -1)
					return (EFAULT);
				if (rv == 0) {
					MPASS(owner == UMUTEX_RB_OWNERDEAD);
					return (EOWNERDEAD); /* success */
				}
				MPASS(rv == 1);
				rv = thread_check_susp(td, false);
				if (rv != 0)
					return (rv);
				continue;
			}
			if (owner == UMUTEX_RB_NOTRECOV)
				return (ENOTRECOVERABLE);

			/*
			 * Try the uncontested case.  This should be
			 * done in userland.
			 */
			rv = casueword32(&m->m_owner, UMUTEX_UNOWNED,
			    &owner, id);
			/* The address was invalid. */
			if (rv == -1)
				return (EFAULT);

			/* The acquire succeeded. */
			if (rv == 0) {
				MPASS(owner == UMUTEX_UNOWNED);
				return (0);
			}

			/*
			 * If no one owns it, but it is contested, try
			 * to acquire it.
			 */
			MPASS(rv == 1);
			if (owner == UMUTEX_CONTESTED) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_CONTESTED, &owner,
				    id | UMUTEX_CONTESTED);
				/* The address was invalid. */
				if (rv == -1)
					return (EFAULT);
				if (rv == 0) {
					MPASS(owner == UMUTEX_CONTESTED);
					return (0);
				}
				if (rv == 1) {
					rv = thread_check_susp(td, false);
					if (rv != 0)
						return (rv);
				}

				/*
				 * If this failed, the lock has changed;
				 * restart.
				 */
				continue;
			}

			/* rv == 1 but not contested, likely store failure */
			rv = thread_check_susp(td, false);
			if (rv != 0)
				return (rv);
		}

		if (mode == _UMUTEX_TRY)
			return (EBUSY);

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old,
		    owner | UMUTEX_CONTESTED);

		/* The address was invalid or casueword failed to store. */
		if (rv == -1 || rv == 1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			if (rv == -1)
				return (EFAULT);
			if (rv == 1) {
				rv = thread_check_susp(td, false);
				if (rv != 0)
					return (rv);
			}
			continue;
		}

		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		MPASS(old == owner);
		error = umtxq_sleep(uq, "umtxn", timeout == NULL ?
		    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);

		if (error == 0)
			error = thread_check_susp(td, false);
	}

	return (0);
}
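
/*
 * The contested-bit handshake above is the heart of the normal-mutex
 * protocol: a waiter CASes UMUTEX_CONTESTED into m_owner before
 * sleeping, which tells the owner's userspace unlock path that it must
 * enter the kernel (the path below) instead of simply storing
 * UMUTEX_UNOWNED.
 */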

/*
 * Unlock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	uint32_t owner, old, id, newlock;
	int error, count;

	id = td->td_tid;

again:
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	newlock = umtx_unlock_val(flags, rb);
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, newlock);
		if (error == -1)
			return (EFAULT);
		if (error == 1) {
			error = thread_check_susp(td, false);
			if (error != 0)
				return (error);
			goto again;
		}
		MPASS(old == owner);
		return (0);
	}

	/* We should only ever be in here for contested locks. */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	if (count > 1)
		newlock |= UMUTEX_CONTESTED;
	error = casueword32(&m->m_owner, owner, &old, newlock);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (error == 1) {
		if (old != owner)
			return (EINVAL);
		error = thread_check_susp(td, false);
		if (error != 0)
			return (error);
		goto again;
	}
	return (0);
}

/*
 * Check if the mutex is available and wake up a waiter;
 * for simple (normal) mutexes only.
 */
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

again:
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != 0 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV)
		return (0);

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	/* We should only ever be in here for contested locks. */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	if (count <= 1 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV) {
		error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    UMUTEX_UNOWNED);
		if (error == -1) {
			error = EFAULT;
		} else if (error == 1) {
			umtxq_lock(&key);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			error = thread_check_susp(td, false);
			if (error != 0)
				return (error);
			goto again;
		}
	}

	umtxq_lock(&key);
	if (error == 0 && count != 0) {
		MPASS((owner & ~UMUTEX_CONTESTED) == 0 ||
		    owner == UMUTEX_RB_OWNERDEAD ||
		    owner == UMUTEX_RB_NOTRECOV);
		umtxq_signal(&key, 1);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

/*
 * Check if the mutex has waiters and try to fix the contention bit.
 */
static int
do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old;
	int type;
	int error;
	int count;

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT |
	    UMUTEX_ROBUST)) {
	case 0:
	case UMUTEX_ROBUST:
		type = TYPE_NORMAL_UMUTEX;
		break;
	case UMUTEX_PRIO_INHERIT:
		type = TYPE_PI_UMUTEX;
		break;
	case (UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST):
		type = TYPE_PI_ROBUST_UMUTEX;
		break;
	case UMUTEX_PRIO_PROTECT:
		type = TYPE_PP_UMUTEX;
		break;
	case (UMUTEX_PRIO_PROTECT | UMUTEX_ROBUST):
		type = TYPE_PP_ROBUST_UMUTEX;
		break;
	default:
		return (EINVAL);
	}
	if ((error = umtx_key_get(m, type, GET_SHARE(flags), &key)) != 0)
		return (error);

	owner = 0;
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		error = EFAULT;

	/*
	 * Only repair the contention bit if there is a waiter; this means
	 * the mutex is still being referenced by userland code.
	 * Otherwise, don't update any memory.
	 */
	while (error == 0 && (owner & UMUTEX_CONTESTED) == 0 &&
	    (count > 1 || (count == 1 && (owner & ~UMUTEX_CONTESTED) != 0))) {
		error = casueword32(&m->m_owner, owner, &old,
		    owner | UMUTEX_CONTESTED);
		if (error == -1) {
			error = EFAULT;
			break;
		}
		if (error == 0) {
			MPASS(old == owner);
			break;
		}
		owner = old;
		error = thread_check_susp(td, false);
	}

	umtxq_lock(&key);
	if (error == EFAULT) {
		umtxq_signal(&key, INT_MAX);
	} else if (count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 ||
	    owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV))
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

struct umtx_pi *
umtx_pi_alloc(int flags)
{
	struct umtx_pi *pi;

	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
	TAILQ_INIT(&pi->pi_blocked);
	atomic_add_int(&umtx_pi_allocated, 1);
	return (pi);
}

void
umtx_pi_free(struct umtx_pi *pi)
{

	uma_zfree(umtx_pi_zone, pi);
	atomic_add_int(&umtx_pi_allocated, -1);
}

/*
 * Adjust the thread's position on a pi_state after its priority has been
 * changed.
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread or higher than the next thread.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}

static struct umtx_pi *
umtx_pi_next(struct umtx_pi *pi)
{
	struct umtx_q *uq_owner;

	if (pi->pi_owner == NULL)
		return (NULL);
	uq_owner = pi->pi_owner->td_umtxq;
	if (uq_owner == NULL)
		return (NULL);
	return (uq_owner->uq_pi_blocked);
}

/*
 * Floyd's cycle-finding algorithm.
 */
static bool
umtx_pi_check_loop(struct umtx_pi *pi)
{
	struct umtx_pi *pi1;	/* fast iterator */

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (false);
	pi1 = pi;
	for (;;) {
		pi = umtx_pi_next(pi);
		if (pi == NULL)
			break;
		pi1 = umtx_pi_next(pi1);
		if (pi1 == NULL)
			break;
		pi1 = umtx_pi_next(pi1);
		if (pi1 == NULL)
			break;
		if (pi == pi1)
			return (true);
	}
	return (false);
}
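
/*
 * Why cycle detection is needed before walking pi_owner chains: a
 * buggy or malicious application can build a loop of PI mutexes, e.g.
 * thread A owns m1 and blocks on m2 while thread B owns m2 and blocks
 * on m1.  umtx_pi_check_loop() runs the slow/fast iterators above so
 * that the propagation code below can bail out instead of walking the
 * chain forever.
 */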

/*
 * Propagate priority when a thread is blocked on a POSIX
 * PI mutex.
 */
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;
	if (umtx_pi_check_loop(pi))
		return;

	for (;;) {
		td = pi->pi_owner;
		if (td == NULL || td == curthread)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		thread_lock(td);
		if (td->td_lend_user_pri > pri)
			sched_lend_user_prio(td, pri);
		else {
			thread_unlock(td);
			break;
		}
		thread_unlock(td);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		if (pi == NULL)
			break;
		/* Resort td on the list if needed. */
		umtx_pi_adjust_thread(pi, td);
	}
}

/*
 * Unpropagate priority for a PI mutex when a thread blocked on
 * it is interrupted by a signal or resumed by others.
 */
static void
umtx_repropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);

	if (umtx_pi_check_loop(pi))
		return;
	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		thread_lock(pi->pi_owner);
		sched_lend_user_prio(pi->pi_owner, pri);
		thread_unlock(pi->pi_owner);
		if ((pi = uq_owner->uq_pi_blocked) != NULL)
			umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
	}
}

/*
 * Insert a PI mutex into the owned list.
 */
static void
umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_assert(&umtx_lock, MA_OWNED);
	MPASS(pi->pi_owner == NULL);
	pi->pi_owner = owner;
	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
}

/*
 * Disown a PI mutex, and remove it from the owned list.
 */
static void
umtx_pi_disown(struct umtx_pi *pi)
{

	mtx_assert(&umtx_lock, MA_OWNED);
	TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, pi, pi_link);
	pi->pi_owner = NULL;
}

/*
 * Claim ownership of a PI mutex.
 */
int
umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq;
	int pri;

	mtx_lock(&umtx_lock);
	if (pi->pi_owner == owner) {
		mtx_unlock(&umtx_lock);
		return (0);
	}

	if (pi->pi_owner != NULL) {
		/*
		 * Userland may have already messed the mutex, sigh.
		 */
		mtx_unlock(&umtx_lock);
		return (EPERM);
	}
	umtx_pi_setowner(pi, owner);
	uq = TAILQ_FIRST(&pi->pi_blocked);
	if (uq != NULL) {
		pri = UPRI(uq->uq_thread);
		thread_lock(owner);
		if (pri < UPRI(owner))
			sched_lend_user_prio(owner, pri);
		thread_unlock(owner);
	}
	mtx_unlock(&umtx_lock);
	return (0);
}

/*
 * Adjust a thread's position in the queue of the PI mutex it is
 * blocked on; this may trigger a new round of priority propagation.
 */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	mtx_lock(&umtx_lock);
	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	if (pi != NULL) {
		umtx_pi_adjust_thread(pi, td);
		umtx_repropagate_priority(pi);
	}
	mtx_unlock(&umtx_lock);
}

/*
 * Sleep on a PI mutex.
 */
int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, uint32_t owner,
    const char *wmesg, struct umtx_abs_timeout *timo, bool shared)
{
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int error, pri;
#ifdef INVARIANTS
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
#endif
	error = 0;
	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key));
	KASSERT(uc->uc_busy != 0, ("umtx chain is not busy"));
	umtxq_insert(uq);
	mtx_lock(&umtx_lock);
	if (pi->pi_owner == NULL) {
		mtx_unlock(&umtx_lock);
		td1 = tdfind(owner, shared ? -1 : td->td_proc->p_pid);
		mtx_lock(&umtx_lock);
		if (td1 != NULL) {
			if (pi->pi_owner == NULL)
				umtx_pi_setowner(pi, td1);
			PROC_UNLOCK(td1->td_proc);
		}
	}

	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	thread_lock(td);
	td->td_flags |= TDF_UPIBLOCKED;
	thread_unlock(td);
	umtx_propagate_priority(td);
	mtx_unlock(&umtx_lock);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, wmesg, timo);
	umtxq_remove(uq);

	mtx_lock(&umtx_lock);
	uq->uq_pi_blocked = NULL;
	thread_lock(td);
	td->td_flags &= ~TDF_UPIBLOCKED;
	thread_unlock(td);
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_repropagate_priority(pi);
	mtx_unlock(&umtx_lock);
	umtxq_unlock(&uq->uq_key);

	return (error);
}

/*
 * Add a reference to a PI mutex.
 */
void
umtx_pi_ref(struct umtx_pi *pi)
{

	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&pi->pi_key));
	pi->pi_refcount++;
}
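
/*
 * Reference counting discipline (as used by do_lock_pi() below): a
 * thread takes a reference with umtx_pi_ref() while it holds a pointer
 * to the umtx_pi outside the chain lock, and pairs it with
 * umtx_pi_unref() on exit; the structure is freed when the last
 * reference drops.
 */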

/*
 * Decrease the reference count of a PI mutex; when the count drops
 * to zero, its memory is freed.
 */
void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		mtx_lock(&umtx_lock);
		if (pi->pi_owner != NULL)
			umtx_pi_disown(pi);
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
		    ("blocked queue not empty"));
		mtx_unlock(&umtx_lock);
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		umtx_pi_free(pi);
	}
}

/*
 * Find a PI mutex in the hash table.
 */
struct umtx_pi *
umtx_pi_lookup(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtx_pi *pi;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);

	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
		if (umtx_key_match(&pi->pi_key, key)) {
			return (pi);
		}
	}
	return (NULL);
}

/*
 * Insert a PI mutex into the hash table.
 */
void
umtx_pi_insert(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
}

/*
 * Drop a PI mutex and wake up the top waiter.
 */
int
umtx_pi_drop(struct thread *td, struct umtx_key *key, bool rb, int *count)
{
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	int pri;

	UMTXQ_ASSERT_LOCKED_BUSY(key);
	*count = umtxq_count_pi(key, &uq_first);
	if (uq_first != NULL) {
		mtx_lock(&umtx_lock);
		pi = uq_first->uq_pi_blocked;
		KASSERT(pi != NULL, ("pi == NULL?"));
		if (pi->pi_owner != td && !(rb && pi->pi_owner == NULL)) {
			mtx_unlock(&umtx_lock);
			/* userland messed the mutex */
			return (EPERM);
		}
		uq_me = td->td_umtxq;
		if (pi->pi_owner == td)
			umtx_pi_disown(pi);
		/* Get the highest-priority thread which is still sleeping. */
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		while (uq_first != NULL &&
		    (uq_first->uq_flags & UQF_UMTXQ) == 0) {
			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
		}
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
		if (uq_first)
			umtxq_signal_thread(uq_first);
	} else {
		pi = umtx_pi_lookup(key);
		/*
		 * A umtx_pi can exist if a signal or timeout removed the
		 * last waiter from the umtxq, but there is still
		 * a thread in do_lock_pi() holding the umtx_pi.
		 */
		if (pi != NULL) {
			/*
			 * The umtx_pi can be unowned, such as when a thread
			 * has just entered do_lock_pi(), allocated the
			 * umtx_pi, and unlocked the umtxq.
			 * If the current thread owns it, it must disown it.
			 */
			mtx_lock(&umtx_lock);
			if (pi->pi_owner == td)
				umtx_pi_disown(pi);
			mtx_unlock(&umtx_lock);
		}
	}
	return (0);
}
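
/*
 * Allocation note for the lookup in do_lock_pi() below: the first
 * attempt uses M_NOWAIT under the chain lock; if that fails, the lock
 * is dropped for an M_WAITOK allocation and the lookup is redone,
 * since another thread may have inserted a umtx_pi in the meantime.
 */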
2216  */
2217 static int
2218 do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags,
2219     struct _umtx_time *timeout, int try)
2220 {
2221 	struct umtx_abs_timeout timo;
2222 	struct umtx_q *uq;
2223 	struct umtx_pi *pi, *new_pi;
2224 	uint32_t id, old_owner, owner, old;
2225 	int error, rv;
2226 
2227 	id = td->td_tid;
2228 	uq = td->td_umtxq;
2229 
2230 	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
2231 	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
2232 	    &uq->uq_key)) != 0)
2233 		return (error);
2234 
2235 	if (timeout != NULL)
2236 		umtx_abs_timeout_init2(&timo, timeout);
2237 
2238 	umtxq_lock(&uq->uq_key);
2239 	pi = umtx_pi_lookup(&uq->uq_key);
2240 	if (pi == NULL) {
2241 		new_pi = umtx_pi_alloc(M_NOWAIT);
2242 		if (new_pi == NULL) {
2243 			umtxq_unlock(&uq->uq_key);
2244 			new_pi = umtx_pi_alloc(M_WAITOK);
2245 			umtxq_lock(&uq->uq_key);
2246 			pi = umtx_pi_lookup(&uq->uq_key);
2247 			if (pi != NULL) {
2248 				umtx_pi_free(new_pi);
2249 				new_pi = NULL;
2250 			}
2251 		}
2252 		if (new_pi != NULL) {
2253 			new_pi->pi_key = uq->uq_key;
2254 			umtx_pi_insert(new_pi);
2255 			pi = new_pi;
2256 		}
2257 	}
2258 	umtx_pi_ref(pi);
2259 	umtxq_unlock(&uq->uq_key);
2260 
2261 	/*
2262 	 * Care must be exercised when dealing with the umtx structure.
2263 	 * It can fault on any access.
2264 	 */
2265 	for (;;) {
2266 		/*
2267 		 * Try the uncontested case.  This should be done in userland.
2268 		 */
2269 		rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id);
2270 		/* The address was invalid. */
2271 		if (rv == -1) {
2272 			error = EFAULT;
2273 			break;
2274 		}
2275 		/* The acquire succeeded. */
2276 		if (rv == 0) {
2277 			MPASS(owner == UMUTEX_UNOWNED);
2278 			error = 0;
2279 			break;
2280 		}
2281 
2282 		if (owner == UMUTEX_RB_NOTRECOV) {
2283 			error = ENOTRECOVERABLE;
2284 			break;
2285 		}
2286 
2287 		/*
2288 		 * Nobody owns it, but the acquire failed.  This can happen
2289 		 * with ll/sc atomics.
2290 		 */
2291 		if (owner == UMUTEX_UNOWNED) {
2292 			error = thread_check_susp(td, true);
2293 			if (error != 0)
2294 				break;
2295 			continue;
2296 		}
2297 
2298 		/*
2299 		 * Avoid overwriting a possible error from sleep due
2300 		 * to a pending signal with the suspension check result.
2301 		 */
2302 		if (error == 0) {
2303 			error = thread_check_susp(td, true);
2304 			if (error != 0)
2305 				break;
2306 		}
2307 
2308 		/* If no one owns it but it is contested, try to acquire it. */
2309 		if (owner == UMUTEX_CONTESTED || owner == UMUTEX_RB_OWNERDEAD) {
2310 			old_owner = owner;
2311 			rv = casueword32(&m->m_owner, owner, &owner,
2312 			    id | UMUTEX_CONTESTED);
2313 			/* The address was invalid. */
2314 			if (rv == -1) {
2315 				error = EFAULT;
2316 				break;
2317 			}
2318 			if (rv == 1) {
2319 				if (error == 0) {
2320 					error = thread_check_susp(td, true);
2321 					if (error != 0)
2322 						break;
2323 				}
2324 
2325 				/*
2326 				 * If this failed, the lock could have
2327 				 * changed; restart.
2328 				 */
2329 				continue;
2330 			}
2331 
2332 			MPASS(rv == 0);
2333 			MPASS(owner == old_owner);
2334 			umtxq_lock(&uq->uq_key);
2335 			umtxq_busy(&uq->uq_key);
2336 			error = umtx_pi_claim(pi, td);
2337 			umtxq_unbusy(&uq->uq_key);
2338 			umtxq_unlock(&uq->uq_key);
2339 			if (error != 0) {
2340 				/*
2341 				 * Since we're going to return an
2342 				 * error, restore the m_owner to its
2343 				 * previous, unowned state to avoid
2344 				 * compounding the problem.
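				 *
				 * (The restore below is best-effort: the
				 * casuword32() result is deliberately
				 * ignored, since the lock attempt is
				 * already failing with 'error'.)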
2345 				 */
2346 				(void)casuword32(&m->m_owner,
2347 				    id | UMUTEX_CONTESTED, old_owner);
2348 			}
2349 			if (error == 0 && old_owner == UMUTEX_RB_OWNERDEAD)
2350 				error = EOWNERDEAD;
2351 			break;
2352 		}
2353 
2354 		if ((owner & ~UMUTEX_CONTESTED) == id) {
2355 			error = EDEADLK;
2356 			break;
2357 		}
2358 
2359 		if (try != 0) {
2360 			error = EBUSY;
2361 			break;
2362 		}
2363 
2364 		/*
2365 		 * If we caught a signal, we have retried and now
2366 		 * exit immediately.
2367 		 */
2368 		if (error != 0)
2369 			break;
2370 
2371 		umtxq_lock(&uq->uq_key);
2372 		umtxq_busy(&uq->uq_key);
2373 		umtxq_unlock(&uq->uq_key);
2374 
2375 		/*
2376 		 * Set the contested bit so that a release in user space
2377 		 * knows to use the system call for unlock.  If this fails,
2378 		 * either someone else has acquired the lock or it has been
2379 		 * released.
2380 		 */
2381 		rv = casueword32(&m->m_owner, owner, &old, owner |
2382 		    UMUTEX_CONTESTED);
2383 
2384 		/* The address was invalid. */
2385 		if (rv == -1) {
2386 			umtxq_unbusy_unlocked(&uq->uq_key);
2387 			error = EFAULT;
2388 			break;
2389 		}
2390 		if (rv == 1) {
2391 			umtxq_unbusy_unlocked(&uq->uq_key);
2392 			error = thread_check_susp(td, true);
2393 			if (error != 0)
2394 				break;
2395 
2396 			/*
2397 			 * The lock changed and we need to retry, or we
2398 			 * lost a race to the thread unlocking the
2399 			 * umtx.  Note that the UMUTEX_RB_OWNERDEAD
2400 			 * value for owner is impossible here.
2401 			 */
2402 			continue;
2403 		}
2404 
2405 		umtxq_lock(&uq->uq_key);
2406 
2407 		/* We set the contested bit, sleep. */
2408 		MPASS(old == owner);
2409 		error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
2410 		    "umtxpi", timeout == NULL ? NULL : &timo,
2411 		    (flags & USYNC_PROCESS_SHARED) != 0);
2412 		if (error != 0)
2413 			continue;
2414 
2415 		error = thread_check_susp(td, false);
2416 		if (error != 0)
2417 			break;
2418 	}
2419 
2420 	umtxq_lock(&uq->uq_key);
2421 	umtx_pi_unref(pi);
2422 	umtxq_unlock(&uq->uq_key);
2423 
2424 	umtx_key_release(&uq->uq_key);
2425 	return (error);
2426 }
2427 
2428 /*
2429  * Unlock a PI mutex.
2430  */
2431 static int
2432 do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
2433 {
2434 	struct umtx_key key;
2435 	uint32_t id, new_owner, old, owner;
2436 	int count, error;
2437 
2438 	id = td->td_tid;
2439 
2440 usrloop:
2441 	/*
2442 	 * Make sure we own this mtx.
2443 	 */
2444 	error = fueword32(&m->m_owner, &owner);
2445 	if (error == -1)
2446 		return (EFAULT);
2447 
2448 	if ((owner & ~UMUTEX_CONTESTED) != id)
2449 		return (EPERM);
2450 
2451 	new_owner = umtx_unlock_val(flags, rb);
2452 
2453 	/* This should be done in userland. */
2454 	if ((owner & UMUTEX_CONTESTED) == 0) {
2455 		error = casueword32(&m->m_owner, owner, &old, new_owner);
2456 		if (error == -1)
2457 			return (EFAULT);
2458 		if (error == 1) {
2459 			error = thread_check_susp(td, true);
2460 			if (error != 0)
2461 				return (error);
2462 			goto usrloop;
2463 		}
2464 		if (old == owner)
2465 			return (0);
2466 		owner = old;
2467 	}
2468 
2469 	/* We should only ever be in here for contested locks. */
2470 	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
2471 	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
2472 	    &key)) != 0)
2473 		return (error);
2474 
2475 	umtxq_lock(&key);
2476 	umtxq_busy(&key);
2477 	error = umtx_pi_drop(td, &key, rb, &count);
2478 	if (error != 0) {
2479 		umtxq_unbusy(&key);
2480 		umtxq_unlock(&key);
2481 		umtx_key_release(&key);
2482 		/* userland messed the mutex */
2483 		return (error);
2484 	}
2485 	umtxq_unlock(&key);
2486 
2487 	/*
2488 	 * When unlocking the umtx, it must be marked as unowned if
2489 	 * no more than one thread is waiting on it.
2490 	 * Otherwise, it must be marked as contested.
2491 	 */
2492 
2493 	if (count > 1)
2494 		new_owner |= UMUTEX_CONTESTED;
2495 again:
2496 	error = casueword32(&m->m_owner, owner, &old, new_owner);
2497 	if (error == 1) {
2498 		error = thread_check_susp(td, false);
2499 		if (error == 0)
2500 			goto again;
2501 	}
2502 	umtxq_unbusy_unlocked(&key);
2503 	umtx_key_release(&key);
2504 	if (error == -1)
2505 		return (EFAULT);
2506 	if (error == 0 && old != owner)
2507 		return (EINVAL);
2508 	return (error);
2509 }
2510 
2511 /*
2512  * Lock a PP mutex.
2513  */
2514 static int
2515 do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags,
2516     struct _umtx_time *timeout, int try)
2517 {
2518 	struct umtx_abs_timeout timo;
2519 	struct umtx_q *uq, *uq2;
2520 	struct umtx_pi *pi;
2521 	uint32_t ceiling;
2522 	uint32_t owner, id;
2523 	int error, pri, old_inherited_pri, new_pri, rv;
2524 	bool su;
2525 
2526 	id = td->td_tid;
2527 	uq = td->td_umtxq;
2528 	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
2529 	    TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
2530 	    &uq->uq_key)) != 0)
2531 		return (error);
2532 
2533 	if (timeout != NULL)
2534 		umtx_abs_timeout_init2(&timo, timeout);
2535 
2536 	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
2537 	for (;;) {
2538 		old_inherited_pri = uq->uq_inherited_pri;
2539 		umtxq_lock(&uq->uq_key);
2540 		umtxq_busy(&uq->uq_key);
2541 		umtxq_unlock(&uq->uq_key);
2542 
2543 		rv = fueword32(&m->m_ceilings[0], &ceiling);
2544 		if (rv == -1) {
2545 			error = EFAULT;
2546 			goto out;
2547 		}
2548 		ceiling = RTP_PRIO_MAX - ceiling;
2549 		if (ceiling > RTP_PRIO_MAX) {
2550 			error = EINVAL;
2551 			goto out;
2552 		}
2553 		new_pri = PRI_MIN_REALTIME + ceiling;
2554 
2555 		if (td->td_base_user_pri < new_pri) {
2556 			error = EINVAL;
2557 			goto out;
2558 		}
2559 		if (su) {
2560 			mtx_lock(&umtx_lock);
2561 			if (new_pri < uq->uq_inherited_pri) {
2562 				uq->uq_inherited_pri = new_pri;
2563 				thread_lock(td);
2564 				if (new_pri < UPRI(td))
2565 					sched_lend_user_prio(td, new_pri);
2566 				thread_unlock(td);
2567 			}
2568 			mtx_unlock(&umtx_lock);
2569 		}
2570 
2571 		rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
2572 		    id | UMUTEX_CONTESTED);
2573 		/* The address was invalid. */
2574 		if (rv == -1) {
2575 			error = EFAULT;
2576 			break;
2577 		}
2578 		if (rv == 0) {
2579 			MPASS(owner == UMUTEX_CONTESTED);
2580 			error = 0;
2581 			break;
2582 		}
2583 		/* rv == 1 */
2584 		if (owner == UMUTEX_RB_OWNERDEAD) {
2585 			rv = casueword32(&m->m_owner, UMUTEX_RB_OWNERDEAD,
2586 			    &owner, id | UMUTEX_CONTESTED);
2587 			if (rv == -1) {
2588 				error = EFAULT;
2589 				break;
2590 			}
2591 			if (rv == 0) {
2592 				MPASS(owner == UMUTEX_RB_OWNERDEAD);
2593 				error = EOWNERDEAD; /* success */
2594 				break;
2595 			}
2596 
2597 			/*
2598 			 * rv == 1, only check for suspension if we
2599 			 * have not already caught a signal.  If we
2600 			 * get an error from the check, the same
2601 			 * condition is checked by the umtxq_sleep()
2602 			 * call below, so we should clear the error
2603 			 * so as not to skip the last loop iteration.
2604 			 */
2605 			if (error == 0) {
2606 				error = thread_check_susp(td, false);
2607 				if (error == 0) {
2608 					if (try != 0)
2609 						error = EBUSY;
2610 					else
2611 						continue;
2612 				}
2613 				error = 0;
2614 			}
2615 		} else if (owner == UMUTEX_RB_NOTRECOV) {
2616 			error = ENOTRECOVERABLE;
2617 		}
2618 
2619 		if (try != 0)
2620 			error = EBUSY;
2621 
2622 		/*
2623 		 * If we caught a signal, we have retried and now
2624 		 * exit immediately.
2625 */ 2626 if (error != 0) 2627 break; 2628 2629 umtxq_lock(&uq->uq_key); 2630 umtxq_insert(uq); 2631 umtxq_unbusy(&uq->uq_key); 2632 error = umtxq_sleep(uq, "umtxpp", timeout == NULL ? 2633 NULL : &timo); 2634 umtxq_remove(uq); 2635 umtxq_unlock(&uq->uq_key); 2636 2637 mtx_lock(&umtx_lock); 2638 uq->uq_inherited_pri = old_inherited_pri; 2639 pri = PRI_MAX; 2640 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2641 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2642 if (uq2 != NULL) { 2643 if (pri > UPRI(uq2->uq_thread)) 2644 pri = UPRI(uq2->uq_thread); 2645 } 2646 } 2647 if (pri > uq->uq_inherited_pri) 2648 pri = uq->uq_inherited_pri; 2649 thread_lock(td); 2650 sched_lend_user_prio(td, pri); 2651 thread_unlock(td); 2652 mtx_unlock(&umtx_lock); 2653 } 2654 2655 if (error != 0 && error != EOWNERDEAD) { 2656 mtx_lock(&umtx_lock); 2657 uq->uq_inherited_pri = old_inherited_pri; 2658 pri = PRI_MAX; 2659 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2660 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2661 if (uq2 != NULL) { 2662 if (pri > UPRI(uq2->uq_thread)) 2663 pri = UPRI(uq2->uq_thread); 2664 } 2665 } 2666 if (pri > uq->uq_inherited_pri) 2667 pri = uq->uq_inherited_pri; 2668 thread_lock(td); 2669 sched_lend_user_prio(td, pri); 2670 thread_unlock(td); 2671 mtx_unlock(&umtx_lock); 2672 } 2673 2674 out: 2675 umtxq_unbusy_unlocked(&uq->uq_key); 2676 umtx_key_release(&uq->uq_key); 2677 return (error); 2678 } 2679 2680 /* 2681 * Unlock a PP mutex. 2682 */ 2683 static int 2684 do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, bool rb) 2685 { 2686 struct umtx_key key; 2687 struct umtx_q *uq, *uq2; 2688 struct umtx_pi *pi; 2689 uint32_t id, owner, rceiling; 2690 int error, pri, new_inherited_pri; 2691 bool su; 2692 2693 id = td->td_tid; 2694 uq = td->td_umtxq; 2695 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); 2696 2697 /* 2698 * Make sure we own this mtx. 2699 */ 2700 error = fueword32(&m->m_owner, &owner); 2701 if (error == -1) 2702 return (EFAULT); 2703 2704 if ((owner & ~UMUTEX_CONTESTED) != id) 2705 return (EPERM); 2706 2707 error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t)); 2708 if (error != 0) 2709 return (error); 2710 2711 if (rceiling == -1) 2712 new_inherited_pri = PRI_MAX; 2713 else { 2714 rceiling = RTP_PRIO_MAX - rceiling; 2715 if (rceiling > RTP_PRIO_MAX) 2716 return (EINVAL); 2717 new_inherited_pri = PRI_MIN_REALTIME + rceiling; 2718 } 2719 2720 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2721 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2722 &key)) != 0) 2723 return (error); 2724 umtxq_lock(&key); 2725 umtxq_busy(&key); 2726 umtxq_unlock(&key); 2727 /* 2728 * For priority protected mutex, always set unlocked state 2729 * to UMUTEX_CONTESTED, so that userland always enters kernel 2730 * to lock the mutex, it is necessary because thread priority 2731 * has to be adjusted for such mutex. 
2732 */ 2733 error = suword32(&m->m_owner, umtx_unlock_val(flags, rb) | 2734 UMUTEX_CONTESTED); 2735 2736 umtxq_lock(&key); 2737 if (error == 0) 2738 umtxq_signal(&key, 1); 2739 umtxq_unbusy(&key); 2740 umtxq_unlock(&key); 2741 2742 if (error == -1) 2743 error = EFAULT; 2744 else { 2745 mtx_lock(&umtx_lock); 2746 if (su || new_inherited_pri == PRI_MAX) 2747 uq->uq_inherited_pri = new_inherited_pri; 2748 pri = PRI_MAX; 2749 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2750 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2751 if (uq2 != NULL) { 2752 if (pri > UPRI(uq2->uq_thread)) 2753 pri = UPRI(uq2->uq_thread); 2754 } 2755 } 2756 if (pri > uq->uq_inherited_pri) 2757 pri = uq->uq_inherited_pri; 2758 thread_lock(td); 2759 sched_lend_user_prio(td, pri); 2760 thread_unlock(td); 2761 mtx_unlock(&umtx_lock); 2762 } 2763 umtx_key_release(&key); 2764 return (error); 2765 } 2766 2767 static int 2768 do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling, 2769 uint32_t *old_ceiling) 2770 { 2771 struct umtx_q *uq; 2772 uint32_t flags, id, owner, save_ceiling; 2773 int error, rv, rv1; 2774 2775 error = fueword32(&m->m_flags, &flags); 2776 if (error == -1) 2777 return (EFAULT); 2778 if ((flags & UMUTEX_PRIO_PROTECT) == 0) 2779 return (EINVAL); 2780 if (ceiling > RTP_PRIO_MAX) 2781 return (EINVAL); 2782 id = td->td_tid; 2783 uq = td->td_umtxq; 2784 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2785 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2786 &uq->uq_key)) != 0) 2787 return (error); 2788 for (;;) { 2789 umtxq_lock(&uq->uq_key); 2790 umtxq_busy(&uq->uq_key); 2791 umtxq_unlock(&uq->uq_key); 2792 2793 rv = fueword32(&m->m_ceilings[0], &save_ceiling); 2794 if (rv == -1) { 2795 error = EFAULT; 2796 break; 2797 } 2798 2799 rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, 2800 id | UMUTEX_CONTESTED); 2801 if (rv == -1) { 2802 error = EFAULT; 2803 break; 2804 } 2805 2806 if (rv == 0) { 2807 MPASS(owner == UMUTEX_CONTESTED); 2808 rv = suword32(&m->m_ceilings[0], ceiling); 2809 rv1 = suword32(&m->m_owner, UMUTEX_CONTESTED); 2810 error = (rv == 0 && rv1 == 0) ? 0: EFAULT; 2811 break; 2812 } 2813 2814 if ((owner & ~UMUTEX_CONTESTED) == id) { 2815 rv = suword32(&m->m_ceilings[0], ceiling); 2816 error = rv == 0 ? 0 : EFAULT; 2817 break; 2818 } 2819 2820 if (owner == UMUTEX_RB_OWNERDEAD) { 2821 error = EOWNERDEAD; 2822 break; 2823 } else if (owner == UMUTEX_RB_NOTRECOV) { 2824 error = ENOTRECOVERABLE; 2825 break; 2826 } 2827 2828 /* 2829 * If we caught a signal, we have retried and now 2830 * exit immediately. 2831 */ 2832 if (error != 0) 2833 break; 2834 2835 /* 2836 * We set the contested bit, sleep. Otherwise the lock changed 2837 * and we need to retry or we lost a race to the thread 2838 * unlocking the umtx. 2839 */ 2840 umtxq_lock(&uq->uq_key); 2841 umtxq_insert(uq); 2842 umtxq_unbusy(&uq->uq_key); 2843 error = umtxq_sleep(uq, "umtxpp", NULL); 2844 umtxq_remove(uq); 2845 umtxq_unlock(&uq->uq_key); 2846 } 2847 umtxq_lock(&uq->uq_key); 2848 if (error == 0) 2849 umtxq_signal(&uq->uq_key, INT_MAX); 2850 umtxq_unbusy(&uq->uq_key); 2851 umtxq_unlock(&uq->uq_key); 2852 umtx_key_release(&uq->uq_key); 2853 if (error == 0 && old_ceiling != NULL) { 2854 rv = suword32(old_ceiling, save_ceiling); 2855 error = rv == 0 ? 0 : EFAULT; 2856 } 2857 return (error); 2858 } 2859 2860 /* 2861 * Lock a userland POSIX mutex. 
2862 */ 2863 static int 2864 do_lock_umutex(struct thread *td, struct umutex *m, 2865 struct _umtx_time *timeout, int mode) 2866 { 2867 uint32_t flags; 2868 int error; 2869 2870 error = fueword32(&m->m_flags, &flags); 2871 if (error == -1) 2872 return (EFAULT); 2873 2874 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2875 case 0: 2876 error = do_lock_normal(td, m, flags, timeout, mode); 2877 break; 2878 case UMUTEX_PRIO_INHERIT: 2879 error = do_lock_pi(td, m, flags, timeout, mode); 2880 break; 2881 case UMUTEX_PRIO_PROTECT: 2882 error = do_lock_pp(td, m, flags, timeout, mode); 2883 break; 2884 default: 2885 return (EINVAL); 2886 } 2887 if (timeout == NULL) { 2888 if (error == EINTR && mode != _UMUTEX_WAIT) 2889 error = ERESTART; 2890 } else { 2891 /* Timed-locking is not restarted. */ 2892 if (error == ERESTART) 2893 error = EINTR; 2894 } 2895 return (error); 2896 } 2897 2898 /* 2899 * Unlock a userland POSIX mutex. 2900 */ 2901 static int 2902 do_unlock_umutex(struct thread *td, struct umutex *m, bool rb) 2903 { 2904 uint32_t flags; 2905 int error; 2906 2907 error = fueword32(&m->m_flags, &flags); 2908 if (error == -1) 2909 return (EFAULT); 2910 2911 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2912 case 0: 2913 return (do_unlock_normal(td, m, flags, rb)); 2914 case UMUTEX_PRIO_INHERIT: 2915 return (do_unlock_pi(td, m, flags, rb)); 2916 case UMUTEX_PRIO_PROTECT: 2917 return (do_unlock_pp(td, m, flags, rb)); 2918 } 2919 2920 return (EINVAL); 2921 } 2922 2923 static int 2924 do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m, 2925 struct timespec *timeout, u_long wflags) 2926 { 2927 struct umtx_abs_timeout timo; 2928 struct umtx_q *uq; 2929 uint32_t flags, clockid, hasw; 2930 int error; 2931 2932 uq = td->td_umtxq; 2933 error = fueword32(&cv->c_flags, &flags); 2934 if (error == -1) 2935 return (EFAULT); 2936 error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key); 2937 if (error != 0) 2938 return (error); 2939 2940 if ((wflags & CVWAIT_CLOCKID) != 0) { 2941 error = fueword32(&cv->c_clockid, &clockid); 2942 if (error == -1) { 2943 umtx_key_release(&uq->uq_key); 2944 return (EFAULT); 2945 } 2946 if (clockid < CLOCK_REALTIME || 2947 clockid >= CLOCK_THREAD_CPUTIME_ID) { 2948 /* hmm, only HW clock id will work. */ 2949 umtx_key_release(&uq->uq_key); 2950 return (EINVAL); 2951 } 2952 } else { 2953 clockid = CLOCK_REALTIME; 2954 } 2955 2956 umtxq_lock(&uq->uq_key); 2957 umtxq_busy(&uq->uq_key); 2958 umtxq_insert(uq); 2959 umtxq_unlock(&uq->uq_key); 2960 2961 /* 2962 * Set c_has_waiters to 1 before releasing user mutex, also 2963 * don't modify cache line when unnecessary. 2964 */ 2965 error = fueword32(&cv->c_has_waiters, &hasw); 2966 if (error == 0 && hasw == 0) 2967 error = suword32(&cv->c_has_waiters, 1); 2968 if (error != 0) { 2969 umtxq_lock(&uq->uq_key); 2970 umtxq_remove(uq); 2971 umtxq_unbusy(&uq->uq_key); 2972 error = EFAULT; 2973 goto out; 2974 } 2975 2976 umtxq_unbusy_unlocked(&uq->uq_key); 2977 2978 error = do_unlock_umutex(td, m, false); 2979 2980 if (timeout != NULL) 2981 umtx_abs_timeout_init(&timo, clockid, 2982 (wflags & CVWAIT_ABSTIME) != 0, timeout); 2983 2984 umtxq_lock(&uq->uq_key); 2985 if (error == 0) { 2986 error = umtxq_sleep(uq, "ucond", timeout == NULL ? 2987 NULL : &timo); 2988 } 2989 2990 if ((uq->uq_flags & UQF_UMTXQ) == 0) 2991 error = 0; 2992 else { 2993 /* 2994 * This must be timeout,interrupted by signal or 2995 * surprious wakeup, clear c_has_waiter flag when 2996 * necessary. 
2997 */ 2998 umtxq_busy(&uq->uq_key); 2999 if ((uq->uq_flags & UQF_UMTXQ) != 0) { 3000 int oldlen = uq->uq_cur_queue->length; 3001 umtxq_remove(uq); 3002 if (oldlen == 1) { 3003 umtxq_unlock(&uq->uq_key); 3004 if (suword32(&cv->c_has_waiters, 0) != 0 && 3005 error == 0) 3006 error = EFAULT; 3007 umtxq_lock(&uq->uq_key); 3008 } 3009 } 3010 umtxq_unbusy(&uq->uq_key); 3011 if (error == ERESTART) 3012 error = EINTR; 3013 } 3014 out: 3015 umtxq_unlock(&uq->uq_key); 3016 umtx_key_release(&uq->uq_key); 3017 return (error); 3018 } 3019 3020 /* 3021 * Signal a userland condition variable. 3022 */ 3023 static int 3024 do_cv_signal(struct thread *td, struct ucond *cv) 3025 { 3026 struct umtx_key key; 3027 int error, cnt, nwake; 3028 uint32_t flags; 3029 3030 error = fueword32(&cv->c_flags, &flags); 3031 if (error == -1) 3032 return (EFAULT); 3033 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 3034 return (error); 3035 umtxq_lock(&key); 3036 umtxq_busy(&key); 3037 cnt = umtxq_count(&key); 3038 nwake = umtxq_signal(&key, 1); 3039 if (cnt <= nwake) { 3040 umtxq_unlock(&key); 3041 error = suword32(&cv->c_has_waiters, 0); 3042 if (error == -1) 3043 error = EFAULT; 3044 umtxq_lock(&key); 3045 } 3046 umtxq_unbusy(&key); 3047 umtxq_unlock(&key); 3048 umtx_key_release(&key); 3049 return (error); 3050 } 3051 3052 static int 3053 do_cv_broadcast(struct thread *td, struct ucond *cv) 3054 { 3055 struct umtx_key key; 3056 int error; 3057 uint32_t flags; 3058 3059 error = fueword32(&cv->c_flags, &flags); 3060 if (error == -1) 3061 return (EFAULT); 3062 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 3063 return (error); 3064 3065 umtxq_lock(&key); 3066 umtxq_busy(&key); 3067 umtxq_signal(&key, INT_MAX); 3068 umtxq_unlock(&key); 3069 3070 error = suword32(&cv->c_has_waiters, 0); 3071 if (error == -1) 3072 error = EFAULT; 3073 3074 umtxq_unbusy_unlocked(&key); 3075 3076 umtx_key_release(&key); 3077 return (error); 3078 } 3079 3080 static int 3081 do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, 3082 struct _umtx_time *timeout) 3083 { 3084 struct umtx_abs_timeout timo; 3085 struct umtx_q *uq; 3086 uint32_t flags, wrflags; 3087 int32_t state, oldstate; 3088 int32_t blocked_readers; 3089 int error, error1, rv; 3090 3091 uq = td->td_umtxq; 3092 error = fueword32(&rwlock->rw_flags, &flags); 3093 if (error == -1) 3094 return (EFAULT); 3095 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3096 if (error != 0) 3097 return (error); 3098 3099 if (timeout != NULL) 3100 umtx_abs_timeout_init2(&timo, timeout); 3101 3102 wrflags = URWLOCK_WRITE_OWNER; 3103 if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER)) 3104 wrflags |= URWLOCK_WRITE_WAITERS; 3105 3106 for (;;) { 3107 rv = fueword32(&rwlock->rw_state, &state); 3108 if (rv == -1) { 3109 umtx_key_release(&uq->uq_key); 3110 return (EFAULT); 3111 } 3112 3113 /* try to lock it */ 3114 while (!(state & wrflags)) { 3115 if (__predict_false(URWLOCK_READER_COUNT(state) == 3116 URWLOCK_MAX_READERS)) { 3117 umtx_key_release(&uq->uq_key); 3118 return (EAGAIN); 3119 } 3120 rv = casueword32(&rwlock->rw_state, state, 3121 &oldstate, state + 1); 3122 if (rv == -1) { 3123 umtx_key_release(&uq->uq_key); 3124 return (EFAULT); 3125 } 3126 if (rv == 0) { 3127 MPASS(oldstate == state); 3128 umtx_key_release(&uq->uq_key); 3129 return (0); 3130 } 3131 error = thread_check_susp(td, true); 3132 if (error != 0) 3133 break; 3134 state = oldstate; 3135 } 3136 3137 if (error) 3138 break; 3139 3140 
/* grab monitor lock */ 3141 umtxq_lock(&uq->uq_key); 3142 umtxq_busy(&uq->uq_key); 3143 umtxq_unlock(&uq->uq_key); 3144 3145 /* 3146 * re-read the state, in case it changed between the try-lock above 3147 * and the check below 3148 */ 3149 rv = fueword32(&rwlock->rw_state, &state); 3150 if (rv == -1) 3151 error = EFAULT; 3152 3153 /* set read contention bit */ 3154 while (error == 0 && (state & wrflags) && 3155 !(state & URWLOCK_READ_WAITERS)) { 3156 rv = casueword32(&rwlock->rw_state, state, 3157 &oldstate, state | URWLOCK_READ_WAITERS); 3158 if (rv == -1) { 3159 error = EFAULT; 3160 break; 3161 } 3162 if (rv == 0) { 3163 MPASS(oldstate == state); 3164 goto sleep; 3165 } 3166 state = oldstate; 3167 error = thread_check_susp(td, false); 3168 if (error != 0) 3169 break; 3170 } 3171 if (error != 0) { 3172 umtxq_unbusy_unlocked(&uq->uq_key); 3173 break; 3174 } 3175 3176 /* state is changed while setting flags, restart */ 3177 if (!(state & wrflags)) { 3178 umtxq_unbusy_unlocked(&uq->uq_key); 3179 error = thread_check_susp(td, true); 3180 if (error != 0) 3181 break; 3182 continue; 3183 } 3184 3185 sleep: 3186 /* 3187 * Contention bit is set, before sleeping, increase 3188 * read waiter count. 3189 */ 3190 rv = fueword32(&rwlock->rw_blocked_readers, 3191 &blocked_readers); 3192 if (rv == 0) 3193 rv = suword32(&rwlock->rw_blocked_readers, 3194 blocked_readers + 1); 3195 if (rv == -1) { 3196 umtxq_unbusy_unlocked(&uq->uq_key); 3197 error = EFAULT; 3198 break; 3199 } 3200 3201 while (state & wrflags) { 3202 umtxq_lock(&uq->uq_key); 3203 umtxq_insert(uq); 3204 umtxq_unbusy(&uq->uq_key); 3205 3206 error = umtxq_sleep(uq, "urdlck", timeout == NULL ? 3207 NULL : &timo); 3208 3209 umtxq_busy(&uq->uq_key); 3210 umtxq_remove(uq); 3211 umtxq_unlock(&uq->uq_key); 3212 if (error) 3213 break; 3214 rv = fueword32(&rwlock->rw_state, &state); 3215 if (rv == -1) { 3216 error = EFAULT; 3217 break; 3218 } 3219 } 3220 3221 /* decrease read waiter count, and may clear read contention bit */ 3222 rv = fueword32(&rwlock->rw_blocked_readers, 3223 &blocked_readers); 3224 if (rv == 0) 3225 rv = suword32(&rwlock->rw_blocked_readers, 3226 blocked_readers - 1); 3227 if (rv == -1) { 3228 umtxq_unbusy_unlocked(&uq->uq_key); 3229 error = EFAULT; 3230 break; 3231 } 3232 if (blocked_readers == 1) { 3233 rv = fueword32(&rwlock->rw_state, &state); 3234 if (rv == -1) { 3235 umtxq_unbusy_unlocked(&uq->uq_key); 3236 error = EFAULT; 3237 break; 3238 } 3239 for (;;) { 3240 rv = casueword32(&rwlock->rw_state, state, 3241 &oldstate, state & ~URWLOCK_READ_WAITERS); 3242 if (rv == -1) { 3243 error = EFAULT; 3244 break; 3245 } 3246 if (rv == 0) { 3247 MPASS(oldstate == state); 3248 break; 3249 } 3250 state = oldstate; 3251 error1 = thread_check_susp(td, false); 3252 if (error1 != 0) { 3253 if (error == 0) 3254 error = error1; 3255 break; 3256 } 3257 } 3258 } 3259 3260 umtxq_unbusy_unlocked(&uq->uq_key); 3261 if (error != 0) 3262 break; 3263 } 3264 umtx_key_release(&uq->uq_key); 3265 if (error == ERESTART) 3266 error = EINTR; 3267 return (error); 3268 } 3269 3270 static int 3271 do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout) 3272 { 3273 struct umtx_abs_timeout timo; 3274 struct umtx_q *uq; 3275 uint32_t flags; 3276 int32_t state, oldstate; 3277 int32_t blocked_writers; 3278 int32_t blocked_readers; 3279 int error, error1, rv; 3280 3281 uq = td->td_umtxq; 3282 error = fueword32(&rwlock->rw_flags, &flags); 3283 if (error == -1) 3284 return (EFAULT); 3285 error = umtx_key_get(rwlock, TYPE_RWLOCK, 
GET_SHARE(flags), &uq->uq_key); 3286 if (error != 0) 3287 return (error); 3288 3289 if (timeout != NULL) 3290 umtx_abs_timeout_init2(&timo, timeout); 3291 3292 blocked_readers = 0; 3293 for (;;) { 3294 rv = fueword32(&rwlock->rw_state, &state); 3295 if (rv == -1) { 3296 umtx_key_release(&uq->uq_key); 3297 return (EFAULT); 3298 } 3299 while ((state & URWLOCK_WRITE_OWNER) == 0 && 3300 URWLOCK_READER_COUNT(state) == 0) { 3301 rv = casueword32(&rwlock->rw_state, state, 3302 &oldstate, state | URWLOCK_WRITE_OWNER); 3303 if (rv == -1) { 3304 umtx_key_release(&uq->uq_key); 3305 return (EFAULT); 3306 } 3307 if (rv == 0) { 3308 MPASS(oldstate == state); 3309 umtx_key_release(&uq->uq_key); 3310 return (0); 3311 } 3312 state = oldstate; 3313 error = thread_check_susp(td, true); 3314 if (error != 0) 3315 break; 3316 } 3317 3318 if (error) { 3319 if ((state & (URWLOCK_WRITE_OWNER | 3320 URWLOCK_WRITE_WAITERS)) == 0 && 3321 blocked_readers != 0) { 3322 umtxq_lock(&uq->uq_key); 3323 umtxq_busy(&uq->uq_key); 3324 umtxq_signal_queue(&uq->uq_key, INT_MAX, 3325 UMTX_SHARED_QUEUE); 3326 umtxq_unbusy(&uq->uq_key); 3327 umtxq_unlock(&uq->uq_key); 3328 } 3329 3330 break; 3331 } 3332 3333 /* grab monitor lock */ 3334 umtxq_lock(&uq->uq_key); 3335 umtxq_busy(&uq->uq_key); 3336 umtxq_unlock(&uq->uq_key); 3337 3338 /* 3339 * Re-read the state, in case it changed between the 3340 * try-lock above and the check below. 3341 */ 3342 rv = fueword32(&rwlock->rw_state, &state); 3343 if (rv == -1) 3344 error = EFAULT; 3345 3346 while (error == 0 && ((state & URWLOCK_WRITE_OWNER) || 3347 URWLOCK_READER_COUNT(state) != 0) && 3348 (state & URWLOCK_WRITE_WAITERS) == 0) { 3349 rv = casueword32(&rwlock->rw_state, state, 3350 &oldstate, state | URWLOCK_WRITE_WAITERS); 3351 if (rv == -1) { 3352 error = EFAULT; 3353 break; 3354 } 3355 if (rv == 0) { 3356 MPASS(oldstate == state); 3357 goto sleep; 3358 } 3359 state = oldstate; 3360 error = thread_check_susp(td, false); 3361 if (error != 0) 3362 break; 3363 } 3364 if (error != 0) { 3365 umtxq_unbusy_unlocked(&uq->uq_key); 3366 break; 3367 } 3368 3369 if ((state & URWLOCK_WRITE_OWNER) == 0 && 3370 URWLOCK_READER_COUNT(state) == 0) { 3371 umtxq_unbusy_unlocked(&uq->uq_key); 3372 error = thread_check_susp(td, false); 3373 if (error != 0) 3374 break; 3375 continue; 3376 } 3377 sleep: 3378 rv = fueword32(&rwlock->rw_blocked_writers, 3379 &blocked_writers); 3380 if (rv == 0) 3381 rv = suword32(&rwlock->rw_blocked_writers, 3382 blocked_writers + 1); 3383 if (rv == -1) { 3384 umtxq_unbusy_unlocked(&uq->uq_key); 3385 error = EFAULT; 3386 break; 3387 } 3388 3389 while ((state & URWLOCK_WRITE_OWNER) || 3390 URWLOCK_READER_COUNT(state) != 0) { 3391 umtxq_lock(&uq->uq_key); 3392 umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE); 3393 umtxq_unbusy(&uq->uq_key); 3394 3395 error = umtxq_sleep(uq, "uwrlck", timeout == NULL ? 
3396 NULL : &timo); 3397 3398 umtxq_busy(&uq->uq_key); 3399 umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE); 3400 umtxq_unlock(&uq->uq_key); 3401 if (error) 3402 break; 3403 rv = fueword32(&rwlock->rw_state, &state); 3404 if (rv == -1) { 3405 error = EFAULT; 3406 break; 3407 } 3408 } 3409 3410 rv = fueword32(&rwlock->rw_blocked_writers, 3411 &blocked_writers); 3412 if (rv == 0) 3413 rv = suword32(&rwlock->rw_blocked_writers, 3414 blocked_writers - 1); 3415 if (rv == -1) { 3416 umtxq_unbusy_unlocked(&uq->uq_key); 3417 error = EFAULT; 3418 break; 3419 } 3420 if (blocked_writers == 1) { 3421 rv = fueword32(&rwlock->rw_state, &state); 3422 if (rv == -1) { 3423 umtxq_unbusy_unlocked(&uq->uq_key); 3424 error = EFAULT; 3425 break; 3426 } 3427 for (;;) { 3428 rv = casueword32(&rwlock->rw_state, state, 3429 &oldstate, state & ~URWLOCK_WRITE_WAITERS); 3430 if (rv == -1) { 3431 error = EFAULT; 3432 break; 3433 } 3434 if (rv == 0) { 3435 MPASS(oldstate == state); 3436 break; 3437 } 3438 state = oldstate; 3439 error1 = thread_check_susp(td, false); 3440 /* 3441 * We are leaving the URWLOCK_WRITE_WAITERS 3442 * behind, but this should not harm the 3443 * correctness. 3444 */ 3445 if (error1 != 0) { 3446 if (error == 0) 3447 error = error1; 3448 break; 3449 } 3450 } 3451 rv = fueword32(&rwlock->rw_blocked_readers, 3452 &blocked_readers); 3453 if (rv == -1) { 3454 umtxq_unbusy_unlocked(&uq->uq_key); 3455 error = EFAULT; 3456 break; 3457 } 3458 } else 3459 blocked_readers = 0; 3460 3461 umtxq_unbusy_unlocked(&uq->uq_key); 3462 } 3463 3464 umtx_key_release(&uq->uq_key); 3465 if (error == ERESTART) 3466 error = EINTR; 3467 return (error); 3468 } 3469 3470 static int 3471 do_rw_unlock(struct thread *td, struct urwlock *rwlock) 3472 { 3473 struct umtx_q *uq; 3474 uint32_t flags; 3475 int32_t state, oldstate; 3476 int error, rv, q, count; 3477 3478 uq = td->td_umtxq; 3479 error = fueword32(&rwlock->rw_flags, &flags); 3480 if (error == -1) 3481 return (EFAULT); 3482 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3483 if (error != 0) 3484 return (error); 3485 3486 error = fueword32(&rwlock->rw_state, &state); 3487 if (error == -1) { 3488 error = EFAULT; 3489 goto out; 3490 } 3491 if (state & URWLOCK_WRITE_OWNER) { 3492 for (;;) { 3493 rv = casueword32(&rwlock->rw_state, state, 3494 &oldstate, state & ~URWLOCK_WRITE_OWNER); 3495 if (rv == -1) { 3496 error = EFAULT; 3497 goto out; 3498 } 3499 if (rv == 1) { 3500 state = oldstate; 3501 if (!(oldstate & URWLOCK_WRITE_OWNER)) { 3502 error = EPERM; 3503 goto out; 3504 } 3505 error = thread_check_susp(td, true); 3506 if (error != 0) 3507 goto out; 3508 } else 3509 break; 3510 } 3511 } else if (URWLOCK_READER_COUNT(state) != 0) { 3512 for (;;) { 3513 rv = casueword32(&rwlock->rw_state, state, 3514 &oldstate, state - 1); 3515 if (rv == -1) { 3516 error = EFAULT; 3517 goto out; 3518 } 3519 if (rv == 1) { 3520 state = oldstate; 3521 if (URWLOCK_READER_COUNT(oldstate) == 0) { 3522 error = EPERM; 3523 goto out; 3524 } 3525 error = thread_check_susp(td, true); 3526 if (error != 0) 3527 goto out; 3528 } else 3529 break; 3530 } 3531 } else { 3532 error = EPERM; 3533 goto out; 3534 } 3535 3536 count = 0; 3537 3538 if (!(flags & URWLOCK_PREFER_READER)) { 3539 if (state & URWLOCK_WRITE_WAITERS) { 3540 count = 1; 3541 q = UMTX_EXCLUSIVE_QUEUE; 3542 } else if (state & URWLOCK_READ_WAITERS) { 3543 count = INT_MAX; 3544 q = UMTX_SHARED_QUEUE; 3545 } 3546 } else { 3547 if (state & URWLOCK_READ_WAITERS) { 3548 count = INT_MAX; 3549 q = UMTX_SHARED_QUEUE; 3550 } 
else if (state & URWLOCK_WRITE_WAITERS) { 3551 count = 1; 3552 q = UMTX_EXCLUSIVE_QUEUE; 3553 } 3554 } 3555 3556 if (count) { 3557 umtxq_lock(&uq->uq_key); 3558 umtxq_busy(&uq->uq_key); 3559 umtxq_signal_queue(&uq->uq_key, count, q); 3560 umtxq_unbusy(&uq->uq_key); 3561 umtxq_unlock(&uq->uq_key); 3562 } 3563 out: 3564 umtx_key_release(&uq->uq_key); 3565 return (error); 3566 } 3567 3568 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3569 static int 3570 do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout) 3571 { 3572 struct umtx_abs_timeout timo; 3573 struct umtx_q *uq; 3574 uint32_t flags, count, count1; 3575 int error, rv, rv1; 3576 3577 uq = td->td_umtxq; 3578 error = fueword32(&sem->_flags, &flags); 3579 if (error == -1) 3580 return (EFAULT); 3581 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3582 if (error != 0) 3583 return (error); 3584 3585 if (timeout != NULL) 3586 umtx_abs_timeout_init2(&timo, timeout); 3587 3588 again: 3589 umtxq_lock(&uq->uq_key); 3590 umtxq_busy(&uq->uq_key); 3591 umtxq_insert(uq); 3592 umtxq_unlock(&uq->uq_key); 3593 rv = casueword32(&sem->_has_waiters, 0, &count1, 1); 3594 if (rv != -1) 3595 rv1 = fueword32(&sem->_count, &count); 3596 if (rv == -1 || rv1 == -1 || count != 0 || (rv == 1 && count1 == 0)) { 3597 if (rv == 0) 3598 rv = suword32(&sem->_has_waiters, 0); 3599 umtxq_lock(&uq->uq_key); 3600 umtxq_unbusy(&uq->uq_key); 3601 umtxq_remove(uq); 3602 umtxq_unlock(&uq->uq_key); 3603 if (rv == -1 || rv1 == -1) { 3604 error = EFAULT; 3605 goto out; 3606 } 3607 if (count != 0) { 3608 error = 0; 3609 goto out; 3610 } 3611 MPASS(rv == 1 && count1 == 0); 3612 rv = thread_check_susp(td, true); 3613 if (rv == 0) 3614 goto again; 3615 error = rv; 3616 goto out; 3617 } 3618 umtxq_lock(&uq->uq_key); 3619 umtxq_unbusy(&uq->uq_key); 3620 3621 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 3622 3623 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3624 error = 0; 3625 else { 3626 umtxq_remove(uq); 3627 /* A relative timeout cannot be restarted. */ 3628 if (error == ERESTART && timeout != NULL && 3629 (timeout->_flags & UMTX_ABSTIME) == 0) 3630 error = EINTR; 3631 } 3632 umtxq_unlock(&uq->uq_key); 3633 out: 3634 umtx_key_release(&uq->uq_key); 3635 return (error); 3636 } 3637 3638 /* 3639 * Signal a userland semaphore. 3640 */ 3641 static int 3642 do_sem_wake(struct thread *td, struct _usem *sem) 3643 { 3644 struct umtx_key key; 3645 int error, cnt; 3646 uint32_t flags; 3647 3648 error = fueword32(&sem->_flags, &flags); 3649 if (error == -1) 3650 return (EFAULT); 3651 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3652 return (error); 3653 umtxq_lock(&key); 3654 umtxq_busy(&key); 3655 cnt = umtxq_count(&key); 3656 if (cnt > 0) { 3657 /* 3658 * Check if count is greater than 0, this means the memory is 3659 * still being referenced by user code, so we can safely 3660 * update _has_waiters flag. 
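 *
 * (When cnt == 1 the single waiter is about to be woken, so
 * _has_waiters is cleared first, while the chain is still busied,
 * to keep the flag consistent with the emptied queue.)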
3661 */ 3662 if (cnt == 1) { 3663 umtxq_unlock(&key); 3664 error = suword32(&sem->_has_waiters, 0); 3665 umtxq_lock(&key); 3666 if (error == -1) 3667 error = EFAULT; 3668 } 3669 umtxq_signal(&key, 1); 3670 } 3671 umtxq_unbusy(&key); 3672 umtxq_unlock(&key); 3673 umtx_key_release(&key); 3674 return (error); 3675 } 3676 #endif 3677 3678 static int 3679 do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout) 3680 { 3681 struct umtx_abs_timeout timo; 3682 struct umtx_q *uq; 3683 uint32_t count, flags; 3684 int error, rv; 3685 3686 uq = td->td_umtxq; 3687 flags = fuword32(&sem->_flags); 3688 if (timeout != NULL) 3689 umtx_abs_timeout_init2(&timo, timeout); 3690 3691 again: 3692 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3693 if (error != 0) 3694 return (error); 3695 umtxq_lock(&uq->uq_key); 3696 umtxq_busy(&uq->uq_key); 3697 umtxq_insert(uq); 3698 umtxq_unlock(&uq->uq_key); 3699 rv = fueword32(&sem->_count, &count); 3700 if (rv == -1) { 3701 umtxq_lock(&uq->uq_key); 3702 umtxq_unbusy(&uq->uq_key); 3703 umtxq_remove(uq); 3704 umtxq_unlock(&uq->uq_key); 3705 umtx_key_release(&uq->uq_key); 3706 return (EFAULT); 3707 } 3708 for (;;) { 3709 if (USEM_COUNT(count) != 0) { 3710 umtxq_lock(&uq->uq_key); 3711 umtxq_unbusy(&uq->uq_key); 3712 umtxq_remove(uq); 3713 umtxq_unlock(&uq->uq_key); 3714 umtx_key_release(&uq->uq_key); 3715 return (0); 3716 } 3717 if (count == USEM_HAS_WAITERS) 3718 break; 3719 rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS); 3720 if (rv == 0) 3721 break; 3722 umtxq_lock(&uq->uq_key); 3723 umtxq_unbusy(&uq->uq_key); 3724 umtxq_remove(uq); 3725 umtxq_unlock(&uq->uq_key); 3726 umtx_key_release(&uq->uq_key); 3727 if (rv == -1) 3728 return (EFAULT); 3729 rv = thread_check_susp(td, true); 3730 if (rv != 0) 3731 return (rv); 3732 goto again; 3733 } 3734 umtxq_lock(&uq->uq_key); 3735 umtxq_unbusy(&uq->uq_key); 3736 3737 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 3738 3739 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3740 error = 0; 3741 else { 3742 umtxq_remove(uq); 3743 if (timeout != NULL && (timeout->_flags & UMTX_ABSTIME) == 0) { 3744 /* A relative timeout cannot be restarted. */ 3745 if (error == ERESTART) 3746 error = EINTR; 3747 if (error == EINTR) { 3748 kern_clock_gettime(curthread, timo.clockid, 3749 &timo.cur); 3750 timespecsub(&timo.end, &timo.cur, 3751 &timeout->_timeout); 3752 } 3753 } 3754 } 3755 umtxq_unlock(&uq->uq_key); 3756 umtx_key_release(&uq->uq_key); 3757 return (error); 3758 } 3759 3760 /* 3761 * Signal a userland semaphore. 3762 */ 3763 static int 3764 do_sem2_wake(struct thread *td, struct _usem2 *sem) 3765 { 3766 struct umtx_key key; 3767 int error, cnt, rv; 3768 uint32_t count, flags; 3769 3770 rv = fueword32(&sem->_flags, &flags); 3771 if (rv == -1) 3772 return (EFAULT); 3773 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3774 return (error); 3775 umtxq_lock(&key); 3776 umtxq_busy(&key); 3777 cnt = umtxq_count(&key); 3778 if (cnt > 0) { 3779 /* 3780 * If this was the last sleeping thread, clear the waiters 3781 * flag in _count. 
3782 */ 3783 if (cnt == 1) { 3784 umtxq_unlock(&key); 3785 rv = fueword32(&sem->_count, &count); 3786 while (rv != -1 && count & USEM_HAS_WAITERS) { 3787 rv = casueword32(&sem->_count, count, &count, 3788 count & ~USEM_HAS_WAITERS); 3789 if (rv == 1) { 3790 rv = thread_check_susp(td, true); 3791 if (rv != 0) 3792 break; 3793 } 3794 } 3795 if (rv == -1) 3796 error = EFAULT; 3797 else if (rv > 0) { 3798 error = rv; 3799 } 3800 umtxq_lock(&key); 3801 } 3802 3803 umtxq_signal(&key, 1); 3804 } 3805 umtxq_unbusy(&key); 3806 umtxq_unlock(&key); 3807 umtx_key_release(&key); 3808 return (error); 3809 } 3810 3811 #ifdef COMPAT_FREEBSD10 3812 int 3813 freebsd10__umtx_lock(struct thread *td, struct freebsd10__umtx_lock_args *uap) 3814 { 3815 return (do_lock_umtx(td, uap->umtx, td->td_tid, 0)); 3816 } 3817 3818 int 3819 freebsd10__umtx_unlock(struct thread *td, 3820 struct freebsd10__umtx_unlock_args *uap) 3821 { 3822 return (do_unlock_umtx(td, uap->umtx, td->td_tid)); 3823 } 3824 #endif 3825 3826 inline int 3827 umtx_copyin_timeout(const void *uaddr, struct timespec *tsp) 3828 { 3829 int error; 3830 3831 error = copyin(uaddr, tsp, sizeof(*tsp)); 3832 if (error == 0) { 3833 if (!timespecvalid_interval(tsp)) 3834 error = EINVAL; 3835 } 3836 return (error); 3837 } 3838 3839 static inline int 3840 umtx_copyin_umtx_time(const void *uaddr, size_t size, struct _umtx_time *tp) 3841 { 3842 int error; 3843 3844 if (size <= sizeof(tp->_timeout)) { 3845 tp->_clockid = CLOCK_REALTIME; 3846 tp->_flags = 0; 3847 error = copyin(uaddr, &tp->_timeout, sizeof(tp->_timeout)); 3848 } else 3849 error = copyin(uaddr, tp, sizeof(*tp)); 3850 if (error != 0) 3851 return (error); 3852 if (!timespecvalid_interval(&tp->_timeout)) 3853 return (EINVAL); 3854 return (0); 3855 } 3856 3857 static int 3858 umtx_copyin_robust_lists(const void *uaddr, size_t size, 3859 struct umtx_robust_lists_params *rb) 3860 { 3861 3862 if (size > sizeof(*rb)) 3863 return (EINVAL); 3864 return (copyin(uaddr, rb, size)); 3865 } 3866 3867 static int 3868 umtx_copyout_timeout(void *uaddr, size_t sz, struct timespec *tsp) 3869 { 3870 3871 /* 3872 * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time) 3873 * and we're only called if sz >= sizeof(timespec) as supplied in the 3874 * copyops. 3875 */ 3876 KASSERT(sz >= sizeof(*tsp), 3877 ("umtx_copyops specifies incorrect sizes")); 3878 3879 return (copyout(tsp, uaddr, sizeof(*tsp))); 3880 } 3881 3882 #ifdef COMPAT_FREEBSD10 3883 static int 3884 __umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap, 3885 const struct umtx_copyops *ops) 3886 { 3887 struct timespec *ts, timeout; 3888 int error; 3889 3890 /* Allow a null timespec (wait forever). 
*/ 3891 if (uap->uaddr2 == NULL) 3892 ts = NULL; 3893 else { 3894 error = ops->copyin_timeout(uap->uaddr2, &timeout); 3895 if (error != 0) 3896 return (error); 3897 ts = &timeout; 3898 } 3899 #ifdef COMPAT_FREEBSD32 3900 if (ops->compat32) 3901 return (do_lock_umtx32(td, uap->obj, uap->val, ts)); 3902 #endif 3903 return (do_lock_umtx(td, uap->obj, uap->val, ts)); 3904 } 3905 3906 static int 3907 __umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap, 3908 const struct umtx_copyops *ops) 3909 { 3910 #ifdef COMPAT_FREEBSD32 3911 if (ops->compat32) 3912 return (do_unlock_umtx32(td, uap->obj, uap->val)); 3913 #endif 3914 return (do_unlock_umtx(td, uap->obj, uap->val)); 3915 } 3916 #endif /* COMPAT_FREEBSD10 */ 3917 3918 #if !defined(COMPAT_FREEBSD10) 3919 static int 3920 __umtx_op_unimpl(struct thread *td __unused, struct _umtx_op_args *uap __unused, 3921 const struct umtx_copyops *ops __unused) 3922 { 3923 return (EOPNOTSUPP); 3924 } 3925 #endif /* COMPAT_FREEBSD10 */ 3926 3927 static int 3928 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap, 3929 const struct umtx_copyops *ops) 3930 { 3931 struct _umtx_time timeout, *tm_p; 3932 int error; 3933 3934 if (uap->uaddr2 == NULL) 3935 tm_p = NULL; 3936 else { 3937 error = ops->copyin_umtx_time( 3938 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3939 if (error != 0) 3940 return (error); 3941 tm_p = &timeout; 3942 } 3943 return (do_wait(td, uap->obj, uap->val, tm_p, ops->compat32, 0)); 3944 } 3945 3946 static int 3947 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap, 3948 const struct umtx_copyops *ops) 3949 { 3950 struct _umtx_time timeout, *tm_p; 3951 int error; 3952 3953 if (uap->uaddr2 == NULL) 3954 tm_p = NULL; 3955 else { 3956 error = ops->copyin_umtx_time( 3957 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3958 if (error != 0) 3959 return (error); 3960 tm_p = &timeout; 3961 } 3962 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); 3963 } 3964 3965 static int 3966 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap, 3967 const struct umtx_copyops *ops) 3968 { 3969 struct _umtx_time *tm_p, timeout; 3970 int error; 3971 3972 if (uap->uaddr2 == NULL) 3973 tm_p = NULL; 3974 else { 3975 error = ops->copyin_umtx_time( 3976 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3977 if (error != 0) 3978 return (error); 3979 tm_p = &timeout; 3980 } 3981 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1)); 3982 } 3983 3984 static int 3985 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap, 3986 const struct umtx_copyops *ops __unused) 3987 { 3988 3989 return (kern_umtx_wake(td, uap->obj, uap->val, 0)); 3990 } 3991 3992 #define BATCH_SIZE 128 3993 static int 3994 __umtx_op_nwake_private_native(struct thread *td, struct _umtx_op_args *uap) 3995 { 3996 char *uaddrs[BATCH_SIZE], **upp; 3997 int count, error, i, pos, tocopy; 3998 3999 upp = (char **)uap->obj; 4000 error = 0; 4001 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 4002 pos += tocopy) { 4003 tocopy = MIN(count, BATCH_SIZE); 4004 error = copyin(upp + pos, uaddrs, tocopy * sizeof(char *)); 4005 if (error != 0) 4006 break; 4007 for (i = 0; i < tocopy; ++i) { 4008 kern_umtx_wake(td, uaddrs[i], INT_MAX, 1); 4009 } 4010 maybe_yield(); 4011 } 4012 return (error); 4013 } 4014 4015 static int 4016 __umtx_op_nwake_private_compat32(struct thread *td, struct _umtx_op_args *uap) 4017 { 4018 uint32_t uaddrs[BATCH_SIZE], *upp; 4019 int count, error, i, pos, tocopy; 4020 4021 upp = (uint32_t *)uap->obj; 4022 error = 0; 4023 for (count = 
uap->val, pos = 0; count > 0; count -= tocopy, 4024 pos += tocopy) { 4025 tocopy = MIN(count, BATCH_SIZE); 4026 error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t)); 4027 if (error != 0) 4028 break; 4029 for (i = 0; i < tocopy; ++i) { 4030 kern_umtx_wake(td, (void *)(uintptr_t)uaddrs[i], 4031 INT_MAX, 1); 4032 } 4033 maybe_yield(); 4034 } 4035 return (error); 4036 } 4037 4038 static int 4039 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap, 4040 const struct umtx_copyops *ops) 4041 { 4042 4043 if (ops->compat32) 4044 return (__umtx_op_nwake_private_compat32(td, uap)); 4045 return (__umtx_op_nwake_private_native(td, uap)); 4046 } 4047 4048 static int 4049 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap, 4050 const struct umtx_copyops *ops __unused) 4051 { 4052 4053 return (kern_umtx_wake(td, uap->obj, uap->val, 1)); 4054 } 4055 4056 static int 4057 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap, 4058 const struct umtx_copyops *ops) 4059 { 4060 struct _umtx_time *tm_p, timeout; 4061 int error; 4062 4063 /* Allow a null timespec (wait forever). */ 4064 if (uap->uaddr2 == NULL) 4065 tm_p = NULL; 4066 else { 4067 error = ops->copyin_umtx_time( 4068 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 4069 if (error != 0) 4070 return (error); 4071 tm_p = &timeout; 4072 } 4073 return (do_lock_umutex(td, uap->obj, tm_p, 0)); 4074 } 4075 4076 static int 4077 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap, 4078 const struct umtx_copyops *ops __unused) 4079 { 4080 4081 return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY)); 4082 } 4083 4084 static int 4085 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap, 4086 const struct umtx_copyops *ops) 4087 { 4088 struct _umtx_time *tm_p, timeout; 4089 int error; 4090 4091 /* Allow a null timespec (wait forever). */ 4092 if (uap->uaddr2 == NULL) 4093 tm_p = NULL; 4094 else { 4095 error = ops->copyin_umtx_time( 4096 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 4097 if (error != 0) 4098 return (error); 4099 tm_p = &timeout; 4100 } 4101 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT)); 4102 } 4103 4104 static int 4105 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap, 4106 const struct umtx_copyops *ops __unused) 4107 { 4108 4109 return (do_wake_umutex(td, uap->obj)); 4110 } 4111 4112 static int 4113 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap, 4114 const struct umtx_copyops *ops __unused) 4115 { 4116 4117 return (do_unlock_umutex(td, uap->obj, false)); 4118 } 4119 4120 static int 4121 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap, 4122 const struct umtx_copyops *ops __unused) 4123 { 4124 4125 return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1)); 4126 } 4127 4128 static int 4129 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap, 4130 const struct umtx_copyops *ops) 4131 { 4132 struct timespec *ts, timeout; 4133 int error; 4134 4135 /* Allow a null timespec (wait forever). 
*/ 4136 if (uap->uaddr2 == NULL) 4137 ts = NULL; 4138 else { 4139 error = ops->copyin_timeout(uap->uaddr2, &timeout); 4140 if (error != 0) 4141 return (error); 4142 ts = &timeout; 4143 } 4144 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 4145 } 4146 4147 static int 4148 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap, 4149 const struct umtx_copyops *ops __unused) 4150 { 4151 4152 return (do_cv_signal(td, uap->obj)); 4153 } 4154 4155 static int 4156 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap, 4157 const struct umtx_copyops *ops __unused) 4158 { 4159 4160 return (do_cv_broadcast(td, uap->obj)); 4161 } 4162 4163 static int 4164 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap, 4165 const struct umtx_copyops *ops) 4166 { 4167 struct _umtx_time timeout; 4168 int error; 4169 4170 /* Allow a null timespec (wait forever). */ 4171 if (uap->uaddr2 == NULL) { 4172 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 4173 } else { 4174 error = ops->copyin_umtx_time(uap->uaddr2, 4175 (size_t)uap->uaddr1, &timeout); 4176 if (error != 0) 4177 return (error); 4178 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 4179 } 4180 return (error); 4181 } 4182 4183 static int 4184 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap, 4185 const struct umtx_copyops *ops) 4186 { 4187 struct _umtx_time timeout; 4188 int error; 4189 4190 /* Allow a null timespec (wait forever). */ 4191 if (uap->uaddr2 == NULL) { 4192 error = do_rw_wrlock(td, uap->obj, 0); 4193 } else { 4194 error = ops->copyin_umtx_time(uap->uaddr2, 4195 (size_t)uap->uaddr1, &timeout); 4196 if (error != 0) 4197 return (error); 4198 4199 error = do_rw_wrlock(td, uap->obj, &timeout); 4200 } 4201 return (error); 4202 } 4203 4204 static int 4205 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap, 4206 const struct umtx_copyops *ops __unused) 4207 { 4208 4209 return (do_rw_unlock(td, uap->obj)); 4210 } 4211 4212 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4213 static int 4214 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap, 4215 const struct umtx_copyops *ops) 4216 { 4217 struct _umtx_time *tm_p, timeout; 4218 int error; 4219 4220 /* Allow a null timespec (wait forever). */ 4221 if (uap->uaddr2 == NULL) 4222 tm_p = NULL; 4223 else { 4224 error = ops->copyin_umtx_time( 4225 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 4226 if (error != 0) 4227 return (error); 4228 tm_p = &timeout; 4229 } 4230 return (do_sem_wait(td, uap->obj, tm_p)); 4231 } 4232 4233 static int 4234 __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap, 4235 const struct umtx_copyops *ops __unused) 4236 { 4237 4238 return (do_sem_wake(td, uap->obj)); 4239 } 4240 #endif 4241 4242 static int 4243 __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap, 4244 const struct umtx_copyops *ops __unused) 4245 { 4246 4247 return (do_wake2_umutex(td, uap->obj, uap->val)); 4248 } 4249 4250 static int 4251 __umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap, 4252 const struct umtx_copyops *ops) 4253 { 4254 struct _umtx_time *tm_p, timeout; 4255 size_t uasize; 4256 int error; 4257 4258 /* Allow a null timespec (wait forever). 
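 *
 * Unlike most timed ops, on EINTR with a relative timeout the
 * remaining time is written back to userland below (when the caller
 * passed a large enough buffer), so the wait can be restarted with
 * an adjusted timeout.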
*/ 4259 if (uap->uaddr2 == NULL) { 4260 uasize = 0; 4261 tm_p = NULL; 4262 } else { 4263 uasize = (size_t)uap->uaddr1; 4264 error = ops->copyin_umtx_time(uap->uaddr2, uasize, &timeout); 4265 if (error != 0) 4266 return (error); 4267 tm_p = &timeout; 4268 } 4269 error = do_sem2_wait(td, uap->obj, tm_p); 4270 if (error == EINTR && uap->uaddr2 != NULL && 4271 (timeout._flags & UMTX_ABSTIME) == 0 && 4272 uasize >= ops->umtx_time_sz + ops->timespec_sz) { 4273 error = ops->copyout_timeout( 4274 (void *)((uintptr_t)uap->uaddr2 + ops->umtx_time_sz), 4275 uasize - ops->umtx_time_sz, &timeout._timeout); 4276 if (error == 0) { 4277 error = EINTR; 4278 } 4279 } 4280 4281 return (error); 4282 } 4283 4284 static int 4285 __umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap, 4286 const struct umtx_copyops *ops __unused) 4287 { 4288 4289 return (do_sem2_wake(td, uap->obj)); 4290 } 4291 4292 #define USHM_OBJ_UMTX(o) \ 4293 ((struct umtx_shm_obj_list *)(&(o)->umtx_data)) 4294 4295 #define USHMF_REG_LINKED 0x0001 4296 #define USHMF_OBJ_LINKED 0x0002 4297 struct umtx_shm_reg { 4298 TAILQ_ENTRY(umtx_shm_reg) ushm_reg_link; 4299 LIST_ENTRY(umtx_shm_reg) ushm_obj_link; 4300 struct umtx_key ushm_key; 4301 struct ucred *ushm_cred; 4302 struct shmfd *ushm_obj; 4303 u_int ushm_refcnt; 4304 u_int ushm_flags; 4305 }; 4306 4307 LIST_HEAD(umtx_shm_obj_list, umtx_shm_reg); 4308 TAILQ_HEAD(umtx_shm_reg_head, umtx_shm_reg); 4309 4310 static uma_zone_t umtx_shm_reg_zone; 4311 static struct umtx_shm_reg_head umtx_shm_registry[UMTX_CHAINS]; 4312 static struct mtx umtx_shm_lock; 4313 static struct umtx_shm_reg_head umtx_shm_reg_delfree = 4314 TAILQ_HEAD_INITIALIZER(umtx_shm_reg_delfree); 4315 4316 static void umtx_shm_free_reg(struct umtx_shm_reg *reg); 4317 4318 static void 4319 umtx_shm_reg_delfree_tq(void *context __unused, int pending __unused) 4320 { 4321 struct umtx_shm_reg_head d; 4322 struct umtx_shm_reg *reg, *reg1; 4323 4324 TAILQ_INIT(&d); 4325 mtx_lock(&umtx_shm_lock); 4326 TAILQ_CONCAT(&d, &umtx_shm_reg_delfree, ushm_reg_link); 4327 mtx_unlock(&umtx_shm_lock); 4328 TAILQ_FOREACH_SAFE(reg, &d, ushm_reg_link, reg1) { 4329 TAILQ_REMOVE(&d, reg, ushm_reg_link); 4330 umtx_shm_free_reg(reg); 4331 } 4332 } 4333 4334 static struct task umtx_shm_reg_delfree_task = 4335 TASK_INITIALIZER(0, umtx_shm_reg_delfree_tq, NULL); 4336 4337 static struct umtx_shm_reg * 4338 umtx_shm_find_reg_locked(const struct umtx_key *key) 4339 { 4340 struct umtx_shm_reg *reg; 4341 struct umtx_shm_reg_head *reg_head; 4342 4343 KASSERT(key->shared, ("umtx_p_find_rg: private key")); 4344 mtx_assert(&umtx_shm_lock, MA_OWNED); 4345 reg_head = &umtx_shm_registry[key->hash]; 4346 TAILQ_FOREACH(reg, reg_head, ushm_reg_link) { 4347 KASSERT(reg->ushm_key.shared, 4348 ("non-shared key on reg %p %d", reg, reg->ushm_key.shared)); 4349 if (reg->ushm_key.info.shared.object == 4350 key->info.shared.object && 4351 reg->ushm_key.info.shared.offset == 4352 key->info.shared.offset) { 4353 KASSERT(reg->ushm_key.type == TYPE_SHM, ("TYPE_USHM")); 4354 KASSERT(reg->ushm_refcnt > 0, 4355 ("reg %p refcnt 0 onlist", reg)); 4356 KASSERT((reg->ushm_flags & USHMF_REG_LINKED) != 0, 4357 ("reg %p not linked", reg)); 4358 reg->ushm_refcnt++; 4359 return (reg); 4360 } 4361 } 4362 return (NULL); 4363 } 4364 4365 static struct umtx_shm_reg * 4366 umtx_shm_find_reg(const struct umtx_key *key) 4367 { 4368 struct umtx_shm_reg *reg; 4369 4370 mtx_lock(&umtx_shm_lock); 4371 reg = umtx_shm_find_reg_locked(key); 4372 mtx_unlock(&umtx_shm_lock); 4373 return (reg); 4374 } 4375 
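/*
 * Registry life cycle: umtx_shm_create_reg() installs an entry with
 * two references, one for the registry linkage and one returned to
 * the caller; umtx_shm_unref_reg() drops a reference, and the entry
 * is freed once it is both unreferenced and unlinked.  A hedged
 * userland sketch of the UMTX_OP_SHM interface served below, where
 * the key is the address of any stable word in shared memory:
 *
 *	static uint32_t keyw;	(hypothetical shared word)
 *	int fd = _umtx_op(NULL, UMTX_OP_SHM, UMTX_SHM_CREAT,
 *	    &keyw, NULL);
 *
 * On success, fd references a page-sized shared memory object that
 * other processes mapping the same word can obtain with
 * UMTX_SHM_LOOKUP and tear down with UMTX_SHM_DESTROY.
 */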
4376 static void
4377 umtx_shm_free_reg(struct umtx_shm_reg *reg)
4378 {
4379 
4380 	chgumtxcnt(reg->ushm_cred->cr_ruidinfo, -1, 0);
4381 	crfree(reg->ushm_cred);
4382 	shm_drop(reg->ushm_obj);
4383 	uma_zfree(umtx_shm_reg_zone, reg);
4384 }
4385 
4386 static bool
4387 umtx_shm_unref_reg_locked(struct umtx_shm_reg *reg, bool force)
4388 {
4389 	bool res;
4390 
4391 	mtx_assert(&umtx_shm_lock, MA_OWNED);
4392 	KASSERT(reg->ushm_refcnt > 0, ("ushm_reg %p refcnt 0", reg));
4393 	reg->ushm_refcnt--;
4394 	res = reg->ushm_refcnt == 0;
4395 	if (res || force) {
4396 		if ((reg->ushm_flags & USHMF_REG_LINKED) != 0) {
4397 			TAILQ_REMOVE(&umtx_shm_registry[reg->ushm_key.hash],
4398 			    reg, ushm_reg_link);
4399 			reg->ushm_flags &= ~USHMF_REG_LINKED;
4400 		}
4401 		if ((reg->ushm_flags & USHMF_OBJ_LINKED) != 0) {
4402 			LIST_REMOVE(reg, ushm_obj_link);
4403 			reg->ushm_flags &= ~USHMF_OBJ_LINKED;
4404 		}
4405 	}
4406 	return (res);
4407 }
4408 
4409 static void
4410 umtx_shm_unref_reg(struct umtx_shm_reg *reg, bool force)
4411 {
4412 	vm_object_t object;
4413 	bool dofree;
4414 
4415 	if (force) {
4416 		object = reg->ushm_obj->shm_object;
4417 		VM_OBJECT_WLOCK(object);
4418 		vm_object_set_flag(object, OBJ_UMTXDEAD);
4419 		VM_OBJECT_WUNLOCK(object);
4420 	}
4421 	mtx_lock(&umtx_shm_lock);
4422 	dofree = umtx_shm_unref_reg_locked(reg, force);
4423 	mtx_unlock(&umtx_shm_lock);
4424 	if (dofree)
4425 		umtx_shm_free_reg(reg);
4426 }
4427 
4428 void
4429 umtx_shm_object_init(vm_object_t object)
4430 {
4431 
4432 	LIST_INIT(USHM_OBJ_UMTX(object));
4433 }
4434 
4435 void
4436 umtx_shm_object_terminated(vm_object_t object)
4437 {
4438 	struct umtx_shm_reg *reg, *reg1;
4439 	bool dofree;
4440 
4441 	if (LIST_EMPTY(USHM_OBJ_UMTX(object)))
4442 		return;
4443 
4444 	dofree = false;
4445 	mtx_lock(&umtx_shm_lock);
4446 	LIST_FOREACH_SAFE(reg, USHM_OBJ_UMTX(object), ushm_obj_link, reg1) {
4447 		if (umtx_shm_unref_reg_locked(reg, true)) {
4448 			TAILQ_INSERT_TAIL(&umtx_shm_reg_delfree, reg,
4449 			    ushm_reg_link);
4450 			dofree = true;
4451 		}
4452 	}
4453 	mtx_unlock(&umtx_shm_lock);
4454 	if (dofree)
4455 		taskqueue_enqueue(taskqueue_thread, &umtx_shm_reg_delfree_task);
4456 }
4457 
4458 static int
4459 umtx_shm_create_reg(struct thread *td, const struct umtx_key *key,
4460     struct umtx_shm_reg **res)
4461 {
4462 	struct umtx_shm_reg *reg, *reg1;
4463 	struct ucred *cred;
4464 	int error;
4465 
4466 	reg = umtx_shm_find_reg(key);
4467 	if (reg != NULL) {
4468 		*res = reg;
4469 		return (0);
4470 	}
4471 	cred = td->td_ucred;
4472 	if (!chgumtxcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_UMTXP)))
4473 		return (ENOMEM);
4474 	reg = uma_zalloc(umtx_shm_reg_zone, M_WAITOK | M_ZERO);
4475 	reg->ushm_refcnt = 1;
4476 	bcopy(key, &reg->ushm_key, sizeof(*key));
4477 	reg->ushm_obj = shm_alloc(td->td_ucred, O_RDWR, false);
4478 	reg->ushm_cred = crhold(cred);
4479 	error = shm_dotruncate(reg->ushm_obj, PAGE_SIZE);
4480 	if (error != 0) {
4481 		umtx_shm_free_reg(reg);
4482 		return (error);
4483 	}
4484 	mtx_lock(&umtx_shm_lock);
4485 	reg1 = umtx_shm_find_reg_locked(key);
4486 	if (reg1 != NULL) {
4487 		mtx_unlock(&umtx_shm_lock);
4488 		umtx_shm_free_reg(reg);
4489 		*res = reg1;
4490 		return (0);
4491 	}
4492 	reg->ushm_refcnt++;
4493 	TAILQ_INSERT_TAIL(&umtx_shm_registry[key->hash], reg, ushm_reg_link);
4494 	LIST_INSERT_HEAD(USHM_OBJ_UMTX(key->info.shared.object), reg,
4495 	    ushm_obj_link);
4496 	reg->ushm_flags = USHMF_REG_LINKED | USHMF_OBJ_LINKED;
4497 	mtx_unlock(&umtx_shm_lock);
4498 	*res = reg;
4499 	return (0);
4500 }
4501 
4502 static int
4503 umtx_shm_alive(struct thread *td, void *addr)
4504 {
4505 	vm_map_t map;
static int
umtx_shm_alive(struct thread *td, void *addr)
{
	vm_map_t map;
	vm_map_entry_t entry;
	vm_object_t object;
	vm_pindex_t pindex;
	vm_prot_t prot;
	int res, ret;
	boolean_t wired;

	map = &td->td_proc->p_vmspace->vm_map;
	res = vm_map_lookup(&map, (uintptr_t)addr, VM_PROT_READ, &entry,
	    &object, &pindex, &prot, &wired);
	if (res != KERN_SUCCESS)
		return (EFAULT);
	if (object == NULL)
		ret = EINVAL;
	else
		ret = (object->flags & OBJ_UMTXDEAD) != 0 ? ENOTTY : 0;
	vm_map_lookup_done(map, entry);
	return (ret);
}

static void
umtx_shm_init(void)
{
	int i;

	umtx_shm_reg_zone = uma_zcreate("umtx_shm", sizeof(struct umtx_shm_reg),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	mtx_init(&umtx_shm_lock, "umtxshm", NULL, MTX_DEF);
	for (i = 0; i < nitems(umtx_shm_registry); i++)
		TAILQ_INIT(&umtx_shm_registry[i]);
}

static int
umtx_shm(struct thread *td, void *addr, u_int flags)
{
	struct umtx_key key;
	struct umtx_shm_reg *reg;
	struct file *fp;
	int error, fd;

	if (__bitcount(flags & (UMTX_SHM_CREAT | UMTX_SHM_LOOKUP |
	    UMTX_SHM_DESTROY | UMTX_SHM_ALIVE)) != 1)
		return (EINVAL);
	if ((flags & UMTX_SHM_ALIVE) != 0)
		return (umtx_shm_alive(td, addr));
	error = umtx_key_get(addr, TYPE_SHM, PROCESS_SHARE, &key);
	if (error != 0)
		return (error);
	KASSERT(key.shared == 1, ("non-shared key"));
	if ((flags & UMTX_SHM_CREAT) != 0) {
		error = umtx_shm_create_reg(td, &key, &reg);
	} else {
		reg = umtx_shm_find_reg(&key);
		if (reg == NULL)
			error = ESRCH;
	}
	umtx_key_release(&key);
	if (error != 0)
		return (error);
	KASSERT(reg != NULL, ("no reg"));
	if ((flags & UMTX_SHM_DESTROY) != 0) {
		umtx_shm_unref_reg(reg, true);
	} else {
#if 0
#ifdef MAC
		error = mac_posixshm_check_open(td->td_ucred,
		    reg->ushm_obj, FFLAGS(O_RDWR));
		if (error == 0)
#endif
			error = shm_access(reg->ushm_obj, td->td_ucred,
			    FFLAGS(O_RDWR));
		if (error == 0)
#endif
			error = falloc_caps(td, &fp, &fd, O_CLOEXEC, NULL);
		if (error == 0) {
			shm_hold(reg->ushm_obj);
			finit(fp, FFLAGS(O_RDWR), DTYPE_SHM, reg->ushm_obj,
			    &shm_ops);
			td->td_retval[0] = fd;
			fdrop(fp, td);
		}
	}
	umtx_shm_unref_reg(reg, false);
	return (error);
}

static int
__umtx_op_shm(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *ops __unused)
{

	return (umtx_shm(td, uap->uaddr1, uap->val));
}

static int
__umtx_op_robust_lists(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *ops)
{
	struct umtx_robust_lists_params rb;
	int error;

	if (ops->compat32) {
		if ((td->td_pflags2 & TDP2_COMPAT32RB) == 0 &&
		    (td->td_rb_list != 0 || td->td_rbp_list != 0 ||
		    td->td_rb_inact != 0))
			return (EBUSY);
	} else if ((td->td_pflags2 & TDP2_COMPAT32RB) != 0) {
		return (EBUSY);
	}

	bzero(&rb, sizeof(rb));
	error = ops->copyin_robust_lists(uap->uaddr1, uap->val, &rb);
	if (error != 0)
		return (error);

	if (ops->compat32)
		td->td_pflags2 |= TDP2_COMPAT32RB;

	td->td_rb_list = rb.robust_list_offset;
	td->td_rbp_list = rb.robust_priv_list_offset;
	td->td_rb_inact = rb.robust_inact_offset;
	return (0);
}

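/*
 * A thread's robust list registration is tied to one pointer ABI: once
 * TDP2_COMPAT32RB is set the registered lists hold 32-bit pointers, so
 * __umtx_op_robust_lists() above refuses a native re-registration over
 * live compat32 lists (and vice versa) with EBUSY.
 */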
static int
__umtx_op_get_min_timeout(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *ops)
{
	long val;
	int error, val1;

	val = sbttons(td->td_proc->p_umtx_min_timeout);
	if (ops->compat32) {
		val1 = (int)val;
		error = copyout(&val1, uap->uaddr1, sizeof(val1));
	} else {
		error = copyout(&val, uap->uaddr1, sizeof(val));
	}
	return (error);
}

static int
__umtx_op_set_min_timeout(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *ops)
{
	if (uap->val < 0)
		return (EINVAL);
	td->td_proc->p_umtx_min_timeout = nstosbt(uap->val);
	return (0);
}

#if defined(__i386__) || defined(__amd64__)
/*
 * Provide the standard 32-bit definitions for x86, since native/compat32 use a
 * 32-bit time_t there.  Other architectures just need the i386 definitions
 * along with their standard compat32.
 */
struct timespecx32 {
	int64_t		tv_sec;
	int32_t		tv_nsec;
};

struct umtx_timex32 {
	struct	timespecx32	_timeout;
	uint32_t		_flags;
	uint32_t		_clockid;
};

#ifndef __i386__
#define	timespeci386	timespec32
#define	umtx_timei386	umtx_time32
#endif
#else /* !__i386__ && !__amd64__ */
/* 32-bit architectures can emulate i386, so define these almost everywhere. */
struct timespeci386 {
	int32_t		tv_sec;
	int32_t		tv_nsec;
};

struct umtx_timei386 {
	struct	timespeci386	_timeout;
	uint32_t		_flags;
	uint32_t		_clockid;
};

#if defined(__LP64__)
#define	timespecx32	timespec32
#define	umtx_timex32	umtx_time32
#endif
#endif

static int
umtx_copyin_robust_lists32(const void *uaddr, size_t size,
    struct umtx_robust_lists_params *rbp)
{
	struct umtx_robust_lists_params_compat32 rb32;
	int error;

	if (size > sizeof(rb32))
		return (EINVAL);
	bzero(&rb32, sizeof(rb32));
	error = copyin(uaddr, &rb32, size);
	if (error != 0)
		return (error);
	CP(rb32, *rbp, robust_list_offset);
	CP(rb32, *rbp, robust_priv_list_offset);
	CP(rb32, *rbp, robust_inact_offset);
	return (0);
}

#ifndef __i386__
static inline int
umtx_copyin_timeouti386(const void *uaddr, struct timespec *tsp)
{
	struct timespeci386 ts32;
	int error;

	error = copyin(uaddr, &ts32, sizeof(ts32));
	if (error == 0) {
		if (!timespecvalid_interval(&ts32))
			error = EINVAL;
		else {
			CP(ts32, *tsp, tv_sec);
			CP(ts32, *tsp, tv_nsec);
		}
	}
	return (error);
}

static inline int
umtx_copyin_umtx_timei386(const void *uaddr, size_t size, struct _umtx_time *tp)
{
	struct umtx_timei386 t32;
	int error;

	t32._clockid = CLOCK_REALTIME;
	t32._flags = 0;
	if (size <= sizeof(t32._timeout))
		error = copyin(uaddr, &t32._timeout, sizeof(t32._timeout));
	else
		error = copyin(uaddr, &t32, sizeof(t32));
	if (error != 0)
		return (error);
	if (!timespecvalid_interval(&t32._timeout))
		return (EINVAL);
	TS_CP(t32, *tp, _timeout);
	CP(t32, *tp, _flags);
	CP(t32, *tp, _clockid);
	return (0);
}

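/*
 * Note that the copyin helpers above accept either a bare timespec (the
 * historical format, interpreted as a relative CLOCK_REALTIME timeout)
 * or a full _umtx_time, distinguished purely by the size argument that
 * the caller supplied.
 */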
static int
umtx_copyout_timeouti386(void *uaddr, size_t sz, struct timespec *tsp)
{
	struct timespeci386 remain32 = {
		.tv_sec = tsp->tv_sec,
		.tv_nsec = tsp->tv_nsec,
	};

	/*
	 * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time)
	 * and we're only called if sz >= sizeof(timespec) as supplied in the
	 * copyops.
	 */
	KASSERT(sz >= sizeof(remain32),
	    ("umtx_copyops specifies incorrect sizes"));

	return (copyout(&remain32, uaddr, sizeof(remain32)));
}
#endif /* !__i386__ */

#if defined(__i386__) || defined(__LP64__)
static inline int
umtx_copyin_timeoutx32(const void *uaddr, struct timespec *tsp)
{
	struct timespecx32 ts32;
	int error;

	error = copyin(uaddr, &ts32, sizeof(ts32));
	if (error == 0) {
		if (!timespecvalid_interval(&ts32))
			error = EINVAL;
		else {
			CP(ts32, *tsp, tv_sec);
			CP(ts32, *tsp, tv_nsec);
		}
	}
	return (error);
}

static inline int
umtx_copyin_umtx_timex32(const void *uaddr, size_t size, struct _umtx_time *tp)
{
	struct umtx_timex32 t32;
	int error;

	t32._clockid = CLOCK_REALTIME;
	t32._flags = 0;
	if (size <= sizeof(t32._timeout))
		error = copyin(uaddr, &t32._timeout, sizeof(t32._timeout));
	else
		error = copyin(uaddr, &t32, sizeof(t32));
	if (error != 0)
		return (error);
	if (!timespecvalid_interval(&t32._timeout))
		return (EINVAL);
	TS_CP(t32, *tp, _timeout);
	CP(t32, *tp, _flags);
	CP(t32, *tp, _clockid);
	return (0);
}

static int
umtx_copyout_timeoutx32(void *uaddr, size_t sz, struct timespec *tsp)
{
	struct timespecx32 remain32 = {
		.tv_sec = tsp->tv_sec,
		.tv_nsec = tsp->tv_nsec,
	};

	/*
	 * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time)
	 * and we're only called if sz >= sizeof(timespec) as supplied in the
	 * copyops.
	 */
	KASSERT(sz >= sizeof(remain32),
	    ("umtx_copyops specifies incorrect sizes"));

	return (copyout(&remain32, uaddr, sizeof(remain32)));
}
#endif /* __i386__ || __LP64__ */

typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *umtx_ops);

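/*
 * Dispatch table for _umtx_op(2): the request code indexes directly
 * into this array.  Operations whose compatibility support is not
 * compiled in fall back to __umtx_op_unimpl.
 */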
static const _umtx_op_func op_table[] = {
#ifdef COMPAT_FREEBSD10
	[UMTX_OP_LOCK]		= __umtx_op_lock_umtx,
	[UMTX_OP_UNLOCK]	= __umtx_op_unlock_umtx,
#else
	[UMTX_OP_LOCK]		= __umtx_op_unimpl,
	[UMTX_OP_UNLOCK]	= __umtx_op_unimpl,
#endif
	[UMTX_OP_WAIT]		= __umtx_op_wait,
	[UMTX_OP_WAKE]		= __umtx_op_wake,
	[UMTX_OP_MUTEX_TRYLOCK]	= __umtx_op_trylock_umutex,
	[UMTX_OP_MUTEX_LOCK]	= __umtx_op_lock_umutex,
	[UMTX_OP_MUTEX_UNLOCK]	= __umtx_op_unlock_umutex,
	[UMTX_OP_SET_CEILING]	= __umtx_op_set_ceiling,
	[UMTX_OP_CV_WAIT]	= __umtx_op_cv_wait,
	[UMTX_OP_CV_SIGNAL]	= __umtx_op_cv_signal,
	[UMTX_OP_CV_BROADCAST]	= __umtx_op_cv_broadcast,
	[UMTX_OP_WAIT_UINT]	= __umtx_op_wait_uint,
	[UMTX_OP_RW_RDLOCK]	= __umtx_op_rw_rdlock,
	[UMTX_OP_RW_WRLOCK]	= __umtx_op_rw_wrlock,
	[UMTX_OP_RW_UNLOCK]	= __umtx_op_rw_unlock,
	[UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private,
	[UMTX_OP_WAKE_PRIVATE]	= __umtx_op_wake_private,
	[UMTX_OP_MUTEX_WAIT]	= __umtx_op_wait_umutex,
	[UMTX_OP_MUTEX_WAKE]	= __umtx_op_wake_umutex,
#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
	[UMTX_OP_SEM_WAIT]	= __umtx_op_sem_wait,
	[UMTX_OP_SEM_WAKE]	= __umtx_op_sem_wake,
#else
	[UMTX_OP_SEM_WAIT]	= __umtx_op_unimpl,
	[UMTX_OP_SEM_WAKE]	= __umtx_op_unimpl,
#endif
	[UMTX_OP_NWAKE_PRIVATE]	= __umtx_op_nwake_private,
	[UMTX_OP_MUTEX_WAKE2]	= __umtx_op_wake2_umutex,
	[UMTX_OP_SEM2_WAIT]	= __umtx_op_sem2_wait,
	[UMTX_OP_SEM2_WAKE]	= __umtx_op_sem2_wake,
	[UMTX_OP_SHM]		= __umtx_op_shm,
	[UMTX_OP_ROBUST_LISTS]	= __umtx_op_robust_lists,
	[UMTX_OP_GET_MIN_TIMEOUT] = __umtx_op_get_min_timeout,
	[UMTX_OP_SET_MIN_TIMEOUT] = __umtx_op_set_min_timeout,
};

static const struct umtx_copyops umtx_native_ops = {
	.copyin_timeout = umtx_copyin_timeout,
	.copyin_umtx_time = umtx_copyin_umtx_time,
	.copyin_robust_lists = umtx_copyin_robust_lists,
	.copyout_timeout = umtx_copyout_timeout,
	.timespec_sz = sizeof(struct timespec),
	.umtx_time_sz = sizeof(struct _umtx_time),
};

#ifndef __i386__
static const struct umtx_copyops umtx_native_opsi386 = {
	.copyin_timeout = umtx_copyin_timeouti386,
	.copyin_umtx_time = umtx_copyin_umtx_timei386,
	.copyin_robust_lists = umtx_copyin_robust_lists32,
	.copyout_timeout = umtx_copyout_timeouti386,
	.timespec_sz = sizeof(struct timespeci386),
	.umtx_time_sz = sizeof(struct umtx_timei386),
	.compat32 = true,
};
#endif

#if defined(__i386__) || defined(__LP64__)
/* i386 can emulate other 32-bit archs, too! */
static const struct umtx_copyops umtx_native_opsx32 = {
	.copyin_timeout = umtx_copyin_timeoutx32,
	.copyin_umtx_time = umtx_copyin_umtx_timex32,
	.copyin_robust_lists = umtx_copyin_robust_lists32,
	.copyout_timeout = umtx_copyout_timeoutx32,
	.timespec_sz = sizeof(struct timespecx32),
	.umtx_time_sz = sizeof(struct umtx_timex32),
	.compat32 = true,
};

#ifdef COMPAT_FREEBSD32
#ifdef __amd64__
#define	umtx_native_ops32	umtx_native_opsi386
#else
#define	umtx_native_ops32	umtx_native_opsx32
#endif
#endif /* COMPAT_FREEBSD32 */
#endif /* __i386__ || __LP64__ */

#define	UMTX_OP__FLAGS	(UMTX_OP__32BIT | UMTX_OP__I386)

static int
kern__umtx_op(struct thread *td, void *obj, int op, unsigned long val,
    void *uaddr1, void *uaddr2, const struct umtx_copyops *ops)
{
	struct _umtx_op_args uap = {
		.obj = obj,
		.op = op & ~UMTX_OP__FLAGS,
		.val = val,
		.uaddr1 = uaddr1,
		.uaddr2 = uaddr2
	};

	if (uap.op >= nitems(op_table))
		return (EINVAL);
	return ((*op_table[uap.op])(td, &uap, ops));
}

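/*
 * The UMTX_OP__32BIT and UMTX_OP__I386 bits are stripped before table
 * dispatch; they only select the copyin/copyout layouts.  An
 * illustrative sketch (not taken from this file): a 64-bit process
 * operating on data laid out for a 32-bit peer could issue
 *
 *	_umtx_op(&sem, UMTX_OP_SEM2_WAIT | UMTX_OP__32BIT, 0,
 *	    (void *)timeout_size, &timeout32);
 *
 * and the timeout at "timeout32" would be interpreted with the 32-bit
 * structure definitions above.
 */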
int
sys__umtx_op(struct thread *td, struct _umtx_op_args *uap)
{
	const struct umtx_copyops *umtx_ops;

	umtx_ops = &umtx_native_ops;
#ifdef __LP64__
	if ((uap->op & (UMTX_OP__32BIT | UMTX_OP__I386)) != 0) {
		if ((uap->op & UMTX_OP__I386) != 0)
			umtx_ops = &umtx_native_opsi386;
		else
			umtx_ops = &umtx_native_opsx32;
	}
#elif !defined(__i386__)
	/* We consider UMTX_OP__32BIT a nop on !i386 ILP32. */
	if ((uap->op & UMTX_OP__I386) != 0)
		umtx_ops = &umtx_native_opsi386;
#else
	/* Likewise, UMTX_OP__I386 is a nop on i386. */
	if ((uap->op & UMTX_OP__32BIT) != 0)
		umtx_ops = &umtx_native_opsx32;
#endif
	return (kern__umtx_op(td, uap->obj, uap->op, uap->val, uap->uaddr1,
	    uap->uaddr2, umtx_ops));
}

#ifdef COMPAT_FREEBSD32
#ifdef COMPAT_FREEBSD10
int
freebsd10_freebsd32__umtx_lock(struct thread *td,
    struct freebsd10_freebsd32__umtx_lock_args *uap)
{
	return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
}

int
freebsd10_freebsd32__umtx_unlock(struct thread *td,
    struct freebsd10_freebsd32__umtx_unlock_args *uap)
{
	return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
}
#endif /* COMPAT_FREEBSD10 */

int
freebsd32__umtx_op(struct thread *td, struct freebsd32__umtx_op_args *uap)
{

	return (kern__umtx_op(td, uap->obj, uap->op, uap->val, uap->uaddr1,
	    uap->uaddr2, &umtx_native_ops32));
}
#endif /* COMPAT_FREEBSD32 */

void
umtx_thread_init(struct thread *td)
{

	td->td_umtxq = umtxq_alloc();
	td->td_umtxq->uq_thread = td;
}

void
umtx_thread_fini(struct thread *td)
{

	umtxq_free(td->td_umtxq);
}

/*
 * Called when a new thread is created, e.g. by fork().
 */
void
umtx_thread_alloc(struct thread *td)
{
	struct umtx_q *uq;

	uq = td->td_umtxq;
	uq->uq_inherited_pri = PRI_MAX;

	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
}

/*
 * exec() hook.
 *
 * Clear the robust lists of all the process's threads, without delaying
 * the cleanup to thread exit, since the relevant address space is being
 * destroyed right now.
 */
void
umtx_exec(struct proc *p)
{
	struct thread *td;

	KASSERT(p == curproc, ("need curproc"));
	KASSERT((p->p_flag & P_HADTHREADS) == 0 ||
	    (p->p_flag & P_STOPPED_SINGLE) != 0,
	    ("curproc must be single-threaded"));
	/*
	 * There is no need to lock the list as only this thread can be
	 * running.
	 */
	FOREACH_THREAD_IN_PROC(p, td) {
		KASSERT(td == curthread ||
		    ((td->td_flags & TDF_BOUNDARY) != 0 && TD_IS_SUSPENDED(td)),
		    ("running thread %p %p", p, td));
		umtx_thread_cleanup(td);
		td->td_rb_list = td->td_rbp_list = td->td_rb_inact = 0;
	}

	p->p_umtx_min_timeout = 0;
}

/*
 * thread exit hook.
 */
void
umtx_thread_exit(struct thread *td)
{

	umtx_thread_cleanup(td);
}

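/*
 * Robust mutex cleanup helpers.  td_rb_list and td_rbp_list point at
 * userspace lists of shared and private robust mutexes, singly linked
 * through the m_rb_lnk member of each umutex; td_rb_inact names the
 * mutex the thread was acquiring or releasing when it exited.  The list
 * walk is bounded by umtx_max_rb to defend against corrupted or cyclic
 * lists.
 */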
static int
umtx_read_uptr(struct thread *td, uintptr_t ptr, uintptr_t *res, bool compat32)
{
	u_long res1;
	uint32_t res32;
	int error;

	if (compat32) {
		error = fueword32((void *)ptr, &res32);
		if (error == 0)
			res1 = res32;
	} else {
		error = fueword((void *)ptr, &res1);
	}
	if (error == 0)
		*res = res1;
	else
		error = EFAULT;
	return (error);
}

static void
umtx_read_rb_list(struct thread *td, struct umutex *m, uintptr_t *rb_list,
    bool compat32)
{
	struct umutex32 m32;

	if (compat32) {
		memcpy(&m32, m, sizeof(m32));
		*rb_list = m32.m_rb_lnk;
	} else {
		*rb_list = m->m_rb_lnk;
	}
}

static int
umtx_handle_rb(struct thread *td, uintptr_t rbp, uintptr_t *rb_list, bool inact,
    bool compat32)
{
	struct umutex m;
	int error;

	KASSERT(td->td_proc == curproc, ("need current vmspace"));
	error = copyin((void *)rbp, &m, sizeof(m));
	if (error != 0)
		return (error);
	if (rb_list != NULL)
		umtx_read_rb_list(td, &m, rb_list, compat32);
	if ((m.m_flags & UMUTEX_ROBUST) == 0)
		return (EINVAL);
	if ((m.m_owner & ~UMUTEX_CONTESTED) != td->td_tid)
		/* inact is cleared after unlock, allow the inconsistency */
		return (inact ? 0 : EINVAL);
	return (do_unlock_umutex(td, (struct umutex *)rbp, true));
}

static void
umtx_cleanup_rb_list(struct thread *td, uintptr_t rb_list, uintptr_t *rb_inact,
    const char *name, bool compat32)
{
	int error, i;
	uintptr_t rbp;
	bool inact;

	if (rb_list == 0)
		return;
	error = umtx_read_uptr(td, rb_list, &rbp, compat32);
	for (i = 0; error == 0 && rbp != 0 && i < umtx_max_rb; i++) {
		if (rbp == *rb_inact) {
			inact = true;
			*rb_inact = 0;
		} else
			inact = false;
		error = umtx_handle_rb(td, rbp, &rbp, inact, compat32);
	}
	if (i == umtx_max_rb && umtx_verbose_rb) {
		uprintf("comm %s pid %d: reached umtx %smax rb %d\n",
		    td->td_proc->p_comm, td->td_proc->p_pid, name, umtx_max_rb);
	}
	if (error != 0 && umtx_verbose_rb) {
		uprintf("comm %s pid %d: handling %srb error %d\n",
		    td->td_proc->p_comm, td->td_proc->p_pid, name, error);
	}
}

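/*
 * umtx_thread_cleanup() below is the common path for umtx_thread_exit()
 * and the exec() hook above; in both cases the thread's address space
 * is still valid, which the robust list walk requires.
 */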
/*
 * Clean up umtx data.
 */
static void
umtx_thread_cleanup(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	uintptr_t rb_inact;
	bool compat32;

	/*
	 * Disown pi mutexes.
	 */
	uq = td->td_umtxq;
	if (uq != NULL) {
		if (uq->uq_inherited_pri != PRI_MAX ||
		    !TAILQ_EMPTY(&uq->uq_pi_contested)) {
			mtx_lock(&umtx_lock);
			uq->uq_inherited_pri = PRI_MAX;
			while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
				pi->pi_owner = NULL;
				TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
			}
			mtx_unlock(&umtx_lock);
		}
		sched_lend_user_prio_cond(td, PRI_MAX);
	}

	compat32 = (td->td_pflags2 & TDP2_COMPAT32RB) != 0;
	td->td_pflags2 &= ~TDP2_COMPAT32RB;

	if (td->td_rb_inact == 0 && td->td_rb_list == 0 && td->td_rbp_list == 0)
		return;

	/*
	 * Handle terminated robust mutexes.  Must be done after
	 * robust pi disown, otherwise unlock could see unowned
	 * entries.
	 */
	rb_inact = td->td_rb_inact;
	if (rb_inact != 0)
		(void)umtx_read_uptr(td, rb_inact, &rb_inact, compat32);
	umtx_cleanup_rb_list(td, td->td_rb_list, &rb_inact, "", compat32);
	umtx_cleanup_rb_list(td, td->td_rbp_list, &rb_inact, "priv ", compat32);
	if (rb_inact != 0)
		(void)umtx_handle_rb(td, rb_inact, NULL, true, compat32);
}