/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2015, 2016 The FreeBSD Foundation
 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Portions of this software were developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_umtx_profiling.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/syscallsubr.h>
#include <sys/taskqueue.h>
#include <sys/time.h>
#include <sys/eventhandler.h>
#include <sys/umtx.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

#include <machine/atomic.h>
#include <machine/cpu.h>

#include <compat/freebsd32/freebsd32.h>
#ifdef COMPAT_FREEBSD32
#include <compat/freebsd32/freebsd32_proto.h>
#endif

#define	_UMUTEX_TRY		1
#define	_UMUTEX_WAIT		2

#ifdef UMTX_PROFILING
#define	UPROF_PERC_BIGGER(w, f, sw, sf)					\
	(((w) > (sw)) || ((w) == (sw) && (f) > (sf)))
#endif

/* Priority inheritance mutex info. */
struct umtx_pi {
	/* Owner thread */
	struct thread		*pi_owner;

	/* Reference count */
	int			pi_refcount;

	/* List entry linking the PI mutexes held by a thread */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* List entry in hash */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* List for waiters */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Identify a userland lock object */
	struct umtx_key		pi_key;
};

/* A userland synchronization object user. */
struct umtx_q {
	/* Linked list for the hash. */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Umtx key. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define	UQF_UMTXQ	0x0001

	/* The waiting thread. */
	struct thread		*uq_thread;

	/*
	 * Blocked on PI mutex.  Reads may be done while holding either
	 * the chain lock or umtx_lock; writes must hold both.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* On blocked list */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* Threads contending with us */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Inherited priority from PP mutex */
	u_char			uq_inherited_pri;

	/* Spare queue ready to be reused */
	struct umtxq_queue	*uq_spare_queue;

	/* The queue we are on */
	struct umtxq_queue	*uq_cur_queue;
};

TAILQ_HEAD(umtxq_head, umtx_q);

/* Per-key wait-queue */
struct umtxq_queue {
	struct umtxq_head	head;
	struct umtx_key		key;
	LIST_ENTRY(umtxq_queue)	link;
	int			length;
};

LIST_HEAD(umtxq_list, umtxq_queue);

/* Userland lock object's wait-queue chain */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx		uc_lock;

	/* List of sleep queues. */
	struct umtxq_list	uc_queue[2];
#define	UMTX_SHARED_QUEUE	0
#define	UMTX_EXCLUSIVE_QUEUE	1

	LIST_HEAD(, umtxq_queue) uc_spare_queue;

	/* Busy flag */
	char			uc_busy;

	/* Chain lock waiters */
	int			uc_waiters;

	/* All PI in the list */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;

#ifdef UMTX_PROFILING
	u_int			length;
	u_int			max_length;
#endif
};

#define	UMTXQ_LOCKED_ASSERT(uc)	mtx_assert(&(uc)->uc_lock, MA_OWNED)

/*
 * Don't propagate time-sharing priority; there is a security reason.
 * A user could create a PI mutex, let thread A lock it, and let another
 * thread B block on it.  Because B is sleeping, its priority would be
 * boosted, and that boost would propagate to A as well.  A's priority
 * would then never be lowered, even if A consumed 100% CPU, which is
 * unfair to other processes.
 */

#define	UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&	\
			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?	\
			 PRI_MAX_TIMESHARE : (td)->td_user_pri)
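
/*
 * Illustrative sketch (not part of the build): the clamp above means a
 * time-sharing thread never lends a priority better than
 * PRI_MAX_TIMESHARE through PI, while priorities outside the
 * time-sharing range pass through unchanged.  The helper below is
 * hypothetical.
 */
#if 0
static void
upri_example(struct thread *ts_td, struct thread *rt_td)
{

	/* td_user_pri within [PRI_MIN_TIMESHARE, PRI_MAX_TIMESHARE]: */
	MPASS(UPRI(ts_td) == PRI_MAX_TIMESHARE);
	/* td_user_pri outside the time-sharing range (e.g. real-time): */
	MPASS(UPRI(rt_td) == rt_td->td_user_pri);
}
#endif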

#define	GOLDEN_RATIO_PRIME	2654404609U
#ifndef	UMTX_CHAINS
#define	UMTX_CHAINS		512
#endif
#define	UMTX_SHIFTS		(__WORD_BIT - 9)

#define	GET_SHARE(flags)	\
	(((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)

#define	BUSY_SPINS		200

struct abs_timeout {
	int clockid;
	bool is_abs_real;	/* TIMER_ABSTIME && CLOCK_REALTIME* */
	struct timespec cur;
	struct timespec end;
};

struct umtx_copyops {
	int	(*copyin_timeout)(const void *uaddr, struct timespec *tsp);
	int	(*copyin_umtx_time)(const void *uaddr, size_t size,
	    struct _umtx_time *tp);
	int	(*copyin_robust_lists)(const void *uaddr, size_t size,
	    struct umtx_robust_lists_params *rbp);
	int	(*copyout_timeout)(void *uaddr, size_t size,
	    struct timespec *tsp);
	const size_t	timespec_sz;
	const size_t	umtx_time_sz;
	const bool	compat32;
};

_Static_assert(sizeof(struct umutex) == sizeof(struct umutex32), "umutex32");
_Static_assert(__offsetof(struct umutex, m_spare[0]) ==
    __offsetof(struct umutex32, m_spare[0]), "m_spare32");

int umtx_shm_vnobj_persistent = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_vnode_persistent, CTLFLAG_RWTUN,
    &umtx_shm_vnobj_persistent, 0,
    "False forces destruction of umtx attached to file, on last close");
static int umtx_max_rb = 1000;
SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_max_robust, CTLFLAG_RWTUN,
    &umtx_max_rb, 0,
    "Maximum number of robust mutexes allowed for each thread");

static uma_zone_t		umtx_pi_zone;
static struct umtxq_chain	umtxq_chains[2][UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
static int			umtx_pi_allocated;

static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
    &umtx_pi_allocated, 0, "Allocated umtx_pi");
static int umtx_verbose_rb = 1;
SYSCTL_INT(_debug_umtx, OID_AUTO, robust_faults_verbose, CTLFLAG_RWTUN,
    &umtx_verbose_rb, 0,
    "");

#ifdef UMTX_PROFILING
static long max_length;
SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0,
    "max_length");
static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD | CTLFLAG_MPSAFE,
    0, "umtx chain stats");
#endif

static void abs_timeout_update(struct abs_timeout *timo);

static void umtx_shm_init(void);
static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert_queue(struct umtx_q *uq, int q);
static void umtxq_remove_queue(struct umtx_q *uq, int q);
static int umtxq_sleep(struct umtx_q *uq, const char *wmesg,
    struct abs_timeout *);
static int umtxq_count(struct umtx_key *key);
static struct umtx_pi *umtx_pi_alloc(int);
static void umtx_pi_free(struct umtx_pi *pi);
static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    bool rb);
static void umtx_thread_cleanup(struct thread *td);
SYSINIT(umtx, SI_SUB_EVENTHANDLER + 1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);

#define	umtxq_signal(key, nwake)	umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
#define	umtxq_insert(uq)	umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
#define	umtxq_remove(uq)	umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)

static struct mtx umtx_lock;
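
/*
 * Sketch (assumption, not this file's actual wiring): the umtx_copyops
 * vtable above exists so the syscall entry points can share one
 * implementation between the native and 32-bit compat ABIs.  The
 * instance names below are hypothetical.
 */
#if 0
static const struct umtx_copyops *
umtx_select_ops_example(bool compat32)
{

	return (compat32 ? &umtx32_copyops_example :
	    &umtx_native_copyops_example);
}
#endif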
#ifdef UMTX_PROFILING
static void
umtx_init_profiling(void)
{
	struct sysctl_oid *chain_oid;
	char chain_name[10];
	int i;

	for (i = 0; i < UMTX_CHAINS; ++i) {
		snprintf(chain_name, sizeof(chain_name), "%d", i);
		chain_oid = SYSCTL_ADD_NODE(NULL,
		    SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO,
		    chain_name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
		    "umtx hash stats");
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length0", CTLFLAG_RD,
		    &umtxq_chains[0][i].max_length, 0, NULL);
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length1", CTLFLAG_RD,
		    &umtxq_chains[1][i].max_length, 0, NULL);
	}
}

static int
sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS)
{
	char buf[512];
	struct sbuf sb;
	struct umtxq_chain *uc;
	u_int fract, i, j, tot, whole;
	u_int sf0, sf1, sf2, sf3, sf4;
	u_int si0, si1, si2, si3, si4;
	u_int sw0, sw1, sw2, sw3, sw4;

	sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
	for (i = 0; i < 2; i++) {
		tot = 0;
		for (j = 0; j < UMTX_CHAINS; ++j) {
			uc = &umtxq_chains[i][j];
			mtx_lock(&uc->uc_lock);
			tot += uc->max_length;
			mtx_unlock(&uc->uc_lock);
		}
		if (tot == 0)
			sbuf_printf(&sb, "%u) Empty ", i);
		else {
			sf0 = sf1 = sf2 = sf3 = sf4 = 0;
			si0 = si1 = si2 = si3 = si4 = 0;
			sw0 = sw1 = sw2 = sw3 = sw4 = 0;
			for (j = 0; j < UMTX_CHAINS; j++) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				whole = uc->max_length * 100;
				mtx_unlock(&uc->uc_lock);
				fract = (whole % tot) * 100;
				if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) {
					sf0 = fract;
					si0 = j;
					sw0 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw1,
				    sf1)) {
					sf1 = fract;
					si1 = j;
					sw1 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw2,
				    sf2)) {
					sf2 = fract;
					si2 = j;
					sw2 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw3,
				    sf3)) {
					sf3 = fract;
					si3 = j;
					sw3 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw4,
				    sf4)) {
					sf4 = fract;
					si4 = j;
					sw4 = whole;
				}
			}
			sbuf_printf(&sb, "queue %u:\n", i);
			sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot,
			    sf0 / tot, si0);
			sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot,
			    sf1 / tot, si1);
			sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot,
			    sf2 / tot, si2);
			sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot,
			    sf3 / tot, si3);
			sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot,
			    sf4 / tot, si4);
		}
	}
	sbuf_trim(&sb);
	sbuf_finish(&sb);
	sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
	sbuf_delete(&sb);
	return (0);
}

static int
sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS)
{
	struct umtxq_chain *uc;
	u_int i, j;
	int clear, error;

	clear = 0;
	error = sysctl_handle_int(oidp, &clear, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);

	if (clear != 0) {
		for (i = 0; i < 2; ++i) {
			for (j = 0; j < UMTX_CHAINS; ++j) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				uc->length = 0;
				uc->max_length = 0;
				mtx_unlock(&uc->uc_lock);
			}
		}
	}
	return (0);
}

SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_clear, "I",
    "Clear umtx chains statistics");
SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_peaks, "A",
    "Highest peaks in chains max length");
427 "Highest peaks in chains max length"); 428 #endif 429 430 static void 431 umtxq_sysinit(void *arg __unused) 432 { 433 int i, j; 434 435 umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi), 436 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 437 for (i = 0; i < 2; ++i) { 438 for (j = 0; j < UMTX_CHAINS; ++j) { 439 mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL, 440 MTX_DEF | MTX_DUPOK); 441 LIST_INIT(&umtxq_chains[i][j].uc_queue[0]); 442 LIST_INIT(&umtxq_chains[i][j].uc_queue[1]); 443 LIST_INIT(&umtxq_chains[i][j].uc_spare_queue); 444 TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list); 445 umtxq_chains[i][j].uc_busy = 0; 446 umtxq_chains[i][j].uc_waiters = 0; 447 #ifdef UMTX_PROFILING 448 umtxq_chains[i][j].length = 0; 449 umtxq_chains[i][j].max_length = 0; 450 #endif 451 } 452 } 453 #ifdef UMTX_PROFILING 454 umtx_init_profiling(); 455 #endif 456 mtx_init(&umtx_lock, "umtx lock", NULL, MTX_DEF); 457 umtx_shm_init(); 458 } 459 460 struct umtx_q * 461 umtxq_alloc(void) 462 { 463 struct umtx_q *uq; 464 465 uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO); 466 uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX, 467 M_WAITOK | M_ZERO); 468 TAILQ_INIT(&uq->uq_spare_queue->head); 469 TAILQ_INIT(&uq->uq_pi_contested); 470 uq->uq_inherited_pri = PRI_MAX; 471 return (uq); 472 } 473 474 void 475 umtxq_free(struct umtx_q *uq) 476 { 477 478 MPASS(uq->uq_spare_queue != NULL); 479 free(uq->uq_spare_queue, M_UMTX); 480 free(uq, M_UMTX); 481 } 482 483 static inline void 484 umtxq_hash(struct umtx_key *key) 485 { 486 unsigned n; 487 488 n = (uintptr_t)key->info.both.a + key->info.both.b; 489 key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS; 490 } 491 492 static inline struct umtxq_chain * 493 umtxq_getchain(struct umtx_key *key) 494 { 495 496 if (key->type <= TYPE_SEM) 497 return (&umtxq_chains[1][key->hash]); 498 return (&umtxq_chains[0][key->hash]); 499 } 500 501 /* 502 * Lock a chain. 503 * 504 * The code is a macro so that file/line information is taken from the caller. 505 */ 506 #define umtxq_lock(key) do { \ 507 struct umtx_key *_key = (key); \ 508 struct umtxq_chain *_uc; \ 509 \ 510 _uc = umtxq_getchain(_key); \ 511 mtx_lock(&_uc->uc_lock); \ 512 } while (0) 513 514 /* 515 * Unlock a chain. 516 */ 517 static inline void 518 umtxq_unlock(struct umtx_key *key) 519 { 520 struct umtxq_chain *uc; 521 522 uc = umtxq_getchain(key); 523 mtx_unlock(&uc->uc_lock); 524 } 525 526 /* 527 * Set chain to busy state when following operation 528 * may be blocked (kernel mutex can not be used). 529 */ 530 static inline void 531 umtxq_busy(struct umtx_key *key) 532 { 533 struct umtxq_chain *uc; 534 535 uc = umtxq_getchain(key); 536 mtx_assert(&uc->uc_lock, MA_OWNED); 537 if (uc->uc_busy) { 538 #ifdef SMP 539 if (smp_cpus > 1) { 540 int count = BUSY_SPINS; 541 if (count > 0) { 542 umtxq_unlock(key); 543 while (uc->uc_busy && --count > 0) 544 cpu_spinwait(); 545 umtxq_lock(key); 546 } 547 } 548 #endif 549 while (uc->uc_busy) { 550 uc->uc_waiters++; 551 msleep(uc, &uc->uc_lock, 0, "umtxqb", 0); 552 uc->uc_waiters--; 553 } 554 } 555 uc->uc_busy = 1; 556 } 557 558 /* 559 * Unbusy a chain. 

/*
 * Unbusy a chain.
 */
static inline void
umtxq_unbusy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	KASSERT(uc->uc_busy != 0, ("not busy"));
	uc->uc_busy = 0;
	if (uc->uc_waiters)
		wakeup_one(uc);
}

static inline void
umtxq_unbusy_unlocked(struct umtx_key *key)
{

	umtxq_lock(key);
	umtxq_unbusy(key);
	umtxq_unlock(key);
}

static struct umtxq_queue *
umtxq_queue_lookup(struct umtx_key *key, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	LIST_FOREACH(uh, &uc->uc_queue[q], link) {
		if (umtx_key_match(&uh->key, key))
			return (uh);
	}

	return (NULL);
}

static inline void
umtxq_insert_queue(struct umtx_q *uq, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT((uq->uq_flags & UQF_UMTXQ) == 0,
	    ("umtx_q is already on queue"));
	uh = umtxq_queue_lookup(&uq->uq_key, q);
	if (uh != NULL) {
		LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue,
		    link);
	} else {
		uh = uq->uq_spare_queue;
		uh->key = uq->uq_key;
		LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
#ifdef UMTX_PROFILING
		uc->length++;
		if (uc->length > uc->max_length) {
			uc->max_length = uc->length;
			if (uc->max_length > max_length)
				max_length = uc->max_length;
		}
#endif
	}
	uq->uq_spare_queue = NULL;

	TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
	uh->length++;
	uq->uq_flags |= UQF_UMTXQ;
	uq->uq_cur_queue = uh;
	return;
}

static inline void
umtxq_remove_queue(struct umtx_q *uq, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (uq->uq_flags & UQF_UMTXQ) {
		uh = uq->uq_cur_queue;
		TAILQ_REMOVE(&uh->head, uq, uq_link);
		uh->length--;
		uq->uq_flags &= ~UQF_UMTXQ;
		if (TAILQ_EMPTY(&uh->head)) {
			KASSERT(uh->length == 0,
			    ("inconsistent umtxq_queue length"));
#ifdef UMTX_PROFILING
			uc->length--;
#endif
			LIST_REMOVE(uh, link);
		} else {
			uh = LIST_FIRST(&uc->uc_spare_queue);
			KASSERT(uh != NULL, ("uc_spare_queue is empty"));
			LIST_REMOVE(uh, link);
		}
		uq->uq_spare_queue = uh;
		uq->uq_cur_queue = NULL;
	}
}

/*
 * Return the number of waiters on the shared queue.
 */
static int
umtxq_count(struct umtx_key *key)
{
	struct umtxq_queue *uh;

	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL)
		return (uh->length);
	return (0);
}

/*
 * Return the number of PI waiters and the first waiter.
 */
static int
umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
{
	struct umtxq_queue *uh;

	*first = NULL;
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL) {
		*first = TAILQ_FIRST(&uh->head);
		return (uh->length);
	}
	return (0);
}

/*
 * Wake up threads waiting on a userland object.
 */
static int
umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
{
	struct umtxq_queue *uh;
	struct umtx_q *uq;
	int ret;

	ret = 0;
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
	uh = umtxq_queue_lookup(key, q);
	if (uh != NULL) {
		while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
			umtxq_remove_queue(uq, q);
			wakeup(uq);
			if (++ret >= n_wake)
				return (ret);
		}
	}
	return (ret);
}

/*
 * Wake up the specified thread.
 */
static inline void
umtxq_signal_thread(struct umtx_q *uq)
{

	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key));
	umtxq_remove(uq);
	wakeup(uq);
}

static inline int
tstohz(const struct timespec *tsp)
{
	struct timeval tv;

	TIMESPEC_TO_TIMEVAL(&tv, tsp);
	return tvtohz(&tv);
}

static void
abs_timeout_init(struct abs_timeout *timo, int clockid, int absolute,
    const struct timespec *timeout)
{

	timo->clockid = clockid;
	if (!absolute) {
		timo->is_abs_real = false;
		abs_timeout_update(timo);
		timespecadd(&timo->cur, timeout, &timo->end);
	} else {
		timo->end = *timeout;
		timo->is_abs_real = clockid == CLOCK_REALTIME ||
		    clockid == CLOCK_REALTIME_FAST ||
		    clockid == CLOCK_REALTIME_PRECISE;
		/*
		 * If is_abs_real, umtxq_sleep will read the clock
		 * after setting td_rtcgen; otherwise, read it here.
		 */
		if (!timo->is_abs_real) {
			abs_timeout_update(timo);
		}
	}
}

static void
abs_timeout_init2(struct abs_timeout *timo, const struct _umtx_time *umtxtime)
{

	abs_timeout_init(timo, umtxtime->_clockid,
	    (umtxtime->_flags & UMTX_ABSTIME) != 0, &umtxtime->_timeout);
}

static inline void
abs_timeout_update(struct abs_timeout *timo)
{

	kern_clock_gettime(curthread, timo->clockid, &timo->cur);
}

static int
abs_timeout_gethz(struct abs_timeout *timo)
{
	struct timespec tts;

	if (timespeccmp(&timo->end, &timo->cur, <=))
		return (-1);
	timespecsub(&timo->end, &timo->cur, &tts);
	return (tstohz(&tts));
}

static uint32_t
umtx_unlock_val(uint32_t flags, bool rb)
{

	if (rb)
		return (UMUTEX_RB_OWNERDEAD);
	else if ((flags & UMUTEX_NONCONSISTENT) != 0)
		return (UMUTEX_RB_NOTRECOV);
	else
		return (UMUTEX_UNOWNED);
}
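
/*
 * Sketch (illustrative, not part of the build): the typical timeout
 * flow used by the sleep loops below.  It assumes the caller holds the
 * chain lock with uq already queued, as the real callers do; the
 * helper itself is hypothetical.
 */
#if 0
static int
abs_timeout_example(struct umtx_q *uq, const struct _umtx_time *ut)
{
	struct abs_timeout timo;

	abs_timeout_init2(&timo, ut);
	/* umtxq_sleep() recomputes the remaining ticks on each wakeup. */
	return (umtxq_sleep(uq, "example", &timo));
}
#endif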

/*
 * Put the thread into a sleep state; before sleeping, check whether
 * the thread was removed from the umtx queue.
 */
static inline int
umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *abstime)
{
	struct umtxq_chain *uc;
	int error, timo;

	if (abstime != NULL && abstime->is_abs_real) {
		curthread->td_rtcgen = atomic_load_acq_int(&rtc_generation);
		abs_timeout_update(abstime);
	}

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	for (;;) {
		if (!(uq->uq_flags & UQF_UMTXQ)) {
			error = 0;
			break;
		}
		if (abstime != NULL) {
			timo = abs_timeout_gethz(abstime);
			if (timo < 0) {
				error = ETIMEDOUT;
				break;
			}
		} else
			timo = 0;
		error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo);
		if (error == EINTR || error == ERESTART) {
			umtxq_lock(&uq->uq_key);
			break;
		}
		if (abstime != NULL) {
			if (abstime->is_abs_real)
				curthread->td_rtcgen =
				    atomic_load_acq_int(&rtc_generation);
			abs_timeout_update(abstime);
		}
		umtxq_lock(&uq->uq_key);
	}

	curthread->td_rtcgen = 0;
	return (error);
}

/*
 * Convert a userspace address into a unique logical address.
 */
int
umtx_key_get(const void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else {
		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
		    &entry, &key->info.shared.object, &pindex, &prot,
		    &wired) != KERN_SUCCESS) {
			return (EFAULT);
		}

		if ((share == PROCESS_SHARE) ||
		    (share == AUTO_SHARE &&
		     VM_INHERIT_SHARE == entry->inheritance)) {
			key->shared = 1;
			key->info.shared.offset = (vm_offset_t)addr -
			    entry->start + entry->offset;
			vm_object_reference(key->info.shared.object);
		} else {
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		vm_map_lookup_done(map, entry);
	}

	umtxq_hash(key);
	return (0);
}

/*
 * Release a key.
 */
void
umtx_key_release(struct umtx_key *key)
{
	if (key->shared)
		vm_object_deallocate(key->info.shared.object);
}
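
/*
 * Sketch (illustrative, not part of the build): the lookup/release
 * pair brackets every wait-queue operation.  TYPE_SIMPLE_WAIT and
 * AUTO_SHARE are real values used below; the helper itself is
 * hypothetical.
 */
#if 0
static int
umtx_key_example(void *uaddr)
{
	struct umtx_key key;
	int error;

	error = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, AUTO_SHARE, &key);
	if (error != 0)
		return (error);
	umtxq_lock(&key);
	/* ... insert into, count, or signal the wait queue ... */
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}
#endif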

#ifdef COMPAT_FREEBSD10
/*
 * Lock a umtx object.
 */
static int
do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
    const struct timespec *timeout)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	u_long owner;
	u_long old;
	int error = 0;

	uq = td->td_umtxq;
	if (timeout != NULL)
		abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout);

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMTX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested, try to acquire it. */
		if (owner == UMTX_CONTESTED) {
			owner = casuword(&umtx->u_owner,
			    UMTX_CONTESTED, id | UMTX_CONTESTED);

			if (owner == UMTX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			error = thread_check_susp(td, false);
			if (error != 0)
				break;

			/* If this failed, the lock has changed; restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
		    AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timeout == NULL ?
			    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);

		if (error == 0)
			error = thread_check_susp(td, false);
	}

	if (timeout == NULL) {
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		/* Timed locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}
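
/*
 * Sketch (illustrative, not part of the build): the "should be done in
 * userland" cases above refer to a userland fast path roughly like the
 * following, so the kernel normally only sees contended locks.
 */
#if 0
	/* userland, not kernel: */
	if (atomic_cmpset_acq_long(&umtx->u_owner, UMTX_UNOWNED, id))
		return (0);	/* uncontested acquire, no syscall */
	/* otherwise fall back to the (historical) _umtx_lock() syscall */
#endif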

/*
 * Unlock a umtx object.
 */
static int
do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
{
	struct umtx_key key;
	u_long owner;
	u_long old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMTX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland. */
	if ((owner & UMTX_CONTESTED) == 0) {
		old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks. */
	if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * zero or one thread is waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword(&umtx->u_owner, owner,
	    count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

#ifdef COMPAT_FREEBSD32

/*
 * Lock a umtx object.
 */
static int
do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id,
    const struct timespec *timeout)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t owner;
	uint32_t old;
	int error = 0;

	uq = td->td_umtxq;

	if (timeout != NULL)
		abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout);

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword32(m, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested, try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(m,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
			if (owner == UMUTEX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			error = thread_check_susp(td, false);
			if (error != 0)
				break;

			/* If this failed, the lock has changed; restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
		    AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(m, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timeout == NULL ?
			    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);

		if (error == 0)
			error = thread_check_susp(td, false);
	}

	if (timeout == NULL) {
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		/* Timed locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a umtx object.
 */
static int
do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(m);
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland. */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(m, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks. */
	if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * zero or one thread is waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(m, owner,
	    count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
#endif /* COMPAT_FREEBSD32 */
#endif /* COMPAT_FREEBSD10 */

/*
 * Fetch and compare a value; sleep on the address if the value is unchanged.
 */
static int
do_wait(struct thread *td, void *addr, u_long id,
    struct _umtx_time *timeout, int compat32, int is_private)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	u_long tmp;
	uint32_t tmp32;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	if (compat32 == 0) {
		error = fueword(addr, &tmp);
		if (error != 0)
			error = EFAULT;
	} else {
		error = fueword32(addr, &tmp32);
		if (error == 0)
			tmp = tmp32;
		else
			error = EFAULT;
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		if (tmp == id)
			error = umtxq_sleep(uq, "uwait", timeout == NULL ?
			    NULL : &timo);
		if ((uq->uq_flags & UQF_UMTXQ) == 0)
			error = 0;
		else
			umtxq_remove(uq);
	} else if ((uq->uq_flags & UQF_UMTXQ) != 0) {
		umtxq_remove(uq);
	}
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

/*
 * Wake up threads sleeping on the specified address.
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
		return (ret);
	umtxq_lock(&key);
	umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}
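
/*
 * Sketch (illustrative, not part of the build): do_wait() and
 * kern_umtx_wake() form a futex-style compare-and-block primitive.  A
 * userland event flag could use it approximately as follows.
 */
#if 0
	/* userland waiter, not kernel: */
	while (atomic_load_acq_int(&flag) == 0)
		_umtx_op(&flag, UMTX_OP_WAIT_UINT, 0, NULL, NULL);

	/* userland waker, not kernel: */
	atomic_store_rel_int(&flag, 1);
	_umtx_op(&flag, UMTX_OP_WAKE, 1, NULL, NULL);
#endif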

/*
 * Lock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int mode)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	error = 0;
	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */
	for (;;) {
		rv = fueword32(&m->m_owner, &owner);
		if (rv == -1)
			return (EFAULT);
		if (mode == _UMUTEX_WAIT) {
			if (owner == UMUTEX_UNOWNED ||
			    owner == UMUTEX_CONTESTED ||
			    owner == UMUTEX_RB_OWNERDEAD ||
			    owner == UMUTEX_RB_NOTRECOV)
				return (0);
		} else {
			/*
			 * Robust mutex terminated.  Kernel duty is to
			 * return EOWNERDEAD to the userspace.  The
			 * umutex.m_flags UMUTEX_NONCONSISTENT is set
			 * by the common userspace code.
			 */
			if (owner == UMUTEX_RB_OWNERDEAD) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_RB_OWNERDEAD, &owner,
				    id | UMUTEX_CONTESTED);
				if (rv == -1)
					return (EFAULT);
				if (rv == 0) {
					MPASS(owner == UMUTEX_RB_OWNERDEAD);
					return (EOWNERDEAD); /* success */
				}
				MPASS(rv == 1);
				rv = thread_check_susp(td, false);
				if (rv != 0)
					return (rv);
				continue;
			}
			if (owner == UMUTEX_RB_NOTRECOV)
				return (ENOTRECOVERABLE);

			/*
			 * Try the uncontested case.  This should be
			 * done in userland.
			 */
			rv = casueword32(&m->m_owner, UMUTEX_UNOWNED,
			    &owner, id);
			/* The address was invalid. */
			if (rv == -1)
				return (EFAULT);

			/* The acquire succeeded. */
			if (rv == 0) {
				MPASS(owner == UMUTEX_UNOWNED);
				return (0);
			}

			/*
			 * If no one owns it but it is contested, try
			 * to acquire it.
			 */
			MPASS(rv == 1);
			if (owner == UMUTEX_CONTESTED) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_CONTESTED, &owner,
				    id | UMUTEX_CONTESTED);
				/* The address was invalid. */
				if (rv == -1)
					return (EFAULT);
				if (rv == 0) {
					MPASS(owner == UMUTEX_CONTESTED);
					return (0);
				}
				if (rv == 1) {
					rv = thread_check_susp(td, false);
					if (rv != 0)
						return (rv);
				}

				/*
				 * If this failed, the lock has
				 * changed; restart.
				 */
				continue;
			}

			/* rv == 1 but not contested, likely store failure */
			rv = thread_check_susp(td, false);
			if (rv != 0)
				return (rv);
		}

		if (mode == _UMUTEX_TRY)
			return (EBUSY);

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old,
		    owner | UMUTEX_CONTESTED);

		/* The address was invalid, or casueword failed to store. */
		if (rv == -1 || rv == 1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			if (rv == -1)
				return (EFAULT);
			if (rv == 1) {
				rv = thread_check_susp(td, false);
				if (rv != 0)
					return (rv);
			}
			continue;
		}

		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		MPASS(old == owner);
		error = umtxq_sleep(uq, "umtxn", timeout == NULL ?
		    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);

		if (error == 0)
			error = thread_check_susp(td, false);
	}

	return (0);
}

/*
 * Unlock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	uint32_t owner, old, id, newlock;
	int error, count;

	id = td->td_tid;

again:
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	newlock = umtx_unlock_val(flags, rb);
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, newlock);
		if (error == -1)
			return (EFAULT);
		if (error == 1) {
			error = thread_check_susp(td, false);
			if (error != 0)
				return (error);
			goto again;
		}
		MPASS(old == owner);
		return (0);
	}

	/* We should only ever be in here for contested locks. */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * zero or one thread is waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	if (count > 1)
		newlock |= UMUTEX_CONTESTED;
	error = casueword32(&m->m_owner, owner, &old, newlock);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (error == 1) {
		if (old != owner)
			return (EINVAL);
		error = thread_check_susp(td, false);
		if (error != 0)
			return (error);
		goto again;
	}
	return (0);
}
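
/*
 * Sketch (illustrative, not part of the build): the contested bit
 * managed above drives the userland fast path; only a contested
 * unlock needs to enter the kernel to pick and wake a waiter.
 */
#if 0
	/* userland, not kernel: */
	if (atomic_cmpset_rel_32(&m->m_owner, id, UMUTEX_UNOWNED))
		return (0);	/* no waiters recorded, no syscall */
	_umtx_op(m, UMTX_OP_MUTEX_UNLOCK, 0, NULL, NULL);
#endif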

/*
 * Check if the mutex is available and wake up a waiter;
 * this is for simple (non-PI, non-PP) mutexes only.
 */
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

again:
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != 0 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV)
		return (0);

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	/* We should only ever be in here for contested locks. */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	if (count <= 1 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV) {
		error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    UMUTEX_UNOWNED);
		if (error == -1) {
			error = EFAULT;
		} else if (error == 1) {
			umtxq_lock(&key);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			error = thread_check_susp(td, false);
			if (error != 0)
				return (error);
			goto again;
		}
	}

	umtxq_lock(&key);
	if (error == 0 && count != 0) {
		MPASS((owner & ~UMUTEX_CONTESTED) == 0 ||
		    owner == UMUTEX_RB_OWNERDEAD ||
		    owner == UMUTEX_RB_NOTRECOV);
		umtxq_signal(&key, 1);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

/*
 * Check if the mutex has waiters and try to fix the contention bit.
 */
static int
do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old;
	int type;
	int error;
	int count;

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT |
	    UMUTEX_ROBUST)) {
	case 0:
	case UMUTEX_ROBUST:
		type = TYPE_NORMAL_UMUTEX;
		break;
	case UMUTEX_PRIO_INHERIT:
		type = TYPE_PI_UMUTEX;
		break;
	case (UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST):
		type = TYPE_PI_ROBUST_UMUTEX;
		break;
	case UMUTEX_PRIO_PROTECT:
		type = TYPE_PP_UMUTEX;
		break;
	case (UMUTEX_PRIO_PROTECT | UMUTEX_ROBUST):
		type = TYPE_PP_ROBUST_UMUTEX;
		break;
	default:
		return (EINVAL);
	}
	if ((error = umtx_key_get(m, type, GET_SHARE(flags), &key)) != 0)
		return (error);

	owner = 0;
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		error = EFAULT;

	/*
	 * Only repair the contention bit if there is a waiter; that means
	 * the mutex is still referenced by userland code.  Otherwise,
	 * don't touch the memory at all.
	 */
	while (error == 0 && (owner & UMUTEX_CONTESTED) == 0 &&
	    (count > 1 || (count == 1 && (owner & ~UMUTEX_CONTESTED) != 0))) {
		error = casueword32(&m->m_owner, owner, &old,
		    owner | UMUTEX_CONTESTED);
		if (error == -1) {
			error = EFAULT;
			break;
		}
		if (error == 0) {
			MPASS(old == owner);
			break;
		}
		owner = old;
		error = thread_check_susp(td, false);
	}

	umtxq_lock(&key);
	if (error == EFAULT) {
		umtxq_signal(&key, INT_MAX);
	} else if (count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 ||
	    owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV))
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

static inline struct umtx_pi *
umtx_pi_alloc(int flags)
{
	struct umtx_pi *pi;

	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
	TAILQ_INIT(&pi->pi_blocked);
	atomic_add_int(&umtx_pi_allocated, 1);
	return (pi);
}

static inline void
umtx_pi_free(struct umtx_pi *pi)
{
	uma_zfree(umtx_pi_zone, pi);
	atomic_add_int(&umtx_pi_allocated, -1);
}

/*
 * Adjust the thread's position on a pi_state after its priority has been
 * changed.
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread or higher than the next thread.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}

static struct umtx_pi *
umtx_pi_next(struct umtx_pi *pi)
{
	struct umtx_q *uq_owner;

	if (pi->pi_owner == NULL)
		return (NULL);
	uq_owner = pi->pi_owner->td_umtxq;
	if (uq_owner == NULL)
		return (NULL);
	return (uq_owner->uq_pi_blocked);
}

/*
 * Floyd's Cycle-Finding Algorithm.
 */
static bool
umtx_pi_check_loop(struct umtx_pi *pi)
{
	struct umtx_pi *pi1;	/* fast iterator */

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (false);
	pi1 = pi;
	for (;;) {
		pi = umtx_pi_next(pi);
		if (pi == NULL)
			break;
		pi1 = umtx_pi_next(pi1);
		if (pi1 == NULL)
			break;
		pi1 = umtx_pi_next(pi1);
		if (pi1 == NULL)
			break;
		if (pi == pi1)
			return (true);
	}
	return (false);
}
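
/*
 * Note (added for clarity): an ownership cycle can only arise from
 * mismanaged or malicious userland state, e.g. two threads each
 * recorded as owner of a PI mutex the other is blocked on.  The
 * tortoise-and-hare walk above detects such cycles so the propagation
 * loops below terminate instead of spinning forever.
 */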

/*
 * Propagate priority when a thread is blocked on a POSIX
 * PI mutex.
 */
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;
	if (umtx_pi_check_loop(pi))
		return;

	for (;;) {
		td = pi->pi_owner;
		if (td == NULL || td == curthread)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		thread_lock(td);
		if (td->td_lend_user_pri > pri)
			sched_lend_user_prio(td, pri);
		else {
			thread_unlock(td);
			break;
		}
		thread_unlock(td);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		if (pi == NULL)
			break;
		/* Resort td on the list if needed. */
		umtx_pi_adjust_thread(pi, td);
	}
}

/*
 * Unpropagate priority for a PI mutex when a thread blocked on
 * it is interrupted by a signal or resumed by others.
 */
static void
umtx_repropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);

	if (umtx_pi_check_loop(pi))
		return;
	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		thread_lock(pi->pi_owner);
		sched_lend_user_prio(pi->pi_owner, pri);
		thread_unlock(pi->pi_owner);
		if ((pi = uq_owner->uq_pi_blocked) != NULL)
			umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
	}
}

/*
 * Insert a PI mutex into the owned list.
 */
static void
umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_assert(&umtx_lock, MA_OWNED);
	MPASS(pi->pi_owner == NULL);
	pi->pi_owner = owner;
	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
}

/*
 * Disown a PI mutex, and remove it from the owned list.
 */
static void
umtx_pi_disown(struct umtx_pi *pi)
{

	mtx_assert(&umtx_lock, MA_OWNED);
	TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, pi, pi_link);
	pi->pi_owner = NULL;
}

/*
 * Claim ownership of a PI mutex.
 */
static int
umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq;
	int pri;

	mtx_lock(&umtx_lock);
	if (pi->pi_owner == owner) {
		mtx_unlock(&umtx_lock);
		return (0);
	}

	if (pi->pi_owner != NULL) {
		/*
		 * userland may have already messed the mutex, sigh.
		 */
		mtx_unlock(&umtx_lock);
		return (EPERM);
	}
	umtx_pi_setowner(pi, owner);
	uq = TAILQ_FIRST(&pi->pi_blocked);
	if (uq != NULL) {
		pri = UPRI(uq->uq_thread);
		thread_lock(owner);
		if (pri < UPRI(owner))
			sched_lend_user_prio(owner, pri);
		thread_unlock(owner);
	}
	mtx_unlock(&umtx_lock);
	return (0);
}

/*
 * Adjust a thread's position in the queue of the PI mutex it is
 * blocked on; this may trigger a new round of priority propagation.
 */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	mtx_lock(&umtx_lock);
	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	if (pi != NULL) {
		umtx_pi_adjust_thread(pi, td);
		umtx_repropagate_priority(pi);
	}
	mtx_unlock(&umtx_lock);
}

/*
 * Sleep on a PI mutex.
 */
static int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, uint32_t owner,
    const char *wmesg, struct abs_timeout *timo, bool shared)
{
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int error, pri;
#ifdef INVARIANTS
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
#endif
	error = 0;
	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key));
	KASSERT(uc->uc_busy != 0, ("umtx chain is not busy"));
	umtxq_insert(uq);
	mtx_lock(&umtx_lock);
	if (pi->pi_owner == NULL) {
		mtx_unlock(&umtx_lock);
		td1 = tdfind(owner, shared ? -1 : td->td_proc->p_pid);
		mtx_lock(&umtx_lock);
		if (td1 != NULL) {
			if (pi->pi_owner == NULL)
				umtx_pi_setowner(pi, td1);
			PROC_UNLOCK(td1->td_proc);
		}
	}

	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	thread_lock(td);
	td->td_flags |= TDF_UPIBLOCKED;
	thread_unlock(td);
	umtx_propagate_priority(td);
	mtx_unlock(&umtx_lock);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, wmesg, timo);
	umtxq_remove(uq);

	mtx_lock(&umtx_lock);
	uq->uq_pi_blocked = NULL;
	thread_lock(td);
	td->td_flags &= ~TDF_UPIBLOCKED;
	thread_unlock(td);
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_repropagate_priority(pi);
	mtx_unlock(&umtx_lock);
	umtxq_unlock(&uq->uq_key);

	return (error);
}

/*
 * Add a reference to a PI mutex.
 */
static void
umtx_pi_ref(struct umtx_pi *pi)
{

	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&pi->pi_key));
	pi->pi_refcount++;
}
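
/*
 * Sketch (illustrative, not part of the build): lookups of a umtx_pi
 * are bracketed by a reference taken under the chain lock, which keeps
 * the structure alive while the lock is dropped; do_lock_pi() below
 * follows this pattern.
 */
#if 0
	umtxq_lock(&key);
	pi = umtx_pi_lookup(&key);
	umtx_pi_ref(pi);
	umtxq_unlock(&key);
	/* ... work that may sleep ... */
	umtxq_lock(&key);
	umtx_pi_unref(pi);	/* frees pi on the last reference */
	umtxq_unlock(&key);
#endif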

/*
 * Drop a reference to a PI mutex; when the count
 * reaches zero, its memory is freed.
 */
static void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		mtx_lock(&umtx_lock);
		if (pi->pi_owner != NULL)
			umtx_pi_disown(pi);
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
		    ("blocked queue not empty"));
		mtx_unlock(&umtx_lock);
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		umtx_pi_free(pi);
	}
}

/*
 * Find a PI mutex in the hash table.
 */
static struct umtx_pi *
umtx_pi_lookup(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtx_pi *pi;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);

	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
		if (umtx_key_match(&pi->pi_key, key)) {
			return (pi);
		}
	}
	return (NULL);
}

/*
 * Insert a PI mutex into the hash table.
 */
static inline void
umtx_pi_insert(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
}
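
/*
 * Note (added for clarity): do_lock_pi() below first attempts an
 * M_NOWAIT allocation while holding the chain lock, and only drops the
 * lock for an M_WAITOK allocation on failure, re-doing the lookup
 * afterwards because another thread may have inserted the umtx_pi in
 * the meantime.
 */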
*/ 2237 if (owner == UMUTEX_CONTESTED || owner == UMUTEX_RB_OWNERDEAD) { 2238 old_owner = owner; 2239 rv = casueword32(&m->m_owner, owner, &owner, 2240 id | UMUTEX_CONTESTED); 2241 /* The address was invalid. */ 2242 if (rv == -1) { 2243 error = EFAULT; 2244 break; 2245 } 2246 if (rv == 1) { 2247 if (error == 0) { 2248 error = thread_check_susp(td, true); 2249 if (error != 0) 2250 break; 2251 } 2252 2253 /* 2254 * If the CAS failed, the lock may have 2255 * changed; restart. 2256 */ 2257 continue; 2258 } 2259 2260 MPASS(rv == 0); 2261 MPASS(owner == old_owner); 2262 umtxq_lock(&uq->uq_key); 2263 umtxq_busy(&uq->uq_key); 2264 error = umtx_pi_claim(pi, td); 2265 umtxq_unbusy(&uq->uq_key); 2266 umtxq_unlock(&uq->uq_key); 2267 if (error != 0) { 2268 /* 2269 * Since we're going to return an 2270 * error, restore the m_owner to its 2271 * previous, unowned state to avoid 2272 * compounding the problem. 2273 */ 2274 (void)casuword32(&m->m_owner, 2275 id | UMUTEX_CONTESTED, old_owner); 2276 } 2277 if (error == 0 && old_owner == UMUTEX_RB_OWNERDEAD) 2278 error = EOWNERDEAD; 2279 break; 2280 } 2281 2282 if ((owner & ~UMUTEX_CONTESTED) == id) { 2283 error = EDEADLK; 2284 break; 2285 } 2286 2287 if (try != 0) { 2288 error = EBUSY; 2289 break; 2290 } 2291 2292 /* 2293 * If we caught a signal, we have already retried; now 2294 * exit immediately. 2295 */ 2296 if (error != 0) 2297 break; 2298 2299 umtxq_lock(&uq->uq_key); 2300 umtxq_busy(&uq->uq_key); 2301 umtxq_unlock(&uq->uq_key); 2302 2303 /* 2304 * Set the contested bit so that a release in user space 2305 * knows to use the system call for unlock. If this fails, 2306 * either someone else has acquired the lock or it has been 2307 * released. 2308 */ 2309 rv = casueword32(&m->m_owner, owner, &old, owner | 2310 UMUTEX_CONTESTED); 2311 2312 /* The address was invalid. */ 2313 if (rv == -1) { 2314 umtxq_unbusy_unlocked(&uq->uq_key); 2315 error = EFAULT; 2316 break; 2317 } 2318 if (rv == 1) { 2319 umtxq_unbusy_unlocked(&uq->uq_key); 2320 error = thread_check_susp(td, true); 2321 if (error != 0) 2322 break; 2323 2324 /* 2325 * The lock changed and we need to retry, or we 2326 * lost a race to the thread unlocking the 2327 * umtx. Note that the UMUTEX_RB_OWNERDEAD 2328 * value for owner is impossible here. 2329 */ 2330 continue; 2331 } 2332 2333 umtxq_lock(&uq->uq_key); 2334 2335 /* We set the contested bit, sleep. */ 2336 MPASS(old == owner); 2337 error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED, 2338 "umtxpi", timeout == NULL ? NULL : &timo, 2339 (flags & USYNC_PROCESS_SHARED) != 0); 2340 if (error != 0) 2341 continue; 2342 2343 error = thread_check_susp(td, false); 2344 if (error != 0) 2345 break; 2346 } 2347 2348 umtxq_lock(&uq->uq_key); 2349 umtx_pi_unref(pi); 2350 umtxq_unlock(&uq->uq_key); 2351 2352 umtx_key_release(&uq->uq_key); 2353 return (error); 2354 } 2355 2356 /* 2357 * Unlock a PI mutex. 2358 */ 2359 static int 2360 do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags, bool rb) 2361 { 2362 struct umtx_key key; 2363 struct umtx_q *uq_first, *uq_first2, *uq_me; 2364 struct umtx_pi *pi, *pi2; 2365 uint32_t id, new_owner, old, owner; 2366 int count, error, pri; 2367 2368 id = td->td_tid; 2369 2370 usrloop: 2371 /* 2372 * Make sure we own this mtx.
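 * (Sketch of the check that follows: ownership lives in the owner
 * word itself, so (owner & ~UMUTEX_CONTESTED) must equal td->td_tid,
 * with the contested bit masked off before comparing.)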
2373 */ 2374 error = fueword32(&m->m_owner, &owner); 2375 if (error == -1) 2376 return (EFAULT); 2377 2378 if ((owner & ~UMUTEX_CONTESTED) != id) 2379 return (EPERM); 2380 2381 new_owner = umtx_unlock_val(flags, rb); 2382 2383 /* This should be done in userland */ 2384 if ((owner & UMUTEX_CONTESTED) == 0) { 2385 error = casueword32(&m->m_owner, owner, &old, new_owner); 2386 if (error == -1) 2387 return (EFAULT); 2388 if (error == 1) { 2389 error = thread_check_susp(td, true); 2390 if (error != 0) 2391 return (error); 2392 goto usrloop; 2393 } 2394 if (old == owner) 2395 return (0); 2396 owner = old; 2397 } 2398 2399 /* We should only ever be in here for contested locks */ 2400 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2401 TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags), 2402 &key)) != 0) 2403 return (error); 2404 2405 umtxq_lock(&key); 2406 umtxq_busy(&key); 2407 count = umtxq_count_pi(&key, &uq_first); 2408 if (uq_first != NULL) { 2409 mtx_lock(&umtx_lock); 2410 pi = uq_first->uq_pi_blocked; 2411 KASSERT(pi != NULL, ("pi == NULL?")); 2412 if (pi->pi_owner != td && !(rb && pi->pi_owner == NULL)) { 2413 mtx_unlock(&umtx_lock); 2414 umtxq_unbusy(&key); 2415 umtxq_unlock(&key); 2416 umtx_key_release(&key); 2417 /* userland messed the mutex */ 2418 return (EPERM); 2419 } 2420 uq_me = td->td_umtxq; 2421 if (pi->pi_owner == td) 2422 umtx_pi_disown(pi); 2423 /* get highest priority thread which is still sleeping. */ 2424 uq_first = TAILQ_FIRST(&pi->pi_blocked); 2425 while (uq_first != NULL && 2426 (uq_first->uq_flags & UQF_UMTXQ) == 0) { 2427 uq_first = TAILQ_NEXT(uq_first, uq_lockq); 2428 } 2429 pri = PRI_MAX; 2430 TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) { 2431 uq_first2 = TAILQ_FIRST(&pi2->pi_blocked); 2432 if (uq_first2 != NULL) { 2433 if (pri > UPRI(uq_first2->uq_thread)) 2434 pri = UPRI(uq_first2->uq_thread); 2435 } 2436 } 2437 thread_lock(td); 2438 sched_lend_user_prio(td, pri); 2439 thread_unlock(td); 2440 mtx_unlock(&umtx_lock); 2441 if (uq_first) 2442 umtxq_signal_thread(uq_first); 2443 } else { 2444 pi = umtx_pi_lookup(&key); 2445 /* 2446 * A umtx_pi can exist if a signal or timeout removed the 2447 * last waiter from the umtxq, but there is still 2448 * a thread in do_lock_pi() holding the umtx_pi. 2449 */ 2450 if (pi != NULL) { 2451 /* 2452 * The umtx_pi can be unowned, such as when a thread 2453 * has just entered do_lock_pi(), allocated the 2454 * umtx_pi, and unlocked the umtxq. 2455 * If the current thread owns it, it must disown it. 2456 */ 2457 mtx_lock(&umtx_lock); 2458 if (pi->pi_owner == td) 2459 umtx_pi_disown(pi); 2460 mtx_unlock(&umtx_lock); 2461 } 2462 } 2463 umtxq_unlock(&key); 2464 2465 /* 2466 * When unlocking the umtx, it must be marked as unowned if 2467 * there is zero or one thread only waiting for it. 2468 * Otherwise, it must be marked as contested. 2469 */ 2470 2471 if (count > 1) 2472 new_owner |= UMUTEX_CONTESTED; 2473 again: 2474 error = casueword32(&m->m_owner, owner, &old, new_owner); 2475 if (error == 1) { 2476 error = thread_check_susp(td, false); 2477 if (error == 0) 2478 goto again; 2479 } 2480 umtxq_unbusy_unlocked(&key); 2481 umtx_key_release(&key); 2482 if (error == -1) 2483 return (EFAULT); 2484 if (error == 0 && old != owner) 2485 return (EINVAL); 2486 return (error); 2487 } 2488 2489 /* 2490 * Lock a PP mutex. 
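 * (Background sketch, not in the original comment: a PP mutex stores
 * its priority ceiling in m_ceilings[0]; the loop below maps it with
 * ceiling = RTP_PRIO_MAX - ceiling and, for threads holding the
 * PRIV_SCHED_RTPRIO privilege, lends PRI_MIN_REALTIME + ceiling to
 * the owner while the lock is held.)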
2491 */ 2492 static int 2493 do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, 2494 struct _umtx_time *timeout, int try) 2495 { 2496 struct abs_timeout timo; 2497 struct umtx_q *uq, *uq2; 2498 struct umtx_pi *pi; 2499 uint32_t ceiling; 2500 uint32_t owner, id; 2501 int error, pri, old_inherited_pri, su, rv; 2502 2503 id = td->td_tid; 2504 uq = td->td_umtxq; 2505 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2506 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2507 &uq->uq_key)) != 0) 2508 return (error); 2509 2510 if (timeout != NULL) 2511 abs_timeout_init2(&timo, timeout); 2512 2513 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); 2514 for (;;) { 2515 old_inherited_pri = uq->uq_inherited_pri; 2516 umtxq_lock(&uq->uq_key); 2517 umtxq_busy(&uq->uq_key); 2518 umtxq_unlock(&uq->uq_key); 2519 2520 rv = fueword32(&m->m_ceilings[0], &ceiling); 2521 if (rv == -1) { 2522 error = EFAULT; 2523 goto out; 2524 } 2525 ceiling = RTP_PRIO_MAX - ceiling; 2526 if (ceiling > RTP_PRIO_MAX) { 2527 error = EINVAL; 2528 goto out; 2529 } 2530 2531 mtx_lock(&umtx_lock); 2532 if (UPRI(td) < PRI_MIN_REALTIME + ceiling) { 2533 mtx_unlock(&umtx_lock); 2534 error = EINVAL; 2535 goto out; 2536 } 2537 if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) { 2538 uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling; 2539 thread_lock(td); 2540 if (uq->uq_inherited_pri < UPRI(td)) 2541 sched_lend_user_prio(td, uq->uq_inherited_pri); 2542 thread_unlock(td); 2543 } 2544 mtx_unlock(&umtx_lock); 2545 2546 rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, 2547 id | UMUTEX_CONTESTED); 2548 /* The address was invalid. */ 2549 if (rv == -1) { 2550 error = EFAULT; 2551 break; 2552 } 2553 if (rv == 0) { 2554 MPASS(owner == UMUTEX_CONTESTED); 2555 error = 0; 2556 break; 2557 } 2558 /* rv == 1 */ 2559 if (owner == UMUTEX_RB_OWNERDEAD) { 2560 rv = casueword32(&m->m_owner, UMUTEX_RB_OWNERDEAD, 2561 &owner, id | UMUTEX_CONTESTED); 2562 if (rv == -1) { 2563 error = EFAULT; 2564 break; 2565 } 2566 if (rv == 0) { 2567 MPASS(owner == UMUTEX_RB_OWNERDEAD); 2568 error = EOWNERDEAD; /* success */ 2569 break; 2570 } 2571 2572 /* 2573 * rv == 1; only check for suspension if we 2574 * have not already caught a signal. If we 2575 * get an error from the check, the same 2576 * condition is checked by the umtxq_sleep() 2577 * call below, so we clear the error so as 2578 * not to skip the last loop iteration. 2579 */ 2580 if (error == 0) { 2581 error = thread_check_susp(td, false); 2582 if (error == 0) { 2583 if (try != 0) 2584 error = EBUSY; 2585 else 2586 continue; 2587 } 2588 error = 0; 2589 } 2590 } else if (owner == UMUTEX_RB_NOTRECOV) { 2591 error = ENOTRECOVERABLE; 2592 } 2593 2594 if (try != 0) 2595 error = EBUSY; 2596 2597 /* 2598 * If we caught a signal, we have already retried; now 2599 * exit immediately. 2600 */ 2601 if (error != 0) 2602 break; 2603 2604 umtxq_lock(&uq->uq_key); 2605 umtxq_insert(uq); 2606 umtxq_unbusy(&uq->uq_key); 2607 error = umtxq_sleep(uq, "umtxpp", timeout == NULL ?
2608 NULL : &timo); 2609 umtxq_remove(uq); 2610 umtxq_unlock(&uq->uq_key); 2611 2612 mtx_lock(&umtx_lock); 2613 uq->uq_inherited_pri = old_inherited_pri; 2614 pri = PRI_MAX; 2615 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2616 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2617 if (uq2 != NULL) { 2618 if (pri > UPRI(uq2->uq_thread)) 2619 pri = UPRI(uq2->uq_thread); 2620 } 2621 } 2622 if (pri > uq->uq_inherited_pri) 2623 pri = uq->uq_inherited_pri; 2624 thread_lock(td); 2625 sched_lend_user_prio(td, pri); 2626 thread_unlock(td); 2627 mtx_unlock(&umtx_lock); 2628 } 2629 2630 if (error != 0 && error != EOWNERDEAD) { 2631 mtx_lock(&umtx_lock); 2632 uq->uq_inherited_pri = old_inherited_pri; 2633 pri = PRI_MAX; 2634 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2635 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2636 if (uq2 != NULL) { 2637 if (pri > UPRI(uq2->uq_thread)) 2638 pri = UPRI(uq2->uq_thread); 2639 } 2640 } 2641 if (pri > uq->uq_inherited_pri) 2642 pri = uq->uq_inherited_pri; 2643 thread_lock(td); 2644 sched_lend_user_prio(td, pri); 2645 thread_unlock(td); 2646 mtx_unlock(&umtx_lock); 2647 } 2648 2649 out: 2650 umtxq_unbusy_unlocked(&uq->uq_key); 2651 umtx_key_release(&uq->uq_key); 2652 return (error); 2653 } 2654 2655 /* 2656 * Unlock a PP mutex. 2657 */ 2658 static int 2659 do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, bool rb) 2660 { 2661 struct umtx_key key; 2662 struct umtx_q *uq, *uq2; 2663 struct umtx_pi *pi; 2664 uint32_t id, owner, rceiling; 2665 int error, pri, new_inherited_pri, su; 2666 2667 id = td->td_tid; 2668 uq = td->td_umtxq; 2669 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); 2670 2671 /* 2672 * Make sure we own this mtx. 2673 */ 2674 error = fueword32(&m->m_owner, &owner); 2675 if (error == -1) 2676 return (EFAULT); 2677 2678 if ((owner & ~UMUTEX_CONTESTED) != id) 2679 return (EPERM); 2680 2681 error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t)); 2682 if (error != 0) 2683 return (error); 2684 2685 if (rceiling == -1) 2686 new_inherited_pri = PRI_MAX; 2687 else { 2688 rceiling = RTP_PRIO_MAX - rceiling; 2689 if (rceiling > RTP_PRIO_MAX) 2690 return (EINVAL); 2691 new_inherited_pri = PRI_MIN_REALTIME + rceiling; 2692 } 2693 2694 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2695 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2696 &key)) != 0) 2697 return (error); 2698 umtxq_lock(&key); 2699 umtxq_busy(&key); 2700 umtxq_unlock(&key); 2701 /* 2702 * For a priority-protected mutex, always set the unlocked state 2703 * to UMUTEX_CONTESTED, so that userland always enters the kernel 2704 * to lock the mutex; this is necessary because the thread's 2705 * priority has to be adjusted for such a mutex.
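 * (Illustrative consequence: the word stored below is
 * umtx_unlock_val(flags, rb) | UMUTEX_CONTESTED, so even a fully
 * uncontested PP mutex never reads as UMUTEX_UNOWNED, and a userland
 * fast-path CAS from UMUTEX_UNOWNED can never bypass the kernel.)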
2706 */ 2707 error = suword32(&m->m_owner, umtx_unlock_val(flags, rb) | 2708 UMUTEX_CONTESTED); 2709 2710 umtxq_lock(&key); 2711 if (error == 0) 2712 umtxq_signal(&key, 1); 2713 umtxq_unbusy(&key); 2714 umtxq_unlock(&key); 2715 2716 if (error == -1) 2717 error = EFAULT; 2718 else { 2719 mtx_lock(&umtx_lock); 2720 if (su != 0) 2721 uq->uq_inherited_pri = new_inherited_pri; 2722 pri = PRI_MAX; 2723 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2724 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2725 if (uq2 != NULL) { 2726 if (pri > UPRI(uq2->uq_thread)) 2727 pri = UPRI(uq2->uq_thread); 2728 } 2729 } 2730 if (pri > uq->uq_inherited_pri) 2731 pri = uq->uq_inherited_pri; 2732 thread_lock(td); 2733 sched_lend_user_prio(td, pri); 2734 thread_unlock(td); 2735 mtx_unlock(&umtx_lock); 2736 } 2737 umtx_key_release(&key); 2738 return (error); 2739 } 2740 2741 static int 2742 do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling, 2743 uint32_t *old_ceiling) 2744 { 2745 struct umtx_q *uq; 2746 uint32_t flags, id, owner, save_ceiling; 2747 int error, rv, rv1; 2748 2749 error = fueword32(&m->m_flags, &flags); 2750 if (error == -1) 2751 return (EFAULT); 2752 if ((flags & UMUTEX_PRIO_PROTECT) == 0) 2753 return (EINVAL); 2754 if (ceiling > RTP_PRIO_MAX) 2755 return (EINVAL); 2756 id = td->td_tid; 2757 uq = td->td_umtxq; 2758 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2759 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2760 &uq->uq_key)) != 0) 2761 return (error); 2762 for (;;) { 2763 umtxq_lock(&uq->uq_key); 2764 umtxq_busy(&uq->uq_key); 2765 umtxq_unlock(&uq->uq_key); 2766 2767 rv = fueword32(&m->m_ceilings[0], &save_ceiling); 2768 if (rv == -1) { 2769 error = EFAULT; 2770 break; 2771 } 2772 2773 rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, 2774 id | UMUTEX_CONTESTED); 2775 if (rv == -1) { 2776 error = EFAULT; 2777 break; 2778 } 2779 2780 if (rv == 0) { 2781 MPASS(owner == UMUTEX_CONTESTED); 2782 rv = suword32(&m->m_ceilings[0], ceiling); 2783 rv1 = suword32(&m->m_owner, UMUTEX_CONTESTED); 2784 error = (rv == 0 && rv1 == 0) ? 0: EFAULT; 2785 break; 2786 } 2787 2788 if ((owner & ~UMUTEX_CONTESTED) == id) { 2789 rv = suword32(&m->m_ceilings[0], ceiling); 2790 error = rv == 0 ? 0 : EFAULT; 2791 break; 2792 } 2793 2794 if (owner == UMUTEX_RB_OWNERDEAD) { 2795 error = EOWNERDEAD; 2796 break; 2797 } else if (owner == UMUTEX_RB_NOTRECOV) { 2798 error = ENOTRECOVERABLE; 2799 break; 2800 } 2801 2802 /* 2803 * If we caught a signal, we have retried and now 2804 * exit immediately. 2805 */ 2806 if (error != 0) 2807 break; 2808 2809 /* 2810 * We set the contested bit, sleep. Otherwise the lock changed 2811 * and we need to retry or we lost a race to the thread 2812 * unlocking the umtx. 2813 */ 2814 umtxq_lock(&uq->uq_key); 2815 umtxq_insert(uq); 2816 umtxq_unbusy(&uq->uq_key); 2817 error = umtxq_sleep(uq, "umtxpp", NULL); 2818 umtxq_remove(uq); 2819 umtxq_unlock(&uq->uq_key); 2820 } 2821 umtxq_lock(&uq->uq_key); 2822 if (error == 0) 2823 umtxq_signal(&uq->uq_key, INT_MAX); 2824 umtxq_unbusy(&uq->uq_key); 2825 umtxq_unlock(&uq->uq_key); 2826 umtx_key_release(&uq->uq_key); 2827 if (error == 0 && old_ceiling != NULL) { 2828 rv = suword32(old_ceiling, save_ceiling); 2829 error = rv == 0 ? 0 : EFAULT; 2830 } 2831 return (error); 2832 } 2833 2834 /* 2835 * Lock a userland POSIX mutex. 
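 * (Dispatch summary, for reference: no protocol bits selects
 * do_lock_normal(), UMUTEX_PRIO_INHERIT selects do_lock_pi(),
 * UMUTEX_PRIO_PROTECT selects do_lock_pp(), and setting both bits
 * is rejected with EINVAL.)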
*/ 2837 static int 2838 do_lock_umutex(struct thread *td, struct umutex *m, 2839 struct _umtx_time *timeout, int mode) 2840 { 2841 uint32_t flags; 2842 int error; 2843 2844 error = fueword32(&m->m_flags, &flags); 2845 if (error == -1) 2846 return (EFAULT); 2847 2848 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2849 case 0: 2850 error = do_lock_normal(td, m, flags, timeout, mode); 2851 break; 2852 case UMUTEX_PRIO_INHERIT: 2853 error = do_lock_pi(td, m, flags, timeout, mode); 2854 break; 2855 case UMUTEX_PRIO_PROTECT: 2856 error = do_lock_pp(td, m, flags, timeout, mode); 2857 break; 2858 default: 2859 return (EINVAL); 2860 } 2861 if (timeout == NULL) { 2862 if (error == EINTR && mode != _UMUTEX_WAIT) 2863 error = ERESTART; 2864 } else { 2865 /* Timed-locking is not restarted. */ 2866 if (error == ERESTART) 2867 error = EINTR; 2868 } 2869 return (error); 2870 } 2871 2872 /* 2873 * Unlock a userland POSIX mutex. 2874 */ 2875 static int 2876 do_unlock_umutex(struct thread *td, struct umutex *m, bool rb) 2877 { 2878 uint32_t flags; 2879 int error; 2880 2881 error = fueword32(&m->m_flags, &flags); 2882 if (error == -1) 2883 return (EFAULT); 2884 2885 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2886 case 0: 2887 return (do_unlock_normal(td, m, flags, rb)); 2888 case UMUTEX_PRIO_INHERIT: 2889 return (do_unlock_pi(td, m, flags, rb)); 2890 case UMUTEX_PRIO_PROTECT: 2891 return (do_unlock_pp(td, m, flags, rb)); 2892 } 2893 2894 return (EINVAL); 2895 } 2896 2897 static int 2898 do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m, 2899 struct timespec *timeout, u_long wflags) 2900 { 2901 struct abs_timeout timo; 2902 struct umtx_q *uq; 2903 uint32_t flags, clockid, hasw; 2904 int error; 2905 2906 uq = td->td_umtxq; 2907 error = fueword32(&cv->c_flags, &flags); 2908 if (error == -1) 2909 return (EFAULT); 2910 error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key); 2911 if (error != 0) 2912 return (error); 2913 2914 if ((wflags & CVWAIT_CLOCKID) != 0) { 2915 error = fueword32(&cv->c_clockid, &clockid); 2916 if (error == -1) { 2917 umtx_key_release(&uq->uq_key); 2918 return (EFAULT); 2919 } 2920 if (clockid < CLOCK_REALTIME || 2921 clockid >= CLOCK_THREAD_CPUTIME_ID) { 2922 /* hmm, only HW clock id will work. */ 2923 umtx_key_release(&uq->uq_key); 2924 return (EINVAL); 2925 } 2926 } else { 2927 clockid = CLOCK_REALTIME; 2928 } 2929 2930 umtxq_lock(&uq->uq_key); 2931 umtxq_busy(&uq->uq_key); 2932 umtxq_insert(uq); 2933 umtxq_unlock(&uq->uq_key); 2934 2935 /* 2936 * Set c_has_waiters to 1 before releasing the user mutex, and 2937 * avoid modifying the cache line when unnecessary. 2938 */ 2939 error = fueword32(&cv->c_has_waiters, &hasw); 2940 if (error == 0 && hasw == 0) 2941 suword32(&cv->c_has_waiters, 1); 2942 2943 umtxq_unbusy_unlocked(&uq->uq_key); 2944 2945 error = do_unlock_umutex(td, m, false); 2946 2947 if (timeout != NULL) 2948 abs_timeout_init(&timo, clockid, (wflags & CVWAIT_ABSTIME) != 0, 2949 timeout); 2950 2951 umtxq_lock(&uq->uq_key); 2952 if (error == 0) { 2953 error = umtxq_sleep(uq, "ucond", timeout == NULL ? 2954 NULL : &timo); 2955 } 2956 2957 if ((uq->uq_flags & UQF_UMTXQ) == 0) 2958 error = 0; 2959 else { 2960 /* 2961 * This must be a timeout, an interruption by a signal, or 2962 * a spurious wakeup; clear the c_has_waiters flag when 2963 * necessary.
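 * (Sketch of the rule: c_has_waiters is cleared only when this waiter
 * was the last one queued, i.e. oldlen == 1 below; otherwise the flag
 * must stay set for the sleepers that remain.)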
2964 */ 2965 umtxq_busy(&uq->uq_key); 2966 if ((uq->uq_flags & UQF_UMTXQ) != 0) { 2967 int oldlen = uq->uq_cur_queue->length; 2968 umtxq_remove(uq); 2969 if (oldlen == 1) { 2970 umtxq_unlock(&uq->uq_key); 2971 suword32(&cv->c_has_waiters, 0); 2972 umtxq_lock(&uq->uq_key); 2973 } 2974 } 2975 umtxq_unbusy(&uq->uq_key); 2976 if (error == ERESTART) 2977 error = EINTR; 2978 } 2979 2980 umtxq_unlock(&uq->uq_key); 2981 umtx_key_release(&uq->uq_key); 2982 return (error); 2983 } 2984 2985 /* 2986 * Signal a userland condition variable. 2987 */ 2988 static int 2989 do_cv_signal(struct thread *td, struct ucond *cv) 2990 { 2991 struct umtx_key key; 2992 int error, cnt, nwake; 2993 uint32_t flags; 2994 2995 error = fueword32(&cv->c_flags, &flags); 2996 if (error == -1) 2997 return (EFAULT); 2998 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2999 return (error); 3000 umtxq_lock(&key); 3001 umtxq_busy(&key); 3002 cnt = umtxq_count(&key); 3003 nwake = umtxq_signal(&key, 1); 3004 if (cnt <= nwake) { 3005 umtxq_unlock(&key); 3006 error = suword32(&cv->c_has_waiters, 0); 3007 if (error == -1) 3008 error = EFAULT; 3009 umtxq_lock(&key); 3010 } 3011 umtxq_unbusy(&key); 3012 umtxq_unlock(&key); 3013 umtx_key_release(&key); 3014 return (error); 3015 } 3016 3017 static int 3018 do_cv_broadcast(struct thread *td, struct ucond *cv) 3019 { 3020 struct umtx_key key; 3021 int error; 3022 uint32_t flags; 3023 3024 error = fueword32(&cv->c_flags, &flags); 3025 if (error == -1) 3026 return (EFAULT); 3027 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 3028 return (error); 3029 3030 umtxq_lock(&key); 3031 umtxq_busy(&key); 3032 umtxq_signal(&key, INT_MAX); 3033 umtxq_unlock(&key); 3034 3035 error = suword32(&cv->c_has_waiters, 0); 3036 if (error == -1) 3037 error = EFAULT; 3038 3039 umtxq_unbusy_unlocked(&key); 3040 3041 umtx_key_release(&key); 3042 return (error); 3043 } 3044 3045 static int 3046 do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, 3047 struct _umtx_time *timeout) 3048 { 3049 struct abs_timeout timo; 3050 struct umtx_q *uq; 3051 uint32_t flags, wrflags; 3052 int32_t state, oldstate; 3053 int32_t blocked_readers; 3054 int error, error1, rv; 3055 3056 uq = td->td_umtxq; 3057 error = fueword32(&rwlock->rw_flags, &flags); 3058 if (error == -1) 3059 return (EFAULT); 3060 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3061 if (error != 0) 3062 return (error); 3063 3064 if (timeout != NULL) 3065 abs_timeout_init2(&timo, timeout); 3066 3067 wrflags = URWLOCK_WRITE_OWNER; 3068 if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER)) 3069 wrflags |= URWLOCK_WRITE_WAITERS; 3070 3071 for (;;) { 3072 rv = fueword32(&rwlock->rw_state, &state); 3073 if (rv == -1) { 3074 umtx_key_release(&uq->uq_key); 3075 return (EFAULT); 3076 } 3077 3078 /* try to lock it */ 3079 while (!(state & wrflags)) { 3080 if (__predict_false(URWLOCK_READER_COUNT(state) == 3081 URWLOCK_MAX_READERS)) { 3082 umtx_key_release(&uq->uq_key); 3083 return (EAGAIN); 3084 } 3085 rv = casueword32(&rwlock->rw_state, state, 3086 &oldstate, state + 1); 3087 if (rv == -1) { 3088 umtx_key_release(&uq->uq_key); 3089 return (EFAULT); 3090 } 3091 if (rv == 0) { 3092 MPASS(oldstate == state); 3093 umtx_key_release(&uq->uq_key); 3094 return (0); 3095 } 3096 error = thread_check_susp(td, true); 3097 if (error != 0) 3098 break; 3099 state = oldstate; 3100 } 3101 3102 if (error) 3103 break; 3104 3105 /* grab monitor lock */ 3106 umtxq_lock(&uq->uq_key); 3107 
umtxq_busy(&uq->uq_key); 3108 umtxq_unlock(&uq->uq_key); 3109 3110 /* 3111 * re-read the state, in case it changed between the try-lock above 3112 * and the check below 3113 */ 3114 rv = fueword32(&rwlock->rw_state, &state); 3115 if (rv == -1) 3116 error = EFAULT; 3117 3118 /* set read contention bit */ 3119 while (error == 0 && (state & wrflags) && 3120 !(state & URWLOCK_READ_WAITERS)) { 3121 rv = casueword32(&rwlock->rw_state, state, 3122 &oldstate, state | URWLOCK_READ_WAITERS); 3123 if (rv == -1) { 3124 error = EFAULT; 3125 break; 3126 } 3127 if (rv == 0) { 3128 MPASS(oldstate == state); 3129 goto sleep; 3130 } 3131 state = oldstate; 3132 error = thread_check_susp(td, false); 3133 if (error != 0) 3134 break; 3135 } 3136 if (error != 0) { 3137 umtxq_unbusy_unlocked(&uq->uq_key); 3138 break; 3139 } 3140 3141 /* state is changed while setting flags, restart */ 3142 if (!(state & wrflags)) { 3143 umtxq_unbusy_unlocked(&uq->uq_key); 3144 error = thread_check_susp(td, true); 3145 if (error != 0) 3146 break; 3147 continue; 3148 } 3149 3150 sleep: 3151 /* 3152 * Contention bit is set, before sleeping, increase 3153 * read waiter count. 3154 */ 3155 rv = fueword32(&rwlock->rw_blocked_readers, 3156 &blocked_readers); 3157 if (rv == -1) { 3158 umtxq_unbusy_unlocked(&uq->uq_key); 3159 error = EFAULT; 3160 break; 3161 } 3162 suword32(&rwlock->rw_blocked_readers, blocked_readers+1); 3163 3164 while (state & wrflags) { 3165 umtxq_lock(&uq->uq_key); 3166 umtxq_insert(uq); 3167 umtxq_unbusy(&uq->uq_key); 3168 3169 error = umtxq_sleep(uq, "urdlck", timeout == NULL ? 3170 NULL : &timo); 3171 3172 umtxq_busy(&uq->uq_key); 3173 umtxq_remove(uq); 3174 umtxq_unlock(&uq->uq_key); 3175 if (error) 3176 break; 3177 rv = fueword32(&rwlock->rw_state, &state); 3178 if (rv == -1) { 3179 error = EFAULT; 3180 break; 3181 } 3182 } 3183 3184 /* decrease read waiter count, and may clear read contention bit */ 3185 rv = fueword32(&rwlock->rw_blocked_readers, 3186 &blocked_readers); 3187 if (rv == -1) { 3188 umtxq_unbusy_unlocked(&uq->uq_key); 3189 error = EFAULT; 3190 break; 3191 } 3192 suword32(&rwlock->rw_blocked_readers, blocked_readers-1); 3193 if (blocked_readers == 1) { 3194 rv = fueword32(&rwlock->rw_state, &state); 3195 if (rv == -1) { 3196 umtxq_unbusy_unlocked(&uq->uq_key); 3197 error = EFAULT; 3198 break; 3199 } 3200 for (;;) { 3201 rv = casueword32(&rwlock->rw_state, state, 3202 &oldstate, state & ~URWLOCK_READ_WAITERS); 3203 if (rv == -1) { 3204 error = EFAULT; 3205 break; 3206 } 3207 if (rv == 0) { 3208 MPASS(oldstate == state); 3209 break; 3210 } 3211 state = oldstate; 3212 error1 = thread_check_susp(td, false); 3213 if (error1 != 0) { 3214 if (error == 0) 3215 error = error1; 3216 break; 3217 } 3218 } 3219 } 3220 3221 umtxq_unbusy_unlocked(&uq->uq_key); 3222 if (error != 0) 3223 break; 3224 } 3225 umtx_key_release(&uq->uq_key); 3226 if (error == ERESTART) 3227 error = EINTR; 3228 return (error); 3229 } 3230 3231 static int 3232 do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout) 3233 { 3234 struct abs_timeout timo; 3235 struct umtx_q *uq; 3236 uint32_t flags; 3237 int32_t state, oldstate; 3238 int32_t blocked_writers; 3239 int32_t blocked_readers; 3240 int error, error1, rv; 3241 3242 uq = td->td_umtxq; 3243 error = fueword32(&rwlock->rw_flags, &flags); 3244 if (error == -1) 3245 return (EFAULT); 3246 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3247 if (error != 0) 3248 return (error); 3249 3250 if (timeout != NULL) 3251 abs_timeout_init2(&timo, 
timeout); 3252 3253 blocked_readers = 0; 3254 for (;;) { 3255 rv = fueword32(&rwlock->rw_state, &state); 3256 if (rv == -1) { 3257 umtx_key_release(&uq->uq_key); 3258 return (EFAULT); 3259 } 3260 while ((state & URWLOCK_WRITE_OWNER) == 0 && 3261 URWLOCK_READER_COUNT(state) == 0) { 3262 rv = casueword32(&rwlock->rw_state, state, 3263 &oldstate, state | URWLOCK_WRITE_OWNER); 3264 if (rv == -1) { 3265 umtx_key_release(&uq->uq_key); 3266 return (EFAULT); 3267 } 3268 if (rv == 0) { 3269 MPASS(oldstate == state); 3270 umtx_key_release(&uq->uq_key); 3271 return (0); 3272 } 3273 state = oldstate; 3274 error = thread_check_susp(td, true); 3275 if (error != 0) 3276 break; 3277 } 3278 3279 if (error) { 3280 if ((state & (URWLOCK_WRITE_OWNER | 3281 URWLOCK_WRITE_WAITERS)) == 0 && 3282 blocked_readers != 0) { 3283 umtxq_lock(&uq->uq_key); 3284 umtxq_busy(&uq->uq_key); 3285 umtxq_signal_queue(&uq->uq_key, INT_MAX, 3286 UMTX_SHARED_QUEUE); 3287 umtxq_unbusy(&uq->uq_key); 3288 umtxq_unlock(&uq->uq_key); 3289 } 3290 3291 break; 3292 } 3293 3294 /* grab monitor lock */ 3295 umtxq_lock(&uq->uq_key); 3296 umtxq_busy(&uq->uq_key); 3297 umtxq_unlock(&uq->uq_key); 3298 3299 /* 3300 * Re-read the state, in case it changed between the 3301 * try-lock above and the check below. 3302 */ 3303 rv = fueword32(&rwlock->rw_state, &state); 3304 if (rv == -1) 3305 error = EFAULT; 3306 3307 while (error == 0 && ((state & URWLOCK_WRITE_OWNER) || 3308 URWLOCK_READER_COUNT(state) != 0) && 3309 (state & URWLOCK_WRITE_WAITERS) == 0) { 3310 rv = casueword32(&rwlock->rw_state, state, 3311 &oldstate, state | URWLOCK_WRITE_WAITERS); 3312 if (rv == -1) { 3313 error = EFAULT; 3314 break; 3315 } 3316 if (rv == 0) { 3317 MPASS(oldstate == state); 3318 goto sleep; 3319 } 3320 state = oldstate; 3321 error = thread_check_susp(td, false); 3322 if (error != 0) 3323 break; 3324 } 3325 if (error != 0) { 3326 umtxq_unbusy_unlocked(&uq->uq_key); 3327 break; 3328 } 3329 3330 if ((state & URWLOCK_WRITE_OWNER) == 0 && 3331 URWLOCK_READER_COUNT(state) == 0) { 3332 umtxq_unbusy_unlocked(&uq->uq_key); 3333 error = thread_check_susp(td, false); 3334 if (error != 0) 3335 break; 3336 continue; 3337 } 3338 sleep: 3339 rv = fueword32(&rwlock->rw_blocked_writers, 3340 &blocked_writers); 3341 if (rv == -1) { 3342 umtxq_unbusy_unlocked(&uq->uq_key); 3343 error = EFAULT; 3344 break; 3345 } 3346 suword32(&rwlock->rw_blocked_writers, blocked_writers + 1); 3347 3348 while ((state & URWLOCK_WRITE_OWNER) || 3349 URWLOCK_READER_COUNT(state) != 0) { 3350 umtxq_lock(&uq->uq_key); 3351 umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE); 3352 umtxq_unbusy(&uq->uq_key); 3353 3354 error = umtxq_sleep(uq, "uwrlck", timeout == NULL ? 
3355 NULL : &timo); 3356 3357 umtxq_busy(&uq->uq_key); 3358 umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE); 3359 umtxq_unlock(&uq->uq_key); 3360 if (error) 3361 break; 3362 rv = fueword32(&rwlock->rw_state, &state); 3363 if (rv == -1) { 3364 error = EFAULT; 3365 break; 3366 } 3367 } 3368 3369 rv = fueword32(&rwlock->rw_blocked_writers, 3370 &blocked_writers); 3371 if (rv == -1) { 3372 umtxq_unbusy_unlocked(&uq->uq_key); 3373 error = EFAULT; 3374 break; 3375 } 3376 suword32(&rwlock->rw_blocked_writers, blocked_writers-1); 3377 if (blocked_writers == 1) { 3378 rv = fueword32(&rwlock->rw_state, &state); 3379 if (rv == -1) { 3380 umtxq_unbusy_unlocked(&uq->uq_key); 3381 error = EFAULT; 3382 break; 3383 } 3384 for (;;) { 3385 rv = casueword32(&rwlock->rw_state, state, 3386 &oldstate, state & ~URWLOCK_WRITE_WAITERS); 3387 if (rv == -1) { 3388 error = EFAULT; 3389 break; 3390 } 3391 if (rv == 0) { 3392 MPASS(oldstate == state); 3393 break; 3394 } 3395 state = oldstate; 3396 error1 = thread_check_susp(td, false); 3397 /* 3398 * We are leaving the URWLOCK_WRITE_WAITERS 3399 * behind, but this should not harm the 3400 * correctness. 3401 */ 3402 if (error1 != 0) { 3403 if (error == 0) 3404 error = error1; 3405 break; 3406 } 3407 } 3408 rv = fueword32(&rwlock->rw_blocked_readers, 3409 &blocked_readers); 3410 if (rv == -1) { 3411 umtxq_unbusy_unlocked(&uq->uq_key); 3412 error = EFAULT; 3413 break; 3414 } 3415 } else 3416 blocked_readers = 0; 3417 3418 umtxq_unbusy_unlocked(&uq->uq_key); 3419 } 3420 3421 umtx_key_release(&uq->uq_key); 3422 if (error == ERESTART) 3423 error = EINTR; 3424 return (error); 3425 } 3426 3427 static int 3428 do_rw_unlock(struct thread *td, struct urwlock *rwlock) 3429 { 3430 struct umtx_q *uq; 3431 uint32_t flags; 3432 int32_t state, oldstate; 3433 int error, rv, q, count; 3434 3435 uq = td->td_umtxq; 3436 error = fueword32(&rwlock->rw_flags, &flags); 3437 if (error == -1) 3438 return (EFAULT); 3439 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3440 if (error != 0) 3441 return (error); 3442 3443 error = fueword32(&rwlock->rw_state, &state); 3444 if (error == -1) { 3445 error = EFAULT; 3446 goto out; 3447 } 3448 if (state & URWLOCK_WRITE_OWNER) { 3449 for (;;) { 3450 rv = casueword32(&rwlock->rw_state, state, 3451 &oldstate, state & ~URWLOCK_WRITE_OWNER); 3452 if (rv == -1) { 3453 error = EFAULT; 3454 goto out; 3455 } 3456 if (rv == 1) { 3457 state = oldstate; 3458 if (!(oldstate & URWLOCK_WRITE_OWNER)) { 3459 error = EPERM; 3460 goto out; 3461 } 3462 error = thread_check_susp(td, true); 3463 if (error != 0) 3464 goto out; 3465 } else 3466 break; 3467 } 3468 } else if (URWLOCK_READER_COUNT(state) != 0) { 3469 for (;;) { 3470 rv = casueword32(&rwlock->rw_state, state, 3471 &oldstate, state - 1); 3472 if (rv == -1) { 3473 error = EFAULT; 3474 goto out; 3475 } 3476 if (rv == 1) { 3477 state = oldstate; 3478 if (URWLOCK_READER_COUNT(oldstate) == 0) { 3479 error = EPERM; 3480 goto out; 3481 } 3482 error = thread_check_susp(td, true); 3483 if (error != 0) 3484 goto out; 3485 } else 3486 break; 3487 } 3488 } else { 3489 error = EPERM; 3490 goto out; 3491 } 3492 3493 count = 0; 3494 3495 if (!(flags & URWLOCK_PREFER_READER)) { 3496 if (state & URWLOCK_WRITE_WAITERS) { 3497 count = 1; 3498 q = UMTX_EXCLUSIVE_QUEUE; 3499 } else if (state & URWLOCK_READ_WAITERS) { 3500 count = INT_MAX; 3501 q = UMTX_SHARED_QUEUE; 3502 } 3503 } else { 3504 if (state & URWLOCK_READ_WAITERS) { 3505 count = INT_MAX; 3506 q = UMTX_SHARED_QUEUE; 3507 } else if (state & 
URWLOCK_WRITE_WAITERS) { 3508 count = 1; 3509 q = UMTX_EXCLUSIVE_QUEUE; 3510 } 3511 } 3512 3513 if (count) { 3514 umtxq_lock(&uq->uq_key); 3515 umtxq_busy(&uq->uq_key); 3516 umtxq_signal_queue(&uq->uq_key, count, q); 3517 umtxq_unbusy(&uq->uq_key); 3518 umtxq_unlock(&uq->uq_key); 3519 } 3520 out: 3521 umtx_key_release(&uq->uq_key); 3522 return (error); 3523 } 3524 3525 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3526 static int 3527 do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout) 3528 { 3529 struct abs_timeout timo; 3530 struct umtx_q *uq; 3531 uint32_t flags, count, count1; 3532 int error, rv, rv1; 3533 3534 uq = td->td_umtxq; 3535 error = fueword32(&sem->_flags, &flags); 3536 if (error == -1) 3537 return (EFAULT); 3538 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3539 if (error != 0) 3540 return (error); 3541 3542 if (timeout != NULL) 3543 abs_timeout_init2(&timo, timeout); 3544 3545 again: 3546 umtxq_lock(&uq->uq_key); 3547 umtxq_busy(&uq->uq_key); 3548 umtxq_insert(uq); 3549 umtxq_unlock(&uq->uq_key); 3550 rv = casueword32(&sem->_has_waiters, 0, &count1, 1); 3551 if (rv == 0) 3552 rv1 = fueword32(&sem->_count, &count); 3553 if (rv == -1 || (rv == 0 && (rv1 == -1 || count != 0)) || 3554 (rv == 1 && count1 == 0)) { 3555 umtxq_lock(&uq->uq_key); 3556 umtxq_unbusy(&uq->uq_key); 3557 umtxq_remove(uq); 3558 umtxq_unlock(&uq->uq_key); 3559 if (rv == 1) { 3560 rv = thread_check_susp(td, true); 3561 if (rv == 0) 3562 goto again; 3563 error = rv; 3564 goto out; 3565 } 3566 if (rv == 0) 3567 rv = rv1; 3568 error = rv == -1 ? EFAULT : 0; 3569 goto out; 3570 } 3571 umtxq_lock(&uq->uq_key); 3572 umtxq_unbusy(&uq->uq_key); 3573 3574 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 3575 3576 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3577 error = 0; 3578 else { 3579 umtxq_remove(uq); 3580 /* A relative timeout cannot be restarted. */ 3581 if (error == ERESTART && timeout != NULL && 3582 (timeout->_flags & UMTX_ABSTIME) == 0) 3583 error = EINTR; 3584 } 3585 umtxq_unlock(&uq->uq_key); 3586 out: 3587 umtx_key_release(&uq->uq_key); 3588 return (error); 3589 } 3590 3591 /* 3592 * Signal a userland semaphore. 3593 */ 3594 static int 3595 do_sem_wake(struct thread *td, struct _usem *sem) 3596 { 3597 struct umtx_key key; 3598 int error, cnt; 3599 uint32_t flags; 3600 3601 error = fueword32(&sem->_flags, &flags); 3602 if (error == -1) 3603 return (EFAULT); 3604 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3605 return (error); 3606 umtxq_lock(&key); 3607 umtxq_busy(&key); 3608 cnt = umtxq_count(&key); 3609 if (cnt > 0) { 3610 /* 3611 * Check if count is greater than 0, this means the memory is 3612 * still being referenced by user code, so we can safely 3613 * update _has_waiters flag. 
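 * (In other words: cnt == 1 below means the thread about to be woken
 * is the last waiter, so _has_waiters can be cleared first; with more
 * waiters the flag is left set for the remaining sleepers.)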
3614 */ 3615 if (cnt == 1) { 3616 umtxq_unlock(&key); 3617 error = suword32(&sem->_has_waiters, 0); 3618 umtxq_lock(&key); 3619 if (error == -1) 3620 error = EFAULT; 3621 } 3622 umtxq_signal(&key, 1); 3623 } 3624 umtxq_unbusy(&key); 3625 umtxq_unlock(&key); 3626 umtx_key_release(&key); 3627 return (error); 3628 } 3629 #endif 3630 3631 static int 3632 do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout) 3633 { 3634 struct abs_timeout timo; 3635 struct umtx_q *uq; 3636 uint32_t count, flags; 3637 int error, rv; 3638 3639 uq = td->td_umtxq; 3640 flags = fuword32(&sem->_flags); 3641 if (timeout != NULL) 3642 abs_timeout_init2(&timo, timeout); 3643 3644 again: 3645 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3646 if (error != 0) 3647 return (error); 3648 umtxq_lock(&uq->uq_key); 3649 umtxq_busy(&uq->uq_key); 3650 umtxq_insert(uq); 3651 umtxq_unlock(&uq->uq_key); 3652 rv = fueword32(&sem->_count, &count); 3653 if (rv == -1) { 3654 umtxq_lock(&uq->uq_key); 3655 umtxq_unbusy(&uq->uq_key); 3656 umtxq_remove(uq); 3657 umtxq_unlock(&uq->uq_key); 3658 umtx_key_release(&uq->uq_key); 3659 return (EFAULT); 3660 } 3661 for (;;) { 3662 if (USEM_COUNT(count) != 0) { 3663 umtxq_lock(&uq->uq_key); 3664 umtxq_unbusy(&uq->uq_key); 3665 umtxq_remove(uq); 3666 umtxq_unlock(&uq->uq_key); 3667 umtx_key_release(&uq->uq_key); 3668 return (0); 3669 } 3670 if (count == USEM_HAS_WAITERS) 3671 break; 3672 rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS); 3673 if (rv == 0) 3674 break; 3675 umtxq_lock(&uq->uq_key); 3676 umtxq_unbusy(&uq->uq_key); 3677 umtxq_remove(uq); 3678 umtxq_unlock(&uq->uq_key); 3679 umtx_key_release(&uq->uq_key); 3680 if (rv == -1) 3681 return (EFAULT); 3682 rv = thread_check_susp(td, true); 3683 if (rv != 0) 3684 return (rv); 3685 goto again; 3686 } 3687 umtxq_lock(&uq->uq_key); 3688 umtxq_unbusy(&uq->uq_key); 3689 3690 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 3691 3692 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3693 error = 0; 3694 else { 3695 umtxq_remove(uq); 3696 if (timeout != NULL && (timeout->_flags & UMTX_ABSTIME) == 0) { 3697 /* A relative timeout cannot be restarted. */ 3698 if (error == ERESTART) 3699 error = EINTR; 3700 if (error == EINTR) { 3701 abs_timeout_update(&timo); 3702 timespecsub(&timo.end, &timo.cur, 3703 &timeout->_timeout); 3704 } 3705 } 3706 } 3707 umtxq_unlock(&uq->uq_key); 3708 umtx_key_release(&uq->uq_key); 3709 return (error); 3710 } 3711 3712 /* 3713 * Signal a userland semaphore. 3714 */ 3715 static int 3716 do_sem2_wake(struct thread *td, struct _usem2 *sem) 3717 { 3718 struct umtx_key key; 3719 int error, cnt, rv; 3720 uint32_t count, flags; 3721 3722 rv = fueword32(&sem->_flags, &flags); 3723 if (rv == -1) 3724 return (EFAULT); 3725 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3726 return (error); 3727 umtxq_lock(&key); 3728 umtxq_busy(&key); 3729 cnt = umtxq_count(&key); 3730 if (cnt > 0) { 3731 /* 3732 * If this was the last sleeping thread, clear the waiters 3733 * flag in _count. 
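 * (Sketch of the loop below: _count is CASed to
 * count & ~USEM_HAS_WAITERS until the CAS succeeds or the flag is
 * already clear; rv == 1 means the word changed underneath us, so we
 * retry after a suspension check.)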
3734 */ 3735 if (cnt == 1) { 3736 umtxq_unlock(&key); 3737 rv = fueword32(&sem->_count, &count); 3738 while (rv != -1 && count & USEM_HAS_WAITERS) { 3739 rv = casueword32(&sem->_count, count, &count, 3740 count & ~USEM_HAS_WAITERS); 3741 if (rv == 1) { 3742 rv = thread_check_susp(td, true); 3743 if (rv != 0) 3744 break; 3745 } 3746 } 3747 if (rv == -1) 3748 error = EFAULT; 3749 else if (rv > 0) { 3750 error = rv; 3751 } 3752 umtxq_lock(&key); 3753 } 3754 3755 umtxq_signal(&key, 1); 3756 } 3757 umtxq_unbusy(&key); 3758 umtxq_unlock(&key); 3759 umtx_key_release(&key); 3760 return (error); 3761 } 3762 3763 #ifdef COMPAT_FREEBSD10 3764 int 3765 freebsd10__umtx_lock(struct thread *td, struct freebsd10__umtx_lock_args *uap) 3766 { 3767 return (do_lock_umtx(td, uap->umtx, td->td_tid, 0)); 3768 } 3769 3770 int 3771 freebsd10__umtx_unlock(struct thread *td, 3772 struct freebsd10__umtx_unlock_args *uap) 3773 { 3774 return (do_unlock_umtx(td, uap->umtx, td->td_tid)); 3775 } 3776 #endif 3777 3778 inline int 3779 umtx_copyin_timeout(const void *uaddr, struct timespec *tsp) 3780 { 3781 int error; 3782 3783 error = copyin(uaddr, tsp, sizeof(*tsp)); 3784 if (error == 0) { 3785 if (tsp->tv_sec < 0 || 3786 tsp->tv_nsec >= 1000000000 || 3787 tsp->tv_nsec < 0) 3788 error = EINVAL; 3789 } 3790 return (error); 3791 } 3792 3793 static inline int 3794 umtx_copyin_umtx_time(const void *uaddr, size_t size, struct _umtx_time *tp) 3795 { 3796 int error; 3797 3798 if (size <= sizeof(tp->_timeout)) { 3799 tp->_clockid = CLOCK_REALTIME; 3800 tp->_flags = 0; 3801 error = copyin(uaddr, &tp->_timeout, sizeof(tp->_timeout)); 3802 } else 3803 error = copyin(uaddr, tp, sizeof(*tp)); 3804 if (error != 0) 3805 return (error); 3806 if (tp->_timeout.tv_sec < 0 || 3807 tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0) 3808 return (EINVAL); 3809 return (0); 3810 } 3811 3812 static int 3813 umtx_copyin_robust_lists(const void *uaddr, size_t size, 3814 struct umtx_robust_lists_params *rb) 3815 { 3816 3817 if (size > sizeof(*rb)) 3818 return (EINVAL); 3819 return (copyin(uaddr, rb, size)); 3820 } 3821 3822 static int 3823 umtx_copyout_timeout(void *uaddr, size_t sz, struct timespec *tsp) 3824 { 3825 3826 /* 3827 * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time) 3828 * and we're only called if sz >= sizeof(timespec) as supplied in the 3829 * copyops. 3830 */ 3831 KASSERT(sz >= sizeof(*tsp), 3832 ("umtx_copyops specifies incorrect sizes")); 3833 3834 return (copyout(tsp, uaddr, sizeof(*tsp))); 3835 } 3836 3837 #ifdef COMPAT_FREEBSD10 3838 static int 3839 __umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap, 3840 const struct umtx_copyops *ops) 3841 { 3842 struct timespec *ts, timeout; 3843 int error; 3844 3845 /* Allow a null timespec (wait forever). 
*/ 3846 if (uap->uaddr2 == NULL) 3847 ts = NULL; 3848 else { 3849 error = ops->copyin_timeout(uap->uaddr2, &timeout); 3850 if (error != 0) 3851 return (error); 3852 ts = &timeout; 3853 } 3854 #ifdef COMPAT_FREEBSD32 3855 if (ops->compat32) 3856 return (do_lock_umtx32(td, uap->obj, uap->val, ts)); 3857 #endif 3858 return (do_lock_umtx(td, uap->obj, uap->val, ts)); 3859 } 3860 3861 static int 3862 __umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap, 3863 const struct umtx_copyops *ops) 3864 { 3865 #ifdef COMPAT_FREEBSD32 3866 if (ops->compat32) 3867 return (do_unlock_umtx32(td, uap->obj, uap->val)); 3868 #endif 3869 return (do_unlock_umtx(td, uap->obj, uap->val)); 3870 } 3871 #endif /* COMPAT_FREEBSD10 */ 3872 3873 #if !defined(COMPAT_FREEBSD10) 3874 static int 3875 __umtx_op_unimpl(struct thread *td __unused, struct _umtx_op_args *uap __unused, 3876 const struct umtx_copyops *ops __unused) 3877 { 3878 return (EOPNOTSUPP); 3879 } 3880 #endif /* COMPAT_FREEBSD10 */ 3881 3882 static int 3883 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap, 3884 const struct umtx_copyops *ops) 3885 { 3886 struct _umtx_time timeout, *tm_p; 3887 int error; 3888 3889 if (uap->uaddr2 == NULL) 3890 tm_p = NULL; 3891 else { 3892 error = ops->copyin_umtx_time( 3893 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3894 if (error != 0) 3895 return (error); 3896 tm_p = &timeout; 3897 } 3898 return (do_wait(td, uap->obj, uap->val, tm_p, ops->compat32, 0)); 3899 } 3900 3901 static int 3902 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap, 3903 const struct umtx_copyops *ops) 3904 { 3905 struct _umtx_time timeout, *tm_p; 3906 int error; 3907 3908 if (uap->uaddr2 == NULL) 3909 tm_p = NULL; 3910 else { 3911 error = ops->copyin_umtx_time( 3912 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3913 if (error != 0) 3914 return (error); 3915 tm_p = &timeout; 3916 } 3917 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); 3918 } 3919 3920 static int 3921 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap, 3922 const struct umtx_copyops *ops) 3923 { 3924 struct _umtx_time *tm_p, timeout; 3925 int error; 3926 3927 if (uap->uaddr2 == NULL) 3928 tm_p = NULL; 3929 else { 3930 error = ops->copyin_umtx_time( 3931 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3932 if (error != 0) 3933 return (error); 3934 tm_p = &timeout; 3935 } 3936 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1)); 3937 } 3938 3939 static int 3940 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap, 3941 const struct umtx_copyops *ops __unused) 3942 { 3943 3944 return (kern_umtx_wake(td, uap->obj, uap->val, 0)); 3945 } 3946 3947 #define BATCH_SIZE 128 3948 static int 3949 __umtx_op_nwake_private_native(struct thread *td, struct _umtx_op_args *uap) 3950 { 3951 char *uaddrs[BATCH_SIZE], **upp; 3952 int count, error, i, pos, tocopy; 3953 3954 upp = (char **)uap->obj; 3955 error = 0; 3956 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 3957 pos += tocopy) { 3958 tocopy = MIN(count, BATCH_SIZE); 3959 error = copyin(upp + pos, uaddrs, tocopy * sizeof(char *)); 3960 if (error != 0) 3961 break; 3962 for (i = 0; i < tocopy; ++i) { 3963 kern_umtx_wake(td, uaddrs[i], INT_MAX, 1); 3964 } 3965 maybe_yield(); 3966 } 3967 return (error); 3968 } 3969 3970 static int 3971 __umtx_op_nwake_private_compat32(struct thread *td, struct _umtx_op_args *uap) 3972 { 3973 uint32_t uaddrs[BATCH_SIZE], *upp; 3974 int count, error, i, pos, tocopy; 3975 3976 upp = (uint32_t *)uap->obj; 3977 error = 0; 3978 for (count = 
uap->val, pos = 0; count > 0; count -= tocopy, 3979 pos += tocopy) { 3980 tocopy = MIN(count, BATCH_SIZE); 3981 error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t)); 3982 if (error != 0) 3983 break; 3984 for (i = 0; i < tocopy; ++i) { 3985 kern_umtx_wake(td, (void *)(uintptr_t)uaddrs[i], 3986 INT_MAX, 1); 3987 } 3988 maybe_yield(); 3989 } 3990 return (error); 3991 } 3992 3993 static int 3994 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap, 3995 const struct umtx_copyops *ops) 3996 { 3997 3998 if (ops->compat32) 3999 return (__umtx_op_nwake_private_compat32(td, uap)); 4000 return (__umtx_op_nwake_private_native(td, uap)); 4001 } 4002 4003 static int 4004 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap, 4005 const struct umtx_copyops *ops __unused) 4006 { 4007 4008 return (kern_umtx_wake(td, uap->obj, uap->val, 1)); 4009 } 4010 4011 static int 4012 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap, 4013 const struct umtx_copyops *ops) 4014 { 4015 struct _umtx_time *tm_p, timeout; 4016 int error; 4017 4018 /* Allow a null timespec (wait forever). */ 4019 if (uap->uaddr2 == NULL) 4020 tm_p = NULL; 4021 else { 4022 error = ops->copyin_umtx_time( 4023 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 4024 if (error != 0) 4025 return (error); 4026 tm_p = &timeout; 4027 } 4028 return (do_lock_umutex(td, uap->obj, tm_p, 0)); 4029 } 4030 4031 static int 4032 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap, 4033 const struct umtx_copyops *ops __unused) 4034 { 4035 4036 return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY)); 4037 } 4038 4039 static int 4040 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap, 4041 const struct umtx_copyops *ops) 4042 { 4043 struct _umtx_time *tm_p, timeout; 4044 int error; 4045 4046 /* Allow a null timespec (wait forever). */ 4047 if (uap->uaddr2 == NULL) 4048 tm_p = NULL; 4049 else { 4050 error = ops->copyin_umtx_time( 4051 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 4052 if (error != 0) 4053 return (error); 4054 tm_p = &timeout; 4055 } 4056 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT)); 4057 } 4058 4059 static int 4060 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap, 4061 const struct umtx_copyops *ops __unused) 4062 { 4063 4064 return (do_wake_umutex(td, uap->obj)); 4065 } 4066 4067 static int 4068 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap, 4069 const struct umtx_copyops *ops __unused) 4070 { 4071 4072 return (do_unlock_umutex(td, uap->obj, false)); 4073 } 4074 4075 static int 4076 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap, 4077 const struct umtx_copyops *ops __unused) 4078 { 4079 4080 return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1)); 4081 } 4082 4083 static int 4084 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap, 4085 const struct umtx_copyops *ops) 4086 { 4087 struct timespec *ts, timeout; 4088 int error; 4089 4090 /* Allow a null timespec (wait forever). 
*/ 4091 if (uap->uaddr2 == NULL) 4092 ts = NULL; 4093 else { 4094 error = ops->copyin_timeout(uap->uaddr2, &timeout); 4095 if (error != 0) 4096 return (error); 4097 ts = &timeout; 4098 } 4099 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 4100 } 4101 4102 static int 4103 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap, 4104 const struct umtx_copyops *ops __unused) 4105 { 4106 4107 return (do_cv_signal(td, uap->obj)); 4108 } 4109 4110 static int 4111 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap, 4112 const struct umtx_copyops *ops __unused) 4113 { 4114 4115 return (do_cv_broadcast(td, uap->obj)); 4116 } 4117 4118 static int 4119 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap, 4120 const struct umtx_copyops *ops) 4121 { 4122 struct _umtx_time timeout; 4123 int error; 4124 4125 /* Allow a null timespec (wait forever). */ 4126 if (uap->uaddr2 == NULL) { 4127 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 4128 } else { 4129 error = ops->copyin_umtx_time(uap->uaddr2, 4130 (size_t)uap->uaddr1, &timeout); 4131 if (error != 0) 4132 return (error); 4133 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 4134 } 4135 return (error); 4136 } 4137 4138 static int 4139 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap, 4140 const struct umtx_copyops *ops) 4141 { 4142 struct _umtx_time timeout; 4143 int error; 4144 4145 /* Allow a null timespec (wait forever). */ 4146 if (uap->uaddr2 == NULL) { 4147 error = do_rw_wrlock(td, uap->obj, 0); 4148 } else { 4149 error = ops->copyin_umtx_time(uap->uaddr2, 4150 (size_t)uap->uaddr1, &timeout); 4151 if (error != 0) 4152 return (error); 4153 4154 error = do_rw_wrlock(td, uap->obj, &timeout); 4155 } 4156 return (error); 4157 } 4158 4159 static int 4160 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap, 4161 const struct umtx_copyops *ops __unused) 4162 { 4163 4164 return (do_rw_unlock(td, uap->obj)); 4165 } 4166 4167 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4168 static int 4169 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap, 4170 const struct umtx_copyops *ops) 4171 { 4172 struct _umtx_time *tm_p, timeout; 4173 int error; 4174 4175 /* Allow a null timespec (wait forever). */ 4176 if (uap->uaddr2 == NULL) 4177 tm_p = NULL; 4178 else { 4179 error = ops->copyin_umtx_time( 4180 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 4181 if (error != 0) 4182 return (error); 4183 tm_p = &timeout; 4184 } 4185 return (do_sem_wait(td, uap->obj, tm_p)); 4186 } 4187 4188 static int 4189 __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap, 4190 const struct umtx_copyops *ops __unused) 4191 { 4192 4193 return (do_sem_wake(td, uap->obj)); 4194 } 4195 #endif 4196 4197 static int 4198 __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap, 4199 const struct umtx_copyops *ops __unused) 4200 { 4201 4202 return (do_wake2_umutex(td, uap->obj, uap->val)); 4203 } 4204 4205 static int 4206 __umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap, 4207 const struct umtx_copyops *ops) 4208 { 4209 struct _umtx_time *tm_p, timeout; 4210 size_t uasize; 4211 int error; 4212 4213 /* Allow a null timespec (wait forever). 
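 * (Note, inferred from the code below: unlike the other wait ops,
 * sem2_wait copies the remaining time back to userland on EINTR for
 * a relative timeout, which is why the supplied size is remembered
 * in uasize.)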
*/ 4214 if (uap->uaddr2 == NULL) { 4215 uasize = 0; 4216 tm_p = NULL; 4217 } else { 4218 uasize = (size_t)uap->uaddr1; 4219 error = ops->copyin_umtx_time(uap->uaddr2, uasize, &timeout); 4220 if (error != 0) 4221 return (error); 4222 tm_p = &timeout; 4223 } 4224 error = do_sem2_wait(td, uap->obj, tm_p); 4225 if (error == EINTR && uap->uaddr2 != NULL && 4226 (timeout._flags & UMTX_ABSTIME) == 0 && 4227 uasize >= ops->umtx_time_sz + ops->timespec_sz) { 4228 error = ops->copyout_timeout( 4229 (void *)((uintptr_t)uap->uaddr2 + ops->umtx_time_sz), 4230 uasize - ops->umtx_time_sz, &timeout._timeout); 4231 if (error == 0) { 4232 error = EINTR; 4233 } 4234 } 4235 4236 return (error); 4237 } 4238 4239 static int 4240 __umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap, 4241 const struct umtx_copyops *ops __unused) 4242 { 4243 4244 return (do_sem2_wake(td, uap->obj)); 4245 } 4246 4247 #define USHM_OBJ_UMTX(o) \ 4248 ((struct umtx_shm_obj_list *)(&(o)->umtx_data)) 4249 4250 #define USHMF_REG_LINKED 0x0001 4251 #define USHMF_OBJ_LINKED 0x0002 4252 struct umtx_shm_reg { 4253 TAILQ_ENTRY(umtx_shm_reg) ushm_reg_link; 4254 LIST_ENTRY(umtx_shm_reg) ushm_obj_link; 4255 struct umtx_key ushm_key; 4256 struct ucred *ushm_cred; 4257 struct shmfd *ushm_obj; 4258 u_int ushm_refcnt; 4259 u_int ushm_flags; 4260 }; 4261 4262 LIST_HEAD(umtx_shm_obj_list, umtx_shm_reg); 4263 TAILQ_HEAD(umtx_shm_reg_head, umtx_shm_reg); 4264 4265 static uma_zone_t umtx_shm_reg_zone; 4266 static struct umtx_shm_reg_head umtx_shm_registry[UMTX_CHAINS]; 4267 static struct mtx umtx_shm_lock; 4268 static struct umtx_shm_reg_head umtx_shm_reg_delfree = 4269 TAILQ_HEAD_INITIALIZER(umtx_shm_reg_delfree); 4270 4271 static void umtx_shm_free_reg(struct umtx_shm_reg *reg); 4272 4273 static void 4274 umtx_shm_reg_delfree_tq(void *context __unused, int pending __unused) 4275 { 4276 struct umtx_shm_reg_head d; 4277 struct umtx_shm_reg *reg, *reg1; 4278 4279 TAILQ_INIT(&d); 4280 mtx_lock(&umtx_shm_lock); 4281 TAILQ_CONCAT(&d, &umtx_shm_reg_delfree, ushm_reg_link); 4282 mtx_unlock(&umtx_shm_lock); 4283 TAILQ_FOREACH_SAFE(reg, &d, ushm_reg_link, reg1) { 4284 TAILQ_REMOVE(&d, reg, ushm_reg_link); 4285 umtx_shm_free_reg(reg); 4286 } 4287 } 4288 4289 static struct task umtx_shm_reg_delfree_task = 4290 TASK_INITIALIZER(0, umtx_shm_reg_delfree_tq, NULL); 4291 4292 static struct umtx_shm_reg * 4293 umtx_shm_find_reg_locked(const struct umtx_key *key) 4294 { 4295 struct umtx_shm_reg *reg; 4296 struct umtx_shm_reg_head *reg_head; 4297 4298 KASSERT(key->shared, ("umtx_p_find_rg: private key")); 4299 mtx_assert(&umtx_shm_lock, MA_OWNED); 4300 reg_head = &umtx_shm_registry[key->hash]; 4301 TAILQ_FOREACH(reg, reg_head, ushm_reg_link) { 4302 KASSERT(reg->ushm_key.shared, 4303 ("non-shared key on reg %p %d", reg, reg->ushm_key.shared)); 4304 if (reg->ushm_key.info.shared.object == 4305 key->info.shared.object && 4306 reg->ushm_key.info.shared.offset == 4307 key->info.shared.offset) { 4308 KASSERT(reg->ushm_key.type == TYPE_SHM, ("TYPE_USHM")); 4309 KASSERT(reg->ushm_refcnt > 0, 4310 ("reg %p refcnt 0 onlist", reg)); 4311 KASSERT((reg->ushm_flags & USHMF_REG_LINKED) != 0, 4312 ("reg %p not linked", reg)); 4313 reg->ushm_refcnt++; 4314 return (reg); 4315 } 4316 } 4317 return (NULL); 4318 } 4319 4320 static struct umtx_shm_reg * 4321 umtx_shm_find_reg(const struct umtx_key *key) 4322 { 4323 struct umtx_shm_reg *reg; 4324 4325 mtx_lock(&umtx_shm_lock); 4326 reg = umtx_shm_find_reg_locked(key); 4327 mtx_unlock(&umtx_shm_lock); 4328 return (reg); 4329 } 4330 
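/*
 * (Illustrative usage sketch, not part of the original code: a
 * successful umtx_shm_find_reg() returns the entry with its reference
 * count already bumped, so every caller must pair it with
 * umtx_shm_unref_reg(), e.g.:
 *
 *	reg = umtx_shm_find_reg(&key);
 *	if (reg != NULL) {
 *		... use reg->ushm_obj ...
 *		umtx_shm_unref_reg(reg, false);
 *	}
 * )
 */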
4331 static void 4332 umtx_shm_free_reg(struct umtx_shm_reg *reg) 4333 { 4334 4335 chgumtxcnt(reg->ushm_cred->cr_ruidinfo, -1, 0); 4336 crfree(reg->ushm_cred); 4337 shm_drop(reg->ushm_obj); 4338 uma_zfree(umtx_shm_reg_zone, reg); 4339 } 4340 4341 static bool 4342 umtx_shm_unref_reg_locked(struct umtx_shm_reg *reg, bool force) 4343 { 4344 bool res; 4345 4346 mtx_assert(&umtx_shm_lock, MA_OWNED); 4347 KASSERT(reg->ushm_refcnt > 0, ("ushm_reg %p refcnt 0", reg)); 4348 reg->ushm_refcnt--; 4349 res = reg->ushm_refcnt == 0; 4350 if (res || force) { 4351 if ((reg->ushm_flags & USHMF_REG_LINKED) != 0) { 4352 TAILQ_REMOVE(&umtx_shm_registry[reg->ushm_key.hash], 4353 reg, ushm_reg_link); 4354 reg->ushm_flags &= ~USHMF_REG_LINKED; 4355 } 4356 if ((reg->ushm_flags & USHMF_OBJ_LINKED) != 0) { 4357 LIST_REMOVE(reg, ushm_obj_link); 4358 reg->ushm_flags &= ~USHMF_OBJ_LINKED; 4359 } 4360 } 4361 return (res); 4362 } 4363 4364 static void 4365 umtx_shm_unref_reg(struct umtx_shm_reg *reg, bool force) 4366 { 4367 vm_object_t object; 4368 bool dofree; 4369 4370 if (force) { 4371 object = reg->ushm_obj->shm_object; 4372 VM_OBJECT_WLOCK(object); 4373 object->flags |= OBJ_UMTXDEAD; 4374 VM_OBJECT_WUNLOCK(object); 4375 } 4376 mtx_lock(&umtx_shm_lock); 4377 dofree = umtx_shm_unref_reg_locked(reg, force); 4378 mtx_unlock(&umtx_shm_lock); 4379 if (dofree) 4380 umtx_shm_free_reg(reg); 4381 } 4382 4383 void 4384 umtx_shm_object_init(vm_object_t object) 4385 { 4386 4387 LIST_INIT(USHM_OBJ_UMTX(object)); 4388 } 4389 4390 void 4391 umtx_shm_object_terminated(vm_object_t object) 4392 { 4393 struct umtx_shm_reg *reg, *reg1; 4394 bool dofree; 4395 4396 if (LIST_EMPTY(USHM_OBJ_UMTX(object))) 4397 return; 4398 4399 dofree = false; 4400 mtx_lock(&umtx_shm_lock); 4401 LIST_FOREACH_SAFE(reg, USHM_OBJ_UMTX(object), ushm_obj_link, reg1) { 4402 if (umtx_shm_unref_reg_locked(reg, true)) { 4403 TAILQ_INSERT_TAIL(&umtx_shm_reg_delfree, reg, 4404 ushm_reg_link); 4405 dofree = true; 4406 } 4407 } 4408 mtx_unlock(&umtx_shm_lock); 4409 if (dofree) 4410 taskqueue_enqueue(taskqueue_thread, &umtx_shm_reg_delfree_task); 4411 } 4412 4413 static int 4414 umtx_shm_create_reg(struct thread *td, const struct umtx_key *key, 4415 struct umtx_shm_reg **res) 4416 { 4417 struct umtx_shm_reg *reg, *reg1; 4418 struct ucred *cred; 4419 int error; 4420 4421 reg = umtx_shm_find_reg(key); 4422 if (reg != NULL) { 4423 *res = reg; 4424 return (0); 4425 } 4426 cred = td->td_ucred; 4427 if (!chgumtxcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_UMTXP))) 4428 return (ENOMEM); 4429 reg = uma_zalloc(umtx_shm_reg_zone, M_WAITOK | M_ZERO); 4430 reg->ushm_refcnt = 1; 4431 bcopy(key, ®->ushm_key, sizeof(*key)); 4432 reg->ushm_obj = shm_alloc(td->td_ucred, O_RDWR, false); 4433 reg->ushm_cred = crhold(cred); 4434 error = shm_dotruncate(reg->ushm_obj, PAGE_SIZE); 4435 if (error != 0) { 4436 umtx_shm_free_reg(reg); 4437 return (error); 4438 } 4439 mtx_lock(&umtx_shm_lock); 4440 reg1 = umtx_shm_find_reg_locked(key); 4441 if (reg1 != NULL) { 4442 mtx_unlock(&umtx_shm_lock); 4443 umtx_shm_free_reg(reg); 4444 *res = reg1; 4445 return (0); 4446 } 4447 reg->ushm_refcnt++; 4448 TAILQ_INSERT_TAIL(&umtx_shm_registry[key->hash], reg, ushm_reg_link); 4449 LIST_INSERT_HEAD(USHM_OBJ_UMTX(key->info.shared.object), reg, 4450 ushm_obj_link); 4451 reg->ushm_flags = USHMF_REG_LINKED | USHMF_OBJ_LINKED; 4452 mtx_unlock(&umtx_shm_lock); 4453 *res = reg; 4454 return (0); 4455 } 4456 4457 static int 4458 umtx_shm_alive(struct thread *td, void *addr) 4459 { 4460 vm_map_t map; 4461 vm_map_entry_t 
static int
umtx_shm_alive(struct thread *td, void *addr)
{
	vm_map_t map;
	vm_map_entry_t entry;
	vm_object_t object;
	vm_pindex_t pindex;
	vm_prot_t prot;
	int res, ret;
	boolean_t wired;

	map = &td->td_proc->p_vmspace->vm_map;
	res = vm_map_lookup(&map, (uintptr_t)addr, VM_PROT_READ, &entry,
	    &object, &pindex, &prot, &wired);
	if (res != KERN_SUCCESS)
		return (EFAULT);
	if (object == NULL)
		ret = EINVAL;
	else
		ret = (object->flags & OBJ_UMTXDEAD) != 0 ? ENOTTY : 0;
	vm_map_lookup_done(map, entry);
	return (ret);
}

static void
umtx_shm_init(void)
{
	int i;

	umtx_shm_reg_zone = uma_zcreate("umtx_shm", sizeof(struct umtx_shm_reg),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	mtx_init(&umtx_shm_lock, "umtxshm", NULL, MTX_DEF);
	for (i = 0; i < nitems(umtx_shm_registry); i++)
		TAILQ_INIT(&umtx_shm_registry[i]);
}

static int
umtx_shm(struct thread *td, void *addr, u_int flags)
{
	struct umtx_key key;
	struct umtx_shm_reg *reg;
	struct file *fp;
	int error, fd;

	if (__bitcount(flags & (UMTX_SHM_CREAT | UMTX_SHM_LOOKUP |
	    UMTX_SHM_DESTROY | UMTX_SHM_ALIVE)) != 1)
		return (EINVAL);
	if ((flags & UMTX_SHM_ALIVE) != 0)
		return (umtx_shm_alive(td, addr));
	error = umtx_key_get(addr, TYPE_SHM, PROCESS_SHARE, &key);
	if (error != 0)
		return (error);
	KASSERT(key.shared == 1, ("non-shared key"));
	if ((flags & UMTX_SHM_CREAT) != 0) {
		error = umtx_shm_create_reg(td, &key, &reg);
	} else {
		reg = umtx_shm_find_reg(&key);
		if (reg == NULL)
			error = ESRCH;
	}
	umtx_key_release(&key);
	if (error != 0)
		return (error);
	KASSERT(reg != NULL, ("no reg"));
	if ((flags & UMTX_SHM_DESTROY) != 0) {
		umtx_shm_unref_reg(reg, true);
	} else {
#if 0
#ifdef MAC
		error = mac_posixshm_check_open(td->td_ucred,
		    reg->ushm_obj, FFLAGS(O_RDWR));
		if (error == 0)
#endif
			error = shm_access(reg->ushm_obj, td->td_ucred,
			    FFLAGS(O_RDWR));
		if (error == 0)
#endif
			error = falloc_caps(td, &fp, &fd, O_CLOEXEC, NULL);
		if (error == 0) {
			shm_hold(reg->ushm_obj);
			finit(fp, FFLAGS(O_RDWR), DTYPE_SHM, reg->ushm_obj,
			    &shm_ops);
			td->td_retval[0] = fd;
			fdrop(fp, td);
		}
	}
	umtx_shm_unref_reg(reg, false);
	return (error);
}

static int
__umtx_op_shm(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *ops __unused)
{

	return (umtx_shm(td, uap->uaddr1, uap->val));
}

static int
__umtx_op_robust_lists(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *ops)
{
	struct umtx_robust_lists_params rb;
	int error;

	if (ops->compat32) {
		if ((td->td_pflags2 & TDP2_COMPAT32RB) == 0 &&
		    (td->td_rb_list != 0 || td->td_rbp_list != 0 ||
		    td->td_rb_inact != 0))
			return (EBUSY);
	} else if ((td->td_pflags2 & TDP2_COMPAT32RB) != 0) {
		return (EBUSY);
	}

	bzero(&rb, sizeof(rb));
	error = ops->copyin_robust_lists(uap->uaddr1, uap->val, &rb);
	if (error != 0)
		return (error);

	if (ops->compat32)
		td->td_pflags2 |= TDP2_COMPAT32RB;

	td->td_rb_list = rb.robust_list_offset;
	td->td_rbp_list = rb.robust_priv_list_offset;
	td->td_rb_inact = rb.robust_inact_offset;
	return (0);
}
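
/*
 * Usage sketch (illustrative; mirrors what libthr does at thread startup,
 * and the userland variable names here are hypothetical):
 *
 *	struct umtx_robust_lists_params rb = {
 *		.robust_list_offset = (uintptr_t)&thr->robust_list,
 *		.robust_priv_list_offset = (uintptr_t)&thr->priv_robust_list,
 *		.robust_inact_offset = (uintptr_t)&thr->inact_mtx,
 *	};
 *
 *	_umtx_op(NULL, UMTX_OP_ROBUST_LISTS, sizeof(rb), &rb, NULL);
 *
 * After registration, umtx_thread_cleanup() walks these lists when the
 * thread exits and unlocks any robust mutexes the dead thread still owned.
 */
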
#if defined(__i386__) || defined(__amd64__)
/*
 * Provide the standard 32-bit definitions for x86, since native/compat32
 * use a 32-bit time_t there.  Other architectures just need the i386
 * definitions along with their standard compat32.
 */
struct timespecx32 {
	int64_t tv_sec;
	int32_t tv_nsec;
};

struct umtx_timex32 {
	struct timespecx32 _timeout;
	uint32_t _flags;
	uint32_t _clockid;
};

#ifndef __i386__
#define	timespeci386	timespec32
#define	umtx_timei386	umtx_time32
#endif
#else /* !__i386__ && !__amd64__ */
/* 32-bit architectures can emulate i386, so define these almost everywhere. */
struct timespeci386 {
	int32_t tv_sec;
	int32_t tv_nsec;
};

struct umtx_timei386 {
	struct timespeci386 _timeout;
	uint32_t _flags;
	uint32_t _clockid;
};

#if defined(__LP64__)
#define	timespecx32	timespec32
#define	umtx_timex32	umtx_time32
#endif
#endif

static int
umtx_copyin_robust_lists32(const void *uaddr, size_t size,
    struct umtx_robust_lists_params *rbp)
{
	struct umtx_robust_lists_params_compat32 rb32;
	int error;

	if (size > sizeof(rb32))
		return (EINVAL);
	bzero(&rb32, sizeof(rb32));
	error = copyin(uaddr, &rb32, size);
	if (error != 0)
		return (error);
	CP(rb32, *rbp, robust_list_offset);
	CP(rb32, *rbp, robust_priv_list_offset);
	CP(rb32, *rbp, robust_inact_offset);
	return (0);
}

#ifndef __i386__
static inline int
umtx_copyin_timeouti386(const void *uaddr, struct timespec *tsp)
{
	struct timespeci386 ts32;
	int error;

	error = copyin(uaddr, &ts32, sizeof(ts32));
	if (error == 0) {
		if (ts32.tv_sec < 0 ||
		    ts32.tv_nsec >= 1000000000 ||
		    ts32.tv_nsec < 0)
			error = EINVAL;
		else {
			CP(ts32, *tsp, tv_sec);
			CP(ts32, *tsp, tv_nsec);
		}
	}
	return (error);
}

static inline int
umtx_copyin_umtx_timei386(const void *uaddr, size_t size, struct _umtx_time *tp)
{
	struct umtx_timei386 t32;
	int error;

	t32._clockid = CLOCK_REALTIME;
	t32._flags = 0;
	if (size <= sizeof(t32._timeout))
		error = copyin(uaddr, &t32._timeout, sizeof(t32._timeout));
	else
		error = copyin(uaddr, &t32, sizeof(t32));
	if (error != 0)
		return (error);
	if (t32._timeout.tv_sec < 0 ||
	    t32._timeout.tv_nsec >= 1000000000 || t32._timeout.tv_nsec < 0)
		return (EINVAL);
	TS_CP(t32, *tp, _timeout);
	CP(t32, *tp, _flags);
	CP(t32, *tp, _clockid);
	return (0);
}
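
/*
 * Usage sketch (illustrative, not part of the original source): the copyin
 * helpers discriminate on the size passed by the caller, so userland may
 * supply either a bare timespec (legacy) or a full _umtx_time.  For the
 * wait-style operations, uaddr1 carries the size and uaddr2 the timeout,
 * e.g. an absolute-time wait (deadline_sec is a hypothetical variable):
 *
 *	struct _umtx_time t = {
 *		._timeout = { .tv_sec = deadline_sec, .tv_nsec = 0 },
 *		._flags = UMTX_ABSTIME,
 *		._clockid = CLOCK_MONOTONIC,
 *	};
 *
 *	_umtx_op(&obj, UMTX_OP_WAIT_UINT, val,
 *	    (void *)(uintptr_t)sizeof(t), &t);
 */
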
static int
umtx_copyout_timeouti386(void *uaddr, size_t sz, struct timespec *tsp)
{
	struct timespeci386 remain32 = {
		.tv_sec = tsp->tv_sec,
		.tv_nsec = tsp->tv_nsec,
	};

	/*
	 * Should be guaranteed by the caller: sz == uaddr1 - sizeof(_umtx_time),
	 * and we're only called if sz >= sizeof(timespec) as supplied in the
	 * copyops.
	 */
	KASSERT(sz >= sizeof(remain32),
	    ("umtx_copyops specifies incorrect sizes"));

	return (copyout(&remain32, uaddr, sizeof(remain32)));
}
#endif /* !__i386__ */

#if defined(__i386__) || defined(__LP64__)
static inline int
umtx_copyin_timeoutx32(const void *uaddr, struct timespec *tsp)
{
	struct timespecx32 ts32;
	int error;

	error = copyin(uaddr, &ts32, sizeof(ts32));
	if (error == 0) {
		if (ts32.tv_sec < 0 ||
		    ts32.tv_nsec >= 1000000000 ||
		    ts32.tv_nsec < 0)
			error = EINVAL;
		else {
			CP(ts32, *tsp, tv_sec);
			CP(ts32, *tsp, tv_nsec);
		}
	}
	return (error);
}

static inline int
umtx_copyin_umtx_timex32(const void *uaddr, size_t size, struct _umtx_time *tp)
{
	struct umtx_timex32 t32;
	int error;

	t32._clockid = CLOCK_REALTIME;
	t32._flags = 0;
	if (size <= sizeof(t32._timeout))
		error = copyin(uaddr, &t32._timeout, sizeof(t32._timeout));
	else
		error = copyin(uaddr, &t32, sizeof(t32));
	if (error != 0)
		return (error);
	if (t32._timeout.tv_sec < 0 ||
	    t32._timeout.tv_nsec >= 1000000000 || t32._timeout.tv_nsec < 0)
		return (EINVAL);
	TS_CP(t32, *tp, _timeout);
	CP(t32, *tp, _flags);
	CP(t32, *tp, _clockid);
	return (0);
}

static int
umtx_copyout_timeoutx32(void *uaddr, size_t sz, struct timespec *tsp)
{
	struct timespecx32 remain32 = {
		.tv_sec = tsp->tv_sec,
		.tv_nsec = tsp->tv_nsec,
	};

	/*
	 * Should be guaranteed by the caller: sz == uaddr1 - sizeof(_umtx_time),
	 * and we're only called if sz >= sizeof(timespec) as supplied in the
	 * copyops.
	 */
	KASSERT(sz >= sizeof(remain32),
	    ("umtx_copyops specifies incorrect sizes"));

	return (copyout(&remain32, uaddr, sizeof(remain32)));
}
#endif /* __i386__ || __LP64__ */

typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *umtx_ops);
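
/*
 * Design note (illustrative, not part of the original source): each
 * UMTX_OP_* code indexes directly into op_table below, and kern__umtx_op()
 * bounds-checks the code before dispatching.  Adding a new operation is a
 * matter of appending a handler with this signature (__umtx_op_example is
 * a hypothetical name, not part of this file):
 *
 *	static int
 *	__umtx_op_example(struct thread *td, struct _umtx_op_args *uap,
 *	    const struct umtx_copyops *ops)
 *	{
 *		return (0);
 *	}
 */
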
static const _umtx_op_func op_table[] = {
#ifdef COMPAT_FREEBSD10
	[UMTX_OP_LOCK] = __umtx_op_lock_umtx,
	[UMTX_OP_UNLOCK] = __umtx_op_unlock_umtx,
#else
	[UMTX_OP_LOCK] = __umtx_op_unimpl,
	[UMTX_OP_UNLOCK] = __umtx_op_unimpl,
#endif
	[UMTX_OP_WAIT] = __umtx_op_wait,
	[UMTX_OP_WAKE] = __umtx_op_wake,
	[UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex,
	[UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex,
	[UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex,
	[UMTX_OP_SET_CEILING] = __umtx_op_set_ceiling,
	[UMTX_OP_CV_WAIT] = __umtx_op_cv_wait,
	[UMTX_OP_CV_SIGNAL] = __umtx_op_cv_signal,
	[UMTX_OP_CV_BROADCAST] = __umtx_op_cv_broadcast,
	[UMTX_OP_WAIT_UINT] = __umtx_op_wait_uint,
	[UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock,
	[UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock,
	[UMTX_OP_RW_UNLOCK] = __umtx_op_rw_unlock,
	[UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private,
	[UMTX_OP_WAKE_PRIVATE] = __umtx_op_wake_private,
	[UMTX_OP_MUTEX_WAIT] = __umtx_op_wait_umutex,
	[UMTX_OP_MUTEX_WAKE] = __umtx_op_wake_umutex,
#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
	[UMTX_OP_SEM_WAIT] = __umtx_op_sem_wait,
	[UMTX_OP_SEM_WAKE] = __umtx_op_sem_wake,
#else
	[UMTX_OP_SEM_WAIT] = __umtx_op_unimpl,
	[UMTX_OP_SEM_WAKE] = __umtx_op_unimpl,
#endif
	[UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private,
	[UMTX_OP_MUTEX_WAKE2] = __umtx_op_wake2_umutex,
	[UMTX_OP_SEM2_WAIT] = __umtx_op_sem2_wait,
	[UMTX_OP_SEM2_WAKE] = __umtx_op_sem2_wake,
	[UMTX_OP_SHM] = __umtx_op_shm,
	[UMTX_OP_ROBUST_LISTS] = __umtx_op_robust_lists,
};

static const struct umtx_copyops umtx_native_ops = {
	.copyin_timeout = umtx_copyin_timeout,
	.copyin_umtx_time = umtx_copyin_umtx_time,
	.copyin_robust_lists = umtx_copyin_robust_lists,
	.copyout_timeout = umtx_copyout_timeout,
	.timespec_sz = sizeof(struct timespec),
	.umtx_time_sz = sizeof(struct _umtx_time),
};

#ifndef __i386__
static const struct umtx_copyops umtx_native_opsi386 = {
	.copyin_timeout = umtx_copyin_timeouti386,
	.copyin_umtx_time = umtx_copyin_umtx_timei386,
	.copyin_robust_lists = umtx_copyin_robust_lists32,
	.copyout_timeout = umtx_copyout_timeouti386,
	.timespec_sz = sizeof(struct timespeci386),
	.umtx_time_sz = sizeof(struct umtx_timei386),
	.compat32 = true,
};
#endif

#if defined(__i386__) || defined(__LP64__)
/* i386 can emulate other 32-bit archs, too! */
static const struct umtx_copyops umtx_native_opsx32 = {
	.copyin_timeout = umtx_copyin_timeoutx32,
	.copyin_umtx_time = umtx_copyin_umtx_timex32,
	.copyin_robust_lists = umtx_copyin_robust_lists32,
	.copyout_timeout = umtx_copyout_timeoutx32,
	.timespec_sz = sizeof(struct timespecx32),
	.umtx_time_sz = sizeof(struct umtx_timex32),
	.compat32 = true,
};

#ifdef COMPAT_FREEBSD32
#ifdef __amd64__
#define	umtx_native_ops32	umtx_native_opsi386
#else
#define	umtx_native_ops32	umtx_native_opsx32
#endif
#endif /* COMPAT_FREEBSD32 */
#endif /* __i386__ || __LP64__ */

#define	UMTX_OP__FLAGS	(UMTX_OP__32BIT | UMTX_OP__I386)

static int
kern__umtx_op(struct thread *td, void *obj, int op, unsigned long val,
    void *uaddr1, void *uaddr2, const struct umtx_copyops *ops)
{
	struct _umtx_op_args uap = {
		.obj = obj,
		.op = op & ~UMTX_OP__FLAGS,
		.val = val,
		.uaddr1 = uaddr1,
		.uaddr2 = uaddr2
	};

	if (uap.op >= nitems(op_table))
		return (EINVAL);
	return ((*op_table[uap.op])(td, &uap, ops));
}
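
/*
 * Usage sketch (illustrative, not part of the original source): the
 * UMTX_OP__32BIT / UMTX_OP__I386 modifier bits let a 64-bit process
 * operate on umtx objects laid out for a 32-bit ABI, e.g.:
 *
 *	_umtx_op(obj32, UMTX_OP_WAIT | UMTX_OP__32BIT, val,
 *	    (void *)(uintptr_t)tm_size, tm32);
 *
 * kern__umtx_op() masks the modifier bits back out of uap.op before
 * indexing op_table, so the handlers only ever see the plain opcode;
 * the bits merely select which umtx_copyops the handler is given.
 */
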
int
sys__umtx_op(struct thread *td, struct _umtx_op_args *uap)
{
	const struct umtx_copyops *umtx_ops;

	umtx_ops = &umtx_native_ops;
#ifdef __LP64__
	if ((uap->op & (UMTX_OP__32BIT | UMTX_OP__I386)) != 0) {
		if ((uap->op & UMTX_OP__I386) != 0)
			umtx_ops = &umtx_native_opsi386;
		else
			umtx_ops = &umtx_native_opsx32;
	}
#elif !defined(__i386__)
	/* We consider UMTX_OP__32BIT a nop on !i386 ILP32. */
	if ((uap->op & UMTX_OP__I386) != 0)
		umtx_ops = &umtx_native_opsi386;
#else
	/* Likewise, UMTX_OP__I386 is a nop on i386. */
	if ((uap->op & UMTX_OP__32BIT) != 0)
		umtx_ops = &umtx_native_opsx32;
#endif
	return (kern__umtx_op(td, uap->obj, uap->op, uap->val, uap->uaddr1,
	    uap->uaddr2, umtx_ops));
}

#ifdef COMPAT_FREEBSD32
#ifdef COMPAT_FREEBSD10
int
freebsd10_freebsd32_umtx_lock(struct thread *td,
    struct freebsd10_freebsd32_umtx_lock_args *uap)
{
	return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
}

int
freebsd10_freebsd32_umtx_unlock(struct thread *td,
    struct freebsd10_freebsd32_umtx_unlock_args *uap)
{
	return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
}
#endif /* COMPAT_FREEBSD10 */

int
freebsd32__umtx_op(struct thread *td, struct freebsd32__umtx_op_args *uap)
{

	return (kern__umtx_op(td, uap->obj, uap->op, uap->val, uap->uaddr,
	    uap->uaddr2, &umtx_native_ops32));
}
#endif /* COMPAT_FREEBSD32 */

void
umtx_thread_init(struct thread *td)
{

	td->td_umtxq = umtxq_alloc();
	td->td_umtxq->uq_thread = td;
}

void
umtx_thread_fini(struct thread *td)
{

	umtxq_free(td->td_umtxq);
}

/*
 * Called when a new thread is created, e.g. by fork().
 */
void
umtx_thread_alloc(struct thread *td)
{
	struct umtx_q *uq;

	uq = td->td_umtxq;
	uq->uq_inherited_pri = PRI_MAX;

	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
}

/*
 * exec() hook.
 *
 * Clear the robust lists for all of the process's threads, without
 * delaying the cleanup to thread exit, since the relevant address
 * space is being destroyed right now.
 */
void
umtx_exec(struct proc *p)
{
	struct thread *td;

	KASSERT(p == curproc, ("need curproc"));
	KASSERT((p->p_flag & P_HADTHREADS) == 0 ||
	    (p->p_flag & P_STOPPED_SINGLE) != 0,
	    ("curproc must be single-threaded"));
	/*
	 * There is no need to lock the list as only this thread can be
	 * running.
	 */
	FOREACH_THREAD_IN_PROC(p, td) {
		KASSERT(td == curthread ||
		    ((td->td_flags & TDF_BOUNDARY) != 0 && TD_IS_SUSPENDED(td)),
		    ("running thread %p %p", p, td));
		umtx_thread_cleanup(td);
		td->td_rb_list = td->td_rbp_list = td->td_rb_inact = 0;
	}
}
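
/*
 * Layout sketch (illustrative, not part of the original source): the
 * robust lists walked by the cleanup code below are singly linked through
 * the m_rb_lnk member of each userspace umutex:
 *
 *	td_rb_list -> &head                 (address of the list head)
 *	head       -> umutex A              (first owned robust mutex)
 *	A.m_rb_lnk -> umutex B -> ... -> 0  (zero terminates the list)
 *
 * The walk is capped at umtx_max_rb entries so that a corrupted or
 * malicious cyclic list cannot wedge an exiting thread in the kernel.
 */
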
/*
 * thread exit hook.
 */
void
umtx_thread_exit(struct thread *td)
{

	umtx_thread_cleanup(td);
}

static int
umtx_read_uptr(struct thread *td, uintptr_t ptr, uintptr_t *res, bool compat32)
{
	u_long res1;
	uint32_t res32;
	int error;

	if (compat32) {
		error = fueword32((void *)ptr, &res32);
		if (error == 0)
			res1 = res32;
	} else {
		error = fueword((void *)ptr, &res1);
	}
	if (error == 0)
		*res = res1;
	else
		error = EFAULT;
	return (error);
}

static void
umtx_read_rb_list(struct thread *td, struct umutex *m, uintptr_t *rb_list,
    bool compat32)
{
	struct umutex32 m32;

	if (compat32) {
		memcpy(&m32, m, sizeof(m32));
		*rb_list = m32.m_rb_lnk;
	} else {
		*rb_list = m->m_rb_lnk;
	}
}

static int
umtx_handle_rb(struct thread *td, uintptr_t rbp, uintptr_t *rb_list, bool inact,
    bool compat32)
{
	struct umutex m;
	int error;

	KASSERT(td->td_proc == curproc, ("need current vmspace"));
	error = copyin((void *)rbp, &m, sizeof(m));
	if (error != 0)
		return (error);
	if (rb_list != NULL)
		umtx_read_rb_list(td, &m, rb_list, compat32);
	if ((m.m_flags & UMUTEX_ROBUST) == 0)
		return (EINVAL);
	if ((m.m_owner & ~UMUTEX_CONTESTED) != td->td_tid)
		/* inact is cleared after unlock, allow the inconsistency */
		return (inact ? 0 : EINVAL);
	return (do_unlock_umutex(td, (struct umutex *)rbp, true));
}

static void
umtx_cleanup_rb_list(struct thread *td, uintptr_t rb_list, uintptr_t *rb_inact,
    const char *name, bool compat32)
{
	int error, i;
	uintptr_t rbp;
	bool inact;

	if (rb_list == 0)
		return;
	error = umtx_read_uptr(td, rb_list, &rbp, compat32);
	for (i = 0; error == 0 && rbp != 0 && i < umtx_max_rb; i++) {
		if (rbp == *rb_inact) {
			inact = true;
			*rb_inact = 0;
		} else
			inact = false;
		error = umtx_handle_rb(td, rbp, &rbp, inact, compat32);
	}
	if (i == umtx_max_rb && umtx_verbose_rb) {
		uprintf("comm %s pid %d: reached umtx %smax rb %d\n",
		    td->td_proc->p_comm, td->td_proc->p_pid, name, umtx_max_rb);
	}
	if (error != 0 && umtx_verbose_rb) {
		uprintf("comm %s pid %d: handling %srb error %d\n",
		    td->td_proc->p_comm, td->td_proc->p_pid, name, error);
	}
}
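
/*
 * Protocol sketch (illustrative; the userland half lives in libthr, and
 * the names below are hypothetical): before mutating a robust mutex, a
 * thread publishes the mutex's address in its registered "inactive" slot
 * so the kernel can recover if the thread dies mid-update:
 *
 *	thr->inact_mtx = (uintptr_t)m;	// kernel may unlock m if we die here
 *	enqueue_on_robust_list(thr, m);
 *	thr->inact_mtx = 0;		// update complete
 *
 * umtx_thread_cleanup() below consumes that slot via td_rb_inact.
 */
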
/*
 * Clean up umtx data.
 */
static void
umtx_thread_cleanup(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	uintptr_t rb_inact;
	bool compat32;

	/*
	 * Disown pi mutexes.
	 */
	uq = td->td_umtxq;
	if (uq != NULL) {
		if (uq->uq_inherited_pri != PRI_MAX ||
		    !TAILQ_EMPTY(&uq->uq_pi_contested)) {
			mtx_lock(&umtx_lock);
			uq->uq_inherited_pri = PRI_MAX;
			while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
				pi->pi_owner = NULL;
				TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
			}
			mtx_unlock(&umtx_lock);
		}
		sched_lend_user_prio_cond(td, PRI_MAX);
	}

	compat32 = (td->td_pflags2 & TDP2_COMPAT32RB) != 0;
	td->td_pflags2 &= ~TDP2_COMPAT32RB;

	if (td->td_rb_inact == 0 && td->td_rb_list == 0 && td->td_rbp_list == 0)
		return;

	/*
	 * Handle terminated robust mutexes.  Must be done after
	 * robust pi disown, otherwise unlock could see unowned
	 * entries.
	 */
	rb_inact = td->td_rb_inact;
	if (rb_inact != 0)
		(void)umtx_read_uptr(td, rb_inact, &rb_inact, compat32);
	umtx_cleanup_rb_list(td, td->td_rb_list, &rb_inact, "", compat32);
	umtx_cleanup_rb_list(td, td->td_rbp_list, &rb_inact, "priv ", compat32);
	if (rb_inact != 0)
		(void)umtx_handle_rb(td, rb_inact, NULL, true, compat32);
}