1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2015, 2016 The FreeBSD Foundation 5 * Copyright (c) 2004, David Xu <davidxu@freebsd.org> 6 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org> 7 * All rights reserved. 8 * 9 * Portions of this software were developed by Konstantin Belousov 10 * under sponsorship from the FreeBSD Foundation. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice unmodified, this list of conditions, and the following 17 * disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 23 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 24 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 25 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 27 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 31 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 37 #include "opt_umtx_profiling.h" 38 39 #include <sys/param.h> 40 #include <sys/kernel.h> 41 #include <sys/fcntl.h> 42 #include <sys/file.h> 43 #include <sys/filedesc.h> 44 #include <sys/limits.h> 45 #include <sys/lock.h> 46 #include <sys/malloc.h> 47 #include <sys/mman.h> 48 #include <sys/mutex.h> 49 #include <sys/priv.h> 50 #include <sys/proc.h> 51 #include <sys/resource.h> 52 #include <sys/resourcevar.h> 53 #include <sys/rwlock.h> 54 #include <sys/sbuf.h> 55 #include <sys/sched.h> 56 #include <sys/smp.h> 57 #include <sys/sysctl.h> 58 #include <sys/sysent.h> 59 #include <sys/systm.h> 60 #include <sys/sysproto.h> 61 #include <sys/syscallsubr.h> 62 #include <sys/taskqueue.h> 63 #include <sys/time.h> 64 #include <sys/eventhandler.h> 65 #include <sys/umtx.h> 66 67 #include <security/mac/mac_framework.h> 68 69 #include <vm/vm.h> 70 #include <vm/vm_param.h> 71 #include <vm/pmap.h> 72 #include <vm/vm_map.h> 73 #include <vm/vm_object.h> 74 75 #include <machine/atomic.h> 76 #include <machine/cpu.h> 77 78 #include <compat/freebsd32/freebsd32.h> 79 #ifdef COMPAT_FREEBSD32 80 #include <compat/freebsd32/freebsd32_proto.h> 81 #endif 82 83 #define _UMUTEX_TRY 1 84 #define _UMUTEX_WAIT 2 85 86 #ifdef UMTX_PROFILING 87 #define UPROF_PERC_BIGGER(w, f, sw, sf) \ 88 (((w) > (sw)) || ((w) == (sw) && (f) > (sf))) 89 #endif 90 91 /* Priority inheritance mutex info. 
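 *
 * One umtx_pi is allocated per contested userland PI mutex, keyed by its
 * umtx_key.  While a umtx_pi has waiters, the owning thread is lent the
 * highest priority among those waiters (see umtx_propagate_priority()),
 * so a low-priority owner cannot indefinitely delay a high-priority waiter.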
 */
struct umtx_pi {
	/* Owner thread */
	struct thread		*pi_owner;

	/* Reference count */
	int			pi_refcount;

	/* List entry linking PI mutexes held by a thread */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* List entry in hash */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* List for waiters */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Key identifying the userland lock object */
	struct umtx_key		pi_key;
};

/* A user (waiter) of a userland synchronization object. */
struct umtx_q {
	/* Linked list for the hash. */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Umtx key. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define UQF_UMTXQ	0x0001

	/* The waiting thread this entry belongs to. */
	struct thread		*uq_thread;

	/*
	 * PI mutex this thread is blocked on.  Readers need either the
	 * chain lock or umtx_lock; writers must hold both.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* Entry on the blocked list of a PI mutex */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* Contested PI mutexes owned by this thread */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Inherited priority from PP mutex */
	u_char			uq_inherited_pri;

	/* Spare queue ready to be reused */
	struct umtxq_queue	*uq_spare_queue;

	/* The queue this thread is currently on */
	struct umtxq_queue	*uq_cur_queue;
};

TAILQ_HEAD(umtxq_head, umtx_q);

/* Per-key wait-queue */
struct umtxq_queue {
	struct umtxq_head	head;
	struct umtx_key		key;
	LIST_ENTRY(umtxq_queue)	link;
	int			length;
};

LIST_HEAD(umtxq_list, umtxq_queue);

/* Userland lock object's wait-queue chain */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx		uc_lock;

	/* List of sleep queues. */
	struct umtxq_list	uc_queue[2];
#define UMTX_SHARED_QUEUE	0
#define UMTX_EXCLUSIVE_QUEUE	1

	LIST_HEAD(, umtxq_queue) uc_spare_queue;

	/* Busy flag */
	char			uc_busy;

	/* Chain lock waiters */
	int			uc_waiters;

	/* All PI mutexes hashed to this chain */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;

#ifdef UMTX_PROFILING
	u_int			length;
	u_int			max_length;
#endif
};

#define	UMTXQ_LOCKED_ASSERT(uc)		mtx_assert(&(uc)->uc_lock, MA_OWNED)

/*
 * Do not propagate time-sharing priority.  There is a security reason:
 * a user could create a PI mutex, have thread A lock it, and have another
 * thread B block on it.  Because B is sleeping, its priority would be
 * boosted, and priority propagation would then boost A's priority as well.
 * A's priority would never be lowered even while A consumes 100% of a CPU,
 * which is unfair to other processes.
 */

#define UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
			 PRI_MAX_TIMESHARE : (td)->td_user_pri)

#define	GOLDEN_RATIO_PRIME	2654404609U
#ifndef	UMTX_CHAINS
#define	UMTX_CHAINS		512
#endif
#define	UMTX_SHIFTS		(__WORD_BIT - 9)

#define	GET_SHARE(flags)	\
    (((flags) & USYNC_PROCESS_SHARED) == 0 ?
THREAD_SHARE : PROCESS_SHARE) 212 213 #define BUSY_SPINS 200 214 215 struct abs_timeout { 216 int clockid; 217 bool is_abs_real; /* TIMER_ABSTIME && CLOCK_REALTIME* */ 218 struct timespec cur; 219 struct timespec end; 220 }; 221 222 struct umtx_copyops { 223 int (*copyin_timeout)(const void *uaddr, struct timespec *tsp); 224 int (*copyin_umtx_time)(const void *uaddr, size_t size, 225 struct _umtx_time *tp); 226 int (*copyin_robust_lists)(const void *uaddr, size_t size, 227 struct umtx_robust_lists_params *rbp); 228 int (*copyout_timeout)(void *uaddr, size_t size, 229 struct timespec *tsp); 230 const size_t timespec_sz; 231 const size_t umtx_time_sz; 232 const bool compat32; 233 }; 234 235 _Static_assert(sizeof(struct umutex) == sizeof(struct umutex32), "umutex32"); 236 _Static_assert(__offsetof(struct umutex, m_spare[0]) == 237 __offsetof(struct umutex32, m_spare[0]), "m_spare32"); 238 239 int umtx_shm_vnobj_persistent = 0; 240 SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_vnode_persistent, CTLFLAG_RWTUN, 241 &umtx_shm_vnobj_persistent, 0, 242 "False forces destruction of umtx attached to file, on last close"); 243 static int umtx_max_rb = 1000; 244 SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_max_robust, CTLFLAG_RWTUN, 245 &umtx_max_rb, 0, 246 "Maximum number of robust mutexes allowed for each thread"); 247 248 static uma_zone_t umtx_pi_zone; 249 static struct umtxq_chain umtxq_chains[2][UMTX_CHAINS]; 250 static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory"); 251 static int umtx_pi_allocated; 252 253 static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 254 "umtx debug"); 255 SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD, 256 &umtx_pi_allocated, 0, "Allocated umtx_pi"); 257 static int umtx_verbose_rb = 1; 258 SYSCTL_INT(_debug_umtx, OID_AUTO, robust_faults_verbose, CTLFLAG_RWTUN, 259 &umtx_verbose_rb, 0, 260 ""); 261 262 #ifdef UMTX_PROFILING 263 static long max_length; 264 SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0, "max_length"); 265 static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 266 "umtx chain stats"); 267 #endif 268 269 static void abs_timeout_update(struct abs_timeout *timo); 270 271 static void umtx_shm_init(void); 272 static void umtxq_sysinit(void *); 273 static void umtxq_hash(struct umtx_key *key); 274 static struct umtxq_chain *umtxq_getchain(struct umtx_key *key); 275 static void umtxq_lock(struct umtx_key *key); 276 static void umtxq_unlock(struct umtx_key *key); 277 static void umtxq_busy(struct umtx_key *key); 278 static void umtxq_unbusy(struct umtx_key *key); 279 static void umtxq_insert_queue(struct umtx_q *uq, int q); 280 static void umtxq_remove_queue(struct umtx_q *uq, int q); 281 static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *); 282 static int umtxq_count(struct umtx_key *key); 283 static struct umtx_pi *umtx_pi_alloc(int); 284 static void umtx_pi_free(struct umtx_pi *pi); 285 static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, 286 bool rb); 287 static void umtx_thread_cleanup(struct thread *td); 288 SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL); 289 290 #define umtxq_signal(key, nwake) umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE) 291 #define umtxq_insert(uq) umtxq_insert_queue((uq), UMTX_SHARED_QUEUE) 292 #define umtxq_remove(uq) umtxq_remove_queue((uq), UMTX_SHARED_QUEUE) 293 294 static struct mtx umtx_lock; 295 296 #ifdef UMTX_PROFILING 297 static void 298 
umtx_init_profiling(void) 299 { 300 struct sysctl_oid *chain_oid; 301 char chain_name[10]; 302 int i; 303 304 for (i = 0; i < UMTX_CHAINS; ++i) { 305 snprintf(chain_name, sizeof(chain_name), "%d", i); 306 chain_oid = SYSCTL_ADD_NODE(NULL, 307 SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO, 308 chain_name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 309 "umtx hash stats"); 310 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, 311 "max_length0", CTLFLAG_RD, &umtxq_chains[0][i].max_length, 0, NULL); 312 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, 313 "max_length1", CTLFLAG_RD, &umtxq_chains[1][i].max_length, 0, NULL); 314 } 315 } 316 317 static int 318 sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS) 319 { 320 char buf[512]; 321 struct sbuf sb; 322 struct umtxq_chain *uc; 323 u_int fract, i, j, tot, whole; 324 u_int sf0, sf1, sf2, sf3, sf4; 325 u_int si0, si1, si2, si3, si4; 326 u_int sw0, sw1, sw2, sw3, sw4; 327 328 sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN); 329 for (i = 0; i < 2; i++) { 330 tot = 0; 331 for (j = 0; j < UMTX_CHAINS; ++j) { 332 uc = &umtxq_chains[i][j]; 333 mtx_lock(&uc->uc_lock); 334 tot += uc->max_length; 335 mtx_unlock(&uc->uc_lock); 336 } 337 if (tot == 0) 338 sbuf_printf(&sb, "%u) Empty ", i); 339 else { 340 sf0 = sf1 = sf2 = sf3 = sf4 = 0; 341 si0 = si1 = si2 = si3 = si4 = 0; 342 sw0 = sw1 = sw2 = sw3 = sw4 = 0; 343 for (j = 0; j < UMTX_CHAINS; j++) { 344 uc = &umtxq_chains[i][j]; 345 mtx_lock(&uc->uc_lock); 346 whole = uc->max_length * 100; 347 mtx_unlock(&uc->uc_lock); 348 fract = (whole % tot) * 100; 349 if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) { 350 sf0 = fract; 351 si0 = j; 352 sw0 = whole; 353 } else if (UPROF_PERC_BIGGER(whole, fract, sw1, 354 sf1)) { 355 sf1 = fract; 356 si1 = j; 357 sw1 = whole; 358 } else if (UPROF_PERC_BIGGER(whole, fract, sw2, 359 sf2)) { 360 sf2 = fract; 361 si2 = j; 362 sw2 = whole; 363 } else if (UPROF_PERC_BIGGER(whole, fract, sw3, 364 sf3)) { 365 sf3 = fract; 366 si3 = j; 367 sw3 = whole; 368 } else if (UPROF_PERC_BIGGER(whole, fract, sw4, 369 sf4)) { 370 sf4 = fract; 371 si4 = j; 372 sw4 = whole; 373 } 374 } 375 sbuf_printf(&sb, "queue %u:\n", i); 376 sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot, 377 sf0 / tot, si0); 378 sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot, 379 sf1 / tot, si1); 380 sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot, 381 sf2 / tot, si2); 382 sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot, 383 sf3 / tot, si3); 384 sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot, 385 sf4 / tot, si4); 386 } 387 } 388 sbuf_trim(&sb); 389 sbuf_finish(&sb); 390 sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req); 391 sbuf_delete(&sb); 392 return (0); 393 } 394 395 static int 396 sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS) 397 { 398 struct umtxq_chain *uc; 399 u_int i, j; 400 int clear, error; 401 402 clear = 0; 403 error = sysctl_handle_int(oidp, &clear, 0, req); 404 if (error != 0 || req->newptr == NULL) 405 return (error); 406 407 if (clear != 0) { 408 for (i = 0; i < 2; ++i) { 409 for (j = 0; j < UMTX_CHAINS; ++j) { 410 uc = &umtxq_chains[i][j]; 411 mtx_lock(&uc->uc_lock); 412 uc->length = 0; 413 uc->max_length = 0; 414 mtx_unlock(&uc->uc_lock); 415 } 416 } 417 } 418 return (0); 419 } 420 421 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear, 422 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, 423 sysctl_debug_umtx_chains_clear, "I", 424 "Clear umtx chains statistics"); 425 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks, 426 CTLTYPE_STRING | 
CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0, 427 sysctl_debug_umtx_chains_peaks, "A", 428 "Highest peaks in chains max length"); 429 #endif 430 431 static void 432 umtxq_sysinit(void *arg __unused) 433 { 434 int i, j; 435 436 umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi), 437 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 438 for (i = 0; i < 2; ++i) { 439 for (j = 0; j < UMTX_CHAINS; ++j) { 440 mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL, 441 MTX_DEF | MTX_DUPOK); 442 LIST_INIT(&umtxq_chains[i][j].uc_queue[0]); 443 LIST_INIT(&umtxq_chains[i][j].uc_queue[1]); 444 LIST_INIT(&umtxq_chains[i][j].uc_spare_queue); 445 TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list); 446 umtxq_chains[i][j].uc_busy = 0; 447 umtxq_chains[i][j].uc_waiters = 0; 448 #ifdef UMTX_PROFILING 449 umtxq_chains[i][j].length = 0; 450 umtxq_chains[i][j].max_length = 0; 451 #endif 452 } 453 } 454 #ifdef UMTX_PROFILING 455 umtx_init_profiling(); 456 #endif 457 mtx_init(&umtx_lock, "umtx lock", NULL, MTX_DEF); 458 umtx_shm_init(); 459 } 460 461 struct umtx_q * 462 umtxq_alloc(void) 463 { 464 struct umtx_q *uq; 465 466 uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO); 467 uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX, 468 M_WAITOK | M_ZERO); 469 TAILQ_INIT(&uq->uq_spare_queue->head); 470 TAILQ_INIT(&uq->uq_pi_contested); 471 uq->uq_inherited_pri = PRI_MAX; 472 return (uq); 473 } 474 475 void 476 umtxq_free(struct umtx_q *uq) 477 { 478 479 MPASS(uq->uq_spare_queue != NULL); 480 free(uq->uq_spare_queue, M_UMTX); 481 free(uq, M_UMTX); 482 } 483 484 static inline void 485 umtxq_hash(struct umtx_key *key) 486 { 487 unsigned n; 488 489 n = (uintptr_t)key->info.both.a + key->info.both.b; 490 key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS; 491 } 492 493 static inline struct umtxq_chain * 494 umtxq_getchain(struct umtx_key *key) 495 { 496 497 if (key->type <= TYPE_SEM) 498 return (&umtxq_chains[1][key->hash]); 499 return (&umtxq_chains[0][key->hash]); 500 } 501 502 /* 503 * Lock a chain. 504 */ 505 static inline void 506 umtxq_lock(struct umtx_key *key) 507 { 508 struct umtxq_chain *uc; 509 510 uc = umtxq_getchain(key); 511 mtx_lock(&uc->uc_lock); 512 } 513 514 /* 515 * Unlock a chain. 516 */ 517 static inline void 518 umtxq_unlock(struct umtx_key *key) 519 { 520 struct umtxq_chain *uc; 521 522 uc = umtxq_getchain(key); 523 mtx_unlock(&uc->uc_lock); 524 } 525 526 /* 527 * Set chain to busy state when following operation 528 * may be blocked (kernel mutex can not be used). 529 */ 530 static inline void 531 umtxq_busy(struct umtx_key *key) 532 { 533 struct umtxq_chain *uc; 534 535 uc = umtxq_getchain(key); 536 mtx_assert(&uc->uc_lock, MA_OWNED); 537 if (uc->uc_busy) { 538 #ifdef SMP 539 if (smp_cpus > 1) { 540 int count = BUSY_SPINS; 541 if (count > 0) { 542 umtxq_unlock(key); 543 while (uc->uc_busy && --count > 0) 544 cpu_spinwait(); 545 umtxq_lock(key); 546 } 547 } 548 #endif 549 while (uc->uc_busy) { 550 uc->uc_waiters++; 551 msleep(uc, &uc->uc_lock, 0, "umtxqb", 0); 552 uc->uc_waiters--; 553 } 554 } 555 uc->uc_busy = 1; 556 } 557 558 /* 559 * Unbusy a chain. 
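 *
 * The busy bit serializes sleepable operations on a chain: a caller marks
 * the chain busy while holding the chain mutex, drops the mutex around
 * accesses to user memory that may fault or sleep, and unbusies the chain
 * afterwards.  An illustrative sequence (variations of it appear in
 * do_lock_normal() and do_set_ceiling() below):
 *
 *	umtxq_lock(&key);
 *	umtxq_busy(&key);	(may sleep until the bit is clear)
 *	umtxq_unlock(&key);
 *	... fueword32()/casueword32() on the userland word ...
 *	umtxq_unbusy_unlocked(&key);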
560 */ 561 static inline void 562 umtxq_unbusy(struct umtx_key *key) 563 { 564 struct umtxq_chain *uc; 565 566 uc = umtxq_getchain(key); 567 mtx_assert(&uc->uc_lock, MA_OWNED); 568 KASSERT(uc->uc_busy != 0, ("not busy")); 569 uc->uc_busy = 0; 570 if (uc->uc_waiters) 571 wakeup_one(uc); 572 } 573 574 static inline void 575 umtxq_unbusy_unlocked(struct umtx_key *key) 576 { 577 578 umtxq_lock(key); 579 umtxq_unbusy(key); 580 umtxq_unlock(key); 581 } 582 583 static struct umtxq_queue * 584 umtxq_queue_lookup(struct umtx_key *key, int q) 585 { 586 struct umtxq_queue *uh; 587 struct umtxq_chain *uc; 588 589 uc = umtxq_getchain(key); 590 UMTXQ_LOCKED_ASSERT(uc); 591 LIST_FOREACH(uh, &uc->uc_queue[q], link) { 592 if (umtx_key_match(&uh->key, key)) 593 return (uh); 594 } 595 596 return (NULL); 597 } 598 599 static inline void 600 umtxq_insert_queue(struct umtx_q *uq, int q) 601 { 602 struct umtxq_queue *uh; 603 struct umtxq_chain *uc; 604 605 uc = umtxq_getchain(&uq->uq_key); 606 UMTXQ_LOCKED_ASSERT(uc); 607 KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue")); 608 uh = umtxq_queue_lookup(&uq->uq_key, q); 609 if (uh != NULL) { 610 LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link); 611 } else { 612 uh = uq->uq_spare_queue; 613 uh->key = uq->uq_key; 614 LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link); 615 #ifdef UMTX_PROFILING 616 uc->length++; 617 if (uc->length > uc->max_length) { 618 uc->max_length = uc->length; 619 if (uc->max_length > max_length) 620 max_length = uc->max_length; 621 } 622 #endif 623 } 624 uq->uq_spare_queue = NULL; 625 626 TAILQ_INSERT_TAIL(&uh->head, uq, uq_link); 627 uh->length++; 628 uq->uq_flags |= UQF_UMTXQ; 629 uq->uq_cur_queue = uh; 630 return; 631 } 632 633 static inline void 634 umtxq_remove_queue(struct umtx_q *uq, int q) 635 { 636 struct umtxq_chain *uc; 637 struct umtxq_queue *uh; 638 639 uc = umtxq_getchain(&uq->uq_key); 640 UMTXQ_LOCKED_ASSERT(uc); 641 if (uq->uq_flags & UQF_UMTXQ) { 642 uh = uq->uq_cur_queue; 643 TAILQ_REMOVE(&uh->head, uq, uq_link); 644 uh->length--; 645 uq->uq_flags &= ~UQF_UMTXQ; 646 if (TAILQ_EMPTY(&uh->head)) { 647 KASSERT(uh->length == 0, 648 ("inconsistent umtxq_queue length")); 649 #ifdef UMTX_PROFILING 650 uc->length--; 651 #endif 652 LIST_REMOVE(uh, link); 653 } else { 654 uh = LIST_FIRST(&uc->uc_spare_queue); 655 KASSERT(uh != NULL, ("uc_spare_queue is empty")); 656 LIST_REMOVE(uh, link); 657 } 658 uq->uq_spare_queue = uh; 659 uq->uq_cur_queue = NULL; 660 } 661 } 662 663 /* 664 * Check if there are multiple waiters 665 */ 666 static int 667 umtxq_count(struct umtx_key *key) 668 { 669 struct umtxq_queue *uh; 670 671 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 672 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 673 if (uh != NULL) 674 return (uh->length); 675 return (0); 676 } 677 678 /* 679 * Check if there are multiple PI waiters and returns first 680 * waiter. 681 */ 682 static int 683 umtxq_count_pi(struct umtx_key *key, struct umtx_q **first) 684 { 685 struct umtxq_queue *uh; 686 687 *first = NULL; 688 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 689 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 690 if (uh != NULL) { 691 *first = TAILQ_FIRST(&uh->head); 692 return (uh->length); 693 } 694 return (0); 695 } 696 697 /* 698 * Wake up threads waiting on an userland object. 
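 *
 * At most n_wake waiters are removed from the queue and woken up, and the
 * number actually woken is returned.  Callers pass 1 to hand a lock off to
 * a single waiter and INT_MAX to drain the queue (see do_set_ceiling() and
 * the fault path of do_wake2_umutex()).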
699 */ 700 701 static int 702 umtxq_signal_queue(struct umtx_key *key, int n_wake, int q) 703 { 704 struct umtxq_queue *uh; 705 struct umtx_q *uq; 706 int ret; 707 708 ret = 0; 709 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 710 uh = umtxq_queue_lookup(key, q); 711 if (uh != NULL) { 712 while ((uq = TAILQ_FIRST(&uh->head)) != NULL) { 713 umtxq_remove_queue(uq, q); 714 wakeup(uq); 715 if (++ret >= n_wake) 716 return (ret); 717 } 718 } 719 return (ret); 720 } 721 722 /* 723 * Wake up specified thread. 724 */ 725 static inline void 726 umtxq_signal_thread(struct umtx_q *uq) 727 { 728 729 UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key)); 730 umtxq_remove(uq); 731 wakeup(uq); 732 } 733 734 static inline int 735 tstohz(const struct timespec *tsp) 736 { 737 struct timeval tv; 738 739 TIMESPEC_TO_TIMEVAL(&tv, tsp); 740 return tvtohz(&tv); 741 } 742 743 static void 744 abs_timeout_init(struct abs_timeout *timo, int clockid, int absolute, 745 const struct timespec *timeout) 746 { 747 748 timo->clockid = clockid; 749 if (!absolute) { 750 timo->is_abs_real = false; 751 abs_timeout_update(timo); 752 timespecadd(&timo->cur, timeout, &timo->end); 753 } else { 754 timo->end = *timeout; 755 timo->is_abs_real = clockid == CLOCK_REALTIME || 756 clockid == CLOCK_REALTIME_FAST || 757 clockid == CLOCK_REALTIME_PRECISE; 758 /* 759 * If is_abs_real, umtxq_sleep will read the clock 760 * after setting td_rtcgen; otherwise, read it here. 761 */ 762 if (!timo->is_abs_real) { 763 abs_timeout_update(timo); 764 } 765 } 766 } 767 768 static void 769 abs_timeout_init2(struct abs_timeout *timo, const struct _umtx_time *umtxtime) 770 { 771 772 abs_timeout_init(timo, umtxtime->_clockid, 773 (umtxtime->_flags & UMTX_ABSTIME) != 0, &umtxtime->_timeout); 774 } 775 776 static inline void 777 abs_timeout_update(struct abs_timeout *timo) 778 { 779 780 kern_clock_gettime(curthread, timo->clockid, &timo->cur); 781 } 782 783 static int 784 abs_timeout_gethz(struct abs_timeout *timo) 785 { 786 struct timespec tts; 787 788 if (timespeccmp(&timo->end, &timo->cur, <=)) 789 return (-1); 790 timespecsub(&timo->end, &timo->cur, &tts); 791 return (tstohz(&tts)); 792 } 793 794 static uint32_t 795 umtx_unlock_val(uint32_t flags, bool rb) 796 { 797 798 if (rb) 799 return (UMUTEX_RB_OWNERDEAD); 800 else if ((flags & UMUTEX_NONCONSISTENT) != 0) 801 return (UMUTEX_RB_NOTRECOV); 802 else 803 return (UMUTEX_UNOWNED); 804 805 } 806 807 /* 808 * Put thread into sleep state, before sleeping, check if 809 * thread was removed from umtx queue. 
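 *
 * Callers hold the chain lock and have normally queued the thread with
 * umtxq_insert() already.  msleep() is entered with PDROP, so the loop
 * below retakes the chain lock and re-checks UQF_UMTXQ before each sleep;
 * a cleared flag means another thread already dequeued and woke us, and 0
 * is returned even if the timeout has meanwhile expired.  For absolute
 * CLOCK_REALTIME* timeouts td_rtcgen is set first, so a step of the
 * real-time clock also interrupts the sleep and the timeout is recomputed.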
810 */ 811 static inline int 812 umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *abstime) 813 { 814 struct umtxq_chain *uc; 815 int error, timo; 816 817 if (abstime != NULL && abstime->is_abs_real) { 818 curthread->td_rtcgen = atomic_load_acq_int(&rtc_generation); 819 abs_timeout_update(abstime); 820 } 821 822 uc = umtxq_getchain(&uq->uq_key); 823 UMTXQ_LOCKED_ASSERT(uc); 824 for (;;) { 825 if (!(uq->uq_flags & UQF_UMTXQ)) { 826 error = 0; 827 break; 828 } 829 if (abstime != NULL) { 830 timo = abs_timeout_gethz(abstime); 831 if (timo < 0) { 832 error = ETIMEDOUT; 833 break; 834 } 835 } else 836 timo = 0; 837 error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo); 838 if (error == EINTR || error == ERESTART) { 839 umtxq_lock(&uq->uq_key); 840 break; 841 } 842 if (abstime != NULL) { 843 if (abstime->is_abs_real) 844 curthread->td_rtcgen = 845 atomic_load_acq_int(&rtc_generation); 846 abs_timeout_update(abstime); 847 } 848 umtxq_lock(&uq->uq_key); 849 } 850 851 curthread->td_rtcgen = 0; 852 return (error); 853 } 854 855 /* 856 * Convert userspace address into unique logical address. 857 */ 858 int 859 umtx_key_get(const void *addr, int type, int share, struct umtx_key *key) 860 { 861 struct thread *td = curthread; 862 vm_map_t map; 863 vm_map_entry_t entry; 864 vm_pindex_t pindex; 865 vm_prot_t prot; 866 boolean_t wired; 867 868 key->type = type; 869 if (share == THREAD_SHARE) { 870 key->shared = 0; 871 key->info.private.vs = td->td_proc->p_vmspace; 872 key->info.private.addr = (uintptr_t)addr; 873 } else { 874 MPASS(share == PROCESS_SHARE || share == AUTO_SHARE); 875 map = &td->td_proc->p_vmspace->vm_map; 876 if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE, 877 &entry, &key->info.shared.object, &pindex, &prot, 878 &wired) != KERN_SUCCESS) { 879 return (EFAULT); 880 } 881 882 if ((share == PROCESS_SHARE) || 883 (share == AUTO_SHARE && 884 VM_INHERIT_SHARE == entry->inheritance)) { 885 key->shared = 1; 886 key->info.shared.offset = (vm_offset_t)addr - 887 entry->start + entry->offset; 888 vm_object_reference(key->info.shared.object); 889 } else { 890 key->shared = 0; 891 key->info.private.vs = td->td_proc->p_vmspace; 892 key->info.private.addr = (uintptr_t)addr; 893 } 894 vm_map_lookup_done(map, entry); 895 } 896 897 umtxq_hash(key); 898 return (0); 899 } 900 901 /* 902 * Release key. 903 */ 904 void 905 umtx_key_release(struct umtx_key *key) 906 { 907 if (key->shared) 908 vm_object_deallocate(key->info.shared.object); 909 } 910 911 /* 912 * Fetch and compare value, sleep on the address if value is not changed. 913 */ 914 static int 915 do_wait(struct thread *td, void *addr, u_long id, 916 struct _umtx_time *timeout, int compat32, int is_private) 917 { 918 struct abs_timeout timo; 919 struct umtx_q *uq; 920 u_long tmp; 921 uint32_t tmp32; 922 int error = 0; 923 924 uq = td->td_umtxq; 925 if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT, 926 is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0) 927 return (error); 928 929 if (timeout != NULL) 930 abs_timeout_init2(&timo, timeout); 931 932 umtxq_lock(&uq->uq_key); 933 umtxq_insert(uq); 934 umtxq_unlock(&uq->uq_key); 935 if (compat32 == 0) { 936 error = fueword(addr, &tmp); 937 if (error != 0) 938 error = EFAULT; 939 } else { 940 error = fueword32(addr, &tmp32); 941 if (error == 0) 942 tmp = tmp32; 943 else 944 error = EFAULT; 945 } 946 umtxq_lock(&uq->uq_key); 947 if (error == 0) { 948 if (tmp == id) 949 error = umtxq_sleep(uq, "uwait", timeout == NULL ? 
950 NULL : &timo); 951 if ((uq->uq_flags & UQF_UMTXQ) == 0) 952 error = 0; 953 else 954 umtxq_remove(uq); 955 } else if ((uq->uq_flags & UQF_UMTXQ) != 0) { 956 umtxq_remove(uq); 957 } 958 umtxq_unlock(&uq->uq_key); 959 umtx_key_release(&uq->uq_key); 960 if (error == ERESTART) 961 error = EINTR; 962 return (error); 963 } 964 965 /* 966 * Wake up threads sleeping on the specified address. 967 */ 968 int 969 kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private) 970 { 971 struct umtx_key key; 972 int ret; 973 974 if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, 975 is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0) 976 return (ret); 977 umtxq_lock(&key); 978 umtxq_signal(&key, n_wake); 979 umtxq_unlock(&key); 980 umtx_key_release(&key); 981 return (0); 982 } 983 984 /* 985 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex. 986 */ 987 static int 988 do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, 989 struct _umtx_time *timeout, int mode) 990 { 991 struct abs_timeout timo; 992 struct umtx_q *uq; 993 uint32_t owner, old, id; 994 int error, rv; 995 996 id = td->td_tid; 997 uq = td->td_umtxq; 998 error = 0; 999 if (timeout != NULL) 1000 abs_timeout_init2(&timo, timeout); 1001 1002 /* 1003 * Care must be exercised when dealing with umtx structure. It 1004 * can fault on any access. 1005 */ 1006 for (;;) { 1007 rv = fueword32(&m->m_owner, &owner); 1008 if (rv == -1) 1009 return (EFAULT); 1010 if (mode == _UMUTEX_WAIT) { 1011 if (owner == UMUTEX_UNOWNED || 1012 owner == UMUTEX_CONTESTED || 1013 owner == UMUTEX_RB_OWNERDEAD || 1014 owner == UMUTEX_RB_NOTRECOV) 1015 return (0); 1016 } else { 1017 /* 1018 * Robust mutex terminated. Kernel duty is to 1019 * return EOWNERDEAD to the userspace. The 1020 * umutex.m_flags UMUTEX_NONCONSISTENT is set 1021 * by the common userspace code. 1022 */ 1023 if (owner == UMUTEX_RB_OWNERDEAD) { 1024 rv = casueword32(&m->m_owner, 1025 UMUTEX_RB_OWNERDEAD, &owner, 1026 id | UMUTEX_CONTESTED); 1027 if (rv == -1) 1028 return (EFAULT); 1029 if (rv == 0) { 1030 MPASS(owner == UMUTEX_RB_OWNERDEAD); 1031 return (EOWNERDEAD); /* success */ 1032 } 1033 MPASS(rv == 1); 1034 rv = thread_check_susp(td, false); 1035 if (rv != 0) 1036 return (rv); 1037 continue; 1038 } 1039 if (owner == UMUTEX_RB_NOTRECOV) 1040 return (ENOTRECOVERABLE); 1041 1042 /* 1043 * Try the uncontested case. This should be 1044 * done in userland. 1045 */ 1046 rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, 1047 &owner, id); 1048 /* The address was invalid. */ 1049 if (rv == -1) 1050 return (EFAULT); 1051 1052 /* The acquire succeeded. */ 1053 if (rv == 0) { 1054 MPASS(owner == UMUTEX_UNOWNED); 1055 return (0); 1056 } 1057 1058 /* 1059 * If no one owns it but it is contested try 1060 * to acquire it. 1061 */ 1062 MPASS(rv == 1); 1063 if (owner == UMUTEX_CONTESTED) { 1064 rv = casueword32(&m->m_owner, 1065 UMUTEX_CONTESTED, &owner, 1066 id | UMUTEX_CONTESTED); 1067 /* The address was invalid. */ 1068 if (rv == -1) 1069 return (EFAULT); 1070 if (rv == 0) { 1071 MPASS(owner == UMUTEX_CONTESTED); 1072 return (0); 1073 } 1074 if (rv == 1) { 1075 rv = thread_check_susp(td, false); 1076 if (rv != 0) 1077 return (rv); 1078 } 1079 1080 /* 1081 * If this failed the lock has 1082 * changed, restart. 
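				 *
				 * Note the casueword32() convention used
				 * throughout this file: -1 means the access
				 * faulted, 0 means the new value was stored,
				 * and 1 means the compare or the conditional
				 * store failed and the current value was
				 * copied back, which is why every call site
				 * distinguishes rv == -1, 0 and 1.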
1083 */ 1084 continue; 1085 } 1086 1087 /* rv == 1 but not contested, likely store failure */ 1088 rv = thread_check_susp(td, false); 1089 if (rv != 0) 1090 return (rv); 1091 } 1092 1093 if (mode == _UMUTEX_TRY) 1094 return (EBUSY); 1095 1096 /* 1097 * If we caught a signal, we have retried and now 1098 * exit immediately. 1099 */ 1100 if (error != 0) 1101 return (error); 1102 1103 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, 1104 GET_SHARE(flags), &uq->uq_key)) != 0) 1105 return (error); 1106 1107 umtxq_lock(&uq->uq_key); 1108 umtxq_busy(&uq->uq_key); 1109 umtxq_insert(uq); 1110 umtxq_unlock(&uq->uq_key); 1111 1112 /* 1113 * Set the contested bit so that a release in user space 1114 * knows to use the system call for unlock. If this fails 1115 * either some one else has acquired the lock or it has been 1116 * released. 1117 */ 1118 rv = casueword32(&m->m_owner, owner, &old, 1119 owner | UMUTEX_CONTESTED); 1120 1121 /* The address was invalid or casueword failed to store. */ 1122 if (rv == -1 || rv == 1) { 1123 umtxq_lock(&uq->uq_key); 1124 umtxq_remove(uq); 1125 umtxq_unbusy(&uq->uq_key); 1126 umtxq_unlock(&uq->uq_key); 1127 umtx_key_release(&uq->uq_key); 1128 if (rv == -1) 1129 return (EFAULT); 1130 if (rv == 1) { 1131 rv = thread_check_susp(td, false); 1132 if (rv != 0) 1133 return (rv); 1134 } 1135 continue; 1136 } 1137 1138 /* 1139 * We set the contested bit, sleep. Otherwise the lock changed 1140 * and we need to retry or we lost a race to the thread 1141 * unlocking the umtx. 1142 */ 1143 umtxq_lock(&uq->uq_key); 1144 umtxq_unbusy(&uq->uq_key); 1145 MPASS(old == owner); 1146 error = umtxq_sleep(uq, "umtxn", timeout == NULL ? 1147 NULL : &timo); 1148 umtxq_remove(uq); 1149 umtxq_unlock(&uq->uq_key); 1150 umtx_key_release(&uq->uq_key); 1151 1152 if (error == 0) 1153 error = thread_check_susp(td, false); 1154 } 1155 1156 return (0); 1157 } 1158 1159 /* 1160 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex. 1161 */ 1162 static int 1163 do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags, bool rb) 1164 { 1165 struct umtx_key key; 1166 uint32_t owner, old, id, newlock; 1167 int error, count; 1168 1169 id = td->td_tid; 1170 1171 again: 1172 /* 1173 * Make sure we own this mtx. 1174 */ 1175 error = fueword32(&m->m_owner, &owner); 1176 if (error == -1) 1177 return (EFAULT); 1178 1179 if ((owner & ~UMUTEX_CONTESTED) != id) 1180 return (EPERM); 1181 1182 newlock = umtx_unlock_val(flags, rb); 1183 if ((owner & UMUTEX_CONTESTED) == 0) { 1184 error = casueword32(&m->m_owner, owner, &old, newlock); 1185 if (error == -1) 1186 return (EFAULT); 1187 if (error == 1) { 1188 error = thread_check_susp(td, false); 1189 if (error != 0) 1190 return (error); 1191 goto again; 1192 } 1193 MPASS(old == owner); 1194 return (0); 1195 } 1196 1197 /* We should only ever be in here for contested locks */ 1198 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags), 1199 &key)) != 0) 1200 return (error); 1201 1202 umtxq_lock(&key); 1203 umtxq_busy(&key); 1204 count = umtxq_count(&key); 1205 umtxq_unlock(&key); 1206 1207 /* 1208 * When unlocking the umtx, it must be marked as unowned if 1209 * there is zero or one thread only waiting for it. 1210 * Otherwise, it must be marked as contested. 
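	 *
	 * Illustrative m_owner transitions for a non-robust release by
	 * thread id 42 (the id is a made-up example):
	 *	count <= 1:	(42 | UMUTEX_CONTESTED) -> UMUTEX_UNOWNED
	 *	count >  1:	(42 | UMUTEX_CONTESTED) -> UMUTEX_CONTESTED
	 * so the next locker only enters the kernel while waiters remain
	 * queued.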
1211 */ 1212 if (count > 1) 1213 newlock |= UMUTEX_CONTESTED; 1214 error = casueword32(&m->m_owner, owner, &old, newlock); 1215 umtxq_lock(&key); 1216 umtxq_signal(&key, 1); 1217 umtxq_unbusy(&key); 1218 umtxq_unlock(&key); 1219 umtx_key_release(&key); 1220 if (error == -1) 1221 return (EFAULT); 1222 if (error == 1) { 1223 if (old != owner) 1224 return (EINVAL); 1225 error = thread_check_susp(td, false); 1226 if (error != 0) 1227 return (error); 1228 goto again; 1229 } 1230 return (0); 1231 } 1232 1233 /* 1234 * Check if the mutex is available and wake up a waiter, 1235 * only for simple mutex. 1236 */ 1237 static int 1238 do_wake_umutex(struct thread *td, struct umutex *m) 1239 { 1240 struct umtx_key key; 1241 uint32_t owner; 1242 uint32_t flags; 1243 int error; 1244 int count; 1245 1246 again: 1247 error = fueword32(&m->m_owner, &owner); 1248 if (error == -1) 1249 return (EFAULT); 1250 1251 if ((owner & ~UMUTEX_CONTESTED) != 0 && owner != UMUTEX_RB_OWNERDEAD && 1252 owner != UMUTEX_RB_NOTRECOV) 1253 return (0); 1254 1255 error = fueword32(&m->m_flags, &flags); 1256 if (error == -1) 1257 return (EFAULT); 1258 1259 /* We should only ever be in here for contested locks */ 1260 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags), 1261 &key)) != 0) 1262 return (error); 1263 1264 umtxq_lock(&key); 1265 umtxq_busy(&key); 1266 count = umtxq_count(&key); 1267 umtxq_unlock(&key); 1268 1269 if (count <= 1 && owner != UMUTEX_RB_OWNERDEAD && 1270 owner != UMUTEX_RB_NOTRECOV) { 1271 error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, 1272 UMUTEX_UNOWNED); 1273 if (error == -1) { 1274 error = EFAULT; 1275 } else if (error == 1) { 1276 umtxq_lock(&key); 1277 umtxq_unbusy(&key); 1278 umtxq_unlock(&key); 1279 umtx_key_release(&key); 1280 error = thread_check_susp(td, false); 1281 if (error != 0) 1282 return (error); 1283 goto again; 1284 } 1285 } 1286 1287 umtxq_lock(&key); 1288 if (error == 0 && count != 0) { 1289 MPASS((owner & ~UMUTEX_CONTESTED) == 0 || 1290 owner == UMUTEX_RB_OWNERDEAD || 1291 owner == UMUTEX_RB_NOTRECOV); 1292 umtxq_signal(&key, 1); 1293 } 1294 umtxq_unbusy(&key); 1295 umtxq_unlock(&key); 1296 umtx_key_release(&key); 1297 return (error); 1298 } 1299 1300 /* 1301 * Check if the mutex has waiters and tries to fix contention bit. 1302 */ 1303 static int 1304 do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags) 1305 { 1306 struct umtx_key key; 1307 uint32_t owner, old; 1308 int type; 1309 int error; 1310 int count; 1311 1312 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT | 1313 UMUTEX_ROBUST)) { 1314 case 0: 1315 case UMUTEX_ROBUST: 1316 type = TYPE_NORMAL_UMUTEX; 1317 break; 1318 case UMUTEX_PRIO_INHERIT: 1319 type = TYPE_PI_UMUTEX; 1320 break; 1321 case (UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST): 1322 type = TYPE_PI_ROBUST_UMUTEX; 1323 break; 1324 case UMUTEX_PRIO_PROTECT: 1325 type = TYPE_PP_UMUTEX; 1326 break; 1327 case (UMUTEX_PRIO_PROTECT | UMUTEX_ROBUST): 1328 type = TYPE_PP_ROBUST_UMUTEX; 1329 break; 1330 default: 1331 return (EINVAL); 1332 } 1333 if ((error = umtx_key_get(m, type, GET_SHARE(flags), &key)) != 0) 1334 return (error); 1335 1336 owner = 0; 1337 umtxq_lock(&key); 1338 umtxq_busy(&key); 1339 count = umtxq_count(&key); 1340 umtxq_unlock(&key); 1341 1342 error = fueword32(&m->m_owner, &owner); 1343 if (error == -1) 1344 error = EFAULT; 1345 1346 /* 1347 * Only repair contention bit if there is a waiter, this means 1348 * the mutex is still being referenced by userland code, 1349 * otherwise don't update any memory. 
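	 *
	 * The loop below re-reads the owner returned by the failed
	 * casueword32() and retries until the contested bit is observed or
	 * set, or the access faults; thread_check_susp() bounds the retries
	 * if the thread is being suspended.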
1350 */ 1351 while (error == 0 && (owner & UMUTEX_CONTESTED) == 0 && 1352 (count > 1 || (count == 1 && (owner & ~UMUTEX_CONTESTED) != 0))) { 1353 error = casueword32(&m->m_owner, owner, &old, 1354 owner | UMUTEX_CONTESTED); 1355 if (error == -1) { 1356 error = EFAULT; 1357 break; 1358 } 1359 if (error == 0) { 1360 MPASS(old == owner); 1361 break; 1362 } 1363 owner = old; 1364 error = thread_check_susp(td, false); 1365 } 1366 1367 umtxq_lock(&key); 1368 if (error == EFAULT) { 1369 umtxq_signal(&key, INT_MAX); 1370 } else if (count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 || 1371 owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV)) 1372 umtxq_signal(&key, 1); 1373 umtxq_unbusy(&key); 1374 umtxq_unlock(&key); 1375 umtx_key_release(&key); 1376 return (error); 1377 } 1378 1379 static inline struct umtx_pi * 1380 umtx_pi_alloc(int flags) 1381 { 1382 struct umtx_pi *pi; 1383 1384 pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags); 1385 TAILQ_INIT(&pi->pi_blocked); 1386 atomic_add_int(&umtx_pi_allocated, 1); 1387 return (pi); 1388 } 1389 1390 static inline void 1391 umtx_pi_free(struct umtx_pi *pi) 1392 { 1393 uma_zfree(umtx_pi_zone, pi); 1394 atomic_add_int(&umtx_pi_allocated, -1); 1395 } 1396 1397 /* 1398 * Adjust the thread's position on a pi_state after its priority has been 1399 * changed. 1400 */ 1401 static int 1402 umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td) 1403 { 1404 struct umtx_q *uq, *uq1, *uq2; 1405 struct thread *td1; 1406 1407 mtx_assert(&umtx_lock, MA_OWNED); 1408 if (pi == NULL) 1409 return (0); 1410 1411 uq = td->td_umtxq; 1412 1413 /* 1414 * Check if the thread needs to be moved on the blocked chain. 1415 * It needs to be moved if either its priority is lower than 1416 * the previous thread or higher than the next thread. 1417 */ 1418 uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq); 1419 uq2 = TAILQ_NEXT(uq, uq_lockq); 1420 if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) || 1421 (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) { 1422 /* 1423 * Remove thread from blocked chain and determine where 1424 * it should be moved to. 1425 */ 1426 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq); 1427 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) { 1428 td1 = uq1->uq_thread; 1429 MPASS(td1->td_proc->p_magic == P_MAGIC); 1430 if (UPRI(td1) > UPRI(td)) 1431 break; 1432 } 1433 1434 if (uq1 == NULL) 1435 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq); 1436 else 1437 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq); 1438 } 1439 return (1); 1440 } 1441 1442 static struct umtx_pi * 1443 umtx_pi_next(struct umtx_pi *pi) 1444 { 1445 struct umtx_q *uq_owner; 1446 1447 if (pi->pi_owner == NULL) 1448 return (NULL); 1449 uq_owner = pi->pi_owner->td_umtxq; 1450 if (uq_owner == NULL) 1451 return (NULL); 1452 return (uq_owner->uq_pi_blocked); 1453 } 1454 1455 /* 1456 * Floyd's Cycle-Finding Algorithm. 1457 */ 1458 static bool 1459 umtx_pi_check_loop(struct umtx_pi *pi) 1460 { 1461 struct umtx_pi *pi1; /* fast iterator */ 1462 1463 mtx_assert(&umtx_lock, MA_OWNED); 1464 if (pi == NULL) 1465 return (false); 1466 pi1 = pi; 1467 for (;;) { 1468 pi = umtx_pi_next(pi); 1469 if (pi == NULL) 1470 break; 1471 pi1 = umtx_pi_next(pi1); 1472 if (pi1 == NULL) 1473 break; 1474 pi1 = umtx_pi_next(pi1); 1475 if (pi1 == NULL) 1476 break; 1477 if (pi == pi1) 1478 return (true); 1479 } 1480 return (false); 1481 } 1482 1483 /* 1484 * Propagate priority when a thread is blocked on POSIX 1485 * PI mutex. 
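 *
 * The walk follows the chain "waiter -> owner -> mutex that owner is itself
 * blocked on -> next owner ..." and lends the waiter's priority to every
 * owner whose lent priority is numerically larger (i.e. weaker).  With
 * made-up priority values: if T1 (priority 120) blocks on a PI mutex owned
 * by T2 (priority 160), T2 is lent 120; if T2 is itself blocked on a mutex
 * owned by T3, T3 is lent 120 as well.  umtx_pi_check_loop() stops the walk
 * if corrupted user memory has created a cycle.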
1486 */ 1487 static void 1488 umtx_propagate_priority(struct thread *td) 1489 { 1490 struct umtx_q *uq; 1491 struct umtx_pi *pi; 1492 int pri; 1493 1494 mtx_assert(&umtx_lock, MA_OWNED); 1495 pri = UPRI(td); 1496 uq = td->td_umtxq; 1497 pi = uq->uq_pi_blocked; 1498 if (pi == NULL) 1499 return; 1500 if (umtx_pi_check_loop(pi)) 1501 return; 1502 1503 for (;;) { 1504 td = pi->pi_owner; 1505 if (td == NULL || td == curthread) 1506 return; 1507 1508 MPASS(td->td_proc != NULL); 1509 MPASS(td->td_proc->p_magic == P_MAGIC); 1510 1511 thread_lock(td); 1512 if (td->td_lend_user_pri > pri) 1513 sched_lend_user_prio(td, pri); 1514 else { 1515 thread_unlock(td); 1516 break; 1517 } 1518 thread_unlock(td); 1519 1520 /* 1521 * Pick up the lock that td is blocked on. 1522 */ 1523 uq = td->td_umtxq; 1524 pi = uq->uq_pi_blocked; 1525 if (pi == NULL) 1526 break; 1527 /* Resort td on the list if needed. */ 1528 umtx_pi_adjust_thread(pi, td); 1529 } 1530 } 1531 1532 /* 1533 * Unpropagate priority for a PI mutex when a thread blocked on 1534 * it is interrupted by signal or resumed by others. 1535 */ 1536 static void 1537 umtx_repropagate_priority(struct umtx_pi *pi) 1538 { 1539 struct umtx_q *uq, *uq_owner; 1540 struct umtx_pi *pi2; 1541 int pri; 1542 1543 mtx_assert(&umtx_lock, MA_OWNED); 1544 1545 if (umtx_pi_check_loop(pi)) 1546 return; 1547 while (pi != NULL && pi->pi_owner != NULL) { 1548 pri = PRI_MAX; 1549 uq_owner = pi->pi_owner->td_umtxq; 1550 1551 TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) { 1552 uq = TAILQ_FIRST(&pi2->pi_blocked); 1553 if (uq != NULL) { 1554 if (pri > UPRI(uq->uq_thread)) 1555 pri = UPRI(uq->uq_thread); 1556 } 1557 } 1558 1559 if (pri > uq_owner->uq_inherited_pri) 1560 pri = uq_owner->uq_inherited_pri; 1561 thread_lock(pi->pi_owner); 1562 sched_lend_user_prio(pi->pi_owner, pri); 1563 thread_unlock(pi->pi_owner); 1564 if ((pi = uq_owner->uq_pi_blocked) != NULL) 1565 umtx_pi_adjust_thread(pi, uq_owner->uq_thread); 1566 } 1567 } 1568 1569 /* 1570 * Insert a PI mutex into owned list. 1571 */ 1572 static void 1573 umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner) 1574 { 1575 struct umtx_q *uq_owner; 1576 1577 uq_owner = owner->td_umtxq; 1578 mtx_assert(&umtx_lock, MA_OWNED); 1579 MPASS(pi->pi_owner == NULL); 1580 pi->pi_owner = owner; 1581 TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link); 1582 } 1583 1584 /* 1585 * Disown a PI mutex, and remove it from the owned list. 1586 */ 1587 static void 1588 umtx_pi_disown(struct umtx_pi *pi) 1589 { 1590 1591 mtx_assert(&umtx_lock, MA_OWNED); 1592 TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, pi, pi_link); 1593 pi->pi_owner = NULL; 1594 } 1595 1596 /* 1597 * Claim ownership of a PI mutex. 1598 */ 1599 static int 1600 umtx_pi_claim(struct umtx_pi *pi, struct thread *owner) 1601 { 1602 struct umtx_q *uq; 1603 int pri; 1604 1605 mtx_lock(&umtx_lock); 1606 if (pi->pi_owner == owner) { 1607 mtx_unlock(&umtx_lock); 1608 return (0); 1609 } 1610 1611 if (pi->pi_owner != NULL) { 1612 /* 1613 * userland may have already messed the mutex, sigh. 
1614 */ 1615 mtx_unlock(&umtx_lock); 1616 return (EPERM); 1617 } 1618 umtx_pi_setowner(pi, owner); 1619 uq = TAILQ_FIRST(&pi->pi_blocked); 1620 if (uq != NULL) { 1621 pri = UPRI(uq->uq_thread); 1622 thread_lock(owner); 1623 if (pri < UPRI(owner)) 1624 sched_lend_user_prio(owner, pri); 1625 thread_unlock(owner); 1626 } 1627 mtx_unlock(&umtx_lock); 1628 return (0); 1629 } 1630 1631 /* 1632 * Adjust a thread's order position in its blocked PI mutex, 1633 * this may result new priority propagating process. 1634 */ 1635 void 1636 umtx_pi_adjust(struct thread *td, u_char oldpri) 1637 { 1638 struct umtx_q *uq; 1639 struct umtx_pi *pi; 1640 1641 uq = td->td_umtxq; 1642 mtx_lock(&umtx_lock); 1643 /* 1644 * Pick up the lock that td is blocked on. 1645 */ 1646 pi = uq->uq_pi_blocked; 1647 if (pi != NULL) { 1648 umtx_pi_adjust_thread(pi, td); 1649 umtx_repropagate_priority(pi); 1650 } 1651 mtx_unlock(&umtx_lock); 1652 } 1653 1654 /* 1655 * Sleep on a PI mutex. 1656 */ 1657 static int 1658 umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, uint32_t owner, 1659 const char *wmesg, struct abs_timeout *timo, bool shared) 1660 { 1661 struct thread *td, *td1; 1662 struct umtx_q *uq1; 1663 int error, pri; 1664 #ifdef INVARIANTS 1665 struct umtxq_chain *uc; 1666 1667 uc = umtxq_getchain(&pi->pi_key); 1668 #endif 1669 error = 0; 1670 td = uq->uq_thread; 1671 KASSERT(td == curthread, ("inconsistent uq_thread")); 1672 UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key)); 1673 KASSERT(uc->uc_busy != 0, ("umtx chain is not busy")); 1674 umtxq_insert(uq); 1675 mtx_lock(&umtx_lock); 1676 if (pi->pi_owner == NULL) { 1677 mtx_unlock(&umtx_lock); 1678 td1 = tdfind(owner, shared ? -1 : td->td_proc->p_pid); 1679 mtx_lock(&umtx_lock); 1680 if (td1 != NULL) { 1681 if (pi->pi_owner == NULL) 1682 umtx_pi_setowner(pi, td1); 1683 PROC_UNLOCK(td1->td_proc); 1684 } 1685 } 1686 1687 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) { 1688 pri = UPRI(uq1->uq_thread); 1689 if (pri > UPRI(td)) 1690 break; 1691 } 1692 1693 if (uq1 != NULL) 1694 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq); 1695 else 1696 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq); 1697 1698 uq->uq_pi_blocked = pi; 1699 thread_lock(td); 1700 td->td_flags |= TDF_UPIBLOCKED; 1701 thread_unlock(td); 1702 umtx_propagate_priority(td); 1703 mtx_unlock(&umtx_lock); 1704 umtxq_unbusy(&uq->uq_key); 1705 1706 error = umtxq_sleep(uq, wmesg, timo); 1707 umtxq_remove(uq); 1708 1709 mtx_lock(&umtx_lock); 1710 uq->uq_pi_blocked = NULL; 1711 thread_lock(td); 1712 td->td_flags &= ~TDF_UPIBLOCKED; 1713 thread_unlock(td); 1714 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq); 1715 umtx_repropagate_priority(pi); 1716 mtx_unlock(&umtx_lock); 1717 umtxq_unlock(&uq->uq_key); 1718 1719 return (error); 1720 } 1721 1722 /* 1723 * Add reference count for a PI mutex. 1724 */ 1725 static void 1726 umtx_pi_ref(struct umtx_pi *pi) 1727 { 1728 1729 UMTXQ_LOCKED_ASSERT(umtxq_getchain(&pi->pi_key)); 1730 pi->pi_refcount++; 1731 } 1732 1733 /* 1734 * Decrease reference count for a PI mutex, if the counter 1735 * is decreased to zero, its memory space is freed. 
1736 */ 1737 static void 1738 umtx_pi_unref(struct umtx_pi *pi) 1739 { 1740 struct umtxq_chain *uc; 1741 1742 uc = umtxq_getchain(&pi->pi_key); 1743 UMTXQ_LOCKED_ASSERT(uc); 1744 KASSERT(pi->pi_refcount > 0, ("invalid reference count")); 1745 if (--pi->pi_refcount == 0) { 1746 mtx_lock(&umtx_lock); 1747 if (pi->pi_owner != NULL) 1748 umtx_pi_disown(pi); 1749 KASSERT(TAILQ_EMPTY(&pi->pi_blocked), 1750 ("blocked queue not empty")); 1751 mtx_unlock(&umtx_lock); 1752 TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink); 1753 umtx_pi_free(pi); 1754 } 1755 } 1756 1757 /* 1758 * Find a PI mutex in hash table. 1759 */ 1760 static struct umtx_pi * 1761 umtx_pi_lookup(struct umtx_key *key) 1762 { 1763 struct umtxq_chain *uc; 1764 struct umtx_pi *pi; 1765 1766 uc = umtxq_getchain(key); 1767 UMTXQ_LOCKED_ASSERT(uc); 1768 1769 TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) { 1770 if (umtx_key_match(&pi->pi_key, key)) { 1771 return (pi); 1772 } 1773 } 1774 return (NULL); 1775 } 1776 1777 /* 1778 * Insert a PI mutex into hash table. 1779 */ 1780 static inline void 1781 umtx_pi_insert(struct umtx_pi *pi) 1782 { 1783 struct umtxq_chain *uc; 1784 1785 uc = umtxq_getchain(&pi->pi_key); 1786 UMTXQ_LOCKED_ASSERT(uc); 1787 TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink); 1788 } 1789 1790 /* 1791 * Lock a PI mutex. 1792 */ 1793 static int 1794 do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, 1795 struct _umtx_time *timeout, int try) 1796 { 1797 struct abs_timeout timo; 1798 struct umtx_q *uq; 1799 struct umtx_pi *pi, *new_pi; 1800 uint32_t id, old_owner, owner, old; 1801 int error, rv; 1802 1803 id = td->td_tid; 1804 uq = td->td_umtxq; 1805 1806 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 1807 TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags), 1808 &uq->uq_key)) != 0) 1809 return (error); 1810 1811 if (timeout != NULL) 1812 abs_timeout_init2(&timo, timeout); 1813 1814 umtxq_lock(&uq->uq_key); 1815 pi = umtx_pi_lookup(&uq->uq_key); 1816 if (pi == NULL) { 1817 new_pi = umtx_pi_alloc(M_NOWAIT); 1818 if (new_pi == NULL) { 1819 umtxq_unlock(&uq->uq_key); 1820 new_pi = umtx_pi_alloc(M_WAITOK); 1821 umtxq_lock(&uq->uq_key); 1822 pi = umtx_pi_lookup(&uq->uq_key); 1823 if (pi != NULL) { 1824 umtx_pi_free(new_pi); 1825 new_pi = NULL; 1826 } 1827 } 1828 if (new_pi != NULL) { 1829 new_pi->pi_key = uq->uq_key; 1830 umtx_pi_insert(new_pi); 1831 pi = new_pi; 1832 } 1833 } 1834 umtx_pi_ref(pi); 1835 umtxq_unlock(&uq->uq_key); 1836 1837 /* 1838 * Care must be exercised when dealing with umtx structure. It 1839 * can fault on any access. 1840 */ 1841 for (;;) { 1842 /* 1843 * Try the uncontested case. This should be done in userland. 1844 */ 1845 rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id); 1846 /* The address was invalid. */ 1847 if (rv == -1) { 1848 error = EFAULT; 1849 break; 1850 } 1851 /* The acquire succeeded. */ 1852 if (rv == 0) { 1853 MPASS(owner == UMUTEX_UNOWNED); 1854 error = 0; 1855 break; 1856 } 1857 1858 if (owner == UMUTEX_RB_NOTRECOV) { 1859 error = ENOTRECOVERABLE; 1860 break; 1861 } 1862 1863 /* 1864 * Avoid overwriting a possible error from sleep due 1865 * to the pending signal with suspension check result. 1866 */ 1867 if (error == 0) { 1868 error = thread_check_susp(td, true); 1869 if (error != 0) 1870 break; 1871 } 1872 1873 /* If no one owns it but it is contested try to acquire it. 
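		 *
		 * For a PI umutex the m_owner word moves through roughly:
		 * UMUTEX_UNOWNED -> tid for the uncontested acquire above,
		 * and UMUTEX_CONTESTED or UMUTEX_RB_OWNERDEAD ->
		 * (tid | UMUTEX_CONTESTED) for the kernel hand-off here; an
		 * acquire over UMUTEX_RB_OWNERDEAD additionally reports
		 * EOWNERDEAD to the caller.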
*/ 1874 if (owner == UMUTEX_CONTESTED || owner == UMUTEX_RB_OWNERDEAD) { 1875 old_owner = owner; 1876 rv = casueword32(&m->m_owner, owner, &owner, 1877 id | UMUTEX_CONTESTED); 1878 /* The address was invalid. */ 1879 if (rv == -1) { 1880 error = EFAULT; 1881 break; 1882 } 1883 if (rv == 1) { 1884 if (error == 0) { 1885 error = thread_check_susp(td, true); 1886 if (error != 0) 1887 break; 1888 } 1889 1890 /* 1891 * If this failed the lock could 1892 * changed, restart. 1893 */ 1894 continue; 1895 } 1896 1897 MPASS(rv == 0); 1898 MPASS(owner == old_owner); 1899 umtxq_lock(&uq->uq_key); 1900 umtxq_busy(&uq->uq_key); 1901 error = umtx_pi_claim(pi, td); 1902 umtxq_unbusy(&uq->uq_key); 1903 umtxq_unlock(&uq->uq_key); 1904 if (error != 0) { 1905 /* 1906 * Since we're going to return an 1907 * error, restore the m_owner to its 1908 * previous, unowned state to avoid 1909 * compounding the problem. 1910 */ 1911 (void)casuword32(&m->m_owner, 1912 id | UMUTEX_CONTESTED, old_owner); 1913 } 1914 if (error == 0 && old_owner == UMUTEX_RB_OWNERDEAD) 1915 error = EOWNERDEAD; 1916 break; 1917 } 1918 1919 if ((owner & ~UMUTEX_CONTESTED) == id) { 1920 error = EDEADLK; 1921 break; 1922 } 1923 1924 if (try != 0) { 1925 error = EBUSY; 1926 break; 1927 } 1928 1929 /* 1930 * If we caught a signal, we have retried and now 1931 * exit immediately. 1932 */ 1933 if (error != 0) 1934 break; 1935 1936 umtxq_lock(&uq->uq_key); 1937 umtxq_busy(&uq->uq_key); 1938 umtxq_unlock(&uq->uq_key); 1939 1940 /* 1941 * Set the contested bit so that a release in user space 1942 * knows to use the system call for unlock. If this fails 1943 * either some one else has acquired the lock or it has been 1944 * released. 1945 */ 1946 rv = casueword32(&m->m_owner, owner, &old, owner | 1947 UMUTEX_CONTESTED); 1948 1949 /* The address was invalid. */ 1950 if (rv == -1) { 1951 umtxq_unbusy_unlocked(&uq->uq_key); 1952 error = EFAULT; 1953 break; 1954 } 1955 if (rv == 1) { 1956 umtxq_unbusy_unlocked(&uq->uq_key); 1957 error = thread_check_susp(td, true); 1958 if (error != 0) 1959 break; 1960 1961 /* 1962 * The lock changed and we need to retry or we 1963 * lost a race to the thread unlocking the 1964 * umtx. Note that the UMUTEX_RB_OWNERDEAD 1965 * value for owner is impossible there. 1966 */ 1967 continue; 1968 } 1969 1970 umtxq_lock(&uq->uq_key); 1971 1972 /* We set the contested bit, sleep. */ 1973 MPASS(old == owner); 1974 error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED, 1975 "umtxpi", timeout == NULL ? NULL : &timo, 1976 (flags & USYNC_PROCESS_SHARED) != 0); 1977 if (error != 0) 1978 continue; 1979 1980 error = thread_check_susp(td, false); 1981 if (error != 0) 1982 break; 1983 } 1984 1985 umtxq_lock(&uq->uq_key); 1986 umtx_pi_unref(pi); 1987 umtxq_unlock(&uq->uq_key); 1988 1989 umtx_key_release(&uq->uq_key); 1990 return (error); 1991 } 1992 1993 /* 1994 * Unlock a PI mutex. 1995 */ 1996 static int 1997 do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags, bool rb) 1998 { 1999 struct umtx_key key; 2000 struct umtx_q *uq_first, *uq_first2, *uq_me; 2001 struct umtx_pi *pi, *pi2; 2002 uint32_t id, new_owner, old, owner; 2003 int count, error, pri; 2004 2005 id = td->td_tid; 2006 2007 usrloop: 2008 /* 2009 * Make sure we own this mtx. 
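	 *
	 * Only the owner may unlock: td_tid must match m_owner with the
	 * contested bit masked off, otherwise EPERM is returned.  With rb
	 * set the word is released as UMUTEX_RB_OWNERDEAD (see
	 * umtx_unlock_val()), so the next locker learns that the previous
	 * owner died while holding the lock.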
2010 */ 2011 error = fueword32(&m->m_owner, &owner); 2012 if (error == -1) 2013 return (EFAULT); 2014 2015 if ((owner & ~UMUTEX_CONTESTED) != id) 2016 return (EPERM); 2017 2018 new_owner = umtx_unlock_val(flags, rb); 2019 2020 /* This should be done in userland */ 2021 if ((owner & UMUTEX_CONTESTED) == 0) { 2022 error = casueword32(&m->m_owner, owner, &old, new_owner); 2023 if (error == -1) 2024 return (EFAULT); 2025 if (error == 1) { 2026 error = thread_check_susp(td, true); 2027 if (error != 0) 2028 return (error); 2029 goto usrloop; 2030 } 2031 if (old == owner) 2032 return (0); 2033 owner = old; 2034 } 2035 2036 /* We should only ever be in here for contested locks */ 2037 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2038 TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags), 2039 &key)) != 0) 2040 return (error); 2041 2042 umtxq_lock(&key); 2043 umtxq_busy(&key); 2044 count = umtxq_count_pi(&key, &uq_first); 2045 if (uq_first != NULL) { 2046 mtx_lock(&umtx_lock); 2047 pi = uq_first->uq_pi_blocked; 2048 KASSERT(pi != NULL, ("pi == NULL?")); 2049 if (pi->pi_owner != td && !(rb && pi->pi_owner == NULL)) { 2050 mtx_unlock(&umtx_lock); 2051 umtxq_unbusy(&key); 2052 umtxq_unlock(&key); 2053 umtx_key_release(&key); 2054 /* userland messed the mutex */ 2055 return (EPERM); 2056 } 2057 uq_me = td->td_umtxq; 2058 if (pi->pi_owner == td) 2059 umtx_pi_disown(pi); 2060 /* get highest priority thread which is still sleeping. */ 2061 uq_first = TAILQ_FIRST(&pi->pi_blocked); 2062 while (uq_first != NULL && 2063 (uq_first->uq_flags & UQF_UMTXQ) == 0) { 2064 uq_first = TAILQ_NEXT(uq_first, uq_lockq); 2065 } 2066 pri = PRI_MAX; 2067 TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) { 2068 uq_first2 = TAILQ_FIRST(&pi2->pi_blocked); 2069 if (uq_first2 != NULL) { 2070 if (pri > UPRI(uq_first2->uq_thread)) 2071 pri = UPRI(uq_first2->uq_thread); 2072 } 2073 } 2074 thread_lock(td); 2075 sched_lend_user_prio(td, pri); 2076 thread_unlock(td); 2077 mtx_unlock(&umtx_lock); 2078 if (uq_first) 2079 umtxq_signal_thread(uq_first); 2080 } else { 2081 pi = umtx_pi_lookup(&key); 2082 /* 2083 * A umtx_pi can exist if a signal or timeout removed the 2084 * last waiter from the umtxq, but there is still 2085 * a thread in do_lock_pi() holding the umtx_pi. 2086 */ 2087 if (pi != NULL) { 2088 /* 2089 * The umtx_pi can be unowned, such as when a thread 2090 * has just entered do_lock_pi(), allocated the 2091 * umtx_pi, and unlocked the umtxq. 2092 * If the current thread owns it, it must disown it. 2093 */ 2094 mtx_lock(&umtx_lock); 2095 if (pi->pi_owner == td) 2096 umtx_pi_disown(pi); 2097 mtx_unlock(&umtx_lock); 2098 } 2099 } 2100 umtxq_unlock(&key); 2101 2102 /* 2103 * When unlocking the umtx, it must be marked as unowned if 2104 * there is zero or one thread only waiting for it. 2105 * Otherwise, it must be marked as contested. 2106 */ 2107 2108 if (count > 1) 2109 new_owner |= UMUTEX_CONTESTED; 2110 again: 2111 error = casueword32(&m->m_owner, owner, &old, new_owner); 2112 if (error == 1) { 2113 error = thread_check_susp(td, false); 2114 if (error == 0) 2115 goto again; 2116 } 2117 umtxq_unbusy_unlocked(&key); 2118 umtx_key_release(&key); 2119 if (error == -1) 2120 return (EFAULT); 2121 if (error == 0 && old != owner) 2122 return (EINVAL); 2123 return (error); 2124 } 2125 2126 /* 2127 * Lock a PP mutex. 
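 *
 * With priority protection the locking thread first raises its own priority
 * to the mutex ceiling.  The userland ceiling from m_ceilings[0] maps to a
 * kernel priority roughly as
 *	uq_inherited_pri = PRI_MIN_REALTIME + (RTP_PRIO_MAX - ceiling)
 * and only threads holding PRIV_SCHED_RTPRIO are actually boosted.  A
 * caller whose priority is already better than the ceiling gets EINVAL,
 * matching the POSIX ceiling-violation rule for PTHREAD_PRIO_PROTECT.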
2128 */ 2129 static int 2130 do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, 2131 struct _umtx_time *timeout, int try) 2132 { 2133 struct abs_timeout timo; 2134 struct umtx_q *uq, *uq2; 2135 struct umtx_pi *pi; 2136 uint32_t ceiling; 2137 uint32_t owner, id; 2138 int error, pri, old_inherited_pri, su, rv; 2139 2140 id = td->td_tid; 2141 uq = td->td_umtxq; 2142 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2143 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2144 &uq->uq_key)) != 0) 2145 return (error); 2146 2147 if (timeout != NULL) 2148 abs_timeout_init2(&timo, timeout); 2149 2150 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); 2151 for (;;) { 2152 old_inherited_pri = uq->uq_inherited_pri; 2153 umtxq_lock(&uq->uq_key); 2154 umtxq_busy(&uq->uq_key); 2155 umtxq_unlock(&uq->uq_key); 2156 2157 rv = fueword32(&m->m_ceilings[0], &ceiling); 2158 if (rv == -1) { 2159 error = EFAULT; 2160 goto out; 2161 } 2162 ceiling = RTP_PRIO_MAX - ceiling; 2163 if (ceiling > RTP_PRIO_MAX) { 2164 error = EINVAL; 2165 goto out; 2166 } 2167 2168 mtx_lock(&umtx_lock); 2169 if (UPRI(td) < PRI_MIN_REALTIME + ceiling) { 2170 mtx_unlock(&umtx_lock); 2171 error = EINVAL; 2172 goto out; 2173 } 2174 if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) { 2175 uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling; 2176 thread_lock(td); 2177 if (uq->uq_inherited_pri < UPRI(td)) 2178 sched_lend_user_prio(td, uq->uq_inherited_pri); 2179 thread_unlock(td); 2180 } 2181 mtx_unlock(&umtx_lock); 2182 2183 rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, 2184 id | UMUTEX_CONTESTED); 2185 /* The address was invalid. */ 2186 if (rv == -1) { 2187 error = EFAULT; 2188 break; 2189 } 2190 if (rv == 0) { 2191 MPASS(owner == UMUTEX_CONTESTED); 2192 error = 0; 2193 break; 2194 } 2195 /* rv == 1 */ 2196 if (owner == UMUTEX_RB_OWNERDEAD) { 2197 rv = casueword32(&m->m_owner, UMUTEX_RB_OWNERDEAD, 2198 &owner, id | UMUTEX_CONTESTED); 2199 if (rv == -1) { 2200 error = EFAULT; 2201 break; 2202 } 2203 if (rv == 0) { 2204 MPASS(owner == UMUTEX_RB_OWNERDEAD); 2205 error = EOWNERDEAD; /* success */ 2206 break; 2207 } 2208 2209 /* 2210 * rv == 1, only check for suspension if we 2211 * did not already catched a signal. If we 2212 * get an error from the check, the same 2213 * condition is checked by the umtxq_sleep() 2214 * call below, so we should obliterate the 2215 * error to not skip the last loop iteration. 2216 */ 2217 if (error == 0) { 2218 error = thread_check_susp(td, false); 2219 if (error == 0) { 2220 if (try != 0) 2221 error = EBUSY; 2222 else 2223 continue; 2224 } 2225 error = 0; 2226 } 2227 } else if (owner == UMUTEX_RB_NOTRECOV) { 2228 error = ENOTRECOVERABLE; 2229 } 2230 2231 if (try != 0) 2232 error = EBUSY; 2233 2234 /* 2235 * If we caught a signal, we have retried and now 2236 * exit immediately. 2237 */ 2238 if (error != 0) 2239 break; 2240 2241 umtxq_lock(&uq->uq_key); 2242 umtxq_insert(uq); 2243 umtxq_unbusy(&uq->uq_key); 2244 error = umtxq_sleep(uq, "umtxpp", timeout == NULL ? 
2245 NULL : &timo); 2246 umtxq_remove(uq); 2247 umtxq_unlock(&uq->uq_key); 2248 2249 mtx_lock(&umtx_lock); 2250 uq->uq_inherited_pri = old_inherited_pri; 2251 pri = PRI_MAX; 2252 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2253 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2254 if (uq2 != NULL) { 2255 if (pri > UPRI(uq2->uq_thread)) 2256 pri = UPRI(uq2->uq_thread); 2257 } 2258 } 2259 if (pri > uq->uq_inherited_pri) 2260 pri = uq->uq_inherited_pri; 2261 thread_lock(td); 2262 sched_lend_user_prio(td, pri); 2263 thread_unlock(td); 2264 mtx_unlock(&umtx_lock); 2265 } 2266 2267 if (error != 0 && error != EOWNERDEAD) { 2268 mtx_lock(&umtx_lock); 2269 uq->uq_inherited_pri = old_inherited_pri; 2270 pri = PRI_MAX; 2271 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2272 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2273 if (uq2 != NULL) { 2274 if (pri > UPRI(uq2->uq_thread)) 2275 pri = UPRI(uq2->uq_thread); 2276 } 2277 } 2278 if (pri > uq->uq_inherited_pri) 2279 pri = uq->uq_inherited_pri; 2280 thread_lock(td); 2281 sched_lend_user_prio(td, pri); 2282 thread_unlock(td); 2283 mtx_unlock(&umtx_lock); 2284 } 2285 2286 out: 2287 umtxq_unbusy_unlocked(&uq->uq_key); 2288 umtx_key_release(&uq->uq_key); 2289 return (error); 2290 } 2291 2292 /* 2293 * Unlock a PP mutex. 2294 */ 2295 static int 2296 do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, bool rb) 2297 { 2298 struct umtx_key key; 2299 struct umtx_q *uq, *uq2; 2300 struct umtx_pi *pi; 2301 uint32_t id, owner, rceiling; 2302 int error, pri, new_inherited_pri, su; 2303 2304 id = td->td_tid; 2305 uq = td->td_umtxq; 2306 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); 2307 2308 /* 2309 * Make sure we own this mtx. 2310 */ 2311 error = fueword32(&m->m_owner, &owner); 2312 if (error == -1) 2313 return (EFAULT); 2314 2315 if ((owner & ~UMUTEX_CONTESTED) != id) 2316 return (EPERM); 2317 2318 error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t)); 2319 if (error != 0) 2320 return (error); 2321 2322 if (rceiling == -1) 2323 new_inherited_pri = PRI_MAX; 2324 else { 2325 rceiling = RTP_PRIO_MAX - rceiling; 2326 if (rceiling > RTP_PRIO_MAX) 2327 return (EINVAL); 2328 new_inherited_pri = PRI_MIN_REALTIME + rceiling; 2329 } 2330 2331 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2332 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2333 &key)) != 0) 2334 return (error); 2335 umtxq_lock(&key); 2336 umtxq_busy(&key); 2337 umtxq_unlock(&key); 2338 /* 2339 * For priority protected mutex, always set unlocked state 2340 * to UMUTEX_CONTESTED, so that userland always enters kernel 2341 * to lock the mutex, it is necessary because thread priority 2342 * has to be adjusted for such mutex. 
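 *
 * (Because UMUTEX_CONTESTED remains set in the stored owner word, a
 * userland fast-path compare-and-swap that expects an unowned value
 * can never succeed on a PP mutex; every locker is forced back into
 * do_lock_pp(), where the ceiling is applied and the priority lent.)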
2343 */ 2344 error = suword32(&m->m_owner, umtx_unlock_val(flags, rb) | 2345 UMUTEX_CONTESTED); 2346 2347 umtxq_lock(&key); 2348 if (error == 0) 2349 umtxq_signal(&key, 1); 2350 umtxq_unbusy(&key); 2351 umtxq_unlock(&key); 2352 2353 if (error == -1) 2354 error = EFAULT; 2355 else { 2356 mtx_lock(&umtx_lock); 2357 if (su != 0) 2358 uq->uq_inherited_pri = new_inherited_pri; 2359 pri = PRI_MAX; 2360 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2361 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2362 if (uq2 != NULL) { 2363 if (pri > UPRI(uq2->uq_thread)) 2364 pri = UPRI(uq2->uq_thread); 2365 } 2366 } 2367 if (pri > uq->uq_inherited_pri) 2368 pri = uq->uq_inherited_pri; 2369 thread_lock(td); 2370 sched_lend_user_prio(td, pri); 2371 thread_unlock(td); 2372 mtx_unlock(&umtx_lock); 2373 } 2374 umtx_key_release(&key); 2375 return (error); 2376 } 2377 2378 static int 2379 do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling, 2380 uint32_t *old_ceiling) 2381 { 2382 struct umtx_q *uq; 2383 uint32_t flags, id, owner, save_ceiling; 2384 int error, rv, rv1; 2385 2386 error = fueword32(&m->m_flags, &flags); 2387 if (error == -1) 2388 return (EFAULT); 2389 if ((flags & UMUTEX_PRIO_PROTECT) == 0) 2390 return (EINVAL); 2391 if (ceiling > RTP_PRIO_MAX) 2392 return (EINVAL); 2393 id = td->td_tid; 2394 uq = td->td_umtxq; 2395 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2396 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2397 &uq->uq_key)) != 0) 2398 return (error); 2399 for (;;) { 2400 umtxq_lock(&uq->uq_key); 2401 umtxq_busy(&uq->uq_key); 2402 umtxq_unlock(&uq->uq_key); 2403 2404 rv = fueword32(&m->m_ceilings[0], &save_ceiling); 2405 if (rv == -1) { 2406 error = EFAULT; 2407 break; 2408 } 2409 2410 rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, 2411 id | UMUTEX_CONTESTED); 2412 if (rv == -1) { 2413 error = EFAULT; 2414 break; 2415 } 2416 2417 if (rv == 0) { 2418 MPASS(owner == UMUTEX_CONTESTED); 2419 rv = suword32(&m->m_ceilings[0], ceiling); 2420 rv1 = suword32(&m->m_owner, UMUTEX_CONTESTED); 2421 error = (rv == 0 && rv1 == 0) ? 0: EFAULT; 2422 break; 2423 } 2424 2425 if ((owner & ~UMUTEX_CONTESTED) == id) { 2426 rv = suword32(&m->m_ceilings[0], ceiling); 2427 error = rv == 0 ? 0 : EFAULT; 2428 break; 2429 } 2430 2431 if (owner == UMUTEX_RB_OWNERDEAD) { 2432 error = EOWNERDEAD; 2433 break; 2434 } else if (owner == UMUTEX_RB_NOTRECOV) { 2435 error = ENOTRECOVERABLE; 2436 break; 2437 } 2438 2439 /* 2440 * If we caught a signal, we have retried and now 2441 * exit immediately. 2442 */ 2443 if (error != 0) 2444 break; 2445 2446 /* 2447 * We set the contested bit, sleep. Otherwise the lock changed 2448 * and we need to retry or we lost a race to the thread 2449 * unlocking the umtx. 2450 */ 2451 umtxq_lock(&uq->uq_key); 2452 umtxq_insert(uq); 2453 umtxq_unbusy(&uq->uq_key); 2454 error = umtxq_sleep(uq, "umtxpp", NULL); 2455 umtxq_remove(uq); 2456 umtxq_unlock(&uq->uq_key); 2457 } 2458 umtxq_lock(&uq->uq_key); 2459 if (error == 0) 2460 umtxq_signal(&uq->uq_key, INT_MAX); 2461 umtxq_unbusy(&uq->uq_key); 2462 umtxq_unlock(&uq->uq_key); 2463 umtx_key_release(&uq->uq_key); 2464 if (error == 0 && old_ceiling != NULL) { 2465 rv = suword32(old_ceiling, save_ceiling); 2466 error = rv == 0 ? 0 : EFAULT; 2467 } 2468 return (error); 2469 } 2470 2471 /* 2472 * Lock a userland POSIX mutex. 
2473 */ 2474 static int 2475 do_lock_umutex(struct thread *td, struct umutex *m, 2476 struct _umtx_time *timeout, int mode) 2477 { 2478 uint32_t flags; 2479 int error; 2480 2481 error = fueword32(&m->m_flags, &flags); 2482 if (error == -1) 2483 return (EFAULT); 2484 2485 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2486 case 0: 2487 error = do_lock_normal(td, m, flags, timeout, mode); 2488 break; 2489 case UMUTEX_PRIO_INHERIT: 2490 error = do_lock_pi(td, m, flags, timeout, mode); 2491 break; 2492 case UMUTEX_PRIO_PROTECT: 2493 error = do_lock_pp(td, m, flags, timeout, mode); 2494 break; 2495 default: 2496 return (EINVAL); 2497 } 2498 if (timeout == NULL) { 2499 if (error == EINTR && mode != _UMUTEX_WAIT) 2500 error = ERESTART; 2501 } else { 2502 /* Timed-locking is not restarted. */ 2503 if (error == ERESTART) 2504 error = EINTR; 2505 } 2506 return (error); 2507 } 2508 2509 /* 2510 * Unlock a userland POSIX mutex. 2511 */ 2512 static int 2513 do_unlock_umutex(struct thread *td, struct umutex *m, bool rb) 2514 { 2515 uint32_t flags; 2516 int error; 2517 2518 error = fueword32(&m->m_flags, &flags); 2519 if (error == -1) 2520 return (EFAULT); 2521 2522 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2523 case 0: 2524 return (do_unlock_normal(td, m, flags, rb)); 2525 case UMUTEX_PRIO_INHERIT: 2526 return (do_unlock_pi(td, m, flags, rb)); 2527 case UMUTEX_PRIO_PROTECT: 2528 return (do_unlock_pp(td, m, flags, rb)); 2529 } 2530 2531 return (EINVAL); 2532 } 2533 2534 static int 2535 do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m, 2536 struct timespec *timeout, u_long wflags) 2537 { 2538 struct abs_timeout timo; 2539 struct umtx_q *uq; 2540 uint32_t flags, clockid, hasw; 2541 int error; 2542 2543 uq = td->td_umtxq; 2544 error = fueword32(&cv->c_flags, &flags); 2545 if (error == -1) 2546 return (EFAULT); 2547 error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key); 2548 if (error != 0) 2549 return (error); 2550 2551 if ((wflags & CVWAIT_CLOCKID) != 0) { 2552 error = fueword32(&cv->c_clockid, &clockid); 2553 if (error == -1) { 2554 umtx_key_release(&uq->uq_key); 2555 return (EFAULT); 2556 } 2557 if (clockid < CLOCK_REALTIME || 2558 clockid >= CLOCK_THREAD_CPUTIME_ID) { 2559 /* hmm, only HW clock id will work. */ 2560 umtx_key_release(&uq->uq_key); 2561 return (EINVAL); 2562 } 2563 } else { 2564 clockid = CLOCK_REALTIME; 2565 } 2566 2567 umtxq_lock(&uq->uq_key); 2568 umtxq_busy(&uq->uq_key); 2569 umtxq_insert(uq); 2570 umtxq_unlock(&uq->uq_key); 2571 2572 /* 2573 * Set c_has_waiters to 1 before releasing user mutex, also 2574 * don't modify cache line when unnecessary. 2575 */ 2576 error = fueword32(&cv->c_has_waiters, &hasw); 2577 if (error == 0 && hasw == 0) 2578 suword32(&cv->c_has_waiters, 1); 2579 2580 umtxq_unbusy_unlocked(&uq->uq_key); 2581 2582 error = do_unlock_umutex(td, m, false); 2583 2584 if (timeout != NULL) 2585 abs_timeout_init(&timo, clockid, (wflags & CVWAIT_ABSTIME) != 0, 2586 timeout); 2587 2588 umtxq_lock(&uq->uq_key); 2589 if (error == 0) { 2590 error = umtxq_sleep(uq, "ucond", timeout == NULL ? 2591 NULL : &timo); 2592 } 2593 2594 if ((uq->uq_flags & UQF_UMTXQ) == 0) 2595 error = 0; 2596 else { 2597 /* 2598 * This must be timeout,interrupted by signal or 2599 * surprious wakeup, clear c_has_waiter flag when 2600 * necessary. 
2601 */ 2602 umtxq_busy(&uq->uq_key); 2603 if ((uq->uq_flags & UQF_UMTXQ) != 0) { 2604 int oldlen = uq->uq_cur_queue->length; 2605 umtxq_remove(uq); 2606 if (oldlen == 1) { 2607 umtxq_unlock(&uq->uq_key); 2608 suword32(&cv->c_has_waiters, 0); 2609 umtxq_lock(&uq->uq_key); 2610 } 2611 } 2612 umtxq_unbusy(&uq->uq_key); 2613 if (error == ERESTART) 2614 error = EINTR; 2615 } 2616 2617 umtxq_unlock(&uq->uq_key); 2618 umtx_key_release(&uq->uq_key); 2619 return (error); 2620 } 2621 2622 /* 2623 * Signal a userland condition variable. 2624 */ 2625 static int 2626 do_cv_signal(struct thread *td, struct ucond *cv) 2627 { 2628 struct umtx_key key; 2629 int error, cnt, nwake; 2630 uint32_t flags; 2631 2632 error = fueword32(&cv->c_flags, &flags); 2633 if (error == -1) 2634 return (EFAULT); 2635 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2636 return (error); 2637 umtxq_lock(&key); 2638 umtxq_busy(&key); 2639 cnt = umtxq_count(&key); 2640 nwake = umtxq_signal(&key, 1); 2641 if (cnt <= nwake) { 2642 umtxq_unlock(&key); 2643 error = suword32(&cv->c_has_waiters, 0); 2644 if (error == -1) 2645 error = EFAULT; 2646 umtxq_lock(&key); 2647 } 2648 umtxq_unbusy(&key); 2649 umtxq_unlock(&key); 2650 umtx_key_release(&key); 2651 return (error); 2652 } 2653 2654 static int 2655 do_cv_broadcast(struct thread *td, struct ucond *cv) 2656 { 2657 struct umtx_key key; 2658 int error; 2659 uint32_t flags; 2660 2661 error = fueword32(&cv->c_flags, &flags); 2662 if (error == -1) 2663 return (EFAULT); 2664 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2665 return (error); 2666 2667 umtxq_lock(&key); 2668 umtxq_busy(&key); 2669 umtxq_signal(&key, INT_MAX); 2670 umtxq_unlock(&key); 2671 2672 error = suword32(&cv->c_has_waiters, 0); 2673 if (error == -1) 2674 error = EFAULT; 2675 2676 umtxq_unbusy_unlocked(&key); 2677 2678 umtx_key_release(&key); 2679 return (error); 2680 } 2681 2682 static int 2683 do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, 2684 struct _umtx_time *timeout) 2685 { 2686 struct abs_timeout timo; 2687 struct umtx_q *uq; 2688 uint32_t flags, wrflags; 2689 int32_t state, oldstate; 2690 int32_t blocked_readers; 2691 int error, error1, rv; 2692 2693 uq = td->td_umtxq; 2694 error = fueword32(&rwlock->rw_flags, &flags); 2695 if (error == -1) 2696 return (EFAULT); 2697 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2698 if (error != 0) 2699 return (error); 2700 2701 if (timeout != NULL) 2702 abs_timeout_init2(&timo, timeout); 2703 2704 wrflags = URWLOCK_WRITE_OWNER; 2705 if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER)) 2706 wrflags |= URWLOCK_WRITE_WAITERS; 2707 2708 for (;;) { 2709 rv = fueword32(&rwlock->rw_state, &state); 2710 if (rv == -1) { 2711 umtx_key_release(&uq->uq_key); 2712 return (EFAULT); 2713 } 2714 2715 /* try to lock it */ 2716 while (!(state & wrflags)) { 2717 if (__predict_false(URWLOCK_READER_COUNT(state) == 2718 URWLOCK_MAX_READERS)) { 2719 umtx_key_release(&uq->uq_key); 2720 return (EAGAIN); 2721 } 2722 rv = casueword32(&rwlock->rw_state, state, 2723 &oldstate, state + 1); 2724 if (rv == -1) { 2725 umtx_key_release(&uq->uq_key); 2726 return (EFAULT); 2727 } 2728 if (rv == 0) { 2729 MPASS(oldstate == state); 2730 umtx_key_release(&uq->uq_key); 2731 return (0); 2732 } 2733 error = thread_check_susp(td, true); 2734 if (error != 0) 2735 break; 2736 state = oldstate; 2737 } 2738 2739 if (error) 2740 break; 2741 2742 /* grab monitor lock */ 2743 umtxq_lock(&uq->uq_key); 2744 
umtxq_busy(&uq->uq_key); 2745 umtxq_unlock(&uq->uq_key); 2746 2747 /* 2748 * re-read the state, in case it changed between the try-lock above 2749 * and the check below 2750 */ 2751 rv = fueword32(&rwlock->rw_state, &state); 2752 if (rv == -1) 2753 error = EFAULT; 2754 2755 /* set read contention bit */ 2756 while (error == 0 && (state & wrflags) && 2757 !(state & URWLOCK_READ_WAITERS)) { 2758 rv = casueword32(&rwlock->rw_state, state, 2759 &oldstate, state | URWLOCK_READ_WAITERS); 2760 if (rv == -1) { 2761 error = EFAULT; 2762 break; 2763 } 2764 if (rv == 0) { 2765 MPASS(oldstate == state); 2766 goto sleep; 2767 } 2768 state = oldstate; 2769 error = thread_check_susp(td, false); 2770 if (error != 0) 2771 break; 2772 } 2773 if (error != 0) { 2774 umtxq_unbusy_unlocked(&uq->uq_key); 2775 break; 2776 } 2777 2778 /* state is changed while setting flags, restart */ 2779 if (!(state & wrflags)) { 2780 umtxq_unbusy_unlocked(&uq->uq_key); 2781 error = thread_check_susp(td, true); 2782 if (error != 0) 2783 break; 2784 continue; 2785 } 2786 2787 sleep: 2788 /* 2789 * Contention bit is set, before sleeping, increase 2790 * read waiter count. 2791 */ 2792 rv = fueword32(&rwlock->rw_blocked_readers, 2793 &blocked_readers); 2794 if (rv == -1) { 2795 umtxq_unbusy_unlocked(&uq->uq_key); 2796 error = EFAULT; 2797 break; 2798 } 2799 suword32(&rwlock->rw_blocked_readers, blocked_readers+1); 2800 2801 while (state & wrflags) { 2802 umtxq_lock(&uq->uq_key); 2803 umtxq_insert(uq); 2804 umtxq_unbusy(&uq->uq_key); 2805 2806 error = umtxq_sleep(uq, "urdlck", timeout == NULL ? 2807 NULL : &timo); 2808 2809 umtxq_busy(&uq->uq_key); 2810 umtxq_remove(uq); 2811 umtxq_unlock(&uq->uq_key); 2812 if (error) 2813 break; 2814 rv = fueword32(&rwlock->rw_state, &state); 2815 if (rv == -1) { 2816 error = EFAULT; 2817 break; 2818 } 2819 } 2820 2821 /* decrease read waiter count, and may clear read contention bit */ 2822 rv = fueword32(&rwlock->rw_blocked_readers, 2823 &blocked_readers); 2824 if (rv == -1) { 2825 umtxq_unbusy_unlocked(&uq->uq_key); 2826 error = EFAULT; 2827 break; 2828 } 2829 suword32(&rwlock->rw_blocked_readers, blocked_readers-1); 2830 if (blocked_readers == 1) { 2831 rv = fueword32(&rwlock->rw_state, &state); 2832 if (rv == -1) { 2833 umtxq_unbusy_unlocked(&uq->uq_key); 2834 error = EFAULT; 2835 break; 2836 } 2837 for (;;) { 2838 rv = casueword32(&rwlock->rw_state, state, 2839 &oldstate, state & ~URWLOCK_READ_WAITERS); 2840 if (rv == -1) { 2841 error = EFAULT; 2842 break; 2843 } 2844 if (rv == 0) { 2845 MPASS(oldstate == state); 2846 break; 2847 } 2848 state = oldstate; 2849 error1 = thread_check_susp(td, false); 2850 if (error1 != 0) { 2851 if (error == 0) 2852 error = error1; 2853 break; 2854 } 2855 } 2856 } 2857 2858 umtxq_unbusy_unlocked(&uq->uq_key); 2859 if (error != 0) 2860 break; 2861 } 2862 umtx_key_release(&uq->uq_key); 2863 if (error == ERESTART) 2864 error = EINTR; 2865 return (error); 2866 } 2867 2868 static int 2869 do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout) 2870 { 2871 struct abs_timeout timo; 2872 struct umtx_q *uq; 2873 uint32_t flags; 2874 int32_t state, oldstate; 2875 int32_t blocked_writers; 2876 int32_t blocked_readers; 2877 int error, error1, rv; 2878 2879 uq = td->td_umtxq; 2880 error = fueword32(&rwlock->rw_flags, &flags); 2881 if (error == -1) 2882 return (EFAULT); 2883 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2884 if (error != 0) 2885 return (error); 2886 2887 if (timeout != NULL) 2888 abs_timeout_init2(&timo, 
timeout); 2889 2890 blocked_readers = 0; 2891 for (;;) { 2892 rv = fueword32(&rwlock->rw_state, &state); 2893 if (rv == -1) { 2894 umtx_key_release(&uq->uq_key); 2895 return (EFAULT); 2896 } 2897 while ((state & URWLOCK_WRITE_OWNER) == 0 && 2898 URWLOCK_READER_COUNT(state) == 0) { 2899 rv = casueword32(&rwlock->rw_state, state, 2900 &oldstate, state | URWLOCK_WRITE_OWNER); 2901 if (rv == -1) { 2902 umtx_key_release(&uq->uq_key); 2903 return (EFAULT); 2904 } 2905 if (rv == 0) { 2906 MPASS(oldstate == state); 2907 umtx_key_release(&uq->uq_key); 2908 return (0); 2909 } 2910 state = oldstate; 2911 error = thread_check_susp(td, true); 2912 if (error != 0) 2913 break; 2914 } 2915 2916 if (error) { 2917 if ((state & (URWLOCK_WRITE_OWNER | 2918 URWLOCK_WRITE_WAITERS)) == 0 && 2919 blocked_readers != 0) { 2920 umtxq_lock(&uq->uq_key); 2921 umtxq_busy(&uq->uq_key); 2922 umtxq_signal_queue(&uq->uq_key, INT_MAX, 2923 UMTX_SHARED_QUEUE); 2924 umtxq_unbusy(&uq->uq_key); 2925 umtxq_unlock(&uq->uq_key); 2926 } 2927 2928 break; 2929 } 2930 2931 /* grab monitor lock */ 2932 umtxq_lock(&uq->uq_key); 2933 umtxq_busy(&uq->uq_key); 2934 umtxq_unlock(&uq->uq_key); 2935 2936 /* 2937 * Re-read the state, in case it changed between the 2938 * try-lock above and the check below. 2939 */ 2940 rv = fueword32(&rwlock->rw_state, &state); 2941 if (rv == -1) 2942 error = EFAULT; 2943 2944 while (error == 0 && ((state & URWLOCK_WRITE_OWNER) || 2945 URWLOCK_READER_COUNT(state) != 0) && 2946 (state & URWLOCK_WRITE_WAITERS) == 0) { 2947 rv = casueword32(&rwlock->rw_state, state, 2948 &oldstate, state | URWLOCK_WRITE_WAITERS); 2949 if (rv == -1) { 2950 error = EFAULT; 2951 break; 2952 } 2953 if (rv == 0) { 2954 MPASS(oldstate == state); 2955 goto sleep; 2956 } 2957 state = oldstate; 2958 error = thread_check_susp(td, false); 2959 if (error != 0) 2960 break; 2961 } 2962 if (error != 0) { 2963 umtxq_unbusy_unlocked(&uq->uq_key); 2964 break; 2965 } 2966 2967 if ((state & URWLOCK_WRITE_OWNER) == 0 && 2968 URWLOCK_READER_COUNT(state) == 0) { 2969 umtxq_unbusy_unlocked(&uq->uq_key); 2970 error = thread_check_susp(td, false); 2971 if (error != 0) 2972 break; 2973 continue; 2974 } 2975 sleep: 2976 rv = fueword32(&rwlock->rw_blocked_writers, 2977 &blocked_writers); 2978 if (rv == -1) { 2979 umtxq_unbusy_unlocked(&uq->uq_key); 2980 error = EFAULT; 2981 break; 2982 } 2983 suword32(&rwlock->rw_blocked_writers, blocked_writers + 1); 2984 2985 while ((state & URWLOCK_WRITE_OWNER) || 2986 URWLOCK_READER_COUNT(state) != 0) { 2987 umtxq_lock(&uq->uq_key); 2988 umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE); 2989 umtxq_unbusy(&uq->uq_key); 2990 2991 error = umtxq_sleep(uq, "uwrlck", timeout == NULL ? 
2992 NULL : &timo); 2993 2994 umtxq_busy(&uq->uq_key); 2995 umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE); 2996 umtxq_unlock(&uq->uq_key); 2997 if (error) 2998 break; 2999 rv = fueword32(&rwlock->rw_state, &state); 3000 if (rv == -1) { 3001 error = EFAULT; 3002 break; 3003 } 3004 } 3005 3006 rv = fueword32(&rwlock->rw_blocked_writers, 3007 &blocked_writers); 3008 if (rv == -1) { 3009 umtxq_unbusy_unlocked(&uq->uq_key); 3010 error = EFAULT; 3011 break; 3012 } 3013 suword32(&rwlock->rw_blocked_writers, blocked_writers-1); 3014 if (blocked_writers == 1) { 3015 rv = fueword32(&rwlock->rw_state, &state); 3016 if (rv == -1) { 3017 umtxq_unbusy_unlocked(&uq->uq_key); 3018 error = EFAULT; 3019 break; 3020 } 3021 for (;;) { 3022 rv = casueword32(&rwlock->rw_state, state, 3023 &oldstate, state & ~URWLOCK_WRITE_WAITERS); 3024 if (rv == -1) { 3025 error = EFAULT; 3026 break; 3027 } 3028 if (rv == 0) { 3029 MPASS(oldstate == state); 3030 break; 3031 } 3032 state = oldstate; 3033 error1 = thread_check_susp(td, false); 3034 /* 3035 * We are leaving the URWLOCK_WRITE_WAITERS 3036 * behind, but this should not harm the 3037 * correctness. 3038 */ 3039 if (error1 != 0) { 3040 if (error == 0) 3041 error = error1; 3042 break; 3043 } 3044 } 3045 rv = fueword32(&rwlock->rw_blocked_readers, 3046 &blocked_readers); 3047 if (rv == -1) { 3048 umtxq_unbusy_unlocked(&uq->uq_key); 3049 error = EFAULT; 3050 break; 3051 } 3052 } else 3053 blocked_readers = 0; 3054 3055 umtxq_unbusy_unlocked(&uq->uq_key); 3056 } 3057 3058 umtx_key_release(&uq->uq_key); 3059 if (error == ERESTART) 3060 error = EINTR; 3061 return (error); 3062 } 3063 3064 static int 3065 do_rw_unlock(struct thread *td, struct urwlock *rwlock) 3066 { 3067 struct umtx_q *uq; 3068 uint32_t flags; 3069 int32_t state, oldstate; 3070 int error, rv, q, count; 3071 3072 uq = td->td_umtxq; 3073 error = fueword32(&rwlock->rw_flags, &flags); 3074 if (error == -1) 3075 return (EFAULT); 3076 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3077 if (error != 0) 3078 return (error); 3079 3080 error = fueword32(&rwlock->rw_state, &state); 3081 if (error == -1) { 3082 error = EFAULT; 3083 goto out; 3084 } 3085 if (state & URWLOCK_WRITE_OWNER) { 3086 for (;;) { 3087 rv = casueword32(&rwlock->rw_state, state, 3088 &oldstate, state & ~URWLOCK_WRITE_OWNER); 3089 if (rv == -1) { 3090 error = EFAULT; 3091 goto out; 3092 } 3093 if (rv == 1) { 3094 state = oldstate; 3095 if (!(oldstate & URWLOCK_WRITE_OWNER)) { 3096 error = EPERM; 3097 goto out; 3098 } 3099 error = thread_check_susp(td, true); 3100 if (error != 0) 3101 goto out; 3102 } else 3103 break; 3104 } 3105 } else if (URWLOCK_READER_COUNT(state) != 0) { 3106 for (;;) { 3107 rv = casueword32(&rwlock->rw_state, state, 3108 &oldstate, state - 1); 3109 if (rv == -1) { 3110 error = EFAULT; 3111 goto out; 3112 } 3113 if (rv == 1) { 3114 state = oldstate; 3115 if (URWLOCK_READER_COUNT(oldstate) == 0) { 3116 error = EPERM; 3117 goto out; 3118 } 3119 error = thread_check_susp(td, true); 3120 if (error != 0) 3121 goto out; 3122 } else 3123 break; 3124 } 3125 } else { 3126 error = EPERM; 3127 goto out; 3128 } 3129 3130 count = 0; 3131 3132 if (!(flags & URWLOCK_PREFER_READER)) { 3133 if (state & URWLOCK_WRITE_WAITERS) { 3134 count = 1; 3135 q = UMTX_EXCLUSIVE_QUEUE; 3136 } else if (state & URWLOCK_READ_WAITERS) { 3137 count = INT_MAX; 3138 q = UMTX_SHARED_QUEUE; 3139 } 3140 } else { 3141 if (state & URWLOCK_READ_WAITERS) { 3142 count = INT_MAX; 3143 q = UMTX_SHARED_QUEUE; 3144 } else if (state & 
URWLOCK_WRITE_WAITERS) { 3145 count = 1; 3146 q = UMTX_EXCLUSIVE_QUEUE; 3147 } 3148 } 3149 3150 if (count) { 3151 umtxq_lock(&uq->uq_key); 3152 umtxq_busy(&uq->uq_key); 3153 umtxq_signal_queue(&uq->uq_key, count, q); 3154 umtxq_unbusy(&uq->uq_key); 3155 umtxq_unlock(&uq->uq_key); 3156 } 3157 out: 3158 umtx_key_release(&uq->uq_key); 3159 return (error); 3160 } 3161 3162 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3163 static int 3164 do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout) 3165 { 3166 struct abs_timeout timo; 3167 struct umtx_q *uq; 3168 uint32_t flags, count, count1; 3169 int error, rv, rv1; 3170 3171 uq = td->td_umtxq; 3172 error = fueword32(&sem->_flags, &flags); 3173 if (error == -1) 3174 return (EFAULT); 3175 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3176 if (error != 0) 3177 return (error); 3178 3179 if (timeout != NULL) 3180 abs_timeout_init2(&timo, timeout); 3181 3182 again: 3183 umtxq_lock(&uq->uq_key); 3184 umtxq_busy(&uq->uq_key); 3185 umtxq_insert(uq); 3186 umtxq_unlock(&uq->uq_key); 3187 rv = casueword32(&sem->_has_waiters, 0, &count1, 1); 3188 if (rv == 0) 3189 rv1 = fueword32(&sem->_count, &count); 3190 if (rv == -1 || (rv == 0 && (rv1 == -1 || count != 0)) || 3191 (rv == 1 && count1 == 0)) { 3192 umtxq_lock(&uq->uq_key); 3193 umtxq_unbusy(&uq->uq_key); 3194 umtxq_remove(uq); 3195 umtxq_unlock(&uq->uq_key); 3196 if (rv == 1) { 3197 rv = thread_check_susp(td, true); 3198 if (rv == 0) 3199 goto again; 3200 error = rv; 3201 goto out; 3202 } 3203 if (rv == 0) 3204 rv = rv1; 3205 error = rv == -1 ? EFAULT : 0; 3206 goto out; 3207 } 3208 umtxq_lock(&uq->uq_key); 3209 umtxq_unbusy(&uq->uq_key); 3210 3211 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 3212 3213 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3214 error = 0; 3215 else { 3216 umtxq_remove(uq); 3217 /* A relative timeout cannot be restarted. */ 3218 if (error == ERESTART && timeout != NULL && 3219 (timeout->_flags & UMTX_ABSTIME) == 0) 3220 error = EINTR; 3221 } 3222 umtxq_unlock(&uq->uq_key); 3223 out: 3224 umtx_key_release(&uq->uq_key); 3225 return (error); 3226 } 3227 3228 /* 3229 * Signal a userland semaphore. 3230 */ 3231 static int 3232 do_sem_wake(struct thread *td, struct _usem *sem) 3233 { 3234 struct umtx_key key; 3235 int error, cnt; 3236 uint32_t flags; 3237 3238 error = fueword32(&sem->_flags, &flags); 3239 if (error == -1) 3240 return (EFAULT); 3241 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3242 return (error); 3243 umtxq_lock(&key); 3244 umtxq_busy(&key); 3245 cnt = umtxq_count(&key); 3246 if (cnt > 0) { 3247 /* 3248 * Check if count is greater than 0, this means the memory is 3249 * still being referenced by user code, so we can safely 3250 * update _has_waiters flag. 
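 *
 * (When exactly one waiter remains it is about to be woken here, so
 * _has_waiters is cleared first; the umtxq lock is dropped around
 * the suword32() store because writes to user memory may fault and
 * sleep.)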
3251 */ 3252 if (cnt == 1) { 3253 umtxq_unlock(&key); 3254 error = suword32(&sem->_has_waiters, 0); 3255 umtxq_lock(&key); 3256 if (error == -1) 3257 error = EFAULT; 3258 } 3259 umtxq_signal(&key, 1); 3260 } 3261 umtxq_unbusy(&key); 3262 umtxq_unlock(&key); 3263 umtx_key_release(&key); 3264 return (error); 3265 } 3266 #endif 3267 3268 static int 3269 do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout) 3270 { 3271 struct abs_timeout timo; 3272 struct umtx_q *uq; 3273 uint32_t count, flags; 3274 int error, rv; 3275 3276 uq = td->td_umtxq; 3277 flags = fuword32(&sem->_flags); 3278 if (timeout != NULL) 3279 abs_timeout_init2(&timo, timeout); 3280 3281 again: 3282 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3283 if (error != 0) 3284 return (error); 3285 umtxq_lock(&uq->uq_key); 3286 umtxq_busy(&uq->uq_key); 3287 umtxq_insert(uq); 3288 umtxq_unlock(&uq->uq_key); 3289 rv = fueword32(&sem->_count, &count); 3290 if (rv == -1) { 3291 umtxq_lock(&uq->uq_key); 3292 umtxq_unbusy(&uq->uq_key); 3293 umtxq_remove(uq); 3294 umtxq_unlock(&uq->uq_key); 3295 umtx_key_release(&uq->uq_key); 3296 return (EFAULT); 3297 } 3298 for (;;) { 3299 if (USEM_COUNT(count) != 0) { 3300 umtxq_lock(&uq->uq_key); 3301 umtxq_unbusy(&uq->uq_key); 3302 umtxq_remove(uq); 3303 umtxq_unlock(&uq->uq_key); 3304 umtx_key_release(&uq->uq_key); 3305 return (0); 3306 } 3307 if (count == USEM_HAS_WAITERS) 3308 break; 3309 rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS); 3310 if (rv == 0) 3311 break; 3312 umtxq_lock(&uq->uq_key); 3313 umtxq_unbusy(&uq->uq_key); 3314 umtxq_remove(uq); 3315 umtxq_unlock(&uq->uq_key); 3316 umtx_key_release(&uq->uq_key); 3317 if (rv == -1) 3318 return (EFAULT); 3319 rv = thread_check_susp(td, true); 3320 if (rv != 0) 3321 return (rv); 3322 goto again; 3323 } 3324 umtxq_lock(&uq->uq_key); 3325 umtxq_unbusy(&uq->uq_key); 3326 3327 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 3328 3329 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3330 error = 0; 3331 else { 3332 umtxq_remove(uq); 3333 if (timeout != NULL && (timeout->_flags & UMTX_ABSTIME) == 0) { 3334 /* A relative timeout cannot be restarted. */ 3335 if (error == ERESTART) 3336 error = EINTR; 3337 if (error == EINTR) { 3338 abs_timeout_update(&timo); 3339 timespecsub(&timo.end, &timo.cur, 3340 &timeout->_timeout); 3341 } 3342 } 3343 } 3344 umtxq_unlock(&uq->uq_key); 3345 umtx_key_release(&uq->uq_key); 3346 return (error); 3347 } 3348 3349 /* 3350 * Signal a userland semaphore. 3351 */ 3352 static int 3353 do_sem2_wake(struct thread *td, struct _usem2 *sem) 3354 { 3355 struct umtx_key key; 3356 int error, cnt, rv; 3357 uint32_t count, flags; 3358 3359 rv = fueword32(&sem->_flags, &flags); 3360 if (rv == -1) 3361 return (EFAULT); 3362 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3363 return (error); 3364 umtxq_lock(&key); 3365 umtxq_busy(&key); 3366 cnt = umtxq_count(&key); 3367 if (cnt > 0) { 3368 /* 3369 * If this was the last sleeping thread, clear the waiters 3370 * flag in _count. 
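 *
 * (The clear uses a compare-and-swap loop on sem->_count so that a
 * concurrent post which changes the semaphore value is not lost;
 * only the USEM_HAS_WAITERS bit is dropped, the count bits are
 * preserved.)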
3371 */ 3372 if (cnt == 1) { 3373 umtxq_unlock(&key); 3374 rv = fueword32(&sem->_count, &count); 3375 while (rv != -1 && count & USEM_HAS_WAITERS) { 3376 rv = casueword32(&sem->_count, count, &count, 3377 count & ~USEM_HAS_WAITERS); 3378 if (rv == 1) { 3379 rv = thread_check_susp(td, true); 3380 if (rv != 0) 3381 break; 3382 } 3383 } 3384 if (rv == -1) 3385 error = EFAULT; 3386 else if (rv > 0) { 3387 error = rv; 3388 } 3389 umtxq_lock(&key); 3390 } 3391 3392 umtxq_signal(&key, 1); 3393 } 3394 umtxq_unbusy(&key); 3395 umtxq_unlock(&key); 3396 umtx_key_release(&key); 3397 return (error); 3398 } 3399 3400 inline int 3401 umtx_copyin_timeout(const void *uaddr, struct timespec *tsp) 3402 { 3403 int error; 3404 3405 error = copyin(uaddr, tsp, sizeof(*tsp)); 3406 if (error == 0) { 3407 if (tsp->tv_sec < 0 || 3408 tsp->tv_nsec >= 1000000000 || 3409 tsp->tv_nsec < 0) 3410 error = EINVAL; 3411 } 3412 return (error); 3413 } 3414 3415 static inline int 3416 umtx_copyin_umtx_time(const void *uaddr, size_t size, struct _umtx_time *tp) 3417 { 3418 int error; 3419 3420 if (size <= sizeof(tp->_timeout)) { 3421 tp->_clockid = CLOCK_REALTIME; 3422 tp->_flags = 0; 3423 error = copyin(uaddr, &tp->_timeout, sizeof(tp->_timeout)); 3424 } else 3425 error = copyin(uaddr, tp, sizeof(*tp)); 3426 if (error != 0) 3427 return (error); 3428 if (tp->_timeout.tv_sec < 0 || 3429 tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0) 3430 return (EINVAL); 3431 return (0); 3432 } 3433 3434 static int 3435 umtx_copyin_robust_lists(const void *uaddr, size_t size, 3436 struct umtx_robust_lists_params *rb) 3437 { 3438 3439 if (size > sizeof(*rb)) 3440 return (EINVAL); 3441 return (copyin(uaddr, rb, size)); 3442 } 3443 3444 static int 3445 umtx_copyout_timeout(void *uaddr, size_t sz, struct timespec *tsp) 3446 { 3447 3448 /* 3449 * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time) 3450 * and we're only called if sz >= sizeof(timespec) as supplied in the 3451 * copyops. 
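 *
 * (For example, __umtx_op_sem2_wait() only calls through
 * ops->copyout_timeout when the caller supplied at least
 * umtx_time_sz + timespec_sz bytes.)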
3452 */ 3453 KASSERT(sz >= sizeof(*tsp), 3454 ("umtx_copyops specifies incorrect sizes")); 3455 3456 return (copyout(tsp, uaddr, sizeof(*tsp))); 3457 } 3458 3459 static int 3460 __umtx_op_unimpl(struct thread *td, struct _umtx_op_args *uap, 3461 const struct umtx_copyops *ops __unused) 3462 { 3463 3464 return (EOPNOTSUPP); 3465 } 3466 3467 static int 3468 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap, 3469 const struct umtx_copyops *ops) 3470 { 3471 struct _umtx_time timeout, *tm_p; 3472 int error; 3473 3474 if (uap->uaddr2 == NULL) 3475 tm_p = NULL; 3476 else { 3477 error = ops->copyin_umtx_time( 3478 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3479 if (error != 0) 3480 return (error); 3481 tm_p = &timeout; 3482 } 3483 return (do_wait(td, uap->obj, uap->val, tm_p, ops->compat32, 0)); 3484 } 3485 3486 static int 3487 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap, 3488 const struct umtx_copyops *ops) 3489 { 3490 struct _umtx_time timeout, *tm_p; 3491 int error; 3492 3493 if (uap->uaddr2 == NULL) 3494 tm_p = NULL; 3495 else { 3496 error = ops->copyin_umtx_time( 3497 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3498 if (error != 0) 3499 return (error); 3500 tm_p = &timeout; 3501 } 3502 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); 3503 } 3504 3505 static int 3506 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap, 3507 const struct umtx_copyops *ops) 3508 { 3509 struct _umtx_time *tm_p, timeout; 3510 int error; 3511 3512 if (uap->uaddr2 == NULL) 3513 tm_p = NULL; 3514 else { 3515 error = ops->copyin_umtx_time( 3516 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3517 if (error != 0) 3518 return (error); 3519 tm_p = &timeout; 3520 } 3521 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1)); 3522 } 3523 3524 static int 3525 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap, 3526 const struct umtx_copyops *ops __unused) 3527 { 3528 3529 return (kern_umtx_wake(td, uap->obj, uap->val, 0)); 3530 } 3531 3532 #define BATCH_SIZE 128 3533 static int 3534 __umtx_op_nwake_private_native(struct thread *td, struct _umtx_op_args *uap) 3535 { 3536 char *uaddrs[BATCH_SIZE], **upp; 3537 int count, error, i, pos, tocopy; 3538 3539 upp = (char **)uap->obj; 3540 error = 0; 3541 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 3542 pos += tocopy) { 3543 tocopy = MIN(count, BATCH_SIZE); 3544 error = copyin(upp + pos, uaddrs, tocopy * sizeof(char *)); 3545 if (error != 0) 3546 break; 3547 for (i = 0; i < tocopy; ++i) { 3548 kern_umtx_wake(td, uaddrs[i], INT_MAX, 1); 3549 } 3550 maybe_yield(); 3551 } 3552 return (error); 3553 } 3554 3555 static int 3556 __umtx_op_nwake_private_compat32(struct thread *td, struct _umtx_op_args *uap) 3557 { 3558 uint32_t uaddrs[BATCH_SIZE], *upp; 3559 int count, error, i, pos, tocopy; 3560 3561 upp = (uint32_t *)uap->obj; 3562 error = 0; 3563 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 3564 pos += tocopy) { 3565 tocopy = MIN(count, BATCH_SIZE); 3566 error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t)); 3567 if (error != 0) 3568 break; 3569 for (i = 0; i < tocopy; ++i) { 3570 kern_umtx_wake(td, (void *)(uintptr_t)uaddrs[i], 3571 INT_MAX, 1); 3572 } 3573 maybe_yield(); 3574 } 3575 return (error); 3576 } 3577 3578 static int 3579 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap, 3580 const struct umtx_copyops *ops) 3581 { 3582 3583 if (ops->compat32) 3584 return (__umtx_op_nwake_private_compat32(td, uap)); 3585 return (__umtx_op_nwake_private_native(td, uap)); 3586 
} 3587 3588 static int 3589 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap, 3590 const struct umtx_copyops *ops __unused) 3591 { 3592 3593 return (kern_umtx_wake(td, uap->obj, uap->val, 1)); 3594 } 3595 3596 static int 3597 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap, 3598 const struct umtx_copyops *ops) 3599 { 3600 struct _umtx_time *tm_p, timeout; 3601 int error; 3602 3603 /* Allow a null timespec (wait forever). */ 3604 if (uap->uaddr2 == NULL) 3605 tm_p = NULL; 3606 else { 3607 error = ops->copyin_umtx_time( 3608 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3609 if (error != 0) 3610 return (error); 3611 tm_p = &timeout; 3612 } 3613 return (do_lock_umutex(td, uap->obj, tm_p, 0)); 3614 } 3615 3616 static int 3617 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap, 3618 const struct umtx_copyops *ops __unused) 3619 { 3620 3621 return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY)); 3622 } 3623 3624 static int 3625 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap, 3626 const struct umtx_copyops *ops) 3627 { 3628 struct _umtx_time *tm_p, timeout; 3629 int error; 3630 3631 /* Allow a null timespec (wait forever). */ 3632 if (uap->uaddr2 == NULL) 3633 tm_p = NULL; 3634 else { 3635 error = ops->copyin_umtx_time( 3636 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3637 if (error != 0) 3638 return (error); 3639 tm_p = &timeout; 3640 } 3641 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT)); 3642 } 3643 3644 static int 3645 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap, 3646 const struct umtx_copyops *ops __unused) 3647 { 3648 3649 return (do_wake_umutex(td, uap->obj)); 3650 } 3651 3652 static int 3653 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap, 3654 const struct umtx_copyops *ops __unused) 3655 { 3656 3657 return (do_unlock_umutex(td, uap->obj, false)); 3658 } 3659 3660 static int 3661 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap, 3662 const struct umtx_copyops *ops __unused) 3663 { 3664 3665 return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1)); 3666 } 3667 3668 static int 3669 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap, 3670 const struct umtx_copyops *ops) 3671 { 3672 struct timespec *ts, timeout; 3673 int error; 3674 3675 /* Allow a null timespec (wait forever). */ 3676 if (uap->uaddr2 == NULL) 3677 ts = NULL; 3678 else { 3679 error = ops->copyin_timeout(uap->uaddr2, &timeout); 3680 if (error != 0) 3681 return (error); 3682 ts = &timeout; 3683 } 3684 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 3685 } 3686 3687 static int 3688 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap, 3689 const struct umtx_copyops *ops __unused) 3690 { 3691 3692 return (do_cv_signal(td, uap->obj)); 3693 } 3694 3695 static int 3696 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap, 3697 const struct umtx_copyops *ops __unused) 3698 { 3699 3700 return (do_cv_broadcast(td, uap->obj)); 3701 } 3702 3703 static int 3704 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap, 3705 const struct umtx_copyops *ops) 3706 { 3707 struct _umtx_time timeout; 3708 int error; 3709 3710 /* Allow a null timespec (wait forever). 
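 *
 * (When uaddr2 is non-NULL it points at a struct _umtx_time, or at a
 * bare struct timespec for callers passing only a timespec, and
 * uaddr1 carries the size of that object; see
 * umtx_copyin_umtx_time().)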
*/ 3711 if (uap->uaddr2 == NULL) { 3712 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 3713 } else { 3714 error = ops->copyin_umtx_time(uap->uaddr2, 3715 (size_t)uap->uaddr1, &timeout); 3716 if (error != 0) 3717 return (error); 3718 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 3719 } 3720 return (error); 3721 } 3722 3723 static int 3724 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap, 3725 const struct umtx_copyops *ops) 3726 { 3727 struct _umtx_time timeout; 3728 int error; 3729 3730 /* Allow a null timespec (wait forever). */ 3731 if (uap->uaddr2 == NULL) { 3732 error = do_rw_wrlock(td, uap->obj, 0); 3733 } else { 3734 error = ops->copyin_umtx_time(uap->uaddr2, 3735 (size_t)uap->uaddr1, &timeout); 3736 if (error != 0) 3737 return (error); 3738 3739 error = do_rw_wrlock(td, uap->obj, &timeout); 3740 } 3741 return (error); 3742 } 3743 3744 static int 3745 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap, 3746 const struct umtx_copyops *ops __unused) 3747 { 3748 3749 return (do_rw_unlock(td, uap->obj)); 3750 } 3751 3752 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3753 static int 3754 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap, 3755 const struct umtx_copyops *ops) 3756 { 3757 struct _umtx_time *tm_p, timeout; 3758 int error; 3759 3760 /* Allow a null timespec (wait forever). */ 3761 if (uap->uaddr2 == NULL) 3762 tm_p = NULL; 3763 else { 3764 error = ops->copyin_umtx_time( 3765 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3766 if (error != 0) 3767 return (error); 3768 tm_p = &timeout; 3769 } 3770 return (do_sem_wait(td, uap->obj, tm_p)); 3771 } 3772 3773 static int 3774 __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap, 3775 const struct umtx_copyops *ops __unused) 3776 { 3777 3778 return (do_sem_wake(td, uap->obj)); 3779 } 3780 #endif 3781 3782 static int 3783 __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap, 3784 const struct umtx_copyops *ops __unused) 3785 { 3786 3787 return (do_wake2_umutex(td, uap->obj, uap->val)); 3788 } 3789 3790 static int 3791 __umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap, 3792 const struct umtx_copyops *ops) 3793 { 3794 struct _umtx_time *tm_p, timeout; 3795 size_t uasize; 3796 int error; 3797 3798 /* Allow a null timespec (wait forever). 
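 *
 * (Unlike most timed operations here, on EINTR from a relative wait
 * this one also copies the remaining time back out to userland,
 * provided the caller passed a buffer large enough for both the
 * _umtx_time header and a trailing timespec.)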
*/ 3799 if (uap->uaddr2 == NULL) { 3800 uasize = 0; 3801 tm_p = NULL; 3802 } else { 3803 uasize = (size_t)uap->uaddr1; 3804 error = ops->copyin_umtx_time(uap->uaddr2, uasize, &timeout); 3805 if (error != 0) 3806 return (error); 3807 tm_p = &timeout; 3808 } 3809 error = do_sem2_wait(td, uap->obj, tm_p); 3810 if (error == EINTR && uap->uaddr2 != NULL && 3811 (timeout._flags & UMTX_ABSTIME) == 0 && 3812 uasize >= ops->umtx_time_sz + ops->timespec_sz) { 3813 error = ops->copyout_timeout( 3814 (void *)((uintptr_t)uap->uaddr2 + ops->umtx_time_sz), 3815 uasize - ops->umtx_time_sz, &timeout._timeout); 3816 if (error == 0) { 3817 error = EINTR; 3818 } 3819 } 3820 3821 return (error); 3822 } 3823 3824 static int 3825 __umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap, 3826 const struct umtx_copyops *ops __unused) 3827 { 3828 3829 return (do_sem2_wake(td, uap->obj)); 3830 } 3831 3832 #define USHM_OBJ_UMTX(o) \ 3833 ((struct umtx_shm_obj_list *)(&(o)->umtx_data)) 3834 3835 #define USHMF_REG_LINKED 0x0001 3836 #define USHMF_OBJ_LINKED 0x0002 3837 struct umtx_shm_reg { 3838 TAILQ_ENTRY(umtx_shm_reg) ushm_reg_link; 3839 LIST_ENTRY(umtx_shm_reg) ushm_obj_link; 3840 struct umtx_key ushm_key; 3841 struct ucred *ushm_cred; 3842 struct shmfd *ushm_obj; 3843 u_int ushm_refcnt; 3844 u_int ushm_flags; 3845 }; 3846 3847 LIST_HEAD(umtx_shm_obj_list, umtx_shm_reg); 3848 TAILQ_HEAD(umtx_shm_reg_head, umtx_shm_reg); 3849 3850 static uma_zone_t umtx_shm_reg_zone; 3851 static struct umtx_shm_reg_head umtx_shm_registry[UMTX_CHAINS]; 3852 static struct mtx umtx_shm_lock; 3853 static struct umtx_shm_reg_head umtx_shm_reg_delfree = 3854 TAILQ_HEAD_INITIALIZER(umtx_shm_reg_delfree); 3855 3856 static void umtx_shm_free_reg(struct umtx_shm_reg *reg); 3857 3858 static void 3859 umtx_shm_reg_delfree_tq(void *context __unused, int pending __unused) 3860 { 3861 struct umtx_shm_reg_head d; 3862 struct umtx_shm_reg *reg, *reg1; 3863 3864 TAILQ_INIT(&d); 3865 mtx_lock(&umtx_shm_lock); 3866 TAILQ_CONCAT(&d, &umtx_shm_reg_delfree, ushm_reg_link); 3867 mtx_unlock(&umtx_shm_lock); 3868 TAILQ_FOREACH_SAFE(reg, &d, ushm_reg_link, reg1) { 3869 TAILQ_REMOVE(&d, reg, ushm_reg_link); 3870 umtx_shm_free_reg(reg); 3871 } 3872 } 3873 3874 static struct task umtx_shm_reg_delfree_task = 3875 TASK_INITIALIZER(0, umtx_shm_reg_delfree_tq, NULL); 3876 3877 static struct umtx_shm_reg * 3878 umtx_shm_find_reg_locked(const struct umtx_key *key) 3879 { 3880 struct umtx_shm_reg *reg; 3881 struct umtx_shm_reg_head *reg_head; 3882 3883 KASSERT(key->shared, ("umtx_p_find_rg: private key")); 3884 mtx_assert(&umtx_shm_lock, MA_OWNED); 3885 reg_head = &umtx_shm_registry[key->hash]; 3886 TAILQ_FOREACH(reg, reg_head, ushm_reg_link) { 3887 KASSERT(reg->ushm_key.shared, 3888 ("non-shared key on reg %p %d", reg, reg->ushm_key.shared)); 3889 if (reg->ushm_key.info.shared.object == 3890 key->info.shared.object && 3891 reg->ushm_key.info.shared.offset == 3892 key->info.shared.offset) { 3893 KASSERT(reg->ushm_key.type == TYPE_SHM, ("TYPE_USHM")); 3894 KASSERT(reg->ushm_refcnt > 0, 3895 ("reg %p refcnt 0 onlist", reg)); 3896 KASSERT((reg->ushm_flags & USHMF_REG_LINKED) != 0, 3897 ("reg %p not linked", reg)); 3898 reg->ushm_refcnt++; 3899 return (reg); 3900 } 3901 } 3902 return (NULL); 3903 } 3904 3905 static struct umtx_shm_reg * 3906 umtx_shm_find_reg(const struct umtx_key *key) 3907 { 3908 struct umtx_shm_reg *reg; 3909 3910 mtx_lock(&umtx_shm_lock); 3911 reg = umtx_shm_find_reg_locked(key); 3912 mtx_unlock(&umtx_shm_lock); 3913 return (reg); 3914 } 3915 
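/*
 * Illustration only (see _umtx_op(2) for the contract): a process
 * that wants a file descriptor for the shared page backing a
 * process-shared word at "addr" would issue the UMTX_OP_SHM request
 * roughly as follows, the descriptor being returned as the syscall
 * value on success:
 *
 *	fd = _umtx_op(NULL, UMTX_OP_SHM, UMTX_SHM_CREAT, addr, NULL);
 *
 * UMTX_SHM_LOOKUP finds an existing registration, UMTX_SHM_DESTROY
 * drops it, and UMTX_SHM_ALIVE merely checks that the backing object
 * has not been marked OBJ_UMTXDEAD.  See umtx_shm() below.
 */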
3916 static void 3917 umtx_shm_free_reg(struct umtx_shm_reg *reg) 3918 { 3919 3920 chgumtxcnt(reg->ushm_cred->cr_ruidinfo, -1, 0); 3921 crfree(reg->ushm_cred); 3922 shm_drop(reg->ushm_obj); 3923 uma_zfree(umtx_shm_reg_zone, reg); 3924 } 3925 3926 static bool 3927 umtx_shm_unref_reg_locked(struct umtx_shm_reg *reg, bool force) 3928 { 3929 bool res; 3930 3931 mtx_assert(&umtx_shm_lock, MA_OWNED); 3932 KASSERT(reg->ushm_refcnt > 0, ("ushm_reg %p refcnt 0", reg)); 3933 reg->ushm_refcnt--; 3934 res = reg->ushm_refcnt == 0; 3935 if (res || force) { 3936 if ((reg->ushm_flags & USHMF_REG_LINKED) != 0) { 3937 TAILQ_REMOVE(&umtx_shm_registry[reg->ushm_key.hash], 3938 reg, ushm_reg_link); 3939 reg->ushm_flags &= ~USHMF_REG_LINKED; 3940 } 3941 if ((reg->ushm_flags & USHMF_OBJ_LINKED) != 0) { 3942 LIST_REMOVE(reg, ushm_obj_link); 3943 reg->ushm_flags &= ~USHMF_OBJ_LINKED; 3944 } 3945 } 3946 return (res); 3947 } 3948 3949 static void 3950 umtx_shm_unref_reg(struct umtx_shm_reg *reg, bool force) 3951 { 3952 vm_object_t object; 3953 bool dofree; 3954 3955 if (force) { 3956 object = reg->ushm_obj->shm_object; 3957 VM_OBJECT_WLOCK(object); 3958 object->flags |= OBJ_UMTXDEAD; 3959 VM_OBJECT_WUNLOCK(object); 3960 } 3961 mtx_lock(&umtx_shm_lock); 3962 dofree = umtx_shm_unref_reg_locked(reg, force); 3963 mtx_unlock(&umtx_shm_lock); 3964 if (dofree) 3965 umtx_shm_free_reg(reg); 3966 } 3967 3968 void 3969 umtx_shm_object_init(vm_object_t object) 3970 { 3971 3972 LIST_INIT(USHM_OBJ_UMTX(object)); 3973 } 3974 3975 void 3976 umtx_shm_object_terminated(vm_object_t object) 3977 { 3978 struct umtx_shm_reg *reg, *reg1; 3979 bool dofree; 3980 3981 if (LIST_EMPTY(USHM_OBJ_UMTX(object))) 3982 return; 3983 3984 dofree = false; 3985 mtx_lock(&umtx_shm_lock); 3986 LIST_FOREACH_SAFE(reg, USHM_OBJ_UMTX(object), ushm_obj_link, reg1) { 3987 if (umtx_shm_unref_reg_locked(reg, true)) { 3988 TAILQ_INSERT_TAIL(&umtx_shm_reg_delfree, reg, 3989 ushm_reg_link); 3990 dofree = true; 3991 } 3992 } 3993 mtx_unlock(&umtx_shm_lock); 3994 if (dofree) 3995 taskqueue_enqueue(taskqueue_thread, &umtx_shm_reg_delfree_task); 3996 } 3997 3998 static int 3999 umtx_shm_create_reg(struct thread *td, const struct umtx_key *key, 4000 struct umtx_shm_reg **res) 4001 { 4002 struct umtx_shm_reg *reg, *reg1; 4003 struct ucred *cred; 4004 int error; 4005 4006 reg = umtx_shm_find_reg(key); 4007 if (reg != NULL) { 4008 *res = reg; 4009 return (0); 4010 } 4011 cred = td->td_ucred; 4012 if (!chgumtxcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_UMTXP))) 4013 return (ENOMEM); 4014 reg = uma_zalloc(umtx_shm_reg_zone, M_WAITOK | M_ZERO); 4015 reg->ushm_refcnt = 1; 4016 bcopy(key, &reg->ushm_key, sizeof(*key)); 4017 reg->ushm_obj = shm_alloc(td->td_ucred, O_RDWR, false); 4018 reg->ushm_cred = crhold(cred); 4019 error = shm_dotruncate(reg->ushm_obj, PAGE_SIZE); 4020 if (error != 0) { 4021 umtx_shm_free_reg(reg); 4022 return (error); 4023 } 4024 mtx_lock(&umtx_shm_lock); 4025 reg1 = umtx_shm_find_reg_locked(key); 4026 if (reg1 != NULL) { 4027 mtx_unlock(&umtx_shm_lock); 4028 umtx_shm_free_reg(reg); 4029 *res = reg1; 4030 return (0); 4031 } 4032 reg->ushm_refcnt++; 4033 TAILQ_INSERT_TAIL(&umtx_shm_registry[key->hash], reg, ushm_reg_link); 4034 LIST_INSERT_HEAD(USHM_OBJ_UMTX(key->info.shared.object), reg, 4035 ushm_obj_link); 4036 reg->ushm_flags = USHMF_REG_LINKED | USHMF_OBJ_LINKED; 4037 mtx_unlock(&umtx_shm_lock); 4038 *res = reg; 4039 return (0); 4040 } 4041 4042 static int 4043 umtx_shm_alive(struct thread *td, void *addr) 4044 { 4045 vm_map_t map; 4046 vm_map_entry_t
entry; 4047 vm_object_t object; 4048 vm_pindex_t pindex; 4049 vm_prot_t prot; 4050 int res, ret; 4051 boolean_t wired; 4052 4053 map = &td->td_proc->p_vmspace->vm_map; 4054 res = vm_map_lookup(&map, (uintptr_t)addr, VM_PROT_READ, &entry, 4055 &object, &pindex, &prot, &wired); 4056 if (res != KERN_SUCCESS) 4057 return (EFAULT); 4058 if (object == NULL) 4059 ret = EINVAL; 4060 else 4061 ret = (object->flags & OBJ_UMTXDEAD) != 0 ? ENOTTY : 0; 4062 vm_map_lookup_done(map, entry); 4063 return (ret); 4064 } 4065 4066 static void 4067 umtx_shm_init(void) 4068 { 4069 int i; 4070 4071 umtx_shm_reg_zone = uma_zcreate("umtx_shm", sizeof(struct umtx_shm_reg), 4072 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 4073 mtx_init(&umtx_shm_lock, "umtxshm", NULL, MTX_DEF); 4074 for (i = 0; i < nitems(umtx_shm_registry); i++) 4075 TAILQ_INIT(&umtx_shm_registry[i]); 4076 } 4077 4078 static int 4079 umtx_shm(struct thread *td, void *addr, u_int flags) 4080 { 4081 struct umtx_key key; 4082 struct umtx_shm_reg *reg; 4083 struct file *fp; 4084 int error, fd; 4085 4086 if (__bitcount(flags & (UMTX_SHM_CREAT | UMTX_SHM_LOOKUP | 4087 UMTX_SHM_DESTROY| UMTX_SHM_ALIVE)) != 1) 4088 return (EINVAL); 4089 if ((flags & UMTX_SHM_ALIVE) != 0) 4090 return (umtx_shm_alive(td, addr)); 4091 error = umtx_key_get(addr, TYPE_SHM, PROCESS_SHARE, &key); 4092 if (error != 0) 4093 return (error); 4094 KASSERT(key.shared == 1, ("non-shared key")); 4095 if ((flags & UMTX_SHM_CREAT) != 0) { 4096 error = umtx_shm_create_reg(td, &key, ®); 4097 } else { 4098 reg = umtx_shm_find_reg(&key); 4099 if (reg == NULL) 4100 error = ESRCH; 4101 } 4102 umtx_key_release(&key); 4103 if (error != 0) 4104 return (error); 4105 KASSERT(reg != NULL, ("no reg")); 4106 if ((flags & UMTX_SHM_DESTROY) != 0) { 4107 umtx_shm_unref_reg(reg, true); 4108 } else { 4109 #if 0 4110 #ifdef MAC 4111 error = mac_posixshm_check_open(td->td_ucred, 4112 reg->ushm_obj, FFLAGS(O_RDWR)); 4113 if (error == 0) 4114 #endif 4115 error = shm_access(reg->ushm_obj, td->td_ucred, 4116 FFLAGS(O_RDWR)); 4117 if (error == 0) 4118 #endif 4119 error = falloc_caps(td, &fp, &fd, O_CLOEXEC, NULL); 4120 if (error == 0) { 4121 shm_hold(reg->ushm_obj); 4122 finit(fp, FFLAGS(O_RDWR), DTYPE_SHM, reg->ushm_obj, 4123 &shm_ops); 4124 td->td_retval[0] = fd; 4125 fdrop(fp, td); 4126 } 4127 } 4128 umtx_shm_unref_reg(reg, false); 4129 return (error); 4130 } 4131 4132 static int 4133 __umtx_op_shm(struct thread *td, struct _umtx_op_args *uap, 4134 const struct umtx_copyops *ops __unused) 4135 { 4136 4137 return (umtx_shm(td, uap->uaddr1, uap->val)); 4138 } 4139 4140 static int 4141 __umtx_op_robust_lists(struct thread *td, struct _umtx_op_args *uap, 4142 const struct umtx_copyops *ops) 4143 { 4144 struct umtx_robust_lists_params rb; 4145 int error; 4146 4147 if (ops->compat32) { 4148 if ((td->td_pflags2 & TDP2_COMPAT32RB) == 0 && 4149 (td->td_rb_list != 0 || td->td_rbp_list != 0 || 4150 td->td_rb_inact != 0)) 4151 return (EBUSY); 4152 } else if ((td->td_pflags2 & TDP2_COMPAT32RB) != 0) { 4153 return (EBUSY); 4154 } 4155 4156 bzero(&rb, sizeof(rb)); 4157 error = ops->copyin_robust_lists(uap->uaddr1, uap->val, &rb); 4158 if (error != 0) 4159 return (error); 4160 4161 if (ops->compat32) 4162 td->td_pflags2 |= TDP2_COMPAT32RB; 4163 4164 td->td_rb_list = rb.robust_list_offset; 4165 td->td_rbp_list = rb.robust_priv_list_offset; 4166 td->td_rb_inact = rb.robust_inact_offset; 4167 return (0); 4168 } 4169 4170 #if defined(__i386__) || defined(__amd64__) 4171 /* 4172 * Provide the standard 32-bit definitions for x86, 
since native/compat32 use a 4173 * 32-bit time_t there. Other architectures just need the i386 definitions 4174 * along with their standard compat32. 4175 */ 4176 struct timespecx32 { 4177 int64_t tv_sec; 4178 int32_t tv_nsec; 4179 }; 4180 4181 struct umtx_timex32 { 4182 struct timespecx32 _timeout; 4183 uint32_t _flags; 4184 uint32_t _clockid; 4185 }; 4186 4187 #ifndef __i386__ 4188 #define timespeci386 timespec32 4189 #define umtx_timei386 umtx_time32 4190 #endif 4191 #else /* !__i386__ && !__amd64__ */ 4192 /* 32-bit architectures can emulate i386, so define these almost everywhere. */ 4193 struct timespeci386 { 4194 int32_t tv_sec; 4195 int32_t tv_nsec; 4196 }; 4197 4198 struct umtx_timei386 { 4199 struct timespeci386 _timeout; 4200 uint32_t _flags; 4201 uint32_t _clockid; 4202 }; 4203 4204 #if defined(__LP64__) 4205 #define timespecx32 timespec32 4206 #define umtx_timex32 umtx_time32 4207 #endif 4208 #endif 4209 4210 static int 4211 umtx_copyin_robust_lists32(const void *uaddr, size_t size, 4212 struct umtx_robust_lists_params *rbp) 4213 { 4214 struct umtx_robust_lists_params_compat32 rb32; 4215 int error; 4216 4217 if (size > sizeof(rb32)) 4218 return (EINVAL); 4219 bzero(&rb32, sizeof(rb32)); 4220 error = copyin(uaddr, &rb32, size); 4221 if (error != 0) 4222 return (error); 4223 CP(rb32, *rbp, robust_list_offset); 4224 CP(rb32, *rbp, robust_priv_list_offset); 4225 CP(rb32, *rbp, robust_inact_offset); 4226 return (0); 4227 } 4228 4229 #ifndef __i386__ 4230 static inline int 4231 umtx_copyin_timeouti386(const void *uaddr, struct timespec *tsp) 4232 { 4233 struct timespeci386 ts32; 4234 int error; 4235 4236 error = copyin(uaddr, &ts32, sizeof(ts32)); 4237 if (error == 0) { 4238 if (ts32.tv_sec < 0 || 4239 ts32.tv_nsec >= 1000000000 || 4240 ts32.tv_nsec < 0) 4241 error = EINVAL; 4242 else { 4243 CP(ts32, *tsp, tv_sec); 4244 CP(ts32, *tsp, tv_nsec); 4245 } 4246 } 4247 return (error); 4248 } 4249 4250 static inline int 4251 umtx_copyin_umtx_timei386(const void *uaddr, size_t size, struct _umtx_time *tp) 4252 { 4253 struct umtx_timei386 t32; 4254 int error; 4255 4256 t32._clockid = CLOCK_REALTIME; 4257 t32._flags = 0; 4258 if (size <= sizeof(t32._timeout)) 4259 error = copyin(uaddr, &t32._timeout, sizeof(t32._timeout)); 4260 else 4261 error = copyin(uaddr, &t32, sizeof(t32)); 4262 if (error != 0) 4263 return (error); 4264 if (t32._timeout.tv_sec < 0 || 4265 t32._timeout.tv_nsec >= 1000000000 || t32._timeout.tv_nsec < 0) 4266 return (EINVAL); 4267 TS_CP(t32, *tp, _timeout); 4268 CP(t32, *tp, _flags); 4269 CP(t32, *tp, _clockid); 4270 return (0); 4271 } 4272 4273 static int 4274 umtx_copyout_timeouti386(void *uaddr, size_t sz, struct timespec *tsp) 4275 { 4276 struct timespeci386 remain32 = { 4277 .tv_sec = tsp->tv_sec, 4278 .tv_nsec = tsp->tv_nsec, 4279 }; 4280 4281 /* 4282 * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time) 4283 * and we're only called if sz >= sizeof(timespec) as supplied in the 4284 * copyops. 
4285 */ 4286 KASSERT(sz >= sizeof(remain32), 4287 ("umtx_copyops specifies incorrect sizes")); 4288 4289 return (copyout(&remain32, uaddr, sizeof(remain32))); 4290 } 4291 #endif /* !__i386__ */ 4292 4293 #if defined(__i386__) || defined(__LP64__) 4294 static inline int 4295 umtx_copyin_timeoutx32(const void *uaddr, struct timespec *tsp) 4296 { 4297 struct timespecx32 ts32; 4298 int error; 4299 4300 error = copyin(uaddr, &ts32, sizeof(ts32)); 4301 if (error == 0) { 4302 if (ts32.tv_sec < 0 || 4303 ts32.tv_nsec >= 1000000000 || 4304 ts32.tv_nsec < 0) 4305 error = EINVAL; 4306 else { 4307 CP(ts32, *tsp, tv_sec); 4308 CP(ts32, *tsp, tv_nsec); 4309 } 4310 } 4311 return (error); 4312 } 4313 4314 static inline int 4315 umtx_copyin_umtx_timex32(const void *uaddr, size_t size, struct _umtx_time *tp) 4316 { 4317 struct umtx_timex32 t32; 4318 int error; 4319 4320 t32._clockid = CLOCK_REALTIME; 4321 t32._flags = 0; 4322 if (size <= sizeof(t32._timeout)) 4323 error = copyin(uaddr, &t32._timeout, sizeof(t32._timeout)); 4324 else 4325 error = copyin(uaddr, &t32, sizeof(t32)); 4326 if (error != 0) 4327 return (error); 4328 if (t32._timeout.tv_sec < 0 || 4329 t32._timeout.tv_nsec >= 1000000000 || t32._timeout.tv_nsec < 0) 4330 return (EINVAL); 4331 TS_CP(t32, *tp, _timeout); 4332 CP(t32, *tp, _flags); 4333 CP(t32, *tp, _clockid); 4334 return (0); 4335 } 4336 4337 static int 4338 umtx_copyout_timeoutx32(void *uaddr, size_t sz, struct timespec *tsp) 4339 { 4340 struct timespecx32 remain32 = { 4341 .tv_sec = tsp->tv_sec, 4342 .tv_nsec = tsp->tv_nsec, 4343 }; 4344 4345 /* 4346 * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time) 4347 * and we're only called if sz >= sizeof(timespec) as supplied in the 4348 * copyops. 4349 */ 4350 KASSERT(sz >= sizeof(remain32), 4351 ("umtx_copyops specifies incorrect sizes")); 4352 4353 return (copyout(&remain32, uaddr, sizeof(remain32))); 4354 } 4355 #endif /* __i386__ || __LP64__ */ 4356 4357 typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap, 4358 const struct umtx_copyops *umtx_ops); 4359 4360 static const _umtx_op_func op_table[] = { 4361 [UMTX_OP_RESERVED0] = __umtx_op_unimpl, 4362 [UMTX_OP_RESERVED1] = __umtx_op_unimpl, 4363 [UMTX_OP_WAIT] = __umtx_op_wait, 4364 [UMTX_OP_WAKE] = __umtx_op_wake, 4365 [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex, 4366 [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex, 4367 [UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex, 4368 [UMTX_OP_SET_CEILING] = __umtx_op_set_ceiling, 4369 [UMTX_OP_CV_WAIT] = __umtx_op_cv_wait, 4370 [UMTX_OP_CV_SIGNAL] = __umtx_op_cv_signal, 4371 [UMTX_OP_CV_BROADCAST] = __umtx_op_cv_broadcast, 4372 [UMTX_OP_WAIT_UINT] = __umtx_op_wait_uint, 4373 [UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock, 4374 [UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock, 4375 [UMTX_OP_RW_UNLOCK] = __umtx_op_rw_unlock, 4376 [UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private, 4377 [UMTX_OP_WAKE_PRIVATE] = __umtx_op_wake_private, 4378 [UMTX_OP_MUTEX_WAIT] = __umtx_op_wait_umutex, 4379 [UMTX_OP_MUTEX_WAKE] = __umtx_op_wake_umutex, 4380 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4381 [UMTX_OP_SEM_WAIT] = __umtx_op_sem_wait, 4382 [UMTX_OP_SEM_WAKE] = __umtx_op_sem_wake, 4383 #else 4384 [UMTX_OP_SEM_WAIT] = __umtx_op_unimpl, 4385 [UMTX_OP_SEM_WAKE] = __umtx_op_unimpl, 4386 #endif 4387 [UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private, 4388 [UMTX_OP_MUTEX_WAKE2] = __umtx_op_wake2_umutex, 4389 [UMTX_OP_SEM2_WAIT] = __umtx_op_sem2_wait, 4390 [UMTX_OP_SEM2_WAKE] = __umtx_op_sem2_wake, 4391 
        [UMTX_OP_SHM] = __umtx_op_shm,
        [UMTX_OP_ROBUST_LISTS] = __umtx_op_robust_lists,
};

static const struct umtx_copyops umtx_native_ops = {
        .copyin_timeout = umtx_copyin_timeout,
        .copyin_umtx_time = umtx_copyin_umtx_time,
        .copyin_robust_lists = umtx_copyin_robust_lists,
        .copyout_timeout = umtx_copyout_timeout,
        .timespec_sz = sizeof(struct timespec),
        .umtx_time_sz = sizeof(struct _umtx_time),
};

#ifndef __i386__
static const struct umtx_copyops umtx_native_opsi386 = {
        .copyin_timeout = umtx_copyin_timeouti386,
        .copyin_umtx_time = umtx_copyin_umtx_timei386,
        .copyin_robust_lists = umtx_copyin_robust_lists32,
        .copyout_timeout = umtx_copyout_timeouti386,
        .timespec_sz = sizeof(struct timespeci386),
        .umtx_time_sz = sizeof(struct umtx_timei386),
        .compat32 = true,
};
#endif

#if defined(__i386__) || defined(__LP64__)
/* i386 can emulate other 32-bit archs, too! */
static const struct umtx_copyops umtx_native_opsx32 = {
        .copyin_timeout = umtx_copyin_timeoutx32,
        .copyin_umtx_time = umtx_copyin_umtx_timex32,
        .copyin_robust_lists = umtx_copyin_robust_lists32,
        .copyout_timeout = umtx_copyout_timeoutx32,
        .timespec_sz = sizeof(struct timespecx32),
        .umtx_time_sz = sizeof(struct umtx_timex32),
        .compat32 = true,
};

#ifdef COMPAT_FREEBSD32
#ifdef __amd64__
#define umtx_native_ops32       umtx_native_opsi386
#else
#define umtx_native_ops32       umtx_native_opsx32
#endif
#endif /* COMPAT_FREEBSD32 */
#endif /* __i386__ || __LP64__ */

#define UMTX_OP__FLAGS  (UMTX_OP__32BIT | UMTX_OP__I386)

static int
kern__umtx_op(struct thread *td, void *obj, int op, unsigned long val,
    void *uaddr1, void *uaddr2, const struct umtx_copyops *ops)
{
        struct _umtx_op_args uap = {
                .obj = obj,
                .op = op & ~UMTX_OP__FLAGS,
                .val = val,
                .uaddr1 = uaddr1,
                .uaddr2 = uaddr2
        };

        if (uap.op >= nitems(op_table))
                return (EINVAL);
        return ((*op_table[uap.op])(td, &uap, ops));
}

int
sys__umtx_op(struct thread *td, struct _umtx_op_args *uap)
{
        const struct umtx_copyops *umtx_ops;

        umtx_ops = &umtx_native_ops;
#ifdef __LP64__
        if ((uap->op & (UMTX_OP__32BIT | UMTX_OP__I386)) != 0) {
                if ((uap->op & UMTX_OP__I386) != 0)
                        umtx_ops = &umtx_native_opsi386;
                else
                        umtx_ops = &umtx_native_opsx32;
        }
#elif !defined(__i386__)
        /* We consider UMTX_OP__32BIT a nop on !i386 ILP32. */
        if ((uap->op & UMTX_OP__I386) != 0)
                umtx_ops = &umtx_native_opsi386;
#else
        /* Likewise, UMTX_OP__I386 is a nop on i386. */
        if ((uap->op & UMTX_OP__32BIT) != 0)
                umtx_ops = &umtx_native_opsx32;
#endif
        return (kern__umtx_op(td, uap->obj, uap->op, uap->val, uap->uaddr1,
            uap->uaddr2, umtx_ops));
}

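/*
 * Illustrative note, not an additional code path: the layout flags are
 * meant to be OR'ed into the op by the caller.  For example, a 64-bit
 * process operating on a 32-bit-layout object could issue (hypothetical
 * userland call, argument conventions as in _umtx_op(2)):
 *
 *	_umtx_op(obj32, UMTX_OP_WAIT_UINT | UMTX_OP__32BIT, val,
 *	    (void *)sizeof(tmo32), &tmo32);
 *
 * which selects umtx_native_opsx32 above, while UMTX_OP__I386 selects
 * the i386 layout with its 32-bit time_t.  Both flag bits are masked
 * off by kern__umtx_op() before indexing op_table.
 */
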
#ifdef COMPAT_FREEBSD32
int
freebsd32__umtx_op(struct thread *td, struct freebsd32__umtx_op_args *uap)
{

        return (kern__umtx_op(td, uap->obj, uap->op, uap->val, uap->uaddr,
            uap->uaddr2, &umtx_native_ops32));
}
#endif

void
umtx_thread_init(struct thread *td)
{

        td->td_umtxq = umtxq_alloc();
        td->td_umtxq->uq_thread = td;
}

void
umtx_thread_fini(struct thread *td)
{

        umtxq_free(td->td_umtxq);
}

/*
 * Called when a new thread is created, e.g. by fork().
 */
void
umtx_thread_alloc(struct thread *td)
{
        struct umtx_q *uq;

        uq = td->td_umtxq;
        uq->uq_inherited_pri = PRI_MAX;

        KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
        KASSERT(uq->uq_thread == td, ("uq_thread != td"));
        KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
        KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
}

/*
 * exec() hook.
 *
 * Clear the robust lists for all of the process' threads, rather than
 * delaying the cleanup until thread exit, since the relevant address
 * space is being destroyed right now.
 */
void
umtx_exec(struct proc *p)
{
        struct thread *td;

        KASSERT(p == curproc, ("need curproc"));
        KASSERT((p->p_flag & P_HADTHREADS) == 0 ||
            (p->p_flag & P_STOPPED_SINGLE) != 0,
            ("curproc must be single-threaded"));
        /*
         * There is no need to lock the list as only this thread can be
         * running.
         */
        FOREACH_THREAD_IN_PROC(p, td) {
                KASSERT(td == curthread ||
                    ((td->td_flags & TDF_BOUNDARY) != 0 && TD_IS_SUSPENDED(td)),
                    ("running thread %p %p", p, td));
                umtx_thread_cleanup(td);
                td->td_rb_list = td->td_rbp_list = td->td_rb_inact = 0;
        }
}

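/*
 * For orientation only (a sketch, not the authoritative userland code):
 * td_rb_list, td_rbp_list and td_rb_inact, cleared in umtx_exec() above
 * and consumed by umtx_thread_cleanup() below, are the addresses that a
 * threading library registers through UMTX_OP_ROBUST_LISTS; the
 * parameters are copied in by umtx_copyin_robust_lists() or the
 * compat32 variant earlier in this file.  A hypothetical registration
 * could look roughly like:
 *
 *	struct umtx_robust_lists_params rb = {
 *		.robust_list_offset = (uintptr_t)&tcb->robust_list,
 *		.robust_priv_list_offset = (uintptr_t)&tcb->priv_robust_list,
 *		.robust_inact_offset = (uintptr_t)&tcb->inact_mtx,
 *	};
 *	_umtx_op(NULL, UMTX_OP_ROBUST_LISTS, sizeof(rb), &rb, NULL);
 *
 * where tcb stands for a per-thread userland structure of the caller's
 * choosing.  Each registered list is a chain of struct umutex linked
 * through m_rb_lnk, walked at thread exit below.
 */
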
/*
 * Thread exit hook.
 */
void
umtx_thread_exit(struct thread *td)
{

        umtx_thread_cleanup(td);
}

static int
umtx_read_uptr(struct thread *td, uintptr_t ptr, uintptr_t *res, bool compat32)
{
        u_long res1;
        uint32_t res32;
        int error;

        if (compat32) {
                error = fueword32((void *)ptr, &res32);
                if (error == 0)
                        res1 = res32;
        } else {
                error = fueword((void *)ptr, &res1);
        }
        if (error == 0)
                *res = res1;
        else
                error = EFAULT;
        return (error);
}

static void
umtx_read_rb_list(struct thread *td, struct umutex *m, uintptr_t *rb_list,
    bool compat32)
{
        struct umutex32 m32;

        if (compat32) {
                memcpy(&m32, m, sizeof(m32));
                *rb_list = m32.m_rb_lnk;
        } else {
                *rb_list = m->m_rb_lnk;
        }
}

static int
umtx_handle_rb(struct thread *td, uintptr_t rbp, uintptr_t *rb_list, bool inact,
    bool compat32)
{
        struct umutex m;
        int error;

        KASSERT(td->td_proc == curproc, ("need current vmspace"));
        error = copyin((void *)rbp, &m, sizeof(m));
        if (error != 0)
                return (error);
        if (rb_list != NULL)
                umtx_read_rb_list(td, &m, rb_list, compat32);
        if ((m.m_flags & UMUTEX_ROBUST) == 0)
                return (EINVAL);
        if ((m.m_owner & ~UMUTEX_CONTESTED) != td->td_tid)
                /* inact is cleared after unlock, allow the inconsistency. */
                return (inact ? 0 : EINVAL);
        return (do_unlock_umutex(td, (struct umutex *)rbp, true));
}

static void
umtx_cleanup_rb_list(struct thread *td, uintptr_t rb_list, uintptr_t *rb_inact,
    const char *name, bool compat32)
{
        int error, i;
        uintptr_t rbp;
        bool inact;

        if (rb_list == 0)
                return;
        error = umtx_read_uptr(td, rb_list, &rbp, compat32);
        for (i = 0; error == 0 && rbp != 0 && i < umtx_max_rb; i++) {
                if (rbp == *rb_inact) {
                        inact = true;
                        *rb_inact = 0;
                } else
                        inact = false;
                error = umtx_handle_rb(td, rbp, &rbp, inact, compat32);
        }
        if (i == umtx_max_rb && umtx_verbose_rb) {
                uprintf("comm %s pid %d: reached umtx %smax rb %d\n",
                    td->td_proc->p_comm, td->td_proc->p_pid, name, umtx_max_rb);
        }
        if (error != 0 && umtx_verbose_rb) {
                uprintf("comm %s pid %d: handling %srb error %d\n",
                    td->td_proc->p_comm, td->td_proc->p_pid, name, error);
        }
}

/*
 * Clean up umtx data.
 */
static void
umtx_thread_cleanup(struct thread *td)
{
        struct umtx_q *uq;
        struct umtx_pi *pi;
        uintptr_t rb_inact;
        bool compat32;

        /*
         * Disown pi mutexes.
         */
        uq = td->td_umtxq;
        if (uq != NULL) {
                if (uq->uq_inherited_pri != PRI_MAX ||
                    !TAILQ_EMPTY(&uq->uq_pi_contested)) {
                        mtx_lock(&umtx_lock);
                        uq->uq_inherited_pri = PRI_MAX;
                        while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
                                pi->pi_owner = NULL;
                                TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
                        }
                        mtx_unlock(&umtx_lock);
                }
                sched_lend_user_prio_cond(td, PRI_MAX);
        }

        compat32 = (td->td_pflags2 & TDP2_COMPAT32RB) != 0;
        td->td_pflags2 &= ~TDP2_COMPAT32RB;

        if (td->td_rb_inact == 0 && td->td_rb_list == 0 && td->td_rbp_list == 0)
                return;

        /*
         * Handle terminated robust mutexes.  Must be done after
         * robust pi disown, otherwise unlock could see unowned
         * entries.
         */
        rb_inact = td->td_rb_inact;
        if (rb_inact != 0)
                (void)umtx_read_uptr(td, rb_inact, &rb_inact, compat32);
        umtx_cleanup_rb_list(td, td->td_rb_list, &rb_inact, "", compat32);
        umtx_cleanup_rb_list(td, td->td_rbp_list, &rb_inact, "priv ", compat32);
        if (rb_inact != 0)
                (void)umtx_handle_rb(td, rb_inact, NULL, true, compat32);
}