/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2015, 2016 The FreeBSD Foundation
 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Portions of this software were developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_umtx_profiling.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/syscallsubr.h>
#include <sys/taskqueue.h>
#include <sys/time.h>
#include <sys/eventhandler.h>
#include <sys/umtx.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

#include <machine/atomic.h>
#include <machine/cpu.h>

#ifdef COMPAT_FREEBSD32
#include <compat/freebsd32/freebsd32_proto.h>
#endif

#define	_UMUTEX_TRY		1
#define	_UMUTEX_WAIT		2

#ifdef UMTX_PROFILING
#define	UPROF_PERC_BIGGER(w, f, sw, sf)					\
	(((w) > (sw)) || ((w) == (sw) && (f) > (sf)))
#endif

/* Priority inheritance mutex info. */
struct umtx_pi {
	/* Owner thread */
	struct thread		*pi_owner;

	/* Reference count */
	int			pi_refcount;

	/* List entry to link umtxes held by a thread */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* List entry in hash */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* List for waiters */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Identify a userland lock object */
	struct umtx_key		pi_key;
};
/* A userland synchronization object user. */
struct umtx_q {
	/* Linked list for the hash. */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Umtx key. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define	UQF_UMTXQ	0x0001

	/* The waiting thread. */
	struct thread		*uq_thread;

	/*
	 * Blocked-on PI mutex.  Reads may be done while holding either
	 * the chain lock or umtx_lock; writes must hold both.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* On blocked list */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* Contested PI mutexes owned by this thread */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Inherited priority from PP mutex */
	u_char			uq_inherited_pri;

	/* Spare queue ready to be reused */
	struct umtxq_queue	*uq_spare_queue;

	/* The queue we are on */
	struct umtxq_queue	*uq_cur_queue;
};

TAILQ_HEAD(umtxq_head, umtx_q);

/* Per-key wait-queue */
struct umtxq_queue {
	struct umtxq_head	head;
	struct umtx_key		key;
	LIST_ENTRY(umtxq_queue)	link;
	int			length;
};

LIST_HEAD(umtxq_list, umtxq_queue);

/* Userland lock object's wait-queue chain */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx		uc_lock;

	/* List of sleep queues. */
	struct umtxq_list	uc_queue[2];
#define	UMTX_SHARED_QUEUE	0
#define	UMTX_EXCLUSIVE_QUEUE	1

	LIST_HEAD(, umtxq_queue) uc_spare_queue;

	/* Busy flag */
	char			uc_busy;

	/* Chain lock waiters */
	int			uc_waiters;

	/* All PI in the list */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;

#ifdef UMTX_PROFILING
	u_int			length;
	u_int			max_length;
#endif
};

#define	UMTXQ_LOCKED_ASSERT(uc)		mtx_assert(&(uc)->uc_lock, MA_OWNED)

/*
 * Do not propagate time-sharing priority; there is a security reason.
 * A user could simply create a PI mutex, let thread A lock it, and let
 * another thread B block on it.  Because B is sleeping, its priority
 * would be boosted, and priority propagation would then boost A's
 * priority as well; A's priority would never be lowered, even if A
 * were using 100% CPU, which is unfair to other processes.
 */

#define	UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&	\
			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?	\
			 PRI_MAX_TIMESHARE : (td)->td_user_pri)

#define	GOLDEN_RATIO_PRIME	2654404609U
#ifndef	UMTX_CHAINS
#define	UMTX_CHAINS		512
#endif
#define	UMTX_SHIFTS		(__WORD_BIT - 9)

#define	GET_SHARE(flags)	\
    (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)
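/*
 * Illustrative mapping (a sketch derived from the macro above, not an
 * additional interface): a flags word without USYNC_PROCESS_SHARED
 * yields a process-private key, one with it a cross-process key:
 *
 *	GET_SHARE(0)                    == THREAD_SHARE
 *	GET_SHARE(USYNC_PROCESS_SHARED) == PROCESS_SHARE
 */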
#define	BUSY_SPINS		200

struct abs_timeout {
	int clockid;
	bool is_abs_real;	/* TIMER_ABSTIME && CLOCK_REALTIME* */
	struct timespec cur;
	struct timespec end;
};

#ifdef COMPAT_FREEBSD32
struct umutex32 {
	volatile __lwpid_t	m_owner;	/* Owner of the mutex */
	__uint32_t		m_flags;	/* Flags of the mutex */
	__uint32_t		m_ceilings[2];	/* Priority protect ceiling */
	__uint32_t		m_rb_lnk;	/* Robust linkage */
	__uint32_t		m_pad;
	__uint32_t		m_spare[2];
};

_Static_assert(sizeof(struct umutex) == sizeof(struct umutex32), "umutex32");
_Static_assert(__offsetof(struct umutex, m_spare[0]) ==
    __offsetof(struct umutex32, m_spare[0]), "m_spare32");
#endif

int umtx_shm_vnobj_persistent = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_vnode_persistent, CTLFLAG_RWTUN,
    &umtx_shm_vnobj_persistent, 0,
    "False forces destruction of umtx attached to file, on last close");
static int umtx_max_rb = 1000;
SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_max_robust, CTLFLAG_RWTUN,
    &umtx_max_rb, 0,
    "Maximum number of robust mutexes allowed for each thread");

static uma_zone_t		umtx_pi_zone;
static struct umtxq_chain	umtxq_chains[2][UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
static int			umtx_pi_allocated;

static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
    &umtx_pi_allocated, 0, "Allocated umtx_pi");
static int umtx_verbose_rb = 1;
SYSCTL_INT(_debug_umtx, OID_AUTO, robust_faults_verbose, CTLFLAG_RWTUN,
    &umtx_verbose_rb, 0,
    "");

#ifdef UMTX_PROFILING
static long max_length;
SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0,
    "max_length");
static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD | CTLFLAG_MPSAFE,
    0, "umtx chain stats");
#endif

static void abs_timeout_update(struct abs_timeout *timo);

static void umtx_shm_init(void);
static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert_queue(struct umtx_q *uq, int q);
static void umtxq_remove_queue(struct umtx_q *uq, int q);
static int umtxq_sleep(struct umtx_q *uq, const char *wmesg,
    struct abs_timeout *);
static int umtxq_count(struct umtx_key *key);
static struct umtx_pi *umtx_pi_alloc(int);
static void umtx_pi_free(struct umtx_pi *pi);
static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    bool rb);
static void umtx_thread_cleanup(struct thread *td);
static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
    struct image_params *imgp __unused);
SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);

#define	umtxq_signal(key, nwake)	umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
#define	umtxq_insert(uq)	umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
#define	umtxq_remove(uq)	umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)

static struct mtx umtx_lock;

#ifdef UMTX_PROFILING
static void
umtx_init_profiling(void)
{
	struct sysctl_oid *chain_oid;
	char chain_name[10];
	int i;

	for (i = 0; i < UMTX_CHAINS; ++i) {
		snprintf(chain_name, sizeof(chain_name), "%d", i);
		chain_oid = SYSCTL_ADD_NODE(NULL,
		    SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO,
		    chain_name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
		    "umtx hash stats");
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length0", CTLFLAG_RD,
		    &umtxq_chains[0][i].max_length, 0, NULL);
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length1", CTLFLAG_RD,
		    &umtxq_chains[1][i].max_length, 0, NULL);
	}
}

static int
sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS)
{
	char buf[512];
	struct sbuf sb;
	struct umtxq_chain *uc;
	u_int fract, i, j, tot, whole;
	u_int sf0, sf1, sf2, sf3, sf4;
	u_int si0, si1, si2, si3, si4;
	u_int sw0, sw1, sw2, sw3, sw4;

	sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
	for (i = 0; i < 2; i++) {
		tot = 0;
		for (j = 0; j < UMTX_CHAINS; ++j) {
			uc = &umtxq_chains[i][j];
			mtx_lock(&uc->uc_lock);
			tot += uc->max_length;
			mtx_unlock(&uc->uc_lock);
		}
		if (tot == 0)
			sbuf_printf(&sb, "%u) Empty ", i);
		else {
			sf0 = sf1 = sf2 = sf3 = sf4 = 0;
			si0 = si1 = si2 = si3 = si4 = 0;
			sw0 = sw1 = sw2 = sw3 = sw4 = 0;
			for (j = 0; j < UMTX_CHAINS; j++) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				whole = uc->max_length * 100;
				mtx_unlock(&uc->uc_lock);
				fract = (whole % tot) * 100;
				if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) {
					sf0 = fract;
					si0 = j;
					sw0 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw1,
				    sf1)) {
					sf1 = fract;
					si1 = j;
					sw1 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw2,
				    sf2)) {
					sf2 = fract;
					si2 = j;
					sw2 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw3,
				    sf3)) {
					sf3 = fract;
					si3 = j;
					sw3 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw4,
				    sf4)) {
					sf4 = fract;
					si4 = j;
					sw4 = whole;
				}
			}
			sbuf_printf(&sb, "queue %u:\n", i);
			sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot,
			    sf0 / tot, si0);
			sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot,
			    sf1 / tot, si1);
			sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot,
			    sf2 / tot, si2);
			sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot,
			    sf3 / tot, si3);
			sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot,
			    sf4 / tot, si4);
		}
	}
	sbuf_trim(&sb);
	sbuf_finish(&sb);
	sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
	sbuf_delete(&sb);
	return (0);
}

static int
sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS)
{
	struct umtxq_chain *uc;
	u_int i, j;
	int clear, error;

	clear = 0;
	error = sysctl_handle_int(oidp, &clear, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);

	if (clear != 0) {
		for (i = 0; i < 2; ++i) {
			for (j = 0; j < UMTX_CHAINS; ++j) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				uc->length = 0;
				uc->max_length = 0;
				mtx_unlock(&uc->uc_lock);
			}
		}
	}
	return (0);
}

SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_clear, "I",
    "Clear umtx chains statistics");
SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_peaks, "A",
    "Highest peaks in chains max length");
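/*
 * Worked example for the fixed-point percentage arithmetic in
 * sysctl_debug_umtx_chains_peaks() (a sketch with made-up numbers):
 * with tot == 40 and a chain whose max_length is 5, whole == 500 and
 * fract == (500 % 40) * 100 == 2000, so the handler prints
 * whole / tot == 12 and fract / tot == 50, i.e. "12.50%".
 */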
#endif

static void
umtxq_sysinit(void *arg __unused)
{
	int i, j;

	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	for (i = 0; i < 2; ++i) {
		for (j = 0; j < UMTX_CHAINS; ++j) {
			mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
			    MTX_DEF | MTX_DUPOK);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
			LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
			TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
			umtxq_chains[i][j].uc_busy = 0;
			umtxq_chains[i][j].uc_waiters = 0;
#ifdef UMTX_PROFILING
			umtxq_chains[i][j].length = 0;
			umtxq_chains[i][j].max_length = 0;
#endif
		}
	}
#ifdef UMTX_PROFILING
	umtx_init_profiling();
#endif
	mtx_init(&umtx_lock, "umtx lock", NULL, MTX_DEF);
	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
	    EVENTHANDLER_PRI_ANY);
	umtx_shm_init();
}

struct umtx_q *
umtxq_alloc(void)
{
	struct umtx_q *uq;

	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
	uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX,
	    M_WAITOK | M_ZERO);
	TAILQ_INIT(&uq->uq_spare_queue->head);
	TAILQ_INIT(&uq->uq_pi_contested);
	uq->uq_inherited_pri = PRI_MAX;
	return (uq);
}

void
umtxq_free(struct umtx_q *uq)
{

	MPASS(uq->uq_spare_queue != NULL);
	free(uq->uq_spare_queue, M_UMTX);
	free(uq, M_UMTX);
}

static inline void
umtxq_hash(struct umtx_key *key)
{
	unsigned n;

	n = (uintptr_t)key->info.both.a + key->info.both.b;
	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
}

static inline struct umtxq_chain *
umtxq_getchain(struct umtx_key *key)
{

	if (key->type <= TYPE_SEM)
		return (&umtxq_chains[1][key->hash]);
	return (&umtxq_chains[0][key->hash]);
}

/*
 * Lock a chain.
 */
static inline void
umtxq_lock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_lock(&uc->uc_lock);
}

/*
 * Unlock a chain.
 */
static inline void
umtxq_unlock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_unlock(&uc->uc_lock);
}

/*
 * Set the chain to the busy state when a following operation
 * may block (a kernel mutex cannot be held across it).
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	if (uc->uc_busy) {
#ifdef SMP
		if (smp_cpus > 1) {
			int count = BUSY_SPINS;
			if (count > 0) {
				umtxq_unlock(key);
				while (uc->uc_busy && --count > 0)
					cpu_spinwait();
				umtxq_lock(key);
			}
		}
#endif
		while (uc->uc_busy) {
			uc->uc_waiters++;
			msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
			uc->uc_waiters--;
		}
	}
	uc->uc_busy = 1;
}
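/*
 * Typical calling pattern for the busy flag (an informal sketch of the
 * protocol used throughout this file, not a new interface):
 *
 *	umtxq_lock(key);
 *	umtxq_busy(key);	may temporarily drop the chain lock
 *	umtxq_unlock(key);
 *	... touch pageable userland memory, which may fault or sleep ...
 *	umtxq_unbusy_unlocked(key);
 */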
/*
 * Unbusy a chain.
 */
static inline void
umtxq_unbusy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	KASSERT(uc->uc_busy != 0, ("not busy"));
	uc->uc_busy = 0;
	if (uc->uc_waiters)
		wakeup_one(uc);
}

static inline void
umtxq_unbusy_unlocked(struct umtx_key *key)
{

	umtxq_lock(key);
	umtxq_unbusy(key);
	umtxq_unlock(key);
}

static struct umtxq_queue *
umtxq_queue_lookup(struct umtx_key *key, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	LIST_FOREACH(uh, &uc->uc_queue[q], link) {
		if (umtx_key_match(&uh->key, key))
			return (uh);
	}

	return (NULL);
}

static inline void
umtxq_insert_queue(struct umtx_q *uq, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT((uq->uq_flags & UQF_UMTXQ) == 0,
	    ("umtx_q is already on queue"));
	uh = umtxq_queue_lookup(&uq->uq_key, q);
	if (uh != NULL) {
		LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue,
		    link);
	} else {
		uh = uq->uq_spare_queue;
		uh->key = uq->uq_key;
		LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
#ifdef UMTX_PROFILING
		uc->length++;
		if (uc->length > uc->max_length) {
			uc->max_length = uc->length;
			if (uc->max_length > max_length)
				max_length = uc->max_length;
		}
#endif
	}
	uq->uq_spare_queue = NULL;

	TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
	uh->length++;
	uq->uq_flags |= UQF_UMTXQ;
	uq->uq_cur_queue = uh;
	return;
}

static inline void
umtxq_remove_queue(struct umtx_q *uq, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (uq->uq_flags & UQF_UMTXQ) {
		uh = uq->uq_cur_queue;
		TAILQ_REMOVE(&uh->head, uq, uq_link);
		uh->length--;
		uq->uq_flags &= ~UQF_UMTXQ;
		if (TAILQ_EMPTY(&uh->head)) {
			KASSERT(uh->length == 0,
			    ("inconsistent umtxq_queue length"));
#ifdef UMTX_PROFILING
			uc->length--;
#endif
			LIST_REMOVE(uh, link);
		} else {
			uh = LIST_FIRST(&uc->uc_spare_queue);
			KASSERT(uh != NULL, ("uc_spare_queue is empty"));
			LIST_REMOVE(uh, link);
		}
		uq->uq_spare_queue = uh;
		uq->uq_cur_queue = NULL;
	}
}

/*
 * Return the number of waiters on the shared queue.
 */
static int
umtxq_count(struct umtx_key *key)
{
	struct umtxq_queue *uh;

	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL)
		return (uh->length);
	return (0);
}

/*
 * Return the number of PI waiters and a pointer to the first waiter.
 */
static int
umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
{
	struct umtxq_queue *uh;

	*first = NULL;
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL) {
		*first = TAILQ_FIRST(&uh->head);
		return (uh->length);
	}
	return (0);
}
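/*
 * Informal invariant of the spare-queue dance in the insert/remove
 * pair above: every umtx_q owns exactly one struct umtxq_queue while
 * it is not enqueued.  The first waiter on a key donates its spare as
 * the per-key queue head; later waiters park theirs on uc_spare_queue;
 * every departing waiter takes one back (the emptied head, or an
 * arbitrary spare), so the counts always balance.
 */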
/*
 * Wake up threads waiting on a userland object.
 */

static int
umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
{
	struct umtxq_queue *uh;
	struct umtx_q *uq;
	int ret;

	ret = 0;
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
	uh = umtxq_queue_lookup(key, q);
	if (uh != NULL) {
		while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
			umtxq_remove_queue(uq, q);
			wakeup(uq);
			if (++ret >= n_wake)
				return (ret);
		}
	}
	return (ret);
}

/*
 * Wake up the specified thread.
 */
static inline void
umtxq_signal_thread(struct umtx_q *uq)
{

	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key));
	umtxq_remove(uq);
	wakeup(uq);
}

static inline int
tstohz(const struct timespec *tsp)
{
	struct timeval tv;

	TIMESPEC_TO_TIMEVAL(&tv, tsp);
	return tvtohz(&tv);
}

static void
abs_timeout_init(struct abs_timeout *timo, int clockid, int absolute,
    const struct timespec *timeout)
{

	timo->clockid = clockid;
	if (!absolute) {
		timo->is_abs_real = false;
		abs_timeout_update(timo);
		timespecadd(&timo->cur, timeout, &timo->end);
	} else {
		timo->end = *timeout;
		timo->is_abs_real = clockid == CLOCK_REALTIME ||
		    clockid == CLOCK_REALTIME_FAST ||
		    clockid == CLOCK_REALTIME_PRECISE;
		/*
		 * If is_abs_real, umtxq_sleep will read the clock
		 * after setting td_rtcgen; otherwise, read it here.
		 */
		if (!timo->is_abs_real) {
			abs_timeout_update(timo);
		}
	}
}

static void
abs_timeout_init2(struct abs_timeout *timo, const struct _umtx_time *umtxtime)
{

	abs_timeout_init(timo, umtxtime->_clockid,
	    (umtxtime->_flags & UMTX_ABSTIME) != 0, &umtxtime->_timeout);
}

static inline void
abs_timeout_update(struct abs_timeout *timo)
{

	kern_clock_gettime(curthread, timo->clockid, &timo->cur);
}

static int
abs_timeout_gethz(struct abs_timeout *timo)
{
	struct timespec tts;

	if (timespeccmp(&timo->end, &timo->cur, <=))
		return (-1);
	timespecsub(&timo->end, &timo->cur, &tts);
	return (tstohz(&tts));
}

static uint32_t
umtx_unlock_val(uint32_t flags, bool rb)
{

	if (rb)
		return (UMUTEX_RB_OWNERDEAD);
	else if ((flags & UMUTEX_NONCONSISTENT) != 0)
		return (UMUTEX_RB_NOTRECOV);
	else
		return (UMUTEX_UNOWNED);
}
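/*
 * Example of the relative-timeout path above (a sketch with made-up
 * numbers): a 1.5 s relative sleep against CLOCK_MONOTONIC is turned
 * into an absolute deadline at init time, cur = now and
 * end = cur + {1, 500000000}.  abs_timeout_gethz() then converts the
 * remaining time into sleep ticks, returning -1 once cur >= end so
 * that callers see ETIMEDOUT.
 */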
/*
 * Put a thread to sleep; before sleeping, check whether the thread
 * was removed from the umtx queue.
 */
static inline int
umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *abstime)
{
	struct umtxq_chain *uc;
	int error, timo;

	if (abstime != NULL && abstime->is_abs_real) {
		curthread->td_rtcgen = atomic_load_acq_int(&rtc_generation);
		abs_timeout_update(abstime);
	}

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	for (;;) {
		if (!(uq->uq_flags & UQF_UMTXQ)) {
			error = 0;
			break;
		}
		if (abstime != NULL) {
			timo = abs_timeout_gethz(abstime);
			if (timo < 0) {
				error = ETIMEDOUT;
				break;
			}
		} else
			timo = 0;
		error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo);
		if (error == EINTR || error == ERESTART) {
			umtxq_lock(&uq->uq_key);
			break;
		}
		if (abstime != NULL) {
			if (abstime->is_abs_real)
				curthread->td_rtcgen =
				    atomic_load_acq_int(&rtc_generation);
			abs_timeout_update(abstime);
		}
		umtxq_lock(&uq->uq_key);
	}

	curthread->td_rtcgen = 0;
	return (error);
}

/*
 * Convert a userspace address into a unique logical address.
 */
int
umtx_key_get(const void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else {
		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
		    &entry, &key->info.shared.object, &pindex, &prot,
		    &wired) != KERN_SUCCESS) {
			return (EFAULT);
		}

		if ((share == PROCESS_SHARE) ||
		    (share == AUTO_SHARE &&
		     VM_INHERIT_SHARE == entry->inheritance)) {
			key->shared = 1;
			key->info.shared.offset = (vm_offset_t)addr -
			    entry->start + entry->offset;
			vm_object_reference(key->info.shared.object);
		} else {
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		vm_map_lookup_done(map, entry);
	}

	umtxq_hash(key);
	return (0);
}

/*
 * Release a key.
 */
void
umtx_key_release(struct umtx_key *key)
{
	if (key->shared)
		vm_object_deallocate(key->info.shared.object);
}

/*
 * Fetch and compare the value; sleep on the address if the value
 * has not changed.
 */
static int
do_wait(struct thread *td, void *addr, u_long id,
    struct _umtx_time *timeout, int compat32, int is_private)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	u_long tmp;
	uint32_t tmp32;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	if (compat32 == 0) {
		error = fueword(addr, &tmp);
		if (error != 0)
			error = EFAULT;
	} else {
		error = fueword32(addr, &tmp32);
		if (error == 0)
			tmp = tmp32;
		else
			error = EFAULT;
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		if (tmp == id)
			error = umtxq_sleep(uq, "uwait", timeout == NULL ?
			    NULL : &timo);
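		/*
		 * If we are no longer on the queue, a wakeup raced with
		 * the timeout or signal and the waker wins: report
		 * success.  Otherwise, take ourselves off the queue.
		 */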
		if ((uq->uq_flags & UQF_UMTXQ) == 0)
			error = 0;
		else
			umtxq_remove(uq);
	} else if ((uq->uq_flags & UQF_UMTXQ) != 0) {
		umtxq_remove(uq);
	}
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

/*
 * Wake up threads sleeping on the specified address.
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
		return (ret);
	umtxq_lock(&key);
	umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}
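/*
 * Informal summary of the m_owner word states handled below (the
 * authoritative values are defined in sys/umtx.h):
 *
 *	UMUTEX_UNOWNED		free
 *	tid			owned, uncontested fast path
 *	tid | UMUTEX_CONTESTED	owned with (possible) waiters; unlock
 *				must enter the kernel
 *	UMUTEX_CONTESTED	free, but waiters may exist
 *	UMUTEX_RB_OWNERDEAD	robust owner died; the next locker takes
 *				it and gets EOWNERDEAD
 *	UMUTEX_RB_NOTRECOV	robust mutex is not recoverable
 */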
/*
 * Lock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int mode)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	error = 0;
	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	/*
	 * Care must be exercised when dealing with the umtx structure:
	 * it can fault on any access.
	 */
	for (;;) {
		rv = fueword32(&m->m_owner, &owner);
		if (rv == -1)
			return (EFAULT);
		if (mode == _UMUTEX_WAIT) {
			if (owner == UMUTEX_UNOWNED ||
			    owner == UMUTEX_CONTESTED ||
			    owner == UMUTEX_RB_OWNERDEAD ||
			    owner == UMUTEX_RB_NOTRECOV)
				return (0);
		} else {
			/*
			 * Robust mutex terminated.  Kernel duty is to
			 * return EOWNERDEAD to userspace.  The
			 * umutex.m_flags UMUTEX_NONCONSISTENT is set
			 * by the common userspace code.
			 */
			if (owner == UMUTEX_RB_OWNERDEAD) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_RB_OWNERDEAD, &owner,
				    id | UMUTEX_CONTESTED);
				if (rv == -1)
					return (EFAULT);
				if (rv == 0) {
					MPASS(owner == UMUTEX_RB_OWNERDEAD);
					return (EOWNERDEAD); /* success */
				}
				MPASS(rv == 1);
				rv = thread_check_susp(td, false);
				if (rv != 0)
					return (rv);
				continue;
			}
			if (owner == UMUTEX_RB_NOTRECOV)
				return (ENOTRECOVERABLE);

			/*
			 * Try the uncontested case.  This should be
			 * done in userland.
			 */
			rv = casueword32(&m->m_owner, UMUTEX_UNOWNED,
			    &owner, id);
			/* The address was invalid. */
			if (rv == -1)
				return (EFAULT);

			/* The acquire succeeded. */
			if (rv == 0) {
				MPASS(owner == UMUTEX_UNOWNED);
				return (0);
			}

			/*
			 * If no one owns it but it is contested, try
			 * to acquire it.
			 */
			MPASS(rv == 1);
			if (owner == UMUTEX_CONTESTED) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_CONTESTED, &owner,
				    id | UMUTEX_CONTESTED);
				/* The address was invalid. */
				if (rv == -1)
					return (EFAULT);
				if (rv == 0) {
					MPASS(owner == UMUTEX_CONTESTED);
					return (0);
				}
				if (rv == 1) {
					rv = thread_check_susp(td, false);
					if (rv != 0)
						return (rv);
				}

				/*
				 * If this failed, the lock has
				 * changed; restart.
				 */
				continue;
			}

			/* rv == 1 but not contested, likely store failure */
			rv = thread_check_susp(td, false);
			if (rv != 0)
				return (rv);
		}

		if (mode == _UMUTEX_TRY)
			return (EBUSY);

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails,
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old,
		    owner | UMUTEX_CONTESTED);

		/* The address was invalid or casueword failed to store. */
		if (rv == -1 || rv == 1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			if (rv == -1)
				return (EFAULT);
			if (rv == 1) {
				rv = thread_check_susp(td, false);
				if (rv != 0)
					return (rv);
			}
			continue;
		}

		/*
		 * We set the contested bit, sleep.  Otherwise the lock changed
		 * and we need to retry, or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		MPASS(old == owner);
		error = umtxq_sleep(uq, "umtxn", timeout == NULL ?
		    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);

		if (error == 0)
			error = thread_check_susp(td, false);
	}

	return (0);
}

/*
 * Unlock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	uint32_t owner, old, id, newlock;
	int error, count;

	id = td->td_tid;

again:
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	newlock = umtx_unlock_val(flags, rb);
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, newlock);
		if (error == -1)
			return (EFAULT);
		if (error == 1) {
			error = thread_check_susp(td, false);
			if (error != 0)
				return (error);
			goto again;
		}
		MPASS(old == owner);
		return (0);
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there are zero or one waiting threads; otherwise it must
	 * be marked as contested.
	 */
	if (count > 1)
		newlock |= UMUTEX_CONTESTED;
	error = casueword32(&m->m_owner, owner, &old, newlock);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (error == 1) {
		if (old != owner)
			return (EINVAL);
		error = thread_check_susp(td, false);
		if (error != 0)
			return (error);
		goto again;
	}
	return (0);
}

/*
 * Check whether the mutex is available and wake up a waiter;
 * this applies only to a simple mutex.
 */
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

again:
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != 0 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV)
		return (0);

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	if (count <= 1 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV) {
		error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    UMUTEX_UNOWNED);
		if (error == -1) {
			error = EFAULT;
		} else if (error == 1) {
			umtxq_lock(&key);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			error = thread_check_susp(td, false);
			if (error != 0)
				return (error);
			goto again;
		}
	}

	umtxq_lock(&key);
	if (error == 0 && count != 0) {
		MPASS((owner & ~UMUTEX_CONTESTED) == 0 ||
		    owner == UMUTEX_RB_OWNERDEAD ||
		    owner == UMUTEX_RB_NOTRECOV);
		umtxq_signal(&key, 1);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

/*
 * Check whether the mutex has waiters and try to repair the
 * contention bit.
 */
static int
do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old;
	int type;
	int error;
	int count;

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT |
	    UMUTEX_ROBUST)) {
	case 0:
	case UMUTEX_ROBUST:
		type = TYPE_NORMAL_UMUTEX;
		break;
	case UMUTEX_PRIO_INHERIT:
		type = TYPE_PI_UMUTEX;
		break;
	case (UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST):
		type = TYPE_PI_ROBUST_UMUTEX;
		break;
	case UMUTEX_PRIO_PROTECT:
		type = TYPE_PP_UMUTEX;
		break;
	case (UMUTEX_PRIO_PROTECT | UMUTEX_ROBUST):
		type = TYPE_PP_ROBUST_UMUTEX;
		break;
	default:
		return (EINVAL);
	}
	if ((error = umtx_key_get(m, type, GET_SHARE(flags), &key)) != 0)
		return (error);

	owner = 0;
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		error = EFAULT;

	/*
	 * Only repair the contention bit if there is a waiter; that
	 * means the mutex is still referenced by userland code.
	 * Otherwise, do not update any memory.
	 */
	while (error == 0 && (owner & UMUTEX_CONTESTED) == 0 &&
	    (count > 1 || (count == 1 && (owner & ~UMUTEX_CONTESTED) != 0))) {
		error = casueword32(&m->m_owner, owner, &old,
		    owner | UMUTEX_CONTESTED);
		if (error == -1) {
			error = EFAULT;
			break;
		}
		if (error == 0) {
			MPASS(old == owner);
			break;
		}
		owner = old;
		error = thread_check_susp(td, false);
	}

	umtxq_lock(&key);
	if (error == EFAULT) {
		umtxq_signal(&key, INT_MAX);
	} else if (count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 ||
	    owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV))
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

static inline struct umtx_pi *
umtx_pi_alloc(int flags)
{
	struct umtx_pi *pi;

	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
	TAILQ_INIT(&pi->pi_blocked);
	atomic_add_int(&umtx_pi_allocated, 1);
	return (pi);
}

static inline void
umtx_pi_free(struct umtx_pi *pi)
{
	uma_zfree(umtx_pi_zone, pi);
	atomic_add_int(&umtx_pi_allocated, -1);
}

/*
 * Adjust the thread's position on the PI mutex's blocked list after
 * its priority has been changed.
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread's or higher than the next thread's.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove the thread from the blocked chain and
		 * determine where it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}

static struct umtx_pi *
umtx_pi_next(struct umtx_pi *pi)
{
	struct umtx_q *uq_owner;

	if (pi->pi_owner == NULL)
		return (NULL);
	uq_owner = pi->pi_owner->td_umtxq;
	if (uq_owner == NULL)
		return (NULL);
	return (uq_owner->uq_pi_blocked);
}

/*
 * Floyd's Cycle-Finding Algorithm.
 */
static bool
umtx_pi_check_loop(struct umtx_pi *pi)
{
	struct umtx_pi *pi1;	/* fast iterator */

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (false);
	pi1 = pi;
	for (;;) {
		pi = umtx_pi_next(pi);
		if (pi == NULL)
			break;
		pi1 = umtx_pi_next(pi1);
		if (pi1 == NULL)
			break;
		pi1 = umtx_pi_next(pi1);
		if (pi1 == NULL)
			break;
		if (pi == pi1)
			return (true);
	}
	return (false);
}
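/*
 * Example of the cycle guarded against above (a sketch): thread A owns
 * PI mutex M1 and blocks on M2 while thread B owns M2 and blocks on
 * M1.  Walking the owner -> blocked-on links from either umtx_pi never
 * terminates, so the slow (pi) and fast (pi1) iterators eventually
 * meet and the function reports a loop instead of letting the callers
 * propagate priorities forever.
 */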
/*
 * Propagate priority when a thread is blocked on a POSIX
 * PI mutex.
 */
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;
	if (umtx_pi_check_loop(pi))
		return;

	for (;;) {
		td = pi->pi_owner;
		if (td == NULL || td == curthread)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		thread_lock(td);
		if (td->td_lend_user_pri > pri)
			sched_lend_user_prio(td, pri);
		else {
			thread_unlock(td);
			break;
		}
		thread_unlock(td);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		if (pi == NULL)
			break;
		/* Resort td on the list if needed. */
		umtx_pi_adjust_thread(pi, td);
	}
}

/*
 * Unpropagate priority for a PI mutex when a thread blocked on
 * it is interrupted by a signal or resumed by others.
 */
static void
umtx_repropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);

	if (umtx_pi_check_loop(pi))
		return;
	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		thread_lock(pi->pi_owner);
		sched_lend_user_prio(pi->pi_owner, pri);
		thread_unlock(pi->pi_owner);
		if ((pi = uq_owner->uq_pi_blocked) != NULL)
			umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
	}
}

/*
 * Insert a PI mutex into the owned list.
 */
static void
umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_assert(&umtx_lock, MA_OWNED);
	MPASS(pi->pi_owner == NULL);
	pi->pi_owner = owner;
	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
}

/*
 * Disown a PI mutex, and remove it from the owned list.
 */
static void
umtx_pi_disown(struct umtx_pi *pi)
{

	mtx_assert(&umtx_lock, MA_OWNED);
	TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, pi, pi_link);
	pi->pi_owner = NULL;
}

/*
 * Claim ownership of a PI mutex.
 */
static int
umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq;
	int pri;

	mtx_lock(&umtx_lock);
	if (pi->pi_owner == owner) {
		mtx_unlock(&umtx_lock);
		return (0);
	}

	if (pi->pi_owner != NULL) {
		/*
		 * Userland may have already messed up the mutex, sigh.
		 */
		mtx_unlock(&umtx_lock);
		return (EPERM);
	}
	umtx_pi_setowner(pi, owner);
	uq = TAILQ_FIRST(&pi->pi_blocked);
	if (uq != NULL) {
		pri = UPRI(uq->uq_thread);
		thread_lock(owner);
		if (pri < UPRI(owner))
			sched_lend_user_prio(owner, pri);
		thread_unlock(owner);
	}
	mtx_unlock(&umtx_lock);
	return (0);
}

/*
 * Adjust a thread's position on the blocked list of its PI mutex;
 * this may trigger a new round of priority propagation.
 */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	mtx_lock(&umtx_lock);
	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	if (pi != NULL) {
		umtx_pi_adjust_thread(pi, td);
		umtx_repropagate_priority(pi);
	}
	mtx_unlock(&umtx_lock);
}

/*
 * Sleep on a PI mutex.
 */
static int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, uint32_t owner,
    const char *wmesg, struct abs_timeout *timo, bool shared)
{
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int error, pri;
#ifdef INVARIANTS
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
#endif
	error = 0;
	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key));
	KASSERT(uc->uc_busy != 0, ("umtx chain is not busy"));
	umtxq_insert(uq);
	mtx_lock(&umtx_lock);
	if (pi->pi_owner == NULL) {
		mtx_unlock(&umtx_lock);
		td1 = tdfind(owner, shared ? -1 : td->td_proc->p_pid);
		mtx_lock(&umtx_lock);
		if (td1 != NULL) {
			if (pi->pi_owner == NULL)
				umtx_pi_setowner(pi, td1);
			PROC_UNLOCK(td1->td_proc);
		}
	}

	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	thread_lock(td);
	td->td_flags |= TDF_UPIBLOCKED;
	thread_unlock(td);
	umtx_propagate_priority(td);
	mtx_unlock(&umtx_lock);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, wmesg, timo);
	umtxq_remove(uq);

	mtx_lock(&umtx_lock);
	uq->uq_pi_blocked = NULL;
	thread_lock(td);
	td->td_flags &= ~TDF_UPIBLOCKED;
	thread_unlock(td);
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_repropagate_priority(pi);
	mtx_unlock(&umtx_lock);
	umtxq_unlock(&uq->uq_key);

	return (error);
}

/*
 * Add a reference to a PI mutex.
 */
static void
umtx_pi_ref(struct umtx_pi *pi)
{

	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&pi->pi_key));
	pi->pi_refcount++;
}
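/*
 * Reference discipline (informal): do_lock_pi() holds one reference on
 * the umtx_pi across the whole lock attempt, including the sleep in
 * umtxq_sleep_pi(), so the structure cannot be freed while any waiter
 * still uses it; the final umtx_pi_unref() below unhashes and frees it.
 */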
/*
 * Decrease the reference count of a PI mutex; when the count drops
 * to zero, its memory is freed.
 */
static void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		mtx_lock(&umtx_lock);
		if (pi->pi_owner != NULL)
			umtx_pi_disown(pi);
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
		    ("blocked queue not empty"));
		mtx_unlock(&umtx_lock);
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		umtx_pi_free(pi);
	}
}

/*
 * Find a PI mutex in the hash table.
 */
static struct umtx_pi *
umtx_pi_lookup(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtx_pi *pi;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);

	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
		if (umtx_key_match(&pi->pi_key, key)) {
			return (pi);
		}
	}
	return (NULL);
}

/*
 * Insert a PI mutex into the hash table.
 */
static inline void
umtx_pi_insert(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
}
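/*
 * The lookup/insert pair above is used by do_lock_pi() below with a
 * lock-dropping allocation dance: try umtx_pi_alloc(M_NOWAIT) under
 * the chain lock first; if that fails, unlock, allocate with M_WAITOK,
 * relock, and redo the lookup in case another thread inserted the
 * umtx_pi in the meantime.
 */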
/*
 * Lock a PI mutex.
 */
static int
do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, old_owner, owner, old;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	pi = umtx_pi_lookup(&uq->uq_key);
	if (pi == NULL) {
		new_pi = umtx_pi_alloc(M_NOWAIT);
		if (new_pi == NULL) {
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc(M_WAITOK);
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL) {
				umtx_pi_free(new_pi);
				new_pi = NULL;
			}
		}
		if (new_pi != NULL) {
			new_pi->pi_key = uq->uq_key;
			umtx_pi_insert(new_pi);
			pi = new_pi;
		}
	}
	umtx_pi_ref(pi);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Care must be exercised when dealing with the umtx structure:
	 * it can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}
		/* The acquire succeeded. */
		if (rv == 0) {
			MPASS(owner == UMUTEX_UNOWNED);
			error = 0;
			break;
		}

		if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
			break;
		}

		/*
		 * Avoid overwriting a possible error from sleep due
		 * to a pending signal with the suspension check result.
		 */
		if (error == 0) {
			error = thread_check_susp(td, true);
			if (error != 0)
				break;
		}

		/*
		 * If no one owns it but it is contested, try to
		 * acquire it.
		 */
		if (owner == UMUTEX_CONTESTED || owner == UMUTEX_RB_OWNERDEAD) {
			old_owner = owner;
			rv = casueword32(&m->m_owner, owner, &owner,
			    id | UMUTEX_CONTESTED);
			/* The address was invalid. */
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (rv == 1) {
				if (error == 0) {
					error = thread_check_susp(td, true);
					if (error != 0)
						break;
				}

				/*
				 * If this failed, the lock could have
				 * changed; restart.
				 */
				continue;
			}

			MPASS(rv == 0);
			MPASS(owner == old_owner);
			umtxq_lock(&uq->uq_key);
			umtxq_busy(&uq->uq_key);
			error = umtx_pi_claim(pi, td);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			if (error != 0) {
				/*
				 * Since we're going to return an
				 * error, restore the m_owner to its
				 * previous, unowned state to avoid
				 * compounding the problem.
				 */
				(void)casuword32(&m->m_owner,
				    id | UMUTEX_CONTESTED, old_owner);
			}
			if (error == 0 && old_owner == UMUTEX_RB_OWNERDEAD)
				error = EOWNERDEAD;
			break;
		}

		if ((owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails,
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old, owner |
		    UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		if (rv == 1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = thread_check_susp(td, true);
			if (error != 0)
				break;

			/*
			 * The lock changed and we need to retry, or we
			 * lost a race to the thread unlocking the
			 * umtx.  Note that the UMUTEX_RB_OWNERDEAD
			 * value for owner is impossible here.
			 */
			continue;
		}

		umtxq_lock(&uq->uq_key);

		/* We set the contested bit, sleep. */
		MPASS(old == owner);
		error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
		    "umtxpi", timeout == NULL ? NULL : &timo,
		    (flags & USYNC_PROCESS_SHARED) != 0);
		if (error != 0)
			continue;

		error = thread_check_susp(td, false);
		if (error != 0)
			break;
	}

	umtxq_lock(&uq->uq_key);
	umtx_pi_unref(pi);
	umtxq_unlock(&uq->uq_key);

	umtx_key_release(&uq->uq_key);
	return (error);
}
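/*
 * Note for the unlock path below: pi_blocked is kept sorted by
 * effective user priority (see the insertion loop in umtxq_sleep_pi()
 * above), so handing the mutex to the first still-queued waiter means
 * the highest-priority sleeper is the one woken.
 */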
/*
 * Unlock a PI mutex.
 */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	uint32_t id, new_owner, old, owner;
	int count, error, pri;

	id = td->td_tid;

usrloop:
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	new_owner = umtx_unlock_val(flags, rb);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, new_owner);
		if (error == -1)
			return (EFAULT);
		if (error == 1) {
			error = thread_check_susp(td, true);
			if (error != 0)
				return (error);
			goto usrloop;
		}
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count_pi(&key, &uq_first);
	if (uq_first != NULL) {
		mtx_lock(&umtx_lock);
		pi = uq_first->uq_pi_blocked;
		KASSERT(pi != NULL, ("pi == NULL?"));
		if (pi->pi_owner != td && !(rb && pi->pi_owner == NULL)) {
			mtx_unlock(&umtx_lock);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			/* userland messed up the mutex */
			return (EPERM);
		}
		uq_me = td->td_umtxq;
		if (pi->pi_owner == td)
			umtx_pi_disown(pi);
		/* Find the highest-priority thread that is still sleeping. */
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		while (uq_first != NULL &&
		    (uq_first->uq_flags & UQF_UMTXQ) == 0) {
			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
		}
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
		if (uq_first)
			umtxq_signal_thread(uq_first);
	} else {
		pi = umtx_pi_lookup(&key);
		/*
		 * A umtx_pi can exist if a signal or timeout removed the
		 * last waiter from the umtxq, but there is still
		 * a thread in do_lock_pi() holding the umtx_pi.
		 */
		if (pi != NULL) {
			/*
			 * The umtx_pi can be unowned, such as when a thread
			 * has just entered do_lock_pi(), allocated the
			 * umtx_pi, and unlocked the umtxq.
			 * If the current thread owns it, it must disown it.
			 */
			mtx_lock(&umtx_lock);
			if (pi->pi_owner == td)
				umtx_pi_disown(pi);
			mtx_unlock(&umtx_lock);
		}
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there are zero or one waiting threads; otherwise it must
	 * be marked as contested.
	 */

	if (count > 1)
		new_owner |= UMUTEX_CONTESTED;
again:
	error = casueword32(&m->m_owner, owner, &old, new_owner);
	if (error == 1) {
		error = thread_check_susp(td, false);
		if (error == 0)
			goto again;
	}
	umtxq_unbusy_unlocked(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (error == 0 && old != owner)
		return (EINVAL);
	return (error);
}
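/*
 * Note on the ceiling encoding used by the PP code below (derived from
 * the arithmetic in do_lock_pp()): userland stores ceilings in
 * m_ceilings[], and the kernel maps a stored value c to the user
 * priority PRI_MIN_REALTIME + (RTP_PRIO_MAX - c); e.g. a stored
 * ceiling of RTP_PRIO_MAX maps to PRI_MIN_REALTIME, the numerically
 * lowest and therefore strongest real-time user priority.
 */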
/*
 * Lock a PP mutex.
 */
static int
do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct abs_timeout timo;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t ceiling;
	uint32_t owner, id;
	int error, pri, old_inherited_pri, su, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
	for (;;) {
		old_inherited_pri = uq->uq_inherited_pri;
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		rv = fueword32(&m->m_ceilings[0], &ceiling);
		if (rv == -1) {
			error = EFAULT;
			goto out;
		}
		ceiling = RTP_PRIO_MAX - ceiling;
		if (ceiling > RTP_PRIO_MAX) {
			error = EINVAL;
			goto out;
		}

		mtx_lock(&umtx_lock);
		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
			mtx_unlock(&umtx_lock);
			error = EINVAL;
			goto out;
		}
		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
			thread_lock(td);
			if (uq->uq_inherited_pri < UPRI(td))
				sched_lend_user_prio(td, uq->uq_inherited_pri);
			thread_unlock(td);
		}
		mtx_unlock(&umtx_lock);

		rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    id | UMUTEX_CONTESTED);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}
		if (rv == 0) {
			MPASS(owner == UMUTEX_CONTESTED);
			error = 0;
			break;
		}
		/* rv == 1 */
		if (owner == UMUTEX_RB_OWNERDEAD) {
			rv = casueword32(&m->m_owner, UMUTEX_RB_OWNERDEAD,
			    &owner, id | UMUTEX_CONTESTED);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (rv == 0) {
				MPASS(owner == UMUTEX_RB_OWNERDEAD);
				error = EOWNERDEAD; /* success */
				break;
			}

			/*
			 * rv == 1; only check for suspension if we
			 * have not already caught a signal.  If the
			 * check returns an error, the same condition
			 * is re-checked by the umtxq_sleep() call
			 * below, so clear the error to avoid skipping
			 * the last loop iteration.
			 */
			if (error == 0) {
				error = thread_check_susp(td, false);
				if (error == 0) {
					if (try != 0)
						error = EBUSY;
					else
						continue;
				}
				error = 0;
			}
		} else if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
		}

		if (try != 0)
			error = EBUSY;

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", timeout == NULL ?
		    NULL : &timo);
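		/*
		 * After waking (or timing out), undo the ceiling boost
		 * taken above: restore uq_inherited_pri and re-derive
		 * the lent priority from the PI mutexes still owned.
		 */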
		    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);

		mtx_lock(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}

	if (error != 0 && error != EOWNERDEAD) {
		mtx_lock(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}

out:
	umtxq_unbusy_unlocked(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Unlock a PP mutex.
 */
static int
do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t id, owner, rceiling;
	int error, pri, new_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);

	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
	if (error != 0)
		return (error);

	if (rceiling == -1)
		new_inherited_pri = PRI_MAX;
	else {
		rceiling = RTP_PRIO_MAX - rceiling;
		if (rceiling > RTP_PRIO_MAX)
			return (EINVAL);
		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
	}

	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_unlock(&key);
	/*
	 * For a priority-protected mutex, always set the unlocked state
	 * to UMUTEX_CONTESTED so that userland always enters the kernel
	 * to lock the mutex; this is necessary because thread priorities
	 * have to be adjusted for such mutexes.
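	 *
	 * Illustrative example (assuming the usual definitions
	 * UMUTEX_UNOWNED == 0 and UMUTEX_CONTESTED == 0x80000000):
	 * unlocking a non-robust PP mutex stores
	 * UMUTEX_UNOWNED | UMUTEX_CONTESTED, i.e. just UMUTEX_CONTESTED,
	 * and never plain UMUTEX_UNOWNED.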
	 */
	error = suword32(&m->m_owner, umtx_unlock_val(flags, rb) |
	    UMUTEX_CONTESTED);

	umtxq_lock(&key);
	if (error == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	if (error == -1)
		error = EFAULT;
	else {
		mtx_lock(&umtx_lock);
		if (su != 0)
			uq->uq_inherited_pri = new_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}
	umtx_key_release(&key);
	return (error);
}

static int
do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
    uint32_t *old_ceiling)
{
	struct umtx_q *uq;
	uint32_t flags, id, owner, save_ceiling;
	int error, rv, rv1;

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
		return (EINVAL);
	if (ceiling > RTP_PRIO_MAX)
		return (EINVAL);
	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	for (;;) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		rv = fueword32(&m->m_ceilings[0], &save_ceiling);
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    id | UMUTEX_CONTESTED);
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		if (rv == 0) {
			MPASS(owner == UMUTEX_CONTESTED);
			rv = suword32(&m->m_ceilings[0], ceiling);
			rv1 = suword32(&m->m_owner, UMUTEX_CONTESTED);
			error = (rv == 0 && rv1 == 0) ? 0 : EFAULT;
			break;
		}

		if ((owner & ~UMUTEX_CONTESTED) == id) {
			rv = suword32(&m->m_ceilings[0], ceiling);
			error = rv == 0 ? 0 : EFAULT;
			break;
		}

		if (owner == UMUTEX_RB_OWNERDEAD) {
			error = EOWNERDEAD;
			break;
		} else if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		/*
		 * We set the contested bit, so sleep.  Otherwise the
		 * lock changed: either we need to retry, or we lost a
		 * race to the thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", NULL);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0)
		umtxq_signal(&uq->uq_key, INT_MAX);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == 0 && old_ceiling != NULL) {
		rv = suword32(old_ceiling, save_ceiling);
		error = rv == 0 ? 0 : EFAULT;
	}
	return (error);
}

/*
 * Lock a userland POSIX mutex.
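 *
 * A hypothetical userland caller (sketch only, not part of this file)
 * reaches this dispatch through the _umtx_op(2) syscall:
 *
 *	struct umutex mtx = { .m_flags = UMUTEX_PRIO_INHERIT };
 *	_umtx_op(&mtx, UMTX_OP_MUTEX_LOCK, 0, NULL, NULL);
 *
 * The m_flags word selects the implementation: no flag bits give a
 * plain mutex, UMUTEX_PRIO_INHERIT selects the PI code,
 * UMUTEX_PRIO_PROTECT the PP code, and both bits at once are rejected
 * with EINVAL.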
 */
static int
do_lock_umutex(struct thread *td, struct umutex *m,
    struct _umtx_time *timeout, int mode)
{
	uint32_t flags;
	int error;

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
	case 0:
		error = do_lock_normal(td, m, flags, timeout, mode);
		break;
	case UMUTEX_PRIO_INHERIT:
		error = do_lock_pi(td, m, flags, timeout, mode);
		break;
	case UMUTEX_PRIO_PROTECT:
		error = do_lock_pp(td, m, flags, timeout, mode);
		break;
	default:
		return (EINVAL);
	}
	if (timeout == NULL) {
		if (error == EINTR && mode != _UMUTEX_WAIT)
			error = ERESTART;
	} else {
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a userland POSIX mutex.
 */
static int
do_unlock_umutex(struct thread *td, struct umutex *m, bool rb)
{
	uint32_t flags;
	int error;

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
	case 0:
		return (do_unlock_normal(td, m, flags, rb));
	case UMUTEX_PRIO_INHERIT:
		return (do_unlock_pi(td, m, flags, rb));
	case UMUTEX_PRIO_PROTECT:
		return (do_unlock_pp(td, m, flags, rb));
	}

	return (EINVAL);
}

static int
do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
    struct timespec *timeout, u_long wflags)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags, clockid, hasw;
	int error;

	uq = td->td_umtxq;
	error = fueword32(&cv->c_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if ((wflags & CVWAIT_CLOCKID) != 0) {
		error = fueword32(&cv->c_clockid, &clockid);
		if (error == -1) {
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}
		if (clockid < CLOCK_REALTIME ||
		    clockid >= CLOCK_THREAD_CPUTIME_ID) {
			/* Only predefined clock ids are allowed. */
			umtx_key_release(&uq->uq_key);
			return (EINVAL);
		}
	} else {
		clockid = CLOCK_REALTIME;
	}

	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Set c_has_waiters to 1 before releasing the user mutex, but
	 * do not modify the cache line when that is unnecessary.
	 */
	error = fueword32(&cv->c_has_waiters, &hasw);
	if (error == 0 && hasw == 0)
		suword32(&cv->c_has_waiters, 1);

	umtxq_unbusy_unlocked(&uq->uq_key);

	error = do_unlock_umutex(td, m, false);

	if (timeout != NULL)
		abs_timeout_init(&timo, clockid, (wflags & CVWAIT_ABSTIME) != 0,
		    timeout);

	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		error = umtxq_sleep(uq, "ucond", timeout == NULL ?
		    NULL : &timo);
	}

	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		error = 0;
	else {
		/*
		 * The wait ended because of a timeout, an interrupt by
		 * a signal, or a spurious wakeup; clear the
		 * c_has_waiters flag when necessary.
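		 *
		 * The flag is cleared below only when this waiter was
		 * the last one left on the queue (oldlen == 1), since
		 * otherwise other sleepers still depend on it.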
2602 */ 2603 umtxq_busy(&uq->uq_key); 2604 if ((uq->uq_flags & UQF_UMTXQ) != 0) { 2605 int oldlen = uq->uq_cur_queue->length; 2606 umtxq_remove(uq); 2607 if (oldlen == 1) { 2608 umtxq_unlock(&uq->uq_key); 2609 suword32(&cv->c_has_waiters, 0); 2610 umtxq_lock(&uq->uq_key); 2611 } 2612 } 2613 umtxq_unbusy(&uq->uq_key); 2614 if (error == ERESTART) 2615 error = EINTR; 2616 } 2617 2618 umtxq_unlock(&uq->uq_key); 2619 umtx_key_release(&uq->uq_key); 2620 return (error); 2621 } 2622 2623 /* 2624 * Signal a userland condition variable. 2625 */ 2626 static int 2627 do_cv_signal(struct thread *td, struct ucond *cv) 2628 { 2629 struct umtx_key key; 2630 int error, cnt, nwake; 2631 uint32_t flags; 2632 2633 error = fueword32(&cv->c_flags, &flags); 2634 if (error == -1) 2635 return (EFAULT); 2636 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2637 return (error); 2638 umtxq_lock(&key); 2639 umtxq_busy(&key); 2640 cnt = umtxq_count(&key); 2641 nwake = umtxq_signal(&key, 1); 2642 if (cnt <= nwake) { 2643 umtxq_unlock(&key); 2644 error = suword32(&cv->c_has_waiters, 0); 2645 if (error == -1) 2646 error = EFAULT; 2647 umtxq_lock(&key); 2648 } 2649 umtxq_unbusy(&key); 2650 umtxq_unlock(&key); 2651 umtx_key_release(&key); 2652 return (error); 2653 } 2654 2655 static int 2656 do_cv_broadcast(struct thread *td, struct ucond *cv) 2657 { 2658 struct umtx_key key; 2659 int error; 2660 uint32_t flags; 2661 2662 error = fueword32(&cv->c_flags, &flags); 2663 if (error == -1) 2664 return (EFAULT); 2665 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2666 return (error); 2667 2668 umtxq_lock(&key); 2669 umtxq_busy(&key); 2670 umtxq_signal(&key, INT_MAX); 2671 umtxq_unlock(&key); 2672 2673 error = suword32(&cv->c_has_waiters, 0); 2674 if (error == -1) 2675 error = EFAULT; 2676 2677 umtxq_unbusy_unlocked(&key); 2678 2679 umtx_key_release(&key); 2680 return (error); 2681 } 2682 2683 static int 2684 do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, 2685 struct _umtx_time *timeout) 2686 { 2687 struct abs_timeout timo; 2688 struct umtx_q *uq; 2689 uint32_t flags, wrflags; 2690 int32_t state, oldstate; 2691 int32_t blocked_readers; 2692 int error, error1, rv; 2693 2694 uq = td->td_umtxq; 2695 error = fueword32(&rwlock->rw_flags, &flags); 2696 if (error == -1) 2697 return (EFAULT); 2698 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2699 if (error != 0) 2700 return (error); 2701 2702 if (timeout != NULL) 2703 abs_timeout_init2(&timo, timeout); 2704 2705 wrflags = URWLOCK_WRITE_OWNER; 2706 if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER)) 2707 wrflags |= URWLOCK_WRITE_WAITERS; 2708 2709 for (;;) { 2710 rv = fueword32(&rwlock->rw_state, &state); 2711 if (rv == -1) { 2712 umtx_key_release(&uq->uq_key); 2713 return (EFAULT); 2714 } 2715 2716 /* try to lock it */ 2717 while (!(state & wrflags)) { 2718 if (__predict_false(URWLOCK_READER_COUNT(state) == 2719 URWLOCK_MAX_READERS)) { 2720 umtx_key_release(&uq->uq_key); 2721 return (EAGAIN); 2722 } 2723 rv = casueword32(&rwlock->rw_state, state, 2724 &oldstate, state + 1); 2725 if (rv == -1) { 2726 umtx_key_release(&uq->uq_key); 2727 return (EFAULT); 2728 } 2729 if (rv == 0) { 2730 MPASS(oldstate == state); 2731 umtx_key_release(&uq->uq_key); 2732 return (0); 2733 } 2734 error = thread_check_susp(td, true); 2735 if (error != 0) 2736 break; 2737 state = oldstate; 2738 } 2739 2740 if (error) 2741 break; 2742 2743 /* grab monitor lock */ 2744 umtxq_lock(&uq->uq_key); 2745 
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Re-read the state, in case it changed between the
		 * try-lock above and the check below.
		 */
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1)
			error = EFAULT;

		/* set read contention bit */
		while (error == 0 && (state & wrflags) &&
		    !(state & URWLOCK_READ_WAITERS)) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state | URWLOCK_READ_WAITERS);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (rv == 0) {
				MPASS(oldstate == state);
				goto sleep;
			}
			state = oldstate;
			error = thread_check_susp(td, false);
			if (error != 0)
				break;
		}
		if (error != 0) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			break;
		}

		/* The state changed while we set the flags; restart. */
		if (!(state & wrflags)) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = thread_check_susp(td, true);
			if (error != 0)
				break;
			continue;
		}

sleep:
		/*
		 * The contention bit is set; before sleeping, increase
		 * the read-waiter count.
		 */
		rv = fueword32(&rwlock->rw_blocked_readers,
		    &blocked_readers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_readers, blocked_readers + 1);

		while (state & wrflags) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert(uq);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "urdlck", timeout == NULL ?
			    NULL : &timo);

			umtxq_busy(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
		}

		/*
		 * Decrease the read-waiter count, and possibly clear
		 * the read-contention bit.
		 */
		rv = fueword32(&rwlock->rw_blocked_readers,
		    &blocked_readers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_readers, blocked_readers - 1);
		if (blocked_readers == 1) {
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				umtxq_unbusy_unlocked(&uq->uq_key);
				error = EFAULT;
				break;
			}
			for (;;) {
				rv = casueword32(&rwlock->rw_state, state,
				    &oldstate, state & ~URWLOCK_READ_WAITERS);
				if (rv == -1) {
					error = EFAULT;
					break;
				}
				if (rv == 0) {
					MPASS(oldstate == state);
					break;
				}
				state = oldstate;
				error1 = thread_check_susp(td, false);
				if (error1 != 0) {
					if (error == 0)
						error = error1;
					break;
				}
			}
		}

		umtxq_unbusy_unlocked(&uq->uq_key);
		if (error != 0)
			break;
	}
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

static int
do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags;
	int32_t state, oldstate;
	int32_t blocked_writers;
	int32_t blocked_readers;
	int error, error1, rv;

	uq = td->td_umtxq;
	error = fueword32(&rwlock->rw_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo,
timeout); 2890 2891 blocked_readers = 0; 2892 for (;;) { 2893 rv = fueword32(&rwlock->rw_state, &state); 2894 if (rv == -1) { 2895 umtx_key_release(&uq->uq_key); 2896 return (EFAULT); 2897 } 2898 while ((state & URWLOCK_WRITE_OWNER) == 0 && 2899 URWLOCK_READER_COUNT(state) == 0) { 2900 rv = casueword32(&rwlock->rw_state, state, 2901 &oldstate, state | URWLOCK_WRITE_OWNER); 2902 if (rv == -1) { 2903 umtx_key_release(&uq->uq_key); 2904 return (EFAULT); 2905 } 2906 if (rv == 0) { 2907 MPASS(oldstate == state); 2908 umtx_key_release(&uq->uq_key); 2909 return (0); 2910 } 2911 state = oldstate; 2912 error = thread_check_susp(td, true); 2913 if (error != 0) 2914 break; 2915 } 2916 2917 if (error) { 2918 if ((state & (URWLOCK_WRITE_OWNER | 2919 URWLOCK_WRITE_WAITERS)) == 0 && 2920 blocked_readers != 0) { 2921 umtxq_lock(&uq->uq_key); 2922 umtxq_busy(&uq->uq_key); 2923 umtxq_signal_queue(&uq->uq_key, INT_MAX, 2924 UMTX_SHARED_QUEUE); 2925 umtxq_unbusy(&uq->uq_key); 2926 umtxq_unlock(&uq->uq_key); 2927 } 2928 2929 break; 2930 } 2931 2932 /* grab monitor lock */ 2933 umtxq_lock(&uq->uq_key); 2934 umtxq_busy(&uq->uq_key); 2935 umtxq_unlock(&uq->uq_key); 2936 2937 /* 2938 * Re-read the state, in case it changed between the 2939 * try-lock above and the check below. 2940 */ 2941 rv = fueword32(&rwlock->rw_state, &state); 2942 if (rv == -1) 2943 error = EFAULT; 2944 2945 while (error == 0 && ((state & URWLOCK_WRITE_OWNER) || 2946 URWLOCK_READER_COUNT(state) != 0) && 2947 (state & URWLOCK_WRITE_WAITERS) == 0) { 2948 rv = casueword32(&rwlock->rw_state, state, 2949 &oldstate, state | URWLOCK_WRITE_WAITERS); 2950 if (rv == -1) { 2951 error = EFAULT; 2952 break; 2953 } 2954 if (rv == 0) { 2955 MPASS(oldstate == state); 2956 goto sleep; 2957 } 2958 state = oldstate; 2959 error = thread_check_susp(td, false); 2960 if (error != 0) 2961 break; 2962 } 2963 if (error != 0) { 2964 umtxq_unbusy_unlocked(&uq->uq_key); 2965 break; 2966 } 2967 2968 if ((state & URWLOCK_WRITE_OWNER) == 0 && 2969 URWLOCK_READER_COUNT(state) == 0) { 2970 umtxq_unbusy_unlocked(&uq->uq_key); 2971 error = thread_check_susp(td, false); 2972 if (error != 0) 2973 break; 2974 continue; 2975 } 2976 sleep: 2977 rv = fueword32(&rwlock->rw_blocked_writers, 2978 &blocked_writers); 2979 if (rv == -1) { 2980 umtxq_unbusy_unlocked(&uq->uq_key); 2981 error = EFAULT; 2982 break; 2983 } 2984 suword32(&rwlock->rw_blocked_writers, blocked_writers + 1); 2985 2986 while ((state & URWLOCK_WRITE_OWNER) || 2987 URWLOCK_READER_COUNT(state) != 0) { 2988 umtxq_lock(&uq->uq_key); 2989 umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE); 2990 umtxq_unbusy(&uq->uq_key); 2991 2992 error = umtxq_sleep(uq, "uwrlck", timeout == NULL ? 
2993 NULL : &timo); 2994 2995 umtxq_busy(&uq->uq_key); 2996 umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE); 2997 umtxq_unlock(&uq->uq_key); 2998 if (error) 2999 break; 3000 rv = fueword32(&rwlock->rw_state, &state); 3001 if (rv == -1) { 3002 error = EFAULT; 3003 break; 3004 } 3005 } 3006 3007 rv = fueword32(&rwlock->rw_blocked_writers, 3008 &blocked_writers); 3009 if (rv == -1) { 3010 umtxq_unbusy_unlocked(&uq->uq_key); 3011 error = EFAULT; 3012 break; 3013 } 3014 suword32(&rwlock->rw_blocked_writers, blocked_writers-1); 3015 if (blocked_writers == 1) { 3016 rv = fueword32(&rwlock->rw_state, &state); 3017 if (rv == -1) { 3018 umtxq_unbusy_unlocked(&uq->uq_key); 3019 error = EFAULT; 3020 break; 3021 } 3022 for (;;) { 3023 rv = casueword32(&rwlock->rw_state, state, 3024 &oldstate, state & ~URWLOCK_WRITE_WAITERS); 3025 if (rv == -1) { 3026 error = EFAULT; 3027 break; 3028 } 3029 if (rv == 0) { 3030 MPASS(oldstate == state); 3031 break; 3032 } 3033 state = oldstate; 3034 error1 = thread_check_susp(td, false); 3035 /* 3036 * We are leaving the URWLOCK_WRITE_WAITERS 3037 * behind, but this should not harm the 3038 * correctness. 3039 */ 3040 if (error1 != 0) { 3041 if (error == 0) 3042 error = error1; 3043 break; 3044 } 3045 } 3046 rv = fueword32(&rwlock->rw_blocked_readers, 3047 &blocked_readers); 3048 if (rv == -1) { 3049 umtxq_unbusy_unlocked(&uq->uq_key); 3050 error = EFAULT; 3051 break; 3052 } 3053 } else 3054 blocked_readers = 0; 3055 3056 umtxq_unbusy_unlocked(&uq->uq_key); 3057 } 3058 3059 umtx_key_release(&uq->uq_key); 3060 if (error == ERESTART) 3061 error = EINTR; 3062 return (error); 3063 } 3064 3065 static int 3066 do_rw_unlock(struct thread *td, struct urwlock *rwlock) 3067 { 3068 struct umtx_q *uq; 3069 uint32_t flags; 3070 int32_t state, oldstate; 3071 int error, rv, q, count; 3072 3073 uq = td->td_umtxq; 3074 error = fueword32(&rwlock->rw_flags, &flags); 3075 if (error == -1) 3076 return (EFAULT); 3077 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3078 if (error != 0) 3079 return (error); 3080 3081 error = fueword32(&rwlock->rw_state, &state); 3082 if (error == -1) { 3083 error = EFAULT; 3084 goto out; 3085 } 3086 if (state & URWLOCK_WRITE_OWNER) { 3087 for (;;) { 3088 rv = casueword32(&rwlock->rw_state, state, 3089 &oldstate, state & ~URWLOCK_WRITE_OWNER); 3090 if (rv == -1) { 3091 error = EFAULT; 3092 goto out; 3093 } 3094 if (rv == 1) { 3095 state = oldstate; 3096 if (!(oldstate & URWLOCK_WRITE_OWNER)) { 3097 error = EPERM; 3098 goto out; 3099 } 3100 error = thread_check_susp(td, true); 3101 if (error != 0) 3102 goto out; 3103 } else 3104 break; 3105 } 3106 } else if (URWLOCK_READER_COUNT(state) != 0) { 3107 for (;;) { 3108 rv = casueword32(&rwlock->rw_state, state, 3109 &oldstate, state - 1); 3110 if (rv == -1) { 3111 error = EFAULT; 3112 goto out; 3113 } 3114 if (rv == 1) { 3115 state = oldstate; 3116 if (URWLOCK_READER_COUNT(oldstate) == 0) { 3117 error = EPERM; 3118 goto out; 3119 } 3120 error = thread_check_susp(td, true); 3121 if (error != 0) 3122 goto out; 3123 } else 3124 break; 3125 } 3126 } else { 3127 error = EPERM; 3128 goto out; 3129 } 3130 3131 count = 0; 3132 3133 if (!(flags & URWLOCK_PREFER_READER)) { 3134 if (state & URWLOCK_WRITE_WAITERS) { 3135 count = 1; 3136 q = UMTX_EXCLUSIVE_QUEUE; 3137 } else if (state & URWLOCK_READ_WAITERS) { 3138 count = INT_MAX; 3139 q = UMTX_SHARED_QUEUE; 3140 } 3141 } else { 3142 if (state & URWLOCK_READ_WAITERS) { 3143 count = INT_MAX; 3144 q = UMTX_SHARED_QUEUE; 3145 } else if (state & 
		    URWLOCK_WRITE_WAITERS) {
			count = 1;
			q = UMTX_EXCLUSIVE_QUEUE;
		}
	}

	if (count) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_signal_queue(&uq->uq_key, count, q);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
	}
out:
	umtx_key_release(&uq->uq_key);
	return (error);
}

#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
static int
do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags, count, count1;
	int error, rv, rv1;

	uq = td->td_umtxq;
	error = fueword32(&sem->_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

again:
	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	rv = casueword32(&sem->_has_waiters, 0, &count1, 1);
	if (rv == 0)
		rv1 = fueword32(&sem->_count, &count);
	if (rv == -1 || (rv == 0 && (rv1 == -1 || count != 0)) ||
	    (rv == 1 && count1 == 0)) {
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		if (rv == 1) {
			rv = thread_check_susp(td, true);
			if (rv == 0)
				goto again;
			error = rv;
			goto out;
		}
		if (rv == 0)
			rv = rv1;
		error = rv == -1 ? EFAULT : 0;
		goto out;
	}
	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo);

	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		error = 0;
	else {
		umtxq_remove(uq);
		/* A relative timeout cannot be restarted. */
		if (error == ERESTART && timeout != NULL &&
		    (timeout->_flags & UMTX_ABSTIME) == 0)
			error = EINTR;
	}
	umtxq_unlock(&uq->uq_key);
out:
	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Signal a userland semaphore.
 */
static int
do_sem_wake(struct thread *td, struct _usem *sem)
{
	struct umtx_key key;
	int error, cnt;
	uint32_t flags;

	error = fueword32(&sem->_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	if (cnt > 0) {
		/*
		 * The count is greater than zero, which means the
		 * memory is still being referenced by user code, so we
		 * can safely update the _has_waiters flag.
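		 *
		 * When the woken thread is the last waiter (cnt == 1),
		 * _has_waiters is cleared before the wakeup is sent;
		 * the chain is kept busy across the whole sequence, so
		 * a concurrent do_sem_wait() cannot slip in between.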
3252 */ 3253 if (cnt == 1) { 3254 umtxq_unlock(&key); 3255 error = suword32(&sem->_has_waiters, 0); 3256 umtxq_lock(&key); 3257 if (error == -1) 3258 error = EFAULT; 3259 } 3260 umtxq_signal(&key, 1); 3261 } 3262 umtxq_unbusy(&key); 3263 umtxq_unlock(&key); 3264 umtx_key_release(&key); 3265 return (error); 3266 } 3267 #endif 3268 3269 static int 3270 do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout) 3271 { 3272 struct abs_timeout timo; 3273 struct umtx_q *uq; 3274 uint32_t count, flags; 3275 int error, rv; 3276 3277 uq = td->td_umtxq; 3278 flags = fuword32(&sem->_flags); 3279 if (timeout != NULL) 3280 abs_timeout_init2(&timo, timeout); 3281 3282 again: 3283 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3284 if (error != 0) 3285 return (error); 3286 umtxq_lock(&uq->uq_key); 3287 umtxq_busy(&uq->uq_key); 3288 umtxq_insert(uq); 3289 umtxq_unlock(&uq->uq_key); 3290 rv = fueword32(&sem->_count, &count); 3291 if (rv == -1) { 3292 umtxq_lock(&uq->uq_key); 3293 umtxq_unbusy(&uq->uq_key); 3294 umtxq_remove(uq); 3295 umtxq_unlock(&uq->uq_key); 3296 umtx_key_release(&uq->uq_key); 3297 return (EFAULT); 3298 } 3299 for (;;) { 3300 if (USEM_COUNT(count) != 0) { 3301 umtxq_lock(&uq->uq_key); 3302 umtxq_unbusy(&uq->uq_key); 3303 umtxq_remove(uq); 3304 umtxq_unlock(&uq->uq_key); 3305 umtx_key_release(&uq->uq_key); 3306 return (0); 3307 } 3308 if (count == USEM_HAS_WAITERS) 3309 break; 3310 rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS); 3311 if (rv == 0) 3312 break; 3313 umtxq_lock(&uq->uq_key); 3314 umtxq_unbusy(&uq->uq_key); 3315 umtxq_remove(uq); 3316 umtxq_unlock(&uq->uq_key); 3317 umtx_key_release(&uq->uq_key); 3318 if (rv == -1) 3319 return (EFAULT); 3320 rv = thread_check_susp(td, true); 3321 if (rv != 0) 3322 return (rv); 3323 goto again; 3324 } 3325 umtxq_lock(&uq->uq_key); 3326 umtxq_unbusy(&uq->uq_key); 3327 3328 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 3329 3330 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3331 error = 0; 3332 else { 3333 umtxq_remove(uq); 3334 if (timeout != NULL && (timeout->_flags & UMTX_ABSTIME) == 0) { 3335 /* A relative timeout cannot be restarted. */ 3336 if (error == ERESTART) 3337 error = EINTR; 3338 if (error == EINTR) { 3339 abs_timeout_update(&timo); 3340 timespecsub(&timo.end, &timo.cur, 3341 &timeout->_timeout); 3342 } 3343 } 3344 } 3345 umtxq_unlock(&uq->uq_key); 3346 umtx_key_release(&uq->uq_key); 3347 return (error); 3348 } 3349 3350 /* 3351 * Signal a userland semaphore. 3352 */ 3353 static int 3354 do_sem2_wake(struct thread *td, struct _usem2 *sem) 3355 { 3356 struct umtx_key key; 3357 int error, cnt, rv; 3358 uint32_t count, flags; 3359 3360 rv = fueword32(&sem->_flags, &flags); 3361 if (rv == -1) 3362 return (EFAULT); 3363 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3364 return (error); 3365 umtxq_lock(&key); 3366 umtxq_busy(&key); 3367 cnt = umtxq_count(&key); 3368 if (cnt > 0) { 3369 /* 3370 * If this was the last sleeping thread, clear the waiters 3371 * flag in _count. 
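		 *
		 * The clearing is done by the CAS loop below, which
		 * strips USEM_HAS_WAITERS while preserving the count
		 * bits and retries if the word changes or the thread
		 * needs to suspend.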
3372 */ 3373 if (cnt == 1) { 3374 umtxq_unlock(&key); 3375 rv = fueword32(&sem->_count, &count); 3376 while (rv != -1 && count & USEM_HAS_WAITERS) { 3377 rv = casueword32(&sem->_count, count, &count, 3378 count & ~USEM_HAS_WAITERS); 3379 if (rv == 1) { 3380 rv = thread_check_susp(td, true); 3381 if (rv != 0) 3382 break; 3383 } 3384 } 3385 if (rv == -1) 3386 error = EFAULT; 3387 else if (rv > 0) { 3388 error = rv; 3389 } 3390 umtxq_lock(&key); 3391 } 3392 3393 umtxq_signal(&key, 1); 3394 } 3395 umtxq_unbusy(&key); 3396 umtxq_unlock(&key); 3397 umtx_key_release(&key); 3398 return (error); 3399 } 3400 3401 inline int 3402 umtx_copyin_timeout(const void *addr, struct timespec *tsp) 3403 { 3404 int error; 3405 3406 error = copyin(addr, tsp, sizeof(struct timespec)); 3407 if (error == 0) { 3408 if (tsp->tv_sec < 0 || 3409 tsp->tv_nsec >= 1000000000 || 3410 tsp->tv_nsec < 0) 3411 error = EINVAL; 3412 } 3413 return (error); 3414 } 3415 3416 static inline int 3417 umtx_copyin_umtx_time(const void *addr, size_t size, struct _umtx_time *tp) 3418 { 3419 int error; 3420 3421 if (size <= sizeof(struct timespec)) { 3422 tp->_clockid = CLOCK_REALTIME; 3423 tp->_flags = 0; 3424 error = copyin(addr, &tp->_timeout, sizeof(struct timespec)); 3425 } else 3426 error = copyin(addr, tp, sizeof(struct _umtx_time)); 3427 if (error != 0) 3428 return (error); 3429 if (tp->_timeout.tv_sec < 0 || 3430 tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0) 3431 return (EINVAL); 3432 return (0); 3433 } 3434 3435 static int 3436 __umtx_op_unimpl(struct thread *td, struct _umtx_op_args *uap) 3437 { 3438 3439 return (EOPNOTSUPP); 3440 } 3441 3442 static int 3443 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap) 3444 { 3445 struct _umtx_time timeout, *tm_p; 3446 int error; 3447 3448 if (uap->uaddr2 == NULL) 3449 tm_p = NULL; 3450 else { 3451 error = umtx_copyin_umtx_time( 3452 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3453 if (error != 0) 3454 return (error); 3455 tm_p = &timeout; 3456 } 3457 return (do_wait(td, uap->obj, uap->val, tm_p, 0, 0)); 3458 } 3459 3460 static int 3461 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap) 3462 { 3463 struct _umtx_time timeout, *tm_p; 3464 int error; 3465 3466 if (uap->uaddr2 == NULL) 3467 tm_p = NULL; 3468 else { 3469 error = umtx_copyin_umtx_time( 3470 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3471 if (error != 0) 3472 return (error); 3473 tm_p = &timeout; 3474 } 3475 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); 3476 } 3477 3478 static int 3479 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap) 3480 { 3481 struct _umtx_time *tm_p, timeout; 3482 int error; 3483 3484 if (uap->uaddr2 == NULL) 3485 tm_p = NULL; 3486 else { 3487 error = umtx_copyin_umtx_time( 3488 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3489 if (error != 0) 3490 return (error); 3491 tm_p = &timeout; 3492 } 3493 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1)); 3494 } 3495 3496 static int 3497 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap) 3498 { 3499 3500 return (kern_umtx_wake(td, uap->obj, uap->val, 0)); 3501 } 3502 3503 #define BATCH_SIZE 128 3504 static int 3505 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap) 3506 { 3507 char *uaddrs[BATCH_SIZE], **upp; 3508 int count, error, i, pos, tocopy; 3509 3510 upp = (char **)uap->obj; 3511 error = 0; 3512 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 3513 pos += tocopy) { 3514 tocopy = MIN(count, BATCH_SIZE); 3515 error = copyin(upp + pos, uaddrs, 
tocopy * sizeof(char *)); 3516 if (error != 0) 3517 break; 3518 for (i = 0; i < tocopy; ++i) 3519 kern_umtx_wake(td, uaddrs[i], INT_MAX, 1); 3520 maybe_yield(); 3521 } 3522 return (error); 3523 } 3524 3525 static int 3526 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap) 3527 { 3528 3529 return (kern_umtx_wake(td, uap->obj, uap->val, 1)); 3530 } 3531 3532 static int 3533 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap) 3534 { 3535 struct _umtx_time *tm_p, timeout; 3536 int error; 3537 3538 /* Allow a null timespec (wait forever). */ 3539 if (uap->uaddr2 == NULL) 3540 tm_p = NULL; 3541 else { 3542 error = umtx_copyin_umtx_time( 3543 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3544 if (error != 0) 3545 return (error); 3546 tm_p = &timeout; 3547 } 3548 return (do_lock_umutex(td, uap->obj, tm_p, 0)); 3549 } 3550 3551 static int 3552 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap) 3553 { 3554 3555 return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY)); 3556 } 3557 3558 static int 3559 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap) 3560 { 3561 struct _umtx_time *tm_p, timeout; 3562 int error; 3563 3564 /* Allow a null timespec (wait forever). */ 3565 if (uap->uaddr2 == NULL) 3566 tm_p = NULL; 3567 else { 3568 error = umtx_copyin_umtx_time( 3569 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3570 if (error != 0) 3571 return (error); 3572 tm_p = &timeout; 3573 } 3574 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT)); 3575 } 3576 3577 static int 3578 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap) 3579 { 3580 3581 return (do_wake_umutex(td, uap->obj)); 3582 } 3583 3584 static int 3585 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap) 3586 { 3587 3588 return (do_unlock_umutex(td, uap->obj, false)); 3589 } 3590 3591 static int 3592 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap) 3593 { 3594 3595 return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1)); 3596 } 3597 3598 static int 3599 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap) 3600 { 3601 struct timespec *ts, timeout; 3602 int error; 3603 3604 /* Allow a null timespec (wait forever). */ 3605 if (uap->uaddr2 == NULL) 3606 ts = NULL; 3607 else { 3608 error = umtx_copyin_timeout(uap->uaddr2, &timeout); 3609 if (error != 0) 3610 return (error); 3611 ts = &timeout; 3612 } 3613 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 3614 } 3615 3616 static int 3617 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap) 3618 { 3619 3620 return (do_cv_signal(td, uap->obj)); 3621 } 3622 3623 static int 3624 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap) 3625 { 3626 3627 return (do_cv_broadcast(td, uap->obj)); 3628 } 3629 3630 static int 3631 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap) 3632 { 3633 struct _umtx_time timeout; 3634 int error; 3635 3636 /* Allow a null timespec (wait forever). */ 3637 if (uap->uaddr2 == NULL) { 3638 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 3639 } else { 3640 error = umtx_copyin_umtx_time(uap->uaddr2, 3641 (size_t)uap->uaddr1, &timeout); 3642 if (error != 0) 3643 return (error); 3644 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 3645 } 3646 return (error); 3647 } 3648 3649 static int 3650 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap) 3651 { 3652 struct _umtx_time timeout; 3653 int error; 3654 3655 /* Allow a null timespec (wait forever). 
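	 *
	 * By convention, uaddr1 carries the size of the object at
	 * uaddr2, letting umtx_copyin_umtx_time() accept either a bare
	 * struct timespec or a full struct _umtx_time.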
*/ 3656 if (uap->uaddr2 == NULL) { 3657 error = do_rw_wrlock(td, uap->obj, 0); 3658 } else { 3659 error = umtx_copyin_umtx_time(uap->uaddr2, 3660 (size_t)uap->uaddr1, &timeout); 3661 if (error != 0) 3662 return (error); 3663 3664 error = do_rw_wrlock(td, uap->obj, &timeout); 3665 } 3666 return (error); 3667 } 3668 3669 static int 3670 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap) 3671 { 3672 3673 return (do_rw_unlock(td, uap->obj)); 3674 } 3675 3676 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3677 static int 3678 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap) 3679 { 3680 struct _umtx_time *tm_p, timeout; 3681 int error; 3682 3683 /* Allow a null timespec (wait forever). */ 3684 if (uap->uaddr2 == NULL) 3685 tm_p = NULL; 3686 else { 3687 error = umtx_copyin_umtx_time( 3688 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3689 if (error != 0) 3690 return (error); 3691 tm_p = &timeout; 3692 } 3693 return (do_sem_wait(td, uap->obj, tm_p)); 3694 } 3695 3696 static int 3697 __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap) 3698 { 3699 3700 return (do_sem_wake(td, uap->obj)); 3701 } 3702 #endif 3703 3704 static int 3705 __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap) 3706 { 3707 3708 return (do_wake2_umutex(td, uap->obj, uap->val)); 3709 } 3710 3711 static int 3712 __umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap) 3713 { 3714 struct _umtx_time *tm_p, timeout; 3715 size_t uasize; 3716 int error; 3717 3718 /* Allow a null timespec (wait forever). */ 3719 if (uap->uaddr2 == NULL) { 3720 uasize = 0; 3721 tm_p = NULL; 3722 } else { 3723 uasize = (size_t)uap->uaddr1; 3724 error = umtx_copyin_umtx_time(uap->uaddr2, uasize, &timeout); 3725 if (error != 0) 3726 return (error); 3727 tm_p = &timeout; 3728 } 3729 error = do_sem2_wait(td, uap->obj, tm_p); 3730 if (error == EINTR && uap->uaddr2 != NULL && 3731 (timeout._flags & UMTX_ABSTIME) == 0 && 3732 uasize >= sizeof(struct _umtx_time) + sizeof(struct timespec)) { 3733 error = copyout(&timeout._timeout, 3734 (struct _umtx_time *)uap->uaddr2 + 1, 3735 sizeof(struct timespec)); 3736 if (error == 0) { 3737 error = EINTR; 3738 } 3739 } 3740 3741 return (error); 3742 } 3743 3744 static int 3745 __umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap) 3746 { 3747 3748 return (do_sem2_wake(td, uap->obj)); 3749 } 3750 3751 #define USHM_OBJ_UMTX(o) \ 3752 ((struct umtx_shm_obj_list *)(&(o)->umtx_data)) 3753 3754 #define USHMF_REG_LINKED 0x0001 3755 #define USHMF_OBJ_LINKED 0x0002 3756 struct umtx_shm_reg { 3757 TAILQ_ENTRY(umtx_shm_reg) ushm_reg_link; 3758 LIST_ENTRY(umtx_shm_reg) ushm_obj_link; 3759 struct umtx_key ushm_key; 3760 struct ucred *ushm_cred; 3761 struct shmfd *ushm_obj; 3762 u_int ushm_refcnt; 3763 u_int ushm_flags; 3764 }; 3765 3766 LIST_HEAD(umtx_shm_obj_list, umtx_shm_reg); 3767 TAILQ_HEAD(umtx_shm_reg_head, umtx_shm_reg); 3768 3769 static uma_zone_t umtx_shm_reg_zone; 3770 static struct umtx_shm_reg_head umtx_shm_registry[UMTX_CHAINS]; 3771 static struct mtx umtx_shm_lock; 3772 static struct umtx_shm_reg_head umtx_shm_reg_delfree = 3773 TAILQ_HEAD_INITIALIZER(umtx_shm_reg_delfree); 3774 3775 static void umtx_shm_free_reg(struct umtx_shm_reg *reg); 3776 3777 static void 3778 umtx_shm_reg_delfree_tq(void *context __unused, int pending __unused) 3779 { 3780 struct umtx_shm_reg_head d; 3781 struct umtx_shm_reg *reg, *reg1; 3782 3783 TAILQ_INIT(&d); 3784 mtx_lock(&umtx_shm_lock); 3785 TAILQ_CONCAT(&d, &umtx_shm_reg_delfree, ushm_reg_link); 3786 
mtx_unlock(&umtx_shm_lock); 3787 TAILQ_FOREACH_SAFE(reg, &d, ushm_reg_link, reg1) { 3788 TAILQ_REMOVE(&d, reg, ushm_reg_link); 3789 umtx_shm_free_reg(reg); 3790 } 3791 } 3792 3793 static struct task umtx_shm_reg_delfree_task = 3794 TASK_INITIALIZER(0, umtx_shm_reg_delfree_tq, NULL); 3795 3796 static struct umtx_shm_reg * 3797 umtx_shm_find_reg_locked(const struct umtx_key *key) 3798 { 3799 struct umtx_shm_reg *reg; 3800 struct umtx_shm_reg_head *reg_head; 3801 3802 KASSERT(key->shared, ("umtx_p_find_rg: private key")); 3803 mtx_assert(&umtx_shm_lock, MA_OWNED); 3804 reg_head = &umtx_shm_registry[key->hash]; 3805 TAILQ_FOREACH(reg, reg_head, ushm_reg_link) { 3806 KASSERT(reg->ushm_key.shared, 3807 ("non-shared key on reg %p %d", reg, reg->ushm_key.shared)); 3808 if (reg->ushm_key.info.shared.object == 3809 key->info.shared.object && 3810 reg->ushm_key.info.shared.offset == 3811 key->info.shared.offset) { 3812 KASSERT(reg->ushm_key.type == TYPE_SHM, ("TYPE_USHM")); 3813 KASSERT(reg->ushm_refcnt > 0, 3814 ("reg %p refcnt 0 onlist", reg)); 3815 KASSERT((reg->ushm_flags & USHMF_REG_LINKED) != 0, 3816 ("reg %p not linked", reg)); 3817 reg->ushm_refcnt++; 3818 return (reg); 3819 } 3820 } 3821 return (NULL); 3822 } 3823 3824 static struct umtx_shm_reg * 3825 umtx_shm_find_reg(const struct umtx_key *key) 3826 { 3827 struct umtx_shm_reg *reg; 3828 3829 mtx_lock(&umtx_shm_lock); 3830 reg = umtx_shm_find_reg_locked(key); 3831 mtx_unlock(&umtx_shm_lock); 3832 return (reg); 3833 } 3834 3835 static void 3836 umtx_shm_free_reg(struct umtx_shm_reg *reg) 3837 { 3838 3839 chgumtxcnt(reg->ushm_cred->cr_ruidinfo, -1, 0); 3840 crfree(reg->ushm_cred); 3841 shm_drop(reg->ushm_obj); 3842 uma_zfree(umtx_shm_reg_zone, reg); 3843 } 3844 3845 static bool 3846 umtx_shm_unref_reg_locked(struct umtx_shm_reg *reg, bool force) 3847 { 3848 bool res; 3849 3850 mtx_assert(&umtx_shm_lock, MA_OWNED); 3851 KASSERT(reg->ushm_refcnt > 0, ("ushm_reg %p refcnt 0", reg)); 3852 reg->ushm_refcnt--; 3853 res = reg->ushm_refcnt == 0; 3854 if (res || force) { 3855 if ((reg->ushm_flags & USHMF_REG_LINKED) != 0) { 3856 TAILQ_REMOVE(&umtx_shm_registry[reg->ushm_key.hash], 3857 reg, ushm_reg_link); 3858 reg->ushm_flags &= ~USHMF_REG_LINKED; 3859 } 3860 if ((reg->ushm_flags & USHMF_OBJ_LINKED) != 0) { 3861 LIST_REMOVE(reg, ushm_obj_link); 3862 reg->ushm_flags &= ~USHMF_OBJ_LINKED; 3863 } 3864 } 3865 return (res); 3866 } 3867 3868 static void 3869 umtx_shm_unref_reg(struct umtx_shm_reg *reg, bool force) 3870 { 3871 vm_object_t object; 3872 bool dofree; 3873 3874 if (force) { 3875 object = reg->ushm_obj->shm_object; 3876 VM_OBJECT_WLOCK(object); 3877 object->flags |= OBJ_UMTXDEAD; 3878 VM_OBJECT_WUNLOCK(object); 3879 } 3880 mtx_lock(&umtx_shm_lock); 3881 dofree = umtx_shm_unref_reg_locked(reg, force); 3882 mtx_unlock(&umtx_shm_lock); 3883 if (dofree) 3884 umtx_shm_free_reg(reg); 3885 } 3886 3887 void 3888 umtx_shm_object_init(vm_object_t object) 3889 { 3890 3891 LIST_INIT(USHM_OBJ_UMTX(object)); 3892 } 3893 3894 void 3895 umtx_shm_object_terminated(vm_object_t object) 3896 { 3897 struct umtx_shm_reg *reg, *reg1; 3898 bool dofree; 3899 3900 if (LIST_EMPTY(USHM_OBJ_UMTX(object))) 3901 return; 3902 3903 dofree = false; 3904 mtx_lock(&umtx_shm_lock); 3905 LIST_FOREACH_SAFE(reg, USHM_OBJ_UMTX(object), ushm_obj_link, reg1) { 3906 if (umtx_shm_unref_reg_locked(reg, true)) { 3907 TAILQ_INSERT_TAIL(&umtx_shm_reg_delfree, reg, 3908 ushm_reg_link); 3909 dofree = true; 3910 } 3911 } 3912 mtx_unlock(&umtx_shm_lock); 3913 if (dofree) 3914 
		taskqueue_enqueue(taskqueue_thread, &umtx_shm_reg_delfree_task);
}

static int
umtx_shm_create_reg(struct thread *td, const struct umtx_key *key,
    struct umtx_shm_reg **res)
{
	struct umtx_shm_reg *reg, *reg1;
	struct ucred *cred;
	int error;

	reg = umtx_shm_find_reg(key);
	if (reg != NULL) {
		*res = reg;
		return (0);
	}
	cred = td->td_ucred;
	if (!chgumtxcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_UMTXP)))
		return (ENOMEM);
	reg = uma_zalloc(umtx_shm_reg_zone, M_WAITOK | M_ZERO);
	reg->ushm_refcnt = 1;
	bcopy(key, &reg->ushm_key, sizeof(*key));
	reg->ushm_obj = shm_alloc(td->td_ucred, O_RDWR, false);
	reg->ushm_cred = crhold(cred);
	error = shm_dotruncate(reg->ushm_obj, PAGE_SIZE);
	if (error != 0) {
		umtx_shm_free_reg(reg);
		return (error);
	}
	mtx_lock(&umtx_shm_lock);
	reg1 = umtx_shm_find_reg_locked(key);
	if (reg1 != NULL) {
		mtx_unlock(&umtx_shm_lock);
		umtx_shm_free_reg(reg);
		*res = reg1;
		return (0);
	}
	reg->ushm_refcnt++;
	TAILQ_INSERT_TAIL(&umtx_shm_registry[key->hash], reg, ushm_reg_link);
	LIST_INSERT_HEAD(USHM_OBJ_UMTX(key->info.shared.object), reg,
	    ushm_obj_link);
	reg->ushm_flags = USHMF_REG_LINKED | USHMF_OBJ_LINKED;
	mtx_unlock(&umtx_shm_lock);
	*res = reg;
	return (0);
}

static int
umtx_shm_alive(struct thread *td, void *addr)
{
	vm_map_t map;
	vm_map_entry_t entry;
	vm_object_t object;
	vm_pindex_t pindex;
	vm_prot_t prot;
	int res, ret;
	boolean_t wired;

	map = &td->td_proc->p_vmspace->vm_map;
	res = vm_map_lookup(&map, (uintptr_t)addr, VM_PROT_READ, &entry,
	    &object, &pindex, &prot, &wired);
	if (res != KERN_SUCCESS)
		return (EFAULT);
	if (object == NULL)
		ret = EINVAL;
	else
		ret = (object->flags & OBJ_UMTXDEAD) != 0 ? ENOTTY : 0;
	vm_map_lookup_done(map, entry);
	return (ret);
}

static void
umtx_shm_init(void)
{
	int i;

	umtx_shm_reg_zone = uma_zcreate("umtx_shm", sizeof(struct umtx_shm_reg),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	mtx_init(&umtx_shm_lock, "umtxshm", NULL, MTX_DEF);
	for (i = 0; i < nitems(umtx_shm_registry); i++)
		TAILQ_INIT(&umtx_shm_registry[i]);
}

static int
umtx_shm(struct thread *td, void *addr, u_int flags)
{
	struct umtx_key key;
	struct umtx_shm_reg *reg;
	struct file *fp;
	int error, fd;

	if (__bitcount(flags & (UMTX_SHM_CREAT | UMTX_SHM_LOOKUP |
	    UMTX_SHM_DESTROY | UMTX_SHM_ALIVE)) != 1)
		return (EINVAL);
	if ((flags & UMTX_SHM_ALIVE) != 0)
		return (umtx_shm_alive(td, addr));
	error = umtx_key_get(addr, TYPE_SHM, PROCESS_SHARE, &key);
	if (error != 0)
		return (error);
	KASSERT(key.shared == 1, ("non-shared key"));
	if ((flags & UMTX_SHM_CREAT) != 0) {
		error = umtx_shm_create_reg(td, &key, &reg);
	} else {
		reg = umtx_shm_find_reg(&key);
		if (reg == NULL)
			error = ESRCH;
	}
	umtx_key_release(&key);
	if (error != 0)
		return (error);
	KASSERT(reg != NULL, ("no reg"));
	if ((flags & UMTX_SHM_DESTROY) != 0) {
		umtx_shm_unref_reg(reg, true);
	} else {
#if 0
#ifdef MAC
		error = mac_posixshm_check_open(td->td_ucred,
		    reg->ushm_obj, FFLAGS(O_RDWR));
		if (error == 0)
#endif
			error = shm_access(reg->ushm_obj, td->td_ucred,
			    FFLAGS(O_RDWR));
		if (error == 0)
#endif
			error = falloc_caps(td, &fp, &fd, O_CLOEXEC, NULL);
		if (error == 0) {
			shm_hold(reg->ushm_obj);
			finit(fp, FFLAGS(O_RDWR), DTYPE_SHM, reg->ushm_obj,
			    &shm_ops);
			td->td_retval[0] = fd;
			fdrop(fp, td);
		}
	}
	umtx_shm_unref_reg(reg, false);
	return (error);
}

static int
__umtx_op_shm(struct thread *td, struct _umtx_op_args *uap)
{

	return (umtx_shm(td, uap->uaddr1, uap->val));
}

static int
umtx_robust_lists(struct thread *td, struct umtx_robust_lists_params *rbp)
{

	td->td_rb_list = rbp->robust_list_offset;
	td->td_rbp_list = rbp->robust_priv_list_offset;
	td->td_rb_inact = rbp->robust_inact_offset;
	return (0);
}

static int
__umtx_op_robust_lists(struct thread *td, struct _umtx_op_args *uap)
{
	struct umtx_robust_lists_params rb;
	int error;

	if (uap->val > sizeof(rb))
		return (EINVAL);
	bzero(&rb, sizeof(rb));
	error = copyin(uap->uaddr1, &rb, uap->val);
	if (error != 0)
		return (error);
	return (umtx_robust_lists(td, &rb));
}

typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);

static const _umtx_op_func op_table[] = {
	[UMTX_OP_RESERVED0]	= __umtx_op_unimpl,
	[UMTX_OP_RESERVED1]	= __umtx_op_unimpl,
	[UMTX_OP_WAIT]		= __umtx_op_wait,
	[UMTX_OP_WAKE]		= __umtx_op_wake,
	[UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex,
	[UMTX_OP_MUTEX_LOCK]	= __umtx_op_lock_umutex,
	[UMTX_OP_MUTEX_UNLOCK]	= __umtx_op_unlock_umutex,
	[UMTX_OP_SET_CEILING]	= __umtx_op_set_ceiling,
	[UMTX_OP_CV_WAIT]	= __umtx_op_cv_wait,
	[UMTX_OP_CV_SIGNAL]	= __umtx_op_cv_signal,
	[UMTX_OP_CV_BROADCAST]	= __umtx_op_cv_broadcast,
	[UMTX_OP_WAIT_UINT]	= __umtx_op_wait_uint,
	[UMTX_OP_RW_RDLOCK]	= __umtx_op_rw_rdlock,
[UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock, 4100 [UMTX_OP_RW_UNLOCK] = __umtx_op_rw_unlock, 4101 [UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private, 4102 [UMTX_OP_WAKE_PRIVATE] = __umtx_op_wake_private, 4103 [UMTX_OP_MUTEX_WAIT] = __umtx_op_wait_umutex, 4104 [UMTX_OP_MUTEX_WAKE] = __umtx_op_wake_umutex, 4105 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4106 [UMTX_OP_SEM_WAIT] = __umtx_op_sem_wait, 4107 [UMTX_OP_SEM_WAKE] = __umtx_op_sem_wake, 4108 #else 4109 [UMTX_OP_SEM_WAIT] = __umtx_op_unimpl, 4110 [UMTX_OP_SEM_WAKE] = __umtx_op_unimpl, 4111 #endif 4112 [UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private, 4113 [UMTX_OP_MUTEX_WAKE2] = __umtx_op_wake2_umutex, 4114 [UMTX_OP_SEM2_WAIT] = __umtx_op_sem2_wait, 4115 [UMTX_OP_SEM2_WAKE] = __umtx_op_sem2_wake, 4116 [UMTX_OP_SHM] = __umtx_op_shm, 4117 [UMTX_OP_ROBUST_LISTS] = __umtx_op_robust_lists, 4118 }; 4119 4120 int 4121 sys__umtx_op(struct thread *td, struct _umtx_op_args *uap) 4122 { 4123 4124 if ((unsigned)uap->op < nitems(op_table)) 4125 return (*op_table[uap->op])(td, uap); 4126 return (EINVAL); 4127 } 4128 4129 #ifdef COMPAT_FREEBSD32 4130 4131 struct timespec32 { 4132 int32_t tv_sec; 4133 int32_t tv_nsec; 4134 }; 4135 4136 struct umtx_time32 { 4137 struct timespec32 timeout; 4138 uint32_t flags; 4139 uint32_t clockid; 4140 }; 4141 4142 static inline int 4143 umtx_copyin_timeout32(void *addr, struct timespec *tsp) 4144 { 4145 struct timespec32 ts32; 4146 int error; 4147 4148 error = copyin(addr, &ts32, sizeof(struct timespec32)); 4149 if (error == 0) { 4150 if (ts32.tv_sec < 0 || 4151 ts32.tv_nsec >= 1000000000 || 4152 ts32.tv_nsec < 0) 4153 error = EINVAL; 4154 else { 4155 tsp->tv_sec = ts32.tv_sec; 4156 tsp->tv_nsec = ts32.tv_nsec; 4157 } 4158 } 4159 return (error); 4160 } 4161 4162 static inline int 4163 umtx_copyin_umtx_time32(const void *addr, size_t size, struct _umtx_time *tp) 4164 { 4165 struct umtx_time32 t32; 4166 int error; 4167 4168 t32.clockid = CLOCK_REALTIME; 4169 t32.flags = 0; 4170 if (size <= sizeof(struct timespec32)) 4171 error = copyin(addr, &t32.timeout, sizeof(struct timespec32)); 4172 else 4173 error = copyin(addr, &t32, sizeof(struct umtx_time32)); 4174 if (error != 0) 4175 return (error); 4176 if (t32.timeout.tv_sec < 0 || 4177 t32.timeout.tv_nsec >= 1000000000 || t32.timeout.tv_nsec < 0) 4178 return (EINVAL); 4179 tp->_timeout.tv_sec = t32.timeout.tv_sec; 4180 tp->_timeout.tv_nsec = t32.timeout.tv_nsec; 4181 tp->_flags = t32.flags; 4182 tp->_clockid = t32.clockid; 4183 return (0); 4184 } 4185 4186 static int 4187 __umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 4188 { 4189 struct _umtx_time *tm_p, timeout; 4190 int error; 4191 4192 if (uap->uaddr2 == NULL) 4193 tm_p = NULL; 4194 else { 4195 error = umtx_copyin_umtx_time32(uap->uaddr2, 4196 (size_t)uap->uaddr1, &timeout); 4197 if (error != 0) 4198 return (error); 4199 tm_p = &timeout; 4200 } 4201 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); 4202 } 4203 4204 static int 4205 __umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap) 4206 { 4207 struct _umtx_time *tm_p, timeout; 4208 int error; 4209 4210 /* Allow a null timespec (wait forever). 
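	 *
	 * As in the native path, uaddr1 carries the size of the
	 * timeout object at uaddr2, here decoded against the 32-bit
	 * layouts by umtx_copyin_umtx_time32().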
*/ 4211 if (uap->uaddr2 == NULL) 4212 tm_p = NULL; 4213 else { 4214 error = umtx_copyin_umtx_time32(uap->uaddr2, 4215 (size_t)uap->uaddr1, &timeout); 4216 if (error != 0) 4217 return (error); 4218 tm_p = &timeout; 4219 } 4220 return (do_lock_umutex(td, uap->obj, tm_p, 0)); 4221 } 4222 4223 static int 4224 __umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap) 4225 { 4226 struct _umtx_time *tm_p, timeout; 4227 int error; 4228 4229 /* Allow a null timespec (wait forever). */ 4230 if (uap->uaddr2 == NULL) 4231 tm_p = NULL; 4232 else { 4233 error = umtx_copyin_umtx_time32(uap->uaddr2, 4234 (size_t)uap->uaddr1, &timeout); 4235 if (error != 0) 4236 return (error); 4237 tm_p = &timeout; 4238 } 4239 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT)); 4240 } 4241 4242 static int 4243 __umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 4244 { 4245 struct timespec *ts, timeout; 4246 int error; 4247 4248 /* Allow a null timespec (wait forever). */ 4249 if (uap->uaddr2 == NULL) 4250 ts = NULL; 4251 else { 4252 error = umtx_copyin_timeout32(uap->uaddr2, &timeout); 4253 if (error != 0) 4254 return (error); 4255 ts = &timeout; 4256 } 4257 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 4258 } 4259 4260 static int 4261 __umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap) 4262 { 4263 struct _umtx_time timeout; 4264 int error; 4265 4266 /* Allow a null timespec (wait forever). */ 4267 if (uap->uaddr2 == NULL) { 4268 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 4269 } else { 4270 error = umtx_copyin_umtx_time32(uap->uaddr2, 4271 (size_t)uap->uaddr1, &timeout); 4272 if (error != 0) 4273 return (error); 4274 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 4275 } 4276 return (error); 4277 } 4278 4279 static int 4280 __umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap) 4281 { 4282 struct _umtx_time timeout; 4283 int error; 4284 4285 /* Allow a null timespec (wait forever). */ 4286 if (uap->uaddr2 == NULL) { 4287 error = do_rw_wrlock(td, uap->obj, 0); 4288 } else { 4289 error = umtx_copyin_umtx_time32(uap->uaddr2, 4290 (size_t)uap->uaddr1, &timeout); 4291 if (error != 0) 4292 return (error); 4293 error = do_rw_wrlock(td, uap->obj, &timeout); 4294 } 4295 return (error); 4296 } 4297 4298 static int 4299 __umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap) 4300 { 4301 struct _umtx_time *tm_p, timeout; 4302 int error; 4303 4304 if (uap->uaddr2 == NULL) 4305 tm_p = NULL; 4306 else { 4307 error = umtx_copyin_umtx_time32( 4308 uap->uaddr2, (size_t)uap->uaddr1,&timeout); 4309 if (error != 0) 4310 return (error); 4311 tm_p = &timeout; 4312 } 4313 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1)); 4314 } 4315 4316 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4317 static int 4318 __umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 4319 { 4320 struct _umtx_time *tm_p, timeout; 4321 int error; 4322 4323 /* Allow a null timespec (wait forever). 
*/ 4324 if (uap->uaddr2 == NULL) 4325 tm_p = NULL; 4326 else { 4327 error = umtx_copyin_umtx_time32(uap->uaddr2, 4328 (size_t)uap->uaddr1, &timeout); 4329 if (error != 0) 4330 return (error); 4331 tm_p = &timeout; 4332 } 4333 return (do_sem_wait(td, uap->obj, tm_p)); 4334 } 4335 #endif 4336 4337 static int 4338 __umtx_op_sem2_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 4339 { 4340 struct _umtx_time *tm_p, timeout; 4341 size_t uasize; 4342 int error; 4343 4344 /* Allow a null timespec (wait forever). */ 4345 if (uap->uaddr2 == NULL) { 4346 uasize = 0; 4347 tm_p = NULL; 4348 } else { 4349 uasize = (size_t)uap->uaddr1; 4350 error = umtx_copyin_umtx_time32(uap->uaddr2, uasize, &timeout); 4351 if (error != 0) 4352 return (error); 4353 tm_p = &timeout; 4354 } 4355 error = do_sem2_wait(td, uap->obj, tm_p); 4356 if (error == EINTR && uap->uaddr2 != NULL && 4357 (timeout._flags & UMTX_ABSTIME) == 0 && 4358 uasize >= sizeof(struct umtx_time32) + sizeof(struct timespec32)) { 4359 struct timespec32 remain32 = { 4360 .tv_sec = timeout._timeout.tv_sec, 4361 .tv_nsec = timeout._timeout.tv_nsec 4362 }; 4363 error = copyout(&remain32, 4364 (struct umtx_time32 *)uap->uaddr2 + 1, 4365 sizeof(struct timespec32)); 4366 if (error == 0) { 4367 error = EINTR; 4368 } 4369 } 4370 4371 return (error); 4372 } 4373 4374 static int 4375 __umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap) 4376 { 4377 uint32_t uaddrs[BATCH_SIZE], **upp; 4378 int count, error, i, pos, tocopy; 4379 4380 upp = (uint32_t **)uap->obj; 4381 error = 0; 4382 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 4383 pos += tocopy) { 4384 tocopy = MIN(count, BATCH_SIZE); 4385 error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t)); 4386 if (error != 0) 4387 break; 4388 for (i = 0; i < tocopy; ++i) 4389 kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i], 4390 INT_MAX, 1); 4391 maybe_yield(); 4392 } 4393 return (error); 4394 } 4395 4396 struct umtx_robust_lists_params_compat32 { 4397 uint32_t robust_list_offset; 4398 uint32_t robust_priv_list_offset; 4399 uint32_t robust_inact_offset; 4400 }; 4401 4402 static int 4403 __umtx_op_robust_lists_compat32(struct thread *td, struct _umtx_op_args *uap) 4404 { 4405 struct umtx_robust_lists_params rb; 4406 struct umtx_robust_lists_params_compat32 rb32; 4407 int error; 4408 4409 if (uap->val > sizeof(rb32)) 4410 return (EINVAL); 4411 bzero(&rb, sizeof(rb)); 4412 bzero(&rb32, sizeof(rb32)); 4413 error = copyin(uap->uaddr1, &rb32, uap->val); 4414 if (error != 0) 4415 return (error); 4416 rb.robust_list_offset = rb32.robust_list_offset; 4417 rb.robust_priv_list_offset = rb32.robust_priv_list_offset; 4418 rb.robust_inact_offset = rb32.robust_inact_offset; 4419 return (umtx_robust_lists(td, &rb)); 4420 } 4421 4422 static const _umtx_op_func op_table_compat32[] = { 4423 [UMTX_OP_RESERVED0] = __umtx_op_unimpl, 4424 [UMTX_OP_RESERVED1] = __umtx_op_unimpl, 4425 [UMTX_OP_WAIT] = __umtx_op_wait_compat32, 4426 [UMTX_OP_WAKE] = __umtx_op_wake, 4427 [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex, 4428 [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex_compat32, 4429 [UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex, 4430 [UMTX_OP_SET_CEILING] = __umtx_op_set_ceiling, 4431 [UMTX_OP_CV_WAIT] = __umtx_op_cv_wait_compat32, 4432 [UMTX_OP_CV_SIGNAL] = __umtx_op_cv_signal, 4433 [UMTX_OP_CV_BROADCAST] = __umtx_op_cv_broadcast, 4434 [UMTX_OP_WAIT_UINT] = __umtx_op_wait_compat32, 4435 [UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock_compat32, 4436 [UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock_compat32, 
static const _umtx_op_func op_table_compat32[] = {
	[UMTX_OP_RESERVED0]	= __umtx_op_unimpl,
	[UMTX_OP_RESERVED1]	= __umtx_op_unimpl,
	[UMTX_OP_WAIT]		= __umtx_op_wait_compat32,
	[UMTX_OP_WAKE]		= __umtx_op_wake,
	[UMTX_OP_MUTEX_TRYLOCK]	= __umtx_op_trylock_umutex,
	[UMTX_OP_MUTEX_LOCK]	= __umtx_op_lock_umutex_compat32,
	[UMTX_OP_MUTEX_UNLOCK]	= __umtx_op_unlock_umutex,
	[UMTX_OP_SET_CEILING]	= __umtx_op_set_ceiling,
	[UMTX_OP_CV_WAIT]	= __umtx_op_cv_wait_compat32,
	[UMTX_OP_CV_SIGNAL]	= __umtx_op_cv_signal,
	[UMTX_OP_CV_BROADCAST]	= __umtx_op_cv_broadcast,
	[UMTX_OP_WAIT_UINT]	= __umtx_op_wait_compat32,
	[UMTX_OP_RW_RDLOCK]	= __umtx_op_rw_rdlock_compat32,
	[UMTX_OP_RW_WRLOCK]	= __umtx_op_rw_wrlock_compat32,
	[UMTX_OP_RW_UNLOCK]	= __umtx_op_rw_unlock,
	[UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private_compat32,
	[UMTX_OP_WAKE_PRIVATE]	= __umtx_op_wake_private,
	[UMTX_OP_MUTEX_WAIT]	= __umtx_op_wait_umutex_compat32,
	[UMTX_OP_MUTEX_WAKE]	= __umtx_op_wake_umutex,
#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
	[UMTX_OP_SEM_WAIT]	= __umtx_op_sem_wait_compat32,
	[UMTX_OP_SEM_WAKE]	= __umtx_op_sem_wake,
#else
	[UMTX_OP_SEM_WAIT]	= __umtx_op_unimpl,
	[UMTX_OP_SEM_WAKE]	= __umtx_op_unimpl,
#endif
	[UMTX_OP_NWAKE_PRIVATE]	= __umtx_op_nwake_private32,
	[UMTX_OP_MUTEX_WAKE2]	= __umtx_op_wake2_umutex,
	[UMTX_OP_SEM2_WAIT]	= __umtx_op_sem2_wait_compat32,
	[UMTX_OP_SEM2_WAKE]	= __umtx_op_sem2_wake,
	[UMTX_OP_SHM]		= __umtx_op_shm,
	[UMTX_OP_ROBUST_LISTS]	= __umtx_op_robust_lists_compat32,
};

int
freebsd32__umtx_op(struct thread *td, struct freebsd32__umtx_op_args *uap)
{

	if ((unsigned)uap->op < nitems(op_table_compat32)) {
		return (*op_table_compat32[uap->op])(td,
		    (struct _umtx_op_args *)uap);
	}
	return (EINVAL);
}
#endif /* COMPAT_FREEBSD32 */

void
umtx_thread_init(struct thread *td)
{

	td->td_umtxq = umtxq_alloc();
	td->td_umtxq->uq_thread = td;
}

void
umtx_thread_fini(struct thread *td)
{

	umtxq_free(td->td_umtxq);
}

/*
 * This is called when a new thread is created, e.g. by fork().
 */
void
umtx_thread_alloc(struct thread *td)
{
	struct umtx_q *uq;

	uq = td->td_umtxq;
	uq->uq_inherited_pri = PRI_MAX;

	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested),
	    ("uq_pi_contested is not empty"));
}

/*
 * exec() hook.
 *
 * Clear robust lists for all of the process's threads, not delaying
 * the cleanup to the thread_exit hook, since the relevant address
 * space is destroyed right now.
 */
static void
umtx_exec_hook(void *arg __unused, struct proc *p,
    struct image_params *imgp __unused)
{
	struct thread *td;

	KASSERT(p == curproc, ("need curproc"));
	KASSERT((p->p_flag & P_HADTHREADS) == 0 ||
	    (p->p_flag & P_STOPPED_SINGLE) != 0,
	    ("curproc must be single-threaded"));
	/*
	 * There is no need to lock the list as only this thread can be
	 * running.
	 */
	FOREACH_THREAD_IN_PROC(p, td) {
		KASSERT(td == curthread ||
		    ((td->td_flags & TDF_BOUNDARY) != 0 &&
		    TD_IS_SUSPENDED(td)),
		    ("running thread %p %p", p, td));
		umtx_thread_cleanup(td);
		td->td_rb_list = td->td_rbp_list = td->td_rb_inact = 0;
	}
}
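/*
 * Illustrative note: umtx_exec_hook() above is not called directly; it
 * runs off the process_exec eventhandler.  The registration performed
 * earlier in this file looks essentially like the sketch below.
 */
#if 0
(void)EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
    EVENTHANDLER_PRI_ANY);
#endif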
4533 */ 4534 void 4535 umtx_thread_exit(struct thread *td) 4536 { 4537 4538 umtx_thread_cleanup(td); 4539 } 4540 4541 static int 4542 umtx_read_uptr(struct thread *td, uintptr_t ptr, uintptr_t *res) 4543 { 4544 u_long res1; 4545 #ifdef COMPAT_FREEBSD32 4546 uint32_t res32; 4547 #endif 4548 int error; 4549 4550 #ifdef COMPAT_FREEBSD32 4551 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { 4552 error = fueword32((void *)ptr, &res32); 4553 if (error == 0) 4554 res1 = res32; 4555 } else 4556 #endif 4557 { 4558 error = fueword((void *)ptr, &res1); 4559 } 4560 if (error == 0) 4561 *res = res1; 4562 else 4563 error = EFAULT; 4564 return (error); 4565 } 4566 4567 static void 4568 umtx_read_rb_list(struct thread *td, struct umutex *m, uintptr_t *rb_list) 4569 { 4570 #ifdef COMPAT_FREEBSD32 4571 struct umutex32 m32; 4572 4573 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { 4574 memcpy(&m32, m, sizeof(m32)); 4575 *rb_list = m32.m_rb_lnk; 4576 } else 4577 #endif 4578 *rb_list = m->m_rb_lnk; 4579 } 4580 4581 static int 4582 umtx_handle_rb(struct thread *td, uintptr_t rbp, uintptr_t *rb_list, bool inact) 4583 { 4584 struct umutex m; 4585 int error; 4586 4587 KASSERT(td->td_proc == curproc, ("need current vmspace")); 4588 error = copyin((void *)rbp, &m, sizeof(m)); 4589 if (error != 0) 4590 return (error); 4591 if (rb_list != NULL) 4592 umtx_read_rb_list(td, &m, rb_list); 4593 if ((m.m_flags & UMUTEX_ROBUST) == 0) 4594 return (EINVAL); 4595 if ((m.m_owner & ~UMUTEX_CONTESTED) != td->td_tid) 4596 /* inact is cleared after unlock, allow the inconsistency */ 4597 return (inact ? 0 : EINVAL); 4598 return (do_unlock_umutex(td, (struct umutex *)rbp, true)); 4599 } 4600 4601 static void 4602 umtx_cleanup_rb_list(struct thread *td, uintptr_t rb_list, uintptr_t *rb_inact, 4603 const char *name) 4604 { 4605 int error, i; 4606 uintptr_t rbp; 4607 bool inact; 4608 4609 if (rb_list == 0) 4610 return; 4611 error = umtx_read_uptr(td, rb_list, &rbp); 4612 for (i = 0; error == 0 && rbp != 0 && i < umtx_max_rb; i++) { 4613 if (rbp == *rb_inact) { 4614 inact = true; 4615 *rb_inact = 0; 4616 } else 4617 inact = false; 4618 error = umtx_handle_rb(td, rbp, &rbp, inact); 4619 } 4620 if (i == umtx_max_rb && umtx_verbose_rb) { 4621 uprintf("comm %s pid %d: reached umtx %smax rb %d\n", 4622 td->td_proc->p_comm, td->td_proc->p_pid, name, umtx_max_rb); 4623 } 4624 if (error != 0 && umtx_verbose_rb) { 4625 uprintf("comm %s pid %d: handling %srb error %d\n", 4626 td->td_proc->p_comm, td->td_proc->p_pid, name, error); 4627 } 4628 } 4629 4630 /* 4631 * Clean up umtx data. 4632 */ 4633 static void 4634 umtx_thread_cleanup(struct thread *td) 4635 { 4636 struct umtx_q *uq; 4637 struct umtx_pi *pi; 4638 uintptr_t rb_inact; 4639 4640 /* 4641 * Disown pi mutexes. 4642 */ 4643 uq = td->td_umtxq; 4644 if (uq != NULL) { 4645 if (uq->uq_inherited_pri != PRI_MAX || 4646 !TAILQ_EMPTY(&uq->uq_pi_contested)) { 4647 mtx_lock(&umtx_lock); 4648 uq->uq_inherited_pri = PRI_MAX; 4649 while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) { 4650 pi->pi_owner = NULL; 4651 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link); 4652 } 4653 mtx_unlock(&umtx_lock); 4654 } 4655 sched_lend_user_prio_cond(td, PRI_MAX); 4656 } 4657 4658 if (td->td_rb_inact == 0 && td->td_rb_list == 0 && td->td_rbp_list == 0) 4659 return; 4660 4661 /* 4662 * Handle terminated robust mutexes. Must be done after 4663 * robust pi disown, otherwise unlock could see unowned 4664 * entries. 
4665 */ 4666 rb_inact = td->td_rb_inact; 4667 if (rb_inact != 0) 4668 (void)umtx_read_uptr(td, rb_inact, &rb_inact); 4669 umtx_cleanup_rb_list(td, td->td_rb_list, &rb_inact, ""); 4670 umtx_cleanup_rb_list(td, td->td_rbp_list, &rb_inact, "priv "); 4671 if (rb_inact != 0) 4672 (void)umtx_handle_rb(td, rb_inact, NULL, true); 4673 } 4674