/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2015, 2016 The FreeBSD Foundation
 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Portions of this software were developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_umtx_profiling.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/syscallsubr.h>
#include <sys/taskqueue.h>
#include <sys/time.h>
#include <sys/eventhandler.h>
#include <sys/umtx.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

#include <machine/atomic.h>
#include <machine/cpu.h>

#ifdef COMPAT_FREEBSD32
#include <compat/freebsd32/freebsd32_proto.h>
#endif

#define	_UMUTEX_TRY		1
#define	_UMUTEX_WAIT		2

#ifdef UMTX_PROFILING
#define	UPROF_PERC_BIGGER(w, f, sw, sf)					\
	(((w) > (sw)) || ((w) == (sw) && (f) > (sf)))
#endif

/* Priority inheritance mutex info. */
struct umtx_pi {
	/* Owner thread */
	struct thread		*pi_owner;

	/* Reference count */
	int			pi_refcount;

	/* List entry linking the PI mutexes held by a thread */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* List entry in hash */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* List for waiters */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Identify a userland lock object */
	struct umtx_key		pi_key;
};
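/*
 * Reader aid on umtx_pi locking, summarized from the code below:
 * pi_refcount is manipulated under the owning chain's uc_lock, while
 * pi_owner and the pi_blocked list are protected by the global
 * umtx_lock.  A umtx_pi lives on a chain's uc_pi_list and is unhashed
 * and freed by umtx_pi_unref() when the last reference is dropped.
 */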
/* A userland synchronization object waiter. */
struct umtx_q {
	/* Linked list for the hash. */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Umtx key. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define	UQF_UMTXQ	0x0001

	/* The waiting thread. */
	struct thread		*uq_thread;

	/*
	 * The PI mutex this thread is blocked on.  Reads may be done
	 * under either the chain lock or umtx_lock; writes must hold
	 * both the chain lock and umtx_lock.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* On blocked list */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* Contested PI mutexes owned by this thread */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Inherited priority from PP mutex */
	u_char			uq_inherited_pri;

	/* Spare queue ready to be reused */
	struct umtxq_queue	*uq_spare_queue;

	/* The queue we are on */
	struct umtxq_queue	*uq_cur_queue;
};

TAILQ_HEAD(umtxq_head, umtx_q);

/* Per-key wait-queue */
struct umtxq_queue {
	struct umtxq_head	head;
	struct umtx_key		key;
	LIST_ENTRY(umtxq_queue)	link;
	int			length;
};

LIST_HEAD(umtxq_list, umtxq_queue);

/* Userland lock object's wait-queue chain */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx		uc_lock;

	/* List of sleep queues. */
	struct umtxq_list	uc_queue[2];
#define	UMTX_SHARED_QUEUE	0
#define	UMTX_EXCLUSIVE_QUEUE	1

	LIST_HEAD(, umtxq_queue) uc_spare_queue;

	/* Busy flag */
	char			uc_busy;

	/* Chain lock waiters */
	int			uc_waiters;

	/* All PI in the list */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;

#ifdef UMTX_PROFILING
	u_int			length;
	u_int			max_length;
#endif
};

#define	UMTXQ_LOCKED_ASSERT(uc)		mtx_assert(&(uc)->uc_lock, MA_OWNED)

/*
 * Don't propagate time-sharing priority.  There is a security reason:
 * a user could simply create a PI mutex, let thread A lock it, and let
 * another thread B block on it.  Because B is sleeping, its priority
 * would be boosted; that boost would propagate to A as well, and A's
 * priority would never be lowered even if it consumed 100% CPU, which
 * is unfair to other processes.
 */

#define	UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&	\
			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?	\
			 PRI_MAX_TIMESHARE : (td)->td_user_pri)

#define	GOLDEN_RATIO_PRIME	2654404609U
#ifndef	UMTX_CHAINS
#define	UMTX_CHAINS		512
#endif
#define	UMTX_SHIFTS		(__WORD_BIT - 9)
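/*
 * Reader aid: umtxq_hash() below spreads keys with Fibonacci hashing;
 * the sum of the two key words is multiplied by GOLDEN_RATIO_PRIME and
 * the high bits select one of the UMTX_CHAINS hash chains.  Since the
 * definition is #ifndef-guarded, the chain count can be overridden at
 * compile time.
 */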
#define	GET_SHARE(flags)	\
    (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)

#define	BUSY_SPINS		200

struct abs_timeout {
	int clockid;
	bool is_abs_real;	/* TIMER_ABSTIME && CLOCK_REALTIME* */
	struct timespec cur;
	struct timespec end;
};

#ifdef COMPAT_FREEBSD32
struct umutex32 {
	volatile __lwpid_t	m_owner;	/* Owner of the mutex */
	__uint32_t		m_flags;	/* Flags of the mutex */
	__uint32_t		m_ceilings[2];	/* Priority protect ceiling */
	__uint32_t		m_rb_lnk;	/* Robust linkage */
	__uint32_t		m_pad;
	__uint32_t		m_spare[2];
};

_Static_assert(sizeof(struct umutex) == sizeof(struct umutex32), "umutex32");
_Static_assert(__offsetof(struct umutex, m_spare[0]) ==
    __offsetof(struct umutex32, m_spare[0]), "m_spare32");
#endif

int umtx_shm_vnobj_persistent = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_vnode_persistent, CTLFLAG_RWTUN,
    &umtx_shm_vnobj_persistent, 0,
    "False forces destruction of umtx attached to file, on last close");
static int umtx_max_rb = 1000;
SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_max_robust, CTLFLAG_RWTUN,
    &umtx_max_rb, 0,
    "");

static uma_zone_t		umtx_pi_zone;
static struct umtxq_chain	umtxq_chains[2][UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
static int			umtx_pi_allocated;

static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
    &umtx_pi_allocated, 0, "Allocated umtx_pi");
static int umtx_verbose_rb = 1;
SYSCTL_INT(_debug_umtx, OID_AUTO, robust_faults_verbose, CTLFLAG_RWTUN,
    &umtx_verbose_rb, 0,
    "");

#ifdef UMTX_PROFILING
static long max_length;
SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0,
    "max_length");
static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD, 0,
    "umtx chain stats");
#endif

static void abs_timeout_update(struct abs_timeout *timo);

static void umtx_shm_init(void);
static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert_queue(struct umtx_q *uq, int q);
static void umtxq_remove_queue(struct umtx_q *uq, int q);
static int umtxq_sleep(struct umtx_q *uq, const char *wmesg,
    struct abs_timeout *);
static int umtxq_count(struct umtx_key *key);
static struct umtx_pi *umtx_pi_alloc(int);
static void umtx_pi_free(struct umtx_pi *pi);
static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    bool rb);
static void umtx_thread_cleanup(struct thread *td);
static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
    struct image_params *imgp __unused);
SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);

#define	umtxq_signal(key, nwake)	umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
#define	umtxq_insert(uq)	umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
#define	umtxq_remove(uq)	umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)

static struct mtx umtx_lock;
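/*
 * Reader aid on lock ordering, as implied by the functions below: a
 * chain's uc_lock protects that chain's wait queues and uc_pi_list;
 * the global umtx_lock protects priority-inheritance state (pi_owner,
 * pi_blocked, uq_pi_contested); thread locks are innermost.  The
 * uc_busy flag serializes whole operations that must drop uc_lock to
 * touch pageable user memory, since a kernel mutex cannot be held
 * across such accesses.
 */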
#ifdef UMTX_PROFILING
static void
umtx_init_profiling(void)
{
	struct sysctl_oid *chain_oid;
	char chain_name[10];
	int i;

	for (i = 0; i < UMTX_CHAINS; ++i) {
		snprintf(chain_name, sizeof(chain_name), "%d", i);
		chain_oid = SYSCTL_ADD_NODE(NULL,
		    SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO,
		    chain_name, CTLFLAG_RD, NULL, "umtx hash stats");
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length0", CTLFLAG_RD,
		    &umtxq_chains[0][i].max_length, 0, NULL);
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length1", CTLFLAG_RD,
		    &umtxq_chains[1][i].max_length, 0, NULL);
	}
}

static int
sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS)
{
	char buf[512];
	struct sbuf sb;
	struct umtxq_chain *uc;
	u_int fract, i, j, tot, whole;
	u_int sf0, sf1, sf2, sf3, sf4;
	u_int si0, si1, si2, si3, si4;
	u_int sw0, sw1, sw2, sw3, sw4;

	sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
	for (i = 0; i < 2; i++) {
		tot = 0;
		for (j = 0; j < UMTX_CHAINS; ++j) {
			uc = &umtxq_chains[i][j];
			mtx_lock(&uc->uc_lock);
			tot += uc->max_length;
			mtx_unlock(&uc->uc_lock);
		}
		if (tot == 0)
			sbuf_printf(&sb, "%u) Empty ", i);
		else {
			sf0 = sf1 = sf2 = sf3 = sf4 = 0;
			si0 = si1 = si2 = si3 = si4 = 0;
			sw0 = sw1 = sw2 = sw3 = sw4 = 0;
			for (j = 0; j < UMTX_CHAINS; j++) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				whole = uc->max_length * 100;
				mtx_unlock(&uc->uc_lock);
				fract = (whole % tot) * 100;
				if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) {
					sf0 = fract;
					si0 = j;
					sw0 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw1,
				    sf1)) {
					sf1 = fract;
					si1 = j;
					sw1 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw2,
				    sf2)) {
					sf2 = fract;
					si2 = j;
					sw2 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw3,
				    sf3)) {
					sf3 = fract;
					si3 = j;
					sw3 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw4,
				    sf4)) {
					sf4 = fract;
					si4 = j;
					sw4 = whole;
				}
			}
			sbuf_printf(&sb, "queue %u:\n", i);
			sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot,
			    sf0 / tot, si0);
			sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot,
			    sf1 / tot, si1);
			sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot,
			    sf2 / tot, si2);
			sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot,
			    sf3 / tot, si3);
			sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot,
			    sf4 / tot, si4);
		}
	}
	sbuf_trim(&sb);
	sbuf_finish(&sb);
	sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
	sbuf_delete(&sb);
	return (0);
}

static int
sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS)
{
	struct umtxq_chain *uc;
	u_int i, j;
	int clear, error;

	clear = 0;
	error = sysctl_handle_int(oidp, &clear, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);

	if (clear != 0) {
		for (i = 0; i < 2; ++i) {
			for (j = 0; j < UMTX_CHAINS; ++j) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				uc->length = 0;
				uc->max_length = 0;
				mtx_unlock(&uc->uc_lock);
			}
		}
	}
	return (0);
}

SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_clear, "I", "Clear umtx chains statistics");
SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_peaks, "A", "Highest peaks in chains max length");
#endif
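/*
 * With UMTX_PROFILING compiled in, chain occupancy can be inspected
 * and reset from userland, e.g.:
 *
 *	sysctl debug.umtx.chains.peaks
 *	sysctl debug.umtx.chains.clear=1
 *
 * (illustrative invocations based on the sysctl nodes declared above).
 */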
static void
umtxq_sysinit(void *arg __unused)
{
	int i, j;

	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	for (i = 0; i < 2; ++i) {
		for (j = 0; j < UMTX_CHAINS; ++j) {
			mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
			    MTX_DEF | MTX_DUPOK);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
			LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
			TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
			umtxq_chains[i][j].uc_busy = 0;
			umtxq_chains[i][j].uc_waiters = 0;
#ifdef UMTX_PROFILING
			umtxq_chains[i][j].length = 0;
			umtxq_chains[i][j].max_length = 0;
#endif
		}
	}
#ifdef UMTX_PROFILING
	umtx_init_profiling();
#endif
	mtx_init(&umtx_lock, "umtx lock", NULL, MTX_DEF);
	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
	    EVENTHANDLER_PRI_ANY);
	umtx_shm_init();
}

struct umtx_q *
umtxq_alloc(void)
{
	struct umtx_q *uq;

	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
	uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX,
	    M_WAITOK | M_ZERO);
	TAILQ_INIT(&uq->uq_spare_queue->head);
	TAILQ_INIT(&uq->uq_pi_contested);
	uq->uq_inherited_pri = PRI_MAX;
	return (uq);
}

void
umtxq_free(struct umtx_q *uq)
{

	MPASS(uq->uq_spare_queue != NULL);
	free(uq->uq_spare_queue, M_UMTX);
	free(uq, M_UMTX);
}

static inline void
umtxq_hash(struct umtx_key *key)
{
	unsigned n;

	n = (uintptr_t)key->info.both.a + key->info.both.b;
	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
}

static inline struct umtxq_chain *
umtxq_getchain(struct umtx_key *key)
{

	if (key->type <= TYPE_SEM)
		return (&umtxq_chains[1][key->hash]);
	return (&umtxq_chains[0][key->hash]);
}

/*
 * Lock a chain.
 */
static inline void
umtxq_lock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_lock(&uc->uc_lock);
}

/*
 * Unlock a chain.
 */
static inline void
umtxq_unlock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_unlock(&uc->uc_lock);
}

/*
 * Set the chain to the busy state when the following operation
 * may block (a kernel mutex cannot be used).
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	if (uc->uc_busy) {
#ifdef SMP
		if (smp_cpus > 1) {
			int count = BUSY_SPINS;

			if (count > 0) {
				umtxq_unlock(key);
				while (uc->uc_busy && --count > 0)
					cpu_spinwait();
				umtxq_lock(key);
			}
		}
#endif
		while (uc->uc_busy) {
			uc->uc_waiters++;
			msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
			uc->uc_waiters--;
		}
	}
	uc->uc_busy = 1;
}
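/*
 * Reader aid: umtxq_busy() spins for up to BUSY_SPINS iterations on
 * SMP before sleeping, on the expectation that busy periods are short;
 * only if the chain stays busy does it fall back to msleep() on the
 * chain address, paired with the wakeup_one() in umtxq_unbusy().
 */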
/*
 * Unbusy a chain.
 */
static inline void
umtxq_unbusy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	KASSERT(uc->uc_busy != 0, ("not busy"));
	uc->uc_busy = 0;
	if (uc->uc_waiters)
		wakeup_one(uc);
}

static inline void
umtxq_unbusy_unlocked(struct umtx_key *key)
{

	umtxq_lock(key);
	umtxq_unbusy(key);
	umtxq_unlock(key);
}

static struct umtxq_queue *
umtxq_queue_lookup(struct umtx_key *key, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	LIST_FOREACH(uh, &uc->uc_queue[q], link) {
		if (umtx_key_match(&uh->key, key))
			return (uh);
	}

	return (NULL);
}

static inline void
umtxq_insert_queue(struct umtx_q *uq, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT((uq->uq_flags & UQF_UMTXQ) == 0,
	    ("umtx_q is already on queue"));
	uh = umtxq_queue_lookup(&uq->uq_key, q);
	if (uh != NULL) {
		LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue,
		    link);
	} else {
		uh = uq->uq_spare_queue;
		uh->key = uq->uq_key;
		LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
#ifdef UMTX_PROFILING
		uc->length++;
		if (uc->length > uc->max_length) {
			uc->max_length = uc->length;
			if (uc->max_length > max_length)
				max_length = uc->max_length;
		}
#endif
	}
	uq->uq_spare_queue = NULL;

	TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
	uh->length++;
	uq->uq_flags |= UQF_UMTXQ;
	uq->uq_cur_queue = uh;
	return;
}

static inline void
umtxq_remove_queue(struct umtx_q *uq, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (uq->uq_flags & UQF_UMTXQ) {
		uh = uq->uq_cur_queue;
		TAILQ_REMOVE(&uh->head, uq, uq_link);
		uh->length--;
		uq->uq_flags &= ~UQF_UMTXQ;
		if (TAILQ_EMPTY(&uh->head)) {
			KASSERT(uh->length == 0,
			    ("inconsistent umtxq_queue length"));
#ifdef UMTX_PROFILING
			uc->length--;
#endif
			LIST_REMOVE(uh, link);
		} else {
			uh = LIST_FIRST(&uc->uc_spare_queue);
			KASSERT(uh != NULL, ("uc_spare_queue is empty"));
			LIST_REMOVE(uh, link);
		}
		uq->uq_spare_queue = uh;
		uq->uq_cur_queue = NULL;
	}
}

/*
 * Check if there are multiple waiters.
 */
static int
umtxq_count(struct umtx_key *key)
{
	struct umtxq_queue *uh;

	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL)
		return (uh->length);
	return (0);
}

/*
 * Check if there are multiple PI waiters and return the first waiter.
 */
static int
umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
{
	struct umtxq_queue *uh;

	*first = NULL;
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL) {
		*first = TAILQ_FIRST(&uh->head);
		return (uh->length);
	}
	return (0);
}
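/*
 * Reader aid: the spare-queue scheme above gives O(1) wait-queue
 * allocation.  Every umtx_q carries one umtxq_queue; the first waiter
 * on a key donates its spare as the per-key queue, later waiters park
 * theirs on uc_spare_queue, and each departing waiter takes one back,
 * so the counts always balance.
 */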
static int
umtxq_check_susp(struct thread *td)
{
	struct proc *p;
	int error;

	/*
	 * The check for TDF_NEEDSUSPCHK is racy, but it is enough to
	 * eventually break the lockstep loop.
	 */
	if ((td->td_flags & TDF_NEEDSUSPCHK) == 0)
		return (0);
	error = 0;
	p = td->td_proc;
	PROC_LOCK(p);
	if (P_SHOULDSTOP(p) ||
	    ((p->p_flag & P_TRACED) && (td->td_dbgflags & TDB_SUSPEND))) {
		if (p->p_flag & P_SINGLE_EXIT)
			error = EINTR;
		else
			error = ERESTART;
	}
	PROC_UNLOCK(p);
	return (error);
}

/*
 * Wake up threads waiting on a userland object.
 */
static int
umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
{
	struct umtxq_queue *uh;
	struct umtx_q *uq;
	int ret;

	ret = 0;
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
	uh = umtxq_queue_lookup(key, q);
	if (uh != NULL) {
		while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
			umtxq_remove_queue(uq, q);
			wakeup(uq);
			if (++ret >= n_wake)
				return (ret);
		}
	}
	return (ret);
}

/*
 * Wake up the specified thread.
 */
static inline void
umtxq_signal_thread(struct umtx_q *uq)
{

	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key));
	umtxq_remove(uq);
	wakeup(uq);
}

static inline int
tstohz(const struct timespec *tsp)
{
	struct timeval tv;

	TIMESPEC_TO_TIMEVAL(&tv, tsp);
	return tvtohz(&tv);
}

static void
abs_timeout_init(struct abs_timeout *timo, int clockid, int absolute,
    const struct timespec *timeout)
{

	timo->clockid = clockid;
	if (!absolute) {
		timo->is_abs_real = false;
		abs_timeout_update(timo);
		timespecadd(&timo->cur, timeout, &timo->end);
	} else {
		timo->end = *timeout;
		timo->is_abs_real = clockid == CLOCK_REALTIME ||
		    clockid == CLOCK_REALTIME_FAST ||
		    clockid == CLOCK_REALTIME_PRECISE;
		/*
		 * If is_abs_real, umtxq_sleep will read the clock
		 * after setting td_rtcgen; otherwise, read it here.
		 */
		if (!timo->is_abs_real) {
			abs_timeout_update(timo);
		}
	}
}

static void
abs_timeout_init2(struct abs_timeout *timo, const struct _umtx_time *umtxtime)
{

	abs_timeout_init(timo, umtxtime->_clockid,
	    (umtxtime->_flags & UMTX_ABSTIME) != 0, &umtxtime->_timeout);
}

static inline void
abs_timeout_update(struct abs_timeout *timo)
{

	kern_clock_gettime(curthread, timo->clockid, &timo->cur);
}

static int
abs_timeout_gethz(struct abs_timeout *timo)
{
	struct timespec tts;

	if (timespeccmp(&timo->end, &timo->cur, <=))
		return (-1);
	timespecsub(&timo->end, &timo->cur, &tts);
	return (tstohz(&tts));
}

static uint32_t
umtx_unlock_val(uint32_t flags, bool rb)
{

	if (rb)
		return (UMUTEX_RB_OWNERDEAD);
	else if ((flags & UMUTEX_NONCONSISTENT) != 0)
		return (UMUTEX_RB_NOTRECOV);
	else
		return (UMUTEX_UNOWNED);
}
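/*
 * Reader aid: for absolute CLOCK_REALTIME* timeouts, abs_timeout_init()
 * defers reading the clock until umtxq_sleep() has published
 * td_rtcgen.  A concurrent clock step then bumps rtc_generation and
 * causes the sleeping thread to re-evaluate its deadline instead of
 * sleeping past (or short of) the requested absolute time.
 */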
/*
 * Put the thread into a sleep state; before sleeping, check if
 * the thread was removed from the umtx queue.
 */
static inline int
umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *abstime)
{
	struct umtxq_chain *uc;
	int error, timo;

	if (abstime != NULL && abstime->is_abs_real) {
		curthread->td_rtcgen = atomic_load_acq_int(&rtc_generation);
		abs_timeout_update(abstime);
	}

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	for (;;) {
		if (!(uq->uq_flags & UQF_UMTXQ)) {
			error = 0;
			break;
		}
		if (abstime != NULL) {
			timo = abs_timeout_gethz(abstime);
			if (timo < 0) {
				error = ETIMEDOUT;
				break;
			}
		} else
			timo = 0;
		error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo);
		if (error == EINTR || error == ERESTART) {
			umtxq_lock(&uq->uq_key);
			break;
		}
		if (abstime != NULL) {
			if (abstime->is_abs_real)
				curthread->td_rtcgen =
				    atomic_load_acq_int(&rtc_generation);
			abs_timeout_update(abstime);
		}
		umtxq_lock(&uq->uq_key);
	}

	curthread->td_rtcgen = 0;
	return (error);
}

/*
 * Convert a userspace address into a unique logical address.
 */
int
umtx_key_get(const void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else {
		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
		    &entry, &key->info.shared.object, &pindex, &prot,
		    &wired) != KERN_SUCCESS) {
			return (EFAULT);
		}

		if ((share == PROCESS_SHARE) ||
		    (share == AUTO_SHARE &&
		     VM_INHERIT_SHARE == entry->inheritance)) {
			key->shared = 1;
			key->info.shared.offset = (vm_offset_t)addr -
			    entry->start + entry->offset;
			vm_object_reference(key->info.shared.object);
		} else {
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		vm_map_lookup_done(map, entry);
	}

	umtxq_hash(key);
	return (0);
}

/*
 * Release key.
 */
void
umtx_key_release(struct umtx_key *key)
{
	if (key->shared)
		vm_object_deallocate(key->info.shared.object);
}
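/*
 * Illustrative userland counterpart (a minimal sketch, not part of
 * this file): do_wait() and kern_umtx_wake() below back the futex-like
 * UMTX_OP_WAIT_UINT and UMTX_OP_WAKE operations of _umtx_op(2).
 */
#if 0
#include <sys/types.h>
#include <sys/umtx.h>
#include <limits.h>

static void
wait_while_equal(u_int *w, u_int val)
{
	/* Sleep until *w != val, or until a wake is posted. */
	_umtx_op(w, UMTX_OP_WAIT_UINT, val, NULL, NULL);
}

static void
wake_all_waiters(u_int *w)
{
	_umtx_op(w, UMTX_OP_WAKE, INT_MAX, NULL, NULL);
}
#endif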
/*
 * Fetch and compare value; sleep on the address if the value is unchanged.
 */
static int
do_wait(struct thread *td, void *addr, u_long id,
    struct _umtx_time *timeout, int compat32, int is_private)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	u_long tmp;
	uint32_t tmp32;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	if (compat32 == 0) {
		error = fueword(addr, &tmp);
		if (error != 0)
			error = EFAULT;
	} else {
		error = fueword32(addr, &tmp32);
		if (error == 0)
			tmp = tmp32;
		else
			error = EFAULT;
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		if (tmp == id)
			error = umtxq_sleep(uq, "uwait", timeout == NULL ?
			    NULL : &timo);
		if ((uq->uq_flags & UQF_UMTXQ) == 0)
			error = 0;
		else
			umtxq_remove(uq);
	} else if ((uq->uq_flags & UQF_UMTXQ) != 0) {
		umtxq_remove(uq);
	}
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

/*
 * Wake up threads sleeping on the specified address.
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
		return (ret);
	umtxq_lock(&key);
	umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}
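/*
 * Reader aid: a normal (PRIO_NONE) umutex word has three basic states,
 * handled by do_lock_normal()/do_unlock_normal() below:
 *
 *	UMUTEX_UNOWNED (0)	unlocked; acquirable with one CAS
 *	tid			locked, uncontested; unlock is one CAS
 *	tid | UMUTEX_CONTESTED	locked with (possible) waiters; unlock
 *				must enter the kernel to wake one
 *
 * plus the UMUTEX_RB_OWNERDEAD/UMUTEX_RB_NOTRECOV terminal values used
 * by robust mutexes.
 */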
/*
 * Lock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int mode)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	error = 0;
	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	/*
	 * Care must be exercised when dealing with umtx structure.  It
	 * can fault on any access.
	 */
	for (;;) {
		rv = fueword32(&m->m_owner, &owner);
		if (rv == -1)
			return (EFAULT);
		if (mode == _UMUTEX_WAIT) {
			if (owner == UMUTEX_UNOWNED ||
			    owner == UMUTEX_CONTESTED ||
			    owner == UMUTEX_RB_OWNERDEAD ||
			    owner == UMUTEX_RB_NOTRECOV)
				return (0);
		} else {
			/*
			 * Robust mutex terminated.  Kernel duty is to
			 * return EOWNERDEAD to the userspace.  The
			 * umutex.m_flags UMUTEX_NONCONSISTENT is set
			 * by the common userspace code.
			 */
			if (owner == UMUTEX_RB_OWNERDEAD) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_RB_OWNERDEAD, &owner,
				    id | UMUTEX_CONTESTED);
				if (rv == -1)
					return (EFAULT);
				if (owner == UMUTEX_RB_OWNERDEAD)
					return (EOWNERDEAD); /* success */
				rv = umtxq_check_susp(td);
				if (rv != 0)
					return (rv);
				continue;
			}
			if (owner == UMUTEX_RB_NOTRECOV)
				return (ENOTRECOVERABLE);

			/*
			 * Try the uncontested case.  This should be
			 * done in userland.
			 */
			rv = casueword32(&m->m_owner, UMUTEX_UNOWNED,
			    &owner, id);
			/* The address was invalid. */
			if (rv == -1)
				return (EFAULT);

			/* The acquire succeeded. */
			if (owner == UMUTEX_UNOWNED)
				return (0);

			/*
			 * If no one owns it but it is contested, try
			 * to acquire it.
			 */
			if (owner == UMUTEX_CONTESTED) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_CONTESTED, &owner,
				    id | UMUTEX_CONTESTED);
				/* The address was invalid. */
				if (rv == -1)
					return (EFAULT);

				if (owner == UMUTEX_CONTESTED)
					return (0);

				rv = umtxq_check_susp(td);
				if (rv != 0)
					return (rv);

				/*
				 * If this failed the lock has
				 * changed, restart.
				 */
				continue;
			}
		}

		if (mode == _UMUTEX_TRY)
			return (EBUSY);

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old,
		    owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (rv == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * If we successfully set the contested bit, sleep.
		 * Otherwise the lock changed and we need to retry, or we
		 * lost a race to the thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtxn", timeout == NULL ?
			    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);

		if (error == 0)
			error = umtxq_check_susp(td);
	}

	return (0);
}

/*
 * Unlock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	uint32_t owner, old, id, newlock;
	int error, count;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	newlock = umtx_unlock_val(flags, rb);
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, newlock);
		if (error == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there are zero or one waiting threads; otherwise, it must
	 * be marked as contested.
	 */
	if (count > 1)
		newlock |= UMUTEX_CONTESTED;
	error = casueword32(&m->m_owner, owner, &old, newlock);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
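/*
 * Reader aid: do_unlock_normal() marks the word unowned when at most
 * one waiter remains and keeps UMUTEX_CONTESTED otherwise; a woken
 * waiter that loses the race re-sets the contested bit itself before
 * sleeping again in do_lock_normal().
 */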
/*
 * Check if the mutex is available and wake up a waiter;
 * this is for simple mutexes only.
 */
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != 0 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV)
		return (0);

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	if (count <= 1 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV) {
		error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    UMUTEX_UNOWNED);
		if (error == -1)
			error = EFAULT;
	}

	umtxq_lock(&key);
	if (error == 0 && count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 ||
	    owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV))
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}
/*
 * Check if the mutex has waiters and try to fix the contention bit.
 */
static int
do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old;
	int type;
	int error;
	int count;

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT |
	    UMUTEX_ROBUST)) {
	case 0:
	case UMUTEX_ROBUST:
		type = TYPE_NORMAL_UMUTEX;
		break;
	case UMUTEX_PRIO_INHERIT:
		type = TYPE_PI_UMUTEX;
		break;
	case (UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST):
		type = TYPE_PI_ROBUST_UMUTEX;
		break;
	case UMUTEX_PRIO_PROTECT:
		type = TYPE_PP_UMUTEX;
		break;
	case (UMUTEX_PRIO_PROTECT | UMUTEX_ROBUST):
		type = TYPE_PP_ROBUST_UMUTEX;
		break;
	default:
		return (EINVAL);
	}
	if ((error = umtx_key_get(m, type, GET_SHARE(flags), &key)) != 0)
		return (error);

	owner = 0;
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);
	/*
	 * Only repair the contention bit if there is a waiter; this means
	 * the mutex is still being referenced by userland code.  Otherwise
	 * don't update any memory.
	 */
	if (count > 1) {
		error = fueword32(&m->m_owner, &owner);
		if (error == -1)
			error = EFAULT;
		while (error == 0 && (owner & UMUTEX_CONTESTED) == 0) {
			error = casueword32(&m->m_owner, owner, &old,
			    owner | UMUTEX_CONTESTED);
			if (error == -1) {
				error = EFAULT;
				break;
			}
			if (old == owner)
				break;
			owner = old;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
	} else if (count == 1) {
		error = fueword32(&m->m_owner, &owner);
		if (error == -1)
			error = EFAULT;
		while (error == 0 && (owner & ~UMUTEX_CONTESTED) != 0 &&
		    (owner & UMUTEX_CONTESTED) == 0) {
			error = casueword32(&m->m_owner, owner, &old,
			    owner | UMUTEX_CONTESTED);
			if (error == -1) {
				error = EFAULT;
				break;
			}
			if (old == owner)
				break;
			owner = old;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
	}
	umtxq_lock(&key);
	if (error == EFAULT) {
		umtxq_signal(&key, INT_MAX);
	} else if (count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 ||
	    owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV))
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

static inline struct umtx_pi *
umtx_pi_alloc(int flags)
{
	struct umtx_pi *pi;

	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
	TAILQ_INIT(&pi->pi_blocked);
	atomic_add_int(&umtx_pi_allocated, 1);
	return (pi);
}

static inline void
umtx_pi_free(struct umtx_pi *pi)
{
	uma_zfree(umtx_pi_zone, pi);
	atomic_add_int(&umtx_pi_allocated, -1);
}

/*
 * Adjust the thread's position on a pi_state after its priority has been
 * changed.
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread or higher than the next thread.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}

static struct umtx_pi *
umtx_pi_next(struct umtx_pi *pi)
{
	struct umtx_q *uq_owner;

	if (pi->pi_owner == NULL)
		return (NULL);
	uq_owner = pi->pi_owner->td_umtxq;
	if (uq_owner == NULL)
		return (NULL);
	return (uq_owner->uq_pi_blocked);
}
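/*
 * Reader aid: umtx_pi_next() steps from a PI mutex to the PI mutex its
 * owner is itself blocked on, i.e. one link of the blocking chain.
 * Because userland can corrupt PI mutexes, this chain may form a
 * cycle; the cycle detection below guards the propagation walks.
 */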
/*
 * Floyd's cycle-finding algorithm.
 */
static bool
umtx_pi_check_loop(struct umtx_pi *pi)
{
	struct umtx_pi *pi1;	/* fast iterator */

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (false);
	pi1 = pi;
	for (;;) {
		pi = umtx_pi_next(pi);
		if (pi == NULL)
			break;
		pi1 = umtx_pi_next(pi1);
		if (pi1 == NULL)
			break;
		pi1 = umtx_pi_next(pi1);
		if (pi1 == NULL)
			break;
		if (pi == pi1)
			return (true);
	}
	return (false);
}

/*
 * Propagate priority when a thread is blocked on a POSIX
 * PI mutex.
 */
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;
	if (umtx_pi_check_loop(pi))
		return;

	for (;;) {
		td = pi->pi_owner;
		if (td == NULL || td == curthread)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		thread_lock(td);
		if (td->td_lend_user_pri > pri)
			sched_lend_user_prio(td, pri);
		else {
			thread_unlock(td);
			break;
		}
		thread_unlock(td);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		if (pi == NULL)
			break;
		/* Resort td on the list if needed. */
		umtx_pi_adjust_thread(pi, td);
	}
}

/*
 * Unpropagate priority for a PI mutex when a thread blocked on
 * it is interrupted by a signal or resumed by others.
 */
static void
umtx_repropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);

	if (umtx_pi_check_loop(pi))
		return;
	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		thread_lock(pi->pi_owner);
		sched_lend_user_prio(pi->pi_owner, pri);
		thread_unlock(pi->pi_owner);
		if ((pi = uq_owner->uq_pi_blocked) != NULL)
			umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
	}
}

/*
 * Insert a PI mutex into the owned list.
 */
static void
umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_assert(&umtx_lock, MA_OWNED);
	MPASS(pi->pi_owner == NULL);
	pi->pi_owner = owner;
	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
}

/*
 * Disown a PI mutex, and remove it from the owned list.
 */
static void
umtx_pi_disown(struct umtx_pi *pi)
{

	mtx_assert(&umtx_lock, MA_OWNED);
	TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, pi, pi_link);
	pi->pi_owner = NULL;
}
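/*
 * Reader aid: umtx_repropagate_priority() recomputes the owner's lent
 * priority from scratch as the minimum of (a) the top waiter of every
 * contested PI mutex the owner holds and (b) uq_inherited_pri from PP
 * mutexes, so the loss of one waiter can lower the owner back down.
 */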
/*
 * Claim ownership of a PI mutex.
 */
static int
umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq;
	int pri;

	mtx_lock(&umtx_lock);
	if (pi->pi_owner == owner) {
		mtx_unlock(&umtx_lock);
		return (0);
	}

	if (pi->pi_owner != NULL) {
		/*
		 * Userland may have already messed up the mutex, sigh.
		 */
		mtx_unlock(&umtx_lock);
		return (EPERM);
	}
	umtx_pi_setowner(pi, owner);
	uq = TAILQ_FIRST(&pi->pi_blocked);
	if (uq != NULL) {
		pri = UPRI(uq->uq_thread);
		thread_lock(owner);
		if (pri < UPRI(owner))
			sched_lend_user_prio(owner, pri);
		thread_unlock(owner);
	}
	mtx_unlock(&umtx_lock);
	return (0);
}

/*
 * Adjust a thread's position in the queue of the PI mutex it is
 * blocked on; this may trigger another round of priority propagation.
 */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	mtx_lock(&umtx_lock);
	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	if (pi != NULL) {
		umtx_pi_adjust_thread(pi, td);
		umtx_repropagate_priority(pi);
	}
	mtx_unlock(&umtx_lock);
}

/*
 * Sleep on a PI mutex.
 */
static int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, uint32_t owner,
    const char *wmesg, struct abs_timeout *timo, bool shared)
{
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int error, pri;
#ifdef INVARIANTS
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
#endif
	error = 0;
	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key));
	KASSERT(uc->uc_busy != 0, ("umtx chain is not busy"));
	umtxq_insert(uq);
	mtx_lock(&umtx_lock);
	if (pi->pi_owner == NULL) {
		mtx_unlock(&umtx_lock);
		td1 = tdfind(owner, shared ? -1 : td->td_proc->p_pid);
		mtx_lock(&umtx_lock);
		if (td1 != NULL) {
			if (pi->pi_owner == NULL)
				umtx_pi_setowner(pi, td1);
			PROC_UNLOCK(td1->td_proc);
		}
	}

	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	thread_lock(td);
	td->td_flags |= TDF_UPIBLOCKED;
	thread_unlock(td);
	umtx_propagate_priority(td);
	mtx_unlock(&umtx_lock);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, wmesg, timo);
	umtxq_remove(uq);

	mtx_lock(&umtx_lock);
	uq->uq_pi_blocked = NULL;
	thread_lock(td);
	td->td_flags &= ~TDF_UPIBLOCKED;
	thread_unlock(td);
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_repropagate_priority(pi);
	mtx_unlock(&umtx_lock);
	umtxq_unlock(&uq->uq_key);

	return (error);
}

/*
 * Add a reference to a PI mutex.
 */
static void
umtx_pi_ref(struct umtx_pi *pi)
{

	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&pi->pi_key));
	pi->pi_refcount++;
}
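/*
 * Reader aid: the pi_refcount manipulated by umtx_pi_ref() and
 * umtx_pi_unref() (below) keeps a umtx_pi alive while do_lock_pi() and
 * do_unlock_pi() reference it across dropped chain locks; the
 * structure is unhashed and freed only when the last reference goes
 * away.
 */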
/*
 * Drop a reference to a PI mutex; when the reference count
 * reaches zero, its memory is freed.
 */
static void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		mtx_lock(&umtx_lock);
		if (pi->pi_owner != NULL)
			umtx_pi_disown(pi);
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
		    ("blocked queue not empty"));
		mtx_unlock(&umtx_lock);
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		umtx_pi_free(pi);
	}
}

/*
 * Find a PI mutex in the hash table.
 */
static struct umtx_pi *
umtx_pi_lookup(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtx_pi *pi;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);

	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
		if (umtx_key_match(&pi->pi_key, key)) {
			return (pi);
		}
	}
	return (NULL);
}

/*
 * Insert a PI mutex into the hash table.
 */
static inline void
umtx_pi_insert(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
}
/*
 * Lock a PI mutex.
 */
static int
do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, old_owner, owner, old;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	pi = umtx_pi_lookup(&uq->uq_key);
	if (pi == NULL) {
		new_pi = umtx_pi_alloc(M_NOWAIT);
		if (new_pi == NULL) {
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc(M_WAITOK);
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL) {
				umtx_pi_free(new_pi);
				new_pi = NULL;
			}
		}
		if (new_pi != NULL) {
			new_pi->pi_key = uq->uq_key;
			umtx_pi_insert(new_pi);
			pi = new_pi;
		}
	}
	umtx_pi_ref(pi);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Care must be exercised when dealing with umtx structure.  It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED) {
			error = 0;
			break;
		}

		if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
			break;
		}

		/* If no one owns it but it is contested, try to acquire it. */
		if (owner == UMUTEX_CONTESTED || owner == UMUTEX_RB_OWNERDEAD) {
			old_owner = owner;
			rv = casueword32(&m->m_owner, owner, &owner,
			    id | UMUTEX_CONTESTED);
			/* The address was invalid. */
			if (rv == -1) {
				error = EFAULT;
				break;
			}

			if (owner == old_owner) {
				umtxq_lock(&uq->uq_key);
				umtxq_busy(&uq->uq_key);
				error = umtx_pi_claim(pi, td);
				umtxq_unbusy(&uq->uq_key);
				umtxq_unlock(&uq->uq_key);
				if (error != 0) {
					/*
					 * Since we're going to return an
					 * error, restore the m_owner to its
					 * previous, unowned state to avoid
					 * compounding the problem.
					 */
					(void)casuword32(&m->m_owner,
					    id | UMUTEX_CONTESTED,
					    old_owner);
				}
				if (error == 0 &&
				    old_owner == UMUTEX_RB_OWNERDEAD)
					error = EOWNERDEAD;
				break;
			}

			error = umtxq_check_susp(td);
			if (error != 0)
				break;

			/* If this failed the lock has changed, restart. */
			continue;
		}

		if ((owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old, owner |
		    UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}

		umtxq_lock(&uq->uq_key);
		/*
		 * If we successfully set the contested bit, sleep.
		 * Otherwise the lock changed and we need to retry, or we
		 * lost a race to the thread unlocking the umtx.  Note
		 * that the UMUTEX_RB_OWNERDEAD value for owner is
		 * impossible there.
		 */
		if (old == owner) {
			error = umtxq_sleep_pi(uq, pi,
			    owner & ~UMUTEX_CONTESTED,
			    "umtxpi", timeout == NULL ? NULL : &timo,
			    (flags & USYNC_PROCESS_SHARED) != 0);
			if (error != 0)
				continue;
		} else {
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
		}

		error = umtxq_check_susp(td);
		if (error != 0)
			break;
	}

	umtxq_lock(&uq->uq_key);
	umtx_pi_unref(pi);
	umtxq_unlock(&uq->uq_key);

	umtx_key_release(&uq->uq_key);
	return (error);
}
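/*
 * Reader aid: for robust PI mutexes, do_lock_pi() treats
 * UMUTEX_RB_OWNERDEAD like UMUTEX_CONTESTED: the caller acquires the
 * lock and gets EOWNERDEAD so userland can run its consistency
 * recovery, while UMUTEX_RB_NOTRECOV always fails with
 * ENOTRECOVERABLE.
 */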
/*
 * Unlock a PI mutex.
 */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	uint32_t id, new_owner, old, owner;
	int count, error, pri;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	new_owner = umtx_unlock_val(flags, rb);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, new_owner);
		if (error == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count_pi(&key, &uq_first);
	if (uq_first != NULL) {
		mtx_lock(&umtx_lock);
		pi = uq_first->uq_pi_blocked;
		KASSERT(pi != NULL, ("pi == NULL?"));
		if (pi->pi_owner != td && !(rb && pi->pi_owner == NULL)) {
			mtx_unlock(&umtx_lock);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			/* userland messed up the mutex */
			return (EPERM);
		}
		uq_me = td->td_umtxq;
		if (pi->pi_owner == td)
			umtx_pi_disown(pi);
		/* get highest priority thread which is still sleeping. */
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		while (uq_first != NULL &&
		    (uq_first->uq_flags & UQF_UMTXQ) == 0) {
			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
		}
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
		if (uq_first)
			umtxq_signal_thread(uq_first);
	} else {
		pi = umtx_pi_lookup(&key);
		/*
		 * A umtx_pi can exist if a signal or timeout removed the
		 * last waiter from the umtxq, but there is still
		 * a thread in do_lock_pi() holding the umtx_pi.
		 */
		if (pi != NULL) {
			/*
			 * The umtx_pi can be unowned, such as when a thread
			 * has just entered do_lock_pi(), allocated the
			 * umtx_pi, and unlocked the umtxq.
			 * If the current thread owns it, it must disown it.
			 */
			mtx_lock(&umtx_lock);
			if (pi->pi_owner == td)
				umtx_pi_disown(pi);
			mtx_unlock(&umtx_lock);
		}
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there are zero or one waiting threads; otherwise, it must
	 * be marked as contested.
	 */
	if (count > 1)
		new_owner |= UMUTEX_CONTESTED;
	error = casueword32(&m->m_owner, owner, &old, new_owner);

	umtxq_unbusy_unlocked(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
/*
 * Lock a PP mutex.
 */
static int
do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct abs_timeout timo;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t ceiling;
	uint32_t owner, id;
	int error, pri, old_inherited_pri, su, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
	for (;;) {
		old_inherited_pri = uq->uq_inherited_pri;
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		rv = fueword32(&m->m_ceilings[0], &ceiling);
		if (rv == -1) {
			error = EFAULT;
			goto out;
		}
		ceiling = RTP_PRIO_MAX - ceiling;
		if (ceiling > RTP_PRIO_MAX) {
			error = EINVAL;
			goto out;
		}

		mtx_lock(&umtx_lock);
		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
			mtx_unlock(&umtx_lock);
			error = EINVAL;
			goto out;
		}
		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
			thread_lock(td);
			if (uq->uq_inherited_pri < UPRI(td))
				sched_lend_user_prio(td, uq->uq_inherited_pri);
			thread_unlock(td);
		}
		mtx_unlock(&umtx_lock);

		rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    id | UMUTEX_CONTESTED);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		if (owner == UMUTEX_CONTESTED) {
			error = 0;
			break;
		} else if (owner == UMUTEX_RB_OWNERDEAD) {
			rv = casueword32(&m->m_owner, UMUTEX_RB_OWNERDEAD,
			    &owner, id | UMUTEX_CONTESTED);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (owner == UMUTEX_RB_OWNERDEAD) {
				error = EOWNERDEAD; /* success */
				break;
			}
			error = 0;
		} else if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", timeout == NULL ?
		    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);

		mtx_lock(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}

	if (error != 0 && error != EOWNERDEAD) {
		mtx_lock(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}

out:
	umtxq_unbusy_unlocked(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}
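/*
 * Reader aid on the ceiling arithmetic above: m_ceilings[0] carries a
 * POSIX-style realtime priority (larger value, higher priority).  The
 * kernel converts it with ceiling = RTP_PRIO_MAX - ceiling into its
 * own scale (smaller value, higher priority), rejects wrapped values
 * via the unsigned compare against RTP_PRIO_MAX, and boosts the owner
 * to PRI_MIN_REALTIME + ceiling while the lock is held.
 */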
2238 */ 2239 static int 2240 do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, bool rb) 2241 { 2242 struct umtx_key key; 2243 struct umtx_q *uq, *uq2; 2244 struct umtx_pi *pi; 2245 uint32_t id, owner, rceiling; 2246 int error, pri, new_inherited_pri, su; 2247 2248 id = td->td_tid; 2249 uq = td->td_umtxq; 2250 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); 2251 2252 /* 2253 * Make sure we own this mtx. 2254 */ 2255 error = fueword32(&m->m_owner, &owner); 2256 if (error == -1) 2257 return (EFAULT); 2258 2259 if ((owner & ~UMUTEX_CONTESTED) != id) 2260 return (EPERM); 2261 2262 error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t)); 2263 if (error != 0) 2264 return (error); 2265 2266 if (rceiling == -1) 2267 new_inherited_pri = PRI_MAX; 2268 else { 2269 rceiling = RTP_PRIO_MAX - rceiling; 2270 if (rceiling > RTP_PRIO_MAX) 2271 return (EINVAL); 2272 new_inherited_pri = PRI_MIN_REALTIME + rceiling; 2273 } 2274 2275 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2276 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2277 &key)) != 0) 2278 return (error); 2279 umtxq_lock(&key); 2280 umtxq_busy(&key); 2281 umtxq_unlock(&key); 2282 /* 2283 * For priority protected mutex, always set unlocked state 2284 * to UMUTEX_CONTESTED, so that userland always enters kernel 2285 * to lock the mutex, it is necessary because thread priority 2286 * has to be adjusted for such mutex. 2287 */ 2288 error = suword32(&m->m_owner, umtx_unlock_val(flags, rb) | 2289 UMUTEX_CONTESTED); 2290 2291 umtxq_lock(&key); 2292 if (error == 0) 2293 umtxq_signal(&key, 1); 2294 umtxq_unbusy(&key); 2295 umtxq_unlock(&key); 2296 2297 if (error == -1) 2298 error = EFAULT; 2299 else { 2300 mtx_lock(&umtx_lock); 2301 if (su != 0) 2302 uq->uq_inherited_pri = new_inherited_pri; 2303 pri = PRI_MAX; 2304 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2305 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2306 if (uq2 != NULL) { 2307 if (pri > UPRI(uq2->uq_thread)) 2308 pri = UPRI(uq2->uq_thread); 2309 } 2310 } 2311 if (pri > uq->uq_inherited_pri) 2312 pri = uq->uq_inherited_pri; 2313 thread_lock(td); 2314 sched_lend_user_prio(td, pri); 2315 thread_unlock(td); 2316 mtx_unlock(&umtx_lock); 2317 } 2318 umtx_key_release(&key); 2319 return (error); 2320 } 2321 2322 static int 2323 do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling, 2324 uint32_t *old_ceiling) 2325 { 2326 struct umtx_q *uq; 2327 uint32_t flags, id, owner, save_ceiling; 2328 int error, rv, rv1; 2329 2330 error = fueword32(&m->m_flags, &flags); 2331 if (error == -1) 2332 return (EFAULT); 2333 if ((flags & UMUTEX_PRIO_PROTECT) == 0) 2334 return (EINVAL); 2335 if (ceiling > RTP_PRIO_MAX) 2336 return (EINVAL); 2337 id = td->td_tid; 2338 uq = td->td_umtxq; 2339 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2340 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2341 &uq->uq_key)) != 0) 2342 return (error); 2343 for (;;) { 2344 umtxq_lock(&uq->uq_key); 2345 umtxq_busy(&uq->uq_key); 2346 umtxq_unlock(&uq->uq_key); 2347 2348 rv = fueword32(&m->m_ceilings[0], &save_ceiling); 2349 if (rv == -1) { 2350 error = EFAULT; 2351 break; 2352 } 2353 2354 rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, 2355 id | UMUTEX_CONTESTED); 2356 if (rv == -1) { 2357 error = EFAULT; 2358 break; 2359 } 2360 2361 if (owner == UMUTEX_CONTESTED) { 2362 rv = suword32(&m->m_ceilings[0], ceiling); 2363 rv1 = suword32(&m->m_owner, UMUTEX_CONTESTED); 2364 error = (rv == 0 && rv1 == 0) ? 
0: EFAULT; 2365 break; 2366 } 2367 2368 if ((owner & ~UMUTEX_CONTESTED) == id) { 2369 rv = suword32(&m->m_ceilings[0], ceiling); 2370 error = rv == 0 ? 0 : EFAULT; 2371 break; 2372 } 2373 2374 if (owner == UMUTEX_RB_OWNERDEAD) { 2375 error = EOWNERDEAD; 2376 break; 2377 } else if (owner == UMUTEX_RB_NOTRECOV) { 2378 error = ENOTRECOVERABLE; 2379 break; 2380 } 2381 2382 /* 2383 * If we caught a signal, we have retried and now 2384 * exit immediately. 2385 */ 2386 if (error != 0) 2387 break; 2388 2389 /* 2390 * The mutex is held by another thread: sleep until it is 2391 * unlocked and then retry. PP mutexes are always unlocked via 2392 * the kernel (see do_unlock_pp()), so the wakeup is not missed. 2393 */ 2394 umtxq_lock(&uq->uq_key); 2395 umtxq_insert(uq); 2396 umtxq_unbusy(&uq->uq_key); 2397 error = umtxq_sleep(uq, "umtxpp", NULL); 2398 umtxq_remove(uq); 2399 umtxq_unlock(&uq->uq_key); 2400 } 2401 umtxq_lock(&uq->uq_key); 2402 if (error == 0) 2403 umtxq_signal(&uq->uq_key, INT_MAX); 2404 umtxq_unbusy(&uq->uq_key); 2405 umtxq_unlock(&uq->uq_key); 2406 umtx_key_release(&uq->uq_key); 2407 if (error == 0 && old_ceiling != NULL) { 2408 rv = suword32(old_ceiling, save_ceiling); 2409 error = rv == 0 ? 0 : EFAULT; 2410 } 2411 return (error); 2412 } 2413 2414 /* 2415 * Lock a userland POSIX mutex. 2416 */ 2417 static int 2418 do_lock_umutex(struct thread *td, struct umutex *m, 2419 struct _umtx_time *timeout, int mode) 2420 { 2421 uint32_t flags; 2422 int error; 2423 2424 error = fueword32(&m->m_flags, &flags); 2425 if (error == -1) 2426 return (EFAULT); 2427 2428 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2429 case 0: 2430 error = do_lock_normal(td, m, flags, timeout, mode); 2431 break; 2432 case UMUTEX_PRIO_INHERIT: 2433 error = do_lock_pi(td, m, flags, timeout, mode); 2434 break; 2435 case UMUTEX_PRIO_PROTECT: 2436 error = do_lock_pp(td, m, flags, timeout, mode); 2437 break; 2438 default: 2439 return (EINVAL); 2440 } 2441 if (timeout == NULL) { 2442 if (error == EINTR && mode != _UMUTEX_WAIT) 2443 error = ERESTART; 2444 } else { 2445 /* Timed-locking is not restarted. */ 2446 if (error == ERESTART) 2447 error = EINTR; 2448 } 2449 return (error); 2450 } 2451 2452 /* 2453 * Unlock a userland POSIX mutex.
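 *
 * A hypothetical userland fast path (illustrative only, not the
 * libthr sources) makes the kernel's role clear:
 *
 *	if (atomic_cmpset_rel_32(&m->m_owner, tid, UMUTEX_UNOWNED) == 0)
 *		_umtx_op(m, UMTX_OP_MUTEX_UNLOCK, 0, NULL, NULL);
 *
 * i.e. the code below is only reached when the mutex cannot be
 * released with a single compare-and-set; the protocol-specific
 * helpers then hand ownership over and re-adjust priorities as needed.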
2454 */ 2455 static int 2456 do_unlock_umutex(struct thread *td, struct umutex *m, bool rb) 2457 { 2458 uint32_t flags; 2459 int error; 2460 2461 error = fueword32(&m->m_flags, &flags); 2462 if (error == -1) 2463 return (EFAULT); 2464 2465 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2466 case 0: 2467 return (do_unlock_normal(td, m, flags, rb)); 2468 case UMUTEX_PRIO_INHERIT: 2469 return (do_unlock_pi(td, m, flags, rb)); 2470 case UMUTEX_PRIO_PROTECT: 2471 return (do_unlock_pp(td, m, flags, rb)); 2472 } 2473 2474 return (EINVAL); 2475 } 2476 2477 static int 2478 do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m, 2479 struct timespec *timeout, u_long wflags) 2480 { 2481 struct abs_timeout timo; 2482 struct umtx_q *uq; 2483 uint32_t flags, clockid, hasw; 2484 int error; 2485 2486 uq = td->td_umtxq; 2487 error = fueword32(&cv->c_flags, &flags); 2488 if (error == -1) 2489 return (EFAULT); 2490 error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key); 2491 if (error != 0) 2492 return (error); 2493 2494 if ((wflags & CVWAIT_CLOCKID) != 0) { 2495 error = fueword32(&cv->c_clockid, &clockid); 2496 if (error == -1) { 2497 umtx_key_release(&uq->uq_key); 2498 return (EFAULT); 2499 } 2500 if (clockid < CLOCK_REALTIME || 2501 clockid >= CLOCK_THREAD_CPUTIME_ID) { 2502 /* hmm, only HW clock id will work. */ 2503 umtx_key_release(&uq->uq_key); 2504 return (EINVAL); 2505 } 2506 } else { 2507 clockid = CLOCK_REALTIME; 2508 } 2509 2510 umtxq_lock(&uq->uq_key); 2511 umtxq_busy(&uq->uq_key); 2512 umtxq_insert(uq); 2513 umtxq_unlock(&uq->uq_key); 2514 2515 /* 2516 * Set c_has_waiters to 1 before releasing user mutex, also 2517 * don't modify cache line when unnecessary. 2518 */ 2519 error = fueword32(&cv->c_has_waiters, &hasw); 2520 if (error == 0 && hasw == 0) 2521 suword32(&cv->c_has_waiters, 1); 2522 2523 umtxq_unbusy_unlocked(&uq->uq_key); 2524 2525 error = do_unlock_umutex(td, m, false); 2526 2527 if (timeout != NULL) 2528 abs_timeout_init(&timo, clockid, (wflags & CVWAIT_ABSTIME) != 0, 2529 timeout); 2530 2531 umtxq_lock(&uq->uq_key); 2532 if (error == 0) { 2533 error = umtxq_sleep(uq, "ucond", timeout == NULL ? 2534 NULL : &timo); 2535 } 2536 2537 if ((uq->uq_flags & UQF_UMTXQ) == 0) 2538 error = 0; 2539 else { 2540 /* 2541 * This must be timeout,interrupted by signal or 2542 * surprious wakeup, clear c_has_waiter flag when 2543 * necessary. 2544 */ 2545 umtxq_busy(&uq->uq_key); 2546 if ((uq->uq_flags & UQF_UMTXQ) != 0) { 2547 int oldlen = uq->uq_cur_queue->length; 2548 umtxq_remove(uq); 2549 if (oldlen == 1) { 2550 umtxq_unlock(&uq->uq_key); 2551 suword32(&cv->c_has_waiters, 0); 2552 umtxq_lock(&uq->uq_key); 2553 } 2554 } 2555 umtxq_unbusy(&uq->uq_key); 2556 if (error == ERESTART) 2557 error = EINTR; 2558 } 2559 2560 umtxq_unlock(&uq->uq_key); 2561 umtx_key_release(&uq->uq_key); 2562 return (error); 2563 } 2564 2565 /* 2566 * Signal a userland condition variable. 
2567 */ 2568 static int 2569 do_cv_signal(struct thread *td, struct ucond *cv) 2570 { 2571 struct umtx_key key; 2572 int error, cnt, nwake; 2573 uint32_t flags; 2574 2575 error = fueword32(&cv->c_flags, &flags); 2576 if (error == -1) 2577 return (EFAULT); 2578 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2579 return (error); 2580 umtxq_lock(&key); 2581 umtxq_busy(&key); 2582 cnt = umtxq_count(&key); 2583 nwake = umtxq_signal(&key, 1); 2584 if (cnt <= nwake) { 2585 umtxq_unlock(&key); 2586 error = suword32(&cv->c_has_waiters, 0); 2587 if (error == -1) 2588 error = EFAULT; 2589 umtxq_lock(&key); 2590 } 2591 umtxq_unbusy(&key); 2592 umtxq_unlock(&key); 2593 umtx_key_release(&key); 2594 return (error); 2595 } 2596 2597 static int 2598 do_cv_broadcast(struct thread *td, struct ucond *cv) 2599 { 2600 struct umtx_key key; 2601 int error; 2602 uint32_t flags; 2603 2604 error = fueword32(&cv->c_flags, &flags); 2605 if (error == -1) 2606 return (EFAULT); 2607 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2608 return (error); 2609 2610 umtxq_lock(&key); 2611 umtxq_busy(&key); 2612 umtxq_signal(&key, INT_MAX); 2613 umtxq_unlock(&key); 2614 2615 error = suword32(&cv->c_has_waiters, 0); 2616 if (error == -1) 2617 error = EFAULT; 2618 2619 umtxq_unbusy_unlocked(&key); 2620 2621 umtx_key_release(&key); 2622 return (error); 2623 } 2624 2625 static int 2626 do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, 2627 struct _umtx_time *timeout) 2628 { 2629 struct abs_timeout timo; 2630 struct umtx_q *uq; 2631 uint32_t flags, wrflags; 2632 int32_t state, oldstate; 2633 int32_t blocked_readers; 2634 int error, error1, rv; 2635 2636 uq = td->td_umtxq; 2637 error = fueword32(&rwlock->rw_flags, &flags); 2638 if (error == -1) 2639 return (EFAULT); 2640 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2641 if (error != 0) 2642 return (error); 2643 2644 if (timeout != NULL) 2645 abs_timeout_init2(&timo, timeout); 2646 2647 wrflags = URWLOCK_WRITE_OWNER; 2648 if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER)) 2649 wrflags |= URWLOCK_WRITE_WAITERS; 2650 2651 for (;;) { 2652 rv = fueword32(&rwlock->rw_state, &state); 2653 if (rv == -1) { 2654 umtx_key_release(&uq->uq_key); 2655 return (EFAULT); 2656 } 2657 2658 /* try to lock it */ 2659 while (!(state & wrflags)) { 2660 if (__predict_false(URWLOCK_READER_COUNT(state) == 2661 URWLOCK_MAX_READERS)) { 2662 umtx_key_release(&uq->uq_key); 2663 return (EAGAIN); 2664 } 2665 rv = casueword32(&rwlock->rw_state, state, 2666 &oldstate, state + 1); 2667 if (rv == -1) { 2668 umtx_key_release(&uq->uq_key); 2669 return (EFAULT); 2670 } 2671 if (oldstate == state) { 2672 umtx_key_release(&uq->uq_key); 2673 return (0); 2674 } 2675 error = umtxq_check_susp(td); 2676 if (error != 0) 2677 break; 2678 state = oldstate; 2679 } 2680 2681 if (error) 2682 break; 2683 2684 /* grab monitor lock */ 2685 umtxq_lock(&uq->uq_key); 2686 umtxq_busy(&uq->uq_key); 2687 umtxq_unlock(&uq->uq_key); 2688 2689 /* 2690 * re-read the state, in case it changed between the try-lock above 2691 * and the check below 2692 */ 2693 rv = fueword32(&rwlock->rw_state, &state); 2694 if (rv == -1) 2695 error = EFAULT; 2696 2697 /* set read contention bit */ 2698 while (error == 0 && (state & wrflags) && 2699 !(state & URWLOCK_READ_WAITERS)) { 2700 rv = casueword32(&rwlock->rw_state, state, 2701 &oldstate, state | URWLOCK_READ_WAITERS); 2702 if (rv == -1) { 2703 error = EFAULT; 2704 break; 2705 } 2706 if (oldstate 
== state) 2707 goto sleep; 2708 state = oldstate; 2709 error = umtxq_check_susp(td); 2710 if (error != 0) 2711 break; 2712 } 2713 if (error != 0) { 2714 umtxq_unbusy_unlocked(&uq->uq_key); 2715 break; 2716 } 2717 2718 /* The state changed while we were setting the flags; restart. */ 2719 if (!(state & wrflags)) { 2720 umtxq_unbusy_unlocked(&uq->uq_key); 2721 error = umtxq_check_susp(td); 2722 if (error != 0) 2723 break; 2724 continue; 2725 } 2726 2727 sleep: 2728 /* 2729 * The contention bit is set; before sleeping, increase the 2730 * read waiter count. 2731 */ 2732 rv = fueword32(&rwlock->rw_blocked_readers, 2733 &blocked_readers); 2734 if (rv == -1) { 2735 umtxq_unbusy_unlocked(&uq->uq_key); 2736 error = EFAULT; 2737 break; 2738 } 2739 suword32(&rwlock->rw_blocked_readers, blocked_readers+1); 2740 2741 while (state & wrflags) { 2742 umtxq_lock(&uq->uq_key); 2743 umtxq_insert(uq); 2744 umtxq_unbusy(&uq->uq_key); 2745 2746 error = umtxq_sleep(uq, "urdlck", timeout == NULL ? 2747 NULL : &timo); 2748 2749 umtxq_busy(&uq->uq_key); 2750 umtxq_remove(uq); 2751 umtxq_unlock(&uq->uq_key); 2752 if (error) 2753 break; 2754 rv = fueword32(&rwlock->rw_state, &state); 2755 if (rv == -1) { 2756 error = EFAULT; 2757 break; 2758 } 2759 } 2760 2761 /* Decrease the read waiter count, and maybe clear the read contention bit. */ 2762 rv = fueword32(&rwlock->rw_blocked_readers, 2763 &blocked_readers); 2764 if (rv == -1) { 2765 umtxq_unbusy_unlocked(&uq->uq_key); 2766 error = EFAULT; 2767 break; 2768 } 2769 suword32(&rwlock->rw_blocked_readers, blocked_readers-1); 2770 if (blocked_readers == 1) { 2771 rv = fueword32(&rwlock->rw_state, &state); 2772 if (rv == -1) { 2773 umtxq_unbusy_unlocked(&uq->uq_key); 2774 error = EFAULT; 2775 break; 2776 } 2777 for (;;) { 2778 rv = casueword32(&rwlock->rw_state, state, 2779 &oldstate, state & ~URWLOCK_READ_WAITERS); 2780 if (rv == -1) { 2781 error = EFAULT; 2782 break; 2783 } 2784 if (oldstate == state) 2785 break; 2786 state = oldstate; 2787 error1 = umtxq_check_susp(td); 2788 if (error1 != 0) { 2789 if (error == 0) 2790 error = error1; 2791 break; 2792 } 2793 } 2794 } 2795 2796 umtxq_unbusy_unlocked(&uq->uq_key); 2797 if (error != 0) 2798 break; 2799 } 2800 umtx_key_release(&uq->uq_key); 2801 if (error == ERESTART) 2802 error = EINTR; 2803 return (error); 2804 } 2805 2806 static int 2807 do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout) 2808 { 2809 struct abs_timeout timo; 2810 struct umtx_q *uq; 2811 uint32_t flags; 2812 int32_t state, oldstate; 2813 int32_t blocked_writers; 2814 int32_t blocked_readers; 2815 int error, error1, rv; 2816 2817 uq = td->td_umtxq; 2818 error = fueword32(&rwlock->rw_flags, &flags); 2819 if (error == -1) 2820 return (EFAULT); 2821 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2822 if (error != 0) 2823 return (error); 2824 2825 if (timeout != NULL) 2826 abs_timeout_init2(&timo, timeout); 2827 2828 blocked_readers = 0; 2829 for (;;) { 2830 rv = fueword32(&rwlock->rw_state, &state); 2831 if (rv == -1) { 2832 umtx_key_release(&uq->uq_key); 2833 return (EFAULT); 2834 } 2835 while ((state & URWLOCK_WRITE_OWNER) == 0 && 2836 URWLOCK_READER_COUNT(state) == 0) { 2837 rv = casueword32(&rwlock->rw_state, state, 2838 &oldstate, state | URWLOCK_WRITE_OWNER); 2839 if (rv == -1) { 2840 umtx_key_release(&uq->uq_key); 2841 return (EFAULT); 2842 } 2843 if (oldstate == state) { 2844 umtx_key_release(&uq->uq_key); 2845 return (0); 2846 } 2847 state = oldstate; 2848 error = umtxq_check_susp(td); 2849 if (error != 0) 2850 break; 2851
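			/*
			 * umtxq_check_susp(), defined earlier in this
			 * file, yields to a pending single-threading
			 * or suspension request; a non-zero return
			 * aborts the CAS retry loop.
			 */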
} 2852 2853 if (error) { 2854 if (!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) && 2855 blocked_readers != 0) { 2856 umtxq_lock(&uq->uq_key); 2857 umtxq_busy(&uq->uq_key); 2858 umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE); 2859 umtxq_unbusy(&uq->uq_key); 2860 umtxq_unlock(&uq->uq_key); 2861 } 2862 2863 break; 2864 } 2865 2866 /* grab monitor lock */ 2867 umtxq_lock(&uq->uq_key); 2868 umtxq_busy(&uq->uq_key); 2869 umtxq_unlock(&uq->uq_key); 2870 2871 /* 2872 * Re-read the state, in case it changed between the 2873 * try-lock above and the check below. 2874 */ 2875 rv = fueword32(&rwlock->rw_state, &state); 2876 if (rv == -1) 2877 error = EFAULT; 2878 2879 while (error == 0 && ((state & URWLOCK_WRITE_OWNER) || 2880 URWLOCK_READER_COUNT(state) != 0) && 2881 (state & URWLOCK_WRITE_WAITERS) == 0) { 2882 rv = casueword32(&rwlock->rw_state, state, 2883 &oldstate, state | URWLOCK_WRITE_WAITERS); 2884 if (rv == -1) { 2885 error = EFAULT; 2886 break; 2887 } 2888 if (oldstate == state) 2889 goto sleep; 2890 state = oldstate; 2891 error = umtxq_check_susp(td); 2892 if (error != 0) 2893 break; 2894 } 2895 if (error != 0) { 2896 umtxq_unbusy_unlocked(&uq->uq_key); 2897 break; 2898 } 2899 2900 if ((state & URWLOCK_WRITE_OWNER) == 0 && 2901 URWLOCK_READER_COUNT(state) == 0) { 2902 umtxq_unbusy_unlocked(&uq->uq_key); 2903 error = umtxq_check_susp(td); 2904 if (error != 0) 2905 break; 2906 continue; 2907 } 2908 sleep: 2909 rv = fueword32(&rwlock->rw_blocked_writers, 2910 &blocked_writers); 2911 if (rv == -1) { 2912 umtxq_unbusy_unlocked(&uq->uq_key); 2913 error = EFAULT; 2914 break; 2915 } 2916 suword32(&rwlock->rw_blocked_writers, blocked_writers + 1); 2917 2918 while ((state & URWLOCK_WRITE_OWNER) || 2919 URWLOCK_READER_COUNT(state) != 0) { 2920 umtxq_lock(&uq->uq_key); 2921 umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE); 2922 umtxq_unbusy(&uq->uq_key); 2923 2924 error = umtxq_sleep(uq, "uwrlck", timeout == NULL ? 2925 NULL : &timo); 2926 2927 umtxq_busy(&uq->uq_key); 2928 umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE); 2929 umtxq_unlock(&uq->uq_key); 2930 if (error) 2931 break; 2932 rv = fueword32(&rwlock->rw_state, &state); 2933 if (rv == -1) { 2934 error = EFAULT; 2935 break; 2936 } 2937 } 2938 2939 rv = fueword32(&rwlock->rw_blocked_writers, 2940 &blocked_writers); 2941 if (rv == -1) { 2942 umtxq_unbusy_unlocked(&uq->uq_key); 2943 error = EFAULT; 2944 break; 2945 } 2946 suword32(&rwlock->rw_blocked_writers, blocked_writers-1); 2947 if (blocked_writers == 1) { 2948 rv = fueword32(&rwlock->rw_state, &state); 2949 if (rv == -1) { 2950 umtxq_unbusy_unlocked(&uq->uq_key); 2951 error = EFAULT; 2952 break; 2953 } 2954 for (;;) { 2955 rv = casueword32(&rwlock->rw_state, state, 2956 &oldstate, state & ~URWLOCK_WRITE_WAITERS); 2957 if (rv == -1) { 2958 error = EFAULT; 2959 break; 2960 } 2961 if (oldstate == state) 2962 break; 2963 state = oldstate; 2964 error1 = umtxq_check_susp(td); 2965 /* 2966 * We are leaving the URWLOCK_WRITE_WAITERS 2967 * behind, but this should not harm the 2968 * correctness. 
2969 */ 2970 if (error1 != 0) { 2971 if (error == 0) 2972 error = error1; 2973 break; 2974 } 2975 } 2976 rv = fueword32(&rwlock->rw_blocked_readers, 2977 &blocked_readers); 2978 if (rv == -1) { 2979 umtxq_unbusy_unlocked(&uq->uq_key); 2980 error = EFAULT; 2981 break; 2982 } 2983 } else 2984 blocked_readers = 0; 2985 2986 umtxq_unbusy_unlocked(&uq->uq_key); 2987 } 2988 2989 umtx_key_release(&uq->uq_key); 2990 if (error == ERESTART) 2991 error = EINTR; 2992 return (error); 2993 } 2994 2995 static int 2996 do_rw_unlock(struct thread *td, struct urwlock *rwlock) 2997 { 2998 struct umtx_q *uq; 2999 uint32_t flags; 3000 int32_t state, oldstate; 3001 int error, rv, q, count; 3002 3003 uq = td->td_umtxq; 3004 error = fueword32(&rwlock->rw_flags, &flags); 3005 if (error == -1) 3006 return (EFAULT); 3007 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3008 if (error != 0) 3009 return (error); 3010 3011 error = fueword32(&rwlock->rw_state, &state); 3012 if (error == -1) { 3013 error = EFAULT; 3014 goto out; 3015 } 3016 if (state & URWLOCK_WRITE_OWNER) { 3017 for (;;) { 3018 rv = casueword32(&rwlock->rw_state, state, 3019 &oldstate, state & ~URWLOCK_WRITE_OWNER); 3020 if (rv == -1) { 3021 error = EFAULT; 3022 goto out; 3023 } 3024 if (oldstate != state) { 3025 state = oldstate; 3026 if (!(oldstate & URWLOCK_WRITE_OWNER)) { 3027 error = EPERM; 3028 goto out; 3029 } 3030 error = umtxq_check_susp(td); 3031 if (error != 0) 3032 goto out; 3033 } else 3034 break; 3035 } 3036 } else if (URWLOCK_READER_COUNT(state) != 0) { 3037 for (;;) { 3038 rv = casueword32(&rwlock->rw_state, state, 3039 &oldstate, state - 1); 3040 if (rv == -1) { 3041 error = EFAULT; 3042 goto out; 3043 } 3044 if (oldstate != state) { 3045 state = oldstate; 3046 if (URWLOCK_READER_COUNT(oldstate) == 0) { 3047 error = EPERM; 3048 goto out; 3049 } 3050 error = umtxq_check_susp(td); 3051 if (error != 0) 3052 goto out; 3053 } else 3054 break; 3055 } 3056 } else { 3057 error = EPERM; 3058 goto out; 3059 } 3060 3061 count = 0; 3062 3063 if (!(flags & URWLOCK_PREFER_READER)) { 3064 if (state & URWLOCK_WRITE_WAITERS) { 3065 count = 1; 3066 q = UMTX_EXCLUSIVE_QUEUE; 3067 } else if (state & URWLOCK_READ_WAITERS) { 3068 count = INT_MAX; 3069 q = UMTX_SHARED_QUEUE; 3070 } 3071 } else { 3072 if (state & URWLOCK_READ_WAITERS) { 3073 count = INT_MAX; 3074 q = UMTX_SHARED_QUEUE; 3075 } else if (state & URWLOCK_WRITE_WAITERS) { 3076 count = 1; 3077 q = UMTX_EXCLUSIVE_QUEUE; 3078 } 3079 } 3080 3081 if (count) { 3082 umtxq_lock(&uq->uq_key); 3083 umtxq_busy(&uq->uq_key); 3084 umtxq_signal_queue(&uq->uq_key, count, q); 3085 umtxq_unbusy(&uq->uq_key); 3086 umtxq_unlock(&uq->uq_key); 3087 } 3088 out: 3089 umtx_key_release(&uq->uq_key); 3090 return (error); 3091 } 3092 3093 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3094 static int 3095 do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout) 3096 { 3097 struct abs_timeout timo; 3098 struct umtx_q *uq; 3099 uint32_t flags, count, count1; 3100 int error, rv; 3101 3102 uq = td->td_umtxq; 3103 error = fueword32(&sem->_flags, &flags); 3104 if (error == -1) 3105 return (EFAULT); 3106 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3107 if (error != 0) 3108 return (error); 3109 3110 if (timeout != NULL) 3111 abs_timeout_init2(&timo, timeout); 3112 3113 umtxq_lock(&uq->uq_key); 3114 umtxq_busy(&uq->uq_key); 3115 umtxq_insert(uq); 3116 umtxq_unlock(&uq->uq_key); 3117 rv = casueword32(&sem->_has_waiters, 0, &count1, 1); 3118 if (rv == 0) 
3119 rv = fueword32(&sem->_count, &count); 3120 if (rv == -1 || count != 0) { 3121 umtxq_lock(&uq->uq_key); 3122 umtxq_unbusy(&uq->uq_key); 3123 umtxq_remove(uq); 3124 umtxq_unlock(&uq->uq_key); 3125 umtx_key_release(&uq->uq_key); 3126 return (rv == -1 ? EFAULT : 0); 3127 } 3128 umtxq_lock(&uq->uq_key); 3129 umtxq_unbusy(&uq->uq_key); 3130 3131 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 3132 3133 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3134 error = 0; 3135 else { 3136 umtxq_remove(uq); 3137 /* A relative timeout cannot be restarted. */ 3138 if (error == ERESTART && timeout != NULL && 3139 (timeout->_flags & UMTX_ABSTIME) == 0) 3140 error = EINTR; 3141 } 3142 umtxq_unlock(&uq->uq_key); 3143 umtx_key_release(&uq->uq_key); 3144 return (error); 3145 } 3146 3147 /* 3148 * Signal a userland semaphore. 3149 */ 3150 static int 3151 do_sem_wake(struct thread *td, struct _usem *sem) 3152 { 3153 struct umtx_key key; 3154 int error, cnt; 3155 uint32_t flags; 3156 3157 error = fueword32(&sem->_flags, &flags); 3158 if (error == -1) 3159 return (EFAULT); 3160 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3161 return (error); 3162 umtxq_lock(&key); 3163 umtxq_busy(&key); 3164 cnt = umtxq_count(&key); 3165 if (cnt > 0) { 3166 /* 3167 * The wait count is greater than 0, which means the memory 3168 * is still being referenced by user code, so we can safely 3169 * update the _has_waiters flag. 3170 */ 3171 if (cnt == 1) { 3172 umtxq_unlock(&key); 3173 error = suword32(&sem->_has_waiters, 0); 3174 umtxq_lock(&key); 3175 if (error == -1) 3176 error = EFAULT; 3177 } 3178 umtxq_signal(&key, 1); 3179 } 3180 umtxq_unbusy(&key); 3181 umtxq_unlock(&key); 3182 umtx_key_release(&key); 3183 return (error); 3184 } 3185 #endif 3186 3187 static int 3188 do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout) 3189 { 3190 struct abs_timeout timo; 3191 struct umtx_q *uq; 3192 uint32_t count, flags; 3193 int error, rv; 3194 3195 uq = td->td_umtxq; 3196 flags = fuword32(&sem->_flags); 3197 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3198 if (error != 0) 3199 return (error); 3200 3201 if (timeout != NULL) 3202 abs_timeout_init2(&timo, timeout); 3203 3204 umtxq_lock(&uq->uq_key); 3205 umtxq_busy(&uq->uq_key); 3206 umtxq_insert(uq); 3207 umtxq_unlock(&uq->uq_key); 3208 rv = fueword32(&sem->_count, &count); 3209 if (rv == -1) { 3210 umtxq_lock(&uq->uq_key); 3211 umtxq_unbusy(&uq->uq_key); 3212 umtxq_remove(uq); 3213 umtxq_unlock(&uq->uq_key); 3214 umtx_key_release(&uq->uq_key); 3215 return (EFAULT); 3216 } 3217 for (;;) { 3218 if (USEM_COUNT(count) != 0) { 3219 umtxq_lock(&uq->uq_key); 3220 umtxq_unbusy(&uq->uq_key); 3221 umtxq_remove(uq); 3222 umtxq_unlock(&uq->uq_key); 3223 umtx_key_release(&uq->uq_key); 3224 return (0); 3225 } 3226 if (count == USEM_HAS_WAITERS) 3227 break; 3228 rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS); 3229 if (rv == -1) { 3230 umtxq_lock(&uq->uq_key); 3231 umtxq_unbusy(&uq->uq_key); 3232 umtxq_remove(uq); 3233 umtxq_unlock(&uq->uq_key); 3234 umtx_key_release(&uq->uq_key); 3235 return (EFAULT); 3236 } 3237 if (count == 0) 3238 break; 3239 } 3240 umtxq_lock(&uq->uq_key); 3241 umtxq_unbusy(&uq->uq_key); 3242 3243 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 3244 3245 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3246 error = 0; 3247 else { 3248 umtxq_remove(uq); 3249 if (timeout != NULL && (timeout->_flags & UMTX_ABSTIME) == 0) { 3250 /* A relative timeout cannot be restarted.
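			 * Instead, when the sleep is interrupted, the
			 * time that is left is written back into the
			 * caller's _umtx_time by __umtx_op_sem2_wait()
			 * below, so userland can resume the wait.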
*/ 3251 if (error == ERESTART) 3252 error = EINTR; 3253 if (error == EINTR) { 3254 abs_timeout_update(&timo); 3255 timespecsub(&timo.end, &timo.cur, 3256 &timeout->_timeout); 3257 } 3258 } 3259 } 3260 umtxq_unlock(&uq->uq_key); 3261 umtx_key_release(&uq->uq_key); 3262 return (error); 3263 } 3264 3265 /* 3266 * Signal a userland semaphore. 3267 */ 3268 static int 3269 do_sem2_wake(struct thread *td, struct _usem2 *sem) 3270 { 3271 struct umtx_key key; 3272 int error, cnt, rv; 3273 uint32_t count, flags; 3274 3275 rv = fueword32(&sem->_flags, &flags); 3276 if (rv == -1) 3277 return (EFAULT); 3278 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3279 return (error); 3280 umtxq_lock(&key); 3281 umtxq_busy(&key); 3282 cnt = umtxq_count(&key); 3283 if (cnt > 0) { 3284 /* 3285 * If this was the last sleeping thread, clear the waiters 3286 * flag in _count. 3287 */ 3288 if (cnt == 1) { 3289 umtxq_unlock(&key); 3290 rv = fueword32(&sem->_count, &count); 3291 while (rv != -1 && count & USEM_HAS_WAITERS) 3292 rv = casueword32(&sem->_count, count, &count, 3293 count & ~USEM_HAS_WAITERS); 3294 if (rv == -1) 3295 error = EFAULT; 3296 umtxq_lock(&key); 3297 } 3298 3299 umtxq_signal(&key, 1); 3300 } 3301 umtxq_unbusy(&key); 3302 umtxq_unlock(&key); 3303 umtx_key_release(&key); 3304 return (error); 3305 } 3306 3307 inline int 3308 umtx_copyin_timeout(const void *addr, struct timespec *tsp) 3309 { 3310 int error; 3311 3312 error = copyin(addr, tsp, sizeof(struct timespec)); 3313 if (error == 0) { 3314 if (tsp->tv_sec < 0 || 3315 tsp->tv_nsec >= 1000000000 || 3316 tsp->tv_nsec < 0) 3317 error = EINVAL; 3318 } 3319 return (error); 3320 } 3321 3322 static inline int 3323 umtx_copyin_umtx_time(const void *addr, size_t size, struct _umtx_time *tp) 3324 { 3325 int error; 3326 3327 if (size <= sizeof(struct timespec)) { 3328 tp->_clockid = CLOCK_REALTIME; 3329 tp->_flags = 0; 3330 error = copyin(addr, &tp->_timeout, sizeof(struct timespec)); 3331 } else 3332 error = copyin(addr, tp, sizeof(struct _umtx_time)); 3333 if (error != 0) 3334 return (error); 3335 if (tp->_timeout.tv_sec < 0 || 3336 tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0) 3337 return (EINVAL); 3338 return (0); 3339 } 3340 3341 static int 3342 __umtx_op_unimpl(struct thread *td, struct _umtx_op_args *uap) 3343 { 3344 3345 return (EOPNOTSUPP); 3346 } 3347 3348 static int 3349 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap) 3350 { 3351 struct _umtx_time timeout, *tm_p; 3352 int error; 3353 3354 if (uap->uaddr2 == NULL) 3355 tm_p = NULL; 3356 else { 3357 error = umtx_copyin_umtx_time( 3358 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3359 if (error != 0) 3360 return (error); 3361 tm_p = &timeout; 3362 } 3363 return (do_wait(td, uap->obj, uap->val, tm_p, 0, 0)); 3364 } 3365 3366 static int 3367 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap) 3368 { 3369 struct _umtx_time timeout, *tm_p; 3370 int error; 3371 3372 if (uap->uaddr2 == NULL) 3373 tm_p = NULL; 3374 else { 3375 error = umtx_copyin_umtx_time( 3376 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3377 if (error != 0) 3378 return (error); 3379 tm_p = &timeout; 3380 } 3381 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); 3382 } 3383 3384 static int 3385 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap) 3386 { 3387 struct _umtx_time *tm_p, timeout; 3388 int error; 3389 3390 if (uap->uaddr2 == NULL) 3391 tm_p = NULL; 3392 else { 3393 error = umtx_copyin_umtx_time( 3394 uap->uaddr2, (size_t)uap->uaddr1, 
&timeout); 3395 if (error != 0) 3396 return (error); 3397 tm_p = &timeout; 3398 } 3399 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1)); 3400 } 3401 3402 static int 3403 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap) 3404 { 3405 3406 return (kern_umtx_wake(td, uap->obj, uap->val, 0)); 3407 } 3408 3409 #define BATCH_SIZE 128 3410 static int 3411 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap) 3412 { 3413 char *uaddrs[BATCH_SIZE], **upp; 3414 int count, error, i, pos, tocopy; 3415 3416 upp = (char **)uap->obj; 3417 error = 0; 3418 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 3419 pos += tocopy) { 3420 tocopy = MIN(count, BATCH_SIZE); 3421 error = copyin(upp + pos, uaddrs, tocopy * sizeof(char *)); 3422 if (error != 0) 3423 break; 3424 for (i = 0; i < tocopy; ++i) 3425 kern_umtx_wake(td, uaddrs[i], INT_MAX, 1); 3426 maybe_yield(); 3427 } 3428 return (error); 3429 } 3430 3431 static int 3432 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap) 3433 { 3434 3435 return (kern_umtx_wake(td, uap->obj, uap->val, 1)); 3436 } 3437 3438 static int 3439 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap) 3440 { 3441 struct _umtx_time *tm_p, timeout; 3442 int error; 3443 3444 /* Allow a null timespec (wait forever). */ 3445 if (uap->uaddr2 == NULL) 3446 tm_p = NULL; 3447 else { 3448 error = umtx_copyin_umtx_time( 3449 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3450 if (error != 0) 3451 return (error); 3452 tm_p = &timeout; 3453 } 3454 return (do_lock_umutex(td, uap->obj, tm_p, 0)); 3455 } 3456 3457 static int 3458 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap) 3459 { 3460 3461 return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY)); 3462 } 3463 3464 static int 3465 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap) 3466 { 3467 struct _umtx_time *tm_p, timeout; 3468 int error; 3469 3470 /* Allow a null timespec (wait forever). */ 3471 if (uap->uaddr2 == NULL) 3472 tm_p = NULL; 3473 else { 3474 error = umtx_copyin_umtx_time( 3475 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3476 if (error != 0) 3477 return (error); 3478 tm_p = &timeout; 3479 } 3480 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT)); 3481 } 3482 3483 static int 3484 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap) 3485 { 3486 3487 return (do_wake_umutex(td, uap->obj)); 3488 } 3489 3490 static int 3491 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap) 3492 { 3493 3494 return (do_unlock_umutex(td, uap->obj, false)); 3495 } 3496 3497 static int 3498 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap) 3499 { 3500 3501 return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1)); 3502 } 3503 3504 static int 3505 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap) 3506 { 3507 struct timespec *ts, timeout; 3508 int error; 3509 3510 /* Allow a null timespec (wait forever). 
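	 * uap->val carries the CVWAIT_* flags: CVWAIT_CLOCKID selects
	 * the clock stored in c_clockid and CVWAIT_ABSTIME makes the
	 * timeout absolute (see do_cv_wait() above).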
*/ 3511 if (uap->uaddr2 == NULL) 3512 ts = NULL; 3513 else { 3514 error = umtx_copyin_timeout(uap->uaddr2, &timeout); 3515 if (error != 0) 3516 return (error); 3517 ts = &timeout; 3518 } 3519 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 3520 } 3521 3522 static int 3523 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap) 3524 { 3525 3526 return (do_cv_signal(td, uap->obj)); 3527 } 3528 3529 static int 3530 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap) 3531 { 3532 3533 return (do_cv_broadcast(td, uap->obj)); 3534 } 3535 3536 static int 3537 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap) 3538 { 3539 struct _umtx_time timeout; 3540 int error; 3541 3542 /* Allow a null timespec (wait forever). */ 3543 if (uap->uaddr2 == NULL) { 3544 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 3545 } else { 3546 error = umtx_copyin_umtx_time(uap->uaddr2, 3547 (size_t)uap->uaddr1, &timeout); 3548 if (error != 0) 3549 return (error); 3550 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 3551 } 3552 return (error); 3553 } 3554 3555 static int 3556 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap) 3557 { 3558 struct _umtx_time timeout; 3559 int error; 3560 3561 /* Allow a null timespec (wait forever). */ 3562 if (uap->uaddr2 == NULL) { 3563 error = do_rw_wrlock(td, uap->obj, 0); 3564 } else { 3565 error = umtx_copyin_umtx_time(uap->uaddr2, 3566 (size_t)uap->uaddr1, &timeout); 3567 if (error != 0) 3568 return (error); 3569 3570 error = do_rw_wrlock(td, uap->obj, &timeout); 3571 } 3572 return (error); 3573 } 3574 3575 static int 3576 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap) 3577 { 3578 3579 return (do_rw_unlock(td, uap->obj)); 3580 } 3581 3582 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3583 static int 3584 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap) 3585 { 3586 struct _umtx_time *tm_p, timeout; 3587 int error; 3588 3589 /* Allow a null timespec (wait forever). */ 3590 if (uap->uaddr2 == NULL) 3591 tm_p = NULL; 3592 else { 3593 error = umtx_copyin_umtx_time( 3594 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3595 if (error != 0) 3596 return (error); 3597 tm_p = &timeout; 3598 } 3599 return (do_sem_wait(td, uap->obj, tm_p)); 3600 } 3601 3602 static int 3603 __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap) 3604 { 3605 3606 return (do_sem_wake(td, uap->obj)); 3607 } 3608 #endif 3609 3610 static int 3611 __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap) 3612 { 3613 3614 return (do_wake2_umutex(td, uap->obj, uap->val)); 3615 } 3616 3617 static int 3618 __umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap) 3619 { 3620 struct _umtx_time *tm_p, timeout; 3621 size_t uasize; 3622 int error; 3623 3624 /* Allow a null timespec (wait forever). 
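	 * Otherwise uap->uaddr1 carries the size of the object at
	 * uaddr2; umtx_copyin_umtx_time() uses it to distinguish a bare
	 * timespec from a full _umtx_time, and the copyout below checks
	 * it to see whether there is room to return the remaining time.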
*/ 3625 if (uap->uaddr2 == NULL) { 3626 uasize = 0; 3627 tm_p = NULL; 3628 } else { 3629 uasize = (size_t)uap->uaddr1; 3630 error = umtx_copyin_umtx_time(uap->uaddr2, uasize, &timeout); 3631 if (error != 0) 3632 return (error); 3633 tm_p = &timeout; 3634 } 3635 error = do_sem2_wait(td, uap->obj, tm_p); 3636 if (error == EINTR && uap->uaddr2 != NULL && 3637 (timeout._flags & UMTX_ABSTIME) == 0 && 3638 uasize >= sizeof(struct _umtx_time) + sizeof(struct timespec)) { 3639 error = copyout(&timeout._timeout, 3640 (struct _umtx_time *)uap->uaddr2 + 1, 3641 sizeof(struct timespec)); 3642 if (error == 0) { 3643 error = EINTR; 3644 } 3645 } 3646 3647 return (error); 3648 } 3649 3650 static int 3651 __umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap) 3652 { 3653 3654 return (do_sem2_wake(td, uap->obj)); 3655 } 3656 3657 #define USHM_OBJ_UMTX(o) \ 3658 ((struct umtx_shm_obj_list *)(&(o)->umtx_data)) 3659 3660 #define USHMF_REG_LINKED 0x0001 3661 #define USHMF_OBJ_LINKED 0x0002 3662 struct umtx_shm_reg { 3663 TAILQ_ENTRY(umtx_shm_reg) ushm_reg_link; 3664 LIST_ENTRY(umtx_shm_reg) ushm_obj_link; 3665 struct umtx_key ushm_key; 3666 struct ucred *ushm_cred; 3667 struct shmfd *ushm_obj; 3668 u_int ushm_refcnt; 3669 u_int ushm_flags; 3670 }; 3671 3672 LIST_HEAD(umtx_shm_obj_list, umtx_shm_reg); 3673 TAILQ_HEAD(umtx_shm_reg_head, umtx_shm_reg); 3674 3675 static uma_zone_t umtx_shm_reg_zone; 3676 static struct umtx_shm_reg_head umtx_shm_registry[UMTX_CHAINS]; 3677 static struct mtx umtx_shm_lock; 3678 static struct umtx_shm_reg_head umtx_shm_reg_delfree = 3679 TAILQ_HEAD_INITIALIZER(umtx_shm_reg_delfree); 3680 3681 static void umtx_shm_free_reg(struct umtx_shm_reg *reg); 3682 3683 static void 3684 umtx_shm_reg_delfree_tq(void *context __unused, int pending __unused) 3685 { 3686 struct umtx_shm_reg_head d; 3687 struct umtx_shm_reg *reg, *reg1; 3688 3689 TAILQ_INIT(&d); 3690 mtx_lock(&umtx_shm_lock); 3691 TAILQ_CONCAT(&d, &umtx_shm_reg_delfree, ushm_reg_link); 3692 mtx_unlock(&umtx_shm_lock); 3693 TAILQ_FOREACH_SAFE(reg, &d, ushm_reg_link, reg1) { 3694 TAILQ_REMOVE(&d, reg, ushm_reg_link); 3695 umtx_shm_free_reg(reg); 3696 } 3697 } 3698 3699 static struct task umtx_shm_reg_delfree_task = 3700 TASK_INITIALIZER(0, umtx_shm_reg_delfree_tq, NULL); 3701 3702 static struct umtx_shm_reg * 3703 umtx_shm_find_reg_locked(const struct umtx_key *key) 3704 { 3705 struct umtx_shm_reg *reg; 3706 struct umtx_shm_reg_head *reg_head; 3707 3708 KASSERT(key->shared, ("umtx_p_find_rg: private key")); 3709 mtx_assert(&umtx_shm_lock, MA_OWNED); 3710 reg_head = &umtx_shm_registry[key->hash]; 3711 TAILQ_FOREACH(reg, reg_head, ushm_reg_link) { 3712 KASSERT(reg->ushm_key.shared, 3713 ("non-shared key on reg %p %d", reg, reg->ushm_key.shared)); 3714 if (reg->ushm_key.info.shared.object == 3715 key->info.shared.object && 3716 reg->ushm_key.info.shared.offset == 3717 key->info.shared.offset) { 3718 KASSERT(reg->ushm_key.type == TYPE_SHM, ("TYPE_USHM")); 3719 KASSERT(reg->ushm_refcnt > 0, 3720 ("reg %p refcnt 0 onlist", reg)); 3721 KASSERT((reg->ushm_flags & USHMF_REG_LINKED) != 0, 3722 ("reg %p not linked", reg)); 3723 reg->ushm_refcnt++; 3724 return (reg); 3725 } 3726 } 3727 return (NULL); 3728 } 3729 3730 static struct umtx_shm_reg * 3731 umtx_shm_find_reg(const struct umtx_key *key) 3732 { 3733 struct umtx_shm_reg *reg; 3734 3735 mtx_lock(&umtx_shm_lock); 3736 reg = umtx_shm_find_reg_locked(key); 3737 mtx_unlock(&umtx_shm_lock); 3738 return (reg); 3739 } 3740 3741 static void 3742 umtx_shm_free_reg(struct umtx_shm_reg *reg) 
3743 { 3744 3745 chgumtxcnt(reg->ushm_cred->cr_ruidinfo, -1, 0); 3746 crfree(reg->ushm_cred); 3747 shm_drop(reg->ushm_obj); 3748 uma_zfree(umtx_shm_reg_zone, reg); 3749 } 3750 3751 static bool 3752 umtx_shm_unref_reg_locked(struct umtx_shm_reg *reg, bool force) 3753 { 3754 bool res; 3755 3756 mtx_assert(&umtx_shm_lock, MA_OWNED); 3757 KASSERT(reg->ushm_refcnt > 0, ("ushm_reg %p refcnt 0", reg)); 3758 reg->ushm_refcnt--; 3759 res = reg->ushm_refcnt == 0; 3760 if (res || force) { 3761 if ((reg->ushm_flags & USHMF_REG_LINKED) != 0) { 3762 TAILQ_REMOVE(&umtx_shm_registry[reg->ushm_key.hash], 3763 reg, ushm_reg_link); 3764 reg->ushm_flags &= ~USHMF_REG_LINKED; 3765 } 3766 if ((reg->ushm_flags & USHMF_OBJ_LINKED) != 0) { 3767 LIST_REMOVE(reg, ushm_obj_link); 3768 reg->ushm_flags &= ~USHMF_OBJ_LINKED; 3769 } 3770 } 3771 return (res); 3772 } 3773 3774 static void 3775 umtx_shm_unref_reg(struct umtx_shm_reg *reg, bool force) 3776 { 3777 vm_object_t object; 3778 bool dofree; 3779 3780 if (force) { 3781 object = reg->ushm_obj->shm_object; 3782 VM_OBJECT_WLOCK(object); 3783 object->flags |= OBJ_UMTXDEAD; 3784 VM_OBJECT_WUNLOCK(object); 3785 } 3786 mtx_lock(&umtx_shm_lock); 3787 dofree = umtx_shm_unref_reg_locked(reg, force); 3788 mtx_unlock(&umtx_shm_lock); 3789 if (dofree) 3790 umtx_shm_free_reg(reg); 3791 } 3792 3793 void 3794 umtx_shm_object_init(vm_object_t object) 3795 { 3796 3797 LIST_INIT(USHM_OBJ_UMTX(object)); 3798 } 3799 3800 void 3801 umtx_shm_object_terminated(vm_object_t object) 3802 { 3803 struct umtx_shm_reg *reg, *reg1; 3804 bool dofree; 3805 3806 if (LIST_EMPTY(USHM_OBJ_UMTX(object))) 3807 return; 3808 3809 dofree = false; 3810 mtx_lock(&umtx_shm_lock); 3811 LIST_FOREACH_SAFE(reg, USHM_OBJ_UMTX(object), ushm_obj_link, reg1) { 3812 if (umtx_shm_unref_reg_locked(reg, true)) { 3813 TAILQ_INSERT_TAIL(&umtx_shm_reg_delfree, reg, 3814 ushm_reg_link); 3815 dofree = true; 3816 } 3817 } 3818 mtx_unlock(&umtx_shm_lock); 3819 if (dofree) 3820 taskqueue_enqueue(taskqueue_thread, &umtx_shm_reg_delfree_task); 3821 } 3822 3823 static int 3824 umtx_shm_create_reg(struct thread *td, const struct umtx_key *key, 3825 struct umtx_shm_reg **res) 3826 { 3827 struct umtx_shm_reg *reg, *reg1; 3828 struct ucred *cred; 3829 int error; 3830 3831 reg = umtx_shm_find_reg(key); 3832 if (reg != NULL) { 3833 *res = reg; 3834 return (0); 3835 } 3836 cred = td->td_ucred; 3837 if (!chgumtxcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_UMTXP))) 3838 return (ENOMEM); 3839 reg = uma_zalloc(umtx_shm_reg_zone, M_WAITOK | M_ZERO); 3840 reg->ushm_refcnt = 1; 3841 bcopy(key, &reg->ushm_key, sizeof(*key)); 3842 reg->ushm_obj = shm_alloc(td->td_ucred, O_RDWR); 3843 reg->ushm_cred = crhold(cred); 3844 error = shm_dotruncate(reg->ushm_obj, PAGE_SIZE); 3845 if (error != 0) { 3846 umtx_shm_free_reg(reg); 3847 return (error); 3848 } 3849 mtx_lock(&umtx_shm_lock); 3850 reg1 = umtx_shm_find_reg_locked(key); 3851 if (reg1 != NULL) { 3852 mtx_unlock(&umtx_shm_lock); 3853 umtx_shm_free_reg(reg); 3854 *res = reg1; 3855 return (0); 3856 } 3857 reg->ushm_refcnt++; 3858 TAILQ_INSERT_TAIL(&umtx_shm_registry[key->hash], reg, ushm_reg_link); 3859 LIST_INSERT_HEAD(USHM_OBJ_UMTX(key->info.shared.object), reg, 3860 ushm_obj_link); 3861 reg->ushm_flags = USHMF_REG_LINKED | USHMF_OBJ_LINKED; 3862 mtx_unlock(&umtx_shm_lock); 3863 *res = reg; 3864 return (0); 3865 } 3866 3867 static int 3868 umtx_shm_alive(struct thread *td, void *addr) 3869 { 3870 vm_map_t map; 3871 vm_map_entry_t entry; 3872 vm_object_t object; 3873 vm_pindex_t pindex; 3874 vm_prot_t
prot; 3875 int res, ret; 3876 boolean_t wired; 3877 3878 map = &td->td_proc->p_vmspace->vm_map; 3879 res = vm_map_lookup(&map, (uintptr_t)addr, VM_PROT_READ, &entry, 3880 &object, &pindex, &prot, &wired); 3881 if (res != KERN_SUCCESS) 3882 return (EFAULT); 3883 if (object == NULL) 3884 ret = EINVAL; 3885 else 3886 ret = (object->flags & OBJ_UMTXDEAD) != 0 ? ENOTTY : 0; 3887 vm_map_lookup_done(map, entry); 3888 return (ret); 3889 } 3890 3891 static void 3892 umtx_shm_init(void) 3893 { 3894 int i; 3895 3896 umtx_shm_reg_zone = uma_zcreate("umtx_shm", sizeof(struct umtx_shm_reg), 3897 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 3898 mtx_init(&umtx_shm_lock, "umtxshm", NULL, MTX_DEF); 3899 for (i = 0; i < nitems(umtx_shm_registry); i++) 3900 TAILQ_INIT(&umtx_shm_registry[i]); 3901 } 3902 3903 static int 3904 umtx_shm(struct thread *td, void *addr, u_int flags) 3905 { 3906 struct umtx_key key; 3907 struct umtx_shm_reg *reg; 3908 struct file *fp; 3909 int error, fd; 3910 3911 if (__bitcount(flags & (UMTX_SHM_CREAT | UMTX_SHM_LOOKUP | 3912 UMTX_SHM_DESTROY | UMTX_SHM_ALIVE)) != 1) 3913 return (EINVAL); 3914 if ((flags & UMTX_SHM_ALIVE) != 0) 3915 return (umtx_shm_alive(td, addr)); 3916 error = umtx_key_get(addr, TYPE_SHM, PROCESS_SHARE, &key); 3917 if (error != 0) 3918 return (error); 3919 KASSERT(key.shared == 1, ("non-shared key")); 3920 if ((flags & UMTX_SHM_CREAT) != 0) { 3921 error = umtx_shm_create_reg(td, &key, &reg); 3922 } else { 3923 reg = umtx_shm_find_reg(&key); 3924 if (reg == NULL) 3925 error = ESRCH; 3926 } 3927 umtx_key_release(&key); 3928 if (error != 0) 3929 return (error); 3930 KASSERT(reg != NULL, ("no reg")); 3931 if ((flags & UMTX_SHM_DESTROY) != 0) { 3932 umtx_shm_unref_reg(reg, true); 3933 } else { 3934 #if 0 3935 #ifdef MAC 3936 error = mac_posixshm_check_open(td->td_ucred, 3937 reg->ushm_obj, FFLAGS(O_RDWR)); 3938 if (error == 0) 3939 #endif 3940 error = shm_access(reg->ushm_obj, td->td_ucred, 3941 FFLAGS(O_RDWR)); 3942 if (error == 0) 3943 #endif 3944 error = falloc_caps(td, &fp, &fd, O_CLOEXEC, NULL); 3945 if (error == 0) { 3946 shm_hold(reg->ushm_obj); 3947 finit(fp, FFLAGS(O_RDWR), DTYPE_SHM, reg->ushm_obj, 3948 &shm_ops); 3949 td->td_retval[0] = fd; 3950 fdrop(fp, td); 3951 } 3952 } 3953 umtx_shm_unref_reg(reg, false); 3954 return (error); 3955 } 3956 3957 static int 3958 __umtx_op_shm(struct thread *td, struct _umtx_op_args *uap) 3959 { 3960 3961 return (umtx_shm(td, uap->uaddr1, uap->val)); 3962 } 3963 3964 static int 3965 umtx_robust_lists(struct thread *td, struct umtx_robust_lists_params *rbp) 3966 { 3967 3968 td->td_rb_list = rbp->robust_list_offset; 3969 td->td_rbp_list = rbp->robust_priv_list_offset; 3970 td->td_rb_inact = rbp->robust_inact_offset; 3971 return (0); 3972 } 3973 3974 static int 3975 __umtx_op_robust_lists(struct thread *td, struct _umtx_op_args *uap) 3976 { 3977 struct umtx_robust_lists_params rb; 3978 int error; 3979 3980 if (uap->val > sizeof(rb)) 3981 return (EINVAL); 3982 bzero(&rb, sizeof(rb)); 3983 error = copyin(uap->uaddr1, &rb, uap->val); 3984 if (error != 0) 3985 return (error); 3986 return (umtx_robust_lists(td, &rb)); 3987 } 3988 3989 typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap); 3990 3991 static const _umtx_op_func op_table[] = { 3992 [UMTX_OP_RESERVED0] = __umtx_op_unimpl, 3993 [UMTX_OP_RESERVED1] = __umtx_op_unimpl, 3994 [UMTX_OP_WAIT] = __umtx_op_wait, 3995 [UMTX_OP_WAKE] = __umtx_op_wake, 3996 [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex, 3997 [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex, 3998
[UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex, 3999 [UMTX_OP_SET_CEILING] = __umtx_op_set_ceiling, 4000 [UMTX_OP_CV_WAIT] = __umtx_op_cv_wait, 4001 [UMTX_OP_CV_SIGNAL] = __umtx_op_cv_signal, 4002 [UMTX_OP_CV_BROADCAST] = __umtx_op_cv_broadcast, 4003 [UMTX_OP_WAIT_UINT] = __umtx_op_wait_uint, 4004 [UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock, 4005 [UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock, 4006 [UMTX_OP_RW_UNLOCK] = __umtx_op_rw_unlock, 4007 [UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private, 4008 [UMTX_OP_WAKE_PRIVATE] = __umtx_op_wake_private, 4009 [UMTX_OP_MUTEX_WAIT] = __umtx_op_wait_umutex, 4010 [UMTX_OP_MUTEX_WAKE] = __umtx_op_wake_umutex, 4011 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4012 [UMTX_OP_SEM_WAIT] = __umtx_op_sem_wait, 4013 [UMTX_OP_SEM_WAKE] = __umtx_op_sem_wake, 4014 #else 4015 [UMTX_OP_SEM_WAIT] = __umtx_op_unimpl, 4016 [UMTX_OP_SEM_WAKE] = __umtx_op_unimpl, 4017 #endif 4018 [UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private, 4019 [UMTX_OP_MUTEX_WAKE2] = __umtx_op_wake2_umutex, 4020 [UMTX_OP_SEM2_WAIT] = __umtx_op_sem2_wait, 4021 [UMTX_OP_SEM2_WAKE] = __umtx_op_sem2_wake, 4022 [UMTX_OP_SHM] = __umtx_op_shm, 4023 [UMTX_OP_ROBUST_LISTS] = __umtx_op_robust_lists, 4024 }; 4025 4026 int 4027 sys__umtx_op(struct thread *td, struct _umtx_op_args *uap) 4028 { 4029 4030 if ((unsigned)uap->op < nitems(op_table)) 4031 return (*op_table[uap->op])(td, uap); 4032 return (EINVAL); 4033 } 4034 4035 #ifdef COMPAT_FREEBSD32 4036 4037 struct timespec32 { 4038 int32_t tv_sec; 4039 int32_t tv_nsec; 4040 }; 4041 4042 struct umtx_time32 { 4043 struct timespec32 timeout; 4044 uint32_t flags; 4045 uint32_t clockid; 4046 }; 4047 4048 static inline int 4049 umtx_copyin_timeout32(void *addr, struct timespec *tsp) 4050 { 4051 struct timespec32 ts32; 4052 int error; 4053 4054 error = copyin(addr, &ts32, sizeof(struct timespec32)); 4055 if (error == 0) { 4056 if (ts32.tv_sec < 0 || 4057 ts32.tv_nsec >= 1000000000 || 4058 ts32.tv_nsec < 0) 4059 error = EINVAL; 4060 else { 4061 tsp->tv_sec = ts32.tv_sec; 4062 tsp->tv_nsec = ts32.tv_nsec; 4063 } 4064 } 4065 return (error); 4066 } 4067 4068 static inline int 4069 umtx_copyin_umtx_time32(const void *addr, size_t size, struct _umtx_time *tp) 4070 { 4071 struct umtx_time32 t32; 4072 int error; 4073 4074 t32.clockid = CLOCK_REALTIME; 4075 t32.flags = 0; 4076 if (size <= sizeof(struct timespec32)) 4077 error = copyin(addr, &t32.timeout, sizeof(struct timespec32)); 4078 else 4079 error = copyin(addr, &t32, sizeof(struct umtx_time32)); 4080 if (error != 0) 4081 return (error); 4082 if (t32.timeout.tv_sec < 0 || 4083 t32.timeout.tv_nsec >= 1000000000 || t32.timeout.tv_nsec < 0) 4084 return (EINVAL); 4085 tp->_timeout.tv_sec = t32.timeout.tv_sec; 4086 tp->_timeout.tv_nsec = t32.timeout.tv_nsec; 4087 tp->_flags = t32.flags; 4088 tp->_clockid = t32.clockid; 4089 return (0); 4090 } 4091 4092 static int 4093 __umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 4094 { 4095 struct _umtx_time *tm_p, timeout; 4096 int error; 4097 4098 if (uap->uaddr2 == NULL) 4099 tm_p = NULL; 4100 else { 4101 error = umtx_copyin_umtx_time32(uap->uaddr2, 4102 (size_t)uap->uaddr1, &timeout); 4103 if (error != 0) 4104 return (error); 4105 tm_p = &timeout; 4106 } 4107 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); 4108 } 4109 4110 static int 4111 __umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap) 4112 { 4113 struct _umtx_time *tm_p, timeout; 4114 int error; 4115 4116 /* Allow a null timespec (wait forever). 
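	 * The 32-bit timeout layouts are converted by
	 * umtx_copyin_umtx_time32() above before the common
	 * do_lock_umutex() path is entered.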
*/ 4117 if (uap->uaddr2 == NULL) 4118 tm_p = NULL; 4119 else { 4120 error = umtx_copyin_umtx_time32(uap->uaddr2, 4121 (size_t)uap->uaddr1, &timeout); 4122 if (error != 0) 4123 return (error); 4124 tm_p = &timeout; 4125 } 4126 return (do_lock_umutex(td, uap->obj, tm_p, 0)); 4127 } 4128 4129 static int 4130 __umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap) 4131 { 4132 struct _umtx_time *tm_p, timeout; 4133 int error; 4134 4135 /* Allow a null timespec (wait forever). */ 4136 if (uap->uaddr2 == NULL) 4137 tm_p = NULL; 4138 else { 4139 error = umtx_copyin_umtx_time32(uap->uaddr2, 4140 (size_t)uap->uaddr1, &timeout); 4141 if (error != 0) 4142 return (error); 4143 tm_p = &timeout; 4144 } 4145 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT)); 4146 } 4147 4148 static int 4149 __umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 4150 { 4151 struct timespec *ts, timeout; 4152 int error; 4153 4154 /* Allow a null timespec (wait forever). */ 4155 if (uap->uaddr2 == NULL) 4156 ts = NULL; 4157 else { 4158 error = umtx_copyin_timeout32(uap->uaddr2, &timeout); 4159 if (error != 0) 4160 return (error); 4161 ts = &timeout; 4162 } 4163 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 4164 } 4165 4166 static int 4167 __umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap) 4168 { 4169 struct _umtx_time timeout; 4170 int error; 4171 4172 /* Allow a null timespec (wait forever). */ 4173 if (uap->uaddr2 == NULL) { 4174 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 4175 } else { 4176 error = umtx_copyin_umtx_time32(uap->uaddr2, 4177 (size_t)uap->uaddr1, &timeout); 4178 if (error != 0) 4179 return (error); 4180 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 4181 } 4182 return (error); 4183 } 4184 4185 static int 4186 __umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap) 4187 { 4188 struct _umtx_time timeout; 4189 int error; 4190 4191 /* Allow a null timespec (wait forever). */ 4192 if (uap->uaddr2 == NULL) { 4193 error = do_rw_wrlock(td, uap->obj, 0); 4194 } else { 4195 error = umtx_copyin_umtx_time32(uap->uaddr2, 4196 (size_t)uap->uaddr1, &timeout); 4197 if (error != 0) 4198 return (error); 4199 error = do_rw_wrlock(td, uap->obj, &timeout); 4200 } 4201 return (error); 4202 } 4203 4204 static int 4205 __umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap) 4206 { 4207 struct _umtx_time *tm_p, timeout; 4208 int error; 4209 4210 if (uap->uaddr2 == NULL) 4211 tm_p = NULL; 4212 else { 4213 error = umtx_copyin_umtx_time32( 4214 uap->uaddr2, (size_t)uap->uaddr1,&timeout); 4215 if (error != 0) 4216 return (error); 4217 tm_p = &timeout; 4218 } 4219 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1)); 4220 } 4221 4222 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4223 static int 4224 __umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 4225 { 4226 struct _umtx_time *tm_p, timeout; 4227 int error; 4228 4229 /* Allow a null timespec (wait forever). 
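	 * As with the native entry point, this old _usem interface is
	 * compiled only for FreeBSD 9/10 binary compatibility; newer
	 * binaries use the _usem2 operations.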
*/ 4230 if (uap->uaddr2 == NULL) 4231 tm_p = NULL; 4232 else { 4233 error = umtx_copyin_umtx_time32(uap->uaddr2, 4234 (size_t)uap->uaddr1, &timeout); 4235 if (error != 0) 4236 return (error); 4237 tm_p = &timeout; 4238 } 4239 return (do_sem_wait(td, uap->obj, tm_p)); 4240 } 4241 #endif 4242 4243 static int 4244 __umtx_op_sem2_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 4245 { 4246 struct _umtx_time *tm_p, timeout; 4247 size_t uasize; 4248 int error; 4249 4250 /* Allow a null timespec (wait forever). */ 4251 if (uap->uaddr2 == NULL) { 4252 uasize = 0; 4253 tm_p = NULL; 4254 } else { 4255 uasize = (size_t)uap->uaddr1; 4256 error = umtx_copyin_umtx_time32(uap->uaddr2, uasize, &timeout); 4257 if (error != 0) 4258 return (error); 4259 tm_p = &timeout; 4260 } 4261 error = do_sem2_wait(td, uap->obj, tm_p); 4262 if (error == EINTR && uap->uaddr2 != NULL && 4263 (timeout._flags & UMTX_ABSTIME) == 0 && 4264 uasize >= sizeof(struct umtx_time32) + sizeof(struct timespec32)) { 4265 struct timespec32 remain32 = { 4266 .tv_sec = timeout._timeout.tv_sec, 4267 .tv_nsec = timeout._timeout.tv_nsec 4268 }; 4269 error = copyout(&remain32, 4270 (struct umtx_time32 *)uap->uaddr2 + 1, 4271 sizeof(struct timespec32)); 4272 if (error == 0) { 4273 error = EINTR; 4274 } 4275 } 4276 4277 return (error); 4278 } 4279 4280 static int 4281 __umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap) 4282 { 4283 uint32_t uaddrs[BATCH_SIZE], **upp; 4284 int count, error, i, pos, tocopy; 4285 4286 upp = (uint32_t **)uap->obj; 4287 error = 0; 4288 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 4289 pos += tocopy) { 4290 tocopy = MIN(count, BATCH_SIZE); 4291 error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t)); 4292 if (error != 0) 4293 break; 4294 for (i = 0; i < tocopy; ++i) 4295 kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i], 4296 INT_MAX, 1); 4297 maybe_yield(); 4298 } 4299 return (error); 4300 } 4301 4302 struct umtx_robust_lists_params_compat32 { 4303 uint32_t robust_list_offset; 4304 uint32_t robust_priv_list_offset; 4305 uint32_t robust_inact_offset; 4306 }; 4307 4308 static int 4309 __umtx_op_robust_lists_compat32(struct thread *td, struct _umtx_op_args *uap) 4310 { 4311 struct umtx_robust_lists_params rb; 4312 struct umtx_robust_lists_params_compat32 rb32; 4313 int error; 4314 4315 if (uap->val > sizeof(rb32)) 4316 return (EINVAL); 4317 bzero(&rb, sizeof(rb)); 4318 bzero(&rb32, sizeof(rb32)); 4319 error = copyin(uap->uaddr1, &rb32, uap->val); 4320 if (error != 0) 4321 return (error); 4322 rb.robust_list_offset = rb32.robust_list_offset; 4323 rb.robust_priv_list_offset = rb32.robust_priv_list_offset; 4324 rb.robust_inact_offset = rb32.robust_inact_offset; 4325 return (umtx_robust_lists(td, &rb)); 4326 } 4327 4328 static const _umtx_op_func op_table_compat32[] = { 4329 [UMTX_OP_RESERVED0] = __umtx_op_unimpl, 4330 [UMTX_OP_RESERVED1] = __umtx_op_unimpl, 4331 [UMTX_OP_WAIT] = __umtx_op_wait_compat32, 4332 [UMTX_OP_WAKE] = __umtx_op_wake, 4333 [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex, 4334 [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex_compat32, 4335 [UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex, 4336 [UMTX_OP_SET_CEILING] = __umtx_op_set_ceiling, 4337 [UMTX_OP_CV_WAIT] = __umtx_op_cv_wait_compat32, 4338 [UMTX_OP_CV_SIGNAL] = __umtx_op_cv_signal, 4339 [UMTX_OP_CV_BROADCAST] = __umtx_op_cv_broadcast, 4340 [UMTX_OP_WAIT_UINT] = __umtx_op_wait_compat32, 4341 [UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock_compat32, 4342 [UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock_compat32, 
4343 [UMTX_OP_RW_UNLOCK] = __umtx_op_rw_unlock, 4344 [UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private_compat32, 4345 [UMTX_OP_WAKE_PRIVATE] = __umtx_op_wake_private, 4346 [UMTX_OP_MUTEX_WAIT] = __umtx_op_wait_umutex_compat32, 4347 [UMTX_OP_MUTEX_WAKE] = __umtx_op_wake_umutex, 4348 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4349 [UMTX_OP_SEM_WAIT] = __umtx_op_sem_wait_compat32, 4350 [UMTX_OP_SEM_WAKE] = __umtx_op_sem_wake, 4351 #else 4352 [UMTX_OP_SEM_WAIT] = __umtx_op_unimpl, 4353 [UMTX_OP_SEM_WAKE] = __umtx_op_unimpl, 4354 #endif 4355 [UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private32, 4356 [UMTX_OP_MUTEX_WAKE2] = __umtx_op_wake2_umutex, 4357 [UMTX_OP_SEM2_WAIT] = __umtx_op_sem2_wait_compat32, 4358 [UMTX_OP_SEM2_WAKE] = __umtx_op_sem2_wake, 4359 [UMTX_OP_SHM] = __umtx_op_shm, 4360 [UMTX_OP_ROBUST_LISTS] = __umtx_op_robust_lists_compat32, 4361 }; 4362 4363 int 4364 freebsd32__umtx_op(struct thread *td, struct freebsd32__umtx_op_args *uap) 4365 { 4366 4367 if ((unsigned)uap->op < nitems(op_table_compat32)) { 4368 return (*op_table_compat32[uap->op])(td, 4369 (struct _umtx_op_args *)uap); 4370 } 4371 return (EINVAL); 4372 } 4373 #endif 4374 4375 void 4376 umtx_thread_init(struct thread *td) 4377 { 4378 4379 td->td_umtxq = umtxq_alloc(); 4380 td->td_umtxq->uq_thread = td; 4381 } 4382 4383 void 4384 umtx_thread_fini(struct thread *td) 4385 { 4386 4387 umtxq_free(td->td_umtxq); 4388 } 4389 4390 /* 4391 * Called when a new thread is created, e.g. by fork(). 4392 */ 4393 void 4394 umtx_thread_alloc(struct thread *td) 4395 { 4396 struct umtx_q *uq; 4397 4398 uq = td->td_umtxq; 4399 uq->uq_inherited_pri = PRI_MAX; 4400 4401 KASSERT(uq->uq_flags == 0, ("uq_flags != 0")); 4402 KASSERT(uq->uq_thread == td, ("uq_thread != td")); 4403 KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL")); 4404 KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty")); 4405 } 4406 4407 /* 4408 * exec() hook. 4409 * 4410 * Clear the robust lists for all of the process's threads, without 4411 * delaying the cleanup to the thread_exit hook, since the relevant 4412 * address space is being destroyed right now. 4413 */ 4414 static void 4415 umtx_exec_hook(void *arg __unused, struct proc *p, 4416 struct image_params *imgp __unused) 4417 { 4418 struct thread *td; 4419 4420 KASSERT(p == curproc, ("need curproc")); 4421 KASSERT((p->p_flag & P_HADTHREADS) == 0 || 4422 (p->p_flag & P_STOPPED_SINGLE) != 0, 4423 ("curproc must be single-threaded")); 4424 /* 4425 * There is no need to lock the list as only this thread can be 4426 * running. 4427 */ 4428 FOREACH_THREAD_IN_PROC(p, td) { 4429 KASSERT(td == curthread || 4430 ((td->td_flags & TDF_BOUNDARY) != 0 && TD_IS_SUSPENDED(td)), 4431 ("running thread %p %p", p, td)); 4432 umtx_thread_cleanup(td); 4433 td->td_rb_list = td->td_rbp_list = td->td_rb_inact = 0; 4434 } 4435 } 4436 4437 /* 4438 * thread_exit() hook.
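 *
 * Runs for every exiting thread: umtx_thread_cleanup() disowns any
 * PI mutexes still held and walks the robust lists registered via
 * UMTX_OP_ROBUST_LISTS.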
 */
void
umtx_thread_exit(struct thread *td)
{

	umtx_thread_cleanup(td);
}

static int
umtx_read_uptr(struct thread *td, uintptr_t ptr, uintptr_t *res)
{
	u_long res1;
#ifdef COMPAT_FREEBSD32
	uint32_t res32;
#endif
	int error;

#ifdef COMPAT_FREEBSD32
	if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
		error = fueword32((void *)ptr, &res32);
		if (error == 0)
			res1 = res32;
	} else
#endif
	{
		error = fueword((void *)ptr, &res1);
	}
	if (error == 0)
		*res = res1;
	else
		error = EFAULT;
	return (error);
}

static void
umtx_read_rb_list(struct thread *td, struct umutex *m, uintptr_t *rb_list)
{
#ifdef COMPAT_FREEBSD32
	struct umutex32 m32;

	if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
		memcpy(&m32, m, sizeof(m32));
		*rb_list = m32.m_rb_lnk;
	} else
#endif
		*rb_list = m->m_rb_lnk;
}

static int
umtx_handle_rb(struct thread *td, uintptr_t rbp, uintptr_t *rb_list, bool inact)
{
	struct umutex m;
	int error;

	KASSERT(td->td_proc == curproc, ("need current vmspace"));
	error = copyin((void *)rbp, &m, sizeof(m));
	if (error != 0)
		return (error);
	if (rb_list != NULL)
		umtx_read_rb_list(td, &m, rb_list);
	if ((m.m_flags & UMUTEX_ROBUST) == 0)
		return (EINVAL);
	if ((m.m_owner & ~UMUTEX_CONTESTED) != td->td_tid)
		/* inact is cleared after unlock, allow the inconsistency */
		return (inact ? 0 : EINVAL);
	return (do_unlock_umutex(td, (struct umutex *)rbp, true));
}

static void
umtx_cleanup_rb_list(struct thread *td, uintptr_t rb_list, uintptr_t *rb_inact,
    const char *name)
{
	int error, i;
	uintptr_t rbp;
	bool inact;

	if (rb_list == 0)
		return;
	error = umtx_read_uptr(td, rb_list, &rbp);
	for (i = 0; error == 0 && rbp != 0 && i < umtx_max_rb; i++) {
		if (rbp == *rb_inact) {
			inact = true;
			*rb_inact = 0;
		} else
			inact = false;
		error = umtx_handle_rb(td, rbp, &rbp, inact);
	}
	if (i == umtx_max_rb && umtx_verbose_rb) {
		uprintf("comm %s pid %d: reached umtx %smax rb %d\n",
		    td->td_proc->p_comm, td->td_proc->p_pid, name, umtx_max_rb);
	}
	if (error != 0 && umtx_verbose_rb) {
		uprintf("comm %s pid %d: handling %srb error %d\n",
		    td->td_proc->p_comm, td->td_proc->p_pid, name, error);
	}
}

/*
 * Clean up umtx data.
 */
static void
umtx_thread_cleanup(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	uintptr_t rb_inact;

	/*
	 * Disown pi mutexes.
	 */
	uq = td->td_umtxq;
	if (uq != NULL) {
		if (uq->uq_inherited_pri != PRI_MAX ||
		    !TAILQ_EMPTY(&uq->uq_pi_contested)) {
			mtx_lock(&umtx_lock);
			uq->uq_inherited_pri = PRI_MAX;
			while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
				pi->pi_owner = NULL;
				TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
			}
			mtx_unlock(&umtx_lock);
		}
		sched_lend_user_prio_cond(td, PRI_MAX);
	}

	if (td->td_rb_inact == 0 && td->td_rb_list == 0 && td->td_rbp_list == 0)
		return;

	/*
	 * Handle terminated robust mutexes. Must be done after
	 * robust pi disown, otherwise unlock could see unowned
	 * entries.
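	 *
	 * The lists live entirely in userland memory: td_rb_list and
	 * td_rbp_list point at the head pointers registered via
	 * UMTX_OP_ROBUST_LISTS, and each struct umutex links to the
	 * next through its m_rb_lnk member, e.g.:
	 *
	 *	head -> m1.m_rb_lnk -> m2.m_rb_lnk -> ... -> 0
	 *
	 * td_rb_inact names the mutex currently being locked or
	 * unlocked, which may legitimately not be owned by this thread.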
	 */
	rb_inact = td->td_rb_inact;
	if (rb_inact != 0)
		(void)umtx_read_uptr(td, rb_inact, &rb_inact);
	umtx_cleanup_rb_list(td, td->td_rb_list, &rb_inact, "");
	umtx_cleanup_rb_list(td, td->td_rbp_list, &rb_inact, "priv ");
	if (rb_inact != 0)
		(void)umtx_handle_rb(td, rb_inact, NULL, true);
}
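
/*
 * Illustrative sketch (not part of the kernel): a userland threading
 * library would typically register its robust list heads once per
 * thread roughly as below, where rbl, rbl_priv and rb_inact are
 * hypothetical per-thread variables:
 *
 *	struct umtx_robust_lists_params rb = {
 *		.robust_list_offset = (uintptr_t)&rbl,
 *		.robust_priv_list_offset = (uintptr_t)&rbl_priv,
 *		.robust_inact_offset = (uintptr_t)&rb_inact,
 *	};
 *	_umtx_op(NULL, UMTX_OP_ROBUST_LISTS, sizeof(rb), &rb, NULL);
 *
 * The exec and thread_exit hooks above then walk those lists to
 * unlock any robust mutexes the terminating thread still owns.
 */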