/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2015, 2016 The FreeBSD Foundation
 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Portions of this software were developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_umtx_profiling.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/syscallsubr.h>
#include <sys/taskqueue.h>
#include <sys/time.h>
#include <sys/eventhandler.h>
#include <sys/umtx.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

#include <machine/atomic.h>
#include <machine/cpu.h>

#ifdef COMPAT_FREEBSD32
#include <compat/freebsd32/freebsd32_proto.h>
#endif

#define	_UMUTEX_TRY		1
#define	_UMUTEX_WAIT		2

#ifdef UMTX_PROFILING
#define	UPROF_PERC_BIGGER(w, f, sw, sf)					\
	(((w) > (sw)) || ((w) == (sw) && (f) > (sf)))
#endif

/* Priority inheritance mutex info. */
struct umtx_pi {
	/* Owner thread */
	struct thread		*pi_owner;

	/* Reference count */
	int			pi_refcount;

	/* List entry linking the umtx mutexes held by a thread */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* List entry in hash */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* List for waiters */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Identify a userland lock object */
	struct umtx_key		pi_key;
};
/*
 * A userland synchronization object user.
 */
struct umtx_q {
	/* Linked list for the hash. */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Umtx key. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define	UQF_UMTXQ	0x0001

	/* The waiting thread. */
	struct thread		*uq_thread;

	/*
	 * Blocked on PI mutex.  Reads may be done under either the
	 * chain lock or umtx_lock; writes must hold both.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* On blocked list */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* PI mutexes that we own and other threads contend for */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Inherited priority from PP mutex */
	u_char			uq_inherited_pri;

	/* Spare queue ready to be reused */
	struct umtxq_queue	*uq_spare_queue;

	/* The queue we are on */
	struct umtxq_queue	*uq_cur_queue;
};

TAILQ_HEAD(umtxq_head, umtx_q);

/* Per-key wait-queue */
struct umtxq_queue {
	struct umtxq_head	head;
	struct umtx_key		key;
	LIST_ENTRY(umtxq_queue)	link;
	int			length;
};

LIST_HEAD(umtxq_list, umtxq_queue);

/* Userland lock object's wait-queue chain */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx		uc_lock;

	/* List of sleep queues. */
	struct umtxq_list	uc_queue[2];
#define	UMTX_SHARED_QUEUE	0
#define	UMTX_EXCLUSIVE_QUEUE	1

	LIST_HEAD(, umtxq_queue) uc_spare_queue;

	/* Busy flag */
	char			uc_busy;

	/* Chain lock waiters */
	int			uc_waiters;

	/* All PI mutexes hashed to this chain */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;

#ifdef UMTX_PROFILING
	u_int			length;
	u_int			max_length;
#endif
};

#define	UMTXQ_LOCKED_ASSERT(uc)	mtx_assert(&(uc)->uc_lock, MA_OWNED)

/*
 * Don't propagate time-sharing priority; there is a security reason.
 * A user could simply introduce a PI mutex, let thread A lock the
 * mutex, and let another thread B block on it.  Because B is
 * sleeping, its priority would be boosted, which would boost A's
 * priority via priority propagation too; A's priority would then
 * never be lowered even if it is using 100% CPU, which is unfair to
 * other processes.
 */
#define	UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&	\
			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?	\
			 PRI_MAX_TIMESHARE : (td)->td_user_pri)

#define	GOLDEN_RATIO_PRIME	2654404609U
#ifndef	UMTX_CHAINS
#define	UMTX_CHAINS		512
#endif
#define	UMTX_SHIFTS		(__WORD_BIT - 9)

#define	GET_SHARE(flags)	\
	(((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)
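/*
 * Worked example of the clamp above: any thread whose td_user_pri
 * falls inside the time-sharing band is treated by UPRI() as
 * PRI_MAX_TIMESHARE, the numerically largest (least important)
 * time-sharing priority, so sleeping time-sharing waiters can never
 * be a source of priority boosts; priorities outside that band pass
 * through unchanged and can propagate.
 */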
#define	BUSY_SPINS		200

struct abs_timeout {
	int clockid;
	bool is_abs_real;	/* TIMER_ABSTIME && CLOCK_REALTIME* */
	struct timespec cur;
	struct timespec end;
};

#ifdef COMPAT_FREEBSD32
struct umutex32 {
	volatile __lwpid_t	m_owner;	/* Owner of the mutex */
	__uint32_t		m_flags;	/* Flags of the mutex */
	__uint32_t		m_ceilings[2];	/* Priority protect ceiling */
	__uint32_t		m_rb_lnk;	/* Robust linkage */
	__uint32_t		m_pad;
	__uint32_t		m_spare[2];
};

_Static_assert(sizeof(struct umutex) == sizeof(struct umutex32), "umutex32");
_Static_assert(__offsetof(struct umutex, m_spare[0]) ==
    __offsetof(struct umutex32, m_spare[0]), "m_spare32");
#endif

int umtx_shm_vnobj_persistent = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_vnode_persistent, CTLFLAG_RWTUN,
    &umtx_shm_vnobj_persistent, 0,
    "False forces destruction of umtx attached to file, on last close");
static int umtx_max_rb = 1000;
SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_max_robust, CTLFLAG_RWTUN,
    &umtx_max_rb, 0,
    "");

static uma_zone_t		umtx_pi_zone;
static struct umtxq_chain	umtxq_chains[2][UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
static int			umtx_pi_allocated;

static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
    &umtx_pi_allocated, 0, "Allocated umtx_pi");
static int umtx_verbose_rb = 1;
SYSCTL_INT(_debug_umtx, OID_AUTO, robust_faults_verbose, CTLFLAG_RWTUN,
    &umtx_verbose_rb, 0,
    "");

#ifdef UMTX_PROFILING
static long max_length;
SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0,
    "max_length");
static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD, 0,
    "umtx chain stats");
#endif

static void abs_timeout_update(struct abs_timeout *timo);

static void umtx_shm_init(void);
static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert_queue(struct umtx_q *uq, int q);
static void umtxq_remove_queue(struct umtx_q *uq, int q);
static int umtxq_sleep(struct umtx_q *uq, const char *wmesg,
    struct abs_timeout *);
static int umtxq_count(struct umtx_key *key);
static struct umtx_pi *umtx_pi_alloc(int);
static void umtx_pi_free(struct umtx_pi *pi);
static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    bool rb);
static void umtx_thread_cleanup(struct thread *td);
static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
    struct image_params *imgp __unused);
SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);

#define	umtxq_signal(key, nwake)	umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
#define	umtxq_insert(uq)	umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
#define	umtxq_remove(uq)	umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)

static struct mtx umtx_lock;
#ifdef UMTX_PROFILING
static void
umtx_init_profiling(void)
{
	struct sysctl_oid *chain_oid;
	char chain_name[10];
	int i;

	for (i = 0; i < UMTX_CHAINS; ++i) {
		snprintf(chain_name, sizeof(chain_name), "%d", i);
		chain_oid = SYSCTL_ADD_NODE(NULL,
		    SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO,
		    chain_name, CTLFLAG_RD, NULL, "umtx hash stats");
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length0", CTLFLAG_RD,
		    &umtxq_chains[0][i].max_length, 0, NULL);
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length1", CTLFLAG_RD,
		    &umtxq_chains[1][i].max_length, 0, NULL);
	}
}

static int
sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS)
{
	char buf[512];
	struct sbuf sb;
	struct umtxq_chain *uc;
	u_int fract, i, j, tot, whole;
	u_int sf0, sf1, sf2, sf3, sf4;
	u_int si0, si1, si2, si3, si4;
	u_int sw0, sw1, sw2, sw3, sw4;

	sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
	for (i = 0; i < 2; i++) {
		tot = 0;
		for (j = 0; j < UMTX_CHAINS; ++j) {
			uc = &umtxq_chains[i][j];
			mtx_lock(&uc->uc_lock);
			tot += uc->max_length;
			mtx_unlock(&uc->uc_lock);
		}
		if (tot == 0)
			sbuf_printf(&sb, "%u) Empty ", i);
		else {
			sf0 = sf1 = sf2 = sf3 = sf4 = 0;
			si0 = si1 = si2 = si3 = si4 = 0;
			sw0 = sw1 = sw2 = sw3 = sw4 = 0;
			for (j = 0; j < UMTX_CHAINS; j++) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				whole = uc->max_length * 100;
				mtx_unlock(&uc->uc_lock);
				fract = (whole % tot) * 100;
				if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) {
					sf0 = fract;
					si0 = j;
					sw0 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw1,
				    sf1)) {
					sf1 = fract;
					si1 = j;
					sw1 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw2,
				    sf2)) {
					sf2 = fract;
					si2 = j;
					sw2 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw3,
				    sf3)) {
					sf3 = fract;
					si3 = j;
					sw3 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw4,
				    sf4)) {
					sf4 = fract;
					si4 = j;
					sw4 = whole;
				}
			}
			sbuf_printf(&sb, "queue %u:\n", i);
			sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot,
			    sf0 / tot, si0);
			sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot,
			    sf1 / tot, si1);
			sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot,
			    sf2 / tot, si2);
			sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot,
			    sf3 / tot, si3);
			sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot,
			    sf4 / tot, si4);
		}
	}
	sbuf_trim(&sb);
	sbuf_finish(&sb);
	sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
	sbuf_delete(&sb);
	return (0);
}

static int
sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS)
{
	struct umtxq_chain *uc;
	u_int i, j;
	int clear, error;

	clear = 0;
	error = sysctl_handle_int(oidp, &clear, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);

	if (clear != 0) {
		for (i = 0; i < 2; ++i) {
			for (j = 0; j < UMTX_CHAINS; ++j) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				uc->length = 0;
				uc->max_length = 0;
				mtx_unlock(&uc->uc_lock);
			}
		}
	}
	return (0);
}

SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_clear, "I", "Clear umtx chains statistics");
SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_peaks, "A", "Highest peaks in chains max length");
#endif
static void
umtxq_sysinit(void *arg __unused)
{
	int i, j;

	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	for (i = 0; i < 2; ++i) {
		for (j = 0; j < UMTX_CHAINS; ++j) {
			mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
			    MTX_DEF | MTX_DUPOK);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
			LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
			TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
			umtxq_chains[i][j].uc_busy = 0;
			umtxq_chains[i][j].uc_waiters = 0;
#ifdef UMTX_PROFILING
			umtxq_chains[i][j].length = 0;
			umtxq_chains[i][j].max_length = 0;
#endif
		}
	}
#ifdef UMTX_PROFILING
	umtx_init_profiling();
#endif
	mtx_init(&umtx_lock, "umtx lock", NULL, MTX_DEF);
	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
	    EVENTHANDLER_PRI_ANY);
	umtx_shm_init();
}

struct umtx_q *
umtxq_alloc(void)
{
	struct umtx_q *uq;

	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
	uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX,
	    M_WAITOK | M_ZERO);
	TAILQ_INIT(&uq->uq_spare_queue->head);
	TAILQ_INIT(&uq->uq_pi_contested);
	uq->uq_inherited_pri = PRI_MAX;
	return (uq);
}

void
umtxq_free(struct umtx_q *uq)
{

	MPASS(uq->uq_spare_queue != NULL);
	free(uq->uq_spare_queue, M_UMTX);
	free(uq, M_UMTX);
}

static inline void
umtxq_hash(struct umtx_key *key)
{
	unsigned n;

	n = (uintptr_t)key->info.both.a + key->info.both.b;
	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
}

static inline struct umtxq_chain *
umtxq_getchain(struct umtx_key *key)
{

	if (key->type <= TYPE_SEM)
		return (&umtxq_chains[1][key->hash]);
	return (&umtxq_chains[0][key->hash]);
}

/*
 * Lock a chain.
 */
static inline void
umtxq_lock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_lock(&uc->uc_lock);
}

/*
 * Unlock a chain.
 */
static inline void
umtxq_unlock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_unlock(&uc->uc_lock);
}

/*
 * Set the chain to the busy state when the following operations
 * may block (a kernel mutex cannot be used).
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	if (uc->uc_busy) {
#ifdef SMP
		if (smp_cpus > 1) {
			int count = BUSY_SPINS;

			if (count > 0) {
				umtxq_unlock(key);
				while (uc->uc_busy && --count > 0)
					cpu_spinwait();
				umtxq_lock(key);
			}
		}
#endif
		while (uc->uc_busy) {
			uc->uc_waiters++;
			msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
			uc->uc_waiters--;
		}
	}
	uc->uc_busy = 1;
}
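/*
 * Sketch of the intended busy/unbusy pairing (as used by the lock and
 * unlock paths below): operations that must touch pageable userland
 * memory, and therefore may sleep, cannot hold the chain mutex across
 * the access, so they mark the chain busy instead:
 *
 *	umtxq_lock(key);
 *	umtxq_busy(key);
 *	umtxq_unlock(key);
 *	(fault-prone access to the userland lock word)
 *	umtxq_unbusy_unlocked(key);
 *
 * The busy bit, not the chain mutex, serializes such sections.
 */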
/*
 * Unbusy a chain.
 */
static inline void
umtxq_unbusy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	KASSERT(uc->uc_busy != 0, ("not busy"));
	uc->uc_busy = 0;
	if (uc->uc_waiters)
		wakeup_one(uc);
}

static inline void
umtxq_unbusy_unlocked(struct umtx_key *key)
{

	umtxq_lock(key);
	umtxq_unbusy(key);
	umtxq_unlock(key);
}

static struct umtxq_queue *
umtxq_queue_lookup(struct umtx_key *key, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	LIST_FOREACH(uh, &uc->uc_queue[q], link) {
		if (umtx_key_match(&uh->key, key))
			return (uh);
	}

	return (NULL);
}

static inline void
umtxq_insert_queue(struct umtx_q *uq, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT((uq->uq_flags & UQF_UMTXQ) == 0,
	    ("umtx_q is already on queue"));
	uh = umtxq_queue_lookup(&uq->uq_key, q);
	if (uh != NULL) {
		LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue,
		    link);
	} else {
		uh = uq->uq_spare_queue;
		uh->key = uq->uq_key;
		LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
#ifdef UMTX_PROFILING
		uc->length++;
		if (uc->length > uc->max_length) {
			uc->max_length = uc->length;
			if (uc->max_length > max_length)
				max_length = uc->max_length;
		}
#endif
	}
	uq->uq_spare_queue = NULL;

	TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
	uh->length++;
	uq->uq_flags |= UQF_UMTXQ;
	uq->uq_cur_queue = uh;
	return;
}

static inline void
umtxq_remove_queue(struct umtx_q *uq, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (uq->uq_flags & UQF_UMTXQ) {
		uh = uq->uq_cur_queue;
		TAILQ_REMOVE(&uh->head, uq, uq_link);
		uh->length--;
		uq->uq_flags &= ~UQF_UMTXQ;
		if (TAILQ_EMPTY(&uh->head)) {
			KASSERT(uh->length == 0,
			    ("inconsistent umtxq_queue length"));
#ifdef UMTX_PROFILING
			uc->length--;
#endif
			LIST_REMOVE(uh, link);
		} else {
			uh = LIST_FIRST(&uc->uc_spare_queue);
			KASSERT(uh != NULL, ("uc_spare_queue is empty"));
			LIST_REMOVE(uh, link);
		}
		uq->uq_spare_queue = uh;
		uq->uq_cur_queue = NULL;
	}
}

/*
 * Return the number of waiters for a key.
 */
static int
umtxq_count(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL)
		return (uh->length);
	return (0);
}

/*
 * Return the number of PI waiters and, through *first, the first
 * waiter.
 */
static int
umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	*first = NULL;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL) {
		*first = TAILQ_FIRST(&uh->head);
		return (uh->length);
	}
	return (0);
}
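/*
 * A note on the spare-queue scheme above: each umtx_q is allocated
 * together with one spare umtxq_queue, so umtxq_insert_queue() never
 * needs to allocate.  If a queue for the key already exists, the
 * inserter parks its spare on the chain's uc_spare_queue list;
 * otherwise its spare becomes the queue.  On removal, the departing
 * thread takes back either the now-empty queue or one entry from
 * uc_spare_queue, keeping the population of umtxq_queue structures
 * equal to the population of umtx_q structures.
 */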
static int
umtxq_check_susp(struct thread *td)
{
	struct proc *p;
	int error;

	/*
	 * The check for TDF_NEEDSUSPCHK is racy, but it is enough to
	 * eventually break the lockstep loop.
	 */
	if ((td->td_flags & TDF_NEEDSUSPCHK) == 0)
		return (0);
	error = 0;
	p = td->td_proc;
	PROC_LOCK(p);
	if (P_SHOULDSTOP(p) ||
	    ((p->p_flag & P_TRACED) && (td->td_dbgflags & TDB_SUSPEND))) {
		if (p->p_flag & P_SINGLE_EXIT)
			error = EINTR;
		else
			error = ERESTART;
	}
	PROC_UNLOCK(p);
	return (error);
}

/*
 * Wake up threads waiting on a userland object.
 */
static int
umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;
	struct umtx_q *uq;
	int ret;

	ret = 0;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, q);
	if (uh != NULL) {
		while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
			umtxq_remove_queue(uq, q);
			wakeup(uq);
			if (++ret >= n_wake)
				return (ret);
		}
	}
	return (ret);
}

/*
 * Wake up the specified thread.
 */
static inline void
umtxq_signal_thread(struct umtx_q *uq)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	umtxq_remove(uq);
	wakeup(uq);
}

static inline int
tstohz(const struct timespec *tsp)
{
	struct timeval tv;

	TIMESPEC_TO_TIMEVAL(&tv, tsp);
	return tvtohz(&tv);
}

static void
abs_timeout_init(struct abs_timeout *timo, int clockid, int absolute,
    const struct timespec *timeout)
{

	timo->clockid = clockid;
	if (!absolute) {
		timo->is_abs_real = false;
		abs_timeout_update(timo);
		timo->end = timo->cur;
		timespecadd(&timo->end, timeout);
	} else {
		timo->end = *timeout;
		timo->is_abs_real = clockid == CLOCK_REALTIME ||
		    clockid == CLOCK_REALTIME_FAST ||
		    clockid == CLOCK_REALTIME_PRECISE;
		/*
		 * If is_abs_real, umtxq_sleep will read the clock
		 * after setting td_rtcgen; otherwise, read it here.
		 */
		if (!timo->is_abs_real) {
			abs_timeout_update(timo);
		}
	}
}

static void
abs_timeout_init2(struct abs_timeout *timo, const struct _umtx_time *umtxtime)
{

	abs_timeout_init(timo, umtxtime->_clockid,
	    (umtxtime->_flags & UMTX_ABSTIME) != 0, &umtxtime->_timeout);
}

static inline void
abs_timeout_update(struct abs_timeout *timo)
{

	kern_clock_gettime(curthread, timo->clockid, &timo->cur);
}

static int
abs_timeout_gethz(struct abs_timeout *timo)
{
	struct timespec tts;

	if (timespeccmp(&timo->end, &timo->cur, <=))
		return (-1);
	tts = timo->end;
	timespecsub(&tts, &timo->cur);
	return (tstohz(&tts));
}

static uint32_t
umtx_unlock_val(uint32_t flags, bool rb)
{

	if (rb)
		return (UMUTEX_RB_OWNERDEAD);
	else if ((flags & UMUTEX_NONCONSISTENT) != 0)
		return (UMUTEX_RB_NOTRECOV);
	else
		return (UMUTEX_UNOWNED);
}
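/*
 * Timeout bookkeeping, worked through: for a relative 1.5 s wait,
 * abs_timeout_init() samples cur = now and sets
 * end = now + {tv_sec = 1, tv_nsec = 500000000}.  Each pass through
 * the sleep loop re-samples the clock with abs_timeout_update(), and
 * abs_timeout_gethz() converts the remaining end - cur into ticks for
 * msleep(), returning -1 once end <= cur so the caller can fail with
 * ETIMEDOUT.
 */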
/*
 * Put the thread into a sleep state; before sleeping, check whether
 * the thread was removed from the umtx queue.
 */
static inline int
umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *abstime)
{
	struct umtxq_chain *uc;
	int error, timo;

	if (abstime != NULL && abstime->is_abs_real) {
		curthread->td_rtcgen = atomic_load_acq_int(&rtc_generation);
		abs_timeout_update(abstime);
	}

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	for (;;) {
		if (!(uq->uq_flags & UQF_UMTXQ)) {
			error = 0;
			break;
		}
		if (abstime != NULL) {
			timo = abs_timeout_gethz(abstime);
			if (timo < 0) {
				error = ETIMEDOUT;
				break;
			}
		} else
			timo = 0;
		error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo);
		if (error == EINTR || error == ERESTART) {
			umtxq_lock(&uq->uq_key);
			break;
		}
		if (abstime != NULL) {
			if (abstime->is_abs_real)
				curthread->td_rtcgen =
				    atomic_load_acq_int(&rtc_generation);
			abs_timeout_update(abstime);
		}
		umtxq_lock(&uq->uq_key);
	}

	curthread->td_rtcgen = 0;
	return (error);
}

/*
 * Convert a userspace address into a unique logical address.
 */
int
umtx_key_get(const void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else {
		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
		    &entry, &key->info.shared.object, &pindex, &prot,
		    &wired) != KERN_SUCCESS) {
			return (EFAULT);
		}

		if ((share == PROCESS_SHARE) ||
		    (share == AUTO_SHARE &&
		     VM_INHERIT_SHARE == entry->inheritance)) {
			key->shared = 1;
			key->info.shared.offset = (vm_offset_t)addr -
			    entry->start + entry->offset;
			vm_object_reference(key->info.shared.object);
		} else {
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		vm_map_lookup_done(map, entry);
	}

	umtxq_hash(key);
	return (0);
}

/*
 * Release a key.
 */
void
umtx_key_release(struct umtx_key *key)
{

	if (key->shared)
		vm_object_deallocate(key->info.shared.object);
}
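/*
 * Key construction example: a process-private word is keyed by
 * (vmspace, virtual address), so identical addresses in different
 * processes never collide.  A PROCESS_SHARE word (or an AUTO_SHARE
 * word in a VM_INHERIT_SHARE mapping) is keyed by (vm_object, offset
 * within the object) and holds a reference on the object, so every
 * mapping of the same backing store reaches the same wait queue;
 * umtx_key_release() drops that reference.
 */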
/*
 * Fetch and compare a value; sleep on the address if the value has
 * not changed.
 */
static int
do_wait(struct thread *td, void *addr, u_long id,
    struct _umtx_time *timeout, int compat32, int is_private)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	u_long tmp;
	uint32_t tmp32;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	if (compat32 == 0) {
		error = fueword(addr, &tmp);
		if (error != 0)
			error = EFAULT;
	} else {
		error = fueword32(addr, &tmp32);
		if (error == 0)
			tmp = tmp32;
		else
			error = EFAULT;
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		if (tmp == id)
			error = umtxq_sleep(uq, "uwait", timeout == NULL ?
			    NULL : &timo);
		if ((uq->uq_flags & UQF_UMTXQ) == 0)
			error = 0;
		else
			umtxq_remove(uq);
	} else if ((uq->uq_flags & UQF_UMTXQ) != 0) {
		umtxq_remove(uq);
	}
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

/*
 * Wake up threads sleeping on the specified address.
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
		return (ret);
	umtxq_lock(&key);
	umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}
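/*
 * Userland view (illustrative): do_wait() and kern_umtx_wake() back
 * the UMTX_OP_WAIT and UMTX_OP_WAKE operations of _umtx_op(2):
 *
 *	volatile u_long word = 1;
 *	...
 *	(waiter, sleeps while the word still holds the expected value)
 *	_umtx_op((void *)&word, UMTX_OP_WAIT, 1, NULL, NULL);
 *	(waker, releases up to INT_MAX sleepers)
 *	_umtx_op((void *)&word, UMTX_OP_WAKE, INT_MAX, NULL, NULL);
 *
 * The _PRIVATE variants (e.g. UMTX_OP_WAIT_UINT_PRIVATE and
 * UMTX_OP_WAKE_PRIVATE) request THREAD_SHARE keys, skipping the
 * vm_map lookup for process-private words.
 */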
/*
 * Lock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int mode)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	error = 0;
	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	/*
	 * Care must be exercised when dealing with umtx structure.  It
	 * can fault on any access.
	 */
	for (;;) {
		rv = fueword32(&m->m_owner, &owner);
		if (rv == -1)
			return (EFAULT);
		if (mode == _UMUTEX_WAIT) {
			if (owner == UMUTEX_UNOWNED ||
			    owner == UMUTEX_CONTESTED ||
			    owner == UMUTEX_RB_OWNERDEAD ||
			    owner == UMUTEX_RB_NOTRECOV)
				return (0);
		} else {
			/*
			 * Robust mutex terminated.  Kernel duty is to
			 * return EOWNERDEAD to the userspace.  The
			 * umutex.m_flags UMUTEX_NONCONSISTENT is set
			 * by the common userspace code.
			 */
			if (owner == UMUTEX_RB_OWNERDEAD) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_RB_OWNERDEAD, &owner,
				    id | UMUTEX_CONTESTED);
				if (rv == -1)
					return (EFAULT);
				if (owner == UMUTEX_RB_OWNERDEAD)
					return (EOWNERDEAD); /* success */
				rv = umtxq_check_susp(td);
				if (rv != 0)
					return (rv);
				continue;
			}
			if (owner == UMUTEX_RB_NOTRECOV)
				return (ENOTRECOVERABLE);

			/*
			 * Try the uncontested case.  This should be
			 * done in userland.
			 */
			rv = casueword32(&m->m_owner, UMUTEX_UNOWNED,
			    &owner, id);
			/* The address was invalid. */
			if (rv == -1)
				return (EFAULT);

			/* The acquire succeeded. */
			if (owner == UMUTEX_UNOWNED)
				return (0);

			/*
			 * If no one owns it but it is contested, try
			 * to acquire it.
			 */
			if (owner == UMUTEX_CONTESTED) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_CONTESTED, &owner,
				    id | UMUTEX_CONTESTED);
				/* The address was invalid. */
				if (rv == -1)
					return (EFAULT);

				if (owner == UMUTEX_CONTESTED)
					return (0);

				rv = umtxq_check_susp(td);
				if (rv != 0)
					return (rv);

				/*
				 * If this failed, the lock has
				 * changed; restart.
				 */
				continue;
			}
		}

		if (mode == _UMUTEX_TRY)
			return (EBUSY);

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old,
		    owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (rv == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtxn", timeout == NULL ?
			    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);

		if (error == 0)
			error = umtxq_check_susp(td);
	}

	return (0);
}

/*
 * Unlock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	uint32_t owner, old, id, newlock;
	int error, count;

	id = td->td_tid;

	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	newlock = umtx_unlock_val(flags, rb);
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, newlock);
		if (error == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * zero or one thread is waiting for it.  Otherwise, it must
	 * be marked as contested.
	 */
	if (count > 1)
		newlock |= UMUTEX_CONTESTED;
	error = casueword32(&m->m_owner, owner, &old, newlock);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
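/*
 * The owner-word protocol for normal mutexes, summarized: an
 * uncontested lock is a userland CAS of m_owner from UMUTEX_UNOWNED
 * to the locker's tid; an uncontested unlock is the reverse CAS.
 * Only when those fail does a thread enter the kernel, which sets
 * UMUTEX_CONTESTED before sleeping so that the next userland unlock
 * knows it must make the system call to wake a waiter.
 */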
/*
 * Check if the mutex is available and wake up a waiter;
 * this applies only to a simple mutex.
 */
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != 0 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV)
		return (0);

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	if (count <= 1 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV) {
		error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    UMUTEX_UNOWNED);
		if (error == -1)
			error = EFAULT;
	}

	umtxq_lock(&key);
	if (error == 0 && count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 ||
	    owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV))
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}
/*
 * Check if the mutex has waiters and try to fix the contention bit.
 */
static int
do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old;
	int type;
	int error;
	int count;

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT |
	    UMUTEX_ROBUST)) {
	case 0:
	case UMUTEX_ROBUST:
		type = TYPE_NORMAL_UMUTEX;
		break;
	case UMUTEX_PRIO_INHERIT:
		type = TYPE_PI_UMUTEX;
		break;
	case (UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST):
		type = TYPE_PI_ROBUST_UMUTEX;
		break;
	case UMUTEX_PRIO_PROTECT:
		type = TYPE_PP_UMUTEX;
		break;
	case (UMUTEX_PRIO_PROTECT | UMUTEX_ROBUST):
		type = TYPE_PP_ROBUST_UMUTEX;
		break;
	default:
		return (EINVAL);
	}
	if ((error = umtx_key_get(m, type, GET_SHARE(flags), &key)) != 0)
		return (error);

	owner = 0;
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * Only repair the contention bit if there is a waiter; this
	 * means the mutex is still being referenced by userland code.
	 * Otherwise, don't update any memory.
	 */
	if (count > 1) {
		error = fueword32(&m->m_owner, &owner);
		if (error == -1)
			error = EFAULT;
		while (error == 0 && (owner & UMUTEX_CONTESTED) == 0) {
			error = casueword32(&m->m_owner, owner, &old,
			    owner | UMUTEX_CONTESTED);
			if (error == -1) {
				error = EFAULT;
				break;
			}
			if (old == owner)
				break;
			owner = old;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
	} else if (count == 1) {
		error = fueword32(&m->m_owner, &owner);
		if (error == -1)
			error = EFAULT;
		while (error == 0 && (owner & ~UMUTEX_CONTESTED) != 0 &&
		    (owner & UMUTEX_CONTESTED) == 0) {
			error = casueword32(&m->m_owner, owner, &old,
			    owner | UMUTEX_CONTESTED);
			if (error == -1) {
				error = EFAULT;
				break;
			}
			if (old == owner)
				break;
			owner = old;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
	}
	umtxq_lock(&key);
	if (error == EFAULT) {
		umtxq_signal(&key, INT_MAX);
	} else if (count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 ||
	    owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV))
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

static inline struct umtx_pi *
umtx_pi_alloc(int flags)
{
	struct umtx_pi *pi;

	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
	TAILQ_INIT(&pi->pi_blocked);
	atomic_add_int(&umtx_pi_allocated, 1);
	return (pi);
}

static inline void
umtx_pi_free(struct umtx_pi *pi)
{

	uma_zfree(umtx_pi_zone, pi);
	atomic_add_int(&umtx_pi_allocated, -1);
}

/*
 * Adjust the thread's position on a pi_state after its priority has been
 * changed.
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread or higher than the next thread.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}

static struct umtx_pi *
umtx_pi_next(struct umtx_pi *pi)
{
	struct umtx_q *uq_owner;

	if (pi->pi_owner == NULL)
		return (NULL);
	uq_owner = pi->pi_owner->td_umtxq;
	if (uq_owner == NULL)
		return (NULL);
	return (uq_owner->uq_pi_blocked);
}
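/*
 * The pi_blocked list is kept sorted by UPRI() with the most
 * important (numerically smallest) priority first, so TAILQ_FIRST()
 * is always the top waiter; umtx_pi_adjust_thread() re-sorts one
 * entry after a priority change.  umtx_pi_next() follows the edge
 * from a PI mutex to the PI mutex its owner is itself blocked on,
 * which is the successor function used by the cycle check below.
 */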
/*
 * Floyd's cycle-finding algorithm.
 */
static bool
umtx_pi_check_loop(struct umtx_pi *pi)
{
	struct umtx_pi *pi1;	/* fast iterator */

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (false);
	pi1 = pi;
	for (;;) {
		pi = umtx_pi_next(pi);
		if (pi == NULL)
			break;
		pi1 = umtx_pi_next(pi1);
		if (pi1 == NULL)
			break;
		pi1 = umtx_pi_next(pi1);
		if (pi1 == NULL)
			break;
		if (pi == pi1)
			return (true);
	}
	return (false);
}

/*
 * Propagate priority when a thread is blocked on a POSIX
 * PI mutex.
 */
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;
	if (umtx_pi_check_loop(pi))
		return;

	for (;;) {
		td = pi->pi_owner;
		if (td == NULL || td == curthread)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		thread_lock(td);
		if (td->td_lend_user_pri > pri)
			sched_lend_user_prio(td, pri);
		else {
			thread_unlock(td);
			break;
		}
		thread_unlock(td);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		if (pi == NULL)
			break;
		/* Resort td on the list if needed. */
		umtx_pi_adjust_thread(pi, td);
	}
}

/*
 * Unpropagate priority for a PI mutex when a thread blocked on
 * it is interrupted by a signal or resumed by others.
 */
static void
umtx_repropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);

	if (umtx_pi_check_loop(pi))
		return;
	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		thread_lock(pi->pi_owner);
		sched_lend_user_prio(pi->pi_owner, pri);
		thread_unlock(pi->pi_owner);
		if ((pi = uq_owner->uq_pi_blocked) != NULL)
			umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
	}
}

/*
 * Insert a PI mutex into the owned list.
 */
static void
umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_assert(&umtx_lock, MA_OWNED);
	MPASS(pi->pi_owner == NULL);
	pi->pi_owner = owner;
	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
}

/*
 * Disown a PI mutex, and remove it from the owned list.
 */
static void
umtx_pi_disown(struct umtx_pi *pi)
{

	mtx_assert(&umtx_lock, MA_OWNED);
	TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, pi, pi_link);
	pi->pi_owner = NULL;
}
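/*
 * Propagation example (sketch): when thread A blocks on a PI mutex
 * owned by thread B and A's UPRI() is numerically smaller (more
 * important) than B's lent priority, umtx_propagate_priority() lends
 * A's priority to B; if B is itself blocked on another PI mutex, the
 * walk continues to that owner, and so on.  When A later leaves the
 * queue (signal, timeout, or wakeup), umtx_repropagate_priority()
 * recomputes each owner's lent priority from its best remaining
 * waiters.
 */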
/*
 * Claim ownership of a PI mutex.
 */
static int
umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq;
	int pri;

	mtx_lock(&umtx_lock);
	if (pi->pi_owner == owner) {
		mtx_unlock(&umtx_lock);
		return (0);
	}

	if (pi->pi_owner != NULL) {
		/*
		 * Userland may have already messed up the mutex, sigh.
		 */
		mtx_unlock(&umtx_lock);
		return (EPERM);
	}
	umtx_pi_setowner(pi, owner);
	uq = TAILQ_FIRST(&pi->pi_blocked);
	if (uq != NULL) {
		pri = UPRI(uq->uq_thread);
		thread_lock(owner);
		if (pri < UPRI(owner))
			sched_lend_user_prio(owner, pri);
		thread_unlock(owner);
	}
	mtx_unlock(&umtx_lock);
	return (0);
}

/*
 * Adjust a thread's position in its blocked PI mutex's waiter list;
 * this may start a new round of priority propagation.
 */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	mtx_lock(&umtx_lock);
	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	if (pi != NULL) {
		umtx_pi_adjust_thread(pi, td);
		umtx_repropagate_priority(pi);
	}
	mtx_unlock(&umtx_lock);
}

/*
 * Sleep on a PI mutex.
 */
static int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, uint32_t owner,
    const char *wmesg, struct abs_timeout *timo, bool shared)
{
	struct umtxq_chain *uc;
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int error, pri;

	error = 0;
	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(uc->uc_busy != 0, ("umtx chain is not busy"));
	umtxq_insert(uq);
	mtx_lock(&umtx_lock);
	if (pi->pi_owner == NULL) {
		mtx_unlock(&umtx_lock);
		td1 = tdfind(owner, shared ? -1 : td->td_proc->p_pid);
		mtx_lock(&umtx_lock);
		if (td1 != NULL) {
			if (pi->pi_owner == NULL)
				umtx_pi_setowner(pi, td1);
			PROC_UNLOCK(td1->td_proc);
		}
	}

	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	thread_lock(td);
	td->td_flags |= TDF_UPIBLOCKED;
	thread_unlock(td);
	umtx_propagate_priority(td);
	mtx_unlock(&umtx_lock);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, wmesg, timo);
	umtxq_remove(uq);

	mtx_lock(&umtx_lock);
	uq->uq_pi_blocked = NULL;
	thread_lock(td);
	td->td_flags &= ~TDF_UPIBLOCKED;
	thread_unlock(td);
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_repropagate_priority(pi);
	mtx_unlock(&umtx_lock);
	umtxq_unlock(&uq->uq_key);

	return (error);
}

/*
 * Increment the reference count of a PI mutex.
 */
static void
umtx_pi_ref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	pi->pi_refcount++;
}
/*
 * Decrement the reference count of a PI mutex; when the count drops
 * to zero, its memory is freed.
 */
static void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		mtx_lock(&umtx_lock);
		if (pi->pi_owner != NULL)
			umtx_pi_disown(pi);
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
		    ("blocked queue not empty"));
		mtx_unlock(&umtx_lock);
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		umtx_pi_free(pi);
	}
}

/*
 * Find a PI mutex in the hash table.
 */
static struct umtx_pi *
umtx_pi_lookup(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtx_pi *pi;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);

	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
		if (umtx_key_match(&pi->pi_key, key)) {
			return (pi);
		}
	}
	return (NULL);
}

/*
 * Insert a PI mutex into the hash table.
 */
static inline void
umtx_pi_insert(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
}
/*
 * Lock a PI mutex.
 */
static int
do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, old_owner, owner, old;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	pi = umtx_pi_lookup(&uq->uq_key);
	if (pi == NULL) {
		new_pi = umtx_pi_alloc(M_NOWAIT);
		if (new_pi == NULL) {
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc(M_WAITOK);
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL) {
				umtx_pi_free(new_pi);
				new_pi = NULL;
			}
		}
		if (new_pi != NULL) {
			new_pi->pi_key = uq->uq_key;
			umtx_pi_insert(new_pi);
			pi = new_pi;
		}
	}
	umtx_pi_ref(pi);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Care must be exercised when dealing with umtx structure.  It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED) {
			error = 0;
			break;
		}

		if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
			break;
		}

		/* If no one owns it but it is contested, try to acquire it. */
		if (owner == UMUTEX_CONTESTED || owner == UMUTEX_RB_OWNERDEAD) {
			old_owner = owner;
			rv = casueword32(&m->m_owner, owner, &owner,
			    id | UMUTEX_CONTESTED);
			/* The address was invalid. */
			if (rv == -1) {
				error = EFAULT;
				break;
			}

			if (owner == old_owner) {
				umtxq_lock(&uq->uq_key);
				umtxq_busy(&uq->uq_key);
				error = umtx_pi_claim(pi, td);
				umtxq_unbusy(&uq->uq_key);
				umtxq_unlock(&uq->uq_key);
				if (error != 0) {
					/*
					 * Since we're going to return an
					 * error, restore the m_owner to its
					 * previous, unowned state to avoid
					 * compounding the problem.
					 */
					(void)casuword32(&m->m_owner,
					    id | UMUTEX_CONTESTED,
					    old_owner);
				}
				if (error == 0 &&
				    old_owner == UMUTEX_RB_OWNERDEAD)
					error = EOWNERDEAD;
				break;
			}

			error = umtxq_check_susp(td);
			if (error != 0)
				break;

			/* If this failed, the lock has changed; restart. */
			continue;
		}

		if ((owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old, owner |
		    UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}

		umtxq_lock(&uq->uq_key);
		/*
		 * We set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.  Note that the
		 * UMUTEX_RB_OWNERDEAD value for owner is impossible here.
		 */
		if (old == owner) {
			error = umtxq_sleep_pi(uq, pi,
			    owner & ~UMUTEX_CONTESTED,
			    "umtxpi", timeout == NULL ? NULL : &timo,
			    (flags & USYNC_PROCESS_SHARED) != 0);
			if (error != 0)
				continue;
		} else {
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
		}

		error = umtxq_check_susp(td);
		if (error != 0)
			break;
	}

	umtxq_lock(&uq->uq_key);
	umtx_pi_unref(pi);
	umtxq_unlock(&uq->uq_key);

	umtx_key_release(&uq->uq_key);
	return (error);
}
/*
 * Unlock a PI mutex.
 */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	uint32_t id, new_owner, old, owner;
	int count, error, pri;

	id = td->td_tid;

	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	new_owner = umtx_unlock_val(flags, rb);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, new_owner);
		if (error == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count_pi(&key, &uq_first);
	if (uq_first != NULL) {
		mtx_lock(&umtx_lock);
		pi = uq_first->uq_pi_blocked;
		KASSERT(pi != NULL, ("pi == NULL?"));
		if (pi->pi_owner != td && !(rb && pi->pi_owner == NULL)) {
			mtx_unlock(&umtx_lock);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			/* userland messed up the mutex */
			return (EPERM);
		}
		uq_me = td->td_umtxq;
		if (pi->pi_owner == td)
			umtx_pi_disown(pi);
		/* get highest priority thread which is still sleeping. */
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		while (uq_first != NULL &&
		    (uq_first->uq_flags & UQF_UMTXQ) == 0) {
			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
		}
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
		if (uq_first)
			umtxq_signal_thread(uq_first);
	} else {
		pi = umtx_pi_lookup(&key);
		/*
		 * A umtx_pi can exist if a signal or timeout removed the
		 * last waiter from the umtxq, but there is still
		 * a thread in do_lock_pi() holding the umtx_pi.
		 */
		if (pi != NULL) {
			/*
			 * The umtx_pi can be unowned, such as when a thread
			 * has just entered do_lock_pi(), allocated the
			 * umtx_pi, and unlocked the umtxq.
			 * If the current thread owns it, it must disown it.
			 */
			mtx_lock(&umtx_lock);
			if (pi->pi_owner == td)
				umtx_pi_disown(pi);
			mtx_unlock(&umtx_lock);
		}
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * zero or one thread is waiting for it.  Otherwise, it must
	 * be marked as contested.
	 */
	if (count > 1)
		new_owner |= UMUTEX_CONTESTED;
	error = casueword32(&m->m_owner, owner, &old, new_owner);

	umtxq_unbusy_unlocked(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
/*
 * Lock a PP mutex.
 */
static int
do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct abs_timeout timo;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t ceiling;
	uint32_t owner, id;
	int error, pri, old_inherited_pri, su, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
	for (;;) {
		old_inherited_pri = uq->uq_inherited_pri;
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		rv = fueword32(&m->m_ceilings[0], &ceiling);
		if (rv == -1) {
			error = EFAULT;
			goto out;
		}
		ceiling = RTP_PRIO_MAX - ceiling;
		if (ceiling > RTP_PRIO_MAX) {
			error = EINVAL;
			goto out;
		}

		mtx_lock(&umtx_lock);
		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
			mtx_unlock(&umtx_lock);
			error = EINVAL;
			goto out;
		}
		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
			thread_lock(td);
			if (uq->uq_inherited_pri < UPRI(td))
				sched_lend_user_prio(td, uq->uq_inherited_pri);
			thread_unlock(td);
		}
		mtx_unlock(&umtx_lock);

		rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    id | UMUTEX_CONTESTED);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		if (owner == UMUTEX_CONTESTED) {
			error = 0;
			break;
		} else if (owner == UMUTEX_RB_OWNERDEAD) {
			rv = casueword32(&m->m_owner, UMUTEX_RB_OWNERDEAD,
			    &owner, id | UMUTEX_CONTESTED);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (owner == UMUTEX_RB_OWNERDEAD) {
				error = EOWNERDEAD; /* success */
				break;
			}
			error = 0;
		} else if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", timeout == NULL ?
		    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);

		mtx_lock(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}

	if (error != 0 && error != EOWNERDEAD) {
		mtx_lock(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}

out:
	umtxq_unbusy_unlocked(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}
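/*
 * Ceiling arithmetic, worked through (assuming RTP_PRIO_MAX == 31 as
 * defined in <sys/rtprio.h>): the ceiling stored in m_ceilings[0]
 * appears to use the POSIX convention in which larger values are more
 * important, so it is flipped via ceiling = RTP_PRIO_MAX - ceiling
 * and mapped to the kernel priority PRI_MIN_REALTIME + ceiling, where
 * smaller values are better.  A stored ceiling of 31 thus maps to
 * PRI_MIN_REALTIME itself, and any value outside [0, 31] fails the
 * unsigned "ceiling > RTP_PRIO_MAX" test after the flip.
 */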

/*
 * Unlock a PP mutex.
 */
static int
do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t id, owner, rceiling;
	int error, pri, new_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);

	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
	if (error != 0)
		return (error);

	if (rceiling == -1)
		new_inherited_pri = PRI_MAX;
	else {
		rceiling = RTP_PRIO_MAX - rceiling;
		if (rceiling > RTP_PRIO_MAX)
			return (EINVAL);
		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
	}

	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_unlock(&key);
	/*
	 * For a priority-protected mutex, always set the unlocked state
	 * to UMUTEX_CONTESTED, so that userland always enters the kernel
	 * to lock it.  This is necessary because thread priorities have
	 * to be adjusted for such mutexes.
	 */
	error = suword32(&m->m_owner, umtx_unlock_val(flags, rb) |
	    UMUTEX_CONTESTED);

	umtxq_lock(&key);
	if (error == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	if (error == -1)
		error = EFAULT;
	else {
		mtx_lock(&umtx_lock);
		if (su != 0)
			uq->uq_inherited_pri = new_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}
	umtx_key_release(&key);
	return (error);
}
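
/*
 * Illustrative note (an assumption about the userland side): since a
 * PP mutex is stored unlocked as UMUTEX_CONTESTED, a fast-path CAS
 * from UMUTEX_UNOWNED can never succeed and locking always traps:
 *
 *	if (!atomic_cmpset_acq_32(&m->m_owner, UMUTEX_UNOWNED, tid))
 *		(void)_umtx_op(m, UMTX_OP_MUTEX_LOCK, 0, NULL, NULL);
 */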

static int
do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
    uint32_t *old_ceiling)
{
	struct umtx_q *uq;
	uint32_t flags, id, owner, save_ceiling;
	int error, rv, rv1;

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
		return (EINVAL);
	if (ceiling > RTP_PRIO_MAX)
		return (EINVAL);
	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	for (;;) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		rv = fueword32(&m->m_ceilings[0], &save_ceiling);
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    id | UMUTEX_CONTESTED);
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		if (owner == UMUTEX_CONTESTED) {
			rv = suword32(&m->m_ceilings[0], ceiling);
			rv1 = suword32(&m->m_owner, UMUTEX_CONTESTED);
			error = (rv == 0 && rv1 == 0) ? 0 : EFAULT;
			break;
		}

		if ((owner & ~UMUTEX_CONTESTED) == id) {
			rv = suword32(&m->m_ceilings[0], ceiling);
			error = rv == 0 ? 0 : EFAULT;
			break;
		}

		if (owner == UMUTEX_RB_OWNERDEAD) {
			error = EOWNERDEAD;
			break;
		} else if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
			break;
		}

		/*
		 * If we caught a signal, we have already retried;
		 * exit immediately now.
		 */
		if (error != 0)
			break;

		/*
		 * We set the contested bit, so sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", NULL);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0)
		umtxq_signal(&uq->uq_key, INT_MAX);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == 0 && old_ceiling != NULL) {
		rv = suword32(old_ceiling, save_ceiling);
		error = rv == 0 ? 0 : EFAULT;
	}
	return (error);
}

/*
 * Lock a userland POSIX mutex.
 */
static int
do_lock_umutex(struct thread *td, struct umutex *m,
    struct _umtx_time *timeout, int mode)
{
	uint32_t flags;
	int error;

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
	case 0:
		error = do_lock_normal(td, m, flags, timeout, mode);
		break;
	case UMUTEX_PRIO_INHERIT:
		error = do_lock_pi(td, m, flags, timeout, mode);
		break;
	case UMUTEX_PRIO_PROTECT:
		error = do_lock_pp(td, m, flags, timeout, mode);
		break;
	default:
		return (EINVAL);
	}
	if (timeout == NULL) {
		if (error == EINTR && mode != _UMUTEX_WAIT)
			error = ERESTART;
	} else {
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}
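
/*
 * Illustrative sketch (assumption): the locking protocol is selected
 * purely by the m_flags word userland stores at mutex initialization
 * time, e.g. for a priority-inheriting mutex:
 *
 *	struct umutex mtx = { .m_owner = UMUTEX_UNOWNED,
 *	    .m_flags = UMUTEX_PRIO_INHERIT };
 */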

/*
 * Unlock a userland POSIX mutex.
 */
static int
do_unlock_umutex(struct thread *td, struct umutex *m, bool rb)
{
	uint32_t flags;
	int error;

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
	case 0:
		return (do_unlock_normal(td, m, flags, rb));
	case UMUTEX_PRIO_INHERIT:
		return (do_unlock_pi(td, m, flags, rb));
	case UMUTEX_PRIO_PROTECT:
		return (do_unlock_pp(td, m, flags, rb));
	}

	return (EINVAL);
}

static int
do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
    struct timespec *timeout, u_long wflags)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags, clockid, hasw;
	int error;

	uq = td->td_umtxq;
	error = fueword32(&cv->c_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if ((wflags & CVWAIT_CLOCKID) != 0) {
		error = fueword32(&cv->c_clockid, &clockid);
		if (error == -1) {
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}
		if (clockid < CLOCK_REALTIME ||
		    clockid >= CLOCK_THREAD_CPUTIME_ID) {
			/* only predefined clock ids will work. */
			umtx_key_release(&uq->uq_key);
			return (EINVAL);
		}
	} else {
		clockid = CLOCK_REALTIME;
	}

	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Set c_has_waiters to 1 before releasing the user mutex, but
	 * do not modify the cache line when that is unnecessary.
	 */
	error = fueword32(&cv->c_has_waiters, &hasw);
	if (error == 0 && hasw == 0)
		suword32(&cv->c_has_waiters, 1);

	umtxq_unbusy_unlocked(&uq->uq_key);

	error = do_unlock_umutex(td, m, false);

	if (timeout != NULL)
		abs_timeout_init(&timo, clockid, (wflags & CVWAIT_ABSTIME) != 0,
		    timeout);

	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		error = umtxq_sleep(uq, "ucond", timeout == NULL ?
		    NULL : &timo);
	}

	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		error = 0;
	else {
		/*
		 * This must be a timeout, an interruption by a signal, or
		 * a spurious wakeup; clear the c_has_waiters flag when
		 * necessary.
		 */
		umtxq_busy(&uq->uq_key);
		if ((uq->uq_flags & UQF_UMTXQ) != 0) {
			int oldlen = uq->uq_cur_queue->length;
			umtxq_remove(uq);
			if (oldlen == 1) {
				umtxq_unlock(&uq->uq_key);
				suword32(&cv->c_has_waiters, 0);
				umtxq_lock(&uq->uq_key);
			}
		}
		umtxq_unbusy(&uq->uq_key);
		if (error == ERESTART)
			error = EINTR;
	}

	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}
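
/*
 * Illustrative sketch (an assumption, not libthr's actual code): the
 * waiter is queued before the mutex is dropped above, so the classic
 * wait loop cannot miss a wakeup:
 *
 *	while (!predicate)
 *		(void)_umtx_op(cv, UMTX_OP_CV_WAIT, 0, mtx, NULL);
 */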

/*
 * Signal a userland condition variable.
 */
static int
do_cv_signal(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error, cnt, nwake;
	uint32_t flags;

	error = fueword32(&cv->c_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	nwake = umtxq_signal(&key, 1);
	if (cnt <= nwake) {
		umtxq_unlock(&key);
		error = suword32(&cv->c_has_waiters, 0);
		if (error == -1)
			error = EFAULT;
		umtxq_lock(&key);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

static int
do_cv_broadcast(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error;
	uint32_t flags;

	error = fueword32(&cv->c_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_signal(&key, INT_MAX);
	umtxq_unlock(&key);

	error = suword32(&cv->c_has_waiters, 0);
	if (error == -1)
		error = EFAULT;

	umtxq_unbusy_unlocked(&key);

	umtx_key_release(&key);
	return (error);
}

static int
do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag,
    struct _umtx_time *timeout)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags, wrflags;
	int32_t state, oldstate;
	int32_t blocked_readers;
	int error, error1, rv;

	uq = td->td_umtxq;
	error = fueword32(&rwlock->rw_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	wrflags = URWLOCK_WRITE_OWNER;
	if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER))
		wrflags |= URWLOCK_WRITE_WAITERS;

	for (;;) {
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1) {
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/* try to lock it */
		while (!(state & wrflags)) {
			if (__predict_false(URWLOCK_READER_COUNT(state) ==
			    URWLOCK_MAX_READERS)) {
				umtx_key_release(&uq->uq_key);
				return (EAGAIN);
			}
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state + 1);
			if (rv == -1) {
				umtx_key_release(&uq->uq_key);
				return (EFAULT);
			}
			if (oldstate == state) {
				umtx_key_release(&uq->uq_key);
				return (0);
			}
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
			state = oldstate;
		}

		if (error)
			break;

		/* grab monitor lock */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Re-read the state, in case it changed between the
		 * try-lock above and the check below.
		 */
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1)
			error = EFAULT;

		/* set read contention bit */
		while (error == 0 && (state & wrflags) &&
		    !(state & URWLOCK_READ_WAITERS)) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state | URWLOCK_READ_WAITERS);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (oldstate == state)
				goto sleep;
			state = oldstate;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
		if (error != 0) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			break;
		}

		/* the state changed while setting flags; restart */
		if (!(state & wrflags)) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
			continue;
		}

sleep:
		/*
		 * The contention bit is set; before sleeping, increase
		 * the read waiter count.
		 */
		rv = fueword32(&rwlock->rw_blocked_readers,
		    &blocked_readers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_readers, blocked_readers + 1);

		while (state & wrflags) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert(uq);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "urdlck", timeout == NULL ?
			    NULL : &timo);

			umtxq_busy(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
		}

		/*
		 * Decrease the read waiter count, and possibly clear
		 * the read contention bit.
		 */
		rv = fueword32(&rwlock->rw_blocked_readers,
		    &blocked_readers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_readers, blocked_readers - 1);
		if (blocked_readers == 1) {
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				umtxq_unbusy_unlocked(&uq->uq_key);
				error = EFAULT;
				break;
			}
			for (;;) {
				rv = casueword32(&rwlock->rw_state, state,
				    &oldstate, state & ~URWLOCK_READ_WAITERS);
				if (rv == -1) {
					error = EFAULT;
					break;
				}
				if (oldstate == state)
					break;
				state = oldstate;
				error1 = umtxq_check_susp(td);
				if (error1 != 0) {
					if (error == 0)
						error = error1;
					break;
				}
			}
		}

		umtxq_unbusy_unlocked(&uq->uq_key);
		if (error != 0)
			break;
	}
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}
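
/*
 * For reference, the rw_state encoding consumed above (flag values are
 * defined in sys/umtx.h): the low bits count readers and the high bits
 * carry owner/waiter flags, so an uncontended userland read lock is,
 * modulo casts, a single 32-bit CAS.  An assumed sketch:
 *
 *	int32_t s = rw->rw_state;
 *	if (!(s & (URWLOCK_WRITE_OWNER | URWLOCK_WRITE_WAITERS)) &&
 *	    atomic_cmpset_acq_32(&rw->rw_state, s, s + 1))
 *		return (0);
 */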

static int
do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags;
	int32_t state, oldstate;
	int32_t blocked_writers;
	int32_t blocked_readers;
	int error, error1, rv;

	uq = td->td_umtxq;
	error = fueword32(&rwlock->rw_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	blocked_readers = 0;
	for (;;) {
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1) {
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}
		while (!(state & URWLOCK_WRITE_OWNER) &&
		    URWLOCK_READER_COUNT(state) == 0) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state | URWLOCK_WRITE_OWNER);
			if (rv == -1) {
				umtx_key_release(&uq->uq_key);
				return (EFAULT);
			}
			if (oldstate == state) {
				umtx_key_release(&uq->uq_key);
				return (0);
			}
			state = oldstate;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}

		if (error) {
			if (!(state & (URWLOCK_WRITE_OWNER |
			    URWLOCK_WRITE_WAITERS)) &&
			    blocked_readers != 0) {
				umtxq_lock(&uq->uq_key);
				umtxq_busy(&uq->uq_key);
				umtxq_signal_queue(&uq->uq_key, INT_MAX,
				    UMTX_SHARED_QUEUE);
				umtxq_unbusy(&uq->uq_key);
				umtxq_unlock(&uq->uq_key);
			}

			break;
		}

		/* grab monitor lock */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Re-read the state, in case it changed between the
		 * try-lock above and the check below.
		 */
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1)
			error = EFAULT;

		while (error == 0 && ((state & URWLOCK_WRITE_OWNER) ||
		    URWLOCK_READER_COUNT(state) != 0) &&
		    (state & URWLOCK_WRITE_WAITERS) == 0) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state | URWLOCK_WRITE_WAITERS);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (oldstate == state)
				goto sleep;
			state = oldstate;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
		if (error != 0) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			break;
		}

		if (!(state & URWLOCK_WRITE_OWNER) &&
		    URWLOCK_READER_COUNT(state) == 0) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
			continue;
		}
sleep:
		rv = fueword32(&rwlock->rw_blocked_writers,
		    &blocked_writers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_writers, blocked_writers + 1);

		while ((state & URWLOCK_WRITE_OWNER) ||
		    URWLOCK_READER_COUNT(state) != 0) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "uwrlck", timeout == NULL ?
			    NULL : &timo);

			umtxq_busy(&uq->uq_key);
			umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
		}

		rv = fueword32(&rwlock->rw_blocked_writers,
		    &blocked_writers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_writers, blocked_writers - 1);
		if (blocked_writers == 1) {
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				umtxq_unbusy_unlocked(&uq->uq_key);
				error = EFAULT;
				break;
			}
			for (;;) {
				rv = casueword32(&rwlock->rw_state, state,
				    &oldstate, state & ~URWLOCK_WRITE_WAITERS);
				if (rv == -1) {
					error = EFAULT;
					break;
				}
				if (oldstate == state)
					break;
				state = oldstate;
				error1 = umtxq_check_susp(td);
				/*
				 * We are leaving the URWLOCK_WRITE_WAITERS
				 * bit behind, but this should not harm
				 * correctness.
				 */
				if (error1 != 0) {
					if (error == 0)
						error = error1;
					break;
				}
			}
			rv = fueword32(&rwlock->rw_blocked_readers,
			    &blocked_readers);
			if (rv == -1) {
				umtxq_unbusy_unlocked(&uq->uq_key);
				error = EFAULT;
				break;
			}
		} else
			blocked_readers = 0;

		umtxq_unbusy_unlocked(&uq->uq_key);
	}

	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

static int
do_rw_unlock(struct thread *td, struct urwlock *rwlock)
{
	struct umtx_q *uq;
	uint32_t flags;
	int32_t state, oldstate;
	int error, rv, q, count;

	uq = td->td_umtxq;
	error = fueword32(&rwlock->rw_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	error = fueword32(&rwlock->rw_state, &state);
	if (error == -1) {
		error = EFAULT;
		goto out;
	}
	if (state & URWLOCK_WRITE_OWNER) {
		for (;;) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state & ~URWLOCK_WRITE_OWNER);
			if (rv == -1) {
				error = EFAULT;
				goto out;
			}
			if (oldstate != state) {
				state = oldstate;
				if (!(oldstate & URWLOCK_WRITE_OWNER)) {
					error = EPERM;
					goto out;
				}
				error = umtxq_check_susp(td);
				if (error != 0)
					goto out;
			} else
				break;
		}
	} else if (URWLOCK_READER_COUNT(state) != 0) {
		for (;;) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state - 1);
			if (rv == -1) {
				error = EFAULT;
				goto out;
			}
			if (oldstate != state) {
				state = oldstate;
				if (URWLOCK_READER_COUNT(oldstate) == 0) {
					error = EPERM;
					goto out;
				}
				error = umtxq_check_susp(td);
				if (error != 0)
					goto out;
			} else
				break;
		}
	} else {
		error = EPERM;
		goto out;
	}

	count = 0;

	if (!(flags & URWLOCK_PREFER_READER)) {
		if (state & URWLOCK_WRITE_WAITERS) {
			count = 1;
			q = UMTX_EXCLUSIVE_QUEUE;
		} else if (state & URWLOCK_READ_WAITERS) {
			count = INT_MAX;
			q = UMTX_SHARED_QUEUE;
		}
	} else {
		if (state & URWLOCK_READ_WAITERS) {
			count = INT_MAX;
			q = UMTX_SHARED_QUEUE;
		} else if (state & URWLOCK_WRITE_WAITERS) {
			count = 1;
			q = UMTX_EXCLUSIVE_QUEUE;
		}
	}

	if (count) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_signal_queue(&uq->uq_key, count, q);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
	}
out:
	umtx_key_release(&uq->uq_key);
	return (error);
}
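
/*
 * Illustrative note (an assumption about the userland side): a writer
 * unlock only needs the kernel when a waiter bit was left in the state
 * word it cleared:
 *
 *	int32_t s;
 *	do {
 *		s = rw->rw_state;
 *	} while (!atomic_cmpset_rel_32(&rw->rw_state, s,
 *	    s & ~URWLOCK_WRITE_OWNER));
 *	if (s & (URWLOCK_READ_WAITERS | URWLOCK_WRITE_WAITERS))
 *		(void)_umtx_op(rw, UMTX_OP_RW_UNLOCK, 0, NULL, NULL);
 */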

#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
static int
do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags, count, count1;
	int error, rv;

	uq = td->td_umtxq;
	error = fueword32(&sem->_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	rv = casueword32(&sem->_has_waiters, 0, &count1, 1);
	if (rv == 0)
		rv = fueword32(&sem->_count, &count);
	if (rv == -1 || count != 0) {
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
		return (rv == -1 ? EFAULT : 0);
	}
	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo);

	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		error = 0;
	else {
		umtxq_remove(uq);
		/* A relative timeout cannot be restarted. */
		if (error == ERESTART && timeout != NULL &&
		    (timeout->_flags & UMTX_ABSTIME) == 0)
			error = EINTR;
	}
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Signal a userland semaphore.
 */
static int
do_sem_wake(struct thread *td, struct _usem *sem)
{
	struct umtx_key key;
	int error, cnt;
	uint32_t flags;

	error = fueword32(&sem->_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	if (cnt > 0) {
		/*
		 * A queue count greater than 0 means the memory is still
		 * being referenced by user code, so the _has_waiters flag
		 * can be updated safely.
		 */
		if (cnt == 1) {
			umtxq_unlock(&key);
			error = suword32(&sem->_has_waiters, 0);
			umtxq_lock(&key);
			if (error == -1)
				error = EFAULT;
		}
		umtxq_signal(&key, 1);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}
#endif

static int
do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t count, flags;
	int error, rv;

	uq = td->td_umtxq;
	flags = fuword32(&sem->_flags);
	error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	rv = fueword32(&sem->_count, &count);
	if (rv == -1) {
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
		return (EFAULT);
	}
	for (;;) {
		if (USEM_COUNT(count) != 0) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (0);
		}
		if (count == USEM_HAS_WAITERS)
			break;
		rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS);
		if (rv == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}
		if (count == 0)
			break;
	}
	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo);

	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		error = 0;
	else {
		umtxq_remove(uq);
		if (timeout != NULL && (timeout->_flags & UMTX_ABSTIME) == 0) {
			/* A relative timeout cannot be restarted. */
			if (error == ERESTART)
				error = EINTR;
			if (error == EINTR) {
				abs_timeout_update(&timo);
				timeout->_timeout = timo.end;
				timespecsub(&timeout->_timeout, &timo.cur);
			}
		}
	}
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Signal a userland semaphore.
 */
static int
do_sem2_wake(struct thread *td, struct _usem2 *sem)
{
	struct umtx_key key;
	int error, cnt, rv;
	uint32_t count, flags;

	rv = fueword32(&sem->_flags, &flags);
	if (rv == -1)
		return (EFAULT);
	if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	if (cnt > 0) {
		/*
		 * If this was the last sleeping thread, clear the waiters
		 * flag in _count.
		 */
		if (cnt == 1) {
			umtxq_unlock(&key);
			rv = fueword32(&sem->_count, &count);
			while (rv != -1 && count & USEM_HAS_WAITERS)
				rv = casueword32(&sem->_count, count, &count,
				    count & ~USEM_HAS_WAITERS);
			if (rv == -1)
				error = EFAULT;
			umtxq_lock(&key);
		}

		umtxq_signal(&key, 1);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}
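
/*
 * Illustrative sketch (an assumption, not libc's actual code): a
 * _usem2 post only enters the kernel when USEM_HAS_WAITERS was set in
 * the count word it replaced:
 *
 *	uint32_t c;
 *	do {
 *		c = sem->_count;
 *	} while (!atomic_cmpset_rel_32(&sem->_count, c, c + 1));
 *	if (c & USEM_HAS_WAITERS)
 *		(void)_umtx_op(sem, UMTX_OP_SEM2_WAKE, 0, NULL, NULL);
 */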

inline int
umtx_copyin_timeout(const void *addr, struct timespec *tsp)
{
	int error;

	error = copyin(addr, tsp, sizeof(struct timespec));
	if (error == 0) {
		if (tsp->tv_sec < 0 ||
		    tsp->tv_nsec >= 1000000000 ||
		    tsp->tv_nsec < 0)
			error = EINVAL;
	}
	return (error);
}

static inline int
umtx_copyin_umtx_time(const void *addr, size_t size, struct _umtx_time *tp)
{
	int error;

	if (size <= sizeof(struct timespec)) {
		tp->_clockid = CLOCK_REALTIME;
		tp->_flags = 0;
		error = copyin(addr, &tp->_timeout, sizeof(struct timespec));
	} else
		error = copyin(addr, tp, sizeof(struct _umtx_time));
	if (error != 0)
		return (error);
	if (tp->_timeout.tv_sec < 0 ||
	    tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0)
		return (EINVAL);
	return (0);
}
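
/*
 * For reference, the convention consumed above: timed operations pass
 * the timeout buffer size in uaddr1 and the buffer itself in uaddr2,
 * so either a bare timespec or a full _umtx_time is accepted.  An
 * assumed example of the latter form:
 *
 *	struct _umtx_time t = { ._timeout = ts, ._flags = UMTX_ABSTIME,
 *	    ._clockid = CLOCK_MONOTONIC };
 *	(void)_umtx_op(obj, UMTX_OP_WAIT_UINT_PRIVATE, val,
 *	    (void *)sizeof(t), &t);
 */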

static int
__umtx_op_unimpl(struct thread *td, struct _umtx_op_args *uap)
{

	return (EOPNOTSUPP);
}

static int
__umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time timeout, *tm_p;
	int error;

	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time(
		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_wait(td, uap->obj, uap->val, tm_p, 0, 0));
}

static int
__umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time timeout, *tm_p;
	int error;

	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time(
		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0));
}

static int
__umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time(
		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1));
}

static int
__umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
{

	return (kern_umtx_wake(td, uap->obj, uap->val, 0));
}

#define BATCH_SIZE 128
static int
__umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap)
{
	char *uaddrs[BATCH_SIZE], **upp;
	int count, error, i, pos, tocopy;

	upp = (char **)uap->obj;
	error = 0;
	for (count = uap->val, pos = 0; count > 0; count -= tocopy,
	    pos += tocopy) {
		tocopy = MIN(count, BATCH_SIZE);
		error = copyin(upp + pos, uaddrs, tocopy * sizeof(char *));
		if (error != 0)
			break;
		for (i = 0; i < tocopy; ++i)
			kern_umtx_wake(td, uaddrs[i], INT_MAX, 1);
		maybe_yield();
	}
	return (error);
}

static int
__umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap)
{

	return (kern_umtx_wake(td, uap->obj, uap->val, 1));
}

static int
__umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time(
		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_lock_umutex(td, uap->obj, tm_p, 0));
}

static int
__umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
{

	return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY));
}

static int
__umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time(
		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT));
}

static int
__umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap)
{

	return (do_wake_umutex(td, uap->obj));
}

static int
__umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap)
{

	return (do_unlock_umutex(td, uap->obj, false));
}

static int
__umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap)
{

	return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1));
}

static int
__umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = umtx_copyin_timeout(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);
		ts = &timeout;
	}
	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
}

static int
__umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap)
{

	return (do_cv_signal(td, uap->obj));
}

static int
__umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap)
{

	return (do_cv_broadcast(td, uap->obj));
}

static int
__umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL) {
		error = do_rw_rdlock(td, uap->obj, uap->val, 0);
	} else {
		error = umtx_copyin_umtx_time(uap->uaddr2,
		    (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		error = do_rw_rdlock(td, uap->obj, uap->val, &timeout);
	}
	return (error);
}

static int
__umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL) {
		error = do_rw_wrlock(td, uap->obj, 0);
	} else {
		error = umtx_copyin_umtx_time(uap->uaddr2,
		    (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);

		error = do_rw_wrlock(td, uap->obj, &timeout);
	}
	return (error);
}

static int
__umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap)
{

	return (do_rw_unlock(td, uap->obj));
}

#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
static int
__umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time(
		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_sem_wait(td, uap->obj, tm_p));
}

static int
__umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap)
{

	return (do_sem_wake(td, uap->obj));
}
#endif

static int
__umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap)
{

	return (do_wake2_umutex(td, uap->obj, uap->val));
}

static int
__umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	size_t uasize;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL) {
		uasize = 0;
		tm_p = NULL;
	} else {
		uasize = (size_t)uap->uaddr1;
		error = umtx_copyin_umtx_time(uap->uaddr2, uasize, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	error = do_sem2_wait(td, uap->obj, tm_p);
	if (error == EINTR && uap->uaddr2 != NULL &&
	    (timeout._flags & UMTX_ABSTIME) == 0 &&
	    uasize >= sizeof(struct _umtx_time) + sizeof(struct timespec)) {
		error = copyout(&timeout._timeout,
		    (struct _umtx_time *)uap->uaddr2 + 1,
		    sizeof(struct timespec));
		if (error == 0) {
			error = EINTR;
		}
	}

	return (error);
}

static int
__umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap)
{

	return (do_sem2_wake(td, uap->obj));
}

#define	USHM_OBJ_UMTX(o)					\
    ((struct umtx_shm_obj_list *)(&(o)->umtx_data))

#define	USHMF_REG_LINKED	0x0001
#define	USHMF_OBJ_LINKED	0x0002
struct umtx_shm_reg {
	TAILQ_ENTRY(umtx_shm_reg) ushm_reg_link;
	LIST_ENTRY(umtx_shm_reg) ushm_obj_link;
	struct umtx_key	ushm_key;
	struct ucred	*ushm_cred;
	struct shmfd	*ushm_obj;
	u_int		ushm_refcnt;
	u_int		ushm_flags;
};

LIST_HEAD(umtx_shm_obj_list, umtx_shm_reg);
TAILQ_HEAD(umtx_shm_reg_head, umtx_shm_reg);

static uma_zone_t umtx_shm_reg_zone;
static struct umtx_shm_reg_head umtx_shm_registry[UMTX_CHAINS];
static struct mtx umtx_shm_lock;
static struct umtx_shm_reg_head umtx_shm_reg_delfree =
    TAILQ_HEAD_INITIALIZER(umtx_shm_reg_delfree);

static void umtx_shm_free_reg(struct umtx_shm_reg *reg);

static void
umtx_shm_reg_delfree_tq(void *context __unused, int pending __unused)
{
	struct umtx_shm_reg_head d;
	struct umtx_shm_reg *reg, *reg1;

	TAILQ_INIT(&d);
	mtx_lock(&umtx_shm_lock);
	TAILQ_CONCAT(&d, &umtx_shm_reg_delfree, ushm_reg_link);
	mtx_unlock(&umtx_shm_lock);
	TAILQ_FOREACH_SAFE(reg, &d, ushm_reg_link, reg1) {
		TAILQ_REMOVE(&d, reg, ushm_reg_link);
		umtx_shm_free_reg(reg);
	}
}

static struct task umtx_shm_reg_delfree_task =
    TASK_INITIALIZER(0, umtx_shm_reg_delfree_tq, NULL);

static struct umtx_shm_reg *
umtx_shm_find_reg_locked(const struct umtx_key *key)
{
	struct umtx_shm_reg *reg;
	struct umtx_shm_reg_head *reg_head;

	KASSERT(key->shared, ("umtx_shm_find_reg_locked: private key"));
	mtx_assert(&umtx_shm_lock, MA_OWNED);
	reg_head = &umtx_shm_registry[key->hash];
	TAILQ_FOREACH(reg, reg_head, ushm_reg_link) {
		KASSERT(reg->ushm_key.shared,
		    ("non-shared key on reg %p %d", reg, reg->ushm_key.shared));
		if (reg->ushm_key.info.shared.object ==
		    key->info.shared.object &&
		    reg->ushm_key.info.shared.offset ==
		    key->info.shared.offset) {
			KASSERT(reg->ushm_key.type == TYPE_SHM, ("TYPE_USHM"));
			KASSERT(reg->ushm_refcnt > 0,
			    ("reg %p refcnt 0 onlist", reg));
			KASSERT((reg->ushm_flags & USHMF_REG_LINKED) != 0,
			    ("reg %p not linked", reg));
			reg->ushm_refcnt++;
			return (reg);
		}
	}
	return (NULL);
}

static struct umtx_shm_reg *
umtx_shm_find_reg(const struct umtx_key *key)
{
	struct umtx_shm_reg *reg;

	mtx_lock(&umtx_shm_lock);
	reg = umtx_shm_find_reg_locked(key);
	mtx_unlock(&umtx_shm_lock);
	return (reg);
}

static void
umtx_shm_free_reg(struct umtx_shm_reg *reg)
{

	chgumtxcnt(reg->ushm_cred->cr_ruidinfo, -1, 0);
	crfree(reg->ushm_cred);
	shm_drop(reg->ushm_obj);
	uma_zfree(umtx_shm_reg_zone, reg);
}

static bool
umtx_shm_unref_reg_locked(struct umtx_shm_reg *reg, bool force)
{
	bool res;

	mtx_assert(&umtx_shm_lock, MA_OWNED);
	KASSERT(reg->ushm_refcnt > 0, ("ushm_reg %p refcnt 0", reg));
	reg->ushm_refcnt--;
	res = reg->ushm_refcnt == 0;
	if (res || force) {
		if ((reg->ushm_flags & USHMF_REG_LINKED) != 0) {
			TAILQ_REMOVE(&umtx_shm_registry[reg->ushm_key.hash],
			    reg, ushm_reg_link);
			reg->ushm_flags &= ~USHMF_REG_LINKED;
		}
		if ((reg->ushm_flags & USHMF_OBJ_LINKED) != 0) {
			LIST_REMOVE(reg, ushm_obj_link);
			reg->ushm_flags &= ~USHMF_OBJ_LINKED;
		}
	}
	return (res);
}

static void
umtx_shm_unref_reg(struct umtx_shm_reg *reg, bool force)
{
	vm_object_t object;
	bool dofree;

	if (force) {
		object = reg->ushm_obj->shm_object;
		VM_OBJECT_WLOCK(object);
		object->flags |= OBJ_UMTXDEAD;
		VM_OBJECT_WUNLOCK(object);
	}
	mtx_lock(&umtx_shm_lock);
	dofree = umtx_shm_unref_reg_locked(reg, force);
	mtx_unlock(&umtx_shm_lock);
	if (dofree)
		umtx_shm_free_reg(reg);
}

void
umtx_shm_object_init(vm_object_t object)
{

	LIST_INIT(USHM_OBJ_UMTX(object));
}

void
umtx_shm_object_terminated(vm_object_t object)
{
	struct umtx_shm_reg *reg, *reg1;
	bool dofree;

	dofree = false;
	mtx_lock(&umtx_shm_lock);
	LIST_FOREACH_SAFE(reg, USHM_OBJ_UMTX(object), ushm_obj_link, reg1) {
		if (umtx_shm_unref_reg_locked(reg, true)) {
			TAILQ_INSERT_TAIL(&umtx_shm_reg_delfree, reg,
			    ushm_reg_link);
			dofree = true;
		}
	}
	mtx_unlock(&umtx_shm_lock);
	if (dofree)
		taskqueue_enqueue(taskqueue_thread, &umtx_shm_reg_delfree_task);
}

static int
umtx_shm_create_reg(struct thread *td, const struct umtx_key *key,
    struct umtx_shm_reg **res)
{
	struct umtx_shm_reg *reg, *reg1;
	struct ucred *cred;
	int error;

	reg = umtx_shm_find_reg(key);
	if (reg != NULL) {
		*res = reg;
		return (0);
	}
	cred = td->td_ucred;
	if (!chgumtxcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_UMTXP)))
		return (ENOMEM);
	reg = uma_zalloc(umtx_shm_reg_zone, M_WAITOK | M_ZERO);
	reg->ushm_refcnt = 1;
	bcopy(key, &reg->ushm_key, sizeof(*key));
	reg->ushm_obj = shm_alloc(td->td_ucred, O_RDWR);
	reg->ushm_cred = crhold(cred);
	error = shm_dotruncate(reg->ushm_obj, PAGE_SIZE);
	if (error != 0) {
		umtx_shm_free_reg(reg);
		return (error);
	}
	mtx_lock(&umtx_shm_lock);
	reg1 = umtx_shm_find_reg_locked(key);
	if (reg1 != NULL) {
		mtx_unlock(&umtx_shm_lock);
		umtx_shm_free_reg(reg);
		*res = reg1;
		return (0);
	}
	reg->ushm_refcnt++;
	TAILQ_INSERT_TAIL(&umtx_shm_registry[key->hash], reg, ushm_reg_link);
	LIST_INSERT_HEAD(USHM_OBJ_UMTX(key->info.shared.object), reg,
	    ushm_obj_link);
	reg->ushm_flags = USHMF_REG_LINKED | USHMF_OBJ_LINKED;
	mtx_unlock(&umtx_shm_lock);
	*res = reg;
	return (0);
}
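
/*
 * Illustrative sketch (assumption): a process obtains the per-key
 * shared page created above by handing the kernel an address inside
 * a shared mapping and mmap()ing the returned descriptor:
 *
 *	int fd = _umtx_op(NULL, UMTX_OP_SHM, UMTX_SHM_CREAT, addr, NULL);
 *	void *p = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE,
 *	    MAP_SHARED, fd, 0);
 */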

static int
umtx_shm_alive(struct thread *td, void *addr)
{
	vm_map_t map;
	vm_map_entry_t entry;
	vm_object_t object;
	vm_pindex_t pindex;
	vm_prot_t prot;
	int res, ret;
	boolean_t wired;

	map = &td->td_proc->p_vmspace->vm_map;
	res = vm_map_lookup(&map, (uintptr_t)addr, VM_PROT_READ, &entry,
	    &object, &pindex, &prot, &wired);
	if (res != KERN_SUCCESS)
		return (EFAULT);
	if (object == NULL)
		ret = EINVAL;
	else
		ret = (object->flags & OBJ_UMTXDEAD) != 0 ? ENOTTY : 0;
	vm_map_lookup_done(map, entry);
	return (ret);
}

static void
umtx_shm_init(void)
{
	int i;

	umtx_shm_reg_zone = uma_zcreate("umtx_shm", sizeof(struct umtx_shm_reg),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	mtx_init(&umtx_shm_lock, "umtxshm", NULL, MTX_DEF);
	for (i = 0; i < nitems(umtx_shm_registry); i++)
		TAILQ_INIT(&umtx_shm_registry[i]);
}

static int
umtx_shm(struct thread *td, void *addr, u_int flags)
{
	struct umtx_key key;
	struct umtx_shm_reg *reg;
	struct file *fp;
	int error, fd;

	if (__bitcount(flags & (UMTX_SHM_CREAT | UMTX_SHM_LOOKUP |
	    UMTX_SHM_DESTROY | UMTX_SHM_ALIVE)) != 1)
		return (EINVAL);
	if ((flags & UMTX_SHM_ALIVE) != 0)
		return (umtx_shm_alive(td, addr));
	error = umtx_key_get(addr, TYPE_SHM, PROCESS_SHARE, &key);
	if (error != 0)
		return (error);
	KASSERT(key.shared == 1, ("non-shared key"));
	if ((flags & UMTX_SHM_CREAT) != 0) {
		error = umtx_shm_create_reg(td, &key, &reg);
	} else {
		reg = umtx_shm_find_reg(&key);
		if (reg == NULL)
			error = ESRCH;
	}
	umtx_key_release(&key);
	if (error != 0)
		return (error);
	KASSERT(reg != NULL, ("no reg"));
	if ((flags & UMTX_SHM_DESTROY) != 0) {
		umtx_shm_unref_reg(reg, true);
	} else {
#if 0
#ifdef MAC
		error = mac_posixshm_check_open(td->td_ucred,
		    reg->ushm_obj, FFLAGS(O_RDWR));
		if (error == 0)
#endif
			error = shm_access(reg->ushm_obj, td->td_ucred,
			    FFLAGS(O_RDWR));
		if (error == 0)
#endif
			error = falloc_caps(td, &fp, &fd, O_CLOEXEC, NULL);
		if (error == 0) {
			shm_hold(reg->ushm_obj);
			finit(fp, FFLAGS(O_RDWR), DTYPE_SHM, reg->ushm_obj,
			    &shm_ops);
			td->td_retval[0] = fd;
			fdrop(fp, td);
		}
	}
	umtx_shm_unref_reg(reg, false);
	return (error);
}

static int
__umtx_op_shm(struct thread *td, struct _umtx_op_args *uap)
{

	return (umtx_shm(td, uap->uaddr1, uap->val));
}

static int
umtx_robust_lists(struct thread *td, struct umtx_robust_lists_params *rbp)
{

	td->td_rb_list = rbp->robust_list_offset;
	td->td_rbp_list = rbp->robust_priv_list_offset;
	td->td_rb_inact = rbp->robust_inact_offset;
	return (0);
}

static int
__umtx_op_robust_lists(struct thread *td, struct _umtx_op_args *uap)
{
	struct umtx_robust_lists_params rb;
	int error;

	if (uap->val > sizeof(rb))
		return (EINVAL);
	bzero(&rb, sizeof(rb));
	error = copyin(uap->uaddr1, &rb, uap->val);
	if (error != 0)
		return (error);
	return (umtx_robust_lists(td, &rb));
}

typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);

static const _umtx_op_func op_table[] = {
	[UMTX_OP_RESERVED0]	= __umtx_op_unimpl,
	[UMTX_OP_RESERVED1]	= __umtx_op_unimpl,
	[UMTX_OP_WAIT]		= __umtx_op_wait,
	[UMTX_OP_WAKE]		= __umtx_op_wake,
	[UMTX_OP_MUTEX_TRYLOCK]	= __umtx_op_trylock_umutex,
	[UMTX_OP_MUTEX_LOCK]	= __umtx_op_lock_umutex,
	[UMTX_OP_MUTEX_UNLOCK]	= __umtx_op_unlock_umutex,
	[UMTX_OP_SET_CEILING]	= __umtx_op_set_ceiling,
	[UMTX_OP_CV_WAIT]	= __umtx_op_cv_wait,
	[UMTX_OP_CV_SIGNAL]	= __umtx_op_cv_signal,
	[UMTX_OP_CV_BROADCAST]	= __umtx_op_cv_broadcast,
	[UMTX_OP_WAIT_UINT]	= __umtx_op_wait_uint,
	[UMTX_OP_RW_RDLOCK]	= __umtx_op_rw_rdlock,
	[UMTX_OP_RW_WRLOCK]	= __umtx_op_rw_wrlock,
	[UMTX_OP_RW_UNLOCK]	= __umtx_op_rw_unlock,
	[UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private,
	[UMTX_OP_WAKE_PRIVATE]	= __umtx_op_wake_private,
	[UMTX_OP_MUTEX_WAIT]	= __umtx_op_wait_umutex,
	[UMTX_OP_MUTEX_WAKE]	= __umtx_op_wake_umutex,
#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
	[UMTX_OP_SEM_WAIT]	= __umtx_op_sem_wait,
	[UMTX_OP_SEM_WAKE]	= __umtx_op_sem_wake,
#else
	[UMTX_OP_SEM_WAIT]	= __umtx_op_unimpl,
	[UMTX_OP_SEM_WAKE]	= __umtx_op_unimpl,
#endif
	[UMTX_OP_NWAKE_PRIVATE]	= __umtx_op_nwake_private,
	[UMTX_OP_MUTEX_WAKE2]	= __umtx_op_wake2_umutex,
	[UMTX_OP_SEM2_WAIT]	= __umtx_op_sem2_wait,
	[UMTX_OP_SEM2_WAKE]	= __umtx_op_sem2_wake,
	[UMTX_OP_SHM]		= __umtx_op_shm,
	[UMTX_OP_ROBUST_LISTS]	= __umtx_op_robust_lists,
};

int
sys__umtx_op(struct thread *td, struct _umtx_op_args *uap)
{

	if ((unsigned)uap->op < nitems(op_table))
		return (*op_table[uap->op])(td, uap);
	return (EINVAL);
}
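
/*
 * Illustrative sketch of the umtx_op(2) calling convention dispatched
 * above (an assumed userland fragment), using a plain-word sleep and
 * wake pair:
 *
 *	// sleep while *uaddr still equals expected
 *	(void)_umtx_op(uaddr, UMTX_OP_WAIT_UINT_PRIVATE, expected,
 *	    NULL, NULL);
 *	// wake one thread sleeping on uaddr
 *	(void)_umtx_op(uaddr, UMTX_OP_WAKE_PRIVATE, 1, NULL, NULL);
 */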

#ifdef COMPAT_FREEBSD32

struct timespec32 {
	int32_t tv_sec;
	int32_t tv_nsec;
};

struct umtx_time32 {
	struct timespec32	timeout;
	uint32_t		flags;
	uint32_t		clockid;
};

static inline int
umtx_copyin_timeout32(void *addr, struct timespec *tsp)
{
	struct timespec32 ts32;
	int error;

	error = copyin(addr, &ts32, sizeof(struct timespec32));
	if (error == 0) {
		if (ts32.tv_sec < 0 ||
		    ts32.tv_nsec >= 1000000000 ||
		    ts32.tv_nsec < 0)
			error = EINVAL;
		else {
			tsp->tv_sec = ts32.tv_sec;
			tsp->tv_nsec = ts32.tv_nsec;
		}
	}
	return (error);
}

static inline int
umtx_copyin_umtx_time32(const void *addr, size_t size, struct _umtx_time *tp)
{
	struct umtx_time32 t32;
	int error;

	t32.clockid = CLOCK_REALTIME;
	t32.flags = 0;
	if (size <= sizeof(struct timespec32))
		error = copyin(addr, &t32.timeout, sizeof(struct timespec32));
	else
		error = copyin(addr, &t32, sizeof(struct umtx_time32));
	if (error != 0)
		return (error);
	if (t32.timeout.tv_sec < 0 ||
	    t32.timeout.tv_nsec >= 1000000000 || t32.timeout.tv_nsec < 0)
		return (EINVAL);
	tp->_timeout.tv_sec = t32.timeout.tv_sec;
	tp->_timeout.tv_nsec = t32.timeout.tv_nsec;
	tp->_flags = t32.flags;
	tp->_clockid = t32.clockid;
	return (0);
}

static int
__umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time32(uap->uaddr2,
		    (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0));
}

static int
__umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time32(uap->uaddr2,
		    (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_lock_umutex(td, uap->obj, tm_p, 0));
}

static int
__umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time32(uap->uaddr2,
		    (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT));
}

static int
__umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
		if (error != 0)
			return (error);
		ts = &timeout;
	}
	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
}

static int
__umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL) {
		error = do_rw_rdlock(td, uap->obj, uap->val, 0);
	} else {
		error = umtx_copyin_umtx_time32(uap->uaddr2,
		    (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		error = do_rw_rdlock(td, uap->obj, uap->val, &timeout);
	}
	return (error);
}

static int
__umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL) {
		error = do_rw_wrlock(td, uap->obj, 0);
	} else {
		error = umtx_copyin_umtx_time32(uap->uaddr2,
		    (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		error = do_rw_wrlock(td, uap->obj, &timeout);
	}
	return (error);
}

static int
__umtx_op_wait_uint_private_compat32(struct thread *td,
    struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time32(
		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1));
}

#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
static int
__umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
		error = umtx_copyin_umtx_time32(uap->uaddr2,
		    (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	return (do_sem_wait(td, uap->obj, tm_p));
}
#endif

static int
__umtx_op_sem2_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct _umtx_time *tm_p, timeout;
	size_t uasize;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL) {
		uasize = 0;
		tm_p = NULL;
	} else {
		uasize = (size_t)uap->uaddr1;
		error = umtx_copyin_umtx_time32(uap->uaddr2, uasize, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	error = do_sem2_wait(td, uap->obj, tm_p);
	if (error == EINTR && uap->uaddr2 != NULL &&
	    (timeout._flags & UMTX_ABSTIME) == 0 &&
	    uasize >= sizeof(struct umtx_time32) + sizeof(struct timespec32)) {
		struct timespec32 remain32 = {
			.tv_sec = timeout._timeout.tv_sec,
			.tv_nsec = timeout._timeout.tv_nsec
		};
		error = copyout(&remain32,
		    (struct umtx_time32 *)uap->uaddr2 + 1,
		    sizeof(struct timespec32));
		if (error == 0) {
			error = EINTR;
		}
	}

	return (error);
}

static int
__umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap)
{
	uint32_t uaddrs[BATCH_SIZE], **upp;
	int count, error, i, pos, tocopy;

	upp = (uint32_t **)uap->obj;
	error = 0;
	for (count = uap->val, pos = 0; count > 0; count -= tocopy,
	    pos += tocopy) {
		tocopy = MIN(count, BATCH_SIZE);
		error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t));
		if (error != 0)
			break;
		for (i = 0; i < tocopy; ++i)
			kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i],
			    INT_MAX, 1);
		maybe_yield();
	}
	return (error);
}

struct umtx_robust_lists_params_compat32 {
	uint32_t	robust_list_offset;
	uint32_t	robust_priv_list_offset;
	uint32_t	robust_inact_offset;
};

static int
__umtx_op_robust_lists_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct umtx_robust_lists_params rb;
	struct umtx_robust_lists_params_compat32 rb32;
	int error;

	if (uap->val > sizeof(rb32))
		return (EINVAL);
	bzero(&rb, sizeof(rb));
	bzero(&rb32, sizeof(rb32));
	error = copyin(uap->uaddr1, &rb32, uap->val);
	if (error != 0)
		return (error);
	rb.robust_list_offset = rb32.robust_list_offset;
	rb.robust_priv_list_offset = rb32.robust_priv_list_offset;
	rb.robust_inact_offset = rb32.robust_inact_offset;
	return (umtx_robust_lists(td, &rb));
}

static const _umtx_op_func op_table_compat32[] = {
	[UMTX_OP_RESERVED0]	= __umtx_op_unimpl,
	[UMTX_OP_RESERVED1]	= __umtx_op_unimpl,
	[UMTX_OP_WAIT]		= __umtx_op_wait_compat32,
	[UMTX_OP_WAKE]		= __umtx_op_wake,
	[UMTX_OP_MUTEX_TRYLOCK]	= __umtx_op_trylock_umutex,
	[UMTX_OP_MUTEX_LOCK]	= __umtx_op_lock_umutex_compat32,
	[UMTX_OP_MUTEX_UNLOCK]	= __umtx_op_unlock_umutex,
	[UMTX_OP_SET_CEILING]	= __umtx_op_set_ceiling,
	[UMTX_OP_CV_WAIT]	= __umtx_op_cv_wait_compat32,
	[UMTX_OP_CV_SIGNAL]	= __umtx_op_cv_signal,
	[UMTX_OP_CV_BROADCAST]	= __umtx_op_cv_broadcast,
	[UMTX_OP_WAIT_UINT]	= __umtx_op_wait_compat32,
	[UMTX_OP_RW_RDLOCK]	= __umtx_op_rw_rdlock_compat32,
	[UMTX_OP_RW_WRLOCK]	= __umtx_op_rw_wrlock_compat32,
	[UMTX_OP_RW_UNLOCK]	= __umtx_op_rw_unlock,
	[UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private_compat32,
	[UMTX_OP_WAKE_PRIVATE]	= __umtx_op_wake_private,
	[UMTX_OP_MUTEX_WAIT]	= __umtx_op_wait_umutex_compat32,
	[UMTX_OP_MUTEX_WAKE]	= __umtx_op_wake_umutex,
#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
	[UMTX_OP_SEM_WAIT]	= __umtx_op_sem_wait_compat32,
	[UMTX_OP_SEM_WAKE]	= __umtx_op_sem_wake,
#else
	[UMTX_OP_SEM_WAIT]	= __umtx_op_unimpl,
	[UMTX_OP_SEM_WAKE]	= __umtx_op_unimpl,
#endif
	[UMTX_OP_NWAKE_PRIVATE]	= __umtx_op_nwake_private32,
	[UMTX_OP_MUTEX_WAKE2]	= __umtx_op_wake2_umutex,
	[UMTX_OP_SEM2_WAIT]	= __umtx_op_sem2_wait_compat32,
	[UMTX_OP_SEM2_WAKE]	= __umtx_op_sem2_wake,
	[UMTX_OP_SHM]		= __umtx_op_shm,
	[UMTX_OP_ROBUST_LISTS]	= __umtx_op_robust_lists_compat32,
};

int
freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
{

	if ((unsigned)uap->op < nitems(op_table_compat32)) {
		return (*op_table_compat32[uap->op])(td,
		    (struct _umtx_op_args *)uap);
	}
	return (EINVAL);
}
#endif

void
umtx_thread_init(struct thread *td)
{

	td->td_umtxq = umtxq_alloc();
	td->td_umtxq->uq_thread = td;
}

void
umtx_thread_fini(struct thread *td)
{

	umtxq_free(td->td_umtxq);
}

/*
 * Called when a new thread is created, e.g. during fork().
 */
void
umtx_thread_alloc(struct thread *td)
{
	struct umtx_q *uq;

	uq = td->td_umtxq;
	uq->uq_inherited_pri = PRI_MAX;

	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
}

/*
 * exec() hook.
 *
 * Clear the robust lists for all of the process's threads, without
 * delaying the cleanup to the thread_exit hook, since the relevant
 * address space is destroyed right now.
 */
static void
umtx_exec_hook(void *arg __unused, struct proc *p,
    struct image_params *imgp __unused)
{
	struct thread *td;

	KASSERT(p == curproc, ("need curproc"));
	PROC_LOCK(p);
	KASSERT((p->p_flag & P_HADTHREADS) == 0 ||
	    (p->p_flag & P_STOPPED_SINGLE) != 0,
	    ("curproc must be single-threaded"));
	FOREACH_THREAD_IN_PROC(p, td) {
		KASSERT(td == curthread ||
		    ((td->td_flags & TDF_BOUNDARY) != 0 && TD_IS_SUSPENDED(td)),
		    ("running thread %p %p", p, td));
		PROC_UNLOCK(p);
		umtx_thread_cleanup(td);
		PROC_LOCK(p);
		td->td_rb_list = td->td_rbp_list = td->td_rb_inact = 0;
	}
	PROC_UNLOCK(p);
}
4439 */ 4440 void 4441 umtx_thread_exit(struct thread *td) 4442 { 4443 4444 umtx_thread_cleanup(td); 4445 } 4446 4447 static int 4448 umtx_read_uptr(struct thread *td, uintptr_t ptr, uintptr_t *res) 4449 { 4450 u_long res1; 4451 #ifdef COMPAT_FREEBSD32 4452 uint32_t res32; 4453 #endif 4454 int error; 4455 4456 #ifdef COMPAT_FREEBSD32 4457 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { 4458 error = fueword32((void *)ptr, &res32); 4459 if (error == 0) 4460 res1 = res32; 4461 } else 4462 #endif 4463 { 4464 error = fueword((void *)ptr, &res1); 4465 } 4466 if (error == 0) 4467 *res = res1; 4468 else 4469 error = EFAULT; 4470 return (error); 4471 } 4472 4473 static void 4474 umtx_read_rb_list(struct thread *td, struct umutex *m, uintptr_t *rb_list) 4475 { 4476 #ifdef COMPAT_FREEBSD32 4477 struct umutex32 m32; 4478 4479 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { 4480 memcpy(&m32, m, sizeof(m32)); 4481 *rb_list = m32.m_rb_lnk; 4482 } else 4483 #endif 4484 *rb_list = m->m_rb_lnk; 4485 } 4486 4487 static int 4488 umtx_handle_rb(struct thread *td, uintptr_t rbp, uintptr_t *rb_list, bool inact) 4489 { 4490 struct umutex m; 4491 int error; 4492 4493 KASSERT(td->td_proc == curproc, ("need current vmspace")); 4494 error = copyin((void *)rbp, &m, sizeof(m)); 4495 if (error != 0) 4496 return (error); 4497 if (rb_list != NULL) 4498 umtx_read_rb_list(td, &m, rb_list); 4499 if ((m.m_flags & UMUTEX_ROBUST) == 0) 4500 return (EINVAL); 4501 if ((m.m_owner & ~UMUTEX_CONTESTED) != td->td_tid) 4502 /* inact is cleared after unlock, allow the inconsistency */ 4503 return (inact ? 0 : EINVAL); 4504 return (do_unlock_umutex(td, (struct umutex *)rbp, true)); 4505 } 4506 4507 static void 4508 umtx_cleanup_rb_list(struct thread *td, uintptr_t rb_list, uintptr_t *rb_inact, 4509 const char *name) 4510 { 4511 int error, i; 4512 uintptr_t rbp; 4513 bool inact; 4514 4515 if (rb_list == 0) 4516 return; 4517 error = umtx_read_uptr(td, rb_list, &rbp); 4518 for (i = 0; error == 0 && rbp != 0 && i < umtx_max_rb; i++) { 4519 if (rbp == *rb_inact) { 4520 inact = true; 4521 *rb_inact = 0; 4522 } else 4523 inact = false; 4524 error = umtx_handle_rb(td, rbp, &rbp, inact); 4525 } 4526 if (i == umtx_max_rb && umtx_verbose_rb) { 4527 uprintf("comm %s pid %d: reached umtx %smax rb %d\n", 4528 td->td_proc->p_comm, td->td_proc->p_pid, name, umtx_max_rb); 4529 } 4530 if (error != 0 && umtx_verbose_rb) { 4531 uprintf("comm %s pid %d: handling %srb error %d\n", 4532 td->td_proc->p_comm, td->td_proc->p_pid, name, error); 4533 } 4534 } 4535 4536 /* 4537 * Clean up umtx data. 4538 */ 4539 static void 4540 umtx_thread_cleanup(struct thread *td) 4541 { 4542 struct umtx_q *uq; 4543 struct umtx_pi *pi; 4544 uintptr_t rb_inact; 4545 4546 /* 4547 * Disown pi mutexes. 4548 */ 4549 uq = td->td_umtxq; 4550 if (uq != NULL) { 4551 mtx_lock(&umtx_lock); 4552 uq->uq_inherited_pri = PRI_MAX; 4553 while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) { 4554 pi->pi_owner = NULL; 4555 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link); 4556 } 4557 mtx_unlock(&umtx_lock); 4558 thread_lock(td); 4559 sched_lend_user_prio(td, PRI_MAX); 4560 thread_unlock(td); 4561 } 4562 4563 /* 4564 * Handle terminated robust mutexes. Must be done after 4565 * robust pi disown, otherwise unlock could see unowned 4566 * entries. 
4567 */ 4568 rb_inact = td->td_rb_inact; 4569 if (rb_inact != 0) 4570 (void)umtx_read_uptr(td, rb_inact, &rb_inact); 4571 umtx_cleanup_rb_list(td, td->td_rb_list, &rb_inact, ""); 4572 umtx_cleanup_rb_list(td, td->td_rbp_list, &rb_inact, "priv "); 4573 if (rb_inact != 0) 4574 (void)umtx_handle_rb(td, rb_inact, NULL, true); 4575 } 4576