/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2015, 2016 The FreeBSD Foundation
 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Portions of this software were developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_umtx_profiling.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/syscallsubr.h>
#include <sys/taskqueue.h>
#include <sys/time.h>
#include <sys/eventhandler.h>
#include <sys/umtx.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

#include <machine/atomic.h>
#include <machine/cpu.h>

#ifdef COMPAT_FREEBSD32
#include <compat/freebsd32/freebsd32_proto.h>
#endif

#define	_UMUTEX_TRY		1
#define	_UMUTEX_WAIT		2

#ifdef UMTX_PROFILING
#define	UPROF_PERC_BIGGER(w, f, sw, sf)					\
	(((w) > (sw)) || ((w) == (sw) && (f) > (sf)))
#endif

/* Priority inheritance mutex info. */
struct umtx_pi {
	/* Owner thread */
	struct thread		*pi_owner;

	/* Reference count */
	int			pi_refcount;

	/* List entry to link umtx objects held by a thread */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* List entry in hash */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* List for waiters */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Identify a userland lock object */
	struct umtx_key		pi_key;
};

/*
 * A userland synchronous object user.
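 *
 * Each thread owns one umtx_q (td_umtxq, allocated by umtxq_alloc())
 * that represents it on a wait queue while it sleeps on a userland
 * synchronization object.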
 */
struct umtx_q {
	/* Linked list for the hash. */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Umtx key. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define	UQF_UMTXQ	0x0001

	/* The waiting thread. */
	struct thread		*uq_thread;

	/*
	 * PI mutex blocked on.  Reads may be done under either the
	 * chain lock or umtx_lock; writes must hold both the chain
	 * lock and umtx_lock.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* On blocked list */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* Threads contending with us */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Inherited priority from PP mutex */
	u_char			uq_inherited_pri;

	/* Spare queue ready to be reused */
	struct umtxq_queue	*uq_spare_queue;

	/* The queue we are on */
	struct umtxq_queue	*uq_cur_queue;
};

TAILQ_HEAD(umtxq_head, umtx_q);

/* Per-key wait-queue */
struct umtxq_queue {
	struct umtxq_head	head;
	struct umtx_key		key;
	LIST_ENTRY(umtxq_queue)	link;
	int			length;
};

LIST_HEAD(umtxq_list, umtxq_queue);

/* Userland lock object's wait-queue chain */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx		uc_lock;

	/* List of sleep queues. */
	struct umtxq_list	uc_queue[2];
#define	UMTX_SHARED_QUEUE	0
#define	UMTX_EXCLUSIVE_QUEUE	1

	LIST_HEAD(, umtxq_queue) uc_spare_queue;

	/* Busy flag */
	char			uc_busy;

	/* Chain lock waiters */
	int			uc_waiters;

	/* All PIs in the list */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;

#ifdef UMTX_PROFILING
	u_int			length;
	u_int			max_length;
#endif
};

#define	UMTXQ_LOCKED_ASSERT(uc)		mtx_assert(&(uc)->uc_lock, MA_OWNED)

/*
 * Don't propagate time-sharing priority.  There is a security reason:
 * a user can simply create a PI mutex, let thread A lock it, and let
 * another thread B block on it.  Because B is sleeping, its priority
 * is boosted, which boosts A's priority via priority propagation as
 * well; A's priority would then never be lowered even if it were
 * using 100% CPU, which is unfair to other processes.
 */

#define	UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&	\
			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?	\
			 PRI_MAX_TIMESHARE : (td)->td_user_pri)

#define	GOLDEN_RATIO_PRIME	2654404609U
#ifndef	UMTX_CHAINS
#define	UMTX_CHAINS		512
#endif
#define	UMTX_SHIFTS		(__WORD_BIT - 9)

#define	GET_SHARE(flags)	\
	(((flags) & USYNC_PROCESS_SHARED) == 0 ?	\
	    THREAD_SHARE : PROCESS_SHARE)

#define	BUSY_SPINS		200

struct abs_timeout {
	int clockid;
	bool is_abs_real;	/* TIMER_ABSTIME && CLOCK_REALTIME* */
	struct timespec cur;
	struct timespec end;
};

#ifdef COMPAT_FREEBSD32
struct umutex32 {
	volatile __lwpid_t	m_owner;	/* Owner of the mutex */
	__uint32_t		m_flags;	/* Flags of the mutex */
	__uint32_t		m_ceilings[2];	/* Priority protect ceiling */
	__uint32_t		m_rb_lnk;	/* Robust linkage */
	__uint32_t		m_pad;
	__uint32_t		m_spare[2];
};

_Static_assert(sizeof(struct umutex) == sizeof(struct umutex32), "umutex32");
_Static_assert(__offsetof(struct umutex, m_spare[0]) ==
    __offsetof(struct umutex32, m_spare[0]), "m_spare32");
#endif

int umtx_shm_vnobj_persistent = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_vnode_persistent, CTLFLAG_RWTUN,
    &umtx_shm_vnobj_persistent, 0,
    "False forces destruction of umtx attached to file, on last close");
static int umtx_max_rb = 1000;
SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_max_robust, CTLFLAG_RWTUN,
    &umtx_max_rb, 0,
    "");

static uma_zone_t		umtx_pi_zone;
static struct umtxq_chain	umtxq_chains[2][UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
static int			umtx_pi_allocated;

static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
    &umtx_pi_allocated, 0, "Allocated umtx_pi");
static int umtx_verbose_rb = 1;
SYSCTL_INT(_debug_umtx, OID_AUTO, robust_faults_verbose, CTLFLAG_RWTUN,
    &umtx_verbose_rb, 0,
    "");

#ifdef UMTX_PROFILING
static long max_length;
SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0,
    "max_length");
static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD, 0,
    "umtx chain stats");
#endif

static void abs_timeout_update(struct abs_timeout *timo);

static void umtx_shm_init(void);
static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert_queue(struct umtx_q *uq, int q);
static void umtxq_remove_queue(struct umtx_q *uq, int q);
static int umtxq_sleep(struct umtx_q *uq, const char *wmesg,
    struct abs_timeout *);
static int umtxq_count(struct umtx_key *key);
static struct umtx_pi *umtx_pi_alloc(int);
static void umtx_pi_free(struct umtx_pi *pi);
static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    bool rb);
static void umtx_thread_cleanup(struct thread *td);
static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
    struct image_params *imgp __unused);
SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);

#define	umtxq_signal(key, nwake)	umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
#define	umtxq_insert(uq)	umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
#define	umtxq_remove(uq)	umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)

static struct mtx umtx_lock;

#ifdef UMTX_PROFILING
static void
umtx_init_profiling(void)
{
	struct sysctl_oid *chain_oid;
	char chain_name[10];
	int i;

	for (i = 0; i <
	    UMTX_CHAINS; ++i) {
		snprintf(chain_name, sizeof(chain_name), "%d", i);
		chain_oid = SYSCTL_ADD_NODE(NULL,
		    SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO,
		    chain_name, CTLFLAG_RD, NULL, "umtx hash stats");
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length0", CTLFLAG_RD,
		    &umtxq_chains[0][i].max_length, 0, NULL);
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length1", CTLFLAG_RD,
		    &umtxq_chains[1][i].max_length, 0, NULL);
	}
}

static int
sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS)
{
	char buf[512];
	struct sbuf sb;
	struct umtxq_chain *uc;
	u_int fract, i, j, tot, whole;
	u_int sf0, sf1, sf2, sf3, sf4;
	u_int si0, si1, si2, si3, si4;
	u_int sw0, sw1, sw2, sw3, sw4;

	sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
	for (i = 0; i < 2; i++) {
		tot = 0;
		for (j = 0; j < UMTX_CHAINS; ++j) {
			uc = &umtxq_chains[i][j];
			mtx_lock(&uc->uc_lock);
			tot += uc->max_length;
			mtx_unlock(&uc->uc_lock);
		}
		if (tot == 0)
			sbuf_printf(&sb, "%u) Empty ", i);
		else {
			sf0 = sf1 = sf2 = sf3 = sf4 = 0;
			si0 = si1 = si2 = si3 = si4 = 0;
			sw0 = sw1 = sw2 = sw3 = sw4 = 0;
			for (j = 0; j < UMTX_CHAINS; j++) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				whole = uc->max_length * 100;
				mtx_unlock(&uc->uc_lock);
				fract = (whole % tot) * 100;
				if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) {
					sf0 = fract;
					si0 = j;
					sw0 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw1,
				    sf1)) {
					sf1 = fract;
					si1 = j;
					sw1 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw2,
				    sf2)) {
					sf2 = fract;
					si2 = j;
					sw2 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw3,
				    sf3)) {
					sf3 = fract;
					si3 = j;
					sw3 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw4,
				    sf4)) {
					sf4 = fract;
					si4 = j;
					sw4 = whole;
				}
			}
			sbuf_printf(&sb, "queue %u:\n", i);
			sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot,
			    sf0 / tot, si0);
			sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot,
			    sf1 / tot, si1);
			sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot,
			    sf2 / tot, si2);
			sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot,
			    sf3 / tot, si3);
			sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot,
			    sf4 / tot, si4);
		}
	}
	sbuf_trim(&sb);
	sbuf_finish(&sb);
	sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
	sbuf_delete(&sb);
	return (0);
}

static int
sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS)
{
	struct umtxq_chain *uc;
	u_int i, j;
	int clear, error;

	clear = 0;
	error = sysctl_handle_int(oidp, &clear, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);

	if (clear != 0) {
		for (i = 0; i < 2; ++i) {
			for (j = 0; j < UMTX_CHAINS; ++j) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				uc->length = 0;
				uc->max_length = 0;
				mtx_unlock(&uc->uc_lock);
			}
		}
	}
	return (0);
}

SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_clear, "I", "Clear umtx chains statistics");
SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_peaks, "A", "Highest peaks in chains max length");
#endif

static void
umtxq_sysinit(void *arg __unused)
{
	int i, j;

	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	for (i = 0; i < 2; ++i) {
		for (j = 0; j < UMTX_CHAINS; ++j) {
			mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
			    MTX_DEF | MTX_DUPOK);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
			LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
			TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
			umtxq_chains[i][j].uc_busy = 0;
			umtxq_chains[i][j].uc_waiters = 0;
#ifdef UMTX_PROFILING
			umtxq_chains[i][j].length = 0;
			umtxq_chains[i][j].max_length = 0;
#endif
		}
	}
#ifdef UMTX_PROFILING
	umtx_init_profiling();
#endif
	mtx_init(&umtx_lock, "umtx lock", NULL, MTX_DEF);
	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
	    EVENTHANDLER_PRI_ANY);
	umtx_shm_init();
}

struct umtx_q *
umtxq_alloc(void)
{
	struct umtx_q *uq;

	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
	uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX,
	    M_WAITOK | M_ZERO);
	TAILQ_INIT(&uq->uq_spare_queue->head);
	TAILQ_INIT(&uq->uq_pi_contested);
	uq->uq_inherited_pri = PRI_MAX;
	return (uq);
}

void
umtxq_free(struct umtx_q *uq)
{

	MPASS(uq->uq_spare_queue != NULL);
	free(uq->uq_spare_queue, M_UMTX);
	free(uq, M_UMTX);
}

static inline void
umtxq_hash(struct umtx_key *key)
{
	unsigned n;

	n = (uintptr_t)key->info.both.a + key->info.both.b;
	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
}

static inline struct umtxq_chain *
umtxq_getchain(struct umtx_key *key)
{

	if (key->type <= TYPE_SEM)
		return (&umtxq_chains[1][key->hash]);
	return (&umtxq_chains[0][key->hash]);
}

/*
 * Lock a chain.
 */
static inline void
umtxq_lock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_lock(&uc->uc_lock);
}

/*
 * Unlock a chain.
 */
static inline void
umtxq_unlock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_unlock(&uc->uc_lock);
}

/*
 * Set the chain to busy state when the following operation
 * may be blocked (a kernel mutex cannot be used).
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	if (uc->uc_busy) {
#ifdef SMP
		if (smp_cpus > 1) {
			int count = BUSY_SPINS;
			if (count > 0) {
				umtxq_unlock(key);
				while (uc->uc_busy && --count > 0)
					cpu_spinwait();
				umtxq_lock(key);
			}
		}
#endif
		while (uc->uc_busy) {
			uc->uc_waiters++;
			msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
			uc->uc_waiters--;
		}
	}
	uc->uc_busy = 1;
}

/*
 * Unbusy a chain.
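 *
 * Clearing uc_busy pairs with the msleep() in umtxq_busy(): if any
 * threads are recorded in uc_waiters, exactly one of them is woken
 * up to take over the busy state.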
 */
static inline void
umtxq_unbusy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	KASSERT(uc->uc_busy != 0, ("not busy"));
	uc->uc_busy = 0;
	if (uc->uc_waiters)
		wakeup_one(uc);
}

static inline void
umtxq_unbusy_unlocked(struct umtx_key *key)
{

	umtxq_lock(key);
	umtxq_unbusy(key);
	umtxq_unlock(key);
}

static struct umtxq_queue *
umtxq_queue_lookup(struct umtx_key *key, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	LIST_FOREACH(uh, &uc->uc_queue[q], link) {
		if (umtx_key_match(&uh->key, key))
			return (uh);
	}

	return (NULL);
}

static inline void
umtxq_insert_queue(struct umtx_q *uq, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue"));
	uh = umtxq_queue_lookup(&uq->uq_key, q);
	if (uh != NULL) {
		LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link);
	} else {
		uh = uq->uq_spare_queue;
		uh->key = uq->uq_key;
		LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
#ifdef UMTX_PROFILING
		uc->length++;
		if (uc->length > uc->max_length) {
			uc->max_length = uc->length;
			if (uc->max_length > max_length)
				max_length = uc->max_length;
		}
#endif
	}
	uq->uq_spare_queue = NULL;

	TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
	uh->length++;
	uq->uq_flags |= UQF_UMTXQ;
	uq->uq_cur_queue = uh;
	return;
}

static inline void
umtxq_remove_queue(struct umtx_q *uq, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (uq->uq_flags & UQF_UMTXQ) {
		uh = uq->uq_cur_queue;
		TAILQ_REMOVE(&uh->head, uq, uq_link);
		uh->length--;
		uq->uq_flags &= ~UQF_UMTXQ;
		if (TAILQ_EMPTY(&uh->head)) {
			KASSERT(uh->length == 0,
			    ("inconsistent umtxq_queue length"));
#ifdef UMTX_PROFILING
			uc->length--;
#endif
			LIST_REMOVE(uh, link);
		} else {
			uh = LIST_FIRST(&uc->uc_spare_queue);
			KASSERT(uh != NULL, ("uc_spare_queue is empty"));
			LIST_REMOVE(uh, link);
		}
		uq->uq_spare_queue = uh;
		uq->uq_cur_queue = NULL;
	}
}

/*
 * Check if there are multiple waiters.
 */
static int
umtxq_count(struct umtx_key *key)
{
	struct umtxq_queue *uh;

	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL)
		return (uh->length);
	return (0);
}

/*
 * Check if there are multiple PI waiters and return the first
 * waiter.
 */
static int
umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
{
	struct umtxq_queue *uh;

	*first = NULL;
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL) {
		*first = TAILQ_FIRST(&uh->head);
		return (uh->length);
	}
	return (0);
}

/*
 * Wake up threads waiting on a userland object.
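 *
 * Each woken thread is removed from its queue before wakeup(), and
 * the number of threads actually woken, at most n_wake, is returned.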
 */
static int
umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
{
	struct umtxq_queue *uh;
	struct umtx_q *uq;
	int ret;

	ret = 0;
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
	uh = umtxq_queue_lookup(key, q);
	if (uh != NULL) {
		while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
			umtxq_remove_queue(uq, q);
			wakeup(uq);
			if (++ret >= n_wake)
				return (ret);
		}
	}
	return (ret);
}

/*
 * Wake up the specified thread.
 */
static inline void
umtxq_signal_thread(struct umtx_q *uq)
{

	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key));
	umtxq_remove(uq);
	wakeup(uq);
}

static inline int
tstohz(const struct timespec *tsp)
{
	struct timeval tv;

	TIMESPEC_TO_TIMEVAL(&tv, tsp);
	return tvtohz(&tv);
}

static void
abs_timeout_init(struct abs_timeout *timo, int clockid, int absolute,
    const struct timespec *timeout)
{

	timo->clockid = clockid;
	if (!absolute) {
		timo->is_abs_real = false;
		abs_timeout_update(timo);
		timespecadd(&timo->cur, timeout, &timo->end);
	} else {
		timo->end = *timeout;
		timo->is_abs_real = clockid == CLOCK_REALTIME ||
		    clockid == CLOCK_REALTIME_FAST ||
		    clockid == CLOCK_REALTIME_PRECISE;
		/*
		 * If is_abs_real, umtxq_sleep will read the clock
		 * after setting td_rtcgen; otherwise, read it here.
		 */
		if (!timo->is_abs_real) {
			abs_timeout_update(timo);
		}
	}
}

static void
abs_timeout_init2(struct abs_timeout *timo, const struct _umtx_time *umtxtime)
{

	abs_timeout_init(timo, umtxtime->_clockid,
	    (umtxtime->_flags & UMTX_ABSTIME) != 0, &umtxtime->_timeout);
}

static inline void
abs_timeout_update(struct abs_timeout *timo)
{

	kern_clock_gettime(curthread, timo->clockid, &timo->cur);
}

static int
abs_timeout_gethz(struct abs_timeout *timo)
{
	struct timespec tts;

	if (timespeccmp(&timo->end, &timo->cur, <=))
		return (-1);
	timespecsub(&timo->end, &timo->cur, &tts);
	return (tstohz(&tts));
}

static uint32_t
umtx_unlock_val(uint32_t flags, bool rb)
{

	if (rb)
		return (UMUTEX_RB_OWNERDEAD);
	else if ((flags & UMUTEX_NONCONSISTENT) != 0)
		return (UMUTEX_RB_NOTRECOV);
	else
		return (UMUTEX_UNOWNED);
}

/*
 * Put the thread into sleep state; before sleeping, check whether
 * the thread was removed from the umtx queue.
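 *
 * For an absolute CLOCK_REALTIME* timeout, td_rtcgen is set from
 * rtc_generation before each clock read, so that a change of the
 * wall clock can wake the sleeper up and the timeout is then
 * re-evaluated.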
 */
static inline int
umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *abstime)
{
	struct umtxq_chain *uc;
	int error, timo;

	if (abstime != NULL && abstime->is_abs_real) {
		curthread->td_rtcgen = atomic_load_acq_int(&rtc_generation);
		abs_timeout_update(abstime);
	}

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	for (;;) {
		if (!(uq->uq_flags & UQF_UMTXQ)) {
			error = 0;
			break;
		}
		if (abstime != NULL) {
			timo = abs_timeout_gethz(abstime);
			if (timo < 0) {
				error = ETIMEDOUT;
				break;
			}
		} else
			timo = 0;
		error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo);
		if (error == EINTR || error == ERESTART) {
			umtxq_lock(&uq->uq_key);
			break;
		}
		if (abstime != NULL) {
			if (abstime->is_abs_real)
				curthread->td_rtcgen =
				    atomic_load_acq_int(&rtc_generation);
			abs_timeout_update(abstime);
		}
		umtxq_lock(&uq->uq_key);
	}

	curthread->td_rtcgen = 0;
	return (error);
}

/*
 * Convert a userspace address into a unique logical address.
 */
int
umtx_key_get(const void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else {
		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
		    &entry, &key->info.shared.object, &pindex, &prot,
		    &wired) != KERN_SUCCESS) {
			return (EFAULT);
		}

		if ((share == PROCESS_SHARE) ||
		    (share == AUTO_SHARE &&
		     VM_INHERIT_SHARE == entry->inheritance)) {
			key->shared = 1;
			key->info.shared.offset = (vm_offset_t)addr -
			    entry->start + entry->offset;
			vm_object_reference(key->info.shared.object);
		} else {
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		vm_map_lookup_done(map, entry);
	}

	umtxq_hash(key);
	return (0);
}

/*
 * Release key.
 */
void
umtx_key_release(struct umtx_key *key)
{

	if (key->shared)
		vm_object_deallocate(key->info.shared.object);
}

/*
 * Fetch and compare value; sleep on the address if the value has not changed.
 */
static int
do_wait(struct thread *td, void *addr, u_long id,
    struct _umtx_time *timeout, int compat32, int is_private)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	u_long tmp;
	uint32_t tmp32;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	if (compat32 == 0) {
		error = fueword(addr, &tmp);
		if (error != 0)
			error = EFAULT;
	} else {
		error = fueword32(addr, &tmp32);
		if (error == 0)
			tmp = tmp32;
		else
			error = EFAULT;
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		if (tmp == id)
			error = umtxq_sleep(uq, "uwait", timeout == NULL ?
			    NULL : &timo);
		if ((uq->uq_flags & UQF_UMTXQ) == 0)
			error = 0;
		else
			umtxq_remove(uq);
	} else if ((uq->uq_flags & UQF_UMTXQ) != 0) {
		umtxq_remove(uq);
	}
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

/*
 * Wake up threads sleeping on the specified address.
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
		return (ret);
	umtxq_lock(&key);
	umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}

/*
 * Lock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int mode)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	error = 0;
	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		rv = fueword32(&m->m_owner, &owner);
		if (rv == -1)
			return (EFAULT);
		if (mode == _UMUTEX_WAIT) {
			if (owner == UMUTEX_UNOWNED ||
			    owner == UMUTEX_CONTESTED ||
			    owner == UMUTEX_RB_OWNERDEAD ||
			    owner == UMUTEX_RB_NOTRECOV)
				return (0);
		} else {
			/*
			 * Robust mutex terminated. Kernel duty is to
			 * return EOWNERDEAD to the userspace. The
			 * umutex.m_flags UMUTEX_NONCONSISTENT is set
			 * by the common userspace code.
			 */
			if (owner == UMUTEX_RB_OWNERDEAD) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_RB_OWNERDEAD, &owner,
				    id | UMUTEX_CONTESTED);
				if (rv == -1)
					return (EFAULT);
				if (rv == 0) {
					MPASS(owner == UMUTEX_RB_OWNERDEAD);
					return (EOWNERDEAD); /* success */
				}
				MPASS(rv == 1);
				rv = thread_check_susp(td, false);
				if (rv != 0)
					return (rv);
				continue;
			}
			if (owner == UMUTEX_RB_NOTRECOV)
				return (ENOTRECOVERABLE);

			/*
			 * Try the uncontested case. This should be
			 * done in userland.
			 */
			rv = casueword32(&m->m_owner, UMUTEX_UNOWNED,
			    &owner, id);
			/* The address was invalid. */
			if (rv == -1)
				return (EFAULT);

			/* The acquire succeeded. */
			if (rv == 0) {
				MPASS(owner == UMUTEX_UNOWNED);
				return (0);
			}

			/*
			 * If no one owns it but it is contested, try
			 * to acquire it.
			 */
			MPASS(rv == 1);
			if (owner == UMUTEX_CONTESTED) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_CONTESTED, &owner,
				    id | UMUTEX_CONTESTED);
				/* The address was invalid. */
				if (rv == -1)
					return (EFAULT);
				if (rv == 0) {
					MPASS(owner == UMUTEX_CONTESTED);
					return (0);
				}
				if (rv == 1) {
					rv = thread_check_susp(td, false);
					if (rv != 0)
						return (rv);
				}

				/*
				 * If this failed, the lock has
				 * changed; restart.
				 */
				continue;
			}

			/* rv == 1 but not contested, likely store failure */
			rv = thread_check_susp(td, false);
			if (rv != 0)
				return (rv);
		}

		if (mode == _UMUTEX_TRY)
			return (EBUSY);

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock. If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old,
		    owner | UMUTEX_CONTESTED);

		/* The address was invalid or casueword failed to store. */
		if (rv == -1 || rv == 1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			if (rv == -1)
				return (EFAULT);
			if (rv == 1) {
				rv = thread_check_susp(td, false);
				if (rv != 0)
					return (rv);
			}
			continue;
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		MPASS(old == owner);
		error = umtxq_sleep(uq, "umtxn", timeout == NULL ?
		    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);

		if (error == 0)
			error = thread_check_susp(td, false);
	}

	return (0);
}

/*
 * Unlock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	uint32_t owner, old, id, newlock;
	int error, count;

	id = td->td_tid;

again:
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	newlock = umtx_unlock_val(flags, rb);
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, newlock);
		if (error == -1)
			return (EFAULT);
		if (error == 1) {
			error = thread_check_susp(td, false);
			if (error != 0)
				return (error);
			goto again;
		}
		MPASS(old == owner);
		return (0);
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * zero or one thread is waiting for it; otherwise, it must
	 * be marked as contested.
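	 * A single woken waiter re-enters do_lock_normal() and sets
	 * the contested bit again itself if it has to sleep; with more
	 * than one waiter the bit must stay set so that the next
	 * unlock also enters the kernel to wake the others.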
	 */
	if (count > 1)
		newlock |= UMUTEX_CONTESTED;
	error = casueword32(&m->m_owner, owner, &old, newlock);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (error == 1) {
		if (old != owner)
			return (EINVAL);
		error = thread_check_susp(td, false);
		if (error != 0)
			return (error);
		goto again;
	}
	return (0);
}

/*
 * Check if the mutex is available and wake up a waiter,
 * only for simple mutexes.
 */
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

again:
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != 0 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV)
		return (0);

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	if (count <= 1 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV) {
		error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    UMUTEX_UNOWNED);
		if (error == -1) {
			error = EFAULT;
		} else if (error == 1) {
			umtxq_lock(&key);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			error = thread_check_susp(td, false);
			if (error != 0)
				return (error);
			goto again;
		}
	}

	umtxq_lock(&key);
	if (error == 0 && count != 0) {
		MPASS((owner & ~UMUTEX_CONTESTED) == 0 ||
		    owner == UMUTEX_RB_OWNERDEAD ||
		    owner == UMUTEX_RB_NOTRECOV);
		umtxq_signal(&key, 1);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

/*
 * Check if the mutex has waiters and try to fix the contention bit.
 */
static int
do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old;
	int type;
	int error;
	int count;

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT |
	    UMUTEX_ROBUST)) {
	case 0:
	case UMUTEX_ROBUST:
		type = TYPE_NORMAL_UMUTEX;
		break;
	case UMUTEX_PRIO_INHERIT:
		type = TYPE_PI_UMUTEX;
		break;
	case (UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST):
		type = TYPE_PI_ROBUST_UMUTEX;
		break;
	case UMUTEX_PRIO_PROTECT:
		type = TYPE_PP_UMUTEX;
		break;
	case (UMUTEX_PRIO_PROTECT | UMUTEX_ROBUST):
		type = TYPE_PP_ROBUST_UMUTEX;
		break;
	default:
		return (EINVAL);
	}
	if ((error = umtx_key_get(m, type, GET_SHARE(flags), &key)) != 0)
		return (error);

	owner = 0;
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		error = EFAULT;

	/*
	 * Only repair the contention bit if there is a waiter; this
	 * means the mutex is still being referenced by userland code.
	 * Otherwise don't update any memory.
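	 *
	 * The loop below retries the CAS with the freshly read owner
	 * value until the bit is set, the word faults, or the waiter
	 * condition no longer holds.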
	 */
	while (error == 0 && (owner & UMUTEX_CONTESTED) == 0 &&
	    (count > 1 || (count == 1 && (owner & ~UMUTEX_CONTESTED) != 0))) {
		error = casueword32(&m->m_owner, owner, &old,
		    owner | UMUTEX_CONTESTED);
		if (error == -1) {
			error = EFAULT;
			break;
		}
		if (error == 0) {
			MPASS(old == owner);
			break;
		}
		owner = old;
		error = thread_check_susp(td, false);
	}

	umtxq_lock(&key);
	if (error == EFAULT) {
		umtxq_signal(&key, INT_MAX);
	} else if (count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 ||
	    owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV))
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

static inline struct umtx_pi *
umtx_pi_alloc(int flags)
{
	struct umtx_pi *pi;

	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
	TAILQ_INIT(&pi->pi_blocked);
	atomic_add_int(&umtx_pi_allocated, 1);
	return (pi);
}

static inline void
umtx_pi_free(struct umtx_pi *pi)
{

	uma_zfree(umtx_pi_zone, pi);
	atomic_add_int(&umtx_pi_allocated, -1);
}

/*
 * Adjust the thread's position on a pi_state after its priority has been
 * changed.
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread or higher than the next thread.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}

static struct umtx_pi *
umtx_pi_next(struct umtx_pi *pi)
{
	struct umtx_q *uq_owner;

	if (pi->pi_owner == NULL)
		return (NULL);
	uq_owner = pi->pi_owner->td_umtxq;
	if (uq_owner == NULL)
		return (NULL);
	return (uq_owner->uq_pi_blocked);
}

/*
 * Floyd's Cycle-Finding Algorithm.
 */
static bool
umtx_pi_check_loop(struct umtx_pi *pi)
{
	struct umtx_pi *pi1;	/* fast iterator */

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (false);
	pi1 = pi;
	for (;;) {
		pi = umtx_pi_next(pi);
		if (pi == NULL)
			break;
		pi1 = umtx_pi_next(pi1);
		if (pi1 == NULL)
			break;
		pi1 = umtx_pi_next(pi1);
		if (pi1 == NULL)
			break;
		if (pi == pi1)
			return (true);
	}
	return (false);
}

/*
 * Propagate priority when a thread is blocked on a POSIX
 * PI mutex.
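 *
 * The ownership chain is walked mutex by mutex, lending td's priority
 * to each owner that currently has a lower lent priority; the walk
 * stops at an owner that needs no boost.  umtx_pi_check_loop() above
 * guards against cycles that corrupted userland state could create.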
 */
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;
	if (umtx_pi_check_loop(pi))
		return;

	for (;;) {
		td = pi->pi_owner;
		if (td == NULL || td == curthread)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		thread_lock(td);
		if (td->td_lend_user_pri > pri)
			sched_lend_user_prio(td, pri);
		else {
			thread_unlock(td);
			break;
		}
		thread_unlock(td);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		if (pi == NULL)
			break;
		/* Resort td on the list if needed. */
		umtx_pi_adjust_thread(pi, td);
	}
}

/*
 * Unpropagate priority for a PI mutex when a thread blocked on
 * it is interrupted by a signal or resumed by others.
 */
static void
umtx_repropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);

	if (umtx_pi_check_loop(pi))
		return;
	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		thread_lock(pi->pi_owner);
		sched_lend_user_prio(pi->pi_owner, pri);
		thread_unlock(pi->pi_owner);
		if ((pi = uq_owner->uq_pi_blocked) != NULL)
			umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
	}
}

/*
 * Insert a PI mutex into the owned list.
 */
static void
umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_assert(&umtx_lock, MA_OWNED);
	MPASS(pi->pi_owner == NULL);
	pi->pi_owner = owner;
	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
}

/*
 * Disown a PI mutex, and remove it from the owned list.
 */
static void
umtx_pi_disown(struct umtx_pi *pi)
{

	mtx_assert(&umtx_lock, MA_OWNED);
	TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, pi, pi_link);
	pi->pi_owner = NULL;
}

/*
 * Claim ownership of a PI mutex.
 */
static int
umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq;
	int pri;

	mtx_lock(&umtx_lock);
	if (pi->pi_owner == owner) {
		mtx_unlock(&umtx_lock);
		return (0);
	}

	if (pi->pi_owner != NULL) {
		/*
		 * Userland may have already messed the mutex, sigh.
		 */
		mtx_unlock(&umtx_lock);
		return (EPERM);
	}
	umtx_pi_setowner(pi, owner);
	uq = TAILQ_FIRST(&pi->pi_blocked);
	if (uq != NULL) {
		pri = UPRI(uq->uq_thread);
		thread_lock(owner);
		if (pri < UPRI(owner))
			sched_lend_user_prio(owner, pri);
		thread_unlock(owner);
	}
	mtx_unlock(&umtx_lock);
	return (0);
}

/*
 * Adjust a thread's order position in its blocked PI mutex;
 * this may result in a new priority propagation process.
 */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	mtx_lock(&umtx_lock);
	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	if (pi != NULL) {
		umtx_pi_adjust_thread(pi, td);
		umtx_repropagate_priority(pi);
	}
	mtx_unlock(&umtx_lock);
}

/*
 * Sleep on a PI mutex.
 */
static int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, uint32_t owner,
    const char *wmesg, struct abs_timeout *timo, bool shared)
{
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int error, pri;
#ifdef INVARIANTS
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
#endif
	error = 0;
	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key));
	KASSERT(uc->uc_busy != 0, ("umtx chain is not busy"));
	umtxq_insert(uq);
	mtx_lock(&umtx_lock);
	if (pi->pi_owner == NULL) {
		mtx_unlock(&umtx_lock);
		td1 = tdfind(owner, shared ? -1 : td->td_proc->p_pid);
		mtx_lock(&umtx_lock);
		if (td1 != NULL) {
			if (pi->pi_owner == NULL)
				umtx_pi_setowner(pi, td1);
			PROC_UNLOCK(td1->td_proc);
		}
	}

	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	thread_lock(td);
	td->td_flags |= TDF_UPIBLOCKED;
	thread_unlock(td);
	umtx_propagate_priority(td);
	mtx_unlock(&umtx_lock);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, wmesg, timo);
	umtxq_remove(uq);

	mtx_lock(&umtx_lock);
	uq->uq_pi_blocked = NULL;
	thread_lock(td);
	td->td_flags &= ~TDF_UPIBLOCKED;
	thread_unlock(td);
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_repropagate_priority(pi);
	mtx_unlock(&umtx_lock);
	umtxq_unlock(&uq->uq_key);

	return (error);
}

/*
 * Add a reference count to a PI mutex.
 */
static void
umtx_pi_ref(struct umtx_pi *pi)
{

	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&pi->pi_key));
	pi->pi_refcount++;
}

/*
 * Decrease the reference count of a PI mutex; if the counter
 * drops to zero, its memory is freed.
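 *
 * The last reference also unlinks the umtx_pi from the chain's
 * uc_pi_list and disowns it if an owner is still recorded.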
 */
static void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		mtx_lock(&umtx_lock);
		if (pi->pi_owner != NULL)
			umtx_pi_disown(pi);
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
		    ("blocked queue not empty"));
		mtx_unlock(&umtx_lock);
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		umtx_pi_free(pi);
	}
}

/*
 * Find a PI mutex in the hash table.
 */
static struct umtx_pi *
umtx_pi_lookup(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtx_pi *pi;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);

	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
		if (umtx_key_match(&pi->pi_key, key)) {
			return (pi);
		}
	}
	return (NULL);
}

/*
 * Insert a PI mutex into the hash table.
 */
static inline void
umtx_pi_insert(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
}

/*
 * Lock a PI mutex.
 */
static int
do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, old_owner, owner, old;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	pi = umtx_pi_lookup(&uq->uq_key);
	if (pi == NULL) {
		new_pi = umtx_pi_alloc(M_NOWAIT);
		if (new_pi == NULL) {
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc(M_WAITOK);
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL) {
				umtx_pi_free(new_pi);
				new_pi = NULL;
			}
		}
		if (new_pi != NULL) {
			new_pi->pi_key = uq->uq_key;
			umtx_pi_insert(new_pi);
			pi = new_pi;
		}
	}
	umtx_pi_ref(pi);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case. This should be done in userland.
		 */
		rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}
		/* The acquire succeeded. */
		if (rv == 0) {
			MPASS(owner == UMUTEX_UNOWNED);
			error = 0;
			break;
		}

		if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
			break;
		}

		/*
		 * Avoid overwriting a possible error from sleep due
		 * to the pending signal with suspension check result.
		 */
		if (error == 0) {
			error = thread_check_susp(td, true);
			if (error != 0)
				break;
		}

		/*
		 * If no one owns it but it is contested, try to acquire it.
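		 * A value of UMUTEX_RB_OWNERDEAD is treated the same
		 * way: the dead robust lock is taken over, and once
		 * the claim succeeds EOWNERDEAD is returned to the
		 * caller.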
		 */
		if (owner == UMUTEX_CONTESTED || owner == UMUTEX_RB_OWNERDEAD) {
			old_owner = owner;
			rv = casueword32(&m->m_owner, owner, &owner,
			    id | UMUTEX_CONTESTED);
			/* The address was invalid. */
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (rv == 1) {
				if (error == 0) {
					error = thread_check_susp(td, true);
					if (error != 0)
						break;
				}

				/*
				 * If this failed, the lock could have
				 * changed; restart.
				 */
				continue;
			}

			MPASS(rv == 0);
			MPASS(owner == old_owner);
			umtxq_lock(&uq->uq_key);
			umtxq_busy(&uq->uq_key);
			error = umtx_pi_claim(pi, td);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			if (error != 0) {
				/*
				 * Since we're going to return an
				 * error, restore the m_owner to its
				 * previous, unowned state to avoid
				 * compounding the problem.
				 */
				(void)casuword32(&m->m_owner,
				    id | UMUTEX_CONTESTED, old_owner);
			}
			if (error == 0 && old_owner == UMUTEX_RB_OWNERDEAD)
				error = EOWNERDEAD;
			break;
		}

		if ((owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock. If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old, owner |
		    UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		if (rv == 1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = thread_check_susp(td, true);
			if (error != 0)
				break;

			/*
			 * The lock changed and we need to retry or we
			 * lost a race to the thread unlocking the
			 * umtx. Note that the UMUTEX_RB_OWNERDEAD
			 * value for owner is impossible there.
			 */
			continue;
		}

		umtxq_lock(&uq->uq_key);

		/* We set the contested bit, sleep. */
		MPASS(old == owner);
		error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
		    "umtxpi", timeout == NULL ? NULL : &timo,
		    (flags & USYNC_PROCESS_SHARED) != 0);
		if (error != 0)
			continue;

		error = thread_check_susp(td, false);
		if (error != 0)
			break;
	}

	umtxq_lock(&uq->uq_key);
	umtx_pi_unref(pi);
	umtxq_unlock(&uq->uq_key);

	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Unlock a PI mutex.
 */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	uint32_t id, new_owner, old, owner;
	int count, error, pri;

	id = td->td_tid;

usrloop:
	/*
	 * Make sure we own this mtx.
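	 *
	 * Ownership is checked by comparing our thread id against
	 * m_owner with the contested bit masked off.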
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	new_owner = umtx_unlock_val(flags, rb);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, new_owner);
		if (error == -1)
			return (EFAULT);
		if (error == 1) {
			error = thread_check_susp(td, true);
			if (error != 0)
				return (error);
			goto usrloop;
		}
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count_pi(&key, &uq_first);
	if (uq_first != NULL) {
		mtx_lock(&umtx_lock);
		pi = uq_first->uq_pi_blocked;
		KASSERT(pi != NULL, ("pi == NULL?"));
		if (pi->pi_owner != td && !(rb && pi->pi_owner == NULL)) {
			mtx_unlock(&umtx_lock);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			/* userland messed the mutex */
			return (EPERM);
		}
		uq_me = td->td_umtxq;
		if (pi->pi_owner == td)
			umtx_pi_disown(pi);
		/* get highest priority thread which is still sleeping. */
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		while (uq_first != NULL &&
		    (uq_first->uq_flags & UQF_UMTXQ) == 0) {
			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
		}
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
		if (uq_first)
			umtxq_signal_thread(uq_first);
	} else {
		pi = umtx_pi_lookup(&key);
		/*
		 * A umtx_pi can exist if a signal or timeout removed the
		 * last waiter from the umtxq, but there is still
		 * a thread in do_lock_pi() holding the umtx_pi.
		 */
		if (pi != NULL) {
			/*
			 * The umtx_pi can be unowned, such as when a thread
			 * has just entered do_lock_pi(), allocated the
			 * umtx_pi, and unlocked the umtxq.
			 * If the current thread owns it, it must disown it.
			 */
			mtx_lock(&umtx_lock);
			if (pi->pi_owner == td)
				umtx_pi_disown(pi);
			mtx_unlock(&umtx_lock);
		}
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * zero or one thread is waiting for it; otherwise, it must
	 * be marked as contested.
	 */

	if (count > 1)
		new_owner |= UMUTEX_CONTESTED;
again:
	error = casueword32(&m->m_owner, owner, &old, new_owner);
	if (error == 1) {
		error = thread_check_susp(td, false);
		if (error == 0)
			goto again;
	}
	umtxq_unbusy_unlocked(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (error == 0 && old != owner)
		return (EINVAL);
	return (error);
}

/*
 * Lock a PP mutex.
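 *
 * The priority ceiling is applied before the lock word is touched:
 * the thread's inherited priority is raised to the ceiling (subject
 * to PRIV_SCHED_RTPRIO) and rolled back if the acquisition fails.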
 */
static int
do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct abs_timeout timo;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t ceiling;
	uint32_t owner, id;
	int error, pri, old_inherited_pri, su, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
	for (;;) {
		old_inherited_pri = uq->uq_inherited_pri;
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		rv = fueword32(&m->m_ceilings[0], &ceiling);
		if (rv == -1) {
			error = EFAULT;
			goto out;
		}
		ceiling = RTP_PRIO_MAX - ceiling;
		if (ceiling > RTP_PRIO_MAX) {
			error = EINVAL;
			goto out;
		}

		mtx_lock(&umtx_lock);
		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
			mtx_unlock(&umtx_lock);
			error = EINVAL;
			goto out;
		}
		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
			thread_lock(td);
			if (uq->uq_inherited_pri < UPRI(td))
				sched_lend_user_prio(td, uq->uq_inherited_pri);
			thread_unlock(td);
		}
		mtx_unlock(&umtx_lock);

		rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    id | UMUTEX_CONTESTED);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}
		if (rv == 0) {
			MPASS(owner == UMUTEX_CONTESTED);
			error = 0;
			break;
		}
		/* rv == 1 */
		if (owner == UMUTEX_RB_OWNERDEAD) {
			rv = casueword32(&m->m_owner, UMUTEX_RB_OWNERDEAD,
			    &owner, id | UMUTEX_CONTESTED);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (rv == 0) {
				MPASS(owner == UMUTEX_RB_OWNERDEAD);
				error = EOWNERDEAD; /* success */
				break;
			}

			/*
			 * rv == 1, only check for suspension if we
			 * have not already caught a signal. If we
			 * get an error from the check, the same
			 * condition is checked by the umtxq_sleep()
			 * call below, so we should obliterate the
			 * error to not skip the last loop iteration.
			 */
			if (error == 0) {
				error = thread_check_susp(td, false);
				if (error == 0) {
					if (try != 0)
						error = EBUSY;
					else
						continue;
				}
				error = 0;
			}
		} else if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
		}

		if (try != 0)
			error = EBUSY;

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", timeout == NULL ?
2243 NULL : &timo); 2244 umtxq_remove(uq); 2245 umtxq_unlock(&uq->uq_key); 2246 2247 mtx_lock(&umtx_lock); 2248 uq->uq_inherited_pri = old_inherited_pri; 2249 pri = PRI_MAX; 2250 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2251 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2252 if (uq2 != NULL) { 2253 if (pri > UPRI(uq2->uq_thread)) 2254 pri = UPRI(uq2->uq_thread); 2255 } 2256 } 2257 if (pri > uq->uq_inherited_pri) 2258 pri = uq->uq_inherited_pri; 2259 thread_lock(td); 2260 sched_lend_user_prio(td, pri); 2261 thread_unlock(td); 2262 mtx_unlock(&umtx_lock); 2263 } 2264 2265 if (error != 0 && error != EOWNERDEAD) { 2266 mtx_lock(&umtx_lock); 2267 uq->uq_inherited_pri = old_inherited_pri; 2268 pri = PRI_MAX; 2269 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2270 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2271 if (uq2 != NULL) { 2272 if (pri > UPRI(uq2->uq_thread)) 2273 pri = UPRI(uq2->uq_thread); 2274 } 2275 } 2276 if (pri > uq->uq_inherited_pri) 2277 pri = uq->uq_inherited_pri; 2278 thread_lock(td); 2279 sched_lend_user_prio(td, pri); 2280 thread_unlock(td); 2281 mtx_unlock(&umtx_lock); 2282 } 2283 2284 out: 2285 umtxq_unbusy_unlocked(&uq->uq_key); 2286 umtx_key_release(&uq->uq_key); 2287 return (error); 2288 } 2289 2290 /* 2291 * Unlock a PP mutex. 2292 */ 2293 static int 2294 do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, bool rb) 2295 { 2296 struct umtx_key key; 2297 struct umtx_q *uq, *uq2; 2298 struct umtx_pi *pi; 2299 uint32_t id, owner, rceiling; 2300 int error, pri, new_inherited_pri, su; 2301 2302 id = td->td_tid; 2303 uq = td->td_umtxq; 2304 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); 2305 2306 /* 2307 * Make sure we own this mtx. 2308 */ 2309 error = fueword32(&m->m_owner, &owner); 2310 if (error == -1) 2311 return (EFAULT); 2312 2313 if ((owner & ~UMUTEX_CONTESTED) != id) 2314 return (EPERM); 2315 2316 error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t)); 2317 if (error != 0) 2318 return (error); 2319 2320 if (rceiling == -1) 2321 new_inherited_pri = PRI_MAX; 2322 else { 2323 rceiling = RTP_PRIO_MAX - rceiling; 2324 if (rceiling > RTP_PRIO_MAX) 2325 return (EINVAL); 2326 new_inherited_pri = PRI_MIN_REALTIME + rceiling; 2327 } 2328 2329 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2330 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2331 &key)) != 0) 2332 return (error); 2333 umtxq_lock(&key); 2334 umtxq_busy(&key); 2335 umtxq_unlock(&key); 2336 /* 2337 * For priority protected mutex, always set unlocked state 2338 * to UMUTEX_CONTESTED, so that userland always enters kernel 2339 * to lock the mutex, it is necessary because thread priority 2340 * has to be adjusted for such mutex. 
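 *
 * Concretely, where a normal mutex would be released by storing
 * UMUTEX_UNOWNED, the store below releases a PP mutex with
 * umtx_unlock_val(flags, rb) | UMUTEX_CONTESTED, so a userland
 * CAS from UMUTEX_UNOWNED can never succeed and every subsequent
 * lock attempt is forced into do_lock_pp().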
2341 */ 2342 error = suword32(&m->m_owner, umtx_unlock_val(flags, rb) | 2343 UMUTEX_CONTESTED); 2344 2345 umtxq_lock(&key); 2346 if (error == 0) 2347 umtxq_signal(&key, 1); 2348 umtxq_unbusy(&key); 2349 umtxq_unlock(&key); 2350 2351 if (error == -1) 2352 error = EFAULT; 2353 else { 2354 mtx_lock(&umtx_lock); 2355 if (su != 0) 2356 uq->uq_inherited_pri = new_inherited_pri; 2357 pri = PRI_MAX; 2358 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2359 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2360 if (uq2 != NULL) { 2361 if (pri > UPRI(uq2->uq_thread)) 2362 pri = UPRI(uq2->uq_thread); 2363 } 2364 } 2365 if (pri > uq->uq_inherited_pri) 2366 pri = uq->uq_inherited_pri; 2367 thread_lock(td); 2368 sched_lend_user_prio(td, pri); 2369 thread_unlock(td); 2370 mtx_unlock(&umtx_lock); 2371 } 2372 umtx_key_release(&key); 2373 return (error); 2374 } 2375 2376 static int 2377 do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling, 2378 uint32_t *old_ceiling) 2379 { 2380 struct umtx_q *uq; 2381 uint32_t flags, id, owner, save_ceiling; 2382 int error, rv, rv1; 2383 2384 error = fueword32(&m->m_flags, &flags); 2385 if (error == -1) 2386 return (EFAULT); 2387 if ((flags & UMUTEX_PRIO_PROTECT) == 0) 2388 return (EINVAL); 2389 if (ceiling > RTP_PRIO_MAX) 2390 return (EINVAL); 2391 id = td->td_tid; 2392 uq = td->td_umtxq; 2393 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2394 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2395 &uq->uq_key)) != 0) 2396 return (error); 2397 for (;;) { 2398 umtxq_lock(&uq->uq_key); 2399 umtxq_busy(&uq->uq_key); 2400 umtxq_unlock(&uq->uq_key); 2401 2402 rv = fueword32(&m->m_ceilings[0], &save_ceiling); 2403 if (rv == -1) { 2404 error = EFAULT; 2405 break; 2406 } 2407 2408 rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, 2409 id | UMUTEX_CONTESTED); 2410 if (rv == -1) { 2411 error = EFAULT; 2412 break; 2413 } 2414 2415 if (rv == 0) { 2416 MPASS(owner == UMUTEX_CONTESTED); 2417 rv = suword32(&m->m_ceilings[0], ceiling); 2418 rv1 = suword32(&m->m_owner, UMUTEX_CONTESTED); 2419 error = (rv == 0 && rv1 == 0) ? 0: EFAULT; 2420 break; 2421 } 2422 2423 if ((owner & ~UMUTEX_CONTESTED) == id) { 2424 rv = suword32(&m->m_ceilings[0], ceiling); 2425 error = rv == 0 ? 0 : EFAULT; 2426 break; 2427 } 2428 2429 if (owner == UMUTEX_RB_OWNERDEAD) { 2430 error = EOWNERDEAD; 2431 break; 2432 } else if (owner == UMUTEX_RB_NOTRECOV) { 2433 error = ENOTRECOVERABLE; 2434 break; 2435 } 2436 2437 /* 2438 * If we caught a signal, we have retried and now 2439 * exit immediately. 2440 */ 2441 if (error != 0) 2442 break; 2443 2444 /* 2445 * We set the contested bit, sleep. Otherwise the lock changed 2446 * and we need to retry or we lost a race to the thread 2447 * unlocking the umtx. 2448 */ 2449 umtxq_lock(&uq->uq_key); 2450 umtxq_insert(uq); 2451 umtxq_unbusy(&uq->uq_key); 2452 error = umtxq_sleep(uq, "umtxpp", NULL); 2453 umtxq_remove(uq); 2454 umtxq_unlock(&uq->uq_key); 2455 } 2456 umtxq_lock(&uq->uq_key); 2457 if (error == 0) 2458 umtxq_signal(&uq->uq_key, INT_MAX); 2459 umtxq_unbusy(&uq->uq_key); 2460 umtxq_unlock(&uq->uq_key); 2461 umtx_key_release(&uq->uq_key); 2462 if (error == 0 && old_ceiling != NULL) { 2463 rv = suword32(old_ceiling, save_ceiling); 2464 error = rv == 0 ? 0 : EFAULT; 2465 } 2466 return (error); 2467 } 2468 2469 /* 2470 * Lock a userland POSIX mutex. 
2471 */ 2472 static int 2473 do_lock_umutex(struct thread *td, struct umutex *m, 2474 struct _umtx_time *timeout, int mode) 2475 { 2476 uint32_t flags; 2477 int error; 2478 2479 error = fueword32(&m->m_flags, &flags); 2480 if (error == -1) 2481 return (EFAULT); 2482 2483 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2484 case 0: 2485 error = do_lock_normal(td, m, flags, timeout, mode); 2486 break; 2487 case UMUTEX_PRIO_INHERIT: 2488 error = do_lock_pi(td, m, flags, timeout, mode); 2489 break; 2490 case UMUTEX_PRIO_PROTECT: 2491 error = do_lock_pp(td, m, flags, timeout, mode); 2492 break; 2493 default: 2494 return (EINVAL); 2495 } 2496 if (timeout == NULL) { 2497 if (error == EINTR && mode != _UMUTEX_WAIT) 2498 error = ERESTART; 2499 } else { 2500 /* Timed-locking is not restarted. */ 2501 if (error == ERESTART) 2502 error = EINTR; 2503 } 2504 return (error); 2505 } 2506 2507 /* 2508 * Unlock a userland POSIX mutex. 2509 */ 2510 static int 2511 do_unlock_umutex(struct thread *td, struct umutex *m, bool rb) 2512 { 2513 uint32_t flags; 2514 int error; 2515 2516 error = fueword32(&m->m_flags, &flags); 2517 if (error == -1) 2518 return (EFAULT); 2519 2520 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2521 case 0: 2522 return (do_unlock_normal(td, m, flags, rb)); 2523 case UMUTEX_PRIO_INHERIT: 2524 return (do_unlock_pi(td, m, flags, rb)); 2525 case UMUTEX_PRIO_PROTECT: 2526 return (do_unlock_pp(td, m, flags, rb)); 2527 } 2528 2529 return (EINVAL); 2530 } 2531 2532 static int 2533 do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m, 2534 struct timespec *timeout, u_long wflags) 2535 { 2536 struct abs_timeout timo; 2537 struct umtx_q *uq; 2538 uint32_t flags, clockid, hasw; 2539 int error; 2540 2541 uq = td->td_umtxq; 2542 error = fueword32(&cv->c_flags, &flags); 2543 if (error == -1) 2544 return (EFAULT); 2545 error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key); 2546 if (error != 0) 2547 return (error); 2548 2549 if ((wflags & CVWAIT_CLOCKID) != 0) { 2550 error = fueword32(&cv->c_clockid, &clockid); 2551 if (error == -1) { 2552 umtx_key_release(&uq->uq_key); 2553 return (EFAULT); 2554 } 2555 if (clockid < CLOCK_REALTIME || 2556 clockid >= CLOCK_THREAD_CPUTIME_ID) { 2557 /* hmm, only HW clock id will work. */ 2558 umtx_key_release(&uq->uq_key); 2559 return (EINVAL); 2560 } 2561 } else { 2562 clockid = CLOCK_REALTIME; 2563 } 2564 2565 umtxq_lock(&uq->uq_key); 2566 umtxq_busy(&uq->uq_key); 2567 umtxq_insert(uq); 2568 umtxq_unlock(&uq->uq_key); 2569 2570 /* 2571 * Set c_has_waiters to 1 before releasing user mutex, also 2572 * don't modify cache line when unnecessary. 2573 */ 2574 error = fueword32(&cv->c_has_waiters, &hasw); 2575 if (error == 0 && hasw == 0) 2576 suword32(&cv->c_has_waiters, 1); 2577 2578 umtxq_unbusy_unlocked(&uq->uq_key); 2579 2580 error = do_unlock_umutex(td, m, false); 2581 2582 if (timeout != NULL) 2583 abs_timeout_init(&timo, clockid, (wflags & CVWAIT_ABSTIME) != 0, 2584 timeout); 2585 2586 umtxq_lock(&uq->uq_key); 2587 if (error == 0) { 2588 error = umtxq_sleep(uq, "ucond", timeout == NULL ? 2589 NULL : &timo); 2590 } 2591 2592 if ((uq->uq_flags & UQF_UMTXQ) == 0) 2593 error = 0; 2594 else { 2595 /* 2596 * This must be timeout,interrupted by signal or 2597 * surprious wakeup, clear c_has_waiter flag when 2598 * necessary. 
2599 */ 2600 umtxq_busy(&uq->uq_key); 2601 if ((uq->uq_flags & UQF_UMTXQ) != 0) { 2602 int oldlen = uq->uq_cur_queue->length; 2603 umtxq_remove(uq); 2604 if (oldlen == 1) { 2605 umtxq_unlock(&uq->uq_key); 2606 suword32(&cv->c_has_waiters, 0); 2607 umtxq_lock(&uq->uq_key); 2608 } 2609 } 2610 umtxq_unbusy(&uq->uq_key); 2611 if (error == ERESTART) 2612 error = EINTR; 2613 } 2614 2615 umtxq_unlock(&uq->uq_key); 2616 umtx_key_release(&uq->uq_key); 2617 return (error); 2618 } 2619 2620 /* 2621 * Signal a userland condition variable. 2622 */ 2623 static int 2624 do_cv_signal(struct thread *td, struct ucond *cv) 2625 { 2626 struct umtx_key key; 2627 int error, cnt, nwake; 2628 uint32_t flags; 2629 2630 error = fueword32(&cv->c_flags, &flags); 2631 if (error == -1) 2632 return (EFAULT); 2633 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2634 return (error); 2635 umtxq_lock(&key); 2636 umtxq_busy(&key); 2637 cnt = umtxq_count(&key); 2638 nwake = umtxq_signal(&key, 1); 2639 if (cnt <= nwake) { 2640 umtxq_unlock(&key); 2641 error = suword32(&cv->c_has_waiters, 0); 2642 if (error == -1) 2643 error = EFAULT; 2644 umtxq_lock(&key); 2645 } 2646 umtxq_unbusy(&key); 2647 umtxq_unlock(&key); 2648 umtx_key_release(&key); 2649 return (error); 2650 } 2651 2652 static int 2653 do_cv_broadcast(struct thread *td, struct ucond *cv) 2654 { 2655 struct umtx_key key; 2656 int error; 2657 uint32_t flags; 2658 2659 error = fueword32(&cv->c_flags, &flags); 2660 if (error == -1) 2661 return (EFAULT); 2662 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2663 return (error); 2664 2665 umtxq_lock(&key); 2666 umtxq_busy(&key); 2667 umtxq_signal(&key, INT_MAX); 2668 umtxq_unlock(&key); 2669 2670 error = suword32(&cv->c_has_waiters, 0); 2671 if (error == -1) 2672 error = EFAULT; 2673 2674 umtxq_unbusy_unlocked(&key); 2675 2676 umtx_key_release(&key); 2677 return (error); 2678 } 2679 2680 static int 2681 do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, 2682 struct _umtx_time *timeout) 2683 { 2684 struct abs_timeout timo; 2685 struct umtx_q *uq; 2686 uint32_t flags, wrflags; 2687 int32_t state, oldstate; 2688 int32_t blocked_readers; 2689 int error, error1, rv; 2690 2691 uq = td->td_umtxq; 2692 error = fueword32(&rwlock->rw_flags, &flags); 2693 if (error == -1) 2694 return (EFAULT); 2695 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2696 if (error != 0) 2697 return (error); 2698 2699 if (timeout != NULL) 2700 abs_timeout_init2(&timo, timeout); 2701 2702 wrflags = URWLOCK_WRITE_OWNER; 2703 if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER)) 2704 wrflags |= URWLOCK_WRITE_WAITERS; 2705 2706 for (;;) { 2707 rv = fueword32(&rwlock->rw_state, &state); 2708 if (rv == -1) { 2709 umtx_key_release(&uq->uq_key); 2710 return (EFAULT); 2711 } 2712 2713 /* try to lock it */ 2714 while (!(state & wrflags)) { 2715 if (__predict_false(URWLOCK_READER_COUNT(state) == 2716 URWLOCK_MAX_READERS)) { 2717 umtx_key_release(&uq->uq_key); 2718 return (EAGAIN); 2719 } 2720 rv = casueword32(&rwlock->rw_state, state, 2721 &oldstate, state + 1); 2722 if (rv == -1) { 2723 umtx_key_release(&uq->uq_key); 2724 return (EFAULT); 2725 } 2726 if (rv == 0) { 2727 MPASS(oldstate == state); 2728 umtx_key_release(&uq->uq_key); 2729 return (0); 2730 } 2731 error = thread_check_susp(td, true); 2732 if (error != 0) 2733 break; 2734 state = oldstate; 2735 } 2736 2737 if (error) 2738 break; 2739 2740 /* grab monitor lock */ 2741 umtxq_lock(&uq->uq_key); 2742 
umtxq_busy(&uq->uq_key); 2743 umtxq_unlock(&uq->uq_key); 2744 2745 /* 2746 * re-read the state, in case it changed between the try-lock above 2747 * and the check below 2748 */ 2749 rv = fueword32(&rwlock->rw_state, &state); 2750 if (rv == -1) 2751 error = EFAULT; 2752 2753 /* set read contention bit */ 2754 while (error == 0 && (state & wrflags) && 2755 !(state & URWLOCK_READ_WAITERS)) { 2756 rv = casueword32(&rwlock->rw_state, state, 2757 &oldstate, state | URWLOCK_READ_WAITERS); 2758 if (rv == -1) { 2759 error = EFAULT; 2760 break; 2761 } 2762 if (rv == 0) { 2763 MPASS(oldstate == state); 2764 goto sleep; 2765 } 2766 state = oldstate; 2767 error = thread_check_susp(td, false); 2768 if (error != 0) 2769 break; 2770 } 2771 if (error != 0) { 2772 umtxq_unbusy_unlocked(&uq->uq_key); 2773 break; 2774 } 2775 2776 /* state is changed while setting flags, restart */ 2777 if (!(state & wrflags)) { 2778 umtxq_unbusy_unlocked(&uq->uq_key); 2779 error = thread_check_susp(td, true); 2780 if (error != 0) 2781 break; 2782 continue; 2783 } 2784 2785 sleep: 2786 /* 2787 * Contention bit is set, before sleeping, increase 2788 * read waiter count. 2789 */ 2790 rv = fueword32(&rwlock->rw_blocked_readers, 2791 &blocked_readers); 2792 if (rv == -1) { 2793 umtxq_unbusy_unlocked(&uq->uq_key); 2794 error = EFAULT; 2795 break; 2796 } 2797 suword32(&rwlock->rw_blocked_readers, blocked_readers+1); 2798 2799 while (state & wrflags) { 2800 umtxq_lock(&uq->uq_key); 2801 umtxq_insert(uq); 2802 umtxq_unbusy(&uq->uq_key); 2803 2804 error = umtxq_sleep(uq, "urdlck", timeout == NULL ? 2805 NULL : &timo); 2806 2807 umtxq_busy(&uq->uq_key); 2808 umtxq_remove(uq); 2809 umtxq_unlock(&uq->uq_key); 2810 if (error) 2811 break; 2812 rv = fueword32(&rwlock->rw_state, &state); 2813 if (rv == -1) { 2814 error = EFAULT; 2815 break; 2816 } 2817 } 2818 2819 /* decrease read waiter count, and may clear read contention bit */ 2820 rv = fueword32(&rwlock->rw_blocked_readers, 2821 &blocked_readers); 2822 if (rv == -1) { 2823 umtxq_unbusy_unlocked(&uq->uq_key); 2824 error = EFAULT; 2825 break; 2826 } 2827 suword32(&rwlock->rw_blocked_readers, blocked_readers-1); 2828 if (blocked_readers == 1) { 2829 rv = fueword32(&rwlock->rw_state, &state); 2830 if (rv == -1) { 2831 umtxq_unbusy_unlocked(&uq->uq_key); 2832 error = EFAULT; 2833 break; 2834 } 2835 for (;;) { 2836 rv = casueword32(&rwlock->rw_state, state, 2837 &oldstate, state & ~URWLOCK_READ_WAITERS); 2838 if (rv == -1) { 2839 error = EFAULT; 2840 break; 2841 } 2842 if (rv == 0) { 2843 MPASS(oldstate == state); 2844 break; 2845 } 2846 state = oldstate; 2847 error1 = thread_check_susp(td, false); 2848 if (error1 != 0) { 2849 if (error == 0) 2850 error = error1; 2851 break; 2852 } 2853 } 2854 } 2855 2856 umtxq_unbusy_unlocked(&uq->uq_key); 2857 if (error != 0) 2858 break; 2859 } 2860 umtx_key_release(&uq->uq_key); 2861 if (error == ERESTART) 2862 error = EINTR; 2863 return (error); 2864 } 2865 2866 static int 2867 do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout) 2868 { 2869 struct abs_timeout timo; 2870 struct umtx_q *uq; 2871 uint32_t flags; 2872 int32_t state, oldstate; 2873 int32_t blocked_writers; 2874 int32_t blocked_readers; 2875 int error, error1, rv; 2876 2877 uq = td->td_umtxq; 2878 error = fueword32(&rwlock->rw_flags, &flags); 2879 if (error == -1) 2880 return (EFAULT); 2881 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2882 if (error != 0) 2883 return (error); 2884 2885 if (timeout != NULL) 2886 abs_timeout_init2(&timo, 
timeout); 2887 2888 blocked_readers = 0; 2889 for (;;) { 2890 rv = fueword32(&rwlock->rw_state, &state); 2891 if (rv == -1) { 2892 umtx_key_release(&uq->uq_key); 2893 return (EFAULT); 2894 } 2895 while ((state & URWLOCK_WRITE_OWNER) == 0 && 2896 URWLOCK_READER_COUNT(state) == 0) { 2897 rv = casueword32(&rwlock->rw_state, state, 2898 &oldstate, state | URWLOCK_WRITE_OWNER); 2899 if (rv == -1) { 2900 umtx_key_release(&uq->uq_key); 2901 return (EFAULT); 2902 } 2903 if (rv == 0) { 2904 MPASS(oldstate == state); 2905 umtx_key_release(&uq->uq_key); 2906 return (0); 2907 } 2908 state = oldstate; 2909 error = thread_check_susp(td, true); 2910 if (error != 0) 2911 break; 2912 } 2913 2914 if (error) { 2915 if ((state & (URWLOCK_WRITE_OWNER | 2916 URWLOCK_WRITE_WAITERS)) == 0 && 2917 blocked_readers != 0) { 2918 umtxq_lock(&uq->uq_key); 2919 umtxq_busy(&uq->uq_key); 2920 umtxq_signal_queue(&uq->uq_key, INT_MAX, 2921 UMTX_SHARED_QUEUE); 2922 umtxq_unbusy(&uq->uq_key); 2923 umtxq_unlock(&uq->uq_key); 2924 } 2925 2926 break; 2927 } 2928 2929 /* grab monitor lock */ 2930 umtxq_lock(&uq->uq_key); 2931 umtxq_busy(&uq->uq_key); 2932 umtxq_unlock(&uq->uq_key); 2933 2934 /* 2935 * Re-read the state, in case it changed between the 2936 * try-lock above and the check below. 2937 */ 2938 rv = fueword32(&rwlock->rw_state, &state); 2939 if (rv == -1) 2940 error = EFAULT; 2941 2942 while (error == 0 && ((state & URWLOCK_WRITE_OWNER) || 2943 URWLOCK_READER_COUNT(state) != 0) && 2944 (state & URWLOCK_WRITE_WAITERS) == 0) { 2945 rv = casueword32(&rwlock->rw_state, state, 2946 &oldstate, state | URWLOCK_WRITE_WAITERS); 2947 if (rv == -1) { 2948 error = EFAULT; 2949 break; 2950 } 2951 if (rv == 0) { 2952 MPASS(oldstate == state); 2953 goto sleep; 2954 } 2955 state = oldstate; 2956 error = thread_check_susp(td, false); 2957 if (error != 0) 2958 break; 2959 } 2960 if (error != 0) { 2961 umtxq_unbusy_unlocked(&uq->uq_key); 2962 break; 2963 } 2964 2965 if ((state & URWLOCK_WRITE_OWNER) == 0 && 2966 URWLOCK_READER_COUNT(state) == 0) { 2967 umtxq_unbusy_unlocked(&uq->uq_key); 2968 error = thread_check_susp(td, false); 2969 if (error != 0) 2970 break; 2971 continue; 2972 } 2973 sleep: 2974 rv = fueword32(&rwlock->rw_blocked_writers, 2975 &blocked_writers); 2976 if (rv == -1) { 2977 umtxq_unbusy_unlocked(&uq->uq_key); 2978 error = EFAULT; 2979 break; 2980 } 2981 suword32(&rwlock->rw_blocked_writers, blocked_writers + 1); 2982 2983 while ((state & URWLOCK_WRITE_OWNER) || 2984 URWLOCK_READER_COUNT(state) != 0) { 2985 umtxq_lock(&uq->uq_key); 2986 umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE); 2987 umtxq_unbusy(&uq->uq_key); 2988 2989 error = umtxq_sleep(uq, "uwrlck", timeout == NULL ? 
2990 NULL : &timo); 2991 2992 umtxq_busy(&uq->uq_key); 2993 umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE); 2994 umtxq_unlock(&uq->uq_key); 2995 if (error) 2996 break; 2997 rv = fueword32(&rwlock->rw_state, &state); 2998 if (rv == -1) { 2999 error = EFAULT; 3000 break; 3001 } 3002 } 3003 3004 rv = fueword32(&rwlock->rw_blocked_writers, 3005 &blocked_writers); 3006 if (rv == -1) { 3007 umtxq_unbusy_unlocked(&uq->uq_key); 3008 error = EFAULT; 3009 break; 3010 } 3011 suword32(&rwlock->rw_blocked_writers, blocked_writers-1); 3012 if (blocked_writers == 1) { 3013 rv = fueword32(&rwlock->rw_state, &state); 3014 if (rv == -1) { 3015 umtxq_unbusy_unlocked(&uq->uq_key); 3016 error = EFAULT; 3017 break; 3018 } 3019 for (;;) { 3020 rv = casueword32(&rwlock->rw_state, state, 3021 &oldstate, state & ~URWLOCK_WRITE_WAITERS); 3022 if (rv == -1) { 3023 error = EFAULT; 3024 break; 3025 } 3026 if (rv == 0) { 3027 MPASS(oldstate == state); 3028 break; 3029 } 3030 state = oldstate; 3031 error1 = thread_check_susp(td, false); 3032 /* 3033 * We are leaving the URWLOCK_WRITE_WAITERS 3034 * behind, but this should not harm the 3035 * correctness. 3036 */ 3037 if (error1 != 0) { 3038 if (error == 0) 3039 error = error1; 3040 break; 3041 } 3042 } 3043 rv = fueword32(&rwlock->rw_blocked_readers, 3044 &blocked_readers); 3045 if (rv == -1) { 3046 umtxq_unbusy_unlocked(&uq->uq_key); 3047 error = EFAULT; 3048 break; 3049 } 3050 } else 3051 blocked_readers = 0; 3052 3053 umtxq_unbusy_unlocked(&uq->uq_key); 3054 } 3055 3056 umtx_key_release(&uq->uq_key); 3057 if (error == ERESTART) 3058 error = EINTR; 3059 return (error); 3060 } 3061 3062 static int 3063 do_rw_unlock(struct thread *td, struct urwlock *rwlock) 3064 { 3065 struct umtx_q *uq; 3066 uint32_t flags; 3067 int32_t state, oldstate; 3068 int error, rv, q, count; 3069 3070 uq = td->td_umtxq; 3071 error = fueword32(&rwlock->rw_flags, &flags); 3072 if (error == -1) 3073 return (EFAULT); 3074 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3075 if (error != 0) 3076 return (error); 3077 3078 error = fueword32(&rwlock->rw_state, &state); 3079 if (error == -1) { 3080 error = EFAULT; 3081 goto out; 3082 } 3083 if (state & URWLOCK_WRITE_OWNER) { 3084 for (;;) { 3085 rv = casueword32(&rwlock->rw_state, state, 3086 &oldstate, state & ~URWLOCK_WRITE_OWNER); 3087 if (rv == -1) { 3088 error = EFAULT; 3089 goto out; 3090 } 3091 if (rv == 1) { 3092 state = oldstate; 3093 if (!(oldstate & URWLOCK_WRITE_OWNER)) { 3094 error = EPERM; 3095 goto out; 3096 } 3097 error = thread_check_susp(td, true); 3098 if (error != 0) 3099 goto out; 3100 } else 3101 break; 3102 } 3103 } else if (URWLOCK_READER_COUNT(state) != 0) { 3104 for (;;) { 3105 rv = casueword32(&rwlock->rw_state, state, 3106 &oldstate, state - 1); 3107 if (rv == -1) { 3108 error = EFAULT; 3109 goto out; 3110 } 3111 if (rv == 1) { 3112 state = oldstate; 3113 if (URWLOCK_READER_COUNT(oldstate) == 0) { 3114 error = EPERM; 3115 goto out; 3116 } 3117 error = thread_check_susp(td, true); 3118 if (error != 0) 3119 goto out; 3120 } else 3121 break; 3122 } 3123 } else { 3124 error = EPERM; 3125 goto out; 3126 } 3127 3128 count = 0; 3129 3130 if (!(flags & URWLOCK_PREFER_READER)) { 3131 if (state & URWLOCK_WRITE_WAITERS) { 3132 count = 1; 3133 q = UMTX_EXCLUSIVE_QUEUE; 3134 } else if (state & URWLOCK_READ_WAITERS) { 3135 count = INT_MAX; 3136 q = UMTX_SHARED_QUEUE; 3137 } 3138 } else { 3139 if (state & URWLOCK_READ_WAITERS) { 3140 count = INT_MAX; 3141 q = UMTX_SHARED_QUEUE; 3142 } else if (state & 
URWLOCK_WRITE_WAITERS) { 3143 count = 1; 3144 q = UMTX_EXCLUSIVE_QUEUE; 3145 } 3146 } 3147 3148 if (count) { 3149 umtxq_lock(&uq->uq_key); 3150 umtxq_busy(&uq->uq_key); 3151 umtxq_signal_queue(&uq->uq_key, count, q); 3152 umtxq_unbusy(&uq->uq_key); 3153 umtxq_unlock(&uq->uq_key); 3154 } 3155 out: 3156 umtx_key_release(&uq->uq_key); 3157 return (error); 3158 } 3159 3160 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3161 static int 3162 do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout) 3163 { 3164 struct abs_timeout timo; 3165 struct umtx_q *uq; 3166 uint32_t flags, count, count1; 3167 int error, rv, rv1; 3168 3169 uq = td->td_umtxq; 3170 error = fueword32(&sem->_flags, &flags); 3171 if (error == -1) 3172 return (EFAULT); 3173 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3174 if (error != 0) 3175 return (error); 3176 3177 if (timeout != NULL) 3178 abs_timeout_init2(&timo, timeout); 3179 3180 again: 3181 umtxq_lock(&uq->uq_key); 3182 umtxq_busy(&uq->uq_key); 3183 umtxq_insert(uq); 3184 umtxq_unlock(&uq->uq_key); 3185 rv = casueword32(&sem->_has_waiters, 0, &count1, 1); 3186 if (rv == 0) 3187 rv1 = fueword32(&sem->_count, &count); 3188 if (rv == -1 || (rv == 0 && (rv1 == -1 || count != 0)) || 3189 (rv == 1 && count1 == 0)) { 3190 umtxq_lock(&uq->uq_key); 3191 umtxq_unbusy(&uq->uq_key); 3192 umtxq_remove(uq); 3193 umtxq_unlock(&uq->uq_key); 3194 if (rv == 1) { 3195 rv = thread_check_susp(td, true); 3196 if (rv == 0) 3197 goto again; 3198 error = rv; 3199 goto out; 3200 } 3201 if (rv == 0) 3202 rv = rv1; 3203 error = rv == -1 ? EFAULT : 0; 3204 goto out; 3205 } 3206 umtxq_lock(&uq->uq_key); 3207 umtxq_unbusy(&uq->uq_key); 3208 3209 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 3210 3211 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3212 error = 0; 3213 else { 3214 umtxq_remove(uq); 3215 /* A relative timeout cannot be restarted. */ 3216 if (error == ERESTART && timeout != NULL && 3217 (timeout->_flags & UMTX_ABSTIME) == 0) 3218 error = EINTR; 3219 } 3220 umtxq_unlock(&uq->uq_key); 3221 out: 3222 umtx_key_release(&uq->uq_key); 3223 return (error); 3224 } 3225 3226 /* 3227 * Signal a userland semaphore. 3228 */ 3229 static int 3230 do_sem_wake(struct thread *td, struct _usem *sem) 3231 { 3232 struct umtx_key key; 3233 int error, cnt; 3234 uint32_t flags; 3235 3236 error = fueword32(&sem->_flags, &flags); 3237 if (error == -1) 3238 return (EFAULT); 3239 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3240 return (error); 3241 umtxq_lock(&key); 3242 umtxq_busy(&key); 3243 cnt = umtxq_count(&key); 3244 if (cnt > 0) { 3245 /* 3246 * Check if count is greater than 0, this means the memory is 3247 * still being referenced by user code, so we can safely 3248 * update _has_waiters flag. 
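 *
 * When exactly one waiter remains, it is about to be woken below,
 * so _has_waiters can be cleared first; subsequent sem_post()
 * calls in userland may then skip the kernel wakeup entirely until
 * a new waiter registers itself.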
3249 */ 3250 if (cnt == 1) { 3251 umtxq_unlock(&key); 3252 error = suword32(&sem->_has_waiters, 0); 3253 umtxq_lock(&key); 3254 if (error == -1) 3255 error = EFAULT; 3256 } 3257 umtxq_signal(&key, 1); 3258 } 3259 umtxq_unbusy(&key); 3260 umtxq_unlock(&key); 3261 umtx_key_release(&key); 3262 return (error); 3263 } 3264 #endif 3265 3266 static int 3267 do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout) 3268 { 3269 struct abs_timeout timo; 3270 struct umtx_q *uq; 3271 uint32_t count, flags; 3272 int error, rv; 3273 3274 uq = td->td_umtxq; 3275 flags = fuword32(&sem->_flags); 3276 if (timeout != NULL) 3277 abs_timeout_init2(&timo, timeout); 3278 3279 again: 3280 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3281 if (error != 0) 3282 return (error); 3283 umtxq_lock(&uq->uq_key); 3284 umtxq_busy(&uq->uq_key); 3285 umtxq_insert(uq); 3286 umtxq_unlock(&uq->uq_key); 3287 rv = fueword32(&sem->_count, &count); 3288 if (rv == -1) { 3289 umtxq_lock(&uq->uq_key); 3290 umtxq_unbusy(&uq->uq_key); 3291 umtxq_remove(uq); 3292 umtxq_unlock(&uq->uq_key); 3293 umtx_key_release(&uq->uq_key); 3294 return (EFAULT); 3295 } 3296 for (;;) { 3297 if (USEM_COUNT(count) != 0) { 3298 umtxq_lock(&uq->uq_key); 3299 umtxq_unbusy(&uq->uq_key); 3300 umtxq_remove(uq); 3301 umtxq_unlock(&uq->uq_key); 3302 umtx_key_release(&uq->uq_key); 3303 return (0); 3304 } 3305 if (count == USEM_HAS_WAITERS) 3306 break; 3307 rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS); 3308 if (rv == 0) 3309 break; 3310 umtxq_lock(&uq->uq_key); 3311 umtxq_unbusy(&uq->uq_key); 3312 umtxq_remove(uq); 3313 umtxq_unlock(&uq->uq_key); 3314 umtx_key_release(&uq->uq_key); 3315 if (rv == -1) 3316 return (EFAULT); 3317 rv = thread_check_susp(td, true); 3318 if (rv != 0) 3319 return (rv); 3320 goto again; 3321 } 3322 umtxq_lock(&uq->uq_key); 3323 umtxq_unbusy(&uq->uq_key); 3324 3325 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 3326 3327 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3328 error = 0; 3329 else { 3330 umtxq_remove(uq); 3331 if (timeout != NULL && (timeout->_flags & UMTX_ABSTIME) == 0) { 3332 /* A relative timeout cannot be restarted. */ 3333 if (error == ERESTART) 3334 error = EINTR; 3335 if (error == EINTR) { 3336 abs_timeout_update(&timo); 3337 timespecsub(&timo.end, &timo.cur, 3338 &timeout->_timeout); 3339 } 3340 } 3341 } 3342 umtxq_unlock(&uq->uq_key); 3343 umtx_key_release(&uq->uq_key); 3344 return (error); 3345 } 3346 3347 /* 3348 * Signal a userland semaphore. 3349 */ 3350 static int 3351 do_sem2_wake(struct thread *td, struct _usem2 *sem) 3352 { 3353 struct umtx_key key; 3354 int error, cnt, rv; 3355 uint32_t count, flags; 3356 3357 rv = fueword32(&sem->_flags, &flags); 3358 if (rv == -1) 3359 return (EFAULT); 3360 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3361 return (error); 3362 umtxq_lock(&key); 3363 umtxq_busy(&key); 3364 cnt = umtxq_count(&key); 3365 if (cnt > 0) { 3366 /* 3367 * If this was the last sleeping thread, clear the waiters 3368 * flag in _count. 
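 *
 * Unlike the old _usem, _usem2 keeps the waiter indication in the
 * count word itself: USEM_HAS_WAITERS is the high bit of _count.
 * Clearing it therefore has to use the casueword32() retry loop
 * below rather than a plain store, so that a concurrently posted
 * count is never overwritten.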
3369 */ 3370 if (cnt == 1) { 3371 umtxq_unlock(&key); 3372 rv = fueword32(&sem->_count, &count); 3373 while (rv != -1 && count & USEM_HAS_WAITERS) { 3374 rv = casueword32(&sem->_count, count, &count, 3375 count & ~USEM_HAS_WAITERS); 3376 if (rv == 1) { 3377 rv = thread_check_susp(td, true); 3378 if (rv != 0) 3379 break; 3380 } 3381 } 3382 if (rv == -1) 3383 error = EFAULT; 3384 else if (rv > 0) { 3385 error = rv; 3386 } 3387 umtxq_lock(&key); 3388 } 3389 3390 umtxq_signal(&key, 1); 3391 } 3392 umtxq_unbusy(&key); 3393 umtxq_unlock(&key); 3394 umtx_key_release(&key); 3395 return (error); 3396 } 3397 3398 inline int 3399 umtx_copyin_timeout(const void *addr, struct timespec *tsp) 3400 { 3401 int error; 3402 3403 error = copyin(addr, tsp, sizeof(struct timespec)); 3404 if (error == 0) { 3405 if (tsp->tv_sec < 0 || 3406 tsp->tv_nsec >= 1000000000 || 3407 tsp->tv_nsec < 0) 3408 error = EINVAL; 3409 } 3410 return (error); 3411 } 3412 3413 static inline int 3414 umtx_copyin_umtx_time(const void *addr, size_t size, struct _umtx_time *tp) 3415 { 3416 int error; 3417 3418 if (size <= sizeof(struct timespec)) { 3419 tp->_clockid = CLOCK_REALTIME; 3420 tp->_flags = 0; 3421 error = copyin(addr, &tp->_timeout, sizeof(struct timespec)); 3422 } else 3423 error = copyin(addr, tp, sizeof(struct _umtx_time)); 3424 if (error != 0) 3425 return (error); 3426 if (tp->_timeout.tv_sec < 0 || 3427 tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0) 3428 return (EINVAL); 3429 return (0); 3430 } 3431 3432 static int 3433 __umtx_op_unimpl(struct thread *td, struct _umtx_op_args *uap) 3434 { 3435 3436 return (EOPNOTSUPP); 3437 } 3438 3439 static int 3440 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap) 3441 { 3442 struct _umtx_time timeout, *tm_p; 3443 int error; 3444 3445 if (uap->uaddr2 == NULL) 3446 tm_p = NULL; 3447 else { 3448 error = umtx_copyin_umtx_time( 3449 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3450 if (error != 0) 3451 return (error); 3452 tm_p = &timeout; 3453 } 3454 return (do_wait(td, uap->obj, uap->val, tm_p, 0, 0)); 3455 } 3456 3457 static int 3458 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap) 3459 { 3460 struct _umtx_time timeout, *tm_p; 3461 int error; 3462 3463 if (uap->uaddr2 == NULL) 3464 tm_p = NULL; 3465 else { 3466 error = umtx_copyin_umtx_time( 3467 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3468 if (error != 0) 3469 return (error); 3470 tm_p = &timeout; 3471 } 3472 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); 3473 } 3474 3475 static int 3476 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap) 3477 { 3478 struct _umtx_time *tm_p, timeout; 3479 int error; 3480 3481 if (uap->uaddr2 == NULL) 3482 tm_p = NULL; 3483 else { 3484 error = umtx_copyin_umtx_time( 3485 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3486 if (error != 0) 3487 return (error); 3488 tm_p = &timeout; 3489 } 3490 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1)); 3491 } 3492 3493 static int 3494 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap) 3495 { 3496 3497 return (kern_umtx_wake(td, uap->obj, uap->val, 0)); 3498 } 3499 3500 #define BATCH_SIZE 128 3501 static int 3502 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap) 3503 { 3504 char *uaddrs[BATCH_SIZE], **upp; 3505 int count, error, i, pos, tocopy; 3506 3507 upp = (char **)uap->obj; 3508 error = 0; 3509 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 3510 pos += tocopy) { 3511 tocopy = MIN(count, BATCH_SIZE); 3512 error = copyin(upp + pos, uaddrs, 
tocopy * sizeof(char *)); 3513 if (error != 0) 3514 break; 3515 for (i = 0; i < tocopy; ++i) 3516 kern_umtx_wake(td, uaddrs[i], INT_MAX, 1); 3517 maybe_yield(); 3518 } 3519 return (error); 3520 } 3521 3522 static int 3523 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap) 3524 { 3525 3526 return (kern_umtx_wake(td, uap->obj, uap->val, 1)); 3527 } 3528 3529 static int 3530 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap) 3531 { 3532 struct _umtx_time *tm_p, timeout; 3533 int error; 3534 3535 /* Allow a null timespec (wait forever). */ 3536 if (uap->uaddr2 == NULL) 3537 tm_p = NULL; 3538 else { 3539 error = umtx_copyin_umtx_time( 3540 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3541 if (error != 0) 3542 return (error); 3543 tm_p = &timeout; 3544 } 3545 return (do_lock_umutex(td, uap->obj, tm_p, 0)); 3546 } 3547 3548 static int 3549 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap) 3550 { 3551 3552 return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY)); 3553 } 3554 3555 static int 3556 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap) 3557 { 3558 struct _umtx_time *tm_p, timeout; 3559 int error; 3560 3561 /* Allow a null timespec (wait forever). */ 3562 if (uap->uaddr2 == NULL) 3563 tm_p = NULL; 3564 else { 3565 error = umtx_copyin_umtx_time( 3566 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3567 if (error != 0) 3568 return (error); 3569 tm_p = &timeout; 3570 } 3571 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT)); 3572 } 3573 3574 static int 3575 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap) 3576 { 3577 3578 return (do_wake_umutex(td, uap->obj)); 3579 } 3580 3581 static int 3582 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap) 3583 { 3584 3585 return (do_unlock_umutex(td, uap->obj, false)); 3586 } 3587 3588 static int 3589 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap) 3590 { 3591 3592 return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1)); 3593 } 3594 3595 static int 3596 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap) 3597 { 3598 struct timespec *ts, timeout; 3599 int error; 3600 3601 /* Allow a null timespec (wait forever). */ 3602 if (uap->uaddr2 == NULL) 3603 ts = NULL; 3604 else { 3605 error = umtx_copyin_timeout(uap->uaddr2, &timeout); 3606 if (error != 0) 3607 return (error); 3608 ts = &timeout; 3609 } 3610 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 3611 } 3612 3613 static int 3614 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap) 3615 { 3616 3617 return (do_cv_signal(td, uap->obj)); 3618 } 3619 3620 static int 3621 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap) 3622 { 3623 3624 return (do_cv_broadcast(td, uap->obj)); 3625 } 3626 3627 static int 3628 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap) 3629 { 3630 struct _umtx_time timeout; 3631 int error; 3632 3633 /* Allow a null timespec (wait forever). */ 3634 if (uap->uaddr2 == NULL) { 3635 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 3636 } else { 3637 error = umtx_copyin_umtx_time(uap->uaddr2, 3638 (size_t)uap->uaddr1, &timeout); 3639 if (error != 0) 3640 return (error); 3641 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 3642 } 3643 return (error); 3644 } 3645 3646 static int 3647 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap) 3648 { 3649 struct _umtx_time timeout; 3650 int error; 3651 3652 /* Allow a null timespec (wait forever). 
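 *
 * As in the other timed operations, uaddr2 points at either a bare
 * struct timespec or a full struct _umtx_time, and uaddr1 carries
 * the size of that object so that umtx_copyin_umtx_time() can tell
 * the two layouts apart.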
*/ 3653 if (uap->uaddr2 == NULL) { 3654 error = do_rw_wrlock(td, uap->obj, 0); 3655 } else { 3656 error = umtx_copyin_umtx_time(uap->uaddr2, 3657 (size_t)uap->uaddr1, &timeout); 3658 if (error != 0) 3659 return (error); 3660 3661 error = do_rw_wrlock(td, uap->obj, &timeout); 3662 } 3663 return (error); 3664 } 3665 3666 static int 3667 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap) 3668 { 3669 3670 return (do_rw_unlock(td, uap->obj)); 3671 } 3672 3673 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3674 static int 3675 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap) 3676 { 3677 struct _umtx_time *tm_p, timeout; 3678 int error; 3679 3680 /* Allow a null timespec (wait forever). */ 3681 if (uap->uaddr2 == NULL) 3682 tm_p = NULL; 3683 else { 3684 error = umtx_copyin_umtx_time( 3685 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3686 if (error != 0) 3687 return (error); 3688 tm_p = &timeout; 3689 } 3690 return (do_sem_wait(td, uap->obj, tm_p)); 3691 } 3692 3693 static int 3694 __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap) 3695 { 3696 3697 return (do_sem_wake(td, uap->obj)); 3698 } 3699 #endif 3700 3701 static int 3702 __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap) 3703 { 3704 3705 return (do_wake2_umutex(td, uap->obj, uap->val)); 3706 } 3707 3708 static int 3709 __umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap) 3710 { 3711 struct _umtx_time *tm_p, timeout; 3712 size_t uasize; 3713 int error; 3714 3715 /* Allow a null timespec (wait forever). */ 3716 if (uap->uaddr2 == NULL) { 3717 uasize = 0; 3718 tm_p = NULL; 3719 } else { 3720 uasize = (size_t)uap->uaddr1; 3721 error = umtx_copyin_umtx_time(uap->uaddr2, uasize, &timeout); 3722 if (error != 0) 3723 return (error); 3724 tm_p = &timeout; 3725 } 3726 error = do_sem2_wait(td, uap->obj, tm_p); 3727 if (error == EINTR && uap->uaddr2 != NULL && 3728 (timeout._flags & UMTX_ABSTIME) == 0 && 3729 uasize >= sizeof(struct _umtx_time) + sizeof(struct timespec)) { 3730 error = copyout(&timeout._timeout, 3731 (struct _umtx_time *)uap->uaddr2 + 1, 3732 sizeof(struct timespec)); 3733 if (error == 0) { 3734 error = EINTR; 3735 } 3736 } 3737 3738 return (error); 3739 } 3740 3741 static int 3742 __umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap) 3743 { 3744 3745 return (do_sem2_wake(td, uap->obj)); 3746 } 3747 3748 #define USHM_OBJ_UMTX(o) \ 3749 ((struct umtx_shm_obj_list *)(&(o)->umtx_data)) 3750 3751 #define USHMF_REG_LINKED 0x0001 3752 #define USHMF_OBJ_LINKED 0x0002 3753 struct umtx_shm_reg { 3754 TAILQ_ENTRY(umtx_shm_reg) ushm_reg_link; 3755 LIST_ENTRY(umtx_shm_reg) ushm_obj_link; 3756 struct umtx_key ushm_key; 3757 struct ucred *ushm_cred; 3758 struct shmfd *ushm_obj; 3759 u_int ushm_refcnt; 3760 u_int ushm_flags; 3761 }; 3762 3763 LIST_HEAD(umtx_shm_obj_list, umtx_shm_reg); 3764 TAILQ_HEAD(umtx_shm_reg_head, umtx_shm_reg); 3765 3766 static uma_zone_t umtx_shm_reg_zone; 3767 static struct umtx_shm_reg_head umtx_shm_registry[UMTX_CHAINS]; 3768 static struct mtx umtx_shm_lock; 3769 static struct umtx_shm_reg_head umtx_shm_reg_delfree = 3770 TAILQ_HEAD_INITIALIZER(umtx_shm_reg_delfree); 3771 3772 static void umtx_shm_free_reg(struct umtx_shm_reg *reg); 3773 3774 static void 3775 umtx_shm_reg_delfree_tq(void *context __unused, int pending __unused) 3776 { 3777 struct umtx_shm_reg_head d; 3778 struct umtx_shm_reg *reg, *reg1; 3779 3780 TAILQ_INIT(&d); 3781 mtx_lock(&umtx_shm_lock); 3782 TAILQ_CONCAT(&d, &umtx_shm_reg_delfree, ushm_reg_link); 3783 
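	/*
	 * All pending registrations have been moved onto the local
	 * list 'd' in one step; once the lock is dropped they can be
	 * freed at leisure, since nothing else can reach them now.
	 */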
mtx_unlock(&umtx_shm_lock); 3784 TAILQ_FOREACH_SAFE(reg, &d, ushm_reg_link, reg1) { 3785 TAILQ_REMOVE(&d, reg, ushm_reg_link); 3786 umtx_shm_free_reg(reg); 3787 } 3788 } 3789 3790 static struct task umtx_shm_reg_delfree_task = 3791 TASK_INITIALIZER(0, umtx_shm_reg_delfree_tq, NULL); 3792 3793 static struct umtx_shm_reg * 3794 umtx_shm_find_reg_locked(const struct umtx_key *key) 3795 { 3796 struct umtx_shm_reg *reg; 3797 struct umtx_shm_reg_head *reg_head; 3798 3799 KASSERT(key->shared, ("umtx_p_find_rg: private key")); 3800 mtx_assert(&umtx_shm_lock, MA_OWNED); 3801 reg_head = &umtx_shm_registry[key->hash]; 3802 TAILQ_FOREACH(reg, reg_head, ushm_reg_link) { 3803 KASSERT(reg->ushm_key.shared, 3804 ("non-shared key on reg %p %d", reg, reg->ushm_key.shared)); 3805 if (reg->ushm_key.info.shared.object == 3806 key->info.shared.object && 3807 reg->ushm_key.info.shared.offset == 3808 key->info.shared.offset) { 3809 KASSERT(reg->ushm_key.type == TYPE_SHM, ("TYPE_USHM")); 3810 KASSERT(reg->ushm_refcnt > 0, 3811 ("reg %p refcnt 0 onlist", reg)); 3812 KASSERT((reg->ushm_flags & USHMF_REG_LINKED) != 0, 3813 ("reg %p not linked", reg)); 3814 reg->ushm_refcnt++; 3815 return (reg); 3816 } 3817 } 3818 return (NULL); 3819 } 3820 3821 static struct umtx_shm_reg * 3822 umtx_shm_find_reg(const struct umtx_key *key) 3823 { 3824 struct umtx_shm_reg *reg; 3825 3826 mtx_lock(&umtx_shm_lock); 3827 reg = umtx_shm_find_reg_locked(key); 3828 mtx_unlock(&umtx_shm_lock); 3829 return (reg); 3830 } 3831 3832 static void 3833 umtx_shm_free_reg(struct umtx_shm_reg *reg) 3834 { 3835 3836 chgumtxcnt(reg->ushm_cred->cr_ruidinfo, -1, 0); 3837 crfree(reg->ushm_cred); 3838 shm_drop(reg->ushm_obj); 3839 uma_zfree(umtx_shm_reg_zone, reg); 3840 } 3841 3842 static bool 3843 umtx_shm_unref_reg_locked(struct umtx_shm_reg *reg, bool force) 3844 { 3845 bool res; 3846 3847 mtx_assert(&umtx_shm_lock, MA_OWNED); 3848 KASSERT(reg->ushm_refcnt > 0, ("ushm_reg %p refcnt 0", reg)); 3849 reg->ushm_refcnt--; 3850 res = reg->ushm_refcnt == 0; 3851 if (res || force) { 3852 if ((reg->ushm_flags & USHMF_REG_LINKED) != 0) { 3853 TAILQ_REMOVE(&umtx_shm_registry[reg->ushm_key.hash], 3854 reg, ushm_reg_link); 3855 reg->ushm_flags &= ~USHMF_REG_LINKED; 3856 } 3857 if ((reg->ushm_flags & USHMF_OBJ_LINKED) != 0) { 3858 LIST_REMOVE(reg, ushm_obj_link); 3859 reg->ushm_flags &= ~USHMF_OBJ_LINKED; 3860 } 3861 } 3862 return (res); 3863 } 3864 3865 static void 3866 umtx_shm_unref_reg(struct umtx_shm_reg *reg, bool force) 3867 { 3868 vm_object_t object; 3869 bool dofree; 3870 3871 if (force) { 3872 object = reg->ushm_obj->shm_object; 3873 VM_OBJECT_WLOCK(object); 3874 object->flags |= OBJ_UMTXDEAD; 3875 VM_OBJECT_WUNLOCK(object); 3876 } 3877 mtx_lock(&umtx_shm_lock); 3878 dofree = umtx_shm_unref_reg_locked(reg, force); 3879 mtx_unlock(&umtx_shm_lock); 3880 if (dofree) 3881 umtx_shm_free_reg(reg); 3882 } 3883 3884 void 3885 umtx_shm_object_init(vm_object_t object) 3886 { 3887 3888 LIST_INIT(USHM_OBJ_UMTX(object)); 3889 } 3890 3891 void 3892 umtx_shm_object_terminated(vm_object_t object) 3893 { 3894 struct umtx_shm_reg *reg, *reg1; 3895 bool dofree; 3896 3897 if (LIST_EMPTY(USHM_OBJ_UMTX(object))) 3898 return; 3899 3900 dofree = false; 3901 mtx_lock(&umtx_shm_lock); 3902 LIST_FOREACH_SAFE(reg, USHM_OBJ_UMTX(object), ushm_obj_link, reg1) { 3903 if (umtx_shm_unref_reg_locked(reg, true)) { 3904 TAILQ_INSERT_TAIL(&umtx_shm_reg_delfree, reg, 3905 ushm_reg_link); 3906 dofree = true; 3907 } 3908 } 3909 mtx_unlock(&umtx_shm_lock); 3910 if (dofree) 3911 
taskqueue_enqueue(taskqueue_thread, &umtx_shm_reg_delfree_task);
3912 }
3913
3914 static int
3915 umtx_shm_create_reg(struct thread *td, const struct umtx_key *key,
3916     struct umtx_shm_reg **res)
3917 {
3918     struct umtx_shm_reg *reg, *reg1;
3919     struct ucred *cred;
3920     int error;
3921
3922     reg = umtx_shm_find_reg(key);
3923     if (reg != NULL) {
3924         *res = reg;
3925         return (0);
3926     }
3927     cred = td->td_ucred;
3928     if (!chgumtxcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_UMTXP)))
3929         return (ENOMEM);
3930     reg = uma_zalloc(umtx_shm_reg_zone, M_WAITOK | M_ZERO);
3931     reg->ushm_refcnt = 1;
3932     bcopy(key, &reg->ushm_key, sizeof(*key));
3933     reg->ushm_obj = shm_alloc(td->td_ucred, O_RDWR);
3934     reg->ushm_cred = crhold(cred);
3935     error = shm_dotruncate(reg->ushm_obj, PAGE_SIZE);
3936     if (error != 0) {
3937         umtx_shm_free_reg(reg);
3938         return (error);
3939     }
3940     mtx_lock(&umtx_shm_lock);
3941     reg1 = umtx_shm_find_reg_locked(key);
3942     if (reg1 != NULL) {
3943         mtx_unlock(&umtx_shm_lock);
3944         umtx_shm_free_reg(reg);
3945         *res = reg1;
3946         return (0);
3947     }
3948     reg->ushm_refcnt++;
3949     TAILQ_INSERT_TAIL(&umtx_shm_registry[key->hash], reg, ushm_reg_link);
3950     LIST_INSERT_HEAD(USHM_OBJ_UMTX(key->info.shared.object), reg,
3951         ushm_obj_link);
3952     reg->ushm_flags = USHMF_REG_LINKED | USHMF_OBJ_LINKED;
3953     mtx_unlock(&umtx_shm_lock);
3954     *res = reg;
3955     return (0);
3956 }
3957
3958 static int
3959 umtx_shm_alive(struct thread *td, void *addr)
3960 {
3961     vm_map_t map;
3962     vm_map_entry_t entry;
3963     vm_object_t object;
3964     vm_pindex_t pindex;
3965     vm_prot_t prot;
3966     int res, ret;
3967     boolean_t wired;
3968
3969     map = &td->td_proc->p_vmspace->vm_map;
3970     res = vm_map_lookup(&map, (uintptr_t)addr, VM_PROT_READ, &entry,
3971         &object, &pindex, &prot, &wired);
3972     if (res != KERN_SUCCESS)
3973         return (EFAULT);
3974     if (object == NULL)
3975         ret = EINVAL;
3976     else
3977         ret = (object->flags & OBJ_UMTXDEAD) != 0 ?
ENOTTY : 0;
3978     vm_map_lookup_done(map, entry);
3979     return (ret);
3980 }
3981
3982 static void
3983 umtx_shm_init(void)
3984 {
3985     int i;
3986
3987     umtx_shm_reg_zone = uma_zcreate("umtx_shm", sizeof(struct umtx_shm_reg),
3988         NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
3989     mtx_init(&umtx_shm_lock, "umtxshm", NULL, MTX_DEF);
3990     for (i = 0; i < nitems(umtx_shm_registry); i++)
3991         TAILQ_INIT(&umtx_shm_registry[i]);
3992 }
3993
3994 static int
3995 umtx_shm(struct thread *td, void *addr, u_int flags)
3996 {
3997     struct umtx_key key;
3998     struct umtx_shm_reg *reg;
3999     struct file *fp;
4000     int error, fd;
4001
4002     if (__bitcount(flags & (UMTX_SHM_CREAT | UMTX_SHM_LOOKUP |
4003         UMTX_SHM_DESTROY | UMTX_SHM_ALIVE)) != 1)
4004         return (EINVAL);
4005     if ((flags & UMTX_SHM_ALIVE) != 0)
4006         return (umtx_shm_alive(td, addr));
4007     error = umtx_key_get(addr, TYPE_SHM, PROCESS_SHARE, &key);
4008     if (error != 0)
4009         return (error);
4010     KASSERT(key.shared == 1, ("non-shared key"));
4011     if ((flags & UMTX_SHM_CREAT) != 0) {
4012         error = umtx_shm_create_reg(td, &key, &reg);
4013     } else {
4014         reg = umtx_shm_find_reg(&key);
4015         if (reg == NULL)
4016             error = ESRCH;
4017     }
4018     umtx_key_release(&key);
4019     if (error != 0)
4020         return (error);
4021     KASSERT(reg != NULL, ("no reg"));
4022     if ((flags & UMTX_SHM_DESTROY) != 0) {
4023         umtx_shm_unref_reg(reg, true);
4024     } else {
4025 #if 0
4026 #ifdef MAC
4027         error = mac_posixshm_check_open(td->td_ucred,
4028             reg->ushm_obj, FFLAGS(O_RDWR));
4029         if (error == 0)
4030 #endif
4031             error = shm_access(reg->ushm_obj, td->td_ucred,
4032                 FFLAGS(O_RDWR));
4033         if (error == 0)
4034 #endif
4035             error = falloc_caps(td, &fp, &fd, O_CLOEXEC, NULL);
4036         if (error == 0) {
4037             shm_hold(reg->ushm_obj);
4038             finit(fp, FFLAGS(O_RDWR), DTYPE_SHM, reg->ushm_obj,
4039                 &shm_ops);
4040             td->td_retval[0] = fd;
4041             fdrop(fp, td);
4042         }
4043     }
4044     umtx_shm_unref_reg(reg, false);
4045     return (error);
4046 }
4047
4048 static int
4049 __umtx_op_shm(struct thread *td, struct _umtx_op_args *uap)
4050 {
4051
4052     return (umtx_shm(td, uap->uaddr1, uap->val));
4053 }
4054
4055 static int
4056 umtx_robust_lists(struct thread *td, struct umtx_robust_lists_params *rbp)
4057 {
4058
4059     td->td_rb_list = rbp->robust_list_offset;
4060     td->td_rbp_list = rbp->robust_priv_list_offset;
4061     td->td_rb_inact = rbp->robust_inact_offset;
4062     return (0);
4063 }
4064
4065 static int
4066 __umtx_op_robust_lists(struct thread *td, struct _umtx_op_args *uap)
4067 {
4068     struct umtx_robust_lists_params rb;
4069     int error;
4070
4071     if (uap->val > sizeof(rb))
4072         return (EINVAL);
4073     bzero(&rb, sizeof(rb));
4074     error = copyin(uap->uaddr1, &rb, uap->val);
4075     if (error != 0)
4076         return (error);
4077     return (umtx_robust_lists(td, &rb));
4078 }
4079
4080 typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);
4081
4082 static const _umtx_op_func op_table[] = {
4083     [UMTX_OP_RESERVED0] = __umtx_op_unimpl,
4084     [UMTX_OP_RESERVED1] = __umtx_op_unimpl,
4085     [UMTX_OP_WAIT] = __umtx_op_wait,
4086     [UMTX_OP_WAKE] = __umtx_op_wake,
4087     [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex,
4088     [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex,
4089     [UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex,
4090     [UMTX_OP_SET_CEILING] = __umtx_op_set_ceiling,
4091     [UMTX_OP_CV_WAIT] = __umtx_op_cv_wait,
4092     [UMTX_OP_CV_SIGNAL] = __umtx_op_cv_signal,
4093     [UMTX_OP_CV_BROADCAST] = __umtx_op_cv_broadcast,
4094     [UMTX_OP_WAIT_UINT] = __umtx_op_wait_uint,
4095     [UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock,
4096
[UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock, 4097 [UMTX_OP_RW_UNLOCK] = __umtx_op_rw_unlock, 4098 [UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private, 4099 [UMTX_OP_WAKE_PRIVATE] = __umtx_op_wake_private, 4100 [UMTX_OP_MUTEX_WAIT] = __umtx_op_wait_umutex, 4101 [UMTX_OP_MUTEX_WAKE] = __umtx_op_wake_umutex, 4102 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4103 [UMTX_OP_SEM_WAIT] = __umtx_op_sem_wait, 4104 [UMTX_OP_SEM_WAKE] = __umtx_op_sem_wake, 4105 #else 4106 [UMTX_OP_SEM_WAIT] = __umtx_op_unimpl, 4107 [UMTX_OP_SEM_WAKE] = __umtx_op_unimpl, 4108 #endif 4109 [UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private, 4110 [UMTX_OP_MUTEX_WAKE2] = __umtx_op_wake2_umutex, 4111 [UMTX_OP_SEM2_WAIT] = __umtx_op_sem2_wait, 4112 [UMTX_OP_SEM2_WAKE] = __umtx_op_sem2_wake, 4113 [UMTX_OP_SHM] = __umtx_op_shm, 4114 [UMTX_OP_ROBUST_LISTS] = __umtx_op_robust_lists, 4115 }; 4116 4117 int 4118 sys__umtx_op(struct thread *td, struct _umtx_op_args *uap) 4119 { 4120 4121 if ((unsigned)uap->op < nitems(op_table)) 4122 return (*op_table[uap->op])(td, uap); 4123 return (EINVAL); 4124 } 4125 4126 #ifdef COMPAT_FREEBSD32 4127 4128 struct timespec32 { 4129 int32_t tv_sec; 4130 int32_t tv_nsec; 4131 }; 4132 4133 struct umtx_time32 { 4134 struct timespec32 timeout; 4135 uint32_t flags; 4136 uint32_t clockid; 4137 }; 4138 4139 static inline int 4140 umtx_copyin_timeout32(void *addr, struct timespec *tsp) 4141 { 4142 struct timespec32 ts32; 4143 int error; 4144 4145 error = copyin(addr, &ts32, sizeof(struct timespec32)); 4146 if (error == 0) { 4147 if (ts32.tv_sec < 0 || 4148 ts32.tv_nsec >= 1000000000 || 4149 ts32.tv_nsec < 0) 4150 error = EINVAL; 4151 else { 4152 tsp->tv_sec = ts32.tv_sec; 4153 tsp->tv_nsec = ts32.tv_nsec; 4154 } 4155 } 4156 return (error); 4157 } 4158 4159 static inline int 4160 umtx_copyin_umtx_time32(const void *addr, size_t size, struct _umtx_time *tp) 4161 { 4162 struct umtx_time32 t32; 4163 int error; 4164 4165 t32.clockid = CLOCK_REALTIME; 4166 t32.flags = 0; 4167 if (size <= sizeof(struct timespec32)) 4168 error = copyin(addr, &t32.timeout, sizeof(struct timespec32)); 4169 else 4170 error = copyin(addr, &t32, sizeof(struct umtx_time32)); 4171 if (error != 0) 4172 return (error); 4173 if (t32.timeout.tv_sec < 0 || 4174 t32.timeout.tv_nsec >= 1000000000 || t32.timeout.tv_nsec < 0) 4175 return (EINVAL); 4176 tp->_timeout.tv_sec = t32.timeout.tv_sec; 4177 tp->_timeout.tv_nsec = t32.timeout.tv_nsec; 4178 tp->_flags = t32.flags; 4179 tp->_clockid = t32.clockid; 4180 return (0); 4181 } 4182 4183 static int 4184 __umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 4185 { 4186 struct _umtx_time *tm_p, timeout; 4187 int error; 4188 4189 if (uap->uaddr2 == NULL) 4190 tm_p = NULL; 4191 else { 4192 error = umtx_copyin_umtx_time32(uap->uaddr2, 4193 (size_t)uap->uaddr1, &timeout); 4194 if (error != 0) 4195 return (error); 4196 tm_p = &timeout; 4197 } 4198 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); 4199 } 4200 4201 static int 4202 __umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap) 4203 { 4204 struct _umtx_time *tm_p, timeout; 4205 int error; 4206 4207 /* Allow a null timespec (wait forever). 
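 *
 * The compat32 entry points differ from the native ones only in
 * this copyin step: umtx_copyin_umtx_time32() widens the 32-bit
 * timespec32/umtx_time32 layouts into the native struct _umtx_time
 * before the common code runs.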
*/ 4208 if (uap->uaddr2 == NULL) 4209 tm_p = NULL; 4210 else { 4211 error = umtx_copyin_umtx_time32(uap->uaddr2, 4212 (size_t)uap->uaddr1, &timeout); 4213 if (error != 0) 4214 return (error); 4215 tm_p = &timeout; 4216 } 4217 return (do_lock_umutex(td, uap->obj, tm_p, 0)); 4218 } 4219 4220 static int 4221 __umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap) 4222 { 4223 struct _umtx_time *tm_p, timeout; 4224 int error; 4225 4226 /* Allow a null timespec (wait forever). */ 4227 if (uap->uaddr2 == NULL) 4228 tm_p = NULL; 4229 else { 4230 error = umtx_copyin_umtx_time32(uap->uaddr2, 4231 (size_t)uap->uaddr1, &timeout); 4232 if (error != 0) 4233 return (error); 4234 tm_p = &timeout; 4235 } 4236 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT)); 4237 } 4238 4239 static int 4240 __umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 4241 { 4242 struct timespec *ts, timeout; 4243 int error; 4244 4245 /* Allow a null timespec (wait forever). */ 4246 if (uap->uaddr2 == NULL) 4247 ts = NULL; 4248 else { 4249 error = umtx_copyin_timeout32(uap->uaddr2, &timeout); 4250 if (error != 0) 4251 return (error); 4252 ts = &timeout; 4253 } 4254 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 4255 } 4256 4257 static int 4258 __umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap) 4259 { 4260 struct _umtx_time timeout; 4261 int error; 4262 4263 /* Allow a null timespec (wait forever). */ 4264 if (uap->uaddr2 == NULL) { 4265 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 4266 } else { 4267 error = umtx_copyin_umtx_time32(uap->uaddr2, 4268 (size_t)uap->uaddr1, &timeout); 4269 if (error != 0) 4270 return (error); 4271 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 4272 } 4273 return (error); 4274 } 4275 4276 static int 4277 __umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap) 4278 { 4279 struct _umtx_time timeout; 4280 int error; 4281 4282 /* Allow a null timespec (wait forever). */ 4283 if (uap->uaddr2 == NULL) { 4284 error = do_rw_wrlock(td, uap->obj, 0); 4285 } else { 4286 error = umtx_copyin_umtx_time32(uap->uaddr2, 4287 (size_t)uap->uaddr1, &timeout); 4288 if (error != 0) 4289 return (error); 4290 error = do_rw_wrlock(td, uap->obj, &timeout); 4291 } 4292 return (error); 4293 } 4294 4295 static int 4296 __umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap) 4297 { 4298 struct _umtx_time *tm_p, timeout; 4299 int error; 4300 4301 if (uap->uaddr2 == NULL) 4302 tm_p = NULL; 4303 else { 4304 error = umtx_copyin_umtx_time32( 4305 uap->uaddr2, (size_t)uap->uaddr1,&timeout); 4306 if (error != 0) 4307 return (error); 4308 tm_p = &timeout; 4309 } 4310 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1)); 4311 } 4312 4313 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4314 static int 4315 __umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 4316 { 4317 struct _umtx_time *tm_p, timeout; 4318 int error; 4319 4320 /* Allow a null timespec (wait forever). 
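 *
 * This wrapper exists only for COMPAT_FREEBSD9/10 binaries; the
 * _usem protocol it services was superseded by _usem2, whose
 * compat32 handler follows below.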
*/ 4321 if (uap->uaddr2 == NULL) 4322 tm_p = NULL; 4323 else { 4324 error = umtx_copyin_umtx_time32(uap->uaddr2, 4325 (size_t)uap->uaddr1, &timeout); 4326 if (error != 0) 4327 return (error); 4328 tm_p = &timeout; 4329 } 4330 return (do_sem_wait(td, uap->obj, tm_p)); 4331 } 4332 #endif 4333 4334 static int 4335 __umtx_op_sem2_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 4336 { 4337 struct _umtx_time *tm_p, timeout; 4338 size_t uasize; 4339 int error; 4340 4341 /* Allow a null timespec (wait forever). */ 4342 if (uap->uaddr2 == NULL) { 4343 uasize = 0; 4344 tm_p = NULL; 4345 } else { 4346 uasize = (size_t)uap->uaddr1; 4347 error = umtx_copyin_umtx_time32(uap->uaddr2, uasize, &timeout); 4348 if (error != 0) 4349 return (error); 4350 tm_p = &timeout; 4351 } 4352 error = do_sem2_wait(td, uap->obj, tm_p); 4353 if (error == EINTR && uap->uaddr2 != NULL && 4354 (timeout._flags & UMTX_ABSTIME) == 0 && 4355 uasize >= sizeof(struct umtx_time32) + sizeof(struct timespec32)) { 4356 struct timespec32 remain32 = { 4357 .tv_sec = timeout._timeout.tv_sec, 4358 .tv_nsec = timeout._timeout.tv_nsec 4359 }; 4360 error = copyout(&remain32, 4361 (struct umtx_time32 *)uap->uaddr2 + 1, 4362 sizeof(struct timespec32)); 4363 if (error == 0) { 4364 error = EINTR; 4365 } 4366 } 4367 4368 return (error); 4369 } 4370 4371 static int 4372 __umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap) 4373 { 4374 uint32_t uaddrs[BATCH_SIZE], **upp; 4375 int count, error, i, pos, tocopy; 4376 4377 upp = (uint32_t **)uap->obj; 4378 error = 0; 4379 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 4380 pos += tocopy) { 4381 tocopy = MIN(count, BATCH_SIZE); 4382 error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t)); 4383 if (error != 0) 4384 break; 4385 for (i = 0; i < tocopy; ++i) 4386 kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i], 4387 INT_MAX, 1); 4388 maybe_yield(); 4389 } 4390 return (error); 4391 } 4392 4393 struct umtx_robust_lists_params_compat32 { 4394 uint32_t robust_list_offset; 4395 uint32_t robust_priv_list_offset; 4396 uint32_t robust_inact_offset; 4397 }; 4398 4399 static int 4400 __umtx_op_robust_lists_compat32(struct thread *td, struct _umtx_op_args *uap) 4401 { 4402 struct umtx_robust_lists_params rb; 4403 struct umtx_robust_lists_params_compat32 rb32; 4404 int error; 4405 4406 if (uap->val > sizeof(rb32)) 4407 return (EINVAL); 4408 bzero(&rb, sizeof(rb)); 4409 bzero(&rb32, sizeof(rb32)); 4410 error = copyin(uap->uaddr1, &rb32, uap->val); 4411 if (error != 0) 4412 return (error); 4413 rb.robust_list_offset = rb32.robust_list_offset; 4414 rb.robust_priv_list_offset = rb32.robust_priv_list_offset; 4415 rb.robust_inact_offset = rb32.robust_inact_offset; 4416 return (umtx_robust_lists(td, &rb)); 4417 } 4418 4419 static const _umtx_op_func op_table_compat32[] = { 4420 [UMTX_OP_RESERVED0] = __umtx_op_unimpl, 4421 [UMTX_OP_RESERVED1] = __umtx_op_unimpl, 4422 [UMTX_OP_WAIT] = __umtx_op_wait_compat32, 4423 [UMTX_OP_WAKE] = __umtx_op_wake, 4424 [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex, 4425 [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex_compat32, 4426 [UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex, 4427 [UMTX_OP_SET_CEILING] = __umtx_op_set_ceiling, 4428 [UMTX_OP_CV_WAIT] = __umtx_op_cv_wait_compat32, 4429 [UMTX_OP_CV_SIGNAL] = __umtx_op_cv_signal, 4430 [UMTX_OP_CV_BROADCAST] = __umtx_op_cv_broadcast, 4431 [UMTX_OP_WAIT_UINT] = __umtx_op_wait_compat32, 4432 [UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock_compat32, 4433 [UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock_compat32, 
static const _umtx_op_func op_table_compat32[] = {
	[UMTX_OP_RESERVED0]	= __umtx_op_unimpl,
	[UMTX_OP_RESERVED1]	= __umtx_op_unimpl,
	[UMTX_OP_WAIT]		= __umtx_op_wait_compat32,
	[UMTX_OP_WAKE]		= __umtx_op_wake,
	[UMTX_OP_MUTEX_TRYLOCK]	= __umtx_op_trylock_umutex,
	[UMTX_OP_MUTEX_LOCK]	= __umtx_op_lock_umutex_compat32,
	[UMTX_OP_MUTEX_UNLOCK]	= __umtx_op_unlock_umutex,
	[UMTX_OP_SET_CEILING]	= __umtx_op_set_ceiling,
	[UMTX_OP_CV_WAIT]	= __umtx_op_cv_wait_compat32,
	[UMTX_OP_CV_SIGNAL]	= __umtx_op_cv_signal,
	[UMTX_OP_CV_BROADCAST]	= __umtx_op_cv_broadcast,
	[UMTX_OP_WAIT_UINT]	= __umtx_op_wait_compat32,
	[UMTX_OP_RW_RDLOCK]	= __umtx_op_rw_rdlock_compat32,
	[UMTX_OP_RW_WRLOCK]	= __umtx_op_rw_wrlock_compat32,
	[UMTX_OP_RW_UNLOCK]	= __umtx_op_rw_unlock,
	[UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private_compat32,
	[UMTX_OP_WAKE_PRIVATE]	= __umtx_op_wake_private,
	[UMTX_OP_MUTEX_WAIT]	= __umtx_op_wait_umutex_compat32,
	[UMTX_OP_MUTEX_WAKE]	= __umtx_op_wake_umutex,
#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
	[UMTX_OP_SEM_WAIT]	= __umtx_op_sem_wait_compat32,
	[UMTX_OP_SEM_WAKE]	= __umtx_op_sem_wake,
#else
	[UMTX_OP_SEM_WAIT]	= __umtx_op_unimpl,
	[UMTX_OP_SEM_WAKE]	= __umtx_op_unimpl,
#endif
	[UMTX_OP_NWAKE_PRIVATE]	= __umtx_op_nwake_private32,
	[UMTX_OP_MUTEX_WAKE2]	= __umtx_op_wake2_umutex,
	[UMTX_OP_SEM2_WAIT]	= __umtx_op_sem2_wait_compat32,
	[UMTX_OP_SEM2_WAKE]	= __umtx_op_sem2_wake,
	[UMTX_OP_SHM]		= __umtx_op_shm,
	[UMTX_OP_ROBUST_LISTS]	= __umtx_op_robust_lists_compat32,
};

int
freebsd32__umtx_op(struct thread *td, struct freebsd32__umtx_op_args *uap)
{

	if ((unsigned)uap->op < nitems(op_table_compat32)) {
		return (*op_table_compat32[uap->op])(td,
		    (struct _umtx_op_args *)uap);
	}
	return (EINVAL);
}
#endif

void
umtx_thread_init(struct thread *td)
{

	td->td_umtxq = umtxq_alloc();
	td->td_umtxq->uq_thread = td;
}

void
umtx_thread_fini(struct thread *td)
{

	umtxq_free(td->td_umtxq);
}

/*
 * Called when a new thread is created, e.g. by fork().
 */
void
umtx_thread_alloc(struct thread *td)
{
	struct umtx_q *uq;

	uq = td->td_umtxq;
	uq->uq_inherited_pri = PRI_MAX;

	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested),
	    ("uq_pi_contested is not empty"));
}

/*
 * exec() hook.
 *
 * Clear the robust lists for all of the process's threads, rather
 * than delaying the cleanup to the thread_exit hook, since the
 * relevant address space is destroyed right now.
 */
static void
umtx_exec_hook(void *arg __unused, struct proc *p,
    struct image_params *imgp __unused)
{
	struct thread *td;

	KASSERT(p == curproc, ("need curproc"));
	KASSERT((p->p_flag & P_HADTHREADS) == 0 ||
	    (p->p_flag & P_STOPPED_SINGLE) != 0,
	    ("curproc must be single-threaded"));
	/*
	 * There is no need to lock the list as only this thread can be
	 * running.
	 */
	FOREACH_THREAD_IN_PROC(p, td) {
		KASSERT(td == curthread ||
		    ((td->td_flags & TDF_BOUNDARY) != 0 && TD_IS_SUSPENDED(td)),
		    ("running thread %p %p", p, td));
		umtx_thread_cleanup(td);
		td->td_rb_list = td->td_rbp_list = td->td_rb_inact = 0;
	}
}
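/*
 * umtx_exec_hook() matches the process_exec eventhandler signature;
 * presumably it is wired up during subsystem initialization, along
 * the lines of the sketch below (the registration lives elsewhere in
 * this file, so the priority argument here is an assumption):
 *
 *	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
 *	    EVENTHANDLER_PRI_ANY);
 */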
4530 */ 4531 void 4532 umtx_thread_exit(struct thread *td) 4533 { 4534 4535 umtx_thread_cleanup(td); 4536 } 4537 4538 static int 4539 umtx_read_uptr(struct thread *td, uintptr_t ptr, uintptr_t *res) 4540 { 4541 u_long res1; 4542 #ifdef COMPAT_FREEBSD32 4543 uint32_t res32; 4544 #endif 4545 int error; 4546 4547 #ifdef COMPAT_FREEBSD32 4548 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { 4549 error = fueword32((void *)ptr, &res32); 4550 if (error == 0) 4551 res1 = res32; 4552 } else 4553 #endif 4554 { 4555 error = fueword((void *)ptr, &res1); 4556 } 4557 if (error == 0) 4558 *res = res1; 4559 else 4560 error = EFAULT; 4561 return (error); 4562 } 4563 4564 static void 4565 umtx_read_rb_list(struct thread *td, struct umutex *m, uintptr_t *rb_list) 4566 { 4567 #ifdef COMPAT_FREEBSD32 4568 struct umutex32 m32; 4569 4570 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { 4571 memcpy(&m32, m, sizeof(m32)); 4572 *rb_list = m32.m_rb_lnk; 4573 } else 4574 #endif 4575 *rb_list = m->m_rb_lnk; 4576 } 4577 4578 static int 4579 umtx_handle_rb(struct thread *td, uintptr_t rbp, uintptr_t *rb_list, bool inact) 4580 { 4581 struct umutex m; 4582 int error; 4583 4584 KASSERT(td->td_proc == curproc, ("need current vmspace")); 4585 error = copyin((void *)rbp, &m, sizeof(m)); 4586 if (error != 0) 4587 return (error); 4588 if (rb_list != NULL) 4589 umtx_read_rb_list(td, &m, rb_list); 4590 if ((m.m_flags & UMUTEX_ROBUST) == 0) 4591 return (EINVAL); 4592 if ((m.m_owner & ~UMUTEX_CONTESTED) != td->td_tid) 4593 /* inact is cleared after unlock, allow the inconsistency */ 4594 return (inact ? 0 : EINVAL); 4595 return (do_unlock_umutex(td, (struct umutex *)rbp, true)); 4596 } 4597 4598 static void 4599 umtx_cleanup_rb_list(struct thread *td, uintptr_t rb_list, uintptr_t *rb_inact, 4600 const char *name) 4601 { 4602 int error, i; 4603 uintptr_t rbp; 4604 bool inact; 4605 4606 if (rb_list == 0) 4607 return; 4608 error = umtx_read_uptr(td, rb_list, &rbp); 4609 for (i = 0; error == 0 && rbp != 0 && i < umtx_max_rb; i++) { 4610 if (rbp == *rb_inact) { 4611 inact = true; 4612 *rb_inact = 0; 4613 } else 4614 inact = false; 4615 error = umtx_handle_rb(td, rbp, &rbp, inact); 4616 } 4617 if (i == umtx_max_rb && umtx_verbose_rb) { 4618 uprintf("comm %s pid %d: reached umtx %smax rb %d\n", 4619 td->td_proc->p_comm, td->td_proc->p_pid, name, umtx_max_rb); 4620 } 4621 if (error != 0 && umtx_verbose_rb) { 4622 uprintf("comm %s pid %d: handling %srb error %d\n", 4623 td->td_proc->p_comm, td->td_proc->p_pid, name, error); 4624 } 4625 } 4626 4627 /* 4628 * Clean up umtx data. 4629 */ 4630 static void 4631 umtx_thread_cleanup(struct thread *td) 4632 { 4633 struct umtx_q *uq; 4634 struct umtx_pi *pi; 4635 uintptr_t rb_inact; 4636 4637 /* 4638 * Disown pi mutexes. 4639 */ 4640 uq = td->td_umtxq; 4641 if (uq != NULL) { 4642 if (uq->uq_inherited_pri != PRI_MAX || 4643 !TAILQ_EMPTY(&uq->uq_pi_contested)) { 4644 mtx_lock(&umtx_lock); 4645 uq->uq_inherited_pri = PRI_MAX; 4646 while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) { 4647 pi->pi_owner = NULL; 4648 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link); 4649 } 4650 mtx_unlock(&umtx_lock); 4651 } 4652 sched_lend_user_prio_cond(td, PRI_MAX); 4653 } 4654 4655 if (td->td_rb_inact == 0 && td->td_rb_list == 0 && td->td_rbp_list == 0) 4656 return; 4657 4658 /* 4659 * Handle terminated robust mutexes. Must be done after 4660 * robust pi disown, otherwise unlock could see unowned 4661 * entries. 
4662 */ 4663 rb_inact = td->td_rb_inact; 4664 if (rb_inact != 0) 4665 (void)umtx_read_uptr(td, rb_inact, &rb_inact); 4666 umtx_cleanup_rb_list(td, td->td_rb_list, &rb_inact, ""); 4667 umtx_cleanup_rb_list(td, td->td_rbp_list, &rb_inact, "priv "); 4668 if (rb_inact != 0) 4669 (void)umtx_handle_rb(td, rb_inact, NULL, true); 4670 } 4671