/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2015, 2016 The FreeBSD Foundation
 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Portions of this software were developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_umtx_profiling.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/syscallsubr.h>
#include <sys/taskqueue.h>
#include <sys/time.h>
#include <sys/eventhandler.h>
#include <sys/umtx.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

#include <machine/atomic.h>
#include <machine/cpu.h>

#ifdef COMPAT_FREEBSD32
#include <compat/freebsd32/freebsd32_proto.h>
#endif

#define	_UMUTEX_TRY		1
#define	_UMUTEX_WAIT		2

#ifdef UMTX_PROFILING
#define	UPROF_PERC_BIGGER(w, f, sw, sf)					\
	(((w) > (sw)) || ((w) == (sw) && (f) > (sf)))
#endif

/* Priority inheritance mutex info. */
struct umtx_pi {
	/* Owner thread */
	struct thread		*pi_owner;

	/* Reference count */
	int			pi_refcount;

	/* List entry to link umtx held by thread */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* List entry in hash */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* List for waiters */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Identify a userland lock object */
	struct umtx_key		pi_key;
};
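/*
 * Note on umtx_pi lifetime, summarizing the invariants asserted in the
 * routines below: pi_refcount is manipulated under the chain lock (see
 * umtx_pi_ref() and umtx_pi_unref()), while pi_owner, pi_link and
 * pi_blocked are protected by the global umtx_lock.  A umtx_pi stays on
 * its chain's uc_pi_list until the last reference is dropped.
 */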
/* A userland synchronization object user. */
struct umtx_q {
	/* Linked list for the hash. */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Umtx key. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define	UQF_UMTXQ	0x0001

	/* The waiting thread. */
	struct thread		*uq_thread;

	/*
	 * Blocked on PI mutex.  Readers may use either the chain lock
	 * or umtx_lock; writers must hold both the chain lock and
	 * umtx_lock.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* On blocked list */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* PI mutexes owned by us that other threads contend for */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Inherited priority from PP mutex */
	u_char			uq_inherited_pri;

	/* Spare queue ready to be reused */
	struct umtxq_queue	*uq_spare_queue;

	/* The queue we are on */
	struct umtxq_queue	*uq_cur_queue;
};

TAILQ_HEAD(umtxq_head, umtx_q);

/* Per-key wait-queue */
struct umtxq_queue {
	struct umtxq_head	head;
	struct umtx_key		key;
	LIST_ENTRY(umtxq_queue)	link;
	int			length;
};

LIST_HEAD(umtxq_list, umtxq_queue);

/* Userland lock object's wait-queue chain */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx		uc_lock;

	/* List of sleep queues. */
	struct umtxq_list	uc_queue[2];
#define	UMTX_SHARED_QUEUE	0
#define	UMTX_EXCLUSIVE_QUEUE	1

	LIST_HEAD(, umtxq_queue) uc_spare_queue;

	/* Busy flag */
	char			uc_busy;

	/* Chain lock waiters */
	int			uc_waiters;

	/* All PIs in the list */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;

#ifdef UMTX_PROFILING
	u_int			length;
	u_int			max_length;
#endif
};

#define	UMTXQ_LOCKED_ASSERT(uc)		mtx_assert(&(uc)->uc_lock, MA_OWNED)

/*
 * Do not propagate time-sharing priority; there is a security reason.
 * A user could simply create a PI mutex, let thread A lock it, and let
 * another thread B block on it.  Because B is sleeping, its priority
 * would be boosted, which would boost A's priority via priority
 * propagation as well; A's priority would then never be lowered even if
 * it were using 100% CPU, which is unfair to other processes.
 */
#define	UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&	\
			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?	\
			 PRI_MAX_TIMESHARE : (td)->td_user_pri)

#define	GOLDEN_RATIO_PRIME	2654404609U
#ifndef	UMTX_CHAINS
#define	UMTX_CHAINS		512
#endif
#define	UMTX_SHIFTS		(__WORD_BIT - 9)

#define	GET_SHARE(flags)	\
    (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)
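/*
 * Sharing note: GET_SHARE() selects how a userland address is turned
 * into a umtx key by umtx_key_get() below.  A THREAD_SHARE
 * (process-private) key is the (vmspace, virtual address) pair, while a
 * PROCESS_SHARE key is the backing (vm_object, offset) pair, so that
 * different mappings of the same shared object hash to the same wait
 * queue.
 */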
#define	BUSY_SPINS		200

struct abs_timeout {
	int clockid;
	bool is_abs_real;	/* TIMER_ABSTIME && CLOCK_REALTIME* */
	struct timespec cur;
	struct timespec end;
};

#ifdef COMPAT_FREEBSD32
struct umutex32 {
	volatile __lwpid_t	m_owner;	/* Owner of the mutex */
	__uint32_t		m_flags;	/* Flags of the mutex */
	__uint32_t		m_ceilings[2];	/* Priority protect ceiling */
	__uint32_t		m_rb_lnk;	/* Robust linkage */
	__uint32_t		m_pad;
	__uint32_t		m_spare[2];
};

_Static_assert(sizeof(struct umutex) == sizeof(struct umutex32), "umutex32");
_Static_assert(__offsetof(struct umutex, m_spare[0]) ==
    __offsetof(struct umutex32, m_spare[0]), "m_spare32");
#endif

int umtx_shm_vnobj_persistent = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_vnode_persistent, CTLFLAG_RWTUN,
    &umtx_shm_vnobj_persistent, 0,
    "False forces destruction of umtx attached to file, on last close");
static int umtx_max_rb = 1000;
SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_max_robust, CTLFLAG_RWTUN,
    &umtx_max_rb, 0,
    "Maximum number of robust mutexes allowed for each thread");

static uma_zone_t		umtx_pi_zone;
static struct umtxq_chain	umtxq_chains[2][UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
static int			umtx_pi_allocated;

static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
    &umtx_pi_allocated, 0, "Allocated umtx_pi");
static int umtx_verbose_rb = 1;
SYSCTL_INT(_debug_umtx, OID_AUTO, robust_faults_verbose, CTLFLAG_RWTUN,
    &umtx_verbose_rb, 0,
    "Log problems encountered while cleaning up robust mutexes");

#ifdef UMTX_PROFILING
static long max_length;
SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0,
    "max_length");
static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD, 0,
    "umtx chain stats");
#endif

static void abs_timeout_update(struct abs_timeout *timo);

static void umtx_shm_init(void);
static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert_queue(struct umtx_q *uq, int q);
static void umtxq_remove_queue(struct umtx_q *uq, int q);
static int umtxq_sleep(struct umtx_q *uq, const char *wmesg,
    struct abs_timeout *);
static int umtxq_count(struct umtx_key *key);
static struct umtx_pi *umtx_pi_alloc(int);
static void umtx_pi_free(struct umtx_pi *pi);
static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    bool rb);
static void umtx_thread_cleanup(struct thread *td);
static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
    struct image_params *imgp __unused);
SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);

#define	umtxq_signal(key, nwake)	umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
#define	umtxq_insert(uq)	umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
#define	umtxq_remove(uq)	umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)

static struct mtx umtx_lock;
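/*
 * Locking overview, as used by the routines below: each umtxq_chain is
 * protected by its own uc_lock; the uc_busy flag marks a chain busy
 * across operations that may fault or sleep, during which uc_lock is
 * dropped; and the global umtx_lock protects the priority-inheritance
 * state (pi_owner, pi_blocked, uq_pi_contested) across all chains.
 */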
#ifdef UMTX_PROFILING
static void
umtx_init_profiling(void)
{
	struct sysctl_oid *chain_oid;
	char chain_name[10];
	int i;

	for (i = 0; i < UMTX_CHAINS; ++i) {
		snprintf(chain_name, sizeof(chain_name), "%d", i);
		chain_oid = SYSCTL_ADD_NODE(NULL,
		    SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO,
		    chain_name, CTLFLAG_RD, NULL, "umtx hash stats");
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length0", CTLFLAG_RD,
		    &umtxq_chains[0][i].max_length, 0, NULL);
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length1", CTLFLAG_RD,
		    &umtxq_chains[1][i].max_length, 0, NULL);
	}
}

static int
sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS)
{
	char buf[512];
	struct sbuf sb;
	struct umtxq_chain *uc;
	u_int fract, i, j, tot, whole;
	u_int sf0, sf1, sf2, sf3, sf4;
	u_int si0, si1, si2, si3, si4;
	u_int sw0, sw1, sw2, sw3, sw4;

	sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
	for (i = 0; i < 2; i++) {
		tot = 0;
		for (j = 0; j < UMTX_CHAINS; ++j) {
			uc = &umtxq_chains[i][j];
			mtx_lock(&uc->uc_lock);
			tot += uc->max_length;
			mtx_unlock(&uc->uc_lock);
		}
		if (tot == 0)
			sbuf_printf(&sb, "%u) Empty ", i);
		else {
			sf0 = sf1 = sf2 = sf3 = sf4 = 0;
			si0 = si1 = si2 = si3 = si4 = 0;
			sw0 = sw1 = sw2 = sw3 = sw4 = 0;
			for (j = 0; j < UMTX_CHAINS; j++) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				whole = uc->max_length * 100;
				mtx_unlock(&uc->uc_lock);
				fract = (whole % tot) * 100;
				if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) {
					sf0 = fract;
					si0 = j;
					sw0 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw1,
				    sf1)) {
					sf1 = fract;
					si1 = j;
					sw1 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw2,
				    sf2)) {
					sf2 = fract;
					si2 = j;
					sw2 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw3,
				    sf3)) {
					sf3 = fract;
					si3 = j;
					sw3 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw4,
				    sf4)) {
					sf4 = fract;
					si4 = j;
					sw4 = whole;
				}
			}
			sbuf_printf(&sb, "queue %u:\n", i);
			sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot,
			    sf0 / tot, si0);
			sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot,
			    sf1 / tot, si1);
			sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot,
			    sf2 / tot, si2);
			sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot,
			    sf3 / tot, si3);
			sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot,
			    sf4 / tot, si4);
		}
	}
	sbuf_trim(&sb);
	sbuf_finish(&sb);
	sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
	sbuf_delete(&sb);
	return (0);
}

static int
sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS)
{
	struct umtxq_chain *uc;
	u_int i, j;
	int clear, error;

	clear = 0;
	error = sysctl_handle_int(oidp, &clear, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);

	if (clear != 0) {
		for (i = 0; i < 2; ++i) {
			for (j = 0; j < UMTX_CHAINS; ++j) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				uc->length = 0;
				uc->max_length = 0;
				mtx_unlock(&uc->uc_lock);
			}
		}
	}
	return (0);
}

SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_clear, "I", "Clear umtx chains statistics");
SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_peaks, "A", "Highest peaks in chains max length");
#endif
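/*
 * With UMTX_PROFILING compiled in, the statistics above can be inspected
 * and reset from userland via the sysctl tree defined here, for example
 * (illustrative):
 *	sysctl debug.umtx.chains.peaks
 *	sysctl debug.umtx.chains.clear=1
 */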
static void
umtxq_sysinit(void *arg __unused)
{
	int i, j;

	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	for (i = 0; i < 2; ++i) {
		for (j = 0; j < UMTX_CHAINS; ++j) {
			mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
			    MTX_DEF | MTX_DUPOK);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
			LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
			TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
			umtxq_chains[i][j].uc_busy = 0;
			umtxq_chains[i][j].uc_waiters = 0;
#ifdef UMTX_PROFILING
			umtxq_chains[i][j].length = 0;
			umtxq_chains[i][j].max_length = 0;
#endif
		}
	}
#ifdef UMTX_PROFILING
	umtx_init_profiling();
#endif
	mtx_init(&umtx_lock, "umtx lock", NULL, MTX_DEF);
	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
	    EVENTHANDLER_PRI_ANY);
	umtx_shm_init();
}

struct umtx_q *
umtxq_alloc(void)
{
	struct umtx_q *uq;

	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
	uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX,
	    M_WAITOK | M_ZERO);
	TAILQ_INIT(&uq->uq_spare_queue->head);
	TAILQ_INIT(&uq->uq_pi_contested);
	uq->uq_inherited_pri = PRI_MAX;
	return (uq);
}

void
umtxq_free(struct umtx_q *uq)
{

	MPASS(uq->uq_spare_queue != NULL);
	free(uq->uq_spare_queue, M_UMTX);
	free(uq, M_UMTX);
}

static inline void
umtxq_hash(struct umtx_key *key)
{
	unsigned n;

	n = (uintptr_t)key->info.both.a + key->info.both.b;
	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
}

static inline struct umtxq_chain *
umtxq_getchain(struct umtx_key *key)
{

	if (key->type <= TYPE_SEM)
		return (&umtxq_chains[1][key->hash]);
	return (&umtxq_chains[0][key->hash]);
}

/*
 * Lock a chain.
 */
static inline void
umtxq_lock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_lock(&uc->uc_lock);
}

/*
 * Unlock a chain.
 */
static inline void
umtxq_unlock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_unlock(&uc->uc_lock);
}

/*
 * Set the chain to the busy state when a following operation
 * may block (a kernel mutex can not be used).
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	if (uc->uc_busy) {
#ifdef SMP
		if (smp_cpus > 1) {
			int count = BUSY_SPINS;

			if (count > 0) {
				umtxq_unlock(key);
				while (uc->uc_busy && --count > 0)
					cpu_spinwait();
				umtxq_lock(key);
			}
		}
#endif
		while (uc->uc_busy) {
			uc->uc_waiters++;
			msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
			uc->uc_waiters--;
		}
	}
	uc->uc_busy = 1;
}
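/*
 * The bounded spin above (up to BUSY_SPINS iterations on SMP) bets that
 * the busy period is short, avoiding a sleep/wakeup round trip; only if
 * the chain is still busy afterwards does the thread register itself as
 * a waiter and msleep() on the chain.
 */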
/*
 * Unbusy a chain.
 */
static inline void
umtxq_unbusy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	KASSERT(uc->uc_busy != 0, ("not busy"));
	uc->uc_busy = 0;
	if (uc->uc_waiters)
		wakeup_one(uc);
}

static inline void
umtxq_unbusy_unlocked(struct umtx_key *key)
{

	umtxq_lock(key);
	umtxq_unbusy(key);
	umtxq_unlock(key);
}

static struct umtxq_queue *
umtxq_queue_lookup(struct umtx_key *key, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	LIST_FOREACH(uh, &uc->uc_queue[q], link) {
		if (umtx_key_match(&uh->key, key))
			return (uh);
	}

	return (NULL);
}

static inline void
umtxq_insert_queue(struct umtx_q *uq, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT((uq->uq_flags & UQF_UMTXQ) == 0,
	    ("umtx_q is already on queue"));
	uh = umtxq_queue_lookup(&uq->uq_key, q);
	if (uh != NULL) {
		LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link);
	} else {
		uh = uq->uq_spare_queue;
		uh->key = uq->uq_key;
		LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
#ifdef UMTX_PROFILING
		uc->length++;
		if (uc->length > uc->max_length) {
			uc->max_length = uc->length;
			if (uc->max_length > max_length)
				max_length = uc->max_length;
		}
#endif
	}
	uq->uq_spare_queue = NULL;

	TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
	uh->length++;
	uq->uq_flags |= UQF_UMTXQ;
	uq->uq_cur_queue = uh;
	return;
}

static inline void
umtxq_remove_queue(struct umtx_q *uq, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (uq->uq_flags & UQF_UMTXQ) {
		uh = uq->uq_cur_queue;
		TAILQ_REMOVE(&uh->head, uq, uq_link);
		uh->length--;
		uq->uq_flags &= ~UQF_UMTXQ;
		if (TAILQ_EMPTY(&uh->head)) {
			KASSERT(uh->length == 0,
			    ("inconsistent umtxq_queue length"));
#ifdef UMTX_PROFILING
			uc->length--;
#endif
			LIST_REMOVE(uh, link);
		} else {
			uh = LIST_FIRST(&uc->uc_spare_queue);
			KASSERT(uh != NULL, ("uc_spare_queue is empty"));
			LIST_REMOVE(uh, link);
		}
		uq->uq_spare_queue = uh;
		uq->uq_cur_queue = NULL;
	}
}
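/*
 * Spare-queue trick used by the insert/remove pair above: every umtx_q
 * owns one umtxq_queue.  The first waiter's queue becomes the per-key
 * wait queue; later waiters park theirs on uc_spare_queue.  On removal,
 * a thread takes back either the now-empty per-key queue or any spare,
 * so queue memory never has to be allocated while a chain is locked.
 */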
/*
 * Check if there are multiple waiters.
 */
static int
umtxq_count(struct umtx_key *key)
{
	struct umtxq_queue *uh;

	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL)
		return (uh->length);
	return (0);
}

/*
 * Check if there are multiple PI waiters and return the first
 * waiter.
 */
static int
umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
{
	struct umtxq_queue *uh;

	*first = NULL;
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL) {
		*first = TAILQ_FIRST(&uh->head);
		return (uh->length);
	}
	return (0);
}

/*
 * Wake up threads waiting on a userland object.
 */
static int
umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
{
	struct umtxq_queue *uh;
	struct umtx_q *uq;
	int ret;

	ret = 0;
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
	uh = umtxq_queue_lookup(key, q);
	if (uh != NULL) {
		while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
			umtxq_remove_queue(uq, q);
			wakeup(uq);
			if (++ret >= n_wake)
				return (ret);
		}
	}
	return (ret);
}

/*
 * Wake up the specified thread.
 */
static inline void
umtxq_signal_thread(struct umtx_q *uq)
{

	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key));
	umtxq_remove(uq);
	wakeup(uq);
}

static inline int
tstohz(const struct timespec *tsp)
{
	struct timeval tv;

	TIMESPEC_TO_TIMEVAL(&tv, tsp);
	return tvtohz(&tv);
}

static void
abs_timeout_init(struct abs_timeout *timo, int clockid, int absolute,
    const struct timespec *timeout)
{

	timo->clockid = clockid;
	if (!absolute) {
		timo->is_abs_real = false;
		abs_timeout_update(timo);
		timespecadd(&timo->cur, timeout, &timo->end);
	} else {
		timo->end = *timeout;
		timo->is_abs_real = clockid == CLOCK_REALTIME ||
		    clockid == CLOCK_REALTIME_FAST ||
		    clockid == CLOCK_REALTIME_PRECISE;
		/*
		 * If is_abs_real, umtxq_sleep will read the clock
		 * after setting td_rtcgen; otherwise, read it here.
		 */
		if (!timo->is_abs_real) {
			abs_timeout_update(timo);
		}
	}
}

static void
abs_timeout_init2(struct abs_timeout *timo, const struct _umtx_time *umtxtime)
{

	abs_timeout_init(timo, umtxtime->_clockid,
	    (umtxtime->_flags & UMTX_ABSTIME) != 0, &umtxtime->_timeout);
}

static inline void
abs_timeout_update(struct abs_timeout *timo)
{

	kern_clock_gettime(curthread, timo->clockid, &timo->cur);
}

static int
abs_timeout_gethz(struct abs_timeout *timo)
{
	struct timespec tts;

	if (timespeccmp(&timo->end, &timo->cur, <=))
		return (-1);
	timespecsub(&timo->end, &timo->cur, &tts);
	return (tstohz(&tts));
}

static uint32_t
umtx_unlock_val(uint32_t flags, bool rb)
{

	if (rb)
		return (UMUTEX_RB_OWNERDEAD);
	else if ((flags & UMUTEX_NONCONSISTENT) != 0)
		return (UMUTEX_RB_NOTRECOV);
	else
		return (UMUTEX_UNOWNED);
}
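/*
 * Unlock value selection above, in table form:
 *	rb == true                         -> UMUTEX_RB_OWNERDEAD
 *	rb == false, UMUTEX_NONCONSISTENT  -> UMUTEX_RB_NOTRECOV
 *	otherwise                          -> UMUTEX_UNOWNED
 */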
/*
 * Put the thread into a sleep state.  Before sleeping, check whether
 * the thread was removed from the umtx queue.
 */
static inline int
umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *abstime)
{
	struct umtxq_chain *uc;
	int error, timo;

	if (abstime != NULL && abstime->is_abs_real) {
		curthread->td_rtcgen = atomic_load_acq_int(&rtc_generation);
		abs_timeout_update(abstime);
	}

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	for (;;) {
		if (!(uq->uq_flags & UQF_UMTXQ)) {
			error = 0;
			break;
		}
		if (abstime != NULL) {
			timo = abs_timeout_gethz(abstime);
			if (timo < 0) {
				error = ETIMEDOUT;
				break;
			}
		} else
			timo = 0;
		error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo);
		if (error == EINTR || error == ERESTART) {
			umtxq_lock(&uq->uq_key);
			break;
		}
		if (abstime != NULL) {
			if (abstime->is_abs_real)
				curthread->td_rtcgen =
				    atomic_load_acq_int(&rtc_generation);
			abs_timeout_update(abstime);
		}
		umtxq_lock(&uq->uq_key);
	}

	curthread->td_rtcgen = 0;
	return (error);
}

/*
 * Convert a userspace address into a unique logical address.
 */
int
umtx_key_get(const void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else {
		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
		    &entry, &key->info.shared.object, &pindex, &prot,
		    &wired) != KERN_SUCCESS) {
			return (EFAULT);
		}

		if ((share == PROCESS_SHARE) ||
		    (share == AUTO_SHARE &&
		     VM_INHERIT_SHARE == entry->inheritance)) {
			key->shared = 1;
			key->info.shared.offset = (vm_offset_t)addr -
			    entry->start + entry->offset;
			vm_object_reference(key->info.shared.object);
		} else {
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		vm_map_lookup_done(map, entry);
	}

	umtxq_hash(key);
	return (0);
}

/*
 * Release key.
 */
void
umtx_key_release(struct umtx_key *key)
{

	if (key->shared)
		vm_object_deallocate(key->info.shared.object);
}
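/*
 * Typical pattern for the helpers above, as do_wait() below illustrates:
 *
 *	umtx_key_get(addr, type, share, &key);
 *	umtxq_lock(&key);
 *	... queue, sleep, or signal ...
 *	umtxq_unlock(&key);
 *	umtx_key_release(&key);
 */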
/*
 * Fetch and compare the value; sleep on the address if the value has
 * not changed.
 */
static int
do_wait(struct thread *td, void *addr, u_long id,
    struct _umtx_time *timeout, int compat32, int is_private)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	u_long tmp;
	uint32_t tmp32;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	if (compat32 == 0) {
		error = fueword(addr, &tmp);
		if (error != 0)
			error = EFAULT;
	} else {
		error = fueword32(addr, &tmp32);
		if (error == 0)
			tmp = tmp32;
		else
			error = EFAULT;
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		if (tmp == id)
			error = umtxq_sleep(uq, "uwait", timeout == NULL ?
			    NULL : &timo);
		if ((uq->uq_flags & UQF_UMTXQ) == 0)
			error = 0;
		else
			umtxq_remove(uq);
	} else if ((uq->uq_flags & UQF_UMTXQ) != 0) {
		umtxq_remove(uq);
	}
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

/*
 * Wake up threads sleeping on the specified address.
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
		return (ret);
	umtxq_lock(&key);
	umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}
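/*
 * The wait/wake pair above follows the usual futex protocol: do_wait()
 * inserts itself on the queue before fetching and comparing the word, so
 * a concurrent kern_umtx_wake() arriving between the compare and the
 * sleep still finds the waiter on the queue, and the wakeup cannot be
 * lost.
 */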
/*
 * Lock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int mode)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	error = 0;
	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	/*
	 * Care must be exercised when dealing with umtx structure.  It
	 * can fault on any access.
	 */
	for (;;) {
		rv = fueword32(&m->m_owner, &owner);
		if (rv == -1)
			return (EFAULT);
		if (mode == _UMUTEX_WAIT) {
			if (owner == UMUTEX_UNOWNED ||
			    owner == UMUTEX_CONTESTED ||
			    owner == UMUTEX_RB_OWNERDEAD ||
			    owner == UMUTEX_RB_NOTRECOV)
				return (0);
		} else {
			/*
			 * Robust mutex terminated.  Kernel duty is to
			 * return EOWNERDEAD to the userspace.  The
			 * umutex.m_flags UMUTEX_NONCONSISTENT is set
			 * by the common userspace code.
			 */
			if (owner == UMUTEX_RB_OWNERDEAD) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_RB_OWNERDEAD, &owner,
				    id | UMUTEX_CONTESTED);
				if (rv == -1)
					return (EFAULT);
				if (rv == 0) {
					MPASS(owner == UMUTEX_RB_OWNERDEAD);
					return (EOWNERDEAD); /* success */
				}
				MPASS(rv == 1);
				rv = thread_check_susp(td, false);
				if (rv != 0)
					return (rv);
				continue;
			}
			if (owner == UMUTEX_RB_NOTRECOV)
				return (ENOTRECOVERABLE);

			/*
			 * Try the uncontested case.  This should be
			 * done in userland.
			 */
			rv = casueword32(&m->m_owner, UMUTEX_UNOWNED,
			    &owner, id);
			/* The address was invalid. */
			if (rv == -1)
				return (EFAULT);

			/* The acquire succeeded. */
			if (rv == 0) {
				MPASS(owner == UMUTEX_UNOWNED);
				return (0);
			}

			/*
			 * If no one owns it but it is contested, try
			 * to acquire it.
			 */
			MPASS(rv == 1);
			if (owner == UMUTEX_CONTESTED) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_CONTESTED, &owner,
				    id | UMUTEX_CONTESTED);
				/* The address was invalid. */
				if (rv == -1)
					return (EFAULT);
				if (rv == 0) {
					MPASS(owner == UMUTEX_CONTESTED);
					return (0);
				}
				if (rv == 1) {
					rv = thread_check_susp(td, false);
					if (rv != 0)
						return (rv);
				}

				/*
				 * If this failed, the lock has
				 * changed; restart.
				 */
				continue;
			}

			/* rv == 1 but not contested, likely store failure */
			rv = thread_check_susp(td, false);
			if (rv != 0)
				return (rv);
		}

		if (mode == _UMUTEX_TRY)
			return (EBUSY);

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old,
		    owner | UMUTEX_CONTESTED);

		/* The address was invalid or casueword failed to store. */
		if (rv == -1 || rv == 1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			if (rv == -1)
				return (EFAULT);
			if (rv == 1) {
				rv = thread_check_susp(td, false);
				if (rv != 0)
					return (rv);
			}
			continue;
		}

		/*
		 * We set the contested bit, sleep.  Otherwise the lock changed
		 * and we need to retry, or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		MPASS(old == owner);
		error = umtxq_sleep(uq, "umtxn", timeout == NULL ?
		    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);

		if (error == 0)
			error = thread_check_susp(td, false);
	}

	return (0);
}
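/*
 * m_owner encoding used by the lock/unlock routines here: an owned mutex
 * stores the owner's thread id, ORed with UMUTEX_CONTESTED once a waiter
 * has to go through the kernel; UMUTEX_UNOWNED, UMUTEX_CONTESTED alone,
 * UMUTEX_RB_OWNERDEAD and UMUTEX_RB_NOTRECOV are reserved marker values,
 * which is why ownership checks compare (owner & ~UMUTEX_CONTESTED)
 * against the thread id.
 */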
/*
 * Unlock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	uint32_t owner, old, id, newlock;
	int error, count;

	id = td->td_tid;

again:
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	newlock = umtx_unlock_val(flags, rb);
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, newlock);
		if (error == -1)
			return (EFAULT);
		if (error == 1) {
			error = thread_check_susp(td, false);
			if (error != 0)
				return (error);
			goto again;
		}
		MPASS(old == owner);
		return (0);
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * zero or only one thread is waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	if (count > 1)
		newlock |= UMUTEX_CONTESTED;
	error = casueword32(&m->m_owner, owner, &old, newlock);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (error == 1) {
		if (old != owner)
			return (EINVAL);
		error = thread_check_susp(td, false);
		if (error != 0)
			return (error);
		goto again;
	}
	return (0);
}

/*
 * Check if the mutex is available and wake up a waiter;
 * this is only for simple mutexes.
 */
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

again:
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != 0 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV)
		return (0);

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	if (count <= 1 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV) {
		error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    UMUTEX_UNOWNED);
		if (error == -1) {
			error = EFAULT;
		} else if (error == 1) {
			umtxq_lock(&key);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			error = thread_check_susp(td, false);
			if (error != 0)
				return (error);
			goto again;
		}
	}

	umtxq_lock(&key);
	if (error == 0 && count != 0) {
		MPASS((owner & ~UMUTEX_CONTESTED) == 0 ||
		    owner == UMUTEX_RB_OWNERDEAD ||
		    owner == UMUTEX_RB_NOTRECOV);
		umtxq_signal(&key, 1);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}
/*
 * Check if the mutex has waiters and try to fix the contention bit.
 */
static int
do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old;
	int type;
	int error;
	int count;

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT |
	    UMUTEX_ROBUST)) {
	case 0:
	case UMUTEX_ROBUST:
		type = TYPE_NORMAL_UMUTEX;
		break;
	case UMUTEX_PRIO_INHERIT:
		type = TYPE_PI_UMUTEX;
		break;
	case (UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST):
		type = TYPE_PI_ROBUST_UMUTEX;
		break;
	case UMUTEX_PRIO_PROTECT:
		type = TYPE_PP_UMUTEX;
		break;
	case (UMUTEX_PRIO_PROTECT | UMUTEX_ROBUST):
		type = TYPE_PP_ROBUST_UMUTEX;
		break;
	default:
		return (EINVAL);
	}
	if ((error = umtx_key_get(m, type, GET_SHARE(flags), &key)) != 0)
		return (error);

	owner = 0;
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		error = EFAULT;

	/*
	 * Only repair the contention bit if there is a waiter; this means
	 * the mutex is still being referenced by userland code.
	 * Otherwise, don't update any memory.
	 */
	while (error == 0 && (owner & UMUTEX_CONTESTED) == 0 &&
	    (count > 1 || (count == 1 && (owner & ~UMUTEX_CONTESTED) != 0))) {
		error = casueword32(&m->m_owner, owner, &old,
		    owner | UMUTEX_CONTESTED);
		if (error == -1) {
			error = EFAULT;
			break;
		}
		if (error == 0) {
			MPASS(old == owner);
			break;
		}
		owner = old;
		error = thread_check_susp(td, false);
	}

	umtxq_lock(&key);
	if (error == EFAULT) {
		umtxq_signal(&key, INT_MAX);
	} else if (count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 ||
	    owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV))
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

static inline struct umtx_pi *
umtx_pi_alloc(int flags)
{
	struct umtx_pi *pi;

	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
	TAILQ_INIT(&pi->pi_blocked);
	atomic_add_int(&umtx_pi_allocated, 1);
	return (pi);
}

static inline void
umtx_pi_free(struct umtx_pi *pi)
{

	uma_zfree(umtx_pi_zone, pi);
	atomic_add_int(&umtx_pi_allocated, -1);
}

/*
 * Adjust the thread's position on a pi_state after its priority has been
 * changed.
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread or higher than the next thread.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}

static struct umtx_pi *
umtx_pi_next(struct umtx_pi *pi)
{
	struct umtx_q *uq_owner;

	if (pi->pi_owner == NULL)
		return (NULL);
	uq_owner = pi->pi_owner->td_umtxq;
	if (uq_owner == NULL)
		return (NULL);
	return (uq_owner->uq_pi_blocked);
}

/*
 * Floyd's Cycle-Finding Algorithm.
 */
static bool
umtx_pi_check_loop(struct umtx_pi *pi)
{
	struct umtx_pi *pi1;	/* fast iterator */

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (false);
	pi1 = pi;
	for (;;) {
		pi = umtx_pi_next(pi);
		if (pi == NULL)
			break;
		pi1 = umtx_pi_next(pi1);
		if (pi1 == NULL)
			break;
		pi1 = umtx_pi_next(pi1);
		if (pi1 == NULL)
			break;
		if (pi == pi1)
			return (true);
	}
	return (false);
}
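/*
 * umtx_pi_check_loop() walks the owner chain (mutex -> owning thread ->
 * mutex that thread is blocked on -> ...) with the classic two-pointer
 * scheme: pi advances one step per iteration while pi1 advances two, so
 * if userland has created a cycle of PI mutex ownership the pointers
 * must eventually meet, and propagation is skipped instead of looping
 * forever.
 */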
/*
 * Propagate priority when a thread is blocked on a POSIX
 * PI mutex.
 */
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;
	if (umtx_pi_check_loop(pi))
		return;

	for (;;) {
		td = pi->pi_owner;
		if (td == NULL || td == curthread)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		thread_lock(td);
		if (td->td_lend_user_pri > pri)
			sched_lend_user_prio(td, pri);
		else {
			thread_unlock(td);
			break;
		}
		thread_unlock(td);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		if (pi == NULL)
			break;
		/* Resort td on the list if needed. */
		umtx_pi_adjust_thread(pi, td);
	}
}

/*
 * Unpropagate priority for a PI mutex when a thread blocked on
 * it is interrupted by a signal or resumed by others.
 */
static void
umtx_repropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);

	if (umtx_pi_check_loop(pi))
		return;
	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		thread_lock(pi->pi_owner);
		sched_lend_user_prio(pi->pi_owner, pri);
		thread_unlock(pi->pi_owner);
		if ((pi = uq_owner->uq_pi_blocked) != NULL)
			umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
	}
}

/*
 * Insert a PI mutex into the owned list.
 */
static void
umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_assert(&umtx_lock, MA_OWNED);
	MPASS(pi->pi_owner == NULL);
	pi->pi_owner = owner;
	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
}

/*
 * Disown a PI mutex, and remove it from the owned list.
 */
static void
umtx_pi_disown(struct umtx_pi *pi)
{

	mtx_assert(&umtx_lock, MA_OWNED);
	TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, pi, pi_link);
	pi->pi_owner = NULL;
}
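/*
 * Worked example for umtx_propagate_priority() and
 * umtx_repropagate_priority() (lower numeric value means higher
 * priority; the priorities are illustrative): if thread B at priority
 * 100 blocks on a PI mutex owned by thread A at priority 140,
 * propagation lends priority 100 to A, and transitively to whatever A
 * is itself blocked on.  When B later leaves the queue, repropagation
 * recomputes A's lent priority from the remaining top waiters of all PI
 * mutexes A still owns.
 */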
/*
 * Claim ownership of a PI mutex.
 */
static int
umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq;
	int pri;

	mtx_lock(&umtx_lock);
	if (pi->pi_owner == owner) {
		mtx_unlock(&umtx_lock);
		return (0);
	}

	if (pi->pi_owner != NULL) {
		/*
		 * Userland may have already messed up the mutex; sigh.
		 */
		mtx_unlock(&umtx_lock);
		return (EPERM);
	}
	umtx_pi_setowner(pi, owner);
	uq = TAILQ_FIRST(&pi->pi_blocked);
	if (uq != NULL) {
		pri = UPRI(uq->uq_thread);
		thread_lock(owner);
		if (pri < UPRI(owner))
			sched_lend_user_prio(owner, pri);
		thread_unlock(owner);
	}
	mtx_unlock(&umtx_lock);
	return (0);
}

/*
 * Adjust a thread's position in the queue of the PI mutex it is
 * blocked on; this may trigger a new round of priority propagation.
 */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	mtx_lock(&umtx_lock);
	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	if (pi != NULL) {
		umtx_pi_adjust_thread(pi, td);
		umtx_repropagate_priority(pi);
	}
	mtx_unlock(&umtx_lock);
}

/*
 * Sleep on a PI mutex.
 */
static int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, uint32_t owner,
    const char *wmesg, struct abs_timeout *timo, bool shared)
{
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int error, pri;
#ifdef INVARIANTS
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
#endif
	error = 0;
	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key));
	KASSERT(uc->uc_busy != 0, ("umtx chain is not busy"));
	umtxq_insert(uq);
	mtx_lock(&umtx_lock);
	if (pi->pi_owner == NULL) {
		mtx_unlock(&umtx_lock);
		td1 = tdfind(owner, shared ? -1 : td->td_proc->p_pid);
		mtx_lock(&umtx_lock);
		if (td1 != NULL) {
			if (pi->pi_owner == NULL)
				umtx_pi_setowner(pi, td1);
			PROC_UNLOCK(td1->td_proc);
		}
	}

	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	thread_lock(td);
	td->td_flags |= TDF_UPIBLOCKED;
	thread_unlock(td);
	umtx_propagate_priority(td);
	mtx_unlock(&umtx_lock);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, wmesg, timo);
	umtxq_remove(uq);

	mtx_lock(&umtx_lock);
	uq->uq_pi_blocked = NULL;
	thread_lock(td);
	td->td_flags &= ~TDF_UPIBLOCKED;
	thread_unlock(td);
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_repropagate_priority(pi);
	mtx_unlock(&umtx_lock);
	umtxq_unlock(&uq->uq_key);

	return (error);
}

/*
 * Add a reference count to a PI mutex.
 */
static void
umtx_pi_ref(struct umtx_pi *pi)
{

	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&pi->pi_key));
	pi->pi_refcount++;
}
/*
 * Decrease the reference count of a PI mutex; when the counter
 * drops to zero, its memory is freed.
 */
static void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		mtx_lock(&umtx_lock);
		if (pi->pi_owner != NULL)
			umtx_pi_disown(pi);
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
		    ("blocked queue not empty"));
		mtx_unlock(&umtx_lock);
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		umtx_pi_free(pi);
	}
}

/*
 * Find a PI mutex in the hash table.
 */
static struct umtx_pi *
umtx_pi_lookup(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtx_pi *pi;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);

	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
		if (umtx_key_match(&pi->pi_key, key)) {
			return (pi);
		}
	}
	return (NULL);
}

/*
 * Insert a PI mutex into the hash table.
 */
static inline void
umtx_pi_insert(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
}
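/*
 * do_lock_pi() below allocates its umtx_pi with a lock-friendly pattern:
 * first try umtx_pi_alloc(M_NOWAIT) while holding the chain lock; if
 * that fails, drop the lock, allocate with M_WAITOK, retake the lock and
 * redo the lookup, freeing the fresh umtx_pi if another thread installed
 * one in the meantime.
 */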
/*
 * Lock a PI mutex.
 */
static int
do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, old_owner, owner, old;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	pi = umtx_pi_lookup(&uq->uq_key);
	if (pi == NULL) {
		new_pi = umtx_pi_alloc(M_NOWAIT);
		if (new_pi == NULL) {
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc(M_WAITOK);
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL) {
				umtx_pi_free(new_pi);
				new_pi = NULL;
			}
		}
		if (new_pi != NULL) {
			new_pi->pi_key = uq->uq_key;
			umtx_pi_insert(new_pi);
			pi = new_pi;
		}
	}
	umtx_pi_ref(pi);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Care must be exercised when dealing with umtx structure.  It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}
		/* The acquire succeeded. */
		if (rv == 0) {
			MPASS(owner == UMUTEX_UNOWNED);
			error = 0;
			break;
		}

		if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
			break;
		}

		/*
		 * Avoid overwriting a possible error from sleep due
		 * to the pending signal with suspension check result.
		 */
		if (error == 0) {
			error = thread_check_susp(td, true);
			if (error != 0)
				break;
		}

		/*
		 * If no one owns it but it is contested, try to
		 * acquire it.
		 */
		if (owner == UMUTEX_CONTESTED || owner == UMUTEX_RB_OWNERDEAD) {
			old_owner = owner;
			rv = casueword32(&m->m_owner, owner, &owner,
			    id | UMUTEX_CONTESTED);
			/* The address was invalid. */
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (rv == 1) {
				if (error == 0) {
					error = thread_check_susp(td, true);
					if (error != 0)
						break;
				}

				/*
				 * If this failed, the lock could have
				 * changed; restart.
				 */
				continue;
			}

			MPASS(rv == 0);
			MPASS(owner == old_owner);
			umtxq_lock(&uq->uq_key);
			umtxq_busy(&uq->uq_key);
			error = umtx_pi_claim(pi, td);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			if (error != 0) {
				/*
				 * Since we're going to return an
				 * error, restore the m_owner to its
				 * previous, unowned state to avoid
				 * compounding the problem.
				 */
				(void)casuword32(&m->m_owner,
				    id | UMUTEX_CONTESTED, old_owner);
			}
			if (error == 0 && old_owner == UMUTEX_RB_OWNERDEAD)
				error = EOWNERDEAD;
			break;
		}

		if ((owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old, owner |
		    UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		if (rv == 1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = thread_check_susp(td, true);
			if (error != 0)
				break;

			/*
			 * The lock changed and we need to retry or we
			 * lost a race to the thread unlocking the
			 * umtx.  Note that the UMUTEX_RB_OWNERDEAD
			 * value for owner is impossible there.
			 */
			continue;
		}

		umtxq_lock(&uq->uq_key);

		/* We set the contested bit, sleep. */
		MPASS(old == owner);
		error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
		    "umtxpi", timeout == NULL ? NULL : &timo,
		    (flags & USYNC_PROCESS_SHARED) != 0);
		if (error != 0)
			continue;

		error = thread_check_susp(td, false);
		if (error != 0)
			break;
	}

	umtxq_lock(&uq->uq_key);
	umtx_pi_unref(pi);
	umtxq_unlock(&uq->uq_key);

	umtx_key_release(&uq->uq_key);
	return (error);
}
/*
 * Unlock a PI mutex.
 */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	uint32_t id, new_owner, old, owner;
	int count, error, pri;

	id = td->td_tid;

usrloop:
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	new_owner = umtx_unlock_val(flags, rb);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, new_owner);
		if (error == -1)
			return (EFAULT);
		if (error == 1) {
			error = thread_check_susp(td, true);
			if (error != 0)
				return (error);
			goto usrloop;
		}
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count_pi(&key, &uq_first);
	if (uq_first != NULL) {
		mtx_lock(&umtx_lock);
		pi = uq_first->uq_pi_blocked;
		KASSERT(pi != NULL, ("pi == NULL?"));
		if (pi->pi_owner != td && !(rb && pi->pi_owner == NULL)) {
			mtx_unlock(&umtx_lock);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			/* userland messed up the mutex */
			return (EPERM);
		}
		uq_me = td->td_umtxq;
		if (pi->pi_owner == td)
			umtx_pi_disown(pi);
		/* get highest priority thread which is still sleeping. */
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		while (uq_first != NULL &&
		    (uq_first->uq_flags & UQF_UMTXQ) == 0) {
			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
		}
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
		if (uq_first)
			umtxq_signal_thread(uq_first);
	} else {
		pi = umtx_pi_lookup(&key);
		/*
		 * A umtx_pi can exist if a signal or timeout removed the
		 * last waiter from the umtxq, but there is still
		 * a thread in do_lock_pi() holding the umtx_pi.
		 */
		if (pi != NULL) {
			/*
			 * The umtx_pi can be unowned, such as when a thread
			 * has just entered do_lock_pi(), allocated the
			 * umtx_pi, and unlocked the umtxq.
			 * If the current thread owns it, it must disown it.
			 */
			mtx_lock(&umtx_lock);
			if (pi->pi_owner == td)
				umtx_pi_disown(pi);
			mtx_unlock(&umtx_lock);
		}
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * zero or only one thread is waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	if (count > 1)
		new_owner |= UMUTEX_CONTESTED;
again:
	error = casueword32(&m->m_owner, owner, &old, new_owner);
	if (error == 1) {
		error = thread_check_susp(td, false);
		if (error == 0)
			goto again;
	}
	umtxq_unbusy_unlocked(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (error == 0 && old != owner)
		return (EINVAL);
	return (error);
}
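/*
 * Ceiling arithmetic used by the PP routines below: userland supplies a
 * POSIX-style ceiling (0..RTP_PRIO_MAX, larger means more important),
 * while kernel priorities grow in the opposite direction.  Hence the
 * conversion
 *	kernel_pri = PRI_MIN_REALTIME + (RTP_PRIO_MAX - ceiling)
 * so the highest ceiling RTP_PRIO_MAX maps to PRI_MIN_REALTIME, the
 * most important realtime priority.
 */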
/*
 * Lock a PP mutex.
 */
static int
do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct abs_timeout timo;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t ceiling;
	uint32_t owner, id;
	int error, pri, old_inherited_pri, su, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
	for (;;) {
		old_inherited_pri = uq->uq_inherited_pri;
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		rv = fueword32(&m->m_ceilings[0], &ceiling);
		if (rv == -1) {
			error = EFAULT;
			goto out;
		}
		ceiling = RTP_PRIO_MAX - ceiling;
		if (ceiling > RTP_PRIO_MAX) {
			error = EINVAL;
			goto out;
		}

		mtx_lock(&umtx_lock);
		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
			mtx_unlock(&umtx_lock);
			error = EINVAL;
			goto out;
		}
		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
			thread_lock(td);
			if (uq->uq_inherited_pri < UPRI(td))
				sched_lend_user_prio(td, uq->uq_inherited_pri);
			thread_unlock(td);
		}
		mtx_unlock(&umtx_lock);

		rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    id | UMUTEX_CONTESTED);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}
		if (rv == 0) {
			MPASS(owner == UMUTEX_CONTESTED);
			error = 0;
			break;
		}
		/* rv == 1 */
		if (owner == UMUTEX_RB_OWNERDEAD) {
			rv = casueword32(&m->m_owner, UMUTEX_RB_OWNERDEAD,
			    &owner, id | UMUTEX_CONTESTED);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (rv == 0) {
				MPASS(owner == UMUTEX_RB_OWNERDEAD);
				error = EOWNERDEAD; /* success */
				break;
			}

			/*
			 * rv == 1, only check for suspension if we
			 * did not already catch a signal.  If we
			 * get an error from the check, the same
			 * condition is checked by the umtxq_sleep()
			 * call below, so we should obliterate the
			 * error to not skip the last loop iteration.
			 */
			if (error == 0) {
				error = thread_check_susp(td, false);
				if (error == 0) {
					if (try != 0)
						error = EBUSY;
					else
						continue;
				}
				error = 0;
			}
		} else if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
		}

		if (try != 0)
			error = EBUSY;

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", timeout == NULL ?
		    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);

		mtx_lock(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}

	if (error != 0 && error != EOWNERDEAD) {
		mtx_lock(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}

out:
	umtxq_unbusy_unlocked(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}
2241 NULL : &timo); 2242 umtxq_remove(uq); 2243 umtxq_unlock(&uq->uq_key); 2244 2245 mtx_lock(&umtx_lock); 2246 uq->uq_inherited_pri = old_inherited_pri; 2247 pri = PRI_MAX; 2248 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2249 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2250 if (uq2 != NULL) { 2251 if (pri > UPRI(uq2->uq_thread)) 2252 pri = UPRI(uq2->uq_thread); 2253 } 2254 } 2255 if (pri > uq->uq_inherited_pri) 2256 pri = uq->uq_inherited_pri; 2257 thread_lock(td); 2258 sched_lend_user_prio(td, pri); 2259 thread_unlock(td); 2260 mtx_unlock(&umtx_lock); 2261 } 2262 2263 if (error != 0 && error != EOWNERDEAD) { 2264 mtx_lock(&umtx_lock); 2265 uq->uq_inherited_pri = old_inherited_pri; 2266 pri = PRI_MAX; 2267 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2268 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2269 if (uq2 != NULL) { 2270 if (pri > UPRI(uq2->uq_thread)) 2271 pri = UPRI(uq2->uq_thread); 2272 } 2273 } 2274 if (pri > uq->uq_inherited_pri) 2275 pri = uq->uq_inherited_pri; 2276 thread_lock(td); 2277 sched_lend_user_prio(td, pri); 2278 thread_unlock(td); 2279 mtx_unlock(&umtx_lock); 2280 } 2281 2282 out: 2283 umtxq_unbusy_unlocked(&uq->uq_key); 2284 umtx_key_release(&uq->uq_key); 2285 return (error); 2286 } 2287 2288 /* 2289 * Unlock a PP mutex. 2290 */ 2291 static int 2292 do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, bool rb) 2293 { 2294 struct umtx_key key; 2295 struct umtx_q *uq, *uq2; 2296 struct umtx_pi *pi; 2297 uint32_t id, owner, rceiling; 2298 int error, pri, new_inherited_pri, su; 2299 2300 id = td->td_tid; 2301 uq = td->td_umtxq; 2302 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); 2303 2304 /* 2305 * Make sure we own this mtx. 2306 */ 2307 error = fueword32(&m->m_owner, &owner); 2308 if (error == -1) 2309 return (EFAULT); 2310 2311 if ((owner & ~UMUTEX_CONTESTED) != id) 2312 return (EPERM); 2313 2314 error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t)); 2315 if (error != 0) 2316 return (error); 2317 2318 if (rceiling == -1) 2319 new_inherited_pri = PRI_MAX; 2320 else { 2321 rceiling = RTP_PRIO_MAX - rceiling; 2322 if (rceiling > RTP_PRIO_MAX) 2323 return (EINVAL); 2324 new_inherited_pri = PRI_MIN_REALTIME + rceiling; 2325 } 2326 2327 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2328 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2329 &key)) != 0) 2330 return (error); 2331 umtxq_lock(&key); 2332 umtxq_busy(&key); 2333 umtxq_unlock(&key); 2334 /* 2335 * For a priority protected mutex, always set the unlocked state 2336 * to UMUTEX_CONTESTED, so that userland always enters the kernel 2337 * to lock the mutex; this is necessary because the thread 2338 * priority has to be adjusted for such a mutex.
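 *
 * As an illustration (a sketch of the userland side, not code from
 * this file): a plain mutex can be released entirely in userland
 * with a single compare-and-swap such as
 *
 *	atomic_cmpset_rel_32(&m->m_owner, tid, UMUTEX_UNOWNED);
 *
 * but the unlocked state written below is
 * (umtx_unlock_val() | UMUTEX_CONTESTED), which never matches the
 * plain UMUTEX_UNOWNED value a userland fast path would install,
 * so the next lock attempt is forced through _umtx_op(2), where
 * the ceiling is applied.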
2339 */ 2340 error = suword32(&m->m_owner, umtx_unlock_val(flags, rb) | 2341 UMUTEX_CONTESTED); 2342 2343 umtxq_lock(&key); 2344 if (error == 0) 2345 umtxq_signal(&key, 1); 2346 umtxq_unbusy(&key); 2347 umtxq_unlock(&key); 2348 2349 if (error == -1) 2350 error = EFAULT; 2351 else { 2352 mtx_lock(&umtx_lock); 2353 if (su != 0) 2354 uq->uq_inherited_pri = new_inherited_pri; 2355 pri = PRI_MAX; 2356 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2357 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2358 if (uq2 != NULL) { 2359 if (pri > UPRI(uq2->uq_thread)) 2360 pri = UPRI(uq2->uq_thread); 2361 } 2362 } 2363 if (pri > uq->uq_inherited_pri) 2364 pri = uq->uq_inherited_pri; 2365 thread_lock(td); 2366 sched_lend_user_prio(td, pri); 2367 thread_unlock(td); 2368 mtx_unlock(&umtx_lock); 2369 } 2370 umtx_key_release(&key); 2371 return (error); 2372 } 2373 2374 static int 2375 do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling, 2376 uint32_t *old_ceiling) 2377 { 2378 struct umtx_q *uq; 2379 uint32_t flags, id, owner, save_ceiling; 2380 int error, rv, rv1; 2381 2382 error = fueword32(&m->m_flags, &flags); 2383 if (error == -1) 2384 return (EFAULT); 2385 if ((flags & UMUTEX_PRIO_PROTECT) == 0) 2386 return (EINVAL); 2387 if (ceiling > RTP_PRIO_MAX) 2388 return (EINVAL); 2389 id = td->td_tid; 2390 uq = td->td_umtxq; 2391 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2392 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2393 &uq->uq_key)) != 0) 2394 return (error); 2395 for (;;) { 2396 umtxq_lock(&uq->uq_key); 2397 umtxq_busy(&uq->uq_key); 2398 umtxq_unlock(&uq->uq_key); 2399 2400 rv = fueword32(&m->m_ceilings[0], &save_ceiling); 2401 if (rv == -1) { 2402 error = EFAULT; 2403 break; 2404 } 2405 2406 rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, 2407 id | UMUTEX_CONTESTED); 2408 if (rv == -1) { 2409 error = EFAULT; 2410 break; 2411 } 2412 2413 if (rv == 0) { 2414 MPASS(owner == UMUTEX_CONTESTED); 2415 rv = suword32(&m->m_ceilings[0], ceiling); 2416 rv1 = suword32(&m->m_owner, UMUTEX_CONTESTED); 2417 error = (rv == 0 && rv1 == 0) ? 0: EFAULT; 2418 break; 2419 } 2420 2421 if ((owner & ~UMUTEX_CONTESTED) == id) { 2422 rv = suword32(&m->m_ceilings[0], ceiling); 2423 error = rv == 0 ? 0 : EFAULT; 2424 break; 2425 } 2426 2427 if (owner == UMUTEX_RB_OWNERDEAD) { 2428 error = EOWNERDEAD; 2429 break; 2430 } else if (owner == UMUTEX_RB_NOTRECOV) { 2431 error = ENOTRECOVERABLE; 2432 break; 2433 } 2434 2435 /* 2436 * If we caught a signal, we have retried and now 2437 * exit immediately. 2438 */ 2439 if (error != 0) 2440 break; 2441 2442 /* 2443 * We set the contested bit, sleep. Otherwise the lock changed 2444 * and we need to retry or we lost a race to the thread 2445 * unlocking the umtx. 2446 */ 2447 umtxq_lock(&uq->uq_key); 2448 umtxq_insert(uq); 2449 umtxq_unbusy(&uq->uq_key); 2450 error = umtxq_sleep(uq, "umtxpp", NULL); 2451 umtxq_remove(uq); 2452 umtxq_unlock(&uq->uq_key); 2453 } 2454 umtxq_lock(&uq->uq_key); 2455 if (error == 0) 2456 umtxq_signal(&uq->uq_key, INT_MAX); 2457 umtxq_unbusy(&uq->uq_key); 2458 umtxq_unlock(&uq->uq_key); 2459 umtx_key_release(&uq->uq_key); 2460 if (error == 0 && old_ceiling != NULL) { 2461 rv = suword32(old_ceiling, save_ceiling); 2462 error = rv == 0 ? 0 : EFAULT; 2463 } 2464 return (error); 2465 } 2466 2467 /* 2468 * Lock a userland POSIX mutex. 
2469 */ 2470 static int 2471 do_lock_umutex(struct thread *td, struct umutex *m, 2472 struct _umtx_time *timeout, int mode) 2473 { 2474 uint32_t flags; 2475 int error; 2476 2477 error = fueword32(&m->m_flags, &flags); 2478 if (error == -1) 2479 return (EFAULT); 2480 2481 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2482 case 0: 2483 error = do_lock_normal(td, m, flags, timeout, mode); 2484 break; 2485 case UMUTEX_PRIO_INHERIT: 2486 error = do_lock_pi(td, m, flags, timeout, mode); 2487 break; 2488 case UMUTEX_PRIO_PROTECT: 2489 error = do_lock_pp(td, m, flags, timeout, mode); 2490 break; 2491 default: 2492 return (EINVAL); 2493 } 2494 if (timeout == NULL) { 2495 if (error == EINTR && mode != _UMUTEX_WAIT) 2496 error = ERESTART; 2497 } else { 2498 /* Timed-locking is not restarted. */ 2499 if (error == ERESTART) 2500 error = EINTR; 2501 } 2502 return (error); 2503 } 2504 2505 /* 2506 * Unlock a userland POSIX mutex. 2507 */ 2508 static int 2509 do_unlock_umutex(struct thread *td, struct umutex *m, bool rb) 2510 { 2511 uint32_t flags; 2512 int error; 2513 2514 error = fueword32(&m->m_flags, &flags); 2515 if (error == -1) 2516 return (EFAULT); 2517 2518 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2519 case 0: 2520 return (do_unlock_normal(td, m, flags, rb)); 2521 case UMUTEX_PRIO_INHERIT: 2522 return (do_unlock_pi(td, m, flags, rb)); 2523 case UMUTEX_PRIO_PROTECT: 2524 return (do_unlock_pp(td, m, flags, rb)); 2525 } 2526 2527 return (EINVAL); 2528 } 2529 2530 static int 2531 do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m, 2532 struct timespec *timeout, u_long wflags) 2533 { 2534 struct abs_timeout timo; 2535 struct umtx_q *uq; 2536 uint32_t flags, clockid, hasw; 2537 int error; 2538 2539 uq = td->td_umtxq; 2540 error = fueword32(&cv->c_flags, &flags); 2541 if (error == -1) 2542 return (EFAULT); 2543 error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key); 2544 if (error != 0) 2545 return (error); 2546 2547 if ((wflags & CVWAIT_CLOCKID) != 0) { 2548 error = fueword32(&cv->c_clockid, &clockid); 2549 if (error == -1) { 2550 umtx_key_release(&uq->uq_key); 2551 return (EFAULT); 2552 } 2553 if (clockid < CLOCK_REALTIME || 2554 clockid >= CLOCK_THREAD_CPUTIME_ID) { 2555 /* only predefined clock ids are supported. */ 2556 umtx_key_release(&uq->uq_key); 2557 return (EINVAL); 2558 } 2559 } else { 2560 clockid = CLOCK_REALTIME; 2561 } 2562 2563 umtxq_lock(&uq->uq_key); 2564 umtxq_busy(&uq->uq_key); 2565 umtxq_insert(uq); 2566 umtxq_unlock(&uq->uq_key); 2567 2568 /* 2569 * Set c_has_waiters to 1 before releasing the user mutex, but 2570 * avoid dirtying the cache line when it is unnecessary. 2571 */ 2572 error = fueword32(&cv->c_has_waiters, &hasw); 2573 if (error == 0 && hasw == 0) 2574 suword32(&cv->c_has_waiters, 1); 2575 2576 umtxq_unbusy_unlocked(&uq->uq_key); 2577 2578 error = do_unlock_umutex(td, m, false); 2579 2580 if (timeout != NULL) 2581 abs_timeout_init(&timo, clockid, (wflags & CVWAIT_ABSTIME) != 0, 2582 timeout); 2583 2584 umtxq_lock(&uq->uq_key); 2585 if (error == 0) { 2586 error = umtxq_sleep(uq, "ucond", timeout == NULL ? 2587 NULL : &timo); 2588 } 2589 2590 if ((uq->uq_flags & UQF_UMTXQ) == 0) 2591 error = 0; 2592 else { 2593 /* 2594 * This must be a timeout, an interruption by a signal, 2595 * or a spurious wakeup; clear the c_has_waiters flag 2596 * when necessary.
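 *
 * The userland signalling side (an illustrative sketch, not code
 * from this file) checks c_has_waiters before entering the kernel:
 *
 *	if (cv->c_has_waiters)
 *		_umtx_op(cv, UMTX_OP_CV_SIGNAL, 0, NULL, NULL);
 *
 * which is why c_has_waiters was set above before the mutex was
 * released, and why it must be cleared here once the last waiter
 * is gone.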
2597 */ 2598 umtxq_busy(&uq->uq_key); 2599 if ((uq->uq_flags & UQF_UMTXQ) != 0) { 2600 int oldlen = uq->uq_cur_queue->length; 2601 umtxq_remove(uq); 2602 if (oldlen == 1) { 2603 umtxq_unlock(&uq->uq_key); 2604 suword32(&cv->c_has_waiters, 0); 2605 umtxq_lock(&uq->uq_key); 2606 } 2607 } 2608 umtxq_unbusy(&uq->uq_key); 2609 if (error == ERESTART) 2610 error = EINTR; 2611 } 2612 2613 umtxq_unlock(&uq->uq_key); 2614 umtx_key_release(&uq->uq_key); 2615 return (error); 2616 } 2617 2618 /* 2619 * Signal a userland condition variable. 2620 */ 2621 static int 2622 do_cv_signal(struct thread *td, struct ucond *cv) 2623 { 2624 struct umtx_key key; 2625 int error, cnt, nwake; 2626 uint32_t flags; 2627 2628 error = fueword32(&cv->c_flags, &flags); 2629 if (error == -1) 2630 return (EFAULT); 2631 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2632 return (error); 2633 umtxq_lock(&key); 2634 umtxq_busy(&key); 2635 cnt = umtxq_count(&key); 2636 nwake = umtxq_signal(&key, 1); 2637 if (cnt <= nwake) { 2638 umtxq_unlock(&key); 2639 error = suword32(&cv->c_has_waiters, 0); 2640 if (error == -1) 2641 error = EFAULT; 2642 umtxq_lock(&key); 2643 } 2644 umtxq_unbusy(&key); 2645 umtxq_unlock(&key); 2646 umtx_key_release(&key); 2647 return (error); 2648 } 2649 2650 static int 2651 do_cv_broadcast(struct thread *td, struct ucond *cv) 2652 { 2653 struct umtx_key key; 2654 int error; 2655 uint32_t flags; 2656 2657 error = fueword32(&cv->c_flags, &flags); 2658 if (error == -1) 2659 return (EFAULT); 2660 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2661 return (error); 2662 2663 umtxq_lock(&key); 2664 umtxq_busy(&key); 2665 umtxq_signal(&key, INT_MAX); 2666 umtxq_unlock(&key); 2667 2668 error = suword32(&cv->c_has_waiters, 0); 2669 if (error == -1) 2670 error = EFAULT; 2671 2672 umtxq_unbusy_unlocked(&key); 2673 2674 umtx_key_release(&key); 2675 return (error); 2676 } 2677 2678 static int 2679 do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, 2680 struct _umtx_time *timeout) 2681 { 2682 struct abs_timeout timo; 2683 struct umtx_q *uq; 2684 uint32_t flags, wrflags; 2685 int32_t state, oldstate; 2686 int32_t blocked_readers; 2687 int error, error1, rv; 2688 2689 uq = td->td_umtxq; 2690 error = fueword32(&rwlock->rw_flags, &flags); 2691 if (error == -1) 2692 return (EFAULT); 2693 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2694 if (error != 0) 2695 return (error); 2696 2697 if (timeout != NULL) 2698 abs_timeout_init2(&timo, timeout); 2699 2700 wrflags = URWLOCK_WRITE_OWNER; 2701 if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER)) 2702 wrflags |= URWLOCK_WRITE_WAITERS; 2703 2704 for (;;) { 2705 rv = fueword32(&rwlock->rw_state, &state); 2706 if (rv == -1) { 2707 umtx_key_release(&uq->uq_key); 2708 return (EFAULT); 2709 } 2710 2711 /* try to lock it */ 2712 while (!(state & wrflags)) { 2713 if (__predict_false(URWLOCK_READER_COUNT(state) == 2714 URWLOCK_MAX_READERS)) { 2715 umtx_key_release(&uq->uq_key); 2716 return (EAGAIN); 2717 } 2718 rv = casueword32(&rwlock->rw_state, state, 2719 &oldstate, state + 1); 2720 if (rv == -1) { 2721 umtx_key_release(&uq->uq_key); 2722 return (EFAULT); 2723 } 2724 if (rv == 0) { 2725 MPASS(oldstate == state); 2726 umtx_key_release(&uq->uq_key); 2727 return (0); 2728 } 2729 error = thread_check_susp(td, true); 2730 if (error != 0) 2731 break; 2732 state = oldstate; 2733 } 2734 2735 if (error) 2736 break; 2737 2738 /* grab monitor lock */ 2739 umtxq_lock(&uq->uq_key); 2740 
umtxq_busy(&uq->uq_key); 2741 umtxq_unlock(&uq->uq_key); 2742 2743 /* 2744 * re-read the state, in case it changed between the try-lock above 2745 * and the check below 2746 */ 2747 rv = fueword32(&rwlock->rw_state, &state); 2748 if (rv == -1) 2749 error = EFAULT; 2750 2751 /* set read contention bit */ 2752 while (error == 0 && (state & wrflags) && 2753 !(state & URWLOCK_READ_WAITERS)) { 2754 rv = casueword32(&rwlock->rw_state, state, 2755 &oldstate, state | URWLOCK_READ_WAITERS); 2756 if (rv == -1) { 2757 error = EFAULT; 2758 break; 2759 } 2760 if (rv == 0) { 2761 MPASS(oldstate == state); 2762 goto sleep; 2763 } 2764 state = oldstate; 2765 error = thread_check_susp(td, false); 2766 if (error != 0) 2767 break; 2768 } 2769 if (error != 0) { 2770 umtxq_unbusy_unlocked(&uq->uq_key); 2771 break; 2772 } 2773 2774 /* state is changed while setting flags, restart */ 2775 if (!(state & wrflags)) { 2776 umtxq_unbusy_unlocked(&uq->uq_key); 2777 error = thread_check_susp(td, true); 2778 if (error != 0) 2779 break; 2780 continue; 2781 } 2782 2783 sleep: 2784 /* 2785 * Contention bit is set, before sleeping, increase 2786 * read waiter count. 2787 */ 2788 rv = fueword32(&rwlock->rw_blocked_readers, 2789 &blocked_readers); 2790 if (rv == -1) { 2791 umtxq_unbusy_unlocked(&uq->uq_key); 2792 error = EFAULT; 2793 break; 2794 } 2795 suword32(&rwlock->rw_blocked_readers, blocked_readers+1); 2796 2797 while (state & wrflags) { 2798 umtxq_lock(&uq->uq_key); 2799 umtxq_insert(uq); 2800 umtxq_unbusy(&uq->uq_key); 2801 2802 error = umtxq_sleep(uq, "urdlck", timeout == NULL ? 2803 NULL : &timo); 2804 2805 umtxq_busy(&uq->uq_key); 2806 umtxq_remove(uq); 2807 umtxq_unlock(&uq->uq_key); 2808 if (error) 2809 break; 2810 rv = fueword32(&rwlock->rw_state, &state); 2811 if (rv == -1) { 2812 error = EFAULT; 2813 break; 2814 } 2815 } 2816 2817 /* decrease read waiter count, and may clear read contention bit */ 2818 rv = fueword32(&rwlock->rw_blocked_readers, 2819 &blocked_readers); 2820 if (rv == -1) { 2821 umtxq_unbusy_unlocked(&uq->uq_key); 2822 error = EFAULT; 2823 break; 2824 } 2825 suword32(&rwlock->rw_blocked_readers, blocked_readers-1); 2826 if (blocked_readers == 1) { 2827 rv = fueword32(&rwlock->rw_state, &state); 2828 if (rv == -1) { 2829 umtxq_unbusy_unlocked(&uq->uq_key); 2830 error = EFAULT; 2831 break; 2832 } 2833 for (;;) { 2834 rv = casueword32(&rwlock->rw_state, state, 2835 &oldstate, state & ~URWLOCK_READ_WAITERS); 2836 if (rv == -1) { 2837 error = EFAULT; 2838 break; 2839 } 2840 if (rv == 0) { 2841 MPASS(oldstate == state); 2842 break; 2843 } 2844 state = oldstate; 2845 error1 = thread_check_susp(td, false); 2846 if (error1 != 0) { 2847 if (error == 0) 2848 error = error1; 2849 break; 2850 } 2851 } 2852 } 2853 2854 umtxq_unbusy_unlocked(&uq->uq_key); 2855 if (error != 0) 2856 break; 2857 } 2858 umtx_key_release(&uq->uq_key); 2859 if (error == ERESTART) 2860 error = EINTR; 2861 return (error); 2862 } 2863 2864 static int 2865 do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout) 2866 { 2867 struct abs_timeout timo; 2868 struct umtx_q *uq; 2869 uint32_t flags; 2870 int32_t state, oldstate; 2871 int32_t blocked_writers; 2872 int32_t blocked_readers; 2873 int error, error1, rv; 2874 2875 uq = td->td_umtxq; 2876 error = fueword32(&rwlock->rw_flags, &flags); 2877 if (error == -1) 2878 return (EFAULT); 2879 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2880 if (error != 0) 2881 return (error); 2882 2883 if (timeout != NULL) 2884 abs_timeout_init2(&timo, 
timeout); 2885 2886 blocked_readers = 0; 2887 for (;;) { 2888 rv = fueword32(&rwlock->rw_state, &state); 2889 if (rv == -1) { 2890 umtx_key_release(&uq->uq_key); 2891 return (EFAULT); 2892 } 2893 while ((state & URWLOCK_WRITE_OWNER) == 0 && 2894 URWLOCK_READER_COUNT(state) == 0) { 2895 rv = casueword32(&rwlock->rw_state, state, 2896 &oldstate, state | URWLOCK_WRITE_OWNER); 2897 if (rv == -1) { 2898 umtx_key_release(&uq->uq_key); 2899 return (EFAULT); 2900 } 2901 if (rv == 0) { 2902 MPASS(oldstate == state); 2903 umtx_key_release(&uq->uq_key); 2904 return (0); 2905 } 2906 state = oldstate; 2907 error = thread_check_susp(td, true); 2908 if (error != 0) 2909 break; 2910 } 2911 2912 if (error) { 2913 if ((state & (URWLOCK_WRITE_OWNER | 2914 URWLOCK_WRITE_WAITERS)) == 0 && 2915 blocked_readers != 0) { 2916 umtxq_lock(&uq->uq_key); 2917 umtxq_busy(&uq->uq_key); 2918 umtxq_signal_queue(&uq->uq_key, INT_MAX, 2919 UMTX_SHARED_QUEUE); 2920 umtxq_unbusy(&uq->uq_key); 2921 umtxq_unlock(&uq->uq_key); 2922 } 2923 2924 break; 2925 } 2926 2927 /* grab monitor lock */ 2928 umtxq_lock(&uq->uq_key); 2929 umtxq_busy(&uq->uq_key); 2930 umtxq_unlock(&uq->uq_key); 2931 2932 /* 2933 * Re-read the state, in case it changed between the 2934 * try-lock above and the check below. 2935 */ 2936 rv = fueword32(&rwlock->rw_state, &state); 2937 if (rv == -1) 2938 error = EFAULT; 2939 2940 while (error == 0 && ((state & URWLOCK_WRITE_OWNER) || 2941 URWLOCK_READER_COUNT(state) != 0) && 2942 (state & URWLOCK_WRITE_WAITERS) == 0) { 2943 rv = casueword32(&rwlock->rw_state, state, 2944 &oldstate, state | URWLOCK_WRITE_WAITERS); 2945 if (rv == -1) { 2946 error = EFAULT; 2947 break; 2948 } 2949 if (rv == 0) { 2950 MPASS(oldstate == state); 2951 goto sleep; 2952 } 2953 state = oldstate; 2954 error = thread_check_susp(td, false); 2955 if (error != 0) 2956 break; 2957 } 2958 if (error != 0) { 2959 umtxq_unbusy_unlocked(&uq->uq_key); 2960 break; 2961 } 2962 2963 if ((state & URWLOCK_WRITE_OWNER) == 0 && 2964 URWLOCK_READER_COUNT(state) == 0) { 2965 umtxq_unbusy_unlocked(&uq->uq_key); 2966 error = thread_check_susp(td, false); 2967 if (error != 0) 2968 break; 2969 continue; 2970 } 2971 sleep: 2972 rv = fueword32(&rwlock->rw_blocked_writers, 2973 &blocked_writers); 2974 if (rv == -1) { 2975 umtxq_unbusy_unlocked(&uq->uq_key); 2976 error = EFAULT; 2977 break; 2978 } 2979 suword32(&rwlock->rw_blocked_writers, blocked_writers + 1); 2980 2981 while ((state & URWLOCK_WRITE_OWNER) || 2982 URWLOCK_READER_COUNT(state) != 0) { 2983 umtxq_lock(&uq->uq_key); 2984 umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE); 2985 umtxq_unbusy(&uq->uq_key); 2986 2987 error = umtxq_sleep(uq, "uwrlck", timeout == NULL ? 
2988 NULL : &timo); 2989 2990 umtxq_busy(&uq->uq_key); 2991 umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE); 2992 umtxq_unlock(&uq->uq_key); 2993 if (error) 2994 break; 2995 rv = fueword32(&rwlock->rw_state, &state); 2996 if (rv == -1) { 2997 error = EFAULT; 2998 break; 2999 } 3000 } 3001 3002 rv = fueword32(&rwlock->rw_blocked_writers, 3003 &blocked_writers); 3004 if (rv == -1) { 3005 umtxq_unbusy_unlocked(&uq->uq_key); 3006 error = EFAULT; 3007 break; 3008 } 3009 suword32(&rwlock->rw_blocked_writers, blocked_writers-1); 3010 if (blocked_writers == 1) { 3011 rv = fueword32(&rwlock->rw_state, &state); 3012 if (rv == -1) { 3013 umtxq_unbusy_unlocked(&uq->uq_key); 3014 error = EFAULT; 3015 break; 3016 } 3017 for (;;) { 3018 rv = casueword32(&rwlock->rw_state, state, 3019 &oldstate, state & ~URWLOCK_WRITE_WAITERS); 3020 if (rv == -1) { 3021 error = EFAULT; 3022 break; 3023 } 3024 if (rv == 0) { 3025 MPASS(oldstate == state); 3026 break; 3027 } 3028 state = oldstate; 3029 error1 = thread_check_susp(td, false); 3030 /* 3031 * We are leaving the URWLOCK_WRITE_WAITERS 3032 * behind, but this should not harm the 3033 * correctness. 3034 */ 3035 if (error1 != 0) { 3036 if (error == 0) 3037 error = error1; 3038 break; 3039 } 3040 } 3041 rv = fueword32(&rwlock->rw_blocked_readers, 3042 &blocked_readers); 3043 if (rv == -1) { 3044 umtxq_unbusy_unlocked(&uq->uq_key); 3045 error = EFAULT; 3046 break; 3047 } 3048 } else 3049 blocked_readers = 0; 3050 3051 umtxq_unbusy_unlocked(&uq->uq_key); 3052 } 3053 3054 umtx_key_release(&uq->uq_key); 3055 if (error == ERESTART) 3056 error = EINTR; 3057 return (error); 3058 } 3059 3060 static int 3061 do_rw_unlock(struct thread *td, struct urwlock *rwlock) 3062 { 3063 struct umtx_q *uq; 3064 uint32_t flags; 3065 int32_t state, oldstate; 3066 int error, rv, q, count; 3067 3068 uq = td->td_umtxq; 3069 error = fueword32(&rwlock->rw_flags, &flags); 3070 if (error == -1) 3071 return (EFAULT); 3072 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3073 if (error != 0) 3074 return (error); 3075 3076 error = fueword32(&rwlock->rw_state, &state); 3077 if (error == -1) { 3078 error = EFAULT; 3079 goto out; 3080 } 3081 if (state & URWLOCK_WRITE_OWNER) { 3082 for (;;) { 3083 rv = casueword32(&rwlock->rw_state, state, 3084 &oldstate, state & ~URWLOCK_WRITE_OWNER); 3085 if (rv == -1) { 3086 error = EFAULT; 3087 goto out; 3088 } 3089 if (rv == 1) { 3090 state = oldstate; 3091 if (!(oldstate & URWLOCK_WRITE_OWNER)) { 3092 error = EPERM; 3093 goto out; 3094 } 3095 error = thread_check_susp(td, true); 3096 if (error != 0) 3097 goto out; 3098 } else 3099 break; 3100 } 3101 } else if (URWLOCK_READER_COUNT(state) != 0) { 3102 for (;;) { 3103 rv = casueword32(&rwlock->rw_state, state, 3104 &oldstate, state - 1); 3105 if (rv == -1) { 3106 error = EFAULT; 3107 goto out; 3108 } 3109 if (rv == 1) { 3110 state = oldstate; 3111 if (URWLOCK_READER_COUNT(oldstate) == 0) { 3112 error = EPERM; 3113 goto out; 3114 } 3115 error = thread_check_susp(td, true); 3116 if (error != 0) 3117 goto out; 3118 } else 3119 break; 3120 } 3121 } else { 3122 error = EPERM; 3123 goto out; 3124 } 3125 3126 count = 0; 3127 3128 if (!(flags & URWLOCK_PREFER_READER)) { 3129 if (state & URWLOCK_WRITE_WAITERS) { 3130 count = 1; 3131 q = UMTX_EXCLUSIVE_QUEUE; 3132 } else if (state & URWLOCK_READ_WAITERS) { 3133 count = INT_MAX; 3134 q = UMTX_SHARED_QUEUE; 3135 } 3136 } else { 3137 if (state & URWLOCK_READ_WAITERS) { 3138 count = INT_MAX; 3139 q = UMTX_SHARED_QUEUE; 3140 } else if (state & 
URWLOCK_WRITE_WAITERS) { 3141 count = 1; 3142 q = UMTX_EXCLUSIVE_QUEUE; 3143 } 3144 } 3145 3146 if (count) { 3147 umtxq_lock(&uq->uq_key); 3148 umtxq_busy(&uq->uq_key); 3149 umtxq_signal_queue(&uq->uq_key, count, q); 3150 umtxq_unbusy(&uq->uq_key); 3151 umtxq_unlock(&uq->uq_key); 3152 } 3153 out: 3154 umtx_key_release(&uq->uq_key); 3155 return (error); 3156 } 3157 3158 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3159 static int 3160 do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout) 3161 { 3162 struct abs_timeout timo; 3163 struct umtx_q *uq; 3164 uint32_t flags, count, count1; 3165 int error, rv, rv1; 3166 3167 uq = td->td_umtxq; 3168 error = fueword32(&sem->_flags, &flags); 3169 if (error == -1) 3170 return (EFAULT); 3171 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3172 if (error != 0) 3173 return (error); 3174 3175 if (timeout != NULL) 3176 abs_timeout_init2(&timo, timeout); 3177 3178 again: 3179 umtxq_lock(&uq->uq_key); 3180 umtxq_busy(&uq->uq_key); 3181 umtxq_insert(uq); 3182 umtxq_unlock(&uq->uq_key); 3183 rv = casueword32(&sem->_has_waiters, 0, &count1, 1); 3184 if (rv == 0) 3185 rv1 = fueword32(&sem->_count, &count); 3186 if (rv == -1 || (rv == 0 && (rv1 == -1 || count != 0)) || 3187 (rv == 1 && count1 == 0)) { 3188 umtxq_lock(&uq->uq_key); 3189 umtxq_unbusy(&uq->uq_key); 3190 umtxq_remove(uq); 3191 umtxq_unlock(&uq->uq_key); 3192 if (rv == 1) { 3193 rv = thread_check_susp(td, true); 3194 if (rv == 0) 3195 goto again; 3196 error = rv; 3197 goto out; 3198 } 3199 if (rv == 0) 3200 rv = rv1; 3201 error = rv == -1 ? EFAULT : 0; 3202 goto out; 3203 } 3204 umtxq_lock(&uq->uq_key); 3205 umtxq_unbusy(&uq->uq_key); 3206 3207 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 3208 3209 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3210 error = 0; 3211 else { 3212 umtxq_remove(uq); 3213 /* A relative timeout cannot be restarted. */ 3214 if (error == ERESTART && timeout != NULL && 3215 (timeout->_flags & UMTX_ABSTIME) == 0) 3216 error = EINTR; 3217 } 3218 umtxq_unlock(&uq->uq_key); 3219 out: 3220 umtx_key_release(&uq->uq_key); 3221 return (error); 3222 } 3223 3224 /* 3225 * Signal a userland semaphore. 3226 */ 3227 static int 3228 do_sem_wake(struct thread *td, struct _usem *sem) 3229 { 3230 struct umtx_key key; 3231 int error, cnt; 3232 uint32_t flags; 3233 3234 error = fueword32(&sem->_flags, &flags); 3235 if (error == -1) 3236 return (EFAULT); 3237 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3238 return (error); 3239 umtxq_lock(&key); 3240 umtxq_busy(&key); 3241 cnt = umtxq_count(&key); 3242 if (cnt > 0) { 3243 /* 3244 * If the count is greater than 0, the memory is still being 3245 * referenced by user code, so we can safely update the 3246 * _has_waiters flag.
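 *
 * The userland counterpart (an illustrative sketch only; the real
 * libc code differs) posts by incrementing _count and entering the
 * kernel only when _has_waiters is set:
 *
 *	atomic_add_rel_32(&sem->_count, 1);
 *	if (sem->_has_waiters)
 *		_umtx_op(sem, UMTX_OP_SEM_WAKE, 0, NULL, NULL);
 *
 * Clearing _has_waiters when the last waiter leaves keeps that
 * fast path from making unnecessary system calls.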
3247 */ 3248 if (cnt == 1) { 3249 umtxq_unlock(&key); 3250 error = suword32(&sem->_has_waiters, 0); 3251 umtxq_lock(&key); 3252 if (error == -1) 3253 error = EFAULT; 3254 } 3255 umtxq_signal(&key, 1); 3256 } 3257 umtxq_unbusy(&key); 3258 umtxq_unlock(&key); 3259 umtx_key_release(&key); 3260 return (error); 3261 } 3262 #endif 3263 3264 static int 3265 do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout) 3266 { 3267 struct abs_timeout timo; 3268 struct umtx_q *uq; 3269 uint32_t count, flags; 3270 int error, rv; 3271 3272 uq = td->td_umtxq; 3273 flags = fuword32(&sem->_flags); 3274 if (timeout != NULL) 3275 abs_timeout_init2(&timo, timeout); 3276 3277 again: 3278 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3279 if (error != 0) 3280 return (error); 3281 umtxq_lock(&uq->uq_key); 3282 umtxq_busy(&uq->uq_key); 3283 umtxq_insert(uq); 3284 umtxq_unlock(&uq->uq_key); 3285 rv = fueword32(&sem->_count, &count); 3286 if (rv == -1) { 3287 umtxq_lock(&uq->uq_key); 3288 umtxq_unbusy(&uq->uq_key); 3289 umtxq_remove(uq); 3290 umtxq_unlock(&uq->uq_key); 3291 umtx_key_release(&uq->uq_key); 3292 return (EFAULT); 3293 } 3294 for (;;) { 3295 if (USEM_COUNT(count) != 0) { 3296 umtxq_lock(&uq->uq_key); 3297 umtxq_unbusy(&uq->uq_key); 3298 umtxq_remove(uq); 3299 umtxq_unlock(&uq->uq_key); 3300 umtx_key_release(&uq->uq_key); 3301 return (0); 3302 } 3303 if (count == USEM_HAS_WAITERS) 3304 break; 3305 rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS); 3306 if (rv == 0) 3307 break; 3308 umtxq_lock(&uq->uq_key); 3309 umtxq_unbusy(&uq->uq_key); 3310 umtxq_remove(uq); 3311 umtxq_unlock(&uq->uq_key); 3312 umtx_key_release(&uq->uq_key); 3313 if (rv == -1) 3314 return (EFAULT); 3315 rv = thread_check_susp(td, true); 3316 if (rv != 0) 3317 return (rv); 3318 goto again; 3319 } 3320 umtxq_lock(&uq->uq_key); 3321 umtxq_unbusy(&uq->uq_key); 3322 3323 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 3324 3325 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3326 error = 0; 3327 else { 3328 umtxq_remove(uq); 3329 if (timeout != NULL && (timeout->_flags & UMTX_ABSTIME) == 0) { 3330 /* A relative timeout cannot be restarted. */ 3331 if (error == ERESTART) 3332 error = EINTR; 3333 if (error == EINTR) { 3334 abs_timeout_update(&timo); 3335 timespecsub(&timo.end, &timo.cur, 3336 &timeout->_timeout); 3337 } 3338 } 3339 } 3340 umtxq_unlock(&uq->uq_key); 3341 umtx_key_release(&uq->uq_key); 3342 return (error); 3343 } 3344 3345 /* 3346 * Signal a userland semaphore. 3347 */ 3348 static int 3349 do_sem2_wake(struct thread *td, struct _usem2 *sem) 3350 { 3351 struct umtx_key key; 3352 int error, cnt, rv; 3353 uint32_t count, flags; 3354 3355 rv = fueword32(&sem->_flags, &flags); 3356 if (rv == -1) 3357 return (EFAULT); 3358 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3359 return (error); 3360 umtxq_lock(&key); 3361 umtxq_busy(&key); 3362 cnt = umtxq_count(&key); 3363 if (cnt > 0) { 3364 /* 3365 * If this was the last sleeping thread, clear the waiters 3366 * flag in _count. 
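 *
 * In a _usem2 the count and the waiters flag share the single
 * _count word: USEM_HAS_WAITERS is a flag bit and USEM_COUNT()
 * masks it off.  An illustrative (hypothetical) userland post:
 *
 *	uint32_t old = atomic_fetchadd_32(&sem->_count, 1);
 *	if ((old & USEM_HAS_WAITERS) != 0)
 *		_umtx_op(sem, UMTX_OP_SEM2_WAKE, 0, NULL, NULL);
 *
 * which is why the flag is cleared below with casueword32() in a
 * loop, so that a concurrent increment is never clobbered.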
3367 */ 3368 if (cnt == 1) { 3369 umtxq_unlock(&key); 3370 rv = fueword32(&sem->_count, &count); 3371 while (rv != -1 && count & USEM_HAS_WAITERS) { 3372 rv = casueword32(&sem->_count, count, &count, 3373 count & ~USEM_HAS_WAITERS); 3374 if (rv == 1) { 3375 rv = thread_check_susp(td, true); 3376 if (rv != 0) 3377 break; 3378 } 3379 } 3380 if (rv == -1) 3381 error = EFAULT; 3382 else if (rv > 0) { 3383 error = rv; 3384 } 3385 umtxq_lock(&key); 3386 } 3387 3388 umtxq_signal(&key, 1); 3389 } 3390 umtxq_unbusy(&key); 3391 umtxq_unlock(&key); 3392 umtx_key_release(&key); 3393 return (error); 3394 } 3395 3396 inline int 3397 umtx_copyin_timeout(const void *addr, struct timespec *tsp) 3398 { 3399 int error; 3400 3401 error = copyin(addr, tsp, sizeof(struct timespec)); 3402 if (error == 0) { 3403 if (tsp->tv_sec < 0 || 3404 tsp->tv_nsec >= 1000000000 || 3405 tsp->tv_nsec < 0) 3406 error = EINVAL; 3407 } 3408 return (error); 3409 } 3410 3411 static inline int 3412 umtx_copyin_umtx_time(const void *addr, size_t size, struct _umtx_time *tp) 3413 { 3414 int error; 3415 3416 if (size <= sizeof(struct timespec)) { 3417 tp->_clockid = CLOCK_REALTIME; 3418 tp->_flags = 0; 3419 error = copyin(addr, &tp->_timeout, sizeof(struct timespec)); 3420 } else 3421 error = copyin(addr, tp, sizeof(struct _umtx_time)); 3422 if (error != 0) 3423 return (error); 3424 if (tp->_timeout.tv_sec < 0 || 3425 tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0) 3426 return (EINVAL); 3427 return (0); 3428 } 3429 3430 static int 3431 __umtx_op_unimpl(struct thread *td, struct _umtx_op_args *uap) 3432 { 3433 3434 return (EOPNOTSUPP); 3435 } 3436 3437 static int 3438 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap) 3439 { 3440 struct _umtx_time timeout, *tm_p; 3441 int error; 3442 3443 if (uap->uaddr2 == NULL) 3444 tm_p = NULL; 3445 else { 3446 error = umtx_copyin_umtx_time( 3447 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3448 if (error != 0) 3449 return (error); 3450 tm_p = &timeout; 3451 } 3452 return (do_wait(td, uap->obj, uap->val, tm_p, 0, 0)); 3453 } 3454 3455 static int 3456 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap) 3457 { 3458 struct _umtx_time timeout, *tm_p; 3459 int error; 3460 3461 if (uap->uaddr2 == NULL) 3462 tm_p = NULL; 3463 else { 3464 error = umtx_copyin_umtx_time( 3465 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3466 if (error != 0) 3467 return (error); 3468 tm_p = &timeout; 3469 } 3470 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); 3471 } 3472 3473 static int 3474 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap) 3475 { 3476 struct _umtx_time *tm_p, timeout; 3477 int error; 3478 3479 if (uap->uaddr2 == NULL) 3480 tm_p = NULL; 3481 else { 3482 error = umtx_copyin_umtx_time( 3483 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3484 if (error != 0) 3485 return (error); 3486 tm_p = &timeout; 3487 } 3488 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1)); 3489 } 3490 3491 static int 3492 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap) 3493 { 3494 3495 return (kern_umtx_wake(td, uap->obj, uap->val, 0)); 3496 } 3497 3498 #define BATCH_SIZE 128 3499 static int 3500 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap) 3501 { 3502 char *uaddrs[BATCH_SIZE], **upp; 3503 int count, error, i, pos, tocopy; 3504 3505 upp = (char **)uap->obj; 3506 error = 0; 3507 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 3508 pos += tocopy) { 3509 tocopy = MIN(count, BATCH_SIZE); 3510 error = copyin(upp + pos, uaddrs, 
tocopy * sizeof(char *)); 3511 if (error != 0) 3512 break; 3513 for (i = 0; i < tocopy; ++i) 3514 kern_umtx_wake(td, uaddrs[i], INT_MAX, 1); 3515 maybe_yield(); 3516 } 3517 return (error); 3518 } 3519 3520 static int 3521 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap) 3522 { 3523 3524 return (kern_umtx_wake(td, uap->obj, uap->val, 1)); 3525 } 3526 3527 static int 3528 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap) 3529 { 3530 struct _umtx_time *tm_p, timeout; 3531 int error; 3532 3533 /* Allow a null timespec (wait forever). */ 3534 if (uap->uaddr2 == NULL) 3535 tm_p = NULL; 3536 else { 3537 error = umtx_copyin_umtx_time( 3538 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3539 if (error != 0) 3540 return (error); 3541 tm_p = &timeout; 3542 } 3543 return (do_lock_umutex(td, uap->obj, tm_p, 0)); 3544 } 3545 3546 static int 3547 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap) 3548 { 3549 3550 return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY)); 3551 } 3552 3553 static int 3554 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap) 3555 { 3556 struct _umtx_time *tm_p, timeout; 3557 int error; 3558 3559 /* Allow a null timespec (wait forever). */ 3560 if (uap->uaddr2 == NULL) 3561 tm_p = NULL; 3562 else { 3563 error = umtx_copyin_umtx_time( 3564 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3565 if (error != 0) 3566 return (error); 3567 tm_p = &timeout; 3568 } 3569 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT)); 3570 } 3571 3572 static int 3573 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap) 3574 { 3575 3576 return (do_wake_umutex(td, uap->obj)); 3577 } 3578 3579 static int 3580 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap) 3581 { 3582 3583 return (do_unlock_umutex(td, uap->obj, false)); 3584 } 3585 3586 static int 3587 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap) 3588 { 3589 3590 return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1)); 3591 } 3592 3593 static int 3594 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap) 3595 { 3596 struct timespec *ts, timeout; 3597 int error; 3598 3599 /* Allow a null timespec (wait forever). */ 3600 if (uap->uaddr2 == NULL) 3601 ts = NULL; 3602 else { 3603 error = umtx_copyin_timeout(uap->uaddr2, &timeout); 3604 if (error != 0) 3605 return (error); 3606 ts = &timeout; 3607 } 3608 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 3609 } 3610 3611 static int 3612 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap) 3613 { 3614 3615 return (do_cv_signal(td, uap->obj)); 3616 } 3617 3618 static int 3619 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap) 3620 { 3621 3622 return (do_cv_broadcast(td, uap->obj)); 3623 } 3624 3625 static int 3626 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap) 3627 { 3628 struct _umtx_time timeout; 3629 int error; 3630 3631 /* Allow a null timespec (wait forever). */ 3632 if (uap->uaddr2 == NULL) { 3633 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 3634 } else { 3635 error = umtx_copyin_umtx_time(uap->uaddr2, 3636 (size_t)uap->uaddr1, &timeout); 3637 if (error != 0) 3638 return (error); 3639 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 3640 } 3641 return (error); 3642 } 3643 3644 static int 3645 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap) 3646 { 3647 struct _umtx_time timeout; 3648 int error; 3649 3650 /* Allow a null timespec (wait forever). 
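 * The timeout convention is the same as for the other timed ops:
 * uaddr1 carries the size of the object at uaddr2, which is either
 * a bare relative CLOCK_REALTIME timespec or a full struct
 * _umtx_time (see umtx_copyin_umtx_time() above).  A hypothetical
 * userland call with an absolute deadline, as a sketch:
 *
 *	struct _umtx_time ut;
 *	clock_gettime(CLOCK_MONOTONIC, &ut._timeout);
 *	ut._timeout.tv_sec += 5;
 *	ut._flags = UMTX_ABSTIME;
 *	ut._clockid = CLOCK_MONOTONIC;
 *	_umtx_op(rw, UMTX_OP_RW_WRLOCK, 0, (void *)sizeof(ut), &ut);
 *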
*/ 3651 if (uap->uaddr2 == NULL) { 3652 error = do_rw_wrlock(td, uap->obj, 0); 3653 } else { 3654 error = umtx_copyin_umtx_time(uap->uaddr2, 3655 (size_t)uap->uaddr1, &timeout); 3656 if (error != 0) 3657 return (error); 3658 3659 error = do_rw_wrlock(td, uap->obj, &timeout); 3660 } 3661 return (error); 3662 } 3663 3664 static int 3665 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap) 3666 { 3667 3668 return (do_rw_unlock(td, uap->obj)); 3669 } 3670 3671 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3672 static int 3673 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap) 3674 { 3675 struct _umtx_time *tm_p, timeout; 3676 int error; 3677 3678 /* Allow a null timespec (wait forever). */ 3679 if (uap->uaddr2 == NULL) 3680 tm_p = NULL; 3681 else { 3682 error = umtx_copyin_umtx_time( 3683 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3684 if (error != 0) 3685 return (error); 3686 tm_p = &timeout; 3687 } 3688 return (do_sem_wait(td, uap->obj, tm_p)); 3689 } 3690 3691 static int 3692 __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap) 3693 { 3694 3695 return (do_sem_wake(td, uap->obj)); 3696 } 3697 #endif 3698 3699 static int 3700 __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap) 3701 { 3702 3703 return (do_wake2_umutex(td, uap->obj, uap->val)); 3704 } 3705 3706 static int 3707 __umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap) 3708 { 3709 struct _umtx_time *tm_p, timeout; 3710 size_t uasize; 3711 int error; 3712 3713 /* Allow a null timespec (wait forever). */ 3714 if (uap->uaddr2 == NULL) { 3715 uasize = 0; 3716 tm_p = NULL; 3717 } else { 3718 uasize = (size_t)uap->uaddr1; 3719 error = umtx_copyin_umtx_time(uap->uaddr2, uasize, &timeout); 3720 if (error != 0) 3721 return (error); 3722 tm_p = &timeout; 3723 } 3724 error = do_sem2_wait(td, uap->obj, tm_p); 3725 if (error == EINTR && uap->uaddr2 != NULL && 3726 (timeout._flags & UMTX_ABSTIME) == 0 && 3727 uasize >= sizeof(struct _umtx_time) + sizeof(struct timespec)) { 3728 error = copyout(&timeout._timeout, 3729 (struct _umtx_time *)uap->uaddr2 + 1, 3730 sizeof(struct timespec)); 3731 if (error == 0) { 3732 error = EINTR; 3733 } 3734 } 3735 3736 return (error); 3737 } 3738 3739 static int 3740 __umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap) 3741 { 3742 3743 return (do_sem2_wake(td, uap->obj)); 3744 } 3745 3746 #define USHM_OBJ_UMTX(o) \ 3747 ((struct umtx_shm_obj_list *)(&(o)->umtx_data)) 3748 3749 #define USHMF_REG_LINKED 0x0001 3750 #define USHMF_OBJ_LINKED 0x0002 3751 struct umtx_shm_reg { 3752 TAILQ_ENTRY(umtx_shm_reg) ushm_reg_link; 3753 LIST_ENTRY(umtx_shm_reg) ushm_obj_link; 3754 struct umtx_key ushm_key; 3755 struct ucred *ushm_cred; 3756 struct shmfd *ushm_obj; 3757 u_int ushm_refcnt; 3758 u_int ushm_flags; 3759 }; 3760 3761 LIST_HEAD(umtx_shm_obj_list, umtx_shm_reg); 3762 TAILQ_HEAD(umtx_shm_reg_head, umtx_shm_reg); 3763 3764 static uma_zone_t umtx_shm_reg_zone; 3765 static struct umtx_shm_reg_head umtx_shm_registry[UMTX_CHAINS]; 3766 static struct mtx umtx_shm_lock; 3767 static struct umtx_shm_reg_head umtx_shm_reg_delfree = 3768 TAILQ_HEAD_INITIALIZER(umtx_shm_reg_delfree); 3769 3770 static void umtx_shm_free_reg(struct umtx_shm_reg *reg); 3771 3772 static void 3773 umtx_shm_reg_delfree_tq(void *context __unused, int pending __unused) 3774 { 3775 struct umtx_shm_reg_head d; 3776 struct umtx_shm_reg *reg, *reg1; 3777 3778 TAILQ_INIT(&d); 3779 mtx_lock(&umtx_shm_lock); 3780 TAILQ_CONCAT(&d, &umtx_shm_reg_delfree, ushm_reg_link); 3781 
mtx_unlock(&umtx_shm_lock); 3782 TAILQ_FOREACH_SAFE(reg, &d, ushm_reg_link, reg1) { 3783 TAILQ_REMOVE(&d, reg, ushm_reg_link); 3784 umtx_shm_free_reg(reg); 3785 } 3786 } 3787 3788 static struct task umtx_shm_reg_delfree_task = 3789 TASK_INITIALIZER(0, umtx_shm_reg_delfree_tq, NULL); 3790 3791 static struct umtx_shm_reg * 3792 umtx_shm_find_reg_locked(const struct umtx_key *key) 3793 { 3794 struct umtx_shm_reg *reg; 3795 struct umtx_shm_reg_head *reg_head; 3796 3797 KASSERT(key->shared, ("umtx_p_find_rg: private key")); 3798 mtx_assert(&umtx_shm_lock, MA_OWNED); 3799 reg_head = &umtx_shm_registry[key->hash]; 3800 TAILQ_FOREACH(reg, reg_head, ushm_reg_link) { 3801 KASSERT(reg->ushm_key.shared, 3802 ("non-shared key on reg %p %d", reg, reg->ushm_key.shared)); 3803 if (reg->ushm_key.info.shared.object == 3804 key->info.shared.object && 3805 reg->ushm_key.info.shared.offset == 3806 key->info.shared.offset) { 3807 KASSERT(reg->ushm_key.type == TYPE_SHM, ("TYPE_USHM")); 3808 KASSERT(reg->ushm_refcnt > 0, 3809 ("reg %p refcnt 0 onlist", reg)); 3810 KASSERT((reg->ushm_flags & USHMF_REG_LINKED) != 0, 3811 ("reg %p not linked", reg)); 3812 reg->ushm_refcnt++; 3813 return (reg); 3814 } 3815 } 3816 return (NULL); 3817 } 3818 3819 static struct umtx_shm_reg * 3820 umtx_shm_find_reg(const struct umtx_key *key) 3821 { 3822 struct umtx_shm_reg *reg; 3823 3824 mtx_lock(&umtx_shm_lock); 3825 reg = umtx_shm_find_reg_locked(key); 3826 mtx_unlock(&umtx_shm_lock); 3827 return (reg); 3828 } 3829 3830 static void 3831 umtx_shm_free_reg(struct umtx_shm_reg *reg) 3832 { 3833 3834 chgumtxcnt(reg->ushm_cred->cr_ruidinfo, -1, 0); 3835 crfree(reg->ushm_cred); 3836 shm_drop(reg->ushm_obj); 3837 uma_zfree(umtx_shm_reg_zone, reg); 3838 } 3839 3840 static bool 3841 umtx_shm_unref_reg_locked(struct umtx_shm_reg *reg, bool force) 3842 { 3843 bool res; 3844 3845 mtx_assert(&umtx_shm_lock, MA_OWNED); 3846 KASSERT(reg->ushm_refcnt > 0, ("ushm_reg %p refcnt 0", reg)); 3847 reg->ushm_refcnt--; 3848 res = reg->ushm_refcnt == 0; 3849 if (res || force) { 3850 if ((reg->ushm_flags & USHMF_REG_LINKED) != 0) { 3851 TAILQ_REMOVE(&umtx_shm_registry[reg->ushm_key.hash], 3852 reg, ushm_reg_link); 3853 reg->ushm_flags &= ~USHMF_REG_LINKED; 3854 } 3855 if ((reg->ushm_flags & USHMF_OBJ_LINKED) != 0) { 3856 LIST_REMOVE(reg, ushm_obj_link); 3857 reg->ushm_flags &= ~USHMF_OBJ_LINKED; 3858 } 3859 } 3860 return (res); 3861 } 3862 3863 static void 3864 umtx_shm_unref_reg(struct umtx_shm_reg *reg, bool force) 3865 { 3866 vm_object_t object; 3867 bool dofree; 3868 3869 if (force) { 3870 object = reg->ushm_obj->shm_object; 3871 VM_OBJECT_WLOCK(object); 3872 object->flags |= OBJ_UMTXDEAD; 3873 VM_OBJECT_WUNLOCK(object); 3874 } 3875 mtx_lock(&umtx_shm_lock); 3876 dofree = umtx_shm_unref_reg_locked(reg, force); 3877 mtx_unlock(&umtx_shm_lock); 3878 if (dofree) 3879 umtx_shm_free_reg(reg); 3880 } 3881 3882 void 3883 umtx_shm_object_init(vm_object_t object) 3884 { 3885 3886 LIST_INIT(USHM_OBJ_UMTX(object)); 3887 } 3888 3889 void 3890 umtx_shm_object_terminated(vm_object_t object) 3891 { 3892 struct umtx_shm_reg *reg, *reg1; 3893 bool dofree; 3894 3895 if (LIST_EMPTY(USHM_OBJ_UMTX(object))) 3896 return; 3897 3898 dofree = false; 3899 mtx_lock(&umtx_shm_lock); 3900 LIST_FOREACH_SAFE(reg, USHM_OBJ_UMTX(object), ushm_obj_link, reg1) { 3901 if (umtx_shm_unref_reg_locked(reg, true)) { 3902 TAILQ_INSERT_TAIL(&umtx_shm_reg_delfree, reg, 3903 ushm_reg_link); 3904 dofree = true; 3905 } 3906 } 3907 mtx_unlock(&umtx_shm_lock); 3908 if (dofree) 3909 
taskqueue_enqueue(taskqueue_thread, &umtx_shm_reg_delfree_task); 3910 } 3911 3912 static int 3913 umtx_shm_create_reg(struct thread *td, const struct umtx_key *key, 3914 struct umtx_shm_reg **res) 3915 { 3916 struct umtx_shm_reg *reg, *reg1; 3917 struct ucred *cred; 3918 int error; 3919 3920 reg = umtx_shm_find_reg(key); 3921 if (reg != NULL) { 3922 *res = reg; 3923 return (0); 3924 } 3925 cred = td->td_ucred; 3926 if (!chgumtxcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_UMTXP))) 3927 return (ENOMEM); 3928 reg = uma_zalloc(umtx_shm_reg_zone, M_WAITOK | M_ZERO); 3929 reg->ushm_refcnt = 1; 3930 bcopy(key, ®->ushm_key, sizeof(*key)); 3931 reg->ushm_obj = shm_alloc(td->td_ucred, O_RDWR); 3932 reg->ushm_cred = crhold(cred); 3933 error = shm_dotruncate(reg->ushm_obj, PAGE_SIZE); 3934 if (error != 0) { 3935 umtx_shm_free_reg(reg); 3936 return (error); 3937 } 3938 mtx_lock(&umtx_shm_lock); 3939 reg1 = umtx_shm_find_reg_locked(key); 3940 if (reg1 != NULL) { 3941 mtx_unlock(&umtx_shm_lock); 3942 umtx_shm_free_reg(reg); 3943 *res = reg1; 3944 return (0); 3945 } 3946 reg->ushm_refcnt++; 3947 TAILQ_INSERT_TAIL(&umtx_shm_registry[key->hash], reg, ushm_reg_link); 3948 LIST_INSERT_HEAD(USHM_OBJ_UMTX(key->info.shared.object), reg, 3949 ushm_obj_link); 3950 reg->ushm_flags = USHMF_REG_LINKED | USHMF_OBJ_LINKED; 3951 mtx_unlock(&umtx_shm_lock); 3952 *res = reg; 3953 return (0); 3954 } 3955 3956 static int 3957 umtx_shm_alive(struct thread *td, void *addr) 3958 { 3959 vm_map_t map; 3960 vm_map_entry_t entry; 3961 vm_object_t object; 3962 vm_pindex_t pindex; 3963 vm_prot_t prot; 3964 int res, ret; 3965 boolean_t wired; 3966 3967 map = &td->td_proc->p_vmspace->vm_map; 3968 res = vm_map_lookup(&map, (uintptr_t)addr, VM_PROT_READ, &entry, 3969 &object, &pindex, &prot, &wired); 3970 if (res != KERN_SUCCESS) 3971 return (EFAULT); 3972 if (object == NULL) 3973 ret = EINVAL; 3974 else 3975 ret = (object->flags & OBJ_UMTXDEAD) != 0 ? 
ENOTTY : 0; 3976 vm_map_lookup_done(map, entry); 3977 return (ret); 3978 } 3979 3980 static void 3981 umtx_shm_init(void) 3982 { 3983 int i; 3984 3985 umtx_shm_reg_zone = uma_zcreate("umtx_shm", sizeof(struct umtx_shm_reg), 3986 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 3987 mtx_init(&umtx_shm_lock, "umtxshm", NULL, MTX_DEF); 3988 for (i = 0; i < nitems(umtx_shm_registry); i++) 3989 TAILQ_INIT(&umtx_shm_registry[i]); 3990 } 3991 3992 static int 3993 umtx_shm(struct thread *td, void *addr, u_int flags) 3994 { 3995 struct umtx_key key; 3996 struct umtx_shm_reg *reg; 3997 struct file *fp; 3998 int error, fd; 3999 4000 if (__bitcount(flags & (UMTX_SHM_CREAT | UMTX_SHM_LOOKUP | 4001 UMTX_SHM_DESTROY| UMTX_SHM_ALIVE)) != 1) 4002 return (EINVAL); 4003 if ((flags & UMTX_SHM_ALIVE) != 0) 4004 return (umtx_shm_alive(td, addr)); 4005 error = umtx_key_get(addr, TYPE_SHM, PROCESS_SHARE, &key); 4006 if (error != 0) 4007 return (error); 4008 KASSERT(key.shared == 1, ("non-shared key")); 4009 if ((flags & UMTX_SHM_CREAT) != 0) { 4010 error = umtx_shm_create_reg(td, &key, ®); 4011 } else { 4012 reg = umtx_shm_find_reg(&key); 4013 if (reg == NULL) 4014 error = ESRCH; 4015 } 4016 umtx_key_release(&key); 4017 if (error != 0) 4018 return (error); 4019 KASSERT(reg != NULL, ("no reg")); 4020 if ((flags & UMTX_SHM_DESTROY) != 0) { 4021 umtx_shm_unref_reg(reg, true); 4022 } else { 4023 #if 0 4024 #ifdef MAC 4025 error = mac_posixshm_check_open(td->td_ucred, 4026 reg->ushm_obj, FFLAGS(O_RDWR)); 4027 if (error == 0) 4028 #endif 4029 error = shm_access(reg->ushm_obj, td->td_ucred, 4030 FFLAGS(O_RDWR)); 4031 if (error == 0) 4032 #endif 4033 error = falloc_caps(td, &fp, &fd, O_CLOEXEC, NULL); 4034 if (error == 0) { 4035 shm_hold(reg->ushm_obj); 4036 finit(fp, FFLAGS(O_RDWR), DTYPE_SHM, reg->ushm_obj, 4037 &shm_ops); 4038 td->td_retval[0] = fd; 4039 fdrop(fp, td); 4040 } 4041 } 4042 umtx_shm_unref_reg(reg, false); 4043 return (error); 4044 } 4045 4046 static int 4047 __umtx_op_shm(struct thread *td, struct _umtx_op_args *uap) 4048 { 4049 4050 return (umtx_shm(td, uap->uaddr1, uap->val)); 4051 } 4052 4053 static int 4054 umtx_robust_lists(struct thread *td, struct umtx_robust_lists_params *rbp) 4055 { 4056 4057 td->td_rb_list = rbp->robust_list_offset; 4058 td->td_rbp_list = rbp->robust_priv_list_offset; 4059 td->td_rb_inact = rbp->robust_inact_offset; 4060 return (0); 4061 } 4062 4063 static int 4064 __umtx_op_robust_lists(struct thread *td, struct _umtx_op_args *uap) 4065 { 4066 struct umtx_robust_lists_params rb; 4067 int error; 4068 4069 if (uap->val > sizeof(rb)) 4070 return (EINVAL); 4071 bzero(&rb, sizeof(rb)); 4072 error = copyin(uap->uaddr1, &rb, uap->val); 4073 if (error != 0) 4074 return (error); 4075 return (umtx_robust_lists(td, &rb)); 4076 } 4077 4078 typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap); 4079 4080 static const _umtx_op_func op_table[] = { 4081 [UMTX_OP_RESERVED0] = __umtx_op_unimpl, 4082 [UMTX_OP_RESERVED1] = __umtx_op_unimpl, 4083 [UMTX_OP_WAIT] = __umtx_op_wait, 4084 [UMTX_OP_WAKE] = __umtx_op_wake, 4085 [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex, 4086 [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex, 4087 [UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex, 4088 [UMTX_OP_SET_CEILING] = __umtx_op_set_ceiling, 4089 [UMTX_OP_CV_WAIT] = __umtx_op_cv_wait, 4090 [UMTX_OP_CV_SIGNAL] = __umtx_op_cv_signal, 4091 [UMTX_OP_CV_BROADCAST] = __umtx_op_cv_broadcast, 4092 [UMTX_OP_WAIT_UINT] = __umtx_op_wait_uint, 4093 [UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock, 4094 
[UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock, 4095 [UMTX_OP_RW_UNLOCK] = __umtx_op_rw_unlock, 4096 [UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private, 4097 [UMTX_OP_WAKE_PRIVATE] = __umtx_op_wake_private, 4098 [UMTX_OP_MUTEX_WAIT] = __umtx_op_wait_umutex, 4099 [UMTX_OP_MUTEX_WAKE] = __umtx_op_wake_umutex, 4100 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4101 [UMTX_OP_SEM_WAIT] = __umtx_op_sem_wait, 4102 [UMTX_OP_SEM_WAKE] = __umtx_op_sem_wake, 4103 #else 4104 [UMTX_OP_SEM_WAIT] = __umtx_op_unimpl, 4105 [UMTX_OP_SEM_WAKE] = __umtx_op_unimpl, 4106 #endif 4107 [UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private, 4108 [UMTX_OP_MUTEX_WAKE2] = __umtx_op_wake2_umutex, 4109 [UMTX_OP_SEM2_WAIT] = __umtx_op_sem2_wait, 4110 [UMTX_OP_SEM2_WAKE] = __umtx_op_sem2_wake, 4111 [UMTX_OP_SHM] = __umtx_op_shm, 4112 [UMTX_OP_ROBUST_LISTS] = __umtx_op_robust_lists, 4113 }; 4114 4115 int 4116 sys__umtx_op(struct thread *td, struct _umtx_op_args *uap) 4117 { 4118 4119 if ((unsigned)uap->op < nitems(op_table)) 4120 return (*op_table[uap->op])(td, uap); 4121 return (EINVAL); 4122 } 4123 4124 #ifdef COMPAT_FREEBSD32 4125 4126 struct timespec32 { 4127 int32_t tv_sec; 4128 int32_t tv_nsec; 4129 }; 4130 4131 struct umtx_time32 { 4132 struct timespec32 timeout; 4133 uint32_t flags; 4134 uint32_t clockid; 4135 }; 4136 4137 static inline int 4138 umtx_copyin_timeout32(void *addr, struct timespec *tsp) 4139 { 4140 struct timespec32 ts32; 4141 int error; 4142 4143 error = copyin(addr, &ts32, sizeof(struct timespec32)); 4144 if (error == 0) { 4145 if (ts32.tv_sec < 0 || 4146 ts32.tv_nsec >= 1000000000 || 4147 ts32.tv_nsec < 0) 4148 error = EINVAL; 4149 else { 4150 tsp->tv_sec = ts32.tv_sec; 4151 tsp->tv_nsec = ts32.tv_nsec; 4152 } 4153 } 4154 return (error); 4155 } 4156 4157 static inline int 4158 umtx_copyin_umtx_time32(const void *addr, size_t size, struct _umtx_time *tp) 4159 { 4160 struct umtx_time32 t32; 4161 int error; 4162 4163 t32.clockid = CLOCK_REALTIME; 4164 t32.flags = 0; 4165 if (size <= sizeof(struct timespec32)) 4166 error = copyin(addr, &t32.timeout, sizeof(struct timespec32)); 4167 else 4168 error = copyin(addr, &t32, sizeof(struct umtx_time32)); 4169 if (error != 0) 4170 return (error); 4171 if (t32.timeout.tv_sec < 0 || 4172 t32.timeout.tv_nsec >= 1000000000 || t32.timeout.tv_nsec < 0) 4173 return (EINVAL); 4174 tp->_timeout.tv_sec = t32.timeout.tv_sec; 4175 tp->_timeout.tv_nsec = t32.timeout.tv_nsec; 4176 tp->_flags = t32.flags; 4177 tp->_clockid = t32.clockid; 4178 return (0); 4179 } 4180 4181 static int 4182 __umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 4183 { 4184 struct _umtx_time *tm_p, timeout; 4185 int error; 4186 4187 if (uap->uaddr2 == NULL) 4188 tm_p = NULL; 4189 else { 4190 error = umtx_copyin_umtx_time32(uap->uaddr2, 4191 (size_t)uap->uaddr1, &timeout); 4192 if (error != 0) 4193 return (error); 4194 tm_p = &timeout; 4195 } 4196 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); 4197 } 4198 4199 static int 4200 __umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap) 4201 { 4202 struct _umtx_time *tm_p, timeout; 4203 int error; 4204 4205 /* Allow a null timespec (wait forever). 
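 * For a 32-bit process the timeout uses the ILP32 layout converted
 * by umtx_copyin_umtx_time32() above: tv_sec and tv_nsec are both
 * int32_t, and the flags and clockid words follow the timespec32.
 * A sketch of the 32-bit caller's view (illustrative only):
 *
 *	struct umtx_time32 ut32 = {
 *		.timeout = { .tv_sec = 1, .tv_nsec = 0 },
 *		.flags = 0,			// relative
 *		.clockid = CLOCK_REALTIME,
 *	};
 *	_umtx_op(&m, UMTX_OP_MUTEX_LOCK, 0, (void *)sizeof(ut32), &ut32);
 *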
*/ 4206 if (uap->uaddr2 == NULL) 4207 tm_p = NULL; 4208 else { 4209 error = umtx_copyin_umtx_time32(uap->uaddr2, 4210 (size_t)uap->uaddr1, &timeout); 4211 if (error != 0) 4212 return (error); 4213 tm_p = &timeout; 4214 } 4215 return (do_lock_umutex(td, uap->obj, tm_p, 0)); 4216 } 4217 4218 static int 4219 __umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap) 4220 { 4221 struct _umtx_time *tm_p, timeout; 4222 int error; 4223 4224 /* Allow a null timespec (wait forever). */ 4225 if (uap->uaddr2 == NULL) 4226 tm_p = NULL; 4227 else { 4228 error = umtx_copyin_umtx_time32(uap->uaddr2, 4229 (size_t)uap->uaddr1, &timeout); 4230 if (error != 0) 4231 return (error); 4232 tm_p = &timeout; 4233 } 4234 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT)); 4235 } 4236 4237 static int 4238 __umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 4239 { 4240 struct timespec *ts, timeout; 4241 int error; 4242 4243 /* Allow a null timespec (wait forever). */ 4244 if (uap->uaddr2 == NULL) 4245 ts = NULL; 4246 else { 4247 error = umtx_copyin_timeout32(uap->uaddr2, &timeout); 4248 if (error != 0) 4249 return (error); 4250 ts = &timeout; 4251 } 4252 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 4253 } 4254 4255 static int 4256 __umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap) 4257 { 4258 struct _umtx_time timeout; 4259 int error; 4260 4261 /* Allow a null timespec (wait forever). */ 4262 if (uap->uaddr2 == NULL) { 4263 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 4264 } else { 4265 error = umtx_copyin_umtx_time32(uap->uaddr2, 4266 (size_t)uap->uaddr1, &timeout); 4267 if (error != 0) 4268 return (error); 4269 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 4270 } 4271 return (error); 4272 } 4273 4274 static int 4275 __umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap) 4276 { 4277 struct _umtx_time timeout; 4278 int error; 4279 4280 /* Allow a null timespec (wait forever). */ 4281 if (uap->uaddr2 == NULL) { 4282 error = do_rw_wrlock(td, uap->obj, 0); 4283 } else { 4284 error = umtx_copyin_umtx_time32(uap->uaddr2, 4285 (size_t)uap->uaddr1, &timeout); 4286 if (error != 0) 4287 return (error); 4288 error = do_rw_wrlock(td, uap->obj, &timeout); 4289 } 4290 return (error); 4291 } 4292 4293 static int 4294 __umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap) 4295 { 4296 struct _umtx_time *tm_p, timeout; 4297 int error; 4298 4299 if (uap->uaddr2 == NULL) 4300 tm_p = NULL; 4301 else { 4302 error = umtx_copyin_umtx_time32( 4303 uap->uaddr2, (size_t)uap->uaddr1,&timeout); 4304 if (error != 0) 4305 return (error); 4306 tm_p = &timeout; 4307 } 4308 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1)); 4309 } 4310 4311 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4312 static int 4313 __umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 4314 { 4315 struct _umtx_time *tm_p, timeout; 4316 int error; 4317 4318 /* Allow a null timespec (wait forever). 
*/ 4319 if (uap->uaddr2 == NULL) 4320 tm_p = NULL; 4321 else { 4322 error = umtx_copyin_umtx_time32(uap->uaddr2, 4323 (size_t)uap->uaddr1, &timeout); 4324 if (error != 0) 4325 return (error); 4326 tm_p = &timeout; 4327 } 4328 return (do_sem_wait(td, uap->obj, tm_p)); 4329 } 4330 #endif 4331 4332 static int 4333 __umtx_op_sem2_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 4334 { 4335 struct _umtx_time *tm_p, timeout; 4336 size_t uasize; 4337 int error; 4338 4339 /* Allow a null timespec (wait forever). */ 4340 if (uap->uaddr2 == NULL) { 4341 uasize = 0; 4342 tm_p = NULL; 4343 } else { 4344 uasize = (size_t)uap->uaddr1; 4345 error = umtx_copyin_umtx_time32(uap->uaddr2, uasize, &timeout); 4346 if (error != 0) 4347 return (error); 4348 tm_p = &timeout; 4349 } 4350 error = do_sem2_wait(td, uap->obj, tm_p); 4351 if (error == EINTR && uap->uaddr2 != NULL && 4352 (timeout._flags & UMTX_ABSTIME) == 0 && 4353 uasize >= sizeof(struct umtx_time32) + sizeof(struct timespec32)) { 4354 struct timespec32 remain32 = { 4355 .tv_sec = timeout._timeout.tv_sec, 4356 .tv_nsec = timeout._timeout.tv_nsec 4357 }; 4358 error = copyout(&remain32, 4359 (struct umtx_time32 *)uap->uaddr2 + 1, 4360 sizeof(struct timespec32)); 4361 if (error == 0) { 4362 error = EINTR; 4363 } 4364 } 4365 4366 return (error); 4367 } 4368 4369 static int 4370 __umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap) 4371 { 4372 uint32_t uaddrs[BATCH_SIZE], **upp; 4373 int count, error, i, pos, tocopy; 4374 4375 upp = (uint32_t **)uap->obj; 4376 error = 0; 4377 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 4378 pos += tocopy) { 4379 tocopy = MIN(count, BATCH_SIZE); 4380 error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t)); 4381 if (error != 0) 4382 break; 4383 for (i = 0; i < tocopy; ++i) 4384 kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i], 4385 INT_MAX, 1); 4386 maybe_yield(); 4387 } 4388 return (error); 4389 } 4390 4391 struct umtx_robust_lists_params_compat32 { 4392 uint32_t robust_list_offset; 4393 uint32_t robust_priv_list_offset; 4394 uint32_t robust_inact_offset; 4395 }; 4396 4397 static int 4398 __umtx_op_robust_lists_compat32(struct thread *td, struct _umtx_op_args *uap) 4399 { 4400 struct umtx_robust_lists_params rb; 4401 struct umtx_robust_lists_params_compat32 rb32; 4402 int error; 4403 4404 if (uap->val > sizeof(rb32)) 4405 return (EINVAL); 4406 bzero(&rb, sizeof(rb)); 4407 bzero(&rb32, sizeof(rb32)); 4408 error = copyin(uap->uaddr1, &rb32, uap->val); 4409 if (error != 0) 4410 return (error); 4411 rb.robust_list_offset = rb32.robust_list_offset; 4412 rb.robust_priv_list_offset = rb32.robust_priv_list_offset; 4413 rb.robust_inact_offset = rb32.robust_inact_offset; 4414 return (umtx_robust_lists(td, &rb)); 4415 } 4416 4417 static const _umtx_op_func op_table_compat32[] = { 4418 [UMTX_OP_RESERVED0] = __umtx_op_unimpl, 4419 [UMTX_OP_RESERVED1] = __umtx_op_unimpl, 4420 [UMTX_OP_WAIT] = __umtx_op_wait_compat32, 4421 [UMTX_OP_WAKE] = __umtx_op_wake, 4422 [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex, 4423 [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex_compat32, 4424 [UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex, 4425 [UMTX_OP_SET_CEILING] = __umtx_op_set_ceiling, 4426 [UMTX_OP_CV_WAIT] = __umtx_op_cv_wait_compat32, 4427 [UMTX_OP_CV_SIGNAL] = __umtx_op_cv_signal, 4428 [UMTX_OP_CV_BROADCAST] = __umtx_op_cv_broadcast, 4429 [UMTX_OP_WAIT_UINT] = __umtx_op_wait_compat32, 4430 [UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock_compat32, 4431 [UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock_compat32, 
static int
__umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap)
{
	uint32_t uaddrs[BATCH_SIZE], *upp;
	int count, error, i, pos, tocopy;

	/*
	 * The userland array holds 32-bit addresses; index it with a
	 * uint32_t pointer so the stride is 4 bytes on LP64 kernels.
	 */
	upp = (uint32_t *)uap->obj;
	error = 0;
	for (count = uap->val, pos = 0; count > 0; count -= tocopy,
	    pos += tocopy) {
		tocopy = MIN(count, BATCH_SIZE);
		error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t));
		if (error != 0)
			break;
		for (i = 0; i < tocopy; ++i)
			kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i],
			    INT_MAX, 1);
		maybe_yield();
	}
	return (error);
}

struct umtx_robust_lists_params_compat32 {
	uint32_t	robust_list_offset;
	uint32_t	robust_priv_list_offset;
	uint32_t	robust_inact_offset;
};

static int
__umtx_op_robust_lists_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	struct umtx_robust_lists_params rb;
	struct umtx_robust_lists_params_compat32 rb32;
	int error;

	if (uap->val > sizeof(rb32))
		return (EINVAL);
	bzero(&rb, sizeof(rb));
	bzero(&rb32, sizeof(rb32));
	error = copyin(uap->uaddr1, &rb32, uap->val);
	if (error != 0)
		return (error);
	rb.robust_list_offset = rb32.robust_list_offset;
	rb.robust_priv_list_offset = rb32.robust_priv_list_offset;
	rb.robust_inact_offset = rb32.robust_inact_offset;
	return (umtx_robust_lists(td, &rb));
}

static const _umtx_op_func op_table_compat32[] = {
	[UMTX_OP_RESERVED0]	= __umtx_op_unimpl,
	[UMTX_OP_RESERVED1]	= __umtx_op_unimpl,
	[UMTX_OP_WAIT]		= __umtx_op_wait_compat32,
	[UMTX_OP_WAKE]		= __umtx_op_wake,
	[UMTX_OP_MUTEX_TRYLOCK]	= __umtx_op_trylock_umutex,
	[UMTX_OP_MUTEX_LOCK]	= __umtx_op_lock_umutex_compat32,
	[UMTX_OP_MUTEX_UNLOCK]	= __umtx_op_unlock_umutex,
	[UMTX_OP_SET_CEILING]	= __umtx_op_set_ceiling,
	[UMTX_OP_CV_WAIT]	= __umtx_op_cv_wait_compat32,
	[UMTX_OP_CV_SIGNAL]	= __umtx_op_cv_signal,
	[UMTX_OP_CV_BROADCAST]	= __umtx_op_cv_broadcast,
	[UMTX_OP_WAIT_UINT]	= __umtx_op_wait_compat32,
	[UMTX_OP_RW_RDLOCK]	= __umtx_op_rw_rdlock_compat32,
	[UMTX_OP_RW_WRLOCK]	= __umtx_op_rw_wrlock_compat32,
	[UMTX_OP_RW_UNLOCK]	= __umtx_op_rw_unlock,
	[UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private_compat32,
	[UMTX_OP_WAKE_PRIVATE]	= __umtx_op_wake_private,
	[UMTX_OP_MUTEX_WAIT]	= __umtx_op_wait_umutex_compat32,
	[UMTX_OP_MUTEX_WAKE]	= __umtx_op_wake_umutex,
#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
	[UMTX_OP_SEM_WAIT]	= __umtx_op_sem_wait_compat32,
	[UMTX_OP_SEM_WAKE]	= __umtx_op_sem_wake,
#else
	[UMTX_OP_SEM_WAIT]	= __umtx_op_unimpl,
	[UMTX_OP_SEM_WAKE]	= __umtx_op_unimpl,
#endif
	[UMTX_OP_NWAKE_PRIVATE]	= __umtx_op_nwake_private32,
	[UMTX_OP_MUTEX_WAKE2]	= __umtx_op_wake2_umutex,
	[UMTX_OP_SEM2_WAIT]	= __umtx_op_sem2_wait_compat32,
	[UMTX_OP_SEM2_WAKE]	= __umtx_op_sem2_wake,
	[UMTX_OP_SHM]		= __umtx_op_shm,
	[UMTX_OP_ROBUST_LISTS]	= __umtx_op_robust_lists_compat32,
};

int
freebsd32__umtx_op(struct thread *td, struct freebsd32__umtx_op_args *uap)
{

	if ((unsigned)uap->op < nitems(op_table_compat32))
		return ((*op_table_compat32[uap->op])(td,
		    (struct _umtx_op_args *)uap));
	return (EINVAL);
}
#endif

void
umtx_thread_init(struct thread *td)
{

	td->td_umtxq = umtxq_alloc();
	td->td_umtxq->uq_thread = td;
}

void
umtx_thread_fini(struct thread *td)
{

	umtxq_free(td->td_umtxq);
}

/*
 * Called when a new thread is created, e.g. by fork().
 */
void
umtx_thread_alloc(struct thread *td)
{
	struct umtx_q *uq;

	uq = td->td_umtxq;
	uq->uq_inherited_pri = PRI_MAX;

	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested),
	    ("uq_pi_contested is not empty"));
}

/*
 * exec() hook.
 *
 * Clear the robust lists for all of the process' threads, rather
 * than delaying the cleanup to the thread_exit hook, since the
 * relevant address space is being destroyed right now.
 */
static void
umtx_exec_hook(void *arg __unused, struct proc *p,
    struct image_params *imgp __unused)
{
	struct thread *td;

	KASSERT(p == curproc, ("need curproc"));
	KASSERT((p->p_flag & P_HADTHREADS) == 0 ||
	    (p->p_flag & P_STOPPED_SINGLE) != 0,
	    ("curproc must be single-threaded"));
	/*
	 * There is no need to lock the list as only this thread can be
	 * running.
	 */
	FOREACH_THREAD_IN_PROC(p, td) {
		KASSERT(td == curthread ||
		    ((td->td_flags & TDF_BOUNDARY) != 0 &&
		    TD_IS_SUSPENDED(td)),
		    ("running thread %p %p", p, td));
		umtx_thread_cleanup(td);
		td->td_rb_list = td->td_rbp_list = td->td_rb_inact = 0;
	}
}
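/*
 * Usage sketch (illustrative only, not part of the kernel build):
 * the per-thread words reset by the exec hook above (td_rb_list,
 * td_rbp_list, td_rb_inact) are set when a 32-bit threading library
 * registers its robust-list anchors, typically once per thread at
 * startup.  "thr" and its fields are hypothetical names; the struct
 * and operation are the ones handled by
 * __umtx_op_robust_lists_compat32 above:
 *
 *	struct umtx_robust_lists_params_compat32 rb = {
 *		.robust_list_offset = (uint32_t)&thr->robust_list,
 *		.robust_priv_list_offset =
 *		    (uint32_t)&thr->priv_robust_list,
 *		.robust_inact_offset = (uint32_t)&thr->inact_mtx,
 *	};
 *	_umtx_op(NULL, UMTX_OP_ROBUST_LISTS, sizeof(rb), &rb, NULL);
 */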
4528 */ 4529 void 4530 umtx_thread_exit(struct thread *td) 4531 { 4532 4533 umtx_thread_cleanup(td); 4534 } 4535 4536 static int 4537 umtx_read_uptr(struct thread *td, uintptr_t ptr, uintptr_t *res) 4538 { 4539 u_long res1; 4540 #ifdef COMPAT_FREEBSD32 4541 uint32_t res32; 4542 #endif 4543 int error; 4544 4545 #ifdef COMPAT_FREEBSD32 4546 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { 4547 error = fueword32((void *)ptr, &res32); 4548 if (error == 0) 4549 res1 = res32; 4550 } else 4551 #endif 4552 { 4553 error = fueword((void *)ptr, &res1); 4554 } 4555 if (error == 0) 4556 *res = res1; 4557 else 4558 error = EFAULT; 4559 return (error); 4560 } 4561 4562 static void 4563 umtx_read_rb_list(struct thread *td, struct umutex *m, uintptr_t *rb_list) 4564 { 4565 #ifdef COMPAT_FREEBSD32 4566 struct umutex32 m32; 4567 4568 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { 4569 memcpy(&m32, m, sizeof(m32)); 4570 *rb_list = m32.m_rb_lnk; 4571 } else 4572 #endif 4573 *rb_list = m->m_rb_lnk; 4574 } 4575 4576 static int 4577 umtx_handle_rb(struct thread *td, uintptr_t rbp, uintptr_t *rb_list, bool inact) 4578 { 4579 struct umutex m; 4580 int error; 4581 4582 KASSERT(td->td_proc == curproc, ("need current vmspace")); 4583 error = copyin((void *)rbp, &m, sizeof(m)); 4584 if (error != 0) 4585 return (error); 4586 if (rb_list != NULL) 4587 umtx_read_rb_list(td, &m, rb_list); 4588 if ((m.m_flags & UMUTEX_ROBUST) == 0) 4589 return (EINVAL); 4590 if ((m.m_owner & ~UMUTEX_CONTESTED) != td->td_tid) 4591 /* inact is cleared after unlock, allow the inconsistency */ 4592 return (inact ? 0 : EINVAL); 4593 return (do_unlock_umutex(td, (struct umutex *)rbp, true)); 4594 } 4595 4596 static void 4597 umtx_cleanup_rb_list(struct thread *td, uintptr_t rb_list, uintptr_t *rb_inact, 4598 const char *name) 4599 { 4600 int error, i; 4601 uintptr_t rbp; 4602 bool inact; 4603 4604 if (rb_list == 0) 4605 return; 4606 error = umtx_read_uptr(td, rb_list, &rbp); 4607 for (i = 0; error == 0 && rbp != 0 && i < umtx_max_rb; i++) { 4608 if (rbp == *rb_inact) { 4609 inact = true; 4610 *rb_inact = 0; 4611 } else 4612 inact = false; 4613 error = umtx_handle_rb(td, rbp, &rbp, inact); 4614 } 4615 if (i == umtx_max_rb && umtx_verbose_rb) { 4616 uprintf("comm %s pid %d: reached umtx %smax rb %d\n", 4617 td->td_proc->p_comm, td->td_proc->p_pid, name, umtx_max_rb); 4618 } 4619 if (error != 0 && umtx_verbose_rb) { 4620 uprintf("comm %s pid %d: handling %srb error %d\n", 4621 td->td_proc->p_comm, td->td_proc->p_pid, name, error); 4622 } 4623 } 4624 4625 /* 4626 * Clean up umtx data. 4627 */ 4628 static void 4629 umtx_thread_cleanup(struct thread *td) 4630 { 4631 struct umtx_q *uq; 4632 struct umtx_pi *pi; 4633 uintptr_t rb_inact; 4634 4635 /* 4636 * Disown pi mutexes. 4637 */ 4638 uq = td->td_umtxq; 4639 if (uq != NULL) { 4640 if (uq->uq_inherited_pri != PRI_MAX || 4641 !TAILQ_EMPTY(&uq->uq_pi_contested)) { 4642 mtx_lock(&umtx_lock); 4643 uq->uq_inherited_pri = PRI_MAX; 4644 while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) { 4645 pi->pi_owner = NULL; 4646 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link); 4647 } 4648 mtx_unlock(&umtx_lock); 4649 } 4650 sched_lend_user_prio_cond(td, PRI_MAX); 4651 } 4652 4653 if (td->td_rb_inact == 0 && td->td_rb_list == 0 && td->td_rbp_list == 0) 4654 return; 4655 4656 /* 4657 * Handle terminated robust mutexes. Must be done after 4658 * robust pi disown, otherwise unlock could see unowned 4659 * entries. 
4660 */ 4661 rb_inact = td->td_rb_inact; 4662 if (rb_inact != 0) 4663 (void)umtx_read_uptr(td, rb_inact, &rb_inact); 4664 umtx_cleanup_rb_list(td, td->td_rb_list, &rb_inact, ""); 4665 umtx_cleanup_rb_list(td, td->td_rbp_list, &rb_inact, "priv "); 4666 if (rb_inact != 0) 4667 (void)umtx_handle_rb(td, rb_inact, NULL, true); 4668 } 4669