/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2015, 2016 The FreeBSD Foundation
 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Portions of this software were developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_umtx_profiling.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/syscallsubr.h>
#include <sys/taskqueue.h>
#include <sys/time.h>
#include <sys/eventhandler.h>
#include <sys/umtx.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

#include <machine/atomic.h>
#include <machine/cpu.h>

#ifdef COMPAT_FREEBSD32
#include <compat/freebsd32/freebsd32.h>
#include <compat/freebsd32/freebsd32_proto.h>
#endif

#define	_UMUTEX_TRY		1
#define	_UMUTEX_WAIT		2

#ifdef UMTX_PROFILING
#define	UPROF_PERC_BIGGER(w, f, sw, sf)					\
	(((w) > (sw)) || ((w) == (sw) && (f) > (sf)))
#endif

/* Priority inheritance mutex info. */
struct umtx_pi {
	/* Owner thread */
	struct thread		*pi_owner;

	/* Reference count */
	int			pi_refcount;

	/* List entry to link PI mutexes held by a thread */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* List entry in hash */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* List for waiters */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Identify a userland lock object */
	struct umtx_key		pi_key;
};

/* A userland synchronization object user. */
struct umtx_q {
	/* Linked list for the hash. */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Umtx key. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define	UQF_UMTXQ	0x0001

	/* The thread that is waiting. */
	struct thread		*uq_thread;

	/*
	 * Blocked-on PI mutex.  Reads can use either the chain lock
	 * or umtx_lock; writes must hold both the chain lock and
	 * umtx_lock.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* On blocked list */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* Contested PI mutexes owned by us */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Inherited priority from PP mutex */
	u_char			uq_inherited_pri;

	/* Spare queue ready to be reused */
	struct umtxq_queue	*uq_spare_queue;

	/* The queue we are on */
	struct umtxq_queue	*uq_cur_queue;
};

TAILQ_HEAD(umtxq_head, umtx_q);

/* Per-key wait-queue */
struct umtxq_queue {
	struct umtxq_head	head;
	struct umtx_key		key;
	LIST_ENTRY(umtxq_queue)	link;
	int			length;
};

LIST_HEAD(umtxq_list, umtxq_queue);

/* Userland lock object's wait-queue chain */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx		uc_lock;

	/* List of sleep queues. */
	struct umtxq_list	uc_queue[2];
#define	UMTX_SHARED_QUEUE	0
#define	UMTX_EXCLUSIVE_QUEUE	1

	LIST_HEAD(, umtxq_queue) uc_spare_queue;

	/* Busy flag */
	char			uc_busy;

	/* Chain lock waiters */
	int			uc_waiters;

	/* All PI mutexes in the list */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;

#ifdef UMTX_PROFILING
	u_int			length;
	u_int			max_length;
#endif
};

#define	UMTXQ_LOCKED_ASSERT(uc)		mtx_assert(&(uc)->uc_lock, MA_OWNED)

/*
 * Don't propagate time-sharing priority; there is a security reason.
 * A user can simply create a PI mutex, let thread A lock it, and let
 * another thread B block on it.  Because B is sleeping, its priority
 * will be boosted; this boosts A's priority via priority propagation
 * as well, and that priority would never be lowered even if A is
 * using 100% CPU, which is unfair to other processes.
 */

#define	UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&	\
			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?	\
			 PRI_MAX_TIMESHARE : (td)->td_user_pri)

#define	GOLDEN_RATIO_PRIME	2654404609U
#ifndef	UMTX_CHAINS
#define	UMTX_CHAINS		512
#endif
#define	UMTX_SHIFTS		(__WORD_BIT - 9)

#define	GET_SHARE(flags)	\
	(((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)
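/*
 * Illustrative sketch of the UPRI() clamp above (standalone userland
 * code, not part of this file; the numeric priority values are
 * assumptions for the example, the real PRI_{MIN,MAX}_TIMESHARE
 * constants live in <sys/priority.h>):
 */
#if 0
#include <stdio.h>

#define	EX_PRI_MIN_TIMESHARE	88	/* assumed value, for illustration */
#define	EX_PRI_MAX_TIMESHARE	223	/* assumed value, for illustration */

/* Treat every time-sharing priority as the worst one, as UPRI() does. */
static int
ex_upri(int user_pri)
{
	if (user_pri >= EX_PRI_MIN_TIMESHARE &&
	    user_pri <= EX_PRI_MAX_TIMESHARE)
		return (EX_PRI_MAX_TIMESHARE);
	return (user_pri);
}

int
main(void)
{
	printf("%d\n", ex_upri(120));	/* 223: never boosts a PI owner */
	printf("%d\n", ex_upri(40));	/* 40: real-time passes through */
	return (0);
}
#endif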
#define	BUSY_SPINS		200

struct abs_timeout {
	int clockid;
	bool is_abs_real;	/* TIMER_ABSTIME && CLOCK_REALTIME* */
	struct timespec cur;
	struct timespec end;
};

#ifdef COMPAT_FREEBSD32
struct umutex32 {
	volatile __lwpid_t	m_owner;	/* Owner of the mutex */
	__uint32_t		m_flags;	/* Flags of the mutex */
	__uint32_t		m_ceilings[2];	/* Priority protect ceiling */
	__uint32_t		m_rb_lnk;	/* Robust linkage */
	__uint32_t		m_pad;
	__uint32_t		m_spare[2];
};

_Static_assert(sizeof(struct umutex) == sizeof(struct umutex32), "umutex32");
_Static_assert(__offsetof(struct umutex, m_spare[0]) ==
    __offsetof(struct umutex32, m_spare[0]), "m_spare32");
#endif

int umtx_shm_vnobj_persistent = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_vnode_persistent, CTLFLAG_RWTUN,
    &umtx_shm_vnobj_persistent, 0,
    "False forces destruction of umtx attached to file, on last close");
static int umtx_max_rb = 1000;
SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_max_robust, CTLFLAG_RWTUN,
    &umtx_max_rb, 0,
    "Maximum number of robust mutexes allowed for each thread");

static uma_zone_t		umtx_pi_zone;
static struct umtxq_chain	umtxq_chains[2][UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
static int			umtx_pi_allocated;

static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
    &umtx_pi_allocated, 0, "Allocated umtx_pi");
static int umtx_verbose_rb = 1;
SYSCTL_INT(_debug_umtx, OID_AUTO, robust_faults_verbose, CTLFLAG_RWTUN,
    &umtx_verbose_rb, 0,
    "");

#ifdef UMTX_PROFILING
static long max_length;
SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0,
    "max_length");
static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD | CTLFLAG_MPSAFE,
    0, "umtx chain stats");
#endif

static void abs_timeout_update(struct abs_timeout *timo);

static void umtx_shm_init(void);
static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert_queue(struct umtx_q *uq, int q);
static void umtxq_remove_queue(struct umtx_q *uq, int q);
static int umtxq_sleep(struct umtx_q *uq, const char *wmesg,
    struct abs_timeout *);
static int umtxq_count(struct umtx_key *key);
static struct umtx_pi *umtx_pi_alloc(int);
static void umtx_pi_free(struct umtx_pi *pi);
static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    bool rb);
static void umtx_thread_cleanup(struct thread *td);
static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
    struct image_params *imgp __unused);
SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);

#define	umtxq_signal(key, nwake) umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
#define	umtxq_insert(uq) umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
#define	umtxq_remove(uq) umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)

static struct mtx umtx_lock;

#ifdef UMTX_PROFILING
static void
umtx_init_profiling(void)
{
	struct sysctl_oid *chain_oid;
	char chain_name[10];
	int i;

	for (i = 0; i < UMTX_CHAINS; ++i) {
		snprintf(chain_name, sizeof(chain_name), "%d", i);
		chain_oid = SYSCTL_ADD_NODE(NULL,
		    SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO,
		    chain_name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
		    "umtx hash stats");
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length0", CTLFLAG_RD,
		    &umtxq_chains[0][i].max_length, 0, NULL);
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length1", CTLFLAG_RD,
		    &umtxq_chains[1][i].max_length, 0, NULL);
	}
}

static int
sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS)
{
	char buf[512];
	struct sbuf sb;
	struct umtxq_chain *uc;
	u_int fract, i, j, tot, whole;
	u_int sf0, sf1, sf2, sf3, sf4;
	u_int si0, si1, si2, si3, si4;
	u_int sw0, sw1, sw2, sw3, sw4;

	sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
	for (i = 0; i < 2; i++) {
		tot = 0;
		for (j = 0; j < UMTX_CHAINS; ++j) {
			uc = &umtxq_chains[i][j];
			mtx_lock(&uc->uc_lock);
			tot += uc->max_length;
			mtx_unlock(&uc->uc_lock);
		}
		if (tot == 0)
			sbuf_printf(&sb, "%u) Empty ", i);
		else {
			sf0 = sf1 = sf2 = sf3 = sf4 = 0;
			si0 = si1 = si2 = si3 = si4 = 0;
			sw0 = sw1 = sw2 = sw3 = sw4 = 0;
			for (j = 0; j < UMTX_CHAINS; j++) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				whole = uc->max_length * 100;
				mtx_unlock(&uc->uc_lock);
				fract = (whole % tot) * 100;
				if (UPROF_PERC_BIGGER(whole, fract, sw0,
				    sf0)) {
					sf0 = fract;
					si0 = j;
					sw0 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw1,
				    sf1)) {
					sf1 = fract;
					si1 = j;
					sw1 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw2,
				    sf2)) {
					sf2 = fract;
					si2 = j;
					sw2 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw3,
				    sf3)) {
					sf3 = fract;
					si3 = j;
					sw3 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw4,
				    sf4)) {
					sf4 = fract;
					si4 = j;
					sw4 = whole;
				}
			}
			sbuf_printf(&sb, "queue %u:\n", i);
			sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot,
			    sf0 / tot, si0);
			sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot,
			    sf1 / tot, si1);
			sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot,
			    sf2 / tot, si2);
			sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot,
			    sf3 / tot, si3);
			sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot,
			    sf4 / tot, si4);
		}
	}
	sbuf_trim(&sb);
	sbuf_finish(&sb);
	sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
	sbuf_delete(&sb);
	return (0);
}

static int
sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS)
{
	struct umtxq_chain *uc;
	u_int i, j;
	int clear, error;

	clear = 0;
	error = sysctl_handle_int(oidp, &clear, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);

	if (clear != 0) {
		for (i = 0; i < 2; ++i) {
			for (j = 0; j < UMTX_CHAINS; ++j) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				uc->length = 0;
				uc->max_length = 0;
				mtx_unlock(&uc->uc_lock);
			}
		}
	}
	return (0);
}

SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_clear, "I",
    "Clear umtx chains statistics");
SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_peaks, "A",
    "Highest peaks in chains max length");
#endif

static void
umtxq_sysinit(void *arg __unused)
{
	int i, j;

	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	for (i = 0; i < 2; ++i) {
		for (j = 0; j < UMTX_CHAINS; ++j) {
			mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
			    MTX_DEF | MTX_DUPOK);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
			LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
			TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
			umtxq_chains[i][j].uc_busy = 0;
			umtxq_chains[i][j].uc_waiters = 0;
#ifdef UMTX_PROFILING
			umtxq_chains[i][j].length = 0;
			umtxq_chains[i][j].max_length = 0;
#endif
		}
	}
#ifdef UMTX_PROFILING
	umtx_init_profiling();
#endif
	mtx_init(&umtx_lock, "umtx lock", NULL, MTX_DEF);
	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
	    EVENTHANDLER_PRI_ANY);
	umtx_shm_init();
}

struct umtx_q *
umtxq_alloc(void)
{
	struct umtx_q *uq;

	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
	uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX,
	    M_WAITOK | M_ZERO);
	TAILQ_INIT(&uq->uq_spare_queue->head);
	TAILQ_INIT(&uq->uq_pi_contested);
	uq->uq_inherited_pri = PRI_MAX;
	return (uq);
}

void
umtxq_free(struct umtx_q *uq)
{

	MPASS(uq->uq_spare_queue != NULL);
	free(uq->uq_spare_queue, M_UMTX);
	free(uq, M_UMTX);
}

static inline void
umtxq_hash(struct umtx_key *key)
{
	unsigned n;

	n = (uintptr_t)key->info.both.a + key->info.both.b;
	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
}

static inline struct umtxq_chain *
umtxq_getchain(struct umtx_key *key)
{

	if (key->type <= TYPE_SEM)
		return (&umtxq_chains[1][key->hash]);
	return (&umtxq_chains[0][key->hash]);
}

/*
 * Lock a chain.
 */
static inline void
umtxq_lock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_lock(&uc->uc_lock);
}

/*
 * Unlock a chain.
 */
static inline void
umtxq_unlock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_unlock(&uc->uc_lock);
}

/*
 * Set the chain to the busy state when a following operation may be
 * blocked (a kernel mutex cannot be used).
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	if (uc->uc_busy) {
#ifdef SMP
		if (smp_cpus > 1) {
			int count = BUSY_SPINS;
			if (count > 0) {
				umtxq_unlock(key);
				while (uc->uc_busy && --count > 0)
					cpu_spinwait();
				umtxq_lock(key);
			}
		}
#endif
		while (uc->uc_busy) {
			uc->uc_waiters++;
			msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
			uc->uc_waiters--;
		}
	}
	uc->uc_busy = 1;
}
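/*
 * Illustrative sketch of the umtxq_hash() multiplicative hash above
 * (standalone userland code; it assumes a 32-bit word so that
 * UMTX_SHIFTS is 32 - 9 == 23, and mimics the two key words with a
 * pointer and an offset):
 */
#if 0
#include <stdint.h>
#include <stdio.h>

#define	EX_GOLDEN_RATIO_PRIME	2654404609U
#define	EX_UMTX_CHAINS		512
#define	EX_UMTX_SHIFTS		(32 - 9)

static unsigned
ex_umtxq_hash(const void *a, uintptr_t b)
{
	unsigned n;

	/* Mix both words, multiply, and keep the high bits. */
	n = (uintptr_t)a + b;
	return (((n * EX_GOLDEN_RATIO_PRIME) >> EX_UMTX_SHIFTS) %
	    EX_UMTX_CHAINS);
}

int
main(void)
{
	int obj;

	/* Nearby addresses spread across different chains. */
	printf("%u %u\n", ex_umtxq_hash(&obj, 0), ex_umtxq_hash(&obj, 64));
	return (0);
}
#endif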
/*
 * Unbusy a chain.
 */
static inline void
umtxq_unbusy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	KASSERT(uc->uc_busy != 0, ("not busy"));
	uc->uc_busy = 0;
	if (uc->uc_waiters)
		wakeup_one(uc);
}

static inline void
umtxq_unbusy_unlocked(struct umtx_key *key)
{

	umtxq_lock(key);
	umtxq_unbusy(key);
	umtxq_unlock(key);
}

static struct umtxq_queue *
umtxq_queue_lookup(struct umtx_key *key, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	LIST_FOREACH(uh, &uc->uc_queue[q], link) {
		if (umtx_key_match(&uh->key, key))
			return (uh);
	}

	return (NULL);
}

static inline void
umtxq_insert_queue(struct umtx_q *uq, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT((uq->uq_flags & UQF_UMTXQ) == 0,
	    ("umtx_q is already on queue"));
	uh = umtxq_queue_lookup(&uq->uq_key, q);
	if (uh != NULL) {
		LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue,
		    link);
	} else {
		uh = uq->uq_spare_queue;
		uh->key = uq->uq_key;
		LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
#ifdef UMTX_PROFILING
		uc->length++;
		if (uc->length > uc->max_length) {
			uc->max_length = uc->length;
			if (uc->max_length > max_length)
				max_length = uc->max_length;
		}
#endif
	}
	uq->uq_spare_queue = NULL;

	TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
	uh->length++;
	uq->uq_flags |= UQF_UMTXQ;
	uq->uq_cur_queue = uh;
	return;
}

static inline void
umtxq_remove_queue(struct umtx_q *uq, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (uq->uq_flags & UQF_UMTXQ) {
		uh = uq->uq_cur_queue;
		TAILQ_REMOVE(&uh->head, uq, uq_link);
		uh->length--;
		uq->uq_flags &= ~UQF_UMTXQ;
		if (TAILQ_EMPTY(&uh->head)) {
			KASSERT(uh->length == 0,
			    ("inconsistent umtxq_queue length"));
#ifdef UMTX_PROFILING
			uc->length--;
#endif
			LIST_REMOVE(uh, link);
		} else {
			uh = LIST_FIRST(&uc->uc_spare_queue);
			KASSERT(uh != NULL, ("uc_spare_queue is empty"));
			LIST_REMOVE(uh, link);
		}
		uq->uq_spare_queue = uh;
		uq->uq_cur_queue = NULL;
	}
}

/*
 * Check if there are multiple waiters.
 */
static int
umtxq_count(struct umtx_key *key)
{
	struct umtxq_queue *uh;

	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL)
		return (uh->length);
	return (0);
}

/*
 * Check if there are multiple PI waiters and return the first waiter.
 */
static int
umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
{
	struct umtxq_queue *uh;

	*first = NULL;
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL) {
		*first = TAILQ_FIRST(&uh->head);
		return (uh->length);
	}
	return (0);
}

/*
 * Wake up threads waiting on a userland object.
 */
static int
umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
{
	struct umtxq_queue *uh;
	struct umtx_q *uq;
	int ret;

	ret = 0;
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
	uh = umtxq_queue_lookup(key, q);
	if (uh != NULL) {
		while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
			umtxq_remove_queue(uq, q);
			wakeup(uq);
			if (++ret >= n_wake)
				return (ret);
		}
	}
	return (ret);
}

/*
 * Wake up the specified thread.
 */
static inline void
umtxq_signal_thread(struct umtx_q *uq)
{

	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key));
	umtxq_remove(uq);
	wakeup(uq);
}

static inline int
tstohz(const struct timespec *tsp)
{
	struct timeval tv;

	TIMESPEC_TO_TIMEVAL(&tv, tsp);
	return tvtohz(&tv);
}

static void
abs_timeout_init(struct abs_timeout *timo, int clockid, int absolute,
    const struct timespec *timeout)
{

	timo->clockid = clockid;
	if (!absolute) {
		timo->is_abs_real = false;
		abs_timeout_update(timo);
		timespecadd(&timo->cur, timeout, &timo->end);
	} else {
		timo->end = *timeout;
		timo->is_abs_real = clockid == CLOCK_REALTIME ||
		    clockid == CLOCK_REALTIME_FAST ||
		    clockid == CLOCK_REALTIME_PRECISE;
		/*
		 * If is_abs_real, umtxq_sleep will read the clock
		 * after setting td_rtcgen; otherwise, read it here.
		 */
		if (!timo->is_abs_real) {
			abs_timeout_update(timo);
		}
	}
}

static void
abs_timeout_init2(struct abs_timeout *timo, const struct _umtx_time *umtxtime)
{

	abs_timeout_init(timo, umtxtime->_clockid,
	    (umtxtime->_flags & UMTX_ABSTIME) != 0, &umtxtime->_timeout);
}

static inline void
abs_timeout_update(struct abs_timeout *timo)
{

	kern_clock_gettime(curthread, timo->clockid, &timo->cur);
}

static int
abs_timeout_gethz(struct abs_timeout *timo)
{
	struct timespec tts;

	if (timespeccmp(&timo->end, &timo->cur, <=))
		return (-1);
	timespecsub(&timo->end, &timo->cur, &tts);
	return (tstohz(&tts));
}

static uint32_t
umtx_unlock_val(uint32_t flags, bool rb)
{

	if (rb)
		return (UMUTEX_RB_OWNERDEAD);
	else if ((flags & UMUTEX_NONCONSISTENT) != 0)
		return (UMUTEX_RB_NOTRECOV);
	else
		return (UMUTEX_UNOWNED);
}
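/*
 * Illustrative sketch of the abs_timeout bookkeeping above: a relative
 * timeout is converted to an absolute deadline once, and expiry is then
 * decided by comparing fresh clock readings against that deadline
 * (standalone userland code, mirroring what abs_timeout_init() and
 * abs_timeout_gethz() do with timespecadd()/timespeccmp()):
 */
#if 0
#include <stdio.h>
#include <time.h>

int
main(void)
{
	struct timespec cur, end, rel = { 0, 500000000 };	/* 500 ms */
	int expired;

	/* end = cur + rel, as in the !absolute branch above. */
	clock_gettime(CLOCK_MONOTONIC, &cur);
	end.tv_sec = cur.tv_sec + rel.tv_sec;
	end.tv_nsec = cur.tv_nsec + rel.tv_nsec;
	if (end.tv_nsec >= 1000000000) {
		end.tv_sec++;
		end.tv_nsec -= 1000000000;
	}

	/* abs_timeout_gethz() reports expiry once end <= cur. */
	clock_gettime(CLOCK_MONOTONIC, &cur);
	expired = cur.tv_sec > end.tv_sec || (cur.tv_sec == end.tv_sec &&
	    cur.tv_nsec >= end.tv_nsec);
	printf("expired: %d\n", expired);
	return (0);
}
#endif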
/*
 * Put the thread into a sleep state.  Before sleeping, check whether
 * the thread was removed from the umtx queue.
 */
static inline int
umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *abstime)
{
	struct umtxq_chain *uc;
	int error, timo;

	if (abstime != NULL && abstime->is_abs_real) {
		curthread->td_rtcgen = atomic_load_acq_int(&rtc_generation);
		abs_timeout_update(abstime);
	}

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	for (;;) {
		if (!(uq->uq_flags & UQF_UMTXQ)) {
			error = 0;
			break;
		}
		if (abstime != NULL) {
			timo = abs_timeout_gethz(abstime);
			if (timo < 0) {
				error = ETIMEDOUT;
				break;
			}
		} else
			timo = 0;
		error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo);
		if (error == EINTR || error == ERESTART) {
			umtxq_lock(&uq->uq_key);
			break;
		}
		if (abstime != NULL) {
			if (abstime->is_abs_real)
				curthread->td_rtcgen =
				    atomic_load_acq_int(&rtc_generation);
			abs_timeout_update(abstime);
		}
		umtxq_lock(&uq->uq_key);
	}

	curthread->td_rtcgen = 0;
	return (error);
}

/*
 * Convert a userspace address into a unique logical address.
 */
int
umtx_key_get(const void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else {
		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
		    &entry, &key->info.shared.object, &pindex, &prot,
		    &wired) != KERN_SUCCESS) {
			return (EFAULT);
		}

		if ((share == PROCESS_SHARE) ||
		    (share == AUTO_SHARE &&
		     VM_INHERIT_SHARE == entry->inheritance)) {
			key->shared = 1;
			key->info.shared.offset = (vm_offset_t)addr -
			    entry->start + entry->offset;
			vm_object_reference(key->info.shared.object);
		} else {
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		vm_map_lookup_done(map, entry);
	}

	umtxq_hash(key);
	return (0);
}

/*
 * Release key.
 */
void
umtx_key_release(struct umtx_key *key)
{
	if (key->shared)
		vm_object_deallocate(key->info.shared.object);
}

/*
 * Fetch and compare the value; sleep on the address if the value has
 * not changed.
 */
static int
do_wait(struct thread *td, void *addr, u_long id,
    struct _umtx_time *timeout, int compat32, int is_private)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	u_long tmp;
	uint32_t tmp32;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	if (compat32 == 0) {
		error = fueword(addr, &tmp);
		if (error != 0)
			error = EFAULT;
	} else {
		error = fueword32(addr, &tmp32);
		if (error == 0)
			tmp = tmp32;
		else
			error = EFAULT;
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		if (tmp == id)
			error = umtxq_sleep(uq, "uwait", timeout == NULL ?
			    NULL : &timo);
		if ((uq->uq_flags & UQF_UMTXQ) == 0)
			error = 0;
		else
			umtxq_remove(uq);
	} else if ((uq->uq_flags & UQF_UMTXQ) != 0) {
		umtxq_remove(uq);
	}
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}
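/*
 * Illustrative userland counterpart of do_wait()/kern_umtx_wake():
 * the kernel queues the thread first and re-reads the word afterwards,
 * so a waiter only sleeps while the value still matches and wakeups
 * cannot be lost in between (sketch only; see _umtx_op(2) for the
 * authoritative interface, the ex_* names are made up):
 */
#if 0
#include <sys/types.h>
#include <sys/umtx.h>

static u_int ex_flag = 0;

static void
ex_wait_until_nonzero(void)
{
	/* Sleep only while ex_flag is still 0. */
	while (*(volatile u_int *)&ex_flag == 0)
		_umtx_op(&ex_flag, UMTX_OP_WAIT_UINT, 0, NULL, NULL);
}

static void
ex_set_and_wake(void)
{
	*(volatile u_int *)&ex_flag = 1;
	_umtx_op(&ex_flag, UMTX_OP_WAKE, 1, NULL, NULL);
}
#endif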
/*
 * Wake up threads sleeping on the specified address.
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
		return (ret);
	umtxq_lock(&key);
	umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}

/*
 * Lock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int mode)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	error = 0;
	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */
	for (;;) {
		rv = fueword32(&m->m_owner, &owner);
		if (rv == -1)
			return (EFAULT);
		if (mode == _UMUTEX_WAIT) {
			if (owner == UMUTEX_UNOWNED ||
			    owner == UMUTEX_CONTESTED ||
			    owner == UMUTEX_RB_OWNERDEAD ||
			    owner == UMUTEX_RB_NOTRECOV)
				return (0);
		} else {
			/*
			 * Robust mutex terminated.  The kernel's duty
			 * is to return EOWNERDEAD to userspace.  The
			 * umutex.m_flags UMUTEX_NONCONSISTENT is set
			 * by the common userspace code.
			 */
			if (owner == UMUTEX_RB_OWNERDEAD) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_RB_OWNERDEAD, &owner,
				    id | UMUTEX_CONTESTED);
				if (rv == -1)
					return (EFAULT);
				if (rv == 0) {
					MPASS(owner == UMUTEX_RB_OWNERDEAD);
					return (EOWNERDEAD); /* success */
				}
				MPASS(rv == 1);
				rv = thread_check_susp(td, false);
				if (rv != 0)
					return (rv);
				continue;
			}
			if (owner == UMUTEX_RB_NOTRECOV)
				return (ENOTRECOVERABLE);

			/*
			 * Try the uncontested case.  This should be
			 * done in userland.
			 */
			rv = casueword32(&m->m_owner, UMUTEX_UNOWNED,
			    &owner, id);
			/* The address was invalid. */
			if (rv == -1)
				return (EFAULT);

			/* The acquire succeeded. */
			if (rv == 0) {
				MPASS(owner == UMUTEX_UNOWNED);
				return (0);
			}

			/*
			 * If no one owns it but it is contested, try
			 * to acquire it.
			 */
			MPASS(rv == 1);
			if (owner == UMUTEX_CONTESTED) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_CONTESTED, &owner,
				    id | UMUTEX_CONTESTED);
				/* The address was invalid. */
				if (rv == -1)
					return (EFAULT);
				if (rv == 0) {
					MPASS(owner == UMUTEX_CONTESTED);
					return (0);
				}
				if (rv == 1) {
					rv = thread_check_susp(td, false);
					if (rv != 0)
						return (rv);
				}

				/*
				 * If this failed, the lock has
				 * changed; restart.
				 */
				continue;
			}

			/* rv == 1 but not contested, likely store failure */
			rv = thread_check_susp(td, false);
			if (rv != 0)
				return (rv);
		}

		if (mode == _UMUTEX_TRY)
			return (EBUSY);

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails,
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old,
		    owner | UMUTEX_CONTESTED);

		/* The address was invalid or casueword failed to store. */
		if (rv == -1 || rv == 1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			if (rv == -1)
				return (EFAULT);
			if (rv == 1) {
				rv = thread_check_susp(td, false);
				if (rv != 0)
					return (rv);
			}
			continue;
		}

		/*
		 * We set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		MPASS(old == owner);
		error = umtxq_sleep(uq, "umtxn", timeout == NULL ?
		    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);

		if (error == 0)
			error = thread_check_susp(td, false);
	}

	return (0);
}

/*
 * Unlock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	uint32_t owner, old, id, newlock;
	int error, count;

	id = td->td_tid;

again:
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	newlock = umtx_unlock_val(flags, rb);
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, newlock);
		if (error == -1)
			return (EFAULT);
		if (error == 1) {
			error = thread_check_susp(td, false);
			if (error != 0)
				return (error);
			goto again;
		}
		MPASS(old == owner);
		return (0);
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * zero or one threads are waiting for it.  Otherwise, it must
	 * be marked as contested.
	 */
	if (count > 1)
		newlock |= UMUTEX_CONTESTED;
	error = casueword32(&m->m_owner, owner, &old, newlock);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (error == 1) {
		if (old != owner)
			return (EINVAL);
		error = thread_check_susp(td, false);
		if (error != 0)
			return (error);
		goto again;
	}
	return (0);
}

/*
 * Check if the mutex is available and wake up a waiter; for simple
 * mutexes only.
 */
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

again:
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != 0 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV)
		return (0);

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	if (count <= 1 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV) {
		error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    UMUTEX_UNOWNED);
		if (error == -1) {
			error = EFAULT;
		} else if (error == 1) {
			umtxq_lock(&key);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			error = thread_check_susp(td, false);
			if (error != 0)
				return (error);
			goto again;
		}
	}

	umtxq_lock(&key);
	if (error == 0 && count != 0) {
		MPASS((owner & ~UMUTEX_CONTESTED) == 0 ||
		    owner == UMUTEX_RB_OWNERDEAD ||
		    owner == UMUTEX_RB_NOTRECOV);
		umtxq_signal(&key, 1);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}
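/*
 * Illustrative userland fast path matching the contested-bit protocol
 * above: lock and unlock stay in userland with one CAS while the word
 * is uncontested, and enter the kernel only once UMUTEX_CONTESTED is
 * set (sketch with C11 atomics; the ex_* names and the ex_*_syscall()
 * kernel-entry stubs are made up for the example):
 */
#if 0
#include <stdatomic.h>
#include <stdint.h>

#define	EX_UNOWNED	0u
#define	EX_CONTESTED	0x80000000u

void ex_lock_syscall(_Atomic uint32_t *);	/* slow path, sets CONTESTED */
void ex_unlock_syscall(_Atomic uint32_t *);	/* slow path, wakes a waiter */

static void
ex_lock(_Atomic uint32_t *m, uint32_t tid)
{
	uint32_t old = EX_UNOWNED;

	/* Uncontested acquire: UNOWNED -> tid, no kernel entry. */
	if (!atomic_compare_exchange_strong(m, &old, tid))
		ex_lock_syscall(m);
}

static void
ex_unlock(_Atomic uint32_t *m, uint32_t tid)
{
	uint32_t old = tid;

	/*
	 * Uncontested release: tid -> UNOWNED.  If the CAS fails, the
	 * kernel has set EX_CONTESTED and a waiter must be woken up.
	 */
	if (!atomic_compare_exchange_strong(m, &old, EX_UNOWNED))
		ex_unlock_syscall(m);
}
#endif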
/*
 * Check if the mutex has waiters and try to fix the contention bit.
 */
static int
do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old;
	int type;
	int error;
	int count;

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT |
	    UMUTEX_ROBUST)) {
	case 0:
	case UMUTEX_ROBUST:
		type = TYPE_NORMAL_UMUTEX;
		break;
	case UMUTEX_PRIO_INHERIT:
		type = TYPE_PI_UMUTEX;
		break;
	case (UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST):
		type = TYPE_PI_ROBUST_UMUTEX;
		break;
	case UMUTEX_PRIO_PROTECT:
		type = TYPE_PP_UMUTEX;
		break;
	case (UMUTEX_PRIO_PROTECT | UMUTEX_ROBUST):
		type = TYPE_PP_ROBUST_UMUTEX;
		break;
	default:
		return (EINVAL);
	}
	if ((error = umtx_key_get(m, type, GET_SHARE(flags), &key)) != 0)
		return (error);

	owner = 0;
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		error = EFAULT;

	/*
	 * Only repair the contention bit if there is a waiter; this
	 * means the mutex is still being referenced by userland code.
	 * Otherwise, don't update any memory.
	 */
	while (error == 0 && (owner & UMUTEX_CONTESTED) == 0 &&
	    (count > 1 || (count == 1 && (owner & ~UMUTEX_CONTESTED) != 0))) {
		error = casueword32(&m->m_owner, owner, &old,
		    owner | UMUTEX_CONTESTED);
		if (error == -1) {
			error = EFAULT;
			break;
		}
		if (error == 0) {
			MPASS(old == owner);
			break;
		}
		owner = old;
		error = thread_check_susp(td, false);
	}

	umtxq_lock(&key);
	if (error == EFAULT) {
		umtxq_signal(&key, INT_MAX);
	} else if (count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 ||
	    owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV))
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

static inline struct umtx_pi *
umtx_pi_alloc(int flags)
{
	struct umtx_pi *pi;

	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
	TAILQ_INIT(&pi->pi_blocked);
	atomic_add_int(&umtx_pi_allocated, 1);
	return (pi);
}

static inline void
umtx_pi_free(struct umtx_pi *pi)
{
	uma_zfree(umtx_pi_zone, pi);
	atomic_add_int(&umtx_pi_allocated, -1);
}

/*
 * Adjust the thread's position on a pi_state after its priority has
 * been changed.
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread or higher than the next thread.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}

static struct umtx_pi *
umtx_pi_next(struct umtx_pi *pi)
{
	struct umtx_q *uq_owner;

	if (pi->pi_owner == NULL)
		return (NULL);
	uq_owner = pi->pi_owner->td_umtxq;
	if (uq_owner == NULL)
		return (NULL);
	return (uq_owner->uq_pi_blocked);
}

/*
 * Floyd's cycle-finding algorithm.
 */
static bool
umtx_pi_check_loop(struct umtx_pi *pi)
{
	struct umtx_pi *pi1;	/* fast iterator */

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (false);
	pi1 = pi;
	for (;;) {
		pi = umtx_pi_next(pi);
		if (pi == NULL)
			break;
		pi1 = umtx_pi_next(pi1);
		if (pi1 == NULL)
			break;
		pi1 = umtx_pi_next(pi1);
		if (pi1 == NULL)
			break;
		if (pi == pi1)
			return (true);
	}
	return (false);
}
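/*
 * Illustrative standalone version of the Floyd cycle check used by
 * umtx_pi_check_loop() above: the fast iterator advances two links per
 * slow-iterator step, so the two meet if and only if the owner chain
 * loops back on itself (the ex_* types are made up for the example):
 */
#if 0
#include <stdbool.h>
#include <stddef.h>

struct ex_node {
	struct ex_node *next;	/* next blocked-owner link, or NULL */
};

static bool
ex_check_loop(struct ex_node *slow)
{
	struct ex_node *fast;

	if (slow == NULL)
		return (false);
	fast = slow;
	for (;;) {
		slow = slow->next;
		if (slow == NULL)
			break;
		fast = fast->next;
		if (fast == NULL)
			break;
		fast = fast->next;
		if (fast == NULL)
			break;
		if (slow == fast)
			return (true);	/* the chain forms a cycle */
	}
	return (false);
}
#endif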
/*
 * Propagate priority when a thread is blocked on a POSIX PI mutex.
 */
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;
	if (umtx_pi_check_loop(pi))
		return;

	for (;;) {
		td = pi->pi_owner;
		if (td == NULL || td == curthread)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		thread_lock(td);
		if (td->td_lend_user_pri > pri)
			sched_lend_user_prio(td, pri);
		else {
			thread_unlock(td);
			break;
		}
		thread_unlock(td);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		if (pi == NULL)
			break;
		/* Resort td on the list if needed. */
		umtx_pi_adjust_thread(pi, td);
	}
}

/*
 * Unpropagate priority for a PI mutex when a thread blocked on it is
 * interrupted by a signal or resumed by another thread.
 */
static void
umtx_repropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);

	if (umtx_pi_check_loop(pi))
		return;
	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		thread_lock(pi->pi_owner);
		sched_lend_user_prio(pi->pi_owner, pri);
		thread_unlock(pi->pi_owner);
		if ((pi = uq_owner->uq_pi_blocked) != NULL)
			umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
	}
}
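/*
 * Illustrative sketch of the propagation walk above: follow the chain
 * "waiter -> mutex owner -> mutex that owner blocks on" and lend the
 * waiter's priority to every owner with a worse one (standalone code;
 * a smaller value means a better priority; the ex_* types are made up
 * for the example):
 */
#if 0
#include <stddef.h>

struct ex_mutex;

struct ex_thread {
	int		ex_pri;		/* effective priority */
	struct ex_mutex	*ex_blocked_on;	/* mutex this thread sleeps on */
};

struct ex_mutex {
	struct ex_thread *ex_owner;	/* current owner, or NULL */
};

static void
ex_propagate(struct ex_thread *waiter)
{
	struct ex_mutex *m = waiter->ex_blocked_on;
	int pri = waiter->ex_pri;

	while (m != NULL && m->ex_owner != NULL) {
		if (m->ex_owner->ex_pri <= pri)
			break;				/* already as good */
		m->ex_owner->ex_pri = pri;		/* lend priority */
		m = m->ex_owner->ex_blocked_on;		/* follow the chain */
	}
}
#endif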
/*
 * Insert a PI mutex into the owned list.
 */
static void
umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_assert(&umtx_lock, MA_OWNED);
	MPASS(pi->pi_owner == NULL);
	pi->pi_owner = owner;
	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
}

/*
 * Disown a PI mutex, and remove it from the owned list.
 */
static void
umtx_pi_disown(struct umtx_pi *pi)
{

	mtx_assert(&umtx_lock, MA_OWNED);
	TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, pi, pi_link);
	pi->pi_owner = NULL;
}

/*
 * Claim ownership of a PI mutex.
 */
static int
umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq;
	int pri;

	mtx_lock(&umtx_lock);
	if (pi->pi_owner == owner) {
		mtx_unlock(&umtx_lock);
		return (0);
	}

	if (pi->pi_owner != NULL) {
		/*
		 * userland may have already messed the mutex, sigh.
		 */
		mtx_unlock(&umtx_lock);
		return (EPERM);
	}
	umtx_pi_setowner(pi, owner);
	uq = TAILQ_FIRST(&pi->pi_blocked);
	if (uq != NULL) {
		pri = UPRI(uq->uq_thread);
		thread_lock(owner);
		if (pri < UPRI(owner))
			sched_lend_user_prio(owner, pri);
		thread_unlock(owner);
	}
	mtx_unlock(&umtx_lock);
	return (0);
}

/*
 * Adjust a thread's position in the queue of its blocked-on PI mutex;
 * this may result in a new round of priority propagation.
 */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	mtx_lock(&umtx_lock);
	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	if (pi != NULL) {
		umtx_pi_adjust_thread(pi, td);
		umtx_repropagate_priority(pi);
	}
	mtx_unlock(&umtx_lock);
}

/*
 * Sleep on a PI mutex.
 */
static int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, uint32_t owner,
    const char *wmesg, struct abs_timeout *timo, bool shared)
{
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int error, pri;
#ifdef INVARIANTS
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
#endif
	error = 0;
	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key));
	KASSERT(uc->uc_busy != 0, ("umtx chain is not busy"));
	umtxq_insert(uq);
	mtx_lock(&umtx_lock);
	if (pi->pi_owner == NULL) {
		mtx_unlock(&umtx_lock);
		td1 = tdfind(owner, shared ? -1 : td->td_proc->p_pid);
		mtx_lock(&umtx_lock);
		if (td1 != NULL) {
			if (pi->pi_owner == NULL)
				umtx_pi_setowner(pi, td1);
			PROC_UNLOCK(td1->td_proc);
		}
	}

	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	thread_lock(td);
	td->td_flags |= TDF_UPIBLOCKED;
	thread_unlock(td);
	umtx_propagate_priority(td);
	mtx_unlock(&umtx_lock);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, wmesg, timo);
	umtxq_remove(uq);

	mtx_lock(&umtx_lock);
	uq->uq_pi_blocked = NULL;
	thread_lock(td);
	td->td_flags &= ~TDF_UPIBLOCKED;
	thread_unlock(td);
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_repropagate_priority(pi);
	mtx_unlock(&umtx_lock);
	umtxq_unlock(&uq->uq_key);

	return (error);
}

/*
 * Increment the reference count of a PI mutex.
 */
static void
umtx_pi_ref(struct umtx_pi *pi)
{

	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&pi->pi_key));
	pi->pi_refcount++;
}
/*
 * Decrement the reference count of a PI mutex; when the count drops
 * to zero, its memory is freed.
 */
static void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		mtx_lock(&umtx_lock);
		if (pi->pi_owner != NULL)
			umtx_pi_disown(pi);
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
		    ("blocked queue not empty"));
		mtx_unlock(&umtx_lock);
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		umtx_pi_free(pi);
	}
}

/*
 * Find a PI mutex in the hash table.
 */
static struct umtx_pi *
umtx_pi_lookup(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtx_pi *pi;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);

	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
		if (umtx_key_match(&pi->pi_key, key)) {
			return (pi);
		}
	}
	return (NULL);
}

/*
 * Insert a PI mutex into the hash table.
 */
static inline void
umtx_pi_insert(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
}

/*
 * Lock a PI mutex.
 */
static int
do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, old_owner, owner, old;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	pi = umtx_pi_lookup(&uq->uq_key);
	if (pi == NULL) {
		new_pi = umtx_pi_alloc(M_NOWAIT);
		if (new_pi == NULL) {
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc(M_WAITOK);
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL) {
				umtx_pi_free(new_pi);
				new_pi = NULL;
			}
		}
		if (new_pi != NULL) {
			new_pi->pi_key = uq->uq_key;
			umtx_pi_insert(new_pi);
			pi = new_pi;
		}
	}
	umtx_pi_ref(pi);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in
		 * userland.
		 */
		rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}
		/* The acquire succeeded. */
		if (rv == 0) {
			MPASS(owner == UMUTEX_UNOWNED);
			error = 0;
			break;
		}

		if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
			break;
		}

		/*
		 * Avoid overwriting a possible error from sleep due
		 * to the pending signal with the suspension check
		 * result.
		 */
		if (error == 0) {
			error = thread_check_susp(td, true);
			if (error != 0)
				break;
		}

		/*
		 * If no one owns it but it is contested, try to
		 * acquire it.
		 */
		if (owner == UMUTEX_CONTESTED || owner == UMUTEX_RB_OWNERDEAD) {
			old_owner = owner;
			rv = casueword32(&m->m_owner, owner, &owner,
			    id | UMUTEX_CONTESTED);
			/* The address was invalid. */
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (rv == 1) {
				if (error == 0) {
					error = thread_check_susp(td, true);
					if (error != 0)
						break;
				}

				/*
				 * If this failed, the lock could have
				 * changed; restart.
				 */
				continue;
			}

			MPASS(rv == 0);
			MPASS(owner == old_owner);
			umtxq_lock(&uq->uq_key);
			umtxq_busy(&uq->uq_key);
			error = umtx_pi_claim(pi, td);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			if (error != 0) {
				/*
				 * Since we're going to return an
				 * error, restore the m_owner to its
				 * previous, unowned state to avoid
				 * compounding the problem.
				 */
				(void)casuword32(&m->m_owner,
				    id | UMUTEX_CONTESTED, old_owner);
			}
			if (error == 0 && old_owner == UMUTEX_RB_OWNERDEAD)
				error = EOWNERDEAD;
			break;
		}

		if ((owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails,
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old, owner |
		    UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		if (rv == 1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = thread_check_susp(td, true);
			if (error != 0)
				break;

			/*
			 * The lock changed and we need to retry, or we
			 * lost a race to the thread unlocking the
			 * umtx.  Note that the UMUTEX_RB_OWNERDEAD
			 * value for owner is impossible here.
			 */
			continue;
		}

		umtxq_lock(&uq->uq_key);

		/* We set the contested bit, sleep. */
		MPASS(old == owner);
		error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
		    "umtxpi", timeout == NULL ? NULL : &timo,
		    (flags & USYNC_PROCESS_SHARED) != 0);
		if (error != 0)
			continue;

		error = thread_check_susp(td, false);
		if (error != 0)
			break;
	}

	umtxq_lock(&uq->uq_key);
	umtx_pi_unref(pi);
	umtxq_unlock(&uq->uq_key);

	umtx_key_release(&uq->uq_key);
	return (error);
}
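/*
 * Illustrative decoding of the m_owner word driven by the loops above:
 * the low bits hold the owner tid, UMUTEX_CONTESTED marks sleeping
 * waiters, and two reserved values flag robust-mutex death (standalone
 * sketch; the EX_* values mirror what <sys/umtx.h> defines but are
 * assumptions here):
 */
#if 0
#include <stdint.h>
#include <stdio.h>

#define	EX_CONTESTED	0x80000000u		/* assumed UMUTEX_CONTESTED */
#define	EX_OWNERDEAD	(EX_CONTESTED | 0x10)	/* assumed RB_OWNERDEAD */
#define	EX_NOTRECOV	(EX_CONTESTED | 0x11)	/* assumed RB_NOTRECOV */

static void
ex_describe(uint32_t owner)
{
	if (owner == 0)
		printf("unowned\n");
	else if (owner == EX_OWNERDEAD)
		printf("robust owner died; next locker gets EOWNERDEAD\n");
	else if (owner == EX_NOTRECOV)
		printf("robust mutex unrecoverable: ENOTRECOVERABLE\n");
	else
		printf("owned by tid %u%s\n", owner & ~EX_CONTESTED,
		    (owner & EX_CONTESTED) ? " (contested)" : "");
}
#endif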
/*
 * Unlock a PI mutex.
 */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	uint32_t id, new_owner, old, owner;
	int count, error, pri;

	id = td->td_tid;

usrloop:
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	new_owner = umtx_unlock_val(flags, rb);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, new_owner);
		if (error == -1)
			return (EFAULT);
		if (error == 1) {
			error = thread_check_susp(td, true);
			if (error != 0)
				return (error);
			goto usrloop;
		}
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count_pi(&key, &uq_first);
	if (uq_first != NULL) {
		mtx_lock(&umtx_lock);
		pi = uq_first->uq_pi_blocked;
		KASSERT(pi != NULL, ("pi == NULL?"));
		if (pi->pi_owner != td && !(rb && pi->pi_owner == NULL)) {
			mtx_unlock(&umtx_lock);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			/* userland messed the mutex */
			return (EPERM);
		}
		uq_me = td->td_umtxq;
		if (pi->pi_owner == td)
			umtx_pi_disown(pi);
		/* get highest priority thread which is still sleeping. */
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		while (uq_first != NULL &&
		    (uq_first->uq_flags & UQF_UMTXQ) == 0) {
			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
		}
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
		if (uq_first)
			umtxq_signal_thread(uq_first);
	} else {
		pi = umtx_pi_lookup(&key);
		/*
		 * A umtx_pi can exist if a signal or timeout removed the
		 * last waiter from the umtxq, but there is still
		 * a thread in do_lock_pi() holding the umtx_pi.
		 */
		if (pi != NULL) {
			/*
			 * The umtx_pi can be unowned, such as when a thread
			 * has just entered do_lock_pi(), allocated the
			 * umtx_pi, and unlocked the umtxq.
			 * If the current thread owns it, it must disown it.
			 */
			mtx_lock(&umtx_lock);
			if (pi->pi_owner == td)
				umtx_pi_disown(pi);
			mtx_unlock(&umtx_lock);
		}
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * zero or one threads are waiting for it.  Otherwise, it must
	 * be marked as contested.
	 */

	if (count > 1)
		new_owner |= UMUTEX_CONTESTED;
again:
	error = casueword32(&m->m_owner, owner, &old, new_owner);
	if (error == 1) {
		error = thread_check_susp(td, false);
		if (error == 0)
			goto again;
	}
	umtxq_unbusy_unlocked(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (error == 0 && old != owner)
		return (EINVAL);
	return (error);
}
/*
 * Lock a PP mutex.
 */
static int
do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct abs_timeout timo;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t ceiling;
	uint32_t owner, id;
	int error, pri, old_inherited_pri, su, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
	for (;;) {
		old_inherited_pri = uq->uq_inherited_pri;
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		rv = fueword32(&m->m_ceilings[0], &ceiling);
		if (rv == -1) {
			error = EFAULT;
			goto out;
		}
		ceiling = RTP_PRIO_MAX - ceiling;
		if (ceiling > RTP_PRIO_MAX) {
			error = EINVAL;
			goto out;
		}

		mtx_lock(&umtx_lock);
		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
			mtx_unlock(&umtx_lock);
			error = EINVAL;
			goto out;
		}
		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
			thread_lock(td);
			if (uq->uq_inherited_pri < UPRI(td))
				sched_lend_user_prio(td, uq->uq_inherited_pri);
			thread_unlock(td);
		}
		mtx_unlock(&umtx_lock);

		rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    id | UMUTEX_CONTESTED);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}
		if (rv == 0) {
			MPASS(owner == UMUTEX_CONTESTED);
			error = 0;
			break;
		}
		/* rv == 1 */
		if (owner == UMUTEX_RB_OWNERDEAD) {
			rv = casueword32(&m->m_owner, UMUTEX_RB_OWNERDEAD,
			    &owner, id | UMUTEX_CONTESTED);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (rv == 0) {
				MPASS(owner == UMUTEX_RB_OWNERDEAD);
				error = EOWNERDEAD; /* success */
				break;
			}

			/*
			 * rv == 1, only check for suspension if we
			 * did not already catch a signal.  If we get
			 * an error from the check, the same condition
			 * is checked by the umtxq_sleep() call below,
			 * so we should obliterate the error to not
			 * skip the last loop iteration.
			 */
			if (error == 0) {
				error = thread_check_susp(td, false);
				if (error == 0) {
					if (try != 0)
						error = EBUSY;
					else
						continue;
				}
				error = 0;
			}
		} else if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
		}

		if (try != 0)
			error = EBUSY;

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", timeout == NULL ?
		    NULL : &timo);
2247 NULL : &timo); 2248 umtxq_remove(uq); 2249 umtxq_unlock(&uq->uq_key); 2250 2251 mtx_lock(&umtx_lock); 2252 uq->uq_inherited_pri = old_inherited_pri; 2253 pri = PRI_MAX; 2254 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2255 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2256 if (uq2 != NULL) { 2257 if (pri > UPRI(uq2->uq_thread)) 2258 pri = UPRI(uq2->uq_thread); 2259 } 2260 } 2261 if (pri > uq->uq_inherited_pri) 2262 pri = uq->uq_inherited_pri; 2263 thread_lock(td); 2264 sched_lend_user_prio(td, pri); 2265 thread_unlock(td); 2266 mtx_unlock(&umtx_lock); 2267 } 2268 2269 if (error != 0 && error != EOWNERDEAD) { 2270 mtx_lock(&umtx_lock); 2271 uq->uq_inherited_pri = old_inherited_pri; 2272 pri = PRI_MAX; 2273 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2274 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2275 if (uq2 != NULL) { 2276 if (pri > UPRI(uq2->uq_thread)) 2277 pri = UPRI(uq2->uq_thread); 2278 } 2279 } 2280 if (pri > uq->uq_inherited_pri) 2281 pri = uq->uq_inherited_pri; 2282 thread_lock(td); 2283 sched_lend_user_prio(td, pri); 2284 thread_unlock(td); 2285 mtx_unlock(&umtx_lock); 2286 } 2287 2288 out: 2289 umtxq_unbusy_unlocked(&uq->uq_key); 2290 umtx_key_release(&uq->uq_key); 2291 return (error); 2292 } 2293 2294 /* 2295 * Unlock a PP mutex. 2296 */ 2297 static int 2298 do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, bool rb) 2299 { 2300 struct umtx_key key; 2301 struct umtx_q *uq, *uq2; 2302 struct umtx_pi *pi; 2303 uint32_t id, owner, rceiling; 2304 int error, pri, new_inherited_pri, su; 2305 2306 id = td->td_tid; 2307 uq = td->td_umtxq; 2308 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); 2309 2310 /* 2311 * Make sure we own this mtx. 2312 */ 2313 error = fueword32(&m->m_owner, &owner); 2314 if (error == -1) 2315 return (EFAULT); 2316 2317 if ((owner & ~UMUTEX_CONTESTED) != id) 2318 return (EPERM); 2319 2320 error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t)); 2321 if (error != 0) 2322 return (error); 2323 2324 if (rceiling == -1) 2325 new_inherited_pri = PRI_MAX; 2326 else { 2327 rceiling = RTP_PRIO_MAX - rceiling; 2328 if (rceiling > RTP_PRIO_MAX) 2329 return (EINVAL); 2330 new_inherited_pri = PRI_MIN_REALTIME + rceiling; 2331 } 2332 2333 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2334 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2335 &key)) != 0) 2336 return (error); 2337 umtxq_lock(&key); 2338 umtxq_busy(&key); 2339 umtxq_unlock(&key); 2340 /* 2341 * For a priority protected mutex, always set the unlocked state 2342 * to UMUTEX_CONTESTED, so that userland always enters the kernel 2343 * to lock the mutex; this is necessary because the thread priority 2344 * has to be adjusted for such a mutex.
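 *
 * As a consequence, the usual userland fast path of
 * atomic_cmpset_acq_32(&m->m_owner, UMUTEX_UNOWNED, tid) can never
 * succeed for a PP mutex; a hypothetical userland lock attempt
 * (illustrative sketch) therefore always falls through to the kernel:
 *
 *	if (!atomic_cmpset_acq_32(&m.m_owner, UMUTEX_UNOWNED, tid))
 *		_umtx_op(&m, UMTX_OP_MUTEX_LOCK, 0, NULL, NULL);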
2345 */ 2346 error = suword32(&m->m_owner, umtx_unlock_val(flags, rb) | 2347 UMUTEX_CONTESTED); 2348 2349 umtxq_lock(&key); 2350 if (error == 0) 2351 umtxq_signal(&key, 1); 2352 umtxq_unbusy(&key); 2353 umtxq_unlock(&key); 2354 2355 if (error == -1) 2356 error = EFAULT; 2357 else { 2358 mtx_lock(&umtx_lock); 2359 if (su != 0) 2360 uq->uq_inherited_pri = new_inherited_pri; 2361 pri = PRI_MAX; 2362 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2363 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2364 if (uq2 != NULL) { 2365 if (pri > UPRI(uq2->uq_thread)) 2366 pri = UPRI(uq2->uq_thread); 2367 } 2368 } 2369 if (pri > uq->uq_inherited_pri) 2370 pri = uq->uq_inherited_pri; 2371 thread_lock(td); 2372 sched_lend_user_prio(td, pri); 2373 thread_unlock(td); 2374 mtx_unlock(&umtx_lock); 2375 } 2376 umtx_key_release(&key); 2377 return (error); 2378 } 2379 2380 static int 2381 do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling, 2382 uint32_t *old_ceiling) 2383 { 2384 struct umtx_q *uq; 2385 uint32_t flags, id, owner, save_ceiling; 2386 int error, rv, rv1; 2387 2388 error = fueword32(&m->m_flags, &flags); 2389 if (error == -1) 2390 return (EFAULT); 2391 if ((flags & UMUTEX_PRIO_PROTECT) == 0) 2392 return (EINVAL); 2393 if (ceiling > RTP_PRIO_MAX) 2394 return (EINVAL); 2395 id = td->td_tid; 2396 uq = td->td_umtxq; 2397 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2398 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2399 &uq->uq_key)) != 0) 2400 return (error); 2401 for (;;) { 2402 umtxq_lock(&uq->uq_key); 2403 umtxq_busy(&uq->uq_key); 2404 umtxq_unlock(&uq->uq_key); 2405 2406 rv = fueword32(&m->m_ceilings[0], &save_ceiling); 2407 if (rv == -1) { 2408 error = EFAULT; 2409 break; 2410 } 2411 2412 rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, 2413 id | UMUTEX_CONTESTED); 2414 if (rv == -1) { 2415 error = EFAULT; 2416 break; 2417 } 2418 2419 if (rv == 0) { 2420 MPASS(owner == UMUTEX_CONTESTED); 2421 rv = suword32(&m->m_ceilings[0], ceiling); 2422 rv1 = suword32(&m->m_owner, UMUTEX_CONTESTED); 2423 error = (rv == 0 && rv1 == 0) ? 0: EFAULT; 2424 break; 2425 } 2426 2427 if ((owner & ~UMUTEX_CONTESTED) == id) { 2428 rv = suword32(&m->m_ceilings[0], ceiling); 2429 error = rv == 0 ? 0 : EFAULT; 2430 break; 2431 } 2432 2433 if (owner == UMUTEX_RB_OWNERDEAD) { 2434 error = EOWNERDEAD; 2435 break; 2436 } else if (owner == UMUTEX_RB_NOTRECOV) { 2437 error = ENOTRECOVERABLE; 2438 break; 2439 } 2440 2441 /* 2442 * If we caught a signal, we have retried and now 2443 * exit immediately. 2444 */ 2445 if (error != 0) 2446 break; 2447 2448 /* 2449 * We set the contested bit, sleep. Otherwise the lock changed 2450 * and we need to retry or we lost a race to the thread 2451 * unlocking the umtx. 2452 */ 2453 umtxq_lock(&uq->uq_key); 2454 umtxq_insert(uq); 2455 umtxq_unbusy(&uq->uq_key); 2456 error = umtxq_sleep(uq, "umtxpp", NULL); 2457 umtxq_remove(uq); 2458 umtxq_unlock(&uq->uq_key); 2459 } 2460 umtxq_lock(&uq->uq_key); 2461 if (error == 0) 2462 umtxq_signal(&uq->uq_key, INT_MAX); 2463 umtxq_unbusy(&uq->uq_key); 2464 umtxq_unlock(&uq->uq_key); 2465 umtx_key_release(&uq->uq_key); 2466 if (error == 0 && old_ceiling != NULL) { 2467 rv = suword32(old_ceiling, save_ceiling); 2468 error = rv == 0 ? 0 : EFAULT; 2469 } 2470 return (error); 2471 } 2472 2473 /* 2474 * Lock a userland POSIX mutex. 
2475 */ 2476 static int 2477 do_lock_umutex(struct thread *td, struct umutex *m, 2478 struct _umtx_time *timeout, int mode) 2479 { 2480 uint32_t flags; 2481 int error; 2482 2483 error = fueword32(&m->m_flags, &flags); 2484 if (error == -1) 2485 return (EFAULT); 2486 2487 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2488 case 0: 2489 error = do_lock_normal(td, m, flags, timeout, mode); 2490 break; 2491 case UMUTEX_PRIO_INHERIT: 2492 error = do_lock_pi(td, m, flags, timeout, mode); 2493 break; 2494 case UMUTEX_PRIO_PROTECT: 2495 error = do_lock_pp(td, m, flags, timeout, mode); 2496 break; 2497 default: 2498 return (EINVAL); 2499 } 2500 if (timeout == NULL) { 2501 if (error == EINTR && mode != _UMUTEX_WAIT) 2502 error = ERESTART; 2503 } else { 2504 /* Timed-locking is not restarted. */ 2505 if (error == ERESTART) 2506 error = EINTR; 2507 } 2508 return (error); 2509 } 2510 2511 /* 2512 * Unlock a userland POSIX mutex. 2513 */ 2514 static int 2515 do_unlock_umutex(struct thread *td, struct umutex *m, bool rb) 2516 { 2517 uint32_t flags; 2518 int error; 2519 2520 error = fueword32(&m->m_flags, &flags); 2521 if (error == -1) 2522 return (EFAULT); 2523 2524 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2525 case 0: 2526 return (do_unlock_normal(td, m, flags, rb)); 2527 case UMUTEX_PRIO_INHERIT: 2528 return (do_unlock_pi(td, m, flags, rb)); 2529 case UMUTEX_PRIO_PROTECT: 2530 return (do_unlock_pp(td, m, flags, rb)); 2531 } 2532 2533 return (EINVAL); 2534 } 2535 2536 static int 2537 do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m, 2538 struct timespec *timeout, u_long wflags) 2539 { 2540 struct abs_timeout timo; 2541 struct umtx_q *uq; 2542 uint32_t flags, clockid, hasw; 2543 int error; 2544 2545 uq = td->td_umtxq; 2546 error = fueword32(&cv->c_flags, &flags); 2547 if (error == -1) 2548 return (EFAULT); 2549 error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key); 2550 if (error != 0) 2551 return (error); 2552 2553 if ((wflags & CVWAIT_CLOCKID) != 0) { 2554 error = fueword32(&cv->c_clockid, &clockid); 2555 if (error == -1) { 2556 umtx_key_release(&uq->uq_key); 2557 return (EFAULT); 2558 } 2559 if (clockid < CLOCK_REALTIME || 2560 clockid >= CLOCK_THREAD_CPUTIME_ID) { 2561 /* hmm, only HW clock id will work. */ 2562 umtx_key_release(&uq->uq_key); 2563 return (EINVAL); 2564 } 2565 } else { 2566 clockid = CLOCK_REALTIME; 2567 } 2568 2569 umtxq_lock(&uq->uq_key); 2570 umtxq_busy(&uq->uq_key); 2571 umtxq_insert(uq); 2572 umtxq_unlock(&uq->uq_key); 2573 2574 /* 2575 * Set c_has_waiters to 1 before releasing user mutex, also 2576 * don't modify cache line when unnecessary. 2577 */ 2578 error = fueword32(&cv->c_has_waiters, &hasw); 2579 if (error == 0 && hasw == 0) 2580 suword32(&cv->c_has_waiters, 1); 2581 2582 umtxq_unbusy_unlocked(&uq->uq_key); 2583 2584 error = do_unlock_umutex(td, m, false); 2585 2586 if (timeout != NULL) 2587 abs_timeout_init(&timo, clockid, (wflags & CVWAIT_ABSTIME) != 0, 2588 timeout); 2589 2590 umtxq_lock(&uq->uq_key); 2591 if (error == 0) { 2592 error = umtxq_sleep(uq, "ucond", timeout == NULL ? 2593 NULL : &timo); 2594 } 2595 2596 if ((uq->uq_flags & UQF_UMTXQ) == 0) 2597 error = 0; 2598 else { 2599 /* 2600 * This must be timeout,interrupted by signal or 2601 * surprious wakeup, clear c_has_waiter flag when 2602 * necessary. 
2603 */ 2604 umtxq_busy(&uq->uq_key); 2605 if ((uq->uq_flags & UQF_UMTXQ) != 0) { 2606 int oldlen = uq->uq_cur_queue->length; 2607 umtxq_remove(uq); 2608 if (oldlen == 1) { 2609 umtxq_unlock(&uq->uq_key); 2610 suword32(&cv->c_has_waiters, 0); 2611 umtxq_lock(&uq->uq_key); 2612 } 2613 } 2614 umtxq_unbusy(&uq->uq_key); 2615 if (error == ERESTART) 2616 error = EINTR; 2617 } 2618 2619 umtxq_unlock(&uq->uq_key); 2620 umtx_key_release(&uq->uq_key); 2621 return (error); 2622 } 2623 2624 /* 2625 * Signal a userland condition variable. 2626 */ 2627 static int 2628 do_cv_signal(struct thread *td, struct ucond *cv) 2629 { 2630 struct umtx_key key; 2631 int error, cnt, nwake; 2632 uint32_t flags; 2633 2634 error = fueword32(&cv->c_flags, &flags); 2635 if (error == -1) 2636 return (EFAULT); 2637 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2638 return (error); 2639 umtxq_lock(&key); 2640 umtxq_busy(&key); 2641 cnt = umtxq_count(&key); 2642 nwake = umtxq_signal(&key, 1); 2643 if (cnt <= nwake) { 2644 umtxq_unlock(&key); 2645 error = suword32(&cv->c_has_waiters, 0); 2646 if (error == -1) 2647 error = EFAULT; 2648 umtxq_lock(&key); 2649 } 2650 umtxq_unbusy(&key); 2651 umtxq_unlock(&key); 2652 umtx_key_release(&key); 2653 return (error); 2654 } 2655 2656 static int 2657 do_cv_broadcast(struct thread *td, struct ucond *cv) 2658 { 2659 struct umtx_key key; 2660 int error; 2661 uint32_t flags; 2662 2663 error = fueword32(&cv->c_flags, &flags); 2664 if (error == -1) 2665 return (EFAULT); 2666 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2667 return (error); 2668 2669 umtxq_lock(&key); 2670 umtxq_busy(&key); 2671 umtxq_signal(&key, INT_MAX); 2672 umtxq_unlock(&key); 2673 2674 error = suword32(&cv->c_has_waiters, 0); 2675 if (error == -1) 2676 error = EFAULT; 2677 2678 umtxq_unbusy_unlocked(&key); 2679 2680 umtx_key_release(&key); 2681 return (error); 2682 } 2683 2684 static int 2685 do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, 2686 struct _umtx_time *timeout) 2687 { 2688 struct abs_timeout timo; 2689 struct umtx_q *uq; 2690 uint32_t flags, wrflags; 2691 int32_t state, oldstate; 2692 int32_t blocked_readers; 2693 int error, error1, rv; 2694 2695 uq = td->td_umtxq; 2696 error = fueword32(&rwlock->rw_flags, &flags); 2697 if (error == -1) 2698 return (EFAULT); 2699 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2700 if (error != 0) 2701 return (error); 2702 2703 if (timeout != NULL) 2704 abs_timeout_init2(&timo, timeout); 2705 2706 wrflags = URWLOCK_WRITE_OWNER; 2707 if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER)) 2708 wrflags |= URWLOCK_WRITE_WAITERS; 2709 2710 for (;;) { 2711 rv = fueword32(&rwlock->rw_state, &state); 2712 if (rv == -1) { 2713 umtx_key_release(&uq->uq_key); 2714 return (EFAULT); 2715 } 2716 2717 /* try to lock it */ 2718 while (!(state & wrflags)) { 2719 if (__predict_false(URWLOCK_READER_COUNT(state) == 2720 URWLOCK_MAX_READERS)) { 2721 umtx_key_release(&uq->uq_key); 2722 return (EAGAIN); 2723 } 2724 rv = casueword32(&rwlock->rw_state, state, 2725 &oldstate, state + 1); 2726 if (rv == -1) { 2727 umtx_key_release(&uq->uq_key); 2728 return (EFAULT); 2729 } 2730 if (rv == 0) { 2731 MPASS(oldstate == state); 2732 umtx_key_release(&uq->uq_key); 2733 return (0); 2734 } 2735 error = thread_check_susp(td, true); 2736 if (error != 0) 2737 break; 2738 state = oldstate; 2739 } 2740 2741 if (error) 2742 break; 2743 2744 /* grab monitor lock */ 2745 umtxq_lock(&uq->uq_key); 2746 
umtxq_busy(&uq->uq_key); 2747 umtxq_unlock(&uq->uq_key); 2748 2749 /* 2750 * re-read the state, in case it changed between the try-lock above 2751 * and the check below 2752 */ 2753 rv = fueword32(&rwlock->rw_state, &state); 2754 if (rv == -1) 2755 error = EFAULT; 2756 2757 /* set read contention bit */ 2758 while (error == 0 && (state & wrflags) && 2759 !(state & URWLOCK_READ_WAITERS)) { 2760 rv = casueword32(&rwlock->rw_state, state, 2761 &oldstate, state | URWLOCK_READ_WAITERS); 2762 if (rv == -1) { 2763 error = EFAULT; 2764 break; 2765 } 2766 if (rv == 0) { 2767 MPASS(oldstate == state); 2768 goto sleep; 2769 } 2770 state = oldstate; 2771 error = thread_check_susp(td, false); 2772 if (error != 0) 2773 break; 2774 } 2775 if (error != 0) { 2776 umtxq_unbusy_unlocked(&uq->uq_key); 2777 break; 2778 } 2779 2780 /* state is changed while setting flags, restart */ 2781 if (!(state & wrflags)) { 2782 umtxq_unbusy_unlocked(&uq->uq_key); 2783 error = thread_check_susp(td, true); 2784 if (error != 0) 2785 break; 2786 continue; 2787 } 2788 2789 sleep: 2790 /* 2791 * Contention bit is set, before sleeping, increase 2792 * read waiter count. 2793 */ 2794 rv = fueword32(&rwlock->rw_blocked_readers, 2795 &blocked_readers); 2796 if (rv == -1) { 2797 umtxq_unbusy_unlocked(&uq->uq_key); 2798 error = EFAULT; 2799 break; 2800 } 2801 suword32(&rwlock->rw_blocked_readers, blocked_readers+1); 2802 2803 while (state & wrflags) { 2804 umtxq_lock(&uq->uq_key); 2805 umtxq_insert(uq); 2806 umtxq_unbusy(&uq->uq_key); 2807 2808 error = umtxq_sleep(uq, "urdlck", timeout == NULL ? 2809 NULL : &timo); 2810 2811 umtxq_busy(&uq->uq_key); 2812 umtxq_remove(uq); 2813 umtxq_unlock(&uq->uq_key); 2814 if (error) 2815 break; 2816 rv = fueword32(&rwlock->rw_state, &state); 2817 if (rv == -1) { 2818 error = EFAULT; 2819 break; 2820 } 2821 } 2822 2823 /* decrease read waiter count, and may clear read contention bit */ 2824 rv = fueword32(&rwlock->rw_blocked_readers, 2825 &blocked_readers); 2826 if (rv == -1) { 2827 umtxq_unbusy_unlocked(&uq->uq_key); 2828 error = EFAULT; 2829 break; 2830 } 2831 suword32(&rwlock->rw_blocked_readers, blocked_readers-1); 2832 if (blocked_readers == 1) { 2833 rv = fueword32(&rwlock->rw_state, &state); 2834 if (rv == -1) { 2835 umtxq_unbusy_unlocked(&uq->uq_key); 2836 error = EFAULT; 2837 break; 2838 } 2839 for (;;) { 2840 rv = casueword32(&rwlock->rw_state, state, 2841 &oldstate, state & ~URWLOCK_READ_WAITERS); 2842 if (rv == -1) { 2843 error = EFAULT; 2844 break; 2845 } 2846 if (rv == 0) { 2847 MPASS(oldstate == state); 2848 break; 2849 } 2850 state = oldstate; 2851 error1 = thread_check_susp(td, false); 2852 if (error1 != 0) { 2853 if (error == 0) 2854 error = error1; 2855 break; 2856 } 2857 } 2858 } 2859 2860 umtxq_unbusy_unlocked(&uq->uq_key); 2861 if (error != 0) 2862 break; 2863 } 2864 umtx_key_release(&uq->uq_key); 2865 if (error == ERESTART) 2866 error = EINTR; 2867 return (error); 2868 } 2869 2870 static int 2871 do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout) 2872 { 2873 struct abs_timeout timo; 2874 struct umtx_q *uq; 2875 uint32_t flags; 2876 int32_t state, oldstate; 2877 int32_t blocked_writers; 2878 int32_t blocked_readers; 2879 int error, error1, rv; 2880 2881 uq = td->td_umtxq; 2882 error = fueword32(&rwlock->rw_flags, &flags); 2883 if (error == -1) 2884 return (EFAULT); 2885 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2886 if (error != 0) 2887 return (error); 2888 2889 if (timeout != NULL) 2890 abs_timeout_init2(&timo, 
timeout); 2891 2892 blocked_readers = 0; 2893 for (;;) { 2894 rv = fueword32(&rwlock->rw_state, &state); 2895 if (rv == -1) { 2896 umtx_key_release(&uq->uq_key); 2897 return (EFAULT); 2898 } 2899 while ((state & URWLOCK_WRITE_OWNER) == 0 && 2900 URWLOCK_READER_COUNT(state) == 0) { 2901 rv = casueword32(&rwlock->rw_state, state, 2902 &oldstate, state | URWLOCK_WRITE_OWNER); 2903 if (rv == -1) { 2904 umtx_key_release(&uq->uq_key); 2905 return (EFAULT); 2906 } 2907 if (rv == 0) { 2908 MPASS(oldstate == state); 2909 umtx_key_release(&uq->uq_key); 2910 return (0); 2911 } 2912 state = oldstate; 2913 error = thread_check_susp(td, true); 2914 if (error != 0) 2915 break; 2916 } 2917 2918 if (error) { 2919 if ((state & (URWLOCK_WRITE_OWNER | 2920 URWLOCK_WRITE_WAITERS)) == 0 && 2921 blocked_readers != 0) { 2922 umtxq_lock(&uq->uq_key); 2923 umtxq_busy(&uq->uq_key); 2924 umtxq_signal_queue(&uq->uq_key, INT_MAX, 2925 UMTX_SHARED_QUEUE); 2926 umtxq_unbusy(&uq->uq_key); 2927 umtxq_unlock(&uq->uq_key); 2928 } 2929 2930 break; 2931 } 2932 2933 /* grab monitor lock */ 2934 umtxq_lock(&uq->uq_key); 2935 umtxq_busy(&uq->uq_key); 2936 umtxq_unlock(&uq->uq_key); 2937 2938 /* 2939 * Re-read the state, in case it changed between the 2940 * try-lock above and the check below. 2941 */ 2942 rv = fueword32(&rwlock->rw_state, &state); 2943 if (rv == -1) 2944 error = EFAULT; 2945 2946 while (error == 0 && ((state & URWLOCK_WRITE_OWNER) || 2947 URWLOCK_READER_COUNT(state) != 0) && 2948 (state & URWLOCK_WRITE_WAITERS) == 0) { 2949 rv = casueword32(&rwlock->rw_state, state, 2950 &oldstate, state | URWLOCK_WRITE_WAITERS); 2951 if (rv == -1) { 2952 error = EFAULT; 2953 break; 2954 } 2955 if (rv == 0) { 2956 MPASS(oldstate == state); 2957 goto sleep; 2958 } 2959 state = oldstate; 2960 error = thread_check_susp(td, false); 2961 if (error != 0) 2962 break; 2963 } 2964 if (error != 0) { 2965 umtxq_unbusy_unlocked(&uq->uq_key); 2966 break; 2967 } 2968 2969 if ((state & URWLOCK_WRITE_OWNER) == 0 && 2970 URWLOCK_READER_COUNT(state) == 0) { 2971 umtxq_unbusy_unlocked(&uq->uq_key); 2972 error = thread_check_susp(td, false); 2973 if (error != 0) 2974 break; 2975 continue; 2976 } 2977 sleep: 2978 rv = fueword32(&rwlock->rw_blocked_writers, 2979 &blocked_writers); 2980 if (rv == -1) { 2981 umtxq_unbusy_unlocked(&uq->uq_key); 2982 error = EFAULT; 2983 break; 2984 } 2985 suword32(&rwlock->rw_blocked_writers, blocked_writers + 1); 2986 2987 while ((state & URWLOCK_WRITE_OWNER) || 2988 URWLOCK_READER_COUNT(state) != 0) { 2989 umtxq_lock(&uq->uq_key); 2990 umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE); 2991 umtxq_unbusy(&uq->uq_key); 2992 2993 error = umtxq_sleep(uq, "uwrlck", timeout == NULL ? 
2994 NULL : &timo); 2995 2996 umtxq_busy(&uq->uq_key); 2997 umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE); 2998 umtxq_unlock(&uq->uq_key); 2999 if (error) 3000 break; 3001 rv = fueword32(&rwlock->rw_state, &state); 3002 if (rv == -1) { 3003 error = EFAULT; 3004 break; 3005 } 3006 } 3007 3008 rv = fueword32(&rwlock->rw_blocked_writers, 3009 &blocked_writers); 3010 if (rv == -1) { 3011 umtxq_unbusy_unlocked(&uq->uq_key); 3012 error = EFAULT; 3013 break; 3014 } 3015 suword32(&rwlock->rw_blocked_writers, blocked_writers-1); 3016 if (blocked_writers == 1) { 3017 rv = fueword32(&rwlock->rw_state, &state); 3018 if (rv == -1) { 3019 umtxq_unbusy_unlocked(&uq->uq_key); 3020 error = EFAULT; 3021 break; 3022 } 3023 for (;;) { 3024 rv = casueword32(&rwlock->rw_state, state, 3025 &oldstate, state & ~URWLOCK_WRITE_WAITERS); 3026 if (rv == -1) { 3027 error = EFAULT; 3028 break; 3029 } 3030 if (rv == 0) { 3031 MPASS(oldstate == state); 3032 break; 3033 } 3034 state = oldstate; 3035 error1 = thread_check_susp(td, false); 3036 /* 3037 * We are leaving the URWLOCK_WRITE_WAITERS 3038 * behind, but this should not harm the 3039 * correctness. 3040 */ 3041 if (error1 != 0) { 3042 if (error == 0) 3043 error = error1; 3044 break; 3045 } 3046 } 3047 rv = fueword32(&rwlock->rw_blocked_readers, 3048 &blocked_readers); 3049 if (rv == -1) { 3050 umtxq_unbusy_unlocked(&uq->uq_key); 3051 error = EFAULT; 3052 break; 3053 } 3054 } else 3055 blocked_readers = 0; 3056 3057 umtxq_unbusy_unlocked(&uq->uq_key); 3058 } 3059 3060 umtx_key_release(&uq->uq_key); 3061 if (error == ERESTART) 3062 error = EINTR; 3063 return (error); 3064 } 3065 3066 static int 3067 do_rw_unlock(struct thread *td, struct urwlock *rwlock) 3068 { 3069 struct umtx_q *uq; 3070 uint32_t flags; 3071 int32_t state, oldstate; 3072 int error, rv, q, count; 3073 3074 uq = td->td_umtxq; 3075 error = fueword32(&rwlock->rw_flags, &flags); 3076 if (error == -1) 3077 return (EFAULT); 3078 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3079 if (error != 0) 3080 return (error); 3081 3082 error = fueword32(&rwlock->rw_state, &state); 3083 if (error == -1) { 3084 error = EFAULT; 3085 goto out; 3086 } 3087 if (state & URWLOCK_WRITE_OWNER) { 3088 for (;;) { 3089 rv = casueword32(&rwlock->rw_state, state, 3090 &oldstate, state & ~URWLOCK_WRITE_OWNER); 3091 if (rv == -1) { 3092 error = EFAULT; 3093 goto out; 3094 } 3095 if (rv == 1) { 3096 state = oldstate; 3097 if (!(oldstate & URWLOCK_WRITE_OWNER)) { 3098 error = EPERM; 3099 goto out; 3100 } 3101 error = thread_check_susp(td, true); 3102 if (error != 0) 3103 goto out; 3104 } else 3105 break; 3106 } 3107 } else if (URWLOCK_READER_COUNT(state) != 0) { 3108 for (;;) { 3109 rv = casueword32(&rwlock->rw_state, state, 3110 &oldstate, state - 1); 3111 if (rv == -1) { 3112 error = EFAULT; 3113 goto out; 3114 } 3115 if (rv == 1) { 3116 state = oldstate; 3117 if (URWLOCK_READER_COUNT(oldstate) == 0) { 3118 error = EPERM; 3119 goto out; 3120 } 3121 error = thread_check_susp(td, true); 3122 if (error != 0) 3123 goto out; 3124 } else 3125 break; 3126 } 3127 } else { 3128 error = EPERM; 3129 goto out; 3130 } 3131 3132 count = 0; 3133 3134 if (!(flags & URWLOCK_PREFER_READER)) { 3135 if (state & URWLOCK_WRITE_WAITERS) { 3136 count = 1; 3137 q = UMTX_EXCLUSIVE_QUEUE; 3138 } else if (state & URWLOCK_READ_WAITERS) { 3139 count = INT_MAX; 3140 q = UMTX_SHARED_QUEUE; 3141 } 3142 } else { 3143 if (state & URWLOCK_READ_WAITERS) { 3144 count = INT_MAX; 3145 q = UMTX_SHARED_QUEUE; 3146 } else if (state & 
URWLOCK_WRITE_WAITERS) { 3147 count = 1; 3148 q = UMTX_EXCLUSIVE_QUEUE; 3149 } 3150 } 3151 3152 if (count) { 3153 umtxq_lock(&uq->uq_key); 3154 umtxq_busy(&uq->uq_key); 3155 umtxq_signal_queue(&uq->uq_key, count, q); 3156 umtxq_unbusy(&uq->uq_key); 3157 umtxq_unlock(&uq->uq_key); 3158 } 3159 out: 3160 umtx_key_release(&uq->uq_key); 3161 return (error); 3162 } 3163 3164 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3165 static int 3166 do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout) 3167 { 3168 struct abs_timeout timo; 3169 struct umtx_q *uq; 3170 uint32_t flags, count, count1; 3171 int error, rv, rv1; 3172 3173 uq = td->td_umtxq; 3174 error = fueword32(&sem->_flags, &flags); 3175 if (error == -1) 3176 return (EFAULT); 3177 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3178 if (error != 0) 3179 return (error); 3180 3181 if (timeout != NULL) 3182 abs_timeout_init2(&timo, timeout); 3183 3184 again: 3185 umtxq_lock(&uq->uq_key); 3186 umtxq_busy(&uq->uq_key); 3187 umtxq_insert(uq); 3188 umtxq_unlock(&uq->uq_key); 3189 rv = casueword32(&sem->_has_waiters, 0, &count1, 1); 3190 if (rv == 0) 3191 rv1 = fueword32(&sem->_count, &count); 3192 if (rv == -1 || (rv == 0 && (rv1 == -1 || count != 0)) || 3193 (rv == 1 && count1 == 0)) { 3194 umtxq_lock(&uq->uq_key); 3195 umtxq_unbusy(&uq->uq_key); 3196 umtxq_remove(uq); 3197 umtxq_unlock(&uq->uq_key); 3198 if (rv == 1) { 3199 rv = thread_check_susp(td, true); 3200 if (rv == 0) 3201 goto again; 3202 error = rv; 3203 goto out; 3204 } 3205 if (rv == 0) 3206 rv = rv1; 3207 error = rv == -1 ? EFAULT : 0; 3208 goto out; 3209 } 3210 umtxq_lock(&uq->uq_key); 3211 umtxq_unbusy(&uq->uq_key); 3212 3213 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 3214 3215 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3216 error = 0; 3217 else { 3218 umtxq_remove(uq); 3219 /* A relative timeout cannot be restarted. */ 3220 if (error == ERESTART && timeout != NULL && 3221 (timeout->_flags & UMTX_ABSTIME) == 0) 3222 error = EINTR; 3223 } 3224 umtxq_unlock(&uq->uq_key); 3225 out: 3226 umtx_key_release(&uq->uq_key); 3227 return (error); 3228 } 3229 3230 /* 3231 * Signal a userland semaphore. 3232 */ 3233 static int 3234 do_sem_wake(struct thread *td, struct _usem *sem) 3235 { 3236 struct umtx_key key; 3237 int error, cnt; 3238 uint32_t flags; 3239 3240 error = fueword32(&sem->_flags, &flags); 3241 if (error == -1) 3242 return (EFAULT); 3243 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3244 return (error); 3245 umtxq_lock(&key); 3246 umtxq_busy(&key); 3247 cnt = umtxq_count(&key); 3248 if (cnt > 0) { 3249 /* 3250 * The waiter count is greater than zero, so the memory is 3251 * still being referenced by user code and we can safely 3252 * update the _has_waiters flag.
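 *
 * A legacy userland post operation pairing with this function might
 * look like (hypothetical sketch, illustrative only):
 *
 *	atomic_add_rel_32(&sem->_count, 1);
 *	if (atomic_load_acq_32(&sem->_has_waiters) != 0)
 *		_umtx_op(sem, UMTX_OP_SEM_WAKE, 0, NULL, NULL);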
3253 */ 3254 if (cnt == 1) { 3255 umtxq_unlock(&key); 3256 error = suword32(&sem->_has_waiters, 0); 3257 umtxq_lock(&key); 3258 if (error == -1) 3259 error = EFAULT; 3260 } 3261 umtxq_signal(&key, 1); 3262 } 3263 umtxq_unbusy(&key); 3264 umtxq_unlock(&key); 3265 umtx_key_release(&key); 3266 return (error); 3267 } 3268 #endif 3269 3270 static int 3271 do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout) 3272 { 3273 struct abs_timeout timo; 3274 struct umtx_q *uq; 3275 uint32_t count, flags; 3276 int error, rv; 3277 3278 uq = td->td_umtxq; 3279 flags = fuword32(&sem->_flags); 3280 if (timeout != NULL) 3281 abs_timeout_init2(&timo, timeout); 3282 3283 again: 3284 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3285 if (error != 0) 3286 return (error); 3287 umtxq_lock(&uq->uq_key); 3288 umtxq_busy(&uq->uq_key); 3289 umtxq_insert(uq); 3290 umtxq_unlock(&uq->uq_key); 3291 rv = fueword32(&sem->_count, &count); 3292 if (rv == -1) { 3293 umtxq_lock(&uq->uq_key); 3294 umtxq_unbusy(&uq->uq_key); 3295 umtxq_remove(uq); 3296 umtxq_unlock(&uq->uq_key); 3297 umtx_key_release(&uq->uq_key); 3298 return (EFAULT); 3299 } 3300 for (;;) { 3301 if (USEM_COUNT(count) != 0) { 3302 umtxq_lock(&uq->uq_key); 3303 umtxq_unbusy(&uq->uq_key); 3304 umtxq_remove(uq); 3305 umtxq_unlock(&uq->uq_key); 3306 umtx_key_release(&uq->uq_key); 3307 return (0); 3308 } 3309 if (count == USEM_HAS_WAITERS) 3310 break; 3311 rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS); 3312 if (rv == 0) 3313 break; 3314 umtxq_lock(&uq->uq_key); 3315 umtxq_unbusy(&uq->uq_key); 3316 umtxq_remove(uq); 3317 umtxq_unlock(&uq->uq_key); 3318 umtx_key_release(&uq->uq_key); 3319 if (rv == -1) 3320 return (EFAULT); 3321 rv = thread_check_susp(td, true); 3322 if (rv != 0) 3323 return (rv); 3324 goto again; 3325 } 3326 umtxq_lock(&uq->uq_key); 3327 umtxq_unbusy(&uq->uq_key); 3328 3329 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 3330 3331 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3332 error = 0; 3333 else { 3334 umtxq_remove(uq); 3335 if (timeout != NULL && (timeout->_flags & UMTX_ABSTIME) == 0) { 3336 /* A relative timeout cannot be restarted. */ 3337 if (error == ERESTART) 3338 error = EINTR; 3339 if (error == EINTR) { 3340 abs_timeout_update(&timo); 3341 timespecsub(&timo.end, &timo.cur, 3342 &timeout->_timeout); 3343 } 3344 } 3345 } 3346 umtxq_unlock(&uq->uq_key); 3347 umtx_key_release(&uq->uq_key); 3348 return (error); 3349 } 3350 3351 /* 3352 * Signal a userland semaphore. 3353 */ 3354 static int 3355 do_sem2_wake(struct thread *td, struct _usem2 *sem) 3356 { 3357 struct umtx_key key; 3358 int error, cnt, rv; 3359 uint32_t count, flags; 3360 3361 rv = fueword32(&sem->_flags, &flags); 3362 if (rv == -1) 3363 return (EFAULT); 3364 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3365 return (error); 3366 umtxq_lock(&key); 3367 umtxq_busy(&key); 3368 cnt = umtxq_count(&key); 3369 if (cnt > 0) { 3370 /* 3371 * If this was the last sleeping thread, clear the waiters 3372 * flag in _count. 
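 *
 * Both pieces of state live in the single _count word of a _usem2
 * (illustrative decomposition):
 *
 *	value = USEM_COUNT(sem->_count);		(low 31 bits)
 *	waiters = (sem->_count & USEM_HAS_WAITERS) != 0;	(high bit)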
3373 */ 3374 if (cnt == 1) { 3375 umtxq_unlock(&key); 3376 rv = fueword32(&sem->_count, &count); 3377 while (rv != -1 && count & USEM_HAS_WAITERS) { 3378 rv = casueword32(&sem->_count, count, &count, 3379 count & ~USEM_HAS_WAITERS); 3380 if (rv == 1) { 3381 rv = thread_check_susp(td, true); 3382 if (rv != 0) 3383 break; 3384 } 3385 } 3386 if (rv == -1) 3387 error = EFAULT; 3388 else if (rv > 0) { 3389 error = rv; 3390 } 3391 umtxq_lock(&key); 3392 } 3393 3394 umtxq_signal(&key, 1); 3395 } 3396 umtxq_unbusy(&key); 3397 umtxq_unlock(&key); 3398 umtx_key_release(&key); 3399 return (error); 3400 } 3401 3402 inline int 3403 umtx_copyin_timeout(const void *addr, struct timespec *tsp) 3404 { 3405 int error; 3406 3407 error = copyin(addr, tsp, sizeof(struct timespec)); 3408 if (error == 0) { 3409 if (tsp->tv_sec < 0 || 3410 tsp->tv_nsec >= 1000000000 || 3411 tsp->tv_nsec < 0) 3412 error = EINVAL; 3413 } 3414 return (error); 3415 } 3416 3417 static inline int 3418 umtx_copyin_umtx_time(const void *addr, size_t size, struct _umtx_time *tp) 3419 { 3420 int error; 3421 3422 if (size <= sizeof(struct timespec)) { 3423 tp->_clockid = CLOCK_REALTIME; 3424 tp->_flags = 0; 3425 error = copyin(addr, &tp->_timeout, sizeof(struct timespec)); 3426 } else 3427 error = copyin(addr, tp, sizeof(struct _umtx_time)); 3428 if (error != 0) 3429 return (error); 3430 if (tp->_timeout.tv_sec < 0 || 3431 tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0) 3432 return (EINVAL); 3433 return (0); 3434 } 3435 3436 static int 3437 __umtx_op_unimpl(struct thread *td, struct _umtx_op_args *uap) 3438 { 3439 3440 return (EOPNOTSUPP); 3441 } 3442 3443 static int 3444 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap) 3445 { 3446 struct _umtx_time timeout, *tm_p; 3447 int error; 3448 3449 if (uap->uaddr2 == NULL) 3450 tm_p = NULL; 3451 else { 3452 error = umtx_copyin_umtx_time( 3453 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3454 if (error != 0) 3455 return (error); 3456 tm_p = &timeout; 3457 } 3458 return (do_wait(td, uap->obj, uap->val, tm_p, 0, 0)); 3459 } 3460 3461 static int 3462 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap) 3463 { 3464 struct _umtx_time timeout, *tm_p; 3465 int error; 3466 3467 if (uap->uaddr2 == NULL) 3468 tm_p = NULL; 3469 else { 3470 error = umtx_copyin_umtx_time( 3471 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3472 if (error != 0) 3473 return (error); 3474 tm_p = &timeout; 3475 } 3476 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); 3477 } 3478 3479 static int 3480 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap) 3481 { 3482 struct _umtx_time *tm_p, timeout; 3483 int error; 3484 3485 if (uap->uaddr2 == NULL) 3486 tm_p = NULL; 3487 else { 3488 error = umtx_copyin_umtx_time( 3489 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3490 if (error != 0) 3491 return (error); 3492 tm_p = &timeout; 3493 } 3494 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1)); 3495 } 3496 3497 static int 3498 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap) 3499 { 3500 3501 return (kern_umtx_wake(td, uap->obj, uap->val, 0)); 3502 } 3503 3504 #define BATCH_SIZE 128 3505 static int 3506 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap) 3507 { 3508 char *uaddrs[BATCH_SIZE], **upp; 3509 int count, error, i, pos, tocopy; 3510 3511 upp = (char **)uap->obj; 3512 error = 0; 3513 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 3514 pos += tocopy) { 3515 tocopy = MIN(count, BATCH_SIZE); 3516 error = copyin(upp + pos, uaddrs, 
tocopy * sizeof(char *)); 3517 if (error != 0) 3518 break; 3519 for (i = 0; i < tocopy; ++i) 3520 kern_umtx_wake(td, uaddrs[i], INT_MAX, 1); 3521 maybe_yield(); 3522 } 3523 return (error); 3524 } 3525 3526 static int 3527 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap) 3528 { 3529 3530 return (kern_umtx_wake(td, uap->obj, uap->val, 1)); 3531 } 3532 3533 static int 3534 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap) 3535 { 3536 struct _umtx_time *tm_p, timeout; 3537 int error; 3538 3539 /* Allow a null timespec (wait forever). */ 3540 if (uap->uaddr2 == NULL) 3541 tm_p = NULL; 3542 else { 3543 error = umtx_copyin_umtx_time( 3544 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3545 if (error != 0) 3546 return (error); 3547 tm_p = &timeout; 3548 } 3549 return (do_lock_umutex(td, uap->obj, tm_p, 0)); 3550 } 3551 3552 static int 3553 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap) 3554 { 3555 3556 return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY)); 3557 } 3558 3559 static int 3560 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap) 3561 { 3562 struct _umtx_time *tm_p, timeout; 3563 int error; 3564 3565 /* Allow a null timespec (wait forever). */ 3566 if (uap->uaddr2 == NULL) 3567 tm_p = NULL; 3568 else { 3569 error = umtx_copyin_umtx_time( 3570 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3571 if (error != 0) 3572 return (error); 3573 tm_p = &timeout; 3574 } 3575 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT)); 3576 } 3577 3578 static int 3579 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap) 3580 { 3581 3582 return (do_wake_umutex(td, uap->obj)); 3583 } 3584 3585 static int 3586 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap) 3587 { 3588 3589 return (do_unlock_umutex(td, uap->obj, false)); 3590 } 3591 3592 static int 3593 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap) 3594 { 3595 3596 return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1)); 3597 } 3598 3599 static int 3600 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap) 3601 { 3602 struct timespec *ts, timeout; 3603 int error; 3604 3605 /* Allow a null timespec (wait forever). */ 3606 if (uap->uaddr2 == NULL) 3607 ts = NULL; 3608 else { 3609 error = umtx_copyin_timeout(uap->uaddr2, &timeout); 3610 if (error != 0) 3611 return (error); 3612 ts = &timeout; 3613 } 3614 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 3615 } 3616 3617 static int 3618 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap) 3619 { 3620 3621 return (do_cv_signal(td, uap->obj)); 3622 } 3623 3624 static int 3625 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap) 3626 { 3627 3628 return (do_cv_broadcast(td, uap->obj)); 3629 } 3630 3631 static int 3632 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap) 3633 { 3634 struct _umtx_time timeout; 3635 int error; 3636 3637 /* Allow a null timespec (wait forever). */ 3638 if (uap->uaddr2 == NULL) { 3639 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 3640 } else { 3641 error = umtx_copyin_umtx_time(uap->uaddr2, 3642 (size_t)uap->uaddr1, &timeout); 3643 if (error != 0) 3644 return (error); 3645 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 3646 } 3647 return (error); 3648 } 3649 3650 static int 3651 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap) 3652 { 3653 struct _umtx_time timeout; 3654 int error; 3655 3656 /* Allow a null timespec (wait forever). 
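 *
 * When a timeout is supplied, uaddr1 carries the size of the structure
 * at uaddr2, which lets callers pass either a plain timespec or a full
 * _umtx_time. A hypothetical userland sketch of an absolute,
 * CLOCK_MONOTONIC timed lock:
 *
 *	struct _umtx_time t = { ._timeout = ts,
 *	    ._flags = UMTX_ABSTIME, ._clockid = CLOCK_MONOTONIC };
 *	_umtx_op(&m, UMTX_OP_MUTEX_LOCK, 0, (void *)sizeof(t), &t);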
*/ 3657 if (uap->uaddr2 == NULL) { 3658 error = do_rw_wrlock(td, uap->obj, 0); 3659 } else { 3660 error = umtx_copyin_umtx_time(uap->uaddr2, 3661 (size_t)uap->uaddr1, &timeout); 3662 if (error != 0) 3663 return (error); 3664 3665 error = do_rw_wrlock(td, uap->obj, &timeout); 3666 } 3667 return (error); 3668 } 3669 3670 static int 3671 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap) 3672 { 3673 3674 return (do_rw_unlock(td, uap->obj)); 3675 } 3676 3677 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3678 static int 3679 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap) 3680 { 3681 struct _umtx_time *tm_p, timeout; 3682 int error; 3683 3684 /* Allow a null timespec (wait forever). */ 3685 if (uap->uaddr2 == NULL) 3686 tm_p = NULL; 3687 else { 3688 error = umtx_copyin_umtx_time( 3689 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3690 if (error != 0) 3691 return (error); 3692 tm_p = &timeout; 3693 } 3694 return (do_sem_wait(td, uap->obj, tm_p)); 3695 } 3696 3697 static int 3698 __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap) 3699 { 3700 3701 return (do_sem_wake(td, uap->obj)); 3702 } 3703 #endif 3704 3705 static int 3706 __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap) 3707 { 3708 3709 return (do_wake2_umutex(td, uap->obj, uap->val)); 3710 } 3711 3712 static int 3713 __umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap) 3714 { 3715 struct _umtx_time *tm_p, timeout; 3716 size_t uasize; 3717 int error; 3718 3719 /* Allow a null timespec (wait forever). */ 3720 if (uap->uaddr2 == NULL) { 3721 uasize = 0; 3722 tm_p = NULL; 3723 } else { 3724 uasize = (size_t)uap->uaddr1; 3725 error = umtx_copyin_umtx_time(uap->uaddr2, uasize, &timeout); 3726 if (error != 0) 3727 return (error); 3728 tm_p = &timeout; 3729 } 3730 error = do_sem2_wait(td, uap->obj, tm_p); 3731 if (error == EINTR && uap->uaddr2 != NULL && 3732 (timeout._flags & UMTX_ABSTIME) == 0 && 3733 uasize >= sizeof(struct _umtx_time) + sizeof(struct timespec)) { 3734 error = copyout(&timeout._timeout, 3735 (struct _umtx_time *)uap->uaddr2 + 1, 3736 sizeof(struct timespec)); 3737 if (error == 0) { 3738 error = EINTR; 3739 } 3740 } 3741 3742 return (error); 3743 } 3744 3745 static int 3746 __umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap) 3747 { 3748 3749 return (do_sem2_wake(td, uap->obj)); 3750 } 3751 3752 #define USHM_OBJ_UMTX(o) \ 3753 ((struct umtx_shm_obj_list *)(&(o)->umtx_data)) 3754 3755 #define USHMF_REG_LINKED 0x0001 3756 #define USHMF_OBJ_LINKED 0x0002 3757 struct umtx_shm_reg { 3758 TAILQ_ENTRY(umtx_shm_reg) ushm_reg_link; 3759 LIST_ENTRY(umtx_shm_reg) ushm_obj_link; 3760 struct umtx_key ushm_key; 3761 struct ucred *ushm_cred; 3762 struct shmfd *ushm_obj; 3763 u_int ushm_refcnt; 3764 u_int ushm_flags; 3765 }; 3766 3767 LIST_HEAD(umtx_shm_obj_list, umtx_shm_reg); 3768 TAILQ_HEAD(umtx_shm_reg_head, umtx_shm_reg); 3769 3770 static uma_zone_t umtx_shm_reg_zone; 3771 static struct umtx_shm_reg_head umtx_shm_registry[UMTX_CHAINS]; 3772 static struct mtx umtx_shm_lock; 3773 static struct umtx_shm_reg_head umtx_shm_reg_delfree = 3774 TAILQ_HEAD_INITIALIZER(umtx_shm_reg_delfree); 3775 3776 static void umtx_shm_free_reg(struct umtx_shm_reg *reg); 3777 3778 static void 3779 umtx_shm_reg_delfree_tq(void *context __unused, int pending __unused) 3780 { 3781 struct umtx_shm_reg_head d; 3782 struct umtx_shm_reg *reg, *reg1; 3783 3784 TAILQ_INIT(&d); 3785 mtx_lock(&umtx_shm_lock); 3786 TAILQ_CONCAT(&d, &umtx_shm_reg_delfree, ushm_reg_link); 3787 
mtx_unlock(&umtx_shm_lock); 3788 TAILQ_FOREACH_SAFE(reg, &d, ushm_reg_link, reg1) { 3789 TAILQ_REMOVE(&d, reg, ushm_reg_link); 3790 umtx_shm_free_reg(reg); 3791 } 3792 } 3793 3794 static struct task umtx_shm_reg_delfree_task = 3795 TASK_INITIALIZER(0, umtx_shm_reg_delfree_tq, NULL); 3796 3797 static struct umtx_shm_reg * 3798 umtx_shm_find_reg_locked(const struct umtx_key *key) 3799 { 3800 struct umtx_shm_reg *reg; 3801 struct umtx_shm_reg_head *reg_head; 3802 3803 KASSERT(key->shared, ("umtx_p_find_rg: private key")); 3804 mtx_assert(&umtx_shm_lock, MA_OWNED); 3805 reg_head = &umtx_shm_registry[key->hash]; 3806 TAILQ_FOREACH(reg, reg_head, ushm_reg_link) { 3807 KASSERT(reg->ushm_key.shared, 3808 ("non-shared key on reg %p %d", reg, reg->ushm_key.shared)); 3809 if (reg->ushm_key.info.shared.object == 3810 key->info.shared.object && 3811 reg->ushm_key.info.shared.offset == 3812 key->info.shared.offset) { 3813 KASSERT(reg->ushm_key.type == TYPE_SHM, ("TYPE_USHM")); 3814 KASSERT(reg->ushm_refcnt > 0, 3815 ("reg %p refcnt 0 onlist", reg)); 3816 KASSERT((reg->ushm_flags & USHMF_REG_LINKED) != 0, 3817 ("reg %p not linked", reg)); 3818 reg->ushm_refcnt++; 3819 return (reg); 3820 } 3821 } 3822 return (NULL); 3823 } 3824 3825 static struct umtx_shm_reg * 3826 umtx_shm_find_reg(const struct umtx_key *key) 3827 { 3828 struct umtx_shm_reg *reg; 3829 3830 mtx_lock(&umtx_shm_lock); 3831 reg = umtx_shm_find_reg_locked(key); 3832 mtx_unlock(&umtx_shm_lock); 3833 return (reg); 3834 } 3835 3836 static void 3837 umtx_shm_free_reg(struct umtx_shm_reg *reg) 3838 { 3839 3840 chgumtxcnt(reg->ushm_cred->cr_ruidinfo, -1, 0); 3841 crfree(reg->ushm_cred); 3842 shm_drop(reg->ushm_obj); 3843 uma_zfree(umtx_shm_reg_zone, reg); 3844 } 3845 3846 static bool 3847 umtx_shm_unref_reg_locked(struct umtx_shm_reg *reg, bool force) 3848 { 3849 bool res; 3850 3851 mtx_assert(&umtx_shm_lock, MA_OWNED); 3852 KASSERT(reg->ushm_refcnt > 0, ("ushm_reg %p refcnt 0", reg)); 3853 reg->ushm_refcnt--; 3854 res = reg->ushm_refcnt == 0; 3855 if (res || force) { 3856 if ((reg->ushm_flags & USHMF_REG_LINKED) != 0) { 3857 TAILQ_REMOVE(&umtx_shm_registry[reg->ushm_key.hash], 3858 reg, ushm_reg_link); 3859 reg->ushm_flags &= ~USHMF_REG_LINKED; 3860 } 3861 if ((reg->ushm_flags & USHMF_OBJ_LINKED) != 0) { 3862 LIST_REMOVE(reg, ushm_obj_link); 3863 reg->ushm_flags &= ~USHMF_OBJ_LINKED; 3864 } 3865 } 3866 return (res); 3867 } 3868 3869 static void 3870 umtx_shm_unref_reg(struct umtx_shm_reg *reg, bool force) 3871 { 3872 vm_object_t object; 3873 bool dofree; 3874 3875 if (force) { 3876 object = reg->ushm_obj->shm_object; 3877 VM_OBJECT_WLOCK(object); 3878 object->flags |= OBJ_UMTXDEAD; 3879 VM_OBJECT_WUNLOCK(object); 3880 } 3881 mtx_lock(&umtx_shm_lock); 3882 dofree = umtx_shm_unref_reg_locked(reg, force); 3883 mtx_unlock(&umtx_shm_lock); 3884 if (dofree) 3885 umtx_shm_free_reg(reg); 3886 } 3887 3888 void 3889 umtx_shm_object_init(vm_object_t object) 3890 { 3891 3892 LIST_INIT(USHM_OBJ_UMTX(object)); 3893 } 3894 3895 void 3896 umtx_shm_object_terminated(vm_object_t object) 3897 { 3898 struct umtx_shm_reg *reg, *reg1; 3899 bool dofree; 3900 3901 if (LIST_EMPTY(USHM_OBJ_UMTX(object))) 3902 return; 3903 3904 dofree = false; 3905 mtx_lock(&umtx_shm_lock); 3906 LIST_FOREACH_SAFE(reg, USHM_OBJ_UMTX(object), ushm_obj_link, reg1) { 3907 if (umtx_shm_unref_reg_locked(reg, true)) { 3908 TAILQ_INSERT_TAIL(&umtx_shm_reg_delfree, reg, 3909 ushm_reg_link); 3910 dofree = true; 3911 } 3912 } 3913 mtx_unlock(&umtx_shm_lock); 3914 if (dofree) 3915 
taskqueue_enqueue(taskqueue_thread, &umtx_shm_reg_delfree_task); 3916 } 3917 3918 static int 3919 umtx_shm_create_reg(struct thread *td, const struct umtx_key *key, 3920 struct umtx_shm_reg **res) 3921 { 3922 struct umtx_shm_reg *reg, *reg1; 3923 struct ucred *cred; 3924 int error; 3925 3926 reg = umtx_shm_find_reg(key); 3927 if (reg != NULL) { 3928 *res = reg; 3929 return (0); 3930 } 3931 cred = td->td_ucred; 3932 if (!chgumtxcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_UMTXP))) 3933 return (ENOMEM); 3934 reg = uma_zalloc(umtx_shm_reg_zone, M_WAITOK | M_ZERO); 3935 reg->ushm_refcnt = 1; 3936 bcopy(key, ®->ushm_key, sizeof(*key)); 3937 reg->ushm_obj = shm_alloc(td->td_ucred, O_RDWR, false); 3938 reg->ushm_cred = crhold(cred); 3939 error = shm_dotruncate(reg->ushm_obj, PAGE_SIZE); 3940 if (error != 0) { 3941 umtx_shm_free_reg(reg); 3942 return (error); 3943 } 3944 mtx_lock(&umtx_shm_lock); 3945 reg1 = umtx_shm_find_reg_locked(key); 3946 if (reg1 != NULL) { 3947 mtx_unlock(&umtx_shm_lock); 3948 umtx_shm_free_reg(reg); 3949 *res = reg1; 3950 return (0); 3951 } 3952 reg->ushm_refcnt++; 3953 TAILQ_INSERT_TAIL(&umtx_shm_registry[key->hash], reg, ushm_reg_link); 3954 LIST_INSERT_HEAD(USHM_OBJ_UMTX(key->info.shared.object), reg, 3955 ushm_obj_link); 3956 reg->ushm_flags = USHMF_REG_LINKED | USHMF_OBJ_LINKED; 3957 mtx_unlock(&umtx_shm_lock); 3958 *res = reg; 3959 return (0); 3960 } 3961 3962 static int 3963 umtx_shm_alive(struct thread *td, void *addr) 3964 { 3965 vm_map_t map; 3966 vm_map_entry_t entry; 3967 vm_object_t object; 3968 vm_pindex_t pindex; 3969 vm_prot_t prot; 3970 int res, ret; 3971 boolean_t wired; 3972 3973 map = &td->td_proc->p_vmspace->vm_map; 3974 res = vm_map_lookup(&map, (uintptr_t)addr, VM_PROT_READ, &entry, 3975 &object, &pindex, &prot, &wired); 3976 if (res != KERN_SUCCESS) 3977 return (EFAULT); 3978 if (object == NULL) 3979 ret = EINVAL; 3980 else 3981 ret = (object->flags & OBJ_UMTXDEAD) != 0 ? 
ENOTTY : 0; 3982 vm_map_lookup_done(map, entry); 3983 return (ret); 3984 } 3985 3986 static void 3987 umtx_shm_init(void) 3988 { 3989 int i; 3990 3991 umtx_shm_reg_zone = uma_zcreate("umtx_shm", sizeof(struct umtx_shm_reg), 3992 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 3993 mtx_init(&umtx_shm_lock, "umtxshm", NULL, MTX_DEF); 3994 for (i = 0; i < nitems(umtx_shm_registry); i++) 3995 TAILQ_INIT(&umtx_shm_registry[i]); 3996 } 3997 3998 static int 3999 umtx_shm(struct thread *td, void *addr, u_int flags) 4000 { 4001 struct umtx_key key; 4002 struct umtx_shm_reg *reg; 4003 struct file *fp; 4004 int error, fd; 4005 4006 if (__bitcount(flags & (UMTX_SHM_CREAT | UMTX_SHM_LOOKUP | 4007 UMTX_SHM_DESTROY| UMTX_SHM_ALIVE)) != 1) 4008 return (EINVAL); 4009 if ((flags & UMTX_SHM_ALIVE) != 0) 4010 return (umtx_shm_alive(td, addr)); 4011 error = umtx_key_get(addr, TYPE_SHM, PROCESS_SHARE, &key); 4012 if (error != 0) 4013 return (error); 4014 KASSERT(key.shared == 1, ("non-shared key")); 4015 if ((flags & UMTX_SHM_CREAT) != 0) { 4016 error = umtx_shm_create_reg(td, &key, ®); 4017 } else { 4018 reg = umtx_shm_find_reg(&key); 4019 if (reg == NULL) 4020 error = ESRCH; 4021 } 4022 umtx_key_release(&key); 4023 if (error != 0) 4024 return (error); 4025 KASSERT(reg != NULL, ("no reg")); 4026 if ((flags & UMTX_SHM_DESTROY) != 0) { 4027 umtx_shm_unref_reg(reg, true); 4028 } else { 4029 #if 0 4030 #ifdef MAC 4031 error = mac_posixshm_check_open(td->td_ucred, 4032 reg->ushm_obj, FFLAGS(O_RDWR)); 4033 if (error == 0) 4034 #endif 4035 error = shm_access(reg->ushm_obj, td->td_ucred, 4036 FFLAGS(O_RDWR)); 4037 if (error == 0) 4038 #endif 4039 error = falloc_caps(td, &fp, &fd, O_CLOEXEC, NULL); 4040 if (error == 0) { 4041 shm_hold(reg->ushm_obj); 4042 finit(fp, FFLAGS(O_RDWR), DTYPE_SHM, reg->ushm_obj, 4043 &shm_ops); 4044 td->td_retval[0] = fd; 4045 fdrop(fp, td); 4046 } 4047 } 4048 umtx_shm_unref_reg(reg, false); 4049 return (error); 4050 } 4051 4052 static int 4053 __umtx_op_shm(struct thread *td, struct _umtx_op_args *uap) 4054 { 4055 4056 return (umtx_shm(td, uap->uaddr1, uap->val)); 4057 } 4058 4059 static int 4060 umtx_robust_lists(struct thread *td, struct umtx_robust_lists_params *rbp) 4061 { 4062 4063 td->td_rb_list = rbp->robust_list_offset; 4064 td->td_rbp_list = rbp->robust_priv_list_offset; 4065 td->td_rb_inact = rbp->robust_inact_offset; 4066 return (0); 4067 } 4068 4069 static int 4070 __umtx_op_robust_lists(struct thread *td, struct _umtx_op_args *uap) 4071 { 4072 struct umtx_robust_lists_params rb; 4073 int error; 4074 4075 if (uap->val > sizeof(rb)) 4076 return (EINVAL); 4077 bzero(&rb, sizeof(rb)); 4078 error = copyin(uap->uaddr1, &rb, uap->val); 4079 if (error != 0) 4080 return (error); 4081 return (umtx_robust_lists(td, &rb)); 4082 } 4083 4084 typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap); 4085 4086 static const _umtx_op_func op_table[] = { 4087 [UMTX_OP_RESERVED0] = __umtx_op_unimpl, 4088 [UMTX_OP_RESERVED1] = __umtx_op_unimpl, 4089 [UMTX_OP_WAIT] = __umtx_op_wait, 4090 [UMTX_OP_WAKE] = __umtx_op_wake, 4091 [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex, 4092 [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex, 4093 [UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex, 4094 [UMTX_OP_SET_CEILING] = __umtx_op_set_ceiling, 4095 [UMTX_OP_CV_WAIT] = __umtx_op_cv_wait, 4096 [UMTX_OP_CV_SIGNAL] = __umtx_op_cv_signal, 4097 [UMTX_OP_CV_BROADCAST] = __umtx_op_cv_broadcast, 4098 [UMTX_OP_WAIT_UINT] = __umtx_op_wait_uint, 4099 [UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock, 4100 
[UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock, 4101 [UMTX_OP_RW_UNLOCK] = __umtx_op_rw_unlock, 4102 [UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private, 4103 [UMTX_OP_WAKE_PRIVATE] = __umtx_op_wake_private, 4104 [UMTX_OP_MUTEX_WAIT] = __umtx_op_wait_umutex, 4105 [UMTX_OP_MUTEX_WAKE] = __umtx_op_wake_umutex, 4106 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4107 [UMTX_OP_SEM_WAIT] = __umtx_op_sem_wait, 4108 [UMTX_OP_SEM_WAKE] = __umtx_op_sem_wake, 4109 #else 4110 [UMTX_OP_SEM_WAIT] = __umtx_op_unimpl, 4111 [UMTX_OP_SEM_WAKE] = __umtx_op_unimpl, 4112 #endif 4113 [UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private, 4114 [UMTX_OP_MUTEX_WAKE2] = __umtx_op_wake2_umutex, 4115 [UMTX_OP_SEM2_WAIT] = __umtx_op_sem2_wait, 4116 [UMTX_OP_SEM2_WAKE] = __umtx_op_sem2_wake, 4117 [UMTX_OP_SHM] = __umtx_op_shm, 4118 [UMTX_OP_ROBUST_LISTS] = __umtx_op_robust_lists, 4119 }; 4120 4121 int 4122 sys__umtx_op(struct thread *td, struct _umtx_op_args *uap) 4123 { 4124 4125 if ((unsigned)uap->op < nitems(op_table)) 4126 return (*op_table[uap->op])(td, uap); 4127 return (EINVAL); 4128 } 4129 4130 #ifdef COMPAT_FREEBSD32 4131 4132 struct umtx_time32 { 4133 struct timespec32 timeout; 4134 uint32_t flags; 4135 uint32_t clockid; 4136 }; 4137 4138 static inline int 4139 umtx_copyin_timeout32(void *addr, struct timespec *tsp) 4140 { 4141 struct timespec32 ts32; 4142 int error; 4143 4144 error = copyin(addr, &ts32, sizeof(struct timespec32)); 4145 if (error == 0) { 4146 if (ts32.tv_sec < 0 || 4147 ts32.tv_nsec >= 1000000000 || 4148 ts32.tv_nsec < 0) 4149 error = EINVAL; 4150 else { 4151 tsp->tv_sec = ts32.tv_sec; 4152 tsp->tv_nsec = ts32.tv_nsec; 4153 } 4154 } 4155 return (error); 4156 } 4157 4158 static inline int 4159 umtx_copyin_umtx_time32(const void *addr, size_t size, struct _umtx_time *tp) 4160 { 4161 struct umtx_time32 t32; 4162 int error; 4163 4164 t32.clockid = CLOCK_REALTIME; 4165 t32.flags = 0; 4166 if (size <= sizeof(struct timespec32)) 4167 error = copyin(addr, &t32.timeout, sizeof(struct timespec32)); 4168 else 4169 error = copyin(addr, &t32, sizeof(struct umtx_time32)); 4170 if (error != 0) 4171 return (error); 4172 if (t32.timeout.tv_sec < 0 || 4173 t32.timeout.tv_nsec >= 1000000000 || t32.timeout.tv_nsec < 0) 4174 return (EINVAL); 4175 tp->_timeout.tv_sec = t32.timeout.tv_sec; 4176 tp->_timeout.tv_nsec = t32.timeout.tv_nsec; 4177 tp->_flags = t32.flags; 4178 tp->_clockid = t32.clockid; 4179 return (0); 4180 } 4181 4182 static int 4183 __umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 4184 { 4185 struct _umtx_time *tm_p, timeout; 4186 int error; 4187 4188 if (uap->uaddr2 == NULL) 4189 tm_p = NULL; 4190 else { 4191 error = umtx_copyin_umtx_time32(uap->uaddr2, 4192 (size_t)uap->uaddr1, &timeout); 4193 if (error != 0) 4194 return (error); 4195 tm_p = &timeout; 4196 } 4197 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); 4198 } 4199 4200 static int 4201 __umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap) 4202 { 4203 struct _umtx_time *tm_p, timeout; 4204 int error; 4205 4206 /* Allow a null timespec (wait forever). 
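 *
 * A 32-bit process supplies the ILP32 layout declared above as struct
 * umtx_time32; e.g. (hypothetical sketch from the 32-bit side):
 *
 *	struct umtx_time32 t32 = { .timeout = ts32,
 *	    .clockid = CLOCK_REALTIME };
 *	_umtx_op(&m, UMTX_OP_MUTEX_LOCK, 0, (void *)sizeof(t32), &t32);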
*/ 4207 if (uap->uaddr2 == NULL) 4208 tm_p = NULL; 4209 else { 4210 error = umtx_copyin_umtx_time32(uap->uaddr2, 4211 (size_t)uap->uaddr1, &timeout); 4212 if (error != 0) 4213 return (error); 4214 tm_p = &timeout; 4215 } 4216 return (do_lock_umutex(td, uap->obj, tm_p, 0)); 4217 } 4218 4219 static int 4220 __umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap) 4221 { 4222 struct _umtx_time *tm_p, timeout; 4223 int error; 4224 4225 /* Allow a null timespec (wait forever). */ 4226 if (uap->uaddr2 == NULL) 4227 tm_p = NULL; 4228 else { 4229 error = umtx_copyin_umtx_time32(uap->uaddr2, 4230 (size_t)uap->uaddr1, &timeout); 4231 if (error != 0) 4232 return (error); 4233 tm_p = &timeout; 4234 } 4235 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT)); 4236 } 4237 4238 static int 4239 __umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 4240 { 4241 struct timespec *ts, timeout; 4242 int error; 4243 4244 /* Allow a null timespec (wait forever). */ 4245 if (uap->uaddr2 == NULL) 4246 ts = NULL; 4247 else { 4248 error = umtx_copyin_timeout32(uap->uaddr2, &timeout); 4249 if (error != 0) 4250 return (error); 4251 ts = &timeout; 4252 } 4253 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 4254 } 4255 4256 static int 4257 __umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap) 4258 { 4259 struct _umtx_time timeout; 4260 int error; 4261 4262 /* Allow a null timespec (wait forever). */ 4263 if (uap->uaddr2 == NULL) { 4264 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 4265 } else { 4266 error = umtx_copyin_umtx_time32(uap->uaddr2, 4267 (size_t)uap->uaddr1, &timeout); 4268 if (error != 0) 4269 return (error); 4270 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 4271 } 4272 return (error); 4273 } 4274 4275 static int 4276 __umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap) 4277 { 4278 struct _umtx_time timeout; 4279 int error; 4280 4281 /* Allow a null timespec (wait forever). */ 4282 if (uap->uaddr2 == NULL) { 4283 error = do_rw_wrlock(td, uap->obj, 0); 4284 } else { 4285 error = umtx_copyin_umtx_time32(uap->uaddr2, 4286 (size_t)uap->uaddr1, &timeout); 4287 if (error != 0) 4288 return (error); 4289 error = do_rw_wrlock(td, uap->obj, &timeout); 4290 } 4291 return (error); 4292 } 4293 4294 static int 4295 __umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap) 4296 { 4297 struct _umtx_time *tm_p, timeout; 4298 int error; 4299 4300 if (uap->uaddr2 == NULL) 4301 tm_p = NULL; 4302 else { 4303 error = umtx_copyin_umtx_time32( 4304 uap->uaddr2, (size_t)uap->uaddr1,&timeout); 4305 if (error != 0) 4306 return (error); 4307 tm_p = &timeout; 4308 } 4309 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1)); 4310 } 4311 4312 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4313 static int 4314 __umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 4315 { 4316 struct _umtx_time *tm_p, timeout; 4317 int error; 4318 4319 /* Allow a null timespec (wait forever). 
*/ 4320 if (uap->uaddr2 == NULL) 4321 tm_p = NULL; 4322 else { 4323 error = umtx_copyin_umtx_time32(uap->uaddr2, 4324 (size_t)uap->uaddr1, &timeout); 4325 if (error != 0) 4326 return (error); 4327 tm_p = &timeout; 4328 } 4329 return (do_sem_wait(td, uap->obj, tm_p)); 4330 } 4331 #endif 4332 4333 static int 4334 __umtx_op_sem2_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 4335 { 4336 struct _umtx_time *tm_p, timeout; 4337 size_t uasize; 4338 int error; 4339 4340 /* Allow a null timespec (wait forever). */ 4341 if (uap->uaddr2 == NULL) { 4342 uasize = 0; 4343 tm_p = NULL; 4344 } else { 4345 uasize = (size_t)uap->uaddr1; 4346 error = umtx_copyin_umtx_time32(uap->uaddr2, uasize, &timeout); 4347 if (error != 0) 4348 return (error); 4349 tm_p = &timeout; 4350 } 4351 error = do_sem2_wait(td, uap->obj, tm_p); 4352 if (error == EINTR && uap->uaddr2 != NULL && 4353 (timeout._flags & UMTX_ABSTIME) == 0 && 4354 uasize >= sizeof(struct umtx_time32) + sizeof(struct timespec32)) { 4355 struct timespec32 remain32 = { 4356 .tv_sec = timeout._timeout.tv_sec, 4357 .tv_nsec = timeout._timeout.tv_nsec 4358 }; 4359 error = copyout(&remain32, 4360 (struct umtx_time32 *)uap->uaddr2 + 1, 4361 sizeof(struct timespec32)); 4362 if (error == 0) { 4363 error = EINTR; 4364 } 4365 } 4366 4367 return (error); 4368 } 4369 4370 static int 4371 __umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap) 4372 { 4373 uint32_t uaddrs[BATCH_SIZE], **upp; 4374 int count, error, i, pos, tocopy; 4375 4376 upp = (uint32_t **)uap->obj; 4377 error = 0; 4378 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 4379 pos += tocopy) { 4380 tocopy = MIN(count, BATCH_SIZE); 4381 error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t)); 4382 if (error != 0) 4383 break; 4384 for (i = 0; i < tocopy; ++i) 4385 kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i], 4386 INT_MAX, 1); 4387 maybe_yield(); 4388 } 4389 return (error); 4390 } 4391 4392 struct umtx_robust_lists_params_compat32 { 4393 uint32_t robust_list_offset; 4394 uint32_t robust_priv_list_offset; 4395 uint32_t robust_inact_offset; 4396 }; 4397 4398 static int 4399 __umtx_op_robust_lists_compat32(struct thread *td, struct _umtx_op_args *uap) 4400 { 4401 struct umtx_robust_lists_params rb; 4402 struct umtx_robust_lists_params_compat32 rb32; 4403 int error; 4404 4405 if (uap->val > sizeof(rb32)) 4406 return (EINVAL); 4407 bzero(&rb, sizeof(rb)); 4408 bzero(&rb32, sizeof(rb32)); 4409 error = copyin(uap->uaddr1, &rb32, uap->val); 4410 if (error != 0) 4411 return (error); 4412 rb.robust_list_offset = rb32.robust_list_offset; 4413 rb.robust_priv_list_offset = rb32.robust_priv_list_offset; 4414 rb.robust_inact_offset = rb32.robust_inact_offset; 4415 return (umtx_robust_lists(td, &rb)); 4416 } 4417 4418 static const _umtx_op_func op_table_compat32[] = { 4419 [UMTX_OP_RESERVED0] = __umtx_op_unimpl, 4420 [UMTX_OP_RESERVED1] = __umtx_op_unimpl, 4421 [UMTX_OP_WAIT] = __umtx_op_wait_compat32, 4422 [UMTX_OP_WAKE] = __umtx_op_wake, 4423 [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex, 4424 [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex_compat32, 4425 [UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex, 4426 [UMTX_OP_SET_CEILING] = __umtx_op_set_ceiling, 4427 [UMTX_OP_CV_WAIT] = __umtx_op_cv_wait_compat32, 4428 [UMTX_OP_CV_SIGNAL] = __umtx_op_cv_signal, 4429 [UMTX_OP_CV_BROADCAST] = __umtx_op_cv_broadcast, 4430 [UMTX_OP_WAIT_UINT] = __umtx_op_wait_compat32, 4431 [UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock_compat32, 4432 [UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock_compat32, 
static const _umtx_op_func op_table_compat32[] = {
	[UMTX_OP_RESERVED0]	= __umtx_op_unimpl,
	[UMTX_OP_RESERVED1]	= __umtx_op_unimpl,
	[UMTX_OP_WAIT]		= __umtx_op_wait_compat32,
	[UMTX_OP_WAKE]		= __umtx_op_wake,
	[UMTX_OP_MUTEX_TRYLOCK]	= __umtx_op_trylock_umutex,
	[UMTX_OP_MUTEX_LOCK]	= __umtx_op_lock_umutex_compat32,
	[UMTX_OP_MUTEX_UNLOCK]	= __umtx_op_unlock_umutex,
	[UMTX_OP_SET_CEILING]	= __umtx_op_set_ceiling,
	[UMTX_OP_CV_WAIT]	= __umtx_op_cv_wait_compat32,
	[UMTX_OP_CV_SIGNAL]	= __umtx_op_cv_signal,
	[UMTX_OP_CV_BROADCAST]	= __umtx_op_cv_broadcast,
	[UMTX_OP_WAIT_UINT]	= __umtx_op_wait_compat32,
	[UMTX_OP_RW_RDLOCK]	= __umtx_op_rw_rdlock_compat32,
	[UMTX_OP_RW_WRLOCK]	= __umtx_op_rw_wrlock_compat32,
	[UMTX_OP_RW_UNLOCK]	= __umtx_op_rw_unlock,
	[UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private_compat32,
	[UMTX_OP_WAKE_PRIVATE]	= __umtx_op_wake_private,
	[UMTX_OP_MUTEX_WAIT]	= __umtx_op_wait_umutex_compat32,
	[UMTX_OP_MUTEX_WAKE]	= __umtx_op_wake_umutex,
#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
	[UMTX_OP_SEM_WAIT]	= __umtx_op_sem_wait_compat32,
	[UMTX_OP_SEM_WAKE]	= __umtx_op_sem_wake,
#else
	[UMTX_OP_SEM_WAIT]	= __umtx_op_unimpl,
	[UMTX_OP_SEM_WAKE]	= __umtx_op_unimpl,
#endif
	[UMTX_OP_NWAKE_PRIVATE]	= __umtx_op_nwake_private32,
	[UMTX_OP_MUTEX_WAKE2]	= __umtx_op_wake2_umutex,
	[UMTX_OP_SEM2_WAIT]	= __umtx_op_sem2_wait_compat32,
	[UMTX_OP_SEM2_WAKE]	= __umtx_op_sem2_wake,
	[UMTX_OP_SHM]		= __umtx_op_shm,
	[UMTX_OP_ROBUST_LISTS]	= __umtx_op_robust_lists_compat32,
};

int
freebsd32__umtx_op(struct thread *td, struct freebsd32__umtx_op_args *uap)
{

	if ((unsigned)uap->op < nitems(op_table_compat32)) {
		return (*op_table_compat32[uap->op])(td,
		    (struct _umtx_op_args *)uap);
	}
	return (EINVAL);
}
#endif

void
umtx_thread_init(struct thread *td)
{

	td->td_umtxq = umtxq_alloc();
	td->td_umtxq->uq_thread = td;
}

void
umtx_thread_fini(struct thread *td)
{

	umtxq_free(td->td_umtxq);
}

/*
 * Called when a new thread is created, e.g. during fork().
 */
void
umtx_thread_alloc(struct thread *td)
{
	struct umtx_q *uq;

	uq = td->td_umtxq;
	uq->uq_inherited_pri = PRI_MAX;

	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested),
	    ("uq_pi_contested is not empty"));
}

/*
 * exec() hook.
 *
 * Clear the robust lists for all of the process's threads, rather than
 * delaying the cleanup to the thread_exit hook, since the relevant
 * address space is destroyed right now.
 */
static void
umtx_exec_hook(void *arg __unused, struct proc *p,
    struct image_params *imgp __unused)
{
	struct thread *td;

	KASSERT(p == curproc, ("need curproc"));
	KASSERT((p->p_flag & P_HADTHREADS) == 0 ||
	    (p->p_flag & P_STOPPED_SINGLE) != 0,
	    ("curproc must be single-threaded"));
	/*
	 * There is no need to lock the list as only this thread can be
	 * running.
	 */
	FOREACH_THREAD_IN_PROC(p, td) {
		KASSERT(td == curthread ||
		    ((td->td_flags & TDF_BOUNDARY) != 0 &&
		    TD_IS_SUSPENDED(td)),
		    ("running thread %p %p", p, td));
		umtx_thread_cleanup(td);
		td->td_rb_list = td->td_rbp_list = td->td_rb_inact = 0;
	}
}
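
/*
 * Rough lifetime of the per-thread umtx state managed by the hooks
 * above and below (a summary drawn from this file's comments, not an
 * exhaustive call graph):
 *
 *	thread object ctor	umtx_thread_init()	allocate td_umtxq
 *	thread creation/fork	umtx_thread_alloc()	reset priority state
 *	execve			umtx_exec_hook()	clean robust lists early
 *	thread_exit()		umtx_thread_exit()	disown PI, walk robust
 *	thread object dtor	umtx_thread_fini()	free td_umtxq
 */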
4529 */ 4530 void 4531 umtx_thread_exit(struct thread *td) 4532 { 4533 4534 umtx_thread_cleanup(td); 4535 } 4536 4537 static int 4538 umtx_read_uptr(struct thread *td, uintptr_t ptr, uintptr_t *res) 4539 { 4540 u_long res1; 4541 #ifdef COMPAT_FREEBSD32 4542 uint32_t res32; 4543 #endif 4544 int error; 4545 4546 #ifdef COMPAT_FREEBSD32 4547 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { 4548 error = fueword32((void *)ptr, &res32); 4549 if (error == 0) 4550 res1 = res32; 4551 } else 4552 #endif 4553 { 4554 error = fueword((void *)ptr, &res1); 4555 } 4556 if (error == 0) 4557 *res = res1; 4558 else 4559 error = EFAULT; 4560 return (error); 4561 } 4562 4563 static void 4564 umtx_read_rb_list(struct thread *td, struct umutex *m, uintptr_t *rb_list) 4565 { 4566 #ifdef COMPAT_FREEBSD32 4567 struct umutex32 m32; 4568 4569 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { 4570 memcpy(&m32, m, sizeof(m32)); 4571 *rb_list = m32.m_rb_lnk; 4572 } else 4573 #endif 4574 *rb_list = m->m_rb_lnk; 4575 } 4576 4577 static int 4578 umtx_handle_rb(struct thread *td, uintptr_t rbp, uintptr_t *rb_list, bool inact) 4579 { 4580 struct umutex m; 4581 int error; 4582 4583 KASSERT(td->td_proc == curproc, ("need current vmspace")); 4584 error = copyin((void *)rbp, &m, sizeof(m)); 4585 if (error != 0) 4586 return (error); 4587 if (rb_list != NULL) 4588 umtx_read_rb_list(td, &m, rb_list); 4589 if ((m.m_flags & UMUTEX_ROBUST) == 0) 4590 return (EINVAL); 4591 if ((m.m_owner & ~UMUTEX_CONTESTED) != td->td_tid) 4592 /* inact is cleared after unlock, allow the inconsistency */ 4593 return (inact ? 0 : EINVAL); 4594 return (do_unlock_umutex(td, (struct umutex *)rbp, true)); 4595 } 4596 4597 static void 4598 umtx_cleanup_rb_list(struct thread *td, uintptr_t rb_list, uintptr_t *rb_inact, 4599 const char *name) 4600 { 4601 int error, i; 4602 uintptr_t rbp; 4603 bool inact; 4604 4605 if (rb_list == 0) 4606 return; 4607 error = umtx_read_uptr(td, rb_list, &rbp); 4608 for (i = 0; error == 0 && rbp != 0 && i < umtx_max_rb; i++) { 4609 if (rbp == *rb_inact) { 4610 inact = true; 4611 *rb_inact = 0; 4612 } else 4613 inact = false; 4614 error = umtx_handle_rb(td, rbp, &rbp, inact); 4615 } 4616 if (i == umtx_max_rb && umtx_verbose_rb) { 4617 uprintf("comm %s pid %d: reached umtx %smax rb %d\n", 4618 td->td_proc->p_comm, td->td_proc->p_pid, name, umtx_max_rb); 4619 } 4620 if (error != 0 && umtx_verbose_rb) { 4621 uprintf("comm %s pid %d: handling %srb error %d\n", 4622 td->td_proc->p_comm, td->td_proc->p_pid, name, error); 4623 } 4624 } 4625 4626 /* 4627 * Clean up umtx data. 4628 */ 4629 static void 4630 umtx_thread_cleanup(struct thread *td) 4631 { 4632 struct umtx_q *uq; 4633 struct umtx_pi *pi; 4634 uintptr_t rb_inact; 4635 4636 /* 4637 * Disown pi mutexes. 4638 */ 4639 uq = td->td_umtxq; 4640 if (uq != NULL) { 4641 if (uq->uq_inherited_pri != PRI_MAX || 4642 !TAILQ_EMPTY(&uq->uq_pi_contested)) { 4643 mtx_lock(&umtx_lock); 4644 uq->uq_inherited_pri = PRI_MAX; 4645 while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) { 4646 pi->pi_owner = NULL; 4647 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link); 4648 } 4649 mtx_unlock(&umtx_lock); 4650 } 4651 sched_lend_user_prio_cond(td, PRI_MAX); 4652 } 4653 4654 if (td->td_rb_inact == 0 && td->td_rb_list == 0 && td->td_rbp_list == 0) 4655 return; 4656 4657 /* 4658 * Handle terminated robust mutexes. Must be done after 4659 * robust pi disown, otherwise unlock could see unowned 4660 * entries. 
4661 */ 4662 rb_inact = td->td_rb_inact; 4663 if (rb_inact != 0) 4664 (void)umtx_read_uptr(td, rb_inact, &rb_inact); 4665 umtx_cleanup_rb_list(td, td->td_rb_list, &rb_inact, ""); 4666 umtx_cleanup_rb_list(td, td->td_rbp_list, &rb_inact, "priv "); 4667 if (rb_inact != 0) 4668 (void)umtx_handle_rb(td, rb_inact, NULL, true); 4669 } 4670