/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2015, 2016 The FreeBSD Foundation
 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Portions of this software were developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_umtx_profiling.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/syscallsubr.h>
#include <sys/taskqueue.h>
#include <sys/time.h>
#include <sys/eventhandler.h>
#include <sys/umtx.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

#include <machine/atomic.h>
#include <machine/cpu.h>

#ifdef COMPAT_FREEBSD32
#include <compat/freebsd32/freebsd32_proto.h>
#endif

#define	_UMUTEX_TRY		1
#define	_UMUTEX_WAIT		2

#ifdef UMTX_PROFILING
#define	UPROF_PERC_BIGGER(w, f, sw, sf)					\
	(((w) > (sw)) || ((w) == (sw) && (f) > (sf)))
#endif

/* Priority inheritance mutex info. */
struct umtx_pi {
	/* Owner thread */
	struct thread		*pi_owner;

	/* Reference count */
	int			pi_refcount;

	/* List entry to link umtx held by owner thread */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* List entry in hash */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* List for waiters */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Identify a userland lock object */
	struct umtx_key		pi_key;
};
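
/*
 * The list fields of struct umtx_pi tie the PI bookkeeping together:
 * pi_link places the structure on the owning thread's uq_pi_contested
 * list, pi_hashlink places it on the per-chain uc_pi_list hash list,
 * and pi_blocked holds the umtx_q entries of the threads sleeping on
 * the mutex, kept sorted by priority.
 */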

/* A userland synchronous object user. */
struct umtx_q {
	/* Linked list for the hash. */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Umtx key. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define UQF_UMTXQ	0x0001

	/* The thread that is waiting. */
	struct thread		*uq_thread;

	/*
	 * Blocked on PI mutex.  Reads can use either the chain lock
	 * or umtx_lock; writes must hold both the chain lock and
	 * umtx_lock.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* On blocked list */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* Threads contending with us */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Inherited priority from PP mutex */
	u_char			uq_inherited_pri;

	/* Spare queue ready to be reused */
	struct umtxq_queue	*uq_spare_queue;

	/* The queue we are on */
	struct umtxq_queue	*uq_cur_queue;
};

TAILQ_HEAD(umtxq_head, umtx_q);

/* Per-key wait-queue */
struct umtxq_queue {
	struct umtxq_head	head;
	struct umtx_key		key;
	LIST_ENTRY(umtxq_queue)	link;
	int			length;
};

LIST_HEAD(umtxq_list, umtxq_queue);

/* Userland lock object's wait-queue chain */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx		uc_lock;

	/* List of sleep queues. */
	struct umtxq_list	uc_queue[2];
#define UMTX_SHARED_QUEUE	0
#define UMTX_EXCLUSIVE_QUEUE	1

	LIST_HEAD(, umtxq_queue) uc_spare_queue;

	/* Busy flag */
	char			uc_busy;

	/* Chain lock waiters */
	int			uc_waiters;

	/* All PIs in the list */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;

#ifdef UMTX_PROFILING
	u_int			length;
	u_int			max_length;
#endif
};

#define	UMTXQ_LOCKED_ASSERT(uc)		mtx_assert(&(uc)->uc_lock, MA_OWNED)

/*
 * Do not propagate time-sharing priority; there is a security reason:
 * a user can simply introduce a PI mutex, let thread A lock it, and
 * let another thread B block on it.  Because B is sleeping, its
 * priority will be boosted; this boosts A's priority via priority
 * propagation too, and A's priority would never be lowered even if A
 * is using 100% CPU, which is unfair to other processes.
 */

#define UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&	\
			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?	\
			 PRI_MAX_TIMESHARE : (td)->td_user_pri)
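
/*
 * In other words, UPRI() clamps every time-sharing thread to
 * PRI_MAX_TIMESHARE, the numerically largest (least favorable)
 * time-sharing priority, so that only real-time and other
 * non-time-sharing priorities are ever lent through PI/PP mutexes.
 * For example, two time-sharing waiters with different nice values
 * contribute the same UPRI() value, while a real-time waiter
 * contributes its actual priority.
 */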

#define	GOLDEN_RATIO_PRIME	2654404609U
#ifndef	UMTX_CHAINS
#define	UMTX_CHAINS		512
#endif
#define	UMTX_SHIFTS		(__WORD_BIT - 9)

#define	GET_SHARE(flags)	\
    (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)

#define	BUSY_SPINS		200

struct abs_timeout {
	int clockid;
	bool is_abs_real;	/* TIMER_ABSTIME && CLOCK_REALTIME* */
	struct timespec cur;
	struct timespec end;
};

#ifdef COMPAT_FREEBSD32
struct umutex32 {
	volatile __lwpid_t	m_owner;	/* Owner of the mutex */
	__uint32_t		m_flags;	/* Flags of the mutex */
	__uint32_t		m_ceilings[2];	/* Priority protect ceiling */
	__uint32_t		m_rb_lnk;	/* Robust linkage */
	__uint32_t		m_pad;
	__uint32_t		m_spare[2];
};

_Static_assert(sizeof(struct umutex) == sizeof(struct umutex32), "umutex32");
_Static_assert(__offsetof(struct umutex, m_spare[0]) ==
    __offsetof(struct umutex32, m_spare[0]), "m_spare32");
#endif

int umtx_shm_vnobj_persistent = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_vnode_persistent, CTLFLAG_RWTUN,
    &umtx_shm_vnobj_persistent, 0,
    "False forces destruction of umtx attached to file, on last close");
static int umtx_max_rb = 1000;
SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_max_robust, CTLFLAG_RWTUN,
    &umtx_max_rb, 0,
    "");

static uma_zone_t		umtx_pi_zone;
static struct umtxq_chain	umtxq_chains[2][UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
static int			umtx_pi_allocated;

static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
    &umtx_pi_allocated, 0, "Allocated umtx_pi");
static int umtx_verbose_rb = 1;
SYSCTL_INT(_debug_umtx, OID_AUTO, robust_faults_verbose, CTLFLAG_RWTUN,
    &umtx_verbose_rb, 0,
    "");
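
/*
 * These knobs are settable both at run time and as loader tunables
 * (CTLFLAG_RWTUN).  For instance, to keep umtx state attached to a
 * vnode-backed shared object alive across the last close, an
 * administrator could set
 *
 *	sysctl kern.ipc.umtx_vnode_persistent=1
 *
 * or place the equivalent line in /etc/sysctl.conf.
 */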

#ifdef UMTX_PROFILING
static long max_length;
SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0,
    "max_length");
static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD, 0,
    "umtx chain stats");
#endif

static void abs_timeout_update(struct abs_timeout *timo);

static void umtx_shm_init(void);
static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert_queue(struct umtx_q *uq, int q);
static void umtxq_remove_queue(struct umtx_q *uq, int q);
static int umtxq_sleep(struct umtx_q *uq, const char *wmesg,
    struct abs_timeout *);
static int umtxq_count(struct umtx_key *key);
static struct umtx_pi *umtx_pi_alloc(int);
static void umtx_pi_free(struct umtx_pi *pi);
static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    bool rb);
static void umtx_thread_cleanup(struct thread *td);
static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
    struct image_params *imgp __unused);
SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);

#define umtxq_signal(key, nwake)	umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
#define umtxq_insert(uq)	umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
#define umtxq_remove(uq)	umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)

static struct mtx umtx_lock;

#ifdef UMTX_PROFILING
static void
umtx_init_profiling(void)
{
	struct sysctl_oid *chain_oid;
	char chain_name[10];
	int i;

	for (i = 0; i < UMTX_CHAINS; ++i) {
		snprintf(chain_name, sizeof(chain_name), "%d", i);
		chain_oid = SYSCTL_ADD_NODE(NULL,
		    SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO,
		    chain_name, CTLFLAG_RD, NULL, "umtx hash stats");
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length0", CTLFLAG_RD,
		    &umtxq_chains[0][i].max_length, 0, NULL);
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length1", CTLFLAG_RD,
		    &umtxq_chains[1][i].max_length, 0, NULL);
	}
}

static int
sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS)
{
	char buf[512];
	struct sbuf sb;
	struct umtxq_chain *uc;
	u_int fract, i, j, tot, whole;
	u_int sf0, sf1, sf2, sf3, sf4;
	u_int si0, si1, si2, si3, si4;
	u_int sw0, sw1, sw2, sw3, sw4;

	sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
	for (i = 0; i < 2; i++) {
		tot = 0;
		for (j = 0; j < UMTX_CHAINS; ++j) {
			uc = &umtxq_chains[i][j];
			mtx_lock(&uc->uc_lock);
			tot += uc->max_length;
			mtx_unlock(&uc->uc_lock);
		}
		if (tot == 0)
			sbuf_printf(&sb, "%u) Empty ", i);
		else {
			sf0 = sf1 = sf2 = sf3 = sf4 = 0;
			si0 = si1 = si2 = si3 = si4 = 0;
			sw0 = sw1 = sw2 = sw3 = sw4 = 0;
			for (j = 0; j < UMTX_CHAINS; j++) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				whole = uc->max_length * 100;
				mtx_unlock(&uc->uc_lock);
				fract = (whole % tot) * 100;
				if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) {
					sf0 = fract;
					si0 = j;
					sw0 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw1,
				    sf1)) {
					sf1 = fract;
					si1 = j;
					sw1 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw2,
				    sf2)) {
					sf2 = fract;
					si2 = j;
					sw2 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw3,
				    sf3)) {
					sf3 = fract;
					si3 = j;
					sw3 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw4,
				    sf4)) {
					sf4 = fract;
					si4 = j;
					sw4 = whole;
				}
			}
			sbuf_printf(&sb, "queue %u:\n", i);
			sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot,
			    sf0 / tot, si0);
			sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot,
			    sf1 / tot, si1);
			sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot,
			    sf2 / tot, si2);
			sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot,
			    sf3 / tot, si3);
			sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot,
			    sf4 / tot, si4);
		}
	}
	sbuf_trim(&sb);
	sbuf_finish(&sb);
	sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
	sbuf_delete(&sb);
	return (0);
}

static int
sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS)
{
	struct umtxq_chain *uc;
	u_int i, j;
	int clear, error;

	clear = 0;
	error = sysctl_handle_int(oidp, &clear, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);

	if (clear != 0) {
		for (i = 0; i < 2; ++i) {
			for (j = 0; j < UMTX_CHAINS; ++j) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				uc->length = 0;
				uc->max_length = 0;
				mtx_unlock(&uc->uc_lock);
			}
		}
	}
	return (0);
}

SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_clear, "I", "Clear umtx chains statistics");
SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_peaks, "A", "Highest peaks in chains max length");
#endif

static void
umtxq_sysinit(void *arg __unused)
{
	int i, j;

	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	for (i = 0; i < 2; ++i) {
		for (j = 0; j < UMTX_CHAINS; ++j) {
			mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
			    MTX_DEF | MTX_DUPOK);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
			LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
			TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
			umtxq_chains[i][j].uc_busy = 0;
			umtxq_chains[i][j].uc_waiters = 0;
#ifdef UMTX_PROFILING
			umtxq_chains[i][j].length = 0;
			umtxq_chains[i][j].max_length = 0;
#endif
		}
	}
#ifdef UMTX_PROFILING
	umtx_init_profiling();
#endif
	mtx_init(&umtx_lock, "umtx lock", NULL, MTX_DEF);
	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
	    EVENTHANDLER_PRI_ANY);
	umtx_shm_init();
}

struct umtx_q *
umtxq_alloc(void)
{
	struct umtx_q *uq;

	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
	uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX,
	    M_WAITOK | M_ZERO);
	TAILQ_INIT(&uq->uq_spare_queue->head);
	TAILQ_INIT(&uq->uq_pi_contested);
	uq->uq_inherited_pri = PRI_MAX;
	return (uq);
}

void
umtxq_free(struct umtx_q *uq)
{

	MPASS(uq->uq_spare_queue != NULL);
	free(uq->uq_spare_queue, M_UMTX);
	free(uq, M_UMTX);
}

static inline void
umtxq_hash(struct umtx_key *key)
{
	unsigned n;

	n = (uintptr_t)key->info.both.a + key->info.both.b;
	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
}

static inline struct umtxq_chain *
umtxq_getchain(struct umtx_key *key)
{

	if (key->type <= TYPE_SEM)
		return (&umtxq_chains[1][key->hash]);
	return (&umtxq_chains[0][key->hash]);
}

/*
 * Lock a chain.
 */
static inline void
umtxq_lock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_lock(&uc->uc_lock);
}

/*
 * Unlock a chain.
 */
static inline void
umtxq_unlock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_unlock(&uc->uc_lock);
}

/*
 * Set the chain to the busy state when a following operation
 * may block (a kernel mutex cannot be held across it).
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	if (uc->uc_busy) {
#ifdef SMP
		if (smp_cpus > 1) {
			int count = BUSY_SPINS;
			if (count > 0) {
				umtxq_unlock(key);
				while (uc->uc_busy && --count > 0)
					cpu_spinwait();
				umtxq_lock(key);
			}
		}
#endif
		while (uc->uc_busy) {
			uc->uc_waiters++;
			msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
			uc->uc_waiters--;
		}
	}
	uc->uc_busy = 1;
}
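
/*
 * The busy bit acts as a sleepable, chain-wide lock.  The usual pattern
 * in this file is:
 *
 *	umtxq_lock(&key);
 *	umtxq_busy(&key);		/. may sleep, chain mutex dropped
 *	umtxq_unlock(&key);
 *	... operations that may fault or sleep (casueword32() etc.) ...
 *	umtxq_unbusy_unlocked(&key);
 *
 * so that userland memory accesses are never performed while the chain
 * mutex itself is held.
 */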

/*
 * Unbusy a chain.
 */
static inline void
umtxq_unbusy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	KASSERT(uc->uc_busy != 0, ("not busy"));
	uc->uc_busy = 0;
	if (uc->uc_waiters)
		wakeup_one(uc);
}

static inline void
umtxq_unbusy_unlocked(struct umtx_key *key)
{

	umtxq_lock(key);
	umtxq_unbusy(key);
	umtxq_unlock(key);
}

static struct umtxq_queue *
umtxq_queue_lookup(struct umtx_key *key, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	LIST_FOREACH(uh, &uc->uc_queue[q], link) {
		if (umtx_key_match(&uh->key, key))
			return (uh);
	}

	return (NULL);
}

static inline void
umtxq_insert_queue(struct umtx_q *uq, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT((uq->uq_flags & UQF_UMTXQ) == 0,
	    ("umtx_q is already on queue"));
	uh = umtxq_queue_lookup(&uq->uq_key, q);
	if (uh != NULL) {
		LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue,
		    link);
	} else {
		uh = uq->uq_spare_queue;
		uh->key = uq->uq_key;
		LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
#ifdef UMTX_PROFILING
		uc->length++;
		if (uc->length > uc->max_length) {
			uc->max_length = uc->length;
			if (uc->max_length > max_length)
				max_length = uc->max_length;
		}
#endif
	}
	uq->uq_spare_queue = NULL;

	TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
	uh->length++;
	uq->uq_flags |= UQF_UMTXQ;
	uq->uq_cur_queue = uh;
	return;
}

static inline void
umtxq_remove_queue(struct umtx_q *uq, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (uq->uq_flags & UQF_UMTXQ) {
		uh = uq->uq_cur_queue;
		TAILQ_REMOVE(&uh->head, uq, uq_link);
		uh->length--;
		uq->uq_flags &= ~UQF_UMTXQ;
		if (TAILQ_EMPTY(&uh->head)) {
			KASSERT(uh->length == 0,
			    ("inconsistent umtxq_queue length"));
#ifdef UMTX_PROFILING
			uc->length--;
#endif
			LIST_REMOVE(uh, link);
		} else {
			uh = LIST_FIRST(&uc->uc_spare_queue);
			KASSERT(uh != NULL, ("uc_spare_queue is empty"));
			LIST_REMOVE(uh, link);
		}
		uq->uq_spare_queue = uh;
		uq->uq_cur_queue = NULL;
	}
}

/*
 * Return the number of waiters for a key.
 */
static int
umtxq_count(struct umtx_key *key)
{
	struct umtxq_queue *uh;

	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL)
		return (uh->length);
	return (0);
}

/*
 * Return the number of PI waiters and, via *first, the first waiter.
 */
static int
umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
{
	struct umtxq_queue *uh;

	*first = NULL;
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL) {
		*first = TAILQ_FIRST(&uh->head);
		return (uh->length);
	}
	return (0);
}

static int
umtxq_check_susp(struct thread *td)
{
	struct proc *p;
	int error;

	/*
	 * The check for TDF_NEEDSUSPCHK is racy, but it is enough to
	 * eventually break the lockstep loop.
	 */
	if ((td->td_flags & TDF_NEEDSUSPCHK) == 0)
		return (0);
	error = 0;
	p = td->td_proc;
	PROC_LOCK(p);
	if (P_SHOULDSTOP(p) ||
	    ((p->p_flag & P_TRACED) && (td->td_dbgflags & TDB_SUSPEND))) {
		if (p->p_flag & P_SINGLE_EXIT)
			error = EINTR;
		else
			error = ERESTART;
	}
	PROC_UNLOCK(p);
	return (error);
}

/*
 * Wake up threads waiting on a userland object.
 */
static int
umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
{
	struct umtxq_queue *uh;
	struct umtx_q *uq;
	int ret;

	ret = 0;
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
	uh = umtxq_queue_lookup(key, q);
	if (uh != NULL) {
		while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
			umtxq_remove_queue(uq, q);
			wakeup(uq);
			if (++ret >= n_wake)
				return (ret);
		}
	}
	return (ret);
}

/*
 * Wake up the specified thread.
 */
static inline void
umtxq_signal_thread(struct umtx_q *uq)
{

	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key));
	umtxq_remove(uq);
	wakeup(uq);
}

static inline int
tstohz(const struct timespec *tsp)
{
	struct timeval tv;

	TIMESPEC_TO_TIMEVAL(&tv, tsp);
	return tvtohz(&tv);
}

static void
abs_timeout_init(struct abs_timeout *timo, int clockid, int absolute,
    const struct timespec *timeout)
{

	timo->clockid = clockid;
	if (!absolute) {
		timo->is_abs_real = false;
		abs_timeout_update(timo);
		timespecadd(&timo->cur, timeout, &timo->end);
	} else {
		timo->end = *timeout;
		timo->is_abs_real = clockid == CLOCK_REALTIME ||
		    clockid == CLOCK_REALTIME_FAST ||
		    clockid == CLOCK_REALTIME_PRECISE;
		/*
		 * If is_abs_real, umtxq_sleep will read the clock
		 * after setting td_rtcgen; otherwise, read it here.
		 */
		if (!timo->is_abs_real) {
			abs_timeout_update(timo);
		}
	}
}

static void
abs_timeout_init2(struct abs_timeout *timo, const struct _umtx_time *umtxtime)
{

	abs_timeout_init(timo, umtxtime->_clockid,
	    (umtxtime->_flags & UMTX_ABSTIME) != 0, &umtxtime->_timeout);
}

static inline void
abs_timeout_update(struct abs_timeout *timo)
{

	kern_clock_gettime(curthread, timo->clockid, &timo->cur);
}

static int
abs_timeout_gethz(struct abs_timeout *timo)
{
	struct timespec tts;

	if (timespeccmp(&timo->end, &timo->cur, <=))
		return (-1);
	timespecsub(&timo->end, &timo->cur, &tts);
	return (tstohz(&tts));
}
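
/*
 * Together these helpers turn a user-supplied timeout, relative or
 * absolute, into the tick count that msleep() expects.  A sleep loop
 * uses them roughly as follows:
 *
 *	struct abs_timeout timo;
 *
 *	abs_timeout_init2(&timo, umtxtime);	/. capture clock and deadline
 *	for (;;) {
 *		if (abs_timeout_gethz(&timo) < 0)
 *			return (ETIMEDOUT);	/. deadline already passed
 *		... msleep(...) ...
 *		abs_timeout_update(&timo);	/. re-read the clock
 *	}
 */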

static uint32_t
umtx_unlock_val(uint32_t flags, bool rb)
{

	if (rb)
		return (UMUTEX_RB_OWNERDEAD);
	else if ((flags & UMUTEX_NONCONSISTENT) != 0)
		return (UMUTEX_RB_NOTRECOV);
	else
		return (UMUTEX_UNOWNED);
}

/*
 * Put the thread into a sleep state.  Before sleeping, check whether
 * the thread was removed from the umtx queue.
 */
static inline int
umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *abstime)
{
	struct umtxq_chain *uc;
	int error, timo;

	if (abstime != NULL && abstime->is_abs_real) {
		curthread->td_rtcgen = atomic_load_acq_int(&rtc_generation);
		abs_timeout_update(abstime);
	}

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	for (;;) {
		if (!(uq->uq_flags & UQF_UMTXQ)) {
			error = 0;
			break;
		}
		if (abstime != NULL) {
			timo = abs_timeout_gethz(abstime);
			if (timo < 0) {
				error = ETIMEDOUT;
				break;
			}
		} else
			timo = 0;
		error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo);
		if (error == EINTR || error == ERESTART) {
			umtxq_lock(&uq->uq_key);
			break;
		}
		if (abstime != NULL) {
			if (abstime->is_abs_real)
				curthread->td_rtcgen =
				    atomic_load_acq_int(&rtc_generation);
			abs_timeout_update(abstime);
		}
		umtxq_lock(&uq->uq_key);
	}

	curthread->td_rtcgen = 0;
	return (error);
}

/*
 * Convert a userspace address into a unique logical address.
 */
int
umtx_key_get(const void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else {
		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
		    &entry, &key->info.shared.object, &pindex, &prot,
		    &wired) != KERN_SUCCESS) {
			return (EFAULT);
		}

		if ((share == PROCESS_SHARE) ||
		    (share == AUTO_SHARE &&
		     VM_INHERIT_SHARE == entry->inheritance)) {
			key->shared = 1;
			key->info.shared.offset = (vm_offset_t)addr -
			    entry->start + entry->offset;
			vm_object_reference(key->info.shared.object);
		} else {
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		vm_map_lookup_done(map, entry);
	}

	umtxq_hash(key);
	return (0);
}

/*
 * Release a key.
 */
void
umtx_key_release(struct umtx_key *key)
{

	if (key->shared)
		vm_object_deallocate(key->info.shared.object);
}
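
/*
 * The resulting key is what makes process-shared objects work: for
 * THREAD_SHARE the key is (vmspace, virtual address), so two processes
 * mapping the same page get different keys; for PROCESS_SHARE it is
 * (VM object, offset), so any mapping of the same backing page yields
 * the same key and therefore the same wait-queue chain.  AUTO_SHARE
 * picks between the two based on the map entry's inheritance.
 */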

/*
 * Fetch and compare value, sleep on the address if value has not changed.
 */
static int
do_wait(struct thread *td, void *addr, u_long id,
    struct _umtx_time *timeout, int compat32, int is_private)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	u_long tmp;
	uint32_t tmp32;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	if (compat32 == 0) {
		error = fueword(addr, &tmp);
		if (error != 0)
			error = EFAULT;
	} else {
		error = fueword32(addr, &tmp32);
		if (error == 0)
			tmp = tmp32;
		else
			error = EFAULT;
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		if (tmp == id)
			error = umtxq_sleep(uq, "uwait", timeout == NULL ?
			    NULL : &timo);
		if ((uq->uq_flags & UQF_UMTXQ) == 0)
			error = 0;
		else
			umtxq_remove(uq);
	} else if ((uq->uq_flags & UQF_UMTXQ) != 0) {
		umtxq_remove(uq);
	}
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

/*
 * Wake up threads sleeping on the specified address.
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
		return (ret);
	umtxq_lock(&key);
	umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}
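
/*
 * This wait/wake pair is the kernel half of a futex-style primitive.
 * From userland it is reached through _umtx_op(2); a minimal usage
 * sketch (illustrative only, error handling omitted):
 *
 *	/. sleep while *addr still holds the expected value
 *	_umtx_op(addr, UMTX_OP_WAIT_UINT_PRIVATE, expected, NULL, NULL);
 *
 *	/. wake up to one waiter sleeping on addr
 *	_umtx_op(addr, UMTX_OP_WAKE_PRIVATE, 1, NULL, NULL);
 */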

/*
 * Lock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int mode)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	error = 0;
	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	/*
	 * Care must be exercised when dealing with umtx structure.  It
	 * can fault on any access.
	 */
	for (;;) {
		rv = fueword32(&m->m_owner, &owner);
		if (rv == -1)
			return (EFAULT);
		if (mode == _UMUTEX_WAIT) {
			if (owner == UMUTEX_UNOWNED ||
			    owner == UMUTEX_CONTESTED ||
			    owner == UMUTEX_RB_OWNERDEAD ||
			    owner == UMUTEX_RB_NOTRECOV)
				return (0);
		} else {
			/*
			 * Robust mutex terminated.  Kernel duty is to
			 * return EOWNERDEAD to the userspace.  The
			 * umutex.m_flags UMUTEX_NONCONSISTENT is set
			 * by the common userspace code.
			 */
			if (owner == UMUTEX_RB_OWNERDEAD) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_RB_OWNERDEAD, &owner,
				    id | UMUTEX_CONTESTED);
				if (rv == -1)
					return (EFAULT);
				if (owner == UMUTEX_RB_OWNERDEAD)
					return (EOWNERDEAD); /* success */
				rv = umtxq_check_susp(td);
				if (rv != 0)
					return (rv);
				continue;
			}
			if (owner == UMUTEX_RB_NOTRECOV)
				return (ENOTRECOVERABLE);

			/*
			 * Try the uncontested case.  This should be
			 * done in userland.
			 */
			rv = casueword32(&m->m_owner, UMUTEX_UNOWNED,
			    &owner, id);
			/* The address was invalid. */
			if (rv == -1)
				return (EFAULT);

			/* The acquire succeeded. */
			if (owner == UMUTEX_UNOWNED)
				return (0);

			/*
			 * If no one owns it but it is contested, try
			 * to acquire it.
			 */
			if (owner == UMUTEX_CONTESTED) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_CONTESTED, &owner,
				    id | UMUTEX_CONTESTED);
				/* The address was invalid. */
				if (rv == -1)
					return (EFAULT);

				if (owner == UMUTEX_CONTESTED)
					return (0);

				rv = umtxq_check_susp(td);
				if (rv != 0)
					return (rv);

				/*
				 * If this failed the lock has
				 * changed, restart.
				 */
				continue;
			}
		}

		if (mode == _UMUTEX_TRY)
			return (EBUSY);

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old,
		    owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (rv == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtxn", timeout == NULL ?
			    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);

		if (error == 0)
			error = umtxq_check_susp(td);
	}

	return (0);
}

/*
 * Unlock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	uint32_t owner, old, id, newlock;
	int error, count;

	id = td->td_tid;

	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	newlock = umtx_unlock_val(flags, rb);
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, newlock);
		if (error == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	if (count > 1)
		newlock |= UMUTEX_CONTESTED;
	error = casueword32(&m->m_owner, owner, &old, newlock);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
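
/*
 * The kernel only sees a PTHREAD_PRIO_NONE mutex when it is contested.
 * The uncontested fast path lives entirely in userland (libthr) and
 * is, in spirit, a single compare-and-swap on m_owner (a sketch,
 * assuming an aligned 32-bit owner word):
 *
 *	if (atomic_cmpset_acq_32(&m->m_owner, UMUTEX_UNOWNED, tid))
 *		return (0);	/. locked without entering the kernel
 *	return (_umtx_op(m, UMTX_OP_MUTEX_LOCK, 0, NULL, NULL));
 *
 * The UMUTEX_CONTESTED bit set by do_lock_normal() above is what tells
 * the userland unlock path that it must call into the kernel to wake
 * a waiter.
 */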

/*
 * Check if the mutex is available and wake up a waiter,
 * only for simple mutex.
 */
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != 0 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV)
		return (0);

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	if (count <= 1 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV) {
		error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    UMUTEX_UNOWNED);
		if (error == -1)
			error = EFAULT;
	}

	umtxq_lock(&key);
	if (error == 0 && count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 ||
	    owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV))
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

/*
 * Check if the mutex has waiters and try to fix the contention bit.
 */
static int
do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old;
	int type;
	int error;
	int count;

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT |
	    UMUTEX_ROBUST)) {
	case 0:
	case UMUTEX_ROBUST:
		type = TYPE_NORMAL_UMUTEX;
		break;
	case UMUTEX_PRIO_INHERIT:
		type = TYPE_PI_UMUTEX;
		break;
	case (UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST):
		type = TYPE_PI_ROBUST_UMUTEX;
		break;
	case UMUTEX_PRIO_PROTECT:
		type = TYPE_PP_UMUTEX;
		break;
	case (UMUTEX_PRIO_PROTECT | UMUTEX_ROBUST):
		type = TYPE_PP_ROBUST_UMUTEX;
		break;
	default:
		return (EINVAL);
	}
	if ((error = umtx_key_get(m, type, GET_SHARE(flags), &key)) != 0)
		return (error);

	owner = 0;
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * Only repair the contention bit if there is a waiter; this
	 * means the mutex is still being referenced by userland code.
	 * Otherwise, do not update any memory.
	 */
	if (count > 1) {
		error = fueword32(&m->m_owner, &owner);
		if (error == -1)
			error = EFAULT;
		while (error == 0 && (owner & UMUTEX_CONTESTED) == 0) {
			error = casueword32(&m->m_owner, owner, &old,
			    owner | UMUTEX_CONTESTED);
			if (error == -1) {
				error = EFAULT;
				break;
			}
			if (old == owner)
				break;
			owner = old;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
	} else if (count == 1) {
		error = fueword32(&m->m_owner, &owner);
		if (error == -1)
			error = EFAULT;
		while (error == 0 && (owner & ~UMUTEX_CONTESTED) != 0 &&
		    (owner & UMUTEX_CONTESTED) == 0) {
			error = casueword32(&m->m_owner, owner, &old,
			    owner | UMUTEX_CONTESTED);
			if (error == -1) {
				error = EFAULT;
				break;
			}
			if (old == owner)
				break;
			owner = old;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
	}
	umtxq_lock(&key);
	if (error == EFAULT) {
		umtxq_signal(&key, INT_MAX);
	} else if (count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 ||
	    owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV))
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

static inline struct umtx_pi *
umtx_pi_alloc(int flags)
{
	struct umtx_pi *pi;

	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
	TAILQ_INIT(&pi->pi_blocked);
	atomic_add_int(&umtx_pi_allocated, 1);
	return (pi);
}

static inline void
umtx_pi_free(struct umtx_pi *pi)
{

	uma_zfree(umtx_pi_zone, pi);
	atomic_add_int(&umtx_pi_allocated, -1);
}

/*
 * Adjust the thread's position on a pi_state after its priority has been
 * changed.
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread or higher than the next thread.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}

static struct umtx_pi *
umtx_pi_next(struct umtx_pi *pi)
{
	struct umtx_q *uq_owner;

	if (pi->pi_owner == NULL)
		return (NULL);
	uq_owner = pi->pi_owner->td_umtxq;
	if (uq_owner == NULL)
		return (NULL);
	return (uq_owner->uq_pi_blocked);
}

/*
 * Floyd's cycle-finding algorithm.
 */
static bool
umtx_pi_check_loop(struct umtx_pi *pi)
{
	struct umtx_pi *pi1;	/* fast iterator */

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (false);
	pi1 = pi;
	for (;;) {
		pi = umtx_pi_next(pi);
		if (pi == NULL)
			break;
		pi1 = umtx_pi_next(pi1);
		if (pi1 == NULL)
			break;
		pi1 = umtx_pi_next(pi1);
		if (pi1 == NULL)
			break;
		if (pi == pi1)
			return (true);
	}
	return (false);
}
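
/*
 * umtx_pi_next() walks the blocking graph one edge at a time: from a
 * mutex to its owner, then to the mutex that owner is itself blocked
 * on.  Because userland controls the m_owner words, it can construct
 * a cycle (A owns M1 and waits on M2, B owns M2 and waits on M1), and
 * a naive walk of that graph would never terminate.  The slow
 * iterator (pi) advances one step per loop and the fast one (pi1) two
 * steps, so if a cycle exists the two must eventually meet; this is
 * exactly Floyd's tortoise-and-hare argument.  The propagation
 * routines below simply give up when a loop is detected.
 */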

/*
 * Propagate priority when a thread is blocked on a POSIX
 * PI mutex.
 */
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;
	if (umtx_pi_check_loop(pi))
		return;

	for (;;) {
		td = pi->pi_owner;
		if (td == NULL || td == curthread)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		thread_lock(td);
		if (td->td_lend_user_pri > pri)
			sched_lend_user_prio(td, pri);
		else {
			thread_unlock(td);
			break;
		}
		thread_unlock(td);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		if (pi == NULL)
			break;
		/* Resort td on the list if needed. */
		umtx_pi_adjust_thread(pi, td);
	}
}

/*
 * Unpropagate priority for a PI mutex when a thread blocked on
 * it is interrupted by a signal or resumed by others.
 */
static void
umtx_repropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);

	if (umtx_pi_check_loop(pi))
		return;
	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		thread_lock(pi->pi_owner);
		sched_lend_user_prio(pi->pi_owner, pri);
		thread_unlock(pi->pi_owner);
		if ((pi = uq_owner->uq_pi_blocked) != NULL)
			umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
	}
}
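
/*
 * A concrete example of the two directions: suppose low-priority T1
 * owns PI mutex M and high-priority T2 blocks on M.
 * umtx_propagate_priority(T2) walks M -> T1 and lends T2's priority
 * to T1 (and further along the chain if T1 is itself blocked on
 * another PI mutex).  If T2's sleep is later interrupted by a signal
 * or a timeout, umtx_repropagate_priority(M) recomputes T1's lent
 * priority from the highest-priority waiter still left on each mutex
 * T1 owns, dropping the boost that T2 alone was responsible for.
 */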

/*
 * Insert a PI mutex into the owned list.
 */
static void
umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_assert(&umtx_lock, MA_OWNED);
	MPASS(pi->pi_owner == NULL);
	pi->pi_owner = owner;
	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
}

/*
 * Disown a PI mutex, and remove it from the owned list.
 */
static void
umtx_pi_disown(struct umtx_pi *pi)
{

	mtx_assert(&umtx_lock, MA_OWNED);
	TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, pi, pi_link);
	pi->pi_owner = NULL;
}

/*
 * Claim ownership of a PI mutex.
 */
static int
umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq;
	int pri;

	mtx_lock(&umtx_lock);
	if (pi->pi_owner == owner) {
		mtx_unlock(&umtx_lock);
		return (0);
	}

	if (pi->pi_owner != NULL) {
		/*
		 * userland may have already messed the mutex, sigh.
		 */
		mtx_unlock(&umtx_lock);
		return (EPERM);
	}
	umtx_pi_setowner(pi, owner);
	uq = TAILQ_FIRST(&pi->pi_blocked);
	if (uq != NULL) {
		pri = UPRI(uq->uq_thread);
		thread_lock(owner);
		if (pri < UPRI(owner))
			sched_lend_user_prio(owner, pri);
		thread_unlock(owner);
	}
	mtx_unlock(&umtx_lock);
	return (0);
}

/*
 * Adjust a thread's position in its blocked PI mutex;
 * this may start a new round of priority propagation.
 */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	mtx_lock(&umtx_lock);
	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	if (pi != NULL) {
		umtx_pi_adjust_thread(pi, td);
		umtx_repropagate_priority(pi);
	}
	mtx_unlock(&umtx_lock);
}

/*
 * Sleep on a PI mutex.
 */
static int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, uint32_t owner,
    const char *wmesg, struct abs_timeout *timo, bool shared)
{
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int error, pri;
#ifdef INVARIANTS
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
#endif
	error = 0;
	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key));
	KASSERT(uc->uc_busy != 0, ("umtx chain is not busy"));
	umtxq_insert(uq);
	mtx_lock(&umtx_lock);
	if (pi->pi_owner == NULL) {
		mtx_unlock(&umtx_lock);
		td1 = tdfind(owner, shared ? -1 : td->td_proc->p_pid);
		mtx_lock(&umtx_lock);
		if (td1 != NULL) {
			if (pi->pi_owner == NULL)
				umtx_pi_setowner(pi, td1);
			PROC_UNLOCK(td1->td_proc);
		}
	}

	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	thread_lock(td);
	td->td_flags |= TDF_UPIBLOCKED;
	thread_unlock(td);
	umtx_propagate_priority(td);
	mtx_unlock(&umtx_lock);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, wmesg, timo);
	umtxq_remove(uq);

	mtx_lock(&umtx_lock);
	uq->uq_pi_blocked = NULL;
	thread_lock(td);
	td->td_flags &= ~TDF_UPIBLOCKED;
	thread_unlock(td);
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_repropagate_priority(pi);
	mtx_unlock(&umtx_lock);
	umtxq_unlock(&uq->uq_key);

	return (error);
}

/*
 * Add a reference to a PI mutex.
 */
static void
umtx_pi_ref(struct umtx_pi *pi)
{

	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&pi->pi_key));
	pi->pi_refcount++;
}

/*
 * Decrease the reference count of a PI mutex; when the counter
 * drops to zero, its memory is freed.
 */
static void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		mtx_lock(&umtx_lock);
		if (pi->pi_owner != NULL)
			umtx_pi_disown(pi);
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
		    ("blocked queue not empty"));
		mtx_unlock(&umtx_lock);
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		umtx_pi_free(pi);
	}
}

/*
 * Find a PI mutex in the hash table.
 */
static struct umtx_pi *
umtx_pi_lookup(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtx_pi *pi;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);

	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
		if (umtx_key_match(&pi->pi_key, key)) {
			return (pi);
		}
	}
	return (NULL);
}

/*
 * Insert a PI mutex into the hash table.
 */
static inline void
umtx_pi_insert(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
}

/*
 * Lock a PI mutex.
 */
static int
do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, old_owner, owner, old;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	pi = umtx_pi_lookup(&uq->uq_key);
	if (pi == NULL) {
		new_pi = umtx_pi_alloc(M_NOWAIT);
		if (new_pi == NULL) {
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc(M_WAITOK);
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL) {
				umtx_pi_free(new_pi);
				new_pi = NULL;
			}
		}
		if (new_pi != NULL) {
			new_pi->pi_key = uq->uq_key;
			umtx_pi_insert(new_pi);
			pi = new_pi;
		}
	}
	umtx_pi_ref(pi);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Care must be exercised when dealing with umtx structure.  It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED) {
			error = 0;
			break;
		}

		if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
			break;
		}

		/* If no one owns it but it is contested, try to acquire it. */
		if (owner == UMUTEX_CONTESTED || owner == UMUTEX_RB_OWNERDEAD) {
			old_owner = owner;
			rv = casueword32(&m->m_owner, owner, &owner,
			    id | UMUTEX_CONTESTED);
			/* The address was invalid. */
			if (rv == -1) {
				error = EFAULT;
				break;
			}

			if (owner == old_owner) {
				umtxq_lock(&uq->uq_key);
				umtxq_busy(&uq->uq_key);
				error = umtx_pi_claim(pi, td);
				umtxq_unbusy(&uq->uq_key);
				umtxq_unlock(&uq->uq_key);
				if (error != 0) {
					/*
					 * Since we're going to return an
					 * error, restore the m_owner to its
					 * previous, unowned state to avoid
					 * compounding the problem.
					 */
					(void)casuword32(&m->m_owner,
					    id | UMUTEX_CONTESTED,
					    old_owner);
				}
				if (error == 0 &&
				    old_owner == UMUTEX_RB_OWNERDEAD)
					error = EOWNERDEAD;
				break;
			}

			error = umtxq_check_susp(td);
			if (error != 0)
				break;

			/* If this failed the lock has changed, restart. */
			continue;
		}

		if ((owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old, owner |
		    UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}

		umtxq_lock(&uq->uq_key);
		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.  Note that the
		 * UMUTEX_RB_OWNERDEAD value for owner is impossible here.
		 */
		if (old == owner) {
			error = umtxq_sleep_pi(uq, pi,
			    owner & ~UMUTEX_CONTESTED,
			    "umtxpi", timeout == NULL ? NULL : &timo,
			    (flags & USYNC_PROCESS_SHARED) != 0);
			if (error != 0)
				continue;
		} else {
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
		}

		error = umtxq_check_susp(td);
		if (error != 0)
			break;
	}

	umtxq_lock(&uq->uq_key);
	umtx_pi_unref(pi);
	umtxq_unlock(&uq->uq_key);

	umtx_key_release(&uq->uq_key);
	return (error);
}
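
/*
 * The lookup-then-allocate dance at the top of do_lock_pi() above is a
 * common pattern: first try umtx_pi_alloc(M_NOWAIT) while the chain is
 * locked; if the zone cannot satisfy that, drop the chain lock,
 * allocate with M_WAITOK (which may sleep), and re-do the lookup,
 * because another thread may have inserted a umtx_pi for the same key
 * while the lock was dropped.  The loser of that race frees its
 * freshly allocated structure.
 */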

/*
 * Unlock a PI mutex.
 */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	uint32_t id, new_owner, old, owner;
	int count, error, pri;

	id = td->td_tid;

	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	new_owner = umtx_unlock_val(flags, rb);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, new_owner);
		if (error == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count_pi(&key, &uq_first);
	if (uq_first != NULL) {
		mtx_lock(&umtx_lock);
		pi = uq_first->uq_pi_blocked;
		KASSERT(pi != NULL, ("pi == NULL?"));
		if (pi->pi_owner != td && !(rb && pi->pi_owner == NULL)) {
			mtx_unlock(&umtx_lock);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			/* userland messed the mutex */
			return (EPERM);
		}
		uq_me = td->td_umtxq;
		if (pi->pi_owner == td)
			umtx_pi_disown(pi);
		/* Get the highest priority thread which is still sleeping. */
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		while (uq_first != NULL &&
		    (uq_first->uq_flags & UQF_UMTXQ) == 0) {
			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
		}
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
		if (uq_first)
			umtxq_signal_thread(uq_first);
	} else {
		pi = umtx_pi_lookup(&key);
		/*
		 * A umtx_pi can exist if a signal or timeout removed the
		 * last waiter from the umtxq, but there is still
		 * a thread in do_lock_pi() holding the umtx_pi.
		 */
		if (pi != NULL) {
			/*
			 * The umtx_pi can be unowned, such as when a thread
			 * has just entered do_lock_pi(), allocated the
			 * umtx_pi, and unlocked the umtxq.
			 * If the current thread owns it, it must disown it.
			 */
			mtx_lock(&umtx_lock);
			if (pi->pi_owner == td)
				umtx_pi_disown(pi);
			mtx_unlock(&umtx_lock);
		}
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	if (count > 1)
		new_owner |= UMUTEX_CONTESTED;
	error = casueword32(&m->m_owner, owner, &old, new_owner);

	umtxq_unbusy_unlocked(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

/*
 * Lock a PP mutex.
 */
static int
do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct abs_timeout timo;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t ceiling;
	uint32_t owner, id;
	int error, pri, old_inherited_pri, su, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
	for (;;) {
		old_inherited_pri = uq->uq_inherited_pri;
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		rv = fueword32(&m->m_ceilings[0], &ceiling);
		if (rv == -1) {
			error = EFAULT;
			goto out;
		}
		ceiling = RTP_PRIO_MAX - ceiling;
		if (ceiling > RTP_PRIO_MAX) {
			error = EINVAL;
			goto out;
		}

		mtx_lock(&umtx_lock);
		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
			mtx_unlock(&umtx_lock);
			error = EINVAL;
			goto out;
		}
		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
			thread_lock(td);
			if (uq->uq_inherited_pri < UPRI(td))
				sched_lend_user_prio(td, uq->uq_inherited_pri);
			thread_unlock(td);
		}
		mtx_unlock(&umtx_lock);

		rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    id | UMUTEX_CONTESTED);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		if (owner == UMUTEX_CONTESTED) {
			error = 0;
			break;
		} else if (owner == UMUTEX_RB_OWNERDEAD) {
			rv = casueword32(&m->m_owner, UMUTEX_RB_OWNERDEAD,
			    &owner, id | UMUTEX_CONTESTED);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (owner == UMUTEX_RB_OWNERDEAD) {
				error = EOWNERDEAD; /* success */
				break;
			}
			error = 0;
		} else if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", timeout == NULL ?
		    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);

		mtx_lock(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}

	if (error != 0 && error != EOWNERDEAD) {
		mtx_lock(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}

out:
	umtxq_unbusy_unlocked(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}
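
/*
 * Note the ceiling arithmetic above: userland stores the ceiling as an
 * rtprio(2)-style value where 0 is the strongest real-time priority,
 * so "ceiling = RTP_PRIO_MAX - ceiling" converts it into an offset
 * from PRI_MIN_REALTIME in the kernel's priority space (where smaller
 * numbers are stronger).  Because the values are unsigned, the single
 * "ceiling > RTP_PRIO_MAX" test also rejects inputs that would have
 * been negative after the conversion.
 */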

/*
 * Unlock a PP mutex.
 */
static int
do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
    struct umtx_key key;
    struct umtx_q *uq, *uq2;
    struct umtx_pi *pi;
    uint32_t id, owner, rceiling;
    int error, pri, new_inherited_pri, su;

    id = td->td_tid;
    uq = td->td_umtxq;
    su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);

    /*
     * Make sure we own this mutex.
     */
    error = fueword32(&m->m_owner, &owner);
    if (error == -1)
        return (EFAULT);

    if ((owner & ~UMUTEX_CONTESTED) != id)
        return (EPERM);

    error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
    if (error != 0)
        return (error);

    if (rceiling == -1)
        new_inherited_pri = PRI_MAX;
    else {
        rceiling = RTP_PRIO_MAX - rceiling;
        if (rceiling > RTP_PRIO_MAX)
            return (EINVAL);
        new_inherited_pri = PRI_MIN_REALTIME + rceiling;
    }

    if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
        TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
        &key)) != 0)
        return (error);
    umtxq_lock(&key);
    umtxq_busy(&key);
    umtxq_unlock(&key);
    /*
     * For a priority-protected mutex, always set the unlocked state
     * to UMUTEX_CONTESTED so that userland always enters the kernel
     * to lock it.  This is necessary because thread priorities have
     * to be adjusted for such mutexes.
     */
    error = suword32(&m->m_owner, umtx_unlock_val(flags, rb) |
        UMUTEX_CONTESTED);

    umtxq_lock(&key);
    if (error == 0)
        umtxq_signal(&key, 1);
    umtxq_unbusy(&key);
    umtxq_unlock(&key);

    if (error == -1)
        error = EFAULT;
    else {
        mtx_lock(&umtx_lock);
        if (su != 0)
            uq->uq_inherited_pri = new_inherited_pri;
        pri = PRI_MAX;
        TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
            uq2 = TAILQ_FIRST(&pi->pi_blocked);
            if (uq2 != NULL) {
                if (pri > UPRI(uq2->uq_thread))
                    pri = UPRI(uq2->uq_thread);
            }
        }
        if (pri > uq->uq_inherited_pri)
            pri = uq->uq_inherited_pri;
        thread_lock(td);
        sched_lend_user_prio(td, pri);
        thread_unlock(td);
        mtx_unlock(&umtx_lock);
    }
    umtx_key_release(&key);
    return (error);
}

static int
do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
    uint32_t *old_ceiling)
{
    struct umtx_q *uq;
    uint32_t flags, id, owner, save_ceiling;
    int error, rv, rv1;

    error = fueword32(&m->m_flags, &flags);
    if (error == -1)
        return (EFAULT);
    if ((flags & UMUTEX_PRIO_PROTECT) == 0)
        return (EINVAL);
    if (ceiling > RTP_PRIO_MAX)
        return (EINVAL);
    id = td->td_tid;
    uq = td->td_umtxq;
    if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
        TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
        &uq->uq_key)) != 0)
        return (error);
    for (;;) {
        umtxq_lock(&uq->uq_key);
        umtxq_busy(&uq->uq_key);
        umtxq_unlock(&uq->uq_key);

        rv = fueword32(&m->m_ceilings[0], &save_ceiling);
        if (rv == -1) {
            error = EFAULT;
            break;
        }

        rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
            id | UMUTEX_CONTESTED);
        if (rv == -1) {
            error = EFAULT;
            break;
        }

        if (owner == UMUTEX_CONTESTED) {
            rv = suword32(&m->m_ceilings[0], ceiling);
            rv1 = suword32(&m->m_owner, UMUTEX_CONTESTED);
            error = (rv == 0 && rv1 == 0) ? 0 : EFAULT;
            break;
        }

        if ((owner & ~UMUTEX_CONTESTED) == id) {
            rv = suword32(&m->m_ceilings[0], ceiling);
            error = rv == 0 ? 0 : EFAULT;
            break;
        }

        if (owner == UMUTEX_RB_OWNERDEAD) {
            error = EOWNERDEAD;
            break;
        } else if (owner == UMUTEX_RB_NOTRECOV) {
            error = ENOTRECOVERABLE;
            break;
        }

        /*
         * If we caught a signal, we have already retried; exit
         * immediately.
         */
        if (error != 0)
            break;

        /*
         * If we successfully set the contested bit, sleep.
         * Otherwise the lock changed and we need to retry, or we
         * lost a race with the thread unlocking the umtx.
         */
        umtxq_lock(&uq->uq_key);
        umtxq_insert(uq);
        umtxq_unbusy(&uq->uq_key);
        error = umtxq_sleep(uq, "umtxpp", NULL);
        umtxq_remove(uq);
        umtxq_unlock(&uq->uq_key);
    }
    umtxq_lock(&uq->uq_key);
    if (error == 0)
        umtxq_signal(&uq->uq_key, INT_MAX);
    umtxq_unbusy(&uq->uq_key);
    umtxq_unlock(&uq->uq_key);
    umtx_key_release(&uq->uq_key);
    if (error == 0 && old_ceiling != NULL) {
        rv = suword32(old_ceiling, save_ceiling);
        error = rv == 0 ? 0 : EFAULT;
    }
    return (error);
}

/*
 * Lock a userland POSIX mutex.
 */
static int
do_lock_umutex(struct thread *td, struct umutex *m,
    struct _umtx_time *timeout, int mode)
{
    uint32_t flags;
    int error;

    error = fueword32(&m->m_flags, &flags);
    if (error == -1)
        return (EFAULT);

    switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
    case 0:
        error = do_lock_normal(td, m, flags, timeout, mode);
        break;
    case UMUTEX_PRIO_INHERIT:
        error = do_lock_pi(td, m, flags, timeout, mode);
        break;
    case UMUTEX_PRIO_PROTECT:
        error = do_lock_pp(td, m, flags, timeout, mode);
        break;
    default:
        return (EINVAL);
    }
    if (timeout == NULL) {
        if (error == EINTR && mode != _UMUTEX_WAIT)
            error = ERESTART;
    } else {
        /* Timed locking is not restarted. */
        if (error == ERESTART)
            error = EINTR;
    }
    return (error);
}
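
/*
 * Usage sketch (illustrative only): timed locking through the dispatch
 * above, assuming a hypothetical my_mutex_timedlock().  The size of
 * the struct _umtx_time is passed through the uaddr argument, as
 * umtx_copyin_umtx_time() expects:
 *
 *	static int
 *	my_mutex_timedlock(struct umutex *m, const struct timespec *abs)
 *	{
 *		struct _umtx_time ut = {
 *			._timeout = *abs,
 *			._flags = UMTX_ABSTIME,
 *			._clockid = CLOCK_REALTIME,
 *		};
 *
 *		if (_umtx_op(m, UMTX_OP_MUTEX_LOCK, 0,
 *		    (void *)sizeof(ut), &ut) == -1)
 *			return (errno);	// e.g. ETIMEDOUT, EOWNERDEAD
 *		return (0);
 *	}
 */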

/*
 * Unlock a userland POSIX mutex.
 */
static int
do_unlock_umutex(struct thread *td, struct umutex *m, bool rb)
{
    uint32_t flags;
    int error;

    error = fueword32(&m->m_flags, &flags);
    if (error == -1)
        return (EFAULT);

    switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
    case 0:
        return (do_unlock_normal(td, m, flags, rb));
    case UMUTEX_PRIO_INHERIT:
        return (do_unlock_pi(td, m, flags, rb));
    case UMUTEX_PRIO_PROTECT:
        return (do_unlock_pp(td, m, flags, rb));
    }

    return (EINVAL);
}

static int
do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
    struct timespec *timeout, u_long wflags)
{
    struct abs_timeout timo;
    struct umtx_q *uq;
    uint32_t flags, clockid, hasw;
    int error;

    uq = td->td_umtxq;
    error = fueword32(&cv->c_flags, &flags);
    if (error == -1)
        return (EFAULT);
    error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
    if (error != 0)
        return (error);

    if ((wflags & CVWAIT_CLOCKID) != 0) {
        error = fueword32(&cv->c_clockid, &clockid);
        if (error == -1) {
            umtx_key_release(&uq->uq_key);
            return (EFAULT);
        }
        if (clockid < CLOCK_REALTIME ||
            clockid >= CLOCK_THREAD_CPUTIME_ID) {
            /* Only the predefined clock ids are accepted. */
            umtx_key_release(&uq->uq_key);
            return (EINVAL);
        }
    } else {
        clockid = CLOCK_REALTIME;
    }

    umtxq_lock(&uq->uq_key);
    umtxq_busy(&uq->uq_key);
    umtxq_insert(uq);
    umtxq_unlock(&uq->uq_key);

    /*
     * Set c_has_waiters to 1 before releasing the user mutex, but
     * avoid touching the cache line when it is already set.
     */
    error = fueword32(&cv->c_has_waiters, &hasw);
    if (error == 0 && hasw == 0)
        suword32(&cv->c_has_waiters, 1);

    umtxq_unbusy_unlocked(&uq->uq_key);

    error = do_unlock_umutex(td, m, false);

    if (timeout != NULL)
        abs_timeout_init(&timo, clockid, (wflags & CVWAIT_ABSTIME) != 0,
            timeout);

    umtxq_lock(&uq->uq_key);
    if (error == 0) {
        error = umtxq_sleep(uq, "ucond", timeout == NULL ?
            NULL : &timo);
    }

    if ((uq->uq_flags & UQF_UMTXQ) == 0)
        error = 0;
    else {
        /*
         * This must be a timeout, an interruption by a signal, or
         * a spurious wakeup; clear the c_has_waiters flag when
         * necessary.
         */
        umtxq_busy(&uq->uq_key);
        if ((uq->uq_flags & UQF_UMTXQ) != 0) {
            int oldlen = uq->uq_cur_queue->length;
            umtxq_remove(uq);
            if (oldlen == 1) {
                umtxq_unlock(&uq->uq_key);
                suword32(&cv->c_has_waiters, 0);
                umtxq_lock(&uq->uq_key);
            }
        }
        umtxq_unbusy(&uq->uq_key);
        if (error == ERESTART)
            error = EINTR;
    }

    umtxq_unlock(&uq->uq_key);
    umtx_key_release(&uq->uq_key);
    return (error);
}
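
/*
 * Usage sketch (illustrative only): do_cv_wait() implements the atomic
 * "unlock the mutex and sleep" half of a condition wait.  Assuming a
 * hypothetical my_cond_wait() without a timeout:
 *
 *	static int
 *	my_cond_wait(struct ucond *cv, struct umutex *m)
 *	{
 *		int error;
 *
 *		// The kernel sets cv->c_has_waiters, unlocks m and
 *		// sleeps; as with pthread_cond_wait(3), the caller must
 *		// re-acquire m afterwards.
 *		error = _umtx_op(cv, UMTX_OP_CV_WAIT, 0, m, NULL) == -1 ?
 *		    errno : 0;
 *		my_mutex_lock(m);	// hypothetical re-lock
 *		return (error);
 *	}
 */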

/*
 * Signal a userland condition variable.
 */
static int
do_cv_signal(struct thread *td, struct ucond *cv)
{
    struct umtx_key key;
    int error, cnt, nwake;
    uint32_t flags;

    error = fueword32(&cv->c_flags, &flags);
    if (error == -1)
        return (EFAULT);
    if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
        return (error);
    umtxq_lock(&key);
    umtxq_busy(&key);
    cnt = umtxq_count(&key);
    nwake = umtxq_signal(&key, 1);
    if (cnt <= nwake) {
        umtxq_unlock(&key);
        error = suword32(&cv->c_has_waiters, 0);
        if (error == -1)
            error = EFAULT;
        umtxq_lock(&key);
    }
    umtxq_unbusy(&key);
    umtxq_unlock(&key);
    umtx_key_release(&key);
    return (error);
}

static int
do_cv_broadcast(struct thread *td, struct ucond *cv)
{
    struct umtx_key key;
    int error;
    uint32_t flags;

    error = fueword32(&cv->c_flags, &flags);
    if (error == -1)
        return (EFAULT);
    if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
        return (error);

    umtxq_lock(&key);
    umtxq_busy(&key);
    umtxq_signal(&key, INT_MAX);
    umtxq_unlock(&key);

    error = suword32(&cv->c_has_waiters, 0);
    if (error == -1)
        error = EFAULT;

    umtxq_unbusy_unlocked(&key);

    umtx_key_release(&key);
    return (error);
}
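
/*
 * Note (illustrative only): the rwlock operations below treat rw_state
 * as a packed word: a reader count in the low bits plus the
 * URWLOCK_WRITE_OWNER, URWLOCK_WRITE_WAITERS and URWLOCK_READ_WAITERS
 * flag bits.  A hypothetical decoder:
 *
 *	static void
 *	urwlock_decode(int32_t state)
 *	{
 *		printf("readers=%d wrowner=%d wrwait=%d rdwait=%d\n",
 *		    URWLOCK_READER_COUNT(state),
 *		    (state & URWLOCK_WRITE_OWNER) != 0,
 *		    (state & URWLOCK_WRITE_WAITERS) != 0,
 *		    (state & URWLOCK_READ_WAITERS) != 0);
 *	}
 */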

static int
do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag,
    struct _umtx_time *timeout)
{
    struct abs_timeout timo;
    struct umtx_q *uq;
    uint32_t flags, wrflags;
    int32_t state, oldstate;
    int32_t blocked_readers;
    int error, error1, rv;

    uq = td->td_umtxq;
    error = fueword32(&rwlock->rw_flags, &flags);
    if (error == -1)
        return (EFAULT);
    error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags),
        &uq->uq_key);
    if (error != 0)
        return (error);

    if (timeout != NULL)
        abs_timeout_init2(&timo, timeout);

    wrflags = URWLOCK_WRITE_OWNER;
    if (!(fflag & URWLOCK_PREFER_READER) &&
        !(flags & URWLOCK_PREFER_READER))
        wrflags |= URWLOCK_WRITE_WAITERS;

    for (;;) {
        rv = fueword32(&rwlock->rw_state, &state);
        if (rv == -1) {
            umtx_key_release(&uq->uq_key);
            return (EFAULT);
        }

        /* try to lock it */
        while (!(state & wrflags)) {
            if (__predict_false(URWLOCK_READER_COUNT(state) ==
                URWLOCK_MAX_READERS)) {
                umtx_key_release(&uq->uq_key);
                return (EAGAIN);
            }
            rv = casueword32(&rwlock->rw_state, state,
                &oldstate, state + 1);
            if (rv == -1) {
                umtx_key_release(&uq->uq_key);
                return (EFAULT);
            }
            if (oldstate == state) {
                umtx_key_release(&uq->uq_key);
                return (0);
            }
            error = umtxq_check_susp(td);
            if (error != 0)
                break;
            state = oldstate;
        }

        if (error)
            break;

        /* grab monitor lock */
        umtxq_lock(&uq->uq_key);
        umtxq_busy(&uq->uq_key);
        umtxq_unlock(&uq->uq_key);

        /*
         * Re-read the state, in case it changed between the
         * try-lock above and the check below.
         */
        rv = fueword32(&rwlock->rw_state, &state);
        if (rv == -1)
            error = EFAULT;

        /* set read contention bit */
        while (error == 0 && (state & wrflags) &&
            !(state & URWLOCK_READ_WAITERS)) {
            rv = casueword32(&rwlock->rw_state, state,
                &oldstate, state | URWLOCK_READ_WAITERS);
            if (rv == -1) {
                error = EFAULT;
                break;
            }
            if (oldstate == state)
                goto sleep;
            state = oldstate;
            error = umtxq_check_susp(td);
            if (error != 0)
                break;
        }
        if (error != 0) {
            umtxq_unbusy_unlocked(&uq->uq_key);
            break;
        }

        /* state changed while setting flags, restart */
        if (!(state & wrflags)) {
            umtxq_unbusy_unlocked(&uq->uq_key);
            error = umtxq_check_susp(td);
            if (error != 0)
                break;
            continue;
        }

sleep:
        /*
         * The contention bit is set; before sleeping, increment
         * the read-waiter count.
         */
        rv = fueword32(&rwlock->rw_blocked_readers,
            &blocked_readers);
        if (rv == -1) {
            umtxq_unbusy_unlocked(&uq->uq_key);
            error = EFAULT;
            break;
        }
        suword32(&rwlock->rw_blocked_readers, blocked_readers + 1);

        while (state & wrflags) {
            umtxq_lock(&uq->uq_key);
            umtxq_insert(uq);
            umtxq_unbusy(&uq->uq_key);

            error = umtxq_sleep(uq, "urdlck", timeout == NULL ?
                NULL : &timo);

            umtxq_busy(&uq->uq_key);
            umtxq_remove(uq);
            umtxq_unlock(&uq->uq_key);
            if (error)
                break;
            rv = fueword32(&rwlock->rw_state, &state);
            if (rv == -1) {
                error = EFAULT;
                break;
            }
        }

        /*
         * Decrement the read-waiter count and, if we were the last
         * blocked reader, clear the read-contention bit.
         */
        rv = fueword32(&rwlock->rw_blocked_readers,
            &blocked_readers);
        if (rv == -1) {
            umtxq_unbusy_unlocked(&uq->uq_key);
            error = EFAULT;
            break;
        }
        suword32(&rwlock->rw_blocked_readers, blocked_readers - 1);
        if (blocked_readers == 1) {
            rv = fueword32(&rwlock->rw_state, &state);
            if (rv == -1) {
                umtxq_unbusy_unlocked(&uq->uq_key);
                error = EFAULT;
                break;
            }
            for (;;) {
                rv = casueword32(&rwlock->rw_state, state,
                    &oldstate, state & ~URWLOCK_READ_WAITERS);
                if (rv == -1) {
                    error = EFAULT;
                    break;
                }
                if (oldstate == state)
                    break;
                state = oldstate;
                error1 = umtxq_check_susp(td);
                if (error1 != 0) {
                    if (error == 0)
                        error = error1;
                    break;
                }
            }
        }

        umtxq_unbusy_unlocked(&uq->uq_key);
        if (error != 0)
            break;
    }
    umtx_key_release(&uq->uq_key);
    if (error == ERESTART)
        error = EINTR;
    return (error);
}

static int
do_rw_wrlock(struct thread *td, struct urwlock *rwlock,
    struct _umtx_time *timeout)
{
    struct abs_timeout timo;
    struct umtx_q *uq;
    uint32_t flags;
    int32_t state, oldstate;
    int32_t blocked_writers;
    int32_t blocked_readers;
    int error, error1, rv;

    uq = td->td_umtxq;
    error = fueword32(&rwlock->rw_flags, &flags);
    if (error == -1)
        return (EFAULT);
    error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags),
        &uq->uq_key);
    if (error != 0)
        return (error);

    if (timeout != NULL)
        abs_timeout_init2(&timo, timeout);

    blocked_readers = 0;
    for (;;) {
        rv = fueword32(&rwlock->rw_state, &state);
        if (rv == -1) {
            umtx_key_release(&uq->uq_key);
            return (EFAULT);
        }
        while (!(state & URWLOCK_WRITE_OWNER) &&
            URWLOCK_READER_COUNT(state) == 0) {
            rv = casueword32(&rwlock->rw_state, state,
                &oldstate, state | URWLOCK_WRITE_OWNER);
            if (rv == -1) {
                umtx_key_release(&uq->uq_key);
                return (EFAULT);
            }
            if (oldstate == state) {
                umtx_key_release(&uq->uq_key);
                return (0);
            }
            state = oldstate;
            error = umtxq_check_susp(td);
            if (error != 0)
                break;
        }

        if (error) {
            if (!(state & (URWLOCK_WRITE_OWNER |
                URWLOCK_WRITE_WAITERS)) &&
                blocked_readers != 0) {
                umtxq_lock(&uq->uq_key);
                umtxq_busy(&uq->uq_key);
                umtxq_signal_queue(&uq->uq_key, INT_MAX,
                    UMTX_SHARED_QUEUE);
                umtxq_unbusy(&uq->uq_key);
                umtxq_unlock(&uq->uq_key);
            }

            break;
        }

        /* grab monitor lock */
        umtxq_lock(&uq->uq_key);
        umtxq_busy(&uq->uq_key);
        umtxq_unlock(&uq->uq_key);

        /*
         * Re-read the state, in case it changed between the
         * try-lock above and the check below.
         */
        rv = fueword32(&rwlock->rw_state, &state);
        if (rv == -1)
            error = EFAULT;

        while (error == 0 && ((state & URWLOCK_WRITE_OWNER) ||
            URWLOCK_READER_COUNT(state) != 0) &&
            (state & URWLOCK_WRITE_WAITERS) == 0) {
            rv = casueword32(&rwlock->rw_state, state,
                &oldstate, state | URWLOCK_WRITE_WAITERS);
            if (rv == -1) {
                error = EFAULT;
                break;
            }
            if (oldstate == state)
                goto sleep;
            state = oldstate;
            error = umtxq_check_susp(td);
            if (error != 0)
                break;
        }
        if (error != 0) {
            umtxq_unbusy_unlocked(&uq->uq_key);
            break;
        }

        if (!(state & URWLOCK_WRITE_OWNER) &&
            URWLOCK_READER_COUNT(state) == 0) {
            umtxq_unbusy_unlocked(&uq->uq_key);
            error = umtxq_check_susp(td);
            if (error != 0)
                break;
            continue;
        }
sleep:
        rv = fueword32(&rwlock->rw_blocked_writers,
            &blocked_writers);
        if (rv == -1) {
            umtxq_unbusy_unlocked(&uq->uq_key);
            error = EFAULT;
            break;
        }
        suword32(&rwlock->rw_blocked_writers, blocked_writers + 1);

        while ((state & URWLOCK_WRITE_OWNER) ||
            URWLOCK_READER_COUNT(state) != 0) {
            umtxq_lock(&uq->uq_key);
            umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE);
            umtxq_unbusy(&uq->uq_key);

            error = umtxq_sleep(uq, "uwrlck", timeout == NULL ?
                NULL : &timo);

            umtxq_busy(&uq->uq_key);
            umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE);
            umtxq_unlock(&uq->uq_key);
            if (error)
                break;
            rv = fueword32(&rwlock->rw_state, &state);
            if (rv == -1) {
                error = EFAULT;
                break;
            }
        }

        rv = fueword32(&rwlock->rw_blocked_writers,
            &blocked_writers);
        if (rv == -1) {
            umtxq_unbusy_unlocked(&uq->uq_key);
            error = EFAULT;
            break;
        }
        suword32(&rwlock->rw_blocked_writers, blocked_writers - 1);
        if (blocked_writers == 1) {
            rv = fueword32(&rwlock->rw_state, &state);
            if (rv == -1) {
                umtxq_unbusy_unlocked(&uq->uq_key);
                error = EFAULT;
                break;
            }
            for (;;) {
                rv = casueword32(&rwlock->rw_state, state,
                    &oldstate, state & ~URWLOCK_WRITE_WAITERS);
                if (rv == -1) {
                    error = EFAULT;
                    break;
                }
                if (oldstate == state)
                    break;
                state = oldstate;
                error1 = umtxq_check_susp(td);
                /*
                 * We may leave URWLOCK_WRITE_WAITERS set
                 * behind here, but that should not harm
                 * correctness.
                 */
                if (error1 != 0) {
                    if (error == 0)
                        error = error1;
                    break;
                }
            }
            rv = fueword32(&rwlock->rw_blocked_readers,
                &blocked_readers);
            if (rv == -1) {
                umtxq_unbusy_unlocked(&uq->uq_key);
                error = EFAULT;
                break;
            }
        } else
            blocked_readers = 0;

        umtxq_unbusy_unlocked(&uq->uq_key);
    }

    umtx_key_release(&uq->uq_key);
    if (error == ERESTART)
        error = EINTR;
    return (error);
}

static int
do_rw_unlock(struct thread *td, struct urwlock *rwlock)
{
    struct umtx_q *uq;
    uint32_t flags;
    int32_t state, oldstate;
    int error, rv, q, count;

    uq = td->td_umtxq;
    error = fueword32(&rwlock->rw_flags, &flags);
    if (error == -1)
        return (EFAULT);
    error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags),
        &uq->uq_key);
    if (error != 0)
        return (error);

    error = fueword32(&rwlock->rw_state, &state);
    if (error == -1) {
        error = EFAULT;
        goto out;
    }
    if (state & URWLOCK_WRITE_OWNER) {
        for (;;) {
            rv = casueword32(&rwlock->rw_state, state,
                &oldstate, state & ~URWLOCK_WRITE_OWNER);
            if (rv == -1) {
                error = EFAULT;
                goto out;
            }
            if (oldstate != state) {
                state = oldstate;
                if (!(oldstate & URWLOCK_WRITE_OWNER)) {
                    error = EPERM;
                    goto out;
                }
                error = umtxq_check_susp(td);
                if (error != 0)
                    goto out;
            } else
                break;
        }
    } else if (URWLOCK_READER_COUNT(state) != 0) {
        for (;;) {
            rv = casueword32(&rwlock->rw_state, state,
                &oldstate, state - 1);
            if (rv == -1) {
                error = EFAULT;
                goto out;
            }
            if (oldstate != state) {
                state = oldstate;
                if (URWLOCK_READER_COUNT(oldstate) == 0) {
                    error = EPERM;
                    goto out;
                }
                error = umtxq_check_susp(td);
                if (error != 0)
                    goto out;
            } else
                break;
        }
    } else {
        error = EPERM;
        goto out;
    }

    count = 0;

    if (!(flags & URWLOCK_PREFER_READER)) {
        if (state & URWLOCK_WRITE_WAITERS) {
            count = 1;
            q = UMTX_EXCLUSIVE_QUEUE;
        } else if (state & URWLOCK_READ_WAITERS) {
            count = INT_MAX;
            q = UMTX_SHARED_QUEUE;
        }
    } else {
        if (state & URWLOCK_READ_WAITERS) {
            count = INT_MAX;
            q = UMTX_SHARED_QUEUE;
        } else if (state & URWLOCK_WRITE_WAITERS) {
            count = 1;
            q = UMTX_EXCLUSIVE_QUEUE;
        }
    }

    if (count) {
        umtxq_lock(&uq->uq_key);
        umtxq_busy(&uq->uq_key);
        umtxq_signal_queue(&uq->uq_key, count, q);
        umtxq_unbusy(&uq->uq_key);
        umtxq_unlock(&uq->uq_key);
    }
out:
    umtx_key_release(&uq->uq_key);
    return (error);
}
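
/*
 * Note (illustrative only): the wake policy in do_rw_unlock() prefers
 * writers by default (wake one writer, else all readers) and inverts
 * that order under URWLOCK_PREFER_READER.  A hypothetical condensed
 * form of the same decision:
 *
 *	static int
 *	rw_unlock_wake_count(uint32_t flags, int32_t state, int *q)
 *	{
 *		bool prefer_writer = (flags & URWLOCK_PREFER_READER) == 0;
 *
 *		if ((state & URWLOCK_WRITE_WAITERS) != 0 &&
 *		    (prefer_writer ||
 *		    (state & URWLOCK_READ_WAITERS) == 0)) {
 *			*q = UMTX_EXCLUSIVE_QUEUE;
 *			return (1);		// wake a single writer
 *		}
 *		if ((state & URWLOCK_READ_WAITERS) != 0) {
 *			*q = UMTX_SHARED_QUEUE;
 *			return (INT_MAX);	// wake all readers
 *		}
 *		return (0);
 *	}
 */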

#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
static int
do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout)
{
    struct abs_timeout timo;
    struct umtx_q *uq;
    uint32_t flags, count, count1;
    int error, rv;

    uq = td->td_umtxq;
    error = fueword32(&sem->_flags, &flags);
    if (error == -1)
        return (EFAULT);
    error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
    if (error != 0)
        return (error);

    if (timeout != NULL)
        abs_timeout_init2(&timo, timeout);

    umtxq_lock(&uq->uq_key);
    umtxq_busy(&uq->uq_key);
    umtxq_insert(uq);
    umtxq_unlock(&uq->uq_key);
    rv = casueword32(&sem->_has_waiters, 0, &count1, 1);
    if (rv == 0)
        rv = fueword32(&sem->_count, &count);
    if (rv == -1 || count != 0) {
        umtxq_lock(&uq->uq_key);
        umtxq_unbusy(&uq->uq_key);
        umtxq_remove(uq);
        umtxq_unlock(&uq->uq_key);
        umtx_key_release(&uq->uq_key);
        return (rv == -1 ? EFAULT : 0);
    }
    umtxq_lock(&uq->uq_key);
    umtxq_unbusy(&uq->uq_key);

    error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo);

    if ((uq->uq_flags & UQF_UMTXQ) == 0)
        error = 0;
    else {
        umtxq_remove(uq);
        /* A relative timeout cannot be restarted. */
        if (error == ERESTART && timeout != NULL &&
            (timeout->_flags & UMTX_ABSTIME) == 0)
            error = EINTR;
    }
    umtxq_unlock(&uq->uq_key);
    umtx_key_release(&uq->uq_key);
    return (error);
}

/*
 * Signal a userland semaphore.
 */
static int
do_sem_wake(struct thread *td, struct _usem *sem)
{
    struct umtx_key key;
    int error, cnt;
    uint32_t flags;

    error = fueword32(&sem->_flags, &flags);
    if (error == -1)
        return (EFAULT);
    if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
        return (error);
    umtxq_lock(&key);
    umtxq_busy(&key);
    cnt = umtxq_count(&key);
    if (cnt > 0) {
        /*
         * A positive count means the memory is still referenced
         * by user code, so the _has_waiters flag can be safely
         * updated.
         */
        if (cnt == 1) {
            umtxq_unlock(&key);
            error = suword32(&sem->_has_waiters, 0);
            umtxq_lock(&key);
            if (error == -1)
                error = EFAULT;
        }
        umtxq_signal(&key, 1);
    }
    umtxq_unbusy(&key);
    umtxq_unlock(&key);
    umtx_key_release(&key);
    return (error);
}
#endif
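
/*
 * Note (illustrative only): the legacy _usem above keeps the waiter
 * indication in a separate _has_waiters word, while _usem2 below folds
 * it into _count as USEM_HAS_WAITERS, so one CAS covers both the count
 * and the flag.  A minimal userland post sketch, assuming a
 * hypothetical my_sem2_post():
 *
 *	static int
 *	my_sem2_post(struct _usem2 *sem)
 *	{
 *		uint32_t c;
 *
 *		c = atomic_fetchadd_32(&sem->_count, 1);
 *		if ((c & USEM_HAS_WAITERS) != 0 &&
 *		    _umtx_op(sem, UMTX_OP_SEM2_WAKE, 0, NULL, NULL) == -1)
 *			return (errno);
 *		return (0);
 *	}
 */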

static int
do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout)
{
    struct abs_timeout timo;
    struct umtx_q *uq;
    uint32_t count, flags;
    int error, rv;

    uq = td->td_umtxq;
    flags = fuword32(&sem->_flags);
    error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
    if (error != 0)
        return (error);

    if (timeout != NULL)
        abs_timeout_init2(&timo, timeout);

    umtxq_lock(&uq->uq_key);
    umtxq_busy(&uq->uq_key);
    umtxq_insert(uq);
    umtxq_unlock(&uq->uq_key);
    rv = fueword32(&sem->_count, &count);
    if (rv == -1) {
        umtxq_lock(&uq->uq_key);
        umtxq_unbusy(&uq->uq_key);
        umtxq_remove(uq);
        umtxq_unlock(&uq->uq_key);
        umtx_key_release(&uq->uq_key);
        return (EFAULT);
    }
    for (;;) {
        if (USEM_COUNT(count) != 0) {
            umtxq_lock(&uq->uq_key);
            umtxq_unbusy(&uq->uq_key);
            umtxq_remove(uq);
            umtxq_unlock(&uq->uq_key);
            umtx_key_release(&uq->uq_key);
            return (0);
        }
        if (count == USEM_HAS_WAITERS)
            break;
        rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS);
        if (rv == -1) {
            umtxq_lock(&uq->uq_key);
            umtxq_unbusy(&uq->uq_key);
            umtxq_remove(uq);
            umtxq_unlock(&uq->uq_key);
            umtx_key_release(&uq->uq_key);
            return (EFAULT);
        }
        if (count == 0)
            break;
    }
    umtxq_lock(&uq->uq_key);
    umtxq_unbusy(&uq->uq_key);

    error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo);

    if ((uq->uq_flags & UQF_UMTXQ) == 0)
        error = 0;
    else {
        umtxq_remove(uq);
        if (timeout != NULL && (timeout->_flags & UMTX_ABSTIME) == 0) {
            /* A relative timeout cannot be restarted. */
            if (error == ERESTART)
                error = EINTR;
            if (error == EINTR) {
                abs_timeout_update(&timo);
                timespecsub(&timo.end, &timo.cur,
                    &timeout->_timeout);
            }
        }
    }
    umtxq_unlock(&uq->uq_key);
    umtx_key_release(&uq->uq_key);
    return (error);
}

/*
 * Signal a userland semaphore.
 */
static int
do_sem2_wake(struct thread *td, struct _usem2 *sem)
{
    struct umtx_key key;
    int error, cnt, rv;
    uint32_t count, flags;

    rv = fueword32(&sem->_flags, &flags);
    if (rv == -1)
        return (EFAULT);
    if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
        return (error);
    umtxq_lock(&key);
    umtxq_busy(&key);
    cnt = umtxq_count(&key);
    if (cnt > 0) {
        /*
         * If this was the last sleeping thread, clear the waiters
         * flag in _count.
         */
        if (cnt == 1) {
            umtxq_unlock(&key);
            rv = fueword32(&sem->_count, &count);
            while (rv != -1 && count & USEM_HAS_WAITERS)
                rv = casueword32(&sem->_count, count, &count,
                    count & ~USEM_HAS_WAITERS);
            if (rv == -1)
                error = EFAULT;
            umtxq_lock(&key);
        }

        umtxq_signal(&key, 1);
    }
    umtxq_unbusy(&key);
    umtxq_unlock(&key);
    umtx_key_release(&key);
    return (error);
}

inline int
umtx_copyin_timeout(const void *addr, struct timespec *tsp)
{
    int error;

    error = copyin(addr, tsp, sizeof(struct timespec));
    if (error == 0) {
        if (tsp->tv_sec < 0 ||
            tsp->tv_nsec >= 1000000000 ||
            tsp->tv_nsec < 0)
            error = EINVAL;
    }
    return (error);
}

static inline int
umtx_copyin_umtx_time(const void *addr, size_t size, struct _umtx_time *tp)
{
    int error;

    if (size <= sizeof(struct timespec)) {
        tp->_clockid = CLOCK_REALTIME;
        tp->_flags = 0;
        error = copyin(addr, &tp->_timeout, sizeof(struct timespec));
    } else
        error = copyin(addr, tp, sizeof(struct _umtx_time));
    if (error != 0)
        return (error);
    if (tp->_timeout.tv_sec < 0 ||
        tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0)
        return (EINVAL);
    return (0);
}
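
/*
 * Usage sketch (illustrative only): callers pass timeouts either as a
 * bare struct timespec or as a full struct _umtx_time, and
 * umtx_copyin_umtx_time() distinguishes the two by the size smuggled
 * through the uaddr argument.  Assuming a hypothetical my_wait_uint():
 *
 *	static int
 *	my_wait_uint(u_int *p, u_int expect, const struct timespec *rel)
 *	{
 *		struct _umtx_time ut = {
 *			._timeout = *rel,
 *			._flags = 0,		// relative timeout
 *			._clockid = CLOCK_MONOTONIC,
 *		};
 *
 *		if (_umtx_op(p, UMTX_OP_WAIT_UINT_PRIVATE, expect,
 *		    (void *)sizeof(ut), &ut) == -1)
 *			return (errno);
 *		return (0);
 *	}
 */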

static int
__umtx_op_unimpl(struct thread *td, struct _umtx_op_args *uap)
{

    return (EOPNOTSUPP);
}

static int
__umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
{
    struct _umtx_time timeout, *tm_p;
    int error;

    if (uap->uaddr2 == NULL)
        tm_p = NULL;
    else {
        error = umtx_copyin_umtx_time(
            uap->uaddr2, (size_t)uap->uaddr1, &timeout);
        if (error != 0)
            return (error);
        tm_p = &timeout;
    }
    return (do_wait(td, uap->obj, uap->val, tm_p, 0, 0));
}

static int
__umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap)
{
    struct _umtx_time timeout, *tm_p;
    int error;

    if (uap->uaddr2 == NULL)
        tm_p = NULL;
    else {
        error = umtx_copyin_umtx_time(
            uap->uaddr2, (size_t)uap->uaddr1, &timeout);
        if (error != 0)
            return (error);
        tm_p = &timeout;
    }
    return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0));
}

static int
__umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap)
{
    struct _umtx_time *tm_p, timeout;
    int error;

    if (uap->uaddr2 == NULL)
        tm_p = NULL;
    else {
        error = umtx_copyin_umtx_time(
            uap->uaddr2, (size_t)uap->uaddr1, &timeout);
        if (error != 0)
            return (error);
        tm_p = &timeout;
    }
    return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1));
}

static int
__umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
{

    return (kern_umtx_wake(td, uap->obj, uap->val, 0));
}

#define BATCH_SIZE 128
static int
__umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap)
{
    char *uaddrs[BATCH_SIZE], **upp;
    int count, error, i, pos, tocopy;

    upp = (char **)uap->obj;
    error = 0;
    for (count = uap->val, pos = 0; count > 0; count -= tocopy,
        pos += tocopy) {
        tocopy = MIN(count, BATCH_SIZE);
        error = copyin(upp + pos, uaddrs, tocopy * sizeof(char *));
        if (error != 0)
            break;
        for (i = 0; i < tocopy; ++i)
            kern_umtx_wake(td, uaddrs[i], INT_MAX, 1);
        maybe_yield();
    }
    return (error);
}

static int
__umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap)
{

    return (kern_umtx_wake(td, uap->obj, uap->val, 1));
}

static int
__umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
{
    struct _umtx_time *tm_p, timeout;
    int error;

    /* Allow a null timespec (wait forever). */
    if (uap->uaddr2 == NULL)
        tm_p = NULL;
    else {
        error = umtx_copyin_umtx_time(
            uap->uaddr2, (size_t)uap->uaddr1, &timeout);
        if (error != 0)
            return (error);
        tm_p = &timeout;
    }
    return (do_lock_umutex(td, uap->obj, tm_p, 0));
}

static int
__umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
{

    return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY));
}

static int
__umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap)
{
    struct _umtx_time *tm_p, timeout;
    int error;

    /* Allow a null timespec (wait forever). */
    if (uap->uaddr2 == NULL)
        tm_p = NULL;
    else {
        error = umtx_copyin_umtx_time(
            uap->uaddr2, (size_t)uap->uaddr1, &timeout);
        if (error != 0)
            return (error);
        tm_p = &timeout;
    }
    return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT));
}

static int
__umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap)
{

    return (do_wake_umutex(td, uap->obj));
}

static int
__umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap)
{

    return (do_unlock_umutex(td, uap->obj, false));
}

static int
__umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap)
{

    return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1));
}
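
/*
 * Usage sketch (illustrative only): UMTX_OP_NWAKE_PRIVATE wakes many
 * private words in one syscall; __umtx_op_nwake_private() above copies
 * the address array in BATCH_SIZE chunks.  Assuming a hypothetical
 * my_nwake():
 *
 *	static int
 *	my_nwake(u_int **addrs, int n)
 *	{
 *		// obj points at the array, val carries its length.
 *		return (_umtx_op(addrs, UMTX_OP_NWAKE_PRIVATE, n,
 *		    NULL, NULL) == -1 ? errno : 0);
 *	}
 */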

static int
__umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap)
{
    struct timespec *ts, timeout;
    int error;

    /* Allow a null timespec (wait forever). */
    if (uap->uaddr2 == NULL)
        ts = NULL;
    else {
        error = umtx_copyin_timeout(uap->uaddr2, &timeout);
        if (error != 0)
            return (error);
        ts = &timeout;
    }
    return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
}

static int
__umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap)
{

    return (do_cv_signal(td, uap->obj));
}

static int
__umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap)
{

    return (do_cv_broadcast(td, uap->obj));
}

static int
__umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap)
{
    struct _umtx_time timeout;
    int error;

    /* Allow a null timespec (wait forever). */
    if (uap->uaddr2 == NULL) {
        error = do_rw_rdlock(td, uap->obj, uap->val, 0);
    } else {
        error = umtx_copyin_umtx_time(uap->uaddr2,
            (size_t)uap->uaddr1, &timeout);
        if (error != 0)
            return (error);
        error = do_rw_rdlock(td, uap->obj, uap->val, &timeout);
    }
    return (error);
}

static int
__umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap)
{
    struct _umtx_time timeout;
    int error;

    /* Allow a null timespec (wait forever). */
    if (uap->uaddr2 == NULL) {
        error = do_rw_wrlock(td, uap->obj, 0);
    } else {
        error = umtx_copyin_umtx_time(uap->uaddr2,
            (size_t)uap->uaddr1, &timeout);
        if (error != 0)
            return (error);

        error = do_rw_wrlock(td, uap->obj, &timeout);
    }
    return (error);
}

static int
__umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap)
{

    return (do_rw_unlock(td, uap->obj));
}

#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
static int
__umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap)
{
    struct _umtx_time *tm_p, timeout;
    int error;

    /* Allow a null timespec (wait forever). */
    if (uap->uaddr2 == NULL)
        tm_p = NULL;
    else {
        error = umtx_copyin_umtx_time(
            uap->uaddr2, (size_t)uap->uaddr1, &timeout);
        if (error != 0)
            return (error);
        tm_p = &timeout;
    }
    return (do_sem_wait(td, uap->obj, tm_p));
}

static int
__umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap)
{

    return (do_sem_wake(td, uap->obj));
}
#endif

static int
__umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap)
{

    return (do_wake2_umutex(td, uap->obj, uap->val));
}

static int
__umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap)
{
    struct _umtx_time *tm_p, timeout;
    size_t uasize;
    int error;

    /* Allow a null timespec (wait forever). */
    if (uap->uaddr2 == NULL) {
        uasize = 0;
        tm_p = NULL;
    } else {
        uasize = (size_t)uap->uaddr1;
        error = umtx_copyin_umtx_time(uap->uaddr2, uasize, &timeout);
        if (error != 0)
            return (error);
        tm_p = &timeout;
    }
    error = do_sem2_wait(td, uap->obj, tm_p);
    if (error == EINTR && uap->uaddr2 != NULL &&
        (timeout._flags & UMTX_ABSTIME) == 0 &&
        uasize >= sizeof(struct _umtx_time) + sizeof(struct timespec)) {
        error = copyout(&timeout._timeout,
            (struct _umtx_time *)uap->uaddr2 + 1,
            sizeof(struct timespec));
        if (error == 0) {
            error = EINTR;
        }
    }

    return (error);
}

static int
__umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap)
{

    return (do_sem2_wake(td, uap->obj));
}

#define USHM_OBJ_UMTX(o)						\
    ((struct umtx_shm_obj_list *)(&(o)->umtx_data))

#define USHMF_REG_LINKED	0x0001
#define USHMF_OBJ_LINKED	0x0002
struct umtx_shm_reg {
    TAILQ_ENTRY(umtx_shm_reg) ushm_reg_link;
    LIST_ENTRY(umtx_shm_reg) ushm_obj_link;
    struct umtx_key ushm_key;
    struct ucred *ushm_cred;
    struct shmfd *ushm_obj;
    u_int ushm_refcnt;
    u_int ushm_flags;
};

LIST_HEAD(umtx_shm_obj_list, umtx_shm_reg);
TAILQ_HEAD(umtx_shm_reg_head, umtx_shm_reg);

static uma_zone_t umtx_shm_reg_zone;
static struct umtx_shm_reg_head umtx_shm_registry[UMTX_CHAINS];
static struct mtx umtx_shm_lock;
static struct umtx_shm_reg_head umtx_shm_reg_delfree =
    TAILQ_HEAD_INITIALIZER(umtx_shm_reg_delfree);

static void umtx_shm_free_reg(struct umtx_shm_reg *reg);

static void
umtx_shm_reg_delfree_tq(void *context __unused, int pending __unused)
{
    struct umtx_shm_reg_head d;
    struct umtx_shm_reg *reg, *reg1;

    TAILQ_INIT(&d);
    mtx_lock(&umtx_shm_lock);
    TAILQ_CONCAT(&d, &umtx_shm_reg_delfree, ushm_reg_link);
    mtx_unlock(&umtx_shm_lock);
    TAILQ_FOREACH_SAFE(reg, &d, ushm_reg_link, reg1) {
        TAILQ_REMOVE(&d, reg, ushm_reg_link);
        umtx_shm_free_reg(reg);
    }
}

static struct task umtx_shm_reg_delfree_task =
    TASK_INITIALIZER(0, umtx_shm_reg_delfree_tq, NULL);

static struct umtx_shm_reg *
umtx_shm_find_reg_locked(const struct umtx_key *key)
{
    struct umtx_shm_reg *reg;
    struct umtx_shm_reg_head *reg_head;

    KASSERT(key->shared, ("umtx_shm_find_reg_locked: private key"));
    mtx_assert(&umtx_shm_lock, MA_OWNED);
    reg_head = &umtx_shm_registry[key->hash];
    TAILQ_FOREACH(reg, reg_head, ushm_reg_link) {
        KASSERT(reg->ushm_key.shared,
            ("non-shared key on reg %p %d", reg,
            reg->ushm_key.shared));
        if (reg->ushm_key.info.shared.object ==
            key->info.shared.object &&
            reg->ushm_key.info.shared.offset ==
            key->info.shared.offset) {
            KASSERT(reg->ushm_key.type == TYPE_SHM, ("TYPE_USHM"));
            KASSERT(reg->ushm_refcnt > 0,
                ("reg %p refcnt 0 onlist", reg));
            KASSERT((reg->ushm_flags & USHMF_REG_LINKED) != 0,
                ("reg %p not linked", reg));
            reg->ushm_refcnt++;
            return (reg);
        }
    }
    return (NULL);
}

static struct umtx_shm_reg *
umtx_shm_find_reg(const struct umtx_key *key)
{
    struct umtx_shm_reg *reg;

    mtx_lock(&umtx_shm_lock);
    reg = umtx_shm_find_reg_locked(key);
    mtx_unlock(&umtx_shm_lock);
    return (reg);
}

static void
umtx_shm_free_reg(struct umtx_shm_reg *reg)
{

    chgumtxcnt(reg->ushm_cred->cr_ruidinfo, -1, 0);
    crfree(reg->ushm_cred);
    shm_drop(reg->ushm_obj);
    uma_zfree(umtx_shm_reg_zone, reg);
}

static bool
umtx_shm_unref_reg_locked(struct umtx_shm_reg *reg, bool force)
{
    bool res;

    mtx_assert(&umtx_shm_lock, MA_OWNED);
    KASSERT(reg->ushm_refcnt > 0, ("ushm_reg %p refcnt 0", reg));
    reg->ushm_refcnt--;
    res = reg->ushm_refcnt == 0;
    if (res || force) {
        if ((reg->ushm_flags & USHMF_REG_LINKED) != 0) {
            TAILQ_REMOVE(&umtx_shm_registry[reg->ushm_key.hash],
                reg, ushm_reg_link);
            reg->ushm_flags &= ~USHMF_REG_LINKED;
        }
        if ((reg->ushm_flags & USHMF_OBJ_LINKED) != 0) {
            LIST_REMOVE(reg, ushm_obj_link);
            reg->ushm_flags &= ~USHMF_OBJ_LINKED;
        }
    }
    return (res);
}

static void
umtx_shm_unref_reg(struct umtx_shm_reg *reg, bool force)
{
    vm_object_t object;
    bool dofree;

    if (force) {
        object = reg->ushm_obj->shm_object;
        VM_OBJECT_WLOCK(object);
        object->flags |= OBJ_UMTXDEAD;
        VM_OBJECT_WUNLOCK(object);
    }
    mtx_lock(&umtx_shm_lock);
    dofree = umtx_shm_unref_reg_locked(reg, force);
    mtx_unlock(&umtx_shm_lock);
    if (dofree)
        umtx_shm_free_reg(reg);
}

void
umtx_shm_object_init(vm_object_t object)
{

    LIST_INIT(USHM_OBJ_UMTX(object));
}

void
umtx_shm_object_terminated(vm_object_t object)
{
    struct umtx_shm_reg *reg, *reg1;
    bool dofree;

    if (LIST_EMPTY(USHM_OBJ_UMTX(object)))
        return;

    dofree = false;
    mtx_lock(&umtx_shm_lock);
    LIST_FOREACH_SAFE(reg, USHM_OBJ_UMTX(object), ushm_obj_link, reg1) {
        if (umtx_shm_unref_reg_locked(reg, true)) {
            TAILQ_INSERT_TAIL(&umtx_shm_reg_delfree, reg,
                ushm_reg_link);
            dofree = true;
        }
    }
    mtx_unlock(&umtx_shm_lock);
    if (dofree)
        taskqueue_enqueue(taskqueue_thread, &umtx_shm_reg_delfree_task);
}

static int
umtx_shm_create_reg(struct thread *td, const struct umtx_key *key,
    struct umtx_shm_reg **res)
{
    struct umtx_shm_reg *reg, *reg1;
    struct ucred *cred;
    int error;

    reg = umtx_shm_find_reg(key);
    if (reg != NULL) {
        *res = reg;
        return (0);
    }
    cred = td->td_ucred;
    if (!chgumtxcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_UMTXP)))
        return (ENOMEM);
    reg = uma_zalloc(umtx_shm_reg_zone, M_WAITOK | M_ZERO);
    reg->ushm_refcnt = 1;
    bcopy(key, &reg->ushm_key, sizeof(*key));
    reg->ushm_obj = shm_alloc(td->td_ucred, O_RDWR);
    reg->ushm_cred = crhold(cred);
    error = shm_dotruncate(reg->ushm_obj, PAGE_SIZE);
    if (error != 0) {
        umtx_shm_free_reg(reg);
        return (error);
    }
    mtx_lock(&umtx_shm_lock);
    reg1 = umtx_shm_find_reg_locked(key);
    if (reg1 != NULL) {
        mtx_unlock(&umtx_shm_lock);
        umtx_shm_free_reg(reg);
        *res = reg1;
        return (0);
    }
    reg->ushm_refcnt++;
    TAILQ_INSERT_TAIL(&umtx_shm_registry[key->hash], reg, ushm_reg_link);
    LIST_INSERT_HEAD(USHM_OBJ_UMTX(key->info.shared.object), reg,
        ushm_obj_link);
    reg->ushm_flags = USHMF_REG_LINKED | USHMF_OBJ_LINKED;
    mtx_unlock(&umtx_shm_lock);
    *res = reg;
    return (0);
}

static int
umtx_shm_alive(struct thread *td, void *addr)
{
    vm_map_t map;
    vm_map_entry_t entry;
    vm_object_t object;
    vm_pindex_t pindex;
    vm_prot_t prot;
    int res, ret;
    boolean_t wired;

    map = &td->td_proc->p_vmspace->vm_map;
    res = vm_map_lookup(&map, (uintptr_t)addr, VM_PROT_READ, &entry,
        &object, &pindex, &prot, &wired);
    if (res != KERN_SUCCESS)
        return (EFAULT);
    if (object == NULL)
        ret = EINVAL;
    else
        ret = (object->flags & OBJ_UMTXDEAD) != 0 ? ENOTTY : 0;
    vm_map_lookup_done(map, entry);
    return (ret);
}

static void
umtx_shm_init(void)
{
    int i;

    umtx_shm_reg_zone = uma_zcreate("umtx_shm", sizeof(struct umtx_shm_reg),
        NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
    mtx_init(&umtx_shm_lock, "umtxshm", NULL, MTX_DEF);
    for (i = 0; i < nitems(umtx_shm_registry); i++)
        TAILQ_INIT(&umtx_shm_registry[i]);
}

static int
umtx_shm(struct thread *td, void *addr, u_int flags)
{
    struct umtx_key key;
    struct umtx_shm_reg *reg;
    struct file *fp;
    int error, fd;

    if (__bitcount(flags & (UMTX_SHM_CREAT | UMTX_SHM_LOOKUP |
        UMTX_SHM_DESTROY | UMTX_SHM_ALIVE)) != 1)
        return (EINVAL);
    if ((flags & UMTX_SHM_ALIVE) != 0)
        return (umtx_shm_alive(td, addr));
    error = umtx_key_get(addr, TYPE_SHM, PROCESS_SHARE, &key);
    if (error != 0)
        return (error);
    KASSERT(key.shared == 1, ("non-shared key"));
    if ((flags & UMTX_SHM_CREAT) != 0) {
        error = umtx_shm_create_reg(td, &key, &reg);
    } else {
        reg = umtx_shm_find_reg(&key);
        if (reg == NULL)
            error = ESRCH;
    }
    umtx_key_release(&key);
    if (error != 0)
        return (error);
    KASSERT(reg != NULL, ("no reg"));
    if ((flags & UMTX_SHM_DESTROY) != 0) {
        umtx_shm_unref_reg(reg, true);
    } else {
#if 0
#ifdef MAC
        error = mac_posixshm_check_open(td->td_ucred,
            reg->ushm_obj, FFLAGS(O_RDWR));
        if (error == 0)
#endif
            error = shm_access(reg->ushm_obj, td->td_ucred,
                FFLAGS(O_RDWR));
        if (error == 0)
#endif
            error = falloc_caps(td, &fp, &fd, O_CLOEXEC, NULL);
        if (error == 0) {
            shm_hold(reg->ushm_obj);
            finit(fp, FFLAGS(O_RDWR), DTYPE_SHM, reg->ushm_obj,
                &shm_ops);
            td->td_retval[0] = fd;
            fdrop(fp, td);
        }
    }
    umtx_shm_unref_reg(reg, false);
    return (error);
}

static int
__umtx_op_shm(struct thread *td, struct _umtx_op_args *uap)
{

    return (umtx_shm(td, uap->uaddr1, uap->val));
}

static int
umtx_robust_lists(struct thread *td, struct umtx_robust_lists_params *rbp)
{

    td->td_rb_list = rbp->robust_list_offset;
    td->td_rbp_list = rbp->robust_priv_list_offset;
    td->td_rb_inact = rbp->robust_inact_offset;
    return (0);
}

static int
__umtx_op_robust_lists(struct thread *td, struct _umtx_op_args *uap)
{
    struct umtx_robust_lists_params rb;
    int error;

    if (uap->val > sizeof(rb))
        return (EINVAL);
    bzero(&rb, sizeof(rb));
    error = copyin(uap->uaddr1, &rb, uap->val);
    if (error != 0)
        return (error);
    return (umtx_robust_lists(td, &rb));
}
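
/*
 * Usage sketch (illustrative only): UMTX_SHM_CREAT/UMTX_SHM_LOOKUP
 * return a file descriptor for the anonymous page anchoring a shared
 * umtx key, which the caller can map.  Assuming a hypothetical
 * my_shm_attach():
 *
 *	static void *
 *	my_shm_attach(void *keyaddr)
 *	{
 *		void *p;
 *		int fd;
 *
 *		fd = _umtx_op(NULL, UMTX_OP_SHM, UMTX_SHM_CREAT,
 *		    keyaddr, NULL);
 *		if (fd == -1)
 *			return (NULL);
 *		p = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE,
 *		    MAP_SHARED, fd, 0);
 *		close(fd);
 *		return (p == MAP_FAILED ? NULL : p);
 *	}
 */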

typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);

static const _umtx_op_func op_table[] = {
    [UMTX_OP_RESERVED0] = __umtx_op_unimpl,
    [UMTX_OP_RESERVED1] = __umtx_op_unimpl,
    [UMTX_OP_WAIT] = __umtx_op_wait,
    [UMTX_OP_WAKE] = __umtx_op_wake,
    [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex,
    [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex,
    [UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex,
    [UMTX_OP_SET_CEILING] = __umtx_op_set_ceiling,
    [UMTX_OP_CV_WAIT] = __umtx_op_cv_wait,
    [UMTX_OP_CV_SIGNAL] = __umtx_op_cv_signal,
    [UMTX_OP_CV_BROADCAST] = __umtx_op_cv_broadcast,
    [UMTX_OP_WAIT_UINT] = __umtx_op_wait_uint,
    [UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock,
    [UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock,
    [UMTX_OP_RW_UNLOCK] = __umtx_op_rw_unlock,
    [UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private,
    [UMTX_OP_WAKE_PRIVATE] = __umtx_op_wake_private,
    [UMTX_OP_MUTEX_WAIT] = __umtx_op_wait_umutex,
    [UMTX_OP_MUTEX_WAKE] = __umtx_op_wake_umutex,
#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
    [UMTX_OP_SEM_WAIT] = __umtx_op_sem_wait,
    [UMTX_OP_SEM_WAKE] = __umtx_op_sem_wake,
#else
    [UMTX_OP_SEM_WAIT] = __umtx_op_unimpl,
    [UMTX_OP_SEM_WAKE] = __umtx_op_unimpl,
#endif
    [UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private,
    [UMTX_OP_MUTEX_WAKE2] = __umtx_op_wake2_umutex,
    [UMTX_OP_SEM2_WAIT] = __umtx_op_sem2_wait,
    [UMTX_OP_SEM2_WAKE] = __umtx_op_sem2_wake,
    [UMTX_OP_SHM] = __umtx_op_shm,
    [UMTX_OP_ROBUST_LISTS] = __umtx_op_robust_lists,
};

int
sys__umtx_op(struct thread *td, struct _umtx_op_args *uap)
{

    if ((unsigned)uap->op < nitems(op_table))
        return (*op_table[uap->op])(td, uap);
    return (EINVAL);
}
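
/*
 * Usage sketch (illustrative only): every operation funnels through
 * sys__umtx_op(), exposed to userland as _umtx_op(2).  A minimal
 * wait/wake pair on a plain 32-bit word:
 *
 *	volatile u_int word = 0;
 *
 *	// Waiter: sleep while word is still 0.
 *	_umtx_op(__DEVOLATILE(void *, &word), UMTX_OP_WAIT_UINT,
 *	    0, NULL, NULL);
 *
 *	// Waker, in another thread: flip the word, wake one sleeper.
 *	atomic_store_rel_32(&word, 1);
 *	_umtx_op(__DEVOLATILE(void *, &word), UMTX_OP_WAKE, 1,
 *	    NULL, NULL);
 */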

#ifdef COMPAT_FREEBSD32

struct timespec32 {
    int32_t tv_sec;
    int32_t tv_nsec;
};

struct umtx_time32 {
    struct timespec32 timeout;
    uint32_t flags;
    uint32_t clockid;
};

static inline int
umtx_copyin_timeout32(void *addr, struct timespec *tsp)
{
    struct timespec32 ts32;
    int error;

    error = copyin(addr, &ts32, sizeof(struct timespec32));
    if (error == 0) {
        if (ts32.tv_sec < 0 ||
            ts32.tv_nsec >= 1000000000 ||
            ts32.tv_nsec < 0)
            error = EINVAL;
        else {
            tsp->tv_sec = ts32.tv_sec;
            tsp->tv_nsec = ts32.tv_nsec;
        }
    }
    return (error);
}

static inline int
umtx_copyin_umtx_time32(const void *addr, size_t size, struct _umtx_time *tp)
{
    struct umtx_time32 t32;
    int error;

    t32.clockid = CLOCK_REALTIME;
    t32.flags = 0;
    if (size <= sizeof(struct timespec32))
        error = copyin(addr, &t32.timeout, sizeof(struct timespec32));
    else
        error = copyin(addr, &t32, sizeof(struct umtx_time32));
    if (error != 0)
        return (error);
    if (t32.timeout.tv_sec < 0 ||
        t32.timeout.tv_nsec >= 1000000000 || t32.timeout.tv_nsec < 0)
        return (EINVAL);
    tp->_timeout.tv_sec = t32.timeout.tv_sec;
    tp->_timeout.tv_nsec = t32.timeout.tv_nsec;
    tp->_flags = t32.flags;
    tp->_clockid = t32.clockid;
    return (0);
}

static int
__umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
{
    struct _umtx_time *tm_p, timeout;
    int error;

    if (uap->uaddr2 == NULL)
        tm_p = NULL;
    else {
        error = umtx_copyin_umtx_time32(uap->uaddr2,
            (size_t)uap->uaddr1, &timeout);
        if (error != 0)
            return (error);
        tm_p = &timeout;
    }
    return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0));
}

static int
__umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
{
    struct _umtx_time *tm_p, timeout;
    int error;

    /* Allow a null timespec (wait forever). */
    if (uap->uaddr2 == NULL)
        tm_p = NULL;
    else {
        error = umtx_copyin_umtx_time32(uap->uaddr2,
            (size_t)uap->uaddr1, &timeout);
        if (error != 0)
            return (error);
        tm_p = &timeout;
    }
    return (do_lock_umutex(td, uap->obj, tm_p, 0));
}

static int
__umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
{
    struct _umtx_time *tm_p, timeout;
    int error;

    /* Allow a null timespec (wait forever). */
    if (uap->uaddr2 == NULL)
        tm_p = NULL;
    else {
        error = umtx_copyin_umtx_time32(uap->uaddr2,
            (size_t)uap->uaddr1, &timeout);
        if (error != 0)
            return (error);
        tm_p = &timeout;
    }
    return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT));
}

static int
__umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
{
    struct timespec *ts, timeout;
    int error;

    /* Allow a null timespec (wait forever). */
    if (uap->uaddr2 == NULL)
        ts = NULL;
    else {
        error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
        if (error != 0)
            return (error);
        ts = &timeout;
    }
    return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
}

static int
__umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap)
{
    struct _umtx_time timeout;
    int error;

    /* Allow a null timespec (wait forever). */
    if (uap->uaddr2 == NULL) {
        error = do_rw_rdlock(td, uap->obj, uap->val, 0);
    } else {
        error = umtx_copyin_umtx_time32(uap->uaddr2,
            (size_t)uap->uaddr1, &timeout);
        if (error != 0)
            return (error);
        error = do_rw_rdlock(td, uap->obj, uap->val, &timeout);
    }
    return (error);
}

static int
__umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap)
{
    struct _umtx_time timeout;
    int error;

    /* Allow a null timespec (wait forever). */
    if (uap->uaddr2 == NULL) {
        error = do_rw_wrlock(td, uap->obj, 0);
    } else {
        error = umtx_copyin_umtx_time32(uap->uaddr2,
            (size_t)uap->uaddr1, &timeout);
        if (error != 0)
            return (error);
        error = do_rw_wrlock(td, uap->obj, &timeout);
    }
    return (error);
}

static int
__umtx_op_wait_uint_private_compat32(struct thread *td,
    struct _umtx_op_args *uap)
{
    struct _umtx_time *tm_p, timeout;
    int error;

    if (uap->uaddr2 == NULL)
        tm_p = NULL;
    else {
        error = umtx_copyin_umtx_time32(
            uap->uaddr2, (size_t)uap->uaddr1, &timeout);
        if (error != 0)
            return (error);
        tm_p = &timeout;
    }
    return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1));
}

#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
static int
__umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
{
    struct _umtx_time *tm_p, timeout;
    int error;

    /* Allow a null timespec (wait forever). */
    if (uap->uaddr2 == NULL)
        tm_p = NULL;
    else {
        error = umtx_copyin_umtx_time32(uap->uaddr2,
            (size_t)uap->uaddr1, &timeout);
        if (error != 0)
            return (error);
        tm_p = &timeout;
    }
    return (do_sem_wait(td, uap->obj, tm_p));
}
#endif

static int
__umtx_op_sem2_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
{
    struct _umtx_time *tm_p, timeout;
    size_t uasize;
    int error;

    /* Allow a null timespec (wait forever). */
    if (uap->uaddr2 == NULL) {
        uasize = 0;
        tm_p = NULL;
    } else {
        uasize = (size_t)uap->uaddr1;
        error = umtx_copyin_umtx_time32(uap->uaddr2, uasize, &timeout);
        if (error != 0)
            return (error);
        tm_p = &timeout;
    }
    error = do_sem2_wait(td, uap->obj, tm_p);
    if (error == EINTR && uap->uaddr2 != NULL &&
        (timeout._flags & UMTX_ABSTIME) == 0 &&
        uasize >= sizeof(struct umtx_time32) + sizeof(struct timespec32)) {
        struct timespec32 remain32 = {
            .tv_sec = timeout._timeout.tv_sec,
            .tv_nsec = timeout._timeout.tv_nsec
        };
        error = copyout(&remain32,
            (struct umtx_time32 *)uap->uaddr2 + 1,
            sizeof(struct timespec32));
        if (error == 0) {
            error = EINTR;
        }
    }

    return (error);
}

static int
__umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap)
{
    uint32_t uaddrs[BATCH_SIZE], **upp;
    int count, error, i, pos, tocopy;

    upp = (uint32_t **)uap->obj;
    error = 0;
    for (count = uap->val, pos = 0; count > 0; count -= tocopy,
        pos += tocopy) {
        tocopy = MIN(count, BATCH_SIZE);
        error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t));
        if (error != 0)
            break;
        for (i = 0; i < tocopy; ++i)
            kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i],
                INT_MAX, 1);
        maybe_yield();
    }
    return (error);
}

struct umtx_robust_lists_params_compat32 {
    uint32_t robust_list_offset;
    uint32_t robust_priv_list_offset;
    uint32_t robust_inact_offset;
};

static int
__umtx_op_robust_lists_compat32(struct thread *td, struct _umtx_op_args *uap)
{
    struct umtx_robust_lists_params rb;
    struct umtx_robust_lists_params_compat32 rb32;
    int error;

    if (uap->val > sizeof(rb32))
        return (EINVAL);
    bzero(&rb, sizeof(rb));
    bzero(&rb32, sizeof(rb32));
    error = copyin(uap->uaddr1, &rb32, uap->val);
    if (error != 0)
        return (error);
    rb.robust_list_offset = rb32.robust_list_offset;
    rb.robust_priv_list_offset = rb32.robust_priv_list_offset;
    rb.robust_inact_offset = rb32.robust_inact_offset;
    return (umtx_robust_lists(td, &rb));
}

static const _umtx_op_func op_table_compat32[] = {
    [UMTX_OP_RESERVED0] = __umtx_op_unimpl,
    [UMTX_OP_RESERVED1] = __umtx_op_unimpl,
    [UMTX_OP_WAIT] = __umtx_op_wait_compat32,
    [UMTX_OP_WAKE] = __umtx_op_wake,
    [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex,
    [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex_compat32,
    [UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex,
    [UMTX_OP_SET_CEILING] = __umtx_op_set_ceiling,
    [UMTX_OP_CV_WAIT] = __umtx_op_cv_wait_compat32,
    [UMTX_OP_CV_SIGNAL] = __umtx_op_cv_signal,
    [UMTX_OP_CV_BROADCAST] = __umtx_op_cv_broadcast,
    [UMTX_OP_WAIT_UINT] = __umtx_op_wait_compat32,
    [UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock_compat32,
    [UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock_compat32,
    [UMTX_OP_RW_UNLOCK] = __umtx_op_rw_unlock,
    [UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private_compat32,
    [UMTX_OP_WAKE_PRIVATE] = __umtx_op_wake_private,
    [UMTX_OP_MUTEX_WAIT] = __umtx_op_wait_umutex_compat32,
    [UMTX_OP_MUTEX_WAKE] = __umtx_op_wake_umutex,
#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
    [UMTX_OP_SEM_WAIT] = __umtx_op_sem_wait_compat32,
    [UMTX_OP_SEM_WAKE] = __umtx_op_sem_wake,
#else
    [UMTX_OP_SEM_WAIT] = __umtx_op_unimpl,
    [UMTX_OP_SEM_WAKE] = __umtx_op_unimpl,
#endif
    [UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private32,
    [UMTX_OP_MUTEX_WAKE2] = __umtx_op_wake2_umutex,
    [UMTX_OP_SEM2_WAIT] = __umtx_op_sem2_wait_compat32,
    [UMTX_OP_SEM2_WAKE] = __umtx_op_sem2_wake,
    [UMTX_OP_SHM] = __umtx_op_shm,
    [UMTX_OP_ROBUST_LISTS] = __umtx_op_robust_lists_compat32,
};

int
freebsd32__umtx_op(struct thread *td, struct freebsd32__umtx_op_args *uap)
{

    if ((unsigned)uap->op < nitems(op_table_compat32)) {
        return (*op_table_compat32[uap->op])(td,
            (struct _umtx_op_args *)uap);
    }
    return (EINVAL);
}
#endif

void
umtx_thread_init(struct thread *td)
{

    td->td_umtxq = umtxq_alloc();
    td->td_umtxq->uq_thread = td;
}

void
umtx_thread_fini(struct thread *td)
{

    umtxq_free(td->td_umtxq);
}

/*
 * Called when a new thread is created, e.g. by fork().
 */
void
umtx_thread_alloc(struct thread *td)
{
    struct umtx_q *uq;

    uq = td->td_umtxq;
    uq->uq_inherited_pri = PRI_MAX;

    KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
    KASSERT(uq->uq_thread == td, ("uq_thread != td"));
    KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
    KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested),
        ("uq_pi_contested is not empty"));
}

/*
 * exec() hook.
 *
 * Clear the robust lists for all of the process's threads, without
 * delaying the cleanup to the thread_exit hook, since the relevant
 * address space is destroyed right now.
 */
static void
umtx_exec_hook(void *arg __unused, struct proc *p,
    struct image_params *imgp __unused)
{
    struct thread *td;

    KASSERT(p == curproc, ("need curproc"));
    KASSERT((p->p_flag & P_HADTHREADS) == 0 ||
        (p->p_flag & P_STOPPED_SINGLE) != 0,
        ("curproc must be single-threaded"));
    /*
     * There is no need to lock the list, as only this thread can be
     * running.
     */
    FOREACH_THREAD_IN_PROC(p, td) {
        KASSERT(td == curthread ||
            ((td->td_flags & TDF_BOUNDARY) != 0 &&
            TD_IS_SUSPENDED(td)),
            ("running thread %p %p", p, td));
        umtx_thread_cleanup(td);
        td->td_rb_list = td->td_rbp_list = td->td_rb_inact = 0;
    }
}
/*
 * thread_exit() hook.
 */
void
umtx_thread_exit(struct thread *td)
{

	umtx_thread_cleanup(td);
}

/*
 * Read a userland pointer, honoring the 32-bit compat pointer size
 * where applicable.
 */
static int
umtx_read_uptr(struct thread *td, uintptr_t ptr, uintptr_t *res)
{
	u_long res1;
#ifdef COMPAT_FREEBSD32
	uint32_t res32;
#endif
	int error;

#ifdef COMPAT_FREEBSD32
	if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
		error = fueword32((void *)ptr, &res32);
		if (error == 0)
			res1 = res32;
	} else
#endif
	{
		error = fueword((void *)ptr, &res1);
	}
	if (error == 0)
		*res = res1;
	else
		error = EFAULT;
	return (error);
}

/*
 * Extract the robust-list link word from an already copied-in umutex,
 * using the 32-bit layout for ILP32 processes.
 */
static void
umtx_read_rb_list(struct thread *td, struct umutex *m, uintptr_t *rb_list)
{
#ifdef COMPAT_FREEBSD32
	struct umutex32 m32;

	if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
		memcpy(&m32, m, sizeof(m32));
		*rb_list = m32.m_rb_lnk;
	} else
#endif
		*rb_list = m->m_rb_lnk;
}

/*
 * Validate and unlock one robust mutex, optionally returning the link
 * to the next list entry through rb_list.
 */
static int
umtx_handle_rb(struct thread *td, uintptr_t rbp, uintptr_t *rb_list, bool inact)
{
	struct umutex m;
	int error;

	KASSERT(td->td_proc == curproc, ("need current vmspace"));
	error = copyin((void *)rbp, &m, sizeof(m));
	if (error != 0)
		return (error);
	if (rb_list != NULL)
		umtx_read_rb_list(td, &m, rb_list);
	if ((m.m_flags & UMUTEX_ROBUST) == 0)
		return (EINVAL);
	if ((m.m_owner & ~UMUTEX_CONTESTED) != td->td_tid)
		/* inact is cleared after unlock, allow the inconsistency */
		return (inact ? 0 : EINVAL);
	return (do_unlock_umutex(td, (struct umutex *)rbp, true));
}

static void
umtx_cleanup_rb_list(struct thread *td, uintptr_t rb_list, uintptr_t *rb_inact,
    const char *name)
{
	int error, i;
	uintptr_t rbp;
	bool inact;

	if (rb_list == 0)
		return;
	error = umtx_read_uptr(td, rb_list, &rbp);
	for (i = 0; error == 0 && rbp != 0 && i < umtx_max_rb; i++) {
		if (rbp == *rb_inact) {
			inact = true;
			*rb_inact = 0;
		} else
			inact = false;
		error = umtx_handle_rb(td, rbp, &rbp, inact);
	}
	if (i == umtx_max_rb && umtx_verbose_rb) {
		uprintf("comm %s pid %d: reached umtx %smax rb %d\n",
		    td->td_proc->p_comm, td->td_proc->p_pid, name,
		    umtx_max_rb);
	}
	if (error != 0 && umtx_verbose_rb) {
		uprintf("comm %s pid %d: handling %srb error %d\n",
		    td->td_proc->p_comm, td->td_proc->p_pid, name, error);
	}
}
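/*
 * For reference, the userland layout the helpers above walk
 * (illustrative; only m_rb_lnk is taken from struct umutex itself).
 * Each robust mutex carries a link word naming the next owned robust
 * mutex, so the kernel can follow the chain with plain copyin():
 *
 *	td_rb_list -> struct umutex A { ..., m_rb_lnk = &B }
 *	              struct umutex B { ..., m_rb_lnk = &C }
 *	              struct umutex C { ..., m_rb_lnk = 0  }
 *
 * umtx_cleanup_rb_list() bounds the walk at umtx_max_rb entries so a
 * corrupted or malicious list cannot keep the exiting thread in the
 * kernel indefinitely.
 */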
4565 */ 4566 rb_inact = td->td_rb_inact; 4567 if (rb_inact != 0) 4568 (void)umtx_read_uptr(td, rb_inact, &rb_inact); 4569 umtx_cleanup_rb_list(td, td->td_rb_list, &rb_inact, ""); 4570 umtx_cleanup_rb_list(td, td->td_rbp_list, &rb_inact, "priv "); 4571 if (rb_inact != 0) 4572 (void)umtx_handle_rb(td, rb_inact, NULL, true); 4573 } 4574