/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2015, 2016 The FreeBSD Foundation
 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Portions of this software were developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_umtx_profiling.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/syscallsubr.h>
#include <sys/taskqueue.h>
#include <sys/time.h>
#include <sys/eventhandler.h>
#include <sys/umtx.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

#include <machine/atomic.h>
#include <machine/cpu.h>

#ifdef COMPAT_FREEBSD32
#include <compat/freebsd32/freebsd32_proto.h>
#endif

#define	_UMUTEX_TRY		1
#define	_UMUTEX_WAIT		2

#ifdef UMTX_PROFILING
#define	UPROF_PERC_BIGGER(w, f, sw, sf)					\
	(((w) > (sw)) || ((w) == (sw) && (f) > (sf)))
#endif

/* Priority inheritance mutex info. */
struct umtx_pi {
	/* Owner thread */
	struct thread		*pi_owner;

	/* Reference count */
	int			pi_refcount;

	/* List entry to link umtxes held by a thread */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* List entry in hash */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* List for waiters */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Identify a userland lock object */
	struct umtx_key		pi_key;
};

/* A userland synchronization object user. */
struct umtx_q {
	/* Linked list for the hash. */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Umtx key. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define	UQF_UMTXQ	0x0001

	/* The thread that is waiting. */
	struct thread		*uq_thread;

	/*
	 * Blocked on PI mutex.  Reads can use either the chain lock
	 * or umtx_lock; writes must hold both the chain lock and
	 * umtx_lock.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* On blocked list */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* Threads contending with us */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Inherited priority from PP mutex */
	u_char			uq_inherited_pri;

	/* Spare queue ready to be reused */
	struct umtxq_queue	*uq_spare_queue;

	/* The queue we are on */
	struct umtxq_queue	*uq_cur_queue;
};

TAILQ_HEAD(umtxq_head, umtx_q);

/* Per-key wait-queue */
struct umtxq_queue {
	struct umtxq_head	head;
	struct umtx_key		key;
	LIST_ENTRY(umtxq_queue)	link;
	int			length;
};

LIST_HEAD(umtxq_list, umtxq_queue);

/* Userland lock object's wait-queue chain */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx		uc_lock;

	/* List of sleep queues. */
	struct umtxq_list	uc_queue[2];
#define	UMTX_SHARED_QUEUE	0
#define	UMTX_EXCLUSIVE_QUEUE	1

	LIST_HEAD(, umtxq_queue) uc_spare_queue;

	/* Busy flag */
	char			uc_busy;

	/* Chain lock waiters */
	int			uc_waiters;

	/* All PI in the list */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;

#ifdef UMTX_PROFILING
	u_int			length;
	u_int			max_length;
#endif
};

#define	UMTXQ_LOCKED_ASSERT(uc)		mtx_assert(&(uc)->uc_lock, MA_OWNED)

/*
 * Don't propagate time-sharing priority.  There is a security reason:
 * a user could simply introduce a PI mutex, let thread A lock it, and
 * let another thread B block on it.  Because B is sleeping, its
 * priority would be boosted; A's priority would then be boosted via
 * priority propagation too, and would never be lowered even if A is
 * using 100% CPU, which is unfair to other processes.
 */

#define	UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
			 PRI_MAX_TIMESHARE : (td)->td_user_pri)

#define	GOLDEN_RATIO_PRIME	2654404609U
#ifndef	UMTX_CHAINS
#define	UMTX_CHAINS		512
#endif
#define	UMTX_SHIFTS		(__WORD_BIT - 9)

#define	GET_SHARE(flags)	\
    (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)
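
/*
 * Added commentary (not from the original source): GET_SHARE() picks the
 * keying mode that umtx_key_get() uses below.  For example, a pthread
 * mutex initialized with PTHREAD_PROCESS_SHARED has USYNC_PROCESS_SHARED
 * set in its flags word, so GET_SHARE(flags) yields PROCESS_SHARE and the
 * lock is keyed on the backing VM object and offset; otherwise it is
 * keyed on the per-process virtual address.
 */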

#define	BUSY_SPINS		200

struct abs_timeout {
	int clockid;
	bool is_abs_real;	/* TIMER_ABSTIME && CLOCK_REALTIME* */
	struct timespec cur;
	struct timespec end;
};

#ifdef COMPAT_FREEBSD32
struct umutex32 {
	volatile __lwpid_t	m_owner;	/* Owner of the mutex */
	__uint32_t		m_flags;	/* Flags of the mutex */
	__uint32_t		m_ceilings[2];	/* Priority protect ceiling */
	__uint32_t		m_rb_lnk;	/* Robust linkage */
	__uint32_t		m_pad;
	__uint32_t		m_spare[2];
};

_Static_assert(sizeof(struct umutex) == sizeof(struct umutex32), "umutex32");
_Static_assert(__offsetof(struct umutex, m_spare[0]) ==
    __offsetof(struct umutex32, m_spare[0]), "m_spare32");
#endif

int umtx_shm_vnobj_persistent = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_vnode_persistent, CTLFLAG_RWTUN,
    &umtx_shm_vnobj_persistent, 0,
    "False forces destruction of umtx attached to file, on last close");
static int umtx_max_rb = 1000;
SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_max_robust, CTLFLAG_RWTUN,
    &umtx_max_rb, 0,
    "");

static uma_zone_t		umtx_pi_zone;
static struct umtxq_chain	umtxq_chains[2][UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
static int			umtx_pi_allocated;

static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
    &umtx_pi_allocated, 0, "Allocated umtx_pi");
static int umtx_verbose_rb = 1;
SYSCTL_INT(_debug_umtx, OID_AUTO, robust_faults_verbose, CTLFLAG_RWTUN,
    &umtx_verbose_rb, 0,
    "");

#ifdef UMTX_PROFILING
static long max_length;
SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0,
    "max_length");
static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD, 0,
    "umtx chain stats");
#endif

static void abs_timeout_update(struct abs_timeout *timo);

static void umtx_shm_init(void);
static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert_queue(struct umtx_q *uq, int q);
static void umtxq_remove_queue(struct umtx_q *uq, int q);
static int umtxq_sleep(struct umtx_q *uq, const char *wmesg,
    struct abs_timeout *);
static int umtxq_count(struct umtx_key *key);
static struct umtx_pi *umtx_pi_alloc(int);
static void umtx_pi_free(struct umtx_pi *pi);
static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    bool rb);
static void umtx_thread_cleanup(struct thread *td);
static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
    struct image_params *imgp __unused);
SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);

#define	umtxq_signal(key, nwake)	umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
#define	umtxq_insert(uq)	umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
#define	umtxq_remove(uq)	umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)

static struct mtx umtx_lock;
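
/*
 * Added commentary: the chain table umtxq_chains[2][UMTX_CHAINS] is split
 * into two rows.  umtxq_getchain() below places mutex-like objects
 * (key->type <= TYPE_SEM) in row 1 and the remaining wait channels in
 * row 0, so the two classes never contend for the same chain lock even
 * when their keys hash to the same bucket index.
 */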

#ifdef UMTX_PROFILING
static void
umtx_init_profiling(void)
{
	struct sysctl_oid *chain_oid;
	char chain_name[10];
	int i;

	for (i = 0; i < UMTX_CHAINS; ++i) {
		snprintf(chain_name, sizeof(chain_name), "%d", i);
		chain_oid = SYSCTL_ADD_NODE(NULL,
		    SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO,
		    chain_name, CTLFLAG_RD, NULL, "umtx hash stats");
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length0", CTLFLAG_RD,
		    &umtxq_chains[0][i].max_length, 0, NULL);
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length1", CTLFLAG_RD,
		    &umtxq_chains[1][i].max_length, 0, NULL);
	}
}

static int
sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS)
{
	char buf[512];
	struct sbuf sb;
	struct umtxq_chain *uc;
	u_int fract, i, j, tot, whole;
	u_int sf0, sf1, sf2, sf3, sf4;
	u_int si0, si1, si2, si3, si4;
	u_int sw0, sw1, sw2, sw3, sw4;

	sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
	for (i = 0; i < 2; i++) {
		tot = 0;
		for (j = 0; j < UMTX_CHAINS; ++j) {
			uc = &umtxq_chains[i][j];
			mtx_lock(&uc->uc_lock);
			tot += uc->max_length;
			mtx_unlock(&uc->uc_lock);
		}
		if (tot == 0)
			sbuf_printf(&sb, "%u) Empty ", i);
		else {
			sf0 = sf1 = sf2 = sf3 = sf4 = 0;
			si0 = si1 = si2 = si3 = si4 = 0;
			sw0 = sw1 = sw2 = sw3 = sw4 = 0;
			for (j = 0; j < UMTX_CHAINS; j++) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				whole = uc->max_length * 100;
				mtx_unlock(&uc->uc_lock);
				fract = (whole % tot) * 100;
				if (UPROF_PERC_BIGGER(whole, fract, sw0,
				    sf0)) {
					sf0 = fract;
					si0 = j;
					sw0 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw1,
				    sf1)) {
					sf1 = fract;
					si1 = j;
					sw1 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw2,
				    sf2)) {
					sf2 = fract;
					si2 = j;
					sw2 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw3,
				    sf3)) {
					sf3 = fract;
					si3 = j;
					sw3 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw4,
				    sf4)) {
					sf4 = fract;
					si4 = j;
					sw4 = whole;
				}
			}
			sbuf_printf(&sb, "queue %u:\n", i);
			sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot,
			    sf0 / tot, si0);
			sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot,
			    sf1 / tot, si1);
			sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot,
			    sf2 / tot, si2);
			sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot,
			    sf3 / tot, si3);
			sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot,
			    sf4 / tot, si4);
		}
	}
	sbuf_trim(&sb);
	sbuf_finish(&sb);
	sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
	sbuf_delete(&sb);
	return (0);
}

static int
sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS)
{
	struct umtxq_chain *uc;
	u_int i, j;
	int clear, error;

	clear = 0;
	error = sysctl_handle_int(oidp, &clear, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);

	if (clear != 0) {
		for (i = 0; i < 2; ++i) {
			for (j = 0; j < UMTX_CHAINS; ++j) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				uc->length = 0;
				uc->max_length = 0;
				mtx_unlock(&uc->uc_lock);
			}
		}
	}
	return (0);
}

SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_clear, "I", "Clear umtx chains statistics");
SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_peaks, "A", "Highest peaks in chains max length");
#endif

static void
umtxq_sysinit(void *arg __unused)
{
	int i, j;

	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	for (i = 0; i < 2; ++i) {
		for (j = 0; j < UMTX_CHAINS; ++j) {
			mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
			    MTX_DEF | MTX_DUPOK);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
			LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
			TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
			umtxq_chains[i][j].uc_busy = 0;
			umtxq_chains[i][j].uc_waiters = 0;
#ifdef UMTX_PROFILING
			umtxq_chains[i][j].length = 0;
			umtxq_chains[i][j].max_length = 0;
#endif
		}
	}
#ifdef UMTX_PROFILING
	umtx_init_profiling();
#endif
	mtx_init(&umtx_lock, "umtx lock", NULL, MTX_DEF);
	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
	    EVENTHANDLER_PRI_ANY);
	umtx_shm_init();
}

struct umtx_q *
umtxq_alloc(void)
{
	struct umtx_q *uq;

	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
	uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX,
	    M_WAITOK | M_ZERO);
	TAILQ_INIT(&uq->uq_spare_queue->head);
	TAILQ_INIT(&uq->uq_pi_contested);
	uq->uq_inherited_pri = PRI_MAX;
	return (uq);
}

void
umtxq_free(struct umtx_q *uq)
{

	MPASS(uq->uq_spare_queue != NULL);
	free(uq->uq_spare_queue, M_UMTX);
	free(uq, M_UMTX);
}

static inline void
umtxq_hash(struct umtx_key *key)
{
	unsigned n;

	n = (uintptr_t)key->info.both.a + key->info.both.b;
	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
}

static inline struct umtxq_chain *
umtxq_getchain(struct umtx_key *key)
{

	if (key->type <= TYPE_SEM)
		return (&umtxq_chains[1][key->hash]);
	return (&umtxq_chains[0][key->hash]);
}

/*
 * Lock a chain.
 */
static inline void
umtxq_lock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_lock(&uc->uc_lock);
}

/*
 * Unlock a chain.
 */
static inline void
umtxq_unlock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_unlock(&uc->uc_lock);
}

/*
 * Set the chain to busy state when the following operation
 * may be blocked (a kernel mutex cannot be used).
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	if (uc->uc_busy) {
#ifdef SMP
		if (smp_cpus > 1) {
			int count = BUSY_SPINS;
			if (count > 0) {
				umtxq_unlock(key);
				while (uc->uc_busy && --count > 0)
					cpu_spinwait();
				umtxq_lock(key);
			}
		}
#endif
		while (uc->uc_busy) {
			uc->uc_waiters++;
			msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
			uc->uc_waiters--;
		}
	}
	uc->uc_busy = 1;
}
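
/*
 * Added commentary: a typical busy/unbusy sequence, as used by the lock
 * and unlock paths below (sketch only, not a verbatim call site):
 *
 *	umtxq_lock(key);
 *	umtxq_busy(key);	// marks the chain busy; may sleep
 *	umtxq_unlock(key);
 *	... faultable userspace access, e.g. casueword32() ...
 *	umtxq_lock(key);
 *	umtxq_unbusy(key);
 *	umtxq_unlock(key);
 *
 * The busy flag serializes these blocking sections while the chain mutex
 * itself is dropped around the user memory access.
 */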

/*
 * Unbusy a chain.
 */
static inline void
umtxq_unbusy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	KASSERT(uc->uc_busy != 0, ("not busy"));
	uc->uc_busy = 0;
	if (uc->uc_waiters)
		wakeup_one(uc);
}

static inline void
umtxq_unbusy_unlocked(struct umtx_key *key)
{

	umtxq_lock(key);
	umtxq_unbusy(key);
	umtxq_unlock(key);
}

static struct umtxq_queue *
umtxq_queue_lookup(struct umtx_key *key, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	LIST_FOREACH(uh, &uc->uc_queue[q], link) {
		if (umtx_key_match(&uh->key, key))
			return (uh);
	}

	return (NULL);
}

static inline void
umtxq_insert_queue(struct umtx_q *uq, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue"));
	uh = umtxq_queue_lookup(&uq->uq_key, q);
	if (uh != NULL) {
		LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link);
	} else {
		uh = uq->uq_spare_queue;
		uh->key = uq->uq_key;
		LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
#ifdef UMTX_PROFILING
		uc->length++;
		if (uc->length > uc->max_length) {
			uc->max_length = uc->length;
			if (uc->max_length > max_length)
				max_length = uc->max_length;
		}
#endif
	}
	uq->uq_spare_queue = NULL;

	TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
	uh->length++;
	uq->uq_flags |= UQF_UMTXQ;
	uq->uq_cur_queue = uh;
	return;
}

static inline void
umtxq_remove_queue(struct umtx_q *uq, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (uq->uq_flags & UQF_UMTXQ) {
		uh = uq->uq_cur_queue;
		TAILQ_REMOVE(&uh->head, uq, uq_link);
		uh->length--;
		uq->uq_flags &= ~UQF_UMTXQ;
		if (TAILQ_EMPTY(&uh->head)) {
			KASSERT(uh->length == 0,
			    ("inconsistent umtxq_queue length"));
#ifdef UMTX_PROFILING
			uc->length--;
#endif
			LIST_REMOVE(uh, link);
		} else {
			uh = LIST_FIRST(&uc->uc_spare_queue);
			KASSERT(uh != NULL, ("uc_spare_queue is empty"));
			LIST_REMOVE(uh, link);
		}
		uq->uq_spare_queue = uh;
		uq->uq_cur_queue = NULL;
	}
}

/*
 * Check if there are multiple waiters.
 */
static int
umtxq_count(struct umtx_key *key)
{
	struct umtxq_queue *uh;

	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL)
		return (uh->length);
	return (0);
}

/*
 * Check if there are multiple PI waiters and return the first waiter.
 */
static int
umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
{
	struct umtxq_queue *uh;

	*first = NULL;
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL) {
		*first = TAILQ_FIRST(&uh->head);
		return (uh->length);
	}
	return (0);
}
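
/*
 * Added commentary: the waiter counts above drive the contested-bit
 * decisions in the unlock paths; e.g. do_unlock_normal() clears
 * UMUTEX_CONTESTED only when at most one waiter remains, because the
 * woken waiter can then take the lock without further kernel help.
 */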

static int
umtxq_check_susp(struct thread *td)
{
	struct proc *p;
	int error;

	/*
	 * The check for TDF_NEEDSUSPCHK is racy, but it is enough to
	 * eventually break the lockstep loop.
	 */
	if ((td->td_flags & TDF_NEEDSUSPCHK) == 0)
		return (0);
	error = 0;
	p = td->td_proc;
	PROC_LOCK(p);
	if (P_SHOULDSTOP(p) ||
	    ((p->p_flag & P_TRACED) && (td->td_dbgflags & TDB_SUSPEND))) {
		if (p->p_flag & P_SINGLE_EXIT)
			error = EINTR;
		else
			error = ERESTART;
	}
	PROC_UNLOCK(p);
	return (error);
}

/*
 * Wake up threads waiting on a userland object.
 */

static int
umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
{
	struct umtxq_queue *uh;
	struct umtx_q *uq;
	int ret;

	ret = 0;
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
	uh = umtxq_queue_lookup(key, q);
	if (uh != NULL) {
		while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
			umtxq_remove_queue(uq, q);
			wakeup(uq);
			if (++ret >= n_wake)
				return (ret);
		}
	}
	return (ret);
}

/*
 * Wake up the specified thread.
 */
static inline void
umtxq_signal_thread(struct umtx_q *uq)
{

	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key));
	umtxq_remove(uq);
	wakeup(uq);
}

static inline int
tstohz(const struct timespec *tsp)
{
	struct timeval tv;

	TIMESPEC_TO_TIMEVAL(&tv, tsp);
	return tvtohz(&tv);
}

static void
abs_timeout_init(struct abs_timeout *timo, int clockid, int absolute,
    const struct timespec *timeout)
{

	timo->clockid = clockid;
	if (!absolute) {
		timo->is_abs_real = false;
		abs_timeout_update(timo);
		timo->end = timo->cur;
		timespecadd(&timo->end, timeout);
	} else {
		timo->end = *timeout;
		timo->is_abs_real = clockid == CLOCK_REALTIME ||
		    clockid == CLOCK_REALTIME_FAST ||
		    clockid == CLOCK_REALTIME_PRECISE;
		/*
		 * If is_abs_real, umtxq_sleep will read the clock
		 * after setting td_rtcgen; otherwise, read it here.
		 */
		if (!timo->is_abs_real) {
			abs_timeout_update(timo);
		}
	}
}

static void
abs_timeout_init2(struct abs_timeout *timo, const struct _umtx_time *umtxtime)
{

	abs_timeout_init(timo, umtxtime->_clockid,
	    (umtxtime->_flags & UMTX_ABSTIME) != 0, &umtxtime->_timeout);
}

static inline void
abs_timeout_update(struct abs_timeout *timo)
{

	kern_clock_gettime(curthread, timo->clockid, &timo->cur);
}

static int
abs_timeout_gethz(struct abs_timeout *timo)
{
	struct timespec tts;

	if (timespeccmp(&timo->end, &timo->cur, <=))
		return (-1);
	tts = timo->end;
	timespecsub(&tts, &timo->cur);
	return (tstohz(&tts));
}

static uint32_t
umtx_unlock_val(uint32_t flags, bool rb)
{

	if (rb)
		return (UMUTEX_RB_OWNERDEAD);
	else if ((flags & UMUTEX_NONCONSISTENT) != 0)
		return (UMUTEX_RB_NOTRECOV);
	else
		return (UMUTEX_UNOWNED);

}

/*
 * Put the thread into sleep state; before sleeping, check whether
 * the thread was removed from the umtx queue.
 */
static inline int
umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *abstime)
{
	struct umtxq_chain *uc;
	int error, timo;

	if (abstime != NULL && abstime->is_abs_real) {
		curthread->td_rtcgen = atomic_load_acq_int(&rtc_generation);
		abs_timeout_update(abstime);
	}

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	for (;;) {
		if (!(uq->uq_flags & UQF_UMTXQ)) {
			error = 0;
			break;
		}
		if (abstime != NULL) {
			timo = abs_timeout_gethz(abstime);
			if (timo < 0) {
				error = ETIMEDOUT;
				break;
			}
		} else
			timo = 0;
		error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo);
		if (error == EINTR || error == ERESTART) {
			umtxq_lock(&uq->uq_key);
			break;
		}
		if (abstime != NULL) {
			if (abstime->is_abs_real)
				curthread->td_rtcgen =
				    atomic_load_acq_int(&rtc_generation);
			abs_timeout_update(abstime);
		}
		umtxq_lock(&uq->uq_key);
	}

	curthread->td_rtcgen = 0;
	return (error);
}
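
/*
 * Added commentary (illustrative numbers, not from the original source):
 * a relative 1.5 s wait on CLOCK_MONOTONIC reaches umtxq_sleep() via
 * abs_timeout_init(&timo, CLOCK_MONOTONIC, 0, &ts), which converts it to
 * an absolute deadline (cur + ts).  The loop above re-reads the clock
 * after every wakeup and keeps sleeping until abs_timeout_gethz() reports
 * that the deadline has passed, so spurious wakeups do not shorten the
 * timeout.
 */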

/*
 * Convert a userspace address into a unique logical address.
 */
int
umtx_key_get(const void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else {
		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
		    &entry, &key->info.shared.object, &pindex, &prot,
		    &wired) != KERN_SUCCESS) {
			return (EFAULT);
		}

		if ((share == PROCESS_SHARE) ||
		    (share == AUTO_SHARE &&
		     VM_INHERIT_SHARE == entry->inheritance)) {
			key->shared = 1;
			key->info.shared.offset = (vm_offset_t)addr -
			    entry->start + entry->offset;
			vm_object_reference(key->info.shared.object);
		} else {
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		vm_map_lookup_done(map, entry);
	}

	umtxq_hash(key);
	return (0);
}

/*
 * Release key.
 */
void
umtx_key_release(struct umtx_key *key)
{
	if (key->shared)
		vm_object_deallocate(key->info.shared.object);
}

/*
 * Fetch and compare value; sleep on the address if the value has not changed.
 */
static int
do_wait(struct thread *td, void *addr, u_long id,
    struct _umtx_time *timeout, int compat32, int is_private)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	u_long tmp;
	uint32_t tmp32;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	if (compat32 == 0) {
		error = fueword(addr, &tmp);
		if (error != 0)
			error = EFAULT;
	} else {
		error = fueword32(addr, &tmp32);
		if (error == 0)
			tmp = tmp32;
		else
			error = EFAULT;
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		if (tmp == id)
			error = umtxq_sleep(uq, "uwait", timeout == NULL ?
			    NULL : &timo);
		if ((uq->uq_flags & UQF_UMTXQ) == 0)
			error = 0;
		else
			umtxq_remove(uq);
	} else if ((uq->uq_flags & UQF_UMTXQ) != 0) {
		umtxq_remove(uq);
	}
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

/*
 * Wake up threads sleeping on the specified address.
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
		return (ret);
	umtxq_lock(&key);
	umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}

/*
 * Lock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int mode)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	error = 0;
	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		rv = fueword32(&m->m_owner, &owner);
		if (rv == -1)
			return (EFAULT);
		if (mode == _UMUTEX_WAIT) {
			if (owner == UMUTEX_UNOWNED ||
			    owner == UMUTEX_CONTESTED ||
			    owner == UMUTEX_RB_OWNERDEAD ||
			    owner == UMUTEX_RB_NOTRECOV)
				return (0);
		} else {
			/*
			 * Robust mutex terminated. Kernel duty is to
			 * return EOWNERDEAD to the userspace. The
			 * umutex.m_flags UMUTEX_NONCONSISTENT is set
			 * by the common userspace code.
			 */
			if (owner == UMUTEX_RB_OWNERDEAD) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_RB_OWNERDEAD, &owner,
				    id | UMUTEX_CONTESTED);
				if (rv == -1)
					return (EFAULT);
				if (owner == UMUTEX_RB_OWNERDEAD)
					return (EOWNERDEAD); /* success */
				rv = umtxq_check_susp(td);
				if (rv != 0)
					return (rv);
				continue;
			}
			if (owner == UMUTEX_RB_NOTRECOV)
				return (ENOTRECOVERABLE);

			/*
			 * Try the uncontested case. This should be
			 * done in userland.
			 */
			rv = casueword32(&m->m_owner, UMUTEX_UNOWNED,
			    &owner, id);
			/* The address was invalid. */
			if (rv == -1)
				return (EFAULT);

			/* The acquire succeeded. */
			if (owner == UMUTEX_UNOWNED)
				return (0);

			/*
			 * If no one owns it but it is contested, try
			 * to acquire it.
			 */
			if (owner == UMUTEX_CONTESTED) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_CONTESTED, &owner,
				    id | UMUTEX_CONTESTED);
				/* The address was invalid. */
				if (rv == -1)
					return (EFAULT);

				if (owner == UMUTEX_CONTESTED)
					return (0);

				rv = umtxq_check_susp(td);
				if (rv != 0)
					return (rv);

				/*
				 * If this failed the lock has
				 * changed, restart.
				 */
				continue;
			}
		}

		if (mode == _UMUTEX_TRY)
			return (EBUSY);

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock. If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old,
		    owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (rv == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtxn", timeout == NULL ?
			    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);

		if (error == 0)
			error = umtxq_check_susp(td);
	}

	return (0);
}

/*
 * Unlock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	uint32_t owner, old, id, newlock;
	int error, count;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	newlock = umtx_unlock_val(flags, rb);
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, newlock);
		if (error == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	if (count > 1)
		newlock |= UMUTEX_CONTESTED;
	error = casueword32(&m->m_owner, owner, &old, newlock);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
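
/*
 * Added commentary: summary of the PTHREAD_PRIO_NONE protocol implemented
 * above.  Uncontested transitions are a single userland CAS; the kernel
 * is entered only on contention (sketch, assuming the standard
 * _umtx_op(2) operation names):
 *
 *	lock:   CAS(m_owner, UMUTEX_UNOWNED -> tid) or else
 *	        _umtx_op(m, UMTX_OP_MUTEX_LOCK, ...)
 *	unlock: CAS(m_owner, tid -> UMUTEX_UNOWNED) or else
 *	        _umtx_op(m, UMTX_OP_MUTEX_UNLOCK, ...)
 */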

/*
 * Check if the mutex is available and wake up a waiter,
 * only for simple mutex.
 */
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != 0 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV)
		return (0);

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	if (count <= 1 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV) {
		error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    UMUTEX_UNOWNED);
		if (error == -1)
			error = EFAULT;
	}

	umtxq_lock(&key);
	if (error == 0 && count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 ||
	    owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV))
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

/*
 * Check if the mutex has waiters and try to fix the contention bit.
 */
static int
do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old;
	int type;
	int error;
	int count;

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT |
	    UMUTEX_ROBUST)) {
	case 0:
	case UMUTEX_ROBUST:
		type = TYPE_NORMAL_UMUTEX;
		break;
	case UMUTEX_PRIO_INHERIT:
		type = TYPE_PI_UMUTEX;
		break;
	case (UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST):
		type = TYPE_PI_ROBUST_UMUTEX;
		break;
	case UMUTEX_PRIO_PROTECT:
		type = TYPE_PP_UMUTEX;
		break;
	case (UMUTEX_PRIO_PROTECT | UMUTEX_ROBUST):
		type = TYPE_PP_ROBUST_UMUTEX;
		break;
	default:
		return (EINVAL);
	}
	if ((error = umtx_key_get(m, type, GET_SHARE(flags), &key)) != 0)
		return (error);

	owner = 0;
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);
	/*
	 * Only repair the contention bit if there is a waiter; this means
	 * the mutex is still being referenced by userland code, otherwise
	 * don't update any memory.
	 */
	if (count > 1) {
		error = fueword32(&m->m_owner, &owner);
		if (error == -1)
			error = EFAULT;
		while (error == 0 && (owner & UMUTEX_CONTESTED) == 0) {
			error = casueword32(&m->m_owner, owner, &old,
			    owner | UMUTEX_CONTESTED);
			if (error == -1) {
				error = EFAULT;
				break;
			}
			if (old == owner)
				break;
			owner = old;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
	} else if (count == 1) {
		error = fueword32(&m->m_owner, &owner);
		if (error == -1)
			error = EFAULT;
		while (error == 0 && (owner & ~UMUTEX_CONTESTED) != 0 &&
		    (owner & UMUTEX_CONTESTED) == 0) {
			error = casueword32(&m->m_owner, owner, &old,
			    owner | UMUTEX_CONTESTED);
			if (error == -1) {
				error = EFAULT;
				break;
			}
			if (old == owner)
				break;
			owner = old;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
	}
	umtxq_lock(&key);
	if (error == EFAULT) {
		umtxq_signal(&key, INT_MAX);
	} else if (count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 ||
	    owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV))
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

static inline struct umtx_pi *
umtx_pi_alloc(int flags)
{
	struct umtx_pi *pi;

	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
	TAILQ_INIT(&pi->pi_blocked);
	atomic_add_int(&umtx_pi_allocated, 1);
	return (pi);
}

static inline void
umtx_pi_free(struct umtx_pi *pi)
{
	uma_zfree(umtx_pi_zone, pi);
	atomic_add_int(&umtx_pi_allocated, -1);
}

/*
 * Adjust the thread's position on a pi_state after its priority has been
 * changed.
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread or higher than the next thread.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}

static struct umtx_pi *
umtx_pi_next(struct umtx_pi *pi)
{
	struct umtx_q *uq_owner;

	if (pi->pi_owner == NULL)
		return (NULL);
	uq_owner = pi->pi_owner->td_umtxq;
	if (uq_owner == NULL)
		return (NULL);
	return (uq_owner->uq_pi_blocked);
}
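
/*
 * Added commentary: umtx_pi_next() steps from a PI mutex to the PI mutex
 * its owner is itself blocked on, so iterating it walks the blocking
 * chain thread A -> mutex 1 -> thread B -> mutex 2 -> ...  Userland is
 * free to construct a cycle in that chain, which is why
 * umtx_pi_check_loop() below runs Floyd's two-pointer cycle detection
 * before priority is propagated along it.
 */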

/*
 * Floyd's Cycle-Finding Algorithm.
 */
static bool
umtx_pi_check_loop(struct umtx_pi *pi)
{
	struct umtx_pi *pi1;	/* fast iterator */

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (false);
	pi1 = pi;
	for (;;) {
		pi = umtx_pi_next(pi);
		if (pi == NULL)
			break;
		pi1 = umtx_pi_next(pi1);
		if (pi1 == NULL)
			break;
		pi1 = umtx_pi_next(pi1);
		if (pi1 == NULL)
			break;
		if (pi == pi1)
			return (true);
	}
	return (false);
}

/*
 * Propagate priority when a thread is blocked on a POSIX
 * PI mutex.
 */
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;
	if (umtx_pi_check_loop(pi))
		return;

	for (;;) {
		td = pi->pi_owner;
		if (td == NULL || td == curthread)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		thread_lock(td);
		if (td->td_lend_user_pri > pri)
			sched_lend_user_prio(td, pri);
		else {
			thread_unlock(td);
			break;
		}
		thread_unlock(td);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		if (pi == NULL)
			break;
		/* Resort td on the list if needed. */
		umtx_pi_adjust_thread(pi, td);
	}
}

/*
 * Unpropagate priority for a PI mutex when a thread blocked on
 * it is interrupted by a signal or resumed by others.
 */
static void
umtx_repropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);

	if (umtx_pi_check_loop(pi))
		return;
	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		thread_lock(pi->pi_owner);
		sched_lend_user_prio(pi->pi_owner, pri);
		thread_unlock(pi->pi_owner);
		if ((pi = uq_owner->uq_pi_blocked) != NULL)
			umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
	}
}

/*
 * Insert a PI mutex into the owned list.
 */
static void
umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_assert(&umtx_lock, MA_OWNED);
	MPASS(pi->pi_owner == NULL);
	pi->pi_owner = owner;
	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
}

/*
 * Disown a PI mutex, and remove it from the owned list.
 */
static void
umtx_pi_disown(struct umtx_pi *pi)
{

	mtx_assert(&umtx_lock, MA_OWNED);
	TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, pi, pi_link);
	pi->pi_owner = NULL;
}
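
/*
 * Added commentary (illustrative priorities, not from the original
 * source): if thread A with user priority 120 owns a PI mutex and thread
 * B with priority 100 blocks on it, umtx_propagate_priority() lends
 * priority 100 to A (lower values are better).  When B is interrupted or
 * acquires the lock, umtx_repropagate_priority() recomputes A's lent
 * priority from the best remaining waiter on all PI mutexes A still owns.
 */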

/*
 * Claim ownership of a PI mutex.
 */
static int
umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq;
	int pri;

	mtx_lock(&umtx_lock);
	if (pi->pi_owner == owner) {
		mtx_unlock(&umtx_lock);
		return (0);
	}

	if (pi->pi_owner != NULL) {
		/*
		 * userland may have already messed the mutex, sigh.
		 */
		mtx_unlock(&umtx_lock);
		return (EPERM);
	}
	umtx_pi_setowner(pi, owner);
	uq = TAILQ_FIRST(&pi->pi_blocked);
	if (uq != NULL) {
		pri = UPRI(uq->uq_thread);
		thread_lock(owner);
		if (pri < UPRI(owner))
			sched_lend_user_prio(owner, pri);
		thread_unlock(owner);
	}
	mtx_unlock(&umtx_lock);
	return (0);
}

/*
 * Adjust a thread's order position in its blocked PI mutex;
 * this may trigger a new round of priority propagation.
 */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	mtx_lock(&umtx_lock);
	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	if (pi != NULL) {
		umtx_pi_adjust_thread(pi, td);
		umtx_repropagate_priority(pi);
	}
	mtx_unlock(&umtx_lock);
}

/*
 * Sleep on a PI mutex.
 */
static int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, uint32_t owner,
    const char *wmesg, struct abs_timeout *timo, bool shared)
{
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int error, pri;
#ifdef INVARIANTS
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
#endif
	error = 0;
	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key));
	KASSERT(uc->uc_busy != 0, ("umtx chain is not busy"));
	umtxq_insert(uq);
	mtx_lock(&umtx_lock);
	if (pi->pi_owner == NULL) {
		mtx_unlock(&umtx_lock);
		td1 = tdfind(owner, shared ? -1 : td->td_proc->p_pid);
		mtx_lock(&umtx_lock);
		if (td1 != NULL) {
			if (pi->pi_owner == NULL)
				umtx_pi_setowner(pi, td1);
			PROC_UNLOCK(td1->td_proc);
		}
	}

	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	thread_lock(td);
	td->td_flags |= TDF_UPIBLOCKED;
	thread_unlock(td);
	umtx_propagate_priority(td);
	mtx_unlock(&umtx_lock);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, wmesg, timo);
	umtxq_remove(uq);

	mtx_lock(&umtx_lock);
	uq->uq_pi_blocked = NULL;
	thread_lock(td);
	td->td_flags &= ~TDF_UPIBLOCKED;
	thread_unlock(td);
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_repropagate_priority(pi);
	mtx_unlock(&umtx_lock);
	umtxq_unlock(&uq->uq_key);

	return (error);
}

/*
 * Add a reference count for a PI mutex.
 */
static void
umtx_pi_ref(struct umtx_pi *pi)
{

	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&pi->pi_key));
	pi->pi_refcount++;
}
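
/*
 * Added commentary: the reference taken via umtx_pi_ref() keeps the
 * umtx_pi alive while a thread is inside do_lock_pi(), even after a
 * signal or timeout has removed it from the sleep queue; the structure
 * is unhashed and returned to the zone only when the last reference is
 * dropped in umtx_pi_unref() below.
 */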

/*
 * Decrease the reference count for a PI mutex; if the counter
 * drops to zero, its memory is freed.
 */
static void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		mtx_lock(&umtx_lock);
		if (pi->pi_owner != NULL)
			umtx_pi_disown(pi);
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
		    ("blocked queue not empty"));
		mtx_unlock(&umtx_lock);
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		umtx_pi_free(pi);
	}
}

/*
 * Find a PI mutex in hash table.
 */
static struct umtx_pi *
umtx_pi_lookup(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtx_pi *pi;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);

	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
		if (umtx_key_match(&pi->pi_key, key)) {
			return (pi);
		}
	}
	return (NULL);
}

/*
 * Insert a PI mutex into hash table.
 */
static inline void
umtx_pi_insert(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
}

/*
 * Lock a PI mutex.
 */
static int
do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, old_owner, owner, old;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	pi = umtx_pi_lookup(&uq->uq_key);
	if (pi == NULL) {
		new_pi = umtx_pi_alloc(M_NOWAIT);
		if (new_pi == NULL) {
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc(M_WAITOK);
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL) {
				umtx_pi_free(new_pi);
				new_pi = NULL;
			}
		}
		if (new_pi != NULL) {
			new_pi->pi_key = uq->uq_key;
			umtx_pi_insert(new_pi);
			pi = new_pi;
		}
	}
	umtx_pi_ref(pi);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case. This should be done in userland.
		 */
		rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED) {
			error = 0;
			break;
		}

		if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
			break;
		}

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED || owner == UMUTEX_RB_OWNERDEAD) {
			old_owner = owner;
			rv = casueword32(&m->m_owner, owner, &owner,
			    id | UMUTEX_CONTESTED);
			/* The address was invalid. */
			if (rv == -1) {
				error = EFAULT;
				break;
			}

			if (owner == old_owner) {
				umtxq_lock(&uq->uq_key);
				umtxq_busy(&uq->uq_key);
				error = umtx_pi_claim(pi, td);
				umtxq_unbusy(&uq->uq_key);
				umtxq_unlock(&uq->uq_key);
				if (error != 0) {
					/*
					 * Since we're going to return an
					 * error, restore the m_owner to its
					 * previous, unowned state to avoid
					 * compounding the problem.
					 */
					(void)casuword32(&m->m_owner,
					    id | UMUTEX_CONTESTED,
					    old_owner);
				}
				if (error == 0 &&
				    old_owner == UMUTEX_RB_OWNERDEAD)
					error = EOWNERDEAD;
				break;
			}

			error = umtxq_check_susp(td);
			if (error != 0)
				break;

			/* If this failed the lock has changed, restart. */
			continue;
		}

		if ((owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock. If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old, owner |
		    UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}

		umtxq_lock(&uq->uq_key);
		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx. Note that the UMUTEX_RB_OWNERDEAD
		 * value for owner is impossible there.
		 */
		if (old == owner) {
			error = umtxq_sleep_pi(uq, pi,
			    owner & ~UMUTEX_CONTESTED,
			    "umtxpi", timeout == NULL ? NULL : &timo,
			    (flags & USYNC_PROCESS_SHARED) != 0);
			if (error != 0)
				continue;
		} else {
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
		}

		error = umtxq_check_susp(td);
		if (error != 0)
			break;
	}

	umtxq_lock(&uq->uq_key);
	umtx_pi_unref(pi);
	umtxq_unlock(&uq->uq_key);

	umtx_key_release(&uq->uq_key);
	return (error);
}
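
/*
 * Added commentary: for a robust PI mutex whose owner died, the lock
 * word reads UMUTEX_RB_OWNERDEAD; the CAS in do_lock_pi() above installs
 * the new owner with UMUTEX_CONTESTED set and returns EOWNERDEAD, which
 * the userland threading library surfaces from pthread_mutex_lock() so
 * the application can repair the state and call
 * pthread_mutex_consistent().
 */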

/*
 * Unlock a PI mutex.
 */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	uint32_t id, new_owner, old, owner;
	int count, error, pri;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	new_owner = umtx_unlock_val(flags, rb);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, new_owner);
		if (error == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count_pi(&key, &uq_first);
	if (uq_first != NULL) {
		mtx_lock(&umtx_lock);
		pi = uq_first->uq_pi_blocked;
		KASSERT(pi != NULL, ("pi == NULL?"));
		if (pi->pi_owner != td && !(rb && pi->pi_owner == NULL)) {
			mtx_unlock(&umtx_lock);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			/* userland messed the mutex */
			return (EPERM);
		}
		uq_me = td->td_umtxq;
		if (pi->pi_owner == td)
			umtx_pi_disown(pi);
		/* get highest priority thread which is still sleeping. */
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		while (uq_first != NULL &&
		    (uq_first->uq_flags & UQF_UMTXQ) == 0) {
			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
		}
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
		if (uq_first)
			umtxq_signal_thread(uq_first);
	} else {
		pi = umtx_pi_lookup(&key);
		/*
		 * A umtx_pi can exist if a signal or timeout removed the
		 * last waiter from the umtxq, but there is still
		 * a thread in do_lock_pi() holding the umtx_pi.
		 */
		if (pi != NULL) {
			/*
			 * The umtx_pi can be unowned, such as when a thread
			 * has just entered do_lock_pi(), allocated the
			 * umtx_pi, and unlocked the umtxq.
			 * If the current thread owns it, it must disown it.
			 */
			mtx_lock(&umtx_lock);
			if (pi->pi_owner == td)
				umtx_pi_disown(pi);
			mtx_unlock(&umtx_lock);
		}
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	if (count > 1)
		new_owner |= UMUTEX_CONTESTED;
	error = casueword32(&m->m_owner, owner, &old, new_owner);

	umtxq_unbusy_unlocked(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

/*
 * Lock a PP mutex.
 */
static int
do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct abs_timeout timo;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t ceiling;
	uint32_t owner, id;
	int error, pri, old_inherited_pri, su, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
	for (;;) {
		old_inherited_pri = uq->uq_inherited_pri;
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		rv = fueword32(&m->m_ceilings[0], &ceiling);
		if (rv == -1) {
			error = EFAULT;
			goto out;
		}
		ceiling = RTP_PRIO_MAX - ceiling;
		if (ceiling > RTP_PRIO_MAX) {
			error = EINVAL;
			goto out;
		}

		mtx_lock(&umtx_lock);
		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
			mtx_unlock(&umtx_lock);
			error = EINVAL;
			goto out;
		}
		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
			thread_lock(td);
			if (uq->uq_inherited_pri < UPRI(td))
				sched_lend_user_prio(td, uq->uq_inherited_pri);
			thread_unlock(td);
		}
		mtx_unlock(&umtx_lock);

		rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    id | UMUTEX_CONTESTED);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		if (owner == UMUTEX_CONTESTED) {
			error = 0;
			break;
		} else if (owner == UMUTEX_RB_OWNERDEAD) {
			rv = casueword32(&m->m_owner, UMUTEX_RB_OWNERDEAD,
			    &owner, id | UMUTEX_CONTESTED);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (owner == UMUTEX_RB_OWNERDEAD) {
				error = EOWNERDEAD; /* success */
				break;
			}
			error = 0;
		} else if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", timeout == NULL ?
		    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);

		mtx_lock(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}

	if (error != 0 && error != EOWNERDEAD) {
		mtx_lock(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}

out:
	umtxq_unbusy_unlocked(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}
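
/*
 * Added commentary (assumes RTP_PRIO_MAX == 31, as on stock FreeBSD): a
 * stored ceiling c maps to kernel priority PRI_MIN_REALTIME +
 * (RTP_PRIO_MAX - c), so a larger stored value means a better
 * (numerically lower) kernel priority.  do_lock_pp() above fails with
 * EINVAL when the caller's current priority is already better than the
 * ceiling permits, matching the POSIX rule for PTHREAD_PRIO_PROTECT.
 */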
2241 */ 2242 static int 2243 do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, bool rb) 2244 { 2245 struct umtx_key key; 2246 struct umtx_q *uq, *uq2; 2247 struct umtx_pi *pi; 2248 uint32_t id, owner, rceiling; 2249 int error, pri, new_inherited_pri, su; 2250 2251 id = td->td_tid; 2252 uq = td->td_umtxq; 2253 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); 2254 2255 /* 2256 * Make sure we own this mtx. 2257 */ 2258 error = fueword32(&m->m_owner, &owner); 2259 if (error == -1) 2260 return (EFAULT); 2261 2262 if ((owner & ~UMUTEX_CONTESTED) != id) 2263 return (EPERM); 2264 2265 error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t)); 2266 if (error != 0) 2267 return (error); 2268 2269 if (rceiling == -1) 2270 new_inherited_pri = PRI_MAX; 2271 else { 2272 rceiling = RTP_PRIO_MAX - rceiling; 2273 if (rceiling > RTP_PRIO_MAX) 2274 return (EINVAL); 2275 new_inherited_pri = PRI_MIN_REALTIME + rceiling; 2276 } 2277 2278 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2279 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2280 &key)) != 0) 2281 return (error); 2282 umtxq_lock(&key); 2283 umtxq_busy(&key); 2284 umtxq_unlock(&key); 2285 /* 2286 * For priority protected mutex, always set unlocked state 2287 * to UMUTEX_CONTESTED, so that userland always enters kernel 2288 * to lock the mutex, it is necessary because thread priority 2289 * has to be adjusted for such mutex. 2290 */ 2291 error = suword32(&m->m_owner, umtx_unlock_val(flags, rb) | 2292 UMUTEX_CONTESTED); 2293 2294 umtxq_lock(&key); 2295 if (error == 0) 2296 umtxq_signal(&key, 1); 2297 umtxq_unbusy(&key); 2298 umtxq_unlock(&key); 2299 2300 if (error == -1) 2301 error = EFAULT; 2302 else { 2303 mtx_lock(&umtx_lock); 2304 if (su != 0) 2305 uq->uq_inherited_pri = new_inherited_pri; 2306 pri = PRI_MAX; 2307 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2308 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2309 if (uq2 != NULL) { 2310 if (pri > UPRI(uq2->uq_thread)) 2311 pri = UPRI(uq2->uq_thread); 2312 } 2313 } 2314 if (pri > uq->uq_inherited_pri) 2315 pri = uq->uq_inherited_pri; 2316 thread_lock(td); 2317 sched_lend_user_prio(td, pri); 2318 thread_unlock(td); 2319 mtx_unlock(&umtx_lock); 2320 } 2321 umtx_key_release(&key); 2322 return (error); 2323 } 2324 2325 static int 2326 do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling, 2327 uint32_t *old_ceiling) 2328 { 2329 struct umtx_q *uq; 2330 uint32_t flags, id, owner, save_ceiling; 2331 int error, rv, rv1; 2332 2333 error = fueword32(&m->m_flags, &flags); 2334 if (error == -1) 2335 return (EFAULT); 2336 if ((flags & UMUTEX_PRIO_PROTECT) == 0) 2337 return (EINVAL); 2338 if (ceiling > RTP_PRIO_MAX) 2339 return (EINVAL); 2340 id = td->td_tid; 2341 uq = td->td_umtxq; 2342 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2343 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2344 &uq->uq_key)) != 0) 2345 return (error); 2346 for (;;) { 2347 umtxq_lock(&uq->uq_key); 2348 umtxq_busy(&uq->uq_key); 2349 umtxq_unlock(&uq->uq_key); 2350 2351 rv = fueword32(&m->m_ceilings[0], &save_ceiling); 2352 if (rv == -1) { 2353 error = EFAULT; 2354 break; 2355 } 2356 2357 rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, 2358 id | UMUTEX_CONTESTED); 2359 if (rv == -1) { 2360 error = EFAULT; 2361 break; 2362 } 2363 2364 if (owner == UMUTEX_CONTESTED) { 2365 rv = suword32(&m->m_ceilings[0], ceiling); 2366 rv1 = suword32(&m->m_owner, UMUTEX_CONTESTED); 2367 error = (rv == 0 && rv1 == 0) ? 
0: EFAULT; 2368 break; 2369 } 2370 2371 if ((owner & ~UMUTEX_CONTESTED) == id) { 2372 rv = suword32(&m->m_ceilings[0], ceiling); 2373 error = rv == 0 ? 0 : EFAULT; 2374 break; 2375 } 2376 2377 if (owner == UMUTEX_RB_OWNERDEAD) { 2378 error = EOWNERDEAD; 2379 break; 2380 } else if (owner == UMUTEX_RB_NOTRECOV) { 2381 error = ENOTRECOVERABLE; 2382 break; 2383 } 2384 2385 /* 2386 * If we caught a signal, we have retried and now 2387 * exit immediately. 2388 */ 2389 if (error != 0) 2390 break; 2391 2392 /* 2393 * We set the contested bit, sleep. Otherwise the lock changed 2394 * and we need to retry or we lost a race to the thread 2395 * unlocking the umtx. 2396 */ 2397 umtxq_lock(&uq->uq_key); 2398 umtxq_insert(uq); 2399 umtxq_unbusy(&uq->uq_key); 2400 error = umtxq_sleep(uq, "umtxpp", NULL); 2401 umtxq_remove(uq); 2402 umtxq_unlock(&uq->uq_key); 2403 } 2404 umtxq_lock(&uq->uq_key); 2405 if (error == 0) 2406 umtxq_signal(&uq->uq_key, INT_MAX); 2407 umtxq_unbusy(&uq->uq_key); 2408 umtxq_unlock(&uq->uq_key); 2409 umtx_key_release(&uq->uq_key); 2410 if (error == 0 && old_ceiling != NULL) { 2411 rv = suword32(old_ceiling, save_ceiling); 2412 error = rv == 0 ? 0 : EFAULT; 2413 } 2414 return (error); 2415 } 2416 2417 /* 2418 * Lock a userland POSIX mutex. 2419 */ 2420 static int 2421 do_lock_umutex(struct thread *td, struct umutex *m, 2422 struct _umtx_time *timeout, int mode) 2423 { 2424 uint32_t flags; 2425 int error; 2426 2427 error = fueword32(&m->m_flags, &flags); 2428 if (error == -1) 2429 return (EFAULT); 2430 2431 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2432 case 0: 2433 error = do_lock_normal(td, m, flags, timeout, mode); 2434 break; 2435 case UMUTEX_PRIO_INHERIT: 2436 error = do_lock_pi(td, m, flags, timeout, mode); 2437 break; 2438 case UMUTEX_PRIO_PROTECT: 2439 error = do_lock_pp(td, m, flags, timeout, mode); 2440 break; 2441 default: 2442 return (EINVAL); 2443 } 2444 if (timeout == NULL) { 2445 if (error == EINTR && mode != _UMUTEX_WAIT) 2446 error = ERESTART; 2447 } else { 2448 /* Timed-locking is not restarted. */ 2449 if (error == ERESTART) 2450 error = EINTR; 2451 } 2452 return (error); 2453 } 2454 2455 /* 2456 * Unlock a userland POSIX mutex. 
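 * The UMUTEX_PRIO_INHERIT and UMUTEX_PRIO_PROTECT bits of m_flags select the protocol: plain, priority-inheritance (PI) and priority-protected (PP) mutexes each take their own unlock path above; any other combination of the two bits is rejected with EINVAL.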
2457 */ 2458 static int 2459 do_unlock_umutex(struct thread *td, struct umutex *m, bool rb) 2460 { 2461 uint32_t flags; 2462 int error; 2463 2464 error = fueword32(&m->m_flags, &flags); 2465 if (error == -1) 2466 return (EFAULT); 2467 2468 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2469 case 0: 2470 return (do_unlock_normal(td, m, flags, rb)); 2471 case UMUTEX_PRIO_INHERIT: 2472 return (do_unlock_pi(td, m, flags, rb)); 2473 case UMUTEX_PRIO_PROTECT: 2474 return (do_unlock_pp(td, m, flags, rb)); 2475 } 2476 2477 return (EINVAL); 2478 } 2479 2480 static int 2481 do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m, 2482 struct timespec *timeout, u_long wflags) 2483 { 2484 struct abs_timeout timo; 2485 struct umtx_q *uq; 2486 uint32_t flags, clockid, hasw; 2487 int error; 2488 2489 uq = td->td_umtxq; 2490 error = fueword32(&cv->c_flags, &flags); 2491 if (error == -1) 2492 return (EFAULT); 2493 error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key); 2494 if (error != 0) 2495 return (error); 2496 2497 if ((wflags & CVWAIT_CLOCKID) != 0) { 2498 error = fueword32(&cv->c_clockid, &clockid); 2499 if (error == -1) { 2500 umtx_key_release(&uq->uq_key); 2501 return (EFAULT); 2502 } 2503 if (clockid < CLOCK_REALTIME || 2504 clockid >= CLOCK_THREAD_CPUTIME_ID) { 2505 /* Only the fixed, kernel-defined clock ids are usable here. */ 2506 umtx_key_release(&uq->uq_key); 2507 return (EINVAL); 2508 } 2509 } else { 2510 clockid = CLOCK_REALTIME; 2511 } 2512 2513 umtxq_lock(&uq->uq_key); 2514 umtxq_busy(&uq->uq_key); 2515 umtxq_insert(uq); 2516 umtxq_unlock(&uq->uq_key); 2517 2518 /* 2519 * Set c_has_waiters to 1 before releasing the user mutex, but 2520 * avoid dirtying the cache line when the flag is already set. 2521 */ 2522 error = fueword32(&cv->c_has_waiters, &hasw); 2523 if (error == 0 && hasw == 0) 2524 suword32(&cv->c_has_waiters, 1); 2525 2526 umtxq_unbusy_unlocked(&uq->uq_key); 2527 2528 error = do_unlock_umutex(td, m, false); 2529 2530 if (timeout != NULL) 2531 abs_timeout_init(&timo, clockid, (wflags & CVWAIT_ABSTIME) != 0, 2532 timeout); 2533 2534 umtxq_lock(&uq->uq_key); 2535 if (error == 0) { 2536 error = umtxq_sleep(uq, "ucond", timeout == NULL ? 2537 NULL : &timo); 2538 } 2539 2540 if ((uq->uq_flags & UQF_UMTXQ) == 0) 2541 error = 0; 2542 else { 2543 /* 2544 * This must be a timeout, an interruption by a signal, or 2545 * a spurious wakeup; clear the c_has_waiters flag when 2546 * necessary. 2547 */ 2548 umtxq_busy(&uq->uq_key); 2549 if ((uq->uq_flags & UQF_UMTXQ) != 0) { 2550 int oldlen = uq->uq_cur_queue->length; 2551 umtxq_remove(uq); 2552 if (oldlen == 1) { 2553 umtxq_unlock(&uq->uq_key); 2554 suword32(&cv->c_has_waiters, 0); 2555 umtxq_lock(&uq->uq_key); 2556 } 2557 } 2558 umtxq_unbusy(&uq->uq_key); 2559 if (error == ERESTART) 2560 error = EINTR; 2561 } 2562 2563 umtxq_unlock(&uq->uq_key); 2564 umtx_key_release(&uq->uq_key); 2565 return (error); 2566 } 2567 2568 /* 2569 * Signal a userland condition variable.
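 * Wakes at most one waiter; when the last waiter is released, the userland c_has_waiters hint is cleared so that future signal operations can skip the kernel. In libthr this is typically the slow path behind pthread_cond_signal(3).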
2570 */ 2571 static int 2572 do_cv_signal(struct thread *td, struct ucond *cv) 2573 { 2574 struct umtx_key key; 2575 int error, cnt, nwake; 2576 uint32_t flags; 2577 2578 error = fueword32(&cv->c_flags, &flags); 2579 if (error == -1) 2580 return (EFAULT); 2581 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2582 return (error); 2583 umtxq_lock(&key); 2584 umtxq_busy(&key); 2585 cnt = umtxq_count(&key); 2586 nwake = umtxq_signal(&key, 1); 2587 if (cnt <= nwake) { 2588 umtxq_unlock(&key); 2589 error = suword32(&cv->c_has_waiters, 0); 2590 if (error == -1) 2591 error = EFAULT; 2592 umtxq_lock(&key); 2593 } 2594 umtxq_unbusy(&key); 2595 umtxq_unlock(&key); 2596 umtx_key_release(&key); 2597 return (error); 2598 } 2599 2600 static int 2601 do_cv_broadcast(struct thread *td, struct ucond *cv) 2602 { 2603 struct umtx_key key; 2604 int error; 2605 uint32_t flags; 2606 2607 error = fueword32(&cv->c_flags, &flags); 2608 if (error == -1) 2609 return (EFAULT); 2610 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2611 return (error); 2612 2613 umtxq_lock(&key); 2614 umtxq_busy(&key); 2615 umtxq_signal(&key, INT_MAX); 2616 umtxq_unlock(&key); 2617 2618 error = suword32(&cv->c_has_waiters, 0); 2619 if (error == -1) 2620 error = EFAULT; 2621 2622 umtxq_unbusy_unlocked(&key); 2623 2624 umtx_key_release(&key); 2625 return (error); 2626 } 2627 2628 static int 2629 do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, struct _umtx_time *timeout) 2630 { 2631 struct abs_timeout timo; 2632 struct umtx_q *uq; 2633 uint32_t flags, wrflags; 2634 int32_t state, oldstate; 2635 int32_t blocked_readers; 2636 int error, error1, rv; 2637 2638 uq = td->td_umtxq; 2639 error = fueword32(&rwlock->rw_flags, &flags); 2640 if (error == -1) 2641 return (EFAULT); 2642 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2643 if (error != 0) 2644 return (error); 2645 2646 if (timeout != NULL) 2647 abs_timeout_init2(&timo, timeout); 2648 2649 wrflags = URWLOCK_WRITE_OWNER; 2650 if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER)) 2651 wrflags |= URWLOCK_WRITE_WAITERS; 2652 2653 for (;;) { 2654 rv = fueword32(&rwlock->rw_state, &state); 2655 if (rv == -1) { 2656 umtx_key_release(&uq->uq_key); 2657 return (EFAULT); 2658 } 2659 2660 /* try to lock it */ 2661 while (!(state & wrflags)) { 2662 if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) { 2663 umtx_key_release(&uq->uq_key); 2664 return (EAGAIN); 2665 } 2666 rv = casueword32(&rwlock->rw_state, state, 2667 &oldstate, state + 1); 2668 if (rv == -1) { 2669 umtx_key_release(&uq->uq_key); 2670 return (EFAULT); 2671 } 2672 if (oldstate == state) { 2673 umtx_key_release(&uq->uq_key); 2674 return (0); 2675 } 2676 error = umtxq_check_susp(td); 2677 if (error != 0) 2678 break; 2679 state = oldstate; 2680 } 2681 2682 if (error) 2683 break; 2684 2685 /* grab monitor lock */ 2686 umtxq_lock(&uq->uq_key); 2687 umtxq_busy(&uq->uq_key); 2688 umtxq_unlock(&uq->uq_key); 2689 2690 /* 2691 * re-read the state, in case it changed between the try-lock above 2692 * and the check below 2693 */ 2694 rv = fueword32(&rwlock->rw_state, &state); 2695 if (rv == -1) 2696 error = EFAULT; 2697 2698 /* set read contention bit */ 2699 while (error == 0 && (state & wrflags) && 2700 !(state & URWLOCK_READ_WAITERS)) { 2701 rv = casueword32(&rwlock->rw_state, state, 2702 &oldstate, state | URWLOCK_READ_WAITERS); 2703 if (rv == -1) { 2704 error = EFAULT; 2705 break; 2706 } 2707 if (oldstate == state) 
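/* The CAS published URWLOCK_READ_WAITERS; it is safe to sleep now. */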
2708 goto sleep; 2709 state = oldstate; 2710 error = umtxq_check_susp(td); 2711 if (error != 0) 2712 break; 2713 } 2714 if (error != 0) { 2715 umtxq_unbusy_unlocked(&uq->uq_key); 2716 break; 2717 } 2718 2719 /* state is changed while setting flags, restart */ 2720 if (!(state & wrflags)) { 2721 umtxq_unbusy_unlocked(&uq->uq_key); 2722 error = umtxq_check_susp(td); 2723 if (error != 0) 2724 break; 2725 continue; 2726 } 2727 2728 sleep: 2729 /* contention bit is set, before sleeping, increase read waiter count */ 2730 rv = fueword32(&rwlock->rw_blocked_readers, 2731 &blocked_readers); 2732 if (rv == -1) { 2733 umtxq_unbusy_unlocked(&uq->uq_key); 2734 error = EFAULT; 2735 break; 2736 } 2737 suword32(&rwlock->rw_blocked_readers, blocked_readers+1); 2738 2739 while (state & wrflags) { 2740 umtxq_lock(&uq->uq_key); 2741 umtxq_insert(uq); 2742 umtxq_unbusy(&uq->uq_key); 2743 2744 error = umtxq_sleep(uq, "urdlck", timeout == NULL ? 2745 NULL : &timo); 2746 2747 umtxq_busy(&uq->uq_key); 2748 umtxq_remove(uq); 2749 umtxq_unlock(&uq->uq_key); 2750 if (error) 2751 break; 2752 rv = fueword32(&rwlock->rw_state, &state); 2753 if (rv == -1) { 2754 error = EFAULT; 2755 break; 2756 } 2757 } 2758 2759 /* decrease read waiter count, and may clear read contention bit */ 2760 rv = fueword32(&rwlock->rw_blocked_readers, 2761 &blocked_readers); 2762 if (rv == -1) { 2763 umtxq_unbusy_unlocked(&uq->uq_key); 2764 error = EFAULT; 2765 break; 2766 } 2767 suword32(&rwlock->rw_blocked_readers, blocked_readers-1); 2768 if (blocked_readers == 1) { 2769 rv = fueword32(&rwlock->rw_state, &state); 2770 if (rv == -1) { 2771 umtxq_unbusy_unlocked(&uq->uq_key); 2772 error = EFAULT; 2773 break; 2774 } 2775 for (;;) { 2776 rv = casueword32(&rwlock->rw_state, state, 2777 &oldstate, state & ~URWLOCK_READ_WAITERS); 2778 if (rv == -1) { 2779 error = EFAULT; 2780 break; 2781 } 2782 if (oldstate == state) 2783 break; 2784 state = oldstate; 2785 error1 = umtxq_check_susp(td); 2786 if (error1 != 0) { 2787 if (error == 0) 2788 error = error1; 2789 break; 2790 } 2791 } 2792 } 2793 2794 umtxq_unbusy_unlocked(&uq->uq_key); 2795 if (error != 0) 2796 break; 2797 } 2798 umtx_key_release(&uq->uq_key); 2799 if (error == ERESTART) 2800 error = EINTR; 2801 return (error); 2802 } 2803 2804 static int 2805 do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout) 2806 { 2807 struct abs_timeout timo; 2808 struct umtx_q *uq; 2809 uint32_t flags; 2810 int32_t state, oldstate; 2811 int32_t blocked_writers; 2812 int32_t blocked_readers; 2813 int error, error1, rv; 2814 2815 uq = td->td_umtxq; 2816 error = fueword32(&rwlock->rw_flags, &flags); 2817 if (error == -1) 2818 return (EFAULT); 2819 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2820 if (error != 0) 2821 return (error); 2822 2823 if (timeout != NULL) 2824 abs_timeout_init2(&timo, timeout); 2825 2826 blocked_readers = 0; 2827 for (;;) { 2828 rv = fueword32(&rwlock->rw_state, &state); 2829 if (rv == -1) { 2830 umtx_key_release(&uq->uq_key); 2831 return (EFAULT); 2832 } 2833 while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) { 2834 rv = casueword32(&rwlock->rw_state, state, 2835 &oldstate, state | URWLOCK_WRITE_OWNER); 2836 if (rv == -1) { 2837 umtx_key_release(&uq->uq_key); 2838 return (EFAULT); 2839 } 2840 if (oldstate == state) { 2841 umtx_key_release(&uq->uq_key); 2842 return (0); 2843 } 2844 state = oldstate; 2845 error = umtxq_check_susp(td); 2846 if (error != 0) 2847 break; 2848 } 2849 2850 if (error) { 2851 if 
(!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) && 2852 blocked_readers != 0) { 2853 umtxq_lock(&uq->uq_key); 2854 umtxq_busy(&uq->uq_key); 2855 umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE); 2856 umtxq_unbusy(&uq->uq_key); 2857 umtxq_unlock(&uq->uq_key); 2858 } 2859 2860 break; 2861 } 2862 2863 /* grab monitor lock */ 2864 umtxq_lock(&uq->uq_key); 2865 umtxq_busy(&uq->uq_key); 2866 umtxq_unlock(&uq->uq_key); 2867 2868 /* 2869 * re-read the state, in case it changed between the try-lock above 2870 * and the check below 2871 */ 2872 rv = fueword32(&rwlock->rw_state, &state); 2873 if (rv == -1) 2874 error = EFAULT; 2875 2876 while (error == 0 && ((state & URWLOCK_WRITE_OWNER) || 2877 URWLOCK_READER_COUNT(state) != 0) && 2878 (state & URWLOCK_WRITE_WAITERS) == 0) { 2879 rv = casueword32(&rwlock->rw_state, state, 2880 &oldstate, state | URWLOCK_WRITE_WAITERS); 2881 if (rv == -1) { 2882 error = EFAULT; 2883 break; 2884 } 2885 if (oldstate == state) 2886 goto sleep; 2887 state = oldstate; 2888 error = umtxq_check_susp(td); 2889 if (error != 0) 2890 break; 2891 } 2892 if (error != 0) { 2893 umtxq_unbusy_unlocked(&uq->uq_key); 2894 break; 2895 } 2896 2897 if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) { 2898 umtxq_unbusy_unlocked(&uq->uq_key); 2899 error = umtxq_check_susp(td); 2900 if (error != 0) 2901 break; 2902 continue; 2903 } 2904 sleep: 2905 rv = fueword32(&rwlock->rw_blocked_writers, 2906 &blocked_writers); 2907 if (rv == -1) { 2908 umtxq_unbusy_unlocked(&uq->uq_key); 2909 error = EFAULT; 2910 break; 2911 } 2912 suword32(&rwlock->rw_blocked_writers, blocked_writers+1); 2913 2914 while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) { 2915 umtxq_lock(&uq->uq_key); 2916 umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE); 2917 umtxq_unbusy(&uq->uq_key); 2918 2919 error = umtxq_sleep(uq, "uwrlck", timeout == NULL ? 2920 NULL : &timo); 2921 2922 umtxq_busy(&uq->uq_key); 2923 umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE); 2924 umtxq_unlock(&uq->uq_key); 2925 if (error) 2926 break; 2927 rv = fueword32(&rwlock->rw_state, &state); 2928 if (rv == -1) { 2929 error = EFAULT; 2930 break; 2931 } 2932 } 2933 2934 rv = fueword32(&rwlock->rw_blocked_writers, 2935 &blocked_writers); 2936 if (rv == -1) { 2937 umtxq_unbusy_unlocked(&uq->uq_key); 2938 error = EFAULT; 2939 break; 2940 } 2941 suword32(&rwlock->rw_blocked_writers, blocked_writers-1); 2942 if (blocked_writers == 1) { 2943 rv = fueword32(&rwlock->rw_state, &state); 2944 if (rv == -1) { 2945 umtxq_unbusy_unlocked(&uq->uq_key); 2946 error = EFAULT; 2947 break; 2948 } 2949 for (;;) { 2950 rv = casueword32(&rwlock->rw_state, state, 2951 &oldstate, state & ~URWLOCK_WRITE_WAITERS); 2952 if (rv == -1) { 2953 error = EFAULT; 2954 break; 2955 } 2956 if (oldstate == state) 2957 break; 2958 state = oldstate; 2959 error1 = umtxq_check_susp(td); 2960 /* 2961 * We are leaving the URWLOCK_WRITE_WAITERS 2962 * behind, but this should not harm the 2963 * correctness. 
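 * At worst a later locker takes the kernel slow path once more and re-evaluates the state under the chain lock.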
2964 */ 2965 if (error1 != 0) { 2966 if (error == 0) 2967 error = error1; 2968 break; 2969 } 2970 } 2971 rv = fueword32(&rwlock->rw_blocked_readers, 2972 &blocked_readers); 2973 if (rv == -1) { 2974 umtxq_unbusy_unlocked(&uq->uq_key); 2975 error = EFAULT; 2976 break; 2977 } 2978 } else 2979 blocked_readers = 0; 2980 2981 umtxq_unbusy_unlocked(&uq->uq_key); 2982 } 2983 2984 umtx_key_release(&uq->uq_key); 2985 if (error == ERESTART) 2986 error = EINTR; 2987 return (error); 2988 } 2989 2990 static int 2991 do_rw_unlock(struct thread *td, struct urwlock *rwlock) 2992 { 2993 struct umtx_q *uq; 2994 uint32_t flags; 2995 int32_t state, oldstate; 2996 int error, rv, q, count; 2997 2998 uq = td->td_umtxq; 2999 error = fueword32(&rwlock->rw_flags, &flags); 3000 if (error == -1) 3001 return (EFAULT); 3002 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3003 if (error != 0) 3004 return (error); 3005 3006 error = fueword32(&rwlock->rw_state, &state); 3007 if (error == -1) { 3008 error = EFAULT; 3009 goto out; 3010 } 3011 if (state & URWLOCK_WRITE_OWNER) { 3012 for (;;) { 3013 rv = casueword32(&rwlock->rw_state, state, 3014 &oldstate, state & ~URWLOCK_WRITE_OWNER); 3015 if (rv == -1) { 3016 error = EFAULT; 3017 goto out; 3018 } 3019 if (oldstate != state) { 3020 state = oldstate; 3021 if (!(oldstate & URWLOCK_WRITE_OWNER)) { 3022 error = EPERM; 3023 goto out; 3024 } 3025 error = umtxq_check_susp(td); 3026 if (error != 0) 3027 goto out; 3028 } else 3029 break; 3030 } 3031 } else if (URWLOCK_READER_COUNT(state) != 0) { 3032 for (;;) { 3033 rv = casueword32(&rwlock->rw_state, state, 3034 &oldstate, state - 1); 3035 if (rv == -1) { 3036 error = EFAULT; 3037 goto out; 3038 } 3039 if (oldstate != state) { 3040 state = oldstate; 3041 if (URWLOCK_READER_COUNT(oldstate) == 0) { 3042 error = EPERM; 3043 goto out; 3044 } 3045 error = umtxq_check_susp(td); 3046 if (error != 0) 3047 goto out; 3048 } else 3049 break; 3050 } 3051 } else { 3052 error = EPERM; 3053 goto out; 3054 } 3055 3056 count = 0; 3057 3058 if (!(flags & URWLOCK_PREFER_READER)) { 3059 if (state & URWLOCK_WRITE_WAITERS) { 3060 count = 1; 3061 q = UMTX_EXCLUSIVE_QUEUE; 3062 } else if (state & URWLOCK_READ_WAITERS) { 3063 count = INT_MAX; 3064 q = UMTX_SHARED_QUEUE; 3065 } 3066 } else { 3067 if (state & URWLOCK_READ_WAITERS) { 3068 count = INT_MAX; 3069 q = UMTX_SHARED_QUEUE; 3070 } else if (state & URWLOCK_WRITE_WAITERS) { 3071 count = 1; 3072 q = UMTX_EXCLUSIVE_QUEUE; 3073 } 3074 } 3075 3076 if (count) { 3077 umtxq_lock(&uq->uq_key); 3078 umtxq_busy(&uq->uq_key); 3079 umtxq_signal_queue(&uq->uq_key, count, q); 3080 umtxq_unbusy(&uq->uq_key); 3081 umtxq_unlock(&uq->uq_key); 3082 } 3083 out: 3084 umtx_key_release(&uq->uq_key); 3085 return (error); 3086 } 3087 3088 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3089 static int 3090 do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout) 3091 { 3092 struct abs_timeout timo; 3093 struct umtx_q *uq; 3094 uint32_t flags, count, count1; 3095 int error, rv; 3096 3097 uq = td->td_umtxq; 3098 error = fueword32(&sem->_flags, &flags); 3099 if (error == -1) 3100 return (EFAULT); 3101 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3102 if (error != 0) 3103 return (error); 3104 3105 if (timeout != NULL) 3106 abs_timeout_init2(&timo, timeout); 3107 3108 umtxq_lock(&uq->uq_key); 3109 umtxq_busy(&uq->uq_key); 3110 umtxq_insert(uq); 3111 umtxq_unlock(&uq->uq_key); 3112 rv = casueword32(&sem->_has_waiters, 0, &count1, 1); 3113 if (rv == 0) 
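/* A waiter is now advertised; re-read the count to catch a racing post. */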
3114 rv = fueword32(&sem->_count, &count); 3115 if (rv == -1 || count != 0) { 3116 umtxq_lock(&uq->uq_key); 3117 umtxq_unbusy(&uq->uq_key); 3118 umtxq_remove(uq); 3119 umtxq_unlock(&uq->uq_key); 3120 umtx_key_release(&uq->uq_key); 3121 return (rv == -1 ? EFAULT : 0); 3122 } 3123 umtxq_lock(&uq->uq_key); 3124 umtxq_unbusy(&uq->uq_key); 3125 3126 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 3127 3128 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3129 error = 0; 3130 else { 3131 umtxq_remove(uq); 3132 /* A relative timeout cannot be restarted. */ 3133 if (error == ERESTART && timeout != NULL && 3134 (timeout->_flags & UMTX_ABSTIME) == 0) 3135 error = EINTR; 3136 } 3137 umtxq_unlock(&uq->uq_key); 3138 umtx_key_release(&uq->uq_key); 3139 return (error); 3140 } 3141 3142 /* 3143 * Signal a userland semaphore. 3144 */ 3145 static int 3146 do_sem_wake(struct thread *td, struct _usem *sem) 3147 { 3148 struct umtx_key key; 3149 int error, cnt; 3150 uint32_t flags; 3151 3152 error = fueword32(&sem->_flags, &flags); 3153 if (error == -1) 3154 return (EFAULT); 3155 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3156 return (error); 3157 umtxq_lock(&key); 3158 umtxq_busy(&key); 3159 cnt = umtxq_count(&key); 3160 if (cnt > 0) { 3161 /* 3162 * Check if count is greater than 0, this means the memory is 3163 * still being referenced by user code, so we can safely 3164 * update _has_waiters flag. 3165 */ 3166 if (cnt == 1) { 3167 umtxq_unlock(&key); 3168 error = suword32(&sem->_has_waiters, 0); 3169 umtxq_lock(&key); 3170 if (error == -1) 3171 error = EFAULT; 3172 } 3173 umtxq_signal(&key, 1); 3174 } 3175 umtxq_unbusy(&key); 3176 umtxq_unlock(&key); 3177 umtx_key_release(&key); 3178 return (error); 3179 } 3180 #endif 3181 3182 static int 3183 do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout) 3184 { 3185 struct abs_timeout timo; 3186 struct umtx_q *uq; 3187 uint32_t count, flags; 3188 int error, rv; 3189 3190 uq = td->td_umtxq; 3191 flags = fuword32(&sem->_flags); 3192 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3193 if (error != 0) 3194 return (error); 3195 3196 if (timeout != NULL) 3197 abs_timeout_init2(&timo, timeout); 3198 3199 umtxq_lock(&uq->uq_key); 3200 umtxq_busy(&uq->uq_key); 3201 umtxq_insert(uq); 3202 umtxq_unlock(&uq->uq_key); 3203 rv = fueword32(&sem->_count, &count); 3204 if (rv == -1) { 3205 umtxq_lock(&uq->uq_key); 3206 umtxq_unbusy(&uq->uq_key); 3207 umtxq_remove(uq); 3208 umtxq_unlock(&uq->uq_key); 3209 umtx_key_release(&uq->uq_key); 3210 return (EFAULT); 3211 } 3212 for (;;) { 3213 if (USEM_COUNT(count) != 0) { 3214 umtxq_lock(&uq->uq_key); 3215 umtxq_unbusy(&uq->uq_key); 3216 umtxq_remove(uq); 3217 umtxq_unlock(&uq->uq_key); 3218 umtx_key_release(&uq->uq_key); 3219 return (0); 3220 } 3221 if (count == USEM_HAS_WAITERS) 3222 break; 3223 rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS); 3224 if (rv == -1) { 3225 umtxq_lock(&uq->uq_key); 3226 umtxq_unbusy(&uq->uq_key); 3227 umtxq_remove(uq); 3228 umtxq_unlock(&uq->uq_key); 3229 umtx_key_release(&uq->uq_key); 3230 return (EFAULT); 3231 } 3232 if (count == 0) 3233 break; 3234 } 3235 umtxq_lock(&uq->uq_key); 3236 umtxq_unbusy(&uq->uq_key); 3237 3238 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 3239 3240 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3241 error = 0; 3242 else { 3243 umtxq_remove(uq); 3244 if (timeout != NULL && (timeout->_flags & UMTX_ABSTIME) == 0) { 3245 /* A relative timeout cannot be restarted. 
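 * On EINTR the remaining time is computed into the kernel copy of the timeout below; the syscall wrapper then copies it out to userland so the caller can resume the wait.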
*/ 3246 if (error == ERESTART) 3247 error = EINTR; 3248 if (error == EINTR) { 3249 abs_timeout_update(&timo); 3250 timeout->_timeout = timo.end; 3251 timespecsub(&timeout->_timeout, &timo.cur); 3252 } 3253 } 3254 } 3255 umtxq_unlock(&uq->uq_key); 3256 umtx_key_release(&uq->uq_key); 3257 return (error); 3258 } 3259 3260 /* 3261 * Signal a userland semaphore. 3262 */ 3263 static int 3264 do_sem2_wake(struct thread *td, struct _usem2 *sem) 3265 { 3266 struct umtx_key key; 3267 int error, cnt, rv; 3268 uint32_t count, flags; 3269 3270 rv = fueword32(&sem->_flags, &flags); 3271 if (rv == -1) 3272 return (EFAULT); 3273 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3274 return (error); 3275 umtxq_lock(&key); 3276 umtxq_busy(&key); 3277 cnt = umtxq_count(&key); 3278 if (cnt > 0) { 3279 /* 3280 * If this was the last sleeping thread, clear the waiters 3281 * flag in _count. 3282 */ 3283 if (cnt == 1) { 3284 umtxq_unlock(&key); 3285 rv = fueword32(&sem->_count, &count); 3286 while (rv != -1 && count & USEM_HAS_WAITERS) 3287 rv = casueword32(&sem->_count, count, &count, 3288 count & ~USEM_HAS_WAITERS); 3289 if (rv == -1) 3290 error = EFAULT; 3291 umtxq_lock(&key); 3292 } 3293 3294 umtxq_signal(&key, 1); 3295 } 3296 umtxq_unbusy(&key); 3297 umtxq_unlock(&key); 3298 umtx_key_release(&key); 3299 return (error); 3300 } 3301 3302 inline int 3303 umtx_copyin_timeout(const void *addr, struct timespec *tsp) 3304 { 3305 int error; 3306 3307 error = copyin(addr, tsp, sizeof(struct timespec)); 3308 if (error == 0) { 3309 if (tsp->tv_sec < 0 || 3310 tsp->tv_nsec >= 1000000000 || 3311 tsp->tv_nsec < 0) 3312 error = EINVAL; 3313 } 3314 return (error); 3315 } 3316 3317 static inline int 3318 umtx_copyin_umtx_time(const void *addr, size_t size, struct _umtx_time *tp) 3319 { 3320 int error; 3321 3322 if (size <= sizeof(struct timespec)) { 3323 tp->_clockid = CLOCK_REALTIME; 3324 tp->_flags = 0; 3325 error = copyin(addr, &tp->_timeout, sizeof(struct timespec)); 3326 } else 3327 error = copyin(addr, tp, sizeof(struct _umtx_time)); 3328 if (error != 0) 3329 return (error); 3330 if (tp->_timeout.tv_sec < 0 || 3331 tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0) 3332 return (EINVAL); 3333 return (0); 3334 } 3335 3336 static int 3337 __umtx_op_unimpl(struct thread *td, struct _umtx_op_args *uap) 3338 { 3339 3340 return (EOPNOTSUPP); 3341 } 3342 3343 static int 3344 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap) 3345 { 3346 struct _umtx_time timeout, *tm_p; 3347 int error; 3348 3349 if (uap->uaddr2 == NULL) 3350 tm_p = NULL; 3351 else { 3352 error = umtx_copyin_umtx_time( 3353 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3354 if (error != 0) 3355 return (error); 3356 tm_p = &timeout; 3357 } 3358 return (do_wait(td, uap->obj, uap->val, tm_p, 0, 0)); 3359 } 3360 3361 static int 3362 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap) 3363 { 3364 struct _umtx_time timeout, *tm_p; 3365 int error; 3366 3367 if (uap->uaddr2 == NULL) 3368 tm_p = NULL; 3369 else { 3370 error = umtx_copyin_umtx_time( 3371 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3372 if (error != 0) 3373 return (error); 3374 tm_p = &timeout; 3375 } 3376 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); 3377 } 3378 3379 static int 3380 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap) 3381 { 3382 struct _umtx_time *tm_p, timeout; 3383 int error; 3384 3385 if (uap->uaddr2 == NULL) 3386 tm_p = NULL; 3387 else { 3388 error = umtx_copyin_umtx_time( 3389 uap->uaddr2, 
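/* uaddr1 carries the byte size of the caller's timeout object. */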
(size_t)uap->uaddr1, &timeout); 3390 if (error != 0) 3391 return (error); 3392 tm_p = &timeout; 3393 } 3394 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1)); 3395 } 3396 3397 static int 3398 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap) 3399 { 3400 3401 return (kern_umtx_wake(td, uap->obj, uap->val, 0)); 3402 } 3403 3404 #define BATCH_SIZE 128 3405 static int 3406 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap) 3407 { 3408 char *uaddrs[BATCH_SIZE], **upp; 3409 int count, error, i, pos, tocopy; 3410 3411 upp = (char **)uap->obj; 3412 error = 0; 3413 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 3414 pos += tocopy) { 3415 tocopy = MIN(count, BATCH_SIZE); 3416 error = copyin(upp + pos, uaddrs, tocopy * sizeof(char *)); 3417 if (error != 0) 3418 break; 3419 for (i = 0; i < tocopy; ++i) 3420 kern_umtx_wake(td, uaddrs[i], INT_MAX, 1); 3421 maybe_yield(); 3422 } 3423 return (error); 3424 } 3425 3426 static int 3427 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap) 3428 { 3429 3430 return (kern_umtx_wake(td, uap->obj, uap->val, 1)); 3431 } 3432 3433 static int 3434 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap) 3435 { 3436 struct _umtx_time *tm_p, timeout; 3437 int error; 3438 3439 /* Allow a null timespec (wait forever). */ 3440 if (uap->uaddr2 == NULL) 3441 tm_p = NULL; 3442 else { 3443 error = umtx_copyin_umtx_time( 3444 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3445 if (error != 0) 3446 return (error); 3447 tm_p = &timeout; 3448 } 3449 return (do_lock_umutex(td, uap->obj, tm_p, 0)); 3450 } 3451 3452 static int 3453 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap) 3454 { 3455 3456 return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY)); 3457 } 3458 3459 static int 3460 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap) 3461 { 3462 struct _umtx_time *tm_p, timeout; 3463 int error; 3464 3465 /* Allow a null timespec (wait forever). */ 3466 if (uap->uaddr2 == NULL) 3467 tm_p = NULL; 3468 else { 3469 error = umtx_copyin_umtx_time( 3470 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3471 if (error != 0) 3472 return (error); 3473 tm_p = &timeout; 3474 } 3475 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT)); 3476 } 3477 3478 static int 3479 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap) 3480 { 3481 3482 return (do_wake_umutex(td, uap->obj)); 3483 } 3484 3485 static int 3486 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap) 3487 { 3488 3489 return (do_unlock_umutex(td, uap->obj, false)); 3490 } 3491 3492 static int 3493 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap) 3494 { 3495 3496 return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1)); 3497 } 3498 3499 static int 3500 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap) 3501 { 3502 struct timespec *ts, timeout; 3503 int error; 3504 3505 /* Allow a null timespec (wait forever). 
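 * As an illustration (hypothetical cv, mtx and ts objects), a direct userland call would look like:
 *	_umtx_op(cv, UMTX_OP_CV_WAIT,
 *	    CVWAIT_CLOCKID | CVWAIT_ABSTIME, mtx, &ts);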
*/ 3506 if (uap->uaddr2 == NULL) 3507 ts = NULL; 3508 else { 3509 error = umtx_copyin_timeout(uap->uaddr2, &timeout); 3510 if (error != 0) 3511 return (error); 3512 ts = &timeout; 3513 } 3514 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 3515 } 3516 3517 static int 3518 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap) 3519 { 3520 3521 return (do_cv_signal(td, uap->obj)); 3522 } 3523 3524 static int 3525 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap) 3526 { 3527 3528 return (do_cv_broadcast(td, uap->obj)); 3529 } 3530 3531 static int 3532 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap) 3533 { 3534 struct _umtx_time timeout; 3535 int error; 3536 3537 /* Allow a null timespec (wait forever). */ 3538 if (uap->uaddr2 == NULL) { 3539 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 3540 } else { 3541 error = umtx_copyin_umtx_time(uap->uaddr2, 3542 (size_t)uap->uaddr1, &timeout); 3543 if (error != 0) 3544 return (error); 3545 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 3546 } 3547 return (error); 3548 } 3549 3550 static int 3551 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap) 3552 { 3553 struct _umtx_time timeout; 3554 int error; 3555 3556 /* Allow a null timespec (wait forever). */ 3557 if (uap->uaddr2 == NULL) { 3558 error = do_rw_wrlock(td, uap->obj, 0); 3559 } else { 3560 error = umtx_copyin_umtx_time(uap->uaddr2, 3561 (size_t)uap->uaddr1, &timeout); 3562 if (error != 0) 3563 return (error); 3564 3565 error = do_rw_wrlock(td, uap->obj, &timeout); 3566 } 3567 return (error); 3568 } 3569 3570 static int 3571 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap) 3572 { 3573 3574 return (do_rw_unlock(td, uap->obj)); 3575 } 3576 3577 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3578 static int 3579 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap) 3580 { 3581 struct _umtx_time *tm_p, timeout; 3582 int error; 3583 3584 /* Allow a null timespec (wait forever). */ 3585 if (uap->uaddr2 == NULL) 3586 tm_p = NULL; 3587 else { 3588 error = umtx_copyin_umtx_time( 3589 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3590 if (error != 0) 3591 return (error); 3592 tm_p = &timeout; 3593 } 3594 return (do_sem_wait(td, uap->obj, tm_p)); 3595 } 3596 3597 static int 3598 __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap) 3599 { 3600 3601 return (do_sem_wake(td, uap->obj)); 3602 } 3603 #endif 3604 3605 static int 3606 __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap) 3607 { 3608 3609 return (do_wake2_umutex(td, uap->obj, uap->val)); 3610 } 3611 3612 static int 3613 __umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap) 3614 { 3615 struct _umtx_time *tm_p, timeout; 3616 size_t uasize; 3617 int error; 3618 3619 /* Allow a null timespec (wait forever). 
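 * If the sleep is interrupted, the remaining time is copied out to the timespec that follows the caller's struct _umtx_time, provided uaddr1 advertises enough room for it.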
*/ 3620 if (uap->uaddr2 == NULL) { 3621 uasize = 0; 3622 tm_p = NULL; 3623 } else { 3624 uasize = (size_t)uap->uaddr1; 3625 error = umtx_copyin_umtx_time(uap->uaddr2, uasize, &timeout); 3626 if (error != 0) 3627 return (error); 3628 tm_p = &timeout; 3629 } 3630 error = do_sem2_wait(td, uap->obj, tm_p); 3631 if (error == EINTR && uap->uaddr2 != NULL && 3632 (timeout._flags & UMTX_ABSTIME) == 0 && 3633 uasize >= sizeof(struct _umtx_time) + sizeof(struct timespec)) { 3634 error = copyout(&timeout._timeout, 3635 (struct _umtx_time *)uap->uaddr2 + 1, 3636 sizeof(struct timespec)); 3637 if (error == 0) { 3638 error = EINTR; 3639 } 3640 } 3641 3642 return (error); 3643 } 3644 3645 static int 3646 __umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap) 3647 { 3648 3649 return (do_sem2_wake(td, uap->obj)); 3650 } 3651 3652 #define USHM_OBJ_UMTX(o) \ 3653 ((struct umtx_shm_obj_list *)(&(o)->umtx_data)) 3654 3655 #define USHMF_REG_LINKED 0x0001 3656 #define USHMF_OBJ_LINKED 0x0002 3657 struct umtx_shm_reg { 3658 TAILQ_ENTRY(umtx_shm_reg) ushm_reg_link; 3659 LIST_ENTRY(umtx_shm_reg) ushm_obj_link; 3660 struct umtx_key ushm_key; 3661 struct ucred *ushm_cred; 3662 struct shmfd *ushm_obj; 3663 u_int ushm_refcnt; 3664 u_int ushm_flags; 3665 }; 3666 3667 LIST_HEAD(umtx_shm_obj_list, umtx_shm_reg); 3668 TAILQ_HEAD(umtx_shm_reg_head, umtx_shm_reg); 3669 3670 static uma_zone_t umtx_shm_reg_zone; 3671 static struct umtx_shm_reg_head umtx_shm_registry[UMTX_CHAINS]; 3672 static struct mtx umtx_shm_lock; 3673 static struct umtx_shm_reg_head umtx_shm_reg_delfree = 3674 TAILQ_HEAD_INITIALIZER(umtx_shm_reg_delfree); 3675 3676 static void umtx_shm_free_reg(struct umtx_shm_reg *reg); 3677 3678 static void 3679 umtx_shm_reg_delfree_tq(void *context __unused, int pending __unused) 3680 { 3681 struct umtx_shm_reg_head d; 3682 struct umtx_shm_reg *reg, *reg1; 3683 3684 TAILQ_INIT(&d); 3685 mtx_lock(&umtx_shm_lock); 3686 TAILQ_CONCAT(&d, &umtx_shm_reg_delfree, ushm_reg_link); 3687 mtx_unlock(&umtx_shm_lock); 3688 TAILQ_FOREACH_SAFE(reg, &d, ushm_reg_link, reg1) { 3689 TAILQ_REMOVE(&d, reg, ushm_reg_link); 3690 umtx_shm_free_reg(reg); 3691 } 3692 } 3693 3694 static struct task umtx_shm_reg_delfree_task = 3695 TASK_INITIALIZER(0, umtx_shm_reg_delfree_tq, NULL); 3696 3697 static struct umtx_shm_reg * 3698 umtx_shm_find_reg_locked(const struct umtx_key *key) 3699 { 3700 struct umtx_shm_reg *reg; 3701 struct umtx_shm_reg_head *reg_head; 3702 3703 KASSERT(key->shared, ("umtx_p_find_rg: private key")); 3704 mtx_assert(&umtx_shm_lock, MA_OWNED); 3705 reg_head = &umtx_shm_registry[key->hash]; 3706 TAILQ_FOREACH(reg, reg_head, ushm_reg_link) { 3707 KASSERT(reg->ushm_key.shared, 3708 ("non-shared key on reg %p %d", reg, reg->ushm_key.shared)); 3709 if (reg->ushm_key.info.shared.object == 3710 key->info.shared.object && 3711 reg->ushm_key.info.shared.offset == 3712 key->info.shared.offset) { 3713 KASSERT(reg->ushm_key.type == TYPE_SHM, ("TYPE_USHM")); 3714 KASSERT(reg->ushm_refcnt > 0, 3715 ("reg %p refcnt 0 onlist", reg)); 3716 KASSERT((reg->ushm_flags & USHMF_REG_LINKED) != 0, 3717 ("reg %p not linked", reg)); 3718 reg->ushm_refcnt++; 3719 return (reg); 3720 } 3721 } 3722 return (NULL); 3723 } 3724 3725 static struct umtx_shm_reg * 3726 umtx_shm_find_reg(const struct umtx_key *key) 3727 { 3728 struct umtx_shm_reg *reg; 3729 3730 mtx_lock(&umtx_shm_lock); 3731 reg = umtx_shm_find_reg_locked(key); 3732 mtx_unlock(&umtx_shm_lock); 3733 return (reg); 3734 } 3735 3736 static void 3737 umtx_shm_free_reg(struct umtx_shm_reg *reg) 
3738 { 3739 3740 chgumtxcnt(reg->ushm_cred->cr_ruidinfo, -1, 0); 3741 crfree(reg->ushm_cred); 3742 shm_drop(reg->ushm_obj); 3743 uma_zfree(umtx_shm_reg_zone, reg); 3744 } 3745 3746 static bool 3747 umtx_shm_unref_reg_locked(struct umtx_shm_reg *reg, bool force) 3748 { 3749 bool res; 3750 3751 mtx_assert(&umtx_shm_lock, MA_OWNED); 3752 KASSERT(reg->ushm_refcnt > 0, ("ushm_reg %p refcnt 0", reg)); 3753 reg->ushm_refcnt--; 3754 res = reg->ushm_refcnt == 0; 3755 if (res || force) { 3756 if ((reg->ushm_flags & USHMF_REG_LINKED) != 0) { 3757 TAILQ_REMOVE(&umtx_shm_registry[reg->ushm_key.hash], 3758 reg, ushm_reg_link); 3759 reg->ushm_flags &= ~USHMF_REG_LINKED; 3760 } 3761 if ((reg->ushm_flags & USHMF_OBJ_LINKED) != 0) { 3762 LIST_REMOVE(reg, ushm_obj_link); 3763 reg->ushm_flags &= ~USHMF_OBJ_LINKED; 3764 } 3765 } 3766 return (res); 3767 } 3768 3769 static void 3770 umtx_shm_unref_reg(struct umtx_shm_reg *reg, bool force) 3771 { 3772 vm_object_t object; 3773 bool dofree; 3774 3775 if (force) { 3776 object = reg->ushm_obj->shm_object; 3777 VM_OBJECT_WLOCK(object); 3778 object->flags |= OBJ_UMTXDEAD; 3779 VM_OBJECT_WUNLOCK(object); 3780 } 3781 mtx_lock(&umtx_shm_lock); 3782 dofree = umtx_shm_unref_reg_locked(reg, force); 3783 mtx_unlock(&umtx_shm_lock); 3784 if (dofree) 3785 umtx_shm_free_reg(reg); 3786 } 3787 3788 void 3789 umtx_shm_object_init(vm_object_t object) 3790 { 3791 3792 LIST_INIT(USHM_OBJ_UMTX(object)); 3793 } 3794 3795 void 3796 umtx_shm_object_terminated(vm_object_t object) 3797 { 3798 struct umtx_shm_reg *reg, *reg1; 3799 bool dofree; 3800 3801 dofree = false; 3802 mtx_lock(&umtx_shm_lock); 3803 LIST_FOREACH_SAFE(reg, USHM_OBJ_UMTX(object), ushm_obj_link, reg1) { 3804 if (umtx_shm_unref_reg_locked(reg, true)) { 3805 TAILQ_INSERT_TAIL(&umtx_shm_reg_delfree, reg, 3806 ushm_reg_link); 3807 dofree = true; 3808 } 3809 } 3810 mtx_unlock(&umtx_shm_lock); 3811 if (dofree) 3812 taskqueue_enqueue(taskqueue_thread, &umtx_shm_reg_delfree_task); 3813 } 3814 3815 static int 3816 umtx_shm_create_reg(struct thread *td, const struct umtx_key *key, 3817 struct umtx_shm_reg **res) 3818 { 3819 struct umtx_shm_reg *reg, *reg1; 3820 struct ucred *cred; 3821 int error; 3822 3823 reg = umtx_shm_find_reg(key); 3824 if (reg != NULL) { 3825 *res = reg; 3826 return (0); 3827 } 3828 cred = td->td_ucred; 3829 if (!chgumtxcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_UMTXP))) 3830 return (ENOMEM); 3831 reg = uma_zalloc(umtx_shm_reg_zone, M_WAITOK | M_ZERO); 3832 reg->ushm_refcnt = 1; 3833 bcopy(key, &reg->ushm_key, sizeof(*key)); 3834 reg->ushm_obj = shm_alloc(td->td_ucred, O_RDWR); 3835 reg->ushm_cred = crhold(cred); 3836 error = shm_dotruncate(reg->ushm_obj, PAGE_SIZE); 3837 if (error != 0) { 3838 umtx_shm_free_reg(reg); 3839 return (error); 3840 } 3841 mtx_lock(&umtx_shm_lock); 3842 reg1 = umtx_shm_find_reg_locked(key); 3843 if (reg1 != NULL) { 3844 mtx_unlock(&umtx_shm_lock); 3845 umtx_shm_free_reg(reg); 3846 *res = reg1; 3847 return (0); 3848 } 3849 reg->ushm_refcnt++; 3850 TAILQ_INSERT_TAIL(&umtx_shm_registry[key->hash], reg, ushm_reg_link); 3851 LIST_INSERT_HEAD(USHM_OBJ_UMTX(key->info.shared.object), reg, 3852 ushm_obj_link); 3853 reg->ushm_flags = USHMF_REG_LINKED | USHMF_OBJ_LINKED; 3854 mtx_unlock(&umtx_shm_lock); 3855 *res = reg; 3856 return (0); 3857 } 3858 3859 static int 3860 umtx_shm_alive(struct thread *td, void *addr) 3861 { 3862 vm_map_t map; 3863 vm_map_entry_t entry; 3864 vm_object_t object; 3865 vm_pindex_t pindex; 3866 vm_prot_t prot; 3867 int res, ret; 3868 boolean_t wired; 3869 3870 map =
&td->td_proc->p_vmspace->vm_map; 3871 res = vm_map_lookup(&map, (uintptr_t)addr, VM_PROT_READ, &entry, 3872 &object, &pindex, &prot, &wired); 3873 if (res != KERN_SUCCESS) 3874 return (EFAULT); 3875 if (object == NULL) 3876 ret = EINVAL; 3877 else 3878 ret = (object->flags & OBJ_UMTXDEAD) != 0 ? ENOTTY : 0; 3879 vm_map_lookup_done(map, entry); 3880 return (ret); 3881 } 3882 3883 static void 3884 umtx_shm_init(void) 3885 { 3886 int i; 3887 3888 umtx_shm_reg_zone = uma_zcreate("umtx_shm", sizeof(struct umtx_shm_reg), 3889 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 3890 mtx_init(&umtx_shm_lock, "umtxshm", NULL, MTX_DEF); 3891 for (i = 0; i < nitems(umtx_shm_registry); i++) 3892 TAILQ_INIT(&umtx_shm_registry[i]); 3893 } 3894 3895 static int 3896 umtx_shm(struct thread *td, void *addr, u_int flags) 3897 { 3898 struct umtx_key key; 3899 struct umtx_shm_reg *reg; 3900 struct file *fp; 3901 int error, fd; 3902 3903 if (__bitcount(flags & (UMTX_SHM_CREAT | UMTX_SHM_LOOKUP | 3904 UMTX_SHM_DESTROY | UMTX_SHM_ALIVE)) != 1) 3905 return (EINVAL); 3906 if ((flags & UMTX_SHM_ALIVE) != 0) 3907 return (umtx_shm_alive(td, addr)); 3908 error = umtx_key_get(addr, TYPE_SHM, PROCESS_SHARE, &key); 3909 if (error != 0) 3910 return (error); 3911 KASSERT(key.shared == 1, ("non-shared key")); 3912 if ((flags & UMTX_SHM_CREAT) != 0) { 3913 error = umtx_shm_create_reg(td, &key, &reg); 3914 } else { 3915 reg = umtx_shm_find_reg(&key); 3916 if (reg == NULL) 3917 error = ESRCH; 3918 } 3919 umtx_key_release(&key); 3920 if (error != 0) 3921 return (error); 3922 KASSERT(reg != NULL, ("no reg")); 3923 if ((flags & UMTX_SHM_DESTROY) != 0) { 3924 umtx_shm_unref_reg(reg, true); 3925 } else { 3926 #if 0 3927 #ifdef MAC 3928 error = mac_posixshm_check_open(td->td_ucred, 3929 reg->ushm_obj, FFLAGS(O_RDWR)); 3930 if (error == 0) 3931 #endif 3932 error = shm_access(reg->ushm_obj, td->td_ucred, 3933 FFLAGS(O_RDWR)); 3934 if (error == 0) 3935 #endif 3936 error = falloc_caps(td, &fp, &fd, O_CLOEXEC, NULL); 3937 if (error == 0) { 3938 shm_hold(reg->ushm_obj); 3939 finit(fp, FFLAGS(O_RDWR), DTYPE_SHM, reg->ushm_obj, 3940 &shm_ops); 3941 td->td_retval[0] = fd; 3942 fdrop(fp, td); 3943 } 3944 } 3945 umtx_shm_unref_reg(reg, false); 3946 return (error); 3947 } 3948 3949 static int 3950 __umtx_op_shm(struct thread *td, struct _umtx_op_args *uap) 3951 { 3952 3953 return (umtx_shm(td, uap->uaddr1, uap->val)); 3954 } 3955 3956 static int 3957 umtx_robust_lists(struct thread *td, struct umtx_robust_lists_params *rbp) 3958 { 3959 3960 td->td_rb_list = rbp->robust_list_offset; 3961 td->td_rbp_list = rbp->robust_priv_list_offset; 3962 td->td_rb_inact = rbp->robust_inact_offset; 3963 return (0); 3964 } 3965 3966 static int 3967 __umtx_op_robust_lists(struct thread *td, struct _umtx_op_args *uap) 3968 { 3969 struct umtx_robust_lists_params rb; 3970 int error; 3971 3972 if (uap->val > sizeof(rb)) 3973 return (EINVAL); 3974 bzero(&rb, sizeof(rb)); 3975 error = copyin(uap->uaddr1, &rb, uap->val); 3976 if (error != 0) 3977 return (error); 3978 return (umtx_robust_lists(td, &rb)); 3979 } 3980 3981 typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap); 3982 3983 static const _umtx_op_func op_table[] = { 3984 [UMTX_OP_RESERVED0] = __umtx_op_unimpl, 3985 [UMTX_OP_RESERVED1] = __umtx_op_unimpl, 3986 [UMTX_OP_WAIT] = __umtx_op_wait, 3987 [UMTX_OP_WAKE] = __umtx_op_wake, 3988 [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex, 3989 [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex, 3990 [UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex, 3991
[UMTX_OP_SET_CEILING] = __umtx_op_set_ceiling, 3992 [UMTX_OP_CV_WAIT] = __umtx_op_cv_wait, 3993 [UMTX_OP_CV_SIGNAL] = __umtx_op_cv_signal, 3994 [UMTX_OP_CV_BROADCAST] = __umtx_op_cv_broadcast, 3995 [UMTX_OP_WAIT_UINT] = __umtx_op_wait_uint, 3996 [UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock, 3997 [UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock, 3998 [UMTX_OP_RW_UNLOCK] = __umtx_op_rw_unlock, 3999 [UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private, 4000 [UMTX_OP_WAKE_PRIVATE] = __umtx_op_wake_private, 4001 [UMTX_OP_MUTEX_WAIT] = __umtx_op_wait_umutex, 4002 [UMTX_OP_MUTEX_WAKE] = __umtx_op_wake_umutex, 4003 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4004 [UMTX_OP_SEM_WAIT] = __umtx_op_sem_wait, 4005 [UMTX_OP_SEM_WAKE] = __umtx_op_sem_wake, 4006 #else 4007 [UMTX_OP_SEM_WAIT] = __umtx_op_unimpl, 4008 [UMTX_OP_SEM_WAKE] = __umtx_op_unimpl, 4009 #endif 4010 [UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private, 4011 [UMTX_OP_MUTEX_WAKE2] = __umtx_op_wake2_umutex, 4012 [UMTX_OP_SEM2_WAIT] = __umtx_op_sem2_wait, 4013 [UMTX_OP_SEM2_WAKE] = __umtx_op_sem2_wake, 4014 [UMTX_OP_SHM] = __umtx_op_shm, 4015 [UMTX_OP_ROBUST_LISTS] = __umtx_op_robust_lists, 4016 }; 4017 4018 int 4019 sys__umtx_op(struct thread *td, struct _umtx_op_args *uap) 4020 { 4021 4022 if ((unsigned)uap->op < nitems(op_table)) 4023 return (*op_table[uap->op])(td, uap); 4024 return (EINVAL); 4025 } 4026 4027 #ifdef COMPAT_FREEBSD32 4028 4029 struct timespec32 { 4030 int32_t tv_sec; 4031 int32_t tv_nsec; 4032 }; 4033 4034 struct umtx_time32 { 4035 struct timespec32 timeout; 4036 uint32_t flags; 4037 uint32_t clockid; 4038 }; 4039 4040 static inline int 4041 umtx_copyin_timeout32(void *addr, struct timespec *tsp) 4042 { 4043 struct timespec32 ts32; 4044 int error; 4045 4046 error = copyin(addr, &ts32, sizeof(struct timespec32)); 4047 if (error == 0) { 4048 if (ts32.tv_sec < 0 || 4049 ts32.tv_nsec >= 1000000000 || 4050 ts32.tv_nsec < 0) 4051 error = EINVAL; 4052 else { 4053 tsp->tv_sec = ts32.tv_sec; 4054 tsp->tv_nsec = ts32.tv_nsec; 4055 } 4056 } 4057 return (error); 4058 } 4059 4060 static inline int 4061 umtx_copyin_umtx_time32(const void *addr, size_t size, struct _umtx_time *tp) 4062 { 4063 struct umtx_time32 t32; 4064 int error; 4065 4066 t32.clockid = CLOCK_REALTIME; 4067 t32.flags = 0; 4068 if (size <= sizeof(struct timespec32)) 4069 error = copyin(addr, &t32.timeout, sizeof(struct timespec32)); 4070 else 4071 error = copyin(addr, &t32, sizeof(struct umtx_time32)); 4072 if (error != 0) 4073 return (error); 4074 if (t32.timeout.tv_sec < 0 || 4075 t32.timeout.tv_nsec >= 1000000000 || t32.timeout.tv_nsec < 0) 4076 return (EINVAL); 4077 tp->_timeout.tv_sec = t32.timeout.tv_sec; 4078 tp->_timeout.tv_nsec = t32.timeout.tv_nsec; 4079 tp->_flags = t32.flags; 4080 tp->_clockid = t32.clockid; 4081 return (0); 4082 } 4083 4084 static int 4085 __umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 4086 { 4087 struct _umtx_time *tm_p, timeout; 4088 int error; 4089 4090 if (uap->uaddr2 == NULL) 4091 tm_p = NULL; 4092 else { 4093 error = umtx_copyin_umtx_time32(uap->uaddr2, 4094 (size_t)uap->uaddr1, &timeout); 4095 if (error != 0) 4096 return (error); 4097 tm_p = &timeout; 4098 } 4099 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); 4100 } 4101 4102 static int 4103 __umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap) 4104 { 4105 struct _umtx_time *tm_p, timeout; 4106 int error; 4107 4108 /* Allow a null timespec (wait forever). 
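 * The compat32 shim converts the caller's 32-bit timespec (or umtx_time32) into the native struct _umtx_time before entering the common locking path.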
*/ 4109 if (uap->uaddr2 == NULL) 4110 tm_p = NULL; 4111 else { 4112 error = umtx_copyin_umtx_time32(uap->uaddr2, 4113 (size_t)uap->uaddr1, &timeout); 4114 if (error != 0) 4115 return (error); 4116 tm_p = &timeout; 4117 } 4118 return (do_lock_umutex(td, uap->obj, tm_p, 0)); 4119 } 4120 4121 static int 4122 __umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap) 4123 { 4124 struct _umtx_time *tm_p, timeout; 4125 int error; 4126 4127 /* Allow a null timespec (wait forever). */ 4128 if (uap->uaddr2 == NULL) 4129 tm_p = NULL; 4130 else { 4131 error = umtx_copyin_umtx_time32(uap->uaddr2, 4132 (size_t)uap->uaddr1, &timeout); 4133 if (error != 0) 4134 return (error); 4135 tm_p = &timeout; 4136 } 4137 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT)); 4138 } 4139 4140 static int 4141 __umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 4142 { 4143 struct timespec *ts, timeout; 4144 int error; 4145 4146 /* Allow a null timespec (wait forever). */ 4147 if (uap->uaddr2 == NULL) 4148 ts = NULL; 4149 else { 4150 error = umtx_copyin_timeout32(uap->uaddr2, &timeout); 4151 if (error != 0) 4152 return (error); 4153 ts = &timeout; 4154 } 4155 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 4156 } 4157 4158 static int 4159 __umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap) 4160 { 4161 struct _umtx_time timeout; 4162 int error; 4163 4164 /* Allow a null timespec (wait forever). */ 4165 if (uap->uaddr2 == NULL) { 4166 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 4167 } else { 4168 error = umtx_copyin_umtx_time32(uap->uaddr2, 4169 (size_t)uap->uaddr1, &timeout); 4170 if (error != 0) 4171 return (error); 4172 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 4173 } 4174 return (error); 4175 } 4176 4177 static int 4178 __umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap) 4179 { 4180 struct _umtx_time timeout; 4181 int error; 4182 4183 /* Allow a null timespec (wait forever). */ 4184 if (uap->uaddr2 == NULL) { 4185 error = do_rw_wrlock(td, uap->obj, 0); 4186 } else { 4187 error = umtx_copyin_umtx_time32(uap->uaddr2, 4188 (size_t)uap->uaddr1, &timeout); 4189 if (error != 0) 4190 return (error); 4191 error = do_rw_wrlock(td, uap->obj, &timeout); 4192 } 4193 return (error); 4194 } 4195 4196 static int 4197 __umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap) 4198 { 4199 struct _umtx_time *tm_p, timeout; 4200 int error; 4201 4202 if (uap->uaddr2 == NULL) 4203 tm_p = NULL; 4204 else { 4205 error = umtx_copyin_umtx_time32( 4206 uap->uaddr2, (size_t)uap->uaddr1,&timeout); 4207 if (error != 0) 4208 return (error); 4209 tm_p = &timeout; 4210 } 4211 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1)); 4212 } 4213 4214 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4215 static int 4216 __umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 4217 { 4218 struct _umtx_time *tm_p, timeout; 4219 int error; 4220 4221 /* Allow a null timespec (wait forever). 
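 * This path serves the legacy struct _usem and is compiled only for FreeBSD 9/10 binary compatibility.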
*/ 4222 if (uap->uaddr2 == NULL) 4223 tm_p = NULL; 4224 else { 4225 error = umtx_copyin_umtx_time32(uap->uaddr2, 4226 (size_t)uap->uaddr1, &timeout); 4227 if (error != 0) 4228 return (error); 4229 tm_p = &timeout; 4230 } 4231 return (do_sem_wait(td, uap->obj, tm_p)); 4232 } 4233 #endif 4234 4235 static int 4236 __umtx_op_sem2_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 4237 { 4238 struct _umtx_time *tm_p, timeout; 4239 size_t uasize; 4240 int error; 4241 4242 /* Allow a null timespec (wait forever). */ 4243 if (uap->uaddr2 == NULL) { 4244 uasize = 0; 4245 tm_p = NULL; 4246 } else { 4247 uasize = (size_t)uap->uaddr1; 4248 error = umtx_copyin_umtx_time32(uap->uaddr2, uasize, &timeout); 4249 if (error != 0) 4250 return (error); 4251 tm_p = &timeout; 4252 } 4253 error = do_sem2_wait(td, uap->obj, tm_p); 4254 if (error == EINTR && uap->uaddr2 != NULL && 4255 (timeout._flags & UMTX_ABSTIME) == 0 && 4256 uasize >= sizeof(struct umtx_time32) + sizeof(struct timespec32)) { 4257 struct timespec32 remain32 = { 4258 .tv_sec = timeout._timeout.tv_sec, 4259 .tv_nsec = timeout._timeout.tv_nsec 4260 }; 4261 error = copyout(&remain32, 4262 (struct umtx_time32 *)uap->uaddr2 + 1, 4263 sizeof(struct timespec32)); 4264 if (error == 0) { 4265 error = EINTR; 4266 } 4267 } 4268 4269 return (error); 4270 } 4271 4272 static int 4273 __umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap) 4274 { 4275 uint32_t uaddrs[BATCH_SIZE], **upp; 4276 int count, error, i, pos, tocopy; 4277 4278 upp = (uint32_t **)uap->obj; 4279 error = 0; 4280 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 4281 pos += tocopy) { 4282 tocopy = MIN(count, BATCH_SIZE); 4283 error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t)); 4284 if (error != 0) 4285 break; 4286 for (i = 0; i < tocopy; ++i) 4287 kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i], 4288 INT_MAX, 1); 4289 maybe_yield(); 4290 } 4291 return (error); 4292 } 4293 4294 struct umtx_robust_lists_params_compat32 { 4295 uint32_t robust_list_offset; 4296 uint32_t robust_priv_list_offset; 4297 uint32_t robust_inact_offset; 4298 }; 4299 4300 static int 4301 __umtx_op_robust_lists_compat32(struct thread *td, struct _umtx_op_args *uap) 4302 { 4303 struct umtx_robust_lists_params rb; 4304 struct umtx_robust_lists_params_compat32 rb32; 4305 int error; 4306 4307 if (uap->val > sizeof(rb32)) 4308 return (EINVAL); 4309 bzero(&rb, sizeof(rb)); 4310 bzero(&rb32, sizeof(rb32)); 4311 error = copyin(uap->uaddr1, &rb32, uap->val); 4312 if (error != 0) 4313 return (error); 4314 rb.robust_list_offset = rb32.robust_list_offset; 4315 rb.robust_priv_list_offset = rb32.robust_priv_list_offset; 4316 rb.robust_inact_offset = rb32.robust_inact_offset; 4317 return (umtx_robust_lists(td, &rb)); 4318 } 4319 4320 static const _umtx_op_func op_table_compat32[] = { 4321 [UMTX_OP_RESERVED0] = __umtx_op_unimpl, 4322 [UMTX_OP_RESERVED1] = __umtx_op_unimpl, 4323 [UMTX_OP_WAIT] = __umtx_op_wait_compat32, 4324 [UMTX_OP_WAKE] = __umtx_op_wake, 4325 [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex, 4326 [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex_compat32, 4327 [UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex, 4328 [UMTX_OP_SET_CEILING] = __umtx_op_set_ceiling, 4329 [UMTX_OP_CV_WAIT] = __umtx_op_cv_wait_compat32, 4330 [UMTX_OP_CV_SIGNAL] = __umtx_op_cv_signal, 4331 [UMTX_OP_CV_BROADCAST] = __umtx_op_cv_broadcast, 4332 [UMTX_OP_WAIT_UINT] = __umtx_op_wait_compat32, 4333 [UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock_compat32, 4334 [UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock_compat32, 
4335 [UMTX_OP_RW_UNLOCK] = __umtx_op_rw_unlock, 4336 [UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private_compat32, 4337 [UMTX_OP_WAKE_PRIVATE] = __umtx_op_wake_private, 4338 [UMTX_OP_MUTEX_WAIT] = __umtx_op_wait_umutex_compat32, 4339 [UMTX_OP_MUTEX_WAKE] = __umtx_op_wake_umutex, 4340 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4341 [UMTX_OP_SEM_WAIT] = __umtx_op_sem_wait_compat32, 4342 [UMTX_OP_SEM_WAKE] = __umtx_op_sem_wake, 4343 #else 4344 [UMTX_OP_SEM_WAIT] = __umtx_op_unimpl, 4345 [UMTX_OP_SEM_WAKE] = __umtx_op_unimpl, 4346 #endif 4347 [UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private32, 4348 [UMTX_OP_MUTEX_WAKE2] = __umtx_op_wake2_umutex, 4349 [UMTX_OP_SEM2_WAIT] = __umtx_op_sem2_wait_compat32, 4350 [UMTX_OP_SEM2_WAKE] = __umtx_op_sem2_wake, 4351 [UMTX_OP_SHM] = __umtx_op_shm, 4352 [UMTX_OP_ROBUST_LISTS] = __umtx_op_robust_lists_compat32, 4353 }; 4354 4355 int 4356 freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap) 4357 { 4358 4359 if ((unsigned)uap->op < nitems(op_table_compat32)) { 4360 return (*op_table_compat32[uap->op])(td, 4361 (struct _umtx_op_args *)uap); 4362 } 4363 return (EINVAL); 4364 } 4365 #endif 4366 4367 void 4368 umtx_thread_init(struct thread *td) 4369 { 4370 4371 td->td_umtxq = umtxq_alloc(); 4372 td->td_umtxq->uq_thread = td; 4373 } 4374 4375 void 4376 umtx_thread_fini(struct thread *td) 4377 { 4378 4379 umtxq_free(td->td_umtxq); 4380 } 4381 4382 /* 4383 * This is called when a new thread is created, e.g. by fork(). 4384 */ 4385 void 4386 umtx_thread_alloc(struct thread *td) 4387 { 4388 struct umtx_q *uq; 4389 4390 uq = td->td_umtxq; 4391 uq->uq_inherited_pri = PRI_MAX; 4392 4393 KASSERT(uq->uq_flags == 0, ("uq_flags != 0")); 4394 KASSERT(uq->uq_thread == td, ("uq_thread != td")); 4395 KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL")); 4396 KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty")); 4397 } 4398 4399 /* 4400 * exec() hook. 4401 * 4402 * Clear the robust lists for all of the process's threads, without 4403 * delaying the cleanup to the thread_exit hook, since the relevant 4404 * address space is destroyed right now. 4405 */ 4406 static void 4407 umtx_exec_hook(void *arg __unused, struct proc *p, 4408 struct image_params *imgp __unused) 4409 { 4410 struct thread *td; 4411 4412 KASSERT(p == curproc, ("need curproc")); 4413 PROC_LOCK(p); 4414 KASSERT((p->p_flag & P_HADTHREADS) == 0 || 4415 (p->p_flag & P_STOPPED_SINGLE) != 0, 4416 ("curproc must be single-threaded")); 4417 FOREACH_THREAD_IN_PROC(p, td) { 4418 KASSERT(td == curthread || 4419 ((td->td_flags & TDF_BOUNDARY) != 0 && TD_IS_SUSPENDED(td)), 4420 ("running thread %p %p", p, td)); 4421 PROC_UNLOCK(p); 4422 umtx_thread_cleanup(td); 4423 PROC_LOCK(p); 4424 td->td_rb_list = td->td_rbp_list = td->td_rb_inact = 0; 4425 } 4426 PROC_UNLOCK(p); 4427 } 4428 4429 /* 4430 * thread_exit() hook.
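 * Runs the same per-thread cleanup as the exec hook above: PI mutexes are disowned and the robust lists are walked so that any robust mutexes still held are released before the thread goes away.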
4431 */ 4432 void 4433 umtx_thread_exit(struct thread *td) 4434 { 4435 4436 umtx_thread_cleanup(td); 4437 } 4438 4439 static int 4440 umtx_read_uptr(struct thread *td, uintptr_t ptr, uintptr_t *res) 4441 { 4442 u_long res1; 4443 #ifdef COMPAT_FREEBSD32 4444 uint32_t res32; 4445 #endif 4446 int error; 4447 4448 #ifdef COMPAT_FREEBSD32 4449 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { 4450 error = fueword32((void *)ptr, &res32); 4451 if (error == 0) 4452 res1 = res32; 4453 } else 4454 #endif 4455 { 4456 error = fueword((void *)ptr, &res1); 4457 } 4458 if (error == 0) 4459 *res = res1; 4460 else 4461 error = EFAULT; 4462 return (error); 4463 } 4464 4465 static void 4466 umtx_read_rb_list(struct thread *td, struct umutex *m, uintptr_t *rb_list) 4467 { 4468 #ifdef COMPAT_FREEBSD32 4469 struct umutex32 m32; 4470 4471 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { 4472 memcpy(&m32, m, sizeof(m32)); 4473 *rb_list = m32.m_rb_lnk; 4474 } else 4475 #endif 4476 *rb_list = m->m_rb_lnk; 4477 } 4478 4479 static int 4480 umtx_handle_rb(struct thread *td, uintptr_t rbp, uintptr_t *rb_list, bool inact) 4481 { 4482 struct umutex m; 4483 int error; 4484 4485 KASSERT(td->td_proc == curproc, ("need current vmspace")); 4486 error = copyin((void *)rbp, &m, sizeof(m)); 4487 if (error != 0) 4488 return (error); 4489 if (rb_list != NULL) 4490 umtx_read_rb_list(td, &m, rb_list); 4491 if ((m.m_flags & UMUTEX_ROBUST) == 0) 4492 return (EINVAL); 4493 if ((m.m_owner & ~UMUTEX_CONTESTED) != td->td_tid) 4494 /* inact is cleared after unlock, allow the inconsistency */ 4495 return (inact ? 0 : EINVAL); 4496 return (do_unlock_umutex(td, (struct umutex *)rbp, true)); 4497 } 4498 4499 static void 4500 umtx_cleanup_rb_list(struct thread *td, uintptr_t rb_list, uintptr_t *rb_inact, 4501 const char *name) 4502 { 4503 int error, i; 4504 uintptr_t rbp; 4505 bool inact; 4506 4507 if (rb_list == 0) 4508 return; 4509 error = umtx_read_uptr(td, rb_list, &rbp); 4510 for (i = 0; error == 0 && rbp != 0 && i < umtx_max_rb; i++) { 4511 if (rbp == *rb_inact) { 4512 inact = true; 4513 *rb_inact = 0; 4514 } else 4515 inact = false; 4516 error = umtx_handle_rb(td, rbp, &rbp, inact); 4517 } 4518 if (i == umtx_max_rb && umtx_verbose_rb) { 4519 uprintf("comm %s pid %d: reached umtx %smax rb %d\n", 4520 td->td_proc->p_comm, td->td_proc->p_pid, name, umtx_max_rb); 4521 } 4522 if (error != 0 && umtx_verbose_rb) { 4523 uprintf("comm %s pid %d: handling %srb error %d\n", 4524 td->td_proc->p_comm, td->td_proc->p_pid, name, error); 4525 } 4526 } 4527 4528 /* 4529 * Clean up umtx data. 4530 */ 4531 static void 4532 umtx_thread_cleanup(struct thread *td) 4533 { 4534 struct umtx_q *uq; 4535 struct umtx_pi *pi; 4536 uintptr_t rb_inact; 4537 4538 /* 4539 * Disown pi mutexes. 4540 */ 4541 uq = td->td_umtxq; 4542 if (uq != NULL) { 4543 mtx_lock(&umtx_lock); 4544 uq->uq_inherited_pri = PRI_MAX; 4545 while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) { 4546 pi->pi_owner = NULL; 4547 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link); 4548 } 4549 mtx_unlock(&umtx_lock); 4550 thread_lock(td); 4551 sched_lend_user_prio(td, PRI_MAX); 4552 thread_unlock(td); 4553 } 4554 4555 /* 4556 * Handle terminated robust mutexes. Must be done after 4557 * robust pi disown, otherwise unlock could see unowned 4558 * entries. 
4559 */ 4560 rb_inact = td->td_rb_inact; 4561 if (rb_inact != 0) 4562 (void)umtx_read_uptr(td, rb_inact, &rb_inact); 4563 umtx_cleanup_rb_list(td, td->td_rb_list, &rb_inact, ""); 4564 umtx_cleanup_rb_list(td, td->td_rbp_list, &rb_inact, "priv "); 4565 if (rb_inact != 0) 4566 (void)umtx_handle_rb(td, rb_inact, NULL, true); 4567 } 4568