/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2015, 2016 The FreeBSD Foundation
 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Portions of this software were developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"
#include "opt_umtx_profiling.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/syscallsubr.h>
#include <sys/taskqueue.h>
#include <sys/time.h>
#include <sys/eventhandler.h>
#include <sys/umtx.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/uma.h>		/* needed for umtx_pi_zone / uma_zcreate() */
#include <vm/vm_map.h>
#include <vm/vm_object.h>

#include <machine/atomic.h>
#include <machine/cpu.h>

#ifdef COMPAT_FREEBSD32
#include <compat/freebsd32/freebsd32_proto.h>
#endif

#define _UMUTEX_TRY		1
#define _UMUTEX_WAIT		2

#ifdef UMTX_PROFILING
#define	UPROF_PERC_BIGGER(w, f, sw, sf)					\
	(((w) > (sw)) || ((w) == (sw) && (f) > (sf)))
#endif

/* Priority inheritance mutex info. */
struct umtx_pi {
	/* Owner thread */
	struct thread		*pi_owner;

	/* Reference count */
	int			pi_refcount;

	/* List entry to link umtx held by a thread */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* List entry in hash */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* List for waiters */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Identify a userland lock object */
	struct umtx_key		pi_key;
};

/* A waiter on a userland synchronization object. */
struct umtx_q {
	/* Linked list for the hash. */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Umtx key. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define UQF_UMTXQ	0x0001

	/* The waiting thread. */
	struct thread		*uq_thread;

	/*
	 * Blocked on PI mutex.  Reads can use either the chain lock
	 * or umtx_lock; writes must hold both.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* On blocked list */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* Contested PI mutexes owned by us */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Inherited priority from PP mutex */
	u_char			uq_inherited_pri;

	/* Spare queue ready to be reused */
	struct umtxq_queue	*uq_spare_queue;

	/* The queue we are on */
	struct umtxq_queue	*uq_cur_queue;
};

TAILQ_HEAD(umtxq_head, umtx_q);

/* Per-key wait-queue */
struct umtxq_queue {
	struct umtxq_head	head;
	struct umtx_key		key;
	LIST_ENTRY(umtxq_queue)	link;
	int			length;
};

LIST_HEAD(umtxq_list, umtxq_queue);

/* Userland lock object's wait-queue chain */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx		uc_lock;

	/* List of sleep queues. */
	struct umtxq_list	uc_queue[2];
#define UMTX_SHARED_QUEUE	0
#define UMTX_EXCLUSIVE_QUEUE	1

	LIST_HEAD(, umtxq_queue) uc_spare_queue;

	/* Busy flag */
	char			uc_busy;

	/* Chain lock waiters */
	int			uc_waiters;

	/* All PI in the list */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;

#ifdef UMTX_PROFILING
	u_int			length;
	u_int			max_length;
#endif
};

#define	UMTXQ_LOCKED_ASSERT(uc)		mtx_assert(&(uc)->uc_lock, MA_OWNED)

/*
 * Don't propagate time-sharing priority, for security reasons: a user
 * could simply create a PI-mutex, let thread A lock it, and let another
 * thread B block on it.  Because B is sleeping, its priority would be
 * boosted; this would boost A's priority via priority propagation as
 * well, and A's priority would never be lowered even if it were using
 * 100% CPU, which is unfair to other processes.
 */

#define UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
			 PRI_MAX_TIMESHARE : (td)->td_user_pri)

#define	GOLDEN_RATIO_PRIME	2654404609U
#ifndef	UMTX_CHAINS
#define	UMTX_CHAINS		512
#endif
#define	UMTX_SHIFTS		(__WORD_BIT - 9)

#define	GET_SHARE(flags)	\
    (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)

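/*
 * Illustration of the UPRI() clamp above (the names are the symbolic
 * constants from <sys/priority.h>; only the clamping behaviour matters,
 * not the specific numeric values):
 *
 *	td_user_pri == PRI_MIN_TIMESHARE  =>  UPRI(td) == PRI_MAX_TIMESHARE
 *	td_user_pri == PRI_MAX_TIMESHARE  =>  UPRI(td) == PRI_MAX_TIMESHARE
 *	td_user_pri == PRI_MIN_REALTIME   =>  UPRI(td) == PRI_MIN_REALTIME
 *
 * Every time-sharing thread is treated as having the worst time-sharing
 * priority, so blocking on a PI mutex cannot be used to inflate the
 * owner's time-sharing priority (see the comment above).
 */
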
#define BUSY_SPINS		200

struct abs_timeout {
	int clockid;
	bool is_abs_real;	/* TIMER_ABSTIME && CLOCK_REALTIME* */
	struct timespec cur;
	struct timespec end;
};

#ifdef COMPAT_FREEBSD32
struct umutex32 {
	volatile __lwpid_t	m_owner;	/* Owner of the mutex */
	__uint32_t		m_flags;	/* Flags of the mutex */
	__uint32_t		m_ceilings[2];	/* Priority protect ceiling */
	__uint32_t		m_rb_lnk;	/* Robust linkage */
	__uint32_t		m_pad;
	__uint32_t		m_spare[2];
};

_Static_assert(sizeof(struct umutex) == sizeof(struct umutex32), "umutex32");
_Static_assert(__offsetof(struct umutex, m_spare[0]) ==
    __offsetof(struct umutex32, m_spare[0]), "m_spare32");
#endif

int umtx_shm_vnobj_persistent = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_vnode_persistent, CTLFLAG_RWTUN,
    &umtx_shm_vnobj_persistent, 0,
    "False forces destruction of umtx attached to file, on last close");
static int umtx_max_rb = 1000;
SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_max_robust, CTLFLAG_RWTUN,
    &umtx_max_rb, 0,
    "");

static uma_zone_t		umtx_pi_zone;
static struct umtxq_chain	umtxq_chains[2][UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
static int			umtx_pi_allocated;

static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
    &umtx_pi_allocated, 0, "Allocated umtx_pi");
static int umtx_verbose_rb = 1;
SYSCTL_INT(_debug_umtx, OID_AUTO, robust_faults_verbose, CTLFLAG_RWTUN,
    &umtx_verbose_rb, 0,
    "");

#ifdef UMTX_PROFILING
static long max_length;
SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0,
    "max_length");
static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD, 0,
    "umtx chain stats");
#endif

static void abs_timeout_update(struct abs_timeout *timo);

static void umtx_shm_init(void);
static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert_queue(struct umtx_q *uq, int q);
static void umtxq_remove_queue(struct umtx_q *uq, int q);
static int umtxq_sleep(struct umtx_q *uq, const char *wmesg,
    struct abs_timeout *);
static int umtxq_count(struct umtx_key *key);
static struct umtx_pi *umtx_pi_alloc(int);
static void umtx_pi_free(struct umtx_pi *pi);
static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    bool rb);
static void umtx_thread_cleanup(struct thread *td);
static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
    struct image_params *imgp __unused);
SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);

#define umtxq_signal(key, nwake)	umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
#define umtxq_insert(uq)	umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
#define umtxq_remove(uq)	umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)

static struct mtx umtx_lock;

#ifdef UMTX_PROFILING
static void
umtx_init_profiling(void)
{
	struct sysctl_oid *chain_oid;
	char chain_name[10];
	int i;

	for (i = 0; i < UMTX_CHAINS; ++i) {
		snprintf(chain_name, sizeof(chain_name), "%d", i);
		chain_oid = SYSCTL_ADD_NODE(NULL,
		    SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO,
		    chain_name, CTLFLAG_RD, NULL, "umtx hash stats");
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length0", CTLFLAG_RD,
		    &umtxq_chains[0][i].max_length, 0, NULL);
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length1", CTLFLAG_RD,
		    &umtxq_chains[1][i].max_length, 0, NULL);
	}
}

static int
sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS)
{
	char buf[512];
	struct sbuf sb;
	struct umtxq_chain *uc;
	u_int fract, i, j, tot, whole;
	u_int sf0, sf1, sf2, sf3, sf4;
	u_int si0, si1, si2, si3, si4;
	u_int sw0, sw1, sw2, sw3, sw4;

	sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
	for (i = 0; i < 2; i++) {
		tot = 0;
		for (j = 0; j < UMTX_CHAINS; ++j) {
			uc = &umtxq_chains[i][j];
			mtx_lock(&uc->uc_lock);
			tot += uc->max_length;
			mtx_unlock(&uc->uc_lock);
		}
		if (tot == 0)
			sbuf_printf(&sb, "%u) Empty ", i);
		else {
			sf0 = sf1 = sf2 = sf3 = sf4 = 0;
			si0 = si1 = si2 = si3 = si4 = 0;
			sw0 = sw1 = sw2 = sw3 = sw4 = 0;
			for (j = 0; j < UMTX_CHAINS; j++) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				whole = uc->max_length * 100;
				mtx_unlock(&uc->uc_lock);
				fract = (whole % tot) * 100;
				if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) {
					sf0 = fract;
					si0 = j;
					sw0 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw1,
				    sf1)) {
					sf1 = fract;
					si1 = j;
					sw1 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw2,
				    sf2)) {
					sf2 = fract;
					si2 = j;
					sw2 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw3,
				    sf3)) {
					sf3 = fract;
					si3 = j;
					sw3 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw4,
				    sf4)) {
					sf4 = fract;
					si4 = j;
					sw4 = whole;
				}
			}
			sbuf_printf(&sb, "queue %u:\n", i);
			sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot,
			    sf0 / tot, si0);
			sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot,
			    sf1 / tot, si1);
			sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot,
			    sf2 / tot, si2);
			sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot,
			    sf3 / tot, si3);
			sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot,
			    sf4 / tot, si4);
		}
	}
	sbuf_trim(&sb);
	sbuf_finish(&sb);
	sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
	sbuf_delete(&sb);
	return (0);
}

static int
sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS)
{
	struct umtxq_chain *uc;
	u_int i, j;
	int clear, error;

	clear = 0;
	error = sysctl_handle_int(oidp, &clear, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);

	if (clear != 0) {
		for (i = 0; i < 2; ++i) {
			for (j = 0; j < UMTX_CHAINS; ++j) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				uc->length = 0;
				uc->max_length = 0;
				mtx_unlock(&uc->uc_lock);
			}
		}
	}
	return (0);
}

SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_clear, "I", "Clear umtx chains statistics");
SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_peaks, "A", "Highest peaks in chains max length");
#endif

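/*
 * Usage sketch for the profiling knobs above, assuming a kernel built
 * with "options UMTX_PROFILING" (standard sysctl(8) invocations; chain
 * index 42 is an arbitrary example):
 *
 *	# sysctl debug.umtx.max_length		(deepest queue ever seen)
 *	# sysctl debug.umtx.chains.peaks	(top-5 chains per queue class)
 *	# sysctl debug.umtx.chains.42		(max_length0/1 of chain 42)
 *	# sysctl debug.umtx.chains.clear=1	(reset the counters)
 */
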
static void
umtxq_sysinit(void *arg __unused)
{
	int i, j;

	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	for (i = 0; i < 2; ++i) {
		for (j = 0; j < UMTX_CHAINS; ++j) {
			mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
			    MTX_DEF | MTX_DUPOK);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
			LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
			TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
			umtxq_chains[i][j].uc_busy = 0;
			umtxq_chains[i][j].uc_waiters = 0;
#ifdef UMTX_PROFILING
			umtxq_chains[i][j].length = 0;
			umtxq_chains[i][j].max_length = 0;
#endif
		}
	}
#ifdef UMTX_PROFILING
	umtx_init_profiling();
#endif
	mtx_init(&umtx_lock, "umtx lock", NULL, MTX_DEF);
	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
	    EVENTHANDLER_PRI_ANY);
	umtx_shm_init();
}

struct umtx_q *
umtxq_alloc(void)
{
	struct umtx_q *uq;

	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
	uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX,
	    M_WAITOK | M_ZERO);
	TAILQ_INIT(&uq->uq_spare_queue->head);
	TAILQ_INIT(&uq->uq_pi_contested);
	uq->uq_inherited_pri = PRI_MAX;
	return (uq);
}

void
umtxq_free(struct umtx_q *uq)
{

	MPASS(uq->uq_spare_queue != NULL);
	free(uq->uq_spare_queue, M_UMTX);
	free(uq, M_UMTX);
}

static inline void
umtxq_hash(struct umtx_key *key)
{
	unsigned n;

	n = (uintptr_t)key->info.both.a + key->info.both.b;
	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
}

static inline struct umtxq_chain *
umtxq_getchain(struct umtx_key *key)
{

	if (key->type <= TYPE_SEM)
		return (&umtxq_chains[1][key->hash]);
	return (&umtxq_chains[0][key->hash]);
}

/*
 * Lock a chain.
 */
static inline void
umtxq_lock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_lock(&uc->uc_lock);
}

/*
 * Unlock a chain.
 */
static inline void
umtxq_unlock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_unlock(&uc->uc_lock);
}

/*
 * Set the chain to the busy state when the following operation
 * may block (a kernel mutex cannot be used).
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	if (uc->uc_busy) {
#ifdef SMP
		if (smp_cpus > 1) {
			int count = BUSY_SPINS;
			if (count > 0) {
				umtxq_unlock(key);
				while (uc->uc_busy && --count > 0)
					cpu_spinwait();
				umtxq_lock(key);
			}
		}
#endif
		while (uc->uc_busy) {
			uc->uc_waiters++;
			msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
			uc->uc_waiters--;
		}
	}
	uc->uc_busy = 1;
}

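/*
 * Illustrative sketch (not compiled) of how a umtx_key selects its
 * wait-queue chain: the two halves of the key are folded, spread with a
 * multiplicative (Fibonacci) hash, and the key type chooses one of the
 * two chain arrays, so mutex-style objects never share a chain mutex
 * with sem/cv-style objects:
 *
 *	n = (uintptr_t)key->info.both.a + key->info.both.b;
 *	hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
 *	uc = &umtxq_chains[key->type <= TYPE_SEM ? 1 : 0][hash];
 */
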
/*
 * Unbusy a chain.
 */
static inline void
umtxq_unbusy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	KASSERT(uc->uc_busy != 0, ("not busy"));
	uc->uc_busy = 0;
	if (uc->uc_waiters)
		wakeup_one(uc);
}

static inline void
umtxq_unbusy_unlocked(struct umtx_key *key)
{

	umtxq_lock(key);
	umtxq_unbusy(key);
	umtxq_unlock(key);
}

static struct umtxq_queue *
umtxq_queue_lookup(struct umtx_key *key, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	LIST_FOREACH(uh, &uc->uc_queue[q], link) {
		if (umtx_key_match(&uh->key, key))
			return (uh);
	}

	return (NULL);
}

static inline void
umtxq_insert_queue(struct umtx_q *uq, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT((uq->uq_flags & UQF_UMTXQ) == 0,
	    ("umtx_q is already on queue"));
	uh = umtxq_queue_lookup(&uq->uq_key, q);
	if (uh != NULL) {
		LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link);
	} else {
		uh = uq->uq_spare_queue;
		uh->key = uq->uq_key;
		LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
#ifdef UMTX_PROFILING
		uc->length++;
		if (uc->length > uc->max_length) {
			uc->max_length = uc->length;
			if (uc->max_length > max_length)
				max_length = uc->max_length;
		}
#endif
	}
	uq->uq_spare_queue = NULL;

	TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
	uh->length++;
	uq->uq_flags |= UQF_UMTXQ;
	uq->uq_cur_queue = uh;
	return;
}

static inline void
umtxq_remove_queue(struct umtx_q *uq, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (uq->uq_flags & UQF_UMTXQ) {
		uh = uq->uq_cur_queue;
		TAILQ_REMOVE(&uh->head, uq, uq_link);
		uh->length--;
		uq->uq_flags &= ~UQF_UMTXQ;
		if (TAILQ_EMPTY(&uh->head)) {
			KASSERT(uh->length == 0,
			    ("inconsistent umtxq_queue length"));
#ifdef UMTX_PROFILING
			uc->length--;
#endif
			LIST_REMOVE(uh, link);
		} else {
			uh = LIST_FIRST(&uc->uc_spare_queue);
			KASSERT(uh != NULL, ("uc_spare_queue is empty"));
			LIST_REMOVE(uh, link);
		}
		uq->uq_spare_queue = uh;
		uq->uq_cur_queue = NULL;
	}
}

/*
 * Check if there are multiple waiters.
 */
static int
umtxq_count(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL)
		return (uh->length);
	return (0);
}

/*
 * Check if there are multiple PI waiters and return the first
 * waiter.
 */
static int
umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	*first = NULL;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL) {
		*first = TAILQ_FIRST(&uh->head);
		return (uh->length);
	}
	return (0);
}

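/*
 * Sketch of the chain "busy" protocol defined above (illustrative
 * only).  Busy is a sleepable flag layered over the chain mutex, for
 * code paths that touch pageable user memory and therefore cannot hold
 * a kernel mutex across the access:
 *
 *	umtxq_lock(key);
 *	umtxq_busy(key);		// may spin BUSY_SPINS, then msleep()
 *	umtxq_unlock(key);
 *	... fueword32()/casueword32() on the userland word ...
 *	umtxq_unbusy_unlocked(key);	// relock, clear busy, wake waiters
 */
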
static int
umtxq_check_susp(struct thread *td)
{
	struct proc *p;
	int error;

	/*
	 * The check for TDF_NEEDSUSPCHK is racy, but it is enough to
	 * eventually break the lockstep loop.
	 */
	if ((td->td_flags & TDF_NEEDSUSPCHK) == 0)
		return (0);
	error = 0;
	p = td->td_proc;
	PROC_LOCK(p);
	if (P_SHOULDSTOP(p) ||
	    ((p->p_flag & P_TRACED) && (td->td_dbgflags & TDB_SUSPEND))) {
		if (p->p_flag & P_SINGLE_EXIT)
			error = EINTR;
		else
			error = ERESTART;
	}
	PROC_UNLOCK(p);
	return (error);
}

/*
 * Wake up threads waiting on a userland object.
 */
static int
umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;
	struct umtx_q *uq;
	int ret;

	ret = 0;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, q);
	if (uh != NULL) {
		while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
			umtxq_remove_queue(uq, q);
			wakeup(uq);
			if (++ret >= n_wake)
				return (ret);
		}
	}
	return (ret);
}

/*
 * Wake up specified thread.
 */
static inline void
umtxq_signal_thread(struct umtx_q *uq)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	umtxq_remove(uq);
	wakeup(uq);
}

static inline int
tstohz(const struct timespec *tsp)
{
	struct timeval tv;

	TIMESPEC_TO_TIMEVAL(&tv, tsp);
	return (tvtohz(&tv));
}

static void
abs_timeout_init(struct abs_timeout *timo, int clockid, int absolute,
    const struct timespec *timeout)
{

	timo->clockid = clockid;
	if (!absolute) {
		timo->is_abs_real = false;
		abs_timeout_update(timo);
		timo->end = timo->cur;
		timespecadd(&timo->end, timeout);
	} else {
		timo->end = *timeout;
		timo->is_abs_real = clockid == CLOCK_REALTIME ||
		    clockid == CLOCK_REALTIME_FAST ||
		    clockid == CLOCK_REALTIME_PRECISE;
		/*
		 * If is_abs_real, umtxq_sleep will read the clock
		 * after setting td_rtcgen; otherwise, read it here.
		 */
		if (!timo->is_abs_real) {
			abs_timeout_update(timo);
		}
	}
}

static void
abs_timeout_init2(struct abs_timeout *timo, const struct _umtx_time *umtxtime)
{

	abs_timeout_init(timo, umtxtime->_clockid,
	    (umtxtime->_flags & UMTX_ABSTIME) != 0, &umtxtime->_timeout);
}

static inline void
abs_timeout_update(struct abs_timeout *timo)
{

	kern_clock_gettime(curthread, timo->clockid, &timo->cur);
}

static int
abs_timeout_gethz(struct abs_timeout *timo)
{
	struct timespec tts;

	if (timespeccmp(&timo->end, &timo->cur, <=))
		return (-1);
	tts = timo->end;
	timespecsub(&tts, &timo->cur);
	return (tstohz(&tts));
}

static uint32_t
umtx_unlock_val(uint32_t flags, bool rb)
{

	if (rb)
		return (UMUTEX_RB_OWNERDEAD);
	else if ((flags & UMUTEX_NONCONSISTENT) != 0)
		return (UMUTEX_RB_NOTRECOV);
	else
		return (UMUTEX_UNOWNED);
}

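/*
 * Illustrative lifecycle of a struct abs_timeout (not compiled).  A
 * relative timeout is converted into an absolute deadline once; the
 * sleep loop then re-reads the clock after every wakeup and rechecks
 * the deadline:
 *
 *	abs_timeout_init(&timo, CLOCK_MONOTONIC, 0, &ts);
 *						// end = now + ts
 *	for (;;) {
 *		hz_left = abs_timeout_gethz(&timo);
 *		if (hz_left < 0)		// deadline already passed
 *			return (ETIMEDOUT);
 *		msleep(chan, mtx, 0, wmesg, hz_left);
 *		abs_timeout_update(&timo);	// refresh timo.cur
 *	}
 */
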
/*
 * Put the thread into a sleep state; before sleeping, check whether
 * it was already removed from the umtx queue.
 */
static inline int
umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *abstime)
{
	struct umtxq_chain *uc;
	int error, timo;

	if (abstime != NULL && abstime->is_abs_real) {
		curthread->td_rtcgen = atomic_load_acq_int(&rtc_generation);
		abs_timeout_update(abstime);
	}

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	for (;;) {
		if (!(uq->uq_flags & UQF_UMTXQ)) {
			error = 0;
			break;
		}
		if (abstime != NULL) {
			timo = abs_timeout_gethz(abstime);
			if (timo < 0) {
				error = ETIMEDOUT;
				break;
			}
		} else
			timo = 0;
		error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo);
		if (error == EINTR || error == ERESTART) {
			umtxq_lock(&uq->uq_key);
			break;
		}
		if (abstime != NULL) {
			if (abstime->is_abs_real)
				curthread->td_rtcgen =
				    atomic_load_acq_int(&rtc_generation);
			abs_timeout_update(abstime);
		}
		umtxq_lock(&uq->uq_key);
	}

	curthread->td_rtcgen = 0;
	return (error);
}

/*
 * Convert userspace address into unique logical address.
 */
int
umtx_key_get(const void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else {
		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
		    &entry, &key->info.shared.object, &pindex, &prot,
		    &wired) != KERN_SUCCESS) {
			return (EFAULT);
		}

		if ((share == PROCESS_SHARE) ||
		    (share == AUTO_SHARE &&
		     VM_INHERIT_SHARE == entry->inheritance)) {
			key->shared = 1;
			key->info.shared.offset = (vm_offset_t)addr -
			    entry->start + entry->offset;
			vm_object_reference(key->info.shared.object);
		} else {
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		vm_map_lookup_done(map, entry);
	}

	umtxq_hash(key);
	return (0);
}

/*
 * Release key.
 */
void
umtx_key_release(struct umtx_key *key)
{
	if (key->shared)
		vm_object_deallocate(key->info.shared.object);
}

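/*
 * Usage sketch for umtx_key_get()/umtx_key_release() (illustrative).
 * A private key identifies the word by (vmspace, address); a shared key
 * identifies it by (vm_object, offset), so two processes mapping the
 * same page at different addresses still land on one wait queue:
 *
 *	struct umtx_key key;
 *
 *	if ((error = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, AUTO_SHARE,
 *	    &key)) != 0)
 *		return (error);		// EFAULT if uaddr is unmapped
 *	...
 *	umtx_key_release(&key);		// drops the vm_object ref if shared
 */
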
/*
 * Fetch and compare value; sleep on the address if the value has not
 * changed.
 */
static int
do_wait(struct thread *td, void *addr, u_long id,
    struct _umtx_time *timeout, int compat32, int is_private)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	u_long tmp;
	uint32_t tmp32;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	if (compat32 == 0) {
		error = fueword(addr, &tmp);
		if (error != 0)
			error = EFAULT;
	} else {
		error = fueword32(addr, &tmp32);
		if (error == 0)
			tmp = tmp32;
		else
			error = EFAULT;
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		if (tmp == id)
			error = umtxq_sleep(uq, "uwait", timeout == NULL ?
			    NULL : &timo);
		if ((uq->uq_flags & UQF_UMTXQ) == 0)
			error = 0;
		else
			umtxq_remove(uq);
	} else if ((uq->uq_flags & UQF_UMTXQ) != 0) {
		umtxq_remove(uq);
	}
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

/*
 * Wake up threads sleeping on the specified address.
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
		return (ret);
	umtxq_lock(&key);
	umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}

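/*
 * Userland view of do_wait()/kern_umtx_wake() (an illustrative,
 * non-kernel sketch; error handling omitted).  UMTX_OP_WAIT compares a
 * u_long, UMTX_OP_WAIT_UINT a 32-bit word; both operations and
 * _umtx_op(2) are declared in <sys/umtx.h>:
 *
 *	u_long futex = 1;
 *
 *	// Sleeps only while the word still equals the expected value.
 *	_umtx_op(&futex, UMTX_OP_WAIT, 1, NULL, NULL);
 *
 *	// Another thread: change the word, then wake one sleeper.
 *	futex = 0;
 *	_umtx_op(&futex, UMTX_OP_WAKE, 1, NULL, NULL);
 */
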
/*
 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int mode)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	error = 0;
	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		rv = fueword32(&m->m_owner, &owner);
		if (rv == -1)
			return (EFAULT);
		if (mode == _UMUTEX_WAIT) {
			if (owner == UMUTEX_UNOWNED ||
			    owner == UMUTEX_CONTESTED ||
			    owner == UMUTEX_RB_OWNERDEAD ||
			    owner == UMUTEX_RB_NOTRECOV)
				return (0);
		} else {
			/*
			 * Robust mutex terminated. Kernel duty is to
			 * return EOWNERDEAD to the userspace. The
			 * umutex.m_flags UMUTEX_NONCONSISTENT is set
			 * by the common userspace code.
			 */
			if (owner == UMUTEX_RB_OWNERDEAD) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_RB_OWNERDEAD, &owner,
				    id | UMUTEX_CONTESTED);
				if (rv == -1)
					return (EFAULT);
				if (owner == UMUTEX_RB_OWNERDEAD)
					return (EOWNERDEAD); /* success */
				rv = umtxq_check_susp(td);
				if (rv != 0)
					return (rv);
				continue;
			}
			if (owner == UMUTEX_RB_NOTRECOV)
				return (ENOTRECOVERABLE);

			/*
			 * Try the uncontested case. This should be
			 * done in userland.
			 */
			rv = casueword32(&m->m_owner, UMUTEX_UNOWNED,
			    &owner, id);
			/* The address was invalid. */
			if (rv == -1)
				return (EFAULT);

			/* The acquire succeeded. */
			if (owner == UMUTEX_UNOWNED)
				return (0);

			/*
			 * If no one owns it but it is contested, try
			 * to acquire it.
			 */
			if (owner == UMUTEX_CONTESTED) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_CONTESTED, &owner,
				    id | UMUTEX_CONTESTED);
				/* The address was invalid. */
				if (rv == -1)
					return (EFAULT);

				if (owner == UMUTEX_CONTESTED)
					return (0);

				rv = umtxq_check_susp(td);
				if (rv != 0)
					return (rv);

				/*
				 * If this failed the lock has
				 * changed, restart.
				 */
				continue;
			}
		}

		if (mode == _UMUTEX_TRY)
			return (EBUSY);

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock. If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old,
		    owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (rv == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtxn", timeout == NULL ?
			    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);

		if (error == 0)
			error = umtxq_check_susp(td);
	}

	return (0);
}

/*
 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	uint32_t owner, old, id, newlock;
	int error, count;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	newlock = umtx_unlock_val(flags, rb);
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, newlock);
		if (error == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	if (count > 1)
		newlock |= UMUTEX_CONTESTED;
	error = casueword32(&m->m_owner, owner, &old, newlock);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

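/*
 * Illustrative userland fast path for the PTHREAD_PRIO_NONE mutex above
 * (a sketch of what the "this should be done in userland" comments
 * mean; libthr's actual code differs).  Only contested transitions
 * enter the kernel through do_lock_normal()/do_unlock_normal():
 *
 *	uint32_t tid = self_tid();	// hypothetical helper, cf. thr_self(2)
 *
 *	// Lock: UMUTEX_UNOWNED -> tid needs no syscall.
 *	if (!atomic_cmpset_acq_32(&m->m_owner, UMUTEX_UNOWNED, tid))
 *		_umtx_op(m, UMTX_OP_MUTEX_LOCK, 0, NULL, NULL);
 *
 *	// Unlock: tid -> UMUTEX_UNOWNED; a set contested bit makes the
 *	// CAS fail and forces the syscall, which wakes a waiter.
 *	if (!atomic_cmpset_rel_32(&m->m_owner, tid, UMUTEX_UNOWNED))
 *		_umtx_op(m, UMTX_OP_MUTEX_UNLOCK, 0, NULL, NULL);
 */
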
/*
 * Check if the mutex is available and wake up a waiter;
 * used only for simple mutexes.
 */
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != 0 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV)
		return (0);

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	if (count <= 1 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV) {
		error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    UMUTEX_UNOWNED);
		if (error == -1)
			error = EFAULT;
	}

	umtxq_lock(&key);
	if (error == 0 && count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 ||
	    owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV))
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

/*
 * Check if the mutex has waiters and try to fix the contention bit.
 */
static int
do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old;
	int type;
	int error;
	int count;

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT |
	    UMUTEX_ROBUST)) {
	case 0:
	case UMUTEX_ROBUST:
		type = TYPE_NORMAL_UMUTEX;
		break;
	case UMUTEX_PRIO_INHERIT:
		type = TYPE_PI_UMUTEX;
		break;
	case (UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST):
		type = TYPE_PI_ROBUST_UMUTEX;
		break;
	case UMUTEX_PRIO_PROTECT:
		type = TYPE_PP_UMUTEX;
		break;
	case (UMUTEX_PRIO_PROTECT | UMUTEX_ROBUST):
		type = TYPE_PP_ROBUST_UMUTEX;
		break;
	default:
		return (EINVAL);
	}
	if ((error = umtx_key_get(m, type, GET_SHARE(flags), &key)) != 0)
		return (error);

	owner = 0;
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);
	/*
	 * Only repair the contention bit if there is a waiter; this means
	 * the mutex is still being referenced by userland code.  Otherwise
	 * don't update any memory.
	 */
	if (count > 1) {
		error = fueword32(&m->m_owner, &owner);
		if (error == -1)
			error = EFAULT;
		while (error == 0 && (owner & UMUTEX_CONTESTED) == 0) {
			error = casueword32(&m->m_owner, owner, &old,
			    owner | UMUTEX_CONTESTED);
			if (error == -1) {
				error = EFAULT;
				break;
			}
			if (old == owner)
				break;
			owner = old;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
	} else if (count == 1) {
		error = fueword32(&m->m_owner, &owner);
		if (error == -1)
			error = EFAULT;
		while (error == 0 && (owner & ~UMUTEX_CONTESTED) != 0 &&
		    (owner & UMUTEX_CONTESTED) == 0) {
			error = casueword32(&m->m_owner, owner, &old,
			    owner | UMUTEX_CONTESTED);
			if (error == -1) {
				error = EFAULT;
				break;
			}
			if (old == owner)
				break;
			owner = old;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
	}
	umtxq_lock(&key);
	if (error == EFAULT) {
		umtxq_signal(&key, INT_MAX);
	} else if (count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 ||
	    owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV))
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

static inline struct umtx_pi *
umtx_pi_alloc(int flags)
{
	struct umtx_pi *pi;

	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
	TAILQ_INIT(&pi->pi_blocked);
	atomic_add_int(&umtx_pi_allocated, 1);
	return (pi);
}

static inline void
umtx_pi_free(struct umtx_pi *pi)
{
	uma_zfree(umtx_pi_zone, pi);
	atomic_add_int(&umtx_pi_allocated, -1);
}

/*
 * Adjust the thread's position on a pi_state after its priority has been
 * changed.
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread or higher than the next thread.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}

static struct umtx_pi *
umtx_pi_next(struct umtx_pi *pi)
{
	struct umtx_q *uq_owner;

	if (pi->pi_owner == NULL)
		return (NULL);
	uq_owner = pi->pi_owner->td_umtxq;
	if (uq_owner == NULL)
		return (NULL);
	return (uq_owner->uq_pi_blocked);
}

/*
 * Floyd's Cycle-Finding Algorithm.
 */
static bool
umtx_pi_check_loop(struct umtx_pi *pi)
{
	struct umtx_pi *pi1;	/* fast iterator */

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (false);
	pi1 = pi;
	for (;;) {
		pi = umtx_pi_next(pi);
		if (pi == NULL)
			break;
		pi1 = umtx_pi_next(pi1);
		if (pi1 == NULL)
			break;
		pi1 = umtx_pi_next(pi1);
		if (pi1 == NULL)
			break;
		if (pi == pi1)
			return (true);
	}
	return (false);
}

/*
 * Propagate priority when a thread is blocked on POSIX
 * PI mutex.
 */
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;
	if (umtx_pi_check_loop(pi))
		return;

	for (;;) {
		td = pi->pi_owner;
		if (td == NULL || td == curthread)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		thread_lock(td);
		if (td->td_lend_user_pri > pri)
			sched_lend_user_prio(td, pri);
		else {
			thread_unlock(td);
			break;
		}
		thread_unlock(td);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		if (pi == NULL)
			break;
		/* Resort td on the list if needed. */
		umtx_pi_adjust_thread(pi, td);
	}
}

/*
 * Unpropagate priority for a PI mutex when a thread blocked on
 * it is interrupted by a signal or resumed by others.
 */
static void
umtx_repropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);

	if (umtx_pi_check_loop(pi))
		return;
	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		thread_lock(pi->pi_owner);
		sched_lend_user_prio(pi->pi_owner, pri);
		thread_unlock(pi->pi_owner);
		if ((pi = uq_owner->uq_pi_blocked) != NULL)
			umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
	}
}

/*
 * Insert a PI mutex into owned list.
 */
static void
umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_assert(&umtx_lock, MA_OWNED);
	MPASS(pi->pi_owner == NULL);
	pi->pi_owner = owner;
	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
}

/*
 * Disown a PI mutex, and remove it from the owned list.
 */
static void
umtx_pi_disown(struct umtx_pi *pi)
{

	mtx_assert(&umtx_lock, MA_OWNED);
	TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, pi, pi_link);
	pi->pi_owner = NULL;
}

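/*
 * Illustration of the walks above (not code).  Suppose thread A (high
 * priority) blocks on mutex M1 owned by B, while B is itself blocked
 * on M2 owned by C:
 *
 *	A --waits--> M1 --owner--> B --waits--> M2 --owner--> C
 *
 * umtx_propagate_priority(A) lends A's priority to B, follows
 * B->uq_pi_blocked to M2 and lends it to C as well, stopping at the
 * first owner whose lent priority is already at least as good.  A
 * userland-constructed cycle (C blocking on M1) would make that walk
 * endless, which is why umtx_pi_check_loop() is run first.
 */
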
/*
 * Claim ownership of a PI mutex.
 */
static int
umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq;
	int pri;

	mtx_lock(&umtx_lock);
	if (pi->pi_owner == owner) {
		mtx_unlock(&umtx_lock);
		return (0);
	}

	if (pi->pi_owner != NULL) {
		/*
		 * userland may have already messed up the mutex, sigh.
		 */
		mtx_unlock(&umtx_lock);
		return (EPERM);
	}
	umtx_pi_setowner(pi, owner);
	uq = TAILQ_FIRST(&pi->pi_blocked);
	if (uq != NULL) {
		pri = UPRI(uq->uq_thread);
		thread_lock(owner);
		if (pri < UPRI(owner))
			sched_lend_user_prio(owner, pri);
		thread_unlock(owner);
	}
	mtx_unlock(&umtx_lock);
	return (0);
}

/*
 * Adjust a thread's order position in its blocked PI mutex;
 * this may trigger a new round of priority propagation.
 */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	mtx_lock(&umtx_lock);
	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	if (pi != NULL) {
		umtx_pi_adjust_thread(pi, td);
		umtx_repropagate_priority(pi);
	}
	mtx_unlock(&umtx_lock);
}

/*
 * Sleep on a PI mutex.
 */
static int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, uint32_t owner,
    const char *wmesg, struct abs_timeout *timo, bool shared)
{
	struct umtxq_chain *uc;
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int error, pri;

	error = 0;
	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(uc->uc_busy != 0, ("umtx chain is not busy"));
	umtxq_insert(uq);
	mtx_lock(&umtx_lock);
	if (pi->pi_owner == NULL) {
		mtx_unlock(&umtx_lock);
		td1 = tdfind(owner, shared ? -1 : td->td_proc->p_pid);
		mtx_lock(&umtx_lock);
		if (td1 != NULL) {
			if (pi->pi_owner == NULL)
				umtx_pi_setowner(pi, td1);
			PROC_UNLOCK(td1->td_proc);
		}
	}

	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	thread_lock(td);
	td->td_flags |= TDF_UPIBLOCKED;
	thread_unlock(td);
	umtx_propagate_priority(td);
	mtx_unlock(&umtx_lock);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, wmesg, timo);
	umtxq_remove(uq);

	mtx_lock(&umtx_lock);
	uq->uq_pi_blocked = NULL;
	thread_lock(td);
	td->td_flags &= ~TDF_UPIBLOCKED;
	thread_unlock(td);
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_repropagate_priority(pi);
	mtx_unlock(&umtx_lock);
	umtxq_unlock(&uq->uq_key);

	return (error);
}

/*
 * Add reference count for a PI mutex.
 */
static void
umtx_pi_ref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	pi->pi_refcount++;
}

/*
 * Decrease the reference count for a PI mutex; if the count
 * drops to zero, its memory is freed.
 */
static void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		mtx_lock(&umtx_lock);
		if (pi->pi_owner != NULL)
			umtx_pi_disown(pi);
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
		    ("blocked queue not empty"));
		mtx_unlock(&umtx_lock);
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		umtx_pi_free(pi);
	}
}

/*
 * Find a PI mutex in hash table.
 */
static struct umtx_pi *
umtx_pi_lookup(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtx_pi *pi;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);

	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
		if (umtx_key_match(&pi->pi_key, key)) {
			return (pi);
		}
	}
	return (NULL);
}

/*
 * Insert a PI mutex into hash table.
 */
static inline void
umtx_pi_insert(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
}

/*
 * Lock a PI mutex.
 */
static int
do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, old_owner, owner, old;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	pi = umtx_pi_lookup(&uq->uq_key);
	if (pi == NULL) {
		new_pi = umtx_pi_alloc(M_NOWAIT);
		if (new_pi == NULL) {
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc(M_WAITOK);
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL) {
				umtx_pi_free(new_pi);
				new_pi = NULL;
			}
		}
		if (new_pi != NULL) {
			new_pi->pi_key = uq->uq_key;
			umtx_pi_insert(new_pi);
			pi = new_pi;
		}
	}
	umtx_pi_ref(pi);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case. This should be done in userland.
		 */
		rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED) {
			error = 0;
			break;
		}

		if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
			break;
		}

		/* If no one owns it but it is contested, try to acquire it. */
		if (owner == UMUTEX_CONTESTED || owner == UMUTEX_RB_OWNERDEAD) {
			old_owner = owner;
			rv = casueword32(&m->m_owner, owner, &owner,
			    id | UMUTEX_CONTESTED);
			/* The address was invalid. */
			if (rv == -1) {
				error = EFAULT;
				break;
			}

			if (owner == old_owner) {
				umtxq_lock(&uq->uq_key);
				umtxq_busy(&uq->uq_key);
				error = umtx_pi_claim(pi, td);
				umtxq_unbusy(&uq->uq_key);
				umtxq_unlock(&uq->uq_key);
				if (error != 0) {
					/*
					 * Since we're going to return an
					 * error, restore the m_owner to its
					 * previous, unowned state to avoid
					 * compounding the problem.
					 */
					(void)casuword32(&m->m_owner,
					    id | UMUTEX_CONTESTED,
					    old_owner);
				}
				if (error == 0 &&
				    old_owner == UMUTEX_RB_OWNERDEAD)
					error = EOWNERDEAD;
				break;
			}

			error = umtxq_check_susp(td);
			if (error != 0)
				break;

			/* If this failed the lock has changed, restart. */
			continue;
		}

		if ((owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock. If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old, owner |
		    UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}

		umtxq_lock(&uq->uq_key);
		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx. Note that the UMUTEX_RB_OWNERDEAD
		 * value for owner is impossible there.
		 */
		if (old == owner) {
			error = umtxq_sleep_pi(uq, pi,
			    owner & ~UMUTEX_CONTESTED,
			    "umtxpi", timeout == NULL ? NULL : &timo,
			    (flags & USYNC_PROCESS_SHARED) != 0);
			if (error != 0)
				continue;
		} else {
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
		}

		error = umtxq_check_susp(td);
		if (error != 0)
			break;
	}

	umtxq_lock(&uq->uq_key);
	umtx_pi_unref(pi);
	umtxq_unlock(&uq->uq_key);

	umtx_key_release(&uq->uq_key);
	return (error);
}

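/*
 * Robust-mutex flow seen from userland (an illustrative sketch, not
 * libthr's actual code).  When do_lock_pi() above hands out a lock
 * whose previous owner died, it returns EOWNERDEAD, and the new owner
 * must decide whether the protected state is salvageable:
 *
 *	if (_umtx_op(m, UMTX_OP_MUTEX_LOCK, 0, NULL, NULL) == -1)
 *		error = errno;
 *	if (error == EOWNERDEAD) {	// we own it, but state is suspect
 *		repair_shared_state();	// hypothetical, app-specific
 *		pthread_mutex_consistent(pmtx);
 *	}				// otherwise the next locker gets
 *					// ENOTRECOVERABLE
 */
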
/*
 * Unlock a PI mutex.
 */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	uint32_t id, new_owner, old, owner;
	int count, error, pri;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	new_owner = umtx_unlock_val(flags, rb);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, new_owner);
		if (error == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count_pi(&key, &uq_first);
	if (uq_first != NULL) {
		mtx_lock(&umtx_lock);
		pi = uq_first->uq_pi_blocked;
		KASSERT(pi != NULL, ("pi == NULL?"));
		if (pi->pi_owner != td && !(rb && pi->pi_owner == NULL)) {
			mtx_unlock(&umtx_lock);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			/* userland messed up the mutex */
			return (EPERM);
		}
		uq_me = td->td_umtxq;
		if (pi->pi_owner == td)
			umtx_pi_disown(pi);
		/* get highest priority thread which is still sleeping. */
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		while (uq_first != NULL &&
		    (uq_first->uq_flags & UQF_UMTXQ) == 0) {
			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
		}
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
		if (uq_first)
			umtxq_signal_thread(uq_first);
	} else {
		pi = umtx_pi_lookup(&key);
		/*
		 * A umtx_pi can exist if a signal or timeout removed the
		 * last waiter from the umtxq, but there is still
		 * a thread in do_lock_pi() holding the umtx_pi.
		 */
		if (pi != NULL) {
			/*
			 * The umtx_pi can be unowned, such as when a thread
			 * has just entered do_lock_pi(), allocated the
			 * umtx_pi, and unlocked the umtxq.
			 * If the current thread owns it, it must disown it.
			 */
			mtx_lock(&umtx_lock);
			if (pi->pi_owner == td)
				umtx_pi_disown(pi);
			mtx_unlock(&umtx_lock);
		}
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	if (count > 1)
		new_owner |= UMUTEX_CONTESTED;
	error = casueword32(&m->m_owner, owner, &old, new_owner);

	umtxq_unbusy_unlocked(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

/*
 * Lock a PP mutex.
 */
static int
do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct abs_timeout timo;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t ceiling;
	uint32_t owner, id;
	int error, pri, old_inherited_pri, su, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
	for (;;) {
		old_inherited_pri = uq->uq_inherited_pri;
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		rv = fueword32(&m->m_ceilings[0], &ceiling);
		if (rv == -1) {
			error = EFAULT;
			goto out;
		}
		ceiling = RTP_PRIO_MAX - ceiling;
		if (ceiling > RTP_PRIO_MAX) {
			error = EINVAL;
			goto out;
		}

		mtx_lock(&umtx_lock);
		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
			mtx_unlock(&umtx_lock);
			error = EINVAL;
			goto out;
		}
		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
			thread_lock(td);
			if (uq->uq_inherited_pri < UPRI(td))
				sched_lend_user_prio(td, uq->uq_inherited_pri);
			thread_unlock(td);
		}
		mtx_unlock(&umtx_lock);

		rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    id | UMUTEX_CONTESTED);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		if (owner == UMUTEX_CONTESTED) {
			error = 0;
			break;
		} else if (owner == UMUTEX_RB_OWNERDEAD) {
			rv = casueword32(&m->m_owner, UMUTEX_RB_OWNERDEAD,
			    &owner, id | UMUTEX_CONTESTED);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (owner == UMUTEX_RB_OWNERDEAD) {
				error = EOWNERDEAD; /* success */
				break;
			}
			error = 0;
		} else if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", timeout == NULL ?
		    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);

		mtx_lock(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}

	if (error != 0 && error != EOWNERDEAD) {
		mtx_lock(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}

out:
	umtxq_unbusy_unlocked(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}

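/*
 * Worked example of the ceiling arithmetic in do_lock_pp() above
 * (assuming RTP_PRIO_MAX == 31, its value in <sys/rtprio.h>; smaller
 * kernel priority numbers are stronger):
 *
 *	m_ceilings[0] == 31 -> ceiling = 31 - 31 = 0
 *			    -> uq_inherited_pri = PRI_MIN_REALTIME + 0
 *	m_ceilings[0] == 0  -> ceiling = 31 - 0 = 31
 *			    -> uq_inherited_pri = PRI_MIN_REALTIME + 31
 *	m_ceilings[0] == 32 -> 31 - 32 wraps (unsigned) above
 *			       RTP_PRIO_MAX -> EINVAL
 *
 * A thread already running at better than the ceiling (UPRI(td) <
 * PRI_MIN_REALTIME + ceiling) is likewise refused with EINVAL.
 */
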
2250 */ 2251 static int 2252 do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, bool rb) 2253 { 2254 struct umtx_key key; 2255 struct umtx_q *uq, *uq2; 2256 struct umtx_pi *pi; 2257 uint32_t id, owner, rceiling; 2258 int error, pri, new_inherited_pri, su; 2259 2260 id = td->td_tid; 2261 uq = td->td_umtxq; 2262 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); 2263 2264 /* 2265 * Make sure we own this mtx. 2266 */ 2267 error = fueword32(&m->m_owner, &owner); 2268 if (error == -1) 2269 return (EFAULT); 2270 2271 if ((owner & ~UMUTEX_CONTESTED) != id) 2272 return (EPERM); 2273 2274 error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t)); 2275 if (error != 0) 2276 return (error); 2277 2278 if (rceiling == -1) 2279 new_inherited_pri = PRI_MAX; 2280 else { 2281 rceiling = RTP_PRIO_MAX - rceiling; 2282 if (rceiling > RTP_PRIO_MAX) 2283 return (EINVAL); 2284 new_inherited_pri = PRI_MIN_REALTIME + rceiling; 2285 } 2286 2287 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2288 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2289 &key)) != 0) 2290 return (error); 2291 umtxq_lock(&key); 2292 umtxq_busy(&key); 2293 umtxq_unlock(&key); 2294 /* 2295 * For priority protected mutex, always set unlocked state 2296 * to UMUTEX_CONTESTED, so that userland always enters kernel 2297 * to lock the mutex, it is necessary because thread priority 2298 * has to be adjusted for such mutex. 2299 */ 2300 error = suword32(&m->m_owner, umtx_unlock_val(flags, rb) | 2301 UMUTEX_CONTESTED); 2302 2303 umtxq_lock(&key); 2304 if (error == 0) 2305 umtxq_signal(&key, 1); 2306 umtxq_unbusy(&key); 2307 umtxq_unlock(&key); 2308 2309 if (error == -1) 2310 error = EFAULT; 2311 else { 2312 mtx_lock(&umtx_lock); 2313 if (su != 0) 2314 uq->uq_inherited_pri = new_inherited_pri; 2315 pri = PRI_MAX; 2316 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2317 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2318 if (uq2 != NULL) { 2319 if (pri > UPRI(uq2->uq_thread)) 2320 pri = UPRI(uq2->uq_thread); 2321 } 2322 } 2323 if (pri > uq->uq_inherited_pri) 2324 pri = uq->uq_inherited_pri; 2325 thread_lock(td); 2326 sched_lend_user_prio(td, pri); 2327 thread_unlock(td); 2328 mtx_unlock(&umtx_lock); 2329 } 2330 umtx_key_release(&key); 2331 return (error); 2332 } 2333 2334 static int 2335 do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling, 2336 uint32_t *old_ceiling) 2337 { 2338 struct umtx_q *uq; 2339 uint32_t flags, id, owner, save_ceiling; 2340 int error, rv, rv1; 2341 2342 error = fueword32(&m->m_flags, &flags); 2343 if (error == -1) 2344 return (EFAULT); 2345 if ((flags & UMUTEX_PRIO_PROTECT) == 0) 2346 return (EINVAL); 2347 if (ceiling > RTP_PRIO_MAX) 2348 return (EINVAL); 2349 id = td->td_tid; 2350 uq = td->td_umtxq; 2351 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2352 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2353 &uq->uq_key)) != 0) 2354 return (error); 2355 for (;;) { 2356 umtxq_lock(&uq->uq_key); 2357 umtxq_busy(&uq->uq_key); 2358 umtxq_unlock(&uq->uq_key); 2359 2360 rv = fueword32(&m->m_ceilings[0], &save_ceiling); 2361 if (rv == -1) { 2362 error = EFAULT; 2363 break; 2364 } 2365 2366 rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, 2367 id | UMUTEX_CONTESTED); 2368 if (rv == -1) { 2369 error = EFAULT; 2370 break; 2371 } 2372 2373 if (owner == UMUTEX_CONTESTED) { 2374 rv = suword32(&m->m_ceilings[0], ceiling); 2375 rv1 = suword32(&m->m_owner, UMUTEX_CONTESTED); 2376 error = (rv == 0 && rv1 == 0) ? 
0: EFAULT; 2377 break; 2378 } 2379 2380 if ((owner & ~UMUTEX_CONTESTED) == id) { 2381 rv = suword32(&m->m_ceilings[0], ceiling); 2382 error = rv == 0 ? 0 : EFAULT; 2383 break; 2384 } 2385 2386 if (owner == UMUTEX_RB_OWNERDEAD) { 2387 error = EOWNERDEAD; 2388 break; 2389 } else if (owner == UMUTEX_RB_NOTRECOV) { 2390 error = ENOTRECOVERABLE; 2391 break; 2392 } 2393 2394 /* 2395 * If we caught a signal, we have retried and now 2396 * exit immediately. 2397 */ 2398 if (error != 0) 2399 break; 2400 2401 /* 2402 * We set the contested bit, sleep. Otherwise the lock changed 2403 * and we need to retry or we lost a race to the thread 2404 * unlocking the umtx. 2405 */ 2406 umtxq_lock(&uq->uq_key); 2407 umtxq_insert(uq); 2408 umtxq_unbusy(&uq->uq_key); 2409 error = umtxq_sleep(uq, "umtxpp", NULL); 2410 umtxq_remove(uq); 2411 umtxq_unlock(&uq->uq_key); 2412 } 2413 umtxq_lock(&uq->uq_key); 2414 if (error == 0) 2415 umtxq_signal(&uq->uq_key, INT_MAX); 2416 umtxq_unbusy(&uq->uq_key); 2417 umtxq_unlock(&uq->uq_key); 2418 umtx_key_release(&uq->uq_key); 2419 if (error == 0 && old_ceiling != NULL) { 2420 rv = suword32(old_ceiling, save_ceiling); 2421 error = rv == 0 ? 0 : EFAULT; 2422 } 2423 return (error); 2424 } 2425 2426 /* 2427 * Lock a userland POSIX mutex. 2428 */ 2429 static int 2430 do_lock_umutex(struct thread *td, struct umutex *m, 2431 struct _umtx_time *timeout, int mode) 2432 { 2433 uint32_t flags; 2434 int error; 2435 2436 error = fueword32(&m->m_flags, &flags); 2437 if (error == -1) 2438 return (EFAULT); 2439 2440 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2441 case 0: 2442 error = do_lock_normal(td, m, flags, timeout, mode); 2443 break; 2444 case UMUTEX_PRIO_INHERIT: 2445 error = do_lock_pi(td, m, flags, timeout, mode); 2446 break; 2447 case UMUTEX_PRIO_PROTECT: 2448 error = do_lock_pp(td, m, flags, timeout, mode); 2449 break; 2450 default: 2451 return (EINVAL); 2452 } 2453 if (timeout == NULL) { 2454 if (error == EINTR && mode != _UMUTEX_WAIT) 2455 error = ERESTART; 2456 } else { 2457 /* Timed-locking is not restarted. */ 2458 if (error == ERESTART) 2459 error = EINTR; 2460 } 2461 return (error); 2462 } 2463 2464 /* 2465 * Unlock a userland POSIX mutex. 
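 *
 * The protocol bits in m_flags pick the implementation below: plain
 * mutexes use do_unlock_normal(), UMUTEX_PRIO_INHERIT selects the
 * priority-inheritance path, and UMUTEX_PRIO_PROTECT the ceiling
 * path; both bits together is invalid.  For plain mutexes userland
 * normally releases the lock itself and only makes this syscall when
 * the contested bit is set.  A sketch of that fast path, assuming
 * the <sys/umtx.h> constants (illustrative only, not the libthr
 * source):
 *
 *	if (atomic_cmpset_rel_32(&m->m_owner, tid, UMUTEX_UNOWNED))
 *		return (0);	(no waiters, no kernel entry)
 *	return (_umtx_op(m, UMTX_OP_MUTEX_UNLOCK, 0, NULL, NULL));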
2466 */ 2467 static int 2468 do_unlock_umutex(struct thread *td, struct umutex *m, bool rb) 2469 { 2470 uint32_t flags; 2471 int error; 2472 2473 error = fueword32(&m->m_flags, &flags); 2474 if (error == -1) 2475 return (EFAULT); 2476 2477 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2478 case 0: 2479 return (do_unlock_normal(td, m, flags, rb)); 2480 case UMUTEX_PRIO_INHERIT: 2481 return (do_unlock_pi(td, m, flags, rb)); 2482 case UMUTEX_PRIO_PROTECT: 2483 return (do_unlock_pp(td, m, flags, rb)); 2484 } 2485 2486 return (EINVAL); 2487 } 2488 2489 static int 2490 do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m, 2491 struct timespec *timeout, u_long wflags) 2492 { 2493 struct abs_timeout timo; 2494 struct umtx_q *uq; 2495 uint32_t flags, clockid, hasw; 2496 int error; 2497 2498 uq = td->td_umtxq; 2499 error = fueword32(&cv->c_flags, &flags); 2500 if (error == -1) 2501 return (EFAULT); 2502 error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key); 2503 if (error != 0) 2504 return (error); 2505 2506 if ((wflags & CVWAIT_CLOCKID) != 0) { 2507 error = fueword32(&cv->c_clockid, &clockid); 2508 if (error == -1) { 2509 umtx_key_release(&uq->uq_key); 2510 return (EFAULT); 2511 } 2512 if (clockid < CLOCK_REALTIME || 2513 clockid >= CLOCK_THREAD_CPUTIME_ID) { 2514 /* Only the predefined hardware clock ids are usable here. */ 2515 umtx_key_release(&uq->uq_key); 2516 return (EINVAL); 2517 } 2518 } else { 2519 clockid = CLOCK_REALTIME; 2520 } 2521 2522 umtxq_lock(&uq->uq_key); 2523 umtxq_busy(&uq->uq_key); 2524 umtxq_insert(uq); 2525 umtxq_unlock(&uq->uq_key); 2526 2527 /* 2528 * Set c_has_waiters to 1 before releasing the user mutex; avoid 2529 * dirtying the cache line when the flag is already set. 2530 */ 2531 error = fueword32(&cv->c_has_waiters, &hasw); 2532 if (error == 0 && hasw == 0) 2533 suword32(&cv->c_has_waiters, 1); 2534 2535 umtxq_unbusy_unlocked(&uq->uq_key); 2536 2537 error = do_unlock_umutex(td, m, false); 2538 2539 if (timeout != NULL) 2540 abs_timeout_init(&timo, clockid, (wflags & CVWAIT_ABSTIME) != 0, 2541 timeout); 2542 2543 umtxq_lock(&uq->uq_key); 2544 if (error == 0) { 2545 error = umtxq_sleep(uq, "ucond", timeout == NULL ? 2546 NULL : &timo); 2547 } 2548 2549 if ((uq->uq_flags & UQF_UMTXQ) == 0) 2550 error = 0; 2551 else { 2552 /* 2553 * This must be a timeout, an interruption by a signal, or a 2554 * spurious wakeup; clear the c_has_waiters flag when 2555 * necessary. 2556 */ 2557 umtxq_busy(&uq->uq_key); 2558 if ((uq->uq_flags & UQF_UMTXQ) != 0) { 2559 int oldlen = uq->uq_cur_queue->length; 2560 umtxq_remove(uq); 2561 if (oldlen == 1) { 2562 umtxq_unlock(&uq->uq_key); 2563 suword32(&cv->c_has_waiters, 0); 2564 umtxq_lock(&uq->uq_key); 2565 } 2566 } 2567 umtxq_unbusy(&uq->uq_key); 2568 if (error == ERESTART) 2569 error = EINTR; 2570 } 2571 2572 umtxq_unlock(&uq->uq_key); 2573 umtx_key_release(&uq->uq_key); 2574 return (error); 2575 } 2576 2577 /* 2578 * Signal a userland condition variable.
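 *
 * Wakes at most one waiter.  If the wakeup empties the sleep queue
 * (cnt <= nwake below), c_has_waiters is cleared so that userland
 * can skip the syscall on later signals when nobody can be sleeping.
 * The store is performed with the chain busied, ordering it against
 * waiters concurrently setting the flag in do_cv_wait().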
2579 */ 2580 static int 2581 do_cv_signal(struct thread *td, struct ucond *cv) 2582 { 2583 struct umtx_key key; 2584 int error, cnt, nwake; 2585 uint32_t flags; 2586 2587 error = fueword32(&cv->c_flags, &flags); 2588 if (error == -1) 2589 return (EFAULT); 2590 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2591 return (error); 2592 umtxq_lock(&key); 2593 umtxq_busy(&key); 2594 cnt = umtxq_count(&key); 2595 nwake = umtxq_signal(&key, 1); 2596 if (cnt <= nwake) { 2597 umtxq_unlock(&key); 2598 error = suword32(&cv->c_has_waiters, 0); 2599 if (error == -1) 2600 error = EFAULT; 2601 umtxq_lock(&key); 2602 } 2603 umtxq_unbusy(&key); 2604 umtxq_unlock(&key); 2605 umtx_key_release(&key); 2606 return (error); 2607 } 2608 2609 static int 2610 do_cv_broadcast(struct thread *td, struct ucond *cv) 2611 { 2612 struct umtx_key key; 2613 int error; 2614 uint32_t flags; 2615 2616 error = fueword32(&cv->c_flags, &flags); 2617 if (error == -1) 2618 return (EFAULT); 2619 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2620 return (error); 2621 2622 umtxq_lock(&key); 2623 umtxq_busy(&key); 2624 umtxq_signal(&key, INT_MAX); 2625 umtxq_unlock(&key); 2626 2627 error = suword32(&cv->c_has_waiters, 0); 2628 if (error == -1) 2629 error = EFAULT; 2630 2631 umtxq_unbusy_unlocked(&key); 2632 2633 umtx_key_release(&key); 2634 return (error); 2635 } 2636 2637 static int 2638 do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, struct _umtx_time *timeout) 2639 { 2640 struct abs_timeout timo; 2641 struct umtx_q *uq; 2642 uint32_t flags, wrflags; 2643 int32_t state, oldstate; 2644 int32_t blocked_readers; 2645 int error, error1, rv; 2646 2647 uq = td->td_umtxq; 2648 error = fueword32(&rwlock->rw_flags, &flags); 2649 if (error == -1) 2650 return (EFAULT); 2651 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2652 if (error != 0) 2653 return (error); 2654 2655 if (timeout != NULL) 2656 abs_timeout_init2(&timo, timeout); 2657 2658 wrflags = URWLOCK_WRITE_OWNER; 2659 if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER)) 2660 wrflags |= URWLOCK_WRITE_WAITERS; 2661 2662 for (;;) { 2663 rv = fueword32(&rwlock->rw_state, &state); 2664 if (rv == -1) { 2665 umtx_key_release(&uq->uq_key); 2666 return (EFAULT); 2667 } 2668 2669 /* try to lock it */ 2670 while (!(state & wrflags)) { 2671 if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) { 2672 umtx_key_release(&uq->uq_key); 2673 return (EAGAIN); 2674 } 2675 rv = casueword32(&rwlock->rw_state, state, 2676 &oldstate, state + 1); 2677 if (rv == -1) { 2678 umtx_key_release(&uq->uq_key); 2679 return (EFAULT); 2680 } 2681 if (oldstate == state) { 2682 umtx_key_release(&uq->uq_key); 2683 return (0); 2684 } 2685 error = umtxq_check_susp(td); 2686 if (error != 0) 2687 break; 2688 state = oldstate; 2689 } 2690 2691 if (error) 2692 break; 2693 2694 /* grab monitor lock */ 2695 umtxq_lock(&uq->uq_key); 2696 umtxq_busy(&uq->uq_key); 2697 umtxq_unlock(&uq->uq_key); 2698 2699 /* 2700 * re-read the state, in case it changed between the try-lock above 2701 * and the check below 2702 */ 2703 rv = fueword32(&rwlock->rw_state, &state); 2704 if (rv == -1) 2705 error = EFAULT; 2706 2707 /* set read contention bit */ 2708 while (error == 0 && (state & wrflags) && 2709 !(state & URWLOCK_READ_WAITERS)) { 2710 rv = casueword32(&rwlock->rw_state, state, 2711 &oldstate, state | URWLOCK_READ_WAITERS); 2712 if (rv == -1) { 2713 error = EFAULT; 2714 break; 2715 } 2716 if (oldstate == state) 
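/*
 * The CAS above published URWLOCK_READ_WAITERS while the
 * lock was still unavailable to readers; it is now safe to
 * account ourselves as a blocked reader and sleep.
 */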
2717 goto sleep; 2718 state = oldstate; 2719 error = umtxq_check_susp(td); 2720 if (error != 0) 2721 break; 2722 } 2723 if (error != 0) { 2724 umtxq_unbusy_unlocked(&uq->uq_key); 2725 break; 2726 } 2727 2728 /* state is changed while setting flags, restart */ 2729 if (!(state & wrflags)) { 2730 umtxq_unbusy_unlocked(&uq->uq_key); 2731 error = umtxq_check_susp(td); 2732 if (error != 0) 2733 break; 2734 continue; 2735 } 2736 2737 sleep: 2738 /* contention bit is set, before sleeping, increase read waiter count */ 2739 rv = fueword32(&rwlock->rw_blocked_readers, 2740 &blocked_readers); 2741 if (rv == -1) { 2742 umtxq_unbusy_unlocked(&uq->uq_key); 2743 error = EFAULT; 2744 break; 2745 } 2746 suword32(&rwlock->rw_blocked_readers, blocked_readers+1); 2747 2748 while (state & wrflags) { 2749 umtxq_lock(&uq->uq_key); 2750 umtxq_insert(uq); 2751 umtxq_unbusy(&uq->uq_key); 2752 2753 error = umtxq_sleep(uq, "urdlck", timeout == NULL ? 2754 NULL : &timo); 2755 2756 umtxq_busy(&uq->uq_key); 2757 umtxq_remove(uq); 2758 umtxq_unlock(&uq->uq_key); 2759 if (error) 2760 break; 2761 rv = fueword32(&rwlock->rw_state, &state); 2762 if (rv == -1) { 2763 error = EFAULT; 2764 break; 2765 } 2766 } 2767 2768 /* decrease read waiter count, and may clear read contention bit */ 2769 rv = fueword32(&rwlock->rw_blocked_readers, 2770 &blocked_readers); 2771 if (rv == -1) { 2772 umtxq_unbusy_unlocked(&uq->uq_key); 2773 error = EFAULT; 2774 break; 2775 } 2776 suword32(&rwlock->rw_blocked_readers, blocked_readers-1); 2777 if (blocked_readers == 1) { 2778 rv = fueword32(&rwlock->rw_state, &state); 2779 if (rv == -1) { 2780 umtxq_unbusy_unlocked(&uq->uq_key); 2781 error = EFAULT; 2782 break; 2783 } 2784 for (;;) { 2785 rv = casueword32(&rwlock->rw_state, state, 2786 &oldstate, state & ~URWLOCK_READ_WAITERS); 2787 if (rv == -1) { 2788 error = EFAULT; 2789 break; 2790 } 2791 if (oldstate == state) 2792 break; 2793 state = oldstate; 2794 error1 = umtxq_check_susp(td); 2795 if (error1 != 0) { 2796 if (error == 0) 2797 error = error1; 2798 break; 2799 } 2800 } 2801 } 2802 2803 umtxq_unbusy_unlocked(&uq->uq_key); 2804 if (error != 0) 2805 break; 2806 } 2807 umtx_key_release(&uq->uq_key); 2808 if (error == ERESTART) 2809 error = EINTR; 2810 return (error); 2811 } 2812 2813 static int 2814 do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout) 2815 { 2816 struct abs_timeout timo; 2817 struct umtx_q *uq; 2818 uint32_t flags; 2819 int32_t state, oldstate; 2820 int32_t blocked_writers; 2821 int32_t blocked_readers; 2822 int error, error1, rv; 2823 2824 uq = td->td_umtxq; 2825 error = fueword32(&rwlock->rw_flags, &flags); 2826 if (error == -1) 2827 return (EFAULT); 2828 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2829 if (error != 0) 2830 return (error); 2831 2832 if (timeout != NULL) 2833 abs_timeout_init2(&timo, timeout); 2834 2835 blocked_readers = 0; 2836 for (;;) { 2837 rv = fueword32(&rwlock->rw_state, &state); 2838 if (rv == -1) { 2839 umtx_key_release(&uq->uq_key); 2840 return (EFAULT); 2841 } 2842 while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) { 2843 rv = casueword32(&rwlock->rw_state, state, 2844 &oldstate, state | URWLOCK_WRITE_OWNER); 2845 if (rv == -1) { 2846 umtx_key_release(&uq->uq_key); 2847 return (EFAULT); 2848 } 2849 if (oldstate == state) { 2850 umtx_key_release(&uq->uq_key); 2851 return (0); 2852 } 2853 state = oldstate; 2854 error = umtxq_check_susp(td); 2855 if (error != 0) 2856 break; 2857 } 2858 2859 if (error) { 2860 if 
(!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) && 2861 blocked_readers != 0) { 2862 umtxq_lock(&uq->uq_key); 2863 umtxq_busy(&uq->uq_key); 2864 umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE); 2865 umtxq_unbusy(&uq->uq_key); 2866 umtxq_unlock(&uq->uq_key); 2867 } 2868 2869 break; 2870 } 2871 2872 /* grab monitor lock */ 2873 umtxq_lock(&uq->uq_key); 2874 umtxq_busy(&uq->uq_key); 2875 umtxq_unlock(&uq->uq_key); 2876 2877 /* 2878 * re-read the state, in case it changed between the try-lock above 2879 * and the check below 2880 */ 2881 rv = fueword32(&rwlock->rw_state, &state); 2882 if (rv == -1) 2883 error = EFAULT; 2884 2885 while (error == 0 && ((state & URWLOCK_WRITE_OWNER) || 2886 URWLOCK_READER_COUNT(state) != 0) && 2887 (state & URWLOCK_WRITE_WAITERS) == 0) { 2888 rv = casueword32(&rwlock->rw_state, state, 2889 &oldstate, state | URWLOCK_WRITE_WAITERS); 2890 if (rv == -1) { 2891 error = EFAULT; 2892 break; 2893 } 2894 if (oldstate == state) 2895 goto sleep; 2896 state = oldstate; 2897 error = umtxq_check_susp(td); 2898 if (error != 0) 2899 break; 2900 } 2901 if (error != 0) { 2902 umtxq_unbusy_unlocked(&uq->uq_key); 2903 break; 2904 } 2905 2906 if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) { 2907 umtxq_unbusy_unlocked(&uq->uq_key); 2908 error = umtxq_check_susp(td); 2909 if (error != 0) 2910 break; 2911 continue; 2912 } 2913 sleep: 2914 rv = fueword32(&rwlock->rw_blocked_writers, 2915 &blocked_writers); 2916 if (rv == -1) { 2917 umtxq_unbusy_unlocked(&uq->uq_key); 2918 error = EFAULT; 2919 break; 2920 } 2921 suword32(&rwlock->rw_blocked_writers, blocked_writers+1); 2922 2923 while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) { 2924 umtxq_lock(&uq->uq_key); 2925 umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE); 2926 umtxq_unbusy(&uq->uq_key); 2927 2928 error = umtxq_sleep(uq, "uwrlck", timeout == NULL ? 2929 NULL : &timo); 2930 2931 umtxq_busy(&uq->uq_key); 2932 umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE); 2933 umtxq_unlock(&uq->uq_key); 2934 if (error) 2935 break; 2936 rv = fueword32(&rwlock->rw_state, &state); 2937 if (rv == -1) { 2938 error = EFAULT; 2939 break; 2940 } 2941 } 2942 2943 rv = fueword32(&rwlock->rw_blocked_writers, 2944 &blocked_writers); 2945 if (rv == -1) { 2946 umtxq_unbusy_unlocked(&uq->uq_key); 2947 error = EFAULT; 2948 break; 2949 } 2950 suword32(&rwlock->rw_blocked_writers, blocked_writers-1); 2951 if (blocked_writers == 1) { 2952 rv = fueword32(&rwlock->rw_state, &state); 2953 if (rv == -1) { 2954 umtxq_unbusy_unlocked(&uq->uq_key); 2955 error = EFAULT; 2956 break; 2957 } 2958 for (;;) { 2959 rv = casueword32(&rwlock->rw_state, state, 2960 &oldstate, state & ~URWLOCK_WRITE_WAITERS); 2961 if (rv == -1) { 2962 error = EFAULT; 2963 break; 2964 } 2965 if (oldstate == state) 2966 break; 2967 state = oldstate; 2968 error1 = umtxq_check_susp(td); 2969 /* 2970 * We are leaving the URWLOCK_WRITE_WAITERS 2971 * behind, but this should not harm the 2972 * correctness. 
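 * At worst the stale bit sends a later unlocker into the
 * kernel, where it signals an empty exclusive queue; no
 * wakeup can be lost, only a redundant syscall made.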
2973 */ 2974 if (error1 != 0) { 2975 if (error == 0) 2976 error = error1; 2977 break; 2978 } 2979 } 2980 rv = fueword32(&rwlock->rw_blocked_readers, 2981 &blocked_readers); 2982 if (rv == -1) { 2983 umtxq_unbusy_unlocked(&uq->uq_key); 2984 error = EFAULT; 2985 break; 2986 } 2987 } else 2988 blocked_readers = 0; 2989 2990 umtxq_unbusy_unlocked(&uq->uq_key); 2991 } 2992 2993 umtx_key_release(&uq->uq_key); 2994 if (error == ERESTART) 2995 error = EINTR; 2996 return (error); 2997 } 2998 2999 static int 3000 do_rw_unlock(struct thread *td, struct urwlock *rwlock) 3001 { 3002 struct umtx_q *uq; 3003 uint32_t flags; 3004 int32_t state, oldstate; 3005 int error, rv, q, count; 3006 3007 uq = td->td_umtxq; 3008 error = fueword32(&rwlock->rw_flags, &flags); 3009 if (error == -1) 3010 return (EFAULT); 3011 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3012 if (error != 0) 3013 return (error); 3014 3015 error = fueword32(&rwlock->rw_state, &state); 3016 if (error == -1) { 3017 error = EFAULT; 3018 goto out; 3019 } 3020 if (state & URWLOCK_WRITE_OWNER) { 3021 for (;;) { 3022 rv = casueword32(&rwlock->rw_state, state, 3023 &oldstate, state & ~URWLOCK_WRITE_OWNER); 3024 if (rv == -1) { 3025 error = EFAULT; 3026 goto out; 3027 } 3028 if (oldstate != state) { 3029 state = oldstate; 3030 if (!(oldstate & URWLOCK_WRITE_OWNER)) { 3031 error = EPERM; 3032 goto out; 3033 } 3034 error = umtxq_check_susp(td); 3035 if (error != 0) 3036 goto out; 3037 } else 3038 break; 3039 } 3040 } else if (URWLOCK_READER_COUNT(state) != 0) { 3041 for (;;) { 3042 rv = casueword32(&rwlock->rw_state, state, 3043 &oldstate, state - 1); 3044 if (rv == -1) { 3045 error = EFAULT; 3046 goto out; 3047 } 3048 if (oldstate != state) { 3049 state = oldstate; 3050 if (URWLOCK_READER_COUNT(oldstate) == 0) { 3051 error = EPERM; 3052 goto out; 3053 } 3054 error = umtxq_check_susp(td); 3055 if (error != 0) 3056 goto out; 3057 } else 3058 break; 3059 } 3060 } else { 3061 error = EPERM; 3062 goto out; 3063 } 3064 3065 count = 0; 3066 3067 if (!(flags & URWLOCK_PREFER_READER)) { 3068 if (state & URWLOCK_WRITE_WAITERS) { 3069 count = 1; 3070 q = UMTX_EXCLUSIVE_QUEUE; 3071 } else if (state & URWLOCK_READ_WAITERS) { 3072 count = INT_MAX; 3073 q = UMTX_SHARED_QUEUE; 3074 } 3075 } else { 3076 if (state & URWLOCK_READ_WAITERS) { 3077 count = INT_MAX; 3078 q = UMTX_SHARED_QUEUE; 3079 } else if (state & URWLOCK_WRITE_WAITERS) { 3080 count = 1; 3081 q = UMTX_EXCLUSIVE_QUEUE; 3082 } 3083 } 3084 3085 if (count) { 3086 umtxq_lock(&uq->uq_key); 3087 umtxq_busy(&uq->uq_key); 3088 umtxq_signal_queue(&uq->uq_key, count, q); 3089 umtxq_unbusy(&uq->uq_key); 3090 umtxq_unlock(&uq->uq_key); 3091 } 3092 out: 3093 umtx_key_release(&uq->uq_key); 3094 return (error); 3095 } 3096 3097 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3098 static int 3099 do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout) 3100 { 3101 struct abs_timeout timo; 3102 struct umtx_q *uq; 3103 uint32_t flags, count, count1; 3104 int error, rv; 3105 3106 uq = td->td_umtxq; 3107 error = fueword32(&sem->_flags, &flags); 3108 if (error == -1) 3109 return (EFAULT); 3110 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3111 if (error != 0) 3112 return (error); 3113 3114 if (timeout != NULL) 3115 abs_timeout_init2(&timo, timeout); 3116 3117 umtxq_lock(&uq->uq_key); 3118 umtxq_busy(&uq->uq_key); 3119 umtxq_insert(uq); 3120 umtxq_unlock(&uq->uq_key); 3121 rv = casueword32(&sem->_has_waiters, 0, &count1, 1); 3122 if (rv == 0) 
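/*
 * _has_waiters is now visible to the posting side; re-read
 * _count to close the race with a post that happened before
 * the flag was set, in which case we must not sleep.
 */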
3123 rv = fueword32(&sem->_count, &count); 3124 if (rv == -1 || count != 0) { 3125 umtxq_lock(&uq->uq_key); 3126 umtxq_unbusy(&uq->uq_key); 3127 umtxq_remove(uq); 3128 umtxq_unlock(&uq->uq_key); 3129 umtx_key_release(&uq->uq_key); 3130 return (rv == -1 ? EFAULT : 0); 3131 } 3132 umtxq_lock(&uq->uq_key); 3133 umtxq_unbusy(&uq->uq_key); 3134 3135 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 3136 3137 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3138 error = 0; 3139 else { 3140 umtxq_remove(uq); 3141 /* A relative timeout cannot be restarted. */ 3142 if (error == ERESTART && timeout != NULL && 3143 (timeout->_flags & UMTX_ABSTIME) == 0) 3144 error = EINTR; 3145 } 3146 umtxq_unlock(&uq->uq_key); 3147 umtx_key_release(&uq->uq_key); 3148 return (error); 3149 } 3150 3151 /* 3152 * Signal a userland semaphore. 3153 */ 3154 static int 3155 do_sem_wake(struct thread *td, struct _usem *sem) 3156 { 3157 struct umtx_key key; 3158 int error, cnt; 3159 uint32_t flags; 3160 3161 error = fueword32(&sem->_flags, &flags); 3162 if (error == -1) 3163 return (EFAULT); 3164 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3165 return (error); 3166 umtxq_lock(&key); 3167 umtxq_busy(&key); 3168 cnt = umtxq_count(&key); 3169 if (cnt > 0) { 3170 /* 3171 * Check if count is greater than 0, this means the memory is 3172 * still being referenced by user code, so we can safely 3173 * update _has_waiters flag. 3174 */ 3175 if (cnt == 1) { 3176 umtxq_unlock(&key); 3177 error = suword32(&sem->_has_waiters, 0); 3178 umtxq_lock(&key); 3179 if (error == -1) 3180 error = EFAULT; 3181 } 3182 umtxq_signal(&key, 1); 3183 } 3184 umtxq_unbusy(&key); 3185 umtxq_unlock(&key); 3186 umtx_key_release(&key); 3187 return (error); 3188 } 3189 #endif 3190 3191 static int 3192 do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout) 3193 { 3194 struct abs_timeout timo; 3195 struct umtx_q *uq; 3196 uint32_t count, flags; 3197 int error, rv; 3198 3199 uq = td->td_umtxq; 3200 flags = fuword32(&sem->_flags); 3201 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3202 if (error != 0) 3203 return (error); 3204 3205 if (timeout != NULL) 3206 abs_timeout_init2(&timo, timeout); 3207 3208 umtxq_lock(&uq->uq_key); 3209 umtxq_busy(&uq->uq_key); 3210 umtxq_insert(uq); 3211 umtxq_unlock(&uq->uq_key); 3212 rv = fueword32(&sem->_count, &count); 3213 if (rv == -1) { 3214 umtxq_lock(&uq->uq_key); 3215 umtxq_unbusy(&uq->uq_key); 3216 umtxq_remove(uq); 3217 umtxq_unlock(&uq->uq_key); 3218 umtx_key_release(&uq->uq_key); 3219 return (EFAULT); 3220 } 3221 for (;;) { 3222 if (USEM_COUNT(count) != 0) { 3223 umtxq_lock(&uq->uq_key); 3224 umtxq_unbusy(&uq->uq_key); 3225 umtxq_remove(uq); 3226 umtxq_unlock(&uq->uq_key); 3227 umtx_key_release(&uq->uq_key); 3228 return (0); 3229 } 3230 if (count == USEM_HAS_WAITERS) 3231 break; 3232 rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS); 3233 if (rv == -1) { 3234 umtxq_lock(&uq->uq_key); 3235 umtxq_unbusy(&uq->uq_key); 3236 umtxq_remove(uq); 3237 umtxq_unlock(&uq->uq_key); 3238 umtx_key_release(&uq->uq_key); 3239 return (EFAULT); 3240 } 3241 if (count == 0) 3242 break; 3243 } 3244 umtxq_lock(&uq->uq_key); 3245 umtxq_unbusy(&uq->uq_key); 3246 3247 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 3248 3249 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3250 error = 0; 3251 else { 3252 umtxq_remove(uq); 3253 if (timeout != NULL && (timeout->_flags & UMTX_ABSTIME) == 0) { 3254 /* A relative timeout cannot be restarted. 
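 * Convert ERESTART to EINTR instead, and compute how much of
 * the interval was left so the caller can copy the remaining
 * time back out to userland for a retry.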
*/ 3255 if (error == ERESTART) 3256 error = EINTR; 3257 if (error == EINTR) { 3258 abs_timeout_update(&timo); 3259 timeout->_timeout = timo.end; 3260 timespecsub(&timeout->_timeout, &timo.cur); 3261 } 3262 } 3263 } 3264 umtxq_unlock(&uq->uq_key); 3265 umtx_key_release(&uq->uq_key); 3266 return (error); 3267 } 3268 3269 /* 3270 * Signal a userland semaphore. 3271 */ 3272 static int 3273 do_sem2_wake(struct thread *td, struct _usem2 *sem) 3274 { 3275 struct umtx_key key; 3276 int error, cnt, rv; 3277 uint32_t count, flags; 3278 3279 rv = fueword32(&sem->_flags, &flags); 3280 if (rv == -1) 3281 return (EFAULT); 3282 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3283 return (error); 3284 umtxq_lock(&key); 3285 umtxq_busy(&key); 3286 cnt = umtxq_count(&key); 3287 if (cnt > 0) { 3288 /* 3289 * If this was the last sleeping thread, clear the waiters 3290 * flag in _count. 3291 */ 3292 if (cnt == 1) { 3293 umtxq_unlock(&key); 3294 rv = fueword32(&sem->_count, &count); 3295 while (rv != -1 && count & USEM_HAS_WAITERS) 3296 rv = casueword32(&sem->_count, count, &count, 3297 count & ~USEM_HAS_WAITERS); 3298 if (rv == -1) 3299 error = EFAULT; 3300 umtxq_lock(&key); 3301 } 3302 3303 umtxq_signal(&key, 1); 3304 } 3305 umtxq_unbusy(&key); 3306 umtxq_unlock(&key); 3307 umtx_key_release(&key); 3308 return (error); 3309 } 3310 3311 static inline int 3312 umtx_copyin_timeout(const void *addr, struct timespec *tsp) 3313 { 3314 int error; 3315 3316 error = copyin(addr, tsp, sizeof(struct timespec)); 3317 if (error == 0) { 3318 if (tsp->tv_sec < 0 || 3319 tsp->tv_nsec >= 1000000000 || 3320 tsp->tv_nsec < 0) 3321 error = EINVAL; 3322 } 3323 return (error); 3324 } 3325 3326 static inline int 3327 umtx_copyin_umtx_time(const void *addr, size_t size, struct _umtx_time *tp) 3328 { 3329 int error; 3330 3331 if (size <= sizeof(struct timespec)) { 3332 tp->_clockid = CLOCK_REALTIME; 3333 tp->_flags = 0; 3334 error = copyin(addr, &tp->_timeout, sizeof(struct timespec)); 3335 } else 3336 error = copyin(addr, tp, sizeof(struct _umtx_time)); 3337 if (error != 0) 3338 return (error); 3339 if (tp->_timeout.tv_sec < 0 || 3340 tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0) 3341 return (EINVAL); 3342 return (0); 3343 } 3344 3345 static int 3346 __umtx_op_unimpl(struct thread *td, struct _umtx_op_args *uap) 3347 { 3348 3349 return (EOPNOTSUPP); 3350 } 3351 3352 static int 3353 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap) 3354 { 3355 struct _umtx_time timeout, *tm_p; 3356 int error; 3357 3358 if (uap->uaddr2 == NULL) 3359 tm_p = NULL; 3360 else { 3361 error = umtx_copyin_umtx_time( 3362 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3363 if (error != 0) 3364 return (error); 3365 tm_p = &timeout; 3366 } 3367 return (do_wait(td, uap->obj, uap->val, tm_p, 0, 0)); 3368 } 3369 3370 static int 3371 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap) 3372 { 3373 struct _umtx_time timeout, *tm_p; 3374 int error; 3375 3376 if (uap->uaddr2 == NULL) 3377 tm_p = NULL; 3378 else { 3379 error = umtx_copyin_umtx_time( 3380 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3381 if (error != 0) 3382 return (error); 3383 tm_p = &timeout; 3384 } 3385 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); 3386 } 3387 3388 static int 3389 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap) 3390 { 3391 struct _umtx_time *tm_p, timeout; 3392 int error; 3393 3394 if (uap->uaddr2 == NULL) 3395 tm_p = NULL; 3396 else { 3397 error = umtx_copyin_umtx_time( 3398 uap->uaddr2,
(size_t)uap->uaddr1, &timeout); 3399 if (error != 0) 3400 return (error); 3401 tm_p = &timeout; 3402 } 3403 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1)); 3404 } 3405 3406 static int 3407 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap) 3408 { 3409 3410 return (kern_umtx_wake(td, uap->obj, uap->val, 0)); 3411 } 3412 3413 #define BATCH_SIZE 128 3414 static int 3415 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap) 3416 { 3417 char *uaddrs[BATCH_SIZE], **upp; 3418 int count, error, i, pos, tocopy; 3419 3420 upp = (char **)uap->obj; 3421 error = 0; 3422 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 3423 pos += tocopy) { 3424 tocopy = MIN(count, BATCH_SIZE); 3425 error = copyin(upp + pos, uaddrs, tocopy * sizeof(char *)); 3426 if (error != 0) 3427 break; 3428 for (i = 0; i < tocopy; ++i) 3429 kern_umtx_wake(td, uaddrs[i], INT_MAX, 1); 3430 maybe_yield(); 3431 } 3432 return (error); 3433 } 3434 3435 static int 3436 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap) 3437 { 3438 3439 return (kern_umtx_wake(td, uap->obj, uap->val, 1)); 3440 } 3441 3442 static int 3443 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap) 3444 { 3445 struct _umtx_time *tm_p, timeout; 3446 int error; 3447 3448 /* Allow a null timespec (wait forever). */ 3449 if (uap->uaddr2 == NULL) 3450 tm_p = NULL; 3451 else { 3452 error = umtx_copyin_umtx_time( 3453 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3454 if (error != 0) 3455 return (error); 3456 tm_p = &timeout; 3457 } 3458 return (do_lock_umutex(td, uap->obj, tm_p, 0)); 3459 } 3460 3461 static int 3462 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap) 3463 { 3464 3465 return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY)); 3466 } 3467 3468 static int 3469 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap) 3470 { 3471 struct _umtx_time *tm_p, timeout; 3472 int error; 3473 3474 /* Allow a null timespec (wait forever). */ 3475 if (uap->uaddr2 == NULL) 3476 tm_p = NULL; 3477 else { 3478 error = umtx_copyin_umtx_time( 3479 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3480 if (error != 0) 3481 return (error); 3482 tm_p = &timeout; 3483 } 3484 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT)); 3485 } 3486 3487 static int 3488 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap) 3489 { 3490 3491 return (do_wake_umutex(td, uap->obj)); 3492 } 3493 3494 static int 3495 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap) 3496 { 3497 3498 return (do_unlock_umutex(td, uap->obj, false)); 3499 } 3500 3501 static int 3502 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap) 3503 { 3504 3505 return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1)); 3506 } 3507 3508 static int 3509 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap) 3510 { 3511 struct timespec *ts, timeout; 3512 int error; 3513 3514 /* Allow a null timespec (wait forever). 
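 * CV_WAIT takes a plain struct timespec in uaddr2, not the
 * _umtx_time wrapper; the clock id and absolute-time flag
 * arrive separately in uap->val as the CVWAIT_CLOCKID and
 * CVWAIT_ABSTIME bits interpreted by do_cv_wait().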
*/ 3515 if (uap->uaddr2 == NULL) 3516 ts = NULL; 3517 else { 3518 error = umtx_copyin_timeout(uap->uaddr2, &timeout); 3519 if (error != 0) 3520 return (error); 3521 ts = &timeout; 3522 } 3523 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 3524 } 3525 3526 static int 3527 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap) 3528 { 3529 3530 return (do_cv_signal(td, uap->obj)); 3531 } 3532 3533 static int 3534 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap) 3535 { 3536 3537 return (do_cv_broadcast(td, uap->obj)); 3538 } 3539 3540 static int 3541 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap) 3542 { 3543 struct _umtx_time timeout; 3544 int error; 3545 3546 /* Allow a null timespec (wait forever). */ 3547 if (uap->uaddr2 == NULL) { 3548 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 3549 } else { 3550 error = umtx_copyin_umtx_time(uap->uaddr2, 3551 (size_t)uap->uaddr1, &timeout); 3552 if (error != 0) 3553 return (error); 3554 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 3555 } 3556 return (error); 3557 } 3558 3559 static int 3560 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap) 3561 { 3562 struct _umtx_time timeout; 3563 int error; 3564 3565 /* Allow a null timespec (wait forever). */ 3566 if (uap->uaddr2 == NULL) { 3567 error = do_rw_wrlock(td, uap->obj, 0); 3568 } else { 3569 error = umtx_copyin_umtx_time(uap->uaddr2, 3570 (size_t)uap->uaddr1, &timeout); 3571 if (error != 0) 3572 return (error); 3573 3574 error = do_rw_wrlock(td, uap->obj, &timeout); 3575 } 3576 return (error); 3577 } 3578 3579 static int 3580 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap) 3581 { 3582 3583 return (do_rw_unlock(td, uap->obj)); 3584 } 3585 3586 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3587 static int 3588 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap) 3589 { 3590 struct _umtx_time *tm_p, timeout; 3591 int error; 3592 3593 /* Allow a null timespec (wait forever). */ 3594 if (uap->uaddr2 == NULL) 3595 tm_p = NULL; 3596 else { 3597 error = umtx_copyin_umtx_time( 3598 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3599 if (error != 0) 3600 return (error); 3601 tm_p = &timeout; 3602 } 3603 return (do_sem_wait(td, uap->obj, tm_p)); 3604 } 3605 3606 static int 3607 __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap) 3608 { 3609 3610 return (do_sem_wake(td, uap->obj)); 3611 } 3612 #endif 3613 3614 static int 3615 __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap) 3616 { 3617 3618 return (do_wake2_umutex(td, uap->obj, uap->val)); 3619 } 3620 3621 static int 3622 __umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap) 3623 { 3624 struct _umtx_time *tm_p, timeout; 3625 size_t uasize; 3626 int error; 3627 3628 /* Allow a null timespec (wait forever). 
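 * uaddr1 carries the size of the timeout argument: it selects
 * between the bare timespec and full _umtx_time layouts in
 * umtx_copyin_umtx_time(), and tells us whether there is room
 * after the structure to store the remaining time on EINTR
 * (see the copyout below).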
*/ 3629 if (uap->uaddr2 == NULL) { 3630 uasize = 0; 3631 tm_p = NULL; 3632 } else { 3633 uasize = (size_t)uap->uaddr1; 3634 error = umtx_copyin_umtx_time(uap->uaddr2, uasize, &timeout); 3635 if (error != 0) 3636 return (error); 3637 tm_p = &timeout; 3638 } 3639 error = do_sem2_wait(td, uap->obj, tm_p); 3640 if (error == EINTR && uap->uaddr2 != NULL && 3641 (timeout._flags & UMTX_ABSTIME) == 0 && 3642 uasize >= sizeof(struct _umtx_time) + sizeof(struct timespec)) { 3643 error = copyout(&timeout._timeout, 3644 (struct _umtx_time *)uap->uaddr2 + 1, 3645 sizeof(struct timespec)); 3646 if (error == 0) { 3647 error = EINTR; 3648 } 3649 } 3650 3651 return (error); 3652 } 3653 3654 static int 3655 __umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap) 3656 { 3657 3658 return (do_sem2_wake(td, uap->obj)); 3659 } 3660 3661 #define USHM_OBJ_UMTX(o) \ 3662 ((struct umtx_shm_obj_list *)(&(o)->umtx_data)) 3663 3664 #define USHMF_REG_LINKED 0x0001 3665 #define USHMF_OBJ_LINKED 0x0002 3666 struct umtx_shm_reg { 3667 TAILQ_ENTRY(umtx_shm_reg) ushm_reg_link; 3668 LIST_ENTRY(umtx_shm_reg) ushm_obj_link; 3669 struct umtx_key ushm_key; 3670 struct ucred *ushm_cred; 3671 struct shmfd *ushm_obj; 3672 u_int ushm_refcnt; 3673 u_int ushm_flags; 3674 }; 3675 3676 LIST_HEAD(umtx_shm_obj_list, umtx_shm_reg); 3677 TAILQ_HEAD(umtx_shm_reg_head, umtx_shm_reg); 3678 3679 static uma_zone_t umtx_shm_reg_zone; 3680 static struct umtx_shm_reg_head umtx_shm_registry[UMTX_CHAINS]; 3681 static struct mtx umtx_shm_lock; 3682 static struct umtx_shm_reg_head umtx_shm_reg_delfree = 3683 TAILQ_HEAD_INITIALIZER(umtx_shm_reg_delfree); 3684 3685 static void umtx_shm_free_reg(struct umtx_shm_reg *reg); 3686 3687 static void 3688 umtx_shm_reg_delfree_tq(void *context __unused, int pending __unused) 3689 { 3690 struct umtx_shm_reg_head d; 3691 struct umtx_shm_reg *reg, *reg1; 3692 3693 TAILQ_INIT(&d); 3694 mtx_lock(&umtx_shm_lock); 3695 TAILQ_CONCAT(&d, &umtx_shm_reg_delfree, ushm_reg_link); 3696 mtx_unlock(&umtx_shm_lock); 3697 TAILQ_FOREACH_SAFE(reg, &d, ushm_reg_link, reg1) { 3698 TAILQ_REMOVE(&d, reg, ushm_reg_link); 3699 umtx_shm_free_reg(reg); 3700 } 3701 } 3702 3703 static struct task umtx_shm_reg_delfree_task = 3704 TASK_INITIALIZER(0, umtx_shm_reg_delfree_tq, NULL); 3705 3706 static struct umtx_shm_reg * 3707 umtx_shm_find_reg_locked(const struct umtx_key *key) 3708 { 3709 struct umtx_shm_reg *reg; 3710 struct umtx_shm_reg_head *reg_head; 3711 3712 KASSERT(key->shared, ("umtx_p_find_rg: private key")); 3713 mtx_assert(&umtx_shm_lock, MA_OWNED); 3714 reg_head = &umtx_shm_registry[key->hash]; 3715 TAILQ_FOREACH(reg, reg_head, ushm_reg_link) { 3716 KASSERT(reg->ushm_key.shared, 3717 ("non-shared key on reg %p %d", reg, reg->ushm_key.shared)); 3718 if (reg->ushm_key.info.shared.object == 3719 key->info.shared.object && 3720 reg->ushm_key.info.shared.offset == 3721 key->info.shared.offset) { 3722 KASSERT(reg->ushm_key.type == TYPE_SHM, ("TYPE_USHM")); 3723 KASSERT(reg->ushm_refcnt > 0, 3724 ("reg %p refcnt 0 onlist", reg)); 3725 KASSERT((reg->ushm_flags & USHMF_REG_LINKED) != 0, 3726 ("reg %p not linked", reg)); 3727 reg->ushm_refcnt++; 3728 return (reg); 3729 } 3730 } 3731 return (NULL); 3732 } 3733 3734 static struct umtx_shm_reg * 3735 umtx_shm_find_reg(const struct umtx_key *key) 3736 { 3737 struct umtx_shm_reg *reg; 3738 3739 mtx_lock(&umtx_shm_lock); 3740 reg = umtx_shm_find_reg_locked(key); 3741 mtx_unlock(&umtx_shm_lock); 3742 return (reg); 3743 } 3744 3745 static void 3746 umtx_shm_free_reg(struct umtx_shm_reg *reg) 
3747 { 3748 3749 chgumtxcnt(reg->ushm_cred->cr_ruidinfo, -1, 0); 3750 crfree(reg->ushm_cred); 3751 shm_drop(reg->ushm_obj); 3752 uma_zfree(umtx_shm_reg_zone, reg); 3753 } 3754 3755 static bool 3756 umtx_shm_unref_reg_locked(struct umtx_shm_reg *reg, bool force) 3757 { 3758 bool res; 3759 3760 mtx_assert(&umtx_shm_lock, MA_OWNED); 3761 KASSERT(reg->ushm_refcnt > 0, ("ushm_reg %p refcnt 0", reg)); 3762 reg->ushm_refcnt--; 3763 res = reg->ushm_refcnt == 0; 3764 if (res || force) { 3765 if ((reg->ushm_flags & USHMF_REG_LINKED) != 0) { 3766 TAILQ_REMOVE(&umtx_shm_registry[reg->ushm_key.hash], 3767 reg, ushm_reg_link); 3768 reg->ushm_flags &= ~USHMF_REG_LINKED; 3769 } 3770 if ((reg->ushm_flags & USHMF_OBJ_LINKED) != 0) { 3771 LIST_REMOVE(reg, ushm_obj_link); 3772 reg->ushm_flags &= ~USHMF_OBJ_LINKED; 3773 } 3774 } 3775 return (res); 3776 } 3777 3778 static void 3779 umtx_shm_unref_reg(struct umtx_shm_reg *reg, bool force) 3780 { 3781 vm_object_t object; 3782 bool dofree; 3783 3784 if (force) { 3785 object = reg->ushm_obj->shm_object; 3786 VM_OBJECT_WLOCK(object); 3787 object->flags |= OBJ_UMTXDEAD; 3788 VM_OBJECT_WUNLOCK(object); 3789 } 3790 mtx_lock(&umtx_shm_lock); 3791 dofree = umtx_shm_unref_reg_locked(reg, force); 3792 mtx_unlock(&umtx_shm_lock); 3793 if (dofree) 3794 umtx_shm_free_reg(reg); 3795 } 3796 3797 void 3798 umtx_shm_object_init(vm_object_t object) 3799 { 3800 3801 LIST_INIT(USHM_OBJ_UMTX(object)); 3802 } 3803 3804 void 3805 umtx_shm_object_terminated(vm_object_t object) 3806 { 3807 struct umtx_shm_reg *reg, *reg1; 3808 bool dofree; 3809 3810 dofree = false; 3811 mtx_lock(&umtx_shm_lock); 3812 LIST_FOREACH_SAFE(reg, USHM_OBJ_UMTX(object), ushm_obj_link, reg1) { 3813 if (umtx_shm_unref_reg_locked(reg, true)) { 3814 TAILQ_INSERT_TAIL(&umtx_shm_reg_delfree, reg, 3815 ushm_reg_link); 3816 dofree = true; 3817 } 3818 } 3819 mtx_unlock(&umtx_shm_lock); 3820 if (dofree) 3821 taskqueue_enqueue(taskqueue_thread, &umtx_shm_reg_delfree_task); 3822 } 3823 3824 static int 3825 umtx_shm_create_reg(struct thread *td, const struct umtx_key *key, 3826 struct umtx_shm_reg **res) 3827 { 3828 struct umtx_shm_reg *reg, *reg1; 3829 struct ucred *cred; 3830 int error; 3831 3832 reg = umtx_shm_find_reg(key); 3833 if (reg != NULL) { 3834 *res = reg; 3835 return (0); 3836 } 3837 cred = td->td_ucred; 3838 if (!chgumtxcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_UMTXP))) 3839 return (ENOMEM); 3840 reg = uma_zalloc(umtx_shm_reg_zone, M_WAITOK | M_ZERO); 3841 reg->ushm_refcnt = 1; 3842 bcopy(key, &reg->ushm_key, sizeof(*key)); 3843 reg->ushm_obj = shm_alloc(td->td_ucred, O_RDWR); 3844 reg->ushm_cred = crhold(cred); 3845 error = shm_dotruncate(reg->ushm_obj, PAGE_SIZE); 3846 if (error != 0) { 3847 umtx_shm_free_reg(reg); 3848 return (error); 3849 } 3850 mtx_lock(&umtx_shm_lock); 3851 reg1 = umtx_shm_find_reg_locked(key); 3852 if (reg1 != NULL) { 3853 mtx_unlock(&umtx_shm_lock); 3854 umtx_shm_free_reg(reg); 3855 *res = reg1; 3856 return (0); 3857 } 3858 reg->ushm_refcnt++; 3859 TAILQ_INSERT_TAIL(&umtx_shm_registry[key->hash], reg, ushm_reg_link); 3860 LIST_INSERT_HEAD(USHM_OBJ_UMTX(key->info.shared.object), reg, 3861 ushm_obj_link); 3862 reg->ushm_flags = USHMF_REG_LINKED | USHMF_OBJ_LINKED; 3863 mtx_unlock(&umtx_shm_lock); 3864 *res = reg; 3865 return (0); 3866 } 3867 3868 static int 3869 umtx_shm_alive(struct thread *td, void *addr) 3870 { 3871 vm_map_t map; 3872 vm_map_entry_t entry; 3873 vm_object_t object; 3874 vm_pindex_t pindex; 3875 vm_prot_t prot; 3876 int res, ret; 3877 boolean_t wired; 3878 3879 map =
&td->td_proc->p_vmspace->vm_map; 3880 res = vm_map_lookup(&map, (uintptr_t)addr, VM_PROT_READ, &entry, 3881 &object, &pindex, &prot, &wired); 3882 if (res != KERN_SUCCESS) 3883 return (EFAULT); 3884 if (object == NULL) 3885 ret = EINVAL; 3886 else 3887 ret = (object->flags & OBJ_UMTXDEAD) != 0 ? ENOTTY : 0; 3888 vm_map_lookup_done(map, entry); 3889 return (ret); 3890 } 3891 3892 static void 3893 umtx_shm_init(void) 3894 { 3895 int i; 3896 3897 umtx_shm_reg_zone = uma_zcreate("umtx_shm", sizeof(struct umtx_shm_reg), 3898 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 3899 mtx_init(&umtx_shm_lock, "umtxshm", NULL, MTX_DEF); 3900 for (i = 0; i < nitems(umtx_shm_registry); i++) 3901 TAILQ_INIT(&umtx_shm_registry[i]); 3902 } 3903 3904 static int 3905 umtx_shm(struct thread *td, void *addr, u_int flags) 3906 { 3907 struct umtx_key key; 3908 struct umtx_shm_reg *reg; 3909 struct file *fp; 3910 int error, fd; 3911 3912 if (__bitcount(flags & (UMTX_SHM_CREAT | UMTX_SHM_LOOKUP | 3913 UMTX_SHM_DESTROY | UMTX_SHM_ALIVE)) != 1) 3914 return (EINVAL); 3915 if ((flags & UMTX_SHM_ALIVE) != 0) 3916 return (umtx_shm_alive(td, addr)); 3917 error = umtx_key_get(addr, TYPE_SHM, PROCESS_SHARE, &key); 3918 if (error != 0) 3919 return (error); 3920 KASSERT(key.shared == 1, ("non-shared key")); 3921 if ((flags & UMTX_SHM_CREAT) != 0) { 3922 error = umtx_shm_create_reg(td, &key, &reg); 3923 } else { 3924 reg = umtx_shm_find_reg(&key); 3925 if (reg == NULL) 3926 error = ESRCH; 3927 } 3928 umtx_key_release(&key); 3929 if (error != 0) 3930 return (error); 3931 KASSERT(reg != NULL, ("no reg")); 3932 if ((flags & UMTX_SHM_DESTROY) != 0) { 3933 umtx_shm_unref_reg(reg, true); 3934 } else { 3935 #if 0 3936 #ifdef MAC 3937 error = mac_posixshm_check_open(td->td_ucred, 3938 reg->ushm_obj, FFLAGS(O_RDWR)); 3939 if (error == 0) 3940 #endif 3941 error = shm_access(reg->ushm_obj, td->td_ucred, 3942 FFLAGS(O_RDWR)); 3943 if (error == 0) 3944 #endif 3945 error = falloc_caps(td, &fp, &fd, O_CLOEXEC, NULL); 3946 if (error == 0) { 3947 shm_hold(reg->ushm_obj); 3948 finit(fp, FFLAGS(O_RDWR), DTYPE_SHM, reg->ushm_obj, 3949 &shm_ops); 3950 td->td_retval[0] = fd; 3951 fdrop(fp, td); 3952 } 3953 } 3954 umtx_shm_unref_reg(reg, false); 3955 return (error); 3956 } 3957 3958 static int 3959 __umtx_op_shm(struct thread *td, struct _umtx_op_args *uap) 3960 { 3961 3962 return (umtx_shm(td, uap->uaddr1, uap->val)); 3963 } 3964 3965 static int 3966 umtx_robust_lists(struct thread *td, struct umtx_robust_lists_params *rbp) 3967 { 3968 3969 td->td_rb_list = rbp->robust_list_offset; 3970 td->td_rbp_list = rbp->robust_priv_list_offset; 3971 td->td_rb_inact = rbp->robust_inact_offset; 3972 return (0); 3973 } 3974 3975 static int 3976 __umtx_op_robust_lists(struct thread *td, struct _umtx_op_args *uap) 3977 { 3978 struct umtx_robust_lists_params rb; 3979 int error; 3980 3981 if (uap->val > sizeof(rb)) 3982 return (EINVAL); 3983 bzero(&rb, sizeof(rb)); 3984 error = copyin(uap->uaddr1, &rb, uap->val); 3985 if (error != 0) 3986 return (error); 3987 return (umtx_robust_lists(td, &rb)); 3988 } 3989 3990 typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap); 3991 3992 static const _umtx_op_func op_table[] = { 3993 [UMTX_OP_RESERVED0] = __umtx_op_unimpl, 3994 [UMTX_OP_RESERVED1] = __umtx_op_unimpl, 3995 [UMTX_OP_WAIT] = __umtx_op_wait, 3996 [UMTX_OP_WAKE] = __umtx_op_wake, 3997 [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex, 3998 [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex, 3999 [UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex, 4000
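/*
 * Operations are dispatched through this table, indexed by the op
 * argument of _umtx_op(2); sys__umtx_op() below rejects out-of-range
 * indices.  An illustrative userland call (a sketch, not the libthr
 * source): a timed mutex lock passes the timeout size in uaddr1 and
 * the timeout itself in uaddr2:
 *
 *	struct _umtx_time tm = {
 *		._timeout = { .tv_sec = 1 },
 *		._flags = 0,
 *		._clockid = CLOCK_REALTIME,
 *	};
 *	error = _umtx_op(&mtx, UMTX_OP_MUTEX_LOCK, 0,
 *	    (void *)sizeof(tm), &tm);
 */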
[UMTX_OP_SET_CEILING] = __umtx_op_set_ceiling, 4001 [UMTX_OP_CV_WAIT] = __umtx_op_cv_wait, 4002 [UMTX_OP_CV_SIGNAL] = __umtx_op_cv_signal, 4003 [UMTX_OP_CV_BROADCAST] = __umtx_op_cv_broadcast, 4004 [UMTX_OP_WAIT_UINT] = __umtx_op_wait_uint, 4005 [UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock, 4006 [UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock, 4007 [UMTX_OP_RW_UNLOCK] = __umtx_op_rw_unlock, 4008 [UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private, 4009 [UMTX_OP_WAKE_PRIVATE] = __umtx_op_wake_private, 4010 [UMTX_OP_MUTEX_WAIT] = __umtx_op_wait_umutex, 4011 [UMTX_OP_MUTEX_WAKE] = __umtx_op_wake_umutex, 4012 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4013 [UMTX_OP_SEM_WAIT] = __umtx_op_sem_wait, 4014 [UMTX_OP_SEM_WAKE] = __umtx_op_sem_wake, 4015 #else 4016 [UMTX_OP_SEM_WAIT] = __umtx_op_unimpl, 4017 [UMTX_OP_SEM_WAKE] = __umtx_op_unimpl, 4018 #endif 4019 [UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private, 4020 [UMTX_OP_MUTEX_WAKE2] = __umtx_op_wake2_umutex, 4021 [UMTX_OP_SEM2_WAIT] = __umtx_op_sem2_wait, 4022 [UMTX_OP_SEM2_WAKE] = __umtx_op_sem2_wake, 4023 [UMTX_OP_SHM] = __umtx_op_shm, 4024 [UMTX_OP_ROBUST_LISTS] = __umtx_op_robust_lists, 4025 }; 4026 4027 int 4028 sys__umtx_op(struct thread *td, struct _umtx_op_args *uap) 4029 { 4030 4031 if ((unsigned)uap->op < nitems(op_table)) 4032 return (*op_table[uap->op])(td, uap); 4033 return (EINVAL); 4034 } 4035 4036 #ifdef COMPAT_FREEBSD32 4037 4038 struct timespec32 { 4039 int32_t tv_sec; 4040 int32_t tv_nsec; 4041 }; 4042 4043 struct umtx_time32 { 4044 struct timespec32 timeout; 4045 uint32_t flags; 4046 uint32_t clockid; 4047 }; 4048 4049 static inline int 4050 umtx_copyin_timeout32(void *addr, struct timespec *tsp) 4051 { 4052 struct timespec32 ts32; 4053 int error; 4054 4055 error = copyin(addr, &ts32, sizeof(struct timespec32)); 4056 if (error == 0) { 4057 if (ts32.tv_sec < 0 || 4058 ts32.tv_nsec >= 1000000000 || 4059 ts32.tv_nsec < 0) 4060 error = EINVAL; 4061 else { 4062 tsp->tv_sec = ts32.tv_sec; 4063 tsp->tv_nsec = ts32.tv_nsec; 4064 } 4065 } 4066 return (error); 4067 } 4068 4069 static inline int 4070 umtx_copyin_umtx_time32(const void *addr, size_t size, struct _umtx_time *tp) 4071 { 4072 struct umtx_time32 t32; 4073 int error; 4074 4075 t32.clockid = CLOCK_REALTIME; 4076 t32.flags = 0; 4077 if (size <= sizeof(struct timespec32)) 4078 error = copyin(addr, &t32.timeout, sizeof(struct timespec32)); 4079 else 4080 error = copyin(addr, &t32, sizeof(struct umtx_time32)); 4081 if (error != 0) 4082 return (error); 4083 if (t32.timeout.tv_sec < 0 || 4084 t32.timeout.tv_nsec >= 1000000000 || t32.timeout.tv_nsec < 0) 4085 return (EINVAL); 4086 tp->_timeout.tv_sec = t32.timeout.tv_sec; 4087 tp->_timeout.tv_nsec = t32.timeout.tv_nsec; 4088 tp->_flags = t32.flags; 4089 tp->_clockid = t32.clockid; 4090 return (0); 4091 } 4092 4093 static int 4094 __umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 4095 { 4096 struct _umtx_time *tm_p, timeout; 4097 int error; 4098 4099 if (uap->uaddr2 == NULL) 4100 tm_p = NULL; 4101 else { 4102 error = umtx_copyin_umtx_time32(uap->uaddr2, 4103 (size_t)uap->uaddr1, &timeout); 4104 if (error != 0) 4105 return (error); 4106 tm_p = &timeout; 4107 } 4108 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); 4109 } 4110 4111 static int 4112 __umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap) 4113 { 4114 struct _umtx_time *tm_p, timeout; 4115 int error; 4116 4117 /* Allow a null timespec (wait forever). 
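 * The 32-bit ABI uses 32-bit tv_sec/tv_nsec, so the timeout
 * must be converted with umtx_copyin_umtx_time32() rather than
 * the native umtx_copyin_umtx_time().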
*/ 4118 if (uap->uaddr2 == NULL) 4119 tm_p = NULL; 4120 else { 4121 error = umtx_copyin_umtx_time32(uap->uaddr2, 4122 (size_t)uap->uaddr1, &timeout); 4123 if (error != 0) 4124 return (error); 4125 tm_p = &timeout; 4126 } 4127 return (do_lock_umutex(td, uap->obj, tm_p, 0)); 4128 } 4129 4130 static int 4131 __umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap) 4132 { 4133 struct _umtx_time *tm_p, timeout; 4134 int error; 4135 4136 /* Allow a null timespec (wait forever). */ 4137 if (uap->uaddr2 == NULL) 4138 tm_p = NULL; 4139 else { 4140 error = umtx_copyin_umtx_time32(uap->uaddr2, 4141 (size_t)uap->uaddr1, &timeout); 4142 if (error != 0) 4143 return (error); 4144 tm_p = &timeout; 4145 } 4146 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT)); 4147 } 4148 4149 static int 4150 __umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 4151 { 4152 struct timespec *ts, timeout; 4153 int error; 4154 4155 /* Allow a null timespec (wait forever). */ 4156 if (uap->uaddr2 == NULL) 4157 ts = NULL; 4158 else { 4159 error = umtx_copyin_timeout32(uap->uaddr2, &timeout); 4160 if (error != 0) 4161 return (error); 4162 ts = &timeout; 4163 } 4164 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 4165 } 4166 4167 static int 4168 __umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap) 4169 { 4170 struct _umtx_time timeout; 4171 int error; 4172 4173 /* Allow a null timespec (wait forever). */ 4174 if (uap->uaddr2 == NULL) { 4175 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 4176 } else { 4177 error = umtx_copyin_umtx_time32(uap->uaddr2, 4178 (size_t)uap->uaddr1, &timeout); 4179 if (error != 0) 4180 return (error); 4181 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 4182 } 4183 return (error); 4184 } 4185 4186 static int 4187 __umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap) 4188 { 4189 struct _umtx_time timeout; 4190 int error; 4191 4192 /* Allow a null timespec (wait forever). */ 4193 if (uap->uaddr2 == NULL) { 4194 error = do_rw_wrlock(td, uap->obj, 0); 4195 } else { 4196 error = umtx_copyin_umtx_time32(uap->uaddr2, 4197 (size_t)uap->uaddr1, &timeout); 4198 if (error != 0) 4199 return (error); 4200 error = do_rw_wrlock(td, uap->obj, &timeout); 4201 } 4202 return (error); 4203 } 4204 4205 static int 4206 __umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap) 4207 { 4208 struct _umtx_time *tm_p, timeout; 4209 int error; 4210 4211 if (uap->uaddr2 == NULL) 4212 tm_p = NULL; 4213 else { 4214 error = umtx_copyin_umtx_time32( 4215 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 4216 if (error != 0) 4217 return (error); 4218 tm_p = &timeout; 4219 } 4220 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1)); 4221 } 4222 4223 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4224 static int 4225 __umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 4226 { 4227 struct _umtx_time *tm_p, timeout; 4228 int error; 4229 4230 /* Allow a null timespec (wait forever).
*/ 4231 if (uap->uaddr2 == NULL) 4232 tm_p = NULL; 4233 else { 4234 error = umtx_copyin_umtx_time32(uap->uaddr2, 4235 (size_t)uap->uaddr1, &timeout); 4236 if (error != 0) 4237 return (error); 4238 tm_p = &timeout; 4239 } 4240 return (do_sem_wait(td, uap->obj, tm_p)); 4241 } 4242 #endif 4243 4244 static int 4245 __umtx_op_sem2_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 4246 { 4247 struct _umtx_time *tm_p, timeout; 4248 size_t uasize; 4249 int error; 4250 4251 /* Allow a null timespec (wait forever). */ 4252 if (uap->uaddr2 == NULL) { 4253 uasize = 0; 4254 tm_p = NULL; 4255 } else { 4256 uasize = (size_t)uap->uaddr1; 4257 error = umtx_copyin_umtx_time32(uap->uaddr2, uasize, &timeout); 4258 if (error != 0) 4259 return (error); 4260 tm_p = &timeout; 4261 } 4262 error = do_sem2_wait(td, uap->obj, tm_p); 4263 if (error == EINTR && uap->uaddr2 != NULL && 4264 (timeout._flags & UMTX_ABSTIME) == 0 && 4265 uasize >= sizeof(struct umtx_time32) + sizeof(struct timespec32)) { 4266 struct timespec32 remain32 = { 4267 .tv_sec = timeout._timeout.tv_sec, 4268 .tv_nsec = timeout._timeout.tv_nsec 4269 }; 4270 error = copyout(&remain32, 4271 (struct umtx_time32 *)uap->uaddr2 + 1, 4272 sizeof(struct timespec32)); 4273 if (error == 0) { 4274 error = EINTR; 4275 } 4276 } 4277 4278 return (error); 4279 } 4280 4281 static int 4282 __umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap) 4283 { 4284 uint32_t uaddrs[BATCH_SIZE], **upp; 4285 int count, error, i, pos, tocopy; 4286 4287 upp = (uint32_t **)uap->obj; 4288 error = 0; 4289 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 4290 pos += tocopy) { 4291 tocopy = MIN(count, BATCH_SIZE); 4292 error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t)); 4293 if (error != 0) 4294 break; 4295 for (i = 0; i < tocopy; ++i) 4296 kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i], 4297 INT_MAX, 1); 4298 maybe_yield(); 4299 } 4300 return (error); 4301 } 4302 4303 struct umtx_robust_lists_params_compat32 { 4304 uint32_t robust_list_offset; 4305 uint32_t robust_priv_list_offset; 4306 uint32_t robust_inact_offset; 4307 }; 4308 4309 static int 4310 __umtx_op_robust_lists_compat32(struct thread *td, struct _umtx_op_args *uap) 4311 { 4312 struct umtx_robust_lists_params rb; 4313 struct umtx_robust_lists_params_compat32 rb32; 4314 int error; 4315 4316 if (uap->val > sizeof(rb32)) 4317 return (EINVAL); 4318 bzero(&rb, sizeof(rb)); 4319 bzero(&rb32, sizeof(rb32)); 4320 error = copyin(uap->uaddr1, &rb32, uap->val); 4321 if (error != 0) 4322 return (error); 4323 rb.robust_list_offset = rb32.robust_list_offset; 4324 rb.robust_priv_list_offset = rb32.robust_priv_list_offset; 4325 rb.robust_inact_offset = rb32.robust_inact_offset; 4326 return (umtx_robust_lists(td, &rb)); 4327 } 4328 4329 static const _umtx_op_func op_table_compat32[] = { 4330 [UMTX_OP_RESERVED0] = __umtx_op_unimpl, 4331 [UMTX_OP_RESERVED1] = __umtx_op_unimpl, 4332 [UMTX_OP_WAIT] = __umtx_op_wait_compat32, 4333 [UMTX_OP_WAKE] = __umtx_op_wake, 4334 [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex, 4335 [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex_compat32, 4336 [UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex, 4337 [UMTX_OP_SET_CEILING] = __umtx_op_set_ceiling, 4338 [UMTX_OP_CV_WAIT] = __umtx_op_cv_wait_compat32, 4339 [UMTX_OP_CV_SIGNAL] = __umtx_op_cv_signal, 4340 [UMTX_OP_CV_BROADCAST] = __umtx_op_cv_broadcast, 4341 [UMTX_OP_WAIT_UINT] = __umtx_op_wait_compat32, 4342 [UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock_compat32, 4343 [UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock_compat32, 
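	/*
	 * Operations that take no timespec (or only word-sized
	 * arguments) need no conversion and share the native handlers.
	 */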
4344 [UMTX_OP_RW_UNLOCK] = __umtx_op_rw_unlock, 4345 [UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private_compat32, 4346 [UMTX_OP_WAKE_PRIVATE] = __umtx_op_wake_private, 4347 [UMTX_OP_MUTEX_WAIT] = __umtx_op_wait_umutex_compat32, 4348 [UMTX_OP_MUTEX_WAKE] = __umtx_op_wake_umutex, 4349 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4350 [UMTX_OP_SEM_WAIT] = __umtx_op_sem_wait_compat32, 4351 [UMTX_OP_SEM_WAKE] = __umtx_op_sem_wake, 4352 #else 4353 [UMTX_OP_SEM_WAIT] = __umtx_op_unimpl, 4354 [UMTX_OP_SEM_WAKE] = __umtx_op_unimpl, 4355 #endif 4356 [UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private32, 4357 [UMTX_OP_MUTEX_WAKE2] = __umtx_op_wake2_umutex, 4358 [UMTX_OP_SEM2_WAIT] = __umtx_op_sem2_wait_compat32, 4359 [UMTX_OP_SEM2_WAKE] = __umtx_op_sem2_wake, 4360 [UMTX_OP_SHM] = __umtx_op_shm, 4361 [UMTX_OP_ROBUST_LISTS] = __umtx_op_robust_lists_compat32, 4362 }; 4363 4364 int 4365 freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap) 4366 { 4367 4368 if ((unsigned)uap->op < nitems(op_table_compat32)) { 4369 return (*op_table_compat32[uap->op])(td, 4370 (struct _umtx_op_args *)uap); 4371 } 4372 return (EINVAL); 4373 } 4374 #endif 4375 4376 void 4377 umtx_thread_init(struct thread *td) 4378 { 4379 4380 td->td_umtxq = umtxq_alloc(); 4381 td->td_umtxq->uq_thread = td; 4382 } 4383 4384 void 4385 umtx_thread_fini(struct thread *td) 4386 { 4387 4388 umtxq_free(td->td_umtxq); 4389 } 4390 4391 /* 4392 * It will be called when new thread is created, e.g fork(). 4393 */ 4394 void 4395 umtx_thread_alloc(struct thread *td) 4396 { 4397 struct umtx_q *uq; 4398 4399 uq = td->td_umtxq; 4400 uq->uq_inherited_pri = PRI_MAX; 4401 4402 KASSERT(uq->uq_flags == 0, ("uq_flags != 0")); 4403 KASSERT(uq->uq_thread == td, ("uq_thread != td")); 4404 KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL")); 4405 KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty")); 4406 } 4407 4408 /* 4409 * exec() hook. 4410 * 4411 * Clear robust lists for all process' threads, not delaying the 4412 * cleanup to thread_exit hook, since the relevant address space is 4413 * destroyed right now. 4414 */ 4415 static void 4416 umtx_exec_hook(void *arg __unused, struct proc *p, 4417 struct image_params *imgp __unused) 4418 { 4419 struct thread *td; 4420 4421 KASSERT(p == curproc, ("need curproc")); 4422 PROC_LOCK(p); 4423 KASSERT((p->p_flag & P_HADTHREADS) == 0 || 4424 (p->p_flag & P_STOPPED_SINGLE) != 0, 4425 ("curproc must be single-threaded")); 4426 FOREACH_THREAD_IN_PROC(p, td) { 4427 KASSERT(td == curthread || 4428 ((td->td_flags & TDF_BOUNDARY) != 0 && TD_IS_SUSPENDED(td)), 4429 ("running thread %p %p", p, td)); 4430 PROC_UNLOCK(p); 4431 umtx_thread_cleanup(td); 4432 PROC_LOCK(p); 4433 td->td_rb_list = td->td_rbp_list = td->td_rb_inact = 0; 4434 } 4435 PROC_UNLOCK(p); 4436 } 4437 4438 /* 4439 * thread_exit() hook. 
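 *
 * Runs in the context of the exiting thread while its address space
 * is still intact, so the robust-list walk in umtx_thread_cleanup()
 * can still access the userland mutexes.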
4440 */ 4441 void 4442 umtx_thread_exit(struct thread *td) 4443 { 4444 4445 umtx_thread_cleanup(td); 4446 } 4447 4448 static int 4449 umtx_read_uptr(struct thread *td, uintptr_t ptr, uintptr_t *res) 4450 { 4451 u_long res1; 4452 #ifdef COMPAT_FREEBSD32 4453 uint32_t res32; 4454 #endif 4455 int error; 4456 4457 #ifdef COMPAT_FREEBSD32 4458 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { 4459 error = fueword32((void *)ptr, &res32); 4460 if (error == 0) 4461 res1 = res32; 4462 } else 4463 #endif 4464 { 4465 error = fueword((void *)ptr, &res1); 4466 } 4467 if (error == 0) 4468 *res = res1; 4469 else 4470 error = EFAULT; 4471 return (error); 4472 } 4473 4474 static void 4475 umtx_read_rb_list(struct thread *td, struct umutex *m, uintptr_t *rb_list) 4476 { 4477 #ifdef COMPAT_FREEBSD32 4478 struct umutex32 m32; 4479 4480 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { 4481 memcpy(&m32, m, sizeof(m32)); 4482 *rb_list = m32.m_rb_lnk; 4483 } else 4484 #endif 4485 *rb_list = m->m_rb_lnk; 4486 } 4487 4488 static int 4489 umtx_handle_rb(struct thread *td, uintptr_t rbp, uintptr_t *rb_list, bool inact) 4490 { 4491 struct umutex m; 4492 int error; 4493 4494 KASSERT(td->td_proc == curproc, ("need current vmspace")); 4495 error = copyin((void *)rbp, &m, sizeof(m)); 4496 if (error != 0) 4497 return (error); 4498 if (rb_list != NULL) 4499 umtx_read_rb_list(td, &m, rb_list); 4500 if ((m.m_flags & UMUTEX_ROBUST) == 0) 4501 return (EINVAL); 4502 if ((m.m_owner & ~UMUTEX_CONTESTED) != td->td_tid) 4503 /* inact is cleared after unlock, allow the inconsistency */ 4504 return (inact ? 0 : EINVAL); 4505 return (do_unlock_umutex(td, (struct umutex *)rbp, true)); 4506 } 4507 4508 static void 4509 umtx_cleanup_rb_list(struct thread *td, uintptr_t rb_list, uintptr_t *rb_inact, 4510 const char *name) 4511 { 4512 int error, i; 4513 uintptr_t rbp; 4514 bool inact; 4515 4516 if (rb_list == 0) 4517 return; 4518 error = umtx_read_uptr(td, rb_list, &rbp); 4519 for (i = 0; error == 0 && rbp != 0 && i < umtx_max_rb; i++) { 4520 if (rbp == *rb_inact) { 4521 inact = true; 4522 *rb_inact = 0; 4523 } else 4524 inact = false; 4525 error = umtx_handle_rb(td, rbp, &rbp, inact); 4526 } 4527 if (i == umtx_max_rb && umtx_verbose_rb) { 4528 uprintf("comm %s pid %d: reached umtx %smax rb %d\n", 4529 td->td_proc->p_comm, td->td_proc->p_pid, name, umtx_max_rb); 4530 } 4531 if (error != 0 && umtx_verbose_rb) { 4532 uprintf("comm %s pid %d: handling %srb error %d\n", 4533 td->td_proc->p_comm, td->td_proc->p_pid, name, error); 4534 } 4535 } 4536 4537 /* 4538 * Clean up umtx data. 4539 */ 4540 static void 4541 umtx_thread_cleanup(struct thread *td) 4542 { 4543 struct umtx_q *uq; 4544 struct umtx_pi *pi; 4545 uintptr_t rb_inact; 4546 4547 /* 4548 * Disown pi mutexes. 4549 */ 4550 uq = td->td_umtxq; 4551 if (uq != NULL) { 4552 mtx_lock(&umtx_lock); 4553 uq->uq_inherited_pri = PRI_MAX; 4554 while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) { 4555 pi->pi_owner = NULL; 4556 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link); 4557 } 4558 mtx_unlock(&umtx_lock); 4559 thread_lock(td); 4560 sched_lend_user_prio(td, PRI_MAX); 4561 thread_unlock(td); 4562 } 4563 4564 /* 4565 * Handle terminated robust mutexes. Must be done after 4566 * robust pi disown, otherwise unlock could see unowned 4567 * entries. 
4568 */ 4569 rb_inact = td->td_rb_inact; 4570 if (rb_inact != 0) 4571 (void)umtx_read_uptr(td, rb_inact, &rb_inact); 4572 umtx_cleanup_rb_list(td, td->td_rb_list, &rb_inact, ""); 4573 umtx_cleanup_rb_list(td, td->td_rbp_list, &rb_inact, "priv "); 4574 if (rb_inact != 0) 4575 (void)umtx_handle_rb(td, rb_inact, NULL, true); 4576 } 4577