1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2015, 2016 The FreeBSD Foundation 5 * Copyright (c) 2004, David Xu <davidxu@freebsd.org> 6 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org> 7 * All rights reserved. 8 * 9 * Portions of this software were developed by Konstantin Belousov 10 * under sponsorship from the FreeBSD Foundation. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice unmodified, this list of conditions, and the following 17 * disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 23 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 24 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 25 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 27 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 31 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
32 */ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 37 #include "opt_umtx_profiling.h" 38 39 #include <sys/param.h> 40 #include <sys/kernel.h> 41 #include <sys/fcntl.h> 42 #include <sys/file.h> 43 #include <sys/filedesc.h> 44 #include <sys/limits.h> 45 #include <sys/lock.h> 46 #include <sys/malloc.h> 47 #include <sys/mman.h> 48 #include <sys/mutex.h> 49 #include <sys/priv.h> 50 #include <sys/proc.h> 51 #include <sys/resource.h> 52 #include <sys/resourcevar.h> 53 #include <sys/rwlock.h> 54 #include <sys/sbuf.h> 55 #include <sys/sched.h> 56 #include <sys/smp.h> 57 #include <sys/sysctl.h> 58 #include <sys/sysent.h> 59 #include <sys/systm.h> 60 #include <sys/sysproto.h> 61 #include <sys/syscallsubr.h> 62 #include <sys/taskqueue.h> 63 #include <sys/time.h> 64 #include <sys/eventhandler.h> 65 #include <sys/umtx.h> 66 67 #include <security/mac/mac_framework.h> 68 69 #include <vm/vm.h> 70 #include <vm/vm_param.h> 71 #include <vm/pmap.h> 72 #include <vm/vm_map.h> 73 #include <vm/vm_object.h> 74 75 #include <machine/atomic.h> 76 #include <machine/cpu.h> 77 78 #ifdef COMPAT_FREEBSD32 79 #include <compat/freebsd32/freebsd32_proto.h> 80 #endif 81 82 #define _UMUTEX_TRY 1 83 #define _UMUTEX_WAIT 2 84 85 #ifdef UMTX_PROFILING 86 #define UPROF_PERC_BIGGER(w, f, sw, sf) \ 87 (((w) > (sw)) || ((w) == (sw) && (f) > (sf))) 88 #endif 89 90 /* Priority inheritance mutex info. */ 91 struct umtx_pi { 92 /* Owner thread */ 93 struct thread *pi_owner; 94 95 /* Reference count */ 96 int pi_refcount; 97 98 /* List entry to link umtx holding by thread */ 99 TAILQ_ENTRY(umtx_pi) pi_link; 100 101 /* List entry in hash */ 102 TAILQ_ENTRY(umtx_pi) pi_hashlink; 103 104 /* List for waiters */ 105 TAILQ_HEAD(,umtx_q) pi_blocked; 106 107 /* Identify a userland lock object */ 108 struct umtx_key pi_key; 109 }; 110 111 /* A userland synchronous object user. */ 112 struct umtx_q { 113 /* Linked list for the hash. */ 114 TAILQ_ENTRY(umtx_q) uq_link; 115 116 /* Umtx key. 
*/ 117 struct umtx_key uq_key; 118 119 /* Umtx flags. */ 120 int uq_flags; 121 #define UQF_UMTXQ 0x0001 122 123 /* The thread waits on. */ 124 struct thread *uq_thread; 125 126 /* 127 * Blocked on PI mutex. read can use chain lock 128 * or umtx_lock, write must have both chain lock and 129 * umtx_lock being hold. 130 */ 131 struct umtx_pi *uq_pi_blocked; 132 133 /* On blocked list */ 134 TAILQ_ENTRY(umtx_q) uq_lockq; 135 136 /* Thread contending with us */ 137 TAILQ_HEAD(,umtx_pi) uq_pi_contested; 138 139 /* Inherited priority from PP mutex */ 140 u_char uq_inherited_pri; 141 142 /* Spare queue ready to be reused */ 143 struct umtxq_queue *uq_spare_queue; 144 145 /* The queue we on */ 146 struct umtxq_queue *uq_cur_queue; 147 }; 148 149 TAILQ_HEAD(umtxq_head, umtx_q); 150 151 /* Per-key wait-queue */ 152 struct umtxq_queue { 153 struct umtxq_head head; 154 struct umtx_key key; 155 LIST_ENTRY(umtxq_queue) link; 156 int length; 157 }; 158 159 LIST_HEAD(umtxq_list, umtxq_queue); 160 161 /* Userland lock object's wait-queue chain */ 162 struct umtxq_chain { 163 /* Lock for this chain. */ 164 struct mtx uc_lock; 165 166 /* List of sleep queues. 
*/ 167 struct umtxq_list uc_queue[2]; 168 #define UMTX_SHARED_QUEUE 0 169 #define UMTX_EXCLUSIVE_QUEUE 1 170 171 LIST_HEAD(, umtxq_queue) uc_spare_queue; 172 173 /* Busy flag */ 174 char uc_busy; 175 176 /* Chain lock waiters */ 177 int uc_waiters; 178 179 /* All PI in the list */ 180 TAILQ_HEAD(,umtx_pi) uc_pi_list; 181 182 #ifdef UMTX_PROFILING 183 u_int length; 184 u_int max_length; 185 #endif 186 }; 187 188 #define UMTXQ_LOCKED_ASSERT(uc) mtx_assert(&(uc)->uc_lock, MA_OWNED) 189 190 /* 191 * Don't propagate time-sharing priority, there is a security reason, 192 * a user can simply introduce PI-mutex, let thread A lock the mutex, 193 * and let another thread B block on the mutex, because B is 194 * sleeping, its priority will be boosted, this causes A's priority to 195 * be boosted via priority propagating too and will never be lowered even 196 * if it is using 100%CPU, this is unfair to other processes. 197 */ 198 199 #define UPRI(td) (((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\ 200 (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\ 201 PRI_MAX_TIMESHARE : (td)->td_user_pri) 202 203 #define GOLDEN_RATIO_PRIME 2654404609U 204 #ifndef UMTX_CHAINS 205 #define UMTX_CHAINS 512 206 #endif 207 #define UMTX_SHIFTS (__WORD_BIT - 9) 208 209 #define GET_SHARE(flags) \ 210 (((flags) & USYNC_PROCESS_SHARED) == 0 ? 
THREAD_SHARE : PROCESS_SHARE) 211 212 #define BUSY_SPINS 200 213 214 struct abs_timeout { 215 int clockid; 216 bool is_abs_real; /* TIMER_ABSTIME && CLOCK_REALTIME* */ 217 struct timespec cur; 218 struct timespec end; 219 }; 220 221 #ifdef COMPAT_FREEBSD32 222 struct umutex32 { 223 volatile __lwpid_t m_owner; /* Owner of the mutex */ 224 __uint32_t m_flags; /* Flags of the mutex */ 225 __uint32_t m_ceilings[2]; /* Priority protect ceiling */ 226 __uint32_t m_rb_lnk; /* Robust linkage */ 227 __uint32_t m_pad; 228 __uint32_t m_spare[2]; 229 }; 230 231 _Static_assert(sizeof(struct umutex) == sizeof(struct umutex32), "umutex32"); 232 _Static_assert(__offsetof(struct umutex, m_spare[0]) == 233 __offsetof(struct umutex32, m_spare[0]), "m_spare32"); 234 #endif 235 236 int umtx_shm_vnobj_persistent = 0; 237 SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_vnode_persistent, CTLFLAG_RWTUN, 238 &umtx_shm_vnobj_persistent, 0, 239 "False forces destruction of umtx attached to file, on last close"); 240 static int umtx_max_rb = 1000; 241 SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_max_robust, CTLFLAG_RWTUN, 242 &umtx_max_rb, 0, 243 ""); 244 245 static uma_zone_t umtx_pi_zone; 246 static struct umtxq_chain umtxq_chains[2][UMTX_CHAINS]; 247 static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory"); 248 static int umtx_pi_allocated; 249 250 static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug"); 251 SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD, 252 &umtx_pi_allocated, 0, "Allocated umtx_pi"); 253 static int umtx_verbose_rb = 1; 254 SYSCTL_INT(_debug_umtx, OID_AUTO, robust_faults_verbose, CTLFLAG_RWTUN, 255 &umtx_verbose_rb, 0, 256 ""); 257 258 #ifdef UMTX_PROFILING 259 static long max_length; 260 SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0, "max_length"); 261 static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD, 0, "umtx chain stats"); 262 #endif 263 264 static void abs_timeout_update(struct abs_timeout *timo); 265 266 static 
void umtx_shm_init(void); 267 static void umtxq_sysinit(void *); 268 static void umtxq_hash(struct umtx_key *key); 269 static struct umtxq_chain *umtxq_getchain(struct umtx_key *key); 270 static void umtxq_lock(struct umtx_key *key); 271 static void umtxq_unlock(struct umtx_key *key); 272 static void umtxq_busy(struct umtx_key *key); 273 static void umtxq_unbusy(struct umtx_key *key); 274 static void umtxq_insert_queue(struct umtx_q *uq, int q); 275 static void umtxq_remove_queue(struct umtx_q *uq, int q); 276 static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *); 277 static int umtxq_count(struct umtx_key *key); 278 static struct umtx_pi *umtx_pi_alloc(int); 279 static void umtx_pi_free(struct umtx_pi *pi); 280 static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, 281 bool rb); 282 static void umtx_thread_cleanup(struct thread *td); 283 static void umtx_exec_hook(void *arg __unused, struct proc *p __unused, 284 struct image_params *imgp __unused); 285 SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL); 286 287 #define umtxq_signal(key, nwake) umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE) 288 #define umtxq_insert(uq) umtxq_insert_queue((uq), UMTX_SHARED_QUEUE) 289 #define umtxq_remove(uq) umtxq_remove_queue((uq), UMTX_SHARED_QUEUE) 290 291 static struct mtx umtx_lock; 292 293 #ifdef UMTX_PROFILING 294 static void 295 umtx_init_profiling(void) 296 { 297 struct sysctl_oid *chain_oid; 298 char chain_name[10]; 299 int i; 300 301 for (i = 0; i < UMTX_CHAINS; ++i) { 302 snprintf(chain_name, sizeof(chain_name), "%d", i); 303 chain_oid = SYSCTL_ADD_NODE(NULL, 304 SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO, 305 chain_name, CTLFLAG_RD, NULL, "umtx hash stats"); 306 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, 307 "max_length0", CTLFLAG_RD, &umtxq_chains[0][i].max_length, 0, NULL); 308 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, 309 "max_length1", 
CTLFLAG_RD, &umtxq_chains[1][i].max_length, 0, NULL); 310 } 311 } 312 313 static int 314 sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS) 315 { 316 char buf[512]; 317 struct sbuf sb; 318 struct umtxq_chain *uc; 319 u_int fract, i, j, tot, whole; 320 u_int sf0, sf1, sf2, sf3, sf4; 321 u_int si0, si1, si2, si3, si4; 322 u_int sw0, sw1, sw2, sw3, sw4; 323 324 sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN); 325 for (i = 0; i < 2; i++) { 326 tot = 0; 327 for (j = 0; j < UMTX_CHAINS; ++j) { 328 uc = &umtxq_chains[i][j]; 329 mtx_lock(&uc->uc_lock); 330 tot += uc->max_length; 331 mtx_unlock(&uc->uc_lock); 332 } 333 if (tot == 0) 334 sbuf_printf(&sb, "%u) Empty ", i); 335 else { 336 sf0 = sf1 = sf2 = sf3 = sf4 = 0; 337 si0 = si1 = si2 = si3 = si4 = 0; 338 sw0 = sw1 = sw2 = sw3 = sw4 = 0; 339 for (j = 0; j < UMTX_CHAINS; j++) { 340 uc = &umtxq_chains[i][j]; 341 mtx_lock(&uc->uc_lock); 342 whole = uc->max_length * 100; 343 mtx_unlock(&uc->uc_lock); 344 fract = (whole % tot) * 100; 345 if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) { 346 sf0 = fract; 347 si0 = j; 348 sw0 = whole; 349 } else if (UPROF_PERC_BIGGER(whole, fract, sw1, 350 sf1)) { 351 sf1 = fract; 352 si1 = j; 353 sw1 = whole; 354 } else if (UPROF_PERC_BIGGER(whole, fract, sw2, 355 sf2)) { 356 sf2 = fract; 357 si2 = j; 358 sw2 = whole; 359 } else if (UPROF_PERC_BIGGER(whole, fract, sw3, 360 sf3)) { 361 sf3 = fract; 362 si3 = j; 363 sw3 = whole; 364 } else if (UPROF_PERC_BIGGER(whole, fract, sw4, 365 sf4)) { 366 sf4 = fract; 367 si4 = j; 368 sw4 = whole; 369 } 370 } 371 sbuf_printf(&sb, "queue %u:\n", i); 372 sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot, 373 sf0 / tot, si0); 374 sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot, 375 sf1 / tot, si1); 376 sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot, 377 sf2 / tot, si2); 378 sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot, 379 sf3 / tot, si3); 380 sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot, 381 sf4 / tot, si4); 382 } 383 } 384 
sbuf_trim(&sb); 385 sbuf_finish(&sb); 386 sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req); 387 sbuf_delete(&sb); 388 return (0); 389 } 390 391 static int 392 sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS) 393 { 394 struct umtxq_chain *uc; 395 u_int i, j; 396 int clear, error; 397 398 clear = 0; 399 error = sysctl_handle_int(oidp, &clear, 0, req); 400 if (error != 0 || req->newptr == NULL) 401 return (error); 402 403 if (clear != 0) { 404 for (i = 0; i < 2; ++i) { 405 for (j = 0; j < UMTX_CHAINS; ++j) { 406 uc = &umtxq_chains[i][j]; 407 mtx_lock(&uc->uc_lock); 408 uc->length = 0; 409 uc->max_length = 0; 410 mtx_unlock(&uc->uc_lock); 411 } 412 } 413 } 414 return (0); 415 } 416 417 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear, 418 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, 419 sysctl_debug_umtx_chains_clear, "I", "Clear umtx chains statistics"); 420 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks, 421 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0, 422 sysctl_debug_umtx_chains_peaks, "A", "Highest peaks in chains max length"); 423 #endif 424 425 static void 426 umtxq_sysinit(void *arg __unused) 427 { 428 int i, j; 429 430 umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi), 431 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 432 for (i = 0; i < 2; ++i) { 433 for (j = 0; j < UMTX_CHAINS; ++j) { 434 mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL, 435 MTX_DEF | MTX_DUPOK); 436 LIST_INIT(&umtxq_chains[i][j].uc_queue[0]); 437 LIST_INIT(&umtxq_chains[i][j].uc_queue[1]); 438 LIST_INIT(&umtxq_chains[i][j].uc_spare_queue); 439 TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list); 440 umtxq_chains[i][j].uc_busy = 0; 441 umtxq_chains[i][j].uc_waiters = 0; 442 #ifdef UMTX_PROFILING 443 umtxq_chains[i][j].length = 0; 444 umtxq_chains[i][j].max_length = 0; 445 #endif 446 } 447 } 448 #ifdef UMTX_PROFILING 449 umtx_init_profiling(); 450 #endif 451 mtx_init(&umtx_lock, "umtx lock", NULL, MTX_DEF); 452 EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, 
NULL, 453 EVENTHANDLER_PRI_ANY); 454 umtx_shm_init(); 455 } 456 457 struct umtx_q * 458 umtxq_alloc(void) 459 { 460 struct umtx_q *uq; 461 462 uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO); 463 uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX, 464 M_WAITOK | M_ZERO); 465 TAILQ_INIT(&uq->uq_spare_queue->head); 466 TAILQ_INIT(&uq->uq_pi_contested); 467 uq->uq_inherited_pri = PRI_MAX; 468 return (uq); 469 } 470 471 void 472 umtxq_free(struct umtx_q *uq) 473 { 474 475 MPASS(uq->uq_spare_queue != NULL); 476 free(uq->uq_spare_queue, M_UMTX); 477 free(uq, M_UMTX); 478 } 479 480 static inline void 481 umtxq_hash(struct umtx_key *key) 482 { 483 unsigned n; 484 485 n = (uintptr_t)key->info.both.a + key->info.both.b; 486 key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS; 487 } 488 489 static inline struct umtxq_chain * 490 umtxq_getchain(struct umtx_key *key) 491 { 492 493 if (key->type <= TYPE_SEM) 494 return (&umtxq_chains[1][key->hash]); 495 return (&umtxq_chains[0][key->hash]); 496 } 497 498 /* 499 * Lock a chain. 500 */ 501 static inline void 502 umtxq_lock(struct umtx_key *key) 503 { 504 struct umtxq_chain *uc; 505 506 uc = umtxq_getchain(key); 507 mtx_lock(&uc->uc_lock); 508 } 509 510 /* 511 * Unlock a chain. 512 */ 513 static inline void 514 umtxq_unlock(struct umtx_key *key) 515 { 516 struct umtxq_chain *uc; 517 518 uc = umtxq_getchain(key); 519 mtx_unlock(&uc->uc_lock); 520 } 521 522 /* 523 * Set chain to busy state when following operation 524 * may be blocked (kernel mutex can not be used). 
525 */ 526 static inline void 527 umtxq_busy(struct umtx_key *key) 528 { 529 struct umtxq_chain *uc; 530 531 uc = umtxq_getchain(key); 532 mtx_assert(&uc->uc_lock, MA_OWNED); 533 if (uc->uc_busy) { 534 #ifdef SMP 535 if (smp_cpus > 1) { 536 int count = BUSY_SPINS; 537 if (count > 0) { 538 umtxq_unlock(key); 539 while (uc->uc_busy && --count > 0) 540 cpu_spinwait(); 541 umtxq_lock(key); 542 } 543 } 544 #endif 545 while (uc->uc_busy) { 546 uc->uc_waiters++; 547 msleep(uc, &uc->uc_lock, 0, "umtxqb", 0); 548 uc->uc_waiters--; 549 } 550 } 551 uc->uc_busy = 1; 552 } 553 554 /* 555 * Unbusy a chain. 556 */ 557 static inline void 558 umtxq_unbusy(struct umtx_key *key) 559 { 560 struct umtxq_chain *uc; 561 562 uc = umtxq_getchain(key); 563 mtx_assert(&uc->uc_lock, MA_OWNED); 564 KASSERT(uc->uc_busy != 0, ("not busy")); 565 uc->uc_busy = 0; 566 if (uc->uc_waiters) 567 wakeup_one(uc); 568 } 569 570 static inline void 571 umtxq_unbusy_unlocked(struct umtx_key *key) 572 { 573 574 umtxq_lock(key); 575 umtxq_unbusy(key); 576 umtxq_unlock(key); 577 } 578 579 static struct umtxq_queue * 580 umtxq_queue_lookup(struct umtx_key *key, int q) 581 { 582 struct umtxq_queue *uh; 583 struct umtxq_chain *uc; 584 585 uc = umtxq_getchain(key); 586 UMTXQ_LOCKED_ASSERT(uc); 587 LIST_FOREACH(uh, &uc->uc_queue[q], link) { 588 if (umtx_key_match(&uh->key, key)) 589 return (uh); 590 } 591 592 return (NULL); 593 } 594 595 static inline void 596 umtxq_insert_queue(struct umtx_q *uq, int q) 597 { 598 struct umtxq_queue *uh; 599 struct umtxq_chain *uc; 600 601 uc = umtxq_getchain(&uq->uq_key); 602 UMTXQ_LOCKED_ASSERT(uc); 603 KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue")); 604 uh = umtxq_queue_lookup(&uq->uq_key, q); 605 if (uh != NULL) { 606 LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link); 607 } else { 608 uh = uq->uq_spare_queue; 609 uh->key = uq->uq_key; 610 LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link); 611 #ifdef UMTX_PROFILING 612 uc->length++; 613 if 
(uc->length > uc->max_length) { 614 uc->max_length = uc->length; 615 if (uc->max_length > max_length) 616 max_length = uc->max_length; 617 } 618 #endif 619 } 620 uq->uq_spare_queue = NULL; 621 622 TAILQ_INSERT_TAIL(&uh->head, uq, uq_link); 623 uh->length++; 624 uq->uq_flags |= UQF_UMTXQ; 625 uq->uq_cur_queue = uh; 626 return; 627 } 628 629 static inline void 630 umtxq_remove_queue(struct umtx_q *uq, int q) 631 { 632 struct umtxq_chain *uc; 633 struct umtxq_queue *uh; 634 635 uc = umtxq_getchain(&uq->uq_key); 636 UMTXQ_LOCKED_ASSERT(uc); 637 if (uq->uq_flags & UQF_UMTXQ) { 638 uh = uq->uq_cur_queue; 639 TAILQ_REMOVE(&uh->head, uq, uq_link); 640 uh->length--; 641 uq->uq_flags &= ~UQF_UMTXQ; 642 if (TAILQ_EMPTY(&uh->head)) { 643 KASSERT(uh->length == 0, 644 ("inconsistent umtxq_queue length")); 645 #ifdef UMTX_PROFILING 646 uc->length--; 647 #endif 648 LIST_REMOVE(uh, link); 649 } else { 650 uh = LIST_FIRST(&uc->uc_spare_queue); 651 KASSERT(uh != NULL, ("uc_spare_queue is empty")); 652 LIST_REMOVE(uh, link); 653 } 654 uq->uq_spare_queue = uh; 655 uq->uq_cur_queue = NULL; 656 } 657 } 658 659 /* 660 * Check if there are multiple waiters 661 */ 662 static int 663 umtxq_count(struct umtx_key *key) 664 { 665 struct umtxq_queue *uh; 666 667 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 668 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 669 if (uh != NULL) 670 return (uh->length); 671 return (0); 672 } 673 674 /* 675 * Check if there are multiple PI waiters and returns first 676 * waiter. 
677 */ 678 static int 679 umtxq_count_pi(struct umtx_key *key, struct umtx_q **first) 680 { 681 struct umtxq_queue *uh; 682 683 *first = NULL; 684 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 685 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 686 if (uh != NULL) { 687 *first = TAILQ_FIRST(&uh->head); 688 return (uh->length); 689 } 690 return (0); 691 } 692 693 static int 694 umtxq_check_susp(struct thread *td) 695 { 696 struct proc *p; 697 int error; 698 699 /* 700 * The check for TDF_NEEDSUSPCHK is racy, but it is enough to 701 * eventually break the lockstep loop. 702 */ 703 if ((td->td_flags & TDF_NEEDSUSPCHK) == 0) 704 return (0); 705 error = 0; 706 p = td->td_proc; 707 PROC_LOCK(p); 708 if (P_SHOULDSTOP(p) || 709 ((p->p_flag & P_TRACED) && (td->td_dbgflags & TDB_SUSPEND))) { 710 if (p->p_flag & P_SINGLE_EXIT) 711 error = EINTR; 712 else 713 error = ERESTART; 714 } 715 PROC_UNLOCK(p); 716 return (error); 717 } 718 719 /* 720 * Wake up threads waiting on an userland object. 721 */ 722 723 static int 724 umtxq_signal_queue(struct umtx_key *key, int n_wake, int q) 725 { 726 struct umtxq_queue *uh; 727 struct umtx_q *uq; 728 int ret; 729 730 ret = 0; 731 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 732 uh = umtxq_queue_lookup(key, q); 733 if (uh != NULL) { 734 while ((uq = TAILQ_FIRST(&uh->head)) != NULL) { 735 umtxq_remove_queue(uq, q); 736 wakeup(uq); 737 if (++ret >= n_wake) 738 return (ret); 739 } 740 } 741 return (ret); 742 } 743 744 745 /* 746 * Wake up specified thread. 
747 */ 748 static inline void 749 umtxq_signal_thread(struct umtx_q *uq) 750 { 751 752 UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key)); 753 umtxq_remove(uq); 754 wakeup(uq); 755 } 756 757 static inline int 758 tstohz(const struct timespec *tsp) 759 { 760 struct timeval tv; 761 762 TIMESPEC_TO_TIMEVAL(&tv, tsp); 763 return tvtohz(&tv); 764 } 765 766 static void 767 abs_timeout_init(struct abs_timeout *timo, int clockid, int absolute, 768 const struct timespec *timeout) 769 { 770 771 timo->clockid = clockid; 772 if (!absolute) { 773 timo->is_abs_real = false; 774 abs_timeout_update(timo); 775 timespecadd(&timo->cur, timeout, &timo->end); 776 } else { 777 timo->end = *timeout; 778 timo->is_abs_real = clockid == CLOCK_REALTIME || 779 clockid == CLOCK_REALTIME_FAST || 780 clockid == CLOCK_REALTIME_PRECISE; 781 /* 782 * If is_abs_real, umtxq_sleep will read the clock 783 * after setting td_rtcgen; otherwise, read it here. 784 */ 785 if (!timo->is_abs_real) { 786 abs_timeout_update(timo); 787 } 788 } 789 } 790 791 static void 792 abs_timeout_init2(struct abs_timeout *timo, const struct _umtx_time *umtxtime) 793 { 794 795 abs_timeout_init(timo, umtxtime->_clockid, 796 (umtxtime->_flags & UMTX_ABSTIME) != 0, &umtxtime->_timeout); 797 } 798 799 static inline void 800 abs_timeout_update(struct abs_timeout *timo) 801 { 802 803 kern_clock_gettime(curthread, timo->clockid, &timo->cur); 804 } 805 806 static int 807 abs_timeout_gethz(struct abs_timeout *timo) 808 { 809 struct timespec tts; 810 811 if (timespeccmp(&timo->end, &timo->cur, <=)) 812 return (-1); 813 timespecsub(&timo->end, &timo->cur, &tts); 814 return (tstohz(&tts)); 815 } 816 817 static uint32_t 818 umtx_unlock_val(uint32_t flags, bool rb) 819 { 820 821 if (rb) 822 return (UMUTEX_RB_OWNERDEAD); 823 else if ((flags & UMUTEX_NONCONSISTENT) != 0) 824 return (UMUTEX_RB_NOTRECOV); 825 else 826 return (UMUTEX_UNOWNED); 827 828 } 829 830 /* 831 * Put thread into sleep state, before sleeping, check if 832 * thread was 
removed from umtx queue. 833 */ 834 static inline int 835 umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *abstime) 836 { 837 struct umtxq_chain *uc; 838 int error, timo; 839 840 if (abstime != NULL && abstime->is_abs_real) { 841 curthread->td_rtcgen = atomic_load_acq_int(&rtc_generation); 842 abs_timeout_update(abstime); 843 } 844 845 uc = umtxq_getchain(&uq->uq_key); 846 UMTXQ_LOCKED_ASSERT(uc); 847 for (;;) { 848 if (!(uq->uq_flags & UQF_UMTXQ)) { 849 error = 0; 850 break; 851 } 852 if (abstime != NULL) { 853 timo = abs_timeout_gethz(abstime); 854 if (timo < 0) { 855 error = ETIMEDOUT; 856 break; 857 } 858 } else 859 timo = 0; 860 error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo); 861 if (error == EINTR || error == ERESTART) { 862 umtxq_lock(&uq->uq_key); 863 break; 864 } 865 if (abstime != NULL) { 866 if (abstime->is_abs_real) 867 curthread->td_rtcgen = 868 atomic_load_acq_int(&rtc_generation); 869 abs_timeout_update(abstime); 870 } 871 umtxq_lock(&uq->uq_key); 872 } 873 874 curthread->td_rtcgen = 0; 875 return (error); 876 } 877 878 /* 879 * Convert userspace address into unique logical address. 
880 */ 881 int 882 umtx_key_get(const void *addr, int type, int share, struct umtx_key *key) 883 { 884 struct thread *td = curthread; 885 vm_map_t map; 886 vm_map_entry_t entry; 887 vm_pindex_t pindex; 888 vm_prot_t prot; 889 boolean_t wired; 890 891 key->type = type; 892 if (share == THREAD_SHARE) { 893 key->shared = 0; 894 key->info.private.vs = td->td_proc->p_vmspace; 895 key->info.private.addr = (uintptr_t)addr; 896 } else { 897 MPASS(share == PROCESS_SHARE || share == AUTO_SHARE); 898 map = &td->td_proc->p_vmspace->vm_map; 899 if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE, 900 &entry, &key->info.shared.object, &pindex, &prot, 901 &wired) != KERN_SUCCESS) { 902 return (EFAULT); 903 } 904 905 if ((share == PROCESS_SHARE) || 906 (share == AUTO_SHARE && 907 VM_INHERIT_SHARE == entry->inheritance)) { 908 key->shared = 1; 909 key->info.shared.offset = (vm_offset_t)addr - 910 entry->start + entry->offset; 911 vm_object_reference(key->info.shared.object); 912 } else { 913 key->shared = 0; 914 key->info.private.vs = td->td_proc->p_vmspace; 915 key->info.private.addr = (uintptr_t)addr; 916 } 917 vm_map_lookup_done(map, entry); 918 } 919 920 umtxq_hash(key); 921 return (0); 922 } 923 924 /* 925 * Release key. 926 */ 927 void 928 umtx_key_release(struct umtx_key *key) 929 { 930 if (key->shared) 931 vm_object_deallocate(key->info.shared.object); 932 } 933 934 /* 935 * Fetch and compare value, sleep on the address if value is not changed. 936 */ 937 static int 938 do_wait(struct thread *td, void *addr, u_long id, 939 struct _umtx_time *timeout, int compat32, int is_private) 940 { 941 struct abs_timeout timo; 942 struct umtx_q *uq; 943 u_long tmp; 944 uint32_t tmp32; 945 int error = 0; 946 947 uq = td->td_umtxq; 948 if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT, 949 is_private ? 
THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0) 950 return (error); 951 952 if (timeout != NULL) 953 abs_timeout_init2(&timo, timeout); 954 955 umtxq_lock(&uq->uq_key); 956 umtxq_insert(uq); 957 umtxq_unlock(&uq->uq_key); 958 if (compat32 == 0) { 959 error = fueword(addr, &tmp); 960 if (error != 0) 961 error = EFAULT; 962 } else { 963 error = fueword32(addr, &tmp32); 964 if (error == 0) 965 tmp = tmp32; 966 else 967 error = EFAULT; 968 } 969 umtxq_lock(&uq->uq_key); 970 if (error == 0) { 971 if (tmp == id) 972 error = umtxq_sleep(uq, "uwait", timeout == NULL ? 973 NULL : &timo); 974 if ((uq->uq_flags & UQF_UMTXQ) == 0) 975 error = 0; 976 else 977 umtxq_remove(uq); 978 } else if ((uq->uq_flags & UQF_UMTXQ) != 0) { 979 umtxq_remove(uq); 980 } 981 umtxq_unlock(&uq->uq_key); 982 umtx_key_release(&uq->uq_key); 983 if (error == ERESTART) 984 error = EINTR; 985 return (error); 986 } 987 988 /* 989 * Wake up threads sleeping on the specified address. 990 */ 991 int 992 kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private) 993 { 994 struct umtx_key key; 995 int ret; 996 997 if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, 998 is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0) 999 return (ret); 1000 umtxq_lock(&key); 1001 umtxq_signal(&key, n_wake); 1002 umtxq_unlock(&key); 1003 umtx_key_release(&key); 1004 return (0); 1005 } 1006 1007 /* 1008 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex. 1009 */ 1010 static int 1011 do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, 1012 struct _umtx_time *timeout, int mode) 1013 { 1014 struct abs_timeout timo; 1015 struct umtx_q *uq; 1016 uint32_t owner, old, id; 1017 int error, rv; 1018 1019 id = td->td_tid; 1020 uq = td->td_umtxq; 1021 error = 0; 1022 if (timeout != NULL) 1023 abs_timeout_init2(&timo, timeout); 1024 1025 /* 1026 * Care must be exercised when dealing with umtx structure. It 1027 * can fault on any access. 
1028 */ 1029 for (;;) { 1030 rv = fueword32(&m->m_owner, &owner); 1031 if (rv == -1) 1032 return (EFAULT); 1033 if (mode == _UMUTEX_WAIT) { 1034 if (owner == UMUTEX_UNOWNED || 1035 owner == UMUTEX_CONTESTED || 1036 owner == UMUTEX_RB_OWNERDEAD || 1037 owner == UMUTEX_RB_NOTRECOV) 1038 return (0); 1039 } else { 1040 /* 1041 * Robust mutex terminated. Kernel duty is to 1042 * return EOWNERDEAD to the userspace. The 1043 * umutex.m_flags UMUTEX_NONCONSISTENT is set 1044 * by the common userspace code. 1045 */ 1046 if (owner == UMUTEX_RB_OWNERDEAD) { 1047 rv = casueword32(&m->m_owner, 1048 UMUTEX_RB_OWNERDEAD, &owner, 1049 id | UMUTEX_CONTESTED); 1050 if (rv == -1) 1051 return (EFAULT); 1052 if (owner == UMUTEX_RB_OWNERDEAD) 1053 return (EOWNERDEAD); /* success */ 1054 rv = umtxq_check_susp(td); 1055 if (rv != 0) 1056 return (rv); 1057 continue; 1058 } 1059 if (owner == UMUTEX_RB_NOTRECOV) 1060 return (ENOTRECOVERABLE); 1061 1062 1063 /* 1064 * Try the uncontested case. This should be 1065 * done in userland. 1066 */ 1067 rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, 1068 &owner, id); 1069 /* The address was invalid. */ 1070 if (rv == -1) 1071 return (EFAULT); 1072 1073 /* The acquire succeeded. */ 1074 if (owner == UMUTEX_UNOWNED) 1075 return (0); 1076 1077 /* 1078 * If no one owns it but it is contested try 1079 * to acquire it. 1080 */ 1081 if (owner == UMUTEX_CONTESTED) { 1082 rv = casueword32(&m->m_owner, 1083 UMUTEX_CONTESTED, &owner, 1084 id | UMUTEX_CONTESTED); 1085 /* The address was invalid. */ 1086 if (rv == -1) 1087 return (EFAULT); 1088 1089 if (owner == UMUTEX_CONTESTED) 1090 return (0); 1091 1092 rv = umtxq_check_susp(td); 1093 if (rv != 0) 1094 return (rv); 1095 1096 /* 1097 * If this failed the lock has 1098 * changed, restart. 1099 */ 1100 continue; 1101 } 1102 } 1103 1104 if (mode == _UMUTEX_TRY) 1105 return (EBUSY); 1106 1107 /* 1108 * If we caught a signal, we have retried and now 1109 * exit immediately. 
1110 */ 1111 if (error != 0) 1112 return (error); 1113 1114 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, 1115 GET_SHARE(flags), &uq->uq_key)) != 0) 1116 return (error); 1117 1118 umtxq_lock(&uq->uq_key); 1119 umtxq_busy(&uq->uq_key); 1120 umtxq_insert(uq); 1121 umtxq_unlock(&uq->uq_key); 1122 1123 /* 1124 * Set the contested bit so that a release in user space 1125 * knows to use the system call for unlock. If this fails 1126 * either some one else has acquired the lock or it has been 1127 * released. 1128 */ 1129 rv = casueword32(&m->m_owner, owner, &old, 1130 owner | UMUTEX_CONTESTED); 1131 1132 /* The address was invalid. */ 1133 if (rv == -1) { 1134 umtxq_lock(&uq->uq_key); 1135 umtxq_remove(uq); 1136 umtxq_unbusy(&uq->uq_key); 1137 umtxq_unlock(&uq->uq_key); 1138 umtx_key_release(&uq->uq_key); 1139 return (EFAULT); 1140 } 1141 1142 /* 1143 * We set the contested bit, sleep. Otherwise the lock changed 1144 * and we need to retry or we lost a race to the thread 1145 * unlocking the umtx. 1146 */ 1147 umtxq_lock(&uq->uq_key); 1148 umtxq_unbusy(&uq->uq_key); 1149 if (old == owner) 1150 error = umtxq_sleep(uq, "umtxn", timeout == NULL ? 1151 NULL : &timo); 1152 umtxq_remove(uq); 1153 umtxq_unlock(&uq->uq_key); 1154 umtx_key_release(&uq->uq_key); 1155 1156 if (error == 0) 1157 error = umtxq_check_susp(td); 1158 } 1159 1160 return (0); 1161 } 1162 1163 /* 1164 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex. 1165 */ 1166 static int 1167 do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags, bool rb) 1168 { 1169 struct umtx_key key; 1170 uint32_t owner, old, id, newlock; 1171 int error, count; 1172 1173 id = td->td_tid; 1174 /* 1175 * Make sure we own this mtx. 
1176 */ 1177 error = fueword32(&m->m_owner, &owner); 1178 if (error == -1) 1179 return (EFAULT); 1180 1181 if ((owner & ~UMUTEX_CONTESTED) != id) 1182 return (EPERM); 1183 1184 newlock = umtx_unlock_val(flags, rb); 1185 if ((owner & UMUTEX_CONTESTED) == 0) { 1186 error = casueword32(&m->m_owner, owner, &old, newlock); 1187 if (error == -1) 1188 return (EFAULT); 1189 if (old == owner) 1190 return (0); 1191 owner = old; 1192 } 1193 1194 /* We should only ever be in here for contested locks */ 1195 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags), 1196 &key)) != 0) 1197 return (error); 1198 1199 umtxq_lock(&key); 1200 umtxq_busy(&key); 1201 count = umtxq_count(&key); 1202 umtxq_unlock(&key); 1203 1204 /* 1205 * When unlocking the umtx, it must be marked as unowned if 1206 * there is zero or one thread only waiting for it. 1207 * Otherwise, it must be marked as contested. 1208 */ 1209 if (count > 1) 1210 newlock |= UMUTEX_CONTESTED; 1211 error = casueword32(&m->m_owner, owner, &old, newlock); 1212 umtxq_lock(&key); 1213 umtxq_signal(&key, 1); 1214 umtxq_unbusy(&key); 1215 umtxq_unlock(&key); 1216 umtx_key_release(&key); 1217 if (error == -1) 1218 return (EFAULT); 1219 if (old != owner) 1220 return (EINVAL); 1221 return (0); 1222 } 1223 1224 /* 1225 * Check if the mutex is available and wake up a waiter, 1226 * only for simple mutex. 
 */
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

	/*
	 * Returns 0 on success (including "nothing to do"), EFAULT when a
	 * user-memory access fails, or the error from umtx_key_get().
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	/*
	 * The word still names an owning thread and is not one of the
	 * robust-mutex markers: nobody needs to be woken.
	 */
	if ((owner & ~UMUTEX_CONTESTED) != 0 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV)
		return (0);

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	/*
	 * Mark the chain busy and sample the number of queued waiters; the
	 * chain stays busy across the user-memory access below.
	 */
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * With at most one waiter the contested bit is no longer needed,
	 * so try to switch CONTESTED -> UNOWNED.  casueword32() stores the
	 * value it observed into 'owner', which the wake test below uses.
	 */
	if (count <= 1 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV) {
		error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    UMUTEX_UNOWNED);
		if (error == -1)
			error = EFAULT;
	}

	/*
	 * Wake one waiter only if there is one and the (possibly refreshed)
	 * owner word shows no owning thread or a robust marker.
	 */
	umtxq_lock(&key);
	if (error == 0 && count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 ||
	    owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV))
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

/*
 * Check if the mutex has waiters and tries to fix contention bit.
1279 */ 1280 static int 1281 do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags) 1282 { 1283 struct umtx_key key; 1284 uint32_t owner, old; 1285 int type; 1286 int error; 1287 int count; 1288 1289 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT | 1290 UMUTEX_ROBUST)) { 1291 case 0: 1292 case UMUTEX_ROBUST: 1293 type = TYPE_NORMAL_UMUTEX; 1294 break; 1295 case UMUTEX_PRIO_INHERIT: 1296 type = TYPE_PI_UMUTEX; 1297 break; 1298 case (UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST): 1299 type = TYPE_PI_ROBUST_UMUTEX; 1300 break; 1301 case UMUTEX_PRIO_PROTECT: 1302 type = TYPE_PP_UMUTEX; 1303 break; 1304 case (UMUTEX_PRIO_PROTECT | UMUTEX_ROBUST): 1305 type = TYPE_PP_ROBUST_UMUTEX; 1306 break; 1307 default: 1308 return (EINVAL); 1309 } 1310 if ((error = umtx_key_get(m, type, GET_SHARE(flags), &key)) != 0) 1311 return (error); 1312 1313 owner = 0; 1314 umtxq_lock(&key); 1315 umtxq_busy(&key); 1316 count = umtxq_count(&key); 1317 umtxq_unlock(&key); 1318 /* 1319 * Only repair contention bit if there is a waiter, this means the mutex 1320 * is still being referenced by userland code, otherwise don't update 1321 * any memory. 
1322 */ 1323 if (count > 1) { 1324 error = fueword32(&m->m_owner, &owner); 1325 if (error == -1) 1326 error = EFAULT; 1327 while (error == 0 && (owner & UMUTEX_CONTESTED) == 0) { 1328 error = casueword32(&m->m_owner, owner, &old, 1329 owner | UMUTEX_CONTESTED); 1330 if (error == -1) { 1331 error = EFAULT; 1332 break; 1333 } 1334 if (old == owner) 1335 break; 1336 owner = old; 1337 error = umtxq_check_susp(td); 1338 if (error != 0) 1339 break; 1340 } 1341 } else if (count == 1) { 1342 error = fueword32(&m->m_owner, &owner); 1343 if (error == -1) 1344 error = EFAULT; 1345 while (error == 0 && (owner & ~UMUTEX_CONTESTED) != 0 && 1346 (owner & UMUTEX_CONTESTED) == 0) { 1347 error = casueword32(&m->m_owner, owner, &old, 1348 owner | UMUTEX_CONTESTED); 1349 if (error == -1) { 1350 error = EFAULT; 1351 break; 1352 } 1353 if (old == owner) 1354 break; 1355 owner = old; 1356 error = umtxq_check_susp(td); 1357 if (error != 0) 1358 break; 1359 } 1360 } 1361 umtxq_lock(&key); 1362 if (error == EFAULT) { 1363 umtxq_signal(&key, INT_MAX); 1364 } else if (count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 || 1365 owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV)) 1366 umtxq_signal(&key, 1); 1367 umtxq_unbusy(&key); 1368 umtxq_unlock(&key); 1369 umtx_key_release(&key); 1370 return (error); 1371 } 1372 1373 static inline struct umtx_pi * 1374 umtx_pi_alloc(int flags) 1375 { 1376 struct umtx_pi *pi; 1377 1378 pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags); 1379 TAILQ_INIT(&pi->pi_blocked); 1380 atomic_add_int(&umtx_pi_allocated, 1); 1381 return (pi); 1382 } 1383 1384 static inline void 1385 umtx_pi_free(struct umtx_pi *pi) 1386 { 1387 uma_zfree(umtx_pi_zone, pi); 1388 atomic_add_int(&umtx_pi_allocated, -1); 1389 } 1390 1391 /* 1392 * Adjust the thread's position on a pi_state after its priority has been 1393 * changed. 
1394 */ 1395 static int 1396 umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td) 1397 { 1398 struct umtx_q *uq, *uq1, *uq2; 1399 struct thread *td1; 1400 1401 mtx_assert(&umtx_lock, MA_OWNED); 1402 if (pi == NULL) 1403 return (0); 1404 1405 uq = td->td_umtxq; 1406 1407 /* 1408 * Check if the thread needs to be moved on the blocked chain. 1409 * It needs to be moved if either its priority is lower than 1410 * the previous thread or higher than the next thread. 1411 */ 1412 uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq); 1413 uq2 = TAILQ_NEXT(uq, uq_lockq); 1414 if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) || 1415 (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) { 1416 /* 1417 * Remove thread from blocked chain and determine where 1418 * it should be moved to. 1419 */ 1420 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq); 1421 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) { 1422 td1 = uq1->uq_thread; 1423 MPASS(td1->td_proc->p_magic == P_MAGIC); 1424 if (UPRI(td1) > UPRI(td)) 1425 break; 1426 } 1427 1428 if (uq1 == NULL) 1429 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq); 1430 else 1431 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq); 1432 } 1433 return (1); 1434 } 1435 1436 static struct umtx_pi * 1437 umtx_pi_next(struct umtx_pi *pi) 1438 { 1439 struct umtx_q *uq_owner; 1440 1441 if (pi->pi_owner == NULL) 1442 return (NULL); 1443 uq_owner = pi->pi_owner->td_umtxq; 1444 if (uq_owner == NULL) 1445 return (NULL); 1446 return (uq_owner->uq_pi_blocked); 1447 } 1448 1449 /* 1450 * Floyd's Cycle-Finding Algorithm. 
1451 */ 1452 static bool 1453 umtx_pi_check_loop(struct umtx_pi *pi) 1454 { 1455 struct umtx_pi *pi1; /* fast iterator */ 1456 1457 mtx_assert(&umtx_lock, MA_OWNED); 1458 if (pi == NULL) 1459 return (false); 1460 pi1 = pi; 1461 for (;;) { 1462 pi = umtx_pi_next(pi); 1463 if (pi == NULL) 1464 break; 1465 pi1 = umtx_pi_next(pi1); 1466 if (pi1 == NULL) 1467 break; 1468 pi1 = umtx_pi_next(pi1); 1469 if (pi1 == NULL) 1470 break; 1471 if (pi == pi1) 1472 return (true); 1473 } 1474 return (false); 1475 } 1476 1477 /* 1478 * Propagate priority when a thread is blocked on POSIX 1479 * PI mutex. 1480 */ 1481 static void 1482 umtx_propagate_priority(struct thread *td) 1483 { 1484 struct umtx_q *uq; 1485 struct umtx_pi *pi; 1486 int pri; 1487 1488 mtx_assert(&umtx_lock, MA_OWNED); 1489 pri = UPRI(td); 1490 uq = td->td_umtxq; 1491 pi = uq->uq_pi_blocked; 1492 if (pi == NULL) 1493 return; 1494 if (umtx_pi_check_loop(pi)) 1495 return; 1496 1497 for (;;) { 1498 td = pi->pi_owner; 1499 if (td == NULL || td == curthread) 1500 return; 1501 1502 MPASS(td->td_proc != NULL); 1503 MPASS(td->td_proc->p_magic == P_MAGIC); 1504 1505 thread_lock(td); 1506 if (td->td_lend_user_pri > pri) 1507 sched_lend_user_prio(td, pri); 1508 else { 1509 thread_unlock(td); 1510 break; 1511 } 1512 thread_unlock(td); 1513 1514 /* 1515 * Pick up the lock that td is blocked on. 1516 */ 1517 uq = td->td_umtxq; 1518 pi = uq->uq_pi_blocked; 1519 if (pi == NULL) 1520 break; 1521 /* Resort td on the list if needed. */ 1522 umtx_pi_adjust_thread(pi, td); 1523 } 1524 } 1525 1526 /* 1527 * Unpropagate priority for a PI mutex when a thread blocked on 1528 * it is interrupted by signal or resumed by others. 
 */
static void
umtx_repropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);

	/* A cyclic ownership chain would never terminate the walk below. */
	if (umtx_pi_check_loop(pi))
		return;
	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		/*
		 * Recompute the owner's lent priority from scratch: the
		 * minimum UPRI() over the head waiter of every PI mutex it
		 * has contested.
		 */
		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		/* Never lend worse than the owner's own inherited priority. */
		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		thread_lock(pi->pi_owner);
		sched_lend_user_prio(pi->pi_owner, pri);
		thread_unlock(pi->pi_owner);
		/* Continue with the mutex the owner itself is blocked on. */
		if ((pi = uq_owner->uq_pi_blocked) != NULL)
			umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
	}
}

/*
 * Insert a PI mutex into owned list.
 *
 * The mutex must currently be unowned (asserted) and umtx_lock must be held.
 */
static void
umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_assert(&umtx_lock, MA_OWNED);
	MPASS(pi->pi_owner == NULL);
	pi->pi_owner = owner;
	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
}


/*
 * Disown a PI mutex, and remove it from the owned list.
 *
 * pi->pi_owner is dereferenced unconditionally, so the caller must have
 * checked that the mutex is owned.  umtx_lock must be held.
 */
static void
umtx_pi_disown(struct umtx_pi *pi)
{

	mtx_assert(&umtx_lock, MA_OWNED);
	TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, pi, pi_link);
	pi->pi_owner = NULL;
}

/*
 * Claim ownership of a PI mutex.
 *
 * Returns 0 when 'owner' already owns or successfully takes 'pi', and
 * EPERM when a different thread is recorded as the owner.  Takes and
 * releases umtx_lock itself.
 */
static int
umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq;
	int pri;

	mtx_lock(&umtx_lock);
	if (pi->pi_owner == owner) {
		mtx_unlock(&umtx_lock);
		return (0);
	}

	if (pi->pi_owner != NULL) {
		/*
		 * userland may have already messed the mutex, sigh.
		 */
		mtx_unlock(&umtx_lock);
		return (EPERM);
	}
	umtx_pi_setowner(pi, owner);
	/*
	 * If threads are already blocked, lend the new owner the head
	 * waiter's priority when that is numerically lower than its own.
	 */
	uq = TAILQ_FIRST(&pi->pi_blocked);
	if (uq != NULL) {
		pri = UPRI(uq->uq_thread);
		thread_lock(owner);
		if (pri < UPRI(owner))
			sched_lend_user_prio(owner, pri);
		thread_unlock(owner);
	}
	mtx_unlock(&umtx_lock);
	return (0);
}

/*
 * Adjust a thread's order position in its blocked PI mutex,
 * this may result new priority propagating process.
 *
 * 'oldpri' is accepted but not used by this implementation.
 */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	mtx_lock(&umtx_lock);
	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	if (pi != NULL) {
		umtx_pi_adjust_thread(pi, td);
		umtx_repropagate_priority(pi);
	}
	mtx_unlock(&umtx_lock);
}

/*
 * Sleep on a PI mutex.
 *
 * Entered with the umtx chain locked and busy (both asserted).  The chain
 * is un-busied before sleeping and unlocked before returning.  'owner' is
 * the thread id taken from the mutex word; 'shared' selects whether the
 * owner is looked up in any process or only in the caller's process.
 * Returns whatever umtxq_sleep() returns.
 */
static int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, uint32_t owner,
    const char *wmesg, struct abs_timeout *timo, bool shared)
{
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int error, pri;
#ifdef INVARIANTS
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
#endif
	error = 0;
	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key));
	KASSERT(uc->uc_busy != 0, ("umtx chain is not busy"));
	umtxq_insert(uq);
	mtx_lock(&umtx_lock);
	if (pi->pi_owner == NULL) {
		/*
		 * The kernel does not know the owner yet.  umtx_lock is
		 * dropped around tdfind(), so ownership is re-checked after
		 * it is retaken.
		 */
		mtx_unlock(&umtx_lock);
		td1 = tdfind(owner, shared ? -1 : td->td_proc->p_pid);
		mtx_lock(&umtx_lock);
		if (td1 != NULL) {
			if (pi->pi_owner == NULL)
				umtx_pi_setowner(pi, td1);
			/* NOTE(review): tdfind() presumably returns with the proc locked - confirm. */
			PROC_UNLOCK(td1->td_proc);
		}
	}

	/* Insert ourselves into the blocked list in ascending UPRI() order. */
	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	thread_lock(td);
	td->td_flags |= TDF_UPIBLOCKED;
	thread_unlock(td);
	umtx_propagate_priority(td);
	mtx_unlock(&umtx_lock);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, wmesg, timo);
	umtxq_remove(uq);

	/*
	 * Awake again (for whatever reason): leave the blocked list and
	 * recompute the priorities that had been lent because of us.
	 */
	mtx_lock(&umtx_lock);
	uq->uq_pi_blocked = NULL;
	thread_lock(td);
	td->td_flags &= ~TDF_UPIBLOCKED;
	thread_unlock(td);
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_repropagate_priority(pi);
	mtx_unlock(&umtx_lock);
	umtxq_unlock(&uq->uq_key);

	return (error);
}

/*
 * Add reference count for a PI mutex.
 *
 * The chain for pi->pi_key must be locked (asserted).
 */
static void
umtx_pi_ref(struct umtx_pi *pi)
{

	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&pi->pi_key));
	pi->pi_refcount++;
}

/*
 * Decrease reference count for a PI mutex, if the counter
 * is decreased to zero, its memory space is freed.
 */
static void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		/* Last reference: detach from any owner, unhash and free. */
		mtx_lock(&umtx_lock);
		if (pi->pi_owner != NULL)
			umtx_pi_disown(pi);
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
		    ("blocked queue not empty"));
		mtx_unlock(&umtx_lock);
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		umtx_pi_free(pi);
	}
}

/*
 * Find a PI mutex in hash table.
 *
 * The chain for 'key' must be locked (asserted).  Returns the matching
 * umtx_pi, or NULL if none is hashed under that key.
 */
static struct umtx_pi *
umtx_pi_lookup(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtx_pi *pi;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);

	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
		if (umtx_key_match(&pi->pi_key, key)) {
			return (pi);
		}
	}
	return (NULL);
}

/*
 * Insert a PI mutex into hash table.
 *
 * pi->pi_key must already be set; its chain must be locked (asserted).
 */
static inline void
umtx_pi_insert(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
}

/*
 * Lock a PI mutex.
 *
 * 'try' non-zero makes the call fail with EBUSY instead of sleeping.
 * Returns 0 on acquisition, EOWNERDEAD when a robust mutex was acquired
 * from a dead owner, or EFAULT / ENOTRECOVERABLE / EDEADLK / EBUSY / EPERM
 * or a sleep or suspension-check error otherwise.
 */
static int
do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, old_owner, owner, old;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	/*
	 * Find or create the kernel-side umtx_pi for this key.  A sleeping
	 * allocation is only attempted with the chain unlocked, after which
	 * the lookup is repeated in case another thread inserted one.
	 */
	umtxq_lock(&uq->uq_key);
	pi = umtx_pi_lookup(&uq->uq_key);
	if (pi == NULL) {
		new_pi = umtx_pi_alloc(M_NOWAIT);
		if (new_pi == NULL) {
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc(M_WAITOK);
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL) {
				umtx_pi_free(new_pi);
				new_pi = NULL;
			}
		}
		if (new_pi != NULL) {
			new_pi->pi_key = uq->uq_key;
			umtx_pi_insert(new_pi);
			pi = new_pi;
		}
	}
	umtx_pi_ref(pi);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Care must be exercised when dealing with umtx structure.  It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED) {
			error = 0;
			break;
		}

		if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
			break;
		}

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED || owner == UMUTEX_RB_OWNERDEAD) {
			old_owner = owner;
			rv = casueword32(&m->m_owner, owner, &owner,
			    id | UMUTEX_CONTESTED);
			/* The address was invalid. */
			if (rv == -1) {
				error = EFAULT;
				break;
			}

			if (owner == old_owner) {
				/* User word is ours; record kernel ownership. */
				umtxq_lock(&uq->uq_key);
				umtxq_busy(&uq->uq_key);
				error = umtx_pi_claim(pi, td);
				umtxq_unbusy(&uq->uq_key);
				umtxq_unlock(&uq->uq_key);
				if (error != 0) {
					/*
					 * Since we're going to return an
					 * error, restore the m_owner to its
					 * previous, unowned state to avoid
					 * compounding the problem.
					 */
					(void)casuword32(&m->m_owner,
					    id | UMUTEX_CONTESTED,
					    old_owner);
				}
				if (error == 0 &&
				    old_owner == UMUTEX_RB_OWNERDEAD)
					error = EOWNERDEAD;
				break;
			}

			error = umtxq_check_susp(td);
			if (error != 0)
				break;

			/* If this failed the lock has changed, restart. */
			continue;
		}

		if ((owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		/* Busy the chain across the user-memory CAS below. */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old, owner |
		    UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}

		umtxq_lock(&uq->uq_key);
		/*
		 * We set the contested bit, sleep.  Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.  Note that the UMUTEX_RB_OWNERDEAD
		 * value for owner is impossible there.
		 */
		if (old == owner) {
			/* umtxq_sleep_pi() un-busies and unlocks the chain. */
			error = umtxq_sleep_pi(uq, pi,
			    owner & ~UMUTEX_CONTESTED,
			    "umtxpi", timeout == NULL ? NULL : &timo,
			    (flags & USYNC_PROCESS_SHARED) != 0);
			/*
			 * On a sleep error, make one more pass; the
			 * "caught a signal" check above ends the loop.
			 */
			if (error != 0)
				continue;
		} else {
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
		}

		error = umtxq_check_susp(td);
		if (error != 0)
			break;
	}

	umtxq_lock(&uq->uq_key);
	umtx_pi_unref(pi);
	umtxq_unlock(&uq->uq_key);

	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Unlock a PI mutex.
 *
 * 'rb' is passed to umtx_unlock_val() to choose the value stored in the
 * owner word.  Returns 0, EFAULT, EPERM when the caller is not the owner,
 * or EINVAL when the owner word changed underneath the final CAS.
 */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	uint32_t id, new_owner, old, owner;
	int count, error, pri;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	new_owner = umtx_unlock_val(flags, rb);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, new_owner);
		if (error == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		/* The word changed under us; continue on the contested path. */
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count_pi(&key, &uq_first);
	if (uq_first != NULL) {
		mtx_lock(&umtx_lock);
		pi = uq_first->uq_pi_blocked;
		KASSERT(pi != NULL, ("pi == NULL?"));
		/*
		 * The kernel-side owner must be us, except that with 'rb'
		 * set an unowned umtx_pi is also accepted.
		 */
		if (pi->pi_owner != td && !(rb && pi->pi_owner == NULL)) {
			mtx_unlock(&umtx_lock);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			/* userland messed the mutex */
			return (EPERM);
		}
		uq_me = td->td_umtxq;
		if (pi->pi_owner == td)
			umtx_pi_disown(pi);
		/* get highest priority thread which is still sleeping. */
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		while (uq_first != NULL &&
		    (uq_first->uq_flags & UQF_UMTXQ) == 0) {
			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
		}
		/*
		 * Recompute our lent priority from the PI mutexes we still
		 * hold contested, now that this one has been disowned.
		 */
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
		if (uq_first)
			umtxq_signal_thread(uq_first);
	} else {
		pi = umtx_pi_lookup(&key);
		/*
		 * A umtx_pi can exist if a signal or timeout removed the
		 * last waiter from the umtxq, but there is still
		 * a thread in do_lock_pi() holding the umtx_pi.
		 */
		if (pi != NULL) {
			/*
			 * The umtx_pi can be unowned, such as when a thread
			 * has just entered do_lock_pi(), allocated the
			 * umtx_pi, and unlocked the umtxq.
			 * If the current thread owns it, it must disown it.
			 */
			mtx_lock(&umtx_lock);
			if (pi->pi_owner == td)
				umtx_pi_disown(pi);
			mtx_unlock(&umtx_lock);
		}
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */

	if (count > 1)
		new_owner |= UMUTEX_CONTESTED;
	error = casueword32(&m->m_owner, owner, &old, new_owner);

	umtxq_unbusy_unlocked(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

/*
 * Lock a PP mutex.
 *
 * 'try' non-zero makes the call fail with EBUSY instead of sleeping.
 * Returns 0 on acquisition, EOWNERDEAD when acquired from a dead owner
 * (treated as success for priority purposes), or EFAULT / EINVAL /
 * ENOTRECOVERABLE / EBUSY or a sleep error otherwise.
 */
static int
do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct abs_timeout timo;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t ceiling;
	uint32_t owner, id;
	int error, pri, old_inherited_pri, su, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	/* Only threads passing the PRIV_SCHED_RTPRIO check get a boost. */
	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
	for (;;) {
		/* Saved so the boost can be undone if we fail or must sleep. */
		old_inherited_pri = uq->uq_inherited_pri;
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		rv = fueword32(&m->m_ceilings[0], &ceiling);
		if (rv == -1) {
			error = EFAULT;
			goto out;
		}
		/*
		 * Invert the user ceiling.  The arithmetic is unsigned, so a
		 * user value above RTP_PRIO_MAX wraps and is rejected here.
		 */
		ceiling = RTP_PRIO_MAX - ceiling;
		if (ceiling > RTP_PRIO_MAX) {
			error = EINVAL;
			goto out;
		}

		mtx_lock(&umtx_lock);
		/* The caller's priority must not already exceed the ceiling. */
		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
			mtx_unlock(&umtx_lock);
			error = EINVAL;
			goto out;
		}
		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
			thread_lock(td);
			if (uq->uq_inherited_pri < UPRI(td))
				sched_lend_user_prio(td, uq->uq_inherited_pri);
			thread_unlock(td);
		}
		mtx_unlock(&umtx_lock);

		/* The unlocked state of a PP mutex is UMUTEX_CONTESTED. */
		rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    id | UMUTEX_CONTESTED);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		if (owner == UMUTEX_CONTESTED) {
			error = 0;
			break;
		} else if (owner == UMUTEX_RB_OWNERDEAD) {
			rv = casueword32(&m->m_owner, UMUTEX_RB_OWNERDEAD,
			    &owner, id | UMUTEX_CONTESTED);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (owner == UMUTEX_RB_OWNERDEAD) {
				error = EOWNERDEAD; /* success */
				break;
			}
			error = 0;
		} else if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", timeout == NULL ?
		    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * We did not get the mutex on this pass: drop the ceiling
		 * boost and fall back to whatever the contested PI mutexes
		 * we own still require.
		 */
		mtx_lock(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}

	/* Failure after the boost was applied: undo it the same way. */
	if (error != 0 && error != EOWNERDEAD) {
		mtx_lock(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}

out:
	umtxq_unbusy_unlocked(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Unlock a PP mutex.
 *
 * Reads the ceiling to restore from m_ceilings[1]; a stored value of -1
 * selects PRI_MAX.  Returns 0, EFAULT, EPERM when the caller is not the
 * owner, or EINVAL for an out-of-range ceiling.
 */
static int
do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t id, owner, rceiling;
	int error, pri, new_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);

	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
	if (error != 0)
		return (error);

	/* rceiling is unsigned; -1 converts to the all-ones value here. */
	if (rceiling == -1)
		new_inherited_pri = PRI_MAX;
	else {
		rceiling = RTP_PRIO_MAX - rceiling;
		if (rceiling > RTP_PRIO_MAX)
			return (EINVAL);
		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
	}

	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_unlock(&key);
	/*
	 * For priority protected mutex, always set unlocked state
	 * to UMUTEX_CONTESTED, so that userland always enters kernel
	 * to lock the mutex, it is necessary because thread priority
	 * has to be adjusted for such mutex.
	 */
	error = suword32(&m->m_owner, umtx_unlock_val(flags, rb) |
	    UMUTEX_CONTESTED);

	umtxq_lock(&key);
	if (error == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	if (error == -1)
		error = EFAULT;
	else {
		/*
		 * Install the restored ceiling (privileged threads only) and
		 * recompute the lent priority from the PI mutexes still held.
		 */
		mtx_lock(&umtx_lock);
		if (su != 0)
			uq->uq_inherited_pri = new_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}
	umtx_key_release(&key);
	return (error);
}

/*
 * Change the priority ceiling stored in m_ceilings[0] of a PP mutex.
 *
 * The mutex is briefly acquired (or must already be owned by the caller)
 * while the ceiling is rewritten.  When 'old_ceiling' is not NULL the
 * previous ceiling is copied out to it on success.  Returns 0, EFAULT,
 * EINVAL, EOWNERDEAD, ENOTRECOVERABLE or a sleep error.
 */
static int
do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
    uint32_t *old_ceiling)
{
	struct umtx_q *uq;
	uint32_t flags, id, owner, save_ceiling;
	int error, rv, rv1;

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
		return (EINVAL);
	if (ceiling > RTP_PRIO_MAX)
		return (EINVAL);
	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	for (;;) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		rv = fueword32(&m->m_ceilings[0], &save_ceiling);
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    id | UMUTEX_CONTESTED);
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		if (owner == UMUTEX_CONTESTED) {
			/* We took the mutex: update, then release it again. */
			rv = suword32(&m->m_ceilings[0], ceiling);
			rv1 = suword32(&m->m_owner, UMUTEX_CONTESTED);
			error = (rv == 0 && rv1 == 0) ? 0: EFAULT;
			break;
		}

		if ((owner & ~UMUTEX_CONTESTED) == id) {
			/* Already owned by the caller: update in place. */
			rv = suword32(&m->m_ceilings[0], ceiling);
			error = rv == 0 ? 0 : EFAULT;
			break;
		}

		if (owner == UMUTEX_RB_OWNERDEAD) {
			error = EOWNERDEAD;
			break;
		} else if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", NULL);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	/* On success wake every waiter queued on this key. */
	umtxq_lock(&uq->uq_key);
	if (error == 0)
		umtxq_signal(&uq->uq_key, INT_MAX);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == 0 && old_ceiling != NULL) {
		rv = suword32(old_ceiling, save_ceiling);
		error = rv == 0 ? 0 : EFAULT;
	}
	return (error);
}

/*
 * Lock a userland POSIX mutex.
 */
static int
do_lock_umutex(struct thread *td, struct umutex *m,
    struct _umtx_time *timeout, int mode)
{
	uint32_t flags;
	int error;

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	/*
	 * Dispatch on the locking protocol.  Setting both PRIO_INHERIT and
	 * PRIO_PROTECT is rejected.
	 */
	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
	case 0:
		error = do_lock_normal(td, m, flags, timeout, mode);
		break;
	case UMUTEX_PRIO_INHERIT:
		error = do_lock_pi(td, m, flags, timeout, mode);
		break;
	case UMUTEX_PRIO_PROTECT:
		error = do_lock_pp(td, m, flags, timeout, mode);
		break;
	default:
		return (EINVAL);
	}
	/*
	 * Normalize the interruption code: untimed locks (other than
	 * _UMUTEX_WAIT mode) become ERESTART, timed locks report EINTR.
	 */
	if (timeout == NULL) {
		if (error == EINTR && mode != _UMUTEX_WAIT)
			error = ERESTART;
	} else {
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a userland POSIX mutex.
 *
 * Dispatches on the protocol bits in m_flags; 'rb' is forwarded to the
 * protocol-specific unlock routine.  Returns EFAULT if the flags cannot
 * be read and EINVAL for an invalid protocol combination.
 */
static int
do_unlock_umutex(struct thread *td, struct umutex *m, bool rb)
{
	uint32_t flags;
	int error;

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
	case 0:
		return (do_unlock_normal(td, m, flags, rb));
	case UMUTEX_PRIO_INHERIT:
		return (do_unlock_pi(td, m, flags, rb));
	case UMUTEX_PRIO_PROTECT:
		return (do_unlock_pp(td, m, flags, rb));
	}

	return (EINVAL);
}

/*
 * Wait on a userland condition variable.
 *
 * Queues the caller on 'cv', releases the user mutex 'm', then sleeps.
 * 'timeout' may be NULL; 'wflags' may carry CVWAIT_CLOCKID (take the clock
 * from cv->c_clockid) and CVWAIT_ABSTIME.  Returns 0 when the thread was
 * removed from the queue by a waker, otherwise the unlock or sleep error
 * with ERESTART converted to EINTR.
 */
static int
do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
    struct timespec *timeout, u_long wflags)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags, clockid, hasw;
	int error;

	uq = td->td_umtxq;
	error = fueword32(&cv->c_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if ((wflags & CVWAIT_CLOCKID) != 0) {
		error = fueword32(&cv->c_clockid, &clockid);
		if (error == -1) {
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}
		if (clockid < CLOCK_REALTIME ||
		    clockid >= CLOCK_THREAD_CPUTIME_ID) {
			/* hmm, only HW clock id will work. */
			umtx_key_release(&uq->uq_key);
			return (EINVAL);
		}
	} else {
		clockid = CLOCK_REALTIME;
	}

	/* Enqueue before the mutex is released so no wakeup can be missed. */
	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Set c_has_waiters to 1 before releasing user mutex, also
	 * don't modify cache line when unnecessary.
	 */
	error = fueword32(&cv->c_has_waiters, &hasw);
	if (error == 0 && hasw == 0)
		suword32(&cv->c_has_waiters, 1);

	umtxq_unbusy_unlocked(&uq->uq_key);

	error = do_unlock_umutex(td, m, false);

	if (timeout != NULL)
		abs_timeout_init(&timo, clockid, (wflags & CVWAIT_ABSTIME) != 0,
		    timeout);

	/* Sleep only if the mutex was released successfully. */
	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		error = umtxq_sleep(uq, "ucond", timeout == NULL ?
		    NULL : &timo);
	}

	/* No longer on the queue: a waker removed us, so report success. */
	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		error = 0;
	else {
		/*
		 * This must be a timeout, an interruption by a signal or a
		 * spurious wakeup; clear the c_has_waiters flag when
		 * necessary.
		 */
		umtxq_busy(&uq->uq_key);
		/* Re-check: the flag is tested again once the chain is busy. */
		if ((uq->uq_flags & UQF_UMTXQ) != 0) {
			int oldlen = uq->uq_cur_queue->length;
			umtxq_remove(uq);
			if (oldlen == 1) {
				/* We were the last waiter on this queue. */
				umtxq_unlock(&uq->uq_key);
				suword32(&cv->c_has_waiters, 0);
				umtxq_lock(&uq->uq_key);
			}
		}
		umtxq_unbusy(&uq->uq_key);
		if (error == ERESTART)
			error = EINTR;
	}

	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Signal a userland condition variable.
2568 */ 2569 static int 2570 do_cv_signal(struct thread *td, struct ucond *cv) 2571 { 2572 struct umtx_key key; 2573 int error, cnt, nwake; 2574 uint32_t flags; 2575 2576 error = fueword32(&cv->c_flags, &flags); 2577 if (error == -1) 2578 return (EFAULT); 2579 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2580 return (error); 2581 umtxq_lock(&key); 2582 umtxq_busy(&key); 2583 cnt = umtxq_count(&key); 2584 nwake = umtxq_signal(&key, 1); 2585 if (cnt <= nwake) { 2586 umtxq_unlock(&key); 2587 error = suword32(&cv->c_has_waiters, 0); 2588 if (error == -1) 2589 error = EFAULT; 2590 umtxq_lock(&key); 2591 } 2592 umtxq_unbusy(&key); 2593 umtxq_unlock(&key); 2594 umtx_key_release(&key); 2595 return (error); 2596 } 2597 2598 static int 2599 do_cv_broadcast(struct thread *td, struct ucond *cv) 2600 { 2601 struct umtx_key key; 2602 int error; 2603 uint32_t flags; 2604 2605 error = fueword32(&cv->c_flags, &flags); 2606 if (error == -1) 2607 return (EFAULT); 2608 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2609 return (error); 2610 2611 umtxq_lock(&key); 2612 umtxq_busy(&key); 2613 umtxq_signal(&key, INT_MAX); 2614 umtxq_unlock(&key); 2615 2616 error = suword32(&cv->c_has_waiters, 0); 2617 if (error == -1) 2618 error = EFAULT; 2619 2620 umtxq_unbusy_unlocked(&key); 2621 2622 umtx_key_release(&key); 2623 return (error); 2624 } 2625 2626 static int 2627 do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, struct _umtx_time *timeout) 2628 { 2629 struct abs_timeout timo; 2630 struct umtx_q *uq; 2631 uint32_t flags, wrflags; 2632 int32_t state, oldstate; 2633 int32_t blocked_readers; 2634 int error, error1, rv; 2635 2636 uq = td->td_umtxq; 2637 error = fueword32(&rwlock->rw_flags, &flags); 2638 if (error == -1) 2639 return (EFAULT); 2640 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2641 if (error != 0) 2642 return (error); 2643 2644 if (timeout != NULL) 2645 abs_timeout_init2(&timo, 
timeout); 2646 2647 wrflags = URWLOCK_WRITE_OWNER; 2648 if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER)) 2649 wrflags |= URWLOCK_WRITE_WAITERS; 2650 2651 for (;;) { 2652 rv = fueword32(&rwlock->rw_state, &state); 2653 if (rv == -1) { 2654 umtx_key_release(&uq->uq_key); 2655 return (EFAULT); 2656 } 2657 2658 /* try to lock it */ 2659 while (!(state & wrflags)) { 2660 if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) { 2661 umtx_key_release(&uq->uq_key); 2662 return (EAGAIN); 2663 } 2664 rv = casueword32(&rwlock->rw_state, state, 2665 &oldstate, state + 1); 2666 if (rv == -1) { 2667 umtx_key_release(&uq->uq_key); 2668 return (EFAULT); 2669 } 2670 if (oldstate == state) { 2671 umtx_key_release(&uq->uq_key); 2672 return (0); 2673 } 2674 error = umtxq_check_susp(td); 2675 if (error != 0) 2676 break; 2677 state = oldstate; 2678 } 2679 2680 if (error) 2681 break; 2682 2683 /* grab monitor lock */ 2684 umtxq_lock(&uq->uq_key); 2685 umtxq_busy(&uq->uq_key); 2686 umtxq_unlock(&uq->uq_key); 2687 2688 /* 2689 * re-read the state, in case it changed between the try-lock above 2690 * and the check below 2691 */ 2692 rv = fueword32(&rwlock->rw_state, &state); 2693 if (rv == -1) 2694 error = EFAULT; 2695 2696 /* set read contention bit */ 2697 while (error == 0 && (state & wrflags) && 2698 !(state & URWLOCK_READ_WAITERS)) { 2699 rv = casueword32(&rwlock->rw_state, state, 2700 &oldstate, state | URWLOCK_READ_WAITERS); 2701 if (rv == -1) { 2702 error = EFAULT; 2703 break; 2704 } 2705 if (oldstate == state) 2706 goto sleep; 2707 state = oldstate; 2708 error = umtxq_check_susp(td); 2709 if (error != 0) 2710 break; 2711 } 2712 if (error != 0) { 2713 umtxq_unbusy_unlocked(&uq->uq_key); 2714 break; 2715 } 2716 2717 /* state is changed while setting flags, restart */ 2718 if (!(state & wrflags)) { 2719 umtxq_unbusy_unlocked(&uq->uq_key); 2720 error = umtxq_check_susp(td); 2721 if (error != 0) 2722 break; 2723 continue; 2724 } 2725 2726 
sleep: 2727 /* contention bit is set, before sleeping, increase read waiter count */ 2728 rv = fueword32(&rwlock->rw_blocked_readers, 2729 &blocked_readers); 2730 if (rv == -1) { 2731 umtxq_unbusy_unlocked(&uq->uq_key); 2732 error = EFAULT; 2733 break; 2734 } 2735 suword32(&rwlock->rw_blocked_readers, blocked_readers+1); 2736 2737 while (state & wrflags) { 2738 umtxq_lock(&uq->uq_key); 2739 umtxq_insert(uq); 2740 umtxq_unbusy(&uq->uq_key); 2741 2742 error = umtxq_sleep(uq, "urdlck", timeout == NULL ? 2743 NULL : &timo); 2744 2745 umtxq_busy(&uq->uq_key); 2746 umtxq_remove(uq); 2747 umtxq_unlock(&uq->uq_key); 2748 if (error) 2749 break; 2750 rv = fueword32(&rwlock->rw_state, &state); 2751 if (rv == -1) { 2752 error = EFAULT; 2753 break; 2754 } 2755 } 2756 2757 /* decrease read waiter count, and may clear read contention bit */ 2758 rv = fueword32(&rwlock->rw_blocked_readers, 2759 &blocked_readers); 2760 if (rv == -1) { 2761 umtxq_unbusy_unlocked(&uq->uq_key); 2762 error = EFAULT; 2763 break; 2764 } 2765 suword32(&rwlock->rw_blocked_readers, blocked_readers-1); 2766 if (blocked_readers == 1) { 2767 rv = fueword32(&rwlock->rw_state, &state); 2768 if (rv == -1) { 2769 umtxq_unbusy_unlocked(&uq->uq_key); 2770 error = EFAULT; 2771 break; 2772 } 2773 for (;;) { 2774 rv = casueword32(&rwlock->rw_state, state, 2775 &oldstate, state & ~URWLOCK_READ_WAITERS); 2776 if (rv == -1) { 2777 error = EFAULT; 2778 break; 2779 } 2780 if (oldstate == state) 2781 break; 2782 state = oldstate; 2783 error1 = umtxq_check_susp(td); 2784 if (error1 != 0) { 2785 if (error == 0) 2786 error = error1; 2787 break; 2788 } 2789 } 2790 } 2791 2792 umtxq_unbusy_unlocked(&uq->uq_key); 2793 if (error != 0) 2794 break; 2795 } 2796 umtx_key_release(&uq->uq_key); 2797 if (error == ERESTART) 2798 error = EINTR; 2799 return (error); 2800 } 2801 2802 static int 2803 do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout) 2804 { 2805 struct abs_timeout timo; 2806 struct umtx_q *uq; 
2807 uint32_t flags; 2808 int32_t state, oldstate; 2809 int32_t blocked_writers; 2810 int32_t blocked_readers; 2811 int error, error1, rv; 2812 2813 uq = td->td_umtxq; 2814 error = fueword32(&rwlock->rw_flags, &flags); 2815 if (error == -1) 2816 return (EFAULT); 2817 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2818 if (error != 0) 2819 return (error); 2820 2821 if (timeout != NULL) 2822 abs_timeout_init2(&timo, timeout); 2823 2824 blocked_readers = 0; 2825 for (;;) { 2826 rv = fueword32(&rwlock->rw_state, &state); 2827 if (rv == -1) { 2828 umtx_key_release(&uq->uq_key); 2829 return (EFAULT); 2830 } 2831 while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) { 2832 rv = casueword32(&rwlock->rw_state, state, 2833 &oldstate, state | URWLOCK_WRITE_OWNER); 2834 if (rv == -1) { 2835 umtx_key_release(&uq->uq_key); 2836 return (EFAULT); 2837 } 2838 if (oldstate == state) { 2839 umtx_key_release(&uq->uq_key); 2840 return (0); 2841 } 2842 state = oldstate; 2843 error = umtxq_check_susp(td); 2844 if (error != 0) 2845 break; 2846 } 2847 2848 if (error) { 2849 if (!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) && 2850 blocked_readers != 0) { 2851 umtxq_lock(&uq->uq_key); 2852 umtxq_busy(&uq->uq_key); 2853 umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE); 2854 umtxq_unbusy(&uq->uq_key); 2855 umtxq_unlock(&uq->uq_key); 2856 } 2857 2858 break; 2859 } 2860 2861 /* grab monitor lock */ 2862 umtxq_lock(&uq->uq_key); 2863 umtxq_busy(&uq->uq_key); 2864 umtxq_unlock(&uq->uq_key); 2865 2866 /* 2867 * re-read the state, in case it changed between the try-lock above 2868 * and the check below 2869 */ 2870 rv = fueword32(&rwlock->rw_state, &state); 2871 if (rv == -1) 2872 error = EFAULT; 2873 2874 while (error == 0 && ((state & URWLOCK_WRITE_OWNER) || 2875 URWLOCK_READER_COUNT(state) != 0) && 2876 (state & URWLOCK_WRITE_WAITERS) == 0) { 2877 rv = casueword32(&rwlock->rw_state, state, 2878 &oldstate, state | 
URWLOCK_WRITE_WAITERS); 2879 if (rv == -1) { 2880 error = EFAULT; 2881 break; 2882 } 2883 if (oldstate == state) 2884 goto sleep; 2885 state = oldstate; 2886 error = umtxq_check_susp(td); 2887 if (error != 0) 2888 break; 2889 } 2890 if (error != 0) { 2891 umtxq_unbusy_unlocked(&uq->uq_key); 2892 break; 2893 } 2894 2895 if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) { 2896 umtxq_unbusy_unlocked(&uq->uq_key); 2897 error = umtxq_check_susp(td); 2898 if (error != 0) 2899 break; 2900 continue; 2901 } 2902 sleep: 2903 rv = fueword32(&rwlock->rw_blocked_writers, 2904 &blocked_writers); 2905 if (rv == -1) { 2906 umtxq_unbusy_unlocked(&uq->uq_key); 2907 error = EFAULT; 2908 break; 2909 } 2910 suword32(&rwlock->rw_blocked_writers, blocked_writers+1); 2911 2912 while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) { 2913 umtxq_lock(&uq->uq_key); 2914 umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE); 2915 umtxq_unbusy(&uq->uq_key); 2916 2917 error = umtxq_sleep(uq, "uwrlck", timeout == NULL ? 
2918 NULL : &timo); 2919 2920 umtxq_busy(&uq->uq_key); 2921 umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE); 2922 umtxq_unlock(&uq->uq_key); 2923 if (error) 2924 break; 2925 rv = fueword32(&rwlock->rw_state, &state); 2926 if (rv == -1) { 2927 error = EFAULT; 2928 break; 2929 } 2930 } 2931 2932 rv = fueword32(&rwlock->rw_blocked_writers, 2933 &blocked_writers); 2934 if (rv == -1) { 2935 umtxq_unbusy_unlocked(&uq->uq_key); 2936 error = EFAULT; 2937 break; 2938 } 2939 suword32(&rwlock->rw_blocked_writers, blocked_writers-1); 2940 if (blocked_writers == 1) { 2941 rv = fueword32(&rwlock->rw_state, &state); 2942 if (rv == -1) { 2943 umtxq_unbusy_unlocked(&uq->uq_key); 2944 error = EFAULT; 2945 break; 2946 } 2947 for (;;) { 2948 rv = casueword32(&rwlock->rw_state, state, 2949 &oldstate, state & ~URWLOCK_WRITE_WAITERS); 2950 if (rv == -1) { 2951 error = EFAULT; 2952 break; 2953 } 2954 if (oldstate == state) 2955 break; 2956 state = oldstate; 2957 error1 = umtxq_check_susp(td); 2958 /* 2959 * We are leaving the URWLOCK_WRITE_WAITERS 2960 * behind, but this should not harm the 2961 * correctness. 
2962 */ 2963 if (error1 != 0) { 2964 if (error == 0) 2965 error = error1; 2966 break; 2967 } 2968 } 2969 rv = fueword32(&rwlock->rw_blocked_readers, 2970 &blocked_readers); 2971 if (rv == -1) { 2972 umtxq_unbusy_unlocked(&uq->uq_key); 2973 error = EFAULT; 2974 break; 2975 } 2976 } else 2977 blocked_readers = 0; 2978 2979 umtxq_unbusy_unlocked(&uq->uq_key); 2980 } 2981 2982 umtx_key_release(&uq->uq_key); 2983 if (error == ERESTART) 2984 error = EINTR; 2985 return (error); 2986 } 2987 2988 static int 2989 do_rw_unlock(struct thread *td, struct urwlock *rwlock) 2990 { 2991 struct umtx_q *uq; 2992 uint32_t flags; 2993 int32_t state, oldstate; 2994 int error, rv, q, count; 2995 2996 uq = td->td_umtxq; 2997 error = fueword32(&rwlock->rw_flags, &flags); 2998 if (error == -1) 2999 return (EFAULT); 3000 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3001 if (error != 0) 3002 return (error); 3003 3004 error = fueword32(&rwlock->rw_state, &state); 3005 if (error == -1) { 3006 error = EFAULT; 3007 goto out; 3008 } 3009 if (state & URWLOCK_WRITE_OWNER) { 3010 for (;;) { 3011 rv = casueword32(&rwlock->rw_state, state, 3012 &oldstate, state & ~URWLOCK_WRITE_OWNER); 3013 if (rv == -1) { 3014 error = EFAULT; 3015 goto out; 3016 } 3017 if (oldstate != state) { 3018 state = oldstate; 3019 if (!(oldstate & URWLOCK_WRITE_OWNER)) { 3020 error = EPERM; 3021 goto out; 3022 } 3023 error = umtxq_check_susp(td); 3024 if (error != 0) 3025 goto out; 3026 } else 3027 break; 3028 } 3029 } else if (URWLOCK_READER_COUNT(state) != 0) { 3030 for (;;) { 3031 rv = casueword32(&rwlock->rw_state, state, 3032 &oldstate, state - 1); 3033 if (rv == -1) { 3034 error = EFAULT; 3035 goto out; 3036 } 3037 if (oldstate != state) { 3038 state = oldstate; 3039 if (URWLOCK_READER_COUNT(oldstate) == 0) { 3040 error = EPERM; 3041 goto out; 3042 } 3043 error = umtxq_check_susp(td); 3044 if (error != 0) 3045 goto out; 3046 } else 3047 break; 3048 } 3049 } else { 3050 error = EPERM; 3051 goto 
out; 3052 } 3053 3054 count = 0; 3055 3056 if (!(flags & URWLOCK_PREFER_READER)) { 3057 if (state & URWLOCK_WRITE_WAITERS) { 3058 count = 1; 3059 q = UMTX_EXCLUSIVE_QUEUE; 3060 } else if (state & URWLOCK_READ_WAITERS) { 3061 count = INT_MAX; 3062 q = UMTX_SHARED_QUEUE; 3063 } 3064 } else { 3065 if (state & URWLOCK_READ_WAITERS) { 3066 count = INT_MAX; 3067 q = UMTX_SHARED_QUEUE; 3068 } else if (state & URWLOCK_WRITE_WAITERS) { 3069 count = 1; 3070 q = UMTX_EXCLUSIVE_QUEUE; 3071 } 3072 } 3073 3074 if (count) { 3075 umtxq_lock(&uq->uq_key); 3076 umtxq_busy(&uq->uq_key); 3077 umtxq_signal_queue(&uq->uq_key, count, q); 3078 umtxq_unbusy(&uq->uq_key); 3079 umtxq_unlock(&uq->uq_key); 3080 } 3081 out: 3082 umtx_key_release(&uq->uq_key); 3083 return (error); 3084 } 3085 3086 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3087 static int 3088 do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout) 3089 { 3090 struct abs_timeout timo; 3091 struct umtx_q *uq; 3092 uint32_t flags, count, count1; 3093 int error, rv; 3094 3095 uq = td->td_umtxq; 3096 error = fueword32(&sem->_flags, &flags); 3097 if (error == -1) 3098 return (EFAULT); 3099 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3100 if (error != 0) 3101 return (error); 3102 3103 if (timeout != NULL) 3104 abs_timeout_init2(&timo, timeout); 3105 3106 umtxq_lock(&uq->uq_key); 3107 umtxq_busy(&uq->uq_key); 3108 umtxq_insert(uq); 3109 umtxq_unlock(&uq->uq_key); 3110 rv = casueword32(&sem->_has_waiters, 0, &count1, 1); 3111 if (rv == 0) 3112 rv = fueword32(&sem->_count, &count); 3113 if (rv == -1 || count != 0) { 3114 umtxq_lock(&uq->uq_key); 3115 umtxq_unbusy(&uq->uq_key); 3116 umtxq_remove(uq); 3117 umtxq_unlock(&uq->uq_key); 3118 umtx_key_release(&uq->uq_key); 3119 return (rv == -1 ? EFAULT : 0); 3120 } 3121 umtxq_lock(&uq->uq_key); 3122 umtxq_unbusy(&uq->uq_key); 3123 3124 error = umtxq_sleep(uq, "usem", timeout == NULL ? 
NULL : &timo); 3125 3126 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3127 error = 0; 3128 else { 3129 umtxq_remove(uq); 3130 /* A relative timeout cannot be restarted. */ 3131 if (error == ERESTART && timeout != NULL && 3132 (timeout->_flags & UMTX_ABSTIME) == 0) 3133 error = EINTR; 3134 } 3135 umtxq_unlock(&uq->uq_key); 3136 umtx_key_release(&uq->uq_key); 3137 return (error); 3138 } 3139 3140 /* 3141 * Signal a userland semaphore. 3142 */ 3143 static int 3144 do_sem_wake(struct thread *td, struct _usem *sem) 3145 { 3146 struct umtx_key key; 3147 int error, cnt; 3148 uint32_t flags; 3149 3150 error = fueword32(&sem->_flags, &flags); 3151 if (error == -1) 3152 return (EFAULT); 3153 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3154 return (error); 3155 umtxq_lock(&key); 3156 umtxq_busy(&key); 3157 cnt = umtxq_count(&key); 3158 if (cnt > 0) { 3159 /* 3160 * Check if count is greater than 0, this means the memory is 3161 * still being referenced by user code, so we can safely 3162 * update _has_waiters flag. 
3163 */ 3164 if (cnt == 1) { 3165 umtxq_unlock(&key); 3166 error = suword32(&sem->_has_waiters, 0); 3167 umtxq_lock(&key); 3168 if (error == -1) 3169 error = EFAULT; 3170 } 3171 umtxq_signal(&key, 1); 3172 } 3173 umtxq_unbusy(&key); 3174 umtxq_unlock(&key); 3175 umtx_key_release(&key); 3176 return (error); 3177 } 3178 #endif 3179 3180 static int 3181 do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout) 3182 { 3183 struct abs_timeout timo; 3184 struct umtx_q *uq; 3185 uint32_t count, flags; 3186 int error, rv; 3187 3188 uq = td->td_umtxq; 3189 flags = fuword32(&sem->_flags); 3190 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3191 if (error != 0) 3192 return (error); 3193 3194 if (timeout != NULL) 3195 abs_timeout_init2(&timo, timeout); 3196 3197 umtxq_lock(&uq->uq_key); 3198 umtxq_busy(&uq->uq_key); 3199 umtxq_insert(uq); 3200 umtxq_unlock(&uq->uq_key); 3201 rv = fueword32(&sem->_count, &count); 3202 if (rv == -1) { 3203 umtxq_lock(&uq->uq_key); 3204 umtxq_unbusy(&uq->uq_key); 3205 umtxq_remove(uq); 3206 umtxq_unlock(&uq->uq_key); 3207 umtx_key_release(&uq->uq_key); 3208 return (EFAULT); 3209 } 3210 for (;;) { 3211 if (USEM_COUNT(count) != 0) { 3212 umtxq_lock(&uq->uq_key); 3213 umtxq_unbusy(&uq->uq_key); 3214 umtxq_remove(uq); 3215 umtxq_unlock(&uq->uq_key); 3216 umtx_key_release(&uq->uq_key); 3217 return (0); 3218 } 3219 if (count == USEM_HAS_WAITERS) 3220 break; 3221 rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS); 3222 if (rv == -1) { 3223 umtxq_lock(&uq->uq_key); 3224 umtxq_unbusy(&uq->uq_key); 3225 umtxq_remove(uq); 3226 umtxq_unlock(&uq->uq_key); 3227 umtx_key_release(&uq->uq_key); 3228 return (EFAULT); 3229 } 3230 if (count == 0) 3231 break; 3232 } 3233 umtxq_lock(&uq->uq_key); 3234 umtxq_unbusy(&uq->uq_key); 3235 3236 error = umtxq_sleep(uq, "usem", timeout == NULL ? 
NULL : &timo); 3237 3238 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3239 error = 0; 3240 else { 3241 umtxq_remove(uq); 3242 if (timeout != NULL && (timeout->_flags & UMTX_ABSTIME) == 0) { 3243 /* A relative timeout cannot be restarted. */ 3244 if (error == ERESTART) 3245 error = EINTR; 3246 if (error == EINTR) { 3247 abs_timeout_update(&timo); 3248 timespecsub(&timo.end, &timo.cur, 3249 &timeout->_timeout); 3250 } 3251 } 3252 } 3253 umtxq_unlock(&uq->uq_key); 3254 umtx_key_release(&uq->uq_key); 3255 return (error); 3256 } 3257 3258 /* 3259 * Signal a userland semaphore. 3260 */ 3261 static int 3262 do_sem2_wake(struct thread *td, struct _usem2 *sem) 3263 { 3264 struct umtx_key key; 3265 int error, cnt, rv; 3266 uint32_t count, flags; 3267 3268 rv = fueword32(&sem->_flags, &flags); 3269 if (rv == -1) 3270 return (EFAULT); 3271 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3272 return (error); 3273 umtxq_lock(&key); 3274 umtxq_busy(&key); 3275 cnt = umtxq_count(&key); 3276 if (cnt > 0) { 3277 /* 3278 * If this was the last sleeping thread, clear the waiters 3279 * flag in _count. 
3280 */ 3281 if (cnt == 1) { 3282 umtxq_unlock(&key); 3283 rv = fueword32(&sem->_count, &count); 3284 while (rv != -1 && count & USEM_HAS_WAITERS) 3285 rv = casueword32(&sem->_count, count, &count, 3286 count & ~USEM_HAS_WAITERS); 3287 if (rv == -1) 3288 error = EFAULT; 3289 umtxq_lock(&key); 3290 } 3291 3292 umtxq_signal(&key, 1); 3293 } 3294 umtxq_unbusy(&key); 3295 umtxq_unlock(&key); 3296 umtx_key_release(&key); 3297 return (error); 3298 } 3299 3300 inline int 3301 umtx_copyin_timeout(const void *addr, struct timespec *tsp) 3302 { 3303 int error; 3304 3305 error = copyin(addr, tsp, sizeof(struct timespec)); 3306 if (error == 0) { 3307 if (tsp->tv_sec < 0 || 3308 tsp->tv_nsec >= 1000000000 || 3309 tsp->tv_nsec < 0) 3310 error = EINVAL; 3311 } 3312 return (error); 3313 } 3314 3315 static inline int 3316 umtx_copyin_umtx_time(const void *addr, size_t size, struct _umtx_time *tp) 3317 { 3318 int error; 3319 3320 if (size <= sizeof(struct timespec)) { 3321 tp->_clockid = CLOCK_REALTIME; 3322 tp->_flags = 0; 3323 error = copyin(addr, &tp->_timeout, sizeof(struct timespec)); 3324 } else 3325 error = copyin(addr, tp, sizeof(struct _umtx_time)); 3326 if (error != 0) 3327 return (error); 3328 if (tp->_timeout.tv_sec < 0 || 3329 tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0) 3330 return (EINVAL); 3331 return (0); 3332 } 3333 3334 static int 3335 __umtx_op_unimpl(struct thread *td, struct _umtx_op_args *uap) 3336 { 3337 3338 return (EOPNOTSUPP); 3339 } 3340 3341 static int 3342 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap) 3343 { 3344 struct _umtx_time timeout, *tm_p; 3345 int error; 3346 3347 if (uap->uaddr2 == NULL) 3348 tm_p = NULL; 3349 else { 3350 error = umtx_copyin_umtx_time( 3351 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3352 if (error != 0) 3353 return (error); 3354 tm_p = &timeout; 3355 } 3356 return (do_wait(td, uap->obj, uap->val, tm_p, 0, 0)); 3357 } 3358 3359 static int 3360 __umtx_op_wait_uint(struct thread *td, struct 
_umtx_op_args *uap) 3361 { 3362 struct _umtx_time timeout, *tm_p; 3363 int error; 3364 3365 if (uap->uaddr2 == NULL) 3366 tm_p = NULL; 3367 else { 3368 error = umtx_copyin_umtx_time( 3369 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3370 if (error != 0) 3371 return (error); 3372 tm_p = &timeout; 3373 } 3374 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); 3375 } 3376 3377 static int 3378 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap) 3379 { 3380 struct _umtx_time *tm_p, timeout; 3381 int error; 3382 3383 if (uap->uaddr2 == NULL) 3384 tm_p = NULL; 3385 else { 3386 error = umtx_copyin_umtx_time( 3387 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3388 if (error != 0) 3389 return (error); 3390 tm_p = &timeout; 3391 } 3392 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1)); 3393 } 3394 3395 static int 3396 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap) 3397 { 3398 3399 return (kern_umtx_wake(td, uap->obj, uap->val, 0)); 3400 } 3401 3402 #define BATCH_SIZE 128 3403 static int 3404 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap) 3405 { 3406 char *uaddrs[BATCH_SIZE], **upp; 3407 int count, error, i, pos, tocopy; 3408 3409 upp = (char **)uap->obj; 3410 error = 0; 3411 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 3412 pos += tocopy) { 3413 tocopy = MIN(count, BATCH_SIZE); 3414 error = copyin(upp + pos, uaddrs, tocopy * sizeof(char *)); 3415 if (error != 0) 3416 break; 3417 for (i = 0; i < tocopy; ++i) 3418 kern_umtx_wake(td, uaddrs[i], INT_MAX, 1); 3419 maybe_yield(); 3420 } 3421 return (error); 3422 } 3423 3424 static int 3425 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap) 3426 { 3427 3428 return (kern_umtx_wake(td, uap->obj, uap->val, 1)); 3429 } 3430 3431 static int 3432 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap) 3433 { 3434 struct _umtx_time *tm_p, timeout; 3435 int error; 3436 3437 /* Allow a null timespec (wait forever). 
*/ 3438 if (uap->uaddr2 == NULL) 3439 tm_p = NULL; 3440 else { 3441 error = umtx_copyin_umtx_time( 3442 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3443 if (error != 0) 3444 return (error); 3445 tm_p = &timeout; 3446 } 3447 return (do_lock_umutex(td, uap->obj, tm_p, 0)); 3448 } 3449 3450 static int 3451 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap) 3452 { 3453 3454 return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY)); 3455 } 3456 3457 static int 3458 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap) 3459 { 3460 struct _umtx_time *tm_p, timeout; 3461 int error; 3462 3463 /* Allow a null timespec (wait forever). */ 3464 if (uap->uaddr2 == NULL) 3465 tm_p = NULL; 3466 else { 3467 error = umtx_copyin_umtx_time( 3468 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3469 if (error != 0) 3470 return (error); 3471 tm_p = &timeout; 3472 } 3473 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT)); 3474 } 3475 3476 static int 3477 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap) 3478 { 3479 3480 return (do_wake_umutex(td, uap->obj)); 3481 } 3482 3483 static int 3484 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap) 3485 { 3486 3487 return (do_unlock_umutex(td, uap->obj, false)); 3488 } 3489 3490 static int 3491 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap) 3492 { 3493 3494 return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1)); 3495 } 3496 3497 static int 3498 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap) 3499 { 3500 struct timespec *ts, timeout; 3501 int error; 3502 3503 /* Allow a null timespec (wait forever). 
*/ 3504 if (uap->uaddr2 == NULL) 3505 ts = NULL; 3506 else { 3507 error = umtx_copyin_timeout(uap->uaddr2, &timeout); 3508 if (error != 0) 3509 return (error); 3510 ts = &timeout; 3511 } 3512 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 3513 } 3514 3515 static int 3516 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap) 3517 { 3518 3519 return (do_cv_signal(td, uap->obj)); 3520 } 3521 3522 static int 3523 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap) 3524 { 3525 3526 return (do_cv_broadcast(td, uap->obj)); 3527 } 3528 3529 static int 3530 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap) 3531 { 3532 struct _umtx_time timeout; 3533 int error; 3534 3535 /* Allow a null timespec (wait forever). */ 3536 if (uap->uaddr2 == NULL) { 3537 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 3538 } else { 3539 error = umtx_copyin_umtx_time(uap->uaddr2, 3540 (size_t)uap->uaddr1, &timeout); 3541 if (error != 0) 3542 return (error); 3543 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 3544 } 3545 return (error); 3546 } 3547 3548 static int 3549 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap) 3550 { 3551 struct _umtx_time timeout; 3552 int error; 3553 3554 /* Allow a null timespec (wait forever). 
*/ 3555 if (uap->uaddr2 == NULL) { 3556 error = do_rw_wrlock(td, uap->obj, 0); 3557 } else { 3558 error = umtx_copyin_umtx_time(uap->uaddr2, 3559 (size_t)uap->uaddr1, &timeout); 3560 if (error != 0) 3561 return (error); 3562 3563 error = do_rw_wrlock(td, uap->obj, &timeout); 3564 } 3565 return (error); 3566 } 3567 3568 static int 3569 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap) 3570 { 3571 3572 return (do_rw_unlock(td, uap->obj)); 3573 } 3574 3575 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3576 static int 3577 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap) 3578 { 3579 struct _umtx_time *tm_p, timeout; 3580 int error; 3581 3582 /* Allow a null timespec (wait forever). */ 3583 if (uap->uaddr2 == NULL) 3584 tm_p = NULL; 3585 else { 3586 error = umtx_copyin_umtx_time( 3587 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3588 if (error != 0) 3589 return (error); 3590 tm_p = &timeout; 3591 } 3592 return (do_sem_wait(td, uap->obj, tm_p)); 3593 } 3594 3595 static int 3596 __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap) 3597 { 3598 3599 return (do_sem_wake(td, uap->obj)); 3600 } 3601 #endif 3602 3603 static int 3604 __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap) 3605 { 3606 3607 return (do_wake2_umutex(td, uap->obj, uap->val)); 3608 } 3609 3610 static int 3611 __umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap) 3612 { 3613 struct _umtx_time *tm_p, timeout; 3614 size_t uasize; 3615 int error; 3616 3617 /* Allow a null timespec (wait forever). 
*/ 3618 if (uap->uaddr2 == NULL) { 3619 uasize = 0; 3620 tm_p = NULL; 3621 } else { 3622 uasize = (size_t)uap->uaddr1; 3623 error = umtx_copyin_umtx_time(uap->uaddr2, uasize, &timeout); 3624 if (error != 0) 3625 return (error); 3626 tm_p = &timeout; 3627 } 3628 error = do_sem2_wait(td, uap->obj, tm_p); 3629 if (error == EINTR && uap->uaddr2 != NULL && 3630 (timeout._flags & UMTX_ABSTIME) == 0 && 3631 uasize >= sizeof(struct _umtx_time) + sizeof(struct timespec)) { 3632 error = copyout(&timeout._timeout, 3633 (struct _umtx_time *)uap->uaddr2 + 1, 3634 sizeof(struct timespec)); 3635 if (error == 0) { 3636 error = EINTR; 3637 } 3638 } 3639 3640 return (error); 3641 } 3642 3643 static int 3644 __umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap) 3645 { 3646 3647 return (do_sem2_wake(td, uap->obj)); 3648 } 3649 3650 #define USHM_OBJ_UMTX(o) \ 3651 ((struct umtx_shm_obj_list *)(&(o)->umtx_data)) 3652 3653 #define USHMF_REG_LINKED 0x0001 3654 #define USHMF_OBJ_LINKED 0x0002 3655 struct umtx_shm_reg { 3656 TAILQ_ENTRY(umtx_shm_reg) ushm_reg_link; 3657 LIST_ENTRY(umtx_shm_reg) ushm_obj_link; 3658 struct umtx_key ushm_key; 3659 struct ucred *ushm_cred; 3660 struct shmfd *ushm_obj; 3661 u_int ushm_refcnt; 3662 u_int ushm_flags; 3663 }; 3664 3665 LIST_HEAD(umtx_shm_obj_list, umtx_shm_reg); 3666 TAILQ_HEAD(umtx_shm_reg_head, umtx_shm_reg); 3667 3668 static uma_zone_t umtx_shm_reg_zone; 3669 static struct umtx_shm_reg_head umtx_shm_registry[UMTX_CHAINS]; 3670 static struct mtx umtx_shm_lock; 3671 static struct umtx_shm_reg_head umtx_shm_reg_delfree = 3672 TAILQ_HEAD_INITIALIZER(umtx_shm_reg_delfree); 3673 3674 static void umtx_shm_free_reg(struct umtx_shm_reg *reg); 3675 3676 static void 3677 umtx_shm_reg_delfree_tq(void *context __unused, int pending __unused) 3678 { 3679 struct umtx_shm_reg_head d; 3680 struct umtx_shm_reg *reg, *reg1; 3681 3682 TAILQ_INIT(&d); 3683 mtx_lock(&umtx_shm_lock); 3684 TAILQ_CONCAT(&d, &umtx_shm_reg_delfree, ushm_reg_link); 3685 
mtx_unlock(&umtx_shm_lock); 3686 TAILQ_FOREACH_SAFE(reg, &d, ushm_reg_link, reg1) { 3687 TAILQ_REMOVE(&d, reg, ushm_reg_link); 3688 umtx_shm_free_reg(reg); 3689 } 3690 } 3691 3692 static struct task umtx_shm_reg_delfree_task = 3693 TASK_INITIALIZER(0, umtx_shm_reg_delfree_tq, NULL); 3694 3695 static struct umtx_shm_reg * 3696 umtx_shm_find_reg_locked(const struct umtx_key *key) 3697 { 3698 struct umtx_shm_reg *reg; 3699 struct umtx_shm_reg_head *reg_head; 3700 3701 KASSERT(key->shared, ("umtx_p_find_rg: private key")); 3702 mtx_assert(&umtx_shm_lock, MA_OWNED); 3703 reg_head = &umtx_shm_registry[key->hash]; 3704 TAILQ_FOREACH(reg, reg_head, ushm_reg_link) { 3705 KASSERT(reg->ushm_key.shared, 3706 ("non-shared key on reg %p %d", reg, reg->ushm_key.shared)); 3707 if (reg->ushm_key.info.shared.object == 3708 key->info.shared.object && 3709 reg->ushm_key.info.shared.offset == 3710 key->info.shared.offset) { 3711 KASSERT(reg->ushm_key.type == TYPE_SHM, ("TYPE_USHM")); 3712 KASSERT(reg->ushm_refcnt > 0, 3713 ("reg %p refcnt 0 onlist", reg)); 3714 KASSERT((reg->ushm_flags & USHMF_REG_LINKED) != 0, 3715 ("reg %p not linked", reg)); 3716 reg->ushm_refcnt++; 3717 return (reg); 3718 } 3719 } 3720 return (NULL); 3721 } 3722 3723 static struct umtx_shm_reg * 3724 umtx_shm_find_reg(const struct umtx_key *key) 3725 { 3726 struct umtx_shm_reg *reg; 3727 3728 mtx_lock(&umtx_shm_lock); 3729 reg = umtx_shm_find_reg_locked(key); 3730 mtx_unlock(&umtx_shm_lock); 3731 return (reg); 3732 } 3733 3734 static void 3735 umtx_shm_free_reg(struct umtx_shm_reg *reg) 3736 { 3737 3738 chgumtxcnt(reg->ushm_cred->cr_ruidinfo, -1, 0); 3739 crfree(reg->ushm_cred); 3740 shm_drop(reg->ushm_obj); 3741 uma_zfree(umtx_shm_reg_zone, reg); 3742 } 3743 3744 static bool 3745 umtx_shm_unref_reg_locked(struct umtx_shm_reg *reg, bool force) 3746 { 3747 bool res; 3748 3749 mtx_assert(&umtx_shm_lock, MA_OWNED); 3750 KASSERT(reg->ushm_refcnt > 0, ("ushm_reg %p refcnt 0", reg)); 3751 reg->ushm_refcnt--; 3752 res = 
reg->ushm_refcnt == 0; 3753 if (res || force) { 3754 if ((reg->ushm_flags & USHMF_REG_LINKED) != 0) { 3755 TAILQ_REMOVE(&umtx_shm_registry[reg->ushm_key.hash], 3756 reg, ushm_reg_link); 3757 reg->ushm_flags &= ~USHMF_REG_LINKED; 3758 } 3759 if ((reg->ushm_flags & USHMF_OBJ_LINKED) != 0) { 3760 LIST_REMOVE(reg, ushm_obj_link); 3761 reg->ushm_flags &= ~USHMF_OBJ_LINKED; 3762 } 3763 } 3764 return (res); 3765 } 3766 3767 static void 3768 umtx_shm_unref_reg(struct umtx_shm_reg *reg, bool force) 3769 { 3770 vm_object_t object; 3771 bool dofree; 3772 3773 if (force) { 3774 object = reg->ushm_obj->shm_object; 3775 VM_OBJECT_WLOCK(object); 3776 object->flags |= OBJ_UMTXDEAD; 3777 VM_OBJECT_WUNLOCK(object); 3778 } 3779 mtx_lock(&umtx_shm_lock); 3780 dofree = umtx_shm_unref_reg_locked(reg, force); 3781 mtx_unlock(&umtx_shm_lock); 3782 if (dofree) 3783 umtx_shm_free_reg(reg); 3784 } 3785 3786 void 3787 umtx_shm_object_init(vm_object_t object) 3788 { 3789 3790 LIST_INIT(USHM_OBJ_UMTX(object)); 3791 } 3792 3793 void 3794 umtx_shm_object_terminated(vm_object_t object) 3795 { 3796 struct umtx_shm_reg *reg, *reg1; 3797 bool dofree; 3798 3799 if (LIST_EMPTY(USHM_OBJ_UMTX(object))) 3800 return; 3801 3802 dofree = false; 3803 mtx_lock(&umtx_shm_lock); 3804 LIST_FOREACH_SAFE(reg, USHM_OBJ_UMTX(object), ushm_obj_link, reg1) { 3805 if (umtx_shm_unref_reg_locked(reg, true)) { 3806 TAILQ_INSERT_TAIL(&umtx_shm_reg_delfree, reg, 3807 ushm_reg_link); 3808 dofree = true; 3809 } 3810 } 3811 mtx_unlock(&umtx_shm_lock); 3812 if (dofree) 3813 taskqueue_enqueue(taskqueue_thread, &umtx_shm_reg_delfree_task); 3814 } 3815 3816 static int 3817 umtx_shm_create_reg(struct thread *td, const struct umtx_key *key, 3818 struct umtx_shm_reg **res) 3819 { 3820 struct umtx_shm_reg *reg, *reg1; 3821 struct ucred *cred; 3822 int error; 3823 3824 reg = umtx_shm_find_reg(key); 3825 if (reg != NULL) { 3826 *res = reg; 3827 return (0); 3828 } 3829 cred = td->td_ucred; 3830 if (!chgumtxcnt(cred->cr_ruidinfo, 1, 
lim_cur(td, RLIMIT_UMTXP))) 3831 return (ENOMEM); 3832 reg = uma_zalloc(umtx_shm_reg_zone, M_WAITOK | M_ZERO); 3833 reg->ushm_refcnt = 1; 3834 bcopy(key, ®->ushm_key, sizeof(*key)); 3835 reg->ushm_obj = shm_alloc(td->td_ucred, O_RDWR); 3836 reg->ushm_cred = crhold(cred); 3837 error = shm_dotruncate(reg->ushm_obj, PAGE_SIZE); 3838 if (error != 0) { 3839 umtx_shm_free_reg(reg); 3840 return (error); 3841 } 3842 mtx_lock(&umtx_shm_lock); 3843 reg1 = umtx_shm_find_reg_locked(key); 3844 if (reg1 != NULL) { 3845 mtx_unlock(&umtx_shm_lock); 3846 umtx_shm_free_reg(reg); 3847 *res = reg1; 3848 return (0); 3849 } 3850 reg->ushm_refcnt++; 3851 TAILQ_INSERT_TAIL(&umtx_shm_registry[key->hash], reg, ushm_reg_link); 3852 LIST_INSERT_HEAD(USHM_OBJ_UMTX(key->info.shared.object), reg, 3853 ushm_obj_link); 3854 reg->ushm_flags = USHMF_REG_LINKED | USHMF_OBJ_LINKED; 3855 mtx_unlock(&umtx_shm_lock); 3856 *res = reg; 3857 return (0); 3858 } 3859 3860 static int 3861 umtx_shm_alive(struct thread *td, void *addr) 3862 { 3863 vm_map_t map; 3864 vm_map_entry_t entry; 3865 vm_object_t object; 3866 vm_pindex_t pindex; 3867 vm_prot_t prot; 3868 int res, ret; 3869 boolean_t wired; 3870 3871 map = &td->td_proc->p_vmspace->vm_map; 3872 res = vm_map_lookup(&map, (uintptr_t)addr, VM_PROT_READ, &entry, 3873 &object, &pindex, &prot, &wired); 3874 if (res != KERN_SUCCESS) 3875 return (EFAULT); 3876 if (object == NULL) 3877 ret = EINVAL; 3878 else 3879 ret = (object->flags & OBJ_UMTXDEAD) != 0 ? 
ENOTTY : 0; 3880 vm_map_lookup_done(map, entry); 3881 return (ret); 3882 } 3883 3884 static void 3885 umtx_shm_init(void) 3886 { 3887 int i; 3888 3889 umtx_shm_reg_zone = uma_zcreate("umtx_shm", sizeof(struct umtx_shm_reg), 3890 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 3891 mtx_init(&umtx_shm_lock, "umtxshm", NULL, MTX_DEF); 3892 for (i = 0; i < nitems(umtx_shm_registry); i++) 3893 TAILQ_INIT(&umtx_shm_registry[i]); 3894 } 3895 3896 static int 3897 umtx_shm(struct thread *td, void *addr, u_int flags) 3898 { 3899 struct umtx_key key; 3900 struct umtx_shm_reg *reg; 3901 struct file *fp; 3902 int error, fd; 3903 3904 if (__bitcount(flags & (UMTX_SHM_CREAT | UMTX_SHM_LOOKUP | 3905 UMTX_SHM_DESTROY| UMTX_SHM_ALIVE)) != 1) 3906 return (EINVAL); 3907 if ((flags & UMTX_SHM_ALIVE) != 0) 3908 return (umtx_shm_alive(td, addr)); 3909 error = umtx_key_get(addr, TYPE_SHM, PROCESS_SHARE, &key); 3910 if (error != 0) 3911 return (error); 3912 KASSERT(key.shared == 1, ("non-shared key")); 3913 if ((flags & UMTX_SHM_CREAT) != 0) { 3914 error = umtx_shm_create_reg(td, &key, ®); 3915 } else { 3916 reg = umtx_shm_find_reg(&key); 3917 if (reg == NULL) 3918 error = ESRCH; 3919 } 3920 umtx_key_release(&key); 3921 if (error != 0) 3922 return (error); 3923 KASSERT(reg != NULL, ("no reg")); 3924 if ((flags & UMTX_SHM_DESTROY) != 0) { 3925 umtx_shm_unref_reg(reg, true); 3926 } else { 3927 #if 0 3928 #ifdef MAC 3929 error = mac_posixshm_check_open(td->td_ucred, 3930 reg->ushm_obj, FFLAGS(O_RDWR)); 3931 if (error == 0) 3932 #endif 3933 error = shm_access(reg->ushm_obj, td->td_ucred, 3934 FFLAGS(O_RDWR)); 3935 if (error == 0) 3936 #endif 3937 error = falloc_caps(td, &fp, &fd, O_CLOEXEC, NULL); 3938 if (error == 0) { 3939 shm_hold(reg->ushm_obj); 3940 finit(fp, FFLAGS(O_RDWR), DTYPE_SHM, reg->ushm_obj, 3941 &shm_ops); 3942 td->td_retval[0] = fd; 3943 fdrop(fp, td); 3944 } 3945 } 3946 umtx_shm_unref_reg(reg, false); 3947 return (error); 3948 } 3949 3950 static int 3951 __umtx_op_shm(struct thread 
*td, struct _umtx_op_args *uap) 3952 { 3953 3954 return (umtx_shm(td, uap->uaddr1, uap->val)); 3955 } 3956 3957 static int 3958 umtx_robust_lists(struct thread *td, struct umtx_robust_lists_params *rbp) 3959 { 3960 3961 td->td_rb_list = rbp->robust_list_offset; 3962 td->td_rbp_list = rbp->robust_priv_list_offset; 3963 td->td_rb_inact = rbp->robust_inact_offset; 3964 return (0); 3965 } 3966 3967 static int 3968 __umtx_op_robust_lists(struct thread *td, struct _umtx_op_args *uap) 3969 { 3970 struct umtx_robust_lists_params rb; 3971 int error; 3972 3973 if (uap->val > sizeof(rb)) 3974 return (EINVAL); 3975 bzero(&rb, sizeof(rb)); 3976 error = copyin(uap->uaddr1, &rb, uap->val); 3977 if (error != 0) 3978 return (error); 3979 return (umtx_robust_lists(td, &rb)); 3980 } 3981 3982 typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap); 3983 3984 static const _umtx_op_func op_table[] = { 3985 [UMTX_OP_RESERVED0] = __umtx_op_unimpl, 3986 [UMTX_OP_RESERVED1] = __umtx_op_unimpl, 3987 [UMTX_OP_WAIT] = __umtx_op_wait, 3988 [UMTX_OP_WAKE] = __umtx_op_wake, 3989 [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex, 3990 [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex, 3991 [UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex, 3992 [UMTX_OP_SET_CEILING] = __umtx_op_set_ceiling, 3993 [UMTX_OP_CV_WAIT] = __umtx_op_cv_wait, 3994 [UMTX_OP_CV_SIGNAL] = __umtx_op_cv_signal, 3995 [UMTX_OP_CV_BROADCAST] = __umtx_op_cv_broadcast, 3996 [UMTX_OP_WAIT_UINT] = __umtx_op_wait_uint, 3997 [UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock, 3998 [UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock, 3999 [UMTX_OP_RW_UNLOCK] = __umtx_op_rw_unlock, 4000 [UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private, 4001 [UMTX_OP_WAKE_PRIVATE] = __umtx_op_wake_private, 4002 [UMTX_OP_MUTEX_WAIT] = __umtx_op_wait_umutex, 4003 [UMTX_OP_MUTEX_WAKE] = __umtx_op_wake_umutex, 4004 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4005 [UMTX_OP_SEM_WAIT] = __umtx_op_sem_wait, 4006 [UMTX_OP_SEM_WAKE] = 
__umtx_op_sem_wake, 4007 #else 4008 [UMTX_OP_SEM_WAIT] = __umtx_op_unimpl, 4009 [UMTX_OP_SEM_WAKE] = __umtx_op_unimpl, 4010 #endif 4011 [UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private, 4012 [UMTX_OP_MUTEX_WAKE2] = __umtx_op_wake2_umutex, 4013 [UMTX_OP_SEM2_WAIT] = __umtx_op_sem2_wait, 4014 [UMTX_OP_SEM2_WAKE] = __umtx_op_sem2_wake, 4015 [UMTX_OP_SHM] = __umtx_op_shm, 4016 [UMTX_OP_ROBUST_LISTS] = __umtx_op_robust_lists, 4017 }; 4018 4019 int 4020 sys__umtx_op(struct thread *td, struct _umtx_op_args *uap) 4021 { 4022 4023 if ((unsigned)uap->op < nitems(op_table)) 4024 return (*op_table[uap->op])(td, uap); 4025 return (EINVAL); 4026 } 4027 4028 #ifdef COMPAT_FREEBSD32 4029 4030 struct timespec32 { 4031 int32_t tv_sec; 4032 int32_t tv_nsec; 4033 }; 4034 4035 struct umtx_time32 { 4036 struct timespec32 timeout; 4037 uint32_t flags; 4038 uint32_t clockid; 4039 }; 4040 4041 static inline int 4042 umtx_copyin_timeout32(void *addr, struct timespec *tsp) 4043 { 4044 struct timespec32 ts32; 4045 int error; 4046 4047 error = copyin(addr, &ts32, sizeof(struct timespec32)); 4048 if (error == 0) { 4049 if (ts32.tv_sec < 0 || 4050 ts32.tv_nsec >= 1000000000 || 4051 ts32.tv_nsec < 0) 4052 error = EINVAL; 4053 else { 4054 tsp->tv_sec = ts32.tv_sec; 4055 tsp->tv_nsec = ts32.tv_nsec; 4056 } 4057 } 4058 return (error); 4059 } 4060 4061 static inline int 4062 umtx_copyin_umtx_time32(const void *addr, size_t size, struct _umtx_time *tp) 4063 { 4064 struct umtx_time32 t32; 4065 int error; 4066 4067 t32.clockid = CLOCK_REALTIME; 4068 t32.flags = 0; 4069 if (size <= sizeof(struct timespec32)) 4070 error = copyin(addr, &t32.timeout, sizeof(struct timespec32)); 4071 else 4072 error = copyin(addr, &t32, sizeof(struct umtx_time32)); 4073 if (error != 0) 4074 return (error); 4075 if (t32.timeout.tv_sec < 0 || 4076 t32.timeout.tv_nsec >= 1000000000 || t32.timeout.tv_nsec < 0) 4077 return (EINVAL); 4078 tp->_timeout.tv_sec = t32.timeout.tv_sec; 4079 tp->_timeout.tv_nsec = t32.timeout.tv_nsec; 
4080 tp->_flags = t32.flags; 4081 tp->_clockid = t32.clockid; 4082 return (0); 4083 } 4084 4085 static int 4086 __umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 4087 { 4088 struct _umtx_time *tm_p, timeout; 4089 int error; 4090 4091 if (uap->uaddr2 == NULL) 4092 tm_p = NULL; 4093 else { 4094 error = umtx_copyin_umtx_time32(uap->uaddr2, 4095 (size_t)uap->uaddr1, &timeout); 4096 if (error != 0) 4097 return (error); 4098 tm_p = &timeout; 4099 } 4100 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); 4101 } 4102 4103 static int 4104 __umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap) 4105 { 4106 struct _umtx_time *tm_p, timeout; 4107 int error; 4108 4109 /* Allow a null timespec (wait forever). */ 4110 if (uap->uaddr2 == NULL) 4111 tm_p = NULL; 4112 else { 4113 error = umtx_copyin_umtx_time32(uap->uaddr2, 4114 (size_t)uap->uaddr1, &timeout); 4115 if (error != 0) 4116 return (error); 4117 tm_p = &timeout; 4118 } 4119 return (do_lock_umutex(td, uap->obj, tm_p, 0)); 4120 } 4121 4122 static int 4123 __umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap) 4124 { 4125 struct _umtx_time *tm_p, timeout; 4126 int error; 4127 4128 /* Allow a null timespec (wait forever). */ 4129 if (uap->uaddr2 == NULL) 4130 tm_p = NULL; 4131 else { 4132 error = umtx_copyin_umtx_time32(uap->uaddr2, 4133 (size_t)uap->uaddr1, &timeout); 4134 if (error != 0) 4135 return (error); 4136 tm_p = &timeout; 4137 } 4138 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT)); 4139 } 4140 4141 static int 4142 __umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 4143 { 4144 struct timespec *ts, timeout; 4145 int error; 4146 4147 /* Allow a null timespec (wait forever). 
*/ 4148 if (uap->uaddr2 == NULL) 4149 ts = NULL; 4150 else { 4151 error = umtx_copyin_timeout32(uap->uaddr2, &timeout); 4152 if (error != 0) 4153 return (error); 4154 ts = &timeout; 4155 } 4156 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 4157 } 4158 4159 static int 4160 __umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap) 4161 { 4162 struct _umtx_time timeout; 4163 int error; 4164 4165 /* Allow a null timespec (wait forever). */ 4166 if (uap->uaddr2 == NULL) { 4167 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 4168 } else { 4169 error = umtx_copyin_umtx_time32(uap->uaddr2, 4170 (size_t)uap->uaddr1, &timeout); 4171 if (error != 0) 4172 return (error); 4173 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 4174 } 4175 return (error); 4176 } 4177 4178 static int 4179 __umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap) 4180 { 4181 struct _umtx_time timeout; 4182 int error; 4183 4184 /* Allow a null timespec (wait forever). 
*/ 4185 if (uap->uaddr2 == NULL) { 4186 error = do_rw_wrlock(td, uap->obj, 0); 4187 } else { 4188 error = umtx_copyin_umtx_time32(uap->uaddr2, 4189 (size_t)uap->uaddr1, &timeout); 4190 if (error != 0) 4191 return (error); 4192 error = do_rw_wrlock(td, uap->obj, &timeout); 4193 } 4194 return (error); 4195 } 4196 4197 static int 4198 __umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap) 4199 { 4200 struct _umtx_time *tm_p, timeout; 4201 int error; 4202 4203 if (uap->uaddr2 == NULL) 4204 tm_p = NULL; 4205 else { 4206 error = umtx_copyin_umtx_time32( 4207 uap->uaddr2, (size_t)uap->uaddr1,&timeout); 4208 if (error != 0) 4209 return (error); 4210 tm_p = &timeout; 4211 } 4212 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1)); 4213 } 4214 4215 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4216 static int 4217 __umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 4218 { 4219 struct _umtx_time *tm_p, timeout; 4220 int error; 4221 4222 /* Allow a null timespec (wait forever). */ 4223 if (uap->uaddr2 == NULL) 4224 tm_p = NULL; 4225 else { 4226 error = umtx_copyin_umtx_time32(uap->uaddr2, 4227 (size_t)uap->uaddr1, &timeout); 4228 if (error != 0) 4229 return (error); 4230 tm_p = &timeout; 4231 } 4232 return (do_sem_wait(td, uap->obj, tm_p)); 4233 } 4234 #endif 4235 4236 static int 4237 __umtx_op_sem2_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 4238 { 4239 struct _umtx_time *tm_p, timeout; 4240 size_t uasize; 4241 int error; 4242 4243 /* Allow a null timespec (wait forever). 
*/ 4244 if (uap->uaddr2 == NULL) { 4245 uasize = 0; 4246 tm_p = NULL; 4247 } else { 4248 uasize = (size_t)uap->uaddr1; 4249 error = umtx_copyin_umtx_time32(uap->uaddr2, uasize, &timeout); 4250 if (error != 0) 4251 return (error); 4252 tm_p = &timeout; 4253 } 4254 error = do_sem2_wait(td, uap->obj, tm_p); 4255 if (error == EINTR && uap->uaddr2 != NULL && 4256 (timeout._flags & UMTX_ABSTIME) == 0 && 4257 uasize >= sizeof(struct umtx_time32) + sizeof(struct timespec32)) { 4258 struct timespec32 remain32 = { 4259 .tv_sec = timeout._timeout.tv_sec, 4260 .tv_nsec = timeout._timeout.tv_nsec 4261 }; 4262 error = copyout(&remain32, 4263 (struct umtx_time32 *)uap->uaddr2 + 1, 4264 sizeof(struct timespec32)); 4265 if (error == 0) { 4266 error = EINTR; 4267 } 4268 } 4269 4270 return (error); 4271 } 4272 4273 static int 4274 __umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap) 4275 { 4276 uint32_t uaddrs[BATCH_SIZE], **upp; 4277 int count, error, i, pos, tocopy; 4278 4279 upp = (uint32_t **)uap->obj; 4280 error = 0; 4281 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 4282 pos += tocopy) { 4283 tocopy = MIN(count, BATCH_SIZE); 4284 error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t)); 4285 if (error != 0) 4286 break; 4287 for (i = 0; i < tocopy; ++i) 4288 kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i], 4289 INT_MAX, 1); 4290 maybe_yield(); 4291 } 4292 return (error); 4293 } 4294 4295 struct umtx_robust_lists_params_compat32 { 4296 uint32_t robust_list_offset; 4297 uint32_t robust_priv_list_offset; 4298 uint32_t robust_inact_offset; 4299 }; 4300 4301 static int 4302 __umtx_op_robust_lists_compat32(struct thread *td, struct _umtx_op_args *uap) 4303 { 4304 struct umtx_robust_lists_params rb; 4305 struct umtx_robust_lists_params_compat32 rb32; 4306 int error; 4307 4308 if (uap->val > sizeof(rb32)) 4309 return (EINVAL); 4310 bzero(&rb, sizeof(rb)); 4311 bzero(&rb32, sizeof(rb32)); 4312 error = copyin(uap->uaddr1, &rb32, uap->val); 4313 if 
(error != 0) 4314 return (error); 4315 rb.robust_list_offset = rb32.robust_list_offset; 4316 rb.robust_priv_list_offset = rb32.robust_priv_list_offset; 4317 rb.robust_inact_offset = rb32.robust_inact_offset; 4318 return (umtx_robust_lists(td, &rb)); 4319 } 4320 4321 static const _umtx_op_func op_table_compat32[] = { 4322 [UMTX_OP_RESERVED0] = __umtx_op_unimpl, 4323 [UMTX_OP_RESERVED1] = __umtx_op_unimpl, 4324 [UMTX_OP_WAIT] = __umtx_op_wait_compat32, 4325 [UMTX_OP_WAKE] = __umtx_op_wake, 4326 [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex, 4327 [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex_compat32, 4328 [UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex, 4329 [UMTX_OP_SET_CEILING] = __umtx_op_set_ceiling, 4330 [UMTX_OP_CV_WAIT] = __umtx_op_cv_wait_compat32, 4331 [UMTX_OP_CV_SIGNAL] = __umtx_op_cv_signal, 4332 [UMTX_OP_CV_BROADCAST] = __umtx_op_cv_broadcast, 4333 [UMTX_OP_WAIT_UINT] = __umtx_op_wait_compat32, 4334 [UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock_compat32, 4335 [UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock_compat32, 4336 [UMTX_OP_RW_UNLOCK] = __umtx_op_rw_unlock, 4337 [UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private_compat32, 4338 [UMTX_OP_WAKE_PRIVATE] = __umtx_op_wake_private, 4339 [UMTX_OP_MUTEX_WAIT] = __umtx_op_wait_umutex_compat32, 4340 [UMTX_OP_MUTEX_WAKE] = __umtx_op_wake_umutex, 4341 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4342 [UMTX_OP_SEM_WAIT] = __umtx_op_sem_wait_compat32, 4343 [UMTX_OP_SEM_WAKE] = __umtx_op_sem_wake, 4344 #else 4345 [UMTX_OP_SEM_WAIT] = __umtx_op_unimpl, 4346 [UMTX_OP_SEM_WAKE] = __umtx_op_unimpl, 4347 #endif 4348 [UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private32, 4349 [UMTX_OP_MUTEX_WAKE2] = __umtx_op_wake2_umutex, 4350 [UMTX_OP_SEM2_WAIT] = __umtx_op_sem2_wait_compat32, 4351 [UMTX_OP_SEM2_WAKE] = __umtx_op_sem2_wake, 4352 [UMTX_OP_SHM] = __umtx_op_shm, 4353 [UMTX_OP_ROBUST_LISTS] = __umtx_op_robust_lists_compat32, 4354 }; 4355 4356 int 4357 freebsd32__umtx_op(struct thread *td, struct 
freebsd32__umtx_op_args *uap) 4358 { 4359 4360 if ((unsigned)uap->op < nitems(op_table_compat32)) { 4361 return (*op_table_compat32[uap->op])(td, 4362 (struct _umtx_op_args *)uap); 4363 } 4364 return (EINVAL); 4365 } 4366 #endif 4367 4368 void 4369 umtx_thread_init(struct thread *td) 4370 { 4371 4372 td->td_umtxq = umtxq_alloc(); 4373 td->td_umtxq->uq_thread = td; 4374 } 4375 4376 void 4377 umtx_thread_fini(struct thread *td) 4378 { 4379 4380 umtxq_free(td->td_umtxq); 4381 } 4382 4383 /* 4384 * It will be called when new thread is created, e.g fork(). 4385 */ 4386 void 4387 umtx_thread_alloc(struct thread *td) 4388 { 4389 struct umtx_q *uq; 4390 4391 uq = td->td_umtxq; 4392 uq->uq_inherited_pri = PRI_MAX; 4393 4394 KASSERT(uq->uq_flags == 0, ("uq_flags != 0")); 4395 KASSERT(uq->uq_thread == td, ("uq_thread != td")); 4396 KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL")); 4397 KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty")); 4398 } 4399 4400 /* 4401 * exec() hook. 4402 * 4403 * Clear robust lists for all process' threads, not delaying the 4404 * cleanup to thread_exit hook, since the relevant address space is 4405 * destroyed right now. 4406 */ 4407 static void 4408 umtx_exec_hook(void *arg __unused, struct proc *p, 4409 struct image_params *imgp __unused) 4410 { 4411 struct thread *td; 4412 4413 KASSERT(p == curproc, ("need curproc")); 4414 PROC_LOCK(p); 4415 KASSERT((p->p_flag & P_HADTHREADS) == 0 || 4416 (p->p_flag & P_STOPPED_SINGLE) != 0, 4417 ("curproc must be single-threaded")); 4418 FOREACH_THREAD_IN_PROC(p, td) { 4419 KASSERT(td == curthread || 4420 ((td->td_flags & TDF_BOUNDARY) != 0 && TD_IS_SUSPENDED(td)), 4421 ("running thread %p %p", p, td)); 4422 PROC_UNLOCK(p); 4423 umtx_thread_cleanup(td); 4424 PROC_LOCK(p); 4425 td->td_rb_list = td->td_rbp_list = td->td_rb_inact = 0; 4426 } 4427 PROC_UNLOCK(p); 4428 } 4429 4430 /* 4431 * thread_exit() hook. 
4432 */ 4433 void 4434 umtx_thread_exit(struct thread *td) 4435 { 4436 4437 umtx_thread_cleanup(td); 4438 } 4439 4440 static int 4441 umtx_read_uptr(struct thread *td, uintptr_t ptr, uintptr_t *res) 4442 { 4443 u_long res1; 4444 #ifdef COMPAT_FREEBSD32 4445 uint32_t res32; 4446 #endif 4447 int error; 4448 4449 #ifdef COMPAT_FREEBSD32 4450 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { 4451 error = fueword32((void *)ptr, &res32); 4452 if (error == 0) 4453 res1 = res32; 4454 } else 4455 #endif 4456 { 4457 error = fueword((void *)ptr, &res1); 4458 } 4459 if (error == 0) 4460 *res = res1; 4461 else 4462 error = EFAULT; 4463 return (error); 4464 } 4465 4466 static void 4467 umtx_read_rb_list(struct thread *td, struct umutex *m, uintptr_t *rb_list) 4468 { 4469 #ifdef COMPAT_FREEBSD32 4470 struct umutex32 m32; 4471 4472 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { 4473 memcpy(&m32, m, sizeof(m32)); 4474 *rb_list = m32.m_rb_lnk; 4475 } else 4476 #endif 4477 *rb_list = m->m_rb_lnk; 4478 } 4479 4480 static int 4481 umtx_handle_rb(struct thread *td, uintptr_t rbp, uintptr_t *rb_list, bool inact) 4482 { 4483 struct umutex m; 4484 int error; 4485 4486 KASSERT(td->td_proc == curproc, ("need current vmspace")); 4487 error = copyin((void *)rbp, &m, sizeof(m)); 4488 if (error != 0) 4489 return (error); 4490 if (rb_list != NULL) 4491 umtx_read_rb_list(td, &m, rb_list); 4492 if ((m.m_flags & UMUTEX_ROBUST) == 0) 4493 return (EINVAL); 4494 if ((m.m_owner & ~UMUTEX_CONTESTED) != td->td_tid) 4495 /* inact is cleared after unlock, allow the inconsistency */ 4496 return (inact ? 
0 : EINVAL); 4497 return (do_unlock_umutex(td, (struct umutex *)rbp, true)); 4498 } 4499 4500 static void 4501 umtx_cleanup_rb_list(struct thread *td, uintptr_t rb_list, uintptr_t *rb_inact, 4502 const char *name) 4503 { 4504 int error, i; 4505 uintptr_t rbp; 4506 bool inact; 4507 4508 if (rb_list == 0) 4509 return; 4510 error = umtx_read_uptr(td, rb_list, &rbp); 4511 for (i = 0; error == 0 && rbp != 0 && i < umtx_max_rb; i++) { 4512 if (rbp == *rb_inact) { 4513 inact = true; 4514 *rb_inact = 0; 4515 } else 4516 inact = false; 4517 error = umtx_handle_rb(td, rbp, &rbp, inact); 4518 } 4519 if (i == umtx_max_rb && umtx_verbose_rb) { 4520 uprintf("comm %s pid %d: reached umtx %smax rb %d\n", 4521 td->td_proc->p_comm, td->td_proc->p_pid, name, umtx_max_rb); 4522 } 4523 if (error != 0 && umtx_verbose_rb) { 4524 uprintf("comm %s pid %d: handling %srb error %d\n", 4525 td->td_proc->p_comm, td->td_proc->p_pid, name, error); 4526 } 4527 } 4528 4529 /* 4530 * Clean up umtx data. 4531 */ 4532 static void 4533 umtx_thread_cleanup(struct thread *td) 4534 { 4535 struct umtx_q *uq; 4536 struct umtx_pi *pi; 4537 uintptr_t rb_inact; 4538 4539 /* 4540 * Disown pi mutexes. 4541 */ 4542 uq = td->td_umtxq; 4543 if (uq != NULL) { 4544 mtx_lock(&umtx_lock); 4545 uq->uq_inherited_pri = PRI_MAX; 4546 while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) { 4547 pi->pi_owner = NULL; 4548 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link); 4549 } 4550 mtx_unlock(&umtx_lock); 4551 thread_lock(td); 4552 sched_lend_user_prio(td, PRI_MAX); 4553 thread_unlock(td); 4554 } 4555 4556 /* 4557 * Handle terminated robust mutexes. Must be done after 4558 * robust pi disown, otherwise unlock could see unowned 4559 * entries. 
4560 */ 4561 rb_inact = td->td_rb_inact; 4562 if (rb_inact != 0) 4563 (void)umtx_read_uptr(td, rb_inact, &rb_inact); 4564 umtx_cleanup_rb_list(td, td->td_rb_list, &rb_inact, ""); 4565 umtx_cleanup_rb_list(td, td->td_rbp_list, &rb_inact, "priv "); 4566 if (rb_inact != 0) 4567 (void)umtx_handle_rb(td, rb_inact, NULL, true); 4568 } 4569