1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2015, 2016 The FreeBSD Foundation 5 * Copyright (c) 2004, David Xu <davidxu@freebsd.org> 6 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org> 7 * All rights reserved. 8 * 9 * Portions of this software were developed by Konstantin Belousov 10 * under sponsorship from the FreeBSD Foundation. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice unmodified, this list of conditions, and the following 17 * disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 23 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 24 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 25 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 27 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 31 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 37 #include "opt_umtx_profiling.h" 38 39 #include <sys/param.h> 40 #include <sys/kernel.h> 41 #include <sys/fcntl.h> 42 #include <sys/file.h> 43 #include <sys/filedesc.h> 44 #include <sys/limits.h> 45 #include <sys/lock.h> 46 #include <sys/malloc.h> 47 #include <sys/mman.h> 48 #include <sys/mutex.h> 49 #include <sys/priv.h> 50 #include <sys/proc.h> 51 #include <sys/resource.h> 52 #include <sys/resourcevar.h> 53 #include <sys/rwlock.h> 54 #include <sys/sbuf.h> 55 #include <sys/sched.h> 56 #include <sys/smp.h> 57 #include <sys/sysctl.h> 58 #include <sys/sysent.h> 59 #include <sys/systm.h> 60 #include <sys/sysproto.h> 61 #include <sys/syscallsubr.h> 62 #include <sys/taskqueue.h> 63 #include <sys/time.h> 64 #include <sys/eventhandler.h> 65 #include <sys/umtx.h> 66 67 #include <security/mac/mac_framework.h> 68 69 #include <vm/vm.h> 70 #include <vm/vm_param.h> 71 #include <vm/pmap.h> 72 #include <vm/vm_map.h> 73 #include <vm/vm_object.h> 74 75 #include <machine/atomic.h> 76 #include <machine/cpu.h> 77 78 #ifdef COMPAT_FREEBSD32 79 #include <compat/freebsd32/freebsd32.h> 80 #include <compat/freebsd32/freebsd32_proto.h> 81 #endif 82 83 #define _UMUTEX_TRY 1 84 #define _UMUTEX_WAIT 2 85 86 #ifdef UMTX_PROFILING 87 #define UPROF_PERC_BIGGER(w, f, sw, sf) \ 88 (((w) > (sw)) || ((w) == (sw) && (f) > (sf))) 89 #endif 90 91 /* Priority inheritance mutex info. */ 92 struct umtx_pi { 93 /* Owner thread */ 94 struct thread *pi_owner; 95 96 /* Reference count */ 97 int pi_refcount; 98 99 /* List entry to link umtx holding by thread */ 100 TAILQ_ENTRY(umtx_pi) pi_link; 101 102 /* List entry in hash */ 103 TAILQ_ENTRY(umtx_pi) pi_hashlink; 104 105 /* List for waiters */ 106 TAILQ_HEAD(,umtx_q) pi_blocked; 107 108 /* Identify a userland lock object */ 109 struct umtx_key pi_key; 110 }; 111 112 /* A userland synchronous object user. */ 113 struct umtx_q { 114 /* Linked list for the hash. */ 115 TAILQ_ENTRY(umtx_q) uq_link; 116 117 /* Umtx key. */ 118 struct umtx_key uq_key; 119 120 /* Umtx flags. */ 121 int uq_flags; 122 #define UQF_UMTXQ 0x0001 123 124 /* The thread waits on. */ 125 struct thread *uq_thread; 126 127 /* 128 * Blocked on PI mutex. read can use chain lock 129 * or umtx_lock, write must have both chain lock and 130 * umtx_lock being hold. 131 */ 132 struct umtx_pi *uq_pi_blocked; 133 134 /* On blocked list */ 135 TAILQ_ENTRY(umtx_q) uq_lockq; 136 137 /* Thread contending with us */ 138 TAILQ_HEAD(,umtx_pi) uq_pi_contested; 139 140 /* Inherited priority from PP mutex */ 141 u_char uq_inherited_pri; 142 143 /* Spare queue ready to be reused */ 144 struct umtxq_queue *uq_spare_queue; 145 146 /* The queue we on */ 147 struct umtxq_queue *uq_cur_queue; 148 }; 149 150 TAILQ_HEAD(umtxq_head, umtx_q); 151 152 /* Per-key wait-queue */ 153 struct umtxq_queue { 154 struct umtxq_head head; 155 struct umtx_key key; 156 LIST_ENTRY(umtxq_queue) link; 157 int length; 158 }; 159 160 LIST_HEAD(umtxq_list, umtxq_queue); 161 162 /* Userland lock object's wait-queue chain */ 163 struct umtxq_chain { 164 /* Lock for this chain. */ 165 struct mtx uc_lock; 166 167 /* List of sleep queues. */ 168 struct umtxq_list uc_queue[2]; 169 #define UMTX_SHARED_QUEUE 0 170 #define UMTX_EXCLUSIVE_QUEUE 1 171 172 LIST_HEAD(, umtxq_queue) uc_spare_queue; 173 174 /* Busy flag */ 175 char uc_busy; 176 177 /* Chain lock waiters */ 178 int uc_waiters; 179 180 /* All PI in the list */ 181 TAILQ_HEAD(,umtx_pi) uc_pi_list; 182 183 #ifdef UMTX_PROFILING 184 u_int length; 185 u_int max_length; 186 #endif 187 }; 188 189 #define UMTXQ_LOCKED_ASSERT(uc) mtx_assert(&(uc)->uc_lock, MA_OWNED) 190 191 /* 192 * Don't propagate time-sharing priority, there is a security reason, 193 * a user can simply introduce PI-mutex, let thread A lock the mutex, 194 * and let another thread B block on the mutex, because B is 195 * sleeping, its priority will be boosted, this causes A's priority to 196 * be boosted via priority propagating too and will never be lowered even 197 * if it is using 100%CPU, this is unfair to other processes. 198 */ 199 200 #define UPRI(td) (((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\ 201 (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\ 202 PRI_MAX_TIMESHARE : (td)->td_user_pri) 203 204 #define GOLDEN_RATIO_PRIME 2654404609U 205 #ifndef UMTX_CHAINS 206 #define UMTX_CHAINS 512 207 #endif 208 #define UMTX_SHIFTS (__WORD_BIT - 9) 209 210 #define GET_SHARE(flags) \ 211 (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE) 212 213 #define BUSY_SPINS 200 214 215 struct abs_timeout { 216 int clockid; 217 bool is_abs_real; /* TIMER_ABSTIME && CLOCK_REALTIME* */ 218 struct timespec cur; 219 struct timespec end; 220 }; 221 222 #ifdef COMPAT_FREEBSD32 223 _Static_assert(sizeof(struct umutex) == sizeof(struct umutex32), "umutex32"); 224 _Static_assert(__offsetof(struct umutex, m_spare[0]) == 225 __offsetof(struct umutex32, m_spare[0]), "m_spare32"); 226 #endif 227 228 int umtx_shm_vnobj_persistent = 0; 229 SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_vnode_persistent, CTLFLAG_RWTUN, 230 &umtx_shm_vnobj_persistent, 0, 231 "False forces destruction of umtx attached to file, on last close"); 232 static int umtx_max_rb = 1000; 233 SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_max_robust, CTLFLAG_RWTUN, 234 &umtx_max_rb, 0, 235 "Maximum number of robust mutexes allowed for each thread"); 236 237 static uma_zone_t umtx_pi_zone; 238 static struct umtxq_chain umtxq_chains[2][UMTX_CHAINS]; 239 static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory"); 240 static int umtx_pi_allocated; 241 242 static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 243 "umtx debug"); 244 SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD, 245 &umtx_pi_allocated, 0, "Allocated umtx_pi"); 246 static int umtx_verbose_rb = 1; 247 SYSCTL_INT(_debug_umtx, OID_AUTO, robust_faults_verbose, CTLFLAG_RWTUN, 248 &umtx_verbose_rb, 0, 249 ""); 250 251 #ifdef UMTX_PROFILING 252 static long max_length; 253 SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0, "max_length"); 254 static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 255 "umtx chain stats"); 256 #endif 257 258 static void abs_timeout_update(struct abs_timeout *timo); 259 260 static void umtx_shm_init(void); 261 static void umtxq_sysinit(void *); 262 static void umtxq_hash(struct umtx_key *key); 263 static struct umtxq_chain *umtxq_getchain(struct umtx_key *key); 264 static void umtxq_lock(struct umtx_key *key); 265 static void umtxq_unlock(struct umtx_key *key); 266 static void umtxq_busy(struct umtx_key *key); 267 static void umtxq_unbusy(struct umtx_key *key); 268 static void umtxq_insert_queue(struct umtx_q *uq, int q); 269 static void umtxq_remove_queue(struct umtx_q *uq, int q); 270 static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *); 271 static int umtxq_count(struct umtx_key *key); 272 static struct umtx_pi *umtx_pi_alloc(int); 273 static void umtx_pi_free(struct umtx_pi *pi); 274 static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, 275 bool rb); 276 static void umtx_thread_cleanup(struct thread *td); 277 static void umtx_exec_hook(void *arg __unused, struct proc *p __unused, 278 struct image_params *imgp __unused); 279 SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL); 280 281 #define umtxq_signal(key, nwake) umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE) 282 #define umtxq_insert(uq) umtxq_insert_queue((uq), UMTX_SHARED_QUEUE) 283 #define umtxq_remove(uq) umtxq_remove_queue((uq), UMTX_SHARED_QUEUE) 284 285 static struct mtx umtx_lock; 286 287 #ifdef UMTX_PROFILING 288 static void 289 umtx_init_profiling(void) 290 { 291 struct sysctl_oid *chain_oid; 292 char chain_name[10]; 293 int i; 294 295 for (i = 0; i < UMTX_CHAINS; ++i) { 296 snprintf(chain_name, sizeof(chain_name), "%d", i); 297 chain_oid = SYSCTL_ADD_NODE(NULL, 298 SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO, 299 chain_name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 300 "umtx hash stats"); 301 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, 302 "max_length0", CTLFLAG_RD, &umtxq_chains[0][i].max_length, 0, NULL); 303 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, 304 "max_length1", CTLFLAG_RD, &umtxq_chains[1][i].max_length, 0, NULL); 305 } 306 } 307 308 static int 309 sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS) 310 { 311 char buf[512]; 312 struct sbuf sb; 313 struct umtxq_chain *uc; 314 u_int fract, i, j, tot, whole; 315 u_int sf0, sf1, sf2, sf3, sf4; 316 u_int si0, si1, si2, si3, si4; 317 u_int sw0, sw1, sw2, sw3, sw4; 318 319 sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN); 320 for (i = 0; i < 2; i++) { 321 tot = 0; 322 for (j = 0; j < UMTX_CHAINS; ++j) { 323 uc = &umtxq_chains[i][j]; 324 mtx_lock(&uc->uc_lock); 325 tot += uc->max_length; 326 mtx_unlock(&uc->uc_lock); 327 } 328 if (tot == 0) 329 sbuf_printf(&sb, "%u) Empty ", i); 330 else { 331 sf0 = sf1 = sf2 = sf3 = sf4 = 0; 332 si0 = si1 = si2 = si3 = si4 = 0; 333 sw0 = sw1 = sw2 = sw3 = sw4 = 0; 334 for (j = 0; j < UMTX_CHAINS; j++) { 335 uc = &umtxq_chains[i][j]; 336 mtx_lock(&uc->uc_lock); 337 whole = uc->max_length * 100; 338 mtx_unlock(&uc->uc_lock); 339 fract = (whole % tot) * 100; 340 if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) { 341 sf0 = fract; 342 si0 = j; 343 sw0 = whole; 344 } else if (UPROF_PERC_BIGGER(whole, fract, sw1, 345 sf1)) { 346 sf1 = fract; 347 si1 = j; 348 sw1 = whole; 349 } else if (UPROF_PERC_BIGGER(whole, fract, sw2, 350 sf2)) { 351 sf2 = fract; 352 si2 = j; 353 sw2 = whole; 354 } else if (UPROF_PERC_BIGGER(whole, fract, sw3, 355 sf3)) { 356 sf3 = fract; 357 si3 = j; 358 sw3 = whole; 359 } else if (UPROF_PERC_BIGGER(whole, fract, sw4, 360 sf4)) { 361 sf4 = fract; 362 si4 = j; 363 sw4 = whole; 364 } 365 } 366 sbuf_printf(&sb, "queue %u:\n", i); 367 sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot, 368 sf0 / tot, si0); 369 sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot, 370 sf1 / tot, si1); 371 sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot, 372 sf2 / tot, si2); 373 sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot, 374 sf3 / tot, si3); 375 sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot, 376 sf4 / tot, si4); 377 } 378 } 379 sbuf_trim(&sb); 380 sbuf_finish(&sb); 381 sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req); 382 sbuf_delete(&sb); 383 return (0); 384 } 385 386 static int 387 sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS) 388 { 389 struct umtxq_chain *uc; 390 u_int i, j; 391 int clear, error; 392 393 clear = 0; 394 error = sysctl_handle_int(oidp, &clear, 0, req); 395 if (error != 0 || req->newptr == NULL) 396 return (error); 397 398 if (clear != 0) { 399 for (i = 0; i < 2; ++i) { 400 for (j = 0; j < UMTX_CHAINS; ++j) { 401 uc = &umtxq_chains[i][j]; 402 mtx_lock(&uc->uc_lock); 403 uc->length = 0; 404 uc->max_length = 0; 405 mtx_unlock(&uc->uc_lock); 406 } 407 } 408 } 409 return (0); 410 } 411 412 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear, 413 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, 414 sysctl_debug_umtx_chains_clear, "I", 415 "Clear umtx chains statistics"); 416 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks, 417 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0, 418 sysctl_debug_umtx_chains_peaks, "A", 419 "Highest peaks in chains max length"); 420 #endif 421 422 static void 423 umtxq_sysinit(void *arg __unused) 424 { 425 int i, j; 426 427 umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi), 428 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 429 for (i = 0; i < 2; ++i) { 430 for (j = 0; j < UMTX_CHAINS; ++j) { 431 mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL, 432 MTX_DEF | MTX_DUPOK); 433 LIST_INIT(&umtxq_chains[i][j].uc_queue[0]); 434 LIST_INIT(&umtxq_chains[i][j].uc_queue[1]); 435 LIST_INIT(&umtxq_chains[i][j].uc_spare_queue); 436 TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list); 437 umtxq_chains[i][j].uc_busy = 0; 438 umtxq_chains[i][j].uc_waiters = 0; 439 #ifdef UMTX_PROFILING 440 umtxq_chains[i][j].length = 0; 441 umtxq_chains[i][j].max_length = 0; 442 #endif 443 } 444 } 445 #ifdef UMTX_PROFILING 446 umtx_init_profiling(); 447 #endif 448 mtx_init(&umtx_lock, "umtx lock", NULL, MTX_DEF); 449 EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL, 450 EVENTHANDLER_PRI_ANY); 451 umtx_shm_init(); 452 } 453 454 struct umtx_q * 455 umtxq_alloc(void) 456 { 457 struct umtx_q *uq; 458 459 uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO); 460 uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX, 461 M_WAITOK | M_ZERO); 462 TAILQ_INIT(&uq->uq_spare_queue->head); 463 TAILQ_INIT(&uq->uq_pi_contested); 464 uq->uq_inherited_pri = PRI_MAX; 465 return (uq); 466 } 467 468 void 469 umtxq_free(struct umtx_q *uq) 470 { 471 472 MPASS(uq->uq_spare_queue != NULL); 473 free(uq->uq_spare_queue, M_UMTX); 474 free(uq, M_UMTX); 475 } 476 477 static inline void 478 umtxq_hash(struct umtx_key *key) 479 { 480 unsigned n; 481 482 n = (uintptr_t)key->info.both.a + key->info.both.b; 483 key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS; 484 } 485 486 static inline struct umtxq_chain * 487 umtxq_getchain(struct umtx_key *key) 488 { 489 490 if (key->type <= TYPE_SEM) 491 return (&umtxq_chains[1][key->hash]); 492 return (&umtxq_chains[0][key->hash]); 493 } 494 495 /* 496 * Lock a chain. 497 */ 498 static inline void 499 umtxq_lock(struct umtx_key *key) 500 { 501 struct umtxq_chain *uc; 502 503 uc = umtxq_getchain(key); 504 mtx_lock(&uc->uc_lock); 505 } 506 507 /* 508 * Unlock a chain. 509 */ 510 static inline void 511 umtxq_unlock(struct umtx_key *key) 512 { 513 struct umtxq_chain *uc; 514 515 uc = umtxq_getchain(key); 516 mtx_unlock(&uc->uc_lock); 517 } 518 519 /* 520 * Set chain to busy state when following operation 521 * may be blocked (kernel mutex can not be used). 522 */ 523 static inline void 524 umtxq_busy(struct umtx_key *key) 525 { 526 struct umtxq_chain *uc; 527 528 uc = umtxq_getchain(key); 529 mtx_assert(&uc->uc_lock, MA_OWNED); 530 if (uc->uc_busy) { 531 #ifdef SMP 532 if (smp_cpus > 1) { 533 int count = BUSY_SPINS; 534 if (count > 0) { 535 umtxq_unlock(key); 536 while (uc->uc_busy && --count > 0) 537 cpu_spinwait(); 538 umtxq_lock(key); 539 } 540 } 541 #endif 542 while (uc->uc_busy) { 543 uc->uc_waiters++; 544 msleep(uc, &uc->uc_lock, 0, "umtxqb", 0); 545 uc->uc_waiters--; 546 } 547 } 548 uc->uc_busy = 1; 549 } 550 551 /* 552 * Unbusy a chain. 553 */ 554 static inline void 555 umtxq_unbusy(struct umtx_key *key) 556 { 557 struct umtxq_chain *uc; 558 559 uc = umtxq_getchain(key); 560 mtx_assert(&uc->uc_lock, MA_OWNED); 561 KASSERT(uc->uc_busy != 0, ("not busy")); 562 uc->uc_busy = 0; 563 if (uc->uc_waiters) 564 wakeup_one(uc); 565 } 566 567 static inline void 568 umtxq_unbusy_unlocked(struct umtx_key *key) 569 { 570 571 umtxq_lock(key); 572 umtxq_unbusy(key); 573 umtxq_unlock(key); 574 } 575 576 static struct umtxq_queue * 577 umtxq_queue_lookup(struct umtx_key *key, int q) 578 { 579 struct umtxq_queue *uh; 580 struct umtxq_chain *uc; 581 582 uc = umtxq_getchain(key); 583 UMTXQ_LOCKED_ASSERT(uc); 584 LIST_FOREACH(uh, &uc->uc_queue[q], link) { 585 if (umtx_key_match(&uh->key, key)) 586 return (uh); 587 } 588 589 return (NULL); 590 } 591 592 static inline void 593 umtxq_insert_queue(struct umtx_q *uq, int q) 594 { 595 struct umtxq_queue *uh; 596 struct umtxq_chain *uc; 597 598 uc = umtxq_getchain(&uq->uq_key); 599 UMTXQ_LOCKED_ASSERT(uc); 600 KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue")); 601 uh = umtxq_queue_lookup(&uq->uq_key, q); 602 if (uh != NULL) { 603 LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link); 604 } else { 605 uh = uq->uq_spare_queue; 606 uh->key = uq->uq_key; 607 LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link); 608 #ifdef UMTX_PROFILING 609 uc->length++; 610 if (uc->length > uc->max_length) { 611 uc->max_length = uc->length; 612 if (uc->max_length > max_length) 613 max_length = uc->max_length; 614 } 615 #endif 616 } 617 uq->uq_spare_queue = NULL; 618 619 TAILQ_INSERT_TAIL(&uh->head, uq, uq_link); 620 uh->length++; 621 uq->uq_flags |= UQF_UMTXQ; 622 uq->uq_cur_queue = uh; 623 return; 624 } 625 626 static inline void 627 umtxq_remove_queue(struct umtx_q *uq, int q) 628 { 629 struct umtxq_chain *uc; 630 struct umtxq_queue *uh; 631 632 uc = umtxq_getchain(&uq->uq_key); 633 UMTXQ_LOCKED_ASSERT(uc); 634 if (uq->uq_flags & UQF_UMTXQ) { 635 uh = uq->uq_cur_queue; 636 TAILQ_REMOVE(&uh->head, uq, uq_link); 637 uh->length--; 638 uq->uq_flags &= ~UQF_UMTXQ; 639 if (TAILQ_EMPTY(&uh->head)) { 640 KASSERT(uh->length == 0, 641 ("inconsistent umtxq_queue length")); 642 #ifdef UMTX_PROFILING 643 uc->length--; 644 #endif 645 LIST_REMOVE(uh, link); 646 } else { 647 uh = LIST_FIRST(&uc->uc_spare_queue); 648 KASSERT(uh != NULL, ("uc_spare_queue is empty")); 649 LIST_REMOVE(uh, link); 650 } 651 uq->uq_spare_queue = uh; 652 uq->uq_cur_queue = NULL; 653 } 654 } 655 656 /* 657 * Check if there are multiple waiters 658 */ 659 static int 660 umtxq_count(struct umtx_key *key) 661 { 662 struct umtxq_queue *uh; 663 664 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 665 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 666 if (uh != NULL) 667 return (uh->length); 668 return (0); 669 } 670 671 /* 672 * Check if there are multiple PI waiters and returns first 673 * waiter. 674 */ 675 static int 676 umtxq_count_pi(struct umtx_key *key, struct umtx_q **first) 677 { 678 struct umtxq_queue *uh; 679 680 *first = NULL; 681 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 682 uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); 683 if (uh != NULL) { 684 *first = TAILQ_FIRST(&uh->head); 685 return (uh->length); 686 } 687 return (0); 688 } 689 690 /* 691 * Wake up threads waiting on an userland object. 692 */ 693 694 static int 695 umtxq_signal_queue(struct umtx_key *key, int n_wake, int q) 696 { 697 struct umtxq_queue *uh; 698 struct umtx_q *uq; 699 int ret; 700 701 ret = 0; 702 UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); 703 uh = umtxq_queue_lookup(key, q); 704 if (uh != NULL) { 705 while ((uq = TAILQ_FIRST(&uh->head)) != NULL) { 706 umtxq_remove_queue(uq, q); 707 wakeup(uq); 708 if (++ret >= n_wake) 709 return (ret); 710 } 711 } 712 return (ret); 713 } 714 715 /* 716 * Wake up specified thread. 717 */ 718 static inline void 719 umtxq_signal_thread(struct umtx_q *uq) 720 { 721 722 UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key)); 723 umtxq_remove(uq); 724 wakeup(uq); 725 } 726 727 static inline int 728 tstohz(const struct timespec *tsp) 729 { 730 struct timeval tv; 731 732 TIMESPEC_TO_TIMEVAL(&tv, tsp); 733 return tvtohz(&tv); 734 } 735 736 static void 737 abs_timeout_init(struct abs_timeout *timo, int clockid, int absolute, 738 const struct timespec *timeout) 739 { 740 741 timo->clockid = clockid; 742 if (!absolute) { 743 timo->is_abs_real = false; 744 abs_timeout_update(timo); 745 timespecadd(&timo->cur, timeout, &timo->end); 746 } else { 747 timo->end = *timeout; 748 timo->is_abs_real = clockid == CLOCK_REALTIME || 749 clockid == CLOCK_REALTIME_FAST || 750 clockid == CLOCK_REALTIME_PRECISE; 751 /* 752 * If is_abs_real, umtxq_sleep will read the clock 753 * after setting td_rtcgen; otherwise, read it here. 754 */ 755 if (!timo->is_abs_real) { 756 abs_timeout_update(timo); 757 } 758 } 759 } 760 761 static void 762 abs_timeout_init2(struct abs_timeout *timo, const struct _umtx_time *umtxtime) 763 { 764 765 abs_timeout_init(timo, umtxtime->_clockid, 766 (umtxtime->_flags & UMTX_ABSTIME) != 0, &umtxtime->_timeout); 767 } 768 769 static inline void 770 abs_timeout_update(struct abs_timeout *timo) 771 { 772 773 kern_clock_gettime(curthread, timo->clockid, &timo->cur); 774 } 775 776 static int 777 abs_timeout_gethz(struct abs_timeout *timo) 778 { 779 struct timespec tts; 780 781 if (timespeccmp(&timo->end, &timo->cur, <=)) 782 return (-1); 783 timespecsub(&timo->end, &timo->cur, &tts); 784 return (tstohz(&tts)); 785 } 786 787 static uint32_t 788 umtx_unlock_val(uint32_t flags, bool rb) 789 { 790 791 if (rb) 792 return (UMUTEX_RB_OWNERDEAD); 793 else if ((flags & UMUTEX_NONCONSISTENT) != 0) 794 return (UMUTEX_RB_NOTRECOV); 795 else 796 return (UMUTEX_UNOWNED); 797 798 } 799 800 /* 801 * Put thread into sleep state, before sleeping, check if 802 * thread was removed from umtx queue. 803 */ 804 static inline int 805 umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *abstime) 806 { 807 struct umtxq_chain *uc; 808 int error, timo; 809 810 if (abstime != NULL && abstime->is_abs_real) { 811 curthread->td_rtcgen = atomic_load_acq_int(&rtc_generation); 812 abs_timeout_update(abstime); 813 } 814 815 uc = umtxq_getchain(&uq->uq_key); 816 UMTXQ_LOCKED_ASSERT(uc); 817 for (;;) { 818 if (!(uq->uq_flags & UQF_UMTXQ)) { 819 error = 0; 820 break; 821 } 822 if (abstime != NULL) { 823 timo = abs_timeout_gethz(abstime); 824 if (timo < 0) { 825 error = ETIMEDOUT; 826 break; 827 } 828 } else 829 timo = 0; 830 error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo); 831 if (error == EINTR || error == ERESTART) { 832 umtxq_lock(&uq->uq_key); 833 break; 834 } 835 if (abstime != NULL) { 836 if (abstime->is_abs_real) 837 curthread->td_rtcgen = 838 atomic_load_acq_int(&rtc_generation); 839 abs_timeout_update(abstime); 840 } 841 umtxq_lock(&uq->uq_key); 842 } 843 844 curthread->td_rtcgen = 0; 845 return (error); 846 } 847 848 /* 849 * Convert userspace address into unique logical address. 850 */ 851 int 852 umtx_key_get(const void *addr, int type, int share, struct umtx_key *key) 853 { 854 struct thread *td = curthread; 855 vm_map_t map; 856 vm_map_entry_t entry; 857 vm_pindex_t pindex; 858 vm_prot_t prot; 859 boolean_t wired; 860 861 key->type = type; 862 if (share == THREAD_SHARE) { 863 key->shared = 0; 864 key->info.private.vs = td->td_proc->p_vmspace; 865 key->info.private.addr = (uintptr_t)addr; 866 } else { 867 MPASS(share == PROCESS_SHARE || share == AUTO_SHARE); 868 map = &td->td_proc->p_vmspace->vm_map; 869 if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE, 870 &entry, &key->info.shared.object, &pindex, &prot, 871 &wired) != KERN_SUCCESS) { 872 return (EFAULT); 873 } 874 875 if ((share == PROCESS_SHARE) || 876 (share == AUTO_SHARE && 877 VM_INHERIT_SHARE == entry->inheritance)) { 878 key->shared = 1; 879 key->info.shared.offset = (vm_offset_t)addr - 880 entry->start + entry->offset; 881 vm_object_reference(key->info.shared.object); 882 } else { 883 key->shared = 0; 884 key->info.private.vs = td->td_proc->p_vmspace; 885 key->info.private.addr = (uintptr_t)addr; 886 } 887 vm_map_lookup_done(map, entry); 888 } 889 890 umtxq_hash(key); 891 return (0); 892 } 893 894 /* 895 * Release key. 896 */ 897 void 898 umtx_key_release(struct umtx_key *key) 899 { 900 if (key->shared) 901 vm_object_deallocate(key->info.shared.object); 902 } 903 904 /* 905 * Fetch and compare value, sleep on the address if value is not changed. 906 */ 907 static int 908 do_wait(struct thread *td, void *addr, u_long id, 909 struct _umtx_time *timeout, int compat32, int is_private) 910 { 911 struct abs_timeout timo; 912 struct umtx_q *uq; 913 u_long tmp; 914 uint32_t tmp32; 915 int error = 0; 916 917 uq = td->td_umtxq; 918 if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT, 919 is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0) 920 return (error); 921 922 if (timeout != NULL) 923 abs_timeout_init2(&timo, timeout); 924 925 umtxq_lock(&uq->uq_key); 926 umtxq_insert(uq); 927 umtxq_unlock(&uq->uq_key); 928 if (compat32 == 0) { 929 error = fueword(addr, &tmp); 930 if (error != 0) 931 error = EFAULT; 932 } else { 933 error = fueword32(addr, &tmp32); 934 if (error == 0) 935 tmp = tmp32; 936 else 937 error = EFAULT; 938 } 939 umtxq_lock(&uq->uq_key); 940 if (error == 0) { 941 if (tmp == id) 942 error = umtxq_sleep(uq, "uwait", timeout == NULL ? 943 NULL : &timo); 944 if ((uq->uq_flags & UQF_UMTXQ) == 0) 945 error = 0; 946 else 947 umtxq_remove(uq); 948 } else if ((uq->uq_flags & UQF_UMTXQ) != 0) { 949 umtxq_remove(uq); 950 } 951 umtxq_unlock(&uq->uq_key); 952 umtx_key_release(&uq->uq_key); 953 if (error == ERESTART) 954 error = EINTR; 955 return (error); 956 } 957 958 /* 959 * Wake up threads sleeping on the specified address. 960 */ 961 int 962 kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private) 963 { 964 struct umtx_key key; 965 int ret; 966 967 if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, 968 is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0) 969 return (ret); 970 umtxq_lock(&key); 971 umtxq_signal(&key, n_wake); 972 umtxq_unlock(&key); 973 umtx_key_release(&key); 974 return (0); 975 } 976 977 /* 978 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex. 979 */ 980 static int 981 do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, 982 struct _umtx_time *timeout, int mode) 983 { 984 struct abs_timeout timo; 985 struct umtx_q *uq; 986 uint32_t owner, old, id; 987 int error, rv; 988 989 id = td->td_tid; 990 uq = td->td_umtxq; 991 error = 0; 992 if (timeout != NULL) 993 abs_timeout_init2(&timo, timeout); 994 995 /* 996 * Care must be exercised when dealing with umtx structure. It 997 * can fault on any access. 998 */ 999 for (;;) { 1000 rv = fueword32(&m->m_owner, &owner); 1001 if (rv == -1) 1002 return (EFAULT); 1003 if (mode == _UMUTEX_WAIT) { 1004 if (owner == UMUTEX_UNOWNED || 1005 owner == UMUTEX_CONTESTED || 1006 owner == UMUTEX_RB_OWNERDEAD || 1007 owner == UMUTEX_RB_NOTRECOV) 1008 return (0); 1009 } else { 1010 /* 1011 * Robust mutex terminated. Kernel duty is to 1012 * return EOWNERDEAD to the userspace. The 1013 * umutex.m_flags UMUTEX_NONCONSISTENT is set 1014 * by the common userspace code. 1015 */ 1016 if (owner == UMUTEX_RB_OWNERDEAD) { 1017 rv = casueword32(&m->m_owner, 1018 UMUTEX_RB_OWNERDEAD, &owner, 1019 id | UMUTEX_CONTESTED); 1020 if (rv == -1) 1021 return (EFAULT); 1022 if (rv == 0) { 1023 MPASS(owner == UMUTEX_RB_OWNERDEAD); 1024 return (EOWNERDEAD); /* success */ 1025 } 1026 MPASS(rv == 1); 1027 rv = thread_check_susp(td, false); 1028 if (rv != 0) 1029 return (rv); 1030 continue; 1031 } 1032 if (owner == UMUTEX_RB_NOTRECOV) 1033 return (ENOTRECOVERABLE); 1034 1035 /* 1036 * Try the uncontested case. This should be 1037 * done in userland. 1038 */ 1039 rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, 1040 &owner, id); 1041 /* The address was invalid. */ 1042 if (rv == -1) 1043 return (EFAULT); 1044 1045 /* The acquire succeeded. */ 1046 if (rv == 0) { 1047 MPASS(owner == UMUTEX_UNOWNED); 1048 return (0); 1049 } 1050 1051 /* 1052 * If no one owns it but it is contested try 1053 * to acquire it. 1054 */ 1055 MPASS(rv == 1); 1056 if (owner == UMUTEX_CONTESTED) { 1057 rv = casueword32(&m->m_owner, 1058 UMUTEX_CONTESTED, &owner, 1059 id | UMUTEX_CONTESTED); 1060 /* The address was invalid. */ 1061 if (rv == -1) 1062 return (EFAULT); 1063 if (rv == 0) { 1064 MPASS(owner == UMUTEX_CONTESTED); 1065 return (0); 1066 } 1067 if (rv == 1) { 1068 rv = thread_check_susp(td, false); 1069 if (rv != 0) 1070 return (rv); 1071 } 1072 1073 /* 1074 * If this failed the lock has 1075 * changed, restart. 1076 */ 1077 continue; 1078 } 1079 1080 /* rv == 1 but not contested, likely store failure */ 1081 rv = thread_check_susp(td, false); 1082 if (rv != 0) 1083 return (rv); 1084 } 1085 1086 if (mode == _UMUTEX_TRY) 1087 return (EBUSY); 1088 1089 /* 1090 * If we caught a signal, we have retried and now 1091 * exit immediately. 1092 */ 1093 if (error != 0) 1094 return (error); 1095 1096 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, 1097 GET_SHARE(flags), &uq->uq_key)) != 0) 1098 return (error); 1099 1100 umtxq_lock(&uq->uq_key); 1101 umtxq_busy(&uq->uq_key); 1102 umtxq_insert(uq); 1103 umtxq_unlock(&uq->uq_key); 1104 1105 /* 1106 * Set the contested bit so that a release in user space 1107 * knows to use the system call for unlock. If this fails 1108 * either some one else has acquired the lock or it has been 1109 * released. 1110 */ 1111 rv = casueword32(&m->m_owner, owner, &old, 1112 owner | UMUTEX_CONTESTED); 1113 1114 /* The address was invalid or casueword failed to store. */ 1115 if (rv == -1 || rv == 1) { 1116 umtxq_lock(&uq->uq_key); 1117 umtxq_remove(uq); 1118 umtxq_unbusy(&uq->uq_key); 1119 umtxq_unlock(&uq->uq_key); 1120 umtx_key_release(&uq->uq_key); 1121 if (rv == -1) 1122 return (EFAULT); 1123 if (rv == 1) { 1124 rv = thread_check_susp(td, false); 1125 if (rv != 0) 1126 return (rv); 1127 } 1128 continue; 1129 } 1130 1131 /* 1132 * We set the contested bit, sleep. Otherwise the lock changed 1133 * and we need to retry or we lost a race to the thread 1134 * unlocking the umtx. 1135 */ 1136 umtxq_lock(&uq->uq_key); 1137 umtxq_unbusy(&uq->uq_key); 1138 MPASS(old == owner); 1139 error = umtxq_sleep(uq, "umtxn", timeout == NULL ? 1140 NULL : &timo); 1141 umtxq_remove(uq); 1142 umtxq_unlock(&uq->uq_key); 1143 umtx_key_release(&uq->uq_key); 1144 1145 if (error == 0) 1146 error = thread_check_susp(td, false); 1147 } 1148 1149 return (0); 1150 } 1151 1152 /* 1153 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex. 1154 */ 1155 static int 1156 do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags, bool rb) 1157 { 1158 struct umtx_key key; 1159 uint32_t owner, old, id, newlock; 1160 int error, count; 1161 1162 id = td->td_tid; 1163 1164 again: 1165 /* 1166 * Make sure we own this mtx. 1167 */ 1168 error = fueword32(&m->m_owner, &owner); 1169 if (error == -1) 1170 return (EFAULT); 1171 1172 if ((owner & ~UMUTEX_CONTESTED) != id) 1173 return (EPERM); 1174 1175 newlock = umtx_unlock_val(flags, rb); 1176 if ((owner & UMUTEX_CONTESTED) == 0) { 1177 error = casueword32(&m->m_owner, owner, &old, newlock); 1178 if (error == -1) 1179 return (EFAULT); 1180 if (error == 1) { 1181 error = thread_check_susp(td, false); 1182 if (error != 0) 1183 return (error); 1184 goto again; 1185 } 1186 MPASS(old == owner); 1187 return (0); 1188 } 1189 1190 /* We should only ever be in here for contested locks */ 1191 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags), 1192 &key)) != 0) 1193 return (error); 1194 1195 umtxq_lock(&key); 1196 umtxq_busy(&key); 1197 count = umtxq_count(&key); 1198 umtxq_unlock(&key); 1199 1200 /* 1201 * When unlocking the umtx, it must be marked as unowned if 1202 * there is zero or one thread only waiting for it. 1203 * Otherwise, it must be marked as contested. 1204 */ 1205 if (count > 1) 1206 newlock |= UMUTEX_CONTESTED; 1207 error = casueword32(&m->m_owner, owner, &old, newlock); 1208 umtxq_lock(&key); 1209 umtxq_signal(&key, 1); 1210 umtxq_unbusy(&key); 1211 umtxq_unlock(&key); 1212 umtx_key_release(&key); 1213 if (error == -1) 1214 return (EFAULT); 1215 if (error == 1) { 1216 if (old != owner) 1217 return (EINVAL); 1218 error = thread_check_susp(td, false); 1219 if (error != 0) 1220 return (error); 1221 goto again; 1222 } 1223 return (0); 1224 } 1225 1226 /* 1227 * Check if the mutex is available and wake up a waiter, 1228 * only for simple mutex. 1229 */ 1230 static int 1231 do_wake_umutex(struct thread *td, struct umutex *m) 1232 { 1233 struct umtx_key key; 1234 uint32_t owner; 1235 uint32_t flags; 1236 int error; 1237 int count; 1238 1239 again: 1240 error = fueword32(&m->m_owner, &owner); 1241 if (error == -1) 1242 return (EFAULT); 1243 1244 if ((owner & ~UMUTEX_CONTESTED) != 0 && owner != UMUTEX_RB_OWNERDEAD && 1245 owner != UMUTEX_RB_NOTRECOV) 1246 return (0); 1247 1248 error = fueword32(&m->m_flags, &flags); 1249 if (error == -1) 1250 return (EFAULT); 1251 1252 /* We should only ever be in here for contested locks */ 1253 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags), 1254 &key)) != 0) 1255 return (error); 1256 1257 umtxq_lock(&key); 1258 umtxq_busy(&key); 1259 count = umtxq_count(&key); 1260 umtxq_unlock(&key); 1261 1262 if (count <= 1 && owner != UMUTEX_RB_OWNERDEAD && 1263 owner != UMUTEX_RB_NOTRECOV) { 1264 error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, 1265 UMUTEX_UNOWNED); 1266 if (error == -1) { 1267 error = EFAULT; 1268 } else if (error == 1) { 1269 umtxq_lock(&key); 1270 umtxq_unbusy(&key); 1271 umtxq_unlock(&key); 1272 umtx_key_release(&key); 1273 error = thread_check_susp(td, false); 1274 if (error != 0) 1275 return (error); 1276 goto again; 1277 } 1278 } 1279 1280 umtxq_lock(&key); 1281 if (error == 0 && count != 0) { 1282 MPASS((owner & ~UMUTEX_CONTESTED) == 0 || 1283 owner == UMUTEX_RB_OWNERDEAD || 1284 owner == UMUTEX_RB_NOTRECOV); 1285 umtxq_signal(&key, 1); 1286 } 1287 umtxq_unbusy(&key); 1288 umtxq_unlock(&key); 1289 umtx_key_release(&key); 1290 return (error); 1291 } 1292 1293 /* 1294 * Check if the mutex has waiters and tries to fix contention bit. 1295 */ 1296 static int 1297 do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags) 1298 { 1299 struct umtx_key key; 1300 uint32_t owner, old; 1301 int type; 1302 int error; 1303 int count; 1304 1305 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT | 1306 UMUTEX_ROBUST)) { 1307 case 0: 1308 case UMUTEX_ROBUST: 1309 type = TYPE_NORMAL_UMUTEX; 1310 break; 1311 case UMUTEX_PRIO_INHERIT: 1312 type = TYPE_PI_UMUTEX; 1313 break; 1314 case (UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST): 1315 type = TYPE_PI_ROBUST_UMUTEX; 1316 break; 1317 case UMUTEX_PRIO_PROTECT: 1318 type = TYPE_PP_UMUTEX; 1319 break; 1320 case (UMUTEX_PRIO_PROTECT | UMUTEX_ROBUST): 1321 type = TYPE_PP_ROBUST_UMUTEX; 1322 break; 1323 default: 1324 return (EINVAL); 1325 } 1326 if ((error = umtx_key_get(m, type, GET_SHARE(flags), &key)) != 0) 1327 return (error); 1328 1329 owner = 0; 1330 umtxq_lock(&key); 1331 umtxq_busy(&key); 1332 count = umtxq_count(&key); 1333 umtxq_unlock(&key); 1334 1335 error = fueword32(&m->m_owner, &owner); 1336 if (error == -1) 1337 error = EFAULT; 1338 1339 /* 1340 * Only repair contention bit if there is a waiter, this means 1341 * the mutex is still being referenced by userland code, 1342 * otherwise don't update any memory. 1343 */ 1344 while (error == 0 && (owner & UMUTEX_CONTESTED) == 0 && 1345 (count > 1 || (count == 1 && (owner & ~UMUTEX_CONTESTED) != 0))) { 1346 error = casueword32(&m->m_owner, owner, &old, 1347 owner | UMUTEX_CONTESTED); 1348 if (error == -1) { 1349 error = EFAULT; 1350 break; 1351 } 1352 if (error == 0) { 1353 MPASS(old == owner); 1354 break; 1355 } 1356 owner = old; 1357 error = thread_check_susp(td, false); 1358 } 1359 1360 umtxq_lock(&key); 1361 if (error == EFAULT) { 1362 umtxq_signal(&key, INT_MAX); 1363 } else if (count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 || 1364 owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV)) 1365 umtxq_signal(&key, 1); 1366 umtxq_unbusy(&key); 1367 umtxq_unlock(&key); 1368 umtx_key_release(&key); 1369 return (error); 1370 } 1371 1372 static inline struct umtx_pi * 1373 umtx_pi_alloc(int flags) 1374 { 1375 struct umtx_pi *pi; 1376 1377 pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags); 1378 TAILQ_INIT(&pi->pi_blocked); 1379 atomic_add_int(&umtx_pi_allocated, 1); 1380 return (pi); 1381 } 1382 1383 static inline void 1384 umtx_pi_free(struct umtx_pi *pi) 1385 { 1386 uma_zfree(umtx_pi_zone, pi); 1387 atomic_add_int(&umtx_pi_allocated, -1); 1388 } 1389 1390 /* 1391 * Adjust the thread's position on a pi_state after its priority has been 1392 * changed. 1393 */ 1394 static int 1395 umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td) 1396 { 1397 struct umtx_q *uq, *uq1, *uq2; 1398 struct thread *td1; 1399 1400 mtx_assert(&umtx_lock, MA_OWNED); 1401 if (pi == NULL) 1402 return (0); 1403 1404 uq = td->td_umtxq; 1405 1406 /* 1407 * Check if the thread needs to be moved on the blocked chain. 1408 * It needs to be moved if either its priority is lower than 1409 * the previous thread or higher than the next thread. 1410 */ 1411 uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq); 1412 uq2 = TAILQ_NEXT(uq, uq_lockq); 1413 if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) || 1414 (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) { 1415 /* 1416 * Remove thread from blocked chain and determine where 1417 * it should be moved to. 1418 */ 1419 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq); 1420 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) { 1421 td1 = uq1->uq_thread; 1422 MPASS(td1->td_proc->p_magic == P_MAGIC); 1423 if (UPRI(td1) > UPRI(td)) 1424 break; 1425 } 1426 1427 if (uq1 == NULL) 1428 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq); 1429 else 1430 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq); 1431 } 1432 return (1); 1433 } 1434 1435 static struct umtx_pi * 1436 umtx_pi_next(struct umtx_pi *pi) 1437 { 1438 struct umtx_q *uq_owner; 1439 1440 if (pi->pi_owner == NULL) 1441 return (NULL); 1442 uq_owner = pi->pi_owner->td_umtxq; 1443 if (uq_owner == NULL) 1444 return (NULL); 1445 return (uq_owner->uq_pi_blocked); 1446 } 1447 1448 /* 1449 * Floyd's Cycle-Finding Algorithm. 1450 */ 1451 static bool 1452 umtx_pi_check_loop(struct umtx_pi *pi) 1453 { 1454 struct umtx_pi *pi1; /* fast iterator */ 1455 1456 mtx_assert(&umtx_lock, MA_OWNED); 1457 if (pi == NULL) 1458 return (false); 1459 pi1 = pi; 1460 for (;;) { 1461 pi = umtx_pi_next(pi); 1462 if (pi == NULL) 1463 break; 1464 pi1 = umtx_pi_next(pi1); 1465 if (pi1 == NULL) 1466 break; 1467 pi1 = umtx_pi_next(pi1); 1468 if (pi1 == NULL) 1469 break; 1470 if (pi == pi1) 1471 return (true); 1472 } 1473 return (false); 1474 } 1475 1476 /* 1477 * Propagate priority when a thread is blocked on POSIX 1478 * PI mutex. 1479 */ 1480 static void 1481 umtx_propagate_priority(struct thread *td) 1482 { 1483 struct umtx_q *uq; 1484 struct umtx_pi *pi; 1485 int pri; 1486 1487 mtx_assert(&umtx_lock, MA_OWNED); 1488 pri = UPRI(td); 1489 uq = td->td_umtxq; 1490 pi = uq->uq_pi_blocked; 1491 if (pi == NULL) 1492 return; 1493 if (umtx_pi_check_loop(pi)) 1494 return; 1495 1496 for (;;) { 1497 td = pi->pi_owner; 1498 if (td == NULL || td == curthread) 1499 return; 1500 1501 MPASS(td->td_proc != NULL); 1502 MPASS(td->td_proc->p_magic == P_MAGIC); 1503 1504 thread_lock(td); 1505 if (td->td_lend_user_pri > pri) 1506 sched_lend_user_prio(td, pri); 1507 else { 1508 thread_unlock(td); 1509 break; 1510 } 1511 thread_unlock(td); 1512 1513 /* 1514 * Pick up the lock that td is blocked on. 1515 */ 1516 uq = td->td_umtxq; 1517 pi = uq->uq_pi_blocked; 1518 if (pi == NULL) 1519 break; 1520 /* Resort td on the list if needed. */ 1521 umtx_pi_adjust_thread(pi, td); 1522 } 1523 } 1524 1525 /* 1526 * Unpropagate priority for a PI mutex when a thread blocked on 1527 * it is interrupted by signal or resumed by others. 1528 */ 1529 static void 1530 umtx_repropagate_priority(struct umtx_pi *pi) 1531 { 1532 struct umtx_q *uq, *uq_owner; 1533 struct umtx_pi *pi2; 1534 int pri; 1535 1536 mtx_assert(&umtx_lock, MA_OWNED); 1537 1538 if (umtx_pi_check_loop(pi)) 1539 return; 1540 while (pi != NULL && pi->pi_owner != NULL) { 1541 pri = PRI_MAX; 1542 uq_owner = pi->pi_owner->td_umtxq; 1543 1544 TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) { 1545 uq = TAILQ_FIRST(&pi2->pi_blocked); 1546 if (uq != NULL) { 1547 if (pri > UPRI(uq->uq_thread)) 1548 pri = UPRI(uq->uq_thread); 1549 } 1550 } 1551 1552 if (pri > uq_owner->uq_inherited_pri) 1553 pri = uq_owner->uq_inherited_pri; 1554 thread_lock(pi->pi_owner); 1555 sched_lend_user_prio(pi->pi_owner, pri); 1556 thread_unlock(pi->pi_owner); 1557 if ((pi = uq_owner->uq_pi_blocked) != NULL) 1558 umtx_pi_adjust_thread(pi, uq_owner->uq_thread); 1559 } 1560 } 1561 1562 /* 1563 * Insert a PI mutex into owned list. 1564 */ 1565 static void 1566 umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner) 1567 { 1568 struct umtx_q *uq_owner; 1569 1570 uq_owner = owner->td_umtxq; 1571 mtx_assert(&umtx_lock, MA_OWNED); 1572 MPASS(pi->pi_owner == NULL); 1573 pi->pi_owner = owner; 1574 TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link); 1575 } 1576 1577 /* 1578 * Disown a PI mutex, and remove it from the owned list. 1579 */ 1580 static void 1581 umtx_pi_disown(struct umtx_pi *pi) 1582 { 1583 1584 mtx_assert(&umtx_lock, MA_OWNED); 1585 TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, pi, pi_link); 1586 pi->pi_owner = NULL; 1587 } 1588 1589 /* 1590 * Claim ownership of a PI mutex. 1591 */ 1592 static int 1593 umtx_pi_claim(struct umtx_pi *pi, struct thread *owner) 1594 { 1595 struct umtx_q *uq; 1596 int pri; 1597 1598 mtx_lock(&umtx_lock); 1599 if (pi->pi_owner == owner) { 1600 mtx_unlock(&umtx_lock); 1601 return (0); 1602 } 1603 1604 if (pi->pi_owner != NULL) { 1605 /* 1606 * userland may have already messed the mutex, sigh. 1607 */ 1608 mtx_unlock(&umtx_lock); 1609 return (EPERM); 1610 } 1611 umtx_pi_setowner(pi, owner); 1612 uq = TAILQ_FIRST(&pi->pi_blocked); 1613 if (uq != NULL) { 1614 pri = UPRI(uq->uq_thread); 1615 thread_lock(owner); 1616 if (pri < UPRI(owner)) 1617 sched_lend_user_prio(owner, pri); 1618 thread_unlock(owner); 1619 } 1620 mtx_unlock(&umtx_lock); 1621 return (0); 1622 } 1623 1624 /* 1625 * Adjust a thread's order position in its blocked PI mutex, 1626 * this may result new priority propagating process. 1627 */ 1628 void 1629 umtx_pi_adjust(struct thread *td, u_char oldpri) 1630 { 1631 struct umtx_q *uq; 1632 struct umtx_pi *pi; 1633 1634 uq = td->td_umtxq; 1635 mtx_lock(&umtx_lock); 1636 /* 1637 * Pick up the lock that td is blocked on. 1638 */ 1639 pi = uq->uq_pi_blocked; 1640 if (pi != NULL) { 1641 umtx_pi_adjust_thread(pi, td); 1642 umtx_repropagate_priority(pi); 1643 } 1644 mtx_unlock(&umtx_lock); 1645 } 1646 1647 /* 1648 * Sleep on a PI mutex. 1649 */ 1650 static int 1651 umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, uint32_t owner, 1652 const char *wmesg, struct abs_timeout *timo, bool shared) 1653 { 1654 struct thread *td, *td1; 1655 struct umtx_q *uq1; 1656 int error, pri; 1657 #ifdef INVARIANTS 1658 struct umtxq_chain *uc; 1659 1660 uc = umtxq_getchain(&pi->pi_key); 1661 #endif 1662 error = 0; 1663 td = uq->uq_thread; 1664 KASSERT(td == curthread, ("inconsistent uq_thread")); 1665 UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key)); 1666 KASSERT(uc->uc_busy != 0, ("umtx chain is not busy")); 1667 umtxq_insert(uq); 1668 mtx_lock(&umtx_lock); 1669 if (pi->pi_owner == NULL) { 1670 mtx_unlock(&umtx_lock); 1671 td1 = tdfind(owner, shared ? -1 : td->td_proc->p_pid); 1672 mtx_lock(&umtx_lock); 1673 if (td1 != NULL) { 1674 if (pi->pi_owner == NULL) 1675 umtx_pi_setowner(pi, td1); 1676 PROC_UNLOCK(td1->td_proc); 1677 } 1678 } 1679 1680 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) { 1681 pri = UPRI(uq1->uq_thread); 1682 if (pri > UPRI(td)) 1683 break; 1684 } 1685 1686 if (uq1 != NULL) 1687 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq); 1688 else 1689 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq); 1690 1691 uq->uq_pi_blocked = pi; 1692 thread_lock(td); 1693 td->td_flags |= TDF_UPIBLOCKED; 1694 thread_unlock(td); 1695 umtx_propagate_priority(td); 1696 mtx_unlock(&umtx_lock); 1697 umtxq_unbusy(&uq->uq_key); 1698 1699 error = umtxq_sleep(uq, wmesg, timo); 1700 umtxq_remove(uq); 1701 1702 mtx_lock(&umtx_lock); 1703 uq->uq_pi_blocked = NULL; 1704 thread_lock(td); 1705 td->td_flags &= ~TDF_UPIBLOCKED; 1706 thread_unlock(td); 1707 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq); 1708 umtx_repropagate_priority(pi); 1709 mtx_unlock(&umtx_lock); 1710 umtxq_unlock(&uq->uq_key); 1711 1712 return (error); 1713 } 1714 1715 /* 1716 * Add reference count for a PI mutex. 1717 */ 1718 static void 1719 umtx_pi_ref(struct umtx_pi *pi) 1720 { 1721 1722 UMTXQ_LOCKED_ASSERT(umtxq_getchain(&pi->pi_key)); 1723 pi->pi_refcount++; 1724 } 1725 1726 /* 1727 * Decrease reference count for a PI mutex, if the counter 1728 * is decreased to zero, its memory space is freed. 1729 */ 1730 static void 1731 umtx_pi_unref(struct umtx_pi *pi) 1732 { 1733 struct umtxq_chain *uc; 1734 1735 uc = umtxq_getchain(&pi->pi_key); 1736 UMTXQ_LOCKED_ASSERT(uc); 1737 KASSERT(pi->pi_refcount > 0, ("invalid reference count")); 1738 if (--pi->pi_refcount == 0) { 1739 mtx_lock(&umtx_lock); 1740 if (pi->pi_owner != NULL) 1741 umtx_pi_disown(pi); 1742 KASSERT(TAILQ_EMPTY(&pi->pi_blocked), 1743 ("blocked queue not empty")); 1744 mtx_unlock(&umtx_lock); 1745 TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink); 1746 umtx_pi_free(pi); 1747 } 1748 } 1749 1750 /* 1751 * Find a PI mutex in hash table. 1752 */ 1753 static struct umtx_pi * 1754 umtx_pi_lookup(struct umtx_key *key) 1755 { 1756 struct umtxq_chain *uc; 1757 struct umtx_pi *pi; 1758 1759 uc = umtxq_getchain(key); 1760 UMTXQ_LOCKED_ASSERT(uc); 1761 1762 TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) { 1763 if (umtx_key_match(&pi->pi_key, key)) { 1764 return (pi); 1765 } 1766 } 1767 return (NULL); 1768 } 1769 1770 /* 1771 * Insert a PI mutex into hash table. 1772 */ 1773 static inline void 1774 umtx_pi_insert(struct umtx_pi *pi) 1775 { 1776 struct umtxq_chain *uc; 1777 1778 uc = umtxq_getchain(&pi->pi_key); 1779 UMTXQ_LOCKED_ASSERT(uc); 1780 TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink); 1781 } 1782 1783 /* 1784 * Lock a PI mutex. 1785 */ 1786 static int 1787 do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, 1788 struct _umtx_time *timeout, int try) 1789 { 1790 struct abs_timeout timo; 1791 struct umtx_q *uq; 1792 struct umtx_pi *pi, *new_pi; 1793 uint32_t id, old_owner, owner, old; 1794 int error, rv; 1795 1796 id = td->td_tid; 1797 uq = td->td_umtxq; 1798 1799 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 1800 TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags), 1801 &uq->uq_key)) != 0) 1802 return (error); 1803 1804 if (timeout != NULL) 1805 abs_timeout_init2(&timo, timeout); 1806 1807 umtxq_lock(&uq->uq_key); 1808 pi = umtx_pi_lookup(&uq->uq_key); 1809 if (pi == NULL) { 1810 new_pi = umtx_pi_alloc(M_NOWAIT); 1811 if (new_pi == NULL) { 1812 umtxq_unlock(&uq->uq_key); 1813 new_pi = umtx_pi_alloc(M_WAITOK); 1814 umtxq_lock(&uq->uq_key); 1815 pi = umtx_pi_lookup(&uq->uq_key); 1816 if (pi != NULL) { 1817 umtx_pi_free(new_pi); 1818 new_pi = NULL; 1819 } 1820 } 1821 if (new_pi != NULL) { 1822 new_pi->pi_key = uq->uq_key; 1823 umtx_pi_insert(new_pi); 1824 pi = new_pi; 1825 } 1826 } 1827 umtx_pi_ref(pi); 1828 umtxq_unlock(&uq->uq_key); 1829 1830 /* 1831 * Care must be exercised when dealing with umtx structure. It 1832 * can fault on any access. 1833 */ 1834 for (;;) { 1835 /* 1836 * Try the uncontested case. This should be done in userland. 1837 */ 1838 rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id); 1839 /* The address was invalid. */ 1840 if (rv == -1) { 1841 error = EFAULT; 1842 break; 1843 } 1844 /* The acquire succeeded. */ 1845 if (rv == 0) { 1846 MPASS(owner == UMUTEX_UNOWNED); 1847 error = 0; 1848 break; 1849 } 1850 1851 if (owner == UMUTEX_RB_NOTRECOV) { 1852 error = ENOTRECOVERABLE; 1853 break; 1854 } 1855 1856 /* 1857 * Avoid overwriting a possible error from sleep due 1858 * to the pending signal with suspension check result. 1859 */ 1860 if (error == 0) { 1861 error = thread_check_susp(td, true); 1862 if (error != 0) 1863 break; 1864 } 1865 1866 /* If no one owns it but it is contested try to acquire it. */ 1867 if (owner == UMUTEX_CONTESTED || owner == UMUTEX_RB_OWNERDEAD) { 1868 old_owner = owner; 1869 rv = casueword32(&m->m_owner, owner, &owner, 1870 id | UMUTEX_CONTESTED); 1871 /* The address was invalid. */ 1872 if (rv == -1) { 1873 error = EFAULT; 1874 break; 1875 } 1876 if (rv == 1) { 1877 if (error == 0) { 1878 error = thread_check_susp(td, true); 1879 if (error != 0) 1880 break; 1881 } 1882 1883 /* 1884 * If this failed the lock could 1885 * changed, restart. 1886 */ 1887 continue; 1888 } 1889 1890 MPASS(rv == 0); 1891 MPASS(owner == old_owner); 1892 umtxq_lock(&uq->uq_key); 1893 umtxq_busy(&uq->uq_key); 1894 error = umtx_pi_claim(pi, td); 1895 umtxq_unbusy(&uq->uq_key); 1896 umtxq_unlock(&uq->uq_key); 1897 if (error != 0) { 1898 /* 1899 * Since we're going to return an 1900 * error, restore the m_owner to its 1901 * previous, unowned state to avoid 1902 * compounding the problem. 1903 */ 1904 (void)casuword32(&m->m_owner, 1905 id | UMUTEX_CONTESTED, old_owner); 1906 } 1907 if (error == 0 && old_owner == UMUTEX_RB_OWNERDEAD) 1908 error = EOWNERDEAD; 1909 break; 1910 } 1911 1912 if ((owner & ~UMUTEX_CONTESTED) == id) { 1913 error = EDEADLK; 1914 break; 1915 } 1916 1917 if (try != 0) { 1918 error = EBUSY; 1919 break; 1920 } 1921 1922 /* 1923 * If we caught a signal, we have retried and now 1924 * exit immediately. 1925 */ 1926 if (error != 0) 1927 break; 1928 1929 umtxq_lock(&uq->uq_key); 1930 umtxq_busy(&uq->uq_key); 1931 umtxq_unlock(&uq->uq_key); 1932 1933 /* 1934 * Set the contested bit so that a release in user space 1935 * knows to use the system call for unlock. If this fails 1936 * either some one else has acquired the lock or it has been 1937 * released. 1938 */ 1939 rv = casueword32(&m->m_owner, owner, &old, owner | 1940 UMUTEX_CONTESTED); 1941 1942 /* The address was invalid. */ 1943 if (rv == -1) { 1944 umtxq_unbusy_unlocked(&uq->uq_key); 1945 error = EFAULT; 1946 break; 1947 } 1948 if (rv == 1) { 1949 umtxq_unbusy_unlocked(&uq->uq_key); 1950 error = thread_check_susp(td, true); 1951 if (error != 0) 1952 break; 1953 1954 /* 1955 * The lock changed and we need to retry or we 1956 * lost a race to the thread unlocking the 1957 * umtx. Note that the UMUTEX_RB_OWNERDEAD 1958 * value for owner is impossible there. 1959 */ 1960 continue; 1961 } 1962 1963 umtxq_lock(&uq->uq_key); 1964 1965 /* We set the contested bit, sleep. */ 1966 MPASS(old == owner); 1967 error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED, 1968 "umtxpi", timeout == NULL ? NULL : &timo, 1969 (flags & USYNC_PROCESS_SHARED) != 0); 1970 if (error != 0) 1971 continue; 1972 1973 error = thread_check_susp(td, false); 1974 if (error != 0) 1975 break; 1976 } 1977 1978 umtxq_lock(&uq->uq_key); 1979 umtx_pi_unref(pi); 1980 umtxq_unlock(&uq->uq_key); 1981 1982 umtx_key_release(&uq->uq_key); 1983 return (error); 1984 } 1985 1986 /* 1987 * Unlock a PI mutex. 1988 */ 1989 static int 1990 do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags, bool rb) 1991 { 1992 struct umtx_key key; 1993 struct umtx_q *uq_first, *uq_first2, *uq_me; 1994 struct umtx_pi *pi, *pi2; 1995 uint32_t id, new_owner, old, owner; 1996 int count, error, pri; 1997 1998 id = td->td_tid; 1999 2000 usrloop: 2001 /* 2002 * Make sure we own this mtx. 2003 */ 2004 error = fueword32(&m->m_owner, &owner); 2005 if (error == -1) 2006 return (EFAULT); 2007 2008 if ((owner & ~UMUTEX_CONTESTED) != id) 2009 return (EPERM); 2010 2011 new_owner = umtx_unlock_val(flags, rb); 2012 2013 /* This should be done in userland */ 2014 if ((owner & UMUTEX_CONTESTED) == 0) { 2015 error = casueword32(&m->m_owner, owner, &old, new_owner); 2016 if (error == -1) 2017 return (EFAULT); 2018 if (error == 1) { 2019 error = thread_check_susp(td, true); 2020 if (error != 0) 2021 return (error); 2022 goto usrloop; 2023 } 2024 if (old == owner) 2025 return (0); 2026 owner = old; 2027 } 2028 2029 /* We should only ever be in here for contested locks */ 2030 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2031 TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags), 2032 &key)) != 0) 2033 return (error); 2034 2035 umtxq_lock(&key); 2036 umtxq_busy(&key); 2037 count = umtxq_count_pi(&key, &uq_first); 2038 if (uq_first != NULL) { 2039 mtx_lock(&umtx_lock); 2040 pi = uq_first->uq_pi_blocked; 2041 KASSERT(pi != NULL, ("pi == NULL?")); 2042 if (pi->pi_owner != td && !(rb && pi->pi_owner == NULL)) { 2043 mtx_unlock(&umtx_lock); 2044 umtxq_unbusy(&key); 2045 umtxq_unlock(&key); 2046 umtx_key_release(&key); 2047 /* userland messed the mutex */ 2048 return (EPERM); 2049 } 2050 uq_me = td->td_umtxq; 2051 if (pi->pi_owner == td) 2052 umtx_pi_disown(pi); 2053 /* get highest priority thread which is still sleeping. */ 2054 uq_first = TAILQ_FIRST(&pi->pi_blocked); 2055 while (uq_first != NULL && 2056 (uq_first->uq_flags & UQF_UMTXQ) == 0) { 2057 uq_first = TAILQ_NEXT(uq_first, uq_lockq); 2058 } 2059 pri = PRI_MAX; 2060 TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) { 2061 uq_first2 = TAILQ_FIRST(&pi2->pi_blocked); 2062 if (uq_first2 != NULL) { 2063 if (pri > UPRI(uq_first2->uq_thread)) 2064 pri = UPRI(uq_first2->uq_thread); 2065 } 2066 } 2067 thread_lock(td); 2068 sched_lend_user_prio(td, pri); 2069 thread_unlock(td); 2070 mtx_unlock(&umtx_lock); 2071 if (uq_first) 2072 umtxq_signal_thread(uq_first); 2073 } else { 2074 pi = umtx_pi_lookup(&key); 2075 /* 2076 * A umtx_pi can exist if a signal or timeout removed the 2077 * last waiter from the umtxq, but there is still 2078 * a thread in do_lock_pi() holding the umtx_pi. 2079 */ 2080 if (pi != NULL) { 2081 /* 2082 * The umtx_pi can be unowned, such as when a thread 2083 * has just entered do_lock_pi(), allocated the 2084 * umtx_pi, and unlocked the umtxq. 2085 * If the current thread owns it, it must disown it. 2086 */ 2087 mtx_lock(&umtx_lock); 2088 if (pi->pi_owner == td) 2089 umtx_pi_disown(pi); 2090 mtx_unlock(&umtx_lock); 2091 } 2092 } 2093 umtxq_unlock(&key); 2094 2095 /* 2096 * When unlocking the umtx, it must be marked as unowned if 2097 * there is zero or one thread only waiting for it. 2098 * Otherwise, it must be marked as contested. 2099 */ 2100 2101 if (count > 1) 2102 new_owner |= UMUTEX_CONTESTED; 2103 again: 2104 error = casueword32(&m->m_owner, owner, &old, new_owner); 2105 if (error == 1) { 2106 error = thread_check_susp(td, false); 2107 if (error == 0) 2108 goto again; 2109 } 2110 umtxq_unbusy_unlocked(&key); 2111 umtx_key_release(&key); 2112 if (error == -1) 2113 return (EFAULT); 2114 if (error == 0 && old != owner) 2115 return (EINVAL); 2116 return (error); 2117 } 2118 2119 /* 2120 * Lock a PP mutex. 2121 */ 2122 static int 2123 do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, 2124 struct _umtx_time *timeout, int try) 2125 { 2126 struct abs_timeout timo; 2127 struct umtx_q *uq, *uq2; 2128 struct umtx_pi *pi; 2129 uint32_t ceiling; 2130 uint32_t owner, id; 2131 int error, pri, old_inherited_pri, su, rv; 2132 2133 id = td->td_tid; 2134 uq = td->td_umtxq; 2135 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2136 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2137 &uq->uq_key)) != 0) 2138 return (error); 2139 2140 if (timeout != NULL) 2141 abs_timeout_init2(&timo, timeout); 2142 2143 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); 2144 for (;;) { 2145 old_inherited_pri = uq->uq_inherited_pri; 2146 umtxq_lock(&uq->uq_key); 2147 umtxq_busy(&uq->uq_key); 2148 umtxq_unlock(&uq->uq_key); 2149 2150 rv = fueword32(&m->m_ceilings[0], &ceiling); 2151 if (rv == -1) { 2152 error = EFAULT; 2153 goto out; 2154 } 2155 ceiling = RTP_PRIO_MAX - ceiling; 2156 if (ceiling > RTP_PRIO_MAX) { 2157 error = EINVAL; 2158 goto out; 2159 } 2160 2161 mtx_lock(&umtx_lock); 2162 if (UPRI(td) < PRI_MIN_REALTIME + ceiling) { 2163 mtx_unlock(&umtx_lock); 2164 error = EINVAL; 2165 goto out; 2166 } 2167 if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) { 2168 uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling; 2169 thread_lock(td); 2170 if (uq->uq_inherited_pri < UPRI(td)) 2171 sched_lend_user_prio(td, uq->uq_inherited_pri); 2172 thread_unlock(td); 2173 } 2174 mtx_unlock(&umtx_lock); 2175 2176 rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, 2177 id | UMUTEX_CONTESTED); 2178 /* The address was invalid. */ 2179 if (rv == -1) { 2180 error = EFAULT; 2181 break; 2182 } 2183 if (rv == 0) { 2184 MPASS(owner == UMUTEX_CONTESTED); 2185 error = 0; 2186 break; 2187 } 2188 /* rv == 1 */ 2189 if (owner == UMUTEX_RB_OWNERDEAD) { 2190 rv = casueword32(&m->m_owner, UMUTEX_RB_OWNERDEAD, 2191 &owner, id | UMUTEX_CONTESTED); 2192 if (rv == -1) { 2193 error = EFAULT; 2194 break; 2195 } 2196 if (rv == 0) { 2197 MPASS(owner == UMUTEX_RB_OWNERDEAD); 2198 error = EOWNERDEAD; /* success */ 2199 break; 2200 } 2201 2202 /* 2203 * rv == 1, only check for suspension if we 2204 * did not already catched a signal. If we 2205 * get an error from the check, the same 2206 * condition is checked by the umtxq_sleep() 2207 * call below, so we should obliterate the 2208 * error to not skip the last loop iteration. 2209 */ 2210 if (error == 0) { 2211 error = thread_check_susp(td, false); 2212 if (error == 0) { 2213 if (try != 0) 2214 error = EBUSY; 2215 else 2216 continue; 2217 } 2218 error = 0; 2219 } 2220 } else if (owner == UMUTEX_RB_NOTRECOV) { 2221 error = ENOTRECOVERABLE; 2222 } 2223 2224 if (try != 0) 2225 error = EBUSY; 2226 2227 /* 2228 * If we caught a signal, we have retried and now 2229 * exit immediately. 2230 */ 2231 if (error != 0) 2232 break; 2233 2234 umtxq_lock(&uq->uq_key); 2235 umtxq_insert(uq); 2236 umtxq_unbusy(&uq->uq_key); 2237 error = umtxq_sleep(uq, "umtxpp", timeout == NULL ? 2238 NULL : &timo); 2239 umtxq_remove(uq); 2240 umtxq_unlock(&uq->uq_key); 2241 2242 mtx_lock(&umtx_lock); 2243 uq->uq_inherited_pri = old_inherited_pri; 2244 pri = PRI_MAX; 2245 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2246 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2247 if (uq2 != NULL) { 2248 if (pri > UPRI(uq2->uq_thread)) 2249 pri = UPRI(uq2->uq_thread); 2250 } 2251 } 2252 if (pri > uq->uq_inherited_pri) 2253 pri = uq->uq_inherited_pri; 2254 thread_lock(td); 2255 sched_lend_user_prio(td, pri); 2256 thread_unlock(td); 2257 mtx_unlock(&umtx_lock); 2258 } 2259 2260 if (error != 0 && error != EOWNERDEAD) { 2261 mtx_lock(&umtx_lock); 2262 uq->uq_inherited_pri = old_inherited_pri; 2263 pri = PRI_MAX; 2264 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2265 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2266 if (uq2 != NULL) { 2267 if (pri > UPRI(uq2->uq_thread)) 2268 pri = UPRI(uq2->uq_thread); 2269 } 2270 } 2271 if (pri > uq->uq_inherited_pri) 2272 pri = uq->uq_inherited_pri; 2273 thread_lock(td); 2274 sched_lend_user_prio(td, pri); 2275 thread_unlock(td); 2276 mtx_unlock(&umtx_lock); 2277 } 2278 2279 out: 2280 umtxq_unbusy_unlocked(&uq->uq_key); 2281 umtx_key_release(&uq->uq_key); 2282 return (error); 2283 } 2284 2285 /* 2286 * Unlock a PP mutex. 2287 */ 2288 static int 2289 do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, bool rb) 2290 { 2291 struct umtx_key key; 2292 struct umtx_q *uq, *uq2; 2293 struct umtx_pi *pi; 2294 uint32_t id, owner, rceiling; 2295 int error, pri, new_inherited_pri, su; 2296 2297 id = td->td_tid; 2298 uq = td->td_umtxq; 2299 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); 2300 2301 /* 2302 * Make sure we own this mtx. 2303 */ 2304 error = fueword32(&m->m_owner, &owner); 2305 if (error == -1) 2306 return (EFAULT); 2307 2308 if ((owner & ~UMUTEX_CONTESTED) != id) 2309 return (EPERM); 2310 2311 error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t)); 2312 if (error != 0) 2313 return (error); 2314 2315 if (rceiling == -1) 2316 new_inherited_pri = PRI_MAX; 2317 else { 2318 rceiling = RTP_PRIO_MAX - rceiling; 2319 if (rceiling > RTP_PRIO_MAX) 2320 return (EINVAL); 2321 new_inherited_pri = PRI_MIN_REALTIME + rceiling; 2322 } 2323 2324 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2325 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2326 &key)) != 0) 2327 return (error); 2328 umtxq_lock(&key); 2329 umtxq_busy(&key); 2330 umtxq_unlock(&key); 2331 /* 2332 * For priority protected mutex, always set unlocked state 2333 * to UMUTEX_CONTESTED, so that userland always enters kernel 2334 * to lock the mutex, it is necessary because thread priority 2335 * has to be adjusted for such mutex. 2336 */ 2337 error = suword32(&m->m_owner, umtx_unlock_val(flags, rb) | 2338 UMUTEX_CONTESTED); 2339 2340 umtxq_lock(&key); 2341 if (error == 0) 2342 umtxq_signal(&key, 1); 2343 umtxq_unbusy(&key); 2344 umtxq_unlock(&key); 2345 2346 if (error == -1) 2347 error = EFAULT; 2348 else { 2349 mtx_lock(&umtx_lock); 2350 if (su != 0) 2351 uq->uq_inherited_pri = new_inherited_pri; 2352 pri = PRI_MAX; 2353 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2354 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2355 if (uq2 != NULL) { 2356 if (pri > UPRI(uq2->uq_thread)) 2357 pri = UPRI(uq2->uq_thread); 2358 } 2359 } 2360 if (pri > uq->uq_inherited_pri) 2361 pri = uq->uq_inherited_pri; 2362 thread_lock(td); 2363 sched_lend_user_prio(td, pri); 2364 thread_unlock(td); 2365 mtx_unlock(&umtx_lock); 2366 } 2367 umtx_key_release(&key); 2368 return (error); 2369 } 2370 2371 static int 2372 do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling, 2373 uint32_t *old_ceiling) 2374 { 2375 struct umtx_q *uq; 2376 uint32_t flags, id, owner, save_ceiling; 2377 int error, rv, rv1; 2378 2379 error = fueword32(&m->m_flags, &flags); 2380 if (error == -1) 2381 return (EFAULT); 2382 if ((flags & UMUTEX_PRIO_PROTECT) == 0) 2383 return (EINVAL); 2384 if (ceiling > RTP_PRIO_MAX) 2385 return (EINVAL); 2386 id = td->td_tid; 2387 uq = td->td_umtxq; 2388 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2389 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2390 &uq->uq_key)) != 0) 2391 return (error); 2392 for (;;) { 2393 umtxq_lock(&uq->uq_key); 2394 umtxq_busy(&uq->uq_key); 2395 umtxq_unlock(&uq->uq_key); 2396 2397 rv = fueword32(&m->m_ceilings[0], &save_ceiling); 2398 if (rv == -1) { 2399 error = EFAULT; 2400 break; 2401 } 2402 2403 rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, 2404 id | UMUTEX_CONTESTED); 2405 if (rv == -1) { 2406 error = EFAULT; 2407 break; 2408 } 2409 2410 if (rv == 0) { 2411 MPASS(owner == UMUTEX_CONTESTED); 2412 rv = suword32(&m->m_ceilings[0], ceiling); 2413 rv1 = suword32(&m->m_owner, UMUTEX_CONTESTED); 2414 error = (rv == 0 && rv1 == 0) ? 0: EFAULT; 2415 break; 2416 } 2417 2418 if ((owner & ~UMUTEX_CONTESTED) == id) { 2419 rv = suword32(&m->m_ceilings[0], ceiling); 2420 error = rv == 0 ? 0 : EFAULT; 2421 break; 2422 } 2423 2424 if (owner == UMUTEX_RB_OWNERDEAD) { 2425 error = EOWNERDEAD; 2426 break; 2427 } else if (owner == UMUTEX_RB_NOTRECOV) { 2428 error = ENOTRECOVERABLE; 2429 break; 2430 } 2431 2432 /* 2433 * If we caught a signal, we have retried and now 2434 * exit immediately. 2435 */ 2436 if (error != 0) 2437 break; 2438 2439 /* 2440 * We set the contested bit, sleep. Otherwise the lock changed 2441 * and we need to retry or we lost a race to the thread 2442 * unlocking the umtx. 2443 */ 2444 umtxq_lock(&uq->uq_key); 2445 umtxq_insert(uq); 2446 umtxq_unbusy(&uq->uq_key); 2447 error = umtxq_sleep(uq, "umtxpp", NULL); 2448 umtxq_remove(uq); 2449 umtxq_unlock(&uq->uq_key); 2450 } 2451 umtxq_lock(&uq->uq_key); 2452 if (error == 0) 2453 umtxq_signal(&uq->uq_key, INT_MAX); 2454 umtxq_unbusy(&uq->uq_key); 2455 umtxq_unlock(&uq->uq_key); 2456 umtx_key_release(&uq->uq_key); 2457 if (error == 0 && old_ceiling != NULL) { 2458 rv = suword32(old_ceiling, save_ceiling); 2459 error = rv == 0 ? 0 : EFAULT; 2460 } 2461 return (error); 2462 } 2463 2464 /* 2465 * Lock a userland POSIX mutex. 2466 */ 2467 static int 2468 do_lock_umutex(struct thread *td, struct umutex *m, 2469 struct _umtx_time *timeout, int mode) 2470 { 2471 uint32_t flags; 2472 int error; 2473 2474 error = fueword32(&m->m_flags, &flags); 2475 if (error == -1) 2476 return (EFAULT); 2477 2478 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2479 case 0: 2480 error = do_lock_normal(td, m, flags, timeout, mode); 2481 break; 2482 case UMUTEX_PRIO_INHERIT: 2483 error = do_lock_pi(td, m, flags, timeout, mode); 2484 break; 2485 case UMUTEX_PRIO_PROTECT: 2486 error = do_lock_pp(td, m, flags, timeout, mode); 2487 break; 2488 default: 2489 return (EINVAL); 2490 } 2491 if (timeout == NULL) { 2492 if (error == EINTR && mode != _UMUTEX_WAIT) 2493 error = ERESTART; 2494 } else { 2495 /* Timed-locking is not restarted. */ 2496 if (error == ERESTART) 2497 error = EINTR; 2498 } 2499 return (error); 2500 } 2501 2502 /* 2503 * Unlock a userland POSIX mutex. 2504 */ 2505 static int 2506 do_unlock_umutex(struct thread *td, struct umutex *m, bool rb) 2507 { 2508 uint32_t flags; 2509 int error; 2510 2511 error = fueword32(&m->m_flags, &flags); 2512 if (error == -1) 2513 return (EFAULT); 2514 2515 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2516 case 0: 2517 return (do_unlock_normal(td, m, flags, rb)); 2518 case UMUTEX_PRIO_INHERIT: 2519 return (do_unlock_pi(td, m, flags, rb)); 2520 case UMUTEX_PRIO_PROTECT: 2521 return (do_unlock_pp(td, m, flags, rb)); 2522 } 2523 2524 return (EINVAL); 2525 } 2526 2527 static int 2528 do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m, 2529 struct timespec *timeout, u_long wflags) 2530 { 2531 struct abs_timeout timo; 2532 struct umtx_q *uq; 2533 uint32_t flags, clockid, hasw; 2534 int error; 2535 2536 uq = td->td_umtxq; 2537 error = fueword32(&cv->c_flags, &flags); 2538 if (error == -1) 2539 return (EFAULT); 2540 error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key); 2541 if (error != 0) 2542 return (error); 2543 2544 if ((wflags & CVWAIT_CLOCKID) != 0) { 2545 error = fueword32(&cv->c_clockid, &clockid); 2546 if (error == -1) { 2547 umtx_key_release(&uq->uq_key); 2548 return (EFAULT); 2549 } 2550 if (clockid < CLOCK_REALTIME || 2551 clockid >= CLOCK_THREAD_CPUTIME_ID) { 2552 /* hmm, only HW clock id will work. */ 2553 umtx_key_release(&uq->uq_key); 2554 return (EINVAL); 2555 } 2556 } else { 2557 clockid = CLOCK_REALTIME; 2558 } 2559 2560 umtxq_lock(&uq->uq_key); 2561 umtxq_busy(&uq->uq_key); 2562 umtxq_insert(uq); 2563 umtxq_unlock(&uq->uq_key); 2564 2565 /* 2566 * Set c_has_waiters to 1 before releasing user mutex, also 2567 * don't modify cache line when unnecessary. 2568 */ 2569 error = fueword32(&cv->c_has_waiters, &hasw); 2570 if (error == 0 && hasw == 0) 2571 suword32(&cv->c_has_waiters, 1); 2572 2573 umtxq_unbusy_unlocked(&uq->uq_key); 2574 2575 error = do_unlock_umutex(td, m, false); 2576 2577 if (timeout != NULL) 2578 abs_timeout_init(&timo, clockid, (wflags & CVWAIT_ABSTIME) != 0, 2579 timeout); 2580 2581 umtxq_lock(&uq->uq_key); 2582 if (error == 0) { 2583 error = umtxq_sleep(uq, "ucond", timeout == NULL ? 2584 NULL : &timo); 2585 } 2586 2587 if ((uq->uq_flags & UQF_UMTXQ) == 0) 2588 error = 0; 2589 else { 2590 /* 2591 * This must be timeout,interrupted by signal or 2592 * surprious wakeup, clear c_has_waiter flag when 2593 * necessary. 2594 */ 2595 umtxq_busy(&uq->uq_key); 2596 if ((uq->uq_flags & UQF_UMTXQ) != 0) { 2597 int oldlen = uq->uq_cur_queue->length; 2598 umtxq_remove(uq); 2599 if (oldlen == 1) { 2600 umtxq_unlock(&uq->uq_key); 2601 suword32(&cv->c_has_waiters, 0); 2602 umtxq_lock(&uq->uq_key); 2603 } 2604 } 2605 umtxq_unbusy(&uq->uq_key); 2606 if (error == ERESTART) 2607 error = EINTR; 2608 } 2609 2610 umtxq_unlock(&uq->uq_key); 2611 umtx_key_release(&uq->uq_key); 2612 return (error); 2613 } 2614 2615 /* 2616 * Signal a userland condition variable. 2617 */ 2618 static int 2619 do_cv_signal(struct thread *td, struct ucond *cv) 2620 { 2621 struct umtx_key key; 2622 int error, cnt, nwake; 2623 uint32_t flags; 2624 2625 error = fueword32(&cv->c_flags, &flags); 2626 if (error == -1) 2627 return (EFAULT); 2628 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2629 return (error); 2630 umtxq_lock(&key); 2631 umtxq_busy(&key); 2632 cnt = umtxq_count(&key); 2633 nwake = umtxq_signal(&key, 1); 2634 if (cnt <= nwake) { 2635 umtxq_unlock(&key); 2636 error = suword32(&cv->c_has_waiters, 0); 2637 if (error == -1) 2638 error = EFAULT; 2639 umtxq_lock(&key); 2640 } 2641 umtxq_unbusy(&key); 2642 umtxq_unlock(&key); 2643 umtx_key_release(&key); 2644 return (error); 2645 } 2646 2647 static int 2648 do_cv_broadcast(struct thread *td, struct ucond *cv) 2649 { 2650 struct umtx_key key; 2651 int error; 2652 uint32_t flags; 2653 2654 error = fueword32(&cv->c_flags, &flags); 2655 if (error == -1) 2656 return (EFAULT); 2657 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2658 return (error); 2659 2660 umtxq_lock(&key); 2661 umtxq_busy(&key); 2662 umtxq_signal(&key, INT_MAX); 2663 umtxq_unlock(&key); 2664 2665 error = suword32(&cv->c_has_waiters, 0); 2666 if (error == -1) 2667 error = EFAULT; 2668 2669 umtxq_unbusy_unlocked(&key); 2670 2671 umtx_key_release(&key); 2672 return (error); 2673 } 2674 2675 static int 2676 do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, 2677 struct _umtx_time *timeout) 2678 { 2679 struct abs_timeout timo; 2680 struct umtx_q *uq; 2681 uint32_t flags, wrflags; 2682 int32_t state, oldstate; 2683 int32_t blocked_readers; 2684 int error, error1, rv; 2685 2686 uq = td->td_umtxq; 2687 error = fueword32(&rwlock->rw_flags, &flags); 2688 if (error == -1) 2689 return (EFAULT); 2690 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2691 if (error != 0) 2692 return (error); 2693 2694 if (timeout != NULL) 2695 abs_timeout_init2(&timo, timeout); 2696 2697 wrflags = URWLOCK_WRITE_OWNER; 2698 if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER)) 2699 wrflags |= URWLOCK_WRITE_WAITERS; 2700 2701 for (;;) { 2702 rv = fueword32(&rwlock->rw_state, &state); 2703 if (rv == -1) { 2704 umtx_key_release(&uq->uq_key); 2705 return (EFAULT); 2706 } 2707 2708 /* try to lock it */ 2709 while (!(state & wrflags)) { 2710 if (__predict_false(URWLOCK_READER_COUNT(state) == 2711 URWLOCK_MAX_READERS)) { 2712 umtx_key_release(&uq->uq_key); 2713 return (EAGAIN); 2714 } 2715 rv = casueword32(&rwlock->rw_state, state, 2716 &oldstate, state + 1); 2717 if (rv == -1) { 2718 umtx_key_release(&uq->uq_key); 2719 return (EFAULT); 2720 } 2721 if (rv == 0) { 2722 MPASS(oldstate == state); 2723 umtx_key_release(&uq->uq_key); 2724 return (0); 2725 } 2726 error = thread_check_susp(td, true); 2727 if (error != 0) 2728 break; 2729 state = oldstate; 2730 } 2731 2732 if (error) 2733 break; 2734 2735 /* grab monitor lock */ 2736 umtxq_lock(&uq->uq_key); 2737 umtxq_busy(&uq->uq_key); 2738 umtxq_unlock(&uq->uq_key); 2739 2740 /* 2741 * re-read the state, in case it changed between the try-lock above 2742 * and the check below 2743 */ 2744 rv = fueword32(&rwlock->rw_state, &state); 2745 if (rv == -1) 2746 error = EFAULT; 2747 2748 /* set read contention bit */ 2749 while (error == 0 && (state & wrflags) && 2750 !(state & URWLOCK_READ_WAITERS)) { 2751 rv = casueword32(&rwlock->rw_state, state, 2752 &oldstate, state | URWLOCK_READ_WAITERS); 2753 if (rv == -1) { 2754 error = EFAULT; 2755 break; 2756 } 2757 if (rv == 0) { 2758 MPASS(oldstate == state); 2759 goto sleep; 2760 } 2761 state = oldstate; 2762 error = thread_check_susp(td, false); 2763 if (error != 0) 2764 break; 2765 } 2766 if (error != 0) { 2767 umtxq_unbusy_unlocked(&uq->uq_key); 2768 break; 2769 } 2770 2771 /* state is changed while setting flags, restart */ 2772 if (!(state & wrflags)) { 2773 umtxq_unbusy_unlocked(&uq->uq_key); 2774 error = thread_check_susp(td, true); 2775 if (error != 0) 2776 break; 2777 continue; 2778 } 2779 2780 sleep: 2781 /* 2782 * Contention bit is set, before sleeping, increase 2783 * read waiter count. 2784 */ 2785 rv = fueword32(&rwlock->rw_blocked_readers, 2786 &blocked_readers); 2787 if (rv == -1) { 2788 umtxq_unbusy_unlocked(&uq->uq_key); 2789 error = EFAULT; 2790 break; 2791 } 2792 suword32(&rwlock->rw_blocked_readers, blocked_readers+1); 2793 2794 while (state & wrflags) { 2795 umtxq_lock(&uq->uq_key); 2796 umtxq_insert(uq); 2797 umtxq_unbusy(&uq->uq_key); 2798 2799 error = umtxq_sleep(uq, "urdlck", timeout == NULL ? 2800 NULL : &timo); 2801 2802 umtxq_busy(&uq->uq_key); 2803 umtxq_remove(uq); 2804 umtxq_unlock(&uq->uq_key); 2805 if (error) 2806 break; 2807 rv = fueword32(&rwlock->rw_state, &state); 2808 if (rv == -1) { 2809 error = EFAULT; 2810 break; 2811 } 2812 } 2813 2814 /* decrease read waiter count, and may clear read contention bit */ 2815 rv = fueword32(&rwlock->rw_blocked_readers, 2816 &blocked_readers); 2817 if (rv == -1) { 2818 umtxq_unbusy_unlocked(&uq->uq_key); 2819 error = EFAULT; 2820 break; 2821 } 2822 suword32(&rwlock->rw_blocked_readers, blocked_readers-1); 2823 if (blocked_readers == 1) { 2824 rv = fueword32(&rwlock->rw_state, &state); 2825 if (rv == -1) { 2826 umtxq_unbusy_unlocked(&uq->uq_key); 2827 error = EFAULT; 2828 break; 2829 } 2830 for (;;) { 2831 rv = casueword32(&rwlock->rw_state, state, 2832 &oldstate, state & ~URWLOCK_READ_WAITERS); 2833 if (rv == -1) { 2834 error = EFAULT; 2835 break; 2836 } 2837 if (rv == 0) { 2838 MPASS(oldstate == state); 2839 break; 2840 } 2841 state = oldstate; 2842 error1 = thread_check_susp(td, false); 2843 if (error1 != 0) { 2844 if (error == 0) 2845 error = error1; 2846 break; 2847 } 2848 } 2849 } 2850 2851 umtxq_unbusy_unlocked(&uq->uq_key); 2852 if (error != 0) 2853 break; 2854 } 2855 umtx_key_release(&uq->uq_key); 2856 if (error == ERESTART) 2857 error = EINTR; 2858 return (error); 2859 } 2860 2861 static int 2862 do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout) 2863 { 2864 struct abs_timeout timo; 2865 struct umtx_q *uq; 2866 uint32_t flags; 2867 int32_t state, oldstate; 2868 int32_t blocked_writers; 2869 int32_t blocked_readers; 2870 int error, error1, rv; 2871 2872 uq = td->td_umtxq; 2873 error = fueword32(&rwlock->rw_flags, &flags); 2874 if (error == -1) 2875 return (EFAULT); 2876 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2877 if (error != 0) 2878 return (error); 2879 2880 if (timeout != NULL) 2881 abs_timeout_init2(&timo, timeout); 2882 2883 blocked_readers = 0; 2884 for (;;) { 2885 rv = fueword32(&rwlock->rw_state, &state); 2886 if (rv == -1) { 2887 umtx_key_release(&uq->uq_key); 2888 return (EFAULT); 2889 } 2890 while ((state & URWLOCK_WRITE_OWNER) == 0 && 2891 URWLOCK_READER_COUNT(state) == 0) { 2892 rv = casueword32(&rwlock->rw_state, state, 2893 &oldstate, state | URWLOCK_WRITE_OWNER); 2894 if (rv == -1) { 2895 umtx_key_release(&uq->uq_key); 2896 return (EFAULT); 2897 } 2898 if (rv == 0) { 2899 MPASS(oldstate == state); 2900 umtx_key_release(&uq->uq_key); 2901 return (0); 2902 } 2903 state = oldstate; 2904 error = thread_check_susp(td, true); 2905 if (error != 0) 2906 break; 2907 } 2908 2909 if (error) { 2910 if ((state & (URWLOCK_WRITE_OWNER | 2911 URWLOCK_WRITE_WAITERS)) == 0 && 2912 blocked_readers != 0) { 2913 umtxq_lock(&uq->uq_key); 2914 umtxq_busy(&uq->uq_key); 2915 umtxq_signal_queue(&uq->uq_key, INT_MAX, 2916 UMTX_SHARED_QUEUE); 2917 umtxq_unbusy(&uq->uq_key); 2918 umtxq_unlock(&uq->uq_key); 2919 } 2920 2921 break; 2922 } 2923 2924 /* grab monitor lock */ 2925 umtxq_lock(&uq->uq_key); 2926 umtxq_busy(&uq->uq_key); 2927 umtxq_unlock(&uq->uq_key); 2928 2929 /* 2930 * Re-read the state, in case it changed between the 2931 * try-lock above and the check below. 2932 */ 2933 rv = fueword32(&rwlock->rw_state, &state); 2934 if (rv == -1) 2935 error = EFAULT; 2936 2937 while (error == 0 && ((state & URWLOCK_WRITE_OWNER) || 2938 URWLOCK_READER_COUNT(state) != 0) && 2939 (state & URWLOCK_WRITE_WAITERS) == 0) { 2940 rv = casueword32(&rwlock->rw_state, state, 2941 &oldstate, state | URWLOCK_WRITE_WAITERS); 2942 if (rv == -1) { 2943 error = EFAULT; 2944 break; 2945 } 2946 if (rv == 0) { 2947 MPASS(oldstate == state); 2948 goto sleep; 2949 } 2950 state = oldstate; 2951 error = thread_check_susp(td, false); 2952 if (error != 0) 2953 break; 2954 } 2955 if (error != 0) { 2956 umtxq_unbusy_unlocked(&uq->uq_key); 2957 break; 2958 } 2959 2960 if ((state & URWLOCK_WRITE_OWNER) == 0 && 2961 URWLOCK_READER_COUNT(state) == 0) { 2962 umtxq_unbusy_unlocked(&uq->uq_key); 2963 error = thread_check_susp(td, false); 2964 if (error != 0) 2965 break; 2966 continue; 2967 } 2968 sleep: 2969 rv = fueword32(&rwlock->rw_blocked_writers, 2970 &blocked_writers); 2971 if (rv == -1) { 2972 umtxq_unbusy_unlocked(&uq->uq_key); 2973 error = EFAULT; 2974 break; 2975 } 2976 suword32(&rwlock->rw_blocked_writers, blocked_writers + 1); 2977 2978 while ((state & URWLOCK_WRITE_OWNER) || 2979 URWLOCK_READER_COUNT(state) != 0) { 2980 umtxq_lock(&uq->uq_key); 2981 umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE); 2982 umtxq_unbusy(&uq->uq_key); 2983 2984 error = umtxq_sleep(uq, "uwrlck", timeout == NULL ? 2985 NULL : &timo); 2986 2987 umtxq_busy(&uq->uq_key); 2988 umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE); 2989 umtxq_unlock(&uq->uq_key); 2990 if (error) 2991 break; 2992 rv = fueword32(&rwlock->rw_state, &state); 2993 if (rv == -1) { 2994 error = EFAULT; 2995 break; 2996 } 2997 } 2998 2999 rv = fueword32(&rwlock->rw_blocked_writers, 3000 &blocked_writers); 3001 if (rv == -1) { 3002 umtxq_unbusy_unlocked(&uq->uq_key); 3003 error = EFAULT; 3004 break; 3005 } 3006 suword32(&rwlock->rw_blocked_writers, blocked_writers-1); 3007 if (blocked_writers == 1) { 3008 rv = fueword32(&rwlock->rw_state, &state); 3009 if (rv == -1) { 3010 umtxq_unbusy_unlocked(&uq->uq_key); 3011 error = EFAULT; 3012 break; 3013 } 3014 for (;;) { 3015 rv = casueword32(&rwlock->rw_state, state, 3016 &oldstate, state & ~URWLOCK_WRITE_WAITERS); 3017 if (rv == -1) { 3018 error = EFAULT; 3019 break; 3020 } 3021 if (rv == 0) { 3022 MPASS(oldstate == state); 3023 break; 3024 } 3025 state = oldstate; 3026 error1 = thread_check_susp(td, false); 3027 /* 3028 * We are leaving the URWLOCK_WRITE_WAITERS 3029 * behind, but this should not harm the 3030 * correctness. 3031 */ 3032 if (error1 != 0) { 3033 if (error == 0) 3034 error = error1; 3035 break; 3036 } 3037 } 3038 rv = fueword32(&rwlock->rw_blocked_readers, 3039 &blocked_readers); 3040 if (rv == -1) { 3041 umtxq_unbusy_unlocked(&uq->uq_key); 3042 error = EFAULT; 3043 break; 3044 } 3045 } else 3046 blocked_readers = 0; 3047 3048 umtxq_unbusy_unlocked(&uq->uq_key); 3049 } 3050 3051 umtx_key_release(&uq->uq_key); 3052 if (error == ERESTART) 3053 error = EINTR; 3054 return (error); 3055 } 3056 3057 static int 3058 do_rw_unlock(struct thread *td, struct urwlock *rwlock) 3059 { 3060 struct umtx_q *uq; 3061 uint32_t flags; 3062 int32_t state, oldstate; 3063 int error, rv, q, count; 3064 3065 uq = td->td_umtxq; 3066 error = fueword32(&rwlock->rw_flags, &flags); 3067 if (error == -1) 3068 return (EFAULT); 3069 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3070 if (error != 0) 3071 return (error); 3072 3073 error = fueword32(&rwlock->rw_state, &state); 3074 if (error == -1) { 3075 error = EFAULT; 3076 goto out; 3077 } 3078 if (state & URWLOCK_WRITE_OWNER) { 3079 for (;;) { 3080 rv = casueword32(&rwlock->rw_state, state, 3081 &oldstate, state & ~URWLOCK_WRITE_OWNER); 3082 if (rv == -1) { 3083 error = EFAULT; 3084 goto out; 3085 } 3086 if (rv == 1) { 3087 state = oldstate; 3088 if (!(oldstate & URWLOCK_WRITE_OWNER)) { 3089 error = EPERM; 3090 goto out; 3091 } 3092 error = thread_check_susp(td, true); 3093 if (error != 0) 3094 goto out; 3095 } else 3096 break; 3097 } 3098 } else if (URWLOCK_READER_COUNT(state) != 0) { 3099 for (;;) { 3100 rv = casueword32(&rwlock->rw_state, state, 3101 &oldstate, state - 1); 3102 if (rv == -1) { 3103 error = EFAULT; 3104 goto out; 3105 } 3106 if (rv == 1) { 3107 state = oldstate; 3108 if (URWLOCK_READER_COUNT(oldstate) == 0) { 3109 error = EPERM; 3110 goto out; 3111 } 3112 error = thread_check_susp(td, true); 3113 if (error != 0) 3114 goto out; 3115 } else 3116 break; 3117 } 3118 } else { 3119 error = EPERM; 3120 goto out; 3121 } 3122 3123 count = 0; 3124 3125 if (!(flags & URWLOCK_PREFER_READER)) { 3126 if (state & URWLOCK_WRITE_WAITERS) { 3127 count = 1; 3128 q = UMTX_EXCLUSIVE_QUEUE; 3129 } else if (state & URWLOCK_READ_WAITERS) { 3130 count = INT_MAX; 3131 q = UMTX_SHARED_QUEUE; 3132 } 3133 } else { 3134 if (state & URWLOCK_READ_WAITERS) { 3135 count = INT_MAX; 3136 q = UMTX_SHARED_QUEUE; 3137 } else if (state & URWLOCK_WRITE_WAITERS) { 3138 count = 1; 3139 q = UMTX_EXCLUSIVE_QUEUE; 3140 } 3141 } 3142 3143 if (count) { 3144 umtxq_lock(&uq->uq_key); 3145 umtxq_busy(&uq->uq_key); 3146 umtxq_signal_queue(&uq->uq_key, count, q); 3147 umtxq_unbusy(&uq->uq_key); 3148 umtxq_unlock(&uq->uq_key); 3149 } 3150 out: 3151 umtx_key_release(&uq->uq_key); 3152 return (error); 3153 } 3154 3155 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3156 static int 3157 do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout) 3158 { 3159 struct abs_timeout timo; 3160 struct umtx_q *uq; 3161 uint32_t flags, count, count1; 3162 int error, rv, rv1; 3163 3164 uq = td->td_umtxq; 3165 error = fueword32(&sem->_flags, &flags); 3166 if (error == -1) 3167 return (EFAULT); 3168 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3169 if (error != 0) 3170 return (error); 3171 3172 if (timeout != NULL) 3173 abs_timeout_init2(&timo, timeout); 3174 3175 again: 3176 umtxq_lock(&uq->uq_key); 3177 umtxq_busy(&uq->uq_key); 3178 umtxq_insert(uq); 3179 umtxq_unlock(&uq->uq_key); 3180 rv = casueword32(&sem->_has_waiters, 0, &count1, 1); 3181 if (rv == 0) 3182 rv1 = fueword32(&sem->_count, &count); 3183 if (rv == -1 || (rv == 0 && (rv1 == -1 || count != 0)) || 3184 (rv == 1 && count1 == 0)) { 3185 umtxq_lock(&uq->uq_key); 3186 umtxq_unbusy(&uq->uq_key); 3187 umtxq_remove(uq); 3188 umtxq_unlock(&uq->uq_key); 3189 if (rv == 1) { 3190 rv = thread_check_susp(td, true); 3191 if (rv == 0) 3192 goto again; 3193 error = rv; 3194 goto out; 3195 } 3196 if (rv == 0) 3197 rv = rv1; 3198 error = rv == -1 ? EFAULT : 0; 3199 goto out; 3200 } 3201 umtxq_lock(&uq->uq_key); 3202 umtxq_unbusy(&uq->uq_key); 3203 3204 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 3205 3206 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3207 error = 0; 3208 else { 3209 umtxq_remove(uq); 3210 /* A relative timeout cannot be restarted. */ 3211 if (error == ERESTART && timeout != NULL && 3212 (timeout->_flags & UMTX_ABSTIME) == 0) 3213 error = EINTR; 3214 } 3215 umtxq_unlock(&uq->uq_key); 3216 out: 3217 umtx_key_release(&uq->uq_key); 3218 return (error); 3219 } 3220 3221 /* 3222 * Signal a userland semaphore. 3223 */ 3224 static int 3225 do_sem_wake(struct thread *td, struct _usem *sem) 3226 { 3227 struct umtx_key key; 3228 int error, cnt; 3229 uint32_t flags; 3230 3231 error = fueword32(&sem->_flags, &flags); 3232 if (error == -1) 3233 return (EFAULT); 3234 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3235 return (error); 3236 umtxq_lock(&key); 3237 umtxq_busy(&key); 3238 cnt = umtxq_count(&key); 3239 if (cnt > 0) { 3240 /* 3241 * Check if count is greater than 0, this means the memory is 3242 * still being referenced by user code, so we can safely 3243 * update _has_waiters flag. 3244 */ 3245 if (cnt == 1) { 3246 umtxq_unlock(&key); 3247 error = suword32(&sem->_has_waiters, 0); 3248 umtxq_lock(&key); 3249 if (error == -1) 3250 error = EFAULT; 3251 } 3252 umtxq_signal(&key, 1); 3253 } 3254 umtxq_unbusy(&key); 3255 umtxq_unlock(&key); 3256 umtx_key_release(&key); 3257 return (error); 3258 } 3259 #endif 3260 3261 static int 3262 do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout) 3263 { 3264 struct abs_timeout timo; 3265 struct umtx_q *uq; 3266 uint32_t count, flags; 3267 int error, rv; 3268 3269 uq = td->td_umtxq; 3270 flags = fuword32(&sem->_flags); 3271 if (timeout != NULL) 3272 abs_timeout_init2(&timo, timeout); 3273 3274 again: 3275 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3276 if (error != 0) 3277 return (error); 3278 umtxq_lock(&uq->uq_key); 3279 umtxq_busy(&uq->uq_key); 3280 umtxq_insert(uq); 3281 umtxq_unlock(&uq->uq_key); 3282 rv = fueword32(&sem->_count, &count); 3283 if (rv == -1) { 3284 umtxq_lock(&uq->uq_key); 3285 umtxq_unbusy(&uq->uq_key); 3286 umtxq_remove(uq); 3287 umtxq_unlock(&uq->uq_key); 3288 umtx_key_release(&uq->uq_key); 3289 return (EFAULT); 3290 } 3291 for (;;) { 3292 if (USEM_COUNT(count) != 0) { 3293 umtxq_lock(&uq->uq_key); 3294 umtxq_unbusy(&uq->uq_key); 3295 umtxq_remove(uq); 3296 umtxq_unlock(&uq->uq_key); 3297 umtx_key_release(&uq->uq_key); 3298 return (0); 3299 } 3300 if (count == USEM_HAS_WAITERS) 3301 break; 3302 rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS); 3303 if (rv == 0) 3304 break; 3305 umtxq_lock(&uq->uq_key); 3306 umtxq_unbusy(&uq->uq_key); 3307 umtxq_remove(uq); 3308 umtxq_unlock(&uq->uq_key); 3309 umtx_key_release(&uq->uq_key); 3310 if (rv == -1) 3311 return (EFAULT); 3312 rv = thread_check_susp(td, true); 3313 if (rv != 0) 3314 return (rv); 3315 goto again; 3316 } 3317 umtxq_lock(&uq->uq_key); 3318 umtxq_unbusy(&uq->uq_key); 3319 3320 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 3321 3322 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3323 error = 0; 3324 else { 3325 umtxq_remove(uq); 3326 if (timeout != NULL && (timeout->_flags & UMTX_ABSTIME) == 0) { 3327 /* A relative timeout cannot be restarted. */ 3328 if (error == ERESTART) 3329 error = EINTR; 3330 if (error == EINTR) { 3331 abs_timeout_update(&timo); 3332 timespecsub(&timo.end, &timo.cur, 3333 &timeout->_timeout); 3334 } 3335 } 3336 } 3337 umtxq_unlock(&uq->uq_key); 3338 umtx_key_release(&uq->uq_key); 3339 return (error); 3340 } 3341 3342 /* 3343 * Signal a userland semaphore. 3344 */ 3345 static int 3346 do_sem2_wake(struct thread *td, struct _usem2 *sem) 3347 { 3348 struct umtx_key key; 3349 int error, cnt, rv; 3350 uint32_t count, flags; 3351 3352 rv = fueword32(&sem->_flags, &flags); 3353 if (rv == -1) 3354 return (EFAULT); 3355 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3356 return (error); 3357 umtxq_lock(&key); 3358 umtxq_busy(&key); 3359 cnt = umtxq_count(&key); 3360 if (cnt > 0) { 3361 /* 3362 * If this was the last sleeping thread, clear the waiters 3363 * flag in _count. 3364 */ 3365 if (cnt == 1) { 3366 umtxq_unlock(&key); 3367 rv = fueword32(&sem->_count, &count); 3368 while (rv != -1 && count & USEM_HAS_WAITERS) { 3369 rv = casueword32(&sem->_count, count, &count, 3370 count & ~USEM_HAS_WAITERS); 3371 if (rv == 1) { 3372 rv = thread_check_susp(td, true); 3373 if (rv != 0) 3374 break; 3375 } 3376 } 3377 if (rv == -1) 3378 error = EFAULT; 3379 else if (rv > 0) { 3380 error = rv; 3381 } 3382 umtxq_lock(&key); 3383 } 3384 3385 umtxq_signal(&key, 1); 3386 } 3387 umtxq_unbusy(&key); 3388 umtxq_unlock(&key); 3389 umtx_key_release(&key); 3390 return (error); 3391 } 3392 3393 inline int 3394 umtx_copyin_timeout(const void *uaddr, struct timespec *tsp) 3395 { 3396 int error; 3397 3398 error = copyin(uaddr, tsp, sizeof(*tsp)); 3399 if (error == 0) { 3400 if (tsp->tv_sec < 0 || 3401 tsp->tv_nsec >= 1000000000 || 3402 tsp->tv_nsec < 0) 3403 error = EINVAL; 3404 } 3405 return (error); 3406 } 3407 3408 static inline int 3409 umtx_copyin_umtx_time(const void *uaddr, size_t size, struct _umtx_time *tp) 3410 { 3411 int error; 3412 3413 if (size <= sizeof(tp->_timeout)) { 3414 tp->_clockid = CLOCK_REALTIME; 3415 tp->_flags = 0; 3416 error = copyin(uaddr, &tp->_timeout, sizeof(tp->_timeout)); 3417 } else 3418 error = copyin(uaddr, tp, sizeof(*tp)); 3419 if (error != 0) 3420 return (error); 3421 if (tp->_timeout.tv_sec < 0 || 3422 tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0) 3423 return (EINVAL); 3424 return (0); 3425 } 3426 3427 static int 3428 umtx_copyin_robust_lists(const void *uaddr, size_t size, 3429 struct umtx_robust_lists_params *rb) 3430 { 3431 3432 if (size > sizeof(*rb)) 3433 return (EINVAL); 3434 return (copyin(uaddr, rb, size)); 3435 } 3436 3437 static int 3438 umtx_copyout_timeout(void *uaddr, size_t sz, struct timespec *tsp) 3439 { 3440 3441 /* 3442 * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time) 3443 * and we're only called if sz >= sizeof(timespec) as supplied in the 3444 * copyops. 3445 */ 3446 KASSERT(sz >= sizeof(*tsp), 3447 ("umtx_copyops specifies incorrect sizes")); 3448 3449 return (copyout(tsp, uaddr, sizeof(*tsp))); 3450 } 3451 3452 static int 3453 __umtx_op_unimpl(struct thread *td, struct _umtx_op_args *uap, 3454 const struct umtx_copyops *ops __unused) 3455 { 3456 3457 return (EOPNOTSUPP); 3458 } 3459 3460 static int 3461 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap, 3462 const struct umtx_copyops *ops) 3463 { 3464 struct _umtx_time timeout, *tm_p; 3465 int error; 3466 3467 if (uap->uaddr2 == NULL) 3468 tm_p = NULL; 3469 else { 3470 error = ops->copyin_umtx_time( 3471 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3472 if (error != 0) 3473 return (error); 3474 tm_p = &timeout; 3475 } 3476 return (do_wait(td, uap->obj, uap->val, tm_p, ops->compat32, 0)); 3477 } 3478 3479 static int 3480 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap, 3481 const struct umtx_copyops *ops) 3482 { 3483 struct _umtx_time timeout, *tm_p; 3484 int error; 3485 3486 if (uap->uaddr2 == NULL) 3487 tm_p = NULL; 3488 else { 3489 error = ops->copyin_umtx_time( 3490 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3491 if (error != 0) 3492 return (error); 3493 tm_p = &timeout; 3494 } 3495 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); 3496 } 3497 3498 static int 3499 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap, 3500 const struct umtx_copyops *ops) 3501 { 3502 struct _umtx_time *tm_p, timeout; 3503 int error; 3504 3505 if (uap->uaddr2 == NULL) 3506 tm_p = NULL; 3507 else { 3508 error = ops->copyin_umtx_time( 3509 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3510 if (error != 0) 3511 return (error); 3512 tm_p = &timeout; 3513 } 3514 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1)); 3515 } 3516 3517 static int 3518 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap, 3519 const struct umtx_copyops *ops __unused) 3520 { 3521 3522 return (kern_umtx_wake(td, uap->obj, uap->val, 0)); 3523 } 3524 3525 #define BATCH_SIZE 128 3526 static int 3527 __umtx_op_nwake_private_native(struct thread *td, struct _umtx_op_args *uap) 3528 { 3529 char *uaddrs[BATCH_SIZE], **upp; 3530 int count, error, i, pos, tocopy; 3531 3532 upp = (char **)uap->obj; 3533 error = 0; 3534 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 3535 pos += tocopy) { 3536 tocopy = MIN(count, BATCH_SIZE); 3537 error = copyin(upp + pos, uaddrs, tocopy * sizeof(char *)); 3538 if (error != 0) 3539 break; 3540 for (i = 0; i < tocopy; ++i) { 3541 kern_umtx_wake(td, uaddrs[i], INT_MAX, 1); 3542 } 3543 maybe_yield(); 3544 } 3545 return (error); 3546 } 3547 3548 static int 3549 __umtx_op_nwake_private_compat32(struct thread *td, struct _umtx_op_args *uap) 3550 { 3551 uint32_t uaddrs[BATCH_SIZE], *upp; 3552 int count, error, i, pos, tocopy; 3553 3554 upp = (uint32_t *)uap->obj; 3555 error = 0; 3556 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 3557 pos += tocopy) { 3558 tocopy = MIN(count, BATCH_SIZE); 3559 error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t)); 3560 if (error != 0) 3561 break; 3562 for (i = 0; i < tocopy; ++i) { 3563 kern_umtx_wake(td, (void *)(uintptr_t)uaddrs[i], 3564 INT_MAX, 1); 3565 } 3566 maybe_yield(); 3567 } 3568 return (error); 3569 } 3570 3571 static int 3572 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap, 3573 const struct umtx_copyops *ops) 3574 { 3575 3576 if (ops->compat32) 3577 return (__umtx_op_nwake_private_compat32(td, uap)); 3578 return (__umtx_op_nwake_private_native(td, uap)); 3579 } 3580 3581 static int 3582 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap, 3583 const struct umtx_copyops *ops __unused) 3584 { 3585 3586 return (kern_umtx_wake(td, uap->obj, uap->val, 1)); 3587 } 3588 3589 static int 3590 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap, 3591 const struct umtx_copyops *ops) 3592 { 3593 struct _umtx_time *tm_p, timeout; 3594 int error; 3595 3596 /* Allow a null timespec (wait forever). */ 3597 if (uap->uaddr2 == NULL) 3598 tm_p = NULL; 3599 else { 3600 error = ops->copyin_umtx_time( 3601 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3602 if (error != 0) 3603 return (error); 3604 tm_p = &timeout; 3605 } 3606 return (do_lock_umutex(td, uap->obj, tm_p, 0)); 3607 } 3608 3609 static int 3610 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap, 3611 const struct umtx_copyops *ops __unused) 3612 { 3613 3614 return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY)); 3615 } 3616 3617 static int 3618 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap, 3619 const struct umtx_copyops *ops) 3620 { 3621 struct _umtx_time *tm_p, timeout; 3622 int error; 3623 3624 /* Allow a null timespec (wait forever). */ 3625 if (uap->uaddr2 == NULL) 3626 tm_p = NULL; 3627 else { 3628 error = ops->copyin_umtx_time( 3629 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3630 if (error != 0) 3631 return (error); 3632 tm_p = &timeout; 3633 } 3634 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT)); 3635 } 3636 3637 static int 3638 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap, 3639 const struct umtx_copyops *ops __unused) 3640 { 3641 3642 return (do_wake_umutex(td, uap->obj)); 3643 } 3644 3645 static int 3646 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap, 3647 const struct umtx_copyops *ops __unused) 3648 { 3649 3650 return (do_unlock_umutex(td, uap->obj, false)); 3651 } 3652 3653 static int 3654 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap, 3655 const struct umtx_copyops *ops __unused) 3656 { 3657 3658 return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1)); 3659 } 3660 3661 static int 3662 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap, 3663 const struct umtx_copyops *ops) 3664 { 3665 struct timespec *ts, timeout; 3666 int error; 3667 3668 /* Allow a null timespec (wait forever). */ 3669 if (uap->uaddr2 == NULL) 3670 ts = NULL; 3671 else { 3672 error = ops->copyin_timeout(uap->uaddr2, &timeout); 3673 if (error != 0) 3674 return (error); 3675 ts = &timeout; 3676 } 3677 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 3678 } 3679 3680 static int 3681 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap, 3682 const struct umtx_copyops *ops __unused) 3683 { 3684 3685 return (do_cv_signal(td, uap->obj)); 3686 } 3687 3688 static int 3689 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap, 3690 const struct umtx_copyops *ops __unused) 3691 { 3692 3693 return (do_cv_broadcast(td, uap->obj)); 3694 } 3695 3696 static int 3697 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap, 3698 const struct umtx_copyops *ops) 3699 { 3700 struct _umtx_time timeout; 3701 int error; 3702 3703 /* Allow a null timespec (wait forever). */ 3704 if (uap->uaddr2 == NULL) { 3705 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 3706 } else { 3707 error = ops->copyin_umtx_time(uap->uaddr2, 3708 (size_t)uap->uaddr1, &timeout); 3709 if (error != 0) 3710 return (error); 3711 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 3712 } 3713 return (error); 3714 } 3715 3716 static int 3717 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap, 3718 const struct umtx_copyops *ops) 3719 { 3720 struct _umtx_time timeout; 3721 int error; 3722 3723 /* Allow a null timespec (wait forever). */ 3724 if (uap->uaddr2 == NULL) { 3725 error = do_rw_wrlock(td, uap->obj, 0); 3726 } else { 3727 error = ops->copyin_umtx_time(uap->uaddr2, 3728 (size_t)uap->uaddr1, &timeout); 3729 if (error != 0) 3730 return (error); 3731 3732 error = do_rw_wrlock(td, uap->obj, &timeout); 3733 } 3734 return (error); 3735 } 3736 3737 static int 3738 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap, 3739 const struct umtx_copyops *ops __unused) 3740 { 3741 3742 return (do_rw_unlock(td, uap->obj)); 3743 } 3744 3745 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3746 static int 3747 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap, 3748 const struct umtx_copyops *ops) 3749 { 3750 struct _umtx_time *tm_p, timeout; 3751 int error; 3752 3753 /* Allow a null timespec (wait forever). */ 3754 if (uap->uaddr2 == NULL) 3755 tm_p = NULL; 3756 else { 3757 error = ops->copyin_umtx_time( 3758 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3759 if (error != 0) 3760 return (error); 3761 tm_p = &timeout; 3762 } 3763 return (do_sem_wait(td, uap->obj, tm_p)); 3764 } 3765 3766 static int 3767 __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap, 3768 const struct umtx_copyops *ops __unused) 3769 { 3770 3771 return (do_sem_wake(td, uap->obj)); 3772 } 3773 #endif 3774 3775 static int 3776 __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap, 3777 const struct umtx_copyops *ops __unused) 3778 { 3779 3780 return (do_wake2_umutex(td, uap->obj, uap->val)); 3781 } 3782 3783 static int 3784 __umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap, 3785 const struct umtx_copyops *ops) 3786 { 3787 struct _umtx_time *tm_p, timeout; 3788 size_t uasize; 3789 int error; 3790 3791 /* Allow a null timespec (wait forever). */ 3792 if (uap->uaddr2 == NULL) { 3793 uasize = 0; 3794 tm_p = NULL; 3795 } else { 3796 uasize = (size_t)uap->uaddr1; 3797 error = ops->copyin_umtx_time(uap->uaddr2, uasize, &timeout); 3798 if (error != 0) 3799 return (error); 3800 tm_p = &timeout; 3801 } 3802 error = do_sem2_wait(td, uap->obj, tm_p); 3803 if (error == EINTR && uap->uaddr2 != NULL && 3804 (timeout._flags & UMTX_ABSTIME) == 0 && 3805 uasize >= ops->umtx_time_sz + ops->timespec_sz) { 3806 error = ops->copyout_timeout( 3807 (void *)((uintptr_t)uap->uaddr2 + ops->umtx_time_sz), 3808 uasize - ops->umtx_time_sz, &timeout._timeout); 3809 if (error == 0) { 3810 error = EINTR; 3811 } 3812 } 3813 3814 return (error); 3815 } 3816 3817 static int 3818 __umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap, 3819 const struct umtx_copyops *ops __unused) 3820 { 3821 3822 return (do_sem2_wake(td, uap->obj)); 3823 } 3824 3825 #define USHM_OBJ_UMTX(o) \ 3826 ((struct umtx_shm_obj_list *)(&(o)->umtx_data)) 3827 3828 #define USHMF_REG_LINKED 0x0001 3829 #define USHMF_OBJ_LINKED 0x0002 3830 struct umtx_shm_reg { 3831 TAILQ_ENTRY(umtx_shm_reg) ushm_reg_link; 3832 LIST_ENTRY(umtx_shm_reg) ushm_obj_link; 3833 struct umtx_key ushm_key; 3834 struct ucred *ushm_cred; 3835 struct shmfd *ushm_obj; 3836 u_int ushm_refcnt; 3837 u_int ushm_flags; 3838 }; 3839 3840 LIST_HEAD(umtx_shm_obj_list, umtx_shm_reg); 3841 TAILQ_HEAD(umtx_shm_reg_head, umtx_shm_reg); 3842 3843 static uma_zone_t umtx_shm_reg_zone; 3844 static struct umtx_shm_reg_head umtx_shm_registry[UMTX_CHAINS]; 3845 static struct mtx umtx_shm_lock; 3846 static struct umtx_shm_reg_head umtx_shm_reg_delfree = 3847 TAILQ_HEAD_INITIALIZER(umtx_shm_reg_delfree); 3848 3849 static void umtx_shm_free_reg(struct umtx_shm_reg *reg); 3850 3851 static void 3852 umtx_shm_reg_delfree_tq(void *context __unused, int pending __unused) 3853 { 3854 struct umtx_shm_reg_head d; 3855 struct umtx_shm_reg *reg, *reg1; 3856 3857 TAILQ_INIT(&d); 3858 mtx_lock(&umtx_shm_lock); 3859 TAILQ_CONCAT(&d, &umtx_shm_reg_delfree, ushm_reg_link); 3860 mtx_unlock(&umtx_shm_lock); 3861 TAILQ_FOREACH_SAFE(reg, &d, ushm_reg_link, reg1) { 3862 TAILQ_REMOVE(&d, reg, ushm_reg_link); 3863 umtx_shm_free_reg(reg); 3864 } 3865 } 3866 3867 static struct task umtx_shm_reg_delfree_task = 3868 TASK_INITIALIZER(0, umtx_shm_reg_delfree_tq, NULL); 3869 3870 static struct umtx_shm_reg * 3871 umtx_shm_find_reg_locked(const struct umtx_key *key) 3872 { 3873 struct umtx_shm_reg *reg; 3874 struct umtx_shm_reg_head *reg_head; 3875 3876 KASSERT(key->shared, ("umtx_p_find_rg: private key")); 3877 mtx_assert(&umtx_shm_lock, MA_OWNED); 3878 reg_head = &umtx_shm_registry[key->hash]; 3879 TAILQ_FOREACH(reg, reg_head, ushm_reg_link) { 3880 KASSERT(reg->ushm_key.shared, 3881 ("non-shared key on reg %p %d", reg, reg->ushm_key.shared)); 3882 if (reg->ushm_key.info.shared.object == 3883 key->info.shared.object && 3884 reg->ushm_key.info.shared.offset == 3885 key->info.shared.offset) { 3886 KASSERT(reg->ushm_key.type == TYPE_SHM, ("TYPE_USHM")); 3887 KASSERT(reg->ushm_refcnt > 0, 3888 ("reg %p refcnt 0 onlist", reg)); 3889 KASSERT((reg->ushm_flags & USHMF_REG_LINKED) != 0, 3890 ("reg %p not linked", reg)); 3891 reg->ushm_refcnt++; 3892 return (reg); 3893 } 3894 } 3895 return (NULL); 3896 } 3897 3898 static struct umtx_shm_reg * 3899 umtx_shm_find_reg(const struct umtx_key *key) 3900 { 3901 struct umtx_shm_reg *reg; 3902 3903 mtx_lock(&umtx_shm_lock); 3904 reg = umtx_shm_find_reg_locked(key); 3905 mtx_unlock(&umtx_shm_lock); 3906 return (reg); 3907 } 3908 3909 static void 3910 umtx_shm_free_reg(struct umtx_shm_reg *reg) 3911 { 3912 3913 chgumtxcnt(reg->ushm_cred->cr_ruidinfo, -1, 0); 3914 crfree(reg->ushm_cred); 3915 shm_drop(reg->ushm_obj); 3916 uma_zfree(umtx_shm_reg_zone, reg); 3917 } 3918 3919 static bool 3920 umtx_shm_unref_reg_locked(struct umtx_shm_reg *reg, bool force) 3921 { 3922 bool res; 3923 3924 mtx_assert(&umtx_shm_lock, MA_OWNED); 3925 KASSERT(reg->ushm_refcnt > 0, ("ushm_reg %p refcnt 0", reg)); 3926 reg->ushm_refcnt--; 3927 res = reg->ushm_refcnt == 0; 3928 if (res || force) { 3929 if ((reg->ushm_flags & USHMF_REG_LINKED) != 0) { 3930 TAILQ_REMOVE(&umtx_shm_registry[reg->ushm_key.hash], 3931 reg, ushm_reg_link); 3932 reg->ushm_flags &= ~USHMF_REG_LINKED; 3933 } 3934 if ((reg->ushm_flags & USHMF_OBJ_LINKED) != 0) { 3935 LIST_REMOVE(reg, ushm_obj_link); 3936 reg->ushm_flags &= ~USHMF_OBJ_LINKED; 3937 } 3938 } 3939 return (res); 3940 } 3941 3942 static void 3943 umtx_shm_unref_reg(struct umtx_shm_reg *reg, bool force) 3944 { 3945 vm_object_t object; 3946 bool dofree; 3947 3948 if (force) { 3949 object = reg->ushm_obj->shm_object; 3950 VM_OBJECT_WLOCK(object); 3951 object->flags |= OBJ_UMTXDEAD; 3952 VM_OBJECT_WUNLOCK(object); 3953 } 3954 mtx_lock(&umtx_shm_lock); 3955 dofree = umtx_shm_unref_reg_locked(reg, force); 3956 mtx_unlock(&umtx_shm_lock); 3957 if (dofree) 3958 umtx_shm_free_reg(reg); 3959 } 3960 3961 void 3962 umtx_shm_object_init(vm_object_t object) 3963 { 3964 3965 LIST_INIT(USHM_OBJ_UMTX(object)); 3966 } 3967 3968 void 3969 umtx_shm_object_terminated(vm_object_t object) 3970 { 3971 struct umtx_shm_reg *reg, *reg1; 3972 bool dofree; 3973 3974 if (LIST_EMPTY(USHM_OBJ_UMTX(object))) 3975 return; 3976 3977 dofree = false; 3978 mtx_lock(&umtx_shm_lock); 3979 LIST_FOREACH_SAFE(reg, USHM_OBJ_UMTX(object), ushm_obj_link, reg1) { 3980 if (umtx_shm_unref_reg_locked(reg, true)) { 3981 TAILQ_INSERT_TAIL(&umtx_shm_reg_delfree, reg, 3982 ushm_reg_link); 3983 dofree = true; 3984 } 3985 } 3986 mtx_unlock(&umtx_shm_lock); 3987 if (dofree) 3988 taskqueue_enqueue(taskqueue_thread, &umtx_shm_reg_delfree_task); 3989 } 3990 3991 static int 3992 umtx_shm_create_reg(struct thread *td, const struct umtx_key *key, 3993 struct umtx_shm_reg **res) 3994 { 3995 struct umtx_shm_reg *reg, *reg1; 3996 struct ucred *cred; 3997 int error; 3998 3999 reg = umtx_shm_find_reg(key); 4000 if (reg != NULL) { 4001 *res = reg; 4002 return (0); 4003 } 4004 cred = td->td_ucred; 4005 if (!chgumtxcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_UMTXP))) 4006 return (ENOMEM); 4007 reg = uma_zalloc(umtx_shm_reg_zone, M_WAITOK | M_ZERO); 4008 reg->ushm_refcnt = 1; 4009 bcopy(key, ®->ushm_key, sizeof(*key)); 4010 reg->ushm_obj = shm_alloc(td->td_ucred, O_RDWR, false); 4011 reg->ushm_cred = crhold(cred); 4012 error = shm_dotruncate(reg->ushm_obj, PAGE_SIZE); 4013 if (error != 0) { 4014 umtx_shm_free_reg(reg); 4015 return (error); 4016 } 4017 mtx_lock(&umtx_shm_lock); 4018 reg1 = umtx_shm_find_reg_locked(key); 4019 if (reg1 != NULL) { 4020 mtx_unlock(&umtx_shm_lock); 4021 umtx_shm_free_reg(reg); 4022 *res = reg1; 4023 return (0); 4024 } 4025 reg->ushm_refcnt++; 4026 TAILQ_INSERT_TAIL(&umtx_shm_registry[key->hash], reg, ushm_reg_link); 4027 LIST_INSERT_HEAD(USHM_OBJ_UMTX(key->info.shared.object), reg, 4028 ushm_obj_link); 4029 reg->ushm_flags = USHMF_REG_LINKED | USHMF_OBJ_LINKED; 4030 mtx_unlock(&umtx_shm_lock); 4031 *res = reg; 4032 return (0); 4033 } 4034 4035 static int 4036 umtx_shm_alive(struct thread *td, void *addr) 4037 { 4038 vm_map_t map; 4039 vm_map_entry_t entry; 4040 vm_object_t object; 4041 vm_pindex_t pindex; 4042 vm_prot_t prot; 4043 int res, ret; 4044 boolean_t wired; 4045 4046 map = &td->td_proc->p_vmspace->vm_map; 4047 res = vm_map_lookup(&map, (uintptr_t)addr, VM_PROT_READ, &entry, 4048 &object, &pindex, &prot, &wired); 4049 if (res != KERN_SUCCESS) 4050 return (EFAULT); 4051 if (object == NULL) 4052 ret = EINVAL; 4053 else 4054 ret = (object->flags & OBJ_UMTXDEAD) != 0 ? ENOTTY : 0; 4055 vm_map_lookup_done(map, entry); 4056 return (ret); 4057 } 4058 4059 static void 4060 umtx_shm_init(void) 4061 { 4062 int i; 4063 4064 umtx_shm_reg_zone = uma_zcreate("umtx_shm", sizeof(struct umtx_shm_reg), 4065 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 4066 mtx_init(&umtx_shm_lock, "umtxshm", NULL, MTX_DEF); 4067 for (i = 0; i < nitems(umtx_shm_registry); i++) 4068 TAILQ_INIT(&umtx_shm_registry[i]); 4069 } 4070 4071 static int 4072 umtx_shm(struct thread *td, void *addr, u_int flags) 4073 { 4074 struct umtx_key key; 4075 struct umtx_shm_reg *reg; 4076 struct file *fp; 4077 int error, fd; 4078 4079 if (__bitcount(flags & (UMTX_SHM_CREAT | UMTX_SHM_LOOKUP | 4080 UMTX_SHM_DESTROY| UMTX_SHM_ALIVE)) != 1) 4081 return (EINVAL); 4082 if ((flags & UMTX_SHM_ALIVE) != 0) 4083 return (umtx_shm_alive(td, addr)); 4084 error = umtx_key_get(addr, TYPE_SHM, PROCESS_SHARE, &key); 4085 if (error != 0) 4086 return (error); 4087 KASSERT(key.shared == 1, ("non-shared key")); 4088 if ((flags & UMTX_SHM_CREAT) != 0) { 4089 error = umtx_shm_create_reg(td, &key, ®); 4090 } else { 4091 reg = umtx_shm_find_reg(&key); 4092 if (reg == NULL) 4093 error = ESRCH; 4094 } 4095 umtx_key_release(&key); 4096 if (error != 0) 4097 return (error); 4098 KASSERT(reg != NULL, ("no reg")); 4099 if ((flags & UMTX_SHM_DESTROY) != 0) { 4100 umtx_shm_unref_reg(reg, true); 4101 } else { 4102 #if 0 4103 #ifdef MAC 4104 error = mac_posixshm_check_open(td->td_ucred, 4105 reg->ushm_obj, FFLAGS(O_RDWR)); 4106 if (error == 0) 4107 #endif 4108 error = shm_access(reg->ushm_obj, td->td_ucred, 4109 FFLAGS(O_RDWR)); 4110 if (error == 0) 4111 #endif 4112 error = falloc_caps(td, &fp, &fd, O_CLOEXEC, NULL); 4113 if (error == 0) { 4114 shm_hold(reg->ushm_obj); 4115 finit(fp, FFLAGS(O_RDWR), DTYPE_SHM, reg->ushm_obj, 4116 &shm_ops); 4117 td->td_retval[0] = fd; 4118 fdrop(fp, td); 4119 } 4120 } 4121 umtx_shm_unref_reg(reg, false); 4122 return (error); 4123 } 4124 4125 static int 4126 __umtx_op_shm(struct thread *td, struct _umtx_op_args *uap, 4127 const struct umtx_copyops *ops __unused) 4128 { 4129 4130 return (umtx_shm(td, uap->uaddr1, uap->val)); 4131 } 4132 4133 static int 4134 __umtx_op_robust_lists(struct thread *td, struct _umtx_op_args *uap, 4135 const struct umtx_copyops *ops) 4136 { 4137 struct umtx_robust_lists_params rb; 4138 int error; 4139 4140 bzero(&rb, sizeof(rb)); 4141 error = ops->copyin_robust_lists(uap->uaddr1, uap->val, &rb); 4142 if (error != 0) 4143 return (error); 4144 4145 if (ops->compat32) 4146 td->td_pflags2 |= TDP2_COMPAT32RB; 4147 else if ((td->td_pflags2 & TDP2_COMPAT32RB) != 0) 4148 return (EINVAL); 4149 4150 td->td_rb_list = rb.robust_list_offset; 4151 td->td_rbp_list = rb.robust_priv_list_offset; 4152 td->td_rb_inact = rb.robust_inact_offset; 4153 return (0); 4154 } 4155 4156 #ifdef COMPAT_FREEBSD32 4157 static inline int 4158 umtx_copyin_timeout32(const void *uaddr, struct timespec *tsp) 4159 { 4160 struct timespec32 ts32; 4161 int error; 4162 4163 error = copyin(uaddr, &ts32, sizeof(ts32)); 4164 if (error == 0) { 4165 if (ts32.tv_sec < 0 || 4166 ts32.tv_nsec >= 1000000000 || 4167 ts32.tv_nsec < 0) 4168 error = EINVAL; 4169 else { 4170 CP(ts32, *tsp, tv_sec); 4171 CP(ts32, *tsp, tv_nsec); 4172 } 4173 } 4174 return (error); 4175 } 4176 4177 static inline int 4178 umtx_copyin_umtx_time32(const void *uaddr, size_t size, struct _umtx_time *tp) 4179 { 4180 struct umtx_time32 t32; 4181 int error; 4182 4183 t32._clockid = CLOCK_REALTIME; 4184 t32._flags = 0; 4185 if (size <= sizeof(t32._timeout)) 4186 error = copyin(uaddr, &t32._timeout, sizeof(t32._timeout)); 4187 else 4188 error = copyin(uaddr, &t32, sizeof(t32)); 4189 if (error != 0) 4190 return (error); 4191 if (t32._timeout.tv_sec < 0 || 4192 t32._timeout.tv_nsec >= 1000000000 || t32._timeout.tv_nsec < 0) 4193 return (EINVAL); 4194 TS_CP(t32, *tp, _timeout); 4195 CP(t32, *tp, _flags); 4196 CP(t32, *tp, _clockid); 4197 return (0); 4198 } 4199 4200 static int 4201 umtx_copyin_robust_lists32(const void *uaddr, size_t size, 4202 struct umtx_robust_lists_params *rbp) 4203 { 4204 struct umtx_robust_lists_params_compat32 rb32; 4205 int error; 4206 4207 if (size > sizeof(rb32)) 4208 return (EINVAL); 4209 bzero(&rb32, sizeof(rb32)); 4210 error = copyin(uaddr, &rb32, size); 4211 if (error != 0) 4212 return (error); 4213 CP(rb32, *rbp, robust_list_offset); 4214 CP(rb32, *rbp, robust_priv_list_offset); 4215 CP(rb32, *rbp, robust_inact_offset); 4216 return (0); 4217 } 4218 4219 static int 4220 umtx_copyout_timeout32(void *uaddr, size_t sz, struct timespec *tsp) 4221 { 4222 struct timespec32 remain32 = { 4223 .tv_sec = tsp->tv_sec, 4224 .tv_nsec = tsp->tv_nsec, 4225 }; 4226 4227 /* 4228 * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time) 4229 * and we're only called if sz >= sizeof(timespec) as supplied in the 4230 * copyops. 4231 */ 4232 KASSERT(sz >= sizeof(remain32), 4233 ("umtx_copyops specifies incorrect sizes")); 4234 4235 return (copyout(&remain32, uaddr, sizeof(remain32))); 4236 } 4237 #endif /* COMPAT_FREEBSD32 */ 4238 4239 typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap, 4240 const struct umtx_copyops *umtx_ops); 4241 4242 static const _umtx_op_func op_table[] = { 4243 [UMTX_OP_RESERVED0] = __umtx_op_unimpl, 4244 [UMTX_OP_RESERVED1] = __umtx_op_unimpl, 4245 [UMTX_OP_WAIT] = __umtx_op_wait, 4246 [UMTX_OP_WAKE] = __umtx_op_wake, 4247 [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex, 4248 [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex, 4249 [UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex, 4250 [UMTX_OP_SET_CEILING] = __umtx_op_set_ceiling, 4251 [UMTX_OP_CV_WAIT] = __umtx_op_cv_wait, 4252 [UMTX_OP_CV_SIGNAL] = __umtx_op_cv_signal, 4253 [UMTX_OP_CV_BROADCAST] = __umtx_op_cv_broadcast, 4254 [UMTX_OP_WAIT_UINT] = __umtx_op_wait_uint, 4255 [UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock, 4256 [UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock, 4257 [UMTX_OP_RW_UNLOCK] = __umtx_op_rw_unlock, 4258 [UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private, 4259 [UMTX_OP_WAKE_PRIVATE] = __umtx_op_wake_private, 4260 [UMTX_OP_MUTEX_WAIT] = __umtx_op_wait_umutex, 4261 [UMTX_OP_MUTEX_WAKE] = __umtx_op_wake_umutex, 4262 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4263 [UMTX_OP_SEM_WAIT] = __umtx_op_sem_wait, 4264 [UMTX_OP_SEM_WAKE] = __umtx_op_sem_wake, 4265 #else 4266 [UMTX_OP_SEM_WAIT] = __umtx_op_unimpl, 4267 [UMTX_OP_SEM_WAKE] = __umtx_op_unimpl, 4268 #endif 4269 [UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private, 4270 [UMTX_OP_MUTEX_WAKE2] = __umtx_op_wake2_umutex, 4271 [UMTX_OP_SEM2_WAIT] = __umtx_op_sem2_wait, 4272 [UMTX_OP_SEM2_WAKE] = __umtx_op_sem2_wake, 4273 [UMTX_OP_SHM] = __umtx_op_shm, 4274 [UMTX_OP_ROBUST_LISTS] = __umtx_op_robust_lists, 4275 }; 4276 4277 static const struct umtx_copyops umtx_native_ops = { 4278 .copyin_timeout = umtx_copyin_timeout, 4279 .copyin_umtx_time = umtx_copyin_umtx_time, 4280 .copyin_robust_lists = umtx_copyin_robust_lists, 4281 .copyout_timeout = umtx_copyout_timeout, 4282 .timespec_sz = sizeof(struct timespec), 4283 .umtx_time_sz = sizeof(struct _umtx_time), 4284 }; 4285 4286 #ifdef COMPAT_FREEBSD32 4287 const struct umtx_copyops umtx_native_ops32 = { 4288 .copyin_timeout = umtx_copyin_timeout32, 4289 .copyin_umtx_time = umtx_copyin_umtx_time32, 4290 .copyin_robust_lists = umtx_copyin_robust_lists32, 4291 .copyout_timeout = umtx_copyout_timeout32, 4292 .timespec_sz = sizeof(struct timespec32), 4293 .umtx_time_sz = sizeof(struct umtx_time32), 4294 .compat32 = true, 4295 }; 4296 #endif 4297 4298 int 4299 kern__umtx_op(struct thread *td, void *obj, int op, unsigned long val, 4300 void *uaddr1, void *uaddr2, const struct umtx_copyops *ops) 4301 { 4302 struct _umtx_op_args uap = { 4303 .obj = obj, 4304 .op = op, 4305 .val = val, 4306 .uaddr1 = uaddr1, 4307 .uaddr2 = uaddr2 4308 }; 4309 4310 if ((uap.op >= nitems(op_table))) 4311 return (EINVAL); 4312 return ((*op_table[uap.op])(td, &uap, ops)); 4313 } 4314 4315 int 4316 sys__umtx_op(struct thread *td, struct _umtx_op_args *uap) 4317 { 4318 4319 return (kern__umtx_op(td, uap->obj, uap->op, uap->val, uap->uaddr1, 4320 uap->uaddr2, &umtx_native_ops)); 4321 } 4322 4323 void 4324 umtx_thread_init(struct thread *td) 4325 { 4326 4327 td->td_umtxq = umtxq_alloc(); 4328 td->td_umtxq->uq_thread = td; 4329 } 4330 4331 void 4332 umtx_thread_fini(struct thread *td) 4333 { 4334 4335 umtxq_free(td->td_umtxq); 4336 } 4337 4338 /* 4339 * It will be called when new thread is created, e.g fork(). 4340 */ 4341 void 4342 umtx_thread_alloc(struct thread *td) 4343 { 4344 struct umtx_q *uq; 4345 4346 uq = td->td_umtxq; 4347 uq->uq_inherited_pri = PRI_MAX; 4348 4349 KASSERT(uq->uq_flags == 0, ("uq_flags != 0")); 4350 KASSERT(uq->uq_thread == td, ("uq_thread != td")); 4351 KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL")); 4352 KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty")); 4353 } 4354 4355 /* 4356 * exec() hook. 4357 * 4358 * Clear robust lists for all process' threads, not delaying the 4359 * cleanup to thread_exit hook, since the relevant address space is 4360 * destroyed right now. 4361 */ 4362 static void 4363 umtx_exec_hook(void *arg __unused, struct proc *p, 4364 struct image_params *imgp __unused) 4365 { 4366 struct thread *td; 4367 4368 KASSERT(p == curproc, ("need curproc")); 4369 KASSERT((p->p_flag & P_HADTHREADS) == 0 || 4370 (p->p_flag & P_STOPPED_SINGLE) != 0, 4371 ("curproc must be single-threaded")); 4372 /* 4373 * There is no need to lock the list as only this thread can be 4374 * running. 4375 */ 4376 FOREACH_THREAD_IN_PROC(p, td) { 4377 KASSERT(td == curthread || 4378 ((td->td_flags & TDF_BOUNDARY) != 0 && TD_IS_SUSPENDED(td)), 4379 ("running thread %p %p", p, td)); 4380 umtx_thread_cleanup(td); 4381 td->td_rb_list = td->td_rbp_list = td->td_rb_inact = 0; 4382 } 4383 } 4384 4385 /* 4386 * thread_exit() hook. 4387 */ 4388 void 4389 umtx_thread_exit(struct thread *td) 4390 { 4391 4392 umtx_thread_cleanup(td); 4393 } 4394 4395 static int 4396 umtx_read_uptr(struct thread *td, uintptr_t ptr, uintptr_t *res, bool compat32) 4397 { 4398 u_long res1; 4399 #ifdef COMPAT_FREEBSD32 4400 uint32_t res32; 4401 #endif 4402 int error; 4403 4404 #ifdef COMPAT_FREEBSD32 4405 if (compat32) { 4406 error = fueword32((void *)ptr, &res32); 4407 if (error == 0) 4408 res1 = res32; 4409 } else 4410 #endif 4411 { 4412 error = fueword((void *)ptr, &res1); 4413 } 4414 if (error == 0) 4415 *res = res1; 4416 else 4417 error = EFAULT; 4418 return (error); 4419 } 4420 4421 static void 4422 umtx_read_rb_list(struct thread *td, struct umutex *m, uintptr_t *rb_list, 4423 bool compat32) 4424 { 4425 #ifdef COMPAT_FREEBSD32 4426 struct umutex32 m32; 4427 4428 if (compat32) { 4429 memcpy(&m32, m, sizeof(m32)); 4430 *rb_list = m32.m_rb_lnk; 4431 } else 4432 #endif 4433 *rb_list = m->m_rb_lnk; 4434 } 4435 4436 static int 4437 umtx_handle_rb(struct thread *td, uintptr_t rbp, uintptr_t *rb_list, bool inact, 4438 bool compat32) 4439 { 4440 struct umutex m; 4441 int error; 4442 4443 KASSERT(td->td_proc == curproc, ("need current vmspace")); 4444 error = copyin((void *)rbp, &m, sizeof(m)); 4445 if (error != 0) 4446 return (error); 4447 if (rb_list != NULL) 4448 umtx_read_rb_list(td, &m, rb_list, compat32); 4449 if ((m.m_flags & UMUTEX_ROBUST) == 0) 4450 return (EINVAL); 4451 if ((m.m_owner & ~UMUTEX_CONTESTED) != td->td_tid) 4452 /* inact is cleared after unlock, allow the inconsistency */ 4453 return (inact ? 0 : EINVAL); 4454 return (do_unlock_umutex(td, (struct umutex *)rbp, true)); 4455 } 4456 4457 static void 4458 umtx_cleanup_rb_list(struct thread *td, uintptr_t rb_list, uintptr_t *rb_inact, 4459 const char *name, bool compat32) 4460 { 4461 int error, i; 4462 uintptr_t rbp; 4463 bool inact; 4464 4465 if (rb_list == 0) 4466 return; 4467 error = umtx_read_uptr(td, rb_list, &rbp, compat32); 4468 for (i = 0; error == 0 && rbp != 0 && i < umtx_max_rb; i++) { 4469 if (rbp == *rb_inact) { 4470 inact = true; 4471 *rb_inact = 0; 4472 } else 4473 inact = false; 4474 error = umtx_handle_rb(td, rbp, &rbp, inact, compat32); 4475 } 4476 if (i == umtx_max_rb && umtx_verbose_rb) { 4477 uprintf("comm %s pid %d: reached umtx %smax rb %d\n", 4478 td->td_proc->p_comm, td->td_proc->p_pid, name, umtx_max_rb); 4479 } 4480 if (error != 0 && umtx_verbose_rb) { 4481 uprintf("comm %s pid %d: handling %srb error %d\n", 4482 td->td_proc->p_comm, td->td_proc->p_pid, name, error); 4483 } 4484 } 4485 4486 /* 4487 * Clean up umtx data. 4488 */ 4489 static void 4490 umtx_thread_cleanup(struct thread *td) 4491 { 4492 struct umtx_q *uq; 4493 struct umtx_pi *pi; 4494 uintptr_t rb_inact; 4495 bool compat32; 4496 4497 /* 4498 * Disown pi mutexes. 4499 */ 4500 uq = td->td_umtxq; 4501 if (uq != NULL) { 4502 if (uq->uq_inherited_pri != PRI_MAX || 4503 !TAILQ_EMPTY(&uq->uq_pi_contested)) { 4504 mtx_lock(&umtx_lock); 4505 uq->uq_inherited_pri = PRI_MAX; 4506 while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) { 4507 pi->pi_owner = NULL; 4508 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link); 4509 } 4510 mtx_unlock(&umtx_lock); 4511 } 4512 sched_lend_user_prio_cond(td, PRI_MAX); 4513 } 4514 4515 compat32 = (td->td_pflags2 & TDP2_COMPAT32RB) != 0; 4516 td->td_pflags2 &= ~TDP2_COMPAT32RB; 4517 4518 if (td->td_rb_inact == 0 && td->td_rb_list == 0 && td->td_rbp_list == 0) 4519 return; 4520 4521 /* 4522 * Handle terminated robust mutexes. Must be done after 4523 * robust pi disown, otherwise unlock could see unowned 4524 * entries. 4525 */ 4526 rb_inact = td->td_rb_inact; 4527 if (rb_inact != 0) 4528 (void)umtx_read_uptr(td, rb_inact, &rb_inact, compat32); 4529 umtx_cleanup_rb_list(td, td->td_rb_list, &rb_inact, "", compat32); 4530 umtx_cleanup_rb_list(td, td->td_rbp_list, &rb_inact, "priv ", compat32); 4531 if (rb_inact != 0) 4532 (void)umtx_handle_rb(td, rb_inact, NULL, true, compat32); 4533 } 4534