/*-
 * Copyright (c) 2015, 2016 The FreeBSD Foundation
 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Portions of this software were developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"
#include "opt_umtx_profiling.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/syscallsubr.h>
#include <sys/taskqueue.h>
#include <sys/time.h>
#include <sys/eventhandler.h>
#include <sys/umtx.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/uma.h>	/* uma_zcreate()/uma_zalloc() for umtx_pi_zone */

#include <machine/atomic.h>
#include <machine/cpu.h>

#ifdef COMPAT_FREEBSD32
#include <compat/freebsd32/freebsd32_proto.h>
#endif

#define	_UMUTEX_TRY		1
#define	_UMUTEX_WAIT		2

#ifdef UMTX_PROFILING
#define	UPROF_PERC_BIGGER(w, f, sw, sf)					\
	(((w) > (sw)) || ((w) == (sw) && (f) > (sf)))
#endif

/* Priority inheritance mutex info. */
struct umtx_pi {
	/* Owner thread */
	struct thread		*pi_owner;

	/* Reference count */
	int			pi_refcount;

	/* List entry, linked into the owning thread's uq_pi_contested */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* List entry in hash */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* List for waiters */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Identify a userland lock object */
	struct umtx_key		pi_key;
};
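/*
 * Note on the PI bookkeeping: a umtx_pi is the kernel-side shadow of
 * one userland PI mutex.  Its pi_blocked list holds the umtx_q of
 * every thread sleeping on the mutex, kept sorted by user priority,
 * while pi_link threads the umtx_pi onto the owning thread's
 * uq_pi_contested list (see umtx_pi_setowner() below).  Walking
 * pi_owner -> uq_pi_blocked -> pi_owner therefore traverses the
 * blocking chain that priority propagation follows.
 */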
/* A waiter on a userland synchronization object. */
struct umtx_q {
	/* Linked list for the hash. */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Umtx key. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define	UQF_UMTXQ	0x0001

	/* The thread waiting on this entry. */
	struct thread		*uq_thread;

	/*
	 * Blocked on PI mutex.  Reads can use either the chain lock
	 * or umtx_lock; writes must hold both.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* On blocked list */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* PI mutexes owned by this thread that other threads contend for */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Inherited priority from PP mutex */
	u_char			uq_inherited_pri;

	/* Spare queue ready to be reused */
	struct umtxq_queue	*uq_spare_queue;

	/* The queue we are on */
	struct umtxq_queue	*uq_cur_queue;
};

TAILQ_HEAD(umtxq_head, umtx_q);

/* Per-key wait-queue */
struct umtxq_queue {
	struct umtxq_head	head;
	struct umtx_key		key;
	LIST_ENTRY(umtxq_queue)	link;
	int			length;
};

LIST_HEAD(umtxq_list, umtxq_queue);

/* Userland lock object's wait-queue chain */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx		uc_lock;

	/* List of sleep queues. */
	struct umtxq_list	uc_queue[2];
#define	UMTX_SHARED_QUEUE	0
#define	UMTX_EXCLUSIVE_QUEUE	1

	LIST_HEAD(, umtxq_queue) uc_spare_queue;

	/* Busy flag */
	char			uc_busy;

	/* Chain lock waiters */
	int			uc_waiters;

	/* All PI in the list */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;

#ifdef UMTX_PROFILING
	u_int			length;
	u_int			max_length;
#endif
};

#define	UMTXQ_LOCKED_ASSERT(uc)		mtx_assert(&(uc)->uc_lock, MA_OWNED)

/*
 * Don't propagate time-sharing priority; there is a security reason.
 * A user could simply create a PI mutex, let thread A lock it, and
 * let another thread B block on it.  Because B is sleeping, its
 * priority would be boosted, which would in turn boost A's priority
 * via propagation, and A's priority would never be lowered even while
 * it consumes 100% CPU.  That would be unfair to other processes, so
 * clamp time-sharing priorities to PRI_MAX_TIMESHARE.
 */
#define	UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&	\
			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?	\
			 PRI_MAX_TIMESHARE : (td)->td_user_pri)

#define	GOLDEN_RATIO_PRIME	2654404609U
#ifndef	UMTX_CHAINS
#define	UMTX_CHAINS		512
#endif
#define	UMTX_SHIFTS		(__WORD_BIT - 9)
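/*
 * Illustrative note on the constants above: umtxq_hash() below is
 * multiplicative (Fibonacci) hashing.  The key material n is
 * multiplied by GOLDEN_RATIO_PRIME and the top 9 bits of the 32-bit
 * product are kept (__WORD_BIT == 32, so UMTX_SHIFTS == 23), exactly
 * enough to index the default UMTX_CHAINS == 512 chains; the final
 * "% UMTX_CHAINS" only matters when UMTX_CHAINS is overridden to a
 * smaller value at build time.
 */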
#define	GET_SHARE(flags)	\
    (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)

#define	BUSY_SPINS		200

struct abs_timeout {
	int clockid;
	bool is_abs_real;	/* TIMER_ABSTIME && CLOCK_REALTIME* */
	struct timespec cur;
	struct timespec end;
};

#ifdef COMPAT_FREEBSD32
struct umutex32 {
	volatile __lwpid_t	m_owner;	/* Owner of the mutex */
	__uint32_t		m_flags;	/* Flags of the mutex */
	__uint32_t		m_ceilings[2];	/* Priority protect ceiling */
	__uint32_t		m_rb_lnk;	/* Robust linkage */
	__uint32_t		m_pad;
	__uint32_t		m_spare[2];
};

_Static_assert(sizeof(struct umutex) == sizeof(struct umutex32), "umutex32");
_Static_assert(__offsetof(struct umutex, m_spare[0]) ==
    __offsetof(struct umutex32, m_spare[0]), "m_spare32");
#endif

int umtx_shm_vnobj_persistent = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_vnode_persistent, CTLFLAG_RWTUN,
    &umtx_shm_vnobj_persistent, 0,
    "False forces destruction of umtx attached to file, on last close");
static int umtx_max_rb = 1000;
SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_max_robust, CTLFLAG_RWTUN,
    &umtx_max_rb, 0,
    "Maximum number of robust mutexes allowed for each thread");

static uma_zone_t		umtx_pi_zone;
static struct umtxq_chain	umtxq_chains[2][UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
static int			umtx_pi_allocated;

static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
    &umtx_pi_allocated, 0, "Allocated umtx_pi");
static int umtx_verbose_rb = 1;
SYSCTL_INT(_debug_umtx, OID_AUTO, robust_faults_verbose, CTLFLAG_RWTUN,
    &umtx_verbose_rb, 0,
    "Log faults encountered while processing robust umutex lists");

#ifdef UMTX_PROFILING
static long max_length;
SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0,
    "max_length");
static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD, 0,
    "umtx chain stats");
#endif

static void abs_timeout_update(struct abs_timeout *timo);

static void umtx_shm_init(void);
static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert_queue(struct umtx_q *uq, int q);
static void umtxq_remove_queue(struct umtx_q *uq, int q);
static int umtxq_sleep(struct umtx_q *uq, const char *wmesg,
    struct abs_timeout *);
static int umtxq_count(struct umtx_key *key);
static struct umtx_pi *umtx_pi_alloc(int);
static void umtx_pi_free(struct umtx_pi *pi);
static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    bool rb);
static void umtx_thread_cleanup(struct thread *td);
static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
    struct image_params *imgp __unused);
SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);

#define	umtxq_signal(key, nwake)	umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
#define	umtxq_insert(uq)	umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
#define	umtxq_remove(uq)	umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)

static struct mtx umtx_lock;
#ifdef UMTX_PROFILING
static void
umtx_init_profiling(void)
{
	struct sysctl_oid *chain_oid;
	char chain_name[10];
	int i;

	for (i = 0; i < UMTX_CHAINS; ++i) {
		snprintf(chain_name, sizeof(chain_name), "%d", i);
		chain_oid = SYSCTL_ADD_NODE(NULL,
		    SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO,
		    chain_name, CTLFLAG_RD, NULL, "umtx hash stats");
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length0", CTLFLAG_RD,
		    &umtxq_chains[0][i].max_length, 0, NULL);
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length1", CTLFLAG_RD,
		    &umtxq_chains[1][i].max_length, 0, NULL);
	}
}

static int
sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS)
{
	char buf[512];
	struct sbuf sb;
	struct umtxq_chain *uc;
	u_int fract, i, j, tot, whole;
	u_int sf0, sf1, sf2, sf3, sf4;
	u_int si0, si1, si2, si3, si4;
	u_int sw0, sw1, sw2, sw3, sw4;

	sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
	for (i = 0; i < 2; i++) {
		tot = 0;
		for (j = 0; j < UMTX_CHAINS; ++j) {
			uc = &umtxq_chains[i][j];
			mtx_lock(&uc->uc_lock);
			tot += uc->max_length;
			mtx_unlock(&uc->uc_lock);
		}
		if (tot == 0)
			sbuf_printf(&sb, "%u) Empty ", i);
		else {
			sf0 = sf1 = sf2 = sf3 = sf4 = 0;
			si0 = si1 = si2 = si3 = si4 = 0;
			sw0 = sw1 = sw2 = sw3 = sw4 = 0;
			for (j = 0; j < UMTX_CHAINS; j++) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				whole = uc->max_length * 100;
				mtx_unlock(&uc->uc_lock);
				fract = (whole % tot) * 100;
				if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) {
					sf0 = fract;
					si0 = j;
					sw0 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw1,
				    sf1)) {
					sf1 = fract;
					si1 = j;
					sw1 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw2,
				    sf2)) {
					sf2 = fract;
					si2 = j;
					sw2 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw3,
				    sf3)) {
					sf3 = fract;
					si3 = j;
					sw3 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw4,
				    sf4)) {
					sf4 = fract;
					si4 = j;
					sw4 = whole;
				}
			}
			sbuf_printf(&sb, "queue %u:\n", i);
			sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot,
			    sf0 / tot, si0);
			sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot,
			    sf1 / tot, si1);
			sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot,
			    sf2 / tot, si2);
			sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot,
			    sf3 / tot, si3);
			sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot,
			    sf4 / tot, si4);
		}
	}
	sbuf_trim(&sb);
	sbuf_finish(&sb);
	sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
	sbuf_delete(&sb);
	return (0);
}

static int
sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS)
{
	struct umtxq_chain *uc;
	u_int i, j;
	int clear, error;

	clear = 0;
	error = sysctl_handle_int(oidp, &clear, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);

	if (clear != 0) {
		for (i = 0; i < 2; ++i) {
			for (j = 0; j < UMTX_CHAINS; ++j) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				uc->length = 0;
				uc->max_length = 0;
				mtx_unlock(&uc->uc_lock);
			}
		}
	}
	return (0);
}

SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_clear, "I", "Clear umtx chains statistics");
SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_peaks, "A", "Highest peaks in chains max length");
#endif
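/*
 * Usage example (UMTX_PROFILING kernels only): the chain statistics
 * registered above can be inspected and reset from userland with,
 * e.g.,
 *
 *	sysctl debug.umtx.chains.peaks
 *	sysctl debug.umtx.chains.clear=1
 *
 * plus the per-chain debug.umtx.chains.<N>.max_length{0,1} nodes
 * created by umtx_init_profiling().
 */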
static void
umtxq_sysinit(void *arg __unused)
{
	int i, j;

	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	for (i = 0; i < 2; ++i) {
		for (j = 0; j < UMTX_CHAINS; ++j) {
			mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
			    MTX_DEF | MTX_DUPOK);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
			LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
			TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
			umtxq_chains[i][j].uc_busy = 0;
			umtxq_chains[i][j].uc_waiters = 0;
#ifdef UMTX_PROFILING
			umtxq_chains[i][j].length = 0;
			umtxq_chains[i][j].max_length = 0;
#endif
		}
	}
#ifdef UMTX_PROFILING
	umtx_init_profiling();
#endif
	mtx_init(&umtx_lock, "umtx lock", NULL, MTX_DEF);
	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
	    EVENTHANDLER_PRI_ANY);
	umtx_shm_init();
}

struct umtx_q *
umtxq_alloc(void)
{
	struct umtx_q *uq;

	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
	uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX,
	    M_WAITOK | M_ZERO);
	TAILQ_INIT(&uq->uq_spare_queue->head);
	TAILQ_INIT(&uq->uq_pi_contested);
	uq->uq_inherited_pri = PRI_MAX;
	return (uq);
}

void
umtxq_free(struct umtx_q *uq)
{

	MPASS(uq->uq_spare_queue != NULL);
	free(uq->uq_spare_queue, M_UMTX);
	free(uq, M_UMTX);
}

static inline void
umtxq_hash(struct umtx_key *key)
{
	unsigned n;

	n = (uintptr_t)key->info.both.a + key->info.both.b;
	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
}

static inline struct umtxq_chain *
umtxq_getchain(struct umtx_key *key)
{

	if (key->type <= TYPE_SEM)
		return (&umtxq_chains[1][key->hash]);
	return (&umtxq_chains[0][key->hash]);
}

/*
 * Lock a chain.
 */
static inline void
umtxq_lock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_lock(&uc->uc_lock);
}

/*
 * Unlock a chain.
 */
static inline void
umtxq_unlock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_unlock(&uc->uc_lock);
}

/*
 * Mark the chain as busy when the following operation may block
 * (a kernel mutex cannot be held across it).
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	if (uc->uc_busy) {
#ifdef SMP
		if (smp_cpus > 1) {
			int count = BUSY_SPINS;
			if (count > 0) {
				umtxq_unlock(key);
				while (uc->uc_busy && --count > 0)
					cpu_spinwait();
				umtxq_lock(key);
			}
		}
#endif
		while (uc->uc_busy) {
			uc->uc_waiters++;
			msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
			uc->uc_waiters--;
		}
	}
	uc->uc_busy = 1;
}
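/*
 * Illustrative sketch of the busy protocol: uc_busy turns the chain
 * into a sleepable lock layered over uc_lock.  A typical contested
 * operation looks like
 *
 *	umtxq_lock(&key);
 *	umtxq_busy(&key);	(spin up to BUSY_SPINS, then msleep)
 *	umtxq_unlock(&key);
 *	... faultable work on the userland word ...
 *	umtxq_unbusy_unlocked(&key);
 *
 * so the chain mutex itself is never held across an operation that
 * can fault or sleep.
 */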
/*
 * Unbusy a chain.
 */
static inline void
umtxq_unbusy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	KASSERT(uc->uc_busy != 0, ("not busy"));
	uc->uc_busy = 0;
	if (uc->uc_waiters)
		wakeup_one(uc);
}

static inline void
umtxq_unbusy_unlocked(struct umtx_key *key)
{

	umtxq_lock(key);
	umtxq_unbusy(key);
	umtxq_unlock(key);
}

static struct umtxq_queue *
umtxq_queue_lookup(struct umtx_key *key, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	LIST_FOREACH(uh, &uc->uc_queue[q], link) {
		if (umtx_key_match(&uh->key, key))
			return (uh);
	}

	return (NULL);
}

static inline void
umtxq_insert_queue(struct umtx_q *uq, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT((uq->uq_flags & UQF_UMTXQ) == 0,
	    ("umtx_q is already on queue"));
	uh = umtxq_queue_lookup(&uq->uq_key, q);
	if (uh != NULL) {
		LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue,
		    link);
	} else {
		uh = uq->uq_spare_queue;
		uh->key = uq->uq_key;
		LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
#ifdef UMTX_PROFILING
		uc->length++;
		if (uc->length > uc->max_length) {
			uc->max_length = uc->length;
			if (uc->max_length > max_length)
				max_length = uc->max_length;
		}
#endif
	}
	uq->uq_spare_queue = NULL;

	TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
	uh->length++;
	uq->uq_flags |= UQF_UMTXQ;
	uq->uq_cur_queue = uh;
	return;
}

static inline void
umtxq_remove_queue(struct umtx_q *uq, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (uq->uq_flags & UQF_UMTXQ) {
		uh = uq->uq_cur_queue;
		TAILQ_REMOVE(&uh->head, uq, uq_link);
		uh->length--;
		uq->uq_flags &= ~UQF_UMTXQ;
		if (TAILQ_EMPTY(&uh->head)) {
			KASSERT(uh->length == 0,
			    ("inconsistent umtxq_queue length"));
#ifdef UMTX_PROFILING
			uc->length--;
#endif
			LIST_REMOVE(uh, link);
		} else {
			uh = LIST_FIRST(&uc->uc_spare_queue);
			KASSERT(uh != NULL, ("uc_spare_queue is empty"));
			LIST_REMOVE(uh, link);
		}
		uq->uq_spare_queue = uh;
		uq->uq_cur_queue = NULL;
	}
}
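/*
 * Invariant illustrated by the two functions above: every umtx_q owns
 * exactly one spare umtxq_queue while it is off-queue.  On insert,
 * the waiter either donates its spare to the chain's spare list (a
 * queue for this key already exists) or turns the spare into the
 * key's live queue.  On remove, it takes back either its now-empty
 * live queue or an arbitrary spare.  A queue head is therefore always
 * available without allocating while the chain lock is held.
 */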
/*
 * Check if there are multiple waiters
 */
static int
umtxq_count(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL)
		return (uh->length);
	return (0);
}

/*
 * Check if there are multiple PI waiters and return the first waiter.
 */
static int
umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	*first = NULL;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL) {
		*first = TAILQ_FIRST(&uh->head);
		return (uh->length);
	}
	return (0);
}

static int
umtxq_check_susp(struct thread *td)
{
	struct proc *p;
	int error;

	/*
	 * The check for TDF_NEEDSUSPCHK is racy, but it is enough to
	 * eventually break the lockstep loop.
	 */
	if ((td->td_flags & TDF_NEEDSUSPCHK) == 0)
		return (0);
	error = 0;
	p = td->td_proc;
	PROC_LOCK(p);
	if (P_SHOULDSTOP(p) ||
	    ((p->p_flag & P_TRACED) && (td->td_dbgflags & TDB_SUSPEND))) {
		if (p->p_flag & P_SINGLE_EXIT)
			error = EINTR;
		else
			error = ERESTART;
	}
	PROC_UNLOCK(p);
	return (error);
}

/*
 * Wake up threads waiting on a userland object.
 */
static int
umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;
	struct umtx_q *uq;
	int ret;

	ret = 0;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, q);
	if (uh != NULL) {
		while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
			umtxq_remove_queue(uq, q);
			wakeup(uq);
			if (++ret >= n_wake)
				return (ret);
		}
	}
	return (ret);
}

/*
 * Wake up the specified thread.
 */
static inline void
umtxq_signal_thread(struct umtx_q *uq)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	umtxq_remove(uq);
	wakeup(uq);
}

static inline int
tstohz(const struct timespec *tsp)
{
	struct timeval tv;

	TIMESPEC_TO_TIMEVAL(&tv, tsp);
	return tvtohz(&tv);
}

static void
abs_timeout_init(struct abs_timeout *timo, int clockid, int absolute,
    const struct timespec *timeout)
{

	timo->clockid = clockid;
	if (!absolute) {
		timo->is_abs_real = false;
		abs_timeout_update(timo);
		timo->end = timo->cur;
		timespecadd(&timo->end, timeout);
	} else {
		timo->end = *timeout;
		timo->is_abs_real = clockid == CLOCK_REALTIME ||
		    clockid == CLOCK_REALTIME_FAST ||
		    clockid == CLOCK_REALTIME_PRECISE;
		/*
		 * If is_abs_real, umtxq_sleep will read the clock
		 * after setting td_rtcgen; otherwise, read it here.
		 */
		if (!timo->is_abs_real) {
			abs_timeout_update(timo);
		}
	}
}

static void
abs_timeout_init2(struct abs_timeout *timo, const struct _umtx_time *umtxtime)
{

	abs_timeout_init(timo, umtxtime->_clockid,
	    (umtxtime->_flags & UMTX_ABSTIME) != 0, &umtxtime->_timeout);
}

static inline void
abs_timeout_update(struct abs_timeout *timo)
{

	kern_clock_gettime(curthread, timo->clockid, &timo->cur);
}

static int
abs_timeout_gethz(struct abs_timeout *timo)
{
	struct timespec tts;

	if (timespeccmp(&timo->end, &timo->cur, <=))
		return (-1);
	tts = timo->end;
	timespecsub(&tts, &timo->cur);
	return (tstohz(&tts));
}

static uint32_t
umtx_unlock_val(uint32_t flags, bool rb)
{

	if (rb)
		return (UMUTEX_RB_OWNERDEAD);
	else if ((flags & UMUTEX_NONCONSISTENT) != 0)
		return (UMUTEX_RB_NOTRECOV);
	else
		return (UMUTEX_UNOWNED);
}
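/*
 * Usage sketch for the abs_timeout helpers, as the sleep loops below
 * apply them: the deadline is captured once, then the clock is
 * re-read after every wakeup so a relative timeout does not stretch
 * across spurious wakeups:
 *
 *	struct abs_timeout timo;
 *
 *	abs_timeout_init2(&timo, umtxtime);	(from the _umtx_time arg)
 *	for (;;) {
 *		ticks = abs_timeout_gethz(&timo);
 *		if (ticks < 0)
 *			return (ETIMEDOUT);
 *		msleep(..., ticks);
 *		abs_timeout_update(&timo);	(re-read the clock)
 *	}
 */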
/*
 * Put the thread into a sleep state.  Before sleeping, check whether
 * the thread was removed from the umtx queue.
 */
static inline int
umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *abstime)
{
	struct umtxq_chain *uc;
	int error, timo;

	if (abstime != NULL && abstime->is_abs_real) {
		curthread->td_rtcgen = atomic_load_acq_int(&rtc_generation);
		abs_timeout_update(abstime);
	}

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	for (;;) {
		if (!(uq->uq_flags & UQF_UMTXQ)) {
			error = 0;
			break;
		}
		if (abstime != NULL) {
			timo = abs_timeout_gethz(abstime);
			if (timo < 0) {
				error = ETIMEDOUT;
				break;
			}
		} else
			timo = 0;
		error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo);
		if (error == EINTR || error == ERESTART) {
			umtxq_lock(&uq->uq_key);
			break;
		}
		if (abstime != NULL) {
			if (abstime->is_abs_real)
				curthread->td_rtcgen =
				    atomic_load_acq_int(&rtc_generation);
			abs_timeout_update(abstime);
		}
		umtxq_lock(&uq->uq_key);
	}

	curthread->td_rtcgen = 0;
	return (error);
}

/*
 * Convert userspace address into unique logical address.
 */
int
umtx_key_get(const void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else {
		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
		    &entry, &key->info.shared.object, &pindex, &prot,
		    &wired) != KERN_SUCCESS) {
			return (EFAULT);
		}

		if ((share == PROCESS_SHARE) ||
		    (share == AUTO_SHARE &&
		     VM_INHERIT_SHARE == entry->inheritance)) {
			key->shared = 1;
			key->info.shared.offset = (vm_offset_t)addr -
			    entry->start + entry->offset;
			vm_object_reference(key->info.shared.object);
		} else {
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		vm_map_lookup_done(map, entry);
	}

	umtxq_hash(key);
	return (0);
}

/*
 * Release key.
 */
void
umtx_key_release(struct umtx_key *key)
{
	if (key->shared)
		vm_object_deallocate(key->info.shared.object);
}
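/*
 * Example of the mapping performed by umtx_key_get(): a
 * process-private word at address A in process P yields the pair
 * (P->p_vmspace, A), so two processes mapping the same page still get
 * distinct keys; a USYNC_PROCESS_SHARED word yields the backing
 * (vm_object, offset) pair instead, so any mapping of that object, at
 * any address and in any process, hashes to the same wait queue.
 */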
/*
 * Fetch and compare the value; sleep on the address if the value has
 * not changed.
 */
static int
do_wait(struct thread *td, void *addr, u_long id,
    struct _umtx_time *timeout, int compat32, int is_private)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	u_long tmp;
	uint32_t tmp32;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	if (compat32 == 0) {
		error = fueword(addr, &tmp);
		if (error != 0)
			error = EFAULT;
	} else {
		error = fueword32(addr, &tmp32);
		if (error == 0)
			tmp = tmp32;
		else
			error = EFAULT;
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		if (tmp == id)
			error = umtxq_sleep(uq, "uwait", timeout == NULL ?
			    NULL : &timo);
		if ((uq->uq_flags & UQF_UMTXQ) == 0)
			error = 0;
		else
			umtxq_remove(uq);
	} else if ((uq->uq_flags & UQF_UMTXQ) != 0) {
		umtxq_remove(uq);
	}
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

/*
 * Wake up threads sleeping on the specified address.
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
		return (ret);
	umtxq_lock(&key);
	umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}
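/*
 * do_wait() and kern_umtx_wake() above form a futex-style pair (the
 * kernel halves of the UMTX_OP_WAIT/UMTX_OP_WAKE _umtx_op() requests):
 * the waiter sleeps only while the word still holds the expected
 * value, and a waker need not know the word's contents at all.  The
 * lost-wakeup race is closed because the waiter inserts itself into
 * the wait queue, under the chain lock, before reading the word; a
 * concurrent wake then removes it from the queue and umtxq_sleep()
 * returns immediately instead of blocking.
 */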
/*
 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int mode)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	error = 0;
	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	/*
	 * Care must be exercised when dealing with umtx structure.  It
	 * can fault on any access.
	 */
	for (;;) {
		rv = fueword32(&m->m_owner, &owner);
		if (rv == -1)
			return (EFAULT);
		if (mode == _UMUTEX_WAIT) {
			if (owner == UMUTEX_UNOWNED ||
			    owner == UMUTEX_CONTESTED ||
			    owner == UMUTEX_RB_OWNERDEAD ||
			    owner == UMUTEX_RB_NOTRECOV)
				return (0);
		} else {
			/*
			 * Robust mutex terminated.  Kernel duty is to
			 * return EOWNERDEAD to the userspace.  The
			 * umutex.m_flags UMUTEX_NONCONSISTENT is set
			 * by the common userspace code.
			 */
			if (owner == UMUTEX_RB_OWNERDEAD) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_RB_OWNERDEAD, &owner,
				    id | UMUTEX_CONTESTED);
				if (rv == -1)
					return (EFAULT);
				if (owner == UMUTEX_RB_OWNERDEAD)
					return (EOWNERDEAD); /* success */
				rv = umtxq_check_susp(td);
				if (rv != 0)
					return (rv);
				continue;
			}
			if (owner == UMUTEX_RB_NOTRECOV)
				return (ENOTRECOVERABLE);

			/*
			 * Try the uncontested case.  This should be
			 * done in userland.
			 */
			rv = casueword32(&m->m_owner, UMUTEX_UNOWNED,
			    &owner, id);
			/* The address was invalid. */
			if (rv == -1)
				return (EFAULT);

			/* The acquire succeeded. */
			if (owner == UMUTEX_UNOWNED)
				return (0);

			/*
			 * If no one owns it but it is contested try
			 * to acquire it.
			 */
			if (owner == UMUTEX_CONTESTED) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_CONTESTED, &owner,
				    id | UMUTEX_CONTESTED);
				/* The address was invalid. */
				if (rv == -1)
					return (EFAULT);

				if (owner == UMUTEX_CONTESTED)
					return (0);

				rv = umtxq_check_susp(td);
				if (rv != 0)
					return (rv);

				/*
				 * If this failed the lock has
				 * changed, restart.
				 */
				continue;
			}
		}

		if (mode == _UMUTEX_TRY)
			return (EBUSY);

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old,
		    owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (rv == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race with the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtxn", timeout == NULL ?
			    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);

		if (error == 0)
			error = umtxq_check_susp(td);
	}

	return (0);
}

/*
 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	uint32_t owner, old, id, newlock;
	int error, count;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	newlock = umtx_unlock_val(flags, rb);
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, newlock);
		if (error == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if only
	 * zero or one thread is waiting for it.  Otherwise, it must be
	 * marked as contested.
	 */
	if (count > 1)
		newlock |= UMUTEX_CONTESTED;
	error = casueword32(&m->m_owner, owner, &old, newlock);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
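/*
 * The contested-bit protocol above, seen from the userland side (a
 * hedged sketch of a typical caller, not the libthr source): the fast
 * path is a single atomic op on m_owner and the kernel is entered
 * only on contention:
 *
 *	lock:   if (atomic_cmpset_acq_32(&m->m_owner, UMUTEX_UNOWNED, tid))
 *	                return (0);             (uncontested)
 *	        _umtx_op(m, UMTX_OP_MUTEX_LOCK, ...);
 *
 *	unlock: if (atomic_cmpset_rel_32(&m->m_owner, tid, UMUTEX_UNOWNED))
 *	                return (0);             (nobody saw us)
 *	        _umtx_op(m, UMTX_OP_MUTEX_UNLOCK, ...);  (wake a waiter)
 *
 * do_lock_normal() sets UMUTEX_CONTESTED before sleeping precisely so
 * that the owner's release CAS fails and it falls into the kernel.
 */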
/*
 * Check if the mutex is available and wake up a waiter; used only for
 * a simple (non-PI, non-PP) mutex.
 */
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != 0 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV)
		return (0);

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	if (count <= 1 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV) {
		error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    UMUTEX_UNOWNED);
		if (error == -1)
			error = EFAULT;
	}

	umtxq_lock(&key);
	if (error == 0 && count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 ||
	    owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV))
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}
/*
 * Check if the mutex has waiters and try to repair the contention bit.
 */
static int
do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old;
	int type;
	int error;
	int count;

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT |
	    UMUTEX_ROBUST)) {
	case 0:
	case UMUTEX_ROBUST:
		type = TYPE_NORMAL_UMUTEX;
		break;
	case UMUTEX_PRIO_INHERIT:
		type = TYPE_PI_UMUTEX;
		break;
	case (UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST):
		type = TYPE_PI_ROBUST_UMUTEX;
		break;
	case UMUTEX_PRIO_PROTECT:
		type = TYPE_PP_UMUTEX;
		break;
	case (UMUTEX_PRIO_PROTECT | UMUTEX_ROBUST):
		type = TYPE_PP_ROBUST_UMUTEX;
		break;
	default:
		return (EINVAL);
	}
	if ((error = umtx_key_get(m, type, GET_SHARE(flags), &key)) != 0)
		return (error);

	owner = 0;
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);
	/*
	 * Only repair the contention bit if there is a waiter; this means
	 * the mutex is still being referenced by userland code.  Otherwise,
	 * do not update the memory at all.
	 */
	if (count > 1) {
		error = fueword32(&m->m_owner, &owner);
		if (error == -1)
			error = EFAULT;
		while (error == 0 && (owner & UMUTEX_CONTESTED) == 0) {
			error = casueword32(&m->m_owner, owner, &old,
			    owner | UMUTEX_CONTESTED);
			if (error == -1) {
				error = EFAULT;
				break;
			}
			if (old == owner)
				break;
			owner = old;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
	} else if (count == 1) {
		error = fueword32(&m->m_owner, &owner);
		if (error == -1)
			error = EFAULT;
		while (error == 0 && (owner & ~UMUTEX_CONTESTED) != 0 &&
		    (owner & UMUTEX_CONTESTED) == 0) {
			error = casueword32(&m->m_owner, owner, &old,
			    owner | UMUTEX_CONTESTED);
			if (error == -1) {
				error = EFAULT;
				break;
			}
			if (old == owner)
				break;
			owner = old;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
	}
	umtxq_lock(&key);
	if (error == EFAULT) {
		umtxq_signal(&key, INT_MAX);
	} else if (count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 ||
	    owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV))
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

static inline struct umtx_pi *
umtx_pi_alloc(int flags)
{
	struct umtx_pi *pi;

	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
	TAILQ_INIT(&pi->pi_blocked);
	atomic_add_int(&umtx_pi_allocated, 1);
	return (pi);
}

static inline void
umtx_pi_free(struct umtx_pi *pi)
{
	uma_zfree(umtx_pi_zone, pi);
	atomic_add_int(&umtx_pi_allocated, -1);
}

/*
 * Adjust the thread's position on a pi_state after its priority has been
 * changed.
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread or higher than the next thread.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}

static struct umtx_pi *
umtx_pi_next(struct umtx_pi *pi)
{
	struct umtx_q *uq_owner;

	if (pi->pi_owner == NULL)
		return (NULL);
	uq_owner = pi->pi_owner->td_umtxq;
	if (uq_owner == NULL)
		return (NULL);
	return (uq_owner->uq_pi_blocked);
}
/*
 * Floyd's Cycle-Finding Algorithm.  Userland can construct a cycle of
 * PI mutex ownership (i.e. a deadlock), so detect loops with the
 * classic slow/fast walk before following the chain, lest priority
 * propagation iterate forever.
 */
static bool
umtx_pi_check_loop(struct umtx_pi *pi)
{
	struct umtx_pi *pi1;	/* fast iterator */

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (false);
	pi1 = pi;
	for (;;) {
		pi = umtx_pi_next(pi);
		if (pi == NULL)
			break;
		pi1 = umtx_pi_next(pi1);
		if (pi1 == NULL)
			break;
		pi1 = umtx_pi_next(pi1);
		if (pi1 == NULL)
			break;
		if (pi == pi1)
			return (true);
	}
	return (false);
}

/*
 * Propagate priority when a thread is blocked on POSIX
 * PI mutex.
 */
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;
	if (umtx_pi_check_loop(pi))
		return;

	for (;;) {
		td = pi->pi_owner;
		if (td == NULL || td == curthread)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		thread_lock(td);
		if (td->td_lend_user_pri > pri)
			sched_lend_user_prio(td, pri);
		else {
			thread_unlock(td);
			break;
		}
		thread_unlock(td);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		if (pi == NULL)
			break;
		/* Resort td on the list if needed. */
		umtx_pi_adjust_thread(pi, td);
	}
}

/*
 * Unpropagate priority for a PI mutex when a thread blocked on
 * it is interrupted by signal or resumed by others.
 */
static void
umtx_repropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);

	if (umtx_pi_check_loop(pi))
		return;
	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		thread_lock(pi->pi_owner);
		sched_lend_user_prio(pi->pi_owner, pri);
		thread_unlock(pi->pi_owner);
		if ((pi = uq_owner->uq_pi_blocked) != NULL)
			umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
	}
}

/*
 * Insert a PI mutex into owned list.
 */
static void
umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi->pi_owner != NULL)
		panic("pi_owner != NULL");
	pi->pi_owner = owner;
	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
}

/*
 * Disown a PI mutex, and remove it from the owned list.
 */
static void
umtx_pi_disown(struct umtx_pi *pi)
{

	mtx_assert(&umtx_lock, MA_OWNED);
	TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, pi, pi_link);
	pi->pi_owner = NULL;
}
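/*
 * Worked example of the propagation above (hypothetical threads and
 * priorities): thread A (UPRI 140) owns PI mutex M1; thread B
 * (UPRI 120) owns M2 and blocks on M1; thread C (UPRI 96) blocks on
 * M2.  umtx_propagate_priority(C) lends 96 to B, then follows
 * B->uq_pi_blocked to M1 and lends 96 to A as well.  When C is
 * interrupted or acquires M2, umtx_repropagate_priority() recomputes
 * each owner's lent priority from the remaining top waiters and
 * unwinds the boost.
 */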
/*
 * Claim ownership of a PI mutex.
 */
static int
umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq;
	int pri;

	mtx_lock(&umtx_lock);
	if (pi->pi_owner == owner) {
		mtx_unlock(&umtx_lock);
		return (0);
	}

	if (pi->pi_owner != NULL) {
		/*
		 * userland may have already messed the mutex, sigh.
		 */
		mtx_unlock(&umtx_lock);
		return (EPERM);
	}
	umtx_pi_setowner(pi, owner);
	uq = TAILQ_FIRST(&pi->pi_blocked);
	if (uq != NULL) {
		pri = UPRI(uq->uq_thread);
		thread_lock(owner);
		if (pri < UPRI(owner))
			sched_lend_user_prio(owner, pri);
		thread_unlock(owner);
	}
	mtx_unlock(&umtx_lock);
	return (0);
}

/*
 * Adjust a thread's order position in its blocked PI mutex; this may
 * trigger a new round of priority propagation.
 */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	mtx_lock(&umtx_lock);
	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	if (pi != NULL) {
		umtx_pi_adjust_thread(pi, td);
		umtx_repropagate_priority(pi);
	}
	mtx_unlock(&umtx_lock);
}

/*
 * Sleep on a PI mutex.
 */
static int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, uint32_t owner,
    const char *wmesg, struct abs_timeout *timo, bool shared)
{
	struct umtxq_chain *uc;
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int error, pri;

	error = 0;
	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(uc->uc_busy != 0, ("umtx chain is not busy"));
	umtxq_insert(uq);
	mtx_lock(&umtx_lock);
	if (pi->pi_owner == NULL) {
		mtx_unlock(&umtx_lock);
		td1 = tdfind(owner, shared ? -1 : td->td_proc->p_pid);
		mtx_lock(&umtx_lock);
		if (td1 != NULL) {
			if (pi->pi_owner == NULL)
				umtx_pi_setowner(pi, td1);
			PROC_UNLOCK(td1->td_proc);
		}
	}

	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	thread_lock(td);
	td->td_flags |= TDF_UPIBLOCKED;
	thread_unlock(td);
	umtx_propagate_priority(td);
	mtx_unlock(&umtx_lock);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, wmesg, timo);
	umtxq_remove(uq);

	mtx_lock(&umtx_lock);
	uq->uq_pi_blocked = NULL;
	thread_lock(td);
	td->td_flags &= ~TDF_UPIBLOCKED;
	thread_unlock(td);
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_repropagate_priority(pi);
	mtx_unlock(&umtx_lock);
	umtxq_unlock(&uq->uq_key);

	return (error);
}

/*
 * Increment the reference count of a PI mutex.
 */
static void
umtx_pi_ref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	pi->pi_refcount++;
}
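/*
 * Lock ordering note for the PI paths above: the chain lock (and the
 * chain's busy state) is acquired before the global umtx_lock, and
 * uq_pi_blocked is written only while both are held.  That is what
 * lets readers get away with holding either one alone; see the
 * field's comment in struct umtx_q.
 */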
/*
 * Decrement the reference count of a PI mutex; when the count drops
 * to zero, its memory is freed.
 */
static void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		mtx_lock(&umtx_lock);
		if (pi->pi_owner != NULL)
			umtx_pi_disown(pi);
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
		    ("blocked queue not empty"));
		mtx_unlock(&umtx_lock);
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		umtx_pi_free(pi);
	}
}

/*
 * Find a PI mutex in hash table.
 */
static struct umtx_pi *
umtx_pi_lookup(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtx_pi *pi;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);

	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
		if (umtx_key_match(&pi->pi_key, key)) {
			return (pi);
		}
	}
	return (NULL);
}

/*
 * Insert a PI mutex into hash table.
 */
static inline void
umtx_pi_insert(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
}
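/*
 * do_lock_pi() below needs a umtx_pi before it can sleep.  It first
 * tries umtx_pi_alloc(M_NOWAIT) under the chain lock; if that fails
 * it drops the lock, allocates with M_WAITOK, relocks, and re-runs
 * umtx_pi_lookup(), discarding the fresh allocation when another
 * thread won the race.  This is the standard pattern for populating a
 * locked lookup structure with a sleepable allocator.
 */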
/*
 * Lock a PI mutex.
 */
static int
do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, old_owner, owner, old;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	pi = umtx_pi_lookup(&uq->uq_key);
	if (pi == NULL) {
		new_pi = umtx_pi_alloc(M_NOWAIT);
		if (new_pi == NULL) {
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc(M_WAITOK);
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL) {
				umtx_pi_free(new_pi);
				new_pi = NULL;
			}
		}
		if (new_pi != NULL) {
			new_pi->pi_key = uq->uq_key;
			umtx_pi_insert(new_pi);
			pi = new_pi;
		}
	}
	umtx_pi_ref(pi);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Care must be exercised when dealing with umtx structure.  It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED) {
			error = 0;
			break;
		}

		if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
			break;
		}

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED || owner == UMUTEX_RB_OWNERDEAD) {
			old_owner = owner;
			rv = casueword32(&m->m_owner, owner, &owner,
			    id | UMUTEX_CONTESTED);
			/* The address was invalid. */
			if (rv == -1) {
				error = EFAULT;
				break;
			}

			if (owner == old_owner) {
				umtxq_lock(&uq->uq_key);
				umtxq_busy(&uq->uq_key);
				error = umtx_pi_claim(pi, td);
				umtxq_unbusy(&uq->uq_key);
				umtxq_unlock(&uq->uq_key);
				if (error != 0) {
					/*
					 * Since we're going to return an
					 * error, restore the m_owner to its
					 * previous, unowned state to avoid
					 * compounding the problem.
					 */
					(void)casuword32(&m->m_owner,
					    id | UMUTEX_CONTESTED,
					    old_owner);
				}
				if (error == 0 &&
				    old_owner == UMUTEX_RB_OWNERDEAD)
					error = EOWNERDEAD;
				break;
			}

			error = umtxq_check_susp(td);
			if (error != 0)
				break;

			/* If this failed the lock has changed, restart. */
			continue;
		}

		if ((owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old, owner |
		    UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}

		umtxq_lock(&uq->uq_key);
		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race with the
		 * thread unlocking the umtx.  Note that the
		 * UMUTEX_RB_OWNERDEAD value for owner is impossible there.
		 */
		if (old == owner) {
			error = umtxq_sleep_pi(uq, pi,
			    owner & ~UMUTEX_CONTESTED,
			    "umtxpi", timeout == NULL ? NULL : &timo,
			    (flags & USYNC_PROCESS_SHARED) != 0);
			if (error != 0)
				continue;
		} else {
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
		}

		error = umtxq_check_susp(td);
		if (error != 0)
			break;
	}

	umtxq_lock(&uq->uq_key);
	umtx_pi_unref(pi);
	umtxq_unlock(&uq->uq_key);

	umtx_key_release(&uq->uq_key);
	return (error);
}
/*
 * Unlock a PI mutex.
 */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	uint32_t id, new_owner, old, owner;
	int count, error, pri;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	new_owner = umtx_unlock_val(flags, rb);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, new_owner);
		if (error == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count_pi(&key, &uq_first);
	if (uq_first != NULL) {
		mtx_lock(&umtx_lock);
		pi = uq_first->uq_pi_blocked;
		KASSERT(pi != NULL, ("pi == NULL?"));
		if (pi->pi_owner != td && !(rb && pi->pi_owner == NULL)) {
			mtx_unlock(&umtx_lock);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			/* userland messed the mutex */
			return (EPERM);
		}
		uq_me = td->td_umtxq;
		if (pi->pi_owner == td)
			umtx_pi_disown(pi);
		/* get highest priority thread which is still sleeping. */
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		while (uq_first != NULL &&
		    (uq_first->uq_flags & UQF_UMTXQ) == 0) {
			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
		}
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
		if (uq_first)
			umtxq_signal_thread(uq_first);
	} else {
		pi = umtx_pi_lookup(&key);
		/*
		 * A umtx_pi can exist if a signal or timeout removed the
		 * last waiter from the umtxq, but there is still
		 * a thread in do_lock_pi() holding the umtx_pi.
		 */
		if (pi != NULL) {
			/*
			 * The umtx_pi can be unowned, such as when a thread
			 * has just entered do_lock_pi(), allocated the
			 * umtx_pi, and unlocked the umtxq.
			 * If the current thread owns it, it must disown it.
			 */
			mtx_lock(&umtx_lock);
			if (pi->pi_owner == td)
				umtx_pi_disown(pi);
			mtx_unlock(&umtx_lock);
		}
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if only
	 * zero or one thread is waiting for it.  Otherwise, it must be
	 * marked as contested.
	 */
	if (count > 1)
		new_owner |= UMUTEX_CONTESTED;
	error = casueword32(&m->m_owner, owner, &old, new_owner);

	umtxq_unbusy_unlocked(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
/*
 * Lock a PP mutex.
 */
static int
do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct abs_timeout timo;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t ceiling;
	uint32_t owner, id;
	int error, pri, old_inherited_pri, su, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
	for (;;) {
		old_inherited_pri = uq->uq_inherited_pri;
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		rv = fueword32(&m->m_ceilings[0], &ceiling);
		if (rv == -1) {
			error = EFAULT;
			goto out;
		}
		ceiling = RTP_PRIO_MAX - ceiling;
		if (ceiling > RTP_PRIO_MAX) {
			error = EINVAL;
			goto out;
		}

		mtx_lock(&umtx_lock);
		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
			mtx_unlock(&umtx_lock);
			error = EINVAL;
			goto out;
		}
		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
			thread_lock(td);
			if (uq->uq_inherited_pri < UPRI(td))
				sched_lend_user_prio(td, uq->uq_inherited_pri);
			thread_unlock(td);
		}
		mtx_unlock(&umtx_lock);

		rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    id | UMUTEX_CONTESTED);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		if (owner == UMUTEX_CONTESTED) {
			error = 0;
			break;
		} else if (owner == UMUTEX_RB_OWNERDEAD) {
			rv = casueword32(&m->m_owner, UMUTEX_RB_OWNERDEAD,
			    &owner, id | UMUTEX_CONTESTED);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (owner == UMUTEX_RB_OWNERDEAD) {
				error = EOWNERDEAD; /* success */
				break;
			}
			error = 0;
		} else if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", timeout == NULL ?
		    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);

		mtx_lock(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}

	if (error != 0 && error != EOWNERDEAD) {
		mtx_lock(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}

out:
	umtxq_unbusy_unlocked(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}
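/*
 * Worked example of the ceiling arithmetic in do_lock_pp(): userland
 * stores a POSIX-style ceiling in m_ceilings[0], where a larger value
 * means more important.  The kernel flips it with
 * ceiling = RTP_PRIO_MAX - ceiling and lends
 * PRI_MIN_REALTIME + ceiling, since kernel priorities grow downward.
 * With RTP_PRIO_MAX == 31, a userland ceiling of 31 (the highest)
 * thus maps to PRI_MIN_REALTIME + 0, the strongest real-time priority
 * a PP mutex can confer.
 */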
2249 */
2250 static int
2251 do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
2252 {
2253 struct umtx_key key;
2254 struct umtx_q *uq, *uq2;
2255 struct umtx_pi *pi;
2256 uint32_t id, owner, rceiling;
2257 int error, pri, new_inherited_pri, su;
2258
2259 id = td->td_tid;
2260 uq = td->td_umtxq;
2261 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
2262
2263 /*
2264 * Make sure we own this mtx.
2265 */
2266 error = fueword32(&m->m_owner, &owner);
2267 if (error == -1)
2268 return (EFAULT);
2269
2270 if ((owner & ~UMUTEX_CONTESTED) != id)
2271 return (EPERM);
2272
2273 error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
2274 if (error != 0)
2275 return (error);
2276
2277 if (rceiling == -1)
2278 new_inherited_pri = PRI_MAX;
2279 else {
2280 rceiling = RTP_PRIO_MAX - rceiling;
2281 if (rceiling > RTP_PRIO_MAX)
2282 return (EINVAL);
2283 new_inherited_pri = PRI_MIN_REALTIME + rceiling;
2284 }
2285
2286 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
2287 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
2288 &key)) != 0)
2289 return (error);
2290 umtxq_lock(&key);
2291 umtxq_busy(&key);
2292 umtxq_unlock(&key);
2293 /*
2294 * For a priority-protected mutex, always set the unlocked state
2295 * to UMUTEX_CONTESTED, so that userland always enters the kernel
2296 * to lock the mutex. This is necessary because the thread
2297 * priority has to be adjusted for such mutexes.
2298 */
2299 error = suword32(&m->m_owner, umtx_unlock_val(flags, rb) |
2300 UMUTEX_CONTESTED);
2301
2302 umtxq_lock(&key);
2303 if (error == 0)
2304 umtxq_signal(&key, 1);
2305 umtxq_unbusy(&key);
2306 umtxq_unlock(&key);
2307
2308 if (error == -1)
2309 error = EFAULT;
2310 else {
2311 mtx_lock(&umtx_lock);
2312 if (su != 0)
2313 uq->uq_inherited_pri = new_inherited_pri;
2314 pri = PRI_MAX;
2315 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2316 uq2 = TAILQ_FIRST(&pi->pi_blocked);
2317 if (uq2 != NULL) {
2318 if (pri > UPRI(uq2->uq_thread))
2319 pri = UPRI(uq2->uq_thread);
2320 }
2321 }
2322 if (pri > uq->uq_inherited_pri)
2323 pri = uq->uq_inherited_pri;
2324 thread_lock(td);
2325 sched_lend_user_prio(td, pri);
2326 thread_unlock(td);
2327 mtx_unlock(&umtx_lock);
2328 }
2329 umtx_key_release(&key);
2330 return (error);
2331 }
2332
2333 static int
2334 do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
2335 uint32_t *old_ceiling)
2336 {
2337 struct umtx_q *uq;
2338 uint32_t flags, id, owner, save_ceiling;
2339 int error, rv, rv1;
2340
2341 error = fueword32(&m->m_flags, &flags);
2342 if (error == -1)
2343 return (EFAULT);
2344 if ((flags & UMUTEX_PRIO_PROTECT) == 0)
2345 return (EINVAL);
2346 if (ceiling > RTP_PRIO_MAX)
2347 return (EINVAL);
2348 id = td->td_tid;
2349 uq = td->td_umtxq;
2350 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
2351 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
2352 &uq->uq_key)) != 0)
2353 return (error);
2354 for (;;) {
2355 umtxq_lock(&uq->uq_key);
2356 umtxq_busy(&uq->uq_key);
2357 umtxq_unlock(&uq->uq_key);
2358
2359 rv = fueword32(&m->m_ceilings[0], &save_ceiling);
2360 if (rv == -1) {
2361 error = EFAULT;
2362 break;
2363 }
2364
2365 rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
2366 id | UMUTEX_CONTESTED);
2367 if (rv == -1) {
2368 error = EFAULT;
2369 break;
2370 }
2371
2372 if (owner == UMUTEX_CONTESTED) {
2373 rv = suword32(&m->m_ceilings[0], ceiling);
2374 rv1 = suword32(&m->m_owner, UMUTEX_CONTESTED);
2375 error = (rv == 0 && rv1 == 0) ?
0: EFAULT; 2376 break; 2377 } 2378 2379 if ((owner & ~UMUTEX_CONTESTED) == id) { 2380 rv = suword32(&m->m_ceilings[0], ceiling); 2381 error = rv == 0 ? 0 : EFAULT; 2382 break; 2383 } 2384 2385 if (owner == UMUTEX_RB_OWNERDEAD) { 2386 error = EOWNERDEAD; 2387 break; 2388 } else if (owner == UMUTEX_RB_NOTRECOV) { 2389 error = ENOTRECOVERABLE; 2390 break; 2391 } 2392 2393 /* 2394 * If we caught a signal, we have retried and now 2395 * exit immediately. 2396 */ 2397 if (error != 0) 2398 break; 2399 2400 /* 2401 * We set the contested bit, sleep. Otherwise the lock changed 2402 * and we need to retry or we lost a race to the thread 2403 * unlocking the umtx. 2404 */ 2405 umtxq_lock(&uq->uq_key); 2406 umtxq_insert(uq); 2407 umtxq_unbusy(&uq->uq_key); 2408 error = umtxq_sleep(uq, "umtxpp", NULL); 2409 umtxq_remove(uq); 2410 umtxq_unlock(&uq->uq_key); 2411 } 2412 umtxq_lock(&uq->uq_key); 2413 if (error == 0) 2414 umtxq_signal(&uq->uq_key, INT_MAX); 2415 umtxq_unbusy(&uq->uq_key); 2416 umtxq_unlock(&uq->uq_key); 2417 umtx_key_release(&uq->uq_key); 2418 if (error == 0 && old_ceiling != NULL) { 2419 rv = suword32(old_ceiling, save_ceiling); 2420 error = rv == 0 ? 0 : EFAULT; 2421 } 2422 return (error); 2423 } 2424 2425 /* 2426 * Lock a userland POSIX mutex. 2427 */ 2428 static int 2429 do_lock_umutex(struct thread *td, struct umutex *m, 2430 struct _umtx_time *timeout, int mode) 2431 { 2432 uint32_t flags; 2433 int error; 2434 2435 error = fueword32(&m->m_flags, &flags); 2436 if (error == -1) 2437 return (EFAULT); 2438 2439 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2440 case 0: 2441 error = do_lock_normal(td, m, flags, timeout, mode); 2442 break; 2443 case UMUTEX_PRIO_INHERIT: 2444 error = do_lock_pi(td, m, flags, timeout, mode); 2445 break; 2446 case UMUTEX_PRIO_PROTECT: 2447 error = do_lock_pp(td, m, flags, timeout, mode); 2448 break; 2449 default: 2450 return (EINVAL); 2451 } 2452 if (timeout == NULL) { 2453 if (error == EINTR && mode != _UMUTEX_WAIT) 2454 error = ERESTART; 2455 } else { 2456 /* Timed-locking is not restarted. */ 2457 if (error == ERESTART) 2458 error = EINTR; 2459 } 2460 return (error); 2461 } 2462 2463 /* 2464 * Unlock a userland POSIX mutex. 
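 *
 * Like the lock path above, the unlock path dispatches on the m_flags
 * word fetched from userland; the rb argument selects the owner-died
 * unlock value via umtx_unlock_val() and is true only when called
 * from the robust-list cleanup in umtx_handle_rb().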
2465 */
2466 static int
2467 do_unlock_umutex(struct thread *td, struct umutex *m, bool rb)
2468 {
2469 uint32_t flags;
2470 int error;
2471
2472 error = fueword32(&m->m_flags, &flags);
2473 if (error == -1)
2474 return (EFAULT);
2475
2476 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2477 case 0:
2478 return (do_unlock_normal(td, m, flags, rb));
2479 case UMUTEX_PRIO_INHERIT:
2480 return (do_unlock_pi(td, m, flags, rb));
2481 case UMUTEX_PRIO_PROTECT:
2482 return (do_unlock_pp(td, m, flags, rb));
2483 }
2484
2485 return (EINVAL);
2486 }
2487
2488 static int
2489 do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
2490 struct timespec *timeout, u_long wflags)
2491 {
2492 struct abs_timeout timo;
2493 struct umtx_q *uq;
2494 uint32_t flags, clockid, hasw;
2495 int error;
2496
2497 uq = td->td_umtxq;
2498 error = fueword32(&cv->c_flags, &flags);
2499 if (error == -1)
2500 return (EFAULT);
2501 error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
2502 if (error != 0)
2503 return (error);
2504
2505 if ((wflags & CVWAIT_CLOCKID) != 0) {
2506 error = fueword32(&cv->c_clockid, &clockid);
2507 if (error == -1) {
2508 umtx_key_release(&uq->uq_key);
2509 return (EFAULT);
2510 }
2511 if (clockid < CLOCK_REALTIME ||
2512 clockid >= CLOCK_THREAD_CPUTIME_ID) {
2513 /* only the predefined hardware clock ids will work. */
2514 umtx_key_release(&uq->uq_key);
2515 return (EINVAL);
2516 }
2517 } else {
2518 clockid = CLOCK_REALTIME;
2519 }
2520
2521 umtxq_lock(&uq->uq_key);
2522 umtxq_busy(&uq->uq_key);
2523 umtxq_insert(uq);
2524 umtxq_unlock(&uq->uq_key);
2525
2526 /*
2527 * Set c_has_waiters to 1 before releasing the user mutex, and
2528 * avoid touching the cache line when unnecessary.
2529 */
2530 error = fueword32(&cv->c_has_waiters, &hasw);
2531 if (error == 0 && hasw == 0)
2532 suword32(&cv->c_has_waiters, 1);
2533
2534 umtxq_unbusy_unlocked(&uq->uq_key);
2535
2536 error = do_unlock_umutex(td, m, false);
2537
2538 if (timeout != NULL)
2539 abs_timeout_init(&timo, clockid, (wflags & CVWAIT_ABSTIME) != 0,
2540 timeout);
2541
2542 umtxq_lock(&uq->uq_key);
2543 if (error == 0) {
2544 error = umtxq_sleep(uq, "ucond", timeout == NULL ?
2545 NULL : &timo);
2546 }
2547
2548 if ((uq->uq_flags & UQF_UMTXQ) == 0)
2549 error = 0;
2550 else {
2551 /*
2552 * This must be a timeout, an interruption by a signal, or
2553 * a spurious wakeup; clear the c_has_waiters flag when
2554 * necessary.
2555 */
2556 umtxq_busy(&uq->uq_key);
2557 if ((uq->uq_flags & UQF_UMTXQ) != 0) {
2558 int oldlen = uq->uq_cur_queue->length;
2559 umtxq_remove(uq);
2560 if (oldlen == 1) {
2561 umtxq_unlock(&uq->uq_key);
2562 suword32(&cv->c_has_waiters, 0);
2563 umtxq_lock(&uq->uq_key);
2564 }
2565 }
2566 umtxq_unbusy(&uq->uq_key);
2567 if (error == ERESTART)
2568 error = EINTR;
2569 }
2570
2571 umtxq_unlock(&uq->uq_key);
2572 umtx_key_release(&uq->uq_key);
2573 return (error);
2574 }
2575
2576 /*
2577 * Signal a userland condition variable.
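 *
 * A hedged usage sketch of the wait/signal pairing (argument layout
 * as unpacked by __umtx_op_cv_wait() below; cv, m, and ts are
 * hypothetical userland objects):
 *
 *	_umtx_op(&cv, UMTX_OP_CV_WAIT, 0, &m, &ts);	(waiter)
 *	_umtx_op(&cv, UMTX_OP_CV_SIGNAL, 0, NULL, NULL);	(signaller)
 *
 * The kernel queues the waiter and only then unlocks m, so a signal
 * sent after the mutex is released cannot be lost.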
2578 */ 2579 static int 2580 do_cv_signal(struct thread *td, struct ucond *cv) 2581 { 2582 struct umtx_key key; 2583 int error, cnt, nwake; 2584 uint32_t flags; 2585 2586 error = fueword32(&cv->c_flags, &flags); 2587 if (error == -1) 2588 return (EFAULT); 2589 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2590 return (error); 2591 umtxq_lock(&key); 2592 umtxq_busy(&key); 2593 cnt = umtxq_count(&key); 2594 nwake = umtxq_signal(&key, 1); 2595 if (cnt <= nwake) { 2596 umtxq_unlock(&key); 2597 error = suword32(&cv->c_has_waiters, 0); 2598 if (error == -1) 2599 error = EFAULT; 2600 umtxq_lock(&key); 2601 } 2602 umtxq_unbusy(&key); 2603 umtxq_unlock(&key); 2604 umtx_key_release(&key); 2605 return (error); 2606 } 2607 2608 static int 2609 do_cv_broadcast(struct thread *td, struct ucond *cv) 2610 { 2611 struct umtx_key key; 2612 int error; 2613 uint32_t flags; 2614 2615 error = fueword32(&cv->c_flags, &flags); 2616 if (error == -1) 2617 return (EFAULT); 2618 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2619 return (error); 2620 2621 umtxq_lock(&key); 2622 umtxq_busy(&key); 2623 umtxq_signal(&key, INT_MAX); 2624 umtxq_unlock(&key); 2625 2626 error = suword32(&cv->c_has_waiters, 0); 2627 if (error == -1) 2628 error = EFAULT; 2629 2630 umtxq_unbusy_unlocked(&key); 2631 2632 umtx_key_release(&key); 2633 return (error); 2634 } 2635 2636 static int 2637 do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, struct _umtx_time *timeout) 2638 { 2639 struct abs_timeout timo; 2640 struct umtx_q *uq; 2641 uint32_t flags, wrflags; 2642 int32_t state, oldstate; 2643 int32_t blocked_readers; 2644 int error, error1, rv; 2645 2646 uq = td->td_umtxq; 2647 error = fueword32(&rwlock->rw_flags, &flags); 2648 if (error == -1) 2649 return (EFAULT); 2650 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2651 if (error != 0) 2652 return (error); 2653 2654 if (timeout != NULL) 2655 abs_timeout_init2(&timo, timeout); 2656 2657 wrflags = URWLOCK_WRITE_OWNER; 2658 if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER)) 2659 wrflags |= URWLOCK_WRITE_WAITERS; 2660 2661 for (;;) { 2662 rv = fueword32(&rwlock->rw_state, &state); 2663 if (rv == -1) { 2664 umtx_key_release(&uq->uq_key); 2665 return (EFAULT); 2666 } 2667 2668 /* try to lock it */ 2669 while (!(state & wrflags)) { 2670 if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) { 2671 umtx_key_release(&uq->uq_key); 2672 return (EAGAIN); 2673 } 2674 rv = casueword32(&rwlock->rw_state, state, 2675 &oldstate, state + 1); 2676 if (rv == -1) { 2677 umtx_key_release(&uq->uq_key); 2678 return (EFAULT); 2679 } 2680 if (oldstate == state) { 2681 umtx_key_release(&uq->uq_key); 2682 return (0); 2683 } 2684 error = umtxq_check_susp(td); 2685 if (error != 0) 2686 break; 2687 state = oldstate; 2688 } 2689 2690 if (error) 2691 break; 2692 2693 /* grab monitor lock */ 2694 umtxq_lock(&uq->uq_key); 2695 umtxq_busy(&uq->uq_key); 2696 umtxq_unlock(&uq->uq_key); 2697 2698 /* 2699 * re-read the state, in case it changed between the try-lock above 2700 * and the check below 2701 */ 2702 rv = fueword32(&rwlock->rw_state, &state); 2703 if (rv == -1) 2704 error = EFAULT; 2705 2706 /* set read contention bit */ 2707 while (error == 0 && (state & wrflags) && 2708 !(state & URWLOCK_READ_WAITERS)) { 2709 rv = casueword32(&rwlock->rw_state, state, 2710 &oldstate, state | URWLOCK_READ_WAITERS); 2711 if (rv == -1) { 2712 error = EFAULT; 2713 break; 2714 } 2715 if (oldstate == state) 
2716 goto sleep; 2717 state = oldstate; 2718 error = umtxq_check_susp(td); 2719 if (error != 0) 2720 break; 2721 } 2722 if (error != 0) { 2723 umtxq_unbusy_unlocked(&uq->uq_key); 2724 break; 2725 } 2726 2727 /* state is changed while setting flags, restart */ 2728 if (!(state & wrflags)) { 2729 umtxq_unbusy_unlocked(&uq->uq_key); 2730 error = umtxq_check_susp(td); 2731 if (error != 0) 2732 break; 2733 continue; 2734 } 2735 2736 sleep: 2737 /* contention bit is set, before sleeping, increase read waiter count */ 2738 rv = fueword32(&rwlock->rw_blocked_readers, 2739 &blocked_readers); 2740 if (rv == -1) { 2741 umtxq_unbusy_unlocked(&uq->uq_key); 2742 error = EFAULT; 2743 break; 2744 } 2745 suword32(&rwlock->rw_blocked_readers, blocked_readers+1); 2746 2747 while (state & wrflags) { 2748 umtxq_lock(&uq->uq_key); 2749 umtxq_insert(uq); 2750 umtxq_unbusy(&uq->uq_key); 2751 2752 error = umtxq_sleep(uq, "urdlck", timeout == NULL ? 2753 NULL : &timo); 2754 2755 umtxq_busy(&uq->uq_key); 2756 umtxq_remove(uq); 2757 umtxq_unlock(&uq->uq_key); 2758 if (error) 2759 break; 2760 rv = fueword32(&rwlock->rw_state, &state); 2761 if (rv == -1) { 2762 error = EFAULT; 2763 break; 2764 } 2765 } 2766 2767 /* decrease read waiter count, and may clear read contention bit */ 2768 rv = fueword32(&rwlock->rw_blocked_readers, 2769 &blocked_readers); 2770 if (rv == -1) { 2771 umtxq_unbusy_unlocked(&uq->uq_key); 2772 error = EFAULT; 2773 break; 2774 } 2775 suword32(&rwlock->rw_blocked_readers, blocked_readers-1); 2776 if (blocked_readers == 1) { 2777 rv = fueword32(&rwlock->rw_state, &state); 2778 if (rv == -1) { 2779 umtxq_unbusy_unlocked(&uq->uq_key); 2780 error = EFAULT; 2781 break; 2782 } 2783 for (;;) { 2784 rv = casueword32(&rwlock->rw_state, state, 2785 &oldstate, state & ~URWLOCK_READ_WAITERS); 2786 if (rv == -1) { 2787 error = EFAULT; 2788 break; 2789 } 2790 if (oldstate == state) 2791 break; 2792 state = oldstate; 2793 error1 = umtxq_check_susp(td); 2794 if (error1 != 0) { 2795 if (error == 0) 2796 error = error1; 2797 break; 2798 } 2799 } 2800 } 2801 2802 umtxq_unbusy_unlocked(&uq->uq_key); 2803 if (error != 0) 2804 break; 2805 } 2806 umtx_key_release(&uq->uq_key); 2807 if (error == ERESTART) 2808 error = EINTR; 2809 return (error); 2810 } 2811 2812 static int 2813 do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout) 2814 { 2815 struct abs_timeout timo; 2816 struct umtx_q *uq; 2817 uint32_t flags; 2818 int32_t state, oldstate; 2819 int32_t blocked_writers; 2820 int32_t blocked_readers; 2821 int error, error1, rv; 2822 2823 uq = td->td_umtxq; 2824 error = fueword32(&rwlock->rw_flags, &flags); 2825 if (error == -1) 2826 return (EFAULT); 2827 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2828 if (error != 0) 2829 return (error); 2830 2831 if (timeout != NULL) 2832 abs_timeout_init2(&timo, timeout); 2833 2834 blocked_readers = 0; 2835 for (;;) { 2836 rv = fueword32(&rwlock->rw_state, &state); 2837 if (rv == -1) { 2838 umtx_key_release(&uq->uq_key); 2839 return (EFAULT); 2840 } 2841 while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) { 2842 rv = casueword32(&rwlock->rw_state, state, 2843 &oldstate, state | URWLOCK_WRITE_OWNER); 2844 if (rv == -1) { 2845 umtx_key_release(&uq->uq_key); 2846 return (EFAULT); 2847 } 2848 if (oldstate == state) { 2849 umtx_key_release(&uq->uq_key); 2850 return (0); 2851 } 2852 state = oldstate; 2853 error = umtxq_check_susp(td); 2854 if (error != 0) 2855 break; 2856 } 2857 2858 if (error) { 2859 if 
(!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) && 2860 blocked_readers != 0) { 2861 umtxq_lock(&uq->uq_key); 2862 umtxq_busy(&uq->uq_key); 2863 umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE); 2864 umtxq_unbusy(&uq->uq_key); 2865 umtxq_unlock(&uq->uq_key); 2866 } 2867 2868 break; 2869 } 2870 2871 /* grab monitor lock */ 2872 umtxq_lock(&uq->uq_key); 2873 umtxq_busy(&uq->uq_key); 2874 umtxq_unlock(&uq->uq_key); 2875 2876 /* 2877 * re-read the state, in case it changed between the try-lock above 2878 * and the check below 2879 */ 2880 rv = fueword32(&rwlock->rw_state, &state); 2881 if (rv == -1) 2882 error = EFAULT; 2883 2884 while (error == 0 && ((state & URWLOCK_WRITE_OWNER) || 2885 URWLOCK_READER_COUNT(state) != 0) && 2886 (state & URWLOCK_WRITE_WAITERS) == 0) { 2887 rv = casueword32(&rwlock->rw_state, state, 2888 &oldstate, state | URWLOCK_WRITE_WAITERS); 2889 if (rv == -1) { 2890 error = EFAULT; 2891 break; 2892 } 2893 if (oldstate == state) 2894 goto sleep; 2895 state = oldstate; 2896 error = umtxq_check_susp(td); 2897 if (error != 0) 2898 break; 2899 } 2900 if (error != 0) { 2901 umtxq_unbusy_unlocked(&uq->uq_key); 2902 break; 2903 } 2904 2905 if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) { 2906 umtxq_unbusy_unlocked(&uq->uq_key); 2907 error = umtxq_check_susp(td); 2908 if (error != 0) 2909 break; 2910 continue; 2911 } 2912 sleep: 2913 rv = fueword32(&rwlock->rw_blocked_writers, 2914 &blocked_writers); 2915 if (rv == -1) { 2916 umtxq_unbusy_unlocked(&uq->uq_key); 2917 error = EFAULT; 2918 break; 2919 } 2920 suword32(&rwlock->rw_blocked_writers, blocked_writers+1); 2921 2922 while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) { 2923 umtxq_lock(&uq->uq_key); 2924 umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE); 2925 umtxq_unbusy(&uq->uq_key); 2926 2927 error = umtxq_sleep(uq, "uwrlck", timeout == NULL ? 2928 NULL : &timo); 2929 2930 umtxq_busy(&uq->uq_key); 2931 umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE); 2932 umtxq_unlock(&uq->uq_key); 2933 if (error) 2934 break; 2935 rv = fueword32(&rwlock->rw_state, &state); 2936 if (rv == -1) { 2937 error = EFAULT; 2938 break; 2939 } 2940 } 2941 2942 rv = fueword32(&rwlock->rw_blocked_writers, 2943 &blocked_writers); 2944 if (rv == -1) { 2945 umtxq_unbusy_unlocked(&uq->uq_key); 2946 error = EFAULT; 2947 break; 2948 } 2949 suword32(&rwlock->rw_blocked_writers, blocked_writers-1); 2950 if (blocked_writers == 1) { 2951 rv = fueword32(&rwlock->rw_state, &state); 2952 if (rv == -1) { 2953 umtxq_unbusy_unlocked(&uq->uq_key); 2954 error = EFAULT; 2955 break; 2956 } 2957 for (;;) { 2958 rv = casueword32(&rwlock->rw_state, state, 2959 &oldstate, state & ~URWLOCK_WRITE_WAITERS); 2960 if (rv == -1) { 2961 error = EFAULT; 2962 break; 2963 } 2964 if (oldstate == state) 2965 break; 2966 state = oldstate; 2967 error1 = umtxq_check_susp(td); 2968 /* 2969 * We are leaving the URWLOCK_WRITE_WAITERS 2970 * behind, but this should not harm the 2971 * correctness. 
2972 */ 2973 if (error1 != 0) { 2974 if (error == 0) 2975 error = error1; 2976 break; 2977 } 2978 } 2979 rv = fueword32(&rwlock->rw_blocked_readers, 2980 &blocked_readers); 2981 if (rv == -1) { 2982 umtxq_unbusy_unlocked(&uq->uq_key); 2983 error = EFAULT; 2984 break; 2985 } 2986 } else 2987 blocked_readers = 0; 2988 2989 umtxq_unbusy_unlocked(&uq->uq_key); 2990 } 2991 2992 umtx_key_release(&uq->uq_key); 2993 if (error == ERESTART) 2994 error = EINTR; 2995 return (error); 2996 } 2997 2998 static int 2999 do_rw_unlock(struct thread *td, struct urwlock *rwlock) 3000 { 3001 struct umtx_q *uq; 3002 uint32_t flags; 3003 int32_t state, oldstate; 3004 int error, rv, q, count; 3005 3006 uq = td->td_umtxq; 3007 error = fueword32(&rwlock->rw_flags, &flags); 3008 if (error == -1) 3009 return (EFAULT); 3010 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3011 if (error != 0) 3012 return (error); 3013 3014 error = fueword32(&rwlock->rw_state, &state); 3015 if (error == -1) { 3016 error = EFAULT; 3017 goto out; 3018 } 3019 if (state & URWLOCK_WRITE_OWNER) { 3020 for (;;) { 3021 rv = casueword32(&rwlock->rw_state, state, 3022 &oldstate, state & ~URWLOCK_WRITE_OWNER); 3023 if (rv == -1) { 3024 error = EFAULT; 3025 goto out; 3026 } 3027 if (oldstate != state) { 3028 state = oldstate; 3029 if (!(oldstate & URWLOCK_WRITE_OWNER)) { 3030 error = EPERM; 3031 goto out; 3032 } 3033 error = umtxq_check_susp(td); 3034 if (error != 0) 3035 goto out; 3036 } else 3037 break; 3038 } 3039 } else if (URWLOCK_READER_COUNT(state) != 0) { 3040 for (;;) { 3041 rv = casueword32(&rwlock->rw_state, state, 3042 &oldstate, state - 1); 3043 if (rv == -1) { 3044 error = EFAULT; 3045 goto out; 3046 } 3047 if (oldstate != state) { 3048 state = oldstate; 3049 if (URWLOCK_READER_COUNT(oldstate) == 0) { 3050 error = EPERM; 3051 goto out; 3052 } 3053 error = umtxq_check_susp(td); 3054 if (error != 0) 3055 goto out; 3056 } else 3057 break; 3058 } 3059 } else { 3060 error = EPERM; 3061 goto out; 3062 } 3063 3064 count = 0; 3065 3066 if (!(flags & URWLOCK_PREFER_READER)) { 3067 if (state & URWLOCK_WRITE_WAITERS) { 3068 count = 1; 3069 q = UMTX_EXCLUSIVE_QUEUE; 3070 } else if (state & URWLOCK_READ_WAITERS) { 3071 count = INT_MAX; 3072 q = UMTX_SHARED_QUEUE; 3073 } 3074 } else { 3075 if (state & URWLOCK_READ_WAITERS) { 3076 count = INT_MAX; 3077 q = UMTX_SHARED_QUEUE; 3078 } else if (state & URWLOCK_WRITE_WAITERS) { 3079 count = 1; 3080 q = UMTX_EXCLUSIVE_QUEUE; 3081 } 3082 } 3083 3084 if (count) { 3085 umtxq_lock(&uq->uq_key); 3086 umtxq_busy(&uq->uq_key); 3087 umtxq_signal_queue(&uq->uq_key, count, q); 3088 umtxq_unbusy(&uq->uq_key); 3089 umtxq_unlock(&uq->uq_key); 3090 } 3091 out: 3092 umtx_key_release(&uq->uq_key); 3093 return (error); 3094 } 3095 3096 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3097 static int 3098 do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout) 3099 { 3100 struct abs_timeout timo; 3101 struct umtx_q *uq; 3102 uint32_t flags, count, count1; 3103 int error, rv; 3104 3105 uq = td->td_umtxq; 3106 error = fueword32(&sem->_flags, &flags); 3107 if (error == -1) 3108 return (EFAULT); 3109 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3110 if (error != 0) 3111 return (error); 3112 3113 if (timeout != NULL) 3114 abs_timeout_init2(&timo, timeout); 3115 3116 umtxq_lock(&uq->uq_key); 3117 umtxq_busy(&uq->uq_key); 3118 umtxq_insert(uq); 3119 umtxq_unlock(&uq->uq_key); 3120 rv = casueword32(&sem->_has_waiters, 0, &count1, 1); 3121 if (rv == 0) 
3122 rv = fueword32(&sem->_count, &count);
3123 if (rv == -1 || count != 0) {
3124 umtxq_lock(&uq->uq_key);
3125 umtxq_unbusy(&uq->uq_key);
3126 umtxq_remove(uq);
3127 umtxq_unlock(&uq->uq_key);
3128 umtx_key_release(&uq->uq_key);
3129 return (rv == -1 ? EFAULT : 0);
3130 }
3131 umtxq_lock(&uq->uq_key);
3132 umtxq_unbusy(&uq->uq_key);
3133
3134 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo);
3135
3136 if ((uq->uq_flags & UQF_UMTXQ) == 0)
3137 error = 0;
3138 else {
3139 umtxq_remove(uq);
3140 /* A relative timeout cannot be restarted. */
3141 if (error == ERESTART && timeout != NULL &&
3142 (timeout->_flags & UMTX_ABSTIME) == 0)
3143 error = EINTR;
3144 }
3145 umtxq_unlock(&uq->uq_key);
3146 umtx_key_release(&uq->uq_key);
3147 return (error);
3148 }
3149
3150 /*
3151 * Signal a userland semaphore.
3152 */
3153 static int
3154 do_sem_wake(struct thread *td, struct _usem *sem)
3155 {
3156 struct umtx_key key;
3157 int error, cnt;
3158 uint32_t flags;
3159
3160 error = fueword32(&sem->_flags, &flags);
3161 if (error == -1)
3162 return (EFAULT);
3163 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
3164 return (error);
3165 umtxq_lock(&key);
3166 umtxq_busy(&key);
3167 cnt = umtxq_count(&key);
3168 if (cnt > 0) {
3169 /*
3170 * If the wait count is greater than 0, the memory is still
3171 * being referenced by user code, so we can safely update
3172 * the _has_waiters flag.
3173 */
3174 if (cnt == 1) {
3175 umtxq_unlock(&key);
3176 error = suword32(&sem->_has_waiters, 0);
3177 umtxq_lock(&key);
3178 if (error == -1)
3179 error = EFAULT;
3180 }
3181 umtxq_signal(&key, 1);
3182 }
3183 umtxq_unbusy(&key);
3184 umtxq_unlock(&key);
3185 umtx_key_release(&key);
3186 return (error);
3187 }
3188 #endif
3189
3190 static int
3191 do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout)
3192 {
3193 struct abs_timeout timo;
3194 struct umtx_q *uq;
3195 uint32_t count, flags;
3196 int error, rv;
3197
3198 uq = td->td_umtxq;
3199 flags = fuword32(&sem->_flags);
3200 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
3201 if (error != 0)
3202 return (error);
3203
3204 if (timeout != NULL)
3205 abs_timeout_init2(&timo, timeout);
3206
3207 umtxq_lock(&uq->uq_key);
3208 umtxq_busy(&uq->uq_key);
3209 umtxq_insert(uq);
3210 umtxq_unlock(&uq->uq_key);
3211 rv = fueword32(&sem->_count, &count);
3212 if (rv == -1) {
3213 umtxq_lock(&uq->uq_key);
3214 umtxq_unbusy(&uq->uq_key);
3215 umtxq_remove(uq);
3216 umtxq_unlock(&uq->uq_key);
3217 umtx_key_release(&uq->uq_key);
3218 return (EFAULT);
3219 }
3220 for (;;) {
3221 if (USEM_COUNT(count) != 0) {
3222 umtxq_lock(&uq->uq_key);
3223 umtxq_unbusy(&uq->uq_key);
3224 umtxq_remove(uq);
3225 umtxq_unlock(&uq->uq_key);
3226 umtx_key_release(&uq->uq_key);
3227 return (0);
3228 }
3229 if (count == USEM_HAS_WAITERS)
3230 break;
3231 rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS);
3232 if (rv == -1) {
3233 umtxq_lock(&uq->uq_key);
3234 umtxq_unbusy(&uq->uq_key);
3235 umtxq_remove(uq);
3236 umtxq_unlock(&uq->uq_key);
3237 umtx_key_release(&uq->uq_key);
3238 return (EFAULT);
3239 }
3240 if (count == 0)
3241 break;
3242 }
3243 umtxq_lock(&uq->uq_key);
3244 umtxq_unbusy(&uq->uq_key);
3245
3246 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo);
3247
3248 if ((uq->uq_flags & UQF_UMTXQ) == 0)
3249 error = 0;
3250 else {
3251 umtxq_remove(uq);
3252 if (timeout != NULL && (timeout->_flags & UMTX_ABSTIME) == 0) {
3253 /* A relative timeout cannot be restarted.
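 *
 * Instead, on EINTR the code below rewrites timeout->_timeout to the
 * time remaining, and __umtx_op_sem2_wait() copies that value back to
 * userland so the wait can be re-issued with the shortened interval.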
*/ 3254 if (error == ERESTART) 3255 error = EINTR; 3256 if (error == EINTR) { 3257 abs_timeout_update(&timo); 3258 timeout->_timeout = timo.end; 3259 timespecsub(&timeout->_timeout, &timo.cur); 3260 } 3261 } 3262 } 3263 umtxq_unlock(&uq->uq_key); 3264 umtx_key_release(&uq->uq_key); 3265 return (error); 3266 } 3267 3268 /* 3269 * Signal a userland semaphore. 3270 */ 3271 static int 3272 do_sem2_wake(struct thread *td, struct _usem2 *sem) 3273 { 3274 struct umtx_key key; 3275 int error, cnt, rv; 3276 uint32_t count, flags; 3277 3278 rv = fueword32(&sem->_flags, &flags); 3279 if (rv == -1) 3280 return (EFAULT); 3281 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3282 return (error); 3283 umtxq_lock(&key); 3284 umtxq_busy(&key); 3285 cnt = umtxq_count(&key); 3286 if (cnt > 0) { 3287 /* 3288 * If this was the last sleeping thread, clear the waiters 3289 * flag in _count. 3290 */ 3291 if (cnt == 1) { 3292 umtxq_unlock(&key); 3293 rv = fueword32(&sem->_count, &count); 3294 while (rv != -1 && count & USEM_HAS_WAITERS) 3295 rv = casueword32(&sem->_count, count, &count, 3296 count & ~USEM_HAS_WAITERS); 3297 if (rv == -1) 3298 error = EFAULT; 3299 umtxq_lock(&key); 3300 } 3301 3302 umtxq_signal(&key, 1); 3303 } 3304 umtxq_unbusy(&key); 3305 umtxq_unlock(&key); 3306 umtx_key_release(&key); 3307 return (error); 3308 } 3309 3310 inline int 3311 umtx_copyin_timeout(const void *addr, struct timespec *tsp) 3312 { 3313 int error; 3314 3315 error = copyin(addr, tsp, sizeof(struct timespec)); 3316 if (error == 0) { 3317 if (tsp->tv_sec < 0 || 3318 tsp->tv_nsec >= 1000000000 || 3319 tsp->tv_nsec < 0) 3320 error = EINVAL; 3321 } 3322 return (error); 3323 } 3324 3325 static inline int 3326 umtx_copyin_umtx_time(const void *addr, size_t size, struct _umtx_time *tp) 3327 { 3328 int error; 3329 3330 if (size <= sizeof(struct timespec)) { 3331 tp->_clockid = CLOCK_REALTIME; 3332 tp->_flags = 0; 3333 error = copyin(addr, &tp->_timeout, sizeof(struct timespec)); 3334 } else 3335 error = copyin(addr, tp, sizeof(struct _umtx_time)); 3336 if (error != 0) 3337 return (error); 3338 if (tp->_timeout.tv_sec < 0 || 3339 tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0) 3340 return (EINVAL); 3341 return (0); 3342 } 3343 3344 static int 3345 __umtx_op_unimpl(struct thread *td, struct _umtx_op_args *uap) 3346 { 3347 3348 return (EOPNOTSUPP); 3349 } 3350 3351 static int 3352 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap) 3353 { 3354 struct _umtx_time timeout, *tm_p; 3355 int error; 3356 3357 if (uap->uaddr2 == NULL) 3358 tm_p = NULL; 3359 else { 3360 error = umtx_copyin_umtx_time( 3361 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3362 if (error != 0) 3363 return (error); 3364 tm_p = &timeout; 3365 } 3366 return (do_wait(td, uap->obj, uap->val, tm_p, 0, 0)); 3367 } 3368 3369 static int 3370 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap) 3371 { 3372 struct _umtx_time timeout, *tm_p; 3373 int error; 3374 3375 if (uap->uaddr2 == NULL) 3376 tm_p = NULL; 3377 else { 3378 error = umtx_copyin_umtx_time( 3379 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3380 if (error != 0) 3381 return (error); 3382 tm_p = &timeout; 3383 } 3384 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); 3385 } 3386 3387 static int 3388 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap) 3389 { 3390 struct _umtx_time *tm_p, timeout; 3391 int error; 3392 3393 if (uap->uaddr2 == NULL) 3394 tm_p = NULL; 3395 else { 3396 error = umtx_copyin_umtx_time( 3397 uap->uaddr2, 
(size_t)uap->uaddr1, &timeout); 3398 if (error != 0) 3399 return (error); 3400 tm_p = &timeout; 3401 } 3402 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1)); 3403 } 3404 3405 static int 3406 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap) 3407 { 3408 3409 return (kern_umtx_wake(td, uap->obj, uap->val, 0)); 3410 } 3411 3412 #define BATCH_SIZE 128 3413 static int 3414 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap) 3415 { 3416 char *uaddrs[BATCH_SIZE], **upp; 3417 int count, error, i, pos, tocopy; 3418 3419 upp = (char **)uap->obj; 3420 error = 0; 3421 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 3422 pos += tocopy) { 3423 tocopy = MIN(count, BATCH_SIZE); 3424 error = copyin(upp + pos, uaddrs, tocopy * sizeof(char *)); 3425 if (error != 0) 3426 break; 3427 for (i = 0; i < tocopy; ++i) 3428 kern_umtx_wake(td, uaddrs[i], INT_MAX, 1); 3429 maybe_yield(); 3430 } 3431 return (error); 3432 } 3433 3434 static int 3435 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap) 3436 { 3437 3438 return (kern_umtx_wake(td, uap->obj, uap->val, 1)); 3439 } 3440 3441 static int 3442 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap) 3443 { 3444 struct _umtx_time *tm_p, timeout; 3445 int error; 3446 3447 /* Allow a null timespec (wait forever). */ 3448 if (uap->uaddr2 == NULL) 3449 tm_p = NULL; 3450 else { 3451 error = umtx_copyin_umtx_time( 3452 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3453 if (error != 0) 3454 return (error); 3455 tm_p = &timeout; 3456 } 3457 return (do_lock_umutex(td, uap->obj, tm_p, 0)); 3458 } 3459 3460 static int 3461 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap) 3462 { 3463 3464 return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY)); 3465 } 3466 3467 static int 3468 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap) 3469 { 3470 struct _umtx_time *tm_p, timeout; 3471 int error; 3472 3473 /* Allow a null timespec (wait forever). */ 3474 if (uap->uaddr2 == NULL) 3475 tm_p = NULL; 3476 else { 3477 error = umtx_copyin_umtx_time( 3478 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3479 if (error != 0) 3480 return (error); 3481 tm_p = &timeout; 3482 } 3483 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT)); 3484 } 3485 3486 static int 3487 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap) 3488 { 3489 3490 return (do_wake_umutex(td, uap->obj)); 3491 } 3492 3493 static int 3494 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap) 3495 { 3496 3497 return (do_unlock_umutex(td, uap->obj, false)); 3498 } 3499 3500 static int 3501 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap) 3502 { 3503 3504 return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1)); 3505 } 3506 3507 static int 3508 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap) 3509 { 3510 struct timespec *ts, timeout; 3511 int error; 3512 3513 /* Allow a null timespec (wait forever). 
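 *
 * A hedged caller-side sketch (cv, m, and ts are hypothetical): a
 * relative five-second wait against CLOCK_REALTIME, the default when
 * CVWAIT_CLOCKID is not set in val:
 *
 *	struct timespec ts = { .tv_sec = 5, .tv_nsec = 0 };
 *	_umtx_op(&cv, UMTX_OP_CV_WAIT, 0, &m, &ts);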
*/ 3514 if (uap->uaddr2 == NULL) 3515 ts = NULL; 3516 else { 3517 error = umtx_copyin_timeout(uap->uaddr2, &timeout); 3518 if (error != 0) 3519 return (error); 3520 ts = &timeout; 3521 } 3522 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 3523 } 3524 3525 static int 3526 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap) 3527 { 3528 3529 return (do_cv_signal(td, uap->obj)); 3530 } 3531 3532 static int 3533 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap) 3534 { 3535 3536 return (do_cv_broadcast(td, uap->obj)); 3537 } 3538 3539 static int 3540 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap) 3541 { 3542 struct _umtx_time timeout; 3543 int error; 3544 3545 /* Allow a null timespec (wait forever). */ 3546 if (uap->uaddr2 == NULL) { 3547 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 3548 } else { 3549 error = umtx_copyin_umtx_time(uap->uaddr2, 3550 (size_t)uap->uaddr1, &timeout); 3551 if (error != 0) 3552 return (error); 3553 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 3554 } 3555 return (error); 3556 } 3557 3558 static int 3559 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap) 3560 { 3561 struct _umtx_time timeout; 3562 int error; 3563 3564 /* Allow a null timespec (wait forever). */ 3565 if (uap->uaddr2 == NULL) { 3566 error = do_rw_wrlock(td, uap->obj, 0); 3567 } else { 3568 error = umtx_copyin_umtx_time(uap->uaddr2, 3569 (size_t)uap->uaddr1, &timeout); 3570 if (error != 0) 3571 return (error); 3572 3573 error = do_rw_wrlock(td, uap->obj, &timeout); 3574 } 3575 return (error); 3576 } 3577 3578 static int 3579 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap) 3580 { 3581 3582 return (do_rw_unlock(td, uap->obj)); 3583 } 3584 3585 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3586 static int 3587 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap) 3588 { 3589 struct _umtx_time *tm_p, timeout; 3590 int error; 3591 3592 /* Allow a null timespec (wait forever). */ 3593 if (uap->uaddr2 == NULL) 3594 tm_p = NULL; 3595 else { 3596 error = umtx_copyin_umtx_time( 3597 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3598 if (error != 0) 3599 return (error); 3600 tm_p = &timeout; 3601 } 3602 return (do_sem_wait(td, uap->obj, tm_p)); 3603 } 3604 3605 static int 3606 __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap) 3607 { 3608 3609 return (do_sem_wake(td, uap->obj)); 3610 } 3611 #endif 3612 3613 static int 3614 __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap) 3615 { 3616 3617 return (do_wake2_umutex(td, uap->obj, uap->val)); 3618 } 3619 3620 static int 3621 __umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap) 3622 { 3623 struct _umtx_time *tm_p, timeout; 3624 size_t uasize; 3625 int error; 3626 3627 /* Allow a null timespec (wait forever). 
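 *
 * Here uaddr1 carries the size of the object at uaddr2. A hedged
 * sketch (hypothetical layout) that leaves room for the
 * remaining-time writeback performed on EINTR:
 *
 *	struct {
 *		struct _umtx_time ut;
 *		struct timespec remain;
 *	} a = { .ut._timeout = { .tv_sec = 1 } };
 *	_umtx_op(&sem, UMTX_OP_SEM2_WAIT, 0, (void *)sizeof(a), &a);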
*/ 3628 if (uap->uaddr2 == NULL) { 3629 uasize = 0; 3630 tm_p = NULL; 3631 } else { 3632 uasize = (size_t)uap->uaddr1; 3633 error = umtx_copyin_umtx_time(uap->uaddr2, uasize, &timeout); 3634 if (error != 0) 3635 return (error); 3636 tm_p = &timeout; 3637 } 3638 error = do_sem2_wait(td, uap->obj, tm_p); 3639 if (error == EINTR && uap->uaddr2 != NULL && 3640 (timeout._flags & UMTX_ABSTIME) == 0 && 3641 uasize >= sizeof(struct _umtx_time) + sizeof(struct timespec)) { 3642 error = copyout(&timeout._timeout, 3643 (struct _umtx_time *)uap->uaddr2 + 1, 3644 sizeof(struct timespec)); 3645 if (error == 0) { 3646 error = EINTR; 3647 } 3648 } 3649 3650 return (error); 3651 } 3652 3653 static int 3654 __umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap) 3655 { 3656 3657 return (do_sem2_wake(td, uap->obj)); 3658 } 3659 3660 #define USHM_OBJ_UMTX(o) \ 3661 ((struct umtx_shm_obj_list *)(&(o)->umtx_data)) 3662 3663 #define USHMF_REG_LINKED 0x0001 3664 #define USHMF_OBJ_LINKED 0x0002 3665 struct umtx_shm_reg { 3666 TAILQ_ENTRY(umtx_shm_reg) ushm_reg_link; 3667 LIST_ENTRY(umtx_shm_reg) ushm_obj_link; 3668 struct umtx_key ushm_key; 3669 struct ucred *ushm_cred; 3670 struct shmfd *ushm_obj; 3671 u_int ushm_refcnt; 3672 u_int ushm_flags; 3673 }; 3674 3675 LIST_HEAD(umtx_shm_obj_list, umtx_shm_reg); 3676 TAILQ_HEAD(umtx_shm_reg_head, umtx_shm_reg); 3677 3678 static uma_zone_t umtx_shm_reg_zone; 3679 static struct umtx_shm_reg_head umtx_shm_registry[UMTX_CHAINS]; 3680 static struct mtx umtx_shm_lock; 3681 static struct umtx_shm_reg_head umtx_shm_reg_delfree = 3682 TAILQ_HEAD_INITIALIZER(umtx_shm_reg_delfree); 3683 3684 static void umtx_shm_free_reg(struct umtx_shm_reg *reg); 3685 3686 static void 3687 umtx_shm_reg_delfree_tq(void *context __unused, int pending __unused) 3688 { 3689 struct umtx_shm_reg_head d; 3690 struct umtx_shm_reg *reg, *reg1; 3691 3692 TAILQ_INIT(&d); 3693 mtx_lock(&umtx_shm_lock); 3694 TAILQ_CONCAT(&d, &umtx_shm_reg_delfree, ushm_reg_link); 3695 mtx_unlock(&umtx_shm_lock); 3696 TAILQ_FOREACH_SAFE(reg, &d, ushm_reg_link, reg1) { 3697 TAILQ_REMOVE(&d, reg, ushm_reg_link); 3698 umtx_shm_free_reg(reg); 3699 } 3700 } 3701 3702 static struct task umtx_shm_reg_delfree_task = 3703 TASK_INITIALIZER(0, umtx_shm_reg_delfree_tq, NULL); 3704 3705 static struct umtx_shm_reg * 3706 umtx_shm_find_reg_locked(const struct umtx_key *key) 3707 { 3708 struct umtx_shm_reg *reg; 3709 struct umtx_shm_reg_head *reg_head; 3710 3711 KASSERT(key->shared, ("umtx_p_find_rg: private key")); 3712 mtx_assert(&umtx_shm_lock, MA_OWNED); 3713 reg_head = &umtx_shm_registry[key->hash]; 3714 TAILQ_FOREACH(reg, reg_head, ushm_reg_link) { 3715 KASSERT(reg->ushm_key.shared, 3716 ("non-shared key on reg %p %d", reg, reg->ushm_key.shared)); 3717 if (reg->ushm_key.info.shared.object == 3718 key->info.shared.object && 3719 reg->ushm_key.info.shared.offset == 3720 key->info.shared.offset) { 3721 KASSERT(reg->ushm_key.type == TYPE_SHM, ("TYPE_USHM")); 3722 KASSERT(reg->ushm_refcnt > 0, 3723 ("reg %p refcnt 0 onlist", reg)); 3724 KASSERT((reg->ushm_flags & USHMF_REG_LINKED) != 0, 3725 ("reg %p not linked", reg)); 3726 reg->ushm_refcnt++; 3727 return (reg); 3728 } 3729 } 3730 return (NULL); 3731 } 3732 3733 static struct umtx_shm_reg * 3734 umtx_shm_find_reg(const struct umtx_key *key) 3735 { 3736 struct umtx_shm_reg *reg; 3737 3738 mtx_lock(&umtx_shm_lock); 3739 reg = umtx_shm_find_reg_locked(key); 3740 mtx_unlock(&umtx_shm_lock); 3741 return (reg); 3742 } 3743 3744 static void 3745 umtx_shm_free_reg(struct umtx_shm_reg *reg) 
3746 {
3747
3748 chgumtxcnt(reg->ushm_cred->cr_ruidinfo, -1, 0);
3749 crfree(reg->ushm_cred);
3750 shm_drop(reg->ushm_obj);
3751 uma_zfree(umtx_shm_reg_zone, reg);
3752 }
3753
3754 static bool
3755 umtx_shm_unref_reg_locked(struct umtx_shm_reg *reg, bool force)
3756 {
3757 bool res;
3758
3759 mtx_assert(&umtx_shm_lock, MA_OWNED);
3760 KASSERT(reg->ushm_refcnt > 0, ("ushm_reg %p refcnt 0", reg));
3761 reg->ushm_refcnt--;
3762 res = reg->ushm_refcnt == 0;
3763 if (res || force) {
3764 if ((reg->ushm_flags & USHMF_REG_LINKED) != 0) {
3765 TAILQ_REMOVE(&umtx_shm_registry[reg->ushm_key.hash],
3766 reg, ushm_reg_link);
3767 reg->ushm_flags &= ~USHMF_REG_LINKED;
3768 }
3769 if ((reg->ushm_flags & USHMF_OBJ_LINKED) != 0) {
3770 LIST_REMOVE(reg, ushm_obj_link);
3771 reg->ushm_flags &= ~USHMF_OBJ_LINKED;
3772 }
3773 }
3774 return (res);
3775 }
3776
3777 static void
3778 umtx_shm_unref_reg(struct umtx_shm_reg *reg, bool force)
3779 {
3780 vm_object_t object;
3781 bool dofree;
3782
3783 if (force) {
3784 object = reg->ushm_obj->shm_object;
3785 VM_OBJECT_WLOCK(object);
3786 object->flags |= OBJ_UMTXDEAD;
3787 VM_OBJECT_WUNLOCK(object);
3788 }
3789 mtx_lock(&umtx_shm_lock);
3790 dofree = umtx_shm_unref_reg_locked(reg, force);
3791 mtx_unlock(&umtx_shm_lock);
3792 if (dofree)
3793 umtx_shm_free_reg(reg);
3794 }
3795
3796 void
3797 umtx_shm_object_init(vm_object_t object)
3798 {
3799
3800 LIST_INIT(USHM_OBJ_UMTX(object));
3801 }
3802
3803 void
3804 umtx_shm_object_terminated(vm_object_t object)
3805 {
3806 struct umtx_shm_reg *reg, *reg1;
3807 bool dofree;
3808
3809 dofree = false;
3810 mtx_lock(&umtx_shm_lock);
3811 LIST_FOREACH_SAFE(reg, USHM_OBJ_UMTX(object), ushm_obj_link, reg1) {
3812 if (umtx_shm_unref_reg_locked(reg, true)) {
3813 TAILQ_INSERT_TAIL(&umtx_shm_reg_delfree, reg,
3814 ushm_reg_link);
3815 dofree = true;
3816 }
3817 }
3818 mtx_unlock(&umtx_shm_lock);
3819 if (dofree)
3820 taskqueue_enqueue(taskqueue_thread, &umtx_shm_reg_delfree_task);
3821 }
3822
3823 static int
3824 umtx_shm_create_reg(struct thread *td, const struct umtx_key *key,
3825 struct umtx_shm_reg **res)
3826 {
3827 struct umtx_shm_reg *reg, *reg1;
3828 struct ucred *cred;
3829 int error;
3830
3831 reg = umtx_shm_find_reg(key);
3832 if (reg != NULL) {
3833 *res = reg;
3834 return (0);
3835 }
3836 cred = td->td_ucred;
3837 if (!chgumtxcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_UMTXP)))
3838 return (ENOMEM);
3839 reg = uma_zalloc(umtx_shm_reg_zone, M_WAITOK | M_ZERO);
3840 reg->ushm_refcnt = 1;
3841 bcopy(key, &reg->ushm_key, sizeof(*key));
3842 reg->ushm_obj = shm_alloc(td->td_ucred, O_RDWR);
3843 reg->ushm_cred = crhold(cred);
3844 error = shm_dotruncate(reg->ushm_obj, PAGE_SIZE);
3845 if (error != 0) {
3846 umtx_shm_free_reg(reg);
3847 return (error);
3848 }
3849 mtx_lock(&umtx_shm_lock);
3850 reg1 = umtx_shm_find_reg_locked(key);
3851 if (reg1 != NULL) {
3852 mtx_unlock(&umtx_shm_lock);
3853 umtx_shm_free_reg(reg);
3854 *res = reg1;
3855 return (0);
3856 }
3857 reg->ushm_refcnt++;
3858 TAILQ_INSERT_TAIL(&umtx_shm_registry[key->hash], reg, ushm_reg_link);
3859 LIST_INSERT_HEAD(USHM_OBJ_UMTX(key->info.shared.object), reg,
3860 ushm_obj_link);
3861 reg->ushm_flags = USHMF_REG_LINKED | USHMF_OBJ_LINKED;
3862 mtx_unlock(&umtx_shm_lock);
3863 *res = reg;
3864 return (0);
3865 }
3866
3867 static int
3868 umtx_shm_alive(struct thread *td, void *addr)
3869 {
3870 vm_map_t map;
3871 vm_map_entry_t entry;
3872 vm_object_t object;
3873 vm_pindex_t pindex;
3874 vm_prot_t prot;
3875 int res, ret;
3876 boolean_t wired;
3877
3878 map =
&td->td_proc->p_vmspace->vm_map; 3879 res = vm_map_lookup(&map, (uintptr_t)addr, VM_PROT_READ, &entry, 3880 &object, &pindex, &prot, &wired); 3881 if (res != KERN_SUCCESS) 3882 return (EFAULT); 3883 if (object == NULL) 3884 ret = EINVAL; 3885 else 3886 ret = (object->flags & OBJ_UMTXDEAD) != 0 ? ENOTTY : 0; 3887 vm_map_lookup_done(map, entry); 3888 return (ret); 3889 } 3890 3891 static void 3892 umtx_shm_init(void) 3893 { 3894 int i; 3895 3896 umtx_shm_reg_zone = uma_zcreate("umtx_shm", sizeof(struct umtx_shm_reg), 3897 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 3898 mtx_init(&umtx_shm_lock, "umtxshm", NULL, MTX_DEF); 3899 for (i = 0; i < nitems(umtx_shm_registry); i++) 3900 TAILQ_INIT(&umtx_shm_registry[i]); 3901 } 3902 3903 static int 3904 umtx_shm(struct thread *td, void *addr, u_int flags) 3905 { 3906 struct umtx_key key; 3907 struct umtx_shm_reg *reg; 3908 struct file *fp; 3909 int error, fd; 3910 3911 if (__bitcount(flags & (UMTX_SHM_CREAT | UMTX_SHM_LOOKUP | 3912 UMTX_SHM_DESTROY| UMTX_SHM_ALIVE)) != 1) 3913 return (EINVAL); 3914 if ((flags & UMTX_SHM_ALIVE) != 0) 3915 return (umtx_shm_alive(td, addr)); 3916 error = umtx_key_get(addr, TYPE_SHM, PROCESS_SHARE, &key); 3917 if (error != 0) 3918 return (error); 3919 KASSERT(key.shared == 1, ("non-shared key")); 3920 if ((flags & UMTX_SHM_CREAT) != 0) { 3921 error = umtx_shm_create_reg(td, &key, ®); 3922 } else { 3923 reg = umtx_shm_find_reg(&key); 3924 if (reg == NULL) 3925 error = ESRCH; 3926 } 3927 umtx_key_release(&key); 3928 if (error != 0) 3929 return (error); 3930 KASSERT(reg != NULL, ("no reg")); 3931 if ((flags & UMTX_SHM_DESTROY) != 0) { 3932 umtx_shm_unref_reg(reg, true); 3933 } else { 3934 #if 0 3935 #ifdef MAC 3936 error = mac_posixshm_check_open(td->td_ucred, 3937 reg->ushm_obj, FFLAGS(O_RDWR)); 3938 if (error == 0) 3939 #endif 3940 error = shm_access(reg->ushm_obj, td->td_ucred, 3941 FFLAGS(O_RDWR)); 3942 if (error == 0) 3943 #endif 3944 error = falloc_caps(td, &fp, &fd, O_CLOEXEC, NULL); 3945 if (error == 0) { 3946 shm_hold(reg->ushm_obj); 3947 finit(fp, FFLAGS(O_RDWR), DTYPE_SHM, reg->ushm_obj, 3948 &shm_ops); 3949 td->td_retval[0] = fd; 3950 fdrop(fp, td); 3951 } 3952 } 3953 umtx_shm_unref_reg(reg, false); 3954 return (error); 3955 } 3956 3957 static int 3958 __umtx_op_shm(struct thread *td, struct _umtx_op_args *uap) 3959 { 3960 3961 return (umtx_shm(td, uap->uaddr1, uap->val)); 3962 } 3963 3964 static int 3965 umtx_robust_lists(struct thread *td, struct umtx_robust_lists_params *rbp) 3966 { 3967 3968 td->td_rb_list = rbp->robust_list_offset; 3969 td->td_rbp_list = rbp->robust_priv_list_offset; 3970 td->td_rb_inact = rbp->robust_inact_offset; 3971 return (0); 3972 } 3973 3974 static int 3975 __umtx_op_robust_lists(struct thread *td, struct _umtx_op_args *uap) 3976 { 3977 struct umtx_robust_lists_params rb; 3978 int error; 3979 3980 if (uap->val > sizeof(rb)) 3981 return (EINVAL); 3982 bzero(&rb, sizeof(rb)); 3983 error = copyin(uap->uaddr1, &rb, uap->val); 3984 if (error != 0) 3985 return (error); 3986 return (umtx_robust_lists(td, &rb)); 3987 } 3988 3989 typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap); 3990 3991 static const _umtx_op_func op_table[] = { 3992 [UMTX_OP_RESERVED0] = __umtx_op_unimpl, 3993 [UMTX_OP_RESERVED1] = __umtx_op_unimpl, 3994 [UMTX_OP_WAIT] = __umtx_op_wait, 3995 [UMTX_OP_WAKE] = __umtx_op_wake, 3996 [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex, 3997 [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex, 3998 [UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex, 3999 
[UMTX_OP_SET_CEILING] = __umtx_op_set_ceiling, 4000 [UMTX_OP_CV_WAIT] = __umtx_op_cv_wait, 4001 [UMTX_OP_CV_SIGNAL] = __umtx_op_cv_signal, 4002 [UMTX_OP_CV_BROADCAST] = __umtx_op_cv_broadcast, 4003 [UMTX_OP_WAIT_UINT] = __umtx_op_wait_uint, 4004 [UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock, 4005 [UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock, 4006 [UMTX_OP_RW_UNLOCK] = __umtx_op_rw_unlock, 4007 [UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private, 4008 [UMTX_OP_WAKE_PRIVATE] = __umtx_op_wake_private, 4009 [UMTX_OP_MUTEX_WAIT] = __umtx_op_wait_umutex, 4010 [UMTX_OP_MUTEX_WAKE] = __umtx_op_wake_umutex, 4011 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4012 [UMTX_OP_SEM_WAIT] = __umtx_op_sem_wait, 4013 [UMTX_OP_SEM_WAKE] = __umtx_op_sem_wake, 4014 #else 4015 [UMTX_OP_SEM_WAIT] = __umtx_op_unimpl, 4016 [UMTX_OP_SEM_WAKE] = __umtx_op_unimpl, 4017 #endif 4018 [UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private, 4019 [UMTX_OP_MUTEX_WAKE2] = __umtx_op_wake2_umutex, 4020 [UMTX_OP_SEM2_WAIT] = __umtx_op_sem2_wait, 4021 [UMTX_OP_SEM2_WAKE] = __umtx_op_sem2_wake, 4022 [UMTX_OP_SHM] = __umtx_op_shm, 4023 [UMTX_OP_ROBUST_LISTS] = __umtx_op_robust_lists, 4024 }; 4025 4026 int 4027 sys__umtx_op(struct thread *td, struct _umtx_op_args *uap) 4028 { 4029 4030 if ((unsigned)uap->op < nitems(op_table)) 4031 return (*op_table[uap->op])(td, uap); 4032 return (EINVAL); 4033 } 4034 4035 #ifdef COMPAT_FREEBSD32 4036 4037 struct timespec32 { 4038 int32_t tv_sec; 4039 int32_t tv_nsec; 4040 }; 4041 4042 struct umtx_time32 { 4043 struct timespec32 timeout; 4044 uint32_t flags; 4045 uint32_t clockid; 4046 }; 4047 4048 static inline int 4049 umtx_copyin_timeout32(void *addr, struct timespec *tsp) 4050 { 4051 struct timespec32 ts32; 4052 int error; 4053 4054 error = copyin(addr, &ts32, sizeof(struct timespec32)); 4055 if (error == 0) { 4056 if (ts32.tv_sec < 0 || 4057 ts32.tv_nsec >= 1000000000 || 4058 ts32.tv_nsec < 0) 4059 error = EINVAL; 4060 else { 4061 tsp->tv_sec = ts32.tv_sec; 4062 tsp->tv_nsec = ts32.tv_nsec; 4063 } 4064 } 4065 return (error); 4066 } 4067 4068 static inline int 4069 umtx_copyin_umtx_time32(const void *addr, size_t size, struct _umtx_time *tp) 4070 { 4071 struct umtx_time32 t32; 4072 int error; 4073 4074 t32.clockid = CLOCK_REALTIME; 4075 t32.flags = 0; 4076 if (size <= sizeof(struct timespec32)) 4077 error = copyin(addr, &t32.timeout, sizeof(struct timespec32)); 4078 else 4079 error = copyin(addr, &t32, sizeof(struct umtx_time32)); 4080 if (error != 0) 4081 return (error); 4082 if (t32.timeout.tv_sec < 0 || 4083 t32.timeout.tv_nsec >= 1000000000 || t32.timeout.tv_nsec < 0) 4084 return (EINVAL); 4085 tp->_timeout.tv_sec = t32.timeout.tv_sec; 4086 tp->_timeout.tv_nsec = t32.timeout.tv_nsec; 4087 tp->_flags = t32.flags; 4088 tp->_clockid = t32.clockid; 4089 return (0); 4090 } 4091 4092 static int 4093 __umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 4094 { 4095 struct _umtx_time *tm_p, timeout; 4096 int error; 4097 4098 if (uap->uaddr2 == NULL) 4099 tm_p = NULL; 4100 else { 4101 error = umtx_copyin_umtx_time32(uap->uaddr2, 4102 (size_t)uap->uaddr1, &timeout); 4103 if (error != 0) 4104 return (error); 4105 tm_p = &timeout; 4106 } 4107 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); 4108 } 4109 4110 static int 4111 __umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap) 4112 { 4113 struct _umtx_time *tm_p, timeout; 4114 int error; 4115 4116 /* Allow a null timespec (wait forever). 
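 *
 * For 32-bit processes the timeout arrives as a timespec32 or
 * umtx_time32 and is widened to the native struct _umtx_time by
 * umtx_copyin_umtx_time32() before the common do_lock_umutex() path
 * runs.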
*/
4117 if (uap->uaddr2 == NULL)
4118 tm_p = NULL;
4119 else {
4120 error = umtx_copyin_umtx_time32(uap->uaddr2,
4121 (size_t)uap->uaddr1, &timeout);
4122 if (error != 0)
4123 return (error);
4124 tm_p = &timeout;
4125 }
4126 return (do_lock_umutex(td, uap->obj, tm_p, 0));
4127 }
4128
4129 static int
4130 __umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
4131 {
4132 struct _umtx_time *tm_p, timeout;
4133 int error;
4134
4135 /* Allow a null timespec (wait forever). */
4136 if (uap->uaddr2 == NULL)
4137 tm_p = NULL;
4138 else {
4139 error = umtx_copyin_umtx_time32(uap->uaddr2,
4140 (size_t)uap->uaddr1, &timeout);
4141 if (error != 0)
4142 return (error);
4143 tm_p = &timeout;
4144 }
4145 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT));
4146 }
4147
4148 static int
4149 __umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
4150 {
4151 struct timespec *ts, timeout;
4152 int error;
4153
4154 /* Allow a null timespec (wait forever). */
4155 if (uap->uaddr2 == NULL)
4156 ts = NULL;
4157 else {
4158 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
4159 if (error != 0)
4160 return (error);
4161 ts = &timeout;
4162 }
4163 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
4164 }
4165
4166 static int
4167 __umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap)
4168 {
4169 struct _umtx_time timeout;
4170 int error;
4171
4172 /* Allow a null timespec (wait forever). */
4173 if (uap->uaddr2 == NULL) {
4174 error = do_rw_rdlock(td, uap->obj, uap->val, 0);
4175 } else {
4176 error = umtx_copyin_umtx_time32(uap->uaddr2,
4177 (size_t)uap->uaddr1, &timeout);
4178 if (error != 0)
4179 return (error);
4180 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout);
4181 }
4182 return (error);
4183 }
4184
4185 static int
4186 __umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap)
4187 {
4188 struct _umtx_time timeout;
4189 int error;
4190
4191 /* Allow a null timespec (wait forever). */
4192 if (uap->uaddr2 == NULL) {
4193 error = do_rw_wrlock(td, uap->obj, 0);
4194 } else {
4195 error = umtx_copyin_umtx_time32(uap->uaddr2,
4196 (size_t)uap->uaddr1, &timeout);
4197 if (error != 0)
4198 return (error);
4199 error = do_rw_wrlock(td, uap->obj, &timeout);
4200 }
4201 return (error);
4202 }
4203
4204 static int
4205 __umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap)
4206 {
4207 struct _umtx_time *tm_p, timeout;
4208 int error;
4209
4210 if (uap->uaddr2 == NULL)
4211 tm_p = NULL;
4212 else {
4213 error = umtx_copyin_umtx_time32(
4214 uap->uaddr2, (size_t)uap->uaddr1, &timeout);
4215 if (error != 0)
4216 return (error);
4217 tm_p = &timeout;
4218 }
4219 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1));
4220 }
4221
4222 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
4223 static int
4224 __umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
4225 {
4226 struct _umtx_time *tm_p, timeout;
4227 int error;
4228
4229 /* Allow a null timespec (wait forever).
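 *
 * COMPAT_FREEBSD9/10 only: the legacy _usem keeps _has_waiters in a
 * word separate from _count, unlike _usem2, which folds the flag into
 * _count as USEM_HAS_WAITERS.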
*/ 4230 if (uap->uaddr2 == NULL) 4231 tm_p = NULL; 4232 else { 4233 error = umtx_copyin_umtx_time32(uap->uaddr2, 4234 (size_t)uap->uaddr1, &timeout); 4235 if (error != 0) 4236 return (error); 4237 tm_p = &timeout; 4238 } 4239 return (do_sem_wait(td, uap->obj, tm_p)); 4240 } 4241 #endif 4242 4243 static int 4244 __umtx_op_sem2_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 4245 { 4246 struct _umtx_time *tm_p, timeout; 4247 size_t uasize; 4248 int error; 4249 4250 /* Allow a null timespec (wait forever). */ 4251 if (uap->uaddr2 == NULL) { 4252 uasize = 0; 4253 tm_p = NULL; 4254 } else { 4255 uasize = (size_t)uap->uaddr1; 4256 error = umtx_copyin_umtx_time32(uap->uaddr2, uasize, &timeout); 4257 if (error != 0) 4258 return (error); 4259 tm_p = &timeout; 4260 } 4261 error = do_sem2_wait(td, uap->obj, tm_p); 4262 if (error == EINTR && uap->uaddr2 != NULL && 4263 (timeout._flags & UMTX_ABSTIME) == 0 && 4264 uasize >= sizeof(struct umtx_time32) + sizeof(struct timespec32)) { 4265 struct timespec32 remain32 = { 4266 .tv_sec = timeout._timeout.tv_sec, 4267 .tv_nsec = timeout._timeout.tv_nsec 4268 }; 4269 error = copyout(&remain32, 4270 (struct umtx_time32 *)uap->uaddr2 + 1, 4271 sizeof(struct timespec32)); 4272 if (error == 0) { 4273 error = EINTR; 4274 } 4275 } 4276 4277 return (error); 4278 } 4279 4280 static int 4281 __umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap) 4282 { 4283 uint32_t uaddrs[BATCH_SIZE], **upp; 4284 int count, error, i, pos, tocopy; 4285 4286 upp = (uint32_t **)uap->obj; 4287 error = 0; 4288 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 4289 pos += tocopy) { 4290 tocopy = MIN(count, BATCH_SIZE); 4291 error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t)); 4292 if (error != 0) 4293 break; 4294 for (i = 0; i < tocopy; ++i) 4295 kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i], 4296 INT_MAX, 1); 4297 maybe_yield(); 4298 } 4299 return (error); 4300 } 4301 4302 struct umtx_robust_lists_params_compat32 { 4303 uint32_t robust_list_offset; 4304 uint32_t robust_priv_list_offset; 4305 uint32_t robust_inact_offset; 4306 }; 4307 4308 static int 4309 __umtx_op_robust_lists_compat32(struct thread *td, struct _umtx_op_args *uap) 4310 { 4311 struct umtx_robust_lists_params rb; 4312 struct umtx_robust_lists_params_compat32 rb32; 4313 int error; 4314 4315 if (uap->val > sizeof(rb32)) 4316 return (EINVAL); 4317 bzero(&rb, sizeof(rb)); 4318 bzero(&rb32, sizeof(rb32)); 4319 error = copyin(uap->uaddr1, &rb32, uap->val); 4320 if (error != 0) 4321 return (error); 4322 rb.robust_list_offset = rb32.robust_list_offset; 4323 rb.robust_priv_list_offset = rb32.robust_priv_list_offset; 4324 rb.robust_inact_offset = rb32.robust_inact_offset; 4325 return (umtx_robust_lists(td, &rb)); 4326 } 4327 4328 static const _umtx_op_func op_table_compat32[] = { 4329 [UMTX_OP_RESERVED0] = __umtx_op_unimpl, 4330 [UMTX_OP_RESERVED1] = __umtx_op_unimpl, 4331 [UMTX_OP_WAIT] = __umtx_op_wait_compat32, 4332 [UMTX_OP_WAKE] = __umtx_op_wake, 4333 [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex, 4334 [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex_compat32, 4335 [UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex, 4336 [UMTX_OP_SET_CEILING] = __umtx_op_set_ceiling, 4337 [UMTX_OP_CV_WAIT] = __umtx_op_cv_wait_compat32, 4338 [UMTX_OP_CV_SIGNAL] = __umtx_op_cv_signal, 4339 [UMTX_OP_CV_BROADCAST] = __umtx_op_cv_broadcast, 4340 [UMTX_OP_WAIT_UINT] = __umtx_op_wait_compat32, 4341 [UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock_compat32, 4342 [UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock_compat32, 
4343 [UMTX_OP_RW_UNLOCK] = __umtx_op_rw_unlock,
4344 [UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private_compat32,
4345 [UMTX_OP_WAKE_PRIVATE] = __umtx_op_wake_private,
4346 [UMTX_OP_MUTEX_WAIT] = __umtx_op_wait_umutex_compat32,
4347 [UMTX_OP_MUTEX_WAKE] = __umtx_op_wake_umutex,
4348 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
4349 [UMTX_OP_SEM_WAIT] = __umtx_op_sem_wait_compat32,
4350 [UMTX_OP_SEM_WAKE] = __umtx_op_sem_wake,
4351 #else
4352 [UMTX_OP_SEM_WAIT] = __umtx_op_unimpl,
4353 [UMTX_OP_SEM_WAKE] = __umtx_op_unimpl,
4354 #endif
4355 [UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private32,
4356 [UMTX_OP_MUTEX_WAKE2] = __umtx_op_wake2_umutex,
4357 [UMTX_OP_SEM2_WAIT] = __umtx_op_sem2_wait_compat32,
4358 [UMTX_OP_SEM2_WAKE] = __umtx_op_sem2_wake,
4359 [UMTX_OP_SHM] = __umtx_op_shm,
4360 [UMTX_OP_ROBUST_LISTS] = __umtx_op_robust_lists_compat32,
4361 };
4362
4363 int
4364 freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
4365 {
4366
4367 if ((unsigned)uap->op < nitems(op_table_compat32)) {
4368 return (*op_table_compat32[uap->op])(td,
4369 (struct _umtx_op_args *)uap);
4370 }
4371 return (EINVAL);
4372 }
4373 #endif
4374
4375 void
4376 umtx_thread_init(struct thread *td)
4377 {
4378
4379 td->td_umtxq = umtxq_alloc();
4380 td->td_umtxq->uq_thread = td;
4381 }
4382
4383 void
4384 umtx_thread_fini(struct thread *td)
4385 {
4386
4387 umtxq_free(td->td_umtxq);
4388 }
4389
4390 /*
4391 * Called when a new thread is created, e.g. by fork().
4392 */
4393 void
4394 umtx_thread_alloc(struct thread *td)
4395 {
4396 struct umtx_q *uq;
4397
4398 uq = td->td_umtxq;
4399 uq->uq_inherited_pri = PRI_MAX;
4400
4401 KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
4402 KASSERT(uq->uq_thread == td, ("uq_thread != td"));
4403 KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
4404 KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
4405 }
4406
4407 /*
4408 * exec() hook.
4409 *
4410 * Clear the robust lists for all of the process's threads, without
4411 * delaying the cleanup to the thread_exit hook, since the relevant
4412 * address space is destroyed right now.
4413 */
4414 static void
4415 umtx_exec_hook(void *arg __unused, struct proc *p,
4416 struct image_params *imgp __unused)
4417 {
4418 struct thread *td;
4419
4420 KASSERT(p == curproc, ("need curproc"));
4421 PROC_LOCK(p);
4422 KASSERT((p->p_flag & P_HADTHREADS) == 0 ||
4423 (p->p_flag & P_STOPPED_SINGLE) != 0,
4424 ("curproc must be single-threaded"));
4425 FOREACH_THREAD_IN_PROC(p, td) {
4426 KASSERT(td == curthread ||
4427 ((td->td_flags & TDF_BOUNDARY) != 0 && TD_IS_SUSPENDED(td)),
4428 ("running thread %p %p", p, td));
4429 PROC_UNLOCK(p);
4430 umtx_thread_cleanup(td);
4431 PROC_LOCK(p);
4432 td->td_rb_list = td->td_rbp_list = td->td_rb_inact = 0;
4433 }
4434 PROC_UNLOCK(p);
4435 }
4436
4437 /*
4438 * thread_exit() hook.
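 *
 * Unlike the exec hook above, which sweeps every thread of the
 * process, this runs in the context of the single exiting thread.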
4439 */ 4440 void 4441 umtx_thread_exit(struct thread *td) 4442 { 4443 4444 umtx_thread_cleanup(td); 4445 } 4446 4447 static int 4448 umtx_read_uptr(struct thread *td, uintptr_t ptr, uintptr_t *res) 4449 { 4450 u_long res1; 4451 #ifdef COMPAT_FREEBSD32 4452 uint32_t res32; 4453 #endif 4454 int error; 4455 4456 #ifdef COMPAT_FREEBSD32 4457 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { 4458 error = fueword32((void *)ptr, &res32); 4459 if (error == 0) 4460 res1 = res32; 4461 } else 4462 #endif 4463 { 4464 error = fueword((void *)ptr, &res1); 4465 } 4466 if (error == 0) 4467 *res = res1; 4468 else 4469 error = EFAULT; 4470 return (error); 4471 } 4472 4473 static void 4474 umtx_read_rb_list(struct thread *td, struct umutex *m, uintptr_t *rb_list) 4475 { 4476 #ifdef COMPAT_FREEBSD32 4477 struct umutex32 m32; 4478 4479 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { 4480 memcpy(&m32, m, sizeof(m32)); 4481 *rb_list = m32.m_rb_lnk; 4482 } else 4483 #endif 4484 *rb_list = m->m_rb_lnk; 4485 } 4486 4487 static int 4488 umtx_handle_rb(struct thread *td, uintptr_t rbp, uintptr_t *rb_list, bool inact) 4489 { 4490 struct umutex m; 4491 int error; 4492 4493 KASSERT(td->td_proc == curproc, ("need current vmspace")); 4494 error = copyin((void *)rbp, &m, sizeof(m)); 4495 if (error != 0) 4496 return (error); 4497 if (rb_list != NULL) 4498 umtx_read_rb_list(td, &m, rb_list); 4499 if ((m.m_flags & UMUTEX_ROBUST) == 0) 4500 return (EINVAL); 4501 if ((m.m_owner & ~UMUTEX_CONTESTED) != td->td_tid) 4502 /* inact is cleared after unlock, allow the inconsistency */ 4503 return (inact ? 0 : EINVAL); 4504 return (do_unlock_umutex(td, (struct umutex *)rbp, true)); 4505 } 4506 4507 static void 4508 umtx_cleanup_rb_list(struct thread *td, uintptr_t rb_list, uintptr_t *rb_inact, 4509 const char *name) 4510 { 4511 int error, i; 4512 uintptr_t rbp; 4513 bool inact; 4514 4515 if (rb_list == 0) 4516 return; 4517 error = umtx_read_uptr(td, rb_list, &rbp); 4518 for (i = 0; error == 0 && rbp != 0 && i < umtx_max_rb; i++) { 4519 if (rbp == *rb_inact) { 4520 inact = true; 4521 *rb_inact = 0; 4522 } else 4523 inact = false; 4524 error = umtx_handle_rb(td, rbp, &rbp, inact); 4525 } 4526 if (i == umtx_max_rb && umtx_verbose_rb) { 4527 uprintf("comm %s pid %d: reached umtx %smax rb %d\n", 4528 td->td_proc->p_comm, td->td_proc->p_pid, name, umtx_max_rb); 4529 } 4530 if (error != 0 && umtx_verbose_rb) { 4531 uprintf("comm %s pid %d: handling %srb error %d\n", 4532 td->td_proc->p_comm, td->td_proc->p_pid, name, error); 4533 } 4534 } 4535 4536 /* 4537 * Clean up umtx data. 4538 */ 4539 static void 4540 umtx_thread_cleanup(struct thread *td) 4541 { 4542 struct umtx_q *uq; 4543 struct umtx_pi *pi; 4544 uintptr_t rb_inact; 4545 4546 /* 4547 * Disown pi mutexes. 4548 */ 4549 uq = td->td_umtxq; 4550 if (uq != NULL) { 4551 mtx_lock(&umtx_lock); 4552 uq->uq_inherited_pri = PRI_MAX; 4553 while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) { 4554 pi->pi_owner = NULL; 4555 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link); 4556 } 4557 mtx_unlock(&umtx_lock); 4558 thread_lock(td); 4559 sched_lend_user_prio(td, PRI_MAX); 4560 thread_unlock(td); 4561 } 4562 4563 /* 4564 * Handle terminated robust mutexes. Must be done after 4565 * robust pi disown, otherwise unlock could see unowned 4566 * entries. 
4567 */ 4568 rb_inact = td->td_rb_inact; 4569 if (rb_inact != 0) 4570 (void)umtx_read_uptr(td, rb_inact, &rb_inact); 4571 umtx_cleanup_rb_list(td, td->td_rb_list, &rb_inact, ""); 4572 umtx_cleanup_rb_list(td, td->td_rbp_list, &rb_inact, "priv "); 4573 if (rb_inact != 0) 4574 (void)umtx_handle_rb(td, rb_inact, NULL, true); 4575 } 4576