/*-
 * Copyright (c) 2015, 2016 The FreeBSD Foundation
 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Portions of this software were developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"
#include "opt_umtx_profiling.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/syscallsubr.h>
#include <sys/taskqueue.h>
#include <sys/time.h>
#include <sys/eventhandler.h>
#include <sys/umtx.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

#include <machine/atomic.h>
#include <machine/cpu.h>

#ifdef COMPAT_FREEBSD32
#include <compat/freebsd32/freebsd32_proto.h>
#endif

#define	_UMUTEX_TRY		1
#define	_UMUTEX_WAIT		2

#ifdef UMTX_PROFILING
#define	UPROF_PERC_BIGGER(w, f, sw, sf)					\
	(((w) > (sw)) || ((w) == (sw) && (f) > (sf)))
#endif

/* Priority inheritance mutex info. */
struct umtx_pi {
	/* Owner thread */
	struct thread		*pi_owner;

	/* Reference count */
	int			pi_refcount;

	/* List entry linking the PI mutexes held by a thread */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* List entry in hash */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* List for waiters */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Identify a userland lock object */
	struct umtx_key		pi_key;
};
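
/*
 * A note on the PI bookkeeping above: each contested PI mutex is
 * represented by at most one umtx_pi.  Waiters' umtx_q entries are
 * kept on pi_blocked sorted by effective user priority, and every
 * thread strings the contested PI mutexes it owns on its own
 * uq_pi_contested list (see struct umtx_q below).  This is what the
 * priority-propagation code walks when lending priority.
 */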

/*
 * A waiter on a userland synchronization object.
 */
struct umtx_q {
	/* Linked list for the hash. */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Umtx key. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define UQF_UMTXQ	0x0001

	/* The thread that is waiting. */
	struct thread		*uq_thread;

	/*
	 * Blocked on a PI mutex.  Reads can use either the chain lock
	 * or umtx_lock; writes must hold both.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* On the blocked list of a PI mutex */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* Contested PI mutexes owned by this thread */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Inherited priority from PP mutex */
	u_char			uq_inherited_pri;

	/* Spare queue ready to be reused */
	struct umtxq_queue	*uq_spare_queue;

	/* The queue we are on */
	struct umtxq_queue	*uq_cur_queue;
};

TAILQ_HEAD(umtxq_head, umtx_q);

/* Per-key wait-queue */
struct umtxq_queue {
	struct umtxq_head	head;
	struct umtx_key		key;
	LIST_ENTRY(umtxq_queue)	link;
	int			length;
};

LIST_HEAD(umtxq_list, umtxq_queue);

/* Userland lock object's wait-queue chain */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx		uc_lock;

	/* List of sleep queues. */
	struct umtxq_list	uc_queue[2];
#define UMTX_SHARED_QUEUE	0
#define UMTX_EXCLUSIVE_QUEUE	1

	LIST_HEAD(, umtxq_queue) uc_spare_queue;

	/* Busy flag */
	char			uc_busy;

	/* Chain lock waiters */
	int			uc_waiters;

	/* All PI in the list */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;

#ifdef UMTX_PROFILING
	u_int			length;
	u_int			max_length;
#endif
};

#define	UMTXQ_LOCKED_ASSERT(uc)		mtx_assert(&(uc)->uc_lock, MA_OWNED)

/*
 * Do not propagate time-sharing priority.  There is a security reason:
 * a user could introduce a PI mutex, let thread A lock it, and let
 * another thread B block on it.  Because B is sleeping, its priority
 * would be boosted, and priority propagation would boost A's priority
 * as well.  A's priority would then never be lowered even while it is
 * using 100% CPU, which is unfair to other processes.
 */
#define UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&	\
			    (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?	\
			    PRI_MAX_TIMESHARE : (td)->td_user_pri)

#define	GOLDEN_RATIO_PRIME	2654404609U
#ifndef	UMTX_CHAINS
#define	UMTX_CHAINS		512
#endif
#define	UMTX_SHIFTS		(__WORD_BIT - 9)
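
/*
 * A sketch of the hash computed by umtxq_hash() below from these
 * constants: the two words of the key's address pair are summed, the
 * sum is scrambled by a multiply with GOLDEN_RATIO_PRIME
 * (Fibonacci-style multiplicative hashing), the high-order nine bits
 * are extracted via UMTX_SHIFTS, and the result is reduced modulo
 * UMTX_CHAINS:
 *
 *	n = (uintptr_t)a + b;
 *	hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
 *
 * The multiplication spreads nearby addresses across chains, so
 * unrelated locks rarely contend on the same chain lock.
 */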

#define GET_SHARE(flags)	\
    (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)

#define BUSY_SPINS		200

struct abs_timeout {
	int clockid;
	bool is_abs_real;	/* TIMER_ABSTIME && CLOCK_REALTIME* */
	struct timespec cur;
	struct timespec end;
};

#ifdef COMPAT_FREEBSD32
struct umutex32 {
	volatile __lwpid_t	m_owner;	/* Owner of the mutex */
	__uint32_t		m_flags;	/* Flags of the mutex */
	__uint32_t		m_ceilings[2];	/* Priority protect ceiling */
	__uint32_t		m_rb_lnk;	/* Robust linkage */
	__uint32_t		m_pad;
	__uint32_t		m_spare[2];
};

_Static_assert(sizeof(struct umutex) == sizeof(struct umutex32), "umutex32");
_Static_assert(__offsetof(struct umutex, m_spare[0]) ==
    __offsetof(struct umutex32, m_spare[0]), "m_spare32");
#endif

int umtx_shm_vnobj_persistent = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_vnode_persistent, CTLFLAG_RWTUN,
    &umtx_shm_vnobj_persistent, 0,
    "False forces destruction of umtx attached to file, on last close");
static int umtx_max_rb = 1000;
SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_max_robust, CTLFLAG_RWTUN,
    &umtx_max_rb, 0,
    "");

static uma_zone_t		umtx_pi_zone;
static struct umtxq_chain	umtxq_chains[2][UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
static int			umtx_pi_allocated;

static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
    &umtx_pi_allocated, 0, "Allocated umtx_pi");
static int umtx_verbose_rb = 1;
SYSCTL_INT(_debug_umtx, OID_AUTO, robust_faults_verbose, CTLFLAG_RWTUN,
    &umtx_verbose_rb, 0,
    "");

#ifdef UMTX_PROFILING
static long max_length;
SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0,
    "max_length");
static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD, 0,
    "umtx chain stats");
#endif

static void abs_timeout_update(struct abs_timeout *timo);

static void umtx_shm_init(void);
static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert_queue(struct umtx_q *uq, int q);
static void umtxq_remove_queue(struct umtx_q *uq, int q);
static int umtxq_sleep(struct umtx_q *uq, const char *wmesg,
    struct abs_timeout *);
static int umtxq_count(struct umtx_key *key);
static struct umtx_pi *umtx_pi_alloc(int);
static void umtx_pi_free(struct umtx_pi *pi);
static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    bool rb);
static void umtx_thread_cleanup(struct thread *td);
static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
    struct image_params *imgp __unused);
SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);

#define umtxq_signal(key, nwake)	umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
#define umtxq_insert(uq)	umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
#define umtxq_remove(uq)	umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)

static struct mtx umtx_lock;

#ifdef UMTX_PROFILING
static void
umtx_init_profiling(void)
{
	struct sysctl_oid *chain_oid;
	char chain_name[10];
	int i;

	for (i = 0; i < UMTX_CHAINS; ++i) {
		snprintf(chain_name, sizeof(chain_name), "%d", i);
		chain_oid = SYSCTL_ADD_NODE(NULL,
		    SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO,
		    chain_name, CTLFLAG_RD, NULL, "umtx hash stats");
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length0", CTLFLAG_RD,
		    &umtxq_chains[0][i].max_length, 0, NULL);
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length1", CTLFLAG_RD,
		    &umtxq_chains[1][i].max_length, 0, NULL);
	}
}

static int
sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS)
{
	char buf[512];
	struct sbuf sb;
	struct umtxq_chain *uc;
	u_int fract, i, j, tot, whole;
	u_int sf0, sf1, sf2, sf3, sf4;
	u_int si0, si1, si2, si3, si4;
	u_int sw0, sw1, sw2, sw3, sw4;

	sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
	for (i = 0; i < 2; i++) {
		tot = 0;
		for (j = 0; j < UMTX_CHAINS; ++j) {
			uc = &umtxq_chains[i][j];
			mtx_lock(&uc->uc_lock);
			tot += uc->max_length;
			mtx_unlock(&uc->uc_lock);
		}
		if (tot == 0)
			sbuf_printf(&sb, "%u) Empty ", i);
		else {
			sf0 = sf1 = sf2 = sf3 = sf4 = 0;
			si0 = si1 = si2 = si3 = si4 = 0;
			sw0 = sw1 = sw2 = sw3 = sw4 = 0;
			for (j = 0; j < UMTX_CHAINS; j++) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				whole = uc->max_length * 100;
				mtx_unlock(&uc->uc_lock);
				fract = (whole % tot) * 100;
				if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) {
					sf0 = fract;
					si0 = j;
					sw0 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw1,
				    sf1)) {
					sf1 = fract;
					si1 = j;
					sw1 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw2,
				    sf2)) {
					sf2 = fract;
					si2 = j;
					sw2 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw3,
				    sf3)) {
					sf3 = fract;
					si3 = j;
					sw3 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw4,
				    sf4)) {
					sf4 = fract;
					si4 = j;
					sw4 = whole;
				}
			}
			sbuf_printf(&sb, "queue %u:\n", i);
			sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot,
			    sf0 / tot, si0);
			sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot,
			    sf1 / tot, si1);
			sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot,
			    sf2 / tot, si2);
			sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot,
			    sf3 / tot, si3);
			sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot,
			    sf4 / tot, si4);
		}
	}
	sbuf_trim(&sb);
	sbuf_finish(&sb);
	sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
	sbuf_delete(&sb);
	return (0);
}

static int
sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS)
{
	struct umtxq_chain *uc;
	u_int i, j;
	int clear, error;

	clear = 0;
	error = sysctl_handle_int(oidp, &clear, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);

	if (clear != 0) {
		for (i = 0; i < 2; ++i) {
			for (j = 0; j < UMTX_CHAINS; ++j) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				uc->length = 0;
				uc->max_length = 0;
				mtx_unlock(&uc->uc_lock);
			}
		}
	}
	return (0);
}

SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_clear, "I", "Clear umtx chains statistics");
SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_peaks, "A", "Highest peaks in chains max length");
#endif

static void
umtxq_sysinit(void *arg __unused)
{
	int i, j;

	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	for (i = 0; i < 2; ++i) {
		for (j = 0; j < UMTX_CHAINS; ++j) {
			mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
			    MTX_DEF | MTX_DUPOK);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
			LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
			TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
			umtxq_chains[i][j].uc_busy = 0;
			umtxq_chains[i][j].uc_waiters = 0;
#ifdef UMTX_PROFILING
			umtxq_chains[i][j].length = 0;
			umtxq_chains[i][j].max_length = 0;
#endif
		}
	}
#ifdef UMTX_PROFILING
	umtx_init_profiling();
#endif
	mtx_init(&umtx_lock, "umtx lock", NULL, MTX_DEF);
	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
	    EVENTHANDLER_PRI_ANY);
	umtx_shm_init();
}

struct umtx_q *
umtxq_alloc(void)
{
	struct umtx_q *uq;

	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
	uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX,
	    M_WAITOK | M_ZERO);
	TAILQ_INIT(&uq->uq_spare_queue->head);
	TAILQ_INIT(&uq->uq_pi_contested);
	uq->uq_inherited_pri = PRI_MAX;
	return (uq);
}

void
umtxq_free(struct umtx_q *uq)
{

	MPASS(uq->uq_spare_queue != NULL);
	free(uq->uq_spare_queue, M_UMTX);
	free(uq, M_UMTX);
}

static inline void
umtxq_hash(struct umtx_key *key)
{
	unsigned n;

	n = (uintptr_t)key->info.both.a + key->info.both.b;
	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
}

static inline struct umtxq_chain *
umtxq_getchain(struct umtx_key *key)
{

	if (key->type <= TYPE_SEM)
		return (&umtxq_chains[1][key->hash]);
	return (&umtxq_chains[0][key->hash]);
}

/*
 * Lock a chain.
 */
static inline void
umtxq_lock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_lock(&uc->uc_lock);
}

/*
 * Unlock a chain.
 */
static inline void
umtxq_unlock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_unlock(&uc->uc_lock);
}

/*
 * Set the chain to the busy state when the following operation
 * may block (a kernel mutex cannot be used across it).
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	if (uc->uc_busy) {
#ifdef SMP
		if (smp_cpus > 1) {
			int count = BUSY_SPINS;

			if (count > 0) {
				umtxq_unlock(key);
				while (uc->uc_busy && --count > 0)
					cpu_spinwait();
				umtxq_lock(key);
			}
		}
#endif
		while (uc->uc_busy) {
			uc->uc_waiters++;
			msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
			uc->uc_waiters--;
		}
	}
	uc->uc_busy = 1;
}
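
/*
 * A note on the busy protocol above: operations that must touch
 * pageable userland memory (and may therefore fault and sleep) cannot
 * do so while holding uc_lock, so they mark the chain busy, drop the
 * chain mutex, access user memory, and then unbusy the chain.  The
 * BUSY_SPINS loop is an optimistic spin on SMP in the hope that the
 * current holder finishes quickly; failing that, the thread sleeps on
 * the chain ("umtxqb") and is woken by umtxq_unbusy() below.
 */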

/*
 * Unbusy a chain.
 */
static inline void
umtxq_unbusy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	KASSERT(uc->uc_busy != 0, ("not busy"));
	uc->uc_busy = 0;
	if (uc->uc_waiters)
		wakeup_one(uc);
}

static inline void
umtxq_unbusy_unlocked(struct umtx_key *key)
{

	umtxq_lock(key);
	umtxq_unbusy(key);
	umtxq_unlock(key);
}

static struct umtxq_queue *
umtxq_queue_lookup(struct umtx_key *key, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	LIST_FOREACH(uh, &uc->uc_queue[q], link) {
		if (umtx_key_match(&uh->key, key))
			return (uh);
	}

	return (NULL);
}

static inline void
umtxq_insert_queue(struct umtx_q *uq, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT((uq->uq_flags & UQF_UMTXQ) == 0,
	    ("umtx_q is already on queue"));
	uh = umtxq_queue_lookup(&uq->uq_key, q);
	if (uh != NULL) {
		LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue,
		    link);
	} else {
		uh = uq->uq_spare_queue;
		uh->key = uq->uq_key;
		LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
#ifdef UMTX_PROFILING
		uc->length++;
		if (uc->length > uc->max_length) {
			uc->max_length = uc->length;
			if (uc->max_length > max_length)
				max_length = uc->max_length;
		}
#endif
	}
	uq->uq_spare_queue = NULL;

	TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
	uh->length++;
	uq->uq_flags |= UQF_UMTXQ;
	uq->uq_cur_queue = uh;
	return;
}

static inline void
umtxq_remove_queue(struct umtx_q *uq, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (uq->uq_flags & UQF_UMTXQ) {
		uh = uq->uq_cur_queue;
		TAILQ_REMOVE(&uh->head, uq, uq_link);
		uh->length--;
		uq->uq_flags &= ~UQF_UMTXQ;
		if (TAILQ_EMPTY(&uh->head)) {
			KASSERT(uh->length == 0,
			    ("inconsistent umtxq_queue length"));
#ifdef UMTX_PROFILING
			uc->length--;
#endif
			LIST_REMOVE(uh, link);
		} else {
			uh = LIST_FIRST(&uc->uc_spare_queue);
			KASSERT(uh != NULL, ("uc_spare_queue is empty"));
			LIST_REMOVE(uh, link);
		}
		uq->uq_spare_queue = uh;
		uq->uq_cur_queue = NULL;
	}
}

/*
 * Return the number of waiters for the key.
 */
static int
umtxq_count(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL)
		return (uh->length);
	return (0);
}

/*
 * Return the number of PI waiters for the key and, through *first,
 * the first waiter.
 */
static int
umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	*first = NULL;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL) {
		*first = TAILQ_FIRST(&uh->head);
		return (uh->length);
	}
	return (0);
}
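
/*
 * A note on the spare-queue dance in umtxq_insert_queue() and
 * umtxq_remove_queue() above: every umtx_q carries one preallocated
 * umtxq_queue.  The first waiter on a key donates its spare as the
 * per-key wait queue; later waiters park their spares on the chain's
 * uc_spare_queue list.  On removal, each thread takes a spare back:
 * the per-key queue itself once it drains, or one from the spare
 * list.  A thread therefore always leaves with exactly one spare, and
 * no allocation is ever needed while the chain lock is held.
 */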

static int
umtxq_check_susp(struct thread *td)
{
	struct proc *p;
	int error;

	/*
	 * The check for TDF_NEEDSUSPCHK is racy, but it is enough to
	 * eventually break the lockstep loop.
	 */
	if ((td->td_flags & TDF_NEEDSUSPCHK) == 0)
		return (0);
	error = 0;
	p = td->td_proc;
	PROC_LOCK(p);
	if (P_SHOULDSTOP(p) ||
	    ((p->p_flag & P_TRACED) && (td->td_dbgflags & TDB_SUSPEND))) {
		if (p->p_flag & P_SINGLE_EXIT)
			error = EINTR;
		else
			error = ERESTART;
	}
	PROC_UNLOCK(p);
	return (error);
}

/*
 * Wake up threads waiting on a userland object.
 */
static int
umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;
	struct umtx_q *uq;
	int ret;

	ret = 0;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, q);
	if (uh != NULL) {
		while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
			umtxq_remove_queue(uq, q);
			wakeup(uq);
			if (++ret >= n_wake)
				return (ret);
		}
	}
	return (ret);
}

/*
 * Wake up the specified thread.
 */
static inline void
umtxq_signal_thread(struct umtx_q *uq)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	umtxq_remove(uq);
	wakeup(uq);
}

static inline int
tstohz(const struct timespec *tsp)
{
	struct timeval tv;

	TIMESPEC_TO_TIMEVAL(&tv, tsp);
	return tvtohz(&tv);
}

static void
abs_timeout_init(struct abs_timeout *timo, int clockid, int absolute,
    const struct timespec *timeout)
{

	timo->clockid = clockid;
	if (!absolute) {
		timo->is_abs_real = false;
		abs_timeout_update(timo);
		timo->end = timo->cur;
		timespecadd(&timo->end, timeout);
	} else {
		timo->end = *timeout;
		timo->is_abs_real = clockid == CLOCK_REALTIME ||
		    clockid == CLOCK_REALTIME_FAST ||
		    clockid == CLOCK_REALTIME_PRECISE;
		/*
		 * If is_abs_real, umtxq_sleep will read the clock
		 * after setting td_rtcgen; otherwise, read it here.
		 */
		if (!timo->is_abs_real) {
			abs_timeout_update(timo);
		}
	}
}

static void
abs_timeout_init2(struct abs_timeout *timo, const struct _umtx_time *umtxtime)
{

	abs_timeout_init(timo, umtxtime->_clockid,
	    (umtxtime->_flags & UMTX_ABSTIME) != 0, &umtxtime->_timeout);
}

static inline void
abs_timeout_update(struct abs_timeout *timo)
{

	kern_clock_gettime(curthread, timo->clockid, &timo->cur);
}

static int
abs_timeout_gethz(struct abs_timeout *timo)
{
	struct timespec tts;

	if (timespeccmp(&timo->end, &timo->cur, <=))
		return (-1);
	tts = timo->end;
	timespecsub(&tts, &timo->cur);
	return (tstohz(&tts));
}

static uint32_t
umtx_unlock_val(uint32_t flags, bool rb)
{

	if (rb)
		return (UMUTEX_RB_OWNERDEAD);
	else if ((flags & UMUTEX_NONCONSISTENT) != 0)
		return (UMUTEX_RB_NOTRECOV);
	else
		return (UMUTEX_UNOWNED);
}
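
/*
 * A note on the unlock values above: a regular unlock stores
 * UMUTEX_UNOWNED.  When a robust mutex is released because its owner
 * terminated (rb == true), UMUTEX_RB_OWNERDEAD is stored, so the next
 * locker gets EOWNERDEAD and a chance to repair the protected state;
 * once userland marks the mutex non-consistent, the stored value
 * becomes UMUTEX_RB_NOTRECOV and lockers see ENOTRECOVERABLE.
 */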

/*
 * Put the thread into a sleep state.  Before sleeping, check whether
 * the thread was removed from the umtx queue.
 */
static inline int
umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *abstime)
{
	struct umtxq_chain *uc;
	int error, timo;

	if (abstime != NULL && abstime->is_abs_real) {
		curthread->td_rtcgen = atomic_load_acq_int(&rtc_generation);
		abs_timeout_update(abstime);
	}

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	for (;;) {
		if (!(uq->uq_flags & UQF_UMTXQ)) {
			error = 0;
			break;
		}
		if (abstime != NULL) {
			timo = abs_timeout_gethz(abstime);
			if (timo < 0) {
				error = ETIMEDOUT;
				break;
			}
		} else
			timo = 0;
		error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo);
		if (error == EINTR || error == ERESTART) {
			umtxq_lock(&uq->uq_key);
			break;
		}
		if (abstime != NULL) {
			if (abstime->is_abs_real)
				curthread->td_rtcgen =
				    atomic_load_acq_int(&rtc_generation);
			abs_timeout_update(abstime);
		}
		umtxq_lock(&uq->uq_key);
	}

	curthread->td_rtcgen = 0;
	return (error);
}

/*
 * Convert a userspace address into a unique logical address.
 */
int
umtx_key_get(const void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else {
		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
		    &entry, &key->info.shared.object, &pindex, &prot,
		    &wired) != KERN_SUCCESS) {
			return (EFAULT);
		}

		if ((share == PROCESS_SHARE) ||
		    (share == AUTO_SHARE &&
		     VM_INHERIT_SHARE == entry->inheritance)) {
			key->shared = 1;
			key->info.shared.offset = (vm_offset_t)addr -
			    entry->start + entry->offset;
			vm_object_reference(key->info.shared.object);
		} else {
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		vm_map_lookup_done(map, entry);
	}

	umtxq_hash(key);
	return (0);
}

/*
 * Release key.
 */
void
umtx_key_release(struct umtx_key *key)
{

	if (key->shared)
		vm_object_deallocate(key->info.shared.object);
}
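
/*
 * A note on key sharing: a private (THREAD_SHARE) key identifies the
 * lock by vmspace plus virtual address, so it can only match waiters
 * from the same process.  A shared key identifies the lock by backing
 * VM object plus offset, so processes that map the same page at
 * different addresses still hash to the same wait queue.  AUTO_SHARE
 * picks between the two based on the map entry's inheritance.
 */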

/*
 * Fetch and compare value; sleep on the address if the value has not
 * changed.
 */
static int
do_wait(struct thread *td, void *addr, u_long id,
    struct _umtx_time *timeout, int compat32, int is_private)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	u_long tmp;
	uint32_t tmp32;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	if (compat32 == 0) {
		error = fueword(addr, &tmp);
		if (error != 0)
			error = EFAULT;
	} else {
		error = fueword32(addr, &tmp32);
		if (error == 0)
			tmp = tmp32;
		else
			error = EFAULT;
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		if (tmp == id)
			error = umtxq_sleep(uq, "uwait", timeout == NULL ?
			    NULL : &timo);
		if ((uq->uq_flags & UQF_UMTXQ) == 0)
			error = 0;
		else
			umtxq_remove(uq);
	} else if ((uq->uq_flags & UQF_UMTXQ) != 0) {
		umtxq_remove(uq);
	}
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

/*
 * Wake up threads sleeping on the specified address.
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
		return (ret);
	umtxq_lock(&key);
	umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}
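
/*
 * Together, do_wait() and kern_umtx_wake() give userland a
 * futex-style primitive.  A rough sketch of how a library might use
 * it (an illustration, not a copy of the libthr sources):
 *
 *	// waiter: sleep only while *addr still holds 'val'
 *	while (atomic_load_32(addr) == val)
 *		_umtx_op(addr, UMTX_OP_WAIT_UINT, val, NULL, NULL);
 *
 *	// waker: change the value, then wake one sleeper
 *	atomic_store_32(addr, newval);
 *	_umtx_op(addr, UMTX_OP_WAKE, 1, NULL, NULL);
 *
 * The kernel re-reads the word after queuing the waiter, so a wake
 * that races with the userland load cannot be lost.
 */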

/*
 * Lock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int mode)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	error = 0;
	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */
	for (;;) {
		rv = fueword32(&m->m_owner, &owner);
		if (rv == -1)
			return (EFAULT);
		if (mode == _UMUTEX_WAIT) {
			if (owner == UMUTEX_UNOWNED ||
			    owner == UMUTEX_CONTESTED ||
			    owner == UMUTEX_RB_OWNERDEAD ||
			    owner == UMUTEX_RB_NOTRECOV)
				return (0);
		} else {
			/*
			 * Robust mutex terminated.  Kernel duty is to
			 * return EOWNERDEAD to the userspace.  The
			 * umutex.m_flags UMUTEX_NONCONSISTENT is set
			 * by the common userspace code.
			 */
			if (owner == UMUTEX_RB_OWNERDEAD) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_RB_OWNERDEAD, &owner,
				    id | UMUTEX_CONTESTED);
				if (rv == -1)
					return (EFAULT);
				if (owner == UMUTEX_RB_OWNERDEAD)
					return (EOWNERDEAD); /* success */
				rv = umtxq_check_susp(td);
				if (rv != 0)
					return (rv);
				continue;
			}
			if (owner == UMUTEX_RB_NOTRECOV)
				return (ENOTRECOVERABLE);

			/*
			 * Try the uncontested case.  This should be
			 * done in userland.
			 */
			rv = casueword32(&m->m_owner, UMUTEX_UNOWNED,
			    &owner, id);
			/* The address was invalid. */
			if (rv == -1)
				return (EFAULT);

			/* The acquire succeeded. */
			if (owner == UMUTEX_UNOWNED)
				return (0);

			/*
			 * If no one owns it but it is contested, try
			 * to acquire it.
			 */
			if (owner == UMUTEX_CONTESTED) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_CONTESTED, &owner,
				    id | UMUTEX_CONTESTED);
				/* The address was invalid. */
				if (rv == -1)
					return (EFAULT);

				if (owner == UMUTEX_CONTESTED)
					return (0);

				rv = umtxq_check_susp(td);
				if (rv != 0)
					return (rv);

				/*
				 * If this failed, the lock has
				 * changed; restart.
				 */
				continue;
			}
		}

		if (mode == _UMUTEX_TRY)
			return (EBUSY);

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails,
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old,
		    owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (rv == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtxn", timeout == NULL ?
			    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);

		if (error == 0)
			error = umtxq_check_susp(td);
	}

	return (0);
}

/*
 * Unlock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	uint32_t owner, old, id, newlock;
	int error, count;

	id = td->td_tid;

	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	newlock = umtx_unlock_val(flags, rb);
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, newlock);
		if (error == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	if (count > 1)
		newlock |= UMUTEX_CONTESTED;
	error = casueword32(&m->m_owner, owner, &old, newlock);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
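
/*
 * The normal-mutex word protocol, in brief: UMUTEX_UNOWNED means free,
 * a bare thread id means owned and uncontested, and
 * id | UMUTEX_CONTESTED means at least one thread has gone (or is
 * going) to sleep in the kernel.  Userland's fast path is a single
 * CAS, roughly (an illustration, not the libthr source):
 *
 *	if (atomic_cmpset_acq_32(&m->m_owner, UMUTEX_UNOWNED, tid))
 *		return (0);	// uncontested acquire
 *	return (_umtx_op(m, UMTX_OP_MUTEX_LOCK, 0, NULL, NULL));
 *
 * The unlock side only needs the system call when the contested bit
 * was set while it held the lock.
 */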

/*
 * Check whether the mutex is available and wake up a waiter;
 * for simple (non-PI, non-PP) mutexes only.
 */
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != 0 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV)
		return (0);

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	if (count <= 1 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV) {
		error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    UMUTEX_UNOWNED);
		if (error == -1)
			error = EFAULT;
	}

	umtxq_lock(&key);
	if (error == 0 && count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 ||
	    owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV))
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

/*
 * Check whether the mutex has waiters and try to repair the
 * contention bit.
 */
static int
do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old;
	int type;
	int error;
	int count;

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT |
	    UMUTEX_ROBUST)) {
	case 0:
	case UMUTEX_ROBUST:
		type = TYPE_NORMAL_UMUTEX;
		break;
	case UMUTEX_PRIO_INHERIT:
		type = TYPE_PI_UMUTEX;
		break;
	case (UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST):
		type = TYPE_PI_ROBUST_UMUTEX;
		break;
	case UMUTEX_PRIO_PROTECT:
		type = TYPE_PP_UMUTEX;
		break;
	case (UMUTEX_PRIO_PROTECT | UMUTEX_ROBUST):
		type = TYPE_PP_ROBUST_UMUTEX;
		break;
	default:
		return (EINVAL);
	}
	if ((error = umtx_key_get(m, type, GET_SHARE(flags), &key)) != 0)
		return (error);

	owner = 0;
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * Only repair the contention bit if there is a waiter; that
	 * means the mutex is still being referenced by userland code.
	 * Otherwise, do not update any memory.
	 */
	if (count > 1) {
		error = fueword32(&m->m_owner, &owner);
		if (error == -1)
			error = EFAULT;
		while (error == 0 && (owner & UMUTEX_CONTESTED) == 0) {
			error = casueword32(&m->m_owner, owner, &old,
			    owner | UMUTEX_CONTESTED);
			if (error == -1) {
				error = EFAULT;
				break;
			}
			if (old == owner)
				break;
			owner = old;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
	} else if (count == 1) {
		error = fueword32(&m->m_owner, &owner);
		if (error == -1)
			error = EFAULT;
		while (error == 0 && (owner & ~UMUTEX_CONTESTED) != 0 &&
		    (owner & UMUTEX_CONTESTED) == 0) {
			error = casueword32(&m->m_owner, owner, &old,
			    owner | UMUTEX_CONTESTED);
			if (error == -1) {
				error = EFAULT;
				break;
			}
			if (old == owner)
				break;
			owner = old;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
	}
	umtxq_lock(&key);
	if (error == EFAULT) {
		umtxq_signal(&key, INT_MAX);
	} else if (count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 ||
	    owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV))
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

static inline struct umtx_pi *
umtx_pi_alloc(int flags)
{
	struct umtx_pi *pi;

	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
	TAILQ_INIT(&pi->pi_blocked);
	atomic_add_int(&umtx_pi_allocated, 1);
	return (pi);
}

static inline void
umtx_pi_free(struct umtx_pi *pi)
{

	uma_zfree(umtx_pi_zone, pi);
	atomic_add_int(&umtx_pi_allocated, -1);
}

/*
 * Adjust the thread's position on the PI mutex's blocked list after
 * its priority has been changed.
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread or higher than the next thread.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}

static struct umtx_pi *
umtx_pi_next(struct umtx_pi *pi)
{
	struct umtx_q *uq_owner;

	if (pi->pi_owner == NULL)
		return (NULL);
	uq_owner = pi->pi_owner->td_umtxq;
	if (uq_owner == NULL)
		return (NULL);
	return (uq_owner->uq_pi_blocked);
}
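
/*
 * umtx_pi_next() steps along the blocking chain: from a PI mutex to
 * its owner, and from that owner to the PI mutex the owner itself is
 * blocked on, if any.  Because userland can construct arbitrary
 * ownership graphs, this chain can be made circular; the cycle check
 * below runs a slow iterator and a fast iterator down the chain and
 * declares a loop if the two ever meet.
 */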

/*
 * Floyd's cycle-finding algorithm.
 */
static bool
umtx_pi_check_loop(struct umtx_pi *pi)
{
	struct umtx_pi *pi1;	/* fast iterator */

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (false);
	pi1 = pi;
	for (;;) {
		pi = umtx_pi_next(pi);
		if (pi == NULL)
			break;
		pi1 = umtx_pi_next(pi1);
		if (pi1 == NULL)
			break;
		pi1 = umtx_pi_next(pi1);
		if (pi1 == NULL)
			break;
		if (pi == pi1)
			return (true);
	}
	return (false);
}

/*
 * Propagate priority when a thread is blocked on a POSIX
 * PI mutex.
 */
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;
	if (umtx_pi_check_loop(pi))
		return;

	for (;;) {
		td = pi->pi_owner;
		if (td == NULL || td == curthread)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		thread_lock(td);
		if (td->td_lend_user_pri > pri)
			sched_lend_user_prio(td, pri);
		else {
			thread_unlock(td);
			break;
		}
		thread_unlock(td);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		if (pi == NULL)
			break;
		/* Resort td on the list if needed. */
		umtx_pi_adjust_thread(pi, td);
	}
}

/*
 * Unpropagate priority for a PI mutex when a thread blocked on
 * it is interrupted by a signal or resumed by others.
 */
static void
umtx_repropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);

	if (umtx_pi_check_loop(pi))
		return;
	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		thread_lock(pi->pi_owner);
		sched_lend_user_prio(pi->pi_owner, pri);
		thread_unlock(pi->pi_owner);
		if ((pi = uq_owner->uq_pi_blocked) != NULL)
			umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
	}
}

/*
 * Insert a PI mutex into the owned list.
 */
static void
umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_assert(&umtx_lock, MA_OWNED);
	MPASS(pi->pi_owner == NULL);
	pi->pi_owner = owner;
	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
}

/*
 * Disown a PI mutex, and remove it from the owned list.
 */
static void
umtx_pi_disown(struct umtx_pi *pi)
{

	mtx_assert(&umtx_lock, MA_OWNED);
	TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, pi, pi_link);
	pi->pi_owner = NULL;
}
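
/*
 * A worked example of the propagation above (illustrative numbers;
 * lower kernel priority values are better): thread A at priority 130
 * blocks on a PI mutex owned by thread B at 150, which is itself
 * blocked on a second PI mutex owned by thread C at 170.
 * umtx_propagate_priority() walks the owner chain and lends priority
 * 130 to both B and C.  When A is removed by a signal or timeout,
 * umtx_repropagate_priority() recomputes each owner's lent priority
 * from the highest-priority waiter still queued on its contested
 * mutexes, letting B and C fall back toward their own priorities.
 */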

/*
 * Claim ownership of a PI mutex.
 */
static int
umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq;
	int pri;

	mtx_lock(&umtx_lock);
	if (pi->pi_owner == owner) {
		mtx_unlock(&umtx_lock);
		return (0);
	}

	if (pi->pi_owner != NULL) {
		/*
		 * userland may have already messed the mutex, sigh.
		 */
		mtx_unlock(&umtx_lock);
		return (EPERM);
	}
	umtx_pi_setowner(pi, owner);
	uq = TAILQ_FIRST(&pi->pi_blocked);
	if (uq != NULL) {
		pri = UPRI(uq->uq_thread);
		thread_lock(owner);
		if (pri < UPRI(owner))
			sched_lend_user_prio(owner, pri);
		thread_unlock(owner);
	}
	mtx_unlock(&umtx_lock);
	return (0);
}

/*
 * Adjust a thread's position in its blocked PI mutex; this may
 * trigger a new round of priority propagation.
 */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	mtx_lock(&umtx_lock);
	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	if (pi != NULL) {
		umtx_pi_adjust_thread(pi, td);
		umtx_repropagate_priority(pi);
	}
	mtx_unlock(&umtx_lock);
}

/*
 * Sleep on a PI mutex.
 */
static int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, uint32_t owner,
    const char *wmesg, struct abs_timeout *timo, bool shared)
{
	struct umtxq_chain *uc;
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int error, pri;

	error = 0;
	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(uc->uc_busy != 0, ("umtx chain is not busy"));
	umtxq_insert(uq);
	mtx_lock(&umtx_lock);
	if (pi->pi_owner == NULL) {
		mtx_unlock(&umtx_lock);
		td1 = tdfind(owner, shared ? -1 : td->td_proc->p_pid);
		mtx_lock(&umtx_lock);
		if (td1 != NULL) {
			if (pi->pi_owner == NULL)
				umtx_pi_setowner(pi, td1);
			PROC_UNLOCK(td1->td_proc);
		}
	}

	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	thread_lock(td);
	td->td_flags |= TDF_UPIBLOCKED;
	thread_unlock(td);
	umtx_propagate_priority(td);
	mtx_unlock(&umtx_lock);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, wmesg, timo);
	umtxq_remove(uq);

	mtx_lock(&umtx_lock);
	uq->uq_pi_blocked = NULL;
	thread_lock(td);
	td->td_flags &= ~TDF_UPIBLOCKED;
	thread_unlock(td);
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_repropagate_priority(pi);
	mtx_unlock(&umtx_lock);
	umtxq_unlock(&uq->uq_key);

	return (error);
}

/*
 * Add a reference to a PI mutex.
 */
static void
umtx_pi_ref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	pi->pi_refcount++;
}

/*
 * Decrease the reference count of a PI mutex; when the count drops
 * to zero, its memory is freed.
 */
static void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		mtx_lock(&umtx_lock);
		if (pi->pi_owner != NULL)
			umtx_pi_disown(pi);
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
		    ("blocked queue not empty"));
		mtx_unlock(&umtx_lock);
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		umtx_pi_free(pi);
	}
}

/*
 * Find a PI mutex in the hash table.
 */
static struct umtx_pi *
umtx_pi_lookup(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtx_pi *pi;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);

	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
		if (umtx_key_match(&pi->pi_key, key)) {
			return (pi);
		}
	}
	return (NULL);
}

/*
 * Insert a PI mutex into the hash table.
 */
static inline void
umtx_pi_insert(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
}

/*
 * Lock a PI mutex.
 */
static int
do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, old_owner, owner, old;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	pi = umtx_pi_lookup(&uq->uq_key);
	if (pi == NULL) {
		new_pi = umtx_pi_alloc(M_NOWAIT);
		if (new_pi == NULL) {
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc(M_WAITOK);
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL) {
				umtx_pi_free(new_pi);
				new_pi = NULL;
			}
		}
		if (new_pi != NULL) {
			new_pi->pi_key = uq->uq_key;
			umtx_pi_insert(new_pi);
			pi = new_pi;
		}
	}
	umtx_pi_ref(pi);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED) {
			error = 0;
			break;
		}

		if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
			break;
		}

		/* If no one owns it but it is contested, try to acquire it. */
		if (owner == UMUTEX_CONTESTED || owner == UMUTEX_RB_OWNERDEAD) {
			old_owner = owner;
			rv = casueword32(&m->m_owner, owner, &owner,
			    id | UMUTEX_CONTESTED);
			/* The address was invalid. */
			if (rv == -1) {
				error = EFAULT;
				break;
			}

			if (owner == old_owner) {
				umtxq_lock(&uq->uq_key);
				umtxq_busy(&uq->uq_key);
				error = umtx_pi_claim(pi, td);
				umtxq_unbusy(&uq->uq_key);
				umtxq_unlock(&uq->uq_key);
				if (error != 0) {
					/*
					 * Since we're going to return an
					 * error, restore the m_owner to its
					 * previous, unowned state to avoid
					 * compounding the problem.
					 */
					(void)casuword32(&m->m_owner,
					    id | UMUTEX_CONTESTED,
					    old_owner);
				}
				if (error == 0 &&
				    old_owner == UMUTEX_RB_OWNERDEAD)
					error = EOWNERDEAD;
				break;
			}

			error = umtxq_check_susp(td);
			if (error != 0)
				break;

			/* If this failed the lock has changed, restart. */
			continue;
		}

		if ((owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails,
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old, owner |
		    UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}

		umtxq_lock(&uq->uq_key);
		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.  Note that the
		 * UMUTEX_RB_OWNERDEAD value for owner is impossible there.
		 */
		if (old == owner) {
			error = umtxq_sleep_pi(uq, pi,
			    owner & ~UMUTEX_CONTESTED,
			    "umtxpi", timeout == NULL ? NULL : &timo,
			    (flags & USYNC_PROCESS_SHARED) != 0);
			if (error != 0)
				continue;
		} else {
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
		}

		error = umtxq_check_susp(td);
		if (error != 0)
			break;
	}

	umtxq_lock(&uq->uq_key);
	umtx_pi_unref(pi);
	umtxq_unlock(&uq->uq_key);

	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Unlock a PI mutex.
 */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	uint32_t id, new_owner, old, owner;
	int count, error, pri;

	id = td->td_tid;

	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	new_owner = umtx_unlock_val(flags, rb);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, new_owner);
		if (error == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count_pi(&key, &uq_first);
	if (uq_first != NULL) {
		mtx_lock(&umtx_lock);
		pi = uq_first->uq_pi_blocked;
		KASSERT(pi != NULL, ("pi == NULL?"));
		if (pi->pi_owner != td && !(rb && pi->pi_owner == NULL)) {
			mtx_unlock(&umtx_lock);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			/* userland messed the mutex */
			return (EPERM);
		}
		uq_me = td->td_umtxq;
		if (pi->pi_owner == td)
			umtx_pi_disown(pi);
		/* get highest priority thread which is still sleeping. */
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		while (uq_first != NULL &&
		    (uq_first->uq_flags & UQF_UMTXQ) == 0) {
			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
		}
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
		if (uq_first)
			umtxq_signal_thread(uq_first);
	} else {
		pi = umtx_pi_lookup(&key);
		/*
		 * A umtx_pi can exist if a signal or timeout removed the
		 * last waiter from the umtxq, but there is still
		 * a thread in do_lock_pi() holding the umtx_pi.
		 */
		if (pi != NULL) {
			/*
			 * The umtx_pi can be unowned, such as when a thread
			 * has just entered do_lock_pi(), allocated the
			 * umtx_pi, and unlocked the umtxq.
			 * If the current thread owns it, it must disown it.
			 */
			mtx_lock(&umtx_lock);
			if (pi->pi_owner == td)
				umtx_pi_disown(pi);
			mtx_unlock(&umtx_lock);
		}
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	if (count > 1)
		new_owner |= UMUTEX_CONTESTED;
	error = casueword32(&m->m_owner, owner, &old, new_owner);

	umtxq_unbusy_unlocked(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

/*
 * Lock a PP mutex.
 */
static int
do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct abs_timeout timo;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t ceiling;
	uint32_t owner, id;
	int error, pri, old_inherited_pri, su, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
	for (;;) {
		old_inherited_pri = uq->uq_inherited_pri;
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		rv = fueword32(&m->m_ceilings[0], &ceiling);
		if (rv == -1) {
			error = EFAULT;
			goto out;
		}
		ceiling = RTP_PRIO_MAX - ceiling;
		if (ceiling > RTP_PRIO_MAX) {
			error = EINVAL;
			goto out;
		}

		mtx_lock(&umtx_lock);
		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
			mtx_unlock(&umtx_lock);
			error = EINVAL;
			goto out;
		}
		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
			thread_lock(td);
			if (uq->uq_inherited_pri < UPRI(td))
				sched_lend_user_prio(td, uq->uq_inherited_pri);
			thread_unlock(td);
		}
		mtx_unlock(&umtx_lock);

		rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    id | UMUTEX_CONTESTED);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		if (owner == UMUTEX_CONTESTED) {
			error = 0;
			break;
		} else if (owner == UMUTEX_RB_OWNERDEAD) {
			rv = casueword32(&m->m_owner, UMUTEX_RB_OWNERDEAD,
			    &owner, id | UMUTEX_CONTESTED);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (owner == UMUTEX_RB_OWNERDEAD) {
				error = EOWNERDEAD; /* success */
				break;
			}
			error = 0;
		} else if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", timeout == NULL ?
		    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);

		mtx_lock(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}

	if (error != 0 && error != EOWNERDEAD) {
		mtx_lock(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}

out:
	umtxq_unbusy_unlocked(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}
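
/*
 * A note on the ceiling arithmetic above: userland stores ceilings as
 * POSIX-style realtime priorities, where a larger value is a stronger
 * ceiling.  RTP_PRIO_MAX - ceiling converts that to the kernel's
 * sense (lower is better), and adding PRI_MIN_REALTIME places the
 * result in the kernel's real-time priority range.  Since ceiling is
 * unsigned, the single "ceiling > RTP_PRIO_MAX" check also rejects
 * stored values above RTP_PRIO_MAX, which would have wrapped around
 * during the subtraction.
 */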
2248 */
2249 static int
2250 do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
2251 {
2252 struct umtx_key key;
2253 struct umtx_q *uq, *uq2;
2254 struct umtx_pi *pi;
2255 uint32_t id, owner, rceiling;
2256 int error, pri, new_inherited_pri, su;
2257
2258 id = td->td_tid;
2259 uq = td->td_umtxq;
2260 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
2261
2262 /*
2263 * Make sure we own this mtx.
2264 */
2265 error = fueword32(&m->m_owner, &owner);
2266 if (error == -1)
2267 return (EFAULT);
2268
2269 if ((owner & ~UMUTEX_CONTESTED) != id)
2270 return (EPERM);
2271
2272 error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
2273 if (error != 0)
2274 return (error);
2275
2276 if (rceiling == -1)
2277 new_inherited_pri = PRI_MAX;
2278 else {
2279 rceiling = RTP_PRIO_MAX - rceiling;
2280 if (rceiling > RTP_PRIO_MAX)
2281 return (EINVAL);
2282 new_inherited_pri = PRI_MIN_REALTIME + rceiling;
2283 }
2284
2285 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
2286 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
2287 &key)) != 0)
2288 return (error);
2289 umtxq_lock(&key);
2290 umtxq_busy(&key);
2291 umtxq_unlock(&key);
2292 /*
2293 * For a priority-protected mutex, always set the unlocked state
2294 * to UMUTEX_CONTESTED so that userland always enters the kernel
2295 * to lock the mutex. This is necessary because thread priorities
2296 * must be adjusted for such mutexes.
2297 */
2298 error = suword32(&m->m_owner, umtx_unlock_val(flags, rb) |
2299 UMUTEX_CONTESTED);
2300
2301 umtxq_lock(&key);
2302 if (error == 0)
2303 umtxq_signal(&key, 1);
2304 umtxq_unbusy(&key);
2305 umtxq_unlock(&key);
2306
2307 if (error == -1)
2308 error = EFAULT;
2309 else {
2310 mtx_lock(&umtx_lock);
2311 if (su != 0)
2312 uq->uq_inherited_pri = new_inherited_pri;
2313 pri = PRI_MAX;
2314 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2315 uq2 = TAILQ_FIRST(&pi->pi_blocked);
2316 if (uq2 != NULL) {
2317 if (pri > UPRI(uq2->uq_thread))
2318 pri = UPRI(uq2->uq_thread);
2319 }
2320 }
2321 if (pri > uq->uq_inherited_pri)
2322 pri = uq->uq_inherited_pri;
2323 thread_lock(td);
2324 sched_lend_user_prio(td, pri);
2325 thread_unlock(td);
2326 mtx_unlock(&umtx_lock);
2327 }
2328 umtx_key_release(&key);
2329 return (error);
2330 }
2331
2332 static int
2333 do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
2334 uint32_t *old_ceiling)
2335 {
2336 struct umtx_q *uq;
2337 uint32_t flags, id, owner, save_ceiling;
2338 int error, rv, rv1;
2339
2340 error = fueword32(&m->m_flags, &flags);
2341 if (error == -1)
2342 return (EFAULT);
2343 if ((flags & UMUTEX_PRIO_PROTECT) == 0)
2344 return (EINVAL);
2345 if (ceiling > RTP_PRIO_MAX)
2346 return (EINVAL);
2347 id = td->td_tid;
2348 uq = td->td_umtxq;
2349 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
2350 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
2351 &uq->uq_key)) != 0)
2352 return (error);
2353 for (;;) {
2354 umtxq_lock(&uq->uq_key);
2355 umtxq_busy(&uq->uq_key);
2356 umtxq_unlock(&uq->uq_key);
2357
2358 rv = fueword32(&m->m_ceilings[0], &save_ceiling);
2359 if (rv == -1) {
2360 error = EFAULT;
2361 break;
2362 }
2363
2364 rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
2365 id | UMUTEX_CONTESTED);
2366 if (rv == -1) {
2367 error = EFAULT;
2368 break;
2369 }
2370
2371 if (owner == UMUTEX_CONTESTED) {
2372 rv = suword32(&m->m_ceilings[0], ceiling);
2373 rv1 = suword32(&m->m_owner, UMUTEX_CONTESTED);
2374 error = (rv == 0 && rv1 == 0) ?
0: EFAULT; 2375 break; 2376 } 2377 2378 if ((owner & ~UMUTEX_CONTESTED) == id) { 2379 rv = suword32(&m->m_ceilings[0], ceiling); 2380 error = rv == 0 ? 0 : EFAULT; 2381 break; 2382 } 2383 2384 if (owner == UMUTEX_RB_OWNERDEAD) { 2385 error = EOWNERDEAD; 2386 break; 2387 } else if (owner == UMUTEX_RB_NOTRECOV) { 2388 error = ENOTRECOVERABLE; 2389 break; 2390 } 2391 2392 /* 2393 * If we caught a signal, we have retried and now 2394 * exit immediately. 2395 */ 2396 if (error != 0) 2397 break; 2398 2399 /* 2400 * We set the contested bit, sleep. Otherwise the lock changed 2401 * and we need to retry or we lost a race to the thread 2402 * unlocking the umtx. 2403 */ 2404 umtxq_lock(&uq->uq_key); 2405 umtxq_insert(uq); 2406 umtxq_unbusy(&uq->uq_key); 2407 error = umtxq_sleep(uq, "umtxpp", NULL); 2408 umtxq_remove(uq); 2409 umtxq_unlock(&uq->uq_key); 2410 } 2411 umtxq_lock(&uq->uq_key); 2412 if (error == 0) 2413 umtxq_signal(&uq->uq_key, INT_MAX); 2414 umtxq_unbusy(&uq->uq_key); 2415 umtxq_unlock(&uq->uq_key); 2416 umtx_key_release(&uq->uq_key); 2417 if (error == 0 && old_ceiling != NULL) { 2418 rv = suword32(old_ceiling, save_ceiling); 2419 error = rv == 0 ? 0 : EFAULT; 2420 } 2421 return (error); 2422 } 2423 2424 /* 2425 * Lock a userland POSIX mutex. 2426 */ 2427 static int 2428 do_lock_umutex(struct thread *td, struct umutex *m, 2429 struct _umtx_time *timeout, int mode) 2430 { 2431 uint32_t flags; 2432 int error; 2433 2434 error = fueword32(&m->m_flags, &flags); 2435 if (error == -1) 2436 return (EFAULT); 2437 2438 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2439 case 0: 2440 error = do_lock_normal(td, m, flags, timeout, mode); 2441 break; 2442 case UMUTEX_PRIO_INHERIT: 2443 error = do_lock_pi(td, m, flags, timeout, mode); 2444 break; 2445 case UMUTEX_PRIO_PROTECT: 2446 error = do_lock_pp(td, m, flags, timeout, mode); 2447 break; 2448 default: 2449 return (EINVAL); 2450 } 2451 if (timeout == NULL) { 2452 if (error == EINTR && mode != _UMUTEX_WAIT) 2453 error = ERESTART; 2454 } else { 2455 /* Timed-locking is not restarted. */ 2456 if (error == ERESTART) 2457 error = EINTR; 2458 } 2459 return (error); 2460 } 2461 2462 /* 2463 * Unlock a userland POSIX mutex. 
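 *
 * The m_flags word selects the implementation: plain, priority-inherit
 * (UMUTEX_PRIO_INHERIT) or priority-protect (UMUTEX_PRIO_PROTECT).
 * Requesting both protocols at once falls through to EINVAL below.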
2464 */
2465 static int
2466 do_unlock_umutex(struct thread *td, struct umutex *m, bool rb)
2467 {
2468 uint32_t flags;
2469 int error;
2470
2471 error = fueword32(&m->m_flags, &flags);
2472 if (error == -1)
2473 return (EFAULT);
2474
2475 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2476 case 0:
2477 return (do_unlock_normal(td, m, flags, rb));
2478 case UMUTEX_PRIO_INHERIT:
2479 return (do_unlock_pi(td, m, flags, rb));
2480 case UMUTEX_PRIO_PROTECT:
2481 return (do_unlock_pp(td, m, flags, rb));
2482 }
2483
2484 return (EINVAL);
2485 }
2486
2487 static int
2488 do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
2489 struct timespec *timeout, u_long wflags)
2490 {
2491 struct abs_timeout timo;
2492 struct umtx_q *uq;
2493 uint32_t flags, clockid, hasw;
2494 int error;
2495
2496 uq = td->td_umtxq;
2497 error = fueword32(&cv->c_flags, &flags);
2498 if (error == -1)
2499 return (EFAULT);
2500 error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
2501 if (error != 0)
2502 return (error);
2503
2504 if ((wflags & CVWAIT_CLOCKID) != 0) {
2505 error = fueword32(&cv->c_clockid, &clockid);
2506 if (error == -1) {
2507 umtx_key_release(&uq->uq_key);
2508 return (EFAULT);
2509 }
2510 if (clockid < CLOCK_REALTIME ||
2511 clockid >= CLOCK_THREAD_CPUTIME_ID) {
2512 /* Only the predefined hardware clock ids will work. */
2513 umtx_key_release(&uq->uq_key);
2514 return (EINVAL);
2515 }
2516 } else {
2517 clockid = CLOCK_REALTIME;
2518 }
2519
2520 umtxq_lock(&uq->uq_key);
2521 umtxq_busy(&uq->uq_key);
2522 umtxq_insert(uq);
2523 umtxq_unlock(&uq->uq_key);
2524
2525 /*
2526 * Set c_has_waiters to 1 before releasing the user mutex, but
2527 * avoid dirtying the cache line when it is unnecessary.
2528 */
2529 error = fueword32(&cv->c_has_waiters, &hasw);
2530 if (error == 0 && hasw == 0)
2531 suword32(&cv->c_has_waiters, 1);
2532
2533 umtxq_unbusy_unlocked(&uq->uq_key);
2534
2535 error = do_unlock_umutex(td, m, false);
2536
2537 if (timeout != NULL)
2538 abs_timeout_init(&timo, clockid, (wflags & CVWAIT_ABSTIME) != 0,
2539 timeout);
2540
2541 umtxq_lock(&uq->uq_key);
2542 if (error == 0) {
2543 error = umtxq_sleep(uq, "ucond", timeout == NULL ?
2544 NULL : &timo);
2545 }
2546
2547 if ((uq->uq_flags & UQF_UMTXQ) == 0)
2548 error = 0;
2549 else {
2550 /*
2551 * This must be a timeout, an interruption by a signal, or
2552 * a spurious wakeup; clear the c_has_waiters flag when
2553 * necessary.
2554 */
2555 umtxq_busy(&uq->uq_key);
2556 if ((uq->uq_flags & UQF_UMTXQ) != 0) {
2557 int oldlen = uq->uq_cur_queue->length;
2558 umtxq_remove(uq);
2559 if (oldlen == 1) {
2560 umtxq_unlock(&uq->uq_key);
2561 suword32(&cv->c_has_waiters, 0);
2562 umtxq_lock(&uq->uq_key);
2563 }
2564 }
2565 umtxq_unbusy(&uq->uq_key);
2566 if (error == ERESTART)
2567 error = EINTR;
2568 }
2569
2570 umtxq_unlock(&uq->uq_key);
2571 umtx_key_release(&uq->uq_key);
2572 return (error);
2573 }
2574
2575 /*
2576 * Signal a userland condition variable.
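 *
 * Wakes at most one waiter; if that empties the wait queue, the
 * c_has_waiters hint is cleared so later signals can be resolved
 * entirely in userland.  A hypothetical call (illustrative only):
 *
 *	_umtx_op(&cv, UMTX_OP_CV_SIGNAL, 0, NULL, NULL);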
2577 */ 2578 static int 2579 do_cv_signal(struct thread *td, struct ucond *cv) 2580 { 2581 struct umtx_key key; 2582 int error, cnt, nwake; 2583 uint32_t flags; 2584 2585 error = fueword32(&cv->c_flags, &flags); 2586 if (error == -1) 2587 return (EFAULT); 2588 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2589 return (error); 2590 umtxq_lock(&key); 2591 umtxq_busy(&key); 2592 cnt = umtxq_count(&key); 2593 nwake = umtxq_signal(&key, 1); 2594 if (cnt <= nwake) { 2595 umtxq_unlock(&key); 2596 error = suword32(&cv->c_has_waiters, 0); 2597 if (error == -1) 2598 error = EFAULT; 2599 umtxq_lock(&key); 2600 } 2601 umtxq_unbusy(&key); 2602 umtxq_unlock(&key); 2603 umtx_key_release(&key); 2604 return (error); 2605 } 2606 2607 static int 2608 do_cv_broadcast(struct thread *td, struct ucond *cv) 2609 { 2610 struct umtx_key key; 2611 int error; 2612 uint32_t flags; 2613 2614 error = fueword32(&cv->c_flags, &flags); 2615 if (error == -1) 2616 return (EFAULT); 2617 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2618 return (error); 2619 2620 umtxq_lock(&key); 2621 umtxq_busy(&key); 2622 umtxq_signal(&key, INT_MAX); 2623 umtxq_unlock(&key); 2624 2625 error = suword32(&cv->c_has_waiters, 0); 2626 if (error == -1) 2627 error = EFAULT; 2628 2629 umtxq_unbusy_unlocked(&key); 2630 2631 umtx_key_release(&key); 2632 return (error); 2633 } 2634 2635 static int 2636 do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, struct _umtx_time *timeout) 2637 { 2638 struct abs_timeout timo; 2639 struct umtx_q *uq; 2640 uint32_t flags, wrflags; 2641 int32_t state, oldstate; 2642 int32_t blocked_readers; 2643 int error, error1, rv; 2644 2645 uq = td->td_umtxq; 2646 error = fueword32(&rwlock->rw_flags, &flags); 2647 if (error == -1) 2648 return (EFAULT); 2649 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2650 if (error != 0) 2651 return (error); 2652 2653 if (timeout != NULL) 2654 abs_timeout_init2(&timo, timeout); 2655 2656 wrflags = URWLOCK_WRITE_OWNER; 2657 if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER)) 2658 wrflags |= URWLOCK_WRITE_WAITERS; 2659 2660 for (;;) { 2661 rv = fueword32(&rwlock->rw_state, &state); 2662 if (rv == -1) { 2663 umtx_key_release(&uq->uq_key); 2664 return (EFAULT); 2665 } 2666 2667 /* try to lock it */ 2668 while (!(state & wrflags)) { 2669 if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) { 2670 umtx_key_release(&uq->uq_key); 2671 return (EAGAIN); 2672 } 2673 rv = casueword32(&rwlock->rw_state, state, 2674 &oldstate, state + 1); 2675 if (rv == -1) { 2676 umtx_key_release(&uq->uq_key); 2677 return (EFAULT); 2678 } 2679 if (oldstate == state) { 2680 umtx_key_release(&uq->uq_key); 2681 return (0); 2682 } 2683 error = umtxq_check_susp(td); 2684 if (error != 0) 2685 break; 2686 state = oldstate; 2687 } 2688 2689 if (error) 2690 break; 2691 2692 /* grab monitor lock */ 2693 umtxq_lock(&uq->uq_key); 2694 umtxq_busy(&uq->uq_key); 2695 umtxq_unlock(&uq->uq_key); 2696 2697 /* 2698 * re-read the state, in case it changed between the try-lock above 2699 * and the check below 2700 */ 2701 rv = fueword32(&rwlock->rw_state, &state); 2702 if (rv == -1) 2703 error = EFAULT; 2704 2705 /* set read contention bit */ 2706 while (error == 0 && (state & wrflags) && 2707 !(state & URWLOCK_READ_WAITERS)) { 2708 rv = casueword32(&rwlock->rw_state, state, 2709 &oldstate, state | URWLOCK_READ_WAITERS); 2710 if (rv == -1) { 2711 error = EFAULT; 2712 break; 2713 } 2714 if (oldstate == state) 
2715 goto sleep; 2716 state = oldstate; 2717 error = umtxq_check_susp(td); 2718 if (error != 0) 2719 break; 2720 } 2721 if (error != 0) { 2722 umtxq_unbusy_unlocked(&uq->uq_key); 2723 break; 2724 } 2725 2726 /* state is changed while setting flags, restart */ 2727 if (!(state & wrflags)) { 2728 umtxq_unbusy_unlocked(&uq->uq_key); 2729 error = umtxq_check_susp(td); 2730 if (error != 0) 2731 break; 2732 continue; 2733 } 2734 2735 sleep: 2736 /* contention bit is set, before sleeping, increase read waiter count */ 2737 rv = fueword32(&rwlock->rw_blocked_readers, 2738 &blocked_readers); 2739 if (rv == -1) { 2740 umtxq_unbusy_unlocked(&uq->uq_key); 2741 error = EFAULT; 2742 break; 2743 } 2744 suword32(&rwlock->rw_blocked_readers, blocked_readers+1); 2745 2746 while (state & wrflags) { 2747 umtxq_lock(&uq->uq_key); 2748 umtxq_insert(uq); 2749 umtxq_unbusy(&uq->uq_key); 2750 2751 error = umtxq_sleep(uq, "urdlck", timeout == NULL ? 2752 NULL : &timo); 2753 2754 umtxq_busy(&uq->uq_key); 2755 umtxq_remove(uq); 2756 umtxq_unlock(&uq->uq_key); 2757 if (error) 2758 break; 2759 rv = fueword32(&rwlock->rw_state, &state); 2760 if (rv == -1) { 2761 error = EFAULT; 2762 break; 2763 } 2764 } 2765 2766 /* decrease read waiter count, and may clear read contention bit */ 2767 rv = fueword32(&rwlock->rw_blocked_readers, 2768 &blocked_readers); 2769 if (rv == -1) { 2770 umtxq_unbusy_unlocked(&uq->uq_key); 2771 error = EFAULT; 2772 break; 2773 } 2774 suword32(&rwlock->rw_blocked_readers, blocked_readers-1); 2775 if (blocked_readers == 1) { 2776 rv = fueword32(&rwlock->rw_state, &state); 2777 if (rv == -1) { 2778 umtxq_unbusy_unlocked(&uq->uq_key); 2779 error = EFAULT; 2780 break; 2781 } 2782 for (;;) { 2783 rv = casueword32(&rwlock->rw_state, state, 2784 &oldstate, state & ~URWLOCK_READ_WAITERS); 2785 if (rv == -1) { 2786 error = EFAULT; 2787 break; 2788 } 2789 if (oldstate == state) 2790 break; 2791 state = oldstate; 2792 error1 = umtxq_check_susp(td); 2793 if (error1 != 0) { 2794 if (error == 0) 2795 error = error1; 2796 break; 2797 } 2798 } 2799 } 2800 2801 umtxq_unbusy_unlocked(&uq->uq_key); 2802 if (error != 0) 2803 break; 2804 } 2805 umtx_key_release(&uq->uq_key); 2806 if (error == ERESTART) 2807 error = EINTR; 2808 return (error); 2809 } 2810 2811 static int 2812 do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout) 2813 { 2814 struct abs_timeout timo; 2815 struct umtx_q *uq; 2816 uint32_t flags; 2817 int32_t state, oldstate; 2818 int32_t blocked_writers; 2819 int32_t blocked_readers; 2820 int error, error1, rv; 2821 2822 uq = td->td_umtxq; 2823 error = fueword32(&rwlock->rw_flags, &flags); 2824 if (error == -1) 2825 return (EFAULT); 2826 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2827 if (error != 0) 2828 return (error); 2829 2830 if (timeout != NULL) 2831 abs_timeout_init2(&timo, timeout); 2832 2833 blocked_readers = 0; 2834 for (;;) { 2835 rv = fueword32(&rwlock->rw_state, &state); 2836 if (rv == -1) { 2837 umtx_key_release(&uq->uq_key); 2838 return (EFAULT); 2839 } 2840 while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) { 2841 rv = casueword32(&rwlock->rw_state, state, 2842 &oldstate, state | URWLOCK_WRITE_OWNER); 2843 if (rv == -1) { 2844 umtx_key_release(&uq->uq_key); 2845 return (EFAULT); 2846 } 2847 if (oldstate == state) { 2848 umtx_key_release(&uq->uq_key); 2849 return (0); 2850 } 2851 state = oldstate; 2852 error = umtxq_check_susp(td); 2853 if (error != 0) 2854 break; 2855 } 2856 2857 if (error) { 2858 if 
(!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) && 2859 blocked_readers != 0) { 2860 umtxq_lock(&uq->uq_key); 2861 umtxq_busy(&uq->uq_key); 2862 umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE); 2863 umtxq_unbusy(&uq->uq_key); 2864 umtxq_unlock(&uq->uq_key); 2865 } 2866 2867 break; 2868 } 2869 2870 /* grab monitor lock */ 2871 umtxq_lock(&uq->uq_key); 2872 umtxq_busy(&uq->uq_key); 2873 umtxq_unlock(&uq->uq_key); 2874 2875 /* 2876 * re-read the state, in case it changed between the try-lock above 2877 * and the check below 2878 */ 2879 rv = fueword32(&rwlock->rw_state, &state); 2880 if (rv == -1) 2881 error = EFAULT; 2882 2883 while (error == 0 && ((state & URWLOCK_WRITE_OWNER) || 2884 URWLOCK_READER_COUNT(state) != 0) && 2885 (state & URWLOCK_WRITE_WAITERS) == 0) { 2886 rv = casueword32(&rwlock->rw_state, state, 2887 &oldstate, state | URWLOCK_WRITE_WAITERS); 2888 if (rv == -1) { 2889 error = EFAULT; 2890 break; 2891 } 2892 if (oldstate == state) 2893 goto sleep; 2894 state = oldstate; 2895 error = umtxq_check_susp(td); 2896 if (error != 0) 2897 break; 2898 } 2899 if (error != 0) { 2900 umtxq_unbusy_unlocked(&uq->uq_key); 2901 break; 2902 } 2903 2904 if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) { 2905 umtxq_unbusy_unlocked(&uq->uq_key); 2906 error = umtxq_check_susp(td); 2907 if (error != 0) 2908 break; 2909 continue; 2910 } 2911 sleep: 2912 rv = fueword32(&rwlock->rw_blocked_writers, 2913 &blocked_writers); 2914 if (rv == -1) { 2915 umtxq_unbusy_unlocked(&uq->uq_key); 2916 error = EFAULT; 2917 break; 2918 } 2919 suword32(&rwlock->rw_blocked_writers, blocked_writers+1); 2920 2921 while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) { 2922 umtxq_lock(&uq->uq_key); 2923 umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE); 2924 umtxq_unbusy(&uq->uq_key); 2925 2926 error = umtxq_sleep(uq, "uwrlck", timeout == NULL ? 2927 NULL : &timo); 2928 2929 umtxq_busy(&uq->uq_key); 2930 umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE); 2931 umtxq_unlock(&uq->uq_key); 2932 if (error) 2933 break; 2934 rv = fueword32(&rwlock->rw_state, &state); 2935 if (rv == -1) { 2936 error = EFAULT; 2937 break; 2938 } 2939 } 2940 2941 rv = fueword32(&rwlock->rw_blocked_writers, 2942 &blocked_writers); 2943 if (rv == -1) { 2944 umtxq_unbusy_unlocked(&uq->uq_key); 2945 error = EFAULT; 2946 break; 2947 } 2948 suword32(&rwlock->rw_blocked_writers, blocked_writers-1); 2949 if (blocked_writers == 1) { 2950 rv = fueword32(&rwlock->rw_state, &state); 2951 if (rv == -1) { 2952 umtxq_unbusy_unlocked(&uq->uq_key); 2953 error = EFAULT; 2954 break; 2955 } 2956 for (;;) { 2957 rv = casueword32(&rwlock->rw_state, state, 2958 &oldstate, state & ~URWLOCK_WRITE_WAITERS); 2959 if (rv == -1) { 2960 error = EFAULT; 2961 break; 2962 } 2963 if (oldstate == state) 2964 break; 2965 state = oldstate; 2966 error1 = umtxq_check_susp(td); 2967 /* 2968 * We are leaving the URWLOCK_WRITE_WAITERS 2969 * behind, but this should not harm the 2970 * correctness. 
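 * At worst, a stale URWLOCK_WRITE_WAITERS bit costs an unlocking
 * thread an extra trip into the kernel; it should not cause a
 * lost wakeup.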
2971 */ 2972 if (error1 != 0) { 2973 if (error == 0) 2974 error = error1; 2975 break; 2976 } 2977 } 2978 rv = fueword32(&rwlock->rw_blocked_readers, 2979 &blocked_readers); 2980 if (rv == -1) { 2981 umtxq_unbusy_unlocked(&uq->uq_key); 2982 error = EFAULT; 2983 break; 2984 } 2985 } else 2986 blocked_readers = 0; 2987 2988 umtxq_unbusy_unlocked(&uq->uq_key); 2989 } 2990 2991 umtx_key_release(&uq->uq_key); 2992 if (error == ERESTART) 2993 error = EINTR; 2994 return (error); 2995 } 2996 2997 static int 2998 do_rw_unlock(struct thread *td, struct urwlock *rwlock) 2999 { 3000 struct umtx_q *uq; 3001 uint32_t flags; 3002 int32_t state, oldstate; 3003 int error, rv, q, count; 3004 3005 uq = td->td_umtxq; 3006 error = fueword32(&rwlock->rw_flags, &flags); 3007 if (error == -1) 3008 return (EFAULT); 3009 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 3010 if (error != 0) 3011 return (error); 3012 3013 error = fueword32(&rwlock->rw_state, &state); 3014 if (error == -1) { 3015 error = EFAULT; 3016 goto out; 3017 } 3018 if (state & URWLOCK_WRITE_OWNER) { 3019 for (;;) { 3020 rv = casueword32(&rwlock->rw_state, state, 3021 &oldstate, state & ~URWLOCK_WRITE_OWNER); 3022 if (rv == -1) { 3023 error = EFAULT; 3024 goto out; 3025 } 3026 if (oldstate != state) { 3027 state = oldstate; 3028 if (!(oldstate & URWLOCK_WRITE_OWNER)) { 3029 error = EPERM; 3030 goto out; 3031 } 3032 error = umtxq_check_susp(td); 3033 if (error != 0) 3034 goto out; 3035 } else 3036 break; 3037 } 3038 } else if (URWLOCK_READER_COUNT(state) != 0) { 3039 for (;;) { 3040 rv = casueword32(&rwlock->rw_state, state, 3041 &oldstate, state - 1); 3042 if (rv == -1) { 3043 error = EFAULT; 3044 goto out; 3045 } 3046 if (oldstate != state) { 3047 state = oldstate; 3048 if (URWLOCK_READER_COUNT(oldstate) == 0) { 3049 error = EPERM; 3050 goto out; 3051 } 3052 error = umtxq_check_susp(td); 3053 if (error != 0) 3054 goto out; 3055 } else 3056 break; 3057 } 3058 } else { 3059 error = EPERM; 3060 goto out; 3061 } 3062 3063 count = 0; 3064 3065 if (!(flags & URWLOCK_PREFER_READER)) { 3066 if (state & URWLOCK_WRITE_WAITERS) { 3067 count = 1; 3068 q = UMTX_EXCLUSIVE_QUEUE; 3069 } else if (state & URWLOCK_READ_WAITERS) { 3070 count = INT_MAX; 3071 q = UMTX_SHARED_QUEUE; 3072 } 3073 } else { 3074 if (state & URWLOCK_READ_WAITERS) { 3075 count = INT_MAX; 3076 q = UMTX_SHARED_QUEUE; 3077 } else if (state & URWLOCK_WRITE_WAITERS) { 3078 count = 1; 3079 q = UMTX_EXCLUSIVE_QUEUE; 3080 } 3081 } 3082 3083 if (count) { 3084 umtxq_lock(&uq->uq_key); 3085 umtxq_busy(&uq->uq_key); 3086 umtxq_signal_queue(&uq->uq_key, count, q); 3087 umtxq_unbusy(&uq->uq_key); 3088 umtxq_unlock(&uq->uq_key); 3089 } 3090 out: 3091 umtx_key_release(&uq->uq_key); 3092 return (error); 3093 } 3094 3095 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3096 static int 3097 do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout) 3098 { 3099 struct abs_timeout timo; 3100 struct umtx_q *uq; 3101 uint32_t flags, count, count1; 3102 int error, rv; 3103 3104 uq = td->td_umtxq; 3105 error = fueword32(&sem->_flags, &flags); 3106 if (error == -1) 3107 return (EFAULT); 3108 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3109 if (error != 0) 3110 return (error); 3111 3112 if (timeout != NULL) 3113 abs_timeout_init2(&timo, timeout); 3114 3115 umtxq_lock(&uq->uq_key); 3116 umtxq_busy(&uq->uq_key); 3117 umtxq_insert(uq); 3118 umtxq_unlock(&uq->uq_key); 3119 rv = casueword32(&sem->_has_waiters, 0, &count1, 1); 3120 if (rv == 0) 
3121 rv = fueword32(&sem->_count, &count);
3122 if (rv == -1 || count != 0) {
3123 umtxq_lock(&uq->uq_key);
3124 umtxq_unbusy(&uq->uq_key);
3125 umtxq_remove(uq);
3126 umtxq_unlock(&uq->uq_key);
3127 umtx_key_release(&uq->uq_key);
3128 return (rv == -1 ? EFAULT : 0);
3129 }
3130 umtxq_lock(&uq->uq_key);
3131 umtxq_unbusy(&uq->uq_key);
3132
3133 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo);
3134
3135 if ((uq->uq_flags & UQF_UMTXQ) == 0)
3136 error = 0;
3137 else {
3138 umtxq_remove(uq);
3139 /* A relative timeout cannot be restarted. */
3140 if (error == ERESTART && timeout != NULL &&
3141 (timeout->_flags & UMTX_ABSTIME) == 0)
3142 error = EINTR;
3143 }
3144 umtxq_unlock(&uq->uq_key);
3145 umtx_key_release(&uq->uq_key);
3146 return (error);
3147 }
3148
3149 /*
3150 * Signal a userland semaphore.
3151 */
3152 static int
3153 do_sem_wake(struct thread *td, struct _usem *sem)
3154 {
3155 struct umtx_key key;
3156 int error, cnt;
3157 uint32_t flags;
3158
3159 error = fueword32(&sem->_flags, &flags);
3160 if (error == -1)
3161 return (EFAULT);
3162 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
3163 return (error);
3164 umtxq_lock(&key);
3165 umtxq_busy(&key);
3166 cnt = umtxq_count(&key);
3167 if (cnt > 0) {
3168 /*
3169 * The wait count is greater than zero: the semaphore memory
3170 * is still referenced by user code, so it is safe to update
3171 * the _has_waiters flag.
3172 */
3173 if (cnt == 1) {
3174 umtxq_unlock(&key);
3175 error = suword32(&sem->_has_waiters, 0);
3176 umtxq_lock(&key);
3177 if (error == -1)
3178 error = EFAULT;
3179 }
3180 umtxq_signal(&key, 1);
3181 }
3182 umtxq_unbusy(&key);
3183 umtxq_unlock(&key);
3184 umtx_key_release(&key);
3185 return (error);
3186 }
3187 #endif
3188
3189 static int
3190 do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout)
3191 {
3192 struct abs_timeout timo;
3193 struct umtx_q *uq;
3194 uint32_t count, flags;
3195 int error, rv;
3196
3197 uq = td->td_umtxq;
3198 flags = fuword32(&sem->_flags);
3199 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
3200 if (error != 0)
3201 return (error);
3202
3203 if (timeout != NULL)
3204 abs_timeout_init2(&timo, timeout);
3205
3206 umtxq_lock(&uq->uq_key);
3207 umtxq_busy(&uq->uq_key);
3208 umtxq_insert(uq);
3209 umtxq_unlock(&uq->uq_key);
3210 rv = fueword32(&sem->_count, &count);
3211 if (rv == -1) {
3212 umtxq_lock(&uq->uq_key);
3213 umtxq_unbusy(&uq->uq_key);
3214 umtxq_remove(uq);
3215 umtxq_unlock(&uq->uq_key);
3216 umtx_key_release(&uq->uq_key);
3217 return (EFAULT);
3218 }
3219 for (;;) {
3220 if (USEM_COUNT(count) != 0) {
3221 umtxq_lock(&uq->uq_key);
3222 umtxq_unbusy(&uq->uq_key);
3223 umtxq_remove(uq);
3224 umtxq_unlock(&uq->uq_key);
3225 umtx_key_release(&uq->uq_key);
3226 return (0);
3227 }
3228 if (count == USEM_HAS_WAITERS)
3229 break;
3230 rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS);
3231 if (rv == -1) {
3232 umtxq_lock(&uq->uq_key);
3233 umtxq_unbusy(&uq->uq_key);
3234 umtxq_remove(uq);
3235 umtxq_unlock(&uq->uq_key);
3236 umtx_key_release(&uq->uq_key);
3237 return (EFAULT);
3238 }
3239 if (count == 0)
3240 break;
3241 }
3242 umtxq_lock(&uq->uq_key);
3243 umtxq_unbusy(&uq->uq_key);
3244
3245 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo);
3246
3247 if ((uq->uq_flags & UQF_UMTXQ) == 0)
3248 error = 0;
3249 else {
3250 umtxq_remove(uq);
3251 if (timeout != NULL && (timeout->_flags & UMTX_ABSTIME) == 0) {
3252 /* A relative timeout cannot be restarted.
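 * Instead, report EINTR and write the remaining time back to
 * the caller (see the timeout update below and the copy-out in
 * __umtx_op_sem2_wait()), so userland can resume the wait.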
*/ 3253 if (error == ERESTART) 3254 error = EINTR; 3255 if (error == EINTR) { 3256 abs_timeout_update(&timo); 3257 timeout->_timeout = timo.end; 3258 timespecsub(&timeout->_timeout, &timo.cur); 3259 } 3260 } 3261 } 3262 umtxq_unlock(&uq->uq_key); 3263 umtx_key_release(&uq->uq_key); 3264 return (error); 3265 } 3266 3267 /* 3268 * Signal a userland semaphore. 3269 */ 3270 static int 3271 do_sem2_wake(struct thread *td, struct _usem2 *sem) 3272 { 3273 struct umtx_key key; 3274 int error, cnt, rv; 3275 uint32_t count, flags; 3276 3277 rv = fueword32(&sem->_flags, &flags); 3278 if (rv == -1) 3279 return (EFAULT); 3280 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3281 return (error); 3282 umtxq_lock(&key); 3283 umtxq_busy(&key); 3284 cnt = umtxq_count(&key); 3285 if (cnt > 0) { 3286 /* 3287 * If this was the last sleeping thread, clear the waiters 3288 * flag in _count. 3289 */ 3290 if (cnt == 1) { 3291 umtxq_unlock(&key); 3292 rv = fueword32(&sem->_count, &count); 3293 while (rv != -1 && count & USEM_HAS_WAITERS) 3294 rv = casueword32(&sem->_count, count, &count, 3295 count & ~USEM_HAS_WAITERS); 3296 if (rv == -1) 3297 error = EFAULT; 3298 umtxq_lock(&key); 3299 } 3300 3301 umtxq_signal(&key, 1); 3302 } 3303 umtxq_unbusy(&key); 3304 umtxq_unlock(&key); 3305 umtx_key_release(&key); 3306 return (error); 3307 } 3308 3309 inline int 3310 umtx_copyin_timeout(const void *addr, struct timespec *tsp) 3311 { 3312 int error; 3313 3314 error = copyin(addr, tsp, sizeof(struct timespec)); 3315 if (error == 0) { 3316 if (tsp->tv_sec < 0 || 3317 tsp->tv_nsec >= 1000000000 || 3318 tsp->tv_nsec < 0) 3319 error = EINVAL; 3320 } 3321 return (error); 3322 } 3323 3324 static inline int 3325 umtx_copyin_umtx_time(const void *addr, size_t size, struct _umtx_time *tp) 3326 { 3327 int error; 3328 3329 if (size <= sizeof(struct timespec)) { 3330 tp->_clockid = CLOCK_REALTIME; 3331 tp->_flags = 0; 3332 error = copyin(addr, &tp->_timeout, sizeof(struct timespec)); 3333 } else 3334 error = copyin(addr, tp, sizeof(struct _umtx_time)); 3335 if (error != 0) 3336 return (error); 3337 if (tp->_timeout.tv_sec < 0 || 3338 tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0) 3339 return (EINVAL); 3340 return (0); 3341 } 3342 3343 static int 3344 __umtx_op_unimpl(struct thread *td, struct _umtx_op_args *uap) 3345 { 3346 3347 return (EOPNOTSUPP); 3348 } 3349 3350 static int 3351 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap) 3352 { 3353 struct _umtx_time timeout, *tm_p; 3354 int error; 3355 3356 if (uap->uaddr2 == NULL) 3357 tm_p = NULL; 3358 else { 3359 error = umtx_copyin_umtx_time( 3360 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3361 if (error != 0) 3362 return (error); 3363 tm_p = &timeout; 3364 } 3365 return (do_wait(td, uap->obj, uap->val, tm_p, 0, 0)); 3366 } 3367 3368 static int 3369 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap) 3370 { 3371 struct _umtx_time timeout, *tm_p; 3372 int error; 3373 3374 if (uap->uaddr2 == NULL) 3375 tm_p = NULL; 3376 else { 3377 error = umtx_copyin_umtx_time( 3378 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3379 if (error != 0) 3380 return (error); 3381 tm_p = &timeout; 3382 } 3383 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); 3384 } 3385 3386 static int 3387 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap) 3388 { 3389 struct _umtx_time *tm_p, timeout; 3390 int error; 3391 3392 if (uap->uaddr2 == NULL) 3393 tm_p = NULL; 3394 else { 3395 error = umtx_copyin_umtx_time( 3396 uap->uaddr2, 
(size_t)uap->uaddr1, &timeout); 3397 if (error != 0) 3398 return (error); 3399 tm_p = &timeout; 3400 } 3401 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1)); 3402 } 3403 3404 static int 3405 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap) 3406 { 3407 3408 return (kern_umtx_wake(td, uap->obj, uap->val, 0)); 3409 } 3410 3411 #define BATCH_SIZE 128 3412 static int 3413 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap) 3414 { 3415 char *uaddrs[BATCH_SIZE], **upp; 3416 int count, error, i, pos, tocopy; 3417 3418 upp = (char **)uap->obj; 3419 error = 0; 3420 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 3421 pos += tocopy) { 3422 tocopy = MIN(count, BATCH_SIZE); 3423 error = copyin(upp + pos, uaddrs, tocopy * sizeof(char *)); 3424 if (error != 0) 3425 break; 3426 for (i = 0; i < tocopy; ++i) 3427 kern_umtx_wake(td, uaddrs[i], INT_MAX, 1); 3428 maybe_yield(); 3429 } 3430 return (error); 3431 } 3432 3433 static int 3434 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap) 3435 { 3436 3437 return (kern_umtx_wake(td, uap->obj, uap->val, 1)); 3438 } 3439 3440 static int 3441 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap) 3442 { 3443 struct _umtx_time *tm_p, timeout; 3444 int error; 3445 3446 /* Allow a null timespec (wait forever). */ 3447 if (uap->uaddr2 == NULL) 3448 tm_p = NULL; 3449 else { 3450 error = umtx_copyin_umtx_time( 3451 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3452 if (error != 0) 3453 return (error); 3454 tm_p = &timeout; 3455 } 3456 return (do_lock_umutex(td, uap->obj, tm_p, 0)); 3457 } 3458 3459 static int 3460 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap) 3461 { 3462 3463 return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY)); 3464 } 3465 3466 static int 3467 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap) 3468 { 3469 struct _umtx_time *tm_p, timeout; 3470 int error; 3471 3472 /* Allow a null timespec (wait forever). */ 3473 if (uap->uaddr2 == NULL) 3474 tm_p = NULL; 3475 else { 3476 error = umtx_copyin_umtx_time( 3477 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3478 if (error != 0) 3479 return (error); 3480 tm_p = &timeout; 3481 } 3482 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT)); 3483 } 3484 3485 static int 3486 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap) 3487 { 3488 3489 return (do_wake_umutex(td, uap->obj)); 3490 } 3491 3492 static int 3493 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap) 3494 { 3495 3496 return (do_unlock_umutex(td, uap->obj, false)); 3497 } 3498 3499 static int 3500 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap) 3501 { 3502 3503 return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1)); 3504 } 3505 3506 static int 3507 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap) 3508 { 3509 struct timespec *ts, timeout; 3510 int error; 3511 3512 /* Allow a null timespec (wait forever). 
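 * Otherwise uaddr2 points at a plain struct timespec; uap->val
 * carries the CVWAIT_* flags that do_cv_wait() interprets.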
*/ 3513 if (uap->uaddr2 == NULL) 3514 ts = NULL; 3515 else { 3516 error = umtx_copyin_timeout(uap->uaddr2, &timeout); 3517 if (error != 0) 3518 return (error); 3519 ts = &timeout; 3520 } 3521 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 3522 } 3523 3524 static int 3525 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap) 3526 { 3527 3528 return (do_cv_signal(td, uap->obj)); 3529 } 3530 3531 static int 3532 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap) 3533 { 3534 3535 return (do_cv_broadcast(td, uap->obj)); 3536 } 3537 3538 static int 3539 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap) 3540 { 3541 struct _umtx_time timeout; 3542 int error; 3543 3544 /* Allow a null timespec (wait forever). */ 3545 if (uap->uaddr2 == NULL) { 3546 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 3547 } else { 3548 error = umtx_copyin_umtx_time(uap->uaddr2, 3549 (size_t)uap->uaddr1, &timeout); 3550 if (error != 0) 3551 return (error); 3552 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 3553 } 3554 return (error); 3555 } 3556 3557 static int 3558 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap) 3559 { 3560 struct _umtx_time timeout; 3561 int error; 3562 3563 /* Allow a null timespec (wait forever). */ 3564 if (uap->uaddr2 == NULL) { 3565 error = do_rw_wrlock(td, uap->obj, 0); 3566 } else { 3567 error = umtx_copyin_umtx_time(uap->uaddr2, 3568 (size_t)uap->uaddr1, &timeout); 3569 if (error != 0) 3570 return (error); 3571 3572 error = do_rw_wrlock(td, uap->obj, &timeout); 3573 } 3574 return (error); 3575 } 3576 3577 static int 3578 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap) 3579 { 3580 3581 return (do_rw_unlock(td, uap->obj)); 3582 } 3583 3584 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3585 static int 3586 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap) 3587 { 3588 struct _umtx_time *tm_p, timeout; 3589 int error; 3590 3591 /* Allow a null timespec (wait forever). */ 3592 if (uap->uaddr2 == NULL) 3593 tm_p = NULL; 3594 else { 3595 error = umtx_copyin_umtx_time( 3596 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3597 if (error != 0) 3598 return (error); 3599 tm_p = &timeout; 3600 } 3601 return (do_sem_wait(td, uap->obj, tm_p)); 3602 } 3603 3604 static int 3605 __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap) 3606 { 3607 3608 return (do_sem_wake(td, uap->obj)); 3609 } 3610 #endif 3611 3612 static int 3613 __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap) 3614 { 3615 3616 return (do_wake2_umutex(td, uap->obj, uap->val)); 3617 } 3618 3619 static int 3620 __umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap) 3621 { 3622 struct _umtx_time *tm_p, timeout; 3623 size_t uasize; 3624 int error; 3625 3626 /* Allow a null timespec (wait forever). 
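 * For SEM2_WAIT, uaddr1 encodes the size of the buffer at uaddr2;
 * the size decides whether the remaining time can be copied back
 * on EINTR (see below).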
*/ 3627 if (uap->uaddr2 == NULL) { 3628 uasize = 0; 3629 tm_p = NULL; 3630 } else { 3631 uasize = (size_t)uap->uaddr1; 3632 error = umtx_copyin_umtx_time(uap->uaddr2, uasize, &timeout); 3633 if (error != 0) 3634 return (error); 3635 tm_p = &timeout; 3636 } 3637 error = do_sem2_wait(td, uap->obj, tm_p); 3638 if (error == EINTR && uap->uaddr2 != NULL && 3639 (timeout._flags & UMTX_ABSTIME) == 0 && 3640 uasize >= sizeof(struct _umtx_time) + sizeof(struct timespec)) { 3641 error = copyout(&timeout._timeout, 3642 (struct _umtx_time *)uap->uaddr2 + 1, 3643 sizeof(struct timespec)); 3644 if (error == 0) { 3645 error = EINTR; 3646 } 3647 } 3648 3649 return (error); 3650 } 3651 3652 static int 3653 __umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap) 3654 { 3655 3656 return (do_sem2_wake(td, uap->obj)); 3657 } 3658 3659 #define USHM_OBJ_UMTX(o) \ 3660 ((struct umtx_shm_obj_list *)(&(o)->umtx_data)) 3661 3662 #define USHMF_REG_LINKED 0x0001 3663 #define USHMF_OBJ_LINKED 0x0002 3664 struct umtx_shm_reg { 3665 TAILQ_ENTRY(umtx_shm_reg) ushm_reg_link; 3666 LIST_ENTRY(umtx_shm_reg) ushm_obj_link; 3667 struct umtx_key ushm_key; 3668 struct ucred *ushm_cred; 3669 struct shmfd *ushm_obj; 3670 u_int ushm_refcnt; 3671 u_int ushm_flags; 3672 }; 3673 3674 LIST_HEAD(umtx_shm_obj_list, umtx_shm_reg); 3675 TAILQ_HEAD(umtx_shm_reg_head, umtx_shm_reg); 3676 3677 static uma_zone_t umtx_shm_reg_zone; 3678 static struct umtx_shm_reg_head umtx_shm_registry[UMTX_CHAINS]; 3679 static struct mtx umtx_shm_lock; 3680 static struct umtx_shm_reg_head umtx_shm_reg_delfree = 3681 TAILQ_HEAD_INITIALIZER(umtx_shm_reg_delfree); 3682 3683 static void umtx_shm_free_reg(struct umtx_shm_reg *reg); 3684 3685 static void 3686 umtx_shm_reg_delfree_tq(void *context __unused, int pending __unused) 3687 { 3688 struct umtx_shm_reg_head d; 3689 struct umtx_shm_reg *reg, *reg1; 3690 3691 TAILQ_INIT(&d); 3692 mtx_lock(&umtx_shm_lock); 3693 TAILQ_CONCAT(&d, &umtx_shm_reg_delfree, ushm_reg_link); 3694 mtx_unlock(&umtx_shm_lock); 3695 TAILQ_FOREACH_SAFE(reg, &d, ushm_reg_link, reg1) { 3696 TAILQ_REMOVE(&d, reg, ushm_reg_link); 3697 umtx_shm_free_reg(reg); 3698 } 3699 } 3700 3701 static struct task umtx_shm_reg_delfree_task = 3702 TASK_INITIALIZER(0, umtx_shm_reg_delfree_tq, NULL); 3703 3704 static struct umtx_shm_reg * 3705 umtx_shm_find_reg_locked(const struct umtx_key *key) 3706 { 3707 struct umtx_shm_reg *reg; 3708 struct umtx_shm_reg_head *reg_head; 3709 3710 KASSERT(key->shared, ("umtx_p_find_rg: private key")); 3711 mtx_assert(&umtx_shm_lock, MA_OWNED); 3712 reg_head = &umtx_shm_registry[key->hash]; 3713 TAILQ_FOREACH(reg, reg_head, ushm_reg_link) { 3714 KASSERT(reg->ushm_key.shared, 3715 ("non-shared key on reg %p %d", reg, reg->ushm_key.shared)); 3716 if (reg->ushm_key.info.shared.object == 3717 key->info.shared.object && 3718 reg->ushm_key.info.shared.offset == 3719 key->info.shared.offset) { 3720 KASSERT(reg->ushm_key.type == TYPE_SHM, ("TYPE_USHM")); 3721 KASSERT(reg->ushm_refcnt > 0, 3722 ("reg %p refcnt 0 onlist", reg)); 3723 KASSERT((reg->ushm_flags & USHMF_REG_LINKED) != 0, 3724 ("reg %p not linked", reg)); 3725 reg->ushm_refcnt++; 3726 return (reg); 3727 } 3728 } 3729 return (NULL); 3730 } 3731 3732 static struct umtx_shm_reg * 3733 umtx_shm_find_reg(const struct umtx_key *key) 3734 { 3735 struct umtx_shm_reg *reg; 3736 3737 mtx_lock(&umtx_shm_lock); 3738 reg = umtx_shm_find_reg_locked(key); 3739 mtx_unlock(&umtx_shm_lock); 3740 return (reg); 3741 } 3742 3743 static void 3744 umtx_shm_free_reg(struct umtx_shm_reg *reg) 
3745 {
3746
3747 chgumtxcnt(reg->ushm_cred->cr_ruidinfo, -1, 0);
3748 crfree(reg->ushm_cred);
3749 shm_drop(reg->ushm_obj);
3750 uma_zfree(umtx_shm_reg_zone, reg);
3751 }
3752
3753 static bool
3754 umtx_shm_unref_reg_locked(struct umtx_shm_reg *reg, bool force)
3755 {
3756 bool res;
3757
3758 mtx_assert(&umtx_shm_lock, MA_OWNED);
3759 KASSERT(reg->ushm_refcnt > 0, ("ushm_reg %p refcnt 0", reg));
3760 reg->ushm_refcnt--;
3761 res = reg->ushm_refcnt == 0;
3762 if (res || force) {
3763 if ((reg->ushm_flags & USHMF_REG_LINKED) != 0) {
3764 TAILQ_REMOVE(&umtx_shm_registry[reg->ushm_key.hash],
3765 reg, ushm_reg_link);
3766 reg->ushm_flags &= ~USHMF_REG_LINKED;
3767 }
3768 if ((reg->ushm_flags & USHMF_OBJ_LINKED) != 0) {
3769 LIST_REMOVE(reg, ushm_obj_link);
3770 reg->ushm_flags &= ~USHMF_OBJ_LINKED;
3771 }
3772 }
3773 return (res);
3774 }
3775
3776 static void
3777 umtx_shm_unref_reg(struct umtx_shm_reg *reg, bool force)
3778 {
3779 vm_object_t object;
3780 bool dofree;
3781
3782 if (force) {
3783 object = reg->ushm_obj->shm_object;
3784 VM_OBJECT_WLOCK(object);
3785 object->flags |= OBJ_UMTXDEAD;
3786 VM_OBJECT_WUNLOCK(object);
3787 }
3788 mtx_lock(&umtx_shm_lock);
3789 dofree = umtx_shm_unref_reg_locked(reg, force);
3790 mtx_unlock(&umtx_shm_lock);
3791 if (dofree)
3792 umtx_shm_free_reg(reg);
3793 }
3794
3795 void
3796 umtx_shm_object_init(vm_object_t object)
3797 {
3798
3799 LIST_INIT(USHM_OBJ_UMTX(object));
3800 }
3801
3802 void
3803 umtx_shm_object_terminated(vm_object_t object)
3804 {
3805 struct umtx_shm_reg *reg, *reg1;
3806 bool dofree;
3807
3808 dofree = false;
3809 mtx_lock(&umtx_shm_lock);
3810 LIST_FOREACH_SAFE(reg, USHM_OBJ_UMTX(object), ushm_obj_link, reg1) {
3811 if (umtx_shm_unref_reg_locked(reg, true)) {
3812 TAILQ_INSERT_TAIL(&umtx_shm_reg_delfree, reg,
3813 ushm_reg_link);
3814 dofree = true;
3815 }
3816 }
3817 mtx_unlock(&umtx_shm_lock);
3818 if (dofree)
3819 taskqueue_enqueue(taskqueue_thread, &umtx_shm_reg_delfree_task);
3820 }
3821
3822 static int
3823 umtx_shm_create_reg(struct thread *td, const struct umtx_key *key,
3824 struct umtx_shm_reg **res)
3825 {
3826 struct umtx_shm_reg *reg, *reg1;
3827 struct ucred *cred;
3828 int error;
3829
3830 reg = umtx_shm_find_reg(key);
3831 if (reg != NULL) {
3832 *res = reg;
3833 return (0);
3834 }
3835 cred = td->td_ucred;
3836 if (!chgumtxcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_UMTXP)))
3837 return (ENOMEM);
3838 reg = uma_zalloc(umtx_shm_reg_zone, M_WAITOK | M_ZERO);
3839 reg->ushm_refcnt = 1;
3840 bcopy(key, &reg->ushm_key, sizeof(*key));
3841 reg->ushm_obj = shm_alloc(td->td_ucred, O_RDWR);
3842 reg->ushm_cred = crhold(cred);
3843 error = shm_dotruncate(reg->ushm_obj, PAGE_SIZE);
3844 if (error != 0) {
3845 umtx_shm_free_reg(reg);
3846 return (error);
3847 }
3848 mtx_lock(&umtx_shm_lock);
3849 reg1 = umtx_shm_find_reg_locked(key);
3850 if (reg1 != NULL) {
3851 mtx_unlock(&umtx_shm_lock);
3852 umtx_shm_free_reg(reg);
3853 *res = reg1;
3854 return (0);
3855 }
3856 reg->ushm_refcnt++;
3857 TAILQ_INSERT_TAIL(&umtx_shm_registry[key->hash], reg, ushm_reg_link);
3858 LIST_INSERT_HEAD(USHM_OBJ_UMTX(key->info.shared.object), reg,
3859 ushm_obj_link);
3860 reg->ushm_flags = USHMF_REG_LINKED | USHMF_OBJ_LINKED;
3861 mtx_unlock(&umtx_shm_lock);
3862 *res = reg;
3863 return (0);
3864 }
3865
3866 static int
3867 umtx_shm_alive(struct thread *td, void *addr)
3868 {
3869 vm_map_t map;
3870 vm_map_entry_t entry;
3871 vm_object_t object;
3872 vm_pindex_t pindex;
3873 vm_prot_t prot;
3874 int res, ret;
3875 boolean_t wired;
3876
3877 map =
&td->td_proc->p_vmspace->vm_map;
3878 res = vm_map_lookup(&map, (uintptr_t)addr, VM_PROT_READ, &entry,
3879 &object, &pindex, &prot, &wired);
3880 if (res != KERN_SUCCESS)
3881 return (EFAULT);
3882 if (object == NULL)
3883 ret = EINVAL;
3884 else
3885 ret = (object->flags & OBJ_UMTXDEAD) != 0 ? ENOTTY : 0;
3886 vm_map_lookup_done(map, entry);
3887 return (ret);
3888 }
3889
3890 static void
3891 umtx_shm_init(void)
3892 {
3893 int i;
3894
3895 umtx_shm_reg_zone = uma_zcreate("umtx_shm", sizeof(struct umtx_shm_reg),
3896 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
3897 mtx_init(&umtx_shm_lock, "umtxshm", NULL, MTX_DEF);
3898 for (i = 0; i < nitems(umtx_shm_registry); i++)
3899 TAILQ_INIT(&umtx_shm_registry[i]);
3900 }
3901
3902 static int
3903 umtx_shm(struct thread *td, void *addr, u_int flags)
3904 {
3905 struct umtx_key key;
3906 struct umtx_shm_reg *reg;
3907 struct file *fp;
3908 int error, fd;
3909
3910 if (__bitcount(flags & (UMTX_SHM_CREAT | UMTX_SHM_LOOKUP |
3911 UMTX_SHM_DESTROY | UMTX_SHM_ALIVE)) != 1)
3912 return (EINVAL);
3913 if ((flags & UMTX_SHM_ALIVE) != 0)
3914 return (umtx_shm_alive(td, addr));
3915 error = umtx_key_get(addr, TYPE_SHM, PROCESS_SHARE, &key);
3916 if (error != 0)
3917 return (error);
3918 KASSERT(key.shared == 1, ("non-shared key"));
3919 if ((flags & UMTX_SHM_CREAT) != 0) {
3920 error = umtx_shm_create_reg(td, &key, &reg);
3921 } else {
3922 reg = umtx_shm_find_reg(&key);
3923 if (reg == NULL)
3924 error = ESRCH;
3925 }
3926 umtx_key_release(&key);
3927 if (error != 0)
3928 return (error);
3929 KASSERT(reg != NULL, ("no reg"));
3930 if ((flags & UMTX_SHM_DESTROY) != 0) {
3931 umtx_shm_unref_reg(reg, true);
3932 } else {
3933 #if 0
3934 #ifdef MAC
3935 error = mac_posixshm_check_open(td->td_ucred,
3936 reg->ushm_obj, FFLAGS(O_RDWR));
3937 if (error == 0)
3938 #endif
3939 error = shm_access(reg->ushm_obj, td->td_ucred,
3940 FFLAGS(O_RDWR));
3941 if (error == 0)
3942 #endif
3943 error = falloc_caps(td, &fp, &fd, O_CLOEXEC, NULL);
3944 if (error == 0) {
3945 shm_hold(reg->ushm_obj);
3946 finit(fp, FFLAGS(O_RDWR), DTYPE_SHM, reg->ushm_obj,
3947 &shm_ops);
3948 td->td_retval[0] = fd;
3949 fdrop(fp, td);
3950 }
3951 }
3952 umtx_shm_unref_reg(reg, false);
3953 return (error);
3954 }
3955
3956 static int
3957 __umtx_op_shm(struct thread *td, struct _umtx_op_args *uap)
3958 {
3959
3960 return (umtx_shm(td, uap->uaddr1, uap->val));
3961 }
3962
3963 static int
3964 umtx_robust_lists(struct thread *td, struct umtx_robust_lists_params *rbp)
3965 {
3966
3967 td->td_rb_list = rbp->robust_list_offset;
3968 td->td_rbp_list = rbp->robust_priv_list_offset;
3969 td->td_rb_inact = rbp->robust_inact_offset;
3970 return (0);
3971 }
3972
3973 static int
3974 __umtx_op_robust_lists(struct thread *td, struct _umtx_op_args *uap)
3975 {
3976 struct umtx_robust_lists_params rb;
3977 int error;
3978
3979 if (uap->val > sizeof(rb))
3980 return (EINVAL);
3981 bzero(&rb, sizeof(rb));
3982 error = copyin(uap->uaddr1, &rb, uap->val);
3983 if (error != 0)
3984 return (error);
3985 return (umtx_robust_lists(td, &rb));
3986 }
3987
3988 typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);
3989
3990 static const _umtx_op_func op_table[] = {
3991 [UMTX_OP_RESERVED0] = __umtx_op_unimpl,
3992 [UMTX_OP_RESERVED1] = __umtx_op_unimpl,
3993 [UMTX_OP_WAIT] = __umtx_op_wait,
3994 [UMTX_OP_WAKE] = __umtx_op_wake,
3995 [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex,
3996 [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex,
3997 [UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex,
3998
[UMTX_OP_SET_CEILING] = __umtx_op_set_ceiling, 3999 [UMTX_OP_CV_WAIT] = __umtx_op_cv_wait, 4000 [UMTX_OP_CV_SIGNAL] = __umtx_op_cv_signal, 4001 [UMTX_OP_CV_BROADCAST] = __umtx_op_cv_broadcast, 4002 [UMTX_OP_WAIT_UINT] = __umtx_op_wait_uint, 4003 [UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock, 4004 [UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock, 4005 [UMTX_OP_RW_UNLOCK] = __umtx_op_rw_unlock, 4006 [UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private, 4007 [UMTX_OP_WAKE_PRIVATE] = __umtx_op_wake_private, 4008 [UMTX_OP_MUTEX_WAIT] = __umtx_op_wait_umutex, 4009 [UMTX_OP_MUTEX_WAKE] = __umtx_op_wake_umutex, 4010 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4011 [UMTX_OP_SEM_WAIT] = __umtx_op_sem_wait, 4012 [UMTX_OP_SEM_WAKE] = __umtx_op_sem_wake, 4013 #else 4014 [UMTX_OP_SEM_WAIT] = __umtx_op_unimpl, 4015 [UMTX_OP_SEM_WAKE] = __umtx_op_unimpl, 4016 #endif 4017 [UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private, 4018 [UMTX_OP_MUTEX_WAKE2] = __umtx_op_wake2_umutex, 4019 [UMTX_OP_SEM2_WAIT] = __umtx_op_sem2_wait, 4020 [UMTX_OP_SEM2_WAKE] = __umtx_op_sem2_wake, 4021 [UMTX_OP_SHM] = __umtx_op_shm, 4022 [UMTX_OP_ROBUST_LISTS] = __umtx_op_robust_lists, 4023 }; 4024 4025 int 4026 sys__umtx_op(struct thread *td, struct _umtx_op_args *uap) 4027 { 4028 4029 if ((unsigned)uap->op < nitems(op_table)) 4030 return (*op_table[uap->op])(td, uap); 4031 return (EINVAL); 4032 } 4033 4034 #ifdef COMPAT_FREEBSD32 4035 4036 struct timespec32 { 4037 int32_t tv_sec; 4038 int32_t tv_nsec; 4039 }; 4040 4041 struct umtx_time32 { 4042 struct timespec32 timeout; 4043 uint32_t flags; 4044 uint32_t clockid; 4045 }; 4046 4047 static inline int 4048 umtx_copyin_timeout32(void *addr, struct timespec *tsp) 4049 { 4050 struct timespec32 ts32; 4051 int error; 4052 4053 error = copyin(addr, &ts32, sizeof(struct timespec32)); 4054 if (error == 0) { 4055 if (ts32.tv_sec < 0 || 4056 ts32.tv_nsec >= 1000000000 || 4057 ts32.tv_nsec < 0) 4058 error = EINVAL; 4059 else { 4060 tsp->tv_sec = ts32.tv_sec; 4061 tsp->tv_nsec = ts32.tv_nsec; 4062 } 4063 } 4064 return (error); 4065 } 4066 4067 static inline int 4068 umtx_copyin_umtx_time32(const void *addr, size_t size, struct _umtx_time *tp) 4069 { 4070 struct umtx_time32 t32; 4071 int error; 4072 4073 t32.clockid = CLOCK_REALTIME; 4074 t32.flags = 0; 4075 if (size <= sizeof(struct timespec32)) 4076 error = copyin(addr, &t32.timeout, sizeof(struct timespec32)); 4077 else 4078 error = copyin(addr, &t32, sizeof(struct umtx_time32)); 4079 if (error != 0) 4080 return (error); 4081 if (t32.timeout.tv_sec < 0 || 4082 t32.timeout.tv_nsec >= 1000000000 || t32.timeout.tv_nsec < 0) 4083 return (EINVAL); 4084 tp->_timeout.tv_sec = t32.timeout.tv_sec; 4085 tp->_timeout.tv_nsec = t32.timeout.tv_nsec; 4086 tp->_flags = t32.flags; 4087 tp->_clockid = t32.clockid; 4088 return (0); 4089 } 4090 4091 static int 4092 __umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 4093 { 4094 struct _umtx_time *tm_p, timeout; 4095 int error; 4096 4097 if (uap->uaddr2 == NULL) 4098 tm_p = NULL; 4099 else { 4100 error = umtx_copyin_umtx_time32(uap->uaddr2, 4101 (size_t)uap->uaddr1, &timeout); 4102 if (error != 0) 4103 return (error); 4104 tm_p = &timeout; 4105 } 4106 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); 4107 } 4108 4109 static int 4110 __umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap) 4111 { 4112 struct _umtx_time *tm_p, timeout; 4113 int error; 4114 4115 /* Allow a null timespec (wait forever). 
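 * For this compat32 entry point the timeout is in the 32-bit
 * layout and must be converted by umtx_copyin_umtx_time32().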
*/
4116 if (uap->uaddr2 == NULL)
4117 tm_p = NULL;
4118 else {
4119 error = umtx_copyin_umtx_time32(uap->uaddr2,
4120 (size_t)uap->uaddr1, &timeout);
4121 if (error != 0)
4122 return (error);
4123 tm_p = &timeout;
4124 }
4125 return (do_lock_umutex(td, uap->obj, tm_p, 0));
4126 }
4127
4128 static int
4129 __umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
4130 {
4131 struct _umtx_time *tm_p, timeout;
4132 int error;
4133
4134 /* Allow a null timespec (wait forever). */
4135 if (uap->uaddr2 == NULL)
4136 tm_p = NULL;
4137 else {
4138 error = umtx_copyin_umtx_time32(uap->uaddr2,
4139 (size_t)uap->uaddr1, &timeout);
4140 if (error != 0)
4141 return (error);
4142 tm_p = &timeout;
4143 }
4144 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT));
4145 }
4146
4147 static int
4148 __umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
4149 {
4150 struct timespec *ts, timeout;
4151 int error;
4152
4153 /* Allow a null timespec (wait forever). */
4154 if (uap->uaddr2 == NULL)
4155 ts = NULL;
4156 else {
4157 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
4158 if (error != 0)
4159 return (error);
4160 ts = &timeout;
4161 }
4162 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
4163 }
4164
4165 static int
4166 __umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap)
4167 {
4168 struct _umtx_time timeout;
4169 int error;
4170
4171 /* Allow a null timespec (wait forever). */
4172 if (uap->uaddr2 == NULL) {
4173 error = do_rw_rdlock(td, uap->obj, uap->val, 0);
4174 } else {
4175 error = umtx_copyin_umtx_time32(uap->uaddr2,
4176 (size_t)uap->uaddr1, &timeout);
4177 if (error != 0)
4178 return (error);
4179 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout);
4180 }
4181 return (error);
4182 }
4183
4184 static int
4185 __umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap)
4186 {
4187 struct _umtx_time timeout;
4188 int error;
4189
4190 /* Allow a null timespec (wait forever). */
4191 if (uap->uaddr2 == NULL) {
4192 error = do_rw_wrlock(td, uap->obj, 0);
4193 } else {
4194 error = umtx_copyin_umtx_time32(uap->uaddr2,
4195 (size_t)uap->uaddr1, &timeout);
4196 if (error != 0)
4197 return (error);
4198 error = do_rw_wrlock(td, uap->obj, &timeout);
4199 }
4200 return (error);
4201 }
4202
4203 static int
4204 __umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap)
4205 {
4206 struct _umtx_time *tm_p, timeout;
4207 int error;
4208
4209 if (uap->uaddr2 == NULL)
4210 tm_p = NULL;
4211 else {
4212 error = umtx_copyin_umtx_time32(
4213 uap->uaddr2, (size_t)uap->uaddr1, &timeout);
4214 if (error != 0)
4215 return (error);
4216 tm_p = &timeout;
4217 }
4218 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1));
4219 }
4220
4221 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
4222 static int
4223 __umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
4224 {
4225 struct _umtx_time *tm_p, timeout;
4226 int error;
4227
4228 /* Allow a null timespec (wait forever).
*/ 4229 if (uap->uaddr2 == NULL) 4230 tm_p = NULL; 4231 else { 4232 error = umtx_copyin_umtx_time32(uap->uaddr2, 4233 (size_t)uap->uaddr1, &timeout); 4234 if (error != 0) 4235 return (error); 4236 tm_p = &timeout; 4237 } 4238 return (do_sem_wait(td, uap->obj, tm_p)); 4239 } 4240 #endif 4241 4242 static int 4243 __umtx_op_sem2_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 4244 { 4245 struct _umtx_time *tm_p, timeout; 4246 size_t uasize; 4247 int error; 4248 4249 /* Allow a null timespec (wait forever). */ 4250 if (uap->uaddr2 == NULL) { 4251 uasize = 0; 4252 tm_p = NULL; 4253 } else { 4254 uasize = (size_t)uap->uaddr1; 4255 error = umtx_copyin_umtx_time32(uap->uaddr2, uasize, &timeout); 4256 if (error != 0) 4257 return (error); 4258 tm_p = &timeout; 4259 } 4260 error = do_sem2_wait(td, uap->obj, tm_p); 4261 if (error == EINTR && uap->uaddr2 != NULL && 4262 (timeout._flags & UMTX_ABSTIME) == 0 && 4263 uasize >= sizeof(struct umtx_time32) + sizeof(struct timespec32)) { 4264 struct timespec32 remain32 = { 4265 .tv_sec = timeout._timeout.tv_sec, 4266 .tv_nsec = timeout._timeout.tv_nsec 4267 }; 4268 error = copyout(&remain32, 4269 (struct umtx_time32 *)uap->uaddr2 + 1, 4270 sizeof(struct timespec32)); 4271 if (error == 0) { 4272 error = EINTR; 4273 } 4274 } 4275 4276 return (error); 4277 } 4278 4279 static int 4280 __umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap) 4281 { 4282 uint32_t uaddrs[BATCH_SIZE], **upp; 4283 int count, error, i, pos, tocopy; 4284 4285 upp = (uint32_t **)uap->obj; 4286 error = 0; 4287 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 4288 pos += tocopy) { 4289 tocopy = MIN(count, BATCH_SIZE); 4290 error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t)); 4291 if (error != 0) 4292 break; 4293 for (i = 0; i < tocopy; ++i) 4294 kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i], 4295 INT_MAX, 1); 4296 maybe_yield(); 4297 } 4298 return (error); 4299 } 4300 4301 struct umtx_robust_lists_params_compat32 { 4302 uint32_t robust_list_offset; 4303 uint32_t robust_priv_list_offset; 4304 uint32_t robust_inact_offset; 4305 }; 4306 4307 static int 4308 __umtx_op_robust_lists_compat32(struct thread *td, struct _umtx_op_args *uap) 4309 { 4310 struct umtx_robust_lists_params rb; 4311 struct umtx_robust_lists_params_compat32 rb32; 4312 int error; 4313 4314 if (uap->val > sizeof(rb32)) 4315 return (EINVAL); 4316 bzero(&rb, sizeof(rb)); 4317 bzero(&rb32, sizeof(rb32)); 4318 error = copyin(uap->uaddr1, &rb32, uap->val); 4319 if (error != 0) 4320 return (error); 4321 rb.robust_list_offset = rb32.robust_list_offset; 4322 rb.robust_priv_list_offset = rb32.robust_priv_list_offset; 4323 rb.robust_inact_offset = rb32.robust_inact_offset; 4324 return (umtx_robust_lists(td, &rb)); 4325 } 4326 4327 static const _umtx_op_func op_table_compat32[] = { 4328 [UMTX_OP_RESERVED0] = __umtx_op_unimpl, 4329 [UMTX_OP_RESERVED1] = __umtx_op_unimpl, 4330 [UMTX_OP_WAIT] = __umtx_op_wait_compat32, 4331 [UMTX_OP_WAKE] = __umtx_op_wake, 4332 [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex, 4333 [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex_compat32, 4334 [UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex, 4335 [UMTX_OP_SET_CEILING] = __umtx_op_set_ceiling, 4336 [UMTX_OP_CV_WAIT] = __umtx_op_cv_wait_compat32, 4337 [UMTX_OP_CV_SIGNAL] = __umtx_op_cv_signal, 4338 [UMTX_OP_CV_BROADCAST] = __umtx_op_cv_broadcast, 4339 [UMTX_OP_WAIT_UINT] = __umtx_op_wait_compat32, 4340 [UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock_compat32, 4341 [UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock_compat32, 
4342 [UMTX_OP_RW_UNLOCK] = __umtx_op_rw_unlock,
4343 [UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private_compat32,
4344 [UMTX_OP_WAKE_PRIVATE] = __umtx_op_wake_private,
4345 [UMTX_OP_MUTEX_WAIT] = __umtx_op_wait_umutex_compat32,
4346 [UMTX_OP_MUTEX_WAKE] = __umtx_op_wake_umutex,
4347 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
4348 [UMTX_OP_SEM_WAIT] = __umtx_op_sem_wait_compat32,
4349 [UMTX_OP_SEM_WAKE] = __umtx_op_sem_wake,
4350 #else
4351 [UMTX_OP_SEM_WAIT] = __umtx_op_unimpl,
4352 [UMTX_OP_SEM_WAKE] = __umtx_op_unimpl,
4353 #endif
4354 [UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private32,
4355 [UMTX_OP_MUTEX_WAKE2] = __umtx_op_wake2_umutex,
4356 [UMTX_OP_SEM2_WAIT] = __umtx_op_sem2_wait_compat32,
4357 [UMTX_OP_SEM2_WAKE] = __umtx_op_sem2_wake,
4358 [UMTX_OP_SHM] = __umtx_op_shm,
4359 [UMTX_OP_ROBUST_LISTS] = __umtx_op_robust_lists_compat32,
4360 };
4361
4362 int
4363 freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
4364 {
4365
4366 if ((unsigned)uap->op < nitems(op_table_compat32)) {
4367 return (*op_table_compat32[uap->op])(td,
4368 (struct _umtx_op_args *)uap);
4369 }
4370 return (EINVAL);
4371 }
4372 #endif
4373
4374 void
4375 umtx_thread_init(struct thread *td)
4376 {
4377
4378 td->td_umtxq = umtxq_alloc();
4379 td->td_umtxq->uq_thread = td;
4380 }
4381
4382 void
4383 umtx_thread_fini(struct thread *td)
4384 {
4385
4386 umtxq_free(td->td_umtxq);
4387 }
4388
4389 /*
4390 * Called when a new thread is created, e.g. by fork().
4391 */
4392 void
4393 umtx_thread_alloc(struct thread *td)
4394 {
4395 struct umtx_q *uq;
4396
4397 uq = td->td_umtxq;
4398 uq->uq_inherited_pri = PRI_MAX;
4399
4400 KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
4401 KASSERT(uq->uq_thread == td, ("uq_thread != td"));
4402 KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
4403 KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
4404 }
4405
4406 /*
4407 * exec() hook.
4408 *
4409 * Clear the robust lists for all of the process's threads, rather
4410 * than delaying the cleanup to the thread_exit hook, since the
4411 * relevant address space is being destroyed right now.
4412 */
4413 static void
4414 umtx_exec_hook(void *arg __unused, struct proc *p,
4415 struct image_params *imgp __unused)
4416 {
4417 struct thread *td;
4418
4419 KASSERT(p == curproc, ("need curproc"));
4420 PROC_LOCK(p);
4421 KASSERT((p->p_flag & P_HADTHREADS) == 0 ||
4422 (p->p_flag & P_STOPPED_SINGLE) != 0,
4423 ("curproc must be single-threaded"));
4424 FOREACH_THREAD_IN_PROC(p, td) {
4425 KASSERT(td == curthread ||
4426 ((td->td_flags & TDF_BOUNDARY) != 0 && TD_IS_SUSPENDED(td)),
4427 ("running thread %p %p", p, td));
4428 PROC_UNLOCK(p);
4429 umtx_thread_cleanup(td);
4430 PROC_LOCK(p);
4431 td->td_rb_list = td->td_rbp_list = td->td_rb_inact = 0;
4432 }
4433 PROC_UNLOCK(p);
4434 }
4435
4436 /*
4437 * thread_exit() hook.
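 *
 * Releases the thread's robust mutexes and disowns any PI mutexes
 * it still holds, via umtx_thread_cleanup() below.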
4438 */ 4439 void 4440 umtx_thread_exit(struct thread *td) 4441 { 4442 4443 umtx_thread_cleanup(td); 4444 } 4445 4446 static int 4447 umtx_read_uptr(struct thread *td, uintptr_t ptr, uintptr_t *res) 4448 { 4449 u_long res1; 4450 #ifdef COMPAT_FREEBSD32 4451 uint32_t res32; 4452 #endif 4453 int error; 4454 4455 #ifdef COMPAT_FREEBSD32 4456 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { 4457 error = fueword32((void *)ptr, &res32); 4458 if (error == 0) 4459 res1 = res32; 4460 } else 4461 #endif 4462 { 4463 error = fueword((void *)ptr, &res1); 4464 } 4465 if (error == 0) 4466 *res = res1; 4467 else 4468 error = EFAULT; 4469 return (error); 4470 } 4471 4472 static void 4473 umtx_read_rb_list(struct thread *td, struct umutex *m, uintptr_t *rb_list) 4474 { 4475 #ifdef COMPAT_FREEBSD32 4476 struct umutex32 m32; 4477 4478 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { 4479 memcpy(&m32, m, sizeof(m32)); 4480 *rb_list = m32.m_rb_lnk; 4481 } else 4482 #endif 4483 *rb_list = m->m_rb_lnk; 4484 } 4485 4486 static int 4487 umtx_handle_rb(struct thread *td, uintptr_t rbp, uintptr_t *rb_list, bool inact) 4488 { 4489 struct umutex m; 4490 int error; 4491 4492 KASSERT(td->td_proc == curproc, ("need current vmspace")); 4493 error = copyin((void *)rbp, &m, sizeof(m)); 4494 if (error != 0) 4495 return (error); 4496 if (rb_list != NULL) 4497 umtx_read_rb_list(td, &m, rb_list); 4498 if ((m.m_flags & UMUTEX_ROBUST) == 0) 4499 return (EINVAL); 4500 if ((m.m_owner & ~UMUTEX_CONTESTED) != td->td_tid) 4501 /* inact is cleared after unlock, allow the inconsistency */ 4502 return (inact ? 0 : EINVAL); 4503 return (do_unlock_umutex(td, (struct umutex *)rbp, true)); 4504 } 4505 4506 static void 4507 umtx_cleanup_rb_list(struct thread *td, uintptr_t rb_list, uintptr_t *rb_inact, 4508 const char *name) 4509 { 4510 int error, i; 4511 uintptr_t rbp; 4512 bool inact; 4513 4514 if (rb_list == 0) 4515 return; 4516 error = umtx_read_uptr(td, rb_list, &rbp); 4517 for (i = 0; error == 0 && rbp != 0 && i < umtx_max_rb; i++) { 4518 if (rbp == *rb_inact) { 4519 inact = true; 4520 *rb_inact = 0; 4521 } else 4522 inact = false; 4523 error = umtx_handle_rb(td, rbp, &rbp, inact); 4524 } 4525 if (i == umtx_max_rb && umtx_verbose_rb) { 4526 uprintf("comm %s pid %d: reached umtx %smax rb %d\n", 4527 td->td_proc->p_comm, td->td_proc->p_pid, name, umtx_max_rb); 4528 } 4529 if (error != 0 && umtx_verbose_rb) { 4530 uprintf("comm %s pid %d: handling %srb error %d\n", 4531 td->td_proc->p_comm, td->td_proc->p_pid, name, error); 4532 } 4533 } 4534 4535 /* 4536 * Clean up umtx data. 4537 */ 4538 static void 4539 umtx_thread_cleanup(struct thread *td) 4540 { 4541 struct umtx_q *uq; 4542 struct umtx_pi *pi; 4543 uintptr_t rb_inact; 4544 4545 /* 4546 * Disown pi mutexes. 4547 */ 4548 uq = td->td_umtxq; 4549 if (uq != NULL) { 4550 mtx_lock(&umtx_lock); 4551 uq->uq_inherited_pri = PRI_MAX; 4552 while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) { 4553 pi->pi_owner = NULL; 4554 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link); 4555 } 4556 mtx_unlock(&umtx_lock); 4557 thread_lock(td); 4558 sched_lend_user_prio(td, PRI_MAX); 4559 thread_unlock(td); 4560 } 4561 4562 /* 4563 * Handle terminated robust mutexes. Must be done after 4564 * robust pi disown, otherwise unlock could see unowned 4565 * entries. 
4566 */ 4567 rb_inact = td->td_rb_inact; 4568 if (rb_inact != 0) 4569 (void)umtx_read_uptr(td, rb_inact, &rb_inact); 4570 umtx_cleanup_rb_list(td, td->td_rb_list, &rb_inact, ""); 4571 umtx_cleanup_rb_list(td, td->td_rbp_list, &rb_inact, "priv "); 4572 if (rb_inact != 0) 4573 (void)umtx_handle_rb(td, rb_inact, NULL, true); 4574 } 4575