/*-
 * Copyright (c) 2015, 2016 The FreeBSD Foundation
 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Portions of this software were developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"
#include "opt_umtx_profiling.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/syscallsubr.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <sys/umtx.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

#include <machine/cpu.h>

#ifdef COMPAT_FREEBSD32
#include <compat/freebsd32/freebsd32_proto.h>
#endif

#define	_UMUTEX_TRY		1
#define	_UMUTEX_WAIT		2

#ifdef UMTX_PROFILING
#define	UPROF_PERC_BIGGER(w, f, sw, sf)					\
	(((w) > (sw)) || ((w) == (sw) && (f) > (sf)))
#endif

/* Priority inheritance mutex info. */
struct umtx_pi {
	/* Owner thread */
	struct thread		*pi_owner;

	/* Reference count */
	int			pi_refcount;

	/* List entry linking the PI mutexes held by a thread */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* List entry in hash */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* List for waiters */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Identify a userland lock object */
	struct umtx_key		pi_key;
};
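
/*
 * Note on umtx_pi lifetime (summary of the code below): a umtx_pi is
 * allocated on demand in do_lock_pi(), linked into its chain's hash
 * list, reference-counted under the chain lock, and freed from
 * umtx_pi_unref() once the last reference is dropped.  pi_owner and
 * the pi_blocked queue are protected by the global umtx_lock.
 */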

/* A userland synchronization object user. */
struct umtx_q {
	/* Linked list for the hash. */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Umtx key. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define UQF_UMTXQ	0x0001

	/* The thread this entry belongs to. */
	struct thread		*uq_thread;

	/*
	 * Blocked on PI mutex.  Reads may be done while holding either
	 * the chain lock or umtx_lock; writes must hold both.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* On blocked list */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* List of PI mutexes owned by this thread that have waiters */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Inherited priority from PP mutex */
	u_char			uq_inherited_pri;

	/* Spare queue ready to be reused */
	struct umtxq_queue	*uq_spare_queue;

	/* The queue we are on */
	struct umtxq_queue	*uq_cur_queue;
};

TAILQ_HEAD(umtxq_head, umtx_q);

/* Per-key wait-queue */
struct umtxq_queue {
	struct umtxq_head	head;
	struct umtx_key		key;
	LIST_ENTRY(umtxq_queue)	link;
	int			length;
};

LIST_HEAD(umtxq_list, umtxq_queue);

/* Userland lock object's wait-queue chain */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx		uc_lock;

	/* List of sleep queues. */
	struct umtxq_list	uc_queue[2];
#define UMTX_SHARED_QUEUE	0
#define UMTX_EXCLUSIVE_QUEUE	1

	LIST_HEAD(, umtxq_queue) uc_spare_queue;

	/* Busy flag */
	char			uc_busy;

	/* Chain lock waiters */
	int			uc_waiters;

	/* All PIs in the list */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;

#ifdef UMTX_PROFILING
	u_int			length;
	u_int			max_length;
#endif
};

#define	UMTXQ_LOCKED_ASSERT(uc)		mtx_assert(&(uc)->uc_lock, MA_OWNED)

/*
 * Don't propagate time-sharing priority; there is a security reason.
 * A user could simply create a PI mutex, let thread A lock it, and let
 * another thread B block on it.  Because B is sleeping, its priority
 * would be boosted, which would boost A's priority via priority
 * propagation as well, and A's priority would never be lowered even if
 * it were using 100% CPU; this is unfair to other processes.
 */

#define UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
			 PRI_MAX_TIMESHARE : (td)->td_user_pri)

#define	GOLDEN_RATIO_PRIME	2654404609U
#ifndef	UMTX_CHAINS
#define	UMTX_CHAINS		512
#endif
#define	UMTX_SHIFTS		(__WORD_BIT - 9)

#define	GET_SHARE(flags)	\
    (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)

#define BUSY_SPINS		200

struct abs_timeout {
	int clockid;
	struct timespec cur;
	struct timespec end;
};

#ifdef COMPAT_FREEBSD32
struct umutex32 {
	volatile __lwpid_t	m_owner;	/* Owner of the mutex */
	__uint32_t		m_flags;	/* Flags of the mutex */
	__uint32_t		m_ceilings[2];	/* Priority protect ceiling */
	__uint32_t		m_rb_lnk;	/* Robust linkage */
	__uint32_t		m_pad;
	__uint32_t		m_spare[2];
};

_Static_assert(sizeof(struct umutex) == sizeof(struct umutex32), "umutex32");
_Static_assert(__offsetof(struct umutex, m_spare[0]) ==
    __offsetof(struct umutex32, m_spare[0]), "m_spare32");
#endif

int umtx_shm_vnobj_persistent = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_vnode_persistent, CTLFLAG_RWTUN,
    &umtx_shm_vnobj_persistent, 0,
    "False forces destruction of umtx attached to file, on last close");
static int umtx_max_rb = 1000;
SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_max_robust, CTLFLAG_RWTUN,
    &umtx_max_rb, 0,
    "");

static uma_zone_t		umtx_pi_zone;
static struct umtxq_chain	umtxq_chains[2][UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
static int			umtx_pi_allocated;

static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
    &umtx_pi_allocated, 0, "Allocated umtx_pi");
static int umtx_verbose_rb = 1;
SYSCTL_INT(_debug_umtx, OID_AUTO, robust_faults_verbose, CTLFLAG_RWTUN,
    &umtx_verbose_rb, 0,
    "");

#ifdef UMTX_PROFILING
static long max_length;
SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0, "max_length");
static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD, 0, "umtx chain stats");
#endif

static void umtx_shm_init(void);
static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert_queue(struct umtx_q *uq, int q);
static void umtxq_remove_queue(struct umtx_q *uq, int q);
static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *);
static int umtxq_count(struct umtx_key *key);
static struct umtx_pi *umtx_pi_alloc(int);
static void umtx_pi_free(struct umtx_pi *pi);
static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    bool rb);
static void umtx_thread_cleanup(struct thread *td);
static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
    struct image_params *imgp __unused);
SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);

#define umtxq_signal(key, nwake)	umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
#define umtxq_insert(uq)	umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
#define umtxq_remove(uq)	umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)

static struct mtx umtx_lock;
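
/*
 * Locking summary (derived from the assertions in the code below):
 * umtx_lock serializes the priority-inheritance state, i.e. pi_owner,
 * the pi_blocked queues, the per-thread uq_pi_contested lists and the
 * inherited priorities, while each umtxq_chain's uc_lock only protects
 * the wait queues and spare queues hanging off that chain.
 */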

#ifdef UMTX_PROFILING
static void
umtx_init_profiling(void)
{
	struct sysctl_oid *chain_oid;
	char chain_name[10];
	int i;

	for (i = 0; i < UMTX_CHAINS; ++i) {
		snprintf(chain_name, sizeof(chain_name), "%d", i);
		chain_oid = SYSCTL_ADD_NODE(NULL,
		    SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO,
		    chain_name, CTLFLAG_RD, NULL, "umtx hash stats");
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length0", CTLFLAG_RD, &umtxq_chains[0][i].max_length, 0, NULL);
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length1", CTLFLAG_RD, &umtxq_chains[1][i].max_length, 0, NULL);
	}
}

static int
sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS)
{
	char buf[512];
	struct sbuf sb;
	struct umtxq_chain *uc;
	u_int fract, i, j, tot, whole;
	u_int sf0, sf1, sf2, sf3, sf4;
	u_int si0, si1, si2, si3, si4;
	u_int sw0, sw1, sw2, sw3, sw4;

	sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
	for (i = 0; i < 2; i++) {
		tot = 0;
		for (j = 0; j < UMTX_CHAINS; ++j) {
			uc = &umtxq_chains[i][j];
			mtx_lock(&uc->uc_lock);
			tot += uc->max_length;
			mtx_unlock(&uc->uc_lock);
		}
		if (tot == 0)
			sbuf_printf(&sb, "%u) Empty ", i);
		else {
			sf0 = sf1 = sf2 = sf3 = sf4 = 0;
			si0 = si1 = si2 = si3 = si4 = 0;
			sw0 = sw1 = sw2 = sw3 = sw4 = 0;
			for (j = 0; j < UMTX_CHAINS; j++) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				whole = uc->max_length * 100;
				mtx_unlock(&uc->uc_lock);
				fract = (whole % tot) * 100;
				if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) {
					sf0 = fract;
					si0 = j;
					sw0 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw1,
				    sf1)) {
					sf1 = fract;
					si1 = j;
					sw1 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw2,
				    sf2)) {
					sf2 = fract;
					si2 = j;
					sw2 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw3,
				    sf3)) {
					sf3 = fract;
					si3 = j;
					sw3 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw4,
				    sf4)) {
					sf4 = fract;
					si4 = j;
					sw4 = whole;
				}
			}
			sbuf_printf(&sb, "queue %u:\n", i);
			sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot,
			    sf0 / tot, si0);
			sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot,
			    sf1 / tot, si1);
			sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot,
			    sf2 / tot, si2);
			sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot,
			    sf3 / tot, si3);
			sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot,
			    sf4 / tot, si4);
		}
	}
	sbuf_trim(&sb);
	sbuf_finish(&sb);
	sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
	sbuf_delete(&sb);
	return (0);
}

static int
sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS)
{
	struct umtxq_chain *uc;
	u_int i, j;
	int clear, error;

	clear = 0;
	error = sysctl_handle_int(oidp, &clear, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);

	if (clear != 0) {
		for (i = 0; i < 2; ++i) {
			for (j = 0; j < UMTX_CHAINS; ++j) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				uc->length = 0;
				uc->max_length = 0;
				mtx_unlock(&uc->uc_lock);
			}
		}
	}
	return (0);
}

SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_clear, "I", "Clear umtx chains statistics");
SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_peaks, "A", "Highest peaks in chains max length");
#endif

static void
umtxq_sysinit(void *arg __unused)
{
	int i, j;

	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	for (i = 0; i < 2; ++i) {
		for (j = 0; j < UMTX_CHAINS; ++j) {
			mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
			    MTX_DEF | MTX_DUPOK);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
			LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
			TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
			umtxq_chains[i][j].uc_busy = 0;
			umtxq_chains[i][j].uc_waiters = 0;
#ifdef UMTX_PROFILING
			umtxq_chains[i][j].length = 0;
			umtxq_chains[i][j].max_length = 0;
#endif
		}
	}
#ifdef UMTX_PROFILING
	umtx_init_profiling();
#endif
	mtx_init(&umtx_lock, "umtx lock", NULL, MTX_DEF);
	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
	    EVENTHANDLER_PRI_ANY);
	umtx_shm_init();
}

struct umtx_q *
umtxq_alloc(void)
{
	struct umtx_q *uq;

	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
	uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX,
	    M_WAITOK | M_ZERO);
	TAILQ_INIT(&uq->uq_spare_queue->head);
	TAILQ_INIT(&uq->uq_pi_contested);
	uq->uq_inherited_pri = PRI_MAX;
	return (uq);
}

void
umtxq_free(struct umtx_q *uq)
{

	MPASS(uq->uq_spare_queue != NULL);
	free(uq->uq_spare_queue, M_UMTX);
	free(uq, M_UMTX);
}

static inline void
umtxq_hash(struct umtx_key *key)
{
	unsigned n;

	n = (uintptr_t)key->info.both.a + key->info.both.b;
	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
}

static inline struct umtxq_chain *
umtxq_getchain(struct umtx_key *key)
{

	if (key->type <= TYPE_SEM)
		return (&umtxq_chains[1][key->hash]);
	return (&umtxq_chains[0][key->hash]);
}

/*
 * Lock a chain.
 */
static inline void
umtxq_lock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_lock(&uc->uc_lock);
}

/*
 * Unlock a chain.
 */
static inline void
umtxq_unlock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_unlock(&uc->uc_lock);
}

/*
 * Set the chain to the busy state when a following operation
 * may block (a kernel mutex cannot be used).
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	if (uc->uc_busy) {
#ifdef SMP
		if (smp_cpus > 1) {
			int count = BUSY_SPINS;
			if (count > 0) {
				umtxq_unlock(key);
				while (uc->uc_busy && --count > 0)
					cpu_spinwait();
				umtxq_lock(key);
			}
		}
#endif
		while (uc->uc_busy) {
			uc->uc_waiters++;
			msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
			uc->uc_waiters--;
		}
	}
	uc->uc_busy = 1;
}
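
/*
 * The busy flag above covers multi-step operations that must drop the
 * chain mutex or may sleep.  On SMP, contending threads spin up to
 * BUSY_SPINS iterations before sleeping on the chain, since busy
 * periods are normally short.
 */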

/*
 * Unbusy a chain.
 */
static inline void
umtxq_unbusy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	KASSERT(uc->uc_busy != 0, ("not busy"));
	uc->uc_busy = 0;
	if (uc->uc_waiters)
		wakeup_one(uc);
}

static inline void
umtxq_unbusy_unlocked(struct umtx_key *key)
{

	umtxq_lock(key);
	umtxq_unbusy(key);
	umtxq_unlock(key);
}

static struct umtxq_queue *
umtxq_queue_lookup(struct umtx_key *key, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	LIST_FOREACH(uh, &uc->uc_queue[q], link) {
		if (umtx_key_match(&uh->key, key))
			return (uh);
	}

	return (NULL);
}

static inline void
umtxq_insert_queue(struct umtx_q *uq, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue"));
	uh = umtxq_queue_lookup(&uq->uq_key, q);
	if (uh != NULL) {
		LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link);
	} else {
		uh = uq->uq_spare_queue;
		uh->key = uq->uq_key;
		LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
#ifdef UMTX_PROFILING
		uc->length++;
		if (uc->length > uc->max_length) {
			uc->max_length = uc->length;
			if (uc->max_length > max_length)
				max_length = uc->max_length;
		}
#endif
	}
	uq->uq_spare_queue = NULL;

	TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
	uh->length++;
	uq->uq_flags |= UQF_UMTXQ;
	uq->uq_cur_queue = uh;
	return;
}

static inline void
umtxq_remove_queue(struct umtx_q *uq, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (uq->uq_flags & UQF_UMTXQ) {
		uh = uq->uq_cur_queue;
		TAILQ_REMOVE(&uh->head, uq, uq_link);
		uh->length--;
		uq->uq_flags &= ~UQF_UMTXQ;
		if (TAILQ_EMPTY(&uh->head)) {
			KASSERT(uh->length == 0,
			    ("inconsistent umtxq_queue length"));
#ifdef UMTX_PROFILING
			uc->length--;
#endif
			LIST_REMOVE(uh, link);
		} else {
			uh = LIST_FIRST(&uc->uc_spare_queue);
			KASSERT(uh != NULL, ("uc_spare_queue is empty"));
			LIST_REMOVE(uh, link);
		}
		uq->uq_spare_queue = uh;
		uq->uq_cur_queue = NULL;
	}
}

/*
 * Check if there are multiple waiters.
 */
static int
umtxq_count(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL)
		return (uh->length);
	return (0);
}

/*
 * Check if there are multiple PI waiters and return the first
 * waiter.
 */
static int
umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	*first = NULL;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL) {
		*first = TAILQ_FIRST(&uh->head);
		return (uh->length);
	}
	return (0);
}

static int
umtxq_check_susp(struct thread *td)
{
	struct proc *p;
	int error;

	/*
	 * The check for TDF_NEEDSUSPCHK is racy, but it is enough to
	 * eventually break the lockstep loop.
	 */
	if ((td->td_flags & TDF_NEEDSUSPCHK) == 0)
		return (0);
	error = 0;
	p = td->td_proc;
	PROC_LOCK(p);
	if (P_SHOULDSTOP(p) ||
	    ((p->p_flag & P_TRACED) && (td->td_dbgflags & TDB_SUSPEND))) {
		if (p->p_flag & P_SINGLE_EXIT)
			error = EINTR;
		else
			error = ERESTART;
	}
	PROC_UNLOCK(p);
	return (error);
}

/*
 * Wake up threads waiting on a userland object.
 */

static int
umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;
	struct umtx_q *uq;
	int ret;

	ret = 0;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, q);
	if (uh != NULL) {
		while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
			umtxq_remove_queue(uq, q);
			wakeup(uq);
			if (++ret >= n_wake)
				return (ret);
		}
	}
	return (ret);
}

/*
 * Wake up the specified thread.
 */
static inline void
umtxq_signal_thread(struct umtx_q *uq)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	umtxq_remove(uq);
	wakeup(uq);
}

static inline int
tstohz(const struct timespec *tsp)
{
	struct timeval tv;

	TIMESPEC_TO_TIMEVAL(&tv, tsp);
	return tvtohz(&tv);
}

static void
abs_timeout_init(struct abs_timeout *timo, int clockid, int absolute,
    const struct timespec *timeout)
{

	timo->clockid = clockid;
	if (!absolute) {
		kern_clock_gettime(curthread, clockid, &timo->end);
		timo->cur = timo->end;
		timespecadd(&timo->end, timeout);
	} else {
		timo->end = *timeout;
		kern_clock_gettime(curthread, clockid, &timo->cur);
	}
}

static void
abs_timeout_init2(struct abs_timeout *timo, const struct _umtx_time *umtxtime)
{

	abs_timeout_init(timo, umtxtime->_clockid,
	    (umtxtime->_flags & UMTX_ABSTIME) != 0, &umtxtime->_timeout);
}

static inline void
abs_timeout_update(struct abs_timeout *timo)
{

	kern_clock_gettime(curthread, timo->clockid, &timo->cur);
}

static int
abs_timeout_gethz(struct abs_timeout *timo)
{
	struct timespec tts;

	if (timespeccmp(&timo->end, &timo->cur, <=))
		return (-1);
	tts = timo->end;
	timespecsub(&tts, &timo->cur);
	return (tstohz(&tts));
}

static uint32_t
umtx_unlock_val(uint32_t flags, bool rb)
{

	if (rb)
		return (UMUTEX_RB_OWNERDEAD);
	else if ((flags & UMUTEX_NONCONSISTENT) != 0)
		return (UMUTEX_RB_NOTRECOV);
	else
		return (UMUTEX_UNOWNED);
}
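
/*
 * Usage sketch for the abs_timeout helpers above:
 * abs_timeout_init2() converts a _umtx_time into an end time on the
 * requested clock (adding the interval for relative timeouts), and
 * abs_timeout_gethz() returns the remaining time in ticks, or -1 once
 * the deadline has passed, so that umtxq_sleep() can return ETIMEDOUT
 * without calling msleep().
 */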
827 */ 828 static inline int 829 umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *abstime) 830 { 831 struct umtxq_chain *uc; 832 int error, timo; 833 834 uc = umtxq_getchain(&uq->uq_key); 835 UMTXQ_LOCKED_ASSERT(uc); 836 for (;;) { 837 if (!(uq->uq_flags & UQF_UMTXQ)) 838 return (0); 839 if (abstime != NULL) { 840 timo = abs_timeout_gethz(abstime); 841 if (timo < 0) 842 return (ETIMEDOUT); 843 } else 844 timo = 0; 845 error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo); 846 if (error != EWOULDBLOCK) { 847 umtxq_lock(&uq->uq_key); 848 break; 849 } 850 if (abstime != NULL) 851 abs_timeout_update(abstime); 852 umtxq_lock(&uq->uq_key); 853 } 854 return (error); 855 } 856 857 /* 858 * Convert userspace address into unique logical address. 859 */ 860 int 861 umtx_key_get(const void *addr, int type, int share, struct umtx_key *key) 862 { 863 struct thread *td = curthread; 864 vm_map_t map; 865 vm_map_entry_t entry; 866 vm_pindex_t pindex; 867 vm_prot_t prot; 868 boolean_t wired; 869 870 key->type = type; 871 if (share == THREAD_SHARE) { 872 key->shared = 0; 873 key->info.private.vs = td->td_proc->p_vmspace; 874 key->info.private.addr = (uintptr_t)addr; 875 } else { 876 MPASS(share == PROCESS_SHARE || share == AUTO_SHARE); 877 map = &td->td_proc->p_vmspace->vm_map; 878 if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE, 879 &entry, &key->info.shared.object, &pindex, &prot, 880 &wired) != KERN_SUCCESS) { 881 return (EFAULT); 882 } 883 884 if ((share == PROCESS_SHARE) || 885 (share == AUTO_SHARE && 886 VM_INHERIT_SHARE == entry->inheritance)) { 887 key->shared = 1; 888 key->info.shared.offset = (vm_offset_t)addr - 889 entry->start + entry->offset; 890 vm_object_reference(key->info.shared.object); 891 } else { 892 key->shared = 0; 893 key->info.private.vs = td->td_proc->p_vmspace; 894 key->info.private.addr = (uintptr_t)addr; 895 } 896 vm_map_lookup_done(map, entry); 897 } 898 899 umtxq_hash(key); 900 return (0); 901 } 902 903 /* 904 * Release key. 905 */ 906 void 907 umtx_key_release(struct umtx_key *key) 908 { 909 if (key->shared) 910 vm_object_deallocate(key->info.shared.object); 911 } 912 913 /* 914 * Fetch and compare value, sleep on the address if value is not changed. 915 */ 916 static int 917 do_wait(struct thread *td, void *addr, u_long id, 918 struct _umtx_time *timeout, int compat32, int is_private) 919 { 920 struct abs_timeout timo; 921 struct umtx_q *uq; 922 u_long tmp; 923 uint32_t tmp32; 924 int error = 0; 925 926 uq = td->td_umtxq; 927 if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT, 928 is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0) 929 return (error); 930 931 if (timeout != NULL) 932 abs_timeout_init2(&timo, timeout); 933 934 umtxq_lock(&uq->uq_key); 935 umtxq_insert(uq); 936 umtxq_unlock(&uq->uq_key); 937 if (compat32 == 0) { 938 error = fueword(addr, &tmp); 939 if (error != 0) 940 error = EFAULT; 941 } else { 942 error = fueword32(addr, &tmp32); 943 if (error == 0) 944 tmp = tmp32; 945 else 946 error = EFAULT; 947 } 948 umtxq_lock(&uq->uq_key); 949 if (error == 0) { 950 if (tmp == id) 951 error = umtxq_sleep(uq, "uwait", timeout == NULL ? 952 NULL : &timo); 953 if ((uq->uq_flags & UQF_UMTXQ) == 0) 954 error = 0; 955 else 956 umtxq_remove(uq); 957 } else if ((uq->uq_flags & UQF_UMTXQ) != 0) { 958 umtxq_remove(uq); 959 } 960 umtxq_unlock(&uq->uq_key); 961 umtx_key_release(&uq->uq_key); 962 if (error == ERESTART) 963 error = EINTR; 964 return (error); 965 } 966 967 /* 968 * Wake up threads sleeping on the specified address. 
969 */ 970 int 971 kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private) 972 { 973 struct umtx_key key; 974 int ret; 975 976 if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, 977 is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0) 978 return (ret); 979 umtxq_lock(&key); 980 umtxq_signal(&key, n_wake); 981 umtxq_unlock(&key); 982 umtx_key_release(&key); 983 return (0); 984 } 985 986 /* 987 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex. 988 */ 989 static int 990 do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, 991 struct _umtx_time *timeout, int mode) 992 { 993 struct abs_timeout timo; 994 struct umtx_q *uq; 995 uint32_t owner, old, id; 996 int error, rv; 997 998 id = td->td_tid; 999 uq = td->td_umtxq; 1000 error = 0; 1001 if (timeout != NULL) 1002 abs_timeout_init2(&timo, timeout); 1003 1004 /* 1005 * Care must be exercised when dealing with umtx structure. It 1006 * can fault on any access. 1007 */ 1008 for (;;) { 1009 rv = fueword32(&m->m_owner, &owner); 1010 if (rv == -1) 1011 return (EFAULT); 1012 if (mode == _UMUTEX_WAIT) { 1013 if (owner == UMUTEX_UNOWNED || 1014 owner == UMUTEX_CONTESTED || 1015 owner == UMUTEX_RB_OWNERDEAD || 1016 owner == UMUTEX_RB_NOTRECOV) 1017 return (0); 1018 } else { 1019 /* 1020 * Robust mutex terminated. Kernel duty is to 1021 * return EOWNERDEAD to the userspace. The 1022 * umutex.m_flags UMUTEX_NONCONSISTENT is set 1023 * by the common userspace code. 1024 */ 1025 if (owner == UMUTEX_RB_OWNERDEAD) { 1026 rv = casueword32(&m->m_owner, 1027 UMUTEX_RB_OWNERDEAD, &owner, 1028 id | UMUTEX_CONTESTED); 1029 if (rv == -1) 1030 return (EFAULT); 1031 if (owner == UMUTEX_RB_OWNERDEAD) 1032 return (EOWNERDEAD); /* success */ 1033 rv = umtxq_check_susp(td); 1034 if (rv != 0) 1035 return (rv); 1036 continue; 1037 } 1038 if (owner == UMUTEX_RB_NOTRECOV) 1039 return (ENOTRECOVERABLE); 1040 1041 1042 /* 1043 * Try the uncontested case. This should be 1044 * done in userland. 1045 */ 1046 rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, 1047 &owner, id); 1048 /* The address was invalid. */ 1049 if (rv == -1) 1050 return (EFAULT); 1051 1052 /* The acquire succeeded. */ 1053 if (owner == UMUTEX_UNOWNED) 1054 return (0); 1055 1056 /* 1057 * If no one owns it but it is contested try 1058 * to acquire it. 1059 */ 1060 if (owner == UMUTEX_CONTESTED) { 1061 rv = casueword32(&m->m_owner, 1062 UMUTEX_CONTESTED, &owner, 1063 id | UMUTEX_CONTESTED); 1064 /* The address was invalid. */ 1065 if (rv == -1) 1066 return (EFAULT); 1067 1068 if (owner == UMUTEX_CONTESTED) 1069 return (0); 1070 1071 rv = umtxq_check_susp(td); 1072 if (rv != 0) 1073 return (rv); 1074 1075 /* 1076 * If this failed the lock has 1077 * changed, restart. 1078 */ 1079 continue; 1080 } 1081 } 1082 1083 if (mode == _UMUTEX_TRY) 1084 return (EBUSY); 1085 1086 /* 1087 * If we caught a signal, we have retried and now 1088 * exit immediately. 1089 */ 1090 if (error != 0) 1091 return (error); 1092 1093 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, 1094 GET_SHARE(flags), &uq->uq_key)) != 0) 1095 return (error); 1096 1097 umtxq_lock(&uq->uq_key); 1098 umtxq_busy(&uq->uq_key); 1099 umtxq_insert(uq); 1100 umtxq_unlock(&uq->uq_key); 1101 1102 /* 1103 * Set the contested bit so that a release in user space 1104 * knows to use the system call for unlock. If this fails 1105 * either some one else has acquired the lock or it has been 1106 * released. 1107 */ 1108 rv = casueword32(&m->m_owner, owner, &old, 1109 owner | UMUTEX_CONTESTED); 1110 1111 /* The address was invalid. 
		if (rv == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep.  Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtxn", timeout == NULL ?
			    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);

		if (error == 0)
			error = umtxq_check_susp(td);
	}

	return (0);
}

/*
 * Unlock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	uint32_t owner, old, id, newlock;
	int error, count;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	newlock = umtx_unlock_val(flags, rb);
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, newlock);
		if (error == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	if (count > 1)
		newlock |= UMUTEX_CONTESTED;
	error = casueword32(&m->m_owner, owner, &old, newlock);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
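
/*
 * For PTHREAD_PRIO_NONE mutexes the uncontested paths are expected to
 * stay in userland: locking is a compare-and-swap from UMUTEX_UNOWNED
 * to the thread id and unlocking is the reverse.  The kernel is only
 * entered once UMUTEX_CONTESTED is set, and do_unlock_normal() above
 * leaves the bit set whenever more than one waiter remains.
 */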

/*
 * Check if the mutex is available and wake up a waiter;
 * only for a simple mutex.
 */
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != 0 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV)
		return (0);

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	if (count <= 1 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV) {
		error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    UMUTEX_UNOWNED);
		if (error == -1)
			error = EFAULT;
	}

	umtxq_lock(&key);
	if (error == 0 && count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 ||
	    owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV))
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

/*
 * Check if the mutex has waiters and try to fix the contention bit.
 */
static int
do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old;
	int type;
	int error;
	int count;

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT |
	    UMUTEX_ROBUST)) {
	case 0:
	case UMUTEX_ROBUST:
		type = TYPE_NORMAL_UMUTEX;
		break;
	case UMUTEX_PRIO_INHERIT:
		type = TYPE_PI_UMUTEX;
		break;
	case (UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST):
		type = TYPE_PI_ROBUST_UMUTEX;
		break;
	case UMUTEX_PRIO_PROTECT:
		type = TYPE_PP_UMUTEX;
		break;
	case (UMUTEX_PRIO_PROTECT | UMUTEX_ROBUST):
		type = TYPE_PP_ROBUST_UMUTEX;
		break;
	default:
		return (EINVAL);
	}
	if ((error = umtx_key_get(m, type, GET_SHARE(flags), &key)) != 0)
		return (error);

	owner = 0;
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);
	/*
	 * Only repair the contention bit if there is a waiter; this means
	 * the mutex is still being referenced by userland code.  Otherwise,
	 * don't update any memory.
1301 */ 1302 if (count > 1) { 1303 error = fueword32(&m->m_owner, &owner); 1304 if (error == -1) 1305 error = EFAULT; 1306 while (error == 0 && (owner & UMUTEX_CONTESTED) == 0) { 1307 error = casueword32(&m->m_owner, owner, &old, 1308 owner | UMUTEX_CONTESTED); 1309 if (error == -1) { 1310 error = EFAULT; 1311 break; 1312 } 1313 if (old == owner) 1314 break; 1315 owner = old; 1316 error = umtxq_check_susp(td); 1317 if (error != 0) 1318 break; 1319 } 1320 } else if (count == 1) { 1321 error = fueword32(&m->m_owner, &owner); 1322 if (error == -1) 1323 error = EFAULT; 1324 while (error == 0 && (owner & ~UMUTEX_CONTESTED) != 0 && 1325 (owner & UMUTEX_CONTESTED) == 0) { 1326 error = casueword32(&m->m_owner, owner, &old, 1327 owner | UMUTEX_CONTESTED); 1328 if (error == -1) { 1329 error = EFAULT; 1330 break; 1331 } 1332 if (old == owner) 1333 break; 1334 owner = old; 1335 error = umtxq_check_susp(td); 1336 if (error != 0) 1337 break; 1338 } 1339 } 1340 umtxq_lock(&key); 1341 if (error == EFAULT) { 1342 umtxq_signal(&key, INT_MAX); 1343 } else if (count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 || 1344 owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV)) 1345 umtxq_signal(&key, 1); 1346 umtxq_unbusy(&key); 1347 umtxq_unlock(&key); 1348 umtx_key_release(&key); 1349 return (error); 1350 } 1351 1352 static inline struct umtx_pi * 1353 umtx_pi_alloc(int flags) 1354 { 1355 struct umtx_pi *pi; 1356 1357 pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags); 1358 TAILQ_INIT(&pi->pi_blocked); 1359 atomic_add_int(&umtx_pi_allocated, 1); 1360 return (pi); 1361 } 1362 1363 static inline void 1364 umtx_pi_free(struct umtx_pi *pi) 1365 { 1366 uma_zfree(umtx_pi_zone, pi); 1367 atomic_add_int(&umtx_pi_allocated, -1); 1368 } 1369 1370 /* 1371 * Adjust the thread's position on a pi_state after its priority has been 1372 * changed. 1373 */ 1374 static int 1375 umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td) 1376 { 1377 struct umtx_q *uq, *uq1, *uq2; 1378 struct thread *td1; 1379 1380 mtx_assert(&umtx_lock, MA_OWNED); 1381 if (pi == NULL) 1382 return (0); 1383 1384 uq = td->td_umtxq; 1385 1386 /* 1387 * Check if the thread needs to be moved on the blocked chain. 1388 * It needs to be moved if either its priority is lower than 1389 * the previous thread or higher than the next thread. 1390 */ 1391 uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq); 1392 uq2 = TAILQ_NEXT(uq, uq_lockq); 1393 if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) || 1394 (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) { 1395 /* 1396 * Remove thread from blocked chain and determine where 1397 * it should be moved to. 1398 */ 1399 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq); 1400 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) { 1401 td1 = uq1->uq_thread; 1402 MPASS(td1->td_proc->p_magic == P_MAGIC); 1403 if (UPRI(td1) > UPRI(td)) 1404 break; 1405 } 1406 1407 if (uq1 == NULL) 1408 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq); 1409 else 1410 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq); 1411 } 1412 return (1); 1413 } 1414 1415 static struct umtx_pi * 1416 umtx_pi_next(struct umtx_pi *pi) 1417 { 1418 struct umtx_q *uq_owner; 1419 1420 if (pi->pi_owner == NULL) 1421 return (NULL); 1422 uq_owner = pi->pi_owner->td_umtxq; 1423 if (uq_owner == NULL) 1424 return (NULL); 1425 return (uq_owner->uq_pi_blocked); 1426 } 1427 1428 /* 1429 * Floyd's Cycle-Finding Algorithm. 

/*
 * Floyd's Cycle-Finding Algorithm.
 */
static bool
umtx_pi_check_loop(struct umtx_pi *pi)
{
	struct umtx_pi *pi1;	/* fast iterator */

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (false);
	pi1 = pi;
	for (;;) {
		pi = umtx_pi_next(pi);
		if (pi == NULL)
			break;
		pi1 = umtx_pi_next(pi1);
		if (pi1 == NULL)
			break;
		pi1 = umtx_pi_next(pi1);
		if (pi1 == NULL)
			break;
		if (pi == pi1)
			return (true);
	}
	return (false);
}

/*
 * Propagate priority when a thread is blocked on a POSIX
 * PI mutex.
 */
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;
	if (umtx_pi_check_loop(pi))
		return;

	for (;;) {
		td = pi->pi_owner;
		if (td == NULL || td == curthread)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		thread_lock(td);
		if (td->td_lend_user_pri > pri)
			sched_lend_user_prio(td, pri);
		else {
			thread_unlock(td);
			break;
		}
		thread_unlock(td);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		if (pi == NULL)
			break;
		/* Resort td on the list if needed. */
		umtx_pi_adjust_thread(pi, td);
	}
}

/*
 * Unpropagate priority for a PI mutex when a thread blocked on
 * it is interrupted by a signal or resumed by others.
 */
static void
umtx_repropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);

	if (umtx_pi_check_loop(pi))
		return;
	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		thread_lock(pi->pi_owner);
		sched_lend_user_prio(pi->pi_owner, pri);
		thread_unlock(pi->pi_owner);
		if ((pi = uq_owner->uq_pi_blocked) != NULL)
			umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
	}
}

/*
 * Insert a PI mutex into the owned list.
 */
static void
umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi->pi_owner != NULL)
		panic("pi_owner != NULL");
	pi->pi_owner = owner;
	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
}

/*
 * Disown a PI mutex, and remove it from the owned list.
 */
static void
umtx_pi_disown(struct umtx_pi *pi)
{

	mtx_assert(&umtx_lock, MA_OWNED);
	TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, pi, pi_link);
	pi->pi_owner = NULL;
}
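
/*
 * Summary of the lending scheme implemented above: UPRI() clamps
 * time-sharing priorities, umtx_propagate_priority() walks the chain
 * of PI owners lending the waiter's priority forward, and
 * umtx_repropagate_priority() recomputes the lent priority after a
 * waiter leaves, taking the owner's uq_inherited_pri into account.
 */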
1573 */ 1574 static int 1575 umtx_pi_claim(struct umtx_pi *pi, struct thread *owner) 1576 { 1577 struct umtx_q *uq; 1578 int pri; 1579 1580 mtx_lock(&umtx_lock); 1581 if (pi->pi_owner == owner) { 1582 mtx_unlock(&umtx_lock); 1583 return (0); 1584 } 1585 1586 if (pi->pi_owner != NULL) { 1587 /* 1588 * userland may have already messed the mutex, sigh. 1589 */ 1590 mtx_unlock(&umtx_lock); 1591 return (EPERM); 1592 } 1593 umtx_pi_setowner(pi, owner); 1594 uq = TAILQ_FIRST(&pi->pi_blocked); 1595 if (uq != NULL) { 1596 pri = UPRI(uq->uq_thread); 1597 thread_lock(owner); 1598 if (pri < UPRI(owner)) 1599 sched_lend_user_prio(owner, pri); 1600 thread_unlock(owner); 1601 } 1602 mtx_unlock(&umtx_lock); 1603 return (0); 1604 } 1605 1606 /* 1607 * Adjust a thread's order position in its blocked PI mutex, 1608 * this may result new priority propagating process. 1609 */ 1610 void 1611 umtx_pi_adjust(struct thread *td, u_char oldpri) 1612 { 1613 struct umtx_q *uq; 1614 struct umtx_pi *pi; 1615 1616 uq = td->td_umtxq; 1617 mtx_lock(&umtx_lock); 1618 /* 1619 * Pick up the lock that td is blocked on. 1620 */ 1621 pi = uq->uq_pi_blocked; 1622 if (pi != NULL) { 1623 umtx_pi_adjust_thread(pi, td); 1624 umtx_repropagate_priority(pi); 1625 } 1626 mtx_unlock(&umtx_lock); 1627 } 1628 1629 /* 1630 * Sleep on a PI mutex. 1631 */ 1632 static int 1633 umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, uint32_t owner, 1634 const char *wmesg, struct abs_timeout *timo, bool shared) 1635 { 1636 struct umtxq_chain *uc; 1637 struct thread *td, *td1; 1638 struct umtx_q *uq1; 1639 int error, pri; 1640 1641 error = 0; 1642 td = uq->uq_thread; 1643 KASSERT(td == curthread, ("inconsistent uq_thread")); 1644 uc = umtxq_getchain(&uq->uq_key); 1645 UMTXQ_LOCKED_ASSERT(uc); 1646 KASSERT(uc->uc_busy != 0, ("umtx chain is not busy")); 1647 umtxq_insert(uq); 1648 mtx_lock(&umtx_lock); 1649 if (pi->pi_owner == NULL) { 1650 mtx_unlock(&umtx_lock); 1651 td1 = tdfind(owner, shared ? -1 : td->td_proc->p_pid); 1652 mtx_lock(&umtx_lock); 1653 if (td1 != NULL) { 1654 if (pi->pi_owner == NULL) 1655 umtx_pi_setowner(pi, td1); 1656 PROC_UNLOCK(td1->td_proc); 1657 } 1658 } 1659 1660 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) { 1661 pri = UPRI(uq1->uq_thread); 1662 if (pri > UPRI(td)) 1663 break; 1664 } 1665 1666 if (uq1 != NULL) 1667 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq); 1668 else 1669 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq); 1670 1671 uq->uq_pi_blocked = pi; 1672 thread_lock(td); 1673 td->td_flags |= TDF_UPIBLOCKED; 1674 thread_unlock(td); 1675 umtx_propagate_priority(td); 1676 mtx_unlock(&umtx_lock); 1677 umtxq_unbusy(&uq->uq_key); 1678 1679 error = umtxq_sleep(uq, wmesg, timo); 1680 umtxq_remove(uq); 1681 1682 mtx_lock(&umtx_lock); 1683 uq->uq_pi_blocked = NULL; 1684 thread_lock(td); 1685 td->td_flags &= ~TDF_UPIBLOCKED; 1686 thread_unlock(td); 1687 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq); 1688 umtx_repropagate_priority(pi); 1689 mtx_unlock(&umtx_lock); 1690 umtxq_unlock(&uq->uq_key); 1691 1692 return (error); 1693 } 1694 1695 /* 1696 * Add reference count for a PI mutex. 1697 */ 1698 static void 1699 umtx_pi_ref(struct umtx_pi *pi) 1700 { 1701 struct umtxq_chain *uc; 1702 1703 uc = umtxq_getchain(&pi->pi_key); 1704 UMTXQ_LOCKED_ASSERT(uc); 1705 pi->pi_refcount++; 1706 } 1707 1708 /* 1709 * Decrease reference count for a PI mutex, if the counter 1710 * is decreased to zero, its memory space is freed. 
 */
static void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		mtx_lock(&umtx_lock);
		if (pi->pi_owner != NULL)
			umtx_pi_disown(pi);
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
		    ("blocked queue not empty"));
		mtx_unlock(&umtx_lock);
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		umtx_pi_free(pi);
	}
}

/*
 * Find a PI mutex in hash table.
 */
static struct umtx_pi *
umtx_pi_lookup(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtx_pi *pi;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);

	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
		if (umtx_key_match(&pi->pi_key, key)) {
			return (pi);
		}
	}
	return (NULL);
}

/*
 * Insert a PI mutex into hash table.
 */
static inline void
umtx_pi_insert(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
}

/*
 * Lock a PI mutex.
 */
static int
do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, old_owner, owner, old;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	pi = umtx_pi_lookup(&uq->uq_key);
	if (pi == NULL) {
		new_pi = umtx_pi_alloc(M_NOWAIT);
		if (new_pi == NULL) {
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc(M_WAITOK);
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL) {
				umtx_pi_free(new_pi);
				new_pi = NULL;
			}
		}
		if (new_pi != NULL) {
			new_pi->pi_key = uq->uq_key;
			umtx_pi_insert(new_pi);
			pi = new_pi;
		}
	}
	umtx_pi_ref(pi);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Care must be exercised when dealing with umtx structure.  It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED) {
			error = 0;
			break;
		}

		if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
			break;
		}

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED || owner == UMUTEX_RB_OWNERDEAD) {
			old_owner = owner;
			rv = casueword32(&m->m_owner, owner, &owner,
			    id | UMUTEX_CONTESTED);
			/* The address was invalid. */
			if (rv == -1) {
				error = EFAULT;
				break;
			}

			if (owner == old_owner) {
				umtxq_lock(&uq->uq_key);
				umtxq_busy(&uq->uq_key);
				error = umtx_pi_claim(pi, td);
				umtxq_unbusy(&uq->uq_key);
				umtxq_unlock(&uq->uq_key);
				if (error != 0) {
					/*
					 * Since we're going to return an
					 * error, restore the m_owner to its
					 * previous, unowned state to avoid
					 * compounding the problem.
					 */
					(void)casuword32(&m->m_owner,
					    id | UMUTEX_CONTESTED,
					    old_owner);
				}
				if (error == 0 &&
				    old_owner == UMUTEX_RB_OWNERDEAD)
					error = EOWNERDEAD;
				break;
			}

			error = umtxq_check_susp(td);
			if (error != 0)
				break;

			/* If this failed the lock has changed, restart. */
			continue;
		}

		if ((owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old, owner |
		    UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}

		umtxq_lock(&uq->uq_key);
		/*
		 * We set the contested bit, sleep.  Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.  Note that the UMUTEX_RB_OWNERDEAD
		 * value for owner is impossible there.
		 */
		if (old == owner) {
			error = umtxq_sleep_pi(uq, pi,
			    owner & ~UMUTEX_CONTESTED,
			    "umtxpi", timeout == NULL ? NULL : &timo,
			    (flags & USYNC_PROCESS_SHARED) != 0);
			if (error != 0)
				continue;
		} else {
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
		}

		error = umtxq_check_susp(td);
		if (error != 0)
			break;
	}

	umtxq_lock(&uq->uq_key);
	umtx_pi_unref(pi);
	umtxq_unlock(&uq->uq_key);

	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Unlock a PI mutex.
 */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	uint32_t id, new_owner, old, owner;
	int count, error, pri;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	new_owner = umtx_unlock_val(flags, rb);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, new_owner);
		if (error == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count_pi(&key, &uq_first);
	if (uq_first != NULL) {
		mtx_lock(&umtx_lock);
		pi = uq_first->uq_pi_blocked;
		KASSERT(pi != NULL, ("pi == NULL?"));
		if (pi->pi_owner != td && !(rb && pi->pi_owner == NULL)) {
			mtx_unlock(&umtx_lock);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			/* userland messed the mutex */
			return (EPERM);
		}
		uq_me = td->td_umtxq;
		if (pi->pi_owner == td)
			umtx_pi_disown(pi);
		/* get highest priority thread which is still sleeping. */
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		while (uq_first != NULL &&
		    (uq_first->uq_flags & UQF_UMTXQ) == 0) {
			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
		}
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
		if (uq_first)
			umtxq_signal_thread(uq_first);
	} else {
		pi = umtx_pi_lookup(&key);
		/*
		 * A umtx_pi can exist if a signal or timeout removed the
		 * last waiter from the umtxq, but there is still
		 * a thread in do_lock_pi() holding the umtx_pi.
		 */
		if (pi != NULL) {
			/*
			 * The umtx_pi can be unowned, such as when a thread
			 * has just entered do_lock_pi(), allocated the
			 * umtx_pi, and unlocked the umtxq.
			 * If the current thread owns it, it must disown it.
			 */
			mtx_lock(&umtx_lock);
			if (pi->pi_owner == td)
				umtx_pi_disown(pi);
			mtx_unlock(&umtx_lock);
		}
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */

	if (count > 1)
		new_owner |= UMUTEX_CONTESTED;
	error = casueword32(&m->m_owner, owner, &old, new_owner);

	umtxq_unbusy_unlocked(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

/*
 * Lock a PP mutex.
 */
static int
do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct abs_timeout timo;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t ceiling;
	uint32_t owner, id;
	int error, pri, old_inherited_pri, su, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
	for (;;) {
		old_inherited_pri = uq->uq_inherited_pri;
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		rv = fueword32(&m->m_ceilings[0], &ceiling);
		if (rv == -1) {
			error = EFAULT;
			goto out;
		}
		ceiling = RTP_PRIO_MAX - ceiling;
		if (ceiling > RTP_PRIO_MAX) {
			error = EINVAL;
			goto out;
		}

		mtx_lock(&umtx_lock);
		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
			mtx_unlock(&umtx_lock);
			error = EINVAL;
			goto out;
		}
		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
			thread_lock(td);
			if (uq->uq_inherited_pri < UPRI(td))
				sched_lend_user_prio(td, uq->uq_inherited_pri);
			thread_unlock(td);
		}
		mtx_unlock(&umtx_lock);

		rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    id | UMUTEX_CONTESTED);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		if (owner == UMUTEX_CONTESTED) {
			error = 0;
			break;
		} else if (owner == UMUTEX_RB_OWNERDEAD) {
			rv = casueword32(&m->m_owner, UMUTEX_RB_OWNERDEAD,
			    &owner, id | UMUTEX_CONTESTED);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (owner == UMUTEX_RB_OWNERDEAD) {
				error = EOWNERDEAD; /* success */
				break;
			}
			error = 0;
		} else if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", timeout == NULL ?
		    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);

		mtx_lock(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}

	if (error != 0 && error != EOWNERDEAD) {
		mtx_lock(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}

out:
	umtxq_unbusy_unlocked(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}
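
/*
 * Priority-protected (PP) mutexes keep their ceiling in m_ceilings[0];
 * it is mapped to a kernel priority as
 * PRI_MIN_REALTIME + (RTP_PRIO_MAX - ceiling) and tracked per thread in
 * uq_inherited_pri.  As the unlock path below notes, the unlocked state
 * is always left at UMUTEX_CONTESTED so every lock operation enters the
 * kernel and the thread priority can be adjusted.
 */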
2219 */ 2220 static int 2221 do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, bool rb) 2222 { 2223 struct umtx_key key; 2224 struct umtx_q *uq, *uq2; 2225 struct umtx_pi *pi; 2226 uint32_t id, owner, rceiling; 2227 int error, pri, new_inherited_pri, su; 2228 2229 id = td->td_tid; 2230 uq = td->td_umtxq; 2231 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); 2232 2233 /* 2234 * Make sure we own this mtx. 2235 */ 2236 error = fueword32(&m->m_owner, &owner); 2237 if (error == -1) 2238 return (EFAULT); 2239 2240 if ((owner & ~UMUTEX_CONTESTED) != id) 2241 return (EPERM); 2242 2243 error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t)); 2244 if (error != 0) 2245 return (error); 2246 2247 if (rceiling == -1) 2248 new_inherited_pri = PRI_MAX; 2249 else { 2250 rceiling = RTP_PRIO_MAX - rceiling; 2251 if (rceiling > RTP_PRIO_MAX) 2252 return (EINVAL); 2253 new_inherited_pri = PRI_MIN_REALTIME + rceiling; 2254 } 2255 2256 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2257 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2258 &key)) != 0) 2259 return (error); 2260 umtxq_lock(&key); 2261 umtxq_busy(&key); 2262 umtxq_unlock(&key); 2263 /* 2264 * For priority protected mutex, always set unlocked state 2265 * to UMUTEX_CONTESTED, so that userland always enters kernel 2266 * to lock the mutex, it is necessary because thread priority 2267 * has to be adjusted for such mutex. 2268 */ 2269 error = suword32(&m->m_owner, umtx_unlock_val(flags, rb) | 2270 UMUTEX_CONTESTED); 2271 2272 umtxq_lock(&key); 2273 if (error == 0) 2274 umtxq_signal(&key, 1); 2275 umtxq_unbusy(&key); 2276 umtxq_unlock(&key); 2277 2278 if (error == -1) 2279 error = EFAULT; 2280 else { 2281 mtx_lock(&umtx_lock); 2282 if (su != 0) 2283 uq->uq_inherited_pri = new_inherited_pri; 2284 pri = PRI_MAX; 2285 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2286 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2287 if (uq2 != NULL) { 2288 if (pri > UPRI(uq2->uq_thread)) 2289 pri = UPRI(uq2->uq_thread); 2290 } 2291 } 2292 if (pri > uq->uq_inherited_pri) 2293 pri = uq->uq_inherited_pri; 2294 thread_lock(td); 2295 sched_lend_user_prio(td, pri); 2296 thread_unlock(td); 2297 mtx_unlock(&umtx_lock); 2298 } 2299 umtx_key_release(&key); 2300 return (error); 2301 } 2302 2303 static int 2304 do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling, 2305 uint32_t *old_ceiling) 2306 { 2307 struct umtx_q *uq; 2308 uint32_t flags, id, owner, save_ceiling; 2309 int error, rv, rv1; 2310 2311 error = fueword32(&m->m_flags, &flags); 2312 if (error == -1) 2313 return (EFAULT); 2314 if ((flags & UMUTEX_PRIO_PROTECT) == 0) 2315 return (EINVAL); 2316 if (ceiling > RTP_PRIO_MAX) 2317 return (EINVAL); 2318 id = td->td_tid; 2319 uq = td->td_umtxq; 2320 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2321 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2322 &uq->uq_key)) != 0) 2323 return (error); 2324 for (;;) { 2325 umtxq_lock(&uq->uq_key); 2326 umtxq_busy(&uq->uq_key); 2327 umtxq_unlock(&uq->uq_key); 2328 2329 rv = fueword32(&m->m_ceilings[0], &save_ceiling); 2330 if (rv == -1) { 2331 error = EFAULT; 2332 break; 2333 } 2334 2335 rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, 2336 id | UMUTEX_CONTESTED); 2337 if (rv == -1) { 2338 error = EFAULT; 2339 break; 2340 } 2341 2342 if (owner == UMUTEX_CONTESTED) { 2343 rv = suword32(&m->m_ceilings[0], ceiling); 2344 rv1 = suword32(&m->m_owner, UMUTEX_CONTESTED); 2345 error = (rv == 0 && rv1 == 0) ? 
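/*
 * Illustrative userland sketch (not part of this file): because the PP
 * unlock path above always leaves m_owner with UMUTEX_CONTESTED set, a
 * priority-protected mutex has no userland fast path; lock and unlock always
 * enter the kernel so the thread priority can be adjusted.  Changing the
 * ceiling maps to UMTX_OP_SET_CEILING, with the previous value reported
 * through uaddr1 (compare __umtx_op_set_ceiling() further below).  The
 * wrapper name is hypothetical.
 *
 *	#include <sys/types.h>
 *	#include <sys/umtx.h>
 *
 *	static int
 *	example_pp_set_ceiling(struct umutex *m, unsigned int new_ceiling,
 *	    uint32_t *old_ceiling)
 *	{
 *		// val carries the new ceiling, uaddr1 receives the old one
 *		return (_umtx_op(m, UMTX_OP_SET_CEILING, new_ceiling,
 *		    old_ceiling, NULL));
 *	}
 */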
0: EFAULT; 2346 break; 2347 } 2348 2349 if ((owner & ~UMUTEX_CONTESTED) == id) { 2350 rv = suword32(&m->m_ceilings[0], ceiling); 2351 error = rv == 0 ? 0 : EFAULT; 2352 break; 2353 } 2354 2355 if (owner == UMUTEX_RB_OWNERDEAD) { 2356 error = EOWNERDEAD; 2357 break; 2358 } else if (owner == UMUTEX_RB_NOTRECOV) { 2359 error = ENOTRECOVERABLE; 2360 break; 2361 } 2362 2363 /* 2364 * If we caught a signal, we have retried and now 2365 * exit immediately. 2366 */ 2367 if (error != 0) 2368 break; 2369 2370 /* 2371 * We set the contested bit, sleep. Otherwise the lock changed 2372 * and we need to retry or we lost a race to the thread 2373 * unlocking the umtx. 2374 */ 2375 umtxq_lock(&uq->uq_key); 2376 umtxq_insert(uq); 2377 umtxq_unbusy(&uq->uq_key); 2378 error = umtxq_sleep(uq, "umtxpp", NULL); 2379 umtxq_remove(uq); 2380 umtxq_unlock(&uq->uq_key); 2381 } 2382 umtxq_lock(&uq->uq_key); 2383 if (error == 0) 2384 umtxq_signal(&uq->uq_key, INT_MAX); 2385 umtxq_unbusy(&uq->uq_key); 2386 umtxq_unlock(&uq->uq_key); 2387 umtx_key_release(&uq->uq_key); 2388 if (error == 0 && old_ceiling != NULL) { 2389 rv = suword32(old_ceiling, save_ceiling); 2390 error = rv == 0 ? 0 : EFAULT; 2391 } 2392 return (error); 2393 } 2394 2395 /* 2396 * Lock a userland POSIX mutex. 2397 */ 2398 static int 2399 do_lock_umutex(struct thread *td, struct umutex *m, 2400 struct _umtx_time *timeout, int mode) 2401 { 2402 uint32_t flags; 2403 int error; 2404 2405 error = fueword32(&m->m_flags, &flags); 2406 if (error == -1) 2407 return (EFAULT); 2408 2409 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2410 case 0: 2411 error = do_lock_normal(td, m, flags, timeout, mode); 2412 break; 2413 case UMUTEX_PRIO_INHERIT: 2414 error = do_lock_pi(td, m, flags, timeout, mode); 2415 break; 2416 case UMUTEX_PRIO_PROTECT: 2417 error = do_lock_pp(td, m, flags, timeout, mode); 2418 break; 2419 default: 2420 return (EINVAL); 2421 } 2422 if (timeout == NULL) { 2423 if (error == EINTR && mode != _UMUTEX_WAIT) 2424 error = ERESTART; 2425 } else { 2426 /* Timed-locking is not restarted. */ 2427 if (error == ERESTART) 2428 error = EINTR; 2429 } 2430 return (error); 2431 } 2432 2433 /* 2434 * Unlock a userland POSIX mutex. 
2435 */
2436 static int
2437 do_unlock_umutex(struct thread *td, struct umutex *m, bool rb)
2438 {
2439 	uint32_t flags;
2440 	int error;
2441 
2442 	error = fueword32(&m->m_flags, &flags);
2443 	if (error == -1)
2444 		return (EFAULT);
2445 
2446 	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2447 	case 0:
2448 		return (do_unlock_normal(td, m, flags, rb));
2449 	case UMUTEX_PRIO_INHERIT:
2450 		return (do_unlock_pi(td, m, flags, rb));
2451 	case UMUTEX_PRIO_PROTECT:
2452 		return (do_unlock_pp(td, m, flags, rb));
2453 	}
2454 
2455 	return (EINVAL);
2456 }
2457 
2458 static int
2459 do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
2460     struct timespec *timeout, u_long wflags)
2461 {
2462 	struct abs_timeout timo;
2463 	struct umtx_q *uq;
2464 	uint32_t flags, clockid, hasw;
2465 	int error;
2466 
2467 	uq = td->td_umtxq;
2468 	error = fueword32(&cv->c_flags, &flags);
2469 	if (error == -1)
2470 		return (EFAULT);
2471 	error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
2472 	if (error != 0)
2473 		return (error);
2474 
2475 	if ((wflags & CVWAIT_CLOCKID) != 0) {
2476 		error = fueword32(&cv->c_clockid, &clockid);
2477 		if (error == -1) {
2478 			umtx_key_release(&uq->uq_key);
2479 			return (EFAULT);
2480 		}
2481 		if (clockid < CLOCK_REALTIME ||
2482 		    clockid >= CLOCK_THREAD_CPUTIME_ID) {
2483 			/* hmm, only HW clock id will work. */
2484 			umtx_key_release(&uq->uq_key);
2485 			return (EINVAL);
2486 		}
2487 	} else {
2488 		clockid = CLOCK_REALTIME;
2489 	}
2490 
2491 	umtxq_lock(&uq->uq_key);
2492 	umtxq_busy(&uq->uq_key);
2493 	umtxq_insert(uq);
2494 	umtxq_unlock(&uq->uq_key);
2495 
2496 	/*
2497 	 * Set c_has_waiters to 1 before releasing the user mutex, but
2498 	 * avoid dirtying the cache line when it is already set.
2499 	 */
2500 	error = fueword32(&cv->c_has_waiters, &hasw);
2501 	if (error == 0 && hasw == 0)
2502 		suword32(&cv->c_has_waiters, 1);
2503 
2504 	umtxq_unbusy_unlocked(&uq->uq_key);
2505 
2506 	error = do_unlock_umutex(td, m, false);
2507 
2508 	if (timeout != NULL)
2509 		abs_timeout_init(&timo, clockid, (wflags & CVWAIT_ABSTIME) != 0,
2510 		    timeout);
2511 
2512 	umtxq_lock(&uq->uq_key);
2513 	if (error == 0) {
2514 		error = umtxq_sleep(uq, "ucond", timeout == NULL ?
2515 		    NULL : &timo);
2516 	}
2517 
2518 	if ((uq->uq_flags & UQF_UMTXQ) == 0)
2519 		error = 0;
2520 	else {
2521 		/*
2522 		 * We were woken by a timeout, a signal, or a spurious
2523 		 * wakeup; clear the c_has_waiters flag when
2524 		 * necessary.
2525 		 */
2526 		umtxq_busy(&uq->uq_key);
2527 		if ((uq->uq_flags & UQF_UMTXQ) != 0) {
2528 			int oldlen = uq->uq_cur_queue->length;
2529 			umtxq_remove(uq);
2530 			if (oldlen == 1) {
2531 				umtxq_unlock(&uq->uq_key);
2532 				suword32(&cv->c_has_waiters, 0);
2533 				umtxq_lock(&uq->uq_key);
2534 			}
2535 		}
2536 		umtxq_unbusy(&uq->uq_key);
2537 		if (error == ERESTART)
2538 			error = EINTR;
2539 	}
2540 
2541 	umtxq_unlock(&uq->uq_key);
2542 	umtx_key_release(&uq->uq_key);
2543 	return (error);
2544 }
2545 
2546 /*
2547  * Signal a userland condition variable.
2548 */ 2549 static int 2550 do_cv_signal(struct thread *td, struct ucond *cv) 2551 { 2552 struct umtx_key key; 2553 int error, cnt, nwake; 2554 uint32_t flags; 2555 2556 error = fueword32(&cv->c_flags, &flags); 2557 if (error == -1) 2558 return (EFAULT); 2559 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2560 return (error); 2561 umtxq_lock(&key); 2562 umtxq_busy(&key); 2563 cnt = umtxq_count(&key); 2564 nwake = umtxq_signal(&key, 1); 2565 if (cnt <= nwake) { 2566 umtxq_unlock(&key); 2567 error = suword32(&cv->c_has_waiters, 0); 2568 if (error == -1) 2569 error = EFAULT; 2570 umtxq_lock(&key); 2571 } 2572 umtxq_unbusy(&key); 2573 umtxq_unlock(&key); 2574 umtx_key_release(&key); 2575 return (error); 2576 } 2577 2578 static int 2579 do_cv_broadcast(struct thread *td, struct ucond *cv) 2580 { 2581 struct umtx_key key; 2582 int error; 2583 uint32_t flags; 2584 2585 error = fueword32(&cv->c_flags, &flags); 2586 if (error == -1) 2587 return (EFAULT); 2588 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2589 return (error); 2590 2591 umtxq_lock(&key); 2592 umtxq_busy(&key); 2593 umtxq_signal(&key, INT_MAX); 2594 umtxq_unlock(&key); 2595 2596 error = suword32(&cv->c_has_waiters, 0); 2597 if (error == -1) 2598 error = EFAULT; 2599 2600 umtxq_unbusy_unlocked(&key); 2601 2602 umtx_key_release(&key); 2603 return (error); 2604 } 2605 2606 static int 2607 do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, struct _umtx_time *timeout) 2608 { 2609 struct abs_timeout timo; 2610 struct umtx_q *uq; 2611 uint32_t flags, wrflags; 2612 int32_t state, oldstate; 2613 int32_t blocked_readers; 2614 int error, error1, rv; 2615 2616 uq = td->td_umtxq; 2617 error = fueword32(&rwlock->rw_flags, &flags); 2618 if (error == -1) 2619 return (EFAULT); 2620 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2621 if (error != 0) 2622 return (error); 2623 2624 if (timeout != NULL) 2625 abs_timeout_init2(&timo, timeout); 2626 2627 wrflags = URWLOCK_WRITE_OWNER; 2628 if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER)) 2629 wrflags |= URWLOCK_WRITE_WAITERS; 2630 2631 for (;;) { 2632 rv = fueword32(&rwlock->rw_state, &state); 2633 if (rv == -1) { 2634 umtx_key_release(&uq->uq_key); 2635 return (EFAULT); 2636 } 2637 2638 /* try to lock it */ 2639 while (!(state & wrflags)) { 2640 if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) { 2641 umtx_key_release(&uq->uq_key); 2642 return (EAGAIN); 2643 } 2644 rv = casueword32(&rwlock->rw_state, state, 2645 &oldstate, state + 1); 2646 if (rv == -1) { 2647 umtx_key_release(&uq->uq_key); 2648 return (EFAULT); 2649 } 2650 if (oldstate == state) { 2651 umtx_key_release(&uq->uq_key); 2652 return (0); 2653 } 2654 error = umtxq_check_susp(td); 2655 if (error != 0) 2656 break; 2657 state = oldstate; 2658 } 2659 2660 if (error) 2661 break; 2662 2663 /* grab monitor lock */ 2664 umtxq_lock(&uq->uq_key); 2665 umtxq_busy(&uq->uq_key); 2666 umtxq_unlock(&uq->uq_key); 2667 2668 /* 2669 * re-read the state, in case it changed between the try-lock above 2670 * and the check below 2671 */ 2672 rv = fueword32(&rwlock->rw_state, &state); 2673 if (rv == -1) 2674 error = EFAULT; 2675 2676 /* set read contention bit */ 2677 while (error == 0 && (state & wrflags) && 2678 !(state & URWLOCK_READ_WAITERS)) { 2679 rv = casueword32(&rwlock->rw_state, state, 2680 &oldstate, state | URWLOCK_READ_WAITERS); 2681 if (rv == -1) { 2682 error = EFAULT; 2683 break; 2684 } 2685 if (oldstate == state) 
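/*
 * Illustrative userland sketch (not part of this file): the rw_state word
 * manipulated here keeps the reader count in its low bits together with the
 * URWLOCK_WRITE_OWNER, URWLOCK_WRITE_WAITERS and URWLOCK_READ_WAITERS flag
 * bits.  A library takes a read lock by bumping the count with a
 * compare-and-swap while no writer bits are set and only issues
 * UMTX_OP_RW_RDLOCK (this function) when that fails.  URWLOCK_PREFER_READER
 * handling is omitted and the wrapper name is hypothetical.
 *
 *	#include <sys/types.h>
 *	#include <sys/umtx.h>
 *	#include <machine/atomic.h>
 *
 *	static int
 *	example_rw_rdlock(struct urwlock *rw)
 *	{
 *		int32_t state;
 *
 *		for (;;) {
 *			state = rw->rw_state;
 *			if ((state & (URWLOCK_WRITE_OWNER |
 *			    URWLOCK_WRITE_WAITERS)) != 0 ||
 *			    URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)
 *				break;		// contended, fall into the kernel
 *			if (atomic_cmpset_acq_32(
 *			    (volatile uint32_t *)&rw->rw_state, state,
 *			    state + 1))
 *				return (0);	// read lock taken in userland
 *		}
 *		return (_umtx_op(rw, UMTX_OP_RW_RDLOCK, 0, NULL, NULL));
 *	}
 */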
2686 goto sleep; 2687 state = oldstate; 2688 error = umtxq_check_susp(td); 2689 if (error != 0) 2690 break; 2691 } 2692 if (error != 0) { 2693 umtxq_unbusy_unlocked(&uq->uq_key); 2694 break; 2695 } 2696 2697 /* state is changed while setting flags, restart */ 2698 if (!(state & wrflags)) { 2699 umtxq_unbusy_unlocked(&uq->uq_key); 2700 error = umtxq_check_susp(td); 2701 if (error != 0) 2702 break; 2703 continue; 2704 } 2705 2706 sleep: 2707 /* contention bit is set, before sleeping, increase read waiter count */ 2708 rv = fueword32(&rwlock->rw_blocked_readers, 2709 &blocked_readers); 2710 if (rv == -1) { 2711 umtxq_unbusy_unlocked(&uq->uq_key); 2712 error = EFAULT; 2713 break; 2714 } 2715 suword32(&rwlock->rw_blocked_readers, blocked_readers+1); 2716 2717 while (state & wrflags) { 2718 umtxq_lock(&uq->uq_key); 2719 umtxq_insert(uq); 2720 umtxq_unbusy(&uq->uq_key); 2721 2722 error = umtxq_sleep(uq, "urdlck", timeout == NULL ? 2723 NULL : &timo); 2724 2725 umtxq_busy(&uq->uq_key); 2726 umtxq_remove(uq); 2727 umtxq_unlock(&uq->uq_key); 2728 if (error) 2729 break; 2730 rv = fueword32(&rwlock->rw_state, &state); 2731 if (rv == -1) { 2732 error = EFAULT; 2733 break; 2734 } 2735 } 2736 2737 /* decrease read waiter count, and may clear read contention bit */ 2738 rv = fueword32(&rwlock->rw_blocked_readers, 2739 &blocked_readers); 2740 if (rv == -1) { 2741 umtxq_unbusy_unlocked(&uq->uq_key); 2742 error = EFAULT; 2743 break; 2744 } 2745 suword32(&rwlock->rw_blocked_readers, blocked_readers-1); 2746 if (blocked_readers == 1) { 2747 rv = fueword32(&rwlock->rw_state, &state); 2748 if (rv == -1) { 2749 umtxq_unbusy_unlocked(&uq->uq_key); 2750 error = EFAULT; 2751 break; 2752 } 2753 for (;;) { 2754 rv = casueword32(&rwlock->rw_state, state, 2755 &oldstate, state & ~URWLOCK_READ_WAITERS); 2756 if (rv == -1) { 2757 error = EFAULT; 2758 break; 2759 } 2760 if (oldstate == state) 2761 break; 2762 state = oldstate; 2763 error1 = umtxq_check_susp(td); 2764 if (error1 != 0) { 2765 if (error == 0) 2766 error = error1; 2767 break; 2768 } 2769 } 2770 } 2771 2772 umtxq_unbusy_unlocked(&uq->uq_key); 2773 if (error != 0) 2774 break; 2775 } 2776 umtx_key_release(&uq->uq_key); 2777 if (error == ERESTART) 2778 error = EINTR; 2779 return (error); 2780 } 2781 2782 static int 2783 do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout) 2784 { 2785 struct abs_timeout timo; 2786 struct umtx_q *uq; 2787 uint32_t flags; 2788 int32_t state, oldstate; 2789 int32_t blocked_writers; 2790 int32_t blocked_readers; 2791 int error, error1, rv; 2792 2793 uq = td->td_umtxq; 2794 error = fueword32(&rwlock->rw_flags, &flags); 2795 if (error == -1) 2796 return (EFAULT); 2797 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2798 if (error != 0) 2799 return (error); 2800 2801 if (timeout != NULL) 2802 abs_timeout_init2(&timo, timeout); 2803 2804 blocked_readers = 0; 2805 for (;;) { 2806 rv = fueword32(&rwlock->rw_state, &state); 2807 if (rv == -1) { 2808 umtx_key_release(&uq->uq_key); 2809 return (EFAULT); 2810 } 2811 while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) { 2812 rv = casueword32(&rwlock->rw_state, state, 2813 &oldstate, state | URWLOCK_WRITE_OWNER); 2814 if (rv == -1) { 2815 umtx_key_release(&uq->uq_key); 2816 return (EFAULT); 2817 } 2818 if (oldstate == state) { 2819 umtx_key_release(&uq->uq_key); 2820 return (0); 2821 } 2822 state = oldstate; 2823 error = umtxq_check_susp(td); 2824 if (error != 0) 2825 break; 2826 } 2827 2828 if (error) { 2829 if 
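/*
 * Illustrative userland sketch (not part of this file): the write-lock fast
 * path mirrors the kernel try-lock loop above, entering the kernel via
 * UMTX_OP_RW_WRLOCK only when the lock is owned or read-locked.  Same
 * headers as the read-lock sketch earlier; the wrapper name is hypothetical.
 *
 *	static int
 *	example_rw_wrlock(struct urwlock *rw)
 *	{
 *		int32_t state;
 *
 *		for (;;) {
 *			state = rw->rw_state;
 *			if ((state & URWLOCK_WRITE_OWNER) != 0 ||
 *			    URWLOCK_READER_COUNT(state) != 0)
 *				break;		// held, let the kernel queue us
 *			if (atomic_cmpset_acq_32(
 *			    (volatile uint32_t *)&rw->rw_state, state,
 *			    state | URWLOCK_WRITE_OWNER))
 *				return (0);
 *		}
 *		return (_umtx_op(rw, UMTX_OP_RW_WRLOCK, 0, NULL, NULL));
 *	}
 */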
(!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) && 2830 blocked_readers != 0) { 2831 umtxq_lock(&uq->uq_key); 2832 umtxq_busy(&uq->uq_key); 2833 umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE); 2834 umtxq_unbusy(&uq->uq_key); 2835 umtxq_unlock(&uq->uq_key); 2836 } 2837 2838 break; 2839 } 2840 2841 /* grab monitor lock */ 2842 umtxq_lock(&uq->uq_key); 2843 umtxq_busy(&uq->uq_key); 2844 umtxq_unlock(&uq->uq_key); 2845 2846 /* 2847 * re-read the state, in case it changed between the try-lock above 2848 * and the check below 2849 */ 2850 rv = fueword32(&rwlock->rw_state, &state); 2851 if (rv == -1) 2852 error = EFAULT; 2853 2854 while (error == 0 && ((state & URWLOCK_WRITE_OWNER) || 2855 URWLOCK_READER_COUNT(state) != 0) && 2856 (state & URWLOCK_WRITE_WAITERS) == 0) { 2857 rv = casueword32(&rwlock->rw_state, state, 2858 &oldstate, state | URWLOCK_WRITE_WAITERS); 2859 if (rv == -1) { 2860 error = EFAULT; 2861 break; 2862 } 2863 if (oldstate == state) 2864 goto sleep; 2865 state = oldstate; 2866 error = umtxq_check_susp(td); 2867 if (error != 0) 2868 break; 2869 } 2870 if (error != 0) { 2871 umtxq_unbusy_unlocked(&uq->uq_key); 2872 break; 2873 } 2874 2875 if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) { 2876 umtxq_unbusy_unlocked(&uq->uq_key); 2877 error = umtxq_check_susp(td); 2878 if (error != 0) 2879 break; 2880 continue; 2881 } 2882 sleep: 2883 rv = fueword32(&rwlock->rw_blocked_writers, 2884 &blocked_writers); 2885 if (rv == -1) { 2886 umtxq_unbusy_unlocked(&uq->uq_key); 2887 error = EFAULT; 2888 break; 2889 } 2890 suword32(&rwlock->rw_blocked_writers, blocked_writers+1); 2891 2892 while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) { 2893 umtxq_lock(&uq->uq_key); 2894 umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE); 2895 umtxq_unbusy(&uq->uq_key); 2896 2897 error = umtxq_sleep(uq, "uwrlck", timeout == NULL ? 2898 NULL : &timo); 2899 2900 umtxq_busy(&uq->uq_key); 2901 umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE); 2902 umtxq_unlock(&uq->uq_key); 2903 if (error) 2904 break; 2905 rv = fueword32(&rwlock->rw_state, &state); 2906 if (rv == -1) { 2907 error = EFAULT; 2908 break; 2909 } 2910 } 2911 2912 rv = fueword32(&rwlock->rw_blocked_writers, 2913 &blocked_writers); 2914 if (rv == -1) { 2915 umtxq_unbusy_unlocked(&uq->uq_key); 2916 error = EFAULT; 2917 break; 2918 } 2919 suword32(&rwlock->rw_blocked_writers, blocked_writers-1); 2920 if (blocked_writers == 1) { 2921 rv = fueword32(&rwlock->rw_state, &state); 2922 if (rv == -1) { 2923 umtxq_unbusy_unlocked(&uq->uq_key); 2924 error = EFAULT; 2925 break; 2926 } 2927 for (;;) { 2928 rv = casueword32(&rwlock->rw_state, state, 2929 &oldstate, state & ~URWLOCK_WRITE_WAITERS); 2930 if (rv == -1) { 2931 error = EFAULT; 2932 break; 2933 } 2934 if (oldstate == state) 2935 break; 2936 state = oldstate; 2937 error1 = umtxq_check_susp(td); 2938 /* 2939 * We are leaving the URWLOCK_WRITE_WAITERS 2940 * behind, but this should not harm the 2941 * correctness. 
2942 */ 2943 if (error1 != 0) { 2944 if (error == 0) 2945 error = error1; 2946 break; 2947 } 2948 } 2949 rv = fueword32(&rwlock->rw_blocked_readers, 2950 &blocked_readers); 2951 if (rv == -1) { 2952 umtxq_unbusy_unlocked(&uq->uq_key); 2953 error = EFAULT; 2954 break; 2955 } 2956 } else 2957 blocked_readers = 0; 2958 2959 umtxq_unbusy_unlocked(&uq->uq_key); 2960 } 2961 2962 umtx_key_release(&uq->uq_key); 2963 if (error == ERESTART) 2964 error = EINTR; 2965 return (error); 2966 } 2967 2968 static int 2969 do_rw_unlock(struct thread *td, struct urwlock *rwlock) 2970 { 2971 struct umtx_q *uq; 2972 uint32_t flags; 2973 int32_t state, oldstate; 2974 int error, rv, q, count; 2975 2976 uq = td->td_umtxq; 2977 error = fueword32(&rwlock->rw_flags, &flags); 2978 if (error == -1) 2979 return (EFAULT); 2980 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2981 if (error != 0) 2982 return (error); 2983 2984 error = fueword32(&rwlock->rw_state, &state); 2985 if (error == -1) { 2986 error = EFAULT; 2987 goto out; 2988 } 2989 if (state & URWLOCK_WRITE_OWNER) { 2990 for (;;) { 2991 rv = casueword32(&rwlock->rw_state, state, 2992 &oldstate, state & ~URWLOCK_WRITE_OWNER); 2993 if (rv == -1) { 2994 error = EFAULT; 2995 goto out; 2996 } 2997 if (oldstate != state) { 2998 state = oldstate; 2999 if (!(oldstate & URWLOCK_WRITE_OWNER)) { 3000 error = EPERM; 3001 goto out; 3002 } 3003 error = umtxq_check_susp(td); 3004 if (error != 0) 3005 goto out; 3006 } else 3007 break; 3008 } 3009 } else if (URWLOCK_READER_COUNT(state) != 0) { 3010 for (;;) { 3011 rv = casueword32(&rwlock->rw_state, state, 3012 &oldstate, state - 1); 3013 if (rv == -1) { 3014 error = EFAULT; 3015 goto out; 3016 } 3017 if (oldstate != state) { 3018 state = oldstate; 3019 if (URWLOCK_READER_COUNT(oldstate) == 0) { 3020 error = EPERM; 3021 goto out; 3022 } 3023 error = umtxq_check_susp(td); 3024 if (error != 0) 3025 goto out; 3026 } else 3027 break; 3028 } 3029 } else { 3030 error = EPERM; 3031 goto out; 3032 } 3033 3034 count = 0; 3035 3036 if (!(flags & URWLOCK_PREFER_READER)) { 3037 if (state & URWLOCK_WRITE_WAITERS) { 3038 count = 1; 3039 q = UMTX_EXCLUSIVE_QUEUE; 3040 } else if (state & URWLOCK_READ_WAITERS) { 3041 count = INT_MAX; 3042 q = UMTX_SHARED_QUEUE; 3043 } 3044 } else { 3045 if (state & URWLOCK_READ_WAITERS) { 3046 count = INT_MAX; 3047 q = UMTX_SHARED_QUEUE; 3048 } else if (state & URWLOCK_WRITE_WAITERS) { 3049 count = 1; 3050 q = UMTX_EXCLUSIVE_QUEUE; 3051 } 3052 } 3053 3054 if (count) { 3055 umtxq_lock(&uq->uq_key); 3056 umtxq_busy(&uq->uq_key); 3057 umtxq_signal_queue(&uq->uq_key, count, q); 3058 umtxq_unbusy(&uq->uq_key); 3059 umtxq_unlock(&uq->uq_key); 3060 } 3061 out: 3062 umtx_key_release(&uq->uq_key); 3063 return (error); 3064 } 3065 3066 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3067 static int 3068 do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout) 3069 { 3070 struct abs_timeout timo; 3071 struct umtx_q *uq; 3072 uint32_t flags, count, count1; 3073 int error, rv; 3074 3075 uq = td->td_umtxq; 3076 error = fueword32(&sem->_flags, &flags); 3077 if (error == -1) 3078 return (EFAULT); 3079 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3080 if (error != 0) 3081 return (error); 3082 3083 if (timeout != NULL) 3084 abs_timeout_init2(&timo, timeout); 3085 3086 umtxq_lock(&uq->uq_key); 3087 umtxq_busy(&uq->uq_key); 3088 umtxq_insert(uq); 3089 umtxq_unlock(&uq->uq_key); 3090 rv = casueword32(&sem->_has_waiters, 0, &count1, 1); 3091 if (rv == 0) 
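/*
 * Illustrative userland sketch (not part of this file): the historical
 * _usem interface serviced here (kept for COMPAT_FREEBSD9/10 binaries)
 * splits the semaphore into a _count word and a separate _has_waiters word.
 * A post bumps _count and only calls UMTX_OP_SEM_WAKE when the kernel may
 * have sleepers.  The wrapper name is hypothetical.
 *
 *	#include <sys/types.h>
 *	#include <sys/umtx.h>
 *	#include <machine/atomic.h>
 *
 *	static void
 *	example_old_sem_post(struct _usem *sem)
 *	{
 *		atomic_add_rel_32(&sem->_count, 1);
 *		if (sem->_has_waiters)
 *			(void)_umtx_op(sem, UMTX_OP_SEM_WAKE, 0, NULL, NULL);
 *	}
 */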
3092 rv = fueword32(&sem->_count, &count); 3093 if (rv == -1 || count != 0) { 3094 umtxq_lock(&uq->uq_key); 3095 umtxq_unbusy(&uq->uq_key); 3096 umtxq_remove(uq); 3097 umtxq_unlock(&uq->uq_key); 3098 umtx_key_release(&uq->uq_key); 3099 return (rv == -1 ? EFAULT : 0); 3100 } 3101 umtxq_lock(&uq->uq_key); 3102 umtxq_unbusy(&uq->uq_key); 3103 3104 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 3105 3106 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3107 error = 0; 3108 else { 3109 umtxq_remove(uq); 3110 /* A relative timeout cannot be restarted. */ 3111 if (error == ERESTART && timeout != NULL && 3112 (timeout->_flags & UMTX_ABSTIME) == 0) 3113 error = EINTR; 3114 } 3115 umtxq_unlock(&uq->uq_key); 3116 umtx_key_release(&uq->uq_key); 3117 return (error); 3118 } 3119 3120 /* 3121 * Signal a userland semaphore. 3122 */ 3123 static int 3124 do_sem_wake(struct thread *td, struct _usem *sem) 3125 { 3126 struct umtx_key key; 3127 int error, cnt; 3128 uint32_t flags; 3129 3130 error = fueword32(&sem->_flags, &flags); 3131 if (error == -1) 3132 return (EFAULT); 3133 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3134 return (error); 3135 umtxq_lock(&key); 3136 umtxq_busy(&key); 3137 cnt = umtxq_count(&key); 3138 if (cnt > 0) { 3139 /* 3140 * Check if count is greater than 0, this means the memory is 3141 * still being referenced by user code, so we can safely 3142 * update _has_waiters flag. 3143 */ 3144 if (cnt == 1) { 3145 umtxq_unlock(&key); 3146 error = suword32(&sem->_has_waiters, 0); 3147 umtxq_lock(&key); 3148 if (error == -1) 3149 error = EFAULT; 3150 } 3151 umtxq_signal(&key, 1); 3152 } 3153 umtxq_unbusy(&key); 3154 umtxq_unlock(&key); 3155 umtx_key_release(&key); 3156 return (error); 3157 } 3158 #endif 3159 3160 static int 3161 do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout) 3162 { 3163 struct abs_timeout timo; 3164 struct umtx_q *uq; 3165 uint32_t count, flags; 3166 int error, rv; 3167 3168 uq = td->td_umtxq; 3169 flags = fuword32(&sem->_flags); 3170 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3171 if (error != 0) 3172 return (error); 3173 3174 if (timeout != NULL) 3175 abs_timeout_init2(&timo, timeout); 3176 3177 umtxq_lock(&uq->uq_key); 3178 umtxq_busy(&uq->uq_key); 3179 umtxq_insert(uq); 3180 umtxq_unlock(&uq->uq_key); 3181 rv = fueword32(&sem->_count, &count); 3182 if (rv == -1) { 3183 umtxq_lock(&uq->uq_key); 3184 umtxq_unbusy(&uq->uq_key); 3185 umtxq_remove(uq); 3186 umtxq_unlock(&uq->uq_key); 3187 umtx_key_release(&uq->uq_key); 3188 return (EFAULT); 3189 } 3190 for (;;) { 3191 if (USEM_COUNT(count) != 0) { 3192 umtxq_lock(&uq->uq_key); 3193 umtxq_unbusy(&uq->uq_key); 3194 umtxq_remove(uq); 3195 umtxq_unlock(&uq->uq_key); 3196 umtx_key_release(&uq->uq_key); 3197 return (0); 3198 } 3199 if (count == USEM_HAS_WAITERS) 3200 break; 3201 rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS); 3202 if (rv == -1) { 3203 umtxq_lock(&uq->uq_key); 3204 umtxq_unbusy(&uq->uq_key); 3205 umtxq_remove(uq); 3206 umtxq_unlock(&uq->uq_key); 3207 umtx_key_release(&uq->uq_key); 3208 return (EFAULT); 3209 } 3210 if (count == 0) 3211 break; 3212 } 3213 umtxq_lock(&uq->uq_key); 3214 umtxq_unbusy(&uq->uq_key); 3215 3216 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 3217 3218 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3219 error = 0; 3220 else { 3221 umtxq_remove(uq); 3222 if (timeout != NULL && (timeout->_flags & UMTX_ABSTIME) == 0) { 3223 /* A relative timeout cannot be restarted. 
*/ 3224 if (error == ERESTART) 3225 error = EINTR; 3226 if (error == EINTR) { 3227 abs_timeout_update(&timo); 3228 timeout->_timeout = timo.end; 3229 timespecsub(&timeout->_timeout, &timo.cur); 3230 } 3231 } 3232 } 3233 umtxq_unlock(&uq->uq_key); 3234 umtx_key_release(&uq->uq_key); 3235 return (error); 3236 } 3237 3238 /* 3239 * Signal a userland semaphore. 3240 */ 3241 static int 3242 do_sem2_wake(struct thread *td, struct _usem2 *sem) 3243 { 3244 struct umtx_key key; 3245 int error, cnt, rv; 3246 uint32_t count, flags; 3247 3248 rv = fueword32(&sem->_flags, &flags); 3249 if (rv == -1) 3250 return (EFAULT); 3251 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3252 return (error); 3253 umtxq_lock(&key); 3254 umtxq_busy(&key); 3255 cnt = umtxq_count(&key); 3256 if (cnt > 0) { 3257 /* 3258 * If this was the last sleeping thread, clear the waiters 3259 * flag in _count. 3260 */ 3261 if (cnt == 1) { 3262 umtxq_unlock(&key); 3263 rv = fueword32(&sem->_count, &count); 3264 while (rv != -1 && count & USEM_HAS_WAITERS) 3265 rv = casueword32(&sem->_count, count, &count, 3266 count & ~USEM_HAS_WAITERS); 3267 if (rv == -1) 3268 error = EFAULT; 3269 umtxq_lock(&key); 3270 } 3271 3272 umtxq_signal(&key, 1); 3273 } 3274 umtxq_unbusy(&key); 3275 umtxq_unlock(&key); 3276 umtx_key_release(&key); 3277 return (error); 3278 } 3279 3280 inline int 3281 umtx_copyin_timeout(const void *addr, struct timespec *tsp) 3282 { 3283 int error; 3284 3285 error = copyin(addr, tsp, sizeof(struct timespec)); 3286 if (error == 0) { 3287 if (tsp->tv_sec < 0 || 3288 tsp->tv_nsec >= 1000000000 || 3289 tsp->tv_nsec < 0) 3290 error = EINVAL; 3291 } 3292 return (error); 3293 } 3294 3295 static inline int 3296 umtx_copyin_umtx_time(const void *addr, size_t size, struct _umtx_time *tp) 3297 { 3298 int error; 3299 3300 if (size <= sizeof(struct timespec)) { 3301 tp->_clockid = CLOCK_REALTIME; 3302 tp->_flags = 0; 3303 error = copyin(addr, &tp->_timeout, sizeof(struct timespec)); 3304 } else 3305 error = copyin(addr, tp, sizeof(struct _umtx_time)); 3306 if (error != 0) 3307 return (error); 3308 if (tp->_timeout.tv_sec < 0 || 3309 tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0) 3310 return (EINVAL); 3311 return (0); 3312 } 3313 3314 static int 3315 __umtx_op_unimpl(struct thread *td, struct _umtx_op_args *uap) 3316 { 3317 3318 return (EOPNOTSUPP); 3319 } 3320 3321 static int 3322 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap) 3323 { 3324 struct _umtx_time timeout, *tm_p; 3325 int error; 3326 3327 if (uap->uaddr2 == NULL) 3328 tm_p = NULL; 3329 else { 3330 error = umtx_copyin_umtx_time( 3331 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3332 if (error != 0) 3333 return (error); 3334 tm_p = &timeout; 3335 } 3336 return (do_wait(td, uap->obj, uap->val, tm_p, 0, 0)); 3337 } 3338 3339 static int 3340 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap) 3341 { 3342 struct _umtx_time timeout, *tm_p; 3343 int error; 3344 3345 if (uap->uaddr2 == NULL) 3346 tm_p = NULL; 3347 else { 3348 error = umtx_copyin_umtx_time( 3349 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3350 if (error != 0) 3351 return (error); 3352 tm_p = &timeout; 3353 } 3354 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); 3355 } 3356 3357 static int 3358 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap) 3359 { 3360 struct _umtx_time *tm_p, timeout; 3361 int error; 3362 3363 if (uap->uaddr2 == NULL) 3364 tm_p = NULL; 3365 else { 3366 error = umtx_copyin_umtx_time( 3367 uap->uaddr2, 
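/*
 * Illustrative userland sketch (not part of this file): for the wait ops
 * the size of the timeout structure is passed through uaddr1 and the
 * structure itself through uaddr2, as decoded by umtx_copyin_umtx_time()
 * above; a NULL uaddr2 means wait forever.  The wrapper name is
 * hypothetical.
 *
 *	#include <sys/types.h>
 *	#include <sys/umtx.h>
 *	#include <time.h>
 *
 *	static int
 *	example_wait_uint(u_int *word, u_int expected,
 *	    const struct timespec *abstime)
 *	{
 *		struct _umtx_time ut;
 *
 *		if (abstime == NULL)
 *			return (_umtx_op(word, UMTX_OP_WAIT_UINT_PRIVATE,
 *			    expected, NULL, NULL));
 *		ut._timeout = *abstime;
 *		ut._flags = UMTX_ABSTIME;
 *		ut._clockid = CLOCK_MONOTONIC;
 *		return (_umtx_op(word, UMTX_OP_WAIT_UINT_PRIVATE, expected,
 *		    (void *)(uintptr_t)sizeof(ut), &ut));
 *	}
 *
 * A waker would pair this with
 * _umtx_op(word, UMTX_OP_WAKE_PRIVATE, n, NULL, NULL).
 */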
(size_t)uap->uaddr1, &timeout); 3368 if (error != 0) 3369 return (error); 3370 tm_p = &timeout; 3371 } 3372 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1)); 3373 } 3374 3375 static int 3376 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap) 3377 { 3378 3379 return (kern_umtx_wake(td, uap->obj, uap->val, 0)); 3380 } 3381 3382 #define BATCH_SIZE 128 3383 static int 3384 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap) 3385 { 3386 char *uaddrs[BATCH_SIZE], **upp; 3387 int count, error, i, pos, tocopy; 3388 3389 upp = (char **)uap->obj; 3390 error = 0; 3391 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 3392 pos += tocopy) { 3393 tocopy = MIN(count, BATCH_SIZE); 3394 error = copyin(upp + pos, uaddrs, tocopy * sizeof(char *)); 3395 if (error != 0) 3396 break; 3397 for (i = 0; i < tocopy; ++i) 3398 kern_umtx_wake(td, uaddrs[i], INT_MAX, 1); 3399 maybe_yield(); 3400 } 3401 return (error); 3402 } 3403 3404 static int 3405 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap) 3406 { 3407 3408 return (kern_umtx_wake(td, uap->obj, uap->val, 1)); 3409 } 3410 3411 static int 3412 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap) 3413 { 3414 struct _umtx_time *tm_p, timeout; 3415 int error; 3416 3417 /* Allow a null timespec (wait forever). */ 3418 if (uap->uaddr2 == NULL) 3419 tm_p = NULL; 3420 else { 3421 error = umtx_copyin_umtx_time( 3422 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3423 if (error != 0) 3424 return (error); 3425 tm_p = &timeout; 3426 } 3427 return (do_lock_umutex(td, uap->obj, tm_p, 0)); 3428 } 3429 3430 static int 3431 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap) 3432 { 3433 3434 return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY)); 3435 } 3436 3437 static int 3438 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap) 3439 { 3440 struct _umtx_time *tm_p, timeout; 3441 int error; 3442 3443 /* Allow a null timespec (wait forever). */ 3444 if (uap->uaddr2 == NULL) 3445 tm_p = NULL; 3446 else { 3447 error = umtx_copyin_umtx_time( 3448 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3449 if (error != 0) 3450 return (error); 3451 tm_p = &timeout; 3452 } 3453 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT)); 3454 } 3455 3456 static int 3457 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap) 3458 { 3459 3460 return (do_wake_umutex(td, uap->obj)); 3461 } 3462 3463 static int 3464 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap) 3465 { 3466 3467 return (do_unlock_umutex(td, uap->obj, false)); 3468 } 3469 3470 static int 3471 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap) 3472 { 3473 3474 return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1)); 3475 } 3476 3477 static int 3478 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap) 3479 { 3480 struct timespec *ts, timeout; 3481 int error; 3482 3483 /* Allow a null timespec (wait forever). 
*/ 3484 if (uap->uaddr2 == NULL) 3485 ts = NULL; 3486 else { 3487 error = umtx_copyin_timeout(uap->uaddr2, &timeout); 3488 if (error != 0) 3489 return (error); 3490 ts = &timeout; 3491 } 3492 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 3493 } 3494 3495 static int 3496 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap) 3497 { 3498 3499 return (do_cv_signal(td, uap->obj)); 3500 } 3501 3502 static int 3503 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap) 3504 { 3505 3506 return (do_cv_broadcast(td, uap->obj)); 3507 } 3508 3509 static int 3510 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap) 3511 { 3512 struct _umtx_time timeout; 3513 int error; 3514 3515 /* Allow a null timespec (wait forever). */ 3516 if (uap->uaddr2 == NULL) { 3517 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 3518 } else { 3519 error = umtx_copyin_umtx_time(uap->uaddr2, 3520 (size_t)uap->uaddr1, &timeout); 3521 if (error != 0) 3522 return (error); 3523 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 3524 } 3525 return (error); 3526 } 3527 3528 static int 3529 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap) 3530 { 3531 struct _umtx_time timeout; 3532 int error; 3533 3534 /* Allow a null timespec (wait forever). */ 3535 if (uap->uaddr2 == NULL) { 3536 error = do_rw_wrlock(td, uap->obj, 0); 3537 } else { 3538 error = umtx_copyin_umtx_time(uap->uaddr2, 3539 (size_t)uap->uaddr1, &timeout); 3540 if (error != 0) 3541 return (error); 3542 3543 error = do_rw_wrlock(td, uap->obj, &timeout); 3544 } 3545 return (error); 3546 } 3547 3548 static int 3549 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap) 3550 { 3551 3552 return (do_rw_unlock(td, uap->obj)); 3553 } 3554 3555 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3556 static int 3557 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap) 3558 { 3559 struct _umtx_time *tm_p, timeout; 3560 int error; 3561 3562 /* Allow a null timespec (wait forever). */ 3563 if (uap->uaddr2 == NULL) 3564 tm_p = NULL; 3565 else { 3566 error = umtx_copyin_umtx_time( 3567 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3568 if (error != 0) 3569 return (error); 3570 tm_p = &timeout; 3571 } 3572 return (do_sem_wait(td, uap->obj, tm_p)); 3573 } 3574 3575 static int 3576 __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap) 3577 { 3578 3579 return (do_sem_wake(td, uap->obj)); 3580 } 3581 #endif 3582 3583 static int 3584 __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap) 3585 { 3586 3587 return (do_wake2_umutex(td, uap->obj, uap->val)); 3588 } 3589 3590 static int 3591 __umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap) 3592 { 3593 struct _umtx_time *tm_p, timeout; 3594 size_t uasize; 3595 int error; 3596 3597 /* Allow a null timespec (wait forever). 
*/ 3598 if (uap->uaddr2 == NULL) { 3599 uasize = 0; 3600 tm_p = NULL; 3601 } else { 3602 uasize = (size_t)uap->uaddr1; 3603 error = umtx_copyin_umtx_time(uap->uaddr2, uasize, &timeout); 3604 if (error != 0) 3605 return (error); 3606 tm_p = &timeout; 3607 } 3608 error = do_sem2_wait(td, uap->obj, tm_p); 3609 if (error == EINTR && uap->uaddr2 != NULL && 3610 (timeout._flags & UMTX_ABSTIME) == 0 && 3611 uasize >= sizeof(struct _umtx_time) + sizeof(struct timespec)) { 3612 error = copyout(&timeout._timeout, 3613 (struct _umtx_time *)uap->uaddr2 + 1, 3614 sizeof(struct timespec)); 3615 if (error == 0) { 3616 error = EINTR; 3617 } 3618 } 3619 3620 return (error); 3621 } 3622 3623 static int 3624 __umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap) 3625 { 3626 3627 return (do_sem2_wake(td, uap->obj)); 3628 } 3629 3630 #define USHM_OBJ_UMTX(o) \ 3631 ((struct umtx_shm_obj_list *)(&(o)->umtx_data)) 3632 3633 #define USHMF_REG_LINKED 0x0001 3634 #define USHMF_OBJ_LINKED 0x0002 3635 struct umtx_shm_reg { 3636 TAILQ_ENTRY(umtx_shm_reg) ushm_reg_link; 3637 LIST_ENTRY(umtx_shm_reg) ushm_obj_link; 3638 struct umtx_key ushm_key; 3639 struct ucred *ushm_cred; 3640 struct shmfd *ushm_obj; 3641 u_int ushm_refcnt; 3642 u_int ushm_flags; 3643 }; 3644 3645 LIST_HEAD(umtx_shm_obj_list, umtx_shm_reg); 3646 TAILQ_HEAD(umtx_shm_reg_head, umtx_shm_reg); 3647 3648 static uma_zone_t umtx_shm_reg_zone; 3649 static struct umtx_shm_reg_head umtx_shm_registry[UMTX_CHAINS]; 3650 static struct mtx umtx_shm_lock; 3651 static struct umtx_shm_reg_head umtx_shm_reg_delfree = 3652 TAILQ_HEAD_INITIALIZER(umtx_shm_reg_delfree); 3653 3654 static void umtx_shm_free_reg(struct umtx_shm_reg *reg); 3655 3656 static void 3657 umtx_shm_reg_delfree_tq(void *context __unused, int pending __unused) 3658 { 3659 struct umtx_shm_reg_head d; 3660 struct umtx_shm_reg *reg, *reg1; 3661 3662 TAILQ_INIT(&d); 3663 mtx_lock(&umtx_shm_lock); 3664 TAILQ_CONCAT(&d, &umtx_shm_reg_delfree, ushm_reg_link); 3665 mtx_unlock(&umtx_shm_lock); 3666 TAILQ_FOREACH_SAFE(reg, &d, ushm_reg_link, reg1) { 3667 TAILQ_REMOVE(&d, reg, ushm_reg_link); 3668 umtx_shm_free_reg(reg); 3669 } 3670 } 3671 3672 static struct task umtx_shm_reg_delfree_task = 3673 TASK_INITIALIZER(0, umtx_shm_reg_delfree_tq, NULL); 3674 3675 static struct umtx_shm_reg * 3676 umtx_shm_find_reg_locked(const struct umtx_key *key) 3677 { 3678 struct umtx_shm_reg *reg; 3679 struct umtx_shm_reg_head *reg_head; 3680 3681 KASSERT(key->shared, ("umtx_p_find_rg: private key")); 3682 mtx_assert(&umtx_shm_lock, MA_OWNED); 3683 reg_head = &umtx_shm_registry[key->hash]; 3684 TAILQ_FOREACH(reg, reg_head, ushm_reg_link) { 3685 KASSERT(reg->ushm_key.shared, 3686 ("non-shared key on reg %p %d", reg, reg->ushm_key.shared)); 3687 if (reg->ushm_key.info.shared.object == 3688 key->info.shared.object && 3689 reg->ushm_key.info.shared.offset == 3690 key->info.shared.offset) { 3691 KASSERT(reg->ushm_key.type == TYPE_SHM, ("TYPE_USHM")); 3692 KASSERT(reg->ushm_refcnt > 0, 3693 ("reg %p refcnt 0 onlist", reg)); 3694 KASSERT((reg->ushm_flags & USHMF_REG_LINKED) != 0, 3695 ("reg %p not linked", reg)); 3696 reg->ushm_refcnt++; 3697 return (reg); 3698 } 3699 } 3700 return (NULL); 3701 } 3702 3703 static struct umtx_shm_reg * 3704 umtx_shm_find_reg(const struct umtx_key *key) 3705 { 3706 struct umtx_shm_reg *reg; 3707 3708 mtx_lock(&umtx_shm_lock); 3709 reg = umtx_shm_find_reg_locked(key); 3710 mtx_unlock(&umtx_shm_lock); 3711 return (reg); 3712 } 3713 3714 static void 3715 umtx_shm_free_reg(struct umtx_shm_reg *reg) 
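/*
 * Illustrative userland sketch (not part of this file): when a relative
 * UMTX_OP_SEM2_WAIT is interrupted, the handler above copies the unslept
 * time to the word immediately after the struct _umtx_time in the user
 * buffer, provided uaddr1 advertised enough room.  A caller that wants the
 * remaining time therefore passes a combined buffer.  The names below are
 * hypothetical.
 *
 *	#include <sys/types.h>
 *	#include <sys/umtx.h>
 *
 *	struct example_sem_wait_arg {
 *		struct _umtx_time ut;	// read by the kernel
 *		struct timespec remain;	// written back on EINTR
 *	};
 *
 *	static int
 *	example_sem2_timedwait(struct _usem2 *sem, const struct timespec *rel)
 *	{
 *		struct example_sem_wait_arg a;
 *
 *		a.ut._timeout = *rel;
 *		a.ut._flags = 0;		// relative timeout
 *		a.ut._clockid = CLOCK_MONOTONIC;
 *		return (_umtx_op(sem, UMTX_OP_SEM2_WAIT, 0,
 *		    (void *)(uintptr_t)sizeof(a), &a));
 *	}
 *
 * If this fails with EINTR, a.remain holds the time left and the wait can
 * simply be re-issued with it.
 */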
3716 { 3717 3718 chgumtxcnt(reg->ushm_cred->cr_ruidinfo, -1, 0); 3719 crfree(reg->ushm_cred); 3720 shm_drop(reg->ushm_obj); 3721 uma_zfree(umtx_shm_reg_zone, reg); 3722 } 3723 3724 static bool 3725 umtx_shm_unref_reg_locked(struct umtx_shm_reg *reg, bool force) 3726 { 3727 bool res; 3728 3729 mtx_assert(&umtx_shm_lock, MA_OWNED); 3730 KASSERT(reg->ushm_refcnt > 0, ("ushm_reg %p refcnt 0", reg)); 3731 reg->ushm_refcnt--; 3732 res = reg->ushm_refcnt == 0; 3733 if (res || force) { 3734 if ((reg->ushm_flags & USHMF_REG_LINKED) != 0) { 3735 TAILQ_REMOVE(&umtx_shm_registry[reg->ushm_key.hash], 3736 reg, ushm_reg_link); 3737 reg->ushm_flags &= ~USHMF_REG_LINKED; 3738 } 3739 if ((reg->ushm_flags & USHMF_OBJ_LINKED) != 0) { 3740 LIST_REMOVE(reg, ushm_obj_link); 3741 reg->ushm_flags &= ~USHMF_OBJ_LINKED; 3742 } 3743 } 3744 return (res); 3745 } 3746 3747 static void 3748 umtx_shm_unref_reg(struct umtx_shm_reg *reg, bool force) 3749 { 3750 vm_object_t object; 3751 bool dofree; 3752 3753 if (force) { 3754 object = reg->ushm_obj->shm_object; 3755 VM_OBJECT_WLOCK(object); 3756 object->flags |= OBJ_UMTXDEAD; 3757 VM_OBJECT_WUNLOCK(object); 3758 } 3759 mtx_lock(&umtx_shm_lock); 3760 dofree = umtx_shm_unref_reg_locked(reg, force); 3761 mtx_unlock(&umtx_shm_lock); 3762 if (dofree) 3763 umtx_shm_free_reg(reg); 3764 } 3765 3766 void 3767 umtx_shm_object_init(vm_object_t object) 3768 { 3769 3770 LIST_INIT(USHM_OBJ_UMTX(object)); 3771 } 3772 3773 void 3774 umtx_shm_object_terminated(vm_object_t object) 3775 { 3776 struct umtx_shm_reg *reg, *reg1; 3777 bool dofree; 3778 3779 dofree = false; 3780 mtx_lock(&umtx_shm_lock); 3781 LIST_FOREACH_SAFE(reg, USHM_OBJ_UMTX(object), ushm_obj_link, reg1) { 3782 if (umtx_shm_unref_reg_locked(reg, true)) { 3783 TAILQ_INSERT_TAIL(&umtx_shm_reg_delfree, reg, 3784 ushm_reg_link); 3785 dofree = true; 3786 } 3787 } 3788 mtx_unlock(&umtx_shm_lock); 3789 if (dofree) 3790 taskqueue_enqueue(taskqueue_thread, &umtx_shm_reg_delfree_task); 3791 } 3792 3793 static int 3794 umtx_shm_create_reg(struct thread *td, const struct umtx_key *key, 3795 struct umtx_shm_reg **res) 3796 { 3797 struct umtx_shm_reg *reg, *reg1; 3798 struct ucred *cred; 3799 int error; 3800 3801 reg = umtx_shm_find_reg(key); 3802 if (reg != NULL) { 3803 *res = reg; 3804 return (0); 3805 } 3806 cred = td->td_ucred; 3807 if (!chgumtxcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_UMTXP))) 3808 return (ENOMEM); 3809 reg = uma_zalloc(umtx_shm_reg_zone, M_WAITOK | M_ZERO); 3810 reg->ushm_refcnt = 1; 3811 bcopy(key, ®->ushm_key, sizeof(*key)); 3812 reg->ushm_obj = shm_alloc(td->td_ucred, O_RDWR); 3813 reg->ushm_cred = crhold(cred); 3814 error = shm_dotruncate(reg->ushm_obj, PAGE_SIZE); 3815 if (error != 0) { 3816 umtx_shm_free_reg(reg); 3817 return (error); 3818 } 3819 mtx_lock(&umtx_shm_lock); 3820 reg1 = umtx_shm_find_reg_locked(key); 3821 if (reg1 != NULL) { 3822 mtx_unlock(&umtx_shm_lock); 3823 umtx_shm_free_reg(reg); 3824 *res = reg1; 3825 return (0); 3826 } 3827 reg->ushm_refcnt++; 3828 TAILQ_INSERT_TAIL(&umtx_shm_registry[key->hash], reg, ushm_reg_link); 3829 LIST_INSERT_HEAD(USHM_OBJ_UMTX(key->info.shared.object), reg, 3830 ushm_obj_link); 3831 reg->ushm_flags = USHMF_REG_LINKED | USHMF_OBJ_LINKED; 3832 mtx_unlock(&umtx_shm_lock); 3833 *res = reg; 3834 return (0); 3835 } 3836 3837 static int 3838 umtx_shm_alive(struct thread *td, void *addr) 3839 { 3840 vm_map_t map; 3841 vm_map_entry_t entry; 3842 vm_object_t object; 3843 vm_pindex_t pindex; 3844 vm_prot_t prot; 3845 int res, ret; 3846 boolean_t wired; 3847 3848 map = 
&td->td_proc->p_vmspace->vm_map; 3849 res = vm_map_lookup(&map, (uintptr_t)addr, VM_PROT_READ, &entry, 3850 &object, &pindex, &prot, &wired); 3851 if (res != KERN_SUCCESS) 3852 return (EFAULT); 3853 if (object == NULL) 3854 ret = EINVAL; 3855 else 3856 ret = (object->flags & OBJ_UMTXDEAD) != 0 ? ENOTTY : 0; 3857 vm_map_lookup_done(map, entry); 3858 return (ret); 3859 } 3860 3861 static void 3862 umtx_shm_init(void) 3863 { 3864 int i; 3865 3866 umtx_shm_reg_zone = uma_zcreate("umtx_shm", sizeof(struct umtx_shm_reg), 3867 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 3868 mtx_init(&umtx_shm_lock, "umtxshm", NULL, MTX_DEF); 3869 for (i = 0; i < nitems(umtx_shm_registry); i++) 3870 TAILQ_INIT(&umtx_shm_registry[i]); 3871 } 3872 3873 static int 3874 umtx_shm(struct thread *td, void *addr, u_int flags) 3875 { 3876 struct umtx_key key; 3877 struct umtx_shm_reg *reg; 3878 struct file *fp; 3879 int error, fd; 3880 3881 if (__bitcount(flags & (UMTX_SHM_CREAT | UMTX_SHM_LOOKUP | 3882 UMTX_SHM_DESTROY| UMTX_SHM_ALIVE)) != 1) 3883 return (EINVAL); 3884 if ((flags & UMTX_SHM_ALIVE) != 0) 3885 return (umtx_shm_alive(td, addr)); 3886 error = umtx_key_get(addr, TYPE_SHM, PROCESS_SHARE, &key); 3887 if (error != 0) 3888 return (error); 3889 KASSERT(key.shared == 1, ("non-shared key")); 3890 if ((flags & UMTX_SHM_CREAT) != 0) { 3891 error = umtx_shm_create_reg(td, &key, ®); 3892 } else { 3893 reg = umtx_shm_find_reg(&key); 3894 if (reg == NULL) 3895 error = ESRCH; 3896 } 3897 umtx_key_release(&key); 3898 if (error != 0) 3899 return (error); 3900 KASSERT(reg != NULL, ("no reg")); 3901 if ((flags & UMTX_SHM_DESTROY) != 0) { 3902 umtx_shm_unref_reg(reg, true); 3903 } else { 3904 #if 0 3905 #ifdef MAC 3906 error = mac_posixshm_check_open(td->td_ucred, 3907 reg->ushm_obj, FFLAGS(O_RDWR)); 3908 if (error == 0) 3909 #endif 3910 error = shm_access(reg->ushm_obj, td->td_ucred, 3911 FFLAGS(O_RDWR)); 3912 if (error == 0) 3913 #endif 3914 error = falloc_caps(td, &fp, &fd, O_CLOEXEC, NULL); 3915 if (error == 0) { 3916 shm_hold(reg->ushm_obj); 3917 finit(fp, FFLAGS(O_RDWR), DTYPE_SHM, reg->ushm_obj, 3918 &shm_ops); 3919 td->td_retval[0] = fd; 3920 fdrop(fp, td); 3921 } 3922 } 3923 umtx_shm_unref_reg(reg, false); 3924 return (error); 3925 } 3926 3927 static int 3928 __umtx_op_shm(struct thread *td, struct _umtx_op_args *uap) 3929 { 3930 3931 return (umtx_shm(td, uap->uaddr1, uap->val)); 3932 } 3933 3934 static int 3935 umtx_robust_lists(struct thread *td, struct umtx_robust_lists_params *rbp) 3936 { 3937 3938 td->td_rb_list = rbp->robust_list_offset; 3939 td->td_rbp_list = rbp->robust_priv_list_offset; 3940 td->td_rb_inact = rbp->robust_inact_offset; 3941 return (0); 3942 } 3943 3944 static int 3945 __umtx_op_robust_lists(struct thread *td, struct _umtx_op_args *uap) 3946 { 3947 struct umtx_robust_lists_params rb; 3948 int error; 3949 3950 if (uap->val > sizeof(rb)) 3951 return (EINVAL); 3952 bzero(&rb, sizeof(rb)); 3953 error = copyin(uap->uaddr1, &rb, uap->val); 3954 if (error != 0) 3955 return (error); 3956 return (umtx_robust_lists(td, &rb)); 3957 } 3958 3959 typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap); 3960 3961 static const _umtx_op_func op_table[] = { 3962 [UMTX_OP_RESERVED0] = __umtx_op_unimpl, 3963 [UMTX_OP_RESERVED1] = __umtx_op_unimpl, 3964 [UMTX_OP_WAIT] = __umtx_op_wait, 3965 [UMTX_OP_WAKE] = __umtx_op_wake, 3966 [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex, 3967 [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex, 3968 [UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex, 3969 
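/*
 * Illustrative userland sketch (not part of this file): UMTX_OP_SHM
 * (handled by umtx_shm() above) gives cooperating processes a file
 * descriptor for a PAGE_SIZE anonymous shared-memory object keyed by the
 * address passed in uaddr1, which must itself live in process-shared
 * memory.  On success the descriptor is the syscall's return value and can
 * be mmap()ed to obtain a page common to everyone presenting the same key.
 * The wrapper name is hypothetical.
 *
 *	#include <sys/types.h>
 *	#include <sys/umtx.h>
 *	#include <errno.h>
 *
 *	static int
 *	example_pshared_fd(void *key_addr)
 *	{
 *		int fd;
 *
 *		fd = _umtx_op(NULL, UMTX_OP_SHM, UMTX_SHM_LOOKUP, key_addr,
 *		    NULL);
 *		if (fd == -1 && errno == ESRCH)
 *			fd = _umtx_op(NULL, UMTX_OP_SHM, UMTX_SHM_CREAT,
 *			    key_addr, NULL);
 *		return (fd);
 *	}
 */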
[UMTX_OP_SET_CEILING] = __umtx_op_set_ceiling, 3970 [UMTX_OP_CV_WAIT] = __umtx_op_cv_wait, 3971 [UMTX_OP_CV_SIGNAL] = __umtx_op_cv_signal, 3972 [UMTX_OP_CV_BROADCAST] = __umtx_op_cv_broadcast, 3973 [UMTX_OP_WAIT_UINT] = __umtx_op_wait_uint, 3974 [UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock, 3975 [UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock, 3976 [UMTX_OP_RW_UNLOCK] = __umtx_op_rw_unlock, 3977 [UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private, 3978 [UMTX_OP_WAKE_PRIVATE] = __umtx_op_wake_private, 3979 [UMTX_OP_MUTEX_WAIT] = __umtx_op_wait_umutex, 3980 [UMTX_OP_MUTEX_WAKE] = __umtx_op_wake_umutex, 3981 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3982 [UMTX_OP_SEM_WAIT] = __umtx_op_sem_wait, 3983 [UMTX_OP_SEM_WAKE] = __umtx_op_sem_wake, 3984 #else 3985 [UMTX_OP_SEM_WAIT] = __umtx_op_unimpl, 3986 [UMTX_OP_SEM_WAKE] = __umtx_op_unimpl, 3987 #endif 3988 [UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private, 3989 [UMTX_OP_MUTEX_WAKE2] = __umtx_op_wake2_umutex, 3990 [UMTX_OP_SEM2_WAIT] = __umtx_op_sem2_wait, 3991 [UMTX_OP_SEM2_WAKE] = __umtx_op_sem2_wake, 3992 [UMTX_OP_SHM] = __umtx_op_shm, 3993 [UMTX_OP_ROBUST_LISTS] = __umtx_op_robust_lists, 3994 }; 3995 3996 int 3997 sys__umtx_op(struct thread *td, struct _umtx_op_args *uap) 3998 { 3999 4000 if ((unsigned)uap->op < nitems(op_table)) 4001 return (*op_table[uap->op])(td, uap); 4002 return (EINVAL); 4003 } 4004 4005 #ifdef COMPAT_FREEBSD32 4006 4007 struct timespec32 { 4008 int32_t tv_sec; 4009 int32_t tv_nsec; 4010 }; 4011 4012 struct umtx_time32 { 4013 struct timespec32 timeout; 4014 uint32_t flags; 4015 uint32_t clockid; 4016 }; 4017 4018 static inline int 4019 umtx_copyin_timeout32(void *addr, struct timespec *tsp) 4020 { 4021 struct timespec32 ts32; 4022 int error; 4023 4024 error = copyin(addr, &ts32, sizeof(struct timespec32)); 4025 if (error == 0) { 4026 if (ts32.tv_sec < 0 || 4027 ts32.tv_nsec >= 1000000000 || 4028 ts32.tv_nsec < 0) 4029 error = EINVAL; 4030 else { 4031 tsp->tv_sec = ts32.tv_sec; 4032 tsp->tv_nsec = ts32.tv_nsec; 4033 } 4034 } 4035 return (error); 4036 } 4037 4038 static inline int 4039 umtx_copyin_umtx_time32(const void *addr, size_t size, struct _umtx_time *tp) 4040 { 4041 struct umtx_time32 t32; 4042 int error; 4043 4044 t32.clockid = CLOCK_REALTIME; 4045 t32.flags = 0; 4046 if (size <= sizeof(struct timespec32)) 4047 error = copyin(addr, &t32.timeout, sizeof(struct timespec32)); 4048 else 4049 error = copyin(addr, &t32, sizeof(struct umtx_time32)); 4050 if (error != 0) 4051 return (error); 4052 if (t32.timeout.tv_sec < 0 || 4053 t32.timeout.tv_nsec >= 1000000000 || t32.timeout.tv_nsec < 0) 4054 return (EINVAL); 4055 tp->_timeout.tv_sec = t32.timeout.tv_sec; 4056 tp->_timeout.tv_nsec = t32.timeout.tv_nsec; 4057 tp->_flags = t32.flags; 4058 tp->_clockid = t32.clockid; 4059 return (0); 4060 } 4061 4062 static int 4063 __umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 4064 { 4065 struct _umtx_time *tm_p, timeout; 4066 int error; 4067 4068 if (uap->uaddr2 == NULL) 4069 tm_p = NULL; 4070 else { 4071 error = umtx_copyin_umtx_time32(uap->uaddr2, 4072 (size_t)uap->uaddr1, &timeout); 4073 if (error != 0) 4074 return (error); 4075 tm_p = &timeout; 4076 } 4077 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); 4078 } 4079 4080 static int 4081 __umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap) 4082 { 4083 struct _umtx_time *tm_p, timeout; 4084 int error; 4085 4086 /* Allow a null timespec (wait forever). 
*/ 4087 if (uap->uaddr2 == NULL) 4088 tm_p = NULL; 4089 else { 4090 error = umtx_copyin_umtx_time(uap->uaddr2, 4091 (size_t)uap->uaddr1, &timeout); 4092 if (error != 0) 4093 return (error); 4094 tm_p = &timeout; 4095 } 4096 return (do_lock_umutex(td, uap->obj, tm_p, 0)); 4097 } 4098 4099 static int 4100 __umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap) 4101 { 4102 struct _umtx_time *tm_p, timeout; 4103 int error; 4104 4105 /* Allow a null timespec (wait forever). */ 4106 if (uap->uaddr2 == NULL) 4107 tm_p = NULL; 4108 else { 4109 error = umtx_copyin_umtx_time32(uap->uaddr2, 4110 (size_t)uap->uaddr1, &timeout); 4111 if (error != 0) 4112 return (error); 4113 tm_p = &timeout; 4114 } 4115 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT)); 4116 } 4117 4118 static int 4119 __umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 4120 { 4121 struct timespec *ts, timeout; 4122 int error; 4123 4124 /* Allow a null timespec (wait forever). */ 4125 if (uap->uaddr2 == NULL) 4126 ts = NULL; 4127 else { 4128 error = umtx_copyin_timeout32(uap->uaddr2, &timeout); 4129 if (error != 0) 4130 return (error); 4131 ts = &timeout; 4132 } 4133 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 4134 } 4135 4136 static int 4137 __umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap) 4138 { 4139 struct _umtx_time timeout; 4140 int error; 4141 4142 /* Allow a null timespec (wait forever). */ 4143 if (uap->uaddr2 == NULL) { 4144 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 4145 } else { 4146 error = umtx_copyin_umtx_time32(uap->uaddr2, 4147 (size_t)uap->uaddr1, &timeout); 4148 if (error != 0) 4149 return (error); 4150 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 4151 } 4152 return (error); 4153 } 4154 4155 static int 4156 __umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap) 4157 { 4158 struct _umtx_time timeout; 4159 int error; 4160 4161 /* Allow a null timespec (wait forever). */ 4162 if (uap->uaddr2 == NULL) { 4163 error = do_rw_wrlock(td, uap->obj, 0); 4164 } else { 4165 error = umtx_copyin_umtx_time32(uap->uaddr2, 4166 (size_t)uap->uaddr1, &timeout); 4167 if (error != 0) 4168 return (error); 4169 error = do_rw_wrlock(td, uap->obj, &timeout); 4170 } 4171 return (error); 4172 } 4173 4174 static int 4175 __umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap) 4176 { 4177 struct _umtx_time *tm_p, timeout; 4178 int error; 4179 4180 if (uap->uaddr2 == NULL) 4181 tm_p = NULL; 4182 else { 4183 error = umtx_copyin_umtx_time32( 4184 uap->uaddr2, (size_t)uap->uaddr1,&timeout); 4185 if (error != 0) 4186 return (error); 4187 tm_p = &timeout; 4188 } 4189 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1)); 4190 } 4191 4192 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4193 static int 4194 __umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 4195 { 4196 struct _umtx_time *tm_p, timeout; 4197 int error; 4198 4199 /* Allow a null timespec (wait forever). 
*/ 4200 if (uap->uaddr2 == NULL) 4201 tm_p = NULL; 4202 else { 4203 error = umtx_copyin_umtx_time32(uap->uaddr2, 4204 (size_t)uap->uaddr1, &timeout); 4205 if (error != 0) 4206 return (error); 4207 tm_p = &timeout; 4208 } 4209 return (do_sem_wait(td, uap->obj, tm_p)); 4210 } 4211 #endif 4212 4213 static int 4214 __umtx_op_sem2_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 4215 { 4216 struct _umtx_time *tm_p, timeout; 4217 size_t uasize; 4218 int error; 4219 4220 /* Allow a null timespec (wait forever). */ 4221 if (uap->uaddr2 == NULL) { 4222 uasize = 0; 4223 tm_p = NULL; 4224 } else { 4225 uasize = (size_t)uap->uaddr1; 4226 error = umtx_copyin_umtx_time32(uap->uaddr2, uasize, &timeout); 4227 if (error != 0) 4228 return (error); 4229 tm_p = &timeout; 4230 } 4231 error = do_sem2_wait(td, uap->obj, tm_p); 4232 if (error == EINTR && uap->uaddr2 != NULL && 4233 (timeout._flags & UMTX_ABSTIME) == 0 && 4234 uasize >= sizeof(struct umtx_time32) + sizeof(struct timespec32)) { 4235 struct timespec32 remain32 = { 4236 .tv_sec = timeout._timeout.tv_sec, 4237 .tv_nsec = timeout._timeout.tv_nsec 4238 }; 4239 error = copyout(&remain32, 4240 (struct umtx_time32 *)uap->uaddr2 + 1, 4241 sizeof(struct timespec32)); 4242 if (error == 0) { 4243 error = EINTR; 4244 } 4245 } 4246 4247 return (error); 4248 } 4249 4250 static int 4251 __umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap) 4252 { 4253 uint32_t uaddrs[BATCH_SIZE], **upp; 4254 int count, error, i, pos, tocopy; 4255 4256 upp = (uint32_t **)uap->obj; 4257 error = 0; 4258 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 4259 pos += tocopy) { 4260 tocopy = MIN(count, BATCH_SIZE); 4261 error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t)); 4262 if (error != 0) 4263 break; 4264 for (i = 0; i < tocopy; ++i) 4265 kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i], 4266 INT_MAX, 1); 4267 maybe_yield(); 4268 } 4269 return (error); 4270 } 4271 4272 struct umtx_robust_lists_params_compat32 { 4273 uint32_t robust_list_offset; 4274 uint32_t robust_priv_list_offset; 4275 uint32_t robust_inact_offset; 4276 }; 4277 4278 static int 4279 __umtx_op_robust_lists_compat32(struct thread *td, struct _umtx_op_args *uap) 4280 { 4281 struct umtx_robust_lists_params rb; 4282 struct umtx_robust_lists_params_compat32 rb32; 4283 int error; 4284 4285 if (uap->val > sizeof(rb32)) 4286 return (EINVAL); 4287 bzero(&rb, sizeof(rb)); 4288 bzero(&rb32, sizeof(rb32)); 4289 error = copyin(uap->uaddr1, &rb32, uap->val); 4290 if (error != 0) 4291 return (error); 4292 rb.robust_list_offset = rb32.robust_list_offset; 4293 rb.robust_priv_list_offset = rb32.robust_priv_list_offset; 4294 rb.robust_inact_offset = rb32.robust_inact_offset; 4295 return (umtx_robust_lists(td, &rb)); 4296 } 4297 4298 static const _umtx_op_func op_table_compat32[] = { 4299 [UMTX_OP_RESERVED0] = __umtx_op_unimpl, 4300 [UMTX_OP_RESERVED1] = __umtx_op_unimpl, 4301 [UMTX_OP_WAIT] = __umtx_op_wait_compat32, 4302 [UMTX_OP_WAKE] = __umtx_op_wake, 4303 [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex, 4304 [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex_compat32, 4305 [UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex, 4306 [UMTX_OP_SET_CEILING] = __umtx_op_set_ceiling, 4307 [UMTX_OP_CV_WAIT] = __umtx_op_cv_wait_compat32, 4308 [UMTX_OP_CV_SIGNAL] = __umtx_op_cv_signal, 4309 [UMTX_OP_CV_BROADCAST] = __umtx_op_cv_broadcast, 4310 [UMTX_OP_WAIT_UINT] = __umtx_op_wait_compat32, 4311 [UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock_compat32, 4312 [UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock_compat32, 
4313 [UMTX_OP_RW_UNLOCK] = __umtx_op_rw_unlock, 4314 [UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private_compat32, 4315 [UMTX_OP_WAKE_PRIVATE] = __umtx_op_wake_private, 4316 [UMTX_OP_MUTEX_WAIT] = __umtx_op_wait_umutex_compat32, 4317 [UMTX_OP_MUTEX_WAKE] = __umtx_op_wake_umutex, 4318 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4319 [UMTX_OP_SEM_WAIT] = __umtx_op_sem_wait_compat32, 4320 [UMTX_OP_SEM_WAKE] = __umtx_op_sem_wake, 4321 #else 4322 [UMTX_OP_SEM_WAIT] = __umtx_op_unimpl, 4323 [UMTX_OP_SEM_WAKE] = __umtx_op_unimpl, 4324 #endif 4325 [UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private32, 4326 [UMTX_OP_MUTEX_WAKE2] = __umtx_op_wake2_umutex, 4327 [UMTX_OP_SEM2_WAIT] = __umtx_op_sem2_wait_compat32, 4328 [UMTX_OP_SEM2_WAKE] = __umtx_op_sem2_wake, 4329 [UMTX_OP_SHM] = __umtx_op_shm, 4330 [UMTX_OP_ROBUST_LISTS] = __umtx_op_robust_lists_compat32, 4331 }; 4332 4333 int 4334 freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap) 4335 { 4336 4337 if ((unsigned)uap->op < nitems(op_table_compat32)) { 4338 return (*op_table_compat32[uap->op])(td, 4339 (struct _umtx_op_args *)uap); 4340 } 4341 return (EINVAL); 4342 } 4343 #endif 4344 4345 void 4346 umtx_thread_init(struct thread *td) 4347 { 4348 4349 td->td_umtxq = umtxq_alloc(); 4350 td->td_umtxq->uq_thread = td; 4351 } 4352 4353 void 4354 umtx_thread_fini(struct thread *td) 4355 { 4356 4357 umtxq_free(td->td_umtxq); 4358 } 4359 4360 /* 4361 * It will be called when new thread is created, e.g fork(). 4362 */ 4363 void 4364 umtx_thread_alloc(struct thread *td) 4365 { 4366 struct umtx_q *uq; 4367 4368 uq = td->td_umtxq; 4369 uq->uq_inherited_pri = PRI_MAX; 4370 4371 KASSERT(uq->uq_flags == 0, ("uq_flags != 0")); 4372 KASSERT(uq->uq_thread == td, ("uq_thread != td")); 4373 KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL")); 4374 KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty")); 4375 } 4376 4377 /* 4378 * exec() hook. 4379 * 4380 * Clear robust lists for all process' threads, not delaying the 4381 * cleanup to thread_exit hook, since the relevant address space is 4382 * destroyed right now. 4383 */ 4384 static void 4385 umtx_exec_hook(void *arg __unused, struct proc *p, 4386 struct image_params *imgp __unused) 4387 { 4388 struct thread *td; 4389 4390 KASSERT(p == curproc, ("need curproc")); 4391 PROC_LOCK(p); 4392 KASSERT((p->p_flag & P_HADTHREADS) == 0 || 4393 (p->p_flag & P_STOPPED_SINGLE) != 0, 4394 ("curproc must be single-threaded")); 4395 FOREACH_THREAD_IN_PROC(p, td) { 4396 KASSERT(td == curthread || 4397 ((td->td_flags & TDF_BOUNDARY) != 0 && TD_IS_SUSPENDED(td)), 4398 ("running thread %p %p", p, td)); 4399 PROC_UNLOCK(p); 4400 umtx_thread_cleanup(td); 4401 PROC_LOCK(p); 4402 td->td_rb_list = td->td_rbp_list = td->td_rb_inact = 0; 4403 } 4404 PROC_UNLOCK(p); 4405 } 4406 4407 /* 4408 * thread_exit() hook. 
4409 */ 4410 void 4411 umtx_thread_exit(struct thread *td) 4412 { 4413 4414 umtx_thread_cleanup(td); 4415 } 4416 4417 static int 4418 umtx_read_uptr(struct thread *td, uintptr_t ptr, uintptr_t *res) 4419 { 4420 u_long res1; 4421 #ifdef COMPAT_FREEBSD32 4422 uint32_t res32; 4423 #endif 4424 int error; 4425 4426 #ifdef COMPAT_FREEBSD32 4427 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { 4428 error = fueword32((void *)ptr, &res32); 4429 if (error == 0) 4430 res1 = res32; 4431 } else 4432 #endif 4433 { 4434 error = fueword((void *)ptr, &res1); 4435 } 4436 if (error == 0) 4437 *res = res1; 4438 else 4439 error = EFAULT; 4440 return (error); 4441 } 4442 4443 static void 4444 umtx_read_rb_list(struct thread *td, struct umutex *m, uintptr_t *rb_list) 4445 { 4446 #ifdef COMPAT_FREEBSD32 4447 struct umutex32 m32; 4448 4449 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { 4450 memcpy(&m32, m, sizeof(m32)); 4451 *rb_list = m32.m_rb_lnk; 4452 } else 4453 #endif 4454 *rb_list = m->m_rb_lnk; 4455 } 4456 4457 static int 4458 umtx_handle_rb(struct thread *td, uintptr_t rbp, uintptr_t *rb_list, bool inact) 4459 { 4460 struct umutex m; 4461 int error; 4462 4463 KASSERT(td->td_proc == curproc, ("need current vmspace")); 4464 error = copyin((void *)rbp, &m, sizeof(m)); 4465 if (error != 0) 4466 return (error); 4467 if (rb_list != NULL) 4468 umtx_read_rb_list(td, &m, rb_list); 4469 if ((m.m_flags & UMUTEX_ROBUST) == 0) 4470 return (EINVAL); 4471 if ((m.m_owner & ~UMUTEX_CONTESTED) != td->td_tid) 4472 /* inact is cleared after unlock, allow the inconsistency */ 4473 return (inact ? 0 : EINVAL); 4474 return (do_unlock_umutex(td, (struct umutex *)rbp, true)); 4475 } 4476 4477 static void 4478 umtx_cleanup_rb_list(struct thread *td, uintptr_t rb_list, uintptr_t *rb_inact, 4479 const char *name) 4480 { 4481 int error, i; 4482 uintptr_t rbp; 4483 bool inact; 4484 4485 if (rb_list == 0) 4486 return; 4487 error = umtx_read_uptr(td, rb_list, &rbp); 4488 for (i = 0; error == 0 && rbp != 0 && i < umtx_max_rb; i++) { 4489 if (rbp == *rb_inact) { 4490 inact = true; 4491 *rb_inact = 0; 4492 } else 4493 inact = false; 4494 error = umtx_handle_rb(td, rbp, &rbp, inact); 4495 } 4496 if (i == umtx_max_rb && umtx_verbose_rb) { 4497 uprintf("comm %s pid %d: reached umtx %smax rb %d\n", 4498 td->td_proc->p_comm, td->td_proc->p_pid, name, umtx_max_rb); 4499 } 4500 if (error != 0 && umtx_verbose_rb) { 4501 uprintf("comm %s pid %d: handling %srb error %d\n", 4502 td->td_proc->p_comm, td->td_proc->p_pid, name, error); 4503 } 4504 } 4505 4506 /* 4507 * Clean up umtx data. 4508 */ 4509 static void 4510 umtx_thread_cleanup(struct thread *td) 4511 { 4512 struct umtx_q *uq; 4513 struct umtx_pi *pi; 4514 uintptr_t rb_inact; 4515 4516 /* 4517 * Disown pi mutexes. 4518 */ 4519 uq = td->td_umtxq; 4520 if (uq != NULL) { 4521 mtx_lock(&umtx_lock); 4522 uq->uq_inherited_pri = PRI_MAX; 4523 while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) { 4524 pi->pi_owner = NULL; 4525 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link); 4526 } 4527 mtx_unlock(&umtx_lock); 4528 thread_lock(td); 4529 sched_lend_user_prio(td, PRI_MAX); 4530 thread_unlock(td); 4531 } 4532 4533 /* 4534 * Handle terminated robust mutexes. Must be done after 4535 * robust pi disown, otherwise unlock could see unowned 4536 * entries. 
4537 */ 4538 rb_inact = td->td_rb_inact; 4539 if (rb_inact != 0) 4540 (void)umtx_read_uptr(td, rb_inact, &rb_inact); 4541 umtx_cleanup_rb_list(td, td->td_rb_list, &rb_inact, ""); 4542 umtx_cleanup_rb_list(td, td->td_rbp_list, &rb_inact, "priv "); 4543 if (rb_inact != 0) 4544 (void)umtx_handle_rb(td, rb_inact, NULL, true); 4545 } 4546