/*-
 * Copyright (c) 2015, 2016 The FreeBSD Foundation
 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Portions of this software were developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"
#include "opt_umtx_profiling.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/syscallsubr.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <sys/umtx.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

#include <machine/cpu.h>

#ifdef COMPAT_FREEBSD32
#include <compat/freebsd32/freebsd32_proto.h>
#endif

#define	_UMUTEX_TRY		1
#define	_UMUTEX_WAIT		2

#ifdef UMTX_PROFILING
#define	UPROF_PERC_BIGGER(w, f, sw, sf)					\
	(((w) > (sw)) || ((w) == (sw) && (f) > (sf)))
#endif

/* Priority-inheritance mutex info. */
struct umtx_pi {
	/* Owner thread */
	struct thread		*pi_owner;

	/* Reference count */
	int			pi_refcount;

	/* List entry to link umtxes held by the owner thread */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* List entry in hash */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* List for waiters */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Identify a userland lock object */
	struct umtx_key		pi_key;
};

/* A userland synchronization object user. */
struct umtx_q {
	/* Linked list for the hash. */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Umtx key. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define	UQF_UMTXQ	0x0001

	/* The waiting thread. */
	struct thread		*uq_thread;

	/*
	 * Blocked on PI mutex.  Reads can use either the chain lock
	 * or umtx_lock; writes must hold both the chain lock and
	 * umtx_lock.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* On blocked list */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* The contested PI mutexes owned by this thread */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Inherited priority from PP mutex */
	u_char			uq_inherited_pri;

	/* Spare queue ready to be reused */
	struct umtxq_queue	*uq_spare_queue;

	/* The queue we are currently on */
	struct umtxq_queue	*uq_cur_queue;
};

TAILQ_HEAD(umtxq_head, umtx_q);

/* Per-key wait-queue */
struct umtxq_queue {
	struct umtxq_head	head;
	struct umtx_key		key;
	LIST_ENTRY(umtxq_queue)	link;
	int			length;
};

LIST_HEAD(umtxq_list, umtxq_queue);

/* Userland lock object's wait-queue chain */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx		uc_lock;

	/* List of sleep queues. */
	struct umtxq_list	uc_queue[2];
#define	UMTX_SHARED_QUEUE	0
#define	UMTX_EXCLUSIVE_QUEUE	1

	LIST_HEAD(, umtxq_queue) uc_spare_queue;

	/* Busy flag */
	char			uc_busy;

	/* Chain lock waiters */
	int			uc_waiters;

	/* All PIs in the list */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;

#ifdef UMTX_PROFILING
	u_int			length;
	u_int			max_length;
#endif
};

#define	UMTXQ_LOCKED_ASSERT(uc)		mtx_assert(&(uc)->uc_lock, MA_OWNED)

/*
 * Don't propagate time-sharing priority; there is a security reason:
 * a user could simply create a PI mutex, let thread A lock it, and
 * let another thread B block on it.  Because B is sleeping, its
 * priority would be boosted, which would in turn boost A's priority
 * through priority propagation.  A's priority would then never be
 * lowered, even if A were using 100% CPU, which is unfair to other
 * processes.
 */

#define	UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&	\
			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?	\
			 PRI_MAX_TIMESHARE : (td)->td_user_pri)

#define	GOLDEN_RATIO_PRIME	2654404609U
#ifndef	UMTX_CHAINS
#define	UMTX_CHAINS		512
#endif
#define	UMTX_SHIFTS		(__WORD_BIT - 9)

#define	GET_SHARE(flags)	\
	(((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)

#define	BUSY_SPINS		200

struct abs_timeout {
	int clockid;
	struct timespec cur;
	struct timespec end;
};

#ifdef COMPAT_FREEBSD32
struct umutex32 {
	volatile __lwpid_t	m_owner;	/* Owner of the mutex */
	__uint32_t		m_flags;	/* Flags of the mutex */
	__uint32_t		m_ceilings[2];	/* Priority protect ceiling */
	__uint32_t		m_rb_lnk;	/* Robust linkage */
	__uint32_t		m_pad;
	__uint32_t		m_spare[2];
};

_Static_assert(sizeof(struct umutex) == sizeof(struct umutex32), "umutex32");
_Static_assert(__offsetof(struct umutex, m_spare[0]) ==
    __offsetof(struct umutex32, m_spare[0]), "m_spare32");
#endif

int umtx_shm_vnobj_persistent = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_vnode_persistent, CTLFLAG_RWTUN,
    &umtx_shm_vnobj_persistent, 0,
    "False forces destruction of umtx attached to file, on last close");
static int umtx_max_rb = 1000;
SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_max_robust, CTLFLAG_RWTUN,
    &umtx_max_rb, 0,
    "");

static uma_zone_t	umtx_pi_zone;
static struct umtxq_chain umtxq_chains[2][UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
static int		umtx_pi_allocated;

static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
    &umtx_pi_allocated, 0, "Allocated umtx_pi");
static int umtx_verbose_rb = 1;
SYSCTL_INT(_debug_umtx, OID_AUTO, robust_faults_verbose, CTLFLAG_RWTUN,
    &umtx_verbose_rb, 0,
    "");

#ifdef UMTX_PROFILING
static long max_length;
SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0,
    "max_length");
static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD, 0,
    "umtx chain stats");
#endif

static void umtx_shm_init(void);
static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert_queue(struct umtx_q *uq, int q);
static void umtxq_remove_queue(struct umtx_q *uq, int q);
static int umtxq_sleep(struct umtx_q *uq, const char *wmesg,
    struct abs_timeout *);
static int umtxq_count(struct umtx_key *key);
static struct umtx_pi *umtx_pi_alloc(int);
static void umtx_pi_free(struct umtx_pi *pi);
static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    bool rb);
static void umtx_thread_cleanup(struct thread *td);
static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
    struct image_params *imgp __unused);
SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);

#define	umtxq_signal(key, nwake)	umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
#define	umtxq_insert(uq)	umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
#define	umtxq_remove(uq)	umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)

static struct mtx umtx_lock;

#ifdef UMTX_PROFILING
static void
umtx_init_profiling(void)
{
	struct sysctl_oid *chain_oid;
	char chain_name[10];
	int i;

	for (i = 0; i < UMTX_CHAINS; ++i) {
		snprintf(chain_name, sizeof(chain_name), "%d", i);
		chain_oid = SYSCTL_ADD_NODE(NULL,
		    SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO,
		    chain_name, CTLFLAG_RD, NULL, "umtx hash stats");
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length0", CTLFLAG_RD,
		    &umtxq_chains[0][i].max_length, 0, NULL);
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length1", CTLFLAG_RD,
		    &umtxq_chains[1][i].max_length, 0, NULL);
	}
}

static int
sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS)
{
	char buf[512];
	struct sbuf sb;
	struct umtxq_chain *uc;
	u_int fract, i, j, tot, whole;
	u_int sf0, sf1, sf2, sf3, sf4;
	u_int si0, si1, si2, si3, si4;
	u_int sw0, sw1, sw2, sw3, sw4;

	sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
	for (i = 0; i < 2; i++) {
		tot = 0;
		for (j = 0; j < UMTX_CHAINS; ++j) {
			uc = &umtxq_chains[i][j];
			mtx_lock(&uc->uc_lock);
			tot += uc->max_length;
			mtx_unlock(&uc->uc_lock);
		}
		if (tot == 0)
			sbuf_printf(&sb, "%u) Empty ", i);
		else {
			sf0 = sf1 = sf2 = sf3 = sf4 = 0;
			si0 = si1 = si2 = si3 = si4 = 0;
			sw0 = sw1 = sw2 = sw3 = sw4 = 0;
			for (j = 0; j < UMTX_CHAINS; j++) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				whole = uc->max_length * 100;
				mtx_unlock(&uc->uc_lock);
				fract = (whole % tot) * 100;
				if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) {
					sf0 = fract;
					si0 = j;
					sw0 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw1,
				    sf1)) {
					sf1 = fract;
					si1 = j;
					sw1 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw2,
				    sf2)) {
					sf2 = fract;
					si2 = j;
					sw2 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw3,
				    sf3)) {
					sf3 = fract;
					si3 = j;
					sw3 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw4,
				    sf4)) {
					sf4 = fract;
					si4 = j;
					sw4 = whole;
				}
			}
			sbuf_printf(&sb, "queue %u:\n", i);
			sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot,
			    sf0 / tot, si0);
			sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot,
			    sf1 / tot, si1);
			sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot,
			    sf2 / tot, si2);
			sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot,
			    sf3 / tot, si3);
			sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot,
			    sf4 / tot, si4);
		}
	}
	sbuf_trim(&sb);
	sbuf_finish(&sb);
	sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
	sbuf_delete(&sb);
	return (0);
}

static int
sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS)
{
	struct umtxq_chain *uc;
	u_int i, j;
	int clear, error;

	clear = 0;
	error = sysctl_handle_int(oidp, &clear, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);

	if (clear != 0) {
		for (i = 0; i < 2; ++i) {
			for (j = 0; j < UMTX_CHAINS; ++j) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				uc->length = 0;
				uc->max_length = 0;
				mtx_unlock(&uc->uc_lock);
			}
		}
	}
	return (0);
}

SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_clear, "I", "Clear umtx chains statistics");
SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_peaks, "A", "Highest peaks in chains max length");
#endif

static void
umtxq_sysinit(void *arg __unused)
{
	int i, j;

	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	for (i = 0; i < 2; ++i) {
		for (j = 0; j < UMTX_CHAINS; ++j) {
			mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
			    MTX_DEF | MTX_DUPOK);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
			LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
			TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
			umtxq_chains[i][j].uc_busy = 0;
			umtxq_chains[i][j].uc_waiters = 0;
#ifdef UMTX_PROFILING
			umtxq_chains[i][j].length = 0;
			umtxq_chains[i][j].max_length = 0;
#endif
		}
	}
#ifdef UMTX_PROFILING
	umtx_init_profiling();
#endif
	mtx_init(&umtx_lock, "umtx lock", NULL, MTX_DEF);
	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
	    EVENTHANDLER_PRI_ANY);
	umtx_shm_init();
}

struct umtx_q *
umtxq_alloc(void)
{
	struct umtx_q *uq;

	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
	uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX,
	    M_WAITOK | M_ZERO);
	TAILQ_INIT(&uq->uq_spare_queue->head);
	TAILQ_INIT(&uq->uq_pi_contested);
	uq->uq_inherited_pri = PRI_MAX;
	return (uq);
}

void
umtxq_free(struct umtx_q *uq)
{

	MPASS(uq->uq_spare_queue != NULL);
	free(uq->uq_spare_queue, M_UMTX);
	free(uq, M_UMTX);
}

static inline void
umtxq_hash(struct umtx_key *key)
{
	unsigned n;

	n = (uintptr_t)key->info.both.a + key->info.both.b;
	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
}

static inline struct umtxq_chain *
umtxq_getchain(struct umtx_key *key)
{

	if (key->type <= TYPE_SEM)
		return (&umtxq_chains[1][key->hash]);
	return (&umtxq_chains[0][key->hash]);
}

/*
 * Lock a chain.
 */
static inline void
umtxq_lock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_lock(&uc->uc_lock);
}

/*
 * Unlock a chain.
 */
static inline void
umtxq_unlock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_unlock(&uc->uc_lock);
}

/*
 * Set the chain to the busy state when a following operation
 * may block (a kernel mutex cannot be held across it).
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	if (uc->uc_busy) {
#ifdef SMP
		if (smp_cpus > 1) {
			int count = BUSY_SPINS;
			if (count > 0) {
				umtxq_unlock(key);
				while (uc->uc_busy && --count > 0)
					cpu_spinwait();
				umtxq_lock(key);
			}
		}
#endif
		while (uc->uc_busy) {
			uc->uc_waiters++;
			msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
			uc->uc_waiters--;
		}
	}
	uc->uc_busy = 1;
}
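
/*
 * Illustrative sketch of the chain busy protocol as used by the lock
 * routines below: the busy flag serializes faultable work on a key
 * while the chain mutex itself is dropped, since access to user
 * memory can sleep and a kernel mutex may not be held across it.
 *
 *	umtxq_lock(&key);
 *	umtxq_busy(&key);	<- may sleep until the chain is unbusied
 *	umtxq_unlock(&key);
 *	... faultable work, e.g. casueword32() on the user word ...
 *	umtxq_unbusy_unlocked(&key);
 */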

/*
 * Unbusy a chain.
 */
static inline void
umtxq_unbusy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	KASSERT(uc->uc_busy != 0, ("not busy"));
	uc->uc_busy = 0;
	if (uc->uc_waiters)
		wakeup_one(uc);
}

static inline void
umtxq_unbusy_unlocked(struct umtx_key *key)
{

	umtxq_lock(key);
	umtxq_unbusy(key);
	umtxq_unlock(key);
}

static struct umtxq_queue *
umtxq_queue_lookup(struct umtx_key *key, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	LIST_FOREACH(uh, &uc->uc_queue[q], link) {
		if (umtx_key_match(&uh->key, key))
			return (uh);
	}

	return (NULL);
}

static inline void
umtxq_insert_queue(struct umtx_q *uq, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue"));
	uh = umtxq_queue_lookup(&uq->uq_key, q);
	if (uh != NULL) {
		LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link);
	} else {
		uh = uq->uq_spare_queue;
		uh->key = uq->uq_key;
		LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
#ifdef UMTX_PROFILING
		uc->length++;
		if (uc->length > uc->max_length) {
			uc->max_length = uc->length;
			if (uc->max_length > max_length)
				max_length = uc->max_length;
		}
#endif
	}
	uq->uq_spare_queue = NULL;

	TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
	uh->length++;
	uq->uq_flags |= UQF_UMTXQ;
	uq->uq_cur_queue = uh;
	return;
}

static inline void
umtxq_remove_queue(struct umtx_q *uq, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (uq->uq_flags & UQF_UMTXQ) {
		uh = uq->uq_cur_queue;
		TAILQ_REMOVE(&uh->head, uq, uq_link);
		uh->length--;
		uq->uq_flags &= ~UQF_UMTXQ;
		if (TAILQ_EMPTY(&uh->head)) {
			KASSERT(uh->length == 0,
			    ("inconsistent umtxq_queue length"));
#ifdef UMTX_PROFILING
			uc->length--;
#endif
			LIST_REMOVE(uh, link);
		} else {
			uh = LIST_FIRST(&uc->uc_spare_queue);
			KASSERT(uh != NULL, ("uc_spare_queue is empty"));
			LIST_REMOVE(uh, link);
		}
		uq->uq_spare_queue = uh;
		uq->uq_cur_queue = NULL;
	}
}

/*
 * Return the number of threads waiting on the key.
 */
static int
umtxq_count(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL)
		return (uh->length);
	return (0);
}

/*
 * Return the number of PI waiters for the key, and the first waiter.
 */
static int
umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	*first = NULL;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL) {
		*first = TAILQ_FIRST(&uh->head);
		return (uh->length);
	}
	return (0);
}

static int
umtxq_check_susp(struct thread *td)
{
	struct proc *p;
	int error;

	/*
	 * The check for TDF_NEEDSUSPCHK is racy, but it is enough to
	 * eventually break the lockstep loop.
	 */
	if ((td->td_flags & TDF_NEEDSUSPCHK) == 0)
		return (0);
	error = 0;
	p = td->td_proc;
	PROC_LOCK(p);
	if (P_SHOULDSTOP(p) ||
	    ((p->p_flag & P_TRACED) && (td->td_dbgflags & TDB_SUSPEND))) {
		if (p->p_flag & P_SINGLE_EXIT)
			error = EINTR;
		else
			error = ERESTART;
	}
	PROC_UNLOCK(p);
	return (error);
}

/*
 * Wake up threads waiting on a userland object.
 */
static int
umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;
	struct umtx_q *uq;
	int ret;

	ret = 0;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, q);
	if (uh != NULL) {
		while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
			umtxq_remove_queue(uq, q);
			wakeup(uq);
			if (++ret >= n_wake)
				return (ret);
		}
	}
	return (ret);
}

/*
 * Wake up the specified thread.
 */
static inline void
umtxq_signal_thread(struct umtx_q *uq)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	umtxq_remove(uq);
	wakeup(uq);
}

static inline int
tstohz(const struct timespec *tsp)
{
	struct timeval tv;

	TIMESPEC_TO_TIMEVAL(&tv, tsp);
	return tvtohz(&tv);
}

static void
abs_timeout_init(struct abs_timeout *timo, int clockid, int absolute,
    const struct timespec *timeout)
{

	timo->clockid = clockid;
	if (!absolute) {
		kern_clock_gettime(curthread, clockid, &timo->end);
		timo->cur = timo->end;
		timespecadd(&timo->end, timeout);
	} else {
		timo->end = *timeout;
		kern_clock_gettime(curthread, clockid, &timo->cur);
	}
}

static void
abs_timeout_init2(struct abs_timeout *timo, const struct _umtx_time *umtxtime)
{

	abs_timeout_init(timo, umtxtime->_clockid,
	    (umtxtime->_flags & UMTX_ABSTIME) != 0, &umtxtime->_timeout);
}

static inline void
abs_timeout_update(struct abs_timeout *timo)
{

	kern_clock_gettime(curthread, timo->clockid, &timo->cur);
}

static int
abs_timeout_gethz(struct abs_timeout *timo)
{
	struct timespec tts;

	if (timespeccmp(&timo->end, &timo->cur, <=))
		return (-1);
	tts = timo->end;
	timespecsub(&tts, &timo->cur);
	return (tstohz(&tts));
}

static uint32_t
umtx_unlock_val(uint32_t flags, bool rb)
{

	if (rb)
		return (UMUTEX_RB_OWNERDEAD);
	else if ((flags & UMUTEX_NONCONSISTENT) != 0)
		return (UMUTEX_RB_NOTRECOV);
	else
		return (UMUTEX_UNOWNED);
}
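
/*
 * Illustrative sketch of how the abs_timeout helpers fit together in
 * the sleep loops below (see do_wait() for a concrete instance):
 *
 *	struct abs_timeout timo;
 *
 *	abs_timeout_init2(&timo, timeout);	<- absolute deadline taken
 *						   from the user _umtx_time
 *	error = umtxq_sleep(uq, "uwait", &timo);
 *
 * umtxq_sleep() converts the remaining time to ticks with
 * abs_timeout_gethz() before each msleep() and refreshes the current
 * time with abs_timeout_update() after a spurious wakeup, so the
 * absolute deadline survives repeated sleeps.
 */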

/*
 * Put the thread into a sleep state; before sleeping, check whether
 * the thread was removed from the umtx queue.
 */
static inline int
umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *abstime)
{
	struct umtxq_chain *uc;
	int error, timo;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	for (;;) {
		if (!(uq->uq_flags & UQF_UMTXQ))
			return (0);
		if (abstime != NULL) {
			timo = abs_timeout_gethz(abstime);
			if (timo < 0)
				return (ETIMEDOUT);
		} else
			timo = 0;
		error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo);
		if (error != EWOULDBLOCK) {
			umtxq_lock(&uq->uq_key);
			break;
		}
		if (abstime != NULL)
			abs_timeout_update(abstime);
		umtxq_lock(&uq->uq_key);
	}
	return (error);
}

/*
 * Convert a userspace address into a unique logical address.
 */
int
umtx_key_get(const void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else {
		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
		    &entry, &key->info.shared.object, &pindex, &prot,
		    &wired) != KERN_SUCCESS) {
			return (EFAULT);
		}

		if ((share == PROCESS_SHARE) ||
		    (share == AUTO_SHARE &&
		     VM_INHERIT_SHARE == entry->inheritance)) {
			key->shared = 1;
			key->info.shared.offset = (vm_offset_t)addr -
			    entry->start + entry->offset;
			vm_object_reference(key->info.shared.object);
		} else {
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		vm_map_lookup_done(map, entry);
	}

	umtxq_hash(key);
	return (0);
}

/*
 * Release a key.
 */
void
umtx_key_release(struct umtx_key *key)
{

	if (key->shared)
		vm_object_deallocate(key->info.shared.object);
}

/*
 * Fetch and compare the value; sleep on the address if the value
 * has not changed.
 */
static int
do_wait(struct thread *td, void *addr, u_long id,
    struct _umtx_time *timeout, int compat32, int is_private)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	u_long tmp;
	uint32_t tmp32;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	if (compat32 == 0) {
		error = fueword(addr, &tmp);
		if (error != 0)
			error = EFAULT;
	} else {
		error = fueword32(addr, &tmp32);
		if (error == 0)
			tmp = tmp32;
		else
			error = EFAULT;
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		if (tmp == id)
			error = umtxq_sleep(uq, "uwait", timeout == NULL ?
			    NULL : &timo);
		if ((uq->uq_flags & UQF_UMTXQ) == 0)
			error = 0;
		else
			umtxq_remove(uq);
	} else if ((uq->uq_flags & UQF_UMTXQ) != 0) {
		umtxq_remove(uq);
	}
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}
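
/*
 * Sketch of the userland pairing for do_wait() and kern_umtx_wake():
 * a waiter passes the value it last observed; the kernel re-reads the
 * word after inserting the waiter on the queue and only sleeps if it
 * still equals that value, so a wakeup between the userland read and
 * the sleep cannot be lost.
 *
 *	waiter:				waker:
 *	v = *addr;			*addr = new value;
 *	_umtx_op(addr, UMTX_OP_WAIT,	_umtx_op(addr, UMTX_OP_WAKE,
 *	    v, ...);			    nwake, ...);
 */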

/*
 * Wake up threads sleeping on the specified address.
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
		return (ret);
	umtxq_lock(&key);
	umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}

/*
 * Lock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int mode)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	error = 0;
	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */
	for (;;) {
		rv = fueword32(&m->m_owner, &owner);
		if (rv == -1)
			return (EFAULT);
		if (mode == _UMUTEX_WAIT) {
			if (owner == UMUTEX_UNOWNED ||
			    owner == UMUTEX_CONTESTED ||
			    owner == UMUTEX_RB_OWNERDEAD ||
			    owner == UMUTEX_RB_NOTRECOV)
				return (0);
		} else {
			/*
			 * Robust mutex terminated.  Kernel duty is to
			 * return EOWNERDEAD to the userspace.  The
			 * umutex.m_flags UMUTEX_NONCONSISTENT is set
			 * by the common userspace code.
			 */
			if (owner == UMUTEX_RB_OWNERDEAD) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_RB_OWNERDEAD, &owner,
				    id | UMUTEX_CONTESTED);
				if (rv == -1)
					return (EFAULT);
				if (owner == UMUTEX_RB_OWNERDEAD)
					return (EOWNERDEAD); /* success */
				rv = umtxq_check_susp(td);
				if (rv != 0)
					return (rv);
				continue;
			}
			if (owner == UMUTEX_RB_NOTRECOV)
				return (ENOTRECOVERABLE);

			/*
			 * Try the uncontested case.  This should be
			 * done in userland.
			 */
			rv = casueword32(&m->m_owner, UMUTEX_UNOWNED,
			    &owner, id);
			/* The address was invalid. */
			if (rv == -1)
				return (EFAULT);

			/* The acquire succeeded. */
			if (owner == UMUTEX_UNOWNED)
				return (0);

			/*
			 * If no one owns it but it is contested, try
			 * to acquire it.
			 */
			if (owner == UMUTEX_CONTESTED) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_CONTESTED, &owner,
				    id | UMUTEX_CONTESTED);
				/* The address was invalid. */
				if (rv == -1)
					return (EFAULT);

				if (owner == UMUTEX_CONTESTED)
					return (0);

				rv = umtxq_check_susp(td);
				if (rv != 0)
					return (rv);

				/*
				 * If this failed, the lock has
				 * changed; restart.
				 */
				continue;
			}
		}

		if (mode == _UMUTEX_TRY)
			return (EBUSY);

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails,
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old,
		    owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (rv == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtxn", timeout == NULL ?
			    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);

		if (error == 0)
			error = umtxq_check_susp(td);
	}

	return (0);
}

/*
 * Unlock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	uint32_t owner, old, id, newlock;
	int error, count;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	newlock = umtx_unlock_val(flags, rb);
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, newlock);
		if (error == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * zero or one thread is waiting for it.  Otherwise, it must
	 * be marked as contested.
	 */
	if (count > 1)
		newlock |= UMUTEX_CONTESTED;
	error = casueword32(&m->m_owner, owner, &old, newlock);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
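
/*
 * Sketch of the m_owner word states handled above and below, as they
 * appear in this file ("tid" stands for a thread id):
 *
 *	UMUTEX_UNOWNED		free, lockable entirely in userland
 *	tid			owned by "tid", no waiters
 *	tid | UMUTEX_CONTESTED	owned; unlock must enter the kernel
 *	UMUTEX_CONTESTED	free, but waiters may exist
 *	UMUTEX_RB_OWNERDEAD	robust owner died; the next locker
 *				acquires it and gets EOWNERDEAD
 *	UMUTEX_RB_NOTRECOV	robust mutex is unrecoverable; lockers
 *				get ENOTRECOVERABLE
 */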

/*
 * Check if the mutex is available and wake up a waiter;
 * this applies only to a simple mutex.
 */
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != 0 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV)
		return (0);

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	if (count <= 1 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV) {
		error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    UMUTEX_UNOWNED);
		if (error == -1)
			error = EFAULT;
	}

	umtxq_lock(&key);
	if (error == 0 && count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 ||
	    owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV))
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

/*
 * Check if the mutex has waiters and try to fix the contention bit.
 */
static int
do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old;
	int type;
	int error;
	int count;

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT |
	    UMUTEX_ROBUST)) {
	case 0:
	case UMUTEX_ROBUST:
		type = TYPE_NORMAL_UMUTEX;
		break;
	case UMUTEX_PRIO_INHERIT:
		type = TYPE_PI_UMUTEX;
		break;
	case (UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST):
		type = TYPE_PI_ROBUST_UMUTEX;
		break;
	case UMUTEX_PRIO_PROTECT:
		type = TYPE_PP_UMUTEX;
		break;
	case (UMUTEX_PRIO_PROTECT | UMUTEX_ROBUST):
		type = TYPE_PP_ROBUST_UMUTEX;
		break;
	default:
		return (EINVAL);
	}
	if ((error = umtx_key_get(m, type, GET_SHARE(flags), &key)) != 0)
		return (error);

	owner = 0;
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);
	/*
	 * Only repair the contention bit if there is a waiter; that
	 * means the mutex is still being referenced by userland code.
	 * Otherwise, don't update any memory.
	 */
1301 */ 1302 if (count > 1) { 1303 error = fueword32(&m->m_owner, &owner); 1304 if (error == -1) 1305 error = EFAULT; 1306 while (error == 0 && (owner & UMUTEX_CONTESTED) == 0) { 1307 error = casueword32(&m->m_owner, owner, &old, 1308 owner | UMUTEX_CONTESTED); 1309 if (error == -1) { 1310 error = EFAULT; 1311 break; 1312 } 1313 if (old == owner) 1314 break; 1315 owner = old; 1316 error = umtxq_check_susp(td); 1317 if (error != 0) 1318 break; 1319 } 1320 } else if (count == 1) { 1321 error = fueword32(&m->m_owner, &owner); 1322 if (error == -1) 1323 error = EFAULT; 1324 while (error == 0 && (owner & ~UMUTEX_CONTESTED) != 0 && 1325 (owner & UMUTEX_CONTESTED) == 0) { 1326 error = casueword32(&m->m_owner, owner, &old, 1327 owner | UMUTEX_CONTESTED); 1328 if (error == -1) { 1329 error = EFAULT; 1330 break; 1331 } 1332 if (old == owner) 1333 break; 1334 owner = old; 1335 error = umtxq_check_susp(td); 1336 if (error != 0) 1337 break; 1338 } 1339 } 1340 umtxq_lock(&key); 1341 if (error == EFAULT) { 1342 umtxq_signal(&key, INT_MAX); 1343 } else if (count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 || 1344 owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV)) 1345 umtxq_signal(&key, 1); 1346 umtxq_unbusy(&key); 1347 umtxq_unlock(&key); 1348 umtx_key_release(&key); 1349 return (error); 1350 } 1351 1352 static inline struct umtx_pi * 1353 umtx_pi_alloc(int flags) 1354 { 1355 struct umtx_pi *pi; 1356 1357 pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags); 1358 TAILQ_INIT(&pi->pi_blocked); 1359 atomic_add_int(&umtx_pi_allocated, 1); 1360 return (pi); 1361 } 1362 1363 static inline void 1364 umtx_pi_free(struct umtx_pi *pi) 1365 { 1366 uma_zfree(umtx_pi_zone, pi); 1367 atomic_add_int(&umtx_pi_allocated, -1); 1368 } 1369 1370 /* 1371 * Adjust the thread's position on a pi_state after its priority has been 1372 * changed. 1373 */ 1374 static int 1375 umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td) 1376 { 1377 struct umtx_q *uq, *uq1, *uq2; 1378 struct thread *td1; 1379 1380 mtx_assert(&umtx_lock, MA_OWNED); 1381 if (pi == NULL) 1382 return (0); 1383 1384 uq = td->td_umtxq; 1385 1386 /* 1387 * Check if the thread needs to be moved on the blocked chain. 1388 * It needs to be moved if either its priority is lower than 1389 * the previous thread or higher than the next thread. 1390 */ 1391 uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq); 1392 uq2 = TAILQ_NEXT(uq, uq_lockq); 1393 if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) || 1394 (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) { 1395 /* 1396 * Remove thread from blocked chain and determine where 1397 * it should be moved to. 1398 */ 1399 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq); 1400 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) { 1401 td1 = uq1->uq_thread; 1402 MPASS(td1->td_proc->p_magic == P_MAGIC); 1403 if (UPRI(td1) > UPRI(td)) 1404 break; 1405 } 1406 1407 if (uq1 == NULL) 1408 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq); 1409 else 1410 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq); 1411 } 1412 return (1); 1413 } 1414 1415 static struct umtx_pi * 1416 umtx_pi_next(struct umtx_pi *pi) 1417 { 1418 struct umtx_q *uq_owner; 1419 1420 if (pi->pi_owner == NULL) 1421 return (NULL); 1422 uq_owner = pi->pi_owner->td_umtxq; 1423 if (uq_owner == NULL) 1424 return (NULL); 1425 return (uq_owner->uq_pi_blocked); 1426 } 1427 1428 /* 1429 * Floyd's Cycle-Finding Algorithm. 

/*
 * Floyd's cycle-finding algorithm.
 */
static bool
umtx_pi_check_loop(struct umtx_pi *pi)
{
	struct umtx_pi *pi1;	/* fast iterator */

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (false);
	pi1 = pi;
	for (;;) {
		pi = umtx_pi_next(pi);
		if (pi == NULL)
			break;
		pi1 = umtx_pi_next(pi1);
		if (pi1 == NULL)
			break;
		pi1 = umtx_pi_next(pi1);
		if (pi1 == NULL)
			break;
		if (pi == pi1)
			return (true);
	}
	return (false);
}

/*
 * Propagate priority when a thread is blocked on a POSIX
 * PI mutex.
 */
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;
	if (umtx_pi_check_loop(pi))
		return;

	for (;;) {
		td = pi->pi_owner;
		if (td == NULL || td == curthread)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		thread_lock(td);
		if (td->td_lend_user_pri > pri)
			sched_lend_user_prio(td, pri);
		else {
			thread_unlock(td);
			break;
		}
		thread_unlock(td);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		if (pi == NULL)
			break;
		/* Resort td on the list if needed. */
		umtx_pi_adjust_thread(pi, td);
	}
}

/*
 * Unpropagate priority for a PI mutex when a thread blocked on
 * it is interrupted by a signal or resumed by others.
 */
static void
umtx_repropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);

	if (umtx_pi_check_loop(pi))
		return;
	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		thread_lock(pi->pi_owner);
		sched_lend_user_prio(pi->pi_owner, pri);
		thread_unlock(pi->pi_owner);
		if ((pi = uq_owner->uq_pi_blocked) != NULL)
			umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
	}
}

/*
 * Insert a PI mutex into the owned list.
 */
static void
umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi->pi_owner != NULL)
		panic("pi_owner != NULL");
	pi->pi_owner = owner;
	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
}

/*
 * Disown a PI mutex, and remove it from the owned list.
 */
static void
umtx_pi_disown(struct umtx_pi *pi)
{

	mtx_assert(&umtx_lock, MA_OWNED);
	TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, pi, pi_link);
	pi->pi_owner = NULL;
}

/*
 * Claim ownership of a PI mutex.
 */
static int
umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq;
	int pri;

	mtx_lock(&umtx_lock);
	if (pi->pi_owner == owner) {
		mtx_unlock(&umtx_lock);
		return (0);
	}

	if (pi->pi_owner != NULL) {
		/*
		 * userland may have already messed the mutex, sigh.
		 */
		mtx_unlock(&umtx_lock);
		return (EPERM);
	}
	umtx_pi_setowner(pi, owner);
	uq = TAILQ_FIRST(&pi->pi_blocked);
	if (uq != NULL) {
		pri = UPRI(uq->uq_thread);
		thread_lock(owner);
		if (pri < UPRI(owner))
			sched_lend_user_prio(owner, pri);
		thread_unlock(owner);
	}
	mtx_unlock(&umtx_lock);
	return (0);
}

/*
 * Adjust a thread's position in the blocked list of its PI mutex;
 * this may trigger a new round of priority propagation.
 */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	mtx_lock(&umtx_lock);
	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	if (pi != NULL) {
		umtx_pi_adjust_thread(pi, td);
		umtx_repropagate_priority(pi);
	}
	mtx_unlock(&umtx_lock);
}

/*
 * Sleep on a PI mutex.
 */
static int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, uint32_t owner,
    const char *wmesg, struct abs_timeout *timo, bool shared)
{
	struct umtxq_chain *uc;
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int error, pri;

	error = 0;
	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(uc->uc_busy != 0, ("umtx chain is not busy"));
	umtxq_insert(uq);
	mtx_lock(&umtx_lock);
	if (pi->pi_owner == NULL) {
		mtx_unlock(&umtx_lock);
		td1 = tdfind(owner, shared ? -1 : td->td_proc->p_pid);
		mtx_lock(&umtx_lock);
		if (td1 != NULL) {
			if (pi->pi_owner == NULL)
				umtx_pi_setowner(pi, td1);
			PROC_UNLOCK(td1->td_proc);
		}
	}

	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	thread_lock(td);
	td->td_flags |= TDF_UPIBLOCKED;
	thread_unlock(td);
	umtx_propagate_priority(td);
	mtx_unlock(&umtx_lock);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, wmesg, timo);
	umtxq_remove(uq);

	mtx_lock(&umtx_lock);
	uq->uq_pi_blocked = NULL;
	thread_lock(td);
	td->td_flags &= ~TDF_UPIBLOCKED;
	thread_unlock(td);
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_repropagate_priority(pi);
	mtx_unlock(&umtx_lock);
	umtxq_unlock(&uq->uq_key);

	return (error);
}

/*
 * Add a reference to a PI mutex.
 */
static void
umtx_pi_ref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	pi->pi_refcount++;
}
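
/*
 * Sketch of the umtx_pi reference-count lifecycle as used by
 * do_lock_pi() below: the pi is looked up (or allocated and hashed)
 * and referenced under the chain lock, used while the chain lock may
 * be dropped, and dereferenced under the chain lock again; the final
 * unref removes it from the hash and frees it.
 *
 *	umtxq_lock(&uq->uq_key);
 *	pi = umtx_pi_lookup(&uq->uq_key);   (or umtx_pi_insert(new_pi))
 *	umtx_pi_ref(pi);
 *	umtxq_unlock(&uq->uq_key);
 *	... lock and sleep logic ...
 *	umtxq_lock(&uq->uq_key);
 *	umtx_pi_unref(pi);
 *	umtxq_unlock(&uq->uq_key);
 */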

/*
 * Decrease the reference count for a PI mutex; when the count drops
 * to zero, its memory is freed.
 */
static void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		mtx_lock(&umtx_lock);
		if (pi->pi_owner != NULL)
			umtx_pi_disown(pi);
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
		    ("blocked queue not empty"));
		mtx_unlock(&umtx_lock);
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		umtx_pi_free(pi);
	}
}

/*
 * Find a PI mutex in the hash table.
 */
static struct umtx_pi *
umtx_pi_lookup(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtx_pi *pi;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);

	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
		if (umtx_key_match(&pi->pi_key, key)) {
			return (pi);
		}
	}
	return (NULL);
}

/*
 * Insert a PI mutex into the hash table.
 */
static inline void
umtx_pi_insert(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
}

/*
 * Lock a PI mutex.
 */
static int
do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, old_owner, owner, old;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	pi = umtx_pi_lookup(&uq->uq_key);
	if (pi == NULL) {
		new_pi = umtx_pi_alloc(M_NOWAIT);
		if (new_pi == NULL) {
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc(M_WAITOK);
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL) {
				umtx_pi_free(new_pi);
				new_pi = NULL;
			}
		}
		if (new_pi != NULL) {
			new_pi->pi_key = uq->uq_key;
			umtx_pi_insert(new_pi);
			pi = new_pi;
		}
	}
	umtx_pi_ref(pi);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Care must be exercised when dealing with the umtx structure.
	 * It can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in
		 * userland.
		 */
		rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED) {
			error = 0;
			break;
		}

		if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
			break;
		}

		/* If no one owns it but it is contested, try to acquire it. */
		if (owner == UMUTEX_CONTESTED || owner == UMUTEX_RB_OWNERDEAD) {
			old_owner = owner;
			rv = casueword32(&m->m_owner, owner, &owner,
			    id | UMUTEX_CONTESTED);
			/* The address was invalid. */
			if (rv == -1) {
				error = EFAULT;
				break;
			}

			if (owner == old_owner) {
				umtxq_lock(&uq->uq_key);
				umtxq_busy(&uq->uq_key);
				error = umtx_pi_claim(pi, td);
				umtxq_unbusy(&uq->uq_key);
				umtxq_unlock(&uq->uq_key);
				if (error != 0) {
					/*
					 * Since we're going to return an
					 * error, restore the m_owner to its
					 * previous, unowned state to avoid
					 * compounding the problem.
					 */
					(void)casuword32(&m->m_owner,
					    id | UMUTEX_CONTESTED,
					    old_owner);
				}
				if (error == 0 &&
				    old_owner == UMUTEX_RB_OWNERDEAD)
					error = EOWNERDEAD;
				break;
			}

			error = umtxq_check_susp(td);
			if (error != 0)
				break;

			/* If this failed, the lock has changed; restart. */
			continue;
		}

		if ((owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails,
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old, owner |
		    UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}

		umtxq_lock(&uq->uq_key);
		/*
		 * If we set the contested bit, sleep.  Otherwise the lock
		 * changed and we need to retry, or we lost a race to the
		 * thread unlocking the umtx.  Note that the
		 * UMUTEX_RB_OWNERDEAD value for owner is impossible there.
		 */
		if (old == owner) {
			error = umtxq_sleep_pi(uq, pi,
			    owner & ~UMUTEX_CONTESTED,
			    "umtxpi", timeout == NULL ? NULL : &timo,
			    (flags & USYNC_PROCESS_SHARED) != 0);
			if (error != 0)
				continue;
		} else {
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
		}

		error = umtxq_check_susp(td);
		if (error != 0)
			break;
	}

	umtxq_lock(&uq->uq_key);
	umtx_pi_unref(pi);
	umtxq_unlock(&uq->uq_key);

	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Unlock a PI mutex.
 */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	uint32_t id, new_owner, old, owner;
	int count, error, pri;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	new_owner = umtx_unlock_val(flags, rb);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, new_owner);
		if (error == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count_pi(&key, &uq_first);
	if (uq_first != NULL) {
		mtx_lock(&umtx_lock);
		pi = uq_first->uq_pi_blocked;
		KASSERT(pi != NULL, ("pi == NULL?"));
		if (pi->pi_owner != td && !(rb && pi->pi_owner == NULL)) {
			mtx_unlock(&umtx_lock);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			/* userland messed the mutex */
			return (EPERM);
		}
		uq_me = td->td_umtxq;
		if (pi->pi_owner == td)
			umtx_pi_disown(pi);
		/* Get the highest-priority thread that is still sleeping. */
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		while (uq_first != NULL &&
		    (uq_first->uq_flags & UQF_UMTXQ) == 0) {
			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
		}
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
		if (uq_first)
			umtxq_signal_thread(uq_first);
	} else {
		pi = umtx_pi_lookup(&key);
		/*
		 * A umtx_pi can exist if a signal or timeout removed the
		 * last waiter from the umtxq, but there is still
		 * a thread in do_lock_pi() holding the umtx_pi.
		 */
		if (pi != NULL) {
			/*
			 * The umtx_pi can be unowned, such as when a thread
			 * has just entered do_lock_pi(), allocated the
			 * umtx_pi, and unlocked the umtxq.
			 * If the current thread owns it, it must disown it.
			 */
			mtx_lock(&umtx_lock);
			if (pi->pi_owner == td)
				umtx_pi_disown(pi);
			mtx_unlock(&umtx_lock);
		}
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * zero or one thread is waiting for it.  Otherwise, it must
	 * be marked as contested.
	 */
	if (count > 1)
		new_owner |= UMUTEX_CONTESTED;
	error = casueword32(&m->m_owner, owner, &old, new_owner);

	umtxq_unbusy_unlocked(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
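
/*
 * Worked example of the priority-protect ceiling arithmetic in
 * do_lock_pp() below: m_ceilings[] holds POSIX-style ceilings in
 * [0, RTP_PRIO_MAX], where a larger value means a stronger priority.
 * They are converted to kernel user priorities, where a smaller value
 * is stronger, by
 *
 *	ceiling = RTP_PRIO_MAX - ceiling;
 *	pri = PRI_MIN_REALTIME + ceiling;
 *
 * A locking thread whose UPRI(td) is numerically below that value,
 * i.e. stronger than the ceiling allows, gets EINVAL.
 */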

/*
 * Lock a PP mutex.
 */
static int
do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct abs_timeout timo;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t ceiling;
	uint32_t owner, id;
	int error, pri, old_inherited_pri, su, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
	for (;;) {
		old_inherited_pri = uq->uq_inherited_pri;
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		rv = fueword32(&m->m_ceilings[0], &ceiling);
		if (rv == -1) {
			error = EFAULT;
			goto out;
		}
		ceiling = RTP_PRIO_MAX - ceiling;
		if (ceiling > RTP_PRIO_MAX) {
			error = EINVAL;
			goto out;
		}

		mtx_lock(&umtx_lock);
		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
			mtx_unlock(&umtx_lock);
			error = EINVAL;
			goto out;
		}
		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
			thread_lock(td);
			if (uq->uq_inherited_pri < UPRI(td))
				sched_lend_user_prio(td, uq->uq_inherited_pri);
			thread_unlock(td);
		}
		mtx_unlock(&umtx_lock);

		rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    id | UMUTEX_CONTESTED);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		if (owner == UMUTEX_CONTESTED) {
			error = 0;
			break;
		} else if (owner == UMUTEX_RB_OWNERDEAD) {
			rv = casueword32(&m->m_owner, UMUTEX_RB_OWNERDEAD,
			    &owner, id | UMUTEX_CONTESTED);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (owner == UMUTEX_RB_OWNERDEAD) {
				error = EOWNERDEAD; /* success */
				break;
			}
			error = 0;
		} else if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", timeout == NULL ?
		    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);

		mtx_lock(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}

	if (error != 0 && error != EOWNERDEAD) {
		mtx_lock(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}

out:
	umtxq_unbusy_unlocked(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}
2219 */
2220 static int
2221 do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
2222 {
2223 struct umtx_key key;
2224 struct umtx_q *uq, *uq2;
2225 struct umtx_pi *pi;
2226 uint32_t id, owner, rceiling;
2227 int error, pri, new_inherited_pri, su;
2228
2229 id = td->td_tid;
2230 uq = td->td_umtxq;
2231 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
2232
2233 /*
2234 * Make sure we own this mutex.
2235 */
2236 error = fueword32(&m->m_owner, &owner);
2237 if (error == -1)
2238 return (EFAULT);
2239
2240 if ((owner & ~UMUTEX_CONTESTED) != id)
2241 return (EPERM);
2242
2243 error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
2244 if (error != 0)
2245 return (error);
2246
2247 if (rceiling == -1)
2248 new_inherited_pri = PRI_MAX;
2249 else {
2250 rceiling = RTP_PRIO_MAX - rceiling;
2251 if (rceiling > RTP_PRIO_MAX)
2252 return (EINVAL);
2253 new_inherited_pri = PRI_MIN_REALTIME + rceiling;
2254 }
2255
2256 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
2257 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
2258 &key)) != 0)
2259 return (error);
2260 umtxq_lock(&key);
2261 umtxq_busy(&key);
2262 umtxq_unlock(&key);
2263 /*
2264 * For a priority-protected mutex, always set the unlocked state
2265 * to UMUTEX_CONTESTED so that userland always enters the kernel
2266 * to lock the mutex; this is necessary because the thread
2267 * priority has to be adjusted for such a mutex.
2268 */
2269 error = suword32(&m->m_owner, umtx_unlock_val(flags, rb) |
2270 UMUTEX_CONTESTED);
2271
2272 umtxq_lock(&key);
2273 if (error == 0)
2274 umtxq_signal(&key, 1);
2275 umtxq_unbusy(&key);
2276 umtxq_unlock(&key);
2277
2278 if (error == -1)
2279 error = EFAULT;
2280 else {
2281 mtx_lock(&umtx_lock);
2282 if (su != 0)
2283 uq->uq_inherited_pri = new_inherited_pri;
2284 pri = PRI_MAX;
2285 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2286 uq2 = TAILQ_FIRST(&pi->pi_blocked);
2287 if (uq2 != NULL) {
2288 if (pri > UPRI(uq2->uq_thread))
2289 pri = UPRI(uq2->uq_thread);
2290 }
2291 }
2292 if (pri > uq->uq_inherited_pri)
2293 pri = uq->uq_inherited_pri;
2294 thread_lock(td);
2295 sched_lend_user_prio(td, pri);
2296 thread_unlock(td);
2297 mtx_unlock(&umtx_lock);
2298 }
2299 umtx_key_release(&key);
2300 return (error);
2301 }
2302
2303 static int
2304 do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
2305 uint32_t *old_ceiling)
2306 {
2307 struct umtx_q *uq;
2308 uint32_t flags, id, owner, save_ceiling;
2309 int error, rv, rv1;
2310
2311 error = fueword32(&m->m_flags, &flags);
2312 if (error == -1)
2313 return (EFAULT);
2314 if ((flags & UMUTEX_PRIO_PROTECT) == 0)
2315 return (EINVAL);
2316 if (ceiling > RTP_PRIO_MAX)
2317 return (EINVAL);
2318 id = td->td_tid;
2319 uq = td->td_umtxq;
2320 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
2321 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
2322 &uq->uq_key)) != 0)
2323 return (error);
2324 for (;;) {
2325 umtxq_lock(&uq->uq_key);
2326 umtxq_busy(&uq->uq_key);
2327 umtxq_unlock(&uq->uq_key);
2328
2329 rv = fueword32(&m->m_ceilings[0], &save_ceiling);
2330 if (rv == -1) {
2331 error = EFAULT;
2332 break;
2333 }
2334
2335 rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
2336 id | UMUTEX_CONTESTED);
2337 if (rv == -1) {
2338 error = EFAULT;
2339 break;
2340 }
2341
2342 if (owner == UMUTEX_CONTESTED) {
2343 rv = suword32(&m->m_ceilings[0], ceiling);
2344 rv1 = suword32(&m->m_owner, UMUTEX_CONTESTED);
2345 error = (rv == 0 && rv1 == 0) ?
0: EFAULT;
2346 break;
2347 }
2348
2349 if ((owner & ~UMUTEX_CONTESTED) == id) {
2350 rv = suword32(&m->m_ceilings[0], ceiling);
2351 error = rv == 0 ? 0 : EFAULT;
2352 break;
2353 }
2354
2355 if (owner == UMUTEX_RB_OWNERDEAD) {
2356 error = EOWNERDEAD;
2357 break;
2358 } else if (owner == UMUTEX_RB_NOTRECOV) {
2359 error = ENOTRECOVERABLE;
2360 break;
2361 }
2362
2363 /*
2364 * If the previous sleep was interrupted by a signal, we have
2365 * already retried once above; exit immediately now.
2366 */
2367 if (error != 0)
2368 break;
2369
2370 /*
2371 * We set the contested bit, so sleep. Otherwise the lock
2372 * changed and we need to retry, or we lost a race with the
2373 * thread unlocking the umtx.
2374 */
2375 umtxq_lock(&uq->uq_key);
2376 umtxq_insert(uq);
2377 umtxq_unbusy(&uq->uq_key);
2378 error = umtxq_sleep(uq, "umtxpp", NULL);
2379 umtxq_remove(uq);
2380 umtxq_unlock(&uq->uq_key);
2381 }
2382 umtxq_lock(&uq->uq_key);
2383 if (error == 0)
2384 umtxq_signal(&uq->uq_key, INT_MAX);
2385 umtxq_unbusy(&uq->uq_key);
2386 umtxq_unlock(&uq->uq_key);
2387 umtx_key_release(&uq->uq_key);
2388 if (error == 0 && old_ceiling != NULL) {
2389 rv = suword32(old_ceiling, save_ceiling);
2390 error = rv == 0 ? 0 : EFAULT;
2391 }
2392 return (error);
2393 }
2394
2395 /*
2396 * Lock a userland POSIX mutex.
2397 */
2398 static int
2399 do_lock_umutex(struct thread *td, struct umutex *m,
2400 struct _umtx_time *timeout, int mode)
2401 {
2402 uint32_t flags;
2403 int error;
2404
2405 error = fueword32(&m->m_flags, &flags);
2406 if (error == -1)
2407 return (EFAULT);
2408
2409 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2410 case 0:
2411 error = do_lock_normal(td, m, flags, timeout, mode);
2412 break;
2413 case UMUTEX_PRIO_INHERIT:
2414 error = do_lock_pi(td, m, flags, timeout, mode);
2415 break;
2416 case UMUTEX_PRIO_PROTECT:
2417 error = do_lock_pp(td, m, flags, timeout, mode);
2418 break;
2419 default:
2420 return (EINVAL);
2421 }
2422 if (timeout == NULL) {
2423 if (error == EINTR && mode != _UMUTEX_WAIT)
2424 error = ERESTART;
2425 } else {
2426 /* Timed-locking is not restarted. */
2427 if (error == ERESTART)
2428 error = EINTR;
2429 }
2430 return (error);
2431 }
2432
2433 /*
2434 * Unlock a userland POSIX mutex.
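 *
 * The protocol variant is chosen by m_flags, mirroring
 * do_lock_umutex() above.  A hedged sketch (illustrative only, not
 * part of this file) of the three userland initializations that
 * select each path:
 *
 *	struct umutex m1 = { .m_flags = 0 };			normal
 *	struct umutex m2 = { .m_flags = UMUTEX_PRIO_INHERIT };	PI
 *	struct umutex m3 = { .m_flags = UMUTEX_PRIO_PROTECT };	PP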
2435 */
2436 static int
2437 do_unlock_umutex(struct thread *td, struct umutex *m, bool rb)
2438 {
2439 uint32_t flags;
2440 int error;
2441
2442 error = fueword32(&m->m_flags, &flags);
2443 if (error == -1)
2444 return (EFAULT);
2445
2446 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2447 case 0:
2448 return (do_unlock_normal(td, m, flags, rb));
2449 case UMUTEX_PRIO_INHERIT:
2450 return (do_unlock_pi(td, m, flags, rb));
2451 case UMUTEX_PRIO_PROTECT:
2452 return (do_unlock_pp(td, m, flags, rb));
2453 }
2454
2455 return (EINVAL);
2456 }
2457
2458 static int
2459 do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
2460 struct timespec *timeout, u_long wflags)
2461 {
2462 struct abs_timeout timo;
2463 struct umtx_q *uq;
2464 uint32_t flags, clockid, hasw;
2465 int error;
2466
2467 uq = td->td_umtxq;
2468 error = fueword32(&cv->c_flags, &flags);
2469 if (error == -1)
2470 return (EFAULT);
2471 error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
2472 if (error != 0)
2473 return (error);
2474
2475 if ((wflags & CVWAIT_CLOCKID) != 0) {
2476 error = fueword32(&cv->c_clockid, &clockid);
2477 if (error == -1) {
2478 umtx_key_release(&uq->uq_key);
2479 return (EFAULT);
2480 }
2481 if (clockid < CLOCK_REALTIME ||
2482 clockid >= CLOCK_THREAD_CPUTIME_ID) {
2483 /* Only predefined, non-CPU-time clock ids will work. */
2484 umtx_key_release(&uq->uq_key);
2485 return (EINVAL);
2486 }
2487 } else {
2488 clockid = CLOCK_REALTIME;
2489 }
2490
2491 umtxq_lock(&uq->uq_key);
2492 umtxq_busy(&uq->uq_key);
2493 umtxq_insert(uq);
2494 umtxq_unlock(&uq->uq_key);
2495
2496 /*
2497 * Set c_has_waiters to 1 before releasing the user mutex; also
2498 * avoid dirtying the cache line when the flag is already set.
2499 */
2500 error = fueword32(&cv->c_has_waiters, &hasw);
2501 if (error == 0 && hasw == 0)
2502 suword32(&cv->c_has_waiters, 1);
2503
2504 umtxq_unbusy_unlocked(&uq->uq_key);
2505
2506 error = do_unlock_umutex(td, m, false);
2507
2508 if (timeout != NULL)
2509 abs_timeout_init(&timo, clockid, (wflags & CVWAIT_ABSTIME) != 0,
2510 timeout);
2511
2512 umtxq_lock(&uq->uq_key);
2513 if (error == 0) {
2514 error = umtxq_sleep(uq, "ucond", timeout == NULL ?
2515 NULL : &timo);
2516 }
2517
2518 if ((uq->uq_flags & UQF_UMTXQ) == 0)
2519 error = 0;
2520 else {
2521 /*
2522 * This must be a timeout, an interruption by a signal,
2523 * or a spurious wakeup; clear the c_has_waiters flag
2524 * when necessary.
2525 */
2526 umtxq_busy(&uq->uq_key);
2527 if ((uq->uq_flags & UQF_UMTXQ) != 0) {
2528 int oldlen = uq->uq_cur_queue->length;
2529 umtxq_remove(uq);
2530 if (oldlen == 1) {
2531 umtxq_unlock(&uq->uq_key);
2532 suword32(&cv->c_has_waiters, 0);
2533 umtxq_lock(&uq->uq_key);
2534 }
2535 }
2536 umtxq_unbusy(&uq->uq_key);
2537 if (error == ERESTART)
2538 error = EINTR;
2539 }
2540
2541 umtxq_unlock(&uq->uq_key);
2542 umtx_key_release(&uq->uq_key);
2543 return (error);
2544 }
2545
2546 /*
2547 * Signal a userland condition variable.
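 *
 * The c_has_waiters flag maintained by do_cv_wait() above lets
 * userland skip this syscall entirely on the fast path; a hedged
 * sketch (hypothetical wrapper, not part of this file):
 *
 *	if (atomic_load_acq_32(&cv->c_has_waiters) != 0)
 *		_umtx_op(cv, UMTX_OP_CV_SIGNAL, 0, NULL, NULL);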
2548 */ 2549 static int 2550 do_cv_signal(struct thread *td, struct ucond *cv) 2551 { 2552 struct umtx_key key; 2553 int error, cnt, nwake; 2554 uint32_t flags; 2555 2556 error = fueword32(&cv->c_flags, &flags); 2557 if (error == -1) 2558 return (EFAULT); 2559 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2560 return (error); 2561 umtxq_lock(&key); 2562 umtxq_busy(&key); 2563 cnt = umtxq_count(&key); 2564 nwake = umtxq_signal(&key, 1); 2565 if (cnt <= nwake) { 2566 umtxq_unlock(&key); 2567 error = suword32(&cv->c_has_waiters, 0); 2568 if (error == -1) 2569 error = EFAULT; 2570 umtxq_lock(&key); 2571 } 2572 umtxq_unbusy(&key); 2573 umtxq_unlock(&key); 2574 umtx_key_release(&key); 2575 return (error); 2576 } 2577 2578 static int 2579 do_cv_broadcast(struct thread *td, struct ucond *cv) 2580 { 2581 struct umtx_key key; 2582 int error; 2583 uint32_t flags; 2584 2585 error = fueword32(&cv->c_flags, &flags); 2586 if (error == -1) 2587 return (EFAULT); 2588 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2589 return (error); 2590 2591 umtxq_lock(&key); 2592 umtxq_busy(&key); 2593 umtxq_signal(&key, INT_MAX); 2594 umtxq_unlock(&key); 2595 2596 error = suword32(&cv->c_has_waiters, 0); 2597 if (error == -1) 2598 error = EFAULT; 2599 2600 umtxq_unbusy_unlocked(&key); 2601 2602 umtx_key_release(&key); 2603 return (error); 2604 } 2605 2606 static int 2607 do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, struct _umtx_time *timeout) 2608 { 2609 struct abs_timeout timo; 2610 struct umtx_q *uq; 2611 uint32_t flags, wrflags; 2612 int32_t state, oldstate; 2613 int32_t blocked_readers; 2614 int error, error1, rv; 2615 2616 uq = td->td_umtxq; 2617 error = fueword32(&rwlock->rw_flags, &flags); 2618 if (error == -1) 2619 return (EFAULT); 2620 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2621 if (error != 0) 2622 return (error); 2623 2624 if (timeout != NULL) 2625 abs_timeout_init2(&timo, timeout); 2626 2627 wrflags = URWLOCK_WRITE_OWNER; 2628 if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER)) 2629 wrflags |= URWLOCK_WRITE_WAITERS; 2630 2631 for (;;) { 2632 rv = fueword32(&rwlock->rw_state, &state); 2633 if (rv == -1) { 2634 umtx_key_release(&uq->uq_key); 2635 return (EFAULT); 2636 } 2637 2638 /* try to lock it */ 2639 while (!(state & wrflags)) { 2640 if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) { 2641 umtx_key_release(&uq->uq_key); 2642 return (EAGAIN); 2643 } 2644 rv = casueword32(&rwlock->rw_state, state, 2645 &oldstate, state + 1); 2646 if (rv == -1) { 2647 umtx_key_release(&uq->uq_key); 2648 return (EFAULT); 2649 } 2650 if (oldstate == state) { 2651 umtx_key_release(&uq->uq_key); 2652 return (0); 2653 } 2654 error = umtxq_check_susp(td); 2655 if (error != 0) 2656 break; 2657 state = oldstate; 2658 } 2659 2660 if (error) 2661 break; 2662 2663 /* grab monitor lock */ 2664 umtxq_lock(&uq->uq_key); 2665 umtxq_busy(&uq->uq_key); 2666 umtxq_unlock(&uq->uq_key); 2667 2668 /* 2669 * re-read the state, in case it changed between the try-lock above 2670 * and the check below 2671 */ 2672 rv = fueword32(&rwlock->rw_state, &state); 2673 if (rv == -1) 2674 error = EFAULT; 2675 2676 /* set read contention bit */ 2677 while (error == 0 && (state & wrflags) && 2678 !(state & URWLOCK_READ_WAITERS)) { 2679 rv = casueword32(&rwlock->rw_state, state, 2680 &oldstate, state | URWLOCK_READ_WAITERS); 2681 if (rv == -1) { 2682 error = EFAULT; 2683 break; 2684 } 2685 if (oldstate == state) 
2686 goto sleep; 2687 state = oldstate; 2688 error = umtxq_check_susp(td); 2689 if (error != 0) 2690 break; 2691 } 2692 if (error != 0) { 2693 umtxq_unbusy_unlocked(&uq->uq_key); 2694 break; 2695 } 2696 2697 /* state is changed while setting flags, restart */ 2698 if (!(state & wrflags)) { 2699 umtxq_unbusy_unlocked(&uq->uq_key); 2700 error = umtxq_check_susp(td); 2701 if (error != 0) 2702 break; 2703 continue; 2704 } 2705 2706 sleep: 2707 /* contention bit is set, before sleeping, increase read waiter count */ 2708 rv = fueword32(&rwlock->rw_blocked_readers, 2709 &blocked_readers); 2710 if (rv == -1) { 2711 umtxq_unbusy_unlocked(&uq->uq_key); 2712 error = EFAULT; 2713 break; 2714 } 2715 suword32(&rwlock->rw_blocked_readers, blocked_readers+1); 2716 2717 while (state & wrflags) { 2718 umtxq_lock(&uq->uq_key); 2719 umtxq_insert(uq); 2720 umtxq_unbusy(&uq->uq_key); 2721 2722 error = umtxq_sleep(uq, "urdlck", timeout == NULL ? 2723 NULL : &timo); 2724 2725 umtxq_busy(&uq->uq_key); 2726 umtxq_remove(uq); 2727 umtxq_unlock(&uq->uq_key); 2728 if (error) 2729 break; 2730 rv = fueword32(&rwlock->rw_state, &state); 2731 if (rv == -1) { 2732 error = EFAULT; 2733 break; 2734 } 2735 } 2736 2737 /* decrease read waiter count, and may clear read contention bit */ 2738 rv = fueword32(&rwlock->rw_blocked_readers, 2739 &blocked_readers); 2740 if (rv == -1) { 2741 umtxq_unbusy_unlocked(&uq->uq_key); 2742 error = EFAULT; 2743 break; 2744 } 2745 suword32(&rwlock->rw_blocked_readers, blocked_readers-1); 2746 if (blocked_readers == 1) { 2747 rv = fueword32(&rwlock->rw_state, &state); 2748 if (rv == -1) { 2749 umtxq_unbusy_unlocked(&uq->uq_key); 2750 error = EFAULT; 2751 break; 2752 } 2753 for (;;) { 2754 rv = casueword32(&rwlock->rw_state, state, 2755 &oldstate, state & ~URWLOCK_READ_WAITERS); 2756 if (rv == -1) { 2757 error = EFAULT; 2758 break; 2759 } 2760 if (oldstate == state) 2761 break; 2762 state = oldstate; 2763 error1 = umtxq_check_susp(td); 2764 if (error1 != 0) { 2765 if (error == 0) 2766 error = error1; 2767 break; 2768 } 2769 } 2770 } 2771 2772 umtxq_unbusy_unlocked(&uq->uq_key); 2773 if (error != 0) 2774 break; 2775 } 2776 umtx_key_release(&uq->uq_key); 2777 if (error == ERESTART) 2778 error = EINTR; 2779 return (error); 2780 } 2781 2782 static int 2783 do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout) 2784 { 2785 struct abs_timeout timo; 2786 struct umtx_q *uq; 2787 uint32_t flags; 2788 int32_t state, oldstate; 2789 int32_t blocked_writers; 2790 int32_t blocked_readers; 2791 int error, error1, rv; 2792 2793 uq = td->td_umtxq; 2794 error = fueword32(&rwlock->rw_flags, &flags); 2795 if (error == -1) 2796 return (EFAULT); 2797 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2798 if (error != 0) 2799 return (error); 2800 2801 if (timeout != NULL) 2802 abs_timeout_init2(&timo, timeout); 2803 2804 blocked_readers = 0; 2805 for (;;) { 2806 rv = fueword32(&rwlock->rw_state, &state); 2807 if (rv == -1) { 2808 umtx_key_release(&uq->uq_key); 2809 return (EFAULT); 2810 } 2811 while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) { 2812 rv = casueword32(&rwlock->rw_state, state, 2813 &oldstate, state | URWLOCK_WRITE_OWNER); 2814 if (rv == -1) { 2815 umtx_key_release(&uq->uq_key); 2816 return (EFAULT); 2817 } 2818 if (oldstate == state) { 2819 umtx_key_release(&uq->uq_key); 2820 return (0); 2821 } 2822 state = oldstate; 2823 error = umtxq_check_susp(td); 2824 if (error != 0) 2825 break; 2826 } 2827 2828 if (error) { 2829 if 
(!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) && 2830 blocked_readers != 0) { 2831 umtxq_lock(&uq->uq_key); 2832 umtxq_busy(&uq->uq_key); 2833 umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE); 2834 umtxq_unbusy(&uq->uq_key); 2835 umtxq_unlock(&uq->uq_key); 2836 } 2837 2838 break; 2839 } 2840 2841 /* grab monitor lock */ 2842 umtxq_lock(&uq->uq_key); 2843 umtxq_busy(&uq->uq_key); 2844 umtxq_unlock(&uq->uq_key); 2845 2846 /* 2847 * re-read the state, in case it changed between the try-lock above 2848 * and the check below 2849 */ 2850 rv = fueword32(&rwlock->rw_state, &state); 2851 if (rv == -1) 2852 error = EFAULT; 2853 2854 while (error == 0 && ((state & URWLOCK_WRITE_OWNER) || 2855 URWLOCK_READER_COUNT(state) != 0) && 2856 (state & URWLOCK_WRITE_WAITERS) == 0) { 2857 rv = casueword32(&rwlock->rw_state, state, 2858 &oldstate, state | URWLOCK_WRITE_WAITERS); 2859 if (rv == -1) { 2860 error = EFAULT; 2861 break; 2862 } 2863 if (oldstate == state) 2864 goto sleep; 2865 state = oldstate; 2866 error = umtxq_check_susp(td); 2867 if (error != 0) 2868 break; 2869 } 2870 if (error != 0) { 2871 umtxq_unbusy_unlocked(&uq->uq_key); 2872 break; 2873 } 2874 2875 if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) { 2876 umtxq_unbusy_unlocked(&uq->uq_key); 2877 error = umtxq_check_susp(td); 2878 if (error != 0) 2879 break; 2880 continue; 2881 } 2882 sleep: 2883 rv = fueword32(&rwlock->rw_blocked_writers, 2884 &blocked_writers); 2885 if (rv == -1) { 2886 umtxq_unbusy_unlocked(&uq->uq_key); 2887 error = EFAULT; 2888 break; 2889 } 2890 suword32(&rwlock->rw_blocked_writers, blocked_writers+1); 2891 2892 while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) { 2893 umtxq_lock(&uq->uq_key); 2894 umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE); 2895 umtxq_unbusy(&uq->uq_key); 2896 2897 error = umtxq_sleep(uq, "uwrlck", timeout == NULL ? 2898 NULL : &timo); 2899 2900 umtxq_busy(&uq->uq_key); 2901 umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE); 2902 umtxq_unlock(&uq->uq_key); 2903 if (error) 2904 break; 2905 rv = fueword32(&rwlock->rw_state, &state); 2906 if (rv == -1) { 2907 error = EFAULT; 2908 break; 2909 } 2910 } 2911 2912 rv = fueword32(&rwlock->rw_blocked_writers, 2913 &blocked_writers); 2914 if (rv == -1) { 2915 umtxq_unbusy_unlocked(&uq->uq_key); 2916 error = EFAULT; 2917 break; 2918 } 2919 suword32(&rwlock->rw_blocked_writers, blocked_writers-1); 2920 if (blocked_writers == 1) { 2921 rv = fueword32(&rwlock->rw_state, &state); 2922 if (rv == -1) { 2923 umtxq_unbusy_unlocked(&uq->uq_key); 2924 error = EFAULT; 2925 break; 2926 } 2927 for (;;) { 2928 rv = casueword32(&rwlock->rw_state, state, 2929 &oldstate, state & ~URWLOCK_WRITE_WAITERS); 2930 if (rv == -1) { 2931 error = EFAULT; 2932 break; 2933 } 2934 if (oldstate == state) 2935 break; 2936 state = oldstate; 2937 error1 = umtxq_check_susp(td); 2938 /* 2939 * We are leaving the URWLOCK_WRITE_WAITERS 2940 * behind, but this should not harm the 2941 * correctness. 
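 * A stale URWLOCK_WRITE_WAITERS bit only diverts a later unlock
 * or lock attempt into the kernel, where rw_state is re-read
 * and the bit can be cleared; it cannot cause a lost wakeup.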
2942 */ 2943 if (error1 != 0) { 2944 if (error == 0) 2945 error = error1; 2946 break; 2947 } 2948 } 2949 rv = fueword32(&rwlock->rw_blocked_readers, 2950 &blocked_readers); 2951 if (rv == -1) { 2952 umtxq_unbusy_unlocked(&uq->uq_key); 2953 error = EFAULT; 2954 break; 2955 } 2956 } else 2957 blocked_readers = 0; 2958 2959 umtxq_unbusy_unlocked(&uq->uq_key); 2960 } 2961 2962 umtx_key_release(&uq->uq_key); 2963 if (error == ERESTART) 2964 error = EINTR; 2965 return (error); 2966 } 2967 2968 static int 2969 do_rw_unlock(struct thread *td, struct urwlock *rwlock) 2970 { 2971 struct umtx_q *uq; 2972 uint32_t flags; 2973 int32_t state, oldstate; 2974 int error, rv, q, count; 2975 2976 uq = td->td_umtxq; 2977 error = fueword32(&rwlock->rw_flags, &flags); 2978 if (error == -1) 2979 return (EFAULT); 2980 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2981 if (error != 0) 2982 return (error); 2983 2984 error = fueword32(&rwlock->rw_state, &state); 2985 if (error == -1) { 2986 error = EFAULT; 2987 goto out; 2988 } 2989 if (state & URWLOCK_WRITE_OWNER) { 2990 for (;;) { 2991 rv = casueword32(&rwlock->rw_state, state, 2992 &oldstate, state & ~URWLOCK_WRITE_OWNER); 2993 if (rv == -1) { 2994 error = EFAULT; 2995 goto out; 2996 } 2997 if (oldstate != state) { 2998 state = oldstate; 2999 if (!(oldstate & URWLOCK_WRITE_OWNER)) { 3000 error = EPERM; 3001 goto out; 3002 } 3003 error = umtxq_check_susp(td); 3004 if (error != 0) 3005 goto out; 3006 } else 3007 break; 3008 } 3009 } else if (URWLOCK_READER_COUNT(state) != 0) { 3010 for (;;) { 3011 rv = casueword32(&rwlock->rw_state, state, 3012 &oldstate, state - 1); 3013 if (rv == -1) { 3014 error = EFAULT; 3015 goto out; 3016 } 3017 if (oldstate != state) { 3018 state = oldstate; 3019 if (URWLOCK_READER_COUNT(oldstate) == 0) { 3020 error = EPERM; 3021 goto out; 3022 } 3023 error = umtxq_check_susp(td); 3024 if (error != 0) 3025 goto out; 3026 } else 3027 break; 3028 } 3029 } else { 3030 error = EPERM; 3031 goto out; 3032 } 3033 3034 count = 0; 3035 3036 if (!(flags & URWLOCK_PREFER_READER)) { 3037 if (state & URWLOCK_WRITE_WAITERS) { 3038 count = 1; 3039 q = UMTX_EXCLUSIVE_QUEUE; 3040 } else if (state & URWLOCK_READ_WAITERS) { 3041 count = INT_MAX; 3042 q = UMTX_SHARED_QUEUE; 3043 } 3044 } else { 3045 if (state & URWLOCK_READ_WAITERS) { 3046 count = INT_MAX; 3047 q = UMTX_SHARED_QUEUE; 3048 } else if (state & URWLOCK_WRITE_WAITERS) { 3049 count = 1; 3050 q = UMTX_EXCLUSIVE_QUEUE; 3051 } 3052 } 3053 3054 if (count) { 3055 umtxq_lock(&uq->uq_key); 3056 umtxq_busy(&uq->uq_key); 3057 umtxq_signal_queue(&uq->uq_key, count, q); 3058 umtxq_unbusy(&uq->uq_key); 3059 umtxq_unlock(&uq->uq_key); 3060 } 3061 out: 3062 umtx_key_release(&uq->uq_key); 3063 return (error); 3064 } 3065 3066 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3067 static int 3068 do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout) 3069 { 3070 struct abs_timeout timo; 3071 struct umtx_q *uq; 3072 uint32_t flags, count, count1; 3073 int error, rv; 3074 3075 uq = td->td_umtxq; 3076 error = fueword32(&sem->_flags, &flags); 3077 if (error == -1) 3078 return (EFAULT); 3079 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3080 if (error != 0) 3081 return (error); 3082 3083 if (timeout != NULL) 3084 abs_timeout_init2(&timo, timeout); 3085 3086 umtxq_lock(&uq->uq_key); 3087 umtxq_busy(&uq->uq_key); 3088 umtxq_insert(uq); 3089 umtxq_unlock(&uq->uq_key); 3090 rv = casueword32(&sem->_has_waiters, 0, &count1, 1); 3091 if (rv == 0) 
3092 rv = fueword32(&sem->_count, &count);
3093 if (rv == -1 || count != 0) {
3094 umtxq_lock(&uq->uq_key);
3095 umtxq_unbusy(&uq->uq_key);
3096 umtxq_remove(uq);
3097 umtxq_unlock(&uq->uq_key);
3098 umtx_key_release(&uq->uq_key);
3099 return (rv == -1 ? EFAULT : 0);
3100 }
3101 umtxq_lock(&uq->uq_key);
3102 umtxq_unbusy(&uq->uq_key);
3103
3104 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo);
3105
3106 if ((uq->uq_flags & UQF_UMTXQ) == 0)
3107 error = 0;
3108 else {
3109 umtxq_remove(uq);
3110 /* A relative timeout cannot be restarted. */
3111 if (error == ERESTART && timeout != NULL &&
3112 (timeout->_flags & UMTX_ABSTIME) == 0)
3113 error = EINTR;
3114 }
3115 umtxq_unlock(&uq->uq_key);
3116 umtx_key_release(&uq->uq_key);
3117 return (error);
3118 }
3119
3120 /*
3121 * Signal a userland semaphore.
3122 */
3123 static int
3124 do_sem_wake(struct thread *td, struct _usem *sem)
3125 {
3126 struct umtx_key key;
3127 int error, cnt;
3128 uint32_t flags;
3129
3130 error = fueword32(&sem->_flags, &flags);
3131 if (error == -1)
3132 return (EFAULT);
3133 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
3134 return (error);
3135 umtxq_lock(&key);
3136 umtxq_busy(&key);
3137 cnt = umtxq_count(&key);
3138 if (cnt > 0) {
3139 /*
3140 * A waiter count above zero means the semaphore memory is
3141 * still being referenced by user code, so the _has_waiters
3142 * flag can be updated safely.
3143 */
3144 if (cnt == 1) {
3145 umtxq_unlock(&key);
3146 error = suword32(&sem->_has_waiters, 0);
3147 umtxq_lock(&key);
3148 if (error == -1)
3149 error = EFAULT;
3150 }
3151 umtxq_signal(&key, 1);
3152 }
3153 umtxq_unbusy(&key);
3154 umtxq_unlock(&key);
3155 umtx_key_release(&key);
3156 return (error);
3157 }
3158 #endif
3159
3160 static int
3161 do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout)
3162 {
3163 struct abs_timeout timo;
3164 struct umtx_q *uq;
3165 uint32_t count, flags;
3166 int error, rv;
3167
3168 uq = td->td_umtxq;
3169 rv = fueword32(&sem->_flags, &flags);
if (rv == -1)
return (EFAULT);
3170 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
3171 if (error != 0)
3172 return (error);
3173
3174 if (timeout != NULL)
3175 abs_timeout_init2(&timo, timeout);
3176
3177 umtxq_lock(&uq->uq_key);
3178 umtxq_busy(&uq->uq_key);
3179 umtxq_insert(uq);
3180 umtxq_unlock(&uq->uq_key);
3181 rv = fueword32(&sem->_count, &count);
3182 if (rv == -1) {
3183 umtxq_lock(&uq->uq_key);
3184 umtxq_unbusy(&uq->uq_key);
3185 umtxq_remove(uq);
3186 umtxq_unlock(&uq->uq_key);
3187 umtx_key_release(&uq->uq_key);
3188 return (EFAULT);
3189 }
3190 for (;;) {
3191 if (USEM_COUNT(count) != 0) {
3192 umtxq_lock(&uq->uq_key);
3193 umtxq_unbusy(&uq->uq_key);
3194 umtxq_remove(uq);
3195 umtxq_unlock(&uq->uq_key);
3196 umtx_key_release(&uq->uq_key);
3197 return (0);
3198 }
3199 if (count == USEM_HAS_WAITERS)
3200 break;
3201 rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS);
3202 if (rv == -1) {
3203 umtxq_lock(&uq->uq_key);
3204 umtxq_unbusy(&uq->uq_key);
3205 umtxq_remove(uq);
3206 umtxq_unlock(&uq->uq_key);
3207 umtx_key_release(&uq->uq_key);
3208 return (EFAULT);
3209 }
3210 if (count == 0)
3211 break;
3212 }
3213 umtxq_lock(&uq->uq_key);
3214 umtxq_unbusy(&uq->uq_key);
3215
3216 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo);
3217
3218 if ((uq->uq_flags & UQF_UMTXQ) == 0)
3219 error = 0;
3220 else {
3221 umtxq_remove(uq);
3222 /* A relative timeout cannot be restarted.
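 * Restarting the syscall would rearm the full relative
 * interval after every signal and could extend the total
 * sleep indefinitely, so ERESTART is converted to EINTR below.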
*/ 3223 if (error == ERESTART && timeout != NULL && 3224 (timeout->_flags & UMTX_ABSTIME) == 0) 3225 error = EINTR; 3226 } 3227 umtxq_unlock(&uq->uq_key); 3228 umtx_key_release(&uq->uq_key); 3229 return (error); 3230 } 3231 3232 /* 3233 * Signal a userland semaphore. 3234 */ 3235 static int 3236 do_sem2_wake(struct thread *td, struct _usem2 *sem) 3237 { 3238 struct umtx_key key; 3239 int error, cnt, rv; 3240 uint32_t count, flags; 3241 3242 rv = fueword32(&sem->_flags, &flags); 3243 if (rv == -1) 3244 return (EFAULT); 3245 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3246 return (error); 3247 umtxq_lock(&key); 3248 umtxq_busy(&key); 3249 cnt = umtxq_count(&key); 3250 if (cnt > 0) { 3251 /* 3252 * If this was the last sleeping thread, clear the waiters 3253 * flag in _count. 3254 */ 3255 if (cnt == 1) { 3256 umtxq_unlock(&key); 3257 rv = fueword32(&sem->_count, &count); 3258 while (rv != -1 && count & USEM_HAS_WAITERS) 3259 rv = casueword32(&sem->_count, count, &count, 3260 count & ~USEM_HAS_WAITERS); 3261 if (rv == -1) 3262 error = EFAULT; 3263 umtxq_lock(&key); 3264 } 3265 3266 umtxq_signal(&key, 1); 3267 } 3268 umtxq_unbusy(&key); 3269 umtxq_unlock(&key); 3270 umtx_key_release(&key); 3271 return (error); 3272 } 3273 3274 inline int 3275 umtx_copyin_timeout(const void *addr, struct timespec *tsp) 3276 { 3277 int error; 3278 3279 error = copyin(addr, tsp, sizeof(struct timespec)); 3280 if (error == 0) { 3281 if (tsp->tv_sec < 0 || 3282 tsp->tv_nsec >= 1000000000 || 3283 tsp->tv_nsec < 0) 3284 error = EINVAL; 3285 } 3286 return (error); 3287 } 3288 3289 static inline int 3290 umtx_copyin_umtx_time(const void *addr, size_t size, struct _umtx_time *tp) 3291 { 3292 int error; 3293 3294 if (size <= sizeof(struct timespec)) { 3295 tp->_clockid = CLOCK_REALTIME; 3296 tp->_flags = 0; 3297 error = copyin(addr, &tp->_timeout, sizeof(struct timespec)); 3298 } else 3299 error = copyin(addr, tp, sizeof(struct _umtx_time)); 3300 if (error != 0) 3301 return (error); 3302 if (tp->_timeout.tv_sec < 0 || 3303 tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0) 3304 return (EINVAL); 3305 return (0); 3306 } 3307 3308 static int 3309 __umtx_op_unimpl(struct thread *td, struct _umtx_op_args *uap) 3310 { 3311 3312 return (EOPNOTSUPP); 3313 } 3314 3315 static int 3316 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap) 3317 { 3318 struct _umtx_time timeout, *tm_p; 3319 int error; 3320 3321 if (uap->uaddr2 == NULL) 3322 tm_p = NULL; 3323 else { 3324 error = umtx_copyin_umtx_time( 3325 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3326 if (error != 0) 3327 return (error); 3328 tm_p = &timeout; 3329 } 3330 return (do_wait(td, uap->obj, uap->val, tm_p, 0, 0)); 3331 } 3332 3333 static int 3334 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap) 3335 { 3336 struct _umtx_time timeout, *tm_p; 3337 int error; 3338 3339 if (uap->uaddr2 == NULL) 3340 tm_p = NULL; 3341 else { 3342 error = umtx_copyin_umtx_time( 3343 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3344 if (error != 0) 3345 return (error); 3346 tm_p = &timeout; 3347 } 3348 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); 3349 } 3350 3351 static int 3352 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap) 3353 { 3354 struct _umtx_time *tm_p, timeout; 3355 int error; 3356 3357 if (uap->uaddr2 == NULL) 3358 tm_p = NULL; 3359 else { 3360 error = umtx_copyin_umtx_time( 3361 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3362 if (error != 0) 3363 return (error); 3364 tm_p = &timeout; 
3365 } 3366 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1)); 3367 } 3368 3369 static int 3370 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap) 3371 { 3372 3373 return (kern_umtx_wake(td, uap->obj, uap->val, 0)); 3374 } 3375 3376 #define BATCH_SIZE 128 3377 static int 3378 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap) 3379 { 3380 char *uaddrs[BATCH_SIZE], **upp; 3381 int count, error, i, pos, tocopy; 3382 3383 upp = (char **)uap->obj; 3384 error = 0; 3385 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 3386 pos += tocopy) { 3387 tocopy = MIN(count, BATCH_SIZE); 3388 error = copyin(upp + pos, uaddrs, tocopy * sizeof(char *)); 3389 if (error != 0) 3390 break; 3391 for (i = 0; i < tocopy; ++i) 3392 kern_umtx_wake(td, uaddrs[i], INT_MAX, 1); 3393 maybe_yield(); 3394 } 3395 return (error); 3396 } 3397 3398 static int 3399 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap) 3400 { 3401 3402 return (kern_umtx_wake(td, uap->obj, uap->val, 1)); 3403 } 3404 3405 static int 3406 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap) 3407 { 3408 struct _umtx_time *tm_p, timeout; 3409 int error; 3410 3411 /* Allow a null timespec (wait forever). */ 3412 if (uap->uaddr2 == NULL) 3413 tm_p = NULL; 3414 else { 3415 error = umtx_copyin_umtx_time( 3416 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3417 if (error != 0) 3418 return (error); 3419 tm_p = &timeout; 3420 } 3421 return (do_lock_umutex(td, uap->obj, tm_p, 0)); 3422 } 3423 3424 static int 3425 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap) 3426 { 3427 3428 return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY)); 3429 } 3430 3431 static int 3432 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap) 3433 { 3434 struct _umtx_time *tm_p, timeout; 3435 int error; 3436 3437 /* Allow a null timespec (wait forever). */ 3438 if (uap->uaddr2 == NULL) 3439 tm_p = NULL; 3440 else { 3441 error = umtx_copyin_umtx_time( 3442 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3443 if (error != 0) 3444 return (error); 3445 tm_p = &timeout; 3446 } 3447 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT)); 3448 } 3449 3450 static int 3451 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap) 3452 { 3453 3454 return (do_wake_umutex(td, uap->obj)); 3455 } 3456 3457 static int 3458 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap) 3459 { 3460 3461 return (do_unlock_umutex(td, uap->obj, false)); 3462 } 3463 3464 static int 3465 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap) 3466 { 3467 3468 return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1)); 3469 } 3470 3471 static int 3472 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap) 3473 { 3474 struct timespec *ts, timeout; 3475 int error; 3476 3477 /* Allow a null timespec (wait forever). 
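 * Unlike most timed ops here, CV_WAIT takes a plain struct
 * timespec in uaddr2 rather than a struct _umtx_time.  A hedged
 * usage sketch (hypothetical locals): a one-second relative wait
 * that atomically releases the paired mutex m:
 *
 *	struct timespec ts = { .tv_sec = 1, .tv_nsec = 0 };
 *	_umtx_op(cv, UMTX_OP_CV_WAIT, 0, m, &ts);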
*/ 3478 if (uap->uaddr2 == NULL) 3479 ts = NULL; 3480 else { 3481 error = umtx_copyin_timeout(uap->uaddr2, &timeout); 3482 if (error != 0) 3483 return (error); 3484 ts = &timeout; 3485 } 3486 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 3487 } 3488 3489 static int 3490 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap) 3491 { 3492 3493 return (do_cv_signal(td, uap->obj)); 3494 } 3495 3496 static int 3497 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap) 3498 { 3499 3500 return (do_cv_broadcast(td, uap->obj)); 3501 } 3502 3503 static int 3504 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap) 3505 { 3506 struct _umtx_time timeout; 3507 int error; 3508 3509 /* Allow a null timespec (wait forever). */ 3510 if (uap->uaddr2 == NULL) { 3511 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 3512 } else { 3513 error = umtx_copyin_umtx_time(uap->uaddr2, 3514 (size_t)uap->uaddr1, &timeout); 3515 if (error != 0) 3516 return (error); 3517 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 3518 } 3519 return (error); 3520 } 3521 3522 static int 3523 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap) 3524 { 3525 struct _umtx_time timeout; 3526 int error; 3527 3528 /* Allow a null timespec (wait forever). */ 3529 if (uap->uaddr2 == NULL) { 3530 error = do_rw_wrlock(td, uap->obj, 0); 3531 } else { 3532 error = umtx_copyin_umtx_time(uap->uaddr2, 3533 (size_t)uap->uaddr1, &timeout); 3534 if (error != 0) 3535 return (error); 3536 3537 error = do_rw_wrlock(td, uap->obj, &timeout); 3538 } 3539 return (error); 3540 } 3541 3542 static int 3543 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap) 3544 { 3545 3546 return (do_rw_unlock(td, uap->obj)); 3547 } 3548 3549 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3550 static int 3551 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap) 3552 { 3553 struct _umtx_time *tm_p, timeout; 3554 int error; 3555 3556 /* Allow a null timespec (wait forever). */ 3557 if (uap->uaddr2 == NULL) 3558 tm_p = NULL; 3559 else { 3560 error = umtx_copyin_umtx_time( 3561 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3562 if (error != 0) 3563 return (error); 3564 tm_p = &timeout; 3565 } 3566 return (do_sem_wait(td, uap->obj, tm_p)); 3567 } 3568 3569 static int 3570 __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap) 3571 { 3572 3573 return (do_sem_wake(td, uap->obj)); 3574 } 3575 #endif 3576 3577 static int 3578 __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap) 3579 { 3580 3581 return (do_wake2_umutex(td, uap->obj, uap->val)); 3582 } 3583 3584 static int 3585 __umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap) 3586 { 3587 struct _umtx_time *tm_p, timeout; 3588 int error; 3589 3590 /* Allow a null timespec (wait forever). 
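 * For the _umtx_time-based ops, uaddr1 carries the size of the
 * structure so that callers passing only a bare timespec keep
 * working (see umtx_copyin_umtx_time()).  A hedged sketch
 * (hypothetical locals):
 *
 *	struct _umtx_time ut;
 *	ut._timeout.tv_sec = 1;
 *	ut._timeout.tv_nsec = 0;
 *	ut._flags = 0;
 *	ut._clockid = CLOCK_MONOTONIC;
 *	_umtx_op(sem, UMTX_OP_SEM2_WAIT, 0,
 *	    (void *)sizeof(ut), &ut);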
*/ 3591 if (uap->uaddr2 == NULL) 3592 tm_p = NULL; 3593 else { 3594 error = umtx_copyin_umtx_time( 3595 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3596 if (error != 0) 3597 return (error); 3598 tm_p = &timeout; 3599 } 3600 return (do_sem2_wait(td, uap->obj, tm_p)); 3601 } 3602 3603 static int 3604 __umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap) 3605 { 3606 3607 return (do_sem2_wake(td, uap->obj)); 3608 } 3609 3610 #define USHM_OBJ_UMTX(o) \ 3611 ((struct umtx_shm_obj_list *)(&(o)->umtx_data)) 3612 3613 #define USHMF_REG_LINKED 0x0001 3614 #define USHMF_OBJ_LINKED 0x0002 3615 struct umtx_shm_reg { 3616 TAILQ_ENTRY(umtx_shm_reg) ushm_reg_link; 3617 LIST_ENTRY(umtx_shm_reg) ushm_obj_link; 3618 struct umtx_key ushm_key; 3619 struct ucred *ushm_cred; 3620 struct shmfd *ushm_obj; 3621 u_int ushm_refcnt; 3622 u_int ushm_flags; 3623 }; 3624 3625 LIST_HEAD(umtx_shm_obj_list, umtx_shm_reg); 3626 TAILQ_HEAD(umtx_shm_reg_head, umtx_shm_reg); 3627 3628 static uma_zone_t umtx_shm_reg_zone; 3629 static struct umtx_shm_reg_head umtx_shm_registry[UMTX_CHAINS]; 3630 static struct mtx umtx_shm_lock; 3631 static struct umtx_shm_reg_head umtx_shm_reg_delfree = 3632 TAILQ_HEAD_INITIALIZER(umtx_shm_reg_delfree); 3633 3634 static void umtx_shm_free_reg(struct umtx_shm_reg *reg); 3635 3636 static void 3637 umtx_shm_reg_delfree_tq(void *context __unused, int pending __unused) 3638 { 3639 struct umtx_shm_reg_head d; 3640 struct umtx_shm_reg *reg, *reg1; 3641 3642 TAILQ_INIT(&d); 3643 mtx_lock(&umtx_shm_lock); 3644 TAILQ_CONCAT(&d, &umtx_shm_reg_delfree, ushm_reg_link); 3645 mtx_unlock(&umtx_shm_lock); 3646 TAILQ_FOREACH_SAFE(reg, &d, ushm_reg_link, reg1) { 3647 TAILQ_REMOVE(&d, reg, ushm_reg_link); 3648 umtx_shm_free_reg(reg); 3649 } 3650 } 3651 3652 static struct task umtx_shm_reg_delfree_task = 3653 TASK_INITIALIZER(0, umtx_shm_reg_delfree_tq, NULL); 3654 3655 static struct umtx_shm_reg * 3656 umtx_shm_find_reg_locked(const struct umtx_key *key) 3657 { 3658 struct umtx_shm_reg *reg; 3659 struct umtx_shm_reg_head *reg_head; 3660 3661 KASSERT(key->shared, ("umtx_p_find_rg: private key")); 3662 mtx_assert(&umtx_shm_lock, MA_OWNED); 3663 reg_head = &umtx_shm_registry[key->hash]; 3664 TAILQ_FOREACH(reg, reg_head, ushm_reg_link) { 3665 KASSERT(reg->ushm_key.shared, 3666 ("non-shared key on reg %p %d", reg, reg->ushm_key.shared)); 3667 if (reg->ushm_key.info.shared.object == 3668 key->info.shared.object && 3669 reg->ushm_key.info.shared.offset == 3670 key->info.shared.offset) { 3671 KASSERT(reg->ushm_key.type == TYPE_SHM, ("TYPE_USHM")); 3672 KASSERT(reg->ushm_refcnt > 0, 3673 ("reg %p refcnt 0 onlist", reg)); 3674 KASSERT((reg->ushm_flags & USHMF_REG_LINKED) != 0, 3675 ("reg %p not linked", reg)); 3676 reg->ushm_refcnt++; 3677 return (reg); 3678 } 3679 } 3680 return (NULL); 3681 } 3682 3683 static struct umtx_shm_reg * 3684 umtx_shm_find_reg(const struct umtx_key *key) 3685 { 3686 struct umtx_shm_reg *reg; 3687 3688 mtx_lock(&umtx_shm_lock); 3689 reg = umtx_shm_find_reg_locked(key); 3690 mtx_unlock(&umtx_shm_lock); 3691 return (reg); 3692 } 3693 3694 static void 3695 umtx_shm_free_reg(struct umtx_shm_reg *reg) 3696 { 3697 3698 chgumtxcnt(reg->ushm_cred->cr_ruidinfo, -1, 0); 3699 crfree(reg->ushm_cred); 3700 shm_drop(reg->ushm_obj); 3701 uma_zfree(umtx_shm_reg_zone, reg); 3702 } 3703 3704 static bool 3705 umtx_shm_unref_reg_locked(struct umtx_shm_reg *reg, bool force) 3706 { 3707 bool res; 3708 3709 mtx_assert(&umtx_shm_lock, MA_OWNED); 3710 KASSERT(reg->ushm_refcnt > 0, ("ushm_reg %p refcnt 0", 
reg));
3711 reg->ushm_refcnt--;
3712 res = reg->ushm_refcnt == 0;
3713 if (res || force) {
3714 if ((reg->ushm_flags & USHMF_REG_LINKED) != 0) {
3715 TAILQ_REMOVE(&umtx_shm_registry[reg->ushm_key.hash],
3716 reg, ushm_reg_link);
3717 reg->ushm_flags &= ~USHMF_REG_LINKED;
3718 }
3719 if ((reg->ushm_flags & USHMF_OBJ_LINKED) != 0) {
3720 LIST_REMOVE(reg, ushm_obj_link);
3721 reg->ushm_flags &= ~USHMF_OBJ_LINKED;
3722 }
3723 }
3724 return (res);
3725 }
3726
3727 static void
3728 umtx_shm_unref_reg(struct umtx_shm_reg *reg, bool force)
3729 {
3730 vm_object_t object;
3731 bool dofree;
3732
3733 if (force) {
3734 object = reg->ushm_obj->shm_object;
3735 VM_OBJECT_WLOCK(object);
3736 object->flags |= OBJ_UMTXDEAD;
3737 VM_OBJECT_WUNLOCK(object);
3738 }
3739 mtx_lock(&umtx_shm_lock);
3740 dofree = umtx_shm_unref_reg_locked(reg, force);
3741 mtx_unlock(&umtx_shm_lock);
3742 if (dofree)
3743 umtx_shm_free_reg(reg);
3744 }
3745
3746 void
3747 umtx_shm_object_init(vm_object_t object)
3748 {
3749
3750 LIST_INIT(USHM_OBJ_UMTX(object));
3751 }
3752
3753 void
3754 umtx_shm_object_terminated(vm_object_t object)
3755 {
3756 struct umtx_shm_reg *reg, *reg1;
3757 bool dofree;
3758
3759 dofree = false;
3760 mtx_lock(&umtx_shm_lock);
3761 LIST_FOREACH_SAFE(reg, USHM_OBJ_UMTX(object), ushm_obj_link, reg1) {
3762 if (umtx_shm_unref_reg_locked(reg, true)) {
3763 TAILQ_INSERT_TAIL(&umtx_shm_reg_delfree, reg,
3764 ushm_reg_link);
3765 dofree = true;
3766 }
3767 }
3768 mtx_unlock(&umtx_shm_lock);
3769 if (dofree)
3770 taskqueue_enqueue(taskqueue_thread, &umtx_shm_reg_delfree_task);
3771 }
3772
3773 static int
3774 umtx_shm_create_reg(struct thread *td, const struct umtx_key *key,
3775 struct umtx_shm_reg **res)
3776 {
3777 struct umtx_shm_reg *reg, *reg1;
3778 struct ucred *cred;
3779 int error;
3780
3781 reg = umtx_shm_find_reg(key);
3782 if (reg != NULL) {
3783 *res = reg;
3784 return (0);
3785 }
3786 cred = td->td_ucred;
3787 if (!chgumtxcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_UMTXP)))
3788 return (ENOMEM);
3789 reg = uma_zalloc(umtx_shm_reg_zone, M_WAITOK | M_ZERO);
3790 reg->ushm_refcnt = 1;
3791 bcopy(key, &reg->ushm_key, sizeof(*key));
3792 reg->ushm_obj = shm_alloc(td->td_ucred, O_RDWR);
3793 reg->ushm_cred = crhold(cred);
3794 error = shm_dotruncate(reg->ushm_obj, PAGE_SIZE);
3795 if (error != 0) {
3796 umtx_shm_free_reg(reg);
3797 return (error);
3798 }
3799 mtx_lock(&umtx_shm_lock);
3800 reg1 = umtx_shm_find_reg_locked(key);
3801 if (reg1 != NULL) {
3802 mtx_unlock(&umtx_shm_lock);
3803 umtx_shm_free_reg(reg);
3804 *res = reg1;
3805 return (0);
3806 }
3807 reg->ushm_refcnt++;
3808 TAILQ_INSERT_TAIL(&umtx_shm_registry[key->hash], reg, ushm_reg_link);
3809 LIST_INSERT_HEAD(USHM_OBJ_UMTX(key->info.shared.object), reg,
3810 ushm_obj_link);
3811 reg->ushm_flags = USHMF_REG_LINKED | USHMF_OBJ_LINKED;
3812 mtx_unlock(&umtx_shm_lock);
3813 *res = reg;
3814 return (0);
3815 }
3816
3817 static int
3818 umtx_shm_alive(struct thread *td, void *addr)
3819 {
3820 vm_map_t map;
3821 vm_map_entry_t entry;
3822 vm_object_t object;
3823 vm_pindex_t pindex;
3824 vm_prot_t prot;
3825 int res, ret;
3826 boolean_t wired;
3827
3828 map = &td->td_proc->p_vmspace->vm_map;
3829 res = vm_map_lookup(&map, (uintptr_t)addr, VM_PROT_READ, &entry,
3830 &object, &pindex, &prot, &wired);
3831 if (res != KERN_SUCCESS)
3832 return (EFAULT);
3833 if (object == NULL)
3834 ret = EINVAL;
3835 else
3836 ret = (object->flags & OBJ_UMTXDEAD) != 0 ?
ENOTTY : 0;
3837 vm_map_lookup_done(map, entry);
3838 return (ret);
3839 }
3840
3841 static void
3842 umtx_shm_init(void)
3843 {
3844 int i;
3845
3846 umtx_shm_reg_zone = uma_zcreate("umtx_shm", sizeof(struct umtx_shm_reg),
3847 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
3848 mtx_init(&umtx_shm_lock, "umtxshm", NULL, MTX_DEF);
3849 for (i = 0; i < nitems(umtx_shm_registry); i++)
3850 TAILQ_INIT(&umtx_shm_registry[i]);
3851 }
3852
3853 static int
3854 umtx_shm(struct thread *td, void *addr, u_int flags)
3855 {
3856 struct umtx_key key;
3857 struct umtx_shm_reg *reg;
3858 struct file *fp;
3859 int error, fd;
3860
3861 if (__bitcount(flags & (UMTX_SHM_CREAT | UMTX_SHM_LOOKUP |
3862 UMTX_SHM_DESTROY | UMTX_SHM_ALIVE)) != 1)
3863 return (EINVAL);
3864 if ((flags & UMTX_SHM_ALIVE) != 0)
3865 return (umtx_shm_alive(td, addr));
3866 error = umtx_key_get(addr, TYPE_SHM, PROCESS_SHARE, &key);
3867 if (error != 0)
3868 return (error);
3869 KASSERT(key.shared == 1, ("non-shared key"));
3870 if ((flags & UMTX_SHM_CREAT) != 0) {
3871 error = umtx_shm_create_reg(td, &key, &reg);
3872 } else {
3873 reg = umtx_shm_find_reg(&key);
3874 if (reg == NULL)
3875 error = ESRCH;
3876 }
3877 umtx_key_release(&key);
3878 if (error != 0)
3879 return (error);
3880 KASSERT(reg != NULL, ("no reg"));
3881 if ((flags & UMTX_SHM_DESTROY) != 0) {
3882 umtx_shm_unref_reg(reg, true);
3883 } else {
3884 #if 0
3885 #ifdef MAC
3886 error = mac_posixshm_check_open(td->td_ucred,
3887 reg->ushm_obj, FFLAGS(O_RDWR));
3888 if (error == 0)
3889 #endif
3890 error = shm_access(reg->ushm_obj, td->td_ucred,
3891 FFLAGS(O_RDWR));
3892 if (error == 0)
3893 #endif
3894 error = falloc_caps(td, &fp, &fd, O_CLOEXEC, NULL);
3895 if (error == 0) {
3896 shm_hold(reg->ushm_obj);
3897 finit(fp, FFLAGS(O_RDWR), DTYPE_SHM, reg->ushm_obj,
3898 &shm_ops);
3899 td->td_retval[0] = fd;
3900 fdrop(fp, td);
3901 }
3902 }
3903 umtx_shm_unref_reg(reg, false);
3904 return (error);
3905 }
3906
3907 static int
3908 __umtx_op_shm(struct thread *td, struct _umtx_op_args *uap)
3909 {
3910
3911 return (umtx_shm(td, uap->uaddr1, uap->val));
3912 }
3913
3914 static int
3915 umtx_robust_lists(struct thread *td, struct umtx_robust_lists_params *rbp)
3916 {
3917
3918 td->td_rb_list = rbp->robust_list_offset;
3919 td->td_rbp_list = rbp->robust_priv_list_offset;
3920 td->td_rb_inact = rbp->robust_inact_offset;
3921 return (0);
3922 }
3923
3924 static int
3925 __umtx_op_robust_lists(struct thread *td, struct _umtx_op_args *uap)
3926 {
3927 struct umtx_robust_lists_params rb;
3928 int error;
3929
3930 if (uap->val > sizeof(rb))
3931 return (EINVAL);
3932 bzero(&rb, sizeof(rb));
3933 error = copyin(uap->uaddr1, &rb, uap->val);
3934 if (error != 0)
3935 return (error);
3936 return (umtx_robust_lists(td, &rb));
3937 }
3938
3939 typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);
3940
3941 static const _umtx_op_func op_table[] = {
3942 [UMTX_OP_RESERVED0] = __umtx_op_unimpl,
3943 [UMTX_OP_RESERVED1] = __umtx_op_unimpl,
3944 [UMTX_OP_WAIT] = __umtx_op_wait,
3945 [UMTX_OP_WAKE] = __umtx_op_wake,
3946 [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex,
3947 [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex,
3948 [UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex,
3949 [UMTX_OP_SET_CEILING] = __umtx_op_set_ceiling,
3950 [UMTX_OP_CV_WAIT] = __umtx_op_cv_wait,
3951 [UMTX_OP_CV_SIGNAL] = __umtx_op_cv_signal,
3952 [UMTX_OP_CV_BROADCAST] = __umtx_op_cv_broadcast,
3953 [UMTX_OP_WAIT_UINT] = __umtx_op_wait_uint,
3954 [UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock,
3955
[UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock, 3956 [UMTX_OP_RW_UNLOCK] = __umtx_op_rw_unlock, 3957 [UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private, 3958 [UMTX_OP_WAKE_PRIVATE] = __umtx_op_wake_private, 3959 [UMTX_OP_MUTEX_WAIT] = __umtx_op_wait_umutex, 3960 [UMTX_OP_MUTEX_WAKE] = __umtx_op_wake_umutex, 3961 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3962 [UMTX_OP_SEM_WAIT] = __umtx_op_sem_wait, 3963 [UMTX_OP_SEM_WAKE] = __umtx_op_sem_wake, 3964 #else 3965 [UMTX_OP_SEM_WAIT] = __umtx_op_unimpl, 3966 [UMTX_OP_SEM_WAKE] = __umtx_op_unimpl, 3967 #endif 3968 [UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private, 3969 [UMTX_OP_MUTEX_WAKE2] = __umtx_op_wake2_umutex, 3970 [UMTX_OP_SEM2_WAIT] = __umtx_op_sem2_wait, 3971 [UMTX_OP_SEM2_WAKE] = __umtx_op_sem2_wake, 3972 [UMTX_OP_SHM] = __umtx_op_shm, 3973 [UMTX_OP_ROBUST_LISTS] = __umtx_op_robust_lists, 3974 }; 3975 3976 int 3977 sys__umtx_op(struct thread *td, struct _umtx_op_args *uap) 3978 { 3979 3980 if ((unsigned)uap->op < nitems(op_table)) 3981 return (*op_table[uap->op])(td, uap); 3982 return (EINVAL); 3983 } 3984 3985 #ifdef COMPAT_FREEBSD32 3986 3987 struct timespec32 { 3988 int32_t tv_sec; 3989 int32_t tv_nsec; 3990 }; 3991 3992 struct umtx_time32 { 3993 struct timespec32 timeout; 3994 uint32_t flags; 3995 uint32_t clockid; 3996 }; 3997 3998 static inline int 3999 umtx_copyin_timeout32(void *addr, struct timespec *tsp) 4000 { 4001 struct timespec32 ts32; 4002 int error; 4003 4004 error = copyin(addr, &ts32, sizeof(struct timespec32)); 4005 if (error == 0) { 4006 if (ts32.tv_sec < 0 || 4007 ts32.tv_nsec >= 1000000000 || 4008 ts32.tv_nsec < 0) 4009 error = EINVAL; 4010 else { 4011 tsp->tv_sec = ts32.tv_sec; 4012 tsp->tv_nsec = ts32.tv_nsec; 4013 } 4014 } 4015 return (error); 4016 } 4017 4018 static inline int 4019 umtx_copyin_umtx_time32(const void *addr, size_t size, struct _umtx_time *tp) 4020 { 4021 struct umtx_time32 t32; 4022 int error; 4023 4024 t32.clockid = CLOCK_REALTIME; 4025 t32.flags = 0; 4026 if (size <= sizeof(struct timespec32)) 4027 error = copyin(addr, &t32.timeout, sizeof(struct timespec32)); 4028 else 4029 error = copyin(addr, &t32, sizeof(struct umtx_time32)); 4030 if (error != 0) 4031 return (error); 4032 if (t32.timeout.tv_sec < 0 || 4033 t32.timeout.tv_nsec >= 1000000000 || t32.timeout.tv_nsec < 0) 4034 return (EINVAL); 4035 tp->_timeout.tv_sec = t32.timeout.tv_sec; 4036 tp->_timeout.tv_nsec = t32.timeout.tv_nsec; 4037 tp->_flags = t32.flags; 4038 tp->_clockid = t32.clockid; 4039 return (0); 4040 } 4041 4042 static int 4043 __umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 4044 { 4045 struct _umtx_time *tm_p, timeout; 4046 int error; 4047 4048 if (uap->uaddr2 == NULL) 4049 tm_p = NULL; 4050 else { 4051 error = umtx_copyin_umtx_time32(uap->uaddr2, 4052 (size_t)uap->uaddr1, &timeout); 4053 if (error != 0) 4054 return (error); 4055 tm_p = &timeout; 4056 } 4057 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); 4058 } 4059 4060 static int 4061 __umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap) 4062 { 4063 struct _umtx_time *tm_p, timeout; 4064 int error; 4065 4066 /* Allow a null timespec (wait forever). 
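 * The 32-bit ABI hands in a struct umtx_time32 (defined above), so
 * the compat copyin variant below must be used: tv_sec and tv_nsec
 * are int32_t on the wire and are widened to the native types here.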
*/
4067 if (uap->uaddr2 == NULL)
4068 tm_p = NULL;
4069 else {
4070 error = umtx_copyin_umtx_time32(uap->uaddr2,
4071 (size_t)uap->uaddr1, &timeout);
4072 if (error != 0)
4073 return (error);
4074 tm_p = &timeout;
4075 }
4076 return (do_lock_umutex(td, uap->obj, tm_p, 0));
4077 }
4078
4079 static int
4080 __umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
4081 {
4082 struct _umtx_time *tm_p, timeout;
4083 int error;
4084
4085 /* Allow a null timespec (wait forever). */
4086 if (uap->uaddr2 == NULL)
4087 tm_p = NULL;
4088 else {
4089 error = umtx_copyin_umtx_time32(uap->uaddr2,
4090 (size_t)uap->uaddr1, &timeout);
4091 if (error != 0)
4092 return (error);
4093 tm_p = &timeout;
4094 }
4095 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT));
4096 }
4097
4098 static int
4099 __umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
4100 {
4101 struct timespec *ts, timeout;
4102 int error;
4103
4104 /* Allow a null timespec (wait forever). */
4105 if (uap->uaddr2 == NULL)
4106 ts = NULL;
4107 else {
4108 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
4109 if (error != 0)
4110 return (error);
4111 ts = &timeout;
4112 }
4113 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
4114 }
4115
4116 static int
4117 __umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap)
4118 {
4119 struct _umtx_time timeout;
4120 int error;
4121
4122 /* Allow a null timespec (wait forever). */
4123 if (uap->uaddr2 == NULL) {
4124 error = do_rw_rdlock(td, uap->obj, uap->val, 0);
4125 } else {
4126 error = umtx_copyin_umtx_time32(uap->uaddr2,
4127 (size_t)uap->uaddr1, &timeout);
4128 if (error != 0)
4129 return (error);
4130 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout);
4131 }
4132 return (error);
4133 }
4134
4135 static int
4136 __umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap)
4137 {
4138 struct _umtx_time timeout;
4139 int error;
4140
4141 /* Allow a null timespec (wait forever). */
4142 if (uap->uaddr2 == NULL) {
4143 error = do_rw_wrlock(td, uap->obj, 0);
4144 } else {
4145 error = umtx_copyin_umtx_time32(uap->uaddr2,
4146 (size_t)uap->uaddr1, &timeout);
4147 if (error != 0)
4148 return (error);
4149 error = do_rw_wrlock(td, uap->obj, &timeout);
4150 }
4151 return (error);
4152 }
4153
4154 static int
4155 __umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap)
4156 {
4157 struct _umtx_time *tm_p, timeout;
4158 int error;
4159
4160 if (uap->uaddr2 == NULL)
4161 tm_p = NULL;
4162 else {
4163 error = umtx_copyin_umtx_time32(
4164 uap->uaddr2, (size_t)uap->uaddr1, &timeout);
4165 if (error != 0)
4166 return (error);
4167 tm_p = &timeout;
4168 }
4169 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1));
4170 }
4171
4172 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
4173 static int
4174 __umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
4175 {
4176 struct _umtx_time *tm_p, timeout;
4177 int error;
4178
4179 /* Allow a null timespec (wait forever). */
4180 if (uap->uaddr2 == NULL)
4181 tm_p = NULL;
4182 else {
4183 error = umtx_copyin_umtx_time32(uap->uaddr2,
4184 (size_t)uap->uaddr1, &timeout);
4185 if (error != 0)
4186 return (error);
4187 tm_p = &timeout;
4188 }
4189 return (do_sem_wait(td, uap->obj, tm_p));
4190 }
4191 #endif
4192
4193 static int
4194 __umtx_op_sem2_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
4195 {
4196 struct _umtx_time *tm_p, timeout;
4197 int error;
4198
4199 /* Allow a null timespec (wait forever).
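 * A hedged 32-bit sketch (hypothetical locals), mirroring the
 * native SEM2_WAIT example earlier; uaddr1 carries
 * sizeof(struct umtx_time32) when the full structure is passed:
 *
 *	_umtx_op(sem, UMTX_OP_SEM2_WAIT, 0,
 *	    (void *)sizeof(struct umtx_time32), &ut32);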
*/ 4200 if (uap->uaddr2 == NULL) 4201 tm_p = NULL; 4202 else { 4203 error = umtx_copyin_umtx_time32(uap->uaddr2, 4204 (size_t)uap->uaddr1, &timeout); 4205 if (error != 0) 4206 return (error); 4207 tm_p = &timeout; 4208 } 4209 return (do_sem2_wait(td, uap->obj, tm_p)); 4210 } 4211 4212 static int 4213 __umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap) 4214 { 4215 uint32_t uaddrs[BATCH_SIZE], **upp; 4216 int count, error, i, pos, tocopy; 4217 4218 upp = (uint32_t **)uap->obj; 4219 error = 0; 4220 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 4221 pos += tocopy) { 4222 tocopy = MIN(count, BATCH_SIZE); 4223 error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t)); 4224 if (error != 0) 4225 break; 4226 for (i = 0; i < tocopy; ++i) 4227 kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i], 4228 INT_MAX, 1); 4229 maybe_yield(); 4230 } 4231 return (error); 4232 } 4233 4234 struct umtx_robust_lists_params_compat32 { 4235 uint32_t robust_list_offset; 4236 uint32_t robust_priv_list_offset; 4237 uint32_t robust_inact_offset; 4238 }; 4239 4240 static int 4241 __umtx_op_robust_lists_compat32(struct thread *td, struct _umtx_op_args *uap) 4242 { 4243 struct umtx_robust_lists_params rb; 4244 struct umtx_robust_lists_params_compat32 rb32; 4245 int error; 4246 4247 if (uap->val > sizeof(rb32)) 4248 return (EINVAL); 4249 bzero(&rb, sizeof(rb)); 4250 bzero(&rb32, sizeof(rb32)); 4251 error = copyin(uap->uaddr1, &rb32, uap->val); 4252 if (error != 0) 4253 return (error); 4254 rb.robust_list_offset = rb32.robust_list_offset; 4255 rb.robust_priv_list_offset = rb32.robust_priv_list_offset; 4256 rb.robust_inact_offset = rb32.robust_inact_offset; 4257 return (umtx_robust_lists(td, &rb)); 4258 } 4259 4260 static const _umtx_op_func op_table_compat32[] = { 4261 [UMTX_OP_RESERVED0] = __umtx_op_unimpl, 4262 [UMTX_OP_RESERVED1] = __umtx_op_unimpl, 4263 [UMTX_OP_WAIT] = __umtx_op_wait_compat32, 4264 [UMTX_OP_WAKE] = __umtx_op_wake, 4265 [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex, 4266 [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex_compat32, 4267 [UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex, 4268 [UMTX_OP_SET_CEILING] = __umtx_op_set_ceiling, 4269 [UMTX_OP_CV_WAIT] = __umtx_op_cv_wait_compat32, 4270 [UMTX_OP_CV_SIGNAL] = __umtx_op_cv_signal, 4271 [UMTX_OP_CV_BROADCAST] = __umtx_op_cv_broadcast, 4272 [UMTX_OP_WAIT_UINT] = __umtx_op_wait_compat32, 4273 [UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock_compat32, 4274 [UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock_compat32, 4275 [UMTX_OP_RW_UNLOCK] = __umtx_op_rw_unlock, 4276 [UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private_compat32, 4277 [UMTX_OP_WAKE_PRIVATE] = __umtx_op_wake_private, 4278 [UMTX_OP_MUTEX_WAIT] = __umtx_op_wait_umutex_compat32, 4279 [UMTX_OP_MUTEX_WAKE] = __umtx_op_wake_umutex, 4280 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4281 [UMTX_OP_SEM_WAIT] = __umtx_op_sem_wait_compat32, 4282 [UMTX_OP_SEM_WAKE] = __umtx_op_sem_wake, 4283 #else 4284 [UMTX_OP_SEM_WAIT] = __umtx_op_unimpl, 4285 [UMTX_OP_SEM_WAKE] = __umtx_op_unimpl, 4286 #endif 4287 [UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private32, 4288 [UMTX_OP_MUTEX_WAKE2] = __umtx_op_wake2_umutex, 4289 [UMTX_OP_SEM2_WAIT] = __umtx_op_sem2_wait_compat32, 4290 [UMTX_OP_SEM2_WAKE] = __umtx_op_sem2_wake, 4291 [UMTX_OP_SHM] = __umtx_op_shm, 4292 [UMTX_OP_ROBUST_LISTS] = __umtx_op_robust_lists_compat32, 4293 }; 4294 4295 int 4296 freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap) 4297 { 4298 4299 if ((unsigned)uap->op < nitems(op_table_compat32)) 
{
4300 return (*op_table_compat32[uap->op])(td,
4301 (struct _umtx_op_args *)uap);
4302 }
4303 return (EINVAL);
4304 }
4305 #endif
4306
4307 void
4308 umtx_thread_init(struct thread *td)
4309 {
4310
4311 td->td_umtxq = umtxq_alloc();
4312 td->td_umtxq->uq_thread = td;
4313 }
4314
4315 void
4316 umtx_thread_fini(struct thread *td)
4317 {
4318
4319 umtxq_free(td->td_umtxq);
4320 }
4321
4322 /*
4323 * Called when a new thread is created, e.g., via fork().
4324 */
4325 void
4326 umtx_thread_alloc(struct thread *td)
4327 {
4328 struct umtx_q *uq;
4329
4330 uq = td->td_umtxq;
4331 uq->uq_inherited_pri = PRI_MAX;
4332
4333 KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
4334 KASSERT(uq->uq_thread == td, ("uq_thread != td"));
4335 KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
4336 KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
4337 }
4338
4339 /*
4340 * exec() hook.
4341 *
4342 * Clear the robust lists for all of the process' threads, not
4343 * delaying the cleanup to the thread_exit hook, since the relevant
4344 * address space is destroyed right now.
4345 */
4346 static void
4347 umtx_exec_hook(void *arg __unused, struct proc *p,
4348 struct image_params *imgp __unused)
4349 {
4350 struct thread *td;
4351
4352 KASSERT(p == curproc, ("need curproc"));
4353 PROC_LOCK(p);
4354 KASSERT((p->p_flag & P_HADTHREADS) == 0 ||
4355 (p->p_flag & P_STOPPED_SINGLE) != 0,
4356 ("curproc must be single-threaded"));
4357 FOREACH_THREAD_IN_PROC(p, td) {
4358 KASSERT(td == curthread ||
4359 ((td->td_flags & TDF_BOUNDARY) != 0 && TD_IS_SUSPENDED(td)),
4360 ("running thread %p %p", p, td));
4361 PROC_UNLOCK(p);
4362 umtx_thread_cleanup(td);
4363 PROC_LOCK(p);
4364 td->td_rb_list = td->td_rbp_list = td->td_rb_inact = 0;
4365 }
4366 PROC_UNLOCK(p);
4367 }
4368
4369 /*
4370 * thread_exit() hook.
4371 */
4372 void
4373 umtx_thread_exit(struct thread *td)
4374 {
4375
4376 umtx_thread_cleanup(td);
4377 }
4378
4379 static int
4380 umtx_read_uptr(struct thread *td, uintptr_t ptr, uintptr_t *res)
4381 {
4382 u_long res1;
4383 #ifdef COMPAT_FREEBSD32
4384 uint32_t res32;
4385 #endif
4386 int error;
4387
4388 #ifdef COMPAT_FREEBSD32
4389 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
4390 error = fueword32((void *)ptr, &res32);
4391 if (error == 0)
4392 res1 = res32;
4393 } else
4394 #endif
4395 {
4396 error = fueword((void *)ptr, &res1);
4397 }
4398 if (error == 0)
4399 *res = res1;
4400 else
4401 error = EFAULT;
4402 return (error);
4403 }
4404
4405 static void
4406 umtx_read_rb_list(struct thread *td, struct umutex *m, uintptr_t *rb_list)
4407 {
4408 #ifdef COMPAT_FREEBSD32
4409 struct umutex32 m32;
4410
4411 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
4412 memcpy(&m32, m, sizeof(m32));
4413 *rb_list = m32.m_rb_lnk;
4414 } else
4415 #endif
4416 *rb_list = m->m_rb_lnk;
4417 }
4418
4419 static int
4420 umtx_handle_rb(struct thread *td, uintptr_t rbp, uintptr_t *rb_list, bool inact)
4421 {
4422 struct umutex m;
4423 int error;
4424
4425 KASSERT(td->td_proc == curproc, ("need current vmspace"));
4426 error = copyin((void *)rbp, &m, sizeof(m));
4427 if (error != 0)
4428 return (error);
4429 if (rb_list != NULL)
4430 umtx_read_rb_list(td, &m, rb_list);
4431 if ((m.m_flags & UMUTEX_ROBUST) == 0)
4432 return (EINVAL);
4433 if ((m.m_owner & ~UMUTEX_CONTESTED) != td->td_tid)
4434 /* inact is cleared after unlock, allow the inconsistency */
4435 return (inact ?
0 : EINVAL); 4436 return (do_unlock_umutex(td, (struct umutex *)rbp, true)); 4437 } 4438 4439 static void 4440 umtx_cleanup_rb_list(struct thread *td, uintptr_t rb_list, uintptr_t *rb_inact, 4441 const char *name) 4442 { 4443 int error, i; 4444 uintptr_t rbp; 4445 bool inact; 4446 4447 if (rb_list == 0) 4448 return; 4449 error = umtx_read_uptr(td, rb_list, &rbp); 4450 for (i = 0; error == 0 && rbp != 0 && i < umtx_max_rb; i++) { 4451 if (rbp == *rb_inact) { 4452 inact = true; 4453 *rb_inact = 0; 4454 } else 4455 inact = false; 4456 error = umtx_handle_rb(td, rbp, &rbp, inact); 4457 } 4458 if (i == umtx_max_rb && umtx_verbose_rb) { 4459 uprintf("comm %s pid %d: reached umtx %smax rb %d\n", 4460 td->td_proc->p_comm, td->td_proc->p_pid, name, umtx_max_rb); 4461 } 4462 if (error != 0 && umtx_verbose_rb) { 4463 uprintf("comm %s pid %d: handling %srb error %d\n", 4464 td->td_proc->p_comm, td->td_proc->p_pid, name, error); 4465 } 4466 } 4467 4468 /* 4469 * Clean up umtx data. 4470 */ 4471 static void 4472 umtx_thread_cleanup(struct thread *td) 4473 { 4474 struct umtx_q *uq; 4475 struct umtx_pi *pi; 4476 uintptr_t rb_inact; 4477 4478 /* 4479 * Disown pi mutexes. 4480 */ 4481 uq = td->td_umtxq; 4482 if (uq != NULL) { 4483 mtx_lock(&umtx_lock); 4484 uq->uq_inherited_pri = PRI_MAX; 4485 while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) { 4486 pi->pi_owner = NULL; 4487 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link); 4488 } 4489 mtx_unlock(&umtx_lock); 4490 thread_lock(td); 4491 sched_lend_user_prio(td, PRI_MAX); 4492 thread_unlock(td); 4493 } 4494 4495 /* 4496 * Handle terminated robust mutexes. Must be done after 4497 * robust pi disown, otherwise unlock could see unowned 4498 * entries. 4499 */ 4500 rb_inact = td->td_rb_inact; 4501 if (rb_inact != 0) 4502 (void)umtx_read_uptr(td, rb_inact, &rb_inact); 4503 umtx_cleanup_rb_list(td, td->td_rb_list, &rb_inact, ""); 4504 umtx_cleanup_rb_list(td, td->td_rbp_list, &rb_inact, "priv "); 4505 if (rb_inact != 0) 4506 (void)umtx_handle_rb(td, rb_inact, NULL, true); 4507 } 4508
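/*
 * Illustrative (non-normative) userland side of the robust-list
 * machinery cleaned up above: the threading library registers its
 * per-thread list heads once, e.g. (hypothetical structure and
 * member names):
 *
 *	struct umtx_robust_lists_params rb = {
 *		.robust_list_offset = (uintptr_t)&thr->robust_list,
 *		.robust_priv_list_offset = (uintptr_t)&thr->priv_robust_list,
 *		.robust_inact_offset = (uintptr_t)&thr->inact_mtx,
 *	};
 *	_umtx_op(NULL, UMTX_OP_ROBUST_LISTS, sizeof(rb), &rb, NULL);
 *
 * After that, umtx_thread_cleanup() can walk the lists on thread
 * exit or exec and unlock any robust mutexes left behind.
 */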