/*-
 * Copyright (c) 2015, 2016 The FreeBSD Foundation
 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Portions of this software were developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"
#include "opt_umtx_profiling.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/syscallsubr.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <sys/umtx.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

#include <machine/cpu.h>

#ifdef COMPAT_FREEBSD32
#include <compat/freebsd32/freebsd32_proto.h>
#endif

#define	_UMUTEX_TRY	1
#define	_UMUTEX_WAIT	2

#ifdef UMTX_PROFILING
#define	UPROF_PERC_BIGGER(w, f, sw, sf)					\
	(((w) > (sw)) || ((w) == (sw) && (f) > (sf)))
#endif

/* Priority inheritance mutex info. */
struct umtx_pi {
	/* Owner thread */
	struct thread		*pi_owner;

	/* Reference count */
	int			pi_refcount;

	/* List entry to link umtx held by owner thread */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* List entry in hash */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* List for waiters */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Identify a userland lock object */
	struct umtx_key		pi_key;
};

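/*
 * Illustration of how the structures in this file relate at run time
 * (the threads shown are hypothetical; the field names are the ones
 * defined here):
 *
 *	thread --td_umtxq--> umtx_q --uq_pi_blocked--> umtx_pi
 *	                                                 |
 *	            pi_owner: thread holding the lock <--+
 *	            pi_blocked: umtx_q's of the waiting threads
 *
 * Each umtx_q below also carries the umtx_key that hashes the userland
 * lock address into one of the umtxq_chain buckets.
 */
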
/* A userland synchronization object user. */
struct umtx_q {
	/* Linked list for the hash. */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Umtx key. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define UQF_UMTXQ	0x0001

	/* The waiting thread. */
	struct thread		*uq_thread;

	/*
	 * Blocked on PI mutex.  Reads can use either the chain lock
	 * or umtx_lock; writes must hold both the chain lock and
	 * umtx_lock.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* On blocked list */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* PI mutexes we own on which other threads are blocked */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Inherited priority from PP mutex */
	u_char			uq_inherited_pri;

	/* Spare queue ready to be reused */
	struct umtxq_queue	*uq_spare_queue;

	/* The queue we are on */
	struct umtxq_queue	*uq_cur_queue;
};

TAILQ_HEAD(umtxq_head, umtx_q);

/* Per-key wait-queue */
struct umtxq_queue {
	struct umtxq_head	head;
	struct umtx_key		key;
	LIST_ENTRY(umtxq_queue)	link;
	int			length;
};

LIST_HEAD(umtxq_list, umtxq_queue);

/* Userland lock object's wait-queue chain */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx		uc_lock;

	/* List of sleep queues. */
	struct umtxq_list	uc_queue[2];
#define UMTX_SHARED_QUEUE	0
#define UMTX_EXCLUSIVE_QUEUE	1

	LIST_HEAD(, umtxq_queue) uc_spare_queue;

	/* Busy flag */
	char			uc_busy;

	/* Chain lock waiters */
	int			uc_waiters;

	/* All PI mutexes in the list */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;

#ifdef UMTX_PROFILING
	u_int			length;
	u_int			max_length;
#endif
};

#define	UMTXQ_LOCKED_ASSERT(uc)	mtx_assert(&(uc)->uc_lock, MA_OWNED)

/*
 * Don't propagate time-sharing priority.  There is a security reason:
 * a user could simply create a PI mutex, let thread A lock it, and let
 * another thread B block on it.  Because B is sleeping, its priority
 * would be boosted, which would in turn boost A's priority via
 * priority propagation, and A's priority would never be lowered even
 * if it were using 100% CPU.  That would be unfair to other processes.
 */
#define UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&	\
			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?	\
			 PRI_MAX_TIMESHARE : (td)->td_user_pri)

#define	GOLDEN_RATIO_PRIME	2654404609U
#define	UMTX_CHAINS		512
#define	UMTX_SHIFTS		(__WORD_BIT - 9)

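/*
 * Worked example of the hashing done by umtxq_hash() below, assuming
 * the usual __WORD_BIT == 32 (so UMTX_SHIFTS == 23): the two key words
 * are summed and multiplied by the golden-ratio prime, and the top
 * nine bits of the 32-bit product pick one of the 512 chains:
 *
 *	n = (uintptr_t)key->info.both.a + key->info.both.b;
 *	key->hash = ((n * 2654404609U) >> 23) % 512;
 *
 * This Fibonacci-style hash spreads nearby lock addresses across
 * different chains instead of clustering them in one bucket.
 */
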
#define GET_SHARE(flags)	\
    (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)

#define BUSY_SPINS		200

struct abs_timeout {
	int clockid;
	struct timespec cur;
	struct timespec end;
};

#ifdef COMPAT_FREEBSD32
struct umutex32 {
	volatile __lwpid_t	m_owner;	/* Owner of the mutex */
	__uint32_t		m_flags;	/* Flags of the mutex */
	__uint32_t		m_ceilings[2];	/* Priority protect ceiling */
	__uint32_t		m_rb_lnk;	/* Robust linkage */
	__uint32_t		m_pad;
	__uint32_t		m_spare[2];
};

_Static_assert(sizeof(struct umutex) == sizeof(struct umutex32), "umutex32");
_Static_assert(__offsetof(struct umutex, m_spare[0]) ==
    __offsetof(struct umutex32, m_spare[0]), "m_spare32");
#endif

int umtx_shm_vnobj_persistent = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_vnode_persistent, CTLFLAG_RWTUN,
    &umtx_shm_vnobj_persistent, 0,
    "If false, umtx attached to a file is destroyed on last close");
static int umtx_max_rb = 1000;
SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_max_robust, CTLFLAG_RWTUN,
    &umtx_max_rb, 0,
    "");

static uma_zone_t		umtx_pi_zone;
static struct umtxq_chain	umtxq_chains[2][UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
static int			umtx_pi_allocated;

static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
    &umtx_pi_allocated, 0, "Allocated umtx_pi");
static int umtx_verbose_rb = 1;
SYSCTL_INT(_debug_umtx, OID_AUTO, robust_faults_verbose, CTLFLAG_RWTUN,
    &umtx_verbose_rb, 0,
    "");

#ifdef UMTX_PROFILING
static long max_length;
SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0,
    "max_length");
static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD, 0,
    "umtx chain stats");
#endif

static void umtx_shm_init(void);
static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert_queue(struct umtx_q *uq, int q);
static void umtxq_remove_queue(struct umtx_q *uq, int q);
static int umtxq_sleep(struct umtx_q *uq, const char *wmesg,
    struct abs_timeout *);
static int umtxq_count(struct umtx_key *key);
static struct umtx_pi *umtx_pi_alloc(int);
static void umtx_pi_free(struct umtx_pi *pi);
static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    bool rb);
static void umtx_thread_cleanup(struct thread *td);
static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
    struct image_params *imgp __unused);
SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);

#define umtxq_signal(key, nwake)	umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
#define umtxq_insert(uq)	umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
#define umtxq_remove(uq)	umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)

static struct mtx umtx_lock;

#ifdef UMTX_PROFILING
static void
umtx_init_profiling(void)
{
	struct sysctl_oid *chain_oid;
	char chain_name[10];
	int i;

	for (i = 0; i < UMTX_CHAINS; ++i) {
		snprintf(chain_name, sizeof(chain_name), "%d", i);
		chain_oid = SYSCTL_ADD_NODE(NULL,
		    SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO,
		    chain_name, CTLFLAG_RD, NULL, "umtx hash stats");
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length0", CTLFLAG_RD,
		    &umtxq_chains[0][i].max_length, 0, NULL);
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length1", CTLFLAG_RD,
		    &umtxq_chains[1][i].max_length, 0, NULL);
	}
}

static int
sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS)
{
	char buf[512];
	struct sbuf sb;
	struct umtxq_chain *uc;
	u_int fract, i, j, tot, whole;
	u_int sf0, sf1, sf2, sf3, sf4;
	u_int si0, si1, si2, si3, si4;
	u_int sw0, sw1, sw2, sw3, sw4;

	sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
	for (i = 0; i < 2; i++) {
		tot = 0;
		for (j = 0; j < UMTX_CHAINS; ++j) {
			uc = &umtxq_chains[i][j];
			mtx_lock(&uc->uc_lock);
			tot += uc->max_length;
			mtx_unlock(&uc->uc_lock);
		}
		if (tot == 0)
			sbuf_printf(&sb, "%u) Empty ", i);
		else {
			sf0 = sf1 = sf2 = sf3 = sf4 = 0;
			si0 = si1 = si2 = si3 = si4 = 0;
			sw0 = sw1 = sw2 = sw3 = sw4 = 0;
			for (j = 0; j < UMTX_CHAINS; j++) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				whole = uc->max_length * 100;
				mtx_unlock(&uc->uc_lock);
				fract = (whole % tot) * 100;
				if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) {
					sf0 = fract;
					si0 = j;
					sw0 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw1,
				    sf1)) {
					sf1 = fract;
					si1 = j;
					sw1 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw2,
				    sf2)) {
					sf2 = fract;
					si2 = j;
					sw2 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw3,
				    sf3)) {
					sf3 = fract;
					si3 = j;
					sw3 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw4,
				    sf4)) {
					sf4 = fract;
					si4 = j;
					sw4 = whole;
				}
			}
			sbuf_printf(&sb, "queue %u:\n", i);
			sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot,
			    sf0 / tot, si0);
			sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot,
			    sf1 / tot, si1);
			sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot,
			    sf2 / tot, si2);
			sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot,
			    sf3 / tot, si3);
			sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot,
			    sf4 / tot, si4);
		}
	}
	sbuf_trim(&sb);
	sbuf_finish(&sb);
	sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
	sbuf_delete(&sb);
	return (0);
}

static int
sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS)
{
	struct umtxq_chain *uc;
	u_int i, j;
	int clear, error;

	clear = 0;
	error = sysctl_handle_int(oidp, &clear, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);

	if (clear != 0) {
		for (i = 0; i < 2; ++i) {
			for (j = 0; j < UMTX_CHAINS; ++j) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				uc->length = 0;
				uc->max_length = 0;
				mtx_unlock(&uc->uc_lock);
			}
		}
	}
	return (0);
}

SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_clear, "I", "Clear umtx chains statistics");
SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_peaks, "A", "Highest peaks in chains max length");
#endif

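/*
 * Example (hypothetical session) of using the UMTX_PROFILING sysctls
 * defined above from userland:
 *
 *	# sysctl debug.umtx.chains.peaks
 *	# sysctl debug.umtx.chains.clear=1
 *
 * "peaks" reports, per queue class, the five chains with the largest
 * recorded max_length as a percentage of the total; "clear" resets the
 * per-chain counters.
 */
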
static void
umtxq_sysinit(void *arg __unused)
{
	int i, j;

	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	for (i = 0; i < 2; ++i) {
		for (j = 0; j < UMTX_CHAINS; ++j) {
			mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
			    MTX_DEF | MTX_DUPOK);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
			LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
			TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
			umtxq_chains[i][j].uc_busy = 0;
			umtxq_chains[i][j].uc_waiters = 0;
#ifdef UMTX_PROFILING
			umtxq_chains[i][j].length = 0;
			umtxq_chains[i][j].max_length = 0;
#endif
		}
	}
#ifdef UMTX_PROFILING
	umtx_init_profiling();
#endif
	mtx_init(&umtx_lock, "umtx lock", NULL, MTX_DEF);
	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
	    EVENTHANDLER_PRI_ANY);
	umtx_shm_init();
}

struct umtx_q *
umtxq_alloc(void)
{
	struct umtx_q *uq;

	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
	uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX,
	    M_WAITOK | M_ZERO);
	TAILQ_INIT(&uq->uq_spare_queue->head);
	TAILQ_INIT(&uq->uq_pi_contested);
	uq->uq_inherited_pri = PRI_MAX;
	return (uq);
}

void
umtxq_free(struct umtx_q *uq)
{

	MPASS(uq->uq_spare_queue != NULL);
	free(uq->uq_spare_queue, M_UMTX);
	free(uq, M_UMTX);
}

static inline void
umtxq_hash(struct umtx_key *key)
{
	unsigned n;

	n = (uintptr_t)key->info.both.a + key->info.both.b;
	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
}

static inline struct umtxq_chain *
umtxq_getchain(struct umtx_key *key)
{

	if (key->type <= TYPE_SEM)
		return (&umtxq_chains[1][key->hash]);
	return (&umtxq_chains[0][key->hash]);
}

/*
 * Lock a chain.
 */
static inline void
umtxq_lock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_lock(&uc->uc_lock);
}

/*
 * Unlock a chain.
 */
static inline void
umtxq_unlock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_unlock(&uc->uc_lock);
}

/*
 * Set the chain to busy state when the following operation
 * may block (a kernel mutex cannot be used).
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	if (uc->uc_busy) {
#ifdef SMP
		if (smp_cpus > 1) {
			int count = BUSY_SPINS;
			if (count > 0) {
				umtxq_unlock(key);
				while (uc->uc_busy && --count > 0)
					cpu_spinwait();
				umtxq_lock(key);
			}
		}
#endif
		while (uc->uc_busy) {
			uc->uc_waiters++;
			msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
			uc->uc_waiters--;
		}
	}
	uc->uc_busy = 1;
}

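/*
 * Sketch of the usual busy/unbusy pattern in this file: the chain
 * mutex alone cannot be held across user-memory accesses, which may
 * fault and sleep, so the chain is marked busy first and the mutex is
 * dropped around the access:
 *
 *	umtxq_lock(key);
 *	umtxq_busy(key);		(may temporarily drop uc_lock)
 *	umtxq_unlock(key);
 *	... fueword32()/casueword32() on user memory ...
 *	umtxq_unbusy_unlocked(key);
 */
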
/*
 * Unbusy a chain.
 */
static inline void
umtxq_unbusy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	KASSERT(uc->uc_busy != 0, ("not busy"));
	uc->uc_busy = 0;
	if (uc->uc_waiters)
		wakeup_one(uc);
}

static inline void
umtxq_unbusy_unlocked(struct umtx_key *key)
{

	umtxq_lock(key);
	umtxq_unbusy(key);
	umtxq_unlock(key);
}

static struct umtxq_queue *
umtxq_queue_lookup(struct umtx_key *key, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	LIST_FOREACH(uh, &uc->uc_queue[q], link) {
		if (umtx_key_match(&uh->key, key))
			return (uh);
	}

	return (NULL);
}

static inline void
umtxq_insert_queue(struct umtx_q *uq, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT((uq->uq_flags & UQF_UMTXQ) == 0,
	    ("umtx_q is already on queue"));
	uh = umtxq_queue_lookup(&uq->uq_key, q);
	if (uh != NULL) {
		LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link);
	} else {
		uh = uq->uq_spare_queue;
		uh->key = uq->uq_key;
		LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
#ifdef UMTX_PROFILING
		uc->length++;
		if (uc->length > uc->max_length) {
			uc->max_length = uc->length;
			if (uc->max_length > max_length)
				max_length = uc->max_length;
		}
#endif
	}
	uq->uq_spare_queue = NULL;

	TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
	uh->length++;
	uq->uq_flags |= UQF_UMTXQ;
	uq->uq_cur_queue = uh;
	return;
}

static inline void
umtxq_remove_queue(struct umtx_q *uq, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (uq->uq_flags & UQF_UMTXQ) {
		uh = uq->uq_cur_queue;
		TAILQ_REMOVE(&uh->head, uq, uq_link);
		uh->length--;
		uq->uq_flags &= ~UQF_UMTXQ;
		if (TAILQ_EMPTY(&uh->head)) {
			KASSERT(uh->length == 0,
			    ("inconsistent umtxq_queue length"));
#ifdef UMTX_PROFILING
			uc->length--;
#endif
			LIST_REMOVE(uh, link);
		} else {
			uh = LIST_FIRST(&uc->uc_spare_queue);
			KASSERT(uh != NULL, ("uc_spare_queue is empty"));
			LIST_REMOVE(uh, link);
		}
		uq->uq_spare_queue = uh;
		uq->uq_cur_queue = NULL;
	}
}

/*
 * Check if there are multiple waiters.
 */
static int
umtxq_count(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL)
		return (uh->length);
	return (0);
}

/*
 * Check if there are multiple PI waiters and return the first waiter.
 */
static int
umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	*first = NULL;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL) {
		*first = TAILQ_FIRST(&uh->head);
		return (uh->length);
	}
	return (0);
}

static int
umtxq_check_susp(struct thread *td)
{
	struct proc *p;
	int error;

	/*
	 * The check for TDF_NEEDSUSPCHK is racy, but it is enough to
	 * eventually break the lockstep loop.
	 */
	if ((td->td_flags & TDF_NEEDSUSPCHK) == 0)
		return (0);
	error = 0;
	p = td->td_proc;
	PROC_LOCK(p);
	if (P_SHOULDSTOP(p) ||
	    ((p->p_flag & P_TRACED) && (td->td_dbgflags & TDB_SUSPEND))) {
		if (p->p_flag & P_SINGLE_EXIT)
			error = EINTR;
		else
			error = ERESTART;
	}
	PROC_UNLOCK(p);
	return (error);
}

/*
 * Wake up threads waiting on a userland object.
 */
static int
umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;
	struct umtx_q *uq;
	int ret;

	ret = 0;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, q);
	if (uh != NULL) {
		while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
			umtxq_remove_queue(uq, q);
			wakeup(uq);
			if (++ret >= n_wake)
				return (ret);
		}
	}
	return (ret);
}

/*
 * Wake up the specified thread.
 */
static inline void
umtxq_signal_thread(struct umtx_q *uq)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	umtxq_remove(uq);
	wakeup(uq);
}

static inline int
tstohz(const struct timespec *tsp)
{
	struct timeval tv;

	TIMESPEC_TO_TIMEVAL(&tv, tsp);
	return tvtohz(&tv);
}

static void
abs_timeout_init(struct abs_timeout *timo, int clockid, int absolute,
    const struct timespec *timeout)
{

	timo->clockid = clockid;
	if (!absolute) {
		kern_clock_gettime(curthread, clockid, &timo->end);
		timo->cur = timo->end;
		timespecadd(&timo->end, timeout);
	} else {
		timo->end = *timeout;
		kern_clock_gettime(curthread, clockid, &timo->cur);
	}
}

static void
abs_timeout_init2(struct abs_timeout *timo, const struct _umtx_time *umtxtime)
{

	abs_timeout_init(timo, umtxtime->_clockid,
	    (umtxtime->_flags & UMTX_ABSTIME) != 0, &umtxtime->_timeout);
}

static inline void
abs_timeout_update(struct abs_timeout *timo)
{

	kern_clock_gettime(curthread, timo->clockid, &timo->cur);
}

static int
abs_timeout_gethz(struct abs_timeout *timo)
{
	struct timespec tts;

	if (timespeccmp(&timo->end, &timo->cur, <=))
		return (-1);
	tts = timo->end;
	timespecsub(&tts, &timo->cur);
	return (tstohz(&tts));
}

static uint32_t
umtx_unlock_val(uint32_t flags, bool rb)
{

	if (rb)
		return (UMUTEX_RB_OWNERDEAD);
	else if ((flags & UMUTEX_NONCONSISTENT) != 0)
		return (UMUTEX_RB_NOTRECOV);
	else
		return (UMUTEX_UNOWNED);
}

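/*
 * How the abs_timeout helpers above are combined by the callers in
 * this file (simplified composite, not a verbatim quote):
 *
 *	struct abs_timeout timo;
 *
 *	if (timeout != NULL)
 *		abs_timeout_init2(&timo, timeout);
 *	...
 *	error = umtxq_sleep(uq, "wmesg", timeout == NULL ? NULL : &timo);
 *
 * umtxq_sleep() below converts the remaining time to ticks with
 * abs_timeout_gethz(), returns ETIMEDOUT once the deadline has passed,
 * and refreshes timo.cur via abs_timeout_update() after each wakeup.
 */
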
/*
 * Put the thread into sleep state; before sleeping, check whether
 * the thread was removed from the umtx queue.
 */
static inline int
umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *abstime)
{
	struct umtxq_chain *uc;
	int error, timo;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	for (;;) {
		if (!(uq->uq_flags & UQF_UMTXQ))
			return (0);
		if (abstime != NULL) {
			timo = abs_timeout_gethz(abstime);
			if (timo < 0)
				return (ETIMEDOUT);
		} else
			timo = 0;
		error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo);
		if (error != EWOULDBLOCK) {
			umtxq_lock(&uq->uq_key);
			break;
		}
		if (abstime != NULL)
			abs_timeout_update(abstime);
		umtxq_lock(&uq->uq_key);
	}
	return (error);
}

/*
 * Convert a userspace address into a unique logical address.
 */
int
umtx_key_get(const void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else {
		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
		    &entry, &key->info.shared.object, &pindex, &prot,
		    &wired) != KERN_SUCCESS) {
			return (EFAULT);
		}

		if ((share == PROCESS_SHARE) ||
		    (share == AUTO_SHARE &&
		     VM_INHERIT_SHARE == entry->inheritance)) {
			key->shared = 1;
			key->info.shared.offset = (vm_offset_t)addr -
			    entry->start + entry->offset;
			vm_object_reference(key->info.shared.object);
		} else {
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		vm_map_lookup_done(map, entry);
	}

	umtxq_hash(key);
	return (0);
}

/*
 * Release key.
 */
void
umtx_key_release(struct umtx_key *key)
{
	if (key->shared)
		vm_object_deallocate(key->info.shared.object);
}

/*
 * Fetch and compare the value; sleep on the address if the value has
 * not changed.
 */
static int
do_wait(struct thread *td, void *addr, u_long id,
    struct _umtx_time *timeout, int compat32, int is_private)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	u_long tmp;
	uint32_t tmp32;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	if (compat32 == 0) {
		error = fueword(addr, &tmp);
		if (error != 0)
			error = EFAULT;
	} else {
		error = fueword32(addr, &tmp32);
		if (error == 0)
			tmp = tmp32;
		else
			error = EFAULT;
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		if (tmp == id)
			error = umtxq_sleep(uq, "uwait", timeout == NULL ?
			    NULL : &timo);
		if ((uq->uq_flags & UQF_UMTXQ) == 0)
			error = 0;
		else
			umtxq_remove(uq);
	} else if ((uq->uq_flags & UQF_UMTXQ) != 0) {
		umtxq_remove(uq);
	}
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

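/*
 * do_wait() above and kern_umtx_wake() below form a futex-style pair.
 * A hypothetical userland consumer (names invented for illustration):
 *
 *	while (atomic_load_acq_32(&lk->word) == LOCKED)
 *		// Sleeps only while *addr still equals the expected
 *		// value, so a wakeup cannot be lost between the load
 *		// and the sleep.
 *		_umtx_op(&lk->word, UMTX_OP_WAIT_UINT, LOCKED, NULL, NULL);
 *	...
 *	_umtx_op(&lk->word, UMTX_OP_WAKE, 1, NULL, NULL);
 */
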
/*
 * Wake up threads sleeping on the specified address.
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
		return (ret);
	umtxq_lock(&key);
	umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}

/*
 * Lock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int mode)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	error = 0;
	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	/*
	 * Care must be exercised when dealing with umtx structure.  It
	 * can fault on any access.
	 */
	for (;;) {
		rv = fueword32(&m->m_owner, &owner);
		if (rv == -1)
			return (EFAULT);
		if (mode == _UMUTEX_WAIT) {
			if (owner == UMUTEX_UNOWNED ||
			    owner == UMUTEX_CONTESTED ||
			    owner == UMUTEX_RB_OWNERDEAD ||
			    owner == UMUTEX_RB_NOTRECOV)
				return (0);
		} else {
			/*
			 * Robust mutex terminated.  Kernel duty is to
			 * return EOWNERDEAD to the userspace.  The
			 * umutex.m_flags UMUTEX_NONCONSISTENT is set
			 * by the common userspace code.
			 */
			if (owner == UMUTEX_RB_OWNERDEAD) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_RB_OWNERDEAD, &owner,
				    id | UMUTEX_CONTESTED);
				if (rv == -1)
					return (EFAULT);
				if (owner == UMUTEX_RB_OWNERDEAD)
					return (EOWNERDEAD); /* success */
				rv = umtxq_check_susp(td);
				if (rv != 0)
					return (rv);
				continue;
			}
			if (owner == UMUTEX_RB_NOTRECOV)
				return (ENOTRECOVERABLE);

			/*
			 * Try the uncontested case.  This should be
			 * done in userland.
			 */
			rv = casueword32(&m->m_owner, UMUTEX_UNOWNED,
			    &owner, id);
			/* The address was invalid. */
			if (rv == -1)
				return (EFAULT);

			/* The acquire succeeded. */
			if (owner == UMUTEX_UNOWNED)
				return (0);

			/*
			 * If no one owns it but it is contested, try
			 * to acquire it.
			 */
			if (owner == UMUTEX_CONTESTED) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_CONTESTED, &owner,
				    id | UMUTEX_CONTESTED);
				/* The address was invalid. */
				if (rv == -1)
					return (EFAULT);

				if (owner == UMUTEX_CONTESTED)
					return (0);

				rv = umtxq_check_susp(td);
				if (rv != 0)
					return (rv);

				/*
				 * If this failed, the lock has
				 * changed; restart.
				 */
				continue;
			}
		}

		if (mode == _UMUTEX_TRY)
			return (EBUSY);

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old,
		    owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (rv == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep.  Otherwise the lock changed
		 * and we need to retry, or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtxn", timeout == NULL ?
			    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);

		if (error == 0)
			error = umtxq_check_susp(td);
	}

	return (0);
}

/*
 * Unlock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	uint32_t owner, old, id, newlock;
	int error, count;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	newlock = umtx_unlock_val(flags, rb);
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, newlock);
		if (error == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * zero or only one thread is waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	if (count > 1)
		newlock |= UMUTEX_CONTESTED;
	error = casueword32(&m->m_owner, owner, &old, newlock);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

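/*
 * For PTHREAD_PRIO_NONE mutexes the kernel is only the slow path; the
 * userland fast path that do_lock_normal() and do_unlock_normal()
 * assume looks roughly like this (illustrative only, libthr implements
 * the real version):
 *
 *	lock:	if (atomic_cmpset_acq_32(&m->m_owner, UMUTEX_UNOWNED, tid))
 *			return (0);	// uncontested, no syscall
 *		return (_umtx_op(m, UMTX_OP_MUTEX_LOCK, 0, NULL, NULL));
 *
 *	unlock:	if (atomic_cmpset_rel_32(&m->m_owner, tid, UMUTEX_UNOWNED))
 *			return (0);	// no waiters recorded
 *		return (_umtx_op(m, UMTX_OP_MUTEX_UNLOCK, 0, NULL, NULL));
 *
 * The UMUTEX_CONTESTED bit set by the kernel is what defeats the
 * unlock fast path and forces a wakeup of the sleeping waiters.
 */
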
/*
 * Check if the mutex is available and wake up a waiter;
 * only for simple mutexes.
 */
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != 0 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV)
		return (0);

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	if (count <= 1 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV) {
		error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    UMUTEX_UNOWNED);
		if (error == -1)
			error = EFAULT;
	}

	umtxq_lock(&key);
	if (error == 0 && count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 ||
	    owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV))
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

/*
 * Check if the mutex has waiters and try to fix the contention bit.
 */
static int
do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old;
	int type;
	int error;
	int count;

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT |
	    UMUTEX_ROBUST)) {
	case 0:
	case UMUTEX_ROBUST:
		type = TYPE_NORMAL_UMUTEX;
		break;
	case UMUTEX_PRIO_INHERIT:
		type = TYPE_PI_UMUTEX;
		break;
	case (UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST):
		type = TYPE_PI_ROBUST_UMUTEX;
		break;
	case UMUTEX_PRIO_PROTECT:
		type = TYPE_PP_UMUTEX;
		break;
	case (UMUTEX_PRIO_PROTECT | UMUTEX_ROBUST):
		type = TYPE_PP_ROBUST_UMUTEX;
		break;
	default:
		return (EINVAL);
	}
	if ((error = umtx_key_get(m, type, GET_SHARE(flags), &key)) != 0)
		return (error);

	owner = 0;
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);
	/*
	 * Only repair the contention bit if there is a waiter; this means
	 * the mutex is still being referenced by userland code.  Otherwise
	 * don't update any memory.
	 */
	if (count > 1) {
		error = fueword32(&m->m_owner, &owner);
		if (error == -1)
			error = EFAULT;
		while (error == 0 && (owner & UMUTEX_CONTESTED) == 0) {
			error = casueword32(&m->m_owner, owner, &old,
			    owner | UMUTEX_CONTESTED);
			if (error == -1) {
				error = EFAULT;
				break;
			}
			if (old == owner)
				break;
			owner = old;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
	} else if (count == 1) {
		error = fueword32(&m->m_owner, &owner);
		if (error == -1)
			error = EFAULT;
		while (error == 0 && (owner & ~UMUTEX_CONTESTED) != 0 &&
		    (owner & UMUTEX_CONTESTED) == 0) {
			error = casueword32(&m->m_owner, owner, &old,
			    owner | UMUTEX_CONTESTED);
			if (error == -1) {
				error = EFAULT;
				break;
			}
			if (old == owner)
				break;
			owner = old;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
	}
	umtxq_lock(&key);
	if (error == EFAULT) {
		umtxq_signal(&key, INT_MAX);
	} else if (count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 ||
	    owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV))
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

static inline struct umtx_pi *
umtx_pi_alloc(int flags)
{
	struct umtx_pi *pi;

	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
	TAILQ_INIT(&pi->pi_blocked);
	atomic_add_int(&umtx_pi_allocated, 1);
	return (pi);
}

static inline void
umtx_pi_free(struct umtx_pi *pi)
{
	uma_zfree(umtx_pi_zone, pi);
	atomic_add_int(&umtx_pi_allocated, -1);
}

/*
 * Adjust the thread's position on a pi_state after its priority has been
 * changed.
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread or higher than the next thread.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}

static struct umtx_pi *
umtx_pi_next(struct umtx_pi *pi)
{
	struct umtx_q *uq_owner;

	if (pi->pi_owner == NULL)
		return (NULL);
	uq_owner = pi->pi_owner->td_umtxq;
	if (uq_owner == NULL)
		return (NULL);
	return (uq_owner->uq_pi_blocked);
}

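/*
 * umtx_pi_next() follows the ownership chain one step, from a PI mutex
 * to the mutex that its owner is itself blocked on.  With hypothetical
 * threads T1 and T2, a userland deadlock makes the chain cyclic:
 *
 *	T1 owns M1, blocks on M2; T2 owns M2, blocks on M1
 *	=> M1 -> M2 -> M1 -> ...
 *
 * The loop check below walks this chain with a slow and a fast
 * iterator; the fast one advances two steps per round and can only
 * meet the slow one again if there is a cycle.
 */
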
/*
 * Floyd's Cycle-Finding Algorithm.
 */
static bool
umtx_pi_check_loop(struct umtx_pi *pi)
{
	struct umtx_pi *pi1;	/* fast iterator */

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (false);
	pi1 = pi;
	for (;;) {
		pi = umtx_pi_next(pi);
		if (pi == NULL)
			break;
		pi1 = umtx_pi_next(pi1);
		if (pi1 == NULL)
			break;
		pi1 = umtx_pi_next(pi1);
		if (pi1 == NULL)
			break;
		if (pi == pi1)
			return (true);
	}
	return (false);
}

/*
 * Propagate priority when a thread is blocked on a POSIX
 * PI mutex.
 */
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;
	if (umtx_pi_check_loop(pi))
		return;

	for (;;) {
		td = pi->pi_owner;
		if (td == NULL || td == curthread)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		thread_lock(td);
		if (td->td_lend_user_pri > pri)
			sched_lend_user_prio(td, pri);
		else {
			thread_unlock(td);
			break;
		}
		thread_unlock(td);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		if (pi == NULL)
			break;
		/* Resort td on the list if needed. */
		umtx_pi_adjust_thread(pi, td);
	}
}

/*
 * Unpropagate priority for a PI mutex when a thread blocked on
 * it is interrupted by a signal or resumed by others.
 */
static void
umtx_repropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);

	if (umtx_pi_check_loop(pi))
		return;
	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		thread_lock(pi->pi_owner);
		sched_lend_user_prio(pi->pi_owner, pri);
		thread_unlock(pi->pi_owner);
		if ((pi = uq_owner->uq_pi_blocked) != NULL)
			umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
	}
}

/*
 * Insert a PI mutex into the owned list.
 */
static void
umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi->pi_owner != NULL)
		panic("pi_owner != NULL");
	pi->pi_owner = owner;
	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
}

/*
 * Disown a PI mutex, and remove it from the owned list.
 */
static void
umtx_pi_disown(struct umtx_pi *pi)
{

	mtx_assert(&umtx_lock, MA_OWNED);
	TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, pi, pi_link);
	pi->pi_owner = NULL;
}

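/*
 * Worked example of the propagation implemented above, with
 * hypothetical threads and priorities (a smaller value is a higher
 * priority): thread L (UPRI 200) owns a PI mutex and thread H
 * (UPRI 100) blocks on it.  umtx_propagate_priority(H) reaches L and
 * calls sched_lend_user_prio(L, 100), so L runs at H's priority until
 * it unlocks.  umtx_repropagate_priority() later recomputes the lent
 * priority from the remaining waiters, e.g. when H times out or is
 * interrupted by a signal.
 */
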
/*
 * Claim ownership of a PI mutex.
 */
static int
umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq;
	int pri;

	mtx_lock(&umtx_lock);
	if (pi->pi_owner == owner) {
		mtx_unlock(&umtx_lock);
		return (0);
	}

	if (pi->pi_owner != NULL) {
		/*
		 * userland may have already messed the mutex, sigh.
		 */
		mtx_unlock(&umtx_lock);
		return (EPERM);
	}
	umtx_pi_setowner(pi, owner);
	uq = TAILQ_FIRST(&pi->pi_blocked);
	if (uq != NULL) {
		pri = UPRI(uq->uq_thread);
		thread_lock(owner);
		if (pri < UPRI(owner))
			sched_lend_user_prio(owner, pri);
		thread_unlock(owner);
	}
	mtx_unlock(&umtx_lock);
	return (0);
}

/*
 * Adjust a thread's order position in its blocked PI mutex;
 * this may result in a new priority propagation.
 */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	mtx_lock(&umtx_lock);
	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	if (pi != NULL) {
		umtx_pi_adjust_thread(pi, td);
		umtx_repropagate_priority(pi);
	}
	mtx_unlock(&umtx_lock);
}

/*
 * Sleep on a PI mutex.
 */
static int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, uint32_t owner,
    const char *wmesg, struct abs_timeout *timo, bool shared)
{
	struct umtxq_chain *uc;
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int error, pri;

	error = 0;
	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(uc->uc_busy != 0, ("umtx chain is not busy"));
	umtxq_insert(uq);
	mtx_lock(&umtx_lock);
	if (pi->pi_owner == NULL) {
		mtx_unlock(&umtx_lock);
		td1 = tdfind(owner, shared ? -1 : td->td_proc->p_pid);
		mtx_lock(&umtx_lock);
		if (td1 != NULL) {
			if (pi->pi_owner == NULL)
				umtx_pi_setowner(pi, td1);
			PROC_UNLOCK(td1->td_proc);
		}
	}

	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	thread_lock(td);
	td->td_flags |= TDF_UPIBLOCKED;
	thread_unlock(td);
	umtx_propagate_priority(td);
	mtx_unlock(&umtx_lock);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, wmesg, timo);
	umtxq_remove(uq);

	mtx_lock(&umtx_lock);
	uq->uq_pi_blocked = NULL;
	thread_lock(td);
	td->td_flags &= ~TDF_UPIBLOCKED;
	thread_unlock(td);
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_repropagate_priority(pi);
	mtx_unlock(&umtx_lock);
	umtxq_unlock(&uq->uq_key);

	return (error);
}

/*
 * Add a reference to a PI mutex.
 */
static void
umtx_pi_ref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	pi->pi_refcount++;
}

/*
 * Decrease the reference count of a PI mutex; when the counter
 * drops to zero, its memory is freed.
 */
static void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		mtx_lock(&umtx_lock);
		if (pi->pi_owner != NULL)
			umtx_pi_disown(pi);
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
		    ("blocked queue not empty"));
		mtx_unlock(&umtx_lock);
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		umtx_pi_free(pi);
	}
}

/*
 * Find a PI mutex in the hash table.
 */
static struct umtx_pi *
umtx_pi_lookup(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtx_pi *pi;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);

	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
		if (umtx_key_match(&pi->pi_key, key)) {
			return (pi);
		}
	}
	return (NULL);
}

/*
 * Insert a PI mutex into the hash table.
 */
static inline void
umtx_pi_insert(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
}

/*
 * Lock a PI mutex.
 */
static int
do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, old_owner, owner, old;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	pi = umtx_pi_lookup(&uq->uq_key);
	if (pi == NULL) {
		new_pi = umtx_pi_alloc(M_NOWAIT);
		if (new_pi == NULL) {
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc(M_WAITOK);
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL) {
				umtx_pi_free(new_pi);
				new_pi = NULL;
			}
		}
		if (new_pi != NULL) {
			new_pi->pi_key = uq->uq_key;
			umtx_pi_insert(new_pi);
			pi = new_pi;
		}
	}
	umtx_pi_ref(pi);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Care must be exercised when dealing with umtx structure.  It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED) {
			error = 0;
			break;
		}

		if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
			break;
		}

		/* If no one owns it but it is contested, try to acquire it. */
		if (owner == UMUTEX_CONTESTED || owner == UMUTEX_RB_OWNERDEAD) {
			old_owner = owner;
			rv = casueword32(&m->m_owner, owner, &owner,
			    id | UMUTEX_CONTESTED);
			/* The address was invalid. */
			if (rv == -1) {
				error = EFAULT;
				break;
			}

			if (owner == old_owner) {
				umtxq_lock(&uq->uq_key);
				umtxq_busy(&uq->uq_key);
				error = umtx_pi_claim(pi, td);
				umtxq_unbusy(&uq->uq_key);
				umtxq_unlock(&uq->uq_key);
				if (error != 0) {
					/*
					 * Since we're going to return an
					 * error, restore the m_owner to its
					 * previous, unowned state to avoid
					 * compounding the problem.
					 */
					(void)casuword32(&m->m_owner,
					    id | UMUTEX_CONTESTED,
					    old_owner);
				}
				if (error == 0 &&
				    old_owner == UMUTEX_RB_OWNERDEAD)
					error = EOWNERDEAD;
				break;
			}

			error = umtxq_check_susp(td);
			if (error != 0)
				break;

			/* If this failed the lock has changed, restart. */
			continue;
		}

		if ((owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old, owner |
		    UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}

		umtxq_lock(&uq->uq_key);
		/*
		 * We set the contested bit, sleep.  Otherwise the lock changed
		 * and we need to retry, or we lost a race to the thread
		 * unlocking the umtx.  Note that the UMUTEX_RB_OWNERDEAD
		 * value for owner is impossible there.
		 */
		if (old == owner) {
			error = umtxq_sleep_pi(uq, pi,
			    owner & ~UMUTEX_CONTESTED,
			    "umtxpi", timeout == NULL ? NULL : &timo,
			    (flags & USYNC_PROCESS_SHARED) != 0);
			if (error != 0)
				continue;
		} else {
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
		}

		error = umtxq_check_susp(td);
		if (error != 0)
			break;
	}

	umtxq_lock(&uq->uq_key);
	umtx_pi_unref(pi);
	umtxq_unlock(&uq->uq_key);

	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Unlock a PI mutex.
 */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	uint32_t id, new_owner, old, owner;
	int count, error, pri;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	new_owner = umtx_unlock_val(flags, rb);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, new_owner);
		if (error == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count_pi(&key, &uq_first);
	if (uq_first != NULL) {
		mtx_lock(&umtx_lock);
		pi = uq_first->uq_pi_blocked;
		KASSERT(pi != NULL, ("pi == NULL?"));
		if (pi->pi_owner != td && !(rb && pi->pi_owner == NULL)) {
			mtx_unlock(&umtx_lock);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			/* userland messed the mutex */
			return (EPERM);
		}
		uq_me = td->td_umtxq;
		if (pi->pi_owner == td)
			umtx_pi_disown(pi);
		/* get highest priority thread which is still sleeping. */
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		while (uq_first != NULL &&
		    (uq_first->uq_flags & UQF_UMTXQ) == 0) {
			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
		}
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
		if (uq_first)
			umtxq_signal_thread(uq_first);
	} else {
		pi = umtx_pi_lookup(&key);
		/*
		 * A umtx_pi can exist if a signal or timeout removed the
		 * last waiter from the umtxq, but there is still
		 * a thread in do_lock_pi() holding the umtx_pi.
		 */
		if (pi != NULL) {
			/*
			 * The umtx_pi can be unowned, such as when a thread
			 * has just entered do_lock_pi(), allocated the
			 * umtx_pi, and unlocked the umtxq.
			 * If the current thread owns it, it must disown it.
			 */
			mtx_lock(&umtx_lock);
			if (pi->pi_owner == td)
				umtx_pi_disown(pi);
			mtx_unlock(&umtx_lock);
		}
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * zero or only one thread is waiting for it.
	 * Otherwise, it must be marked as contested.
	 */

	if (count > 1)
		new_owner |= UMUTEX_CONTESTED;
	error = casueword32(&m->m_owner, owner, &old, new_owner);

	umtxq_unbusy_unlocked(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

/*
 * Lock a PP mutex.
 */
static int
do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct abs_timeout timo;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t ceiling;
	uint32_t owner, id;
	int error, pri, old_inherited_pri, su, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
	for (;;) {
		old_inherited_pri = uq->uq_inherited_pri;
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		rv = fueword32(&m->m_ceilings[0], &ceiling);
		if (rv == -1) {
			error = EFAULT;
			goto out;
		}
		ceiling = RTP_PRIO_MAX - ceiling;
		if (ceiling > RTP_PRIO_MAX) {
			error = EINVAL;
			goto out;
		}

		mtx_lock(&umtx_lock);
		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
			mtx_unlock(&umtx_lock);
			error = EINVAL;
			goto out;
		}
		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
			thread_lock(td);
			if (uq->uq_inherited_pri < UPRI(td))
				sched_lend_user_prio(td, uq->uq_inherited_pri);
			thread_unlock(td);
		}
		mtx_unlock(&umtx_lock);

		rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    id | UMUTEX_CONTESTED);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		if (owner == UMUTEX_CONTESTED) {
			error = 0;
			break;
		} else if (owner == UMUTEX_RB_OWNERDEAD) {
			rv = casueword32(&m->m_owner, UMUTEX_RB_OWNERDEAD,
			    &owner, id | UMUTEX_CONTESTED);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (owner == UMUTEX_RB_OWNERDEAD) {
				error = EOWNERDEAD; /* success */
				break;
			}
			error = 0;
		} else if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", timeout == NULL ?
		    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);

		mtx_lock(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}

	if (error != 0 && error != EOWNERDEAD) {
		mtx_lock(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}

out:
	umtxq_unbusy_unlocked(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}

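/*
 * Note on the ceiling arithmetic used by the PP code above and below:
 * a POSIX ceiling in [0, RTP_PRIO_MAX] is mapped onto the kernel
 * real-time range, where a smaller number means a higher priority.
 * For example, with RTP_PRIO_MAX == 31, a userland ceiling of 31
 * (highest) yields
 *
 *	RTP_PRIO_MAX - 31 == 0, i.e. PRI_MIN_REALTIME + 0,
 *
 * the top of the real-time range, while a ceiling of 0 maps to
 * PRI_MIN_REALTIME + 31, the bottom of it.
 */
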
2217 */ 2218 static int 2219 do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, bool rb) 2220 { 2221 struct umtx_key key; 2222 struct umtx_q *uq, *uq2; 2223 struct umtx_pi *pi; 2224 uint32_t id, owner, rceiling; 2225 int error, pri, new_inherited_pri, su; 2226 2227 id = td->td_tid; 2228 uq = td->td_umtxq; 2229 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); 2230 2231 /* 2232 * Make sure we own this mtx. 2233 */ 2234 error = fueword32(&m->m_owner, &owner); 2235 if (error == -1) 2236 return (EFAULT); 2237 2238 if ((owner & ~UMUTEX_CONTESTED) != id) 2239 return (EPERM); 2240 2241 error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t)); 2242 if (error != 0) 2243 return (error); 2244 2245 if (rceiling == -1) 2246 new_inherited_pri = PRI_MAX; 2247 else { 2248 rceiling = RTP_PRIO_MAX - rceiling; 2249 if (rceiling > RTP_PRIO_MAX) 2250 return (EINVAL); 2251 new_inherited_pri = PRI_MIN_REALTIME + rceiling; 2252 } 2253 2254 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2255 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2256 &key)) != 0) 2257 return (error); 2258 umtxq_lock(&key); 2259 umtxq_busy(&key); 2260 umtxq_unlock(&key); 2261 /* 2262 * For priority protected mutex, always set unlocked state 2263 * to UMUTEX_CONTESTED, so that userland always enters kernel 2264 * to lock the mutex, it is necessary because thread priority 2265 * has to be adjusted for such mutex. 2266 */ 2267 error = suword32(&m->m_owner, umtx_unlock_val(flags, rb) | 2268 UMUTEX_CONTESTED); 2269 2270 umtxq_lock(&key); 2271 if (error == 0) 2272 umtxq_signal(&key, 1); 2273 umtxq_unbusy(&key); 2274 umtxq_unlock(&key); 2275 2276 if (error == -1) 2277 error = EFAULT; 2278 else { 2279 mtx_lock(&umtx_lock); 2280 if (su != 0) 2281 uq->uq_inherited_pri = new_inherited_pri; 2282 pri = PRI_MAX; 2283 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { 2284 uq2 = TAILQ_FIRST(&pi->pi_blocked); 2285 if (uq2 != NULL) { 2286 if (pri > UPRI(uq2->uq_thread)) 2287 pri = UPRI(uq2->uq_thread); 2288 } 2289 } 2290 if (pri > uq->uq_inherited_pri) 2291 pri = uq->uq_inherited_pri; 2292 thread_lock(td); 2293 sched_lend_user_prio(td, pri); 2294 thread_unlock(td); 2295 mtx_unlock(&umtx_lock); 2296 } 2297 umtx_key_release(&key); 2298 return (error); 2299 } 2300 2301 static int 2302 do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling, 2303 uint32_t *old_ceiling) 2304 { 2305 struct umtx_q *uq; 2306 uint32_t flags, id, owner, save_ceiling; 2307 int error, rv, rv1; 2308 2309 error = fueword32(&m->m_flags, &flags); 2310 if (error == -1) 2311 return (EFAULT); 2312 if ((flags & UMUTEX_PRIO_PROTECT) == 0) 2313 return (EINVAL); 2314 if (ceiling > RTP_PRIO_MAX) 2315 return (EINVAL); 2316 id = td->td_tid; 2317 uq = td->td_umtxq; 2318 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ? 2319 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags), 2320 &uq->uq_key)) != 0) 2321 return (error); 2322 for (;;) { 2323 umtxq_lock(&uq->uq_key); 2324 umtxq_busy(&uq->uq_key); 2325 umtxq_unlock(&uq->uq_key); 2326 2327 rv = fueword32(&m->m_ceilings[0], &save_ceiling); 2328 if (rv == -1) { 2329 error = EFAULT; 2330 break; 2331 } 2332 2333 rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner, 2334 id | UMUTEX_CONTESTED); 2335 if (rv == -1) { 2336 error = EFAULT; 2337 break; 2338 } 2339 2340 if (owner == UMUTEX_CONTESTED) { 2341 rv = suword32(&m->m_ceilings[0], ceiling); 2342 rv1 = suword32(&m->m_owner, UMUTEX_CONTESTED); 2343 error = (rv == 0 && rv1 == 0) ? 
0: EFAULT; 2344 break; 2345 } 2346 2347 if ((owner & ~UMUTEX_CONTESTED) == id) { 2348 rv = suword32(&m->m_ceilings[0], ceiling); 2349 error = rv == 0 ? 0 : EFAULT; 2350 break; 2351 } 2352 2353 if (owner == UMUTEX_RB_OWNERDEAD) { 2354 error = EOWNERDEAD; 2355 break; 2356 } else if (owner == UMUTEX_RB_NOTRECOV) { 2357 error = ENOTRECOVERABLE; 2358 break; 2359 } 2360 2361 /* 2362 * If we caught a signal, we have retried and now 2363 * exit immediately. 2364 */ 2365 if (error != 0) 2366 break; 2367 2368 /* 2369 * We set the contested bit, sleep. Otherwise the lock changed 2370 * and we need to retry or we lost a race to the thread 2371 * unlocking the umtx. 2372 */ 2373 umtxq_lock(&uq->uq_key); 2374 umtxq_insert(uq); 2375 umtxq_unbusy(&uq->uq_key); 2376 error = umtxq_sleep(uq, "umtxpp", NULL); 2377 umtxq_remove(uq); 2378 umtxq_unlock(&uq->uq_key); 2379 } 2380 umtxq_lock(&uq->uq_key); 2381 if (error == 0) 2382 umtxq_signal(&uq->uq_key, INT_MAX); 2383 umtxq_unbusy(&uq->uq_key); 2384 umtxq_unlock(&uq->uq_key); 2385 umtx_key_release(&uq->uq_key); 2386 if (error == 0 && old_ceiling != NULL) { 2387 rv = suword32(old_ceiling, save_ceiling); 2388 error = rv == 0 ? 0 : EFAULT; 2389 } 2390 return (error); 2391 } 2392 2393 /* 2394 * Lock a userland POSIX mutex. 2395 */ 2396 static int 2397 do_lock_umutex(struct thread *td, struct umutex *m, 2398 struct _umtx_time *timeout, int mode) 2399 { 2400 uint32_t flags; 2401 int error; 2402 2403 error = fueword32(&m->m_flags, &flags); 2404 if (error == -1) 2405 return (EFAULT); 2406 2407 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2408 case 0: 2409 error = do_lock_normal(td, m, flags, timeout, mode); 2410 break; 2411 case UMUTEX_PRIO_INHERIT: 2412 error = do_lock_pi(td, m, flags, timeout, mode); 2413 break; 2414 case UMUTEX_PRIO_PROTECT: 2415 error = do_lock_pp(td, m, flags, timeout, mode); 2416 break; 2417 default: 2418 return (EINVAL); 2419 } 2420 if (timeout == NULL) { 2421 if (error == EINTR && mode != _UMUTEX_WAIT) 2422 error = ERESTART; 2423 } else { 2424 /* Timed-locking is not restarted. */ 2425 if (error == ERESTART) 2426 error = EINTR; 2427 } 2428 return (error); 2429 } 2430 2431 /* 2432 * Unlock a userland POSIX mutex. 
2433 */ 2434 static int 2435 do_unlock_umutex(struct thread *td, struct umutex *m, bool rb) 2436 { 2437 uint32_t flags; 2438 int error; 2439 2440 error = fueword32(&m->m_flags, &flags); 2441 if (error == -1) 2442 return (EFAULT); 2443 2444 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2445 case 0: 2446 return (do_unlock_normal(td, m, flags, rb)); 2447 case UMUTEX_PRIO_INHERIT: 2448 return (do_unlock_pi(td, m, flags, rb)); 2449 case UMUTEX_PRIO_PROTECT: 2450 return (do_unlock_pp(td, m, flags, rb)); 2451 } 2452 2453 return (EINVAL); 2454 } 2455 2456 static int 2457 do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m, 2458 struct timespec *timeout, u_long wflags) 2459 { 2460 struct abs_timeout timo; 2461 struct umtx_q *uq; 2462 uint32_t flags, clockid, hasw; 2463 int error; 2464 2465 uq = td->td_umtxq; 2466 error = fueword32(&cv->c_flags, &flags); 2467 if (error == -1) 2468 return (EFAULT); 2469 error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key); 2470 if (error != 0) 2471 return (error); 2472 2473 if ((wflags & CVWAIT_CLOCKID) != 0) { 2474 error = fueword32(&cv->c_clockid, &clockid); 2475 if (error == -1) { 2476 umtx_key_release(&uq->uq_key); 2477 return (EFAULT); 2478 } 2479 if (clockid < CLOCK_REALTIME || 2480 clockid >= CLOCK_THREAD_CPUTIME_ID) { 2481 /* hmm, only HW clock id will work. */ 2482 umtx_key_release(&uq->uq_key); 2483 return (EINVAL); 2484 } 2485 } else { 2486 clockid = CLOCK_REALTIME; 2487 } 2488 2489 umtxq_lock(&uq->uq_key); 2490 umtxq_busy(&uq->uq_key); 2491 umtxq_insert(uq); 2492 umtxq_unlock(&uq->uq_key); 2493 2494 /* 2495 * Set c_has_waiters to 1 before releasing user mutex, also 2496 * don't modify cache line when unnecessary. 2497 */ 2498 error = fueword32(&cv->c_has_waiters, &hasw); 2499 if (error == 0 && hasw == 0) 2500 suword32(&cv->c_has_waiters, 1); 2501 2502 umtxq_unbusy_unlocked(&uq->uq_key); 2503 2504 error = do_unlock_umutex(td, m, false); 2505 2506 if (timeout != NULL) 2507 abs_timeout_init(&timo, clockid, (wflags & CVWAIT_ABSTIME) != 0, 2508 timeout); 2509 2510 umtxq_lock(&uq->uq_key); 2511 if (error == 0) { 2512 error = umtxq_sleep(uq, "ucond", timeout == NULL ? 2513 NULL : &timo); 2514 } 2515 2516 if ((uq->uq_flags & UQF_UMTXQ) == 0) 2517 error = 0; 2518 else { 2519 /* 2520 * This must be timeout,interrupted by signal or 2521 * surprious wakeup, clear c_has_waiter flag when 2522 * necessary. 2523 */ 2524 umtxq_busy(&uq->uq_key); 2525 if ((uq->uq_flags & UQF_UMTXQ) != 0) { 2526 int oldlen = uq->uq_cur_queue->length; 2527 umtxq_remove(uq); 2528 if (oldlen == 1) { 2529 umtxq_unlock(&uq->uq_key); 2530 suword32(&cv->c_has_waiters, 0); 2531 umtxq_lock(&uq->uq_key); 2532 } 2533 } 2534 umtxq_unbusy(&uq->uq_key); 2535 if (error == ERESTART) 2536 error = EINTR; 2537 } 2538 2539 umtxq_unlock(&uq->uq_key); 2540 umtx_key_release(&uq->uq_key); 2541 return (error); 2542 } 2543 2544 /* 2545 * Signal a userland condition variable. 
2546 */ 2547 static int 2548 do_cv_signal(struct thread *td, struct ucond *cv) 2549 { 2550 struct umtx_key key; 2551 int error, cnt, nwake; 2552 uint32_t flags; 2553 2554 error = fueword32(&cv->c_flags, &flags); 2555 if (error == -1) 2556 return (EFAULT); 2557 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2558 return (error); 2559 umtxq_lock(&key); 2560 umtxq_busy(&key); 2561 cnt = umtxq_count(&key); 2562 nwake = umtxq_signal(&key, 1); 2563 if (cnt <= nwake) { 2564 umtxq_unlock(&key); 2565 error = suword32(&cv->c_has_waiters, 0); 2566 if (error == -1) 2567 error = EFAULT; 2568 umtxq_lock(&key); 2569 } 2570 umtxq_unbusy(&key); 2571 umtxq_unlock(&key); 2572 umtx_key_release(&key); 2573 return (error); 2574 } 2575 2576 static int 2577 do_cv_broadcast(struct thread *td, struct ucond *cv) 2578 { 2579 struct umtx_key key; 2580 int error; 2581 uint32_t flags; 2582 2583 error = fueword32(&cv->c_flags, &flags); 2584 if (error == -1) 2585 return (EFAULT); 2586 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2587 return (error); 2588 2589 umtxq_lock(&key); 2590 umtxq_busy(&key); 2591 umtxq_signal(&key, INT_MAX); 2592 umtxq_unlock(&key); 2593 2594 error = suword32(&cv->c_has_waiters, 0); 2595 if (error == -1) 2596 error = EFAULT; 2597 2598 umtxq_unbusy_unlocked(&key); 2599 2600 umtx_key_release(&key); 2601 return (error); 2602 } 2603 2604 static int 2605 do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, struct _umtx_time *timeout) 2606 { 2607 struct abs_timeout timo; 2608 struct umtx_q *uq; 2609 uint32_t flags, wrflags; 2610 int32_t state, oldstate; 2611 int32_t blocked_readers; 2612 int error, rv; 2613 2614 uq = td->td_umtxq; 2615 error = fueword32(&rwlock->rw_flags, &flags); 2616 if (error == -1) 2617 return (EFAULT); 2618 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2619 if (error != 0) 2620 return (error); 2621 2622 if (timeout != NULL) 2623 abs_timeout_init2(&timo, timeout); 2624 2625 wrflags = URWLOCK_WRITE_OWNER; 2626 if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER)) 2627 wrflags |= URWLOCK_WRITE_WAITERS; 2628 2629 for (;;) { 2630 rv = fueword32(&rwlock->rw_state, &state); 2631 if (rv == -1) { 2632 umtx_key_release(&uq->uq_key); 2633 return (EFAULT); 2634 } 2635 2636 /* try to lock it */ 2637 while (!(state & wrflags)) { 2638 if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) { 2639 umtx_key_release(&uq->uq_key); 2640 return (EAGAIN); 2641 } 2642 rv = casueword32(&rwlock->rw_state, state, 2643 &oldstate, state + 1); 2644 if (rv == -1) { 2645 umtx_key_release(&uq->uq_key); 2646 return (EFAULT); 2647 } 2648 if (oldstate == state) { 2649 umtx_key_release(&uq->uq_key); 2650 return (0); 2651 } 2652 error = umtxq_check_susp(td); 2653 if (error != 0) 2654 break; 2655 state = oldstate; 2656 } 2657 2658 if (error) 2659 break; 2660 2661 /* grab monitor lock */ 2662 umtxq_lock(&uq->uq_key); 2663 umtxq_busy(&uq->uq_key); 2664 umtxq_unlock(&uq->uq_key); 2665 2666 /* 2667 * re-read the state, in case it changed between the try-lock above 2668 * and the check below 2669 */ 2670 rv = fueword32(&rwlock->rw_state, &state); 2671 if (rv == -1) 2672 error = EFAULT; 2673 2674 /* set read contention bit */ 2675 while (error == 0 && (state & wrflags) && 2676 !(state & URWLOCK_READ_WAITERS)) { 2677 rv = casueword32(&rwlock->rw_state, state, 2678 &oldstate, state | URWLOCK_READ_WAITERS); 2679 if (rv == -1) { 2680 error = EFAULT; 2681 break; 2682 } 2683 if (oldstate == state) 2684 
goto sleep; 2685 state = oldstate; 2686 error = umtxq_check_susp(td); 2687 if (error != 0) 2688 break; 2689 } 2690 if (error != 0) { 2691 umtxq_unbusy_unlocked(&uq->uq_key); 2692 break; 2693 } 2694 2695 /* state is changed while setting flags, restart */ 2696 if (!(state & wrflags)) { 2697 umtxq_unbusy_unlocked(&uq->uq_key); 2698 error = umtxq_check_susp(td); 2699 if (error != 0) 2700 break; 2701 continue; 2702 } 2703 2704 sleep: 2705 /* contention bit is set, before sleeping, increase read waiter count */ 2706 rv = fueword32(&rwlock->rw_blocked_readers, 2707 &blocked_readers); 2708 if (rv == -1) { 2709 umtxq_unbusy_unlocked(&uq->uq_key); 2710 error = EFAULT; 2711 break; 2712 } 2713 suword32(&rwlock->rw_blocked_readers, blocked_readers+1); 2714 2715 while (state & wrflags) { 2716 umtxq_lock(&uq->uq_key); 2717 umtxq_insert(uq); 2718 umtxq_unbusy(&uq->uq_key); 2719 2720 error = umtxq_sleep(uq, "urdlck", timeout == NULL ? 2721 NULL : &timo); 2722 2723 umtxq_busy(&uq->uq_key); 2724 umtxq_remove(uq); 2725 umtxq_unlock(&uq->uq_key); 2726 if (error) 2727 break; 2728 rv = fueword32(&rwlock->rw_state, &state); 2729 if (rv == -1) { 2730 error = EFAULT; 2731 break; 2732 } 2733 } 2734 2735 /* decrease read waiter count, and may clear read contention bit */ 2736 rv = fueword32(&rwlock->rw_blocked_readers, 2737 &blocked_readers); 2738 if (rv == -1) { 2739 umtxq_unbusy_unlocked(&uq->uq_key); 2740 error = EFAULT; 2741 break; 2742 } 2743 suword32(&rwlock->rw_blocked_readers, blocked_readers-1); 2744 if (blocked_readers == 1) { 2745 rv = fueword32(&rwlock->rw_state, &state); 2746 if (rv == -1) 2747 error = EFAULT; 2748 while (error == 0) { 2749 rv = casueword32(&rwlock->rw_state, state, 2750 &oldstate, state & ~URWLOCK_READ_WAITERS); 2751 if (rv == -1) { 2752 error = EFAULT; 2753 break; 2754 } 2755 if (oldstate == state) 2756 break; 2757 state = oldstate; 2758 error = umtxq_check_susp(td); 2759 } 2760 } 2761 2762 umtxq_unbusy_unlocked(&uq->uq_key); 2763 if (error != 0) 2764 break; 2765 } 2766 umtx_key_release(&uq->uq_key); 2767 if (error == ERESTART) 2768 error = EINTR; 2769 return (error); 2770 } 2771 2772 static int 2773 do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout) 2774 { 2775 struct abs_timeout timo; 2776 struct umtx_q *uq; 2777 uint32_t flags; 2778 int32_t state, oldstate; 2779 int32_t blocked_writers; 2780 int32_t blocked_readers; 2781 int error, rv; 2782 2783 uq = td->td_umtxq; 2784 error = fueword32(&rwlock->rw_flags, &flags); 2785 if (error == -1) 2786 return (EFAULT); 2787 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2788 if (error != 0) 2789 return (error); 2790 2791 if (timeout != NULL) 2792 abs_timeout_init2(&timo, timeout); 2793 2794 blocked_readers = 0; 2795 for (;;) { 2796 rv = fueword32(&rwlock->rw_state, &state); 2797 if (rv == -1) { 2798 umtx_key_release(&uq->uq_key); 2799 return (EFAULT); 2800 } 2801 while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) { 2802 rv = casueword32(&rwlock->rw_state, state, 2803 &oldstate, state | URWLOCK_WRITE_OWNER); 2804 if (rv == -1) { 2805 umtx_key_release(&uq->uq_key); 2806 return (EFAULT); 2807 } 2808 if (oldstate == state) { 2809 umtx_key_release(&uq->uq_key); 2810 return (0); 2811 } 2812 state = oldstate; 2813 error = umtxq_check_susp(td); 2814 if (error != 0) 2815 break; 2816 } 2817 2818 if (error) { 2819 if (!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) && 2820 blocked_readers != 0) { 2821 umtxq_lock(&uq->uq_key); 2822 umtxq_busy(&uq->uq_key); 2823 
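/*
 * An aborting writer may itself be the thread that set
 * URWLOCK_WRITE_WAITERS, the very bit that keeps incoming readers
 * out of the lock.  If the lock is now neither owned nor wanted by
 * any writer but readers were seen blocked, wake them all so that
 * they re-evaluate the state instead of sleeping forever on a bit
 * that no remaining writer will clear.
 */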
umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE); 2824 umtxq_unbusy(&uq->uq_key); 2825 umtxq_unlock(&uq->uq_key); 2826 } 2827 2828 break; 2829 } 2830 2831 /* grab monitor lock */ 2832 umtxq_lock(&uq->uq_key); 2833 umtxq_busy(&uq->uq_key); 2834 umtxq_unlock(&uq->uq_key); 2835 2836 /* 2837 * re-read the state, in case it changed between the try-lock above 2838 * and the check below 2839 */ 2840 rv = fueword32(&rwlock->rw_state, &state); 2841 if (rv == -1) 2842 error = EFAULT; 2843 2844 while (error == 0 && ((state & URWLOCK_WRITE_OWNER) || 2845 URWLOCK_READER_COUNT(state) != 0) && 2846 (state & URWLOCK_WRITE_WAITERS) == 0) { 2847 rv = casueword32(&rwlock->rw_state, state, 2848 &oldstate, state | URWLOCK_WRITE_WAITERS); 2849 if (rv == -1) { 2850 error = EFAULT; 2851 break; 2852 } 2853 if (oldstate == state) 2854 goto sleep; 2855 state = oldstate; 2856 error = umtxq_check_susp(td); 2857 if (error != 0) 2858 break; 2859 } 2860 if (error != 0) { 2861 umtxq_unbusy_unlocked(&uq->uq_key); 2862 break; 2863 } 2864 2865 if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) { 2866 umtxq_unbusy_unlocked(&uq->uq_key); 2867 error = umtxq_check_susp(td); 2868 if (error != 0) 2869 break; 2870 continue; 2871 } 2872 sleep: 2873 rv = fueword32(&rwlock->rw_blocked_writers, 2874 &blocked_writers); 2875 if (rv == -1) { 2876 umtxq_unbusy_unlocked(&uq->uq_key); 2877 error = EFAULT; 2878 break; 2879 } 2880 suword32(&rwlock->rw_blocked_writers, blocked_writers+1); 2881 2882 while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) { 2883 umtxq_lock(&uq->uq_key); 2884 umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE); 2885 umtxq_unbusy(&uq->uq_key); 2886 2887 error = umtxq_sleep(uq, "uwrlck", timeout == NULL ? 2888 NULL : &timo); 2889 2890 umtxq_busy(&uq->uq_key); 2891 umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE); 2892 umtxq_unlock(&uq->uq_key); 2893 if (error) 2894 break; 2895 rv = fueword32(&rwlock->rw_state, &state); 2896 if (rv == -1) { 2897 error = EFAULT; 2898 break; 2899 } 2900 } 2901 2902 rv = fueword32(&rwlock->rw_blocked_writers, 2903 &blocked_writers); 2904 if (rv == -1) { 2905 umtxq_unbusy_unlocked(&uq->uq_key); 2906 error = EFAULT; 2907 break; 2908 } 2909 suword32(&rwlock->rw_blocked_writers, blocked_writers-1); 2910 if (blocked_writers == 1) { 2911 rv = fueword32(&rwlock->rw_state, &state); 2912 if (rv == -1) { 2913 umtxq_unbusy_unlocked(&uq->uq_key); 2914 error = EFAULT; 2915 break; 2916 } 2917 for (;;) { 2918 rv = casueword32(&rwlock->rw_state, state, 2919 &oldstate, state & ~URWLOCK_WRITE_WAITERS); 2920 if (rv == -1) { 2921 error = EFAULT; 2922 break; 2923 } 2924 if (oldstate == state) 2925 break; 2926 state = oldstate; 2927 error = umtxq_check_susp(td); 2928 /* 2929 * We are leaving the URWLOCK_WRITE_WAITERS 2930 * behind, but this should not harm the 2931 * correctness. 
2932 */ 2933 if (error != 0) 2934 break; 2935 } 2936 rv = fueword32(&rwlock->rw_blocked_readers, 2937 &blocked_readers); 2938 if (rv == -1) { 2939 umtxq_unbusy_unlocked(&uq->uq_key); 2940 error = EFAULT; 2941 break; 2942 } 2943 } else 2944 blocked_readers = 0; 2945 2946 umtxq_unbusy_unlocked(&uq->uq_key); 2947 } 2948 2949 umtx_key_release(&uq->uq_key); 2950 if (error == ERESTART) 2951 error = EINTR; 2952 return (error); 2953 } 2954 2955 static int 2956 do_rw_unlock(struct thread *td, struct urwlock *rwlock) 2957 { 2958 struct umtx_q *uq; 2959 uint32_t flags; 2960 int32_t state, oldstate; 2961 int error, rv, q, count; 2962 2963 uq = td->td_umtxq; 2964 error = fueword32(&rwlock->rw_flags, &flags); 2965 if (error == -1) 2966 return (EFAULT); 2967 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2968 if (error != 0) 2969 return (error); 2970 2971 error = fueword32(&rwlock->rw_state, &state); 2972 if (error == -1) { 2973 error = EFAULT; 2974 goto out; 2975 } 2976 if (state & URWLOCK_WRITE_OWNER) { 2977 for (;;) { 2978 rv = casueword32(&rwlock->rw_state, state, 2979 &oldstate, state & ~URWLOCK_WRITE_OWNER); 2980 if (rv == -1) { 2981 error = EFAULT; 2982 goto out; 2983 } 2984 if (oldstate != state) { 2985 state = oldstate; 2986 if (!(oldstate & URWLOCK_WRITE_OWNER)) { 2987 error = EPERM; 2988 goto out; 2989 } 2990 error = umtxq_check_susp(td); 2991 if (error != 0) 2992 goto out; 2993 } else 2994 break; 2995 } 2996 } else if (URWLOCK_READER_COUNT(state) != 0) { 2997 for (;;) { 2998 rv = casueword32(&rwlock->rw_state, state, 2999 &oldstate, state - 1); 3000 if (rv == -1) { 3001 error = EFAULT; 3002 goto out; 3003 } 3004 if (oldstate != state) { 3005 state = oldstate; 3006 if (URWLOCK_READER_COUNT(oldstate) == 0) { 3007 error = EPERM; 3008 goto out; 3009 } 3010 error = umtxq_check_susp(td); 3011 if (error != 0) 3012 goto out; 3013 } else 3014 break; 3015 } 3016 } else { 3017 error = EPERM; 3018 goto out; 3019 } 3020 3021 count = 0; 3022 3023 if (!(flags & URWLOCK_PREFER_READER)) { 3024 if (state & URWLOCK_WRITE_WAITERS) { 3025 count = 1; 3026 q = UMTX_EXCLUSIVE_QUEUE; 3027 } else if (state & URWLOCK_READ_WAITERS) { 3028 count = INT_MAX; 3029 q = UMTX_SHARED_QUEUE; 3030 } 3031 } else { 3032 if (state & URWLOCK_READ_WAITERS) { 3033 count = INT_MAX; 3034 q = UMTX_SHARED_QUEUE; 3035 } else if (state & URWLOCK_WRITE_WAITERS) { 3036 count = 1; 3037 q = UMTX_EXCLUSIVE_QUEUE; 3038 } 3039 } 3040 3041 if (count) { 3042 umtxq_lock(&uq->uq_key); 3043 umtxq_busy(&uq->uq_key); 3044 umtxq_signal_queue(&uq->uq_key, count, q); 3045 umtxq_unbusy(&uq->uq_key); 3046 umtxq_unlock(&uq->uq_key); 3047 } 3048 out: 3049 umtx_key_release(&uq->uq_key); 3050 return (error); 3051 } 3052 3053 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3054 static int 3055 do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout) 3056 { 3057 struct abs_timeout timo; 3058 struct umtx_q *uq; 3059 uint32_t flags, count, count1; 3060 int error, rv; 3061 3062 uq = td->td_umtxq; 3063 error = fueword32(&sem->_flags, &flags); 3064 if (error == -1) 3065 return (EFAULT); 3066 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3067 if (error != 0) 3068 return (error); 3069 3070 if (timeout != NULL) 3071 abs_timeout_init2(&timo, timeout); 3072 3073 umtxq_lock(&uq->uq_key); 3074 umtxq_busy(&uq->uq_key); 3075 umtxq_insert(uq); 3076 umtxq_unlock(&uq->uq_key); 3077 rv = casueword32(&sem->_has_waiters, 0, &count1, 1); 3078 if (rv == 0) 3079 rv = fueword32(&sem->_count, &count); 3080 if 
(rv == -1 || count != 0) { 3081 umtxq_lock(&uq->uq_key); 3082 umtxq_unbusy(&uq->uq_key); 3083 umtxq_remove(uq); 3084 umtxq_unlock(&uq->uq_key); 3085 umtx_key_release(&uq->uq_key); 3086 return (rv == -1 ? EFAULT : 0); 3087 } 3088 umtxq_lock(&uq->uq_key); 3089 umtxq_unbusy(&uq->uq_key); 3090 3091 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 3092 3093 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3094 error = 0; 3095 else { 3096 umtxq_remove(uq); 3097 /* A relative timeout cannot be restarted. */ 3098 if (error == ERESTART && timeout != NULL && 3099 (timeout->_flags & UMTX_ABSTIME) == 0) 3100 error = EINTR; 3101 } 3102 umtxq_unlock(&uq->uq_key); 3103 umtx_key_release(&uq->uq_key); 3104 return (error); 3105 } 3106 3107 /* 3108 * Signal a userland semaphore. 3109 */ 3110 static int 3111 do_sem_wake(struct thread *td, struct _usem *sem) 3112 { 3113 struct umtx_key key; 3114 int error, cnt; 3115 uint32_t flags; 3116 3117 error = fueword32(&sem->_flags, &flags); 3118 if (error == -1) 3119 return (EFAULT); 3120 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3121 return (error); 3122 umtxq_lock(&key); 3123 umtxq_busy(&key); 3124 cnt = umtxq_count(&key); 3125 if (cnt > 0) { 3126 umtxq_signal(&key, 1); 3127 /* 3128 * Check if count is greater than 0, this means the memory is 3129 * still being referenced by user code, so we can safely 3130 * update _has_waiters flag. 3131 */ 3132 if (cnt == 1) { 3133 umtxq_unlock(&key); 3134 error = suword32(&sem->_has_waiters, 0); 3135 umtxq_lock(&key); 3136 if (error == -1) 3137 error = EFAULT; 3138 } 3139 } 3140 umtxq_unbusy(&key); 3141 umtxq_unlock(&key); 3142 umtx_key_release(&key); 3143 return (error); 3144 } 3145 #endif 3146 3147 static int 3148 do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout) 3149 { 3150 struct abs_timeout timo; 3151 struct umtx_q *uq; 3152 uint32_t count, flags; 3153 int error, rv; 3154 3155 uq = td->td_umtxq; 3156 flags = fuword32(&sem->_flags); 3157 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3158 if (error != 0) 3159 return (error); 3160 3161 if (timeout != NULL) 3162 abs_timeout_init2(&timo, timeout); 3163 3164 umtxq_lock(&uq->uq_key); 3165 umtxq_busy(&uq->uq_key); 3166 umtxq_insert(uq); 3167 umtxq_unlock(&uq->uq_key); 3168 rv = fueword32(&sem->_count, &count); 3169 if (rv == -1) { 3170 umtxq_lock(&uq->uq_key); 3171 umtxq_unbusy(&uq->uq_key); 3172 umtxq_remove(uq); 3173 umtxq_unlock(&uq->uq_key); 3174 umtx_key_release(&uq->uq_key); 3175 return (EFAULT); 3176 } 3177 for (;;) { 3178 if (USEM_COUNT(count) != 0) { 3179 umtxq_lock(&uq->uq_key); 3180 umtxq_unbusy(&uq->uq_key); 3181 umtxq_remove(uq); 3182 umtxq_unlock(&uq->uq_key); 3183 umtx_key_release(&uq->uq_key); 3184 return (0); 3185 } 3186 if (count == USEM_HAS_WAITERS) 3187 break; 3188 rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS); 3189 if (rv == -1) { 3190 umtxq_lock(&uq->uq_key); 3191 umtxq_unbusy(&uq->uq_key); 3192 umtxq_remove(uq); 3193 umtxq_unlock(&uq->uq_key); 3194 umtx_key_release(&uq->uq_key); 3195 return (EFAULT); 3196 } 3197 if (count == 0) 3198 break; 3199 } 3200 umtxq_lock(&uq->uq_key); 3201 umtxq_unbusy(&uq->uq_key); 3202 3203 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo); 3204 3205 if ((uq->uq_flags & UQF_UMTXQ) == 0) 3206 error = 0; 3207 else { 3208 umtxq_remove(uq); 3209 /* A relative timeout cannot be restarted. 
*/ 3210 if (error == ERESTART && timeout != NULL && 3211 (timeout->_flags & UMTX_ABSTIME) == 0) 3212 error = EINTR; 3213 } 3214 umtxq_unlock(&uq->uq_key); 3215 umtx_key_release(&uq->uq_key); 3216 return (error); 3217 } 3218 3219 /* 3220 * Signal a userland semaphore. 3221 */ 3222 static int 3223 do_sem2_wake(struct thread *td, struct _usem2 *sem) 3224 { 3225 struct umtx_key key; 3226 int error, cnt, rv; 3227 uint32_t count, flags; 3228 3229 rv = fueword32(&sem->_flags, &flags); 3230 if (rv == -1) 3231 return (EFAULT); 3232 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3233 return (error); 3234 umtxq_lock(&key); 3235 umtxq_busy(&key); 3236 cnt = umtxq_count(&key); 3237 if (cnt > 0) { 3238 umtxq_signal(&key, 1); 3239 3240 /* 3241 * If this was the last sleeping thread, clear the waiters 3242 * flag in _count. 3243 */ 3244 if (cnt == 1) { 3245 umtxq_unlock(&key); 3246 rv = fueword32(&sem->_count, &count); 3247 while (rv != -1 && count & USEM_HAS_WAITERS) 3248 rv = casueword32(&sem->_count, count, &count, 3249 count & ~USEM_HAS_WAITERS); 3250 if (rv == -1) 3251 error = EFAULT; 3252 umtxq_lock(&key); 3253 } 3254 } 3255 umtxq_unbusy(&key); 3256 umtxq_unlock(&key); 3257 umtx_key_release(&key); 3258 return (error); 3259 } 3260 3261 inline int 3262 umtx_copyin_timeout(const void *addr, struct timespec *tsp) 3263 { 3264 int error; 3265 3266 error = copyin(addr, tsp, sizeof(struct timespec)); 3267 if (error == 0) { 3268 if (tsp->tv_sec < 0 || 3269 tsp->tv_nsec >= 1000000000 || 3270 tsp->tv_nsec < 0) 3271 error = EINVAL; 3272 } 3273 return (error); 3274 } 3275 3276 static inline int 3277 umtx_copyin_umtx_time(const void *addr, size_t size, struct _umtx_time *tp) 3278 { 3279 int error; 3280 3281 if (size <= sizeof(struct timespec)) { 3282 tp->_clockid = CLOCK_REALTIME; 3283 tp->_flags = 0; 3284 error = copyin(addr, &tp->_timeout, sizeof(struct timespec)); 3285 } else 3286 error = copyin(addr, tp, sizeof(struct _umtx_time)); 3287 if (error != 0) 3288 return (error); 3289 if (tp->_timeout.tv_sec < 0 || 3290 tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0) 3291 return (EINVAL); 3292 return (0); 3293 } 3294 3295 static int 3296 __umtx_op_unimpl(struct thread *td, struct _umtx_op_args *uap) 3297 { 3298 3299 return (EOPNOTSUPP); 3300 } 3301 3302 static int 3303 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap) 3304 { 3305 struct _umtx_time timeout, *tm_p; 3306 int error; 3307 3308 if (uap->uaddr2 == NULL) 3309 tm_p = NULL; 3310 else { 3311 error = umtx_copyin_umtx_time( 3312 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3313 if (error != 0) 3314 return (error); 3315 tm_p = &timeout; 3316 } 3317 return (do_wait(td, uap->obj, uap->val, tm_p, 0, 0)); 3318 } 3319 3320 static int 3321 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap) 3322 { 3323 struct _umtx_time timeout, *tm_p; 3324 int error; 3325 3326 if (uap->uaddr2 == NULL) 3327 tm_p = NULL; 3328 else { 3329 error = umtx_copyin_umtx_time( 3330 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3331 if (error != 0) 3332 return (error); 3333 tm_p = &timeout; 3334 } 3335 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); 3336 } 3337 3338 static int 3339 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap) 3340 { 3341 struct _umtx_time *tm_p, timeout; 3342 int error; 3343 3344 if (uap->uaddr2 == NULL) 3345 tm_p = NULL; 3346 else { 3347 error = umtx_copyin_umtx_time( 3348 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3349 if (error != 0) 3350 return (error); 3351 tm_p = &timeout; 
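/*
 * Note the timeout convention used by the wait family of operations:
 * uaddr2 points at either a plain struct timespec or a full struct
 * _umtx_time, and uaddr1 carries the byte size of that object, which
 * is how umtx_copyin_umtx_time() above tells the two layouts apart.
 * A minimal userland sketch (illustrative only; error handling is
 * omitted and the variable names are invented for the example).  The
 * thread sleeps only while futex_word still equals expected_value:
 *
 *	struct _umtx_time ut;
 *
 *	ut._timeout.tv_sec = 1;
 *	ut._timeout.tv_nsec = 0;
 *	ut._flags = 0;		(0 means a relative timeout)
 *	ut._clockid = CLOCK_MONOTONIC;
 *	_umtx_op(&futex_word, UMTX_OP_WAIT_UINT_PRIVATE, expected_value,
 *	    (void *)sizeof(ut), &ut);
 *
 * A matching wakeup of every sleeper would then be:
 *
 *	_umtx_op(&futex_word, UMTX_OP_WAKE_PRIVATE, INT_MAX, NULL, NULL);
 */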
3352 } 3353 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1)); 3354 } 3355 3356 static int 3357 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap) 3358 { 3359 3360 return (kern_umtx_wake(td, uap->obj, uap->val, 0)); 3361 } 3362 3363 #define BATCH_SIZE 128 3364 static int 3365 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap) 3366 { 3367 char *uaddrs[BATCH_SIZE], **upp; 3368 int count, error, i, pos, tocopy; 3369 3370 upp = (char **)uap->obj; 3371 error = 0; 3372 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 3373 pos += tocopy) { 3374 tocopy = MIN(count, BATCH_SIZE); 3375 error = copyin(upp + pos, uaddrs, tocopy * sizeof(char *)); 3376 if (error != 0) 3377 break; 3378 for (i = 0; i < tocopy; ++i) 3379 kern_umtx_wake(td, uaddrs[i], INT_MAX, 1); 3380 maybe_yield(); 3381 } 3382 return (error); 3383 } 3384 3385 static int 3386 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap) 3387 { 3388 3389 return (kern_umtx_wake(td, uap->obj, uap->val, 1)); 3390 } 3391 3392 static int 3393 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap) 3394 { 3395 struct _umtx_time *tm_p, timeout; 3396 int error; 3397 3398 /* Allow a null timespec (wait forever). */ 3399 if (uap->uaddr2 == NULL) 3400 tm_p = NULL; 3401 else { 3402 error = umtx_copyin_umtx_time( 3403 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3404 if (error != 0) 3405 return (error); 3406 tm_p = &timeout; 3407 } 3408 return (do_lock_umutex(td, uap->obj, tm_p, 0)); 3409 } 3410 3411 static int 3412 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap) 3413 { 3414 3415 return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY)); 3416 } 3417 3418 static int 3419 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap) 3420 { 3421 struct _umtx_time *tm_p, timeout; 3422 int error; 3423 3424 /* Allow a null timespec (wait forever). */ 3425 if (uap->uaddr2 == NULL) 3426 tm_p = NULL; 3427 else { 3428 error = umtx_copyin_umtx_time( 3429 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3430 if (error != 0) 3431 return (error); 3432 tm_p = &timeout; 3433 } 3434 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT)); 3435 } 3436 3437 static int 3438 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap) 3439 { 3440 3441 return (do_wake_umutex(td, uap->obj)); 3442 } 3443 3444 static int 3445 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap) 3446 { 3447 3448 return (do_unlock_umutex(td, uap->obj, false)); 3449 } 3450 3451 static int 3452 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap) 3453 { 3454 3455 return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1)); 3456 } 3457 3458 static int 3459 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap) 3460 { 3461 struct timespec *ts, timeout; 3462 int error; 3463 3464 /* Allow a null timespec (wait forever). 
*/ 3465 if (uap->uaddr2 == NULL) 3466 ts = NULL; 3467 else { 3468 error = umtx_copyin_timeout(uap->uaddr2, &timeout); 3469 if (error != 0) 3470 return (error); 3471 ts = &timeout; 3472 } 3473 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 3474 } 3475 3476 static int 3477 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap) 3478 { 3479 3480 return (do_cv_signal(td, uap->obj)); 3481 } 3482 3483 static int 3484 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap) 3485 { 3486 3487 return (do_cv_broadcast(td, uap->obj)); 3488 } 3489 3490 static int 3491 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap) 3492 { 3493 struct _umtx_time timeout; 3494 int error; 3495 3496 /* Allow a null timespec (wait forever). */ 3497 if (uap->uaddr2 == NULL) { 3498 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 3499 } else { 3500 error = umtx_copyin_umtx_time(uap->uaddr2, 3501 (size_t)uap->uaddr1, &timeout); 3502 if (error != 0) 3503 return (error); 3504 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 3505 } 3506 return (error); 3507 } 3508 3509 static int 3510 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap) 3511 { 3512 struct _umtx_time timeout; 3513 int error; 3514 3515 /* Allow a null timespec (wait forever). */ 3516 if (uap->uaddr2 == NULL) { 3517 error = do_rw_wrlock(td, uap->obj, 0); 3518 } else { 3519 error = umtx_copyin_umtx_time(uap->uaddr2, 3520 (size_t)uap->uaddr1, &timeout); 3521 if (error != 0) 3522 return (error); 3523 3524 error = do_rw_wrlock(td, uap->obj, &timeout); 3525 } 3526 return (error); 3527 } 3528 3529 static int 3530 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap) 3531 { 3532 3533 return (do_rw_unlock(td, uap->obj)); 3534 } 3535 3536 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3537 static int 3538 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap) 3539 { 3540 struct _umtx_time *tm_p, timeout; 3541 int error; 3542 3543 /* Allow a null timespec (wait forever). */ 3544 if (uap->uaddr2 == NULL) 3545 tm_p = NULL; 3546 else { 3547 error = umtx_copyin_umtx_time( 3548 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3549 if (error != 0) 3550 return (error); 3551 tm_p = &timeout; 3552 } 3553 return (do_sem_wait(td, uap->obj, tm_p)); 3554 } 3555 3556 static int 3557 __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap) 3558 { 3559 3560 return (do_sem_wake(td, uap->obj)); 3561 } 3562 #endif 3563 3564 static int 3565 __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap) 3566 { 3567 3568 return (do_wake2_umutex(td, uap->obj, uap->val)); 3569 } 3570 3571 static int 3572 __umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap) 3573 { 3574 struct _umtx_time *tm_p, timeout; 3575 int error; 3576 3577 /* Allow a null timespec (wait forever). 
*/ 3578 if (uap->uaddr2 == NULL) 3579 tm_p = NULL; 3580 else { 3581 error = umtx_copyin_umtx_time( 3582 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3583 if (error != 0) 3584 return (error); 3585 tm_p = &timeout; 3586 } 3587 return (do_sem2_wait(td, uap->obj, tm_p)); 3588 } 3589 3590 static int 3591 __umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap) 3592 { 3593 3594 return (do_sem2_wake(td, uap->obj)); 3595 } 3596 3597 #define USHM_OBJ_UMTX(o) \ 3598 ((struct umtx_shm_obj_list *)(&(o)->umtx_data)) 3599 3600 #define USHMF_REG_LINKED 0x0001 3601 #define USHMF_OBJ_LINKED 0x0002 3602 struct umtx_shm_reg { 3603 TAILQ_ENTRY(umtx_shm_reg) ushm_reg_link; 3604 LIST_ENTRY(umtx_shm_reg) ushm_obj_link; 3605 struct umtx_key ushm_key; 3606 struct ucred *ushm_cred; 3607 struct shmfd *ushm_obj; 3608 u_int ushm_refcnt; 3609 u_int ushm_flags; 3610 }; 3611 3612 LIST_HEAD(umtx_shm_obj_list, umtx_shm_reg); 3613 TAILQ_HEAD(umtx_shm_reg_head, umtx_shm_reg); 3614 3615 static uma_zone_t umtx_shm_reg_zone; 3616 static struct umtx_shm_reg_head umtx_shm_registry[UMTX_CHAINS]; 3617 static struct mtx umtx_shm_lock; 3618 static struct umtx_shm_reg_head umtx_shm_reg_delfree = 3619 TAILQ_HEAD_INITIALIZER(umtx_shm_reg_delfree); 3620 3621 static void umtx_shm_free_reg(struct umtx_shm_reg *reg); 3622 3623 static void 3624 umtx_shm_reg_delfree_tq(void *context __unused, int pending __unused) 3625 { 3626 struct umtx_shm_reg_head d; 3627 struct umtx_shm_reg *reg, *reg1; 3628 3629 TAILQ_INIT(&d); 3630 mtx_lock(&umtx_shm_lock); 3631 TAILQ_CONCAT(&d, &umtx_shm_reg_delfree, ushm_reg_link); 3632 mtx_unlock(&umtx_shm_lock); 3633 TAILQ_FOREACH_SAFE(reg, &d, ushm_reg_link, reg1) { 3634 TAILQ_REMOVE(&d, reg, ushm_reg_link); 3635 umtx_shm_free_reg(reg); 3636 } 3637 } 3638 3639 static struct task umtx_shm_reg_delfree_task = 3640 TASK_INITIALIZER(0, umtx_shm_reg_delfree_tq, NULL); 3641 3642 static struct umtx_shm_reg * 3643 umtx_shm_find_reg_locked(const struct umtx_key *key) 3644 { 3645 struct umtx_shm_reg *reg; 3646 struct umtx_shm_reg_head *reg_head; 3647 3648 KASSERT(key->shared, ("umtx_p_find_rg: private key")); 3649 mtx_assert(&umtx_shm_lock, MA_OWNED); 3650 reg_head = &umtx_shm_registry[key->hash]; 3651 TAILQ_FOREACH(reg, reg_head, ushm_reg_link) { 3652 KASSERT(reg->ushm_key.shared, 3653 ("non-shared key on reg %p %d", reg, reg->ushm_key.shared)); 3654 if (reg->ushm_key.info.shared.object == 3655 key->info.shared.object && 3656 reg->ushm_key.info.shared.offset == 3657 key->info.shared.offset) { 3658 KASSERT(reg->ushm_key.type == TYPE_SHM, ("TYPE_USHM")); 3659 KASSERT(reg->ushm_refcnt > 0, 3660 ("reg %p refcnt 0 onlist", reg)); 3661 KASSERT((reg->ushm_flags & USHMF_REG_LINKED) != 0, 3662 ("reg %p not linked", reg)); 3663 reg->ushm_refcnt++; 3664 return (reg); 3665 } 3666 } 3667 return (NULL); 3668 } 3669 3670 static struct umtx_shm_reg * 3671 umtx_shm_find_reg(const struct umtx_key *key) 3672 { 3673 struct umtx_shm_reg *reg; 3674 3675 mtx_lock(&umtx_shm_lock); 3676 reg = umtx_shm_find_reg_locked(key); 3677 mtx_unlock(&umtx_shm_lock); 3678 return (reg); 3679 } 3680 3681 static void 3682 umtx_shm_free_reg(struct umtx_shm_reg *reg) 3683 { 3684 3685 chgumtxcnt(reg->ushm_cred->cr_ruidinfo, -1, 0); 3686 crfree(reg->ushm_cred); 3687 shm_drop(reg->ushm_obj); 3688 uma_zfree(umtx_shm_reg_zone, reg); 3689 } 3690 3691 static bool 3692 umtx_shm_unref_reg_locked(struct umtx_shm_reg *reg, bool force) 3693 { 3694 bool res; 3695 3696 mtx_assert(&umtx_shm_lock, MA_OWNED); 3697 KASSERT(reg->ushm_refcnt > 0, ("ushm_reg %p refcnt 0", 
reg)); 3698 reg->ushm_refcnt--; 3699 res = reg->ushm_refcnt == 0; 3700 if (res || force) { 3701 if ((reg->ushm_flags & USHMF_REG_LINKED) != 0) { 3702 TAILQ_REMOVE(&umtx_shm_registry[reg->ushm_key.hash], 3703 reg, ushm_reg_link); 3704 reg->ushm_flags &= ~USHMF_REG_LINKED; 3705 } 3706 if ((reg->ushm_flags & USHMF_OBJ_LINKED) != 0) { 3707 LIST_REMOVE(reg, ushm_obj_link); 3708 reg->ushm_flags &= ~USHMF_OBJ_LINKED; 3709 } 3710 } 3711 return (res); 3712 } 3713 3714 static void 3715 umtx_shm_unref_reg(struct umtx_shm_reg *reg, bool force) 3716 { 3717 vm_object_t object; 3718 bool dofree; 3719 3720 if (force) { 3721 object = reg->ushm_obj->shm_object; 3722 VM_OBJECT_WLOCK(object); 3723 object->flags |= OBJ_UMTXDEAD; 3724 VM_OBJECT_WUNLOCK(object); 3725 } 3726 mtx_lock(&umtx_shm_lock); 3727 dofree = umtx_shm_unref_reg_locked(reg, force); 3728 mtx_unlock(&umtx_shm_lock); 3729 if (dofree) 3730 umtx_shm_free_reg(reg); 3731 } 3732 3733 void 3734 umtx_shm_object_init(vm_object_t object) 3735 { 3736 3737 LIST_INIT(USHM_OBJ_UMTX(object)); 3738 } 3739 3740 void 3741 umtx_shm_object_terminated(vm_object_t object) 3742 { 3743 struct umtx_shm_reg *reg, *reg1; 3744 bool dofree; 3745 3746 dofree = false; 3747 mtx_lock(&umtx_shm_lock); 3748 LIST_FOREACH_SAFE(reg, USHM_OBJ_UMTX(object), ushm_obj_link, reg1) { 3749 if (umtx_shm_unref_reg_locked(reg, true)) { 3750 TAILQ_INSERT_TAIL(&umtx_shm_reg_delfree, reg, 3751 ushm_reg_link); 3752 dofree = true; 3753 } 3754 } 3755 mtx_unlock(&umtx_shm_lock); 3756 if (dofree) 3757 taskqueue_enqueue(taskqueue_thread, &umtx_shm_reg_delfree_task); 3758 } 3759 3760 static int 3761 umtx_shm_create_reg(struct thread *td, const struct umtx_key *key, 3762 struct umtx_shm_reg **res) 3763 { 3764 struct umtx_shm_reg *reg, *reg1; 3765 struct ucred *cred; 3766 int error; 3767 3768 reg = umtx_shm_find_reg(key); 3769 if (reg != NULL) { 3770 *res = reg; 3771 return (0); 3772 } 3773 cred = td->td_ucred; 3774 if (!chgumtxcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_UMTXP))) 3775 return (ENOMEM); 3776 reg = uma_zalloc(umtx_shm_reg_zone, M_WAITOK | M_ZERO); 3777 reg->ushm_refcnt = 1; 3778 bcopy(key, &reg->ushm_key, sizeof(*key)); 3779 reg->ushm_obj = shm_alloc(td->td_ucred, O_RDWR); 3780 reg->ushm_cred = crhold(cred); 3781 error = shm_dotruncate(reg->ushm_obj, PAGE_SIZE); 3782 if (error != 0) { 3783 umtx_shm_free_reg(reg); 3784 return (error); 3785 } 3786 mtx_lock(&umtx_shm_lock); 3787 reg1 = umtx_shm_find_reg_locked(key); 3788 if (reg1 != NULL) { 3789 mtx_unlock(&umtx_shm_lock); 3790 umtx_shm_free_reg(reg); 3791 *res = reg1; 3792 return (0); 3793 } 3794 reg->ushm_refcnt++; 3795 TAILQ_INSERT_TAIL(&umtx_shm_registry[key->hash], reg, ushm_reg_link); 3796 LIST_INSERT_HEAD(USHM_OBJ_UMTX(key->info.shared.object), reg, 3797 ushm_obj_link); 3798 reg->ushm_flags = USHMF_REG_LINKED | USHMF_OBJ_LINKED; 3799 mtx_unlock(&umtx_shm_lock); 3800 *res = reg; 3801 return (0); 3802 } 3803 3804 static int 3805 umtx_shm_alive(struct thread *td, void *addr) 3806 { 3807 vm_map_t map; 3808 vm_map_entry_t entry; 3809 vm_object_t object; 3810 vm_pindex_t pindex; 3811 vm_prot_t prot; 3812 int res, ret; 3813 boolean_t wired; 3814 3815 map = &td->td_proc->p_vmspace->vm_map; 3816 res = vm_map_lookup(&map, (uintptr_t)addr, VM_PROT_READ, &entry, 3817 &object, &pindex, &prot, &wired); 3818 if (res != KERN_SUCCESS) 3819 return (EFAULT); 3820 if (object == NULL) 3821 ret = EINVAL; 3822 else 3823 ret = (object->flags & OBJ_UMTXDEAD) != 0 ?
ENOTTY : 0; 3824 vm_map_lookup_done(map, entry); 3825 return (ret); 3826 } 3827 3828 static void 3829 umtx_shm_init(void) 3830 { 3831 int i; 3832 3833 umtx_shm_reg_zone = uma_zcreate("umtx_shm", sizeof(struct umtx_shm_reg), 3834 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 3835 mtx_init(&umtx_shm_lock, "umtxshm", NULL, MTX_DEF); 3836 for (i = 0; i < nitems(umtx_shm_registry); i++) 3837 TAILQ_INIT(&umtx_shm_registry[i]); 3838 } 3839 3840 static int 3841 umtx_shm(struct thread *td, void *addr, u_int flags) 3842 { 3843 struct umtx_key key; 3844 struct umtx_shm_reg *reg; 3845 struct file *fp; 3846 int error, fd; 3847 3848 if (__bitcount(flags & (UMTX_SHM_CREAT | UMTX_SHM_LOOKUP | 3849 UMTX_SHM_DESTROY | UMTX_SHM_ALIVE)) != 1) 3850 return (EINVAL); 3851 if ((flags & UMTX_SHM_ALIVE) != 0) 3852 return (umtx_shm_alive(td, addr)); 3853 error = umtx_key_get(addr, TYPE_SHM, PROCESS_SHARE, &key); 3854 if (error != 0) 3855 return (error); 3856 KASSERT(key.shared == 1, ("non-shared key")); 3857 if ((flags & UMTX_SHM_CREAT) != 0) { 3858 error = umtx_shm_create_reg(td, &key, &reg); 3859 } else { 3860 reg = umtx_shm_find_reg(&key); 3861 if (reg == NULL) 3862 error = ESRCH; 3863 } 3864 umtx_key_release(&key); 3865 if (error != 0) 3866 return (error); 3867 KASSERT(reg != NULL, ("no reg")); 3868 if ((flags & UMTX_SHM_DESTROY) != 0) { 3869 umtx_shm_unref_reg(reg, true); 3870 } else { 3871 #if 0 3872 #ifdef MAC 3873 error = mac_posixshm_check_open(td->td_ucred, 3874 reg->ushm_obj, FFLAGS(O_RDWR)); 3875 if (error == 0) 3876 #endif 3877 error = shm_access(reg->ushm_obj, td->td_ucred, 3878 FFLAGS(O_RDWR)); 3879 if (error == 0) 3880 #endif 3881 error = falloc_caps(td, &fp, &fd, O_CLOEXEC, NULL); 3882 if (error == 0) { 3883 shm_hold(reg->ushm_obj); 3884 finit(fp, FFLAGS(O_RDWR), DTYPE_SHM, reg->ushm_obj, 3885 &shm_ops); 3886 td->td_retval[0] = fd; 3887 fdrop(fp, td); 3888 } 3889 } 3890 umtx_shm_unref_reg(reg, false); 3891 return (error); 3892 } 3893 3894 static int 3895 __umtx_op_shm(struct thread *td, struct _umtx_op_args *uap) 3896 { 3897 3898 return (umtx_shm(td, uap->uaddr1, uap->val)); 3899 } 3900 3901 static int 3902 umtx_robust_lists(struct thread *td, struct umtx_robust_lists_params *rbp) 3903 { 3904 3905 td->td_rb_list = rbp->robust_list_offset; 3906 td->td_rbp_list = rbp->robust_priv_list_offset; 3907 td->td_rb_inact = rbp->robust_inact_offset; 3908 return (0); 3909 } 3910 3911 static int 3912 __umtx_op_robust_lists(struct thread *td, struct _umtx_op_args *uap) 3913 { 3914 struct umtx_robust_lists_params rb; 3915 int error; 3916 3917 if (uap->val > sizeof(rb)) 3918 return (EINVAL); 3919 bzero(&rb, sizeof(rb)); 3920 error = copyin(uap->uaddr1, &rb, uap->val); 3921 if (error != 0) 3922 return (error); 3923 return (umtx_robust_lists(td, &rb)); 3924 } 3925 3926 typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap); 3927 3928 static const _umtx_op_func op_table[] = { 3929 [UMTX_OP_RESERVED0] = __umtx_op_unimpl, 3930 [UMTX_OP_RESERVED1] = __umtx_op_unimpl, 3931 [UMTX_OP_WAIT] = __umtx_op_wait, 3932 [UMTX_OP_WAKE] = __umtx_op_wake, 3933 [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex, 3934 [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex, 3935 [UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex, 3936 [UMTX_OP_SET_CEILING] = __umtx_op_set_ceiling, 3937 [UMTX_OP_CV_WAIT] = __umtx_op_cv_wait, 3938 [UMTX_OP_CV_SIGNAL] = __umtx_op_cv_signal, 3939 [UMTX_OP_CV_BROADCAST] = __umtx_op_cv_broadcast, 3940 [UMTX_OP_WAIT_UINT] = __umtx_op_wait_uint, 3941 [UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock, 3942
[UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock, 3943 [UMTX_OP_RW_UNLOCK] = __umtx_op_rw_unlock, 3944 [UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private, 3945 [UMTX_OP_WAKE_PRIVATE] = __umtx_op_wake_private, 3946 [UMTX_OP_MUTEX_WAIT] = __umtx_op_wait_umutex, 3947 [UMTX_OP_MUTEX_WAKE] = __umtx_op_wake_umutex, 3948 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3949 [UMTX_OP_SEM_WAIT] = __umtx_op_sem_wait, 3950 [UMTX_OP_SEM_WAKE] = __umtx_op_sem_wake, 3951 #else 3952 [UMTX_OP_SEM_WAIT] = __umtx_op_unimpl, 3953 [UMTX_OP_SEM_WAKE] = __umtx_op_unimpl, 3954 #endif 3955 [UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private, 3956 [UMTX_OP_MUTEX_WAKE2] = __umtx_op_wake2_umutex, 3957 [UMTX_OP_SEM2_WAIT] = __umtx_op_sem2_wait, 3958 [UMTX_OP_SEM2_WAKE] = __umtx_op_sem2_wake, 3959 [UMTX_OP_SHM] = __umtx_op_shm, 3960 [UMTX_OP_ROBUST_LISTS] = __umtx_op_robust_lists, 3961 }; 3962 3963 int 3964 sys__umtx_op(struct thread *td, struct _umtx_op_args *uap) 3965 { 3966 3967 if ((unsigned)uap->op < nitems(op_table)) 3968 return (*op_table[uap->op])(td, uap); 3969 return (EINVAL); 3970 } 3971 3972 #ifdef COMPAT_FREEBSD32 3973 3974 struct timespec32 { 3975 int32_t tv_sec; 3976 int32_t tv_nsec; 3977 }; 3978 3979 struct umtx_time32 { 3980 struct timespec32 timeout; 3981 uint32_t flags; 3982 uint32_t clockid; 3983 }; 3984 3985 static inline int 3986 umtx_copyin_timeout32(void *addr, struct timespec *tsp) 3987 { 3988 struct timespec32 ts32; 3989 int error; 3990 3991 error = copyin(addr, &ts32, sizeof(struct timespec32)); 3992 if (error == 0) { 3993 if (ts32.tv_sec < 0 || 3994 ts32.tv_nsec >= 1000000000 || 3995 ts32.tv_nsec < 0) 3996 error = EINVAL; 3997 else { 3998 tsp->tv_sec = ts32.tv_sec; 3999 tsp->tv_nsec = ts32.tv_nsec; 4000 } 4001 } 4002 return (error); 4003 } 4004 4005 static inline int 4006 umtx_copyin_umtx_time32(const void *addr, size_t size, struct _umtx_time *tp) 4007 { 4008 struct umtx_time32 t32; 4009 int error; 4010 4011 t32.clockid = CLOCK_REALTIME; 4012 t32.flags = 0; 4013 if (size <= sizeof(struct timespec32)) 4014 error = copyin(addr, &t32.timeout, sizeof(struct timespec32)); 4015 else 4016 error = copyin(addr, &t32, sizeof(struct umtx_time32)); 4017 if (error != 0) 4018 return (error); 4019 if (t32.timeout.tv_sec < 0 || 4020 t32.timeout.tv_nsec >= 1000000000 || t32.timeout.tv_nsec < 0) 4021 return (EINVAL); 4022 tp->_timeout.tv_sec = t32.timeout.tv_sec; 4023 tp->_timeout.tv_nsec = t32.timeout.tv_nsec; 4024 tp->_flags = t32.flags; 4025 tp->_clockid = t32.clockid; 4026 return (0); 4027 } 4028 4029 static int 4030 __umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 4031 { 4032 struct _umtx_time *tm_p, timeout; 4033 int error; 4034 4035 if (uap->uaddr2 == NULL) 4036 tm_p = NULL; 4037 else { 4038 error = umtx_copyin_umtx_time32(uap->uaddr2, 4039 (size_t)uap->uaddr1, &timeout); 4040 if (error != 0) 4041 return (error); 4042 tm_p = &timeout; 4043 } 4044 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); 4045 } 4046 4047 static int 4048 __umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap) 4049 { 4050 struct _umtx_time *tm_p, timeout; 4051 int error; 4052 4053 /* Allow a null timespec (wait forever). 
*/ 4054 if (uap->uaddr2 == NULL) 4055 tm_p = NULL; 4056 else { 4057 error = umtx_copyin_umtx_time(uap->uaddr2, 4058 (size_t)uap->uaddr1, &timeout); 4059 if (error != 0) 4060 return (error); 4061 tm_p = &timeout; 4062 } 4063 return (do_lock_umutex(td, uap->obj, tm_p, 0)); 4064 } 4065 4066 static int 4067 __umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap) 4068 { 4069 struct _umtx_time *tm_p, timeout; 4070 int error; 4071 4072 /* Allow a null timespec (wait forever). */ 4073 if (uap->uaddr2 == NULL) 4074 tm_p = NULL; 4075 else { 4076 error = umtx_copyin_umtx_time32(uap->uaddr2, 4077 (size_t)uap->uaddr1, &timeout); 4078 if (error != 0) 4079 return (error); 4080 tm_p = &timeout; 4081 } 4082 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT)); 4083 } 4084 4085 static int 4086 __umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 4087 { 4088 struct timespec *ts, timeout; 4089 int error; 4090 4091 /* Allow a null timespec (wait forever). */ 4092 if (uap->uaddr2 == NULL) 4093 ts = NULL; 4094 else { 4095 error = umtx_copyin_timeout32(uap->uaddr2, &timeout); 4096 if (error != 0) 4097 return (error); 4098 ts = &timeout; 4099 } 4100 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 4101 } 4102 4103 static int 4104 __umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap) 4105 { 4106 struct _umtx_time timeout; 4107 int error; 4108 4109 /* Allow a null timespec (wait forever). */ 4110 if (uap->uaddr2 == NULL) { 4111 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 4112 } else { 4113 error = umtx_copyin_umtx_time32(uap->uaddr2, 4114 (size_t)uap->uaddr1, &timeout); 4115 if (error != 0) 4116 return (error); 4117 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 4118 } 4119 return (error); 4120 } 4121 4122 static int 4123 __umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap) 4124 { 4125 struct _umtx_time timeout; 4126 int error; 4127 4128 /* Allow a null timespec (wait forever). */ 4129 if (uap->uaddr2 == NULL) { 4130 error = do_rw_wrlock(td, uap->obj, 0); 4131 } else { 4132 error = umtx_copyin_umtx_time32(uap->uaddr2, 4133 (size_t)uap->uaddr1, &timeout); 4134 if (error != 0) 4135 return (error); 4136 error = do_rw_wrlock(td, uap->obj, &timeout); 4137 } 4138 return (error); 4139 } 4140 4141 static int 4142 __umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap) 4143 { 4144 struct _umtx_time *tm_p, timeout; 4145 int error; 4146 4147 if (uap->uaddr2 == NULL) 4148 tm_p = NULL; 4149 else { 4150 error = umtx_copyin_umtx_time32( 4151 uap->uaddr2, (size_t)uap->uaddr1,&timeout); 4152 if (error != 0) 4153 return (error); 4154 tm_p = &timeout; 4155 } 4156 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1)); 4157 } 4158 4159 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4160 static int 4161 __umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 4162 { 4163 struct _umtx_time *tm_p, timeout; 4164 int error; 4165 4166 /* Allow a null timespec (wait forever). */ 4167 if (uap->uaddr2 == NULL) 4168 tm_p = NULL; 4169 else { 4170 error = umtx_copyin_umtx_time32(uap->uaddr2, 4171 (size_t)uap->uaddr1, &timeout); 4172 if (error != 0) 4173 return (error); 4174 tm_p = &timeout; 4175 } 4176 return (do_sem_wait(td, uap->obj, tm_p)); 4177 } 4178 #endif 4179 4180 static int 4181 __umtx_op_sem2_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 4182 { 4183 struct _umtx_time *tm_p, timeout; 4184 int error; 4185 4186 /* Allow a null timespec (wait forever). 
*/ 4187 if (uap->uaddr2 == NULL) 4188 tm_p = NULL; 4189 else { 4190 error = umtx_copyin_umtx_time32(uap->uaddr2, 4191 (size_t)uap->uaddr1, &timeout); 4192 if (error != 0) 4193 return (error); 4194 tm_p = &timeout; 4195 } 4196 return (do_sem2_wait(td, uap->obj, tm_p)); 4197 } 4198 4199 static int 4200 __umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap) 4201 { 4202 uint32_t uaddrs[BATCH_SIZE], **upp; 4203 int count, error, i, pos, tocopy; 4204 4205 upp = (uint32_t **)uap->obj; 4206 error = 0; 4207 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 4208 pos += tocopy) { 4209 tocopy = MIN(count, BATCH_SIZE); 4210 error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t)); 4211 if (error != 0) 4212 break; 4213 for (i = 0; i < tocopy; ++i) 4214 kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i], 4215 INT_MAX, 1); 4216 maybe_yield(); 4217 } 4218 return (error); 4219 } 4220 4221 struct umtx_robust_lists_params_compat32 { 4222 uint32_t robust_list_offset; 4223 uint32_t robust_priv_list_offset; 4224 uint32_t robust_inact_offset; 4225 }; 4226 4227 static int 4228 __umtx_op_robust_lists_compat32(struct thread *td, struct _umtx_op_args *uap) 4229 { 4230 struct umtx_robust_lists_params rb; 4231 struct umtx_robust_lists_params_compat32 rb32; 4232 int error; 4233 4234 if (uap->val > sizeof(rb32)) 4235 return (EINVAL); 4236 bzero(&rb, sizeof(rb)); 4237 bzero(&rb32, sizeof(rb32)); 4238 error = copyin(uap->uaddr1, &rb32, uap->val); 4239 if (error != 0) 4240 return (error); 4241 rb.robust_list_offset = rb32.robust_list_offset; 4242 rb.robust_priv_list_offset = rb32.robust_priv_list_offset; 4243 rb.robust_inact_offset = rb32.robust_inact_offset; 4244 return (umtx_robust_lists(td, &rb)); 4245 } 4246 4247 static const _umtx_op_func op_table_compat32[] = { 4248 [UMTX_OP_RESERVED0] = __umtx_op_unimpl, 4249 [UMTX_OP_RESERVED1] = __umtx_op_unimpl, 4250 [UMTX_OP_WAIT] = __umtx_op_wait_compat32, 4251 [UMTX_OP_WAKE] = __umtx_op_wake, 4252 [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex, 4253 [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex_compat32, 4254 [UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex, 4255 [UMTX_OP_SET_CEILING] = __umtx_op_set_ceiling, 4256 [UMTX_OP_CV_WAIT] = __umtx_op_cv_wait_compat32, 4257 [UMTX_OP_CV_SIGNAL] = __umtx_op_cv_signal, 4258 [UMTX_OP_CV_BROADCAST] = __umtx_op_cv_broadcast, 4259 [UMTX_OP_WAIT_UINT] = __umtx_op_wait_compat32, 4260 [UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock_compat32, 4261 [UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock_compat32, 4262 [UMTX_OP_RW_UNLOCK] = __umtx_op_rw_unlock, 4263 [UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private_compat32, 4264 [UMTX_OP_WAKE_PRIVATE] = __umtx_op_wake_private, 4265 [UMTX_OP_MUTEX_WAIT] = __umtx_op_wait_umutex_compat32, 4266 [UMTX_OP_MUTEX_WAKE] = __umtx_op_wake_umutex, 4267 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 4268 [UMTX_OP_SEM_WAIT] = __umtx_op_sem_wait_compat32, 4269 [UMTX_OP_SEM_WAKE] = __umtx_op_sem_wake, 4270 #else 4271 [UMTX_OP_SEM_WAIT] = __umtx_op_unimpl, 4272 [UMTX_OP_SEM_WAKE] = __umtx_op_unimpl, 4273 #endif 4274 [UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private32, 4275 [UMTX_OP_MUTEX_WAKE2] = __umtx_op_wake2_umutex, 4276 [UMTX_OP_SEM2_WAIT] = __umtx_op_sem2_wait_compat32, 4277 [UMTX_OP_SEM2_WAKE] = __umtx_op_sem2_wake, 4278 [UMTX_OP_SHM] = __umtx_op_shm, 4279 [UMTX_OP_ROBUST_LISTS] = __umtx_op_robust_lists_compat32, 4280 }; 4281 4282 int 4283 freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap) 4284 { 4285 4286 if ((unsigned)uap->op < nitems(op_table_compat32)) 
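/*
 * The (unsigned) cast in the bounds check above makes a negative op
 * wrap to a huge value, so a single comparison rejects both negative
 * and out-of-range opcodes before op_table_compat32 is indexed;
 * sys__umtx_op() uses the same idiom for the native table.
 */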
{ 4287 return (*op_table_compat32[uap->op])(td, 4288 (struct _umtx_op_args *)uap); 4289 } 4290 return (EINVAL); 4291 } 4292 #endif 4293 4294 void 4295 umtx_thread_init(struct thread *td) 4296 { 4297 4298 td->td_umtxq = umtxq_alloc(); 4299 td->td_umtxq->uq_thread = td; 4300 } 4301 4302 void 4303 umtx_thread_fini(struct thread *td) 4304 { 4305 4306 umtxq_free(td->td_umtxq); 4307 } 4308 4309 /* 4310 * Called when a new thread is created, e.g. by fork(). 4311 */ 4312 void 4313 umtx_thread_alloc(struct thread *td) 4314 { 4315 struct umtx_q *uq; 4316 4317 uq = td->td_umtxq; 4318 uq->uq_inherited_pri = PRI_MAX; 4319 4320 KASSERT(uq->uq_flags == 0, ("uq_flags != 0")); 4321 KASSERT(uq->uq_thread == td, ("uq_thread != td")); 4322 KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL")); 4323 KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty")); 4324 } 4325 4326 /* 4327 * exec() hook. 4328 * 4329 * Clear the robust lists for all of the process's threads, without 4330 * delaying the cleanup to the thread_exit hook, since the relevant 4331 * address space is destroyed right now. 4332 */ 4333 static void 4334 umtx_exec_hook(void *arg __unused, struct proc *p, 4335 struct image_params *imgp __unused) 4336 { 4337 struct thread *td; 4338 4339 KASSERT(p == curproc, ("need curproc")); 4340 PROC_LOCK(p); 4341 KASSERT((p->p_flag & P_HADTHREADS) == 0 || 4342 (p->p_flag & P_STOPPED_SINGLE) != 0, 4343 ("curproc must be single-threaded")); 4344 FOREACH_THREAD_IN_PROC(p, td) { 4345 KASSERT(td == curthread || 4346 ((td->td_flags & TDF_BOUNDARY) != 0 && TD_IS_SUSPENDED(td)), 4347 ("running thread %p %p", p, td)); 4348 PROC_UNLOCK(p); 4349 umtx_thread_cleanup(td); 4350 PROC_LOCK(p); 4351 td->td_rb_list = td->td_rbp_list = td->td_rb_inact = 0; 4352 } 4353 PROC_UNLOCK(p); 4354 } 4355 4356 /* 4357 * thread_exit() hook. 4358 */ 4359 void 4360 umtx_thread_exit(struct thread *td) 4361 { 4362 4363 umtx_thread_cleanup(td); 4364 } 4365 4366 static int 4367 umtx_read_uptr(struct thread *td, uintptr_t ptr, uintptr_t *res) 4368 { 4369 u_long res1; 4370 #ifdef COMPAT_FREEBSD32 4371 uint32_t res32; 4372 #endif 4373 int error; 4374 4375 #ifdef COMPAT_FREEBSD32 4376 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { 4377 error = fueword32((void *)ptr, &res32); 4378 if (error == 0) 4379 res1 = res32; 4380 } else 4381 #endif 4382 { 4383 error = fueword((void *)ptr, &res1); 4384 } 4385 if (error == 0) 4386 *res = res1; 4387 else 4388 error = EFAULT; 4389 return (error); 4390 } 4391 4392 static void 4393 umtx_read_rb_list(struct thread *td, struct umutex *m, uintptr_t *rb_list) 4394 { 4395 #ifdef COMPAT_FREEBSD32 4396 struct umutex32 m32; 4397 4398 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { 4399 memcpy(&m32, m, sizeof(m32)); 4400 *rb_list = m32.m_rb_lnk; 4401 } else 4402 #endif 4403 *rb_list = m->m_rb_lnk; 4404 } 4405 4406 static int 4407 umtx_handle_rb(struct thread *td, uintptr_t rbp, uintptr_t *rb_list, bool inact) 4408 { 4409 struct umutex m; 4410 int error; 4411 4412 KASSERT(td->td_proc == curproc, ("need current vmspace")); 4413 error = copyin((void *)rbp, &m, sizeof(m)); 4414 if (error != 0) 4415 return (error); 4416 if (rb_list != NULL) 4417 umtx_read_rb_list(td, &m, rb_list); 4418 if ((m.m_flags & UMUTEX_ROBUST) == 0) 4419 return (EINVAL); 4420 if ((m.m_owner & ~UMUTEX_CONTESTED) != td->td_tid) 4421 /* inact is cleared after unlock, allow the inconsistency */ 4422 return (inact ?
0 : EINVAL); 4423 return (do_unlock_umutex(td, (struct umutex *)rbp, true)); 4424 } 4425 4426 static void 4427 umtx_cleanup_rb_list(struct thread *td, uintptr_t rb_list, uintptr_t *rb_inact, 4428 const char *name) 4429 { 4430 int error, i; 4431 uintptr_t rbp; 4432 bool inact; 4433 4434 if (rb_list == 0) 4435 return; 4436 error = umtx_read_uptr(td, rb_list, &rbp); 4437 for (i = 0; error == 0 && rbp != 0 && i < umtx_max_rb; i++) { 4438 if (rbp == *rb_inact) { 4439 inact = true; 4440 *rb_inact = 0; 4441 } else 4442 inact = false; 4443 error = umtx_handle_rb(td, rbp, &rbp, inact); 4444 } 4445 if (i == umtx_max_rb && umtx_verbose_rb) { 4446 uprintf("comm %s pid %d: reached umtx %smax rb %d\n", 4447 td->td_proc->p_comm, td->td_proc->p_pid, name, umtx_max_rb); 4448 } 4449 if (error != 0 && umtx_verbose_rb) { 4450 uprintf("comm %s pid %d: handling %srb error %d\n", 4451 td->td_proc->p_comm, td->td_proc->p_pid, name, error); 4452 } 4453 } 4454 4455 /* 4456 * Clean up umtx data. 4457 */ 4458 static void 4459 umtx_thread_cleanup(struct thread *td) 4460 { 4461 struct umtx_q *uq; 4462 struct umtx_pi *pi; 4463 uintptr_t rb_inact; 4464 4465 /* 4466 * Disown pi mutexes. 4467 */ 4468 uq = td->td_umtxq; 4469 if (uq != NULL) { 4470 mtx_lock(&umtx_lock); 4471 uq->uq_inherited_pri = PRI_MAX; 4472 while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) { 4473 pi->pi_owner = NULL; 4474 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link); 4475 } 4476 mtx_unlock(&umtx_lock); 4477 thread_lock(td); 4478 sched_lend_user_prio(td, PRI_MAX); 4479 thread_unlock(td); 4480 } 4481 4482 /* 4483 * Handle terminated robust mutexes. Must be done after 4484 * robust pi disown, otherwise unlock could see unowned 4485 * entries. 4486 */ 4487 rb_inact = td->td_rb_inact; 4488 if (rb_inact != 0) 4489 (void)umtx_read_uptr(td, rb_inact, &rb_inact); 4490 umtx_cleanup_rb_list(td, td->td_rb_list, &rb_inact, ""); 4491 umtx_cleanup_rb_list(td, td->td_rbp_list, &rb_inact, "priv "); 4492 if (rb_inact != 0) 4493 (void)umtx_handle_rb(td, rb_inact, NULL, true); 4494 } 4495
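/*
 * Example: how userland is expected to cooperate with the robust
 * mutex cleanup above.  This is an illustrative sketch only (the
 * function and variable names are invented for the example; libthr
 * is the real consumer of this interface).  Each thread registers
 * the heads of its robust lists once; at thread exit,
 * umtx_thread_cleanup() walks the m_rb_lnk chain and marks any
 * mutex still owned by the exiting thread as UMUTEX_RB_OWNERDEAD:
 *
 *	static void
 *	register_robust_lists(struct umutex **shared_head,
 *	    struct umutex **priv_head, struct umutex **inact_slot)
 *	{
 *		struct umtx_robust_lists_params rb;
 *
 *		rb.robust_list_offset = (uintptr_t)shared_head;
 *		rb.robust_priv_list_offset = (uintptr_t)priv_head;
 *		rb.robust_inact_offset = (uintptr_t)inact_slot;
 *		(void)_umtx_op(NULL, UMTX_OP_ROBUST_LISTS, sizeof(rb),
 *		    &rb, NULL);
 *	}
 *
 * The inact slot is expected to name the mutex the thread is in the
 * middle of locking or unlocking, which lets the kernel tolerate the
 * short window where the list and the m_owner field disagree.
 */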