/*-
 * Copyright (c) 2015, 2016 The FreeBSD Foundation
 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Portions of this software were developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"
#include "opt_umtx_profiling.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/syscallsubr.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <sys/umtx.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

#include <machine/cpu.h>

#ifdef COMPAT_FREEBSD32
#include <compat/freebsd32/freebsd32_proto.h>
#endif

#define	_UMUTEX_TRY		1
#define	_UMUTEX_WAIT		2

#ifdef UMTX_PROFILING
#define	UPROF_PERC_BIGGER(w, f, sw, sf)					\
	(((w) > (sw)) || ((w) == (sw) && (f) > (sf)))
#endif

/* Priority inheritance mutex info. */
struct umtx_pi {
	/* Owner thread */
	struct thread		*pi_owner;

	/* Reference count */
	int			pi_refcount;

	/* List entry to link umtx mutexes held by a thread */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* List entry in hash */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* List for waiters */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Identify a userland lock object */
	struct umtx_key		pi_key;
};

/* A userland synchronization object waiter. */
struct umtx_q {
	/* Linked list for the hash. */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Umtx key. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define UQF_UMTXQ	0x0001

	/* The waiting thread. */
	struct thread		*uq_thread;

	/*
	 * Blocked on a PI mutex.  Readers may hold either the chain
	 * lock or umtx_lock; writers must hold both.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* On the blocked list */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* PI mutexes held by us that other threads contend for */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Inherited priority from PP mutex */
	u_char			uq_inherited_pri;

	/* Spare queue ready to be reused */
	struct umtxq_queue	*uq_spare_queue;

	/* The queue we are on */
	struct umtxq_queue	*uq_cur_queue;
};

TAILQ_HEAD(umtxq_head, umtx_q);

/* Per-key wait-queue */
struct umtxq_queue {
	struct umtxq_head	head;
	struct umtx_key		key;
	LIST_ENTRY(umtxq_queue)	link;
	int			length;
};

LIST_HEAD(umtxq_list, umtxq_queue);

/* Userland lock object's wait-queue chain */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx		uc_lock;

	/* List of sleep queues. */
	struct umtxq_list	uc_queue[2];
#define UMTX_SHARED_QUEUE	0
#define UMTX_EXCLUSIVE_QUEUE	1

	LIST_HEAD(, umtxq_queue) uc_spare_queue;

	/* Busy flag */
	char			uc_busy;

	/* Chain lock waiters */
	int			uc_waiters;

	/* All PI mutexes in this chain */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;

#ifdef UMTX_PROFILING
	u_int			length;
	u_int			max_length;
#endif
};

#define	UMTXQ_LOCKED_ASSERT(uc)		mtx_assert(&(uc)->uc_lock, MA_OWNED)
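
/*
 * Editorial note (not from the original sources): keys are spread over a
 * fixed two-dimensional table of chains, umtxq_chains[2][UMTX_CHAINS].
 * Row 1 is used for key types up to TYPE_SEM and row 0 for everything
 * else (see umtxq_getchain() below), presumably so that different object
 * classes hashing to the same bucket index do not contend on one chain
 * lock.  Each chain protects its list of per-key umtxq_queue wait queues
 * with its own uc_lock.
 */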

/*
 * Don't propagate time-sharing priority; there is a security reason.
 * A user can simply create a PI mutex, let thread A lock it, and let
 * another thread B block on it.  Because B is sleeping, its priority
 * would be boosted, which would also boost A's priority via priority
 * propagation, and A's priority would then never be lowered even if it
 * were using 100% CPU; this is unfair to other processes.
 */

#define UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&	\
			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?	\
			 PRI_MAX_TIMESHARE : (td)->td_user_pri)
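
/*
 * Worked example (editorial note): any time-sharing thread whose
 * td_user_pri lies in [PRI_MIN_TIMESHARE, PRI_MAX_TIMESHARE] is treated
 * by UPRI() as PRI_MAX_TIMESHARE, so all time-sharing waiters compare as
 * equal and a time-sharing sleeper never lends a priority better than
 * PRI_MAX_TIMESHARE to a PI-mutex owner.  A real-time thread's
 * td_user_pri is passed through unchanged and can be lent.
 */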

#define	GOLDEN_RATIO_PRIME	2654404609U
#define	UMTX_CHAINS		512
#define	UMTX_SHIFTS		(__WORD_BIT - 9)

#define	GET_SHARE(flags)	\
	(((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)

#define BUSY_SPINS		200

struct abs_timeout {
	int clockid;
	struct timespec cur;
	struct timespec end;
};

#ifdef COMPAT_FREEBSD32
struct umutex32 {
	volatile __lwpid_t	m_owner;	/* Owner of the mutex */
	__uint32_t		m_flags;	/* Flags of the mutex */
	__uint32_t		m_ceilings[2];	/* Priority protect ceiling */
	__uint32_t		m_rb_lnk;	/* Robust linkage */
	__uint32_t		m_pad;
	__uint32_t		m_spare[2];
};

_Static_assert(sizeof(struct umutex) == sizeof(struct umutex32), "umutex32");
_Static_assert(__offsetof(struct umutex, m_spare[0]) ==
    __offsetof(struct umutex32, m_spare[0]), "m_spare32");
#endif

int umtx_shm_vnobj_persistent = 0;
SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_vnode_persistent, CTLFLAG_RWTUN,
    &umtx_shm_vnobj_persistent, 0,
    "False forces destruction of umtx attached to file, on last close");
static int umtx_max_rb = 1000;
SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_max_robust, CTLFLAG_RWTUN,
    &umtx_max_rb, 0,
    "");

static uma_zone_t		umtx_pi_zone;
static struct umtxq_chain	umtxq_chains[2][UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
static int			umtx_pi_allocated;

static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
    &umtx_pi_allocated, 0, "Allocated umtx_pi");
static int umtx_verbose_rb = 1;
SYSCTL_INT(_debug_umtx, OID_AUTO, robust_faults_verbose, CTLFLAG_RWTUN,
    &umtx_verbose_rb, 0,
    "");

#ifdef UMTX_PROFILING
static long max_length;
SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0,
    "max_length");
static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD, 0,
    "umtx chain stats");
#endif

static void umtx_shm_init(void);
static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert_queue(struct umtx_q *uq, int q);
static void umtxq_remove_queue(struct umtx_q *uq, int q);
static int umtxq_sleep(struct umtx_q *uq, const char *wmesg,
    struct abs_timeout *);
static int umtxq_count(struct umtx_key *key);
static struct umtx_pi *umtx_pi_alloc(int);
static void umtx_pi_free(struct umtx_pi *pi);
static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    bool rb);
static void umtx_thread_cleanup(struct thread *td);
static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
    struct image_params *imgp __unused);
SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);

#define umtxq_signal(key, nwake)	umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
#define umtxq_insert(uq)	umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
#define umtxq_remove(uq)	umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)

static struct mtx umtx_lock;

#ifdef UMTX_PROFILING
static void
umtx_init_profiling(void)
{
	struct sysctl_oid *chain_oid;
	char chain_name[10];
	int i;

	for (i = 0; i < UMTX_CHAINS; ++i) {
		snprintf(chain_name, sizeof(chain_name), "%d", i);
		chain_oid = SYSCTL_ADD_NODE(NULL,
		    SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO,
		    chain_name, CTLFLAG_RD, NULL, "umtx hash stats");
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length0", CTLFLAG_RD,
		    &umtxq_chains[0][i].max_length, 0, NULL);
		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
		    "max_length1", CTLFLAG_RD,
		    &umtxq_chains[1][i].max_length, 0, NULL);
	}
}

static int
sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS)
{
	char buf[512];
	struct sbuf sb;
	struct umtxq_chain *uc;
	u_int fract, i, j, tot, whole;
	u_int sf0, sf1, sf2, sf3, sf4;
	u_int si0, si1, si2, si3, si4;
	u_int sw0, sw1, sw2, sw3, sw4;

	sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
	for (i = 0; i < 2; i++) {
		tot = 0;
		for (j = 0; j < UMTX_CHAINS; ++j) {
			uc = &umtxq_chains[i][j];
			mtx_lock(&uc->uc_lock);
			tot += uc->max_length;
			mtx_unlock(&uc->uc_lock);
		}
		if (tot == 0)
			sbuf_printf(&sb, "%u) Empty ", i);
		else {
			sf0 = sf1 = sf2 = sf3 = sf4 = 0;
			si0 = si1 = si2 = si3 = si4 = 0;
			sw0 = sw1 = sw2 = sw3 = sw4 = 0;
			for (j = 0; j < UMTX_CHAINS; j++) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				whole = uc->max_length * 100;
				mtx_unlock(&uc->uc_lock);
				fract = (whole % tot) * 100;
				if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) {
					sf0 = fract;
					si0 = j;
					sw0 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw1,
				    sf1)) {
					sf1 = fract;
					si1 = j;
					sw1 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw2,
				    sf2)) {
					sf2 = fract;
					si2 = j;
					sw2 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw3,
				    sf3)) {
					sf3 = fract;
					si3 = j;
					sw3 = whole;
				} else if (UPROF_PERC_BIGGER(whole, fract, sw4,
				    sf4)) {
					sf4 = fract;
					si4 = j;
					sw4 = whole;
				}
			}
			sbuf_printf(&sb, "queue %u:\n", i);
			sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot,
			    sf0 / tot, si0);
			sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot,
			    sf1 / tot, si1);
			sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot,
			    sf2 / tot, si2);
			sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot,
			    sf3 / tot, si3);
			sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot,
			    sf4 / tot, si4);
		}
	}
	sbuf_trim(&sb);
	sbuf_finish(&sb);
	sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
	sbuf_delete(&sb);
	return (0);
}

static int
sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS)
{
	struct umtxq_chain *uc;
	u_int i, j;
	int clear, error;

	clear = 0;
	error = sysctl_handle_int(oidp, &clear, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);

	if (clear != 0) {
		for (i = 0; i < 2; ++i) {
			for (j = 0; j < UMTX_CHAINS; ++j) {
				uc = &umtxq_chains[i][j];
				mtx_lock(&uc->uc_lock);
				uc->length = 0;
				uc->max_length = 0;
				mtx_unlock(&uc->uc_lock);
			}
		}
	}
	return (0);
}

SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_clear, "I", "Clear umtx chains statistics");
SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0,
    sysctl_debug_umtx_chains_peaks, "A", "Highest peaks in chains max length");
#endif

static void
umtxq_sysinit(void *arg __unused)
{
	int i, j;

	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	for (i = 0; i < 2; ++i) {
		for (j = 0; j < UMTX_CHAINS; ++j) {
			mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
			    MTX_DEF | MTX_DUPOK);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
			LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
			TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
			umtxq_chains[i][j].uc_busy = 0;
			umtxq_chains[i][j].uc_waiters = 0;
#ifdef UMTX_PROFILING
			umtxq_chains[i][j].length = 0;
			umtxq_chains[i][j].max_length = 0;
#endif
		}
	}
#ifdef UMTX_PROFILING
	umtx_init_profiling();
#endif
	mtx_init(&umtx_lock, "umtx lock", NULL, MTX_DEF);
	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
	    EVENTHANDLER_PRI_ANY);
	umtx_shm_init();
}

struct umtx_q *
umtxq_alloc(void)
{
	struct umtx_q *uq;

	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
	uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX,
	    M_WAITOK | M_ZERO);
	TAILQ_INIT(&uq->uq_spare_queue->head);
	TAILQ_INIT(&uq->uq_pi_contested);
	uq->uq_inherited_pri = PRI_MAX;
	return (uq);
}

void
umtxq_free(struct umtx_q *uq)
{

	MPASS(uq->uq_spare_queue != NULL);
	free(uq->uq_spare_queue, M_UMTX);
	free(uq, M_UMTX);
}

static inline void
umtxq_hash(struct umtx_key *key)
{
	unsigned n;

	n = (uintptr_t)key->info.both.a + key->info.both.b;
	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
}
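
/*
 * Editorial note on the hash: GOLDEN_RATIO_PRIME is a prime close to
 * 2^32 divided by the golden ratio (multiplicative "Fibonacci" hashing),
 * which spreads nearby key values across buckets.  UMTX_SHIFTS keeps
 * the high-order product bits: with __WORD_BIT == 32 the 32-bit product
 * is shifted right by 23, leaving 9 bits (0..511), and the final
 * "% UMTX_CHAINS" keeps the index in range for the 512 chains.
 */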

static inline struct umtxq_chain *
umtxq_getchain(struct umtx_key *key)
{

	if (key->type <= TYPE_SEM)
		return (&umtxq_chains[1][key->hash]);
	return (&umtxq_chains[0][key->hash]);
}

/*
 * Lock a chain.
 */
static inline void
umtxq_lock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_lock(&uc->uc_lock);
}

/*
 * Unlock a chain.
 */
static inline void
umtxq_unlock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_unlock(&uc->uc_lock);
}

/*
 * Set the chain to the busy state when the following operation
 * may block (a kernel mutex cannot be held across it).
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	if (uc->uc_busy) {
#ifdef SMP
		if (smp_cpus > 1) {
			int count = BUSY_SPINS;

			if (count > 0) {
				umtxq_unlock(key);
				while (uc->uc_busy && --count > 0)
					cpu_spinwait();
				umtxq_lock(key);
			}
		}
#endif
		while (uc->uc_busy) {
			uc->uc_waiters++;
			msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
			uc->uc_waiters--;
		}
	}
	uc->uc_busy = 1;
}
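
/*
 * Editorial note: umtxq_busy() implements a two-phase wait.  On SMP it
 * first drops the chain lock and spins up to BUSY_SPINS (200) iterations
 * on uc_busy, betting that the holder clears it quickly; only if the
 * flag is still set does it fall back to msleep() on the chain.  uc_busy
 * is read without the lock during the spin, which is safe because it is
 * re-checked under the lock before sleeping.
 */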

/*
 * Unbusy a chain.
 */
static inline void
umtxq_unbusy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	KASSERT(uc->uc_busy != 0, ("not busy"));
	uc->uc_busy = 0;
	if (uc->uc_waiters)
		wakeup_one(uc);
}

static inline void
umtxq_unbusy_unlocked(struct umtx_key *key)
{

	umtxq_lock(key);
	umtxq_unbusy(key);
	umtxq_unlock(key);
}

static struct umtxq_queue *
umtxq_queue_lookup(struct umtx_key *key, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	LIST_FOREACH(uh, &uc->uc_queue[q], link) {
		if (umtx_key_match(&uh->key, key))
			return (uh);
	}

	return (NULL);
}

static inline void
umtxq_insert_queue(struct umtx_q *uq, int q)
{
	struct umtxq_queue *uh;
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT((uq->uq_flags & UQF_UMTXQ) == 0,
	    ("umtx_q is already on queue"));
	uh = umtxq_queue_lookup(&uq->uq_key, q);
	if (uh != NULL) {
		LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue,
		    link);
	} else {
		uh = uq->uq_spare_queue;
		uh->key = uq->uq_key;
		LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
#ifdef UMTX_PROFILING
		uc->length++;
		if (uc->length > uc->max_length) {
			uc->max_length = uc->length;
			if (uc->max_length > max_length)
				max_length = uc->max_length;
		}
#endif
	}
	uq->uq_spare_queue = NULL;

	TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
	uh->length++;
	uq->uq_flags |= UQF_UMTXQ;
	uq->uq_cur_queue = uh;
	return;
}

static inline void
umtxq_remove_queue(struct umtx_q *uq, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (uq->uq_flags & UQF_UMTXQ) {
		uh = uq->uq_cur_queue;
		TAILQ_REMOVE(&uh->head, uq, uq_link);
		uh->length--;
		uq->uq_flags &= ~UQF_UMTXQ;
		if (TAILQ_EMPTY(&uh->head)) {
			KASSERT(uh->length == 0,
			    ("inconsistent umtxq_queue length"));
#ifdef UMTX_PROFILING
			uc->length--;
#endif
			LIST_REMOVE(uh, link);
		} else {
			uh = LIST_FIRST(&uc->uc_spare_queue);
			KASSERT(uh != NULL, ("uc_spare_queue is empty"));
			LIST_REMOVE(uh, link);
		}
		uq->uq_spare_queue = uh;
		uq->uq_cur_queue = NULL;
	}
}

/*
 * Return how many waiters there are on the shared queue.
 */
static int
umtxq_count(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL)
		return (uh->length);
	return (0);
}

/*
 * Return how many PI waiters there are and hand back the first waiter.
 */
static int
umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	*first = NULL;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
	if (uh != NULL) {
		*first = TAILQ_FIRST(&uh->head);
		return (uh->length);
	}
	return (0);
}

static int
umtxq_check_susp(struct thread *td)
{
	struct proc *p;
	int error;

	/*
	 * The check for TDF_NEEDSUSPCHK is racy, but it is enough to
	 * eventually break the lockstep loop.
	 */
	if ((td->td_flags & TDF_NEEDSUSPCHK) == 0)
		return (0);
	error = 0;
	p = td->td_proc;
	PROC_LOCK(p);
	if (P_SHOULDSTOP(p) ||
	    ((p->p_flag & P_TRACED) && (td->td_dbgflags & TDB_SUSPEND))) {
		if (p->p_flag & P_SINGLE_EXIT)
			error = EINTR;
		else
			error = ERESTART;
	}
	PROC_UNLOCK(p);
	return (error);
}

/*
 * Wake up threads waiting on a userland object.
 */
static int
umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;
	struct umtx_q *uq;
	int ret;

	ret = 0;
	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);
	uh = umtxq_queue_lookup(key, q);
	if (uh != NULL) {
		while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
			umtxq_remove_queue(uq, q);
			wakeup(uq);
			if (++ret >= n_wake)
				return (ret);
		}
	}
	return (ret);
}

/*
 * Wake up the specified thread.
 */
static inline void
umtxq_signal_thread(struct umtx_q *uq)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	umtxq_remove(uq);
	wakeup(uq);
}

static inline int
tstohz(const struct timespec *tsp)
{
	struct timeval tv;

	TIMESPEC_TO_TIMEVAL(&tv, tsp);
	return tvtohz(&tv);
}

static void
abs_timeout_init(struct abs_timeout *timo, int clockid, int absolute,
    const struct timespec *timeout)
{

	timo->clockid = clockid;
	if (!absolute) {
		kern_clock_gettime(curthread, clockid, &timo->end);
		timo->cur = timo->end;
		timespecadd(&timo->end, timeout);
	} else {
		timo->end = *timeout;
		kern_clock_gettime(curthread, clockid, &timo->cur);
	}
}

static void
abs_timeout_init2(struct abs_timeout *timo, const struct _umtx_time *umtxtime)
{

	abs_timeout_init(timo, umtxtime->_clockid,
	    (umtxtime->_flags & UMTX_ABSTIME) != 0, &umtxtime->_timeout);
}

static inline void
abs_timeout_update(struct abs_timeout *timo)
{

	kern_clock_gettime(curthread, timo->clockid, &timo->cur);
}

static int
abs_timeout_gethz(struct abs_timeout *timo)
{
	struct timespec tts;

	if (timespeccmp(&timo->end, &timo->cur, <=))
		return (-1);
	tts = timo->end;
	timespecsub(&tts, &timo->cur);
	return (tstohz(&tts));
}

static uint32_t
umtx_unlock_val(uint32_t flags, bool rb)
{

	if (rb)
		return (UMUTEX_RB_OWNERDEAD);
	else if ((flags & UMUTEX_NONCONSISTENT) != 0)
		return (UMUTEX_RB_NOTRECOV);
	else
		return (UMUTEX_UNOWNED);
}

/*
 * Put the thread into a sleep state.  Before sleeping, check whether
 * the thread was removed from the umtx queue.
 */
static inline int
umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *abstime)
{
	struct umtxq_chain *uc;
	int error, timo;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	for (;;) {
		if (!(uq->uq_flags & UQF_UMTXQ))
			return (0);
		if (abstime != NULL) {
			timo = abs_timeout_gethz(abstime);
			if (timo < 0)
				return (ETIMEDOUT);
		} else
			timo = 0;
		error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo);
		if (error != EWOULDBLOCK) {
			umtxq_lock(&uq->uq_key);
			break;
		}
		if (abstime != NULL)
			abs_timeout_update(abstime);
		umtxq_lock(&uq->uq_key);
	}
	return (error);
}

/*
 * Convert a userspace address into a unique logical address.
 */
int
umtx_key_get(const void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else {
		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
		    &entry, &key->info.shared.object, &pindex, &prot,
		    &wired) != KERN_SUCCESS) {
			return (EFAULT);
		}

		if ((share == PROCESS_SHARE) ||
		    (share == AUTO_SHARE &&
		     VM_INHERIT_SHARE == entry->inheritance)) {
			key->shared = 1;
			key->info.shared.offset = (vm_offset_t)addr -
			    entry->start + entry->offset;
			vm_object_reference(key->info.shared.object);
		} else {
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		vm_map_lookup_done(map, entry);
	}

	umtxq_hash(key);
	return (0);
}

/*
 * Release a key.
 */
void
umtx_key_release(struct umtx_key *key)
{

	if (key->shared)
		vm_object_deallocate(key->info.shared.object);
}

/*
 * Fetch and compare a value; sleep on the address if the value has not
 * changed.
 */
static int
do_wait(struct thread *td, void *addr, u_long id,
    struct _umtx_time *timeout, int compat32, int is_private)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	u_long tmp;
	uint32_t tmp32;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	if (compat32 == 0) {
		error = fueword(addr, &tmp);
		if (error != 0)
			error = EFAULT;
	} else {
		error = fueword32(addr, &tmp32);
		if (error == 0)
			tmp = tmp32;
		else
			error = EFAULT;
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		if (tmp == id)
			error = umtxq_sleep(uq, "uwait", timeout == NULL ?
			    NULL : &timo);
		if ((uq->uq_flags & UQF_UMTXQ) == 0)
			error = 0;
		else
			umtxq_remove(uq);
	} else if ((uq->uq_flags & UQF_UMTXQ) != 0) {
		umtxq_remove(uq);
	}
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

/*
 * Wake up threads sleeping on the specified address.
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
		return (ret);
	umtxq_lock(&key);
	umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}
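
/*
 * Editorial note on the wait protocol above: do_wait() queues the thread
 * before it re-reads the word.  A waker must therefore change the word
 * and then call kern_umtx_wake(); either the waiter sees the new value
 * and never sleeps, or it is already on the queue when the wakeup scans
 * it, so the lost-wakeup window between "check value" and "sleep" is
 * closed.
 */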

/*
 * Lock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int mode)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	error = 0;
	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	/*
	 * Care must be exercised when dealing with umtx structure.  It
	 * can fault on any access.
	 */
	for (;;) {
		rv = fueword32(&m->m_owner, &owner);
		if (rv == -1)
			return (EFAULT);
		if (mode == _UMUTEX_WAIT) {
			if (owner == UMUTEX_UNOWNED ||
			    owner == UMUTEX_CONTESTED ||
			    owner == UMUTEX_RB_OWNERDEAD ||
			    owner == UMUTEX_RB_NOTRECOV)
				return (0);
		} else {
			/*
			 * Robust mutex terminated.  Kernel duty is to
			 * return EOWNERDEAD to the userspace.  The
			 * umutex.m_flags UMUTEX_NONCONSISTENT is set
			 * by the common userspace code.
			 */
			if (owner == UMUTEX_RB_OWNERDEAD) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_RB_OWNERDEAD, &owner,
				    id | UMUTEX_CONTESTED);
				if (rv == -1)
					return (EFAULT);
				if (owner == UMUTEX_RB_OWNERDEAD)
					return (EOWNERDEAD); /* success */
				rv = umtxq_check_susp(td);
				if (rv != 0)
					return (rv);
				continue;
			}
			if (owner == UMUTEX_RB_NOTRECOV)
				return (ENOTRECOVERABLE);

			/*
			 * Try the uncontested case.  This should be
			 * done in userland.
			 */
			rv = casueword32(&m->m_owner, UMUTEX_UNOWNED,
			    &owner, id);
			/* The address was invalid. */
			if (rv == -1)
				return (EFAULT);

			/* The acquire succeeded. */
			if (owner == UMUTEX_UNOWNED)
				return (0);

			/*
			 * If no one owns it but it is contested, try
			 * to acquire it.
			 */
			if (owner == UMUTEX_CONTESTED) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_CONTESTED, &owner,
				    id | UMUTEX_CONTESTED);
				/* The address was invalid. */
				if (rv == -1)
					return (EFAULT);

				if (owner == UMUTEX_CONTESTED)
					return (0);

				rv = umtxq_check_susp(td);
				if (rv != 0)
					return (rv);

				/*
				 * If this failed the lock has
				 * changed, restart.
				 */
				continue;
			}
		}

		if (mode == _UMUTEX_TRY)
			return (EBUSY);

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old,
		    owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (rv == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep.  Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtxn", timeout == NULL ?
			    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);

		if (error == 0)
			error = umtxq_check_susp(td);
	}

	return (0);
}
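
/*
 * Editorial summary of the loop above: each iteration either (a) CASes
 * UMUTEX_UNOWNED -> id and returns, (b) CASes UMUTEX_CONTESTED ->
 * id | UMUTEX_CONTESTED and returns, (c) resolves a robust-owner death,
 * or (d) sets the contested bit on the current owner and sleeps until
 * the word changes.  EFAULT is returned the moment any userland access
 * fails, since the umutex lives in user memory and can be unmapped at
 * any time.
 */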

/*
 * Unlock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	uint32_t owner, old, id, newlock;
	int error, count;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	newlock = umtx_unlock_val(flags, rb);
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, newlock);
		if (error == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there are zero or one threads waiting for it.  Otherwise,
	 * it must be marked as contested.
	 */
	if (count > 1)
		newlock |= UMUTEX_CONTESTED;
	error = casueword32(&m->m_owner, owner, &old, newlock);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

/*
 * Check if the mutex is available and wake up a waiter;
 * only for a simple mutex.
 */
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != 0 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV)
		return (0);

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	if (count <= 1 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV) {
		error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    UMUTEX_UNOWNED);
		if (error == -1)
			error = EFAULT;
	}

	umtxq_lock(&key);
	if (error == 0 && count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 ||
	    owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV))
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

/*
 * Check if the mutex has waiters and try to fix the contention bit.
 */
static int
do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old;
	int type;
	int error;
	int count;

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT |
	    UMUTEX_ROBUST)) {
	case 0:
	case UMUTEX_ROBUST:
		type = TYPE_NORMAL_UMUTEX;
		break;
	case UMUTEX_PRIO_INHERIT:
		type = TYPE_PI_UMUTEX;
		break;
	case (UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST):
		type = TYPE_PI_ROBUST_UMUTEX;
		break;
	case UMUTEX_PRIO_PROTECT:
		type = TYPE_PP_UMUTEX;
		break;
	case (UMUTEX_PRIO_PROTECT | UMUTEX_ROBUST):
		type = TYPE_PP_ROBUST_UMUTEX;
		break;
	default:
		return (EINVAL);
	}
	if ((error = umtx_key_get(m, type, GET_SHARE(flags), &key)) != 0)
		return (error);

	owner = 0;
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);
	/*
	 * Only repair the contention bit if there is a waiter; that means
	 * the mutex is still being referenced by userland code.
	 * Otherwise, don't update any memory.
	 */
	if (count > 1) {
		error = fueword32(&m->m_owner, &owner);
		if (error == -1)
			error = EFAULT;
		while (error == 0 && (owner & UMUTEX_CONTESTED) == 0) {
			error = casueword32(&m->m_owner, owner, &old,
			    owner | UMUTEX_CONTESTED);
			if (error == -1) {
				error = EFAULT;
				break;
			}
			if (old == owner)
				break;
			owner = old;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
	} else if (count == 1) {
		error = fueword32(&m->m_owner, &owner);
		if (error == -1)
			error = EFAULT;
		while (error == 0 && (owner & ~UMUTEX_CONTESTED) != 0 &&
		    (owner & UMUTEX_CONTESTED) == 0) {
			error = casueword32(&m->m_owner, owner, &old,
			    owner | UMUTEX_CONTESTED);
			if (error == -1) {
				error = EFAULT;
				break;
			}
			if (old == owner)
				break;
			owner = old;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
	}
	umtxq_lock(&key);
	if (error == EFAULT) {
		umtxq_signal(&key, INT_MAX);
	} else if (count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 ||
	    owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV))
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}
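
/*
 * Editorial note: do_wake2_umutex() is a repair path for when a mutex
 * word and its wait queue may disagree.  With more than one waiter the
 * contested bit must be set no matter who owns the mutex; with exactly
 * one waiter it is set only while the mutex is still owned, since an
 * unowned word with one stale waiter is instead resolved by the single
 * wakeup sent at the end.
 */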

static inline struct umtx_pi *
umtx_pi_alloc(int flags)
{
	struct umtx_pi *pi;

	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
	TAILQ_INIT(&pi->pi_blocked);
	atomic_add_int(&umtx_pi_allocated, 1);
	return (pi);
}

static inline void
umtx_pi_free(struct umtx_pi *pi)
{

	uma_zfree(umtx_pi_zone, pi);
	atomic_add_int(&umtx_pi_allocated, -1);
}

/*
 * Adjust the thread's position on a pi_state after its priority has been
 * changed.
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread or higher than the next thread.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}

static struct umtx_pi *
umtx_pi_next(struct umtx_pi *pi)
{
	struct umtx_q *uq_owner;

	if (pi->pi_owner == NULL)
		return (NULL);
	uq_owner = pi->pi_owner->td_umtxq;
	if (uq_owner == NULL)
		return (NULL);
	return (uq_owner->uq_pi_blocked);
}

/*
 * Floyd's Cycle-Finding Algorithm.
 */
static bool
umtx_pi_check_loop(struct umtx_pi *pi)
{
	struct umtx_pi *pi1;	/* fast iterator */

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (false);
	pi1 = pi;
	for (;;) {
		pi = umtx_pi_next(pi);
		if (pi == NULL)
			break;
		pi1 = umtx_pi_next(pi1);
		if (pi1 == NULL)
			break;
		pi1 = umtx_pi_next(pi1);
		if (pi1 == NULL)
			break;
		if (pi == pi1)
			return (true);
	}
	return (false);
}
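
/*
 * Editorial note: umtx_pi_check_loop() is the classic tortoise/hare scan
 * over the "owner of the mutex I block on" chain: pi advances one link
 * per round, pi1 two links.  If the chain is a cycle (e.g. two threads
 * each blocked on a PI mutex the other owns, which buggy userland can
 * construct), the two iterators meet and the function reports true so
 * the propagation code can bail out instead of looping forever.
 */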

/*
 * Propagate priority when a thread is blocked on a POSIX
 * PI mutex.
 */
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;
	if (umtx_pi_check_loop(pi))
		return;

	for (;;) {
		td = pi->pi_owner;
		if (td == NULL || td == curthread)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		thread_lock(td);
		if (td->td_lend_user_pri > pri)
			sched_lend_user_prio(td, pri);
		else {
			thread_unlock(td);
			break;
		}
		thread_unlock(td);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		if (pi == NULL)
			break;
		/* Resort td on the list if needed. */
		umtx_pi_adjust_thread(pi, td);
	}
}

/*
 * Unpropagate priority for a PI mutex when a thread blocked on
 * it is interrupted by a signal or resumed by others.
 */
static void
umtx_repropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);

	if (umtx_pi_check_loop(pi))
		return;
	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		thread_lock(pi->pi_owner);
		sched_lend_user_prio(pi->pi_owner, pri);
		thread_unlock(pi->pi_owner);
		if ((pi = uq_owner->uq_pi_blocked) != NULL)
			umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
	}
}

/*
 * Insert a PI mutex into the owned list.
 */
static void
umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi->pi_owner != NULL)
		panic("pi_owner != NULL");
	pi->pi_owner = owner;
	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
}

/*
 * Disown a PI mutex, and remove it from the owned list.
 */
static void
umtx_pi_disown(struct umtx_pi *pi)
{

	mtx_assert(&umtx_lock, MA_OWNED);
	TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, pi, pi_link);
	pi->pi_owner = NULL;
}
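
/*
 * Editorial example: if T1 (priority 100) blocks on a PI mutex owned by
 * T2 (120), umtx_propagate_priority() lends 100 to T2; if T2 is itself
 * blocked on a mutex owned by T3 (140), the walk continues and T3 is
 * lent 100 as well (lower numeric values are better priorities).
 * umtx_repropagate_priority() is the inverse: after a waiter leaves,
 * each owner along the chain recomputes the best priority still waiting
 * on any mutex it holds and keeps only that.
 */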

/*
 * Claim ownership of a PI mutex.
 */
static int
umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq;
	int pri;

	mtx_lock(&umtx_lock);
	if (pi->pi_owner == owner) {
		mtx_unlock(&umtx_lock);
		return (0);
	}

	if (pi->pi_owner != NULL) {
		/*
		 * userland may have already messed the mutex, sigh.
		 */
		mtx_unlock(&umtx_lock);
		return (EPERM);
	}
	umtx_pi_setowner(pi, owner);
	uq = TAILQ_FIRST(&pi->pi_blocked);
	if (uq != NULL) {
		pri = UPRI(uq->uq_thread);
		thread_lock(owner);
		if (pri < UPRI(owner))
			sched_lend_user_prio(owner, pri);
		thread_unlock(owner);
	}
	mtx_unlock(&umtx_lock);
	return (0);
}

/*
 * Adjust a thread's position on the blocked list of its PI mutex;
 * this may start a new round of priority propagation.
 */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	mtx_lock(&umtx_lock);
	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	if (pi != NULL) {
		umtx_pi_adjust_thread(pi, td);
		umtx_repropagate_priority(pi);
	}
	mtx_unlock(&umtx_lock);
}

/*
 * Sleep on a PI mutex.
 */
static int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, uint32_t owner,
    const char *wmesg, struct abs_timeout *timo, bool shared)
{
	struct umtxq_chain *uc;
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int error, pri;

	error = 0;
	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(uc->uc_busy != 0, ("umtx chain is not busy"));
	umtxq_insert(uq);
	mtx_lock(&umtx_lock);
	if (pi->pi_owner == NULL) {
		mtx_unlock(&umtx_lock);
		td1 = tdfind(owner, shared ? -1 : td->td_proc->p_pid);
		mtx_lock(&umtx_lock);
		if (td1 != NULL) {
			if (pi->pi_owner == NULL)
				umtx_pi_setowner(pi, td1);
			PROC_UNLOCK(td1->td_proc);
		}
	}

	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	thread_lock(td);
	td->td_flags |= TDF_UPIBLOCKED;
	thread_unlock(td);
	umtx_propagate_priority(td);
	mtx_unlock(&umtx_lock);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, wmesg, timo);
	umtxq_remove(uq);

	mtx_lock(&umtx_lock);
	uq->uq_pi_blocked = NULL;
	thread_lock(td);
	td->td_flags &= ~TDF_UPIBLOCKED;
	thread_unlock(td);
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_repropagate_priority(pi);
	mtx_unlock(&umtx_lock);
	umtxq_unlock(&uq->uq_key);

	return (error);
}

/*
 * Increment the reference count of a PI mutex.
 */
static void
umtx_pi_ref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	pi->pi_refcount++;
}

/*
 * Decrement the reference count of a PI mutex; when the counter
 * drops to zero, its memory is freed.
 */
static void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		mtx_lock(&umtx_lock);
		if (pi->pi_owner != NULL)
			umtx_pi_disown(pi);
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
		    ("blocked queue not empty"));
		mtx_unlock(&umtx_lock);
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		umtx_pi_free(pi);
	}
}

/*
 * Find a PI mutex in the hash table.
 */
static struct umtx_pi *
umtx_pi_lookup(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtx_pi *pi;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);

	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
		if (umtx_key_match(&pi->pi_key, key)) {
			return (pi);
		}
	}
	return (NULL);
}

/*
 * Insert a PI mutex into the hash table.
 */
static inline void
umtx_pi_insert(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
}

/*
 * Lock a PI mutex.
 */
static int
do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, old_owner, owner, old;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	pi = umtx_pi_lookup(&uq->uq_key);
	if (pi == NULL) {
		new_pi = umtx_pi_alloc(M_NOWAIT);
		if (new_pi == NULL) {
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc(M_WAITOK);
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL) {
				umtx_pi_free(new_pi);
				new_pi = NULL;
			}
		}
		if (new_pi != NULL) {
			new_pi->pi_key = uq->uq_key;
			umtx_pi_insert(new_pi);
			pi = new_pi;
		}
	}
	umtx_pi_ref(pi);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Care must be exercised when dealing with umtx structure.  It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED) {
			error = 0;
			break;
		}

		if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
			break;
		}

		/* If no one owns it but it is contested, try to acquire it. */
		if (owner == UMUTEX_CONTESTED || owner == UMUTEX_RB_OWNERDEAD) {
			old_owner = owner;
			rv = casueword32(&m->m_owner, owner, &owner,
			    id | UMUTEX_CONTESTED);
			/* The address was invalid. */
			if (rv == -1) {
				error = EFAULT;
				break;
			}

			if (owner == old_owner) {
				umtxq_lock(&uq->uq_key);
				umtxq_busy(&uq->uq_key);
				error = umtx_pi_claim(pi, td);
				umtxq_unbusy(&uq->uq_key);
				umtxq_unlock(&uq->uq_key);
				if (error != 0) {
					/*
					 * Since we're going to return an
					 * error, restore the m_owner to its
					 * previous, unowned state to avoid
					 * compounding the problem.
					 */
					(void)casuword32(&m->m_owner,
					    id | UMUTEX_CONTESTED,
					    old_owner);
				}
				if (error == 0 &&
				    old_owner == UMUTEX_RB_OWNERDEAD)
					error = EOWNERDEAD;
				break;
			}

			error = umtxq_check_susp(td);
			if (error != 0)
				break;

			/* If this failed the lock has changed, restart. */
			continue;
		}

		if ((owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either someone else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old, owner |
		    UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}

		umtxq_lock(&uq->uq_key);
		/*
		 * We set the contested bit, sleep.  Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.  Note that the UMUTEX_RB_OWNERDEAD
		 * value for owner is impossible there.
		 */
		if (old == owner) {
			error = umtxq_sleep_pi(uq, pi,
			    owner & ~UMUTEX_CONTESTED,
			    "umtxpi", timeout == NULL ? NULL : &timo,
			    (flags & USYNC_PROCESS_SHARED) != 0);
			if (error != 0)
				continue;
		} else {
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
		}

		error = umtxq_check_susp(td);
		if (error != 0)
			break;
	}

	umtxq_lock(&uq->uq_key);
	umtx_pi_unref(pi);
	umtxq_unlock(&uq->uq_key);

	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Unlock a PI mutex.
 */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	uint32_t id, new_owner, old, owner;
	int count, error, pri;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	new_owner = umtx_unlock_val(flags, rb);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, new_owner);
		if (error == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count_pi(&key, &uq_first);
	if (uq_first != NULL) {
		mtx_lock(&umtx_lock);
		pi = uq_first->uq_pi_blocked;
		KASSERT(pi != NULL, ("pi == NULL?"));
		if (pi->pi_owner != td && !(rb && pi->pi_owner == NULL)) {
			mtx_unlock(&umtx_lock);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			/* userland messed the mutex */
			return (EPERM);
		}
		uq_me = td->td_umtxq;
		if (pi->pi_owner == td)
			umtx_pi_disown(pi);
		/* get highest priority thread which is still sleeping. */
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		while (uq_first != NULL &&
		    (uq_first->uq_flags & UQF_UMTXQ) == 0) {
			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
		}
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
		if (uq_first)
			umtxq_signal_thread(uq_first);
	} else {
		pi = umtx_pi_lookup(&key);
		/*
		 * A umtx_pi can exist if a signal or timeout removed the
		 * last waiter from the umtxq, but there is still
		 * a thread in do_lock_pi() holding the umtx_pi.
		 */
		if (pi != NULL) {
			/*
			 * The umtx_pi can be unowned, such as when a thread
			 * has just entered do_lock_pi(), allocated the
			 * umtx_pi, and unlocked the umtxq.
			 * If the current thread owns it, it must disown it.
			 */
			mtx_lock(&umtx_lock);
			if (pi->pi_owner == td)
				umtx_pi_disown(pi);
			mtx_unlock(&umtx_lock);
		}
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there are zero or one threads waiting for it.  Otherwise,
	 * it must be marked as contested.
	 */
	if (count > 1)
		new_owner |= UMUTEX_CONTESTED;
	error = casueword32(&m->m_owner, owner, &old, new_owner);

	umtxq_unbusy_unlocked(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

/*
 * Lock a PP mutex.
 */
static int
do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct abs_timeout timo;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t ceiling;
	uint32_t owner, id;
	int error, pri, old_inherited_pri, su, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
	for (;;) {
		old_inherited_pri = uq->uq_inherited_pri;
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		rv = fueword32(&m->m_ceilings[0], &ceiling);
		if (rv == -1) {
			error = EFAULT;
			goto out;
		}
		ceiling = RTP_PRIO_MAX - ceiling;
		if (ceiling > RTP_PRIO_MAX) {
			error = EINVAL;
			goto out;
		}

		mtx_lock(&umtx_lock);
		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
			mtx_unlock(&umtx_lock);
			error = EINVAL;
			goto out;
		}
		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
			thread_lock(td);
			if (uq->uq_inherited_pri < UPRI(td))
				sched_lend_user_prio(td, uq->uq_inherited_pri);
			thread_unlock(td);
		}
		mtx_unlock(&umtx_lock);

		rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    id | UMUTEX_CONTESTED);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		if (owner == UMUTEX_CONTESTED) {
			error = 0;
			break;
		} else if (owner == UMUTEX_RB_OWNERDEAD) {
			rv = casueword32(&m->m_owner, UMUTEX_RB_OWNERDEAD,
			    &owner, id | UMUTEX_CONTESTED);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (owner == UMUTEX_RB_OWNERDEAD) {
				error = EOWNERDEAD; /* success */
				break;
			}
			error = 0;
		} else if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", timeout == NULL ?
		    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);

		mtx_lock(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}

	if (error != 0 && error != EOWNERDEAD) {
		mtx_lock(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}

out:
	umtxq_unbusy_unlocked(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}
2217 */
2218 static int
2219 do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
2220 {
2221 struct umtx_key key;
2222 struct umtx_q *uq, *uq2;
2223 struct umtx_pi *pi;
2224 uint32_t id, owner, rceiling;
2225 int error, pri, new_inherited_pri, su;
2226
2227 id = td->td_tid;
2228 uq = td->td_umtxq;
2229 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
2230
2231 /*
2232 * Make sure we own this mtx.
2233 */
2234 error = fueword32(&m->m_owner, &owner);
2235 if (error == -1)
2236 return (EFAULT);
2237
2238 if ((owner & ~UMUTEX_CONTESTED) != id)
2239 return (EPERM);
2240
2241 error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
2242 if (error != 0)
2243 return (error);
2244
2245 if (rceiling == -1)
2246 new_inherited_pri = PRI_MAX;
2247 else {
2248 rceiling = RTP_PRIO_MAX - rceiling;
2249 if (rceiling > RTP_PRIO_MAX)
2250 return (EINVAL);
2251 new_inherited_pri = PRI_MIN_REALTIME + rceiling;
2252 }
2253
2254 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
2255 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
2256 &key)) != 0)
2257 return (error);
2258 umtxq_lock(&key);
2259 umtxq_busy(&key);
2260 umtxq_unlock(&key);
2261 /*
2262 * For a priority-protected mutex, always set the unlocked state
2263 * to UMUTEX_CONTESTED so that userland always enters the kernel
2264 * to lock the mutex; this is necessary because thread priorities
2265 * have to be adjusted for such mutexes.
2266 */
2267 error = suword32(&m->m_owner, umtx_unlock_val(flags, rb) |
2268 UMUTEX_CONTESTED);
2269
2270 umtxq_lock(&key);
2271 if (error == 0)
2272 umtxq_signal(&key, 1);
2273 umtxq_unbusy(&key);
2274 umtxq_unlock(&key);
2275
2276 if (error == -1)
2277 error = EFAULT;
2278 else {
2279 mtx_lock(&umtx_lock);
2280 if (su != 0)
2281 uq->uq_inherited_pri = new_inherited_pri;
2282 pri = PRI_MAX;
2283 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2284 uq2 = TAILQ_FIRST(&pi->pi_blocked);
2285 if (uq2 != NULL) {
2286 if (pri > UPRI(uq2->uq_thread))
2287 pri = UPRI(uq2->uq_thread);
2288 }
2289 }
2290 if (pri > uq->uq_inherited_pri)
2291 pri = uq->uq_inherited_pri;
2292 thread_lock(td);
2293 sched_lend_user_prio(td, pri);
2294 thread_unlock(td);
2295 mtx_unlock(&umtx_lock);
2296 }
2297 umtx_key_release(&key);
2298 return (error);
2299 }
2300
2301 static int
2302 do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
2303 uint32_t *old_ceiling)
2304 {
2305 struct umtx_q *uq;
2306 uint32_t flags, id, owner, save_ceiling;
2307 int error, rv, rv1;
2308
2309 error = fueword32(&m->m_flags, &flags);
2310 if (error == -1)
2311 return (EFAULT);
2312 if ((flags & UMUTEX_PRIO_PROTECT) == 0)
2313 return (EINVAL);
2314 if (ceiling > RTP_PRIO_MAX)
2315 return (EINVAL);
2316 id = td->td_tid;
2317 uq = td->td_umtxq;
2318 if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
2319 TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
2320 &uq->uq_key)) != 0)
2321 return (error);
2322 for (;;) {
2323 umtxq_lock(&uq->uq_key);
2324 umtxq_busy(&uq->uq_key);
2325 umtxq_unlock(&uq->uq_key);
2326
2327 rv = fueword32(&m->m_ceilings[0], &save_ceiling);
2328 if (rv == -1) {
2329 error = EFAULT;
2330 break;
2331 }
2332
2333 rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
2334 id | UMUTEX_CONTESTED);
2335 if (rv == -1) {
2336 error = EFAULT;
2337 break;
2338 }
2339
2340 if (owner == UMUTEX_CONTESTED) {
2341 rv = suword32(&m->m_ceilings[0], ceiling);
2342 rv1 = suword32(&m->m_owner, UMUTEX_CONTESTED);
2343 error = (rv == 0 && rv1 == 0) ?
0: EFAULT; 2344 break; 2345 } 2346 2347 if ((owner & ~UMUTEX_CONTESTED) == id) { 2348 rv = suword32(&m->m_ceilings[0], ceiling); 2349 error = rv == 0 ? 0 : EFAULT; 2350 break; 2351 } 2352 2353 if (owner == UMUTEX_RB_OWNERDEAD) { 2354 error = EOWNERDEAD; 2355 break; 2356 } else if (owner == UMUTEX_RB_NOTRECOV) { 2357 error = ENOTRECOVERABLE; 2358 break; 2359 } 2360 2361 /* 2362 * If we caught a signal, we have retried and now 2363 * exit immediately. 2364 */ 2365 if (error != 0) 2366 break; 2367 2368 /* 2369 * We set the contested bit, sleep. Otherwise the lock changed 2370 * and we need to retry or we lost a race to the thread 2371 * unlocking the umtx. 2372 */ 2373 umtxq_lock(&uq->uq_key); 2374 umtxq_insert(uq); 2375 umtxq_unbusy(&uq->uq_key); 2376 error = umtxq_sleep(uq, "umtxpp", NULL); 2377 umtxq_remove(uq); 2378 umtxq_unlock(&uq->uq_key); 2379 } 2380 umtxq_lock(&uq->uq_key); 2381 if (error == 0) 2382 umtxq_signal(&uq->uq_key, INT_MAX); 2383 umtxq_unbusy(&uq->uq_key); 2384 umtxq_unlock(&uq->uq_key); 2385 umtx_key_release(&uq->uq_key); 2386 if (error == 0 && old_ceiling != NULL) { 2387 rv = suword32(old_ceiling, save_ceiling); 2388 error = rv == 0 ? 0 : EFAULT; 2389 } 2390 return (error); 2391 } 2392 2393 /* 2394 * Lock a userland POSIX mutex. 2395 */ 2396 static int 2397 do_lock_umutex(struct thread *td, struct umutex *m, 2398 struct _umtx_time *timeout, int mode) 2399 { 2400 uint32_t flags; 2401 int error; 2402 2403 error = fueword32(&m->m_flags, &flags); 2404 if (error == -1) 2405 return (EFAULT); 2406 2407 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { 2408 case 0: 2409 error = do_lock_normal(td, m, flags, timeout, mode); 2410 break; 2411 case UMUTEX_PRIO_INHERIT: 2412 error = do_lock_pi(td, m, flags, timeout, mode); 2413 break; 2414 case UMUTEX_PRIO_PROTECT: 2415 error = do_lock_pp(td, m, flags, timeout, mode); 2416 break; 2417 default: 2418 return (EINVAL); 2419 } 2420 if (timeout == NULL) { 2421 if (error == EINTR && mode != _UMUTEX_WAIT) 2422 error = ERESTART; 2423 } else { 2424 /* Timed-locking is not restarted. */ 2425 if (error == ERESTART) 2426 error = EINTR; 2427 } 2428 return (error); 2429 } 2430 2431 /* 2432 * Unlock a userland POSIX mutex. 
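* The unlock path is chosen by the same UMUTEX_PRIO_INHERIT /
* UMUTEX_PRIO_PROTECT flag decoding as in do_lock_umutex().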
2433 */
2434 static int
2435 do_unlock_umutex(struct thread *td, struct umutex *m, bool rb)
2436 {
2437 uint32_t flags;
2438 int error;
2439
2440 error = fueword32(&m->m_flags, &flags);
2441 if (error == -1)
2442 return (EFAULT);
2443
2444 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2445 case 0:
2446 return (do_unlock_normal(td, m, flags, rb));
2447 case UMUTEX_PRIO_INHERIT:
2448 return (do_unlock_pi(td, m, flags, rb));
2449 case UMUTEX_PRIO_PROTECT:
2450 return (do_unlock_pp(td, m, flags, rb));
2451 }
2452
2453 return (EINVAL);
2454 }
2455
2456 static int
2457 do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
2458 struct timespec *timeout, u_long wflags)
2459 {
2460 struct abs_timeout timo;
2461 struct umtx_q *uq;
2462 uint32_t flags, clockid, hasw;
2463 int error;
2464
2465 uq = td->td_umtxq;
2466 error = fueword32(&cv->c_flags, &flags);
2467 if (error == -1)
2468 return (EFAULT);
2469 error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
2470 if (error != 0)
2471 return (error);
2472
2473 if ((wflags & CVWAIT_CLOCKID) != 0) {
2474 error = fueword32(&cv->c_clockid, &clockid);
2475 if (error == -1) {
2476 umtx_key_release(&uq->uq_key);
2477 return (EFAULT);
2478 }
2479 if (clockid < CLOCK_REALTIME ||
2480 clockid >= CLOCK_THREAD_CPUTIME_ID) {
2481 /* Only hardware clock ids are supported. */
2482 umtx_key_release(&uq->uq_key);
2483 return (EINVAL);
2484 }
2485 } else {
2486 clockid = CLOCK_REALTIME;
2487 }
2488
2489 umtxq_lock(&uq->uq_key);
2490 umtxq_busy(&uq->uq_key);
2491 umtxq_insert(uq);
2492 umtxq_unlock(&uq->uq_key);
2493
2494 /*
2495 * Set c_has_waiters to 1 before releasing the user mutex, and
2496 * avoid modifying the cache line when unnecessary.
2497 */
2498 error = fueword32(&cv->c_has_waiters, &hasw);
2499 if (error == 0 && hasw == 0)
2500 suword32(&cv->c_has_waiters, 1);
2501
2502 umtxq_unbusy_unlocked(&uq->uq_key);
2503
2504 error = do_unlock_umutex(td, m, false);
2505
2506 if (timeout != NULL)
2507 abs_timeout_init(&timo, clockid, (wflags & CVWAIT_ABSTIME) != 0,
2508 timeout);
2509
2510 umtxq_lock(&uq->uq_key);
2511 if (error == 0) {
2512 error = umtxq_sleep(uq, "ucond", timeout == NULL ?
2513 NULL : &timo);
2514 }
2515
2516 if ((uq->uq_flags & UQF_UMTXQ) == 0)
2517 error = 0;
2518 else {
2519 /*
2520 * This must be a timeout, an interruption by a signal, or a
2521 * spurious wakeup; clear the c_has_waiters flag when
2522 * necessary.
2523 */
2524 umtxq_busy(&uq->uq_key);
2525 if ((uq->uq_flags & UQF_UMTXQ) != 0) {
2526 int oldlen = uq->uq_cur_queue->length;
2527 umtxq_remove(uq);
2528 if (oldlen == 1) {
2529 umtxq_unlock(&uq->uq_key);
2530 suword32(&cv->c_has_waiters, 0);
2531 umtxq_lock(&uq->uq_key);
2532 }
2533 }
2534 umtxq_unbusy(&uq->uq_key);
2535 if (error == ERESTART)
2536 error = EINTR;
2537 }
2538
2539 umtxq_unlock(&uq->uq_key);
2540 umtx_key_release(&uq->uq_key);
2541 return (error);
2542 }
2543
2544 /*
2545 * Signal a userland condition variable.
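* Wakes at most one waiter; if that drains the sleep queue, the
* c_has_waiters flag is cleared so userland can skip the kernel on
* the next signal.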
2546 */ 2547 static int 2548 do_cv_signal(struct thread *td, struct ucond *cv) 2549 { 2550 struct umtx_key key; 2551 int error, cnt, nwake; 2552 uint32_t flags; 2553 2554 error = fueword32(&cv->c_flags, &flags); 2555 if (error == -1) 2556 return (EFAULT); 2557 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2558 return (error); 2559 umtxq_lock(&key); 2560 umtxq_busy(&key); 2561 cnt = umtxq_count(&key); 2562 nwake = umtxq_signal(&key, 1); 2563 if (cnt <= nwake) { 2564 umtxq_unlock(&key); 2565 error = suword32(&cv->c_has_waiters, 0); 2566 if (error == -1) 2567 error = EFAULT; 2568 umtxq_lock(&key); 2569 } 2570 umtxq_unbusy(&key); 2571 umtxq_unlock(&key); 2572 umtx_key_release(&key); 2573 return (error); 2574 } 2575 2576 static int 2577 do_cv_broadcast(struct thread *td, struct ucond *cv) 2578 { 2579 struct umtx_key key; 2580 int error; 2581 uint32_t flags; 2582 2583 error = fueword32(&cv->c_flags, &flags); 2584 if (error == -1) 2585 return (EFAULT); 2586 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) 2587 return (error); 2588 2589 umtxq_lock(&key); 2590 umtxq_busy(&key); 2591 umtxq_signal(&key, INT_MAX); 2592 umtxq_unlock(&key); 2593 2594 error = suword32(&cv->c_has_waiters, 0); 2595 if (error == -1) 2596 error = EFAULT; 2597 2598 umtxq_unbusy_unlocked(&key); 2599 2600 umtx_key_release(&key); 2601 return (error); 2602 } 2603 2604 static int 2605 do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, struct _umtx_time *timeout) 2606 { 2607 struct abs_timeout timo; 2608 struct umtx_q *uq; 2609 uint32_t flags, wrflags; 2610 int32_t state, oldstate; 2611 int32_t blocked_readers; 2612 int error, error1, rv; 2613 2614 uq = td->td_umtxq; 2615 error = fueword32(&rwlock->rw_flags, &flags); 2616 if (error == -1) 2617 return (EFAULT); 2618 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2619 if (error != 0) 2620 return (error); 2621 2622 if (timeout != NULL) 2623 abs_timeout_init2(&timo, timeout); 2624 2625 wrflags = URWLOCK_WRITE_OWNER; 2626 if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER)) 2627 wrflags |= URWLOCK_WRITE_WAITERS; 2628 2629 for (;;) { 2630 rv = fueword32(&rwlock->rw_state, &state); 2631 if (rv == -1) { 2632 umtx_key_release(&uq->uq_key); 2633 return (EFAULT); 2634 } 2635 2636 /* try to lock it */ 2637 while (!(state & wrflags)) { 2638 if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) { 2639 umtx_key_release(&uq->uq_key); 2640 return (EAGAIN); 2641 } 2642 rv = casueword32(&rwlock->rw_state, state, 2643 &oldstate, state + 1); 2644 if (rv == -1) { 2645 umtx_key_release(&uq->uq_key); 2646 return (EFAULT); 2647 } 2648 if (oldstate == state) { 2649 umtx_key_release(&uq->uq_key); 2650 return (0); 2651 } 2652 error = umtxq_check_susp(td); 2653 if (error != 0) 2654 break; 2655 state = oldstate; 2656 } 2657 2658 if (error) 2659 break; 2660 2661 /* grab monitor lock */ 2662 umtxq_lock(&uq->uq_key); 2663 umtxq_busy(&uq->uq_key); 2664 umtxq_unlock(&uq->uq_key); 2665 2666 /* 2667 * re-read the state, in case it changed between the try-lock above 2668 * and the check below 2669 */ 2670 rv = fueword32(&rwlock->rw_state, &state); 2671 if (rv == -1) 2672 error = EFAULT; 2673 2674 /* set read contention bit */ 2675 while (error == 0 && (state & wrflags) && 2676 !(state & URWLOCK_READ_WAITERS)) { 2677 rv = casueword32(&rwlock->rw_state, state, 2678 &oldstate, state | URWLOCK_READ_WAITERS); 2679 if (rv == -1) { 2680 error = EFAULT; 2681 break; 2682 } 2683 if (oldstate == state) 
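/* The CAS installed URWLOCK_READ_WAITERS; commit to sleeping. */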
2684 goto sleep; 2685 state = oldstate; 2686 error = umtxq_check_susp(td); 2687 if (error != 0) 2688 break; 2689 } 2690 if (error != 0) { 2691 umtxq_unbusy_unlocked(&uq->uq_key); 2692 break; 2693 } 2694 2695 /* state is changed while setting flags, restart */ 2696 if (!(state & wrflags)) { 2697 umtxq_unbusy_unlocked(&uq->uq_key); 2698 error = umtxq_check_susp(td); 2699 if (error != 0) 2700 break; 2701 continue; 2702 } 2703 2704 sleep: 2705 /* contention bit is set, before sleeping, increase read waiter count */ 2706 rv = fueword32(&rwlock->rw_blocked_readers, 2707 &blocked_readers); 2708 if (rv == -1) { 2709 umtxq_unbusy_unlocked(&uq->uq_key); 2710 error = EFAULT; 2711 break; 2712 } 2713 suword32(&rwlock->rw_blocked_readers, blocked_readers+1); 2714 2715 while (state & wrflags) { 2716 umtxq_lock(&uq->uq_key); 2717 umtxq_insert(uq); 2718 umtxq_unbusy(&uq->uq_key); 2719 2720 error = umtxq_sleep(uq, "urdlck", timeout == NULL ? 2721 NULL : &timo); 2722 2723 umtxq_busy(&uq->uq_key); 2724 umtxq_remove(uq); 2725 umtxq_unlock(&uq->uq_key); 2726 if (error) 2727 break; 2728 rv = fueword32(&rwlock->rw_state, &state); 2729 if (rv == -1) { 2730 error = EFAULT; 2731 break; 2732 } 2733 } 2734 2735 /* decrease read waiter count, and may clear read contention bit */ 2736 rv = fueword32(&rwlock->rw_blocked_readers, 2737 &blocked_readers); 2738 if (rv == -1) { 2739 umtxq_unbusy_unlocked(&uq->uq_key); 2740 error = EFAULT; 2741 break; 2742 } 2743 suword32(&rwlock->rw_blocked_readers, blocked_readers-1); 2744 if (blocked_readers == 1) { 2745 rv = fueword32(&rwlock->rw_state, &state); 2746 if (rv == -1) { 2747 umtxq_unbusy_unlocked(&uq->uq_key); 2748 error = EFAULT; 2749 break; 2750 } 2751 for (;;) { 2752 rv = casueword32(&rwlock->rw_state, state, 2753 &oldstate, state & ~URWLOCK_READ_WAITERS); 2754 if (rv == -1) { 2755 error = EFAULT; 2756 break; 2757 } 2758 if (oldstate == state) 2759 break; 2760 state = oldstate; 2761 error1 = umtxq_check_susp(td); 2762 if (error1 != 0) { 2763 if (error == 0) 2764 error = error1; 2765 break; 2766 } 2767 } 2768 } 2769 2770 umtxq_unbusy_unlocked(&uq->uq_key); 2771 if (error != 0) 2772 break; 2773 } 2774 umtx_key_release(&uq->uq_key); 2775 if (error == ERESTART) 2776 error = EINTR; 2777 return (error); 2778 } 2779 2780 static int 2781 do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout) 2782 { 2783 struct abs_timeout timo; 2784 struct umtx_q *uq; 2785 uint32_t flags; 2786 int32_t state, oldstate; 2787 int32_t blocked_writers; 2788 int32_t blocked_readers; 2789 int error, error1, rv; 2790 2791 uq = td->td_umtxq; 2792 error = fueword32(&rwlock->rw_flags, &flags); 2793 if (error == -1) 2794 return (EFAULT); 2795 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2796 if (error != 0) 2797 return (error); 2798 2799 if (timeout != NULL) 2800 abs_timeout_init2(&timo, timeout); 2801 2802 blocked_readers = 0; 2803 for (;;) { 2804 rv = fueword32(&rwlock->rw_state, &state); 2805 if (rv == -1) { 2806 umtx_key_release(&uq->uq_key); 2807 return (EFAULT); 2808 } 2809 while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) { 2810 rv = casueword32(&rwlock->rw_state, state, 2811 &oldstate, state | URWLOCK_WRITE_OWNER); 2812 if (rv == -1) { 2813 umtx_key_release(&uq->uq_key); 2814 return (EFAULT); 2815 } 2816 if (oldstate == state) { 2817 umtx_key_release(&uq->uq_key); 2818 return (0); 2819 } 2820 state = oldstate; 2821 error = umtxq_check_susp(td); 2822 if (error != 0) 2823 break; 2824 } 2825 2826 if (error) { 2827 if 
(!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) && 2828 blocked_readers != 0) { 2829 umtxq_lock(&uq->uq_key); 2830 umtxq_busy(&uq->uq_key); 2831 umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE); 2832 umtxq_unbusy(&uq->uq_key); 2833 umtxq_unlock(&uq->uq_key); 2834 } 2835 2836 break; 2837 } 2838 2839 /* grab monitor lock */ 2840 umtxq_lock(&uq->uq_key); 2841 umtxq_busy(&uq->uq_key); 2842 umtxq_unlock(&uq->uq_key); 2843 2844 /* 2845 * re-read the state, in case it changed between the try-lock above 2846 * and the check below 2847 */ 2848 rv = fueword32(&rwlock->rw_state, &state); 2849 if (rv == -1) 2850 error = EFAULT; 2851 2852 while (error == 0 && ((state & URWLOCK_WRITE_OWNER) || 2853 URWLOCK_READER_COUNT(state) != 0) && 2854 (state & URWLOCK_WRITE_WAITERS) == 0) { 2855 rv = casueword32(&rwlock->rw_state, state, 2856 &oldstate, state | URWLOCK_WRITE_WAITERS); 2857 if (rv == -1) { 2858 error = EFAULT; 2859 break; 2860 } 2861 if (oldstate == state) 2862 goto sleep; 2863 state = oldstate; 2864 error = umtxq_check_susp(td); 2865 if (error != 0) 2866 break; 2867 } 2868 if (error != 0) { 2869 umtxq_unbusy_unlocked(&uq->uq_key); 2870 break; 2871 } 2872 2873 if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) { 2874 umtxq_unbusy_unlocked(&uq->uq_key); 2875 error = umtxq_check_susp(td); 2876 if (error != 0) 2877 break; 2878 continue; 2879 } 2880 sleep: 2881 rv = fueword32(&rwlock->rw_blocked_writers, 2882 &blocked_writers); 2883 if (rv == -1) { 2884 umtxq_unbusy_unlocked(&uq->uq_key); 2885 error = EFAULT; 2886 break; 2887 } 2888 suword32(&rwlock->rw_blocked_writers, blocked_writers+1); 2889 2890 while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) { 2891 umtxq_lock(&uq->uq_key); 2892 umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE); 2893 umtxq_unbusy(&uq->uq_key); 2894 2895 error = umtxq_sleep(uq, "uwrlck", timeout == NULL ? 2896 NULL : &timo); 2897 2898 umtxq_busy(&uq->uq_key); 2899 umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE); 2900 umtxq_unlock(&uq->uq_key); 2901 if (error) 2902 break; 2903 rv = fueword32(&rwlock->rw_state, &state); 2904 if (rv == -1) { 2905 error = EFAULT; 2906 break; 2907 } 2908 } 2909 2910 rv = fueword32(&rwlock->rw_blocked_writers, 2911 &blocked_writers); 2912 if (rv == -1) { 2913 umtxq_unbusy_unlocked(&uq->uq_key); 2914 error = EFAULT; 2915 break; 2916 } 2917 suword32(&rwlock->rw_blocked_writers, blocked_writers-1); 2918 if (blocked_writers == 1) { 2919 rv = fueword32(&rwlock->rw_state, &state); 2920 if (rv == -1) { 2921 umtxq_unbusy_unlocked(&uq->uq_key); 2922 error = EFAULT; 2923 break; 2924 } 2925 for (;;) { 2926 rv = casueword32(&rwlock->rw_state, state, 2927 &oldstate, state & ~URWLOCK_WRITE_WAITERS); 2928 if (rv == -1) { 2929 error = EFAULT; 2930 break; 2931 } 2932 if (oldstate == state) 2933 break; 2934 state = oldstate; 2935 error1 = umtxq_check_susp(td); 2936 /* 2937 * We are leaving the URWLOCK_WRITE_WAITERS 2938 * behind, but this should not harm the 2939 * correctness. 
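* A stale waiters bit only sends a later unlocker into the kernel for
* a wakeup that finds no sleepers; it cannot cause a lost wakeup.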
2940 */ 2941 if (error1 != 0) { 2942 if (error == 0) 2943 error = error1; 2944 break; 2945 } 2946 } 2947 rv = fueword32(&rwlock->rw_blocked_readers, 2948 &blocked_readers); 2949 if (rv == -1) { 2950 umtxq_unbusy_unlocked(&uq->uq_key); 2951 error = EFAULT; 2952 break; 2953 } 2954 } else 2955 blocked_readers = 0; 2956 2957 umtxq_unbusy_unlocked(&uq->uq_key); 2958 } 2959 2960 umtx_key_release(&uq->uq_key); 2961 if (error == ERESTART) 2962 error = EINTR; 2963 return (error); 2964 } 2965 2966 static int 2967 do_rw_unlock(struct thread *td, struct urwlock *rwlock) 2968 { 2969 struct umtx_q *uq; 2970 uint32_t flags; 2971 int32_t state, oldstate; 2972 int error, rv, q, count; 2973 2974 uq = td->td_umtxq; 2975 error = fueword32(&rwlock->rw_flags, &flags); 2976 if (error == -1) 2977 return (EFAULT); 2978 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); 2979 if (error != 0) 2980 return (error); 2981 2982 error = fueword32(&rwlock->rw_state, &state); 2983 if (error == -1) { 2984 error = EFAULT; 2985 goto out; 2986 } 2987 if (state & URWLOCK_WRITE_OWNER) { 2988 for (;;) { 2989 rv = casueword32(&rwlock->rw_state, state, 2990 &oldstate, state & ~URWLOCK_WRITE_OWNER); 2991 if (rv == -1) { 2992 error = EFAULT; 2993 goto out; 2994 } 2995 if (oldstate != state) { 2996 state = oldstate; 2997 if (!(oldstate & URWLOCK_WRITE_OWNER)) { 2998 error = EPERM; 2999 goto out; 3000 } 3001 error = umtxq_check_susp(td); 3002 if (error != 0) 3003 goto out; 3004 } else 3005 break; 3006 } 3007 } else if (URWLOCK_READER_COUNT(state) != 0) { 3008 for (;;) { 3009 rv = casueword32(&rwlock->rw_state, state, 3010 &oldstate, state - 1); 3011 if (rv == -1) { 3012 error = EFAULT; 3013 goto out; 3014 } 3015 if (oldstate != state) { 3016 state = oldstate; 3017 if (URWLOCK_READER_COUNT(oldstate) == 0) { 3018 error = EPERM; 3019 goto out; 3020 } 3021 error = umtxq_check_susp(td); 3022 if (error != 0) 3023 goto out; 3024 } else 3025 break; 3026 } 3027 } else { 3028 error = EPERM; 3029 goto out; 3030 } 3031 3032 count = 0; 3033 3034 if (!(flags & URWLOCK_PREFER_READER)) { 3035 if (state & URWLOCK_WRITE_WAITERS) { 3036 count = 1; 3037 q = UMTX_EXCLUSIVE_QUEUE; 3038 } else if (state & URWLOCK_READ_WAITERS) { 3039 count = INT_MAX; 3040 q = UMTX_SHARED_QUEUE; 3041 } 3042 } else { 3043 if (state & URWLOCK_READ_WAITERS) { 3044 count = INT_MAX; 3045 q = UMTX_SHARED_QUEUE; 3046 } else if (state & URWLOCK_WRITE_WAITERS) { 3047 count = 1; 3048 q = UMTX_EXCLUSIVE_QUEUE; 3049 } 3050 } 3051 3052 if (count) { 3053 umtxq_lock(&uq->uq_key); 3054 umtxq_busy(&uq->uq_key); 3055 umtxq_signal_queue(&uq->uq_key, count, q); 3056 umtxq_unbusy(&uq->uq_key); 3057 umtxq_unlock(&uq->uq_key); 3058 } 3059 out: 3060 umtx_key_release(&uq->uq_key); 3061 return (error); 3062 } 3063 3064 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3065 static int 3066 do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout) 3067 { 3068 struct abs_timeout timo; 3069 struct umtx_q *uq; 3070 uint32_t flags, count, count1; 3071 int error, rv; 3072 3073 uq = td->td_umtxq; 3074 error = fueword32(&sem->_flags, &flags); 3075 if (error == -1) 3076 return (EFAULT); 3077 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); 3078 if (error != 0) 3079 return (error); 3080 3081 if (timeout != NULL) 3082 abs_timeout_init2(&timo, timeout); 3083 3084 umtxq_lock(&uq->uq_key); 3085 umtxq_busy(&uq->uq_key); 3086 umtxq_insert(uq); 3087 umtxq_unlock(&uq->uq_key); 3088 rv = casueword32(&sem->_has_waiters, 0, &count1, 1); 3089 if (rv == 0) 
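/* No fault; _has_waiters is now set, so re-check the count. */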
3090 rv = fueword32(&sem->_count, &count);
3091 if (rv == -1 || count != 0) {
3092 umtxq_lock(&uq->uq_key);
3093 umtxq_unbusy(&uq->uq_key);
3094 umtxq_remove(uq);
3095 umtxq_unlock(&uq->uq_key);
3096 umtx_key_release(&uq->uq_key);
3097 return (rv == -1 ? EFAULT : 0);
3098 }
3099 umtxq_lock(&uq->uq_key);
3100 umtxq_unbusy(&uq->uq_key);
3101
3102 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo);
3103
3104 if ((uq->uq_flags & UQF_UMTXQ) == 0)
3105 error = 0;
3106 else {
3107 umtxq_remove(uq);
3108 /* A relative timeout cannot be restarted. */
3109 if (error == ERESTART && timeout != NULL &&
3110 (timeout->_flags & UMTX_ABSTIME) == 0)
3111 error = EINTR;
3112 }
3113 umtxq_unlock(&uq->uq_key);
3114 umtx_key_release(&uq->uq_key);
3115 return (error);
3116 }
3117
3118 /*
3119 * Signal a userland semaphore.
3120 */
3121 static int
3122 do_sem_wake(struct thread *td, struct _usem *sem)
3123 {
3124 struct umtx_key key;
3125 int error, cnt;
3126 uint32_t flags;
3127
3128 error = fueword32(&sem->_flags, &flags);
3129 if (error == -1)
3130 return (EFAULT);
3131 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
3132 return (error);
3133 umtxq_lock(&key);
3134 umtxq_busy(&key);
3135 cnt = umtxq_count(&key);
3136 if (cnt > 0) {
3137 /*
3138 * The count is greater than 0, which means the memory is
3139 * still referenced by user code, so it is safe to
3140 * update the _has_waiters flag.
3141 */
3142 if (cnt == 1) {
3143 umtxq_unlock(&key);
3144 error = suword32(&sem->_has_waiters, 0);
3145 umtxq_lock(&key);
3146 if (error == -1)
3147 error = EFAULT;
3148 }
3149 umtxq_signal(&key, 1);
3150 }
3151 umtxq_unbusy(&key);
3152 umtxq_unlock(&key);
3153 umtx_key_release(&key);
3154 return (error);
3155 }
3156 #endif
3157
3158 static int
3159 do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout)
3160 {
3161 struct abs_timeout timo;
3162 struct umtx_q *uq;
3163 uint32_t count, flags;
3164 int error, rv;
3165
3166 uq = td->td_umtxq;
3167 flags = fuword32(&sem->_flags);
3168 error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
3169 if (error != 0)
3170 return (error);
3171
3172 if (timeout != NULL)
3173 abs_timeout_init2(&timo, timeout);
3174
3175 umtxq_lock(&uq->uq_key);
3176 umtxq_busy(&uq->uq_key);
3177 umtxq_insert(uq);
3178 umtxq_unlock(&uq->uq_key);
3179 rv = fueword32(&sem->_count, &count);
3180 if (rv == -1) {
3181 umtxq_lock(&uq->uq_key);
3182 umtxq_unbusy(&uq->uq_key);
3183 umtxq_remove(uq);
3184 umtxq_unlock(&uq->uq_key);
3185 umtx_key_release(&uq->uq_key);
3186 return (EFAULT);
3187 }
3188 for (;;) {
3189 if (USEM_COUNT(count) != 0) {
3190 umtxq_lock(&uq->uq_key);
3191 umtxq_unbusy(&uq->uq_key);
3192 umtxq_remove(uq);
3193 umtxq_unlock(&uq->uq_key);
3194 umtx_key_release(&uq->uq_key);
3195 return (0);
3196 }
3197 if (count == USEM_HAS_WAITERS)
3198 break;
3199 rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS);
3200 if (rv == -1) {
3201 umtxq_lock(&uq->uq_key);
3202 umtxq_unbusy(&uq->uq_key);
3203 umtxq_remove(uq);
3204 umtxq_unlock(&uq->uq_key);
3205 umtx_key_release(&uq->uq_key);
3206 return (EFAULT);
3207 }
3208 if (count == 0)
3209 break;
3210 }
3211 umtxq_lock(&uq->uq_key);
3212 umtxq_unbusy(&uq->uq_key);
3213
3214 error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo);
3215
3216 if ((uq->uq_flags & UQF_UMTXQ) == 0)
3217 error = 0;
3218 else {
3219 umtxq_remove(uq);
3220 /* A relative timeout cannot be restarted.
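Restarting would re-arm the full interval and oversleep, so report EINTR instead.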
*/ 3221 if (error == ERESTART && timeout != NULL && 3222 (timeout->_flags & UMTX_ABSTIME) == 0) 3223 error = EINTR; 3224 } 3225 umtxq_unlock(&uq->uq_key); 3226 umtx_key_release(&uq->uq_key); 3227 return (error); 3228 } 3229 3230 /* 3231 * Signal a userland semaphore. 3232 */ 3233 static int 3234 do_sem2_wake(struct thread *td, struct _usem2 *sem) 3235 { 3236 struct umtx_key key; 3237 int error, cnt, rv; 3238 uint32_t count, flags; 3239 3240 rv = fueword32(&sem->_flags, &flags); 3241 if (rv == -1) 3242 return (EFAULT); 3243 if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0) 3244 return (error); 3245 umtxq_lock(&key); 3246 umtxq_busy(&key); 3247 cnt = umtxq_count(&key); 3248 if (cnt > 0) { 3249 /* 3250 * If this was the last sleeping thread, clear the waiters 3251 * flag in _count. 3252 */ 3253 if (cnt == 1) { 3254 umtxq_unlock(&key); 3255 rv = fueword32(&sem->_count, &count); 3256 while (rv != -1 && count & USEM_HAS_WAITERS) 3257 rv = casueword32(&sem->_count, count, &count, 3258 count & ~USEM_HAS_WAITERS); 3259 if (rv == -1) 3260 error = EFAULT; 3261 umtxq_lock(&key); 3262 } 3263 3264 umtxq_signal(&key, 1); 3265 } 3266 umtxq_unbusy(&key); 3267 umtxq_unlock(&key); 3268 umtx_key_release(&key); 3269 return (error); 3270 } 3271 3272 inline int 3273 umtx_copyin_timeout(const void *addr, struct timespec *tsp) 3274 { 3275 int error; 3276 3277 error = copyin(addr, tsp, sizeof(struct timespec)); 3278 if (error == 0) { 3279 if (tsp->tv_sec < 0 || 3280 tsp->tv_nsec >= 1000000000 || 3281 tsp->tv_nsec < 0) 3282 error = EINVAL; 3283 } 3284 return (error); 3285 } 3286 3287 static inline int 3288 umtx_copyin_umtx_time(const void *addr, size_t size, struct _umtx_time *tp) 3289 { 3290 int error; 3291 3292 if (size <= sizeof(struct timespec)) { 3293 tp->_clockid = CLOCK_REALTIME; 3294 tp->_flags = 0; 3295 error = copyin(addr, &tp->_timeout, sizeof(struct timespec)); 3296 } else 3297 error = copyin(addr, tp, sizeof(struct _umtx_time)); 3298 if (error != 0) 3299 return (error); 3300 if (tp->_timeout.tv_sec < 0 || 3301 tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0) 3302 return (EINVAL); 3303 return (0); 3304 } 3305 3306 static int 3307 __umtx_op_unimpl(struct thread *td, struct _umtx_op_args *uap) 3308 { 3309 3310 return (EOPNOTSUPP); 3311 } 3312 3313 static int 3314 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap) 3315 { 3316 struct _umtx_time timeout, *tm_p; 3317 int error; 3318 3319 if (uap->uaddr2 == NULL) 3320 tm_p = NULL; 3321 else { 3322 error = umtx_copyin_umtx_time( 3323 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3324 if (error != 0) 3325 return (error); 3326 tm_p = &timeout; 3327 } 3328 return (do_wait(td, uap->obj, uap->val, tm_p, 0, 0)); 3329 } 3330 3331 static int 3332 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap) 3333 { 3334 struct _umtx_time timeout, *tm_p; 3335 int error; 3336 3337 if (uap->uaddr2 == NULL) 3338 tm_p = NULL; 3339 else { 3340 error = umtx_copyin_umtx_time( 3341 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3342 if (error != 0) 3343 return (error); 3344 tm_p = &timeout; 3345 } 3346 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); 3347 } 3348 3349 static int 3350 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap) 3351 { 3352 struct _umtx_time *tm_p, timeout; 3353 int error; 3354 3355 if (uap->uaddr2 == NULL) 3356 tm_p = NULL; 3357 else { 3358 error = umtx_copyin_umtx_time( 3359 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3360 if (error != 0) 3361 return (error); 3362 tm_p = &timeout; 
3363 } 3364 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1)); 3365 } 3366 3367 static int 3368 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap) 3369 { 3370 3371 return (kern_umtx_wake(td, uap->obj, uap->val, 0)); 3372 } 3373 3374 #define BATCH_SIZE 128 3375 static int 3376 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap) 3377 { 3378 char *uaddrs[BATCH_SIZE], **upp; 3379 int count, error, i, pos, tocopy; 3380 3381 upp = (char **)uap->obj; 3382 error = 0; 3383 for (count = uap->val, pos = 0; count > 0; count -= tocopy, 3384 pos += tocopy) { 3385 tocopy = MIN(count, BATCH_SIZE); 3386 error = copyin(upp + pos, uaddrs, tocopy * sizeof(char *)); 3387 if (error != 0) 3388 break; 3389 for (i = 0; i < tocopy; ++i) 3390 kern_umtx_wake(td, uaddrs[i], INT_MAX, 1); 3391 maybe_yield(); 3392 } 3393 return (error); 3394 } 3395 3396 static int 3397 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap) 3398 { 3399 3400 return (kern_umtx_wake(td, uap->obj, uap->val, 1)); 3401 } 3402 3403 static int 3404 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap) 3405 { 3406 struct _umtx_time *tm_p, timeout; 3407 int error; 3408 3409 /* Allow a null timespec (wait forever). */ 3410 if (uap->uaddr2 == NULL) 3411 tm_p = NULL; 3412 else { 3413 error = umtx_copyin_umtx_time( 3414 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3415 if (error != 0) 3416 return (error); 3417 tm_p = &timeout; 3418 } 3419 return (do_lock_umutex(td, uap->obj, tm_p, 0)); 3420 } 3421 3422 static int 3423 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap) 3424 { 3425 3426 return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY)); 3427 } 3428 3429 static int 3430 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap) 3431 { 3432 struct _umtx_time *tm_p, timeout; 3433 int error; 3434 3435 /* Allow a null timespec (wait forever). */ 3436 if (uap->uaddr2 == NULL) 3437 tm_p = NULL; 3438 else { 3439 error = umtx_copyin_umtx_time( 3440 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3441 if (error != 0) 3442 return (error); 3443 tm_p = &timeout; 3444 } 3445 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT)); 3446 } 3447 3448 static int 3449 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap) 3450 { 3451 3452 return (do_wake_umutex(td, uap->obj)); 3453 } 3454 3455 static int 3456 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap) 3457 { 3458 3459 return (do_unlock_umutex(td, uap->obj, false)); 3460 } 3461 3462 static int 3463 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap) 3464 { 3465 3466 return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1)); 3467 } 3468 3469 static int 3470 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap) 3471 { 3472 struct timespec *ts, timeout; 3473 int error; 3474 3475 /* Allow a null timespec (wait forever). 
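uaddr1 points at the mutex to release and val carries the CVWAIT_* flags.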
*/ 3476 if (uap->uaddr2 == NULL) 3477 ts = NULL; 3478 else { 3479 error = umtx_copyin_timeout(uap->uaddr2, &timeout); 3480 if (error != 0) 3481 return (error); 3482 ts = &timeout; 3483 } 3484 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); 3485 } 3486 3487 static int 3488 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap) 3489 { 3490 3491 return (do_cv_signal(td, uap->obj)); 3492 } 3493 3494 static int 3495 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap) 3496 { 3497 3498 return (do_cv_broadcast(td, uap->obj)); 3499 } 3500 3501 static int 3502 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap) 3503 { 3504 struct _umtx_time timeout; 3505 int error; 3506 3507 /* Allow a null timespec (wait forever). */ 3508 if (uap->uaddr2 == NULL) { 3509 error = do_rw_rdlock(td, uap->obj, uap->val, 0); 3510 } else { 3511 error = umtx_copyin_umtx_time(uap->uaddr2, 3512 (size_t)uap->uaddr1, &timeout); 3513 if (error != 0) 3514 return (error); 3515 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); 3516 } 3517 return (error); 3518 } 3519 3520 static int 3521 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap) 3522 { 3523 struct _umtx_time timeout; 3524 int error; 3525 3526 /* Allow a null timespec (wait forever). */ 3527 if (uap->uaddr2 == NULL) { 3528 error = do_rw_wrlock(td, uap->obj, 0); 3529 } else { 3530 error = umtx_copyin_umtx_time(uap->uaddr2, 3531 (size_t)uap->uaddr1, &timeout); 3532 if (error != 0) 3533 return (error); 3534 3535 error = do_rw_wrlock(td, uap->obj, &timeout); 3536 } 3537 return (error); 3538 } 3539 3540 static int 3541 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap) 3542 { 3543 3544 return (do_rw_unlock(td, uap->obj)); 3545 } 3546 3547 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3548 static int 3549 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap) 3550 { 3551 struct _umtx_time *tm_p, timeout; 3552 int error; 3553 3554 /* Allow a null timespec (wait forever). */ 3555 if (uap->uaddr2 == NULL) 3556 tm_p = NULL; 3557 else { 3558 error = umtx_copyin_umtx_time( 3559 uap->uaddr2, (size_t)uap->uaddr1, &timeout); 3560 if (error != 0) 3561 return (error); 3562 tm_p = &timeout; 3563 } 3564 return (do_sem_wait(td, uap->obj, tm_p)); 3565 } 3566 3567 static int 3568 __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap) 3569 { 3570 3571 return (do_sem_wake(td, uap->obj)); 3572 } 3573 #endif 3574 3575 static int 3576 __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap) 3577 { 3578 3579 return (do_wake2_umutex(td, uap->obj, uap->val)); 3580 } 3581 3582 static int 3583 __umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap) 3584 { 3585 struct _umtx_time *tm_p, timeout; 3586 int error; 3587 3588 /* Allow a null timespec (wait forever). 
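For timed waits, uaddr1 carries the size of the object at uaddr2, which distinguishes a bare timespec from a full _umtx_time.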
*/
3589 if (uap->uaddr2 == NULL)
3590 tm_p = NULL;
3591 else {
3592 error = umtx_copyin_umtx_time(
3593 uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3594 if (error != 0)
3595 return (error);
3596 tm_p = &timeout;
3597 }
3598 return (do_sem2_wait(td, uap->obj, tm_p));
3599 }
3600
3601 static int
3602 __umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap)
3603 {
3604
3605 return (do_sem2_wake(td, uap->obj));
3606 }
3607
3608 #define USHM_OBJ_UMTX(o) \
3609 ((struct umtx_shm_obj_list *)(&(o)->umtx_data))
3610
3611 #define USHMF_REG_LINKED 0x0001
3612 #define USHMF_OBJ_LINKED 0x0002
3613 struct umtx_shm_reg {
3614 TAILQ_ENTRY(umtx_shm_reg) ushm_reg_link;
3615 LIST_ENTRY(umtx_shm_reg) ushm_obj_link;
3616 struct umtx_key ushm_key;
3617 struct ucred *ushm_cred;
3618 struct shmfd *ushm_obj;
3619 u_int ushm_refcnt;
3620 u_int ushm_flags;
3621 };
3622
3623 LIST_HEAD(umtx_shm_obj_list, umtx_shm_reg);
3624 TAILQ_HEAD(umtx_shm_reg_head, umtx_shm_reg);
3625
3626 static uma_zone_t umtx_shm_reg_zone;
3627 static struct umtx_shm_reg_head umtx_shm_registry[UMTX_CHAINS];
3628 static struct mtx umtx_shm_lock;
3629 static struct umtx_shm_reg_head umtx_shm_reg_delfree =
3630 TAILQ_HEAD_INITIALIZER(umtx_shm_reg_delfree);
3631
3632 static void umtx_shm_free_reg(struct umtx_shm_reg *reg);
3633
3634 static void
3635 umtx_shm_reg_delfree_tq(void *context __unused, int pending __unused)
3636 {
3637 struct umtx_shm_reg_head d;
3638 struct umtx_shm_reg *reg, *reg1;
3639
3640 TAILQ_INIT(&d);
3641 mtx_lock(&umtx_shm_lock);
3642 TAILQ_CONCAT(&d, &umtx_shm_reg_delfree, ushm_reg_link);
3643 mtx_unlock(&umtx_shm_lock);
3644 TAILQ_FOREACH_SAFE(reg, &d, ushm_reg_link, reg1) {
3645 TAILQ_REMOVE(&d, reg, ushm_reg_link);
3646 umtx_shm_free_reg(reg);
3647 }
3648 }
3649
3650 static struct task umtx_shm_reg_delfree_task =
3651 TASK_INITIALIZER(0, umtx_shm_reg_delfree_tq, NULL);
3652
3653 static struct umtx_shm_reg *
3654 umtx_shm_find_reg_locked(const struct umtx_key *key)
3655 {
3656 struct umtx_shm_reg *reg;
3657 struct umtx_shm_reg_head *reg_head;
3658
3659 KASSERT(key->shared, ("umtx_shm_find_reg_locked: private key"));
3660 mtx_assert(&umtx_shm_lock, MA_OWNED);
3661 reg_head = &umtx_shm_registry[key->hash];
3662 TAILQ_FOREACH(reg, reg_head, ushm_reg_link) {
3663 KASSERT(reg->ushm_key.shared,
3664 ("non-shared key on reg %p %d", reg, reg->ushm_key.shared));
3665 if (reg->ushm_key.info.shared.object ==
3666 key->info.shared.object &&
3667 reg->ushm_key.info.shared.offset ==
3668 key->info.shared.offset) {
3669 KASSERT(reg->ushm_key.type == TYPE_SHM, ("TYPE_USHM"));
3670 KASSERT(reg->ushm_refcnt > 0,
3671 ("reg %p refcnt 0 onlist", reg));
3672 KASSERT((reg->ushm_flags & USHMF_REG_LINKED) != 0,
3673 ("reg %p not linked", reg));
3674 reg->ushm_refcnt++;
3675 return (reg);
3676 }
3677 }
3678 return (NULL);
3679 }
3680
3681 static struct umtx_shm_reg *
3682 umtx_shm_find_reg(const struct umtx_key *key)
3683 {
3684 struct umtx_shm_reg *reg;
3685
3686 mtx_lock(&umtx_shm_lock);
3687 reg = umtx_shm_find_reg_locked(key);
3688 mtx_unlock(&umtx_shm_lock);
3689 return (reg);
3690 }
3691
3692 static void
3693 umtx_shm_free_reg(struct umtx_shm_reg *reg)
3694 {
3695
3696 chgumtxcnt(reg->ushm_cred->cr_ruidinfo, -1, 0);
3697 crfree(reg->ushm_cred);
3698 shm_drop(reg->ushm_obj);
3699 uma_zfree(umtx_shm_reg_zone, reg);
3700 }
3701
3702 static bool
3703 umtx_shm_unref_reg_locked(struct umtx_shm_reg *reg, bool force)
3704 {
3705 bool res;
3706
3707 mtx_assert(&umtx_shm_lock, MA_OWNED);
3708 KASSERT(reg->ushm_refcnt > 0, ("ushm_reg %p refcnt 0",
reg));
3709 reg->ushm_refcnt--;
3710 res = reg->ushm_refcnt == 0;
3711 if (res || force) {
3712 if ((reg->ushm_flags & USHMF_REG_LINKED) != 0) {
3713 TAILQ_REMOVE(&umtx_shm_registry[reg->ushm_key.hash],
3714 reg, ushm_reg_link);
3715 reg->ushm_flags &= ~USHMF_REG_LINKED;
3716 }
3717 if ((reg->ushm_flags & USHMF_OBJ_LINKED) != 0) {
3718 LIST_REMOVE(reg, ushm_obj_link);
3719 reg->ushm_flags &= ~USHMF_OBJ_LINKED;
3720 }
3721 }
3722 return (res);
3723 }
3724
3725 static void
3726 umtx_shm_unref_reg(struct umtx_shm_reg *reg, bool force)
3727 {
3728 vm_object_t object;
3729 bool dofree;
3730
3731 if (force) {
3732 object = reg->ushm_obj->shm_object;
3733 VM_OBJECT_WLOCK(object);
3734 object->flags |= OBJ_UMTXDEAD;
3735 VM_OBJECT_WUNLOCK(object);
3736 }
3737 mtx_lock(&umtx_shm_lock);
3738 dofree = umtx_shm_unref_reg_locked(reg, force);
3739 mtx_unlock(&umtx_shm_lock);
3740 if (dofree)
3741 umtx_shm_free_reg(reg);
3742 }
3743
3744 void
3745 umtx_shm_object_init(vm_object_t object)
3746 {
3747
3748 LIST_INIT(USHM_OBJ_UMTX(object));
3749 }
3750
3751 void
3752 umtx_shm_object_terminated(vm_object_t object)
3753 {
3754 struct umtx_shm_reg *reg, *reg1;
3755 bool dofree;
3756
3757 dofree = false;
3758 mtx_lock(&umtx_shm_lock);
3759 LIST_FOREACH_SAFE(reg, USHM_OBJ_UMTX(object), ushm_obj_link, reg1) {
3760 if (umtx_shm_unref_reg_locked(reg, true)) {
3761 TAILQ_INSERT_TAIL(&umtx_shm_reg_delfree, reg,
3762 ushm_reg_link);
3763 dofree = true;
3764 }
3765 }
3766 mtx_unlock(&umtx_shm_lock);
3767 if (dofree)
3768 taskqueue_enqueue(taskqueue_thread, &umtx_shm_reg_delfree_task);
3769 }
3770
3771 static int
3772 umtx_shm_create_reg(struct thread *td, const struct umtx_key *key,
3773 struct umtx_shm_reg **res)
3774 {
3775 struct umtx_shm_reg *reg, *reg1;
3776 struct ucred *cred;
3777 int error;
3778
3779 reg = umtx_shm_find_reg(key);
3780 if (reg != NULL) {
3781 *res = reg;
3782 return (0);
3783 }
3784 cred = td->td_ucred;
3785 if (!chgumtxcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_UMTXP)))
3786 return (ENOMEM);
3787 reg = uma_zalloc(umtx_shm_reg_zone, M_WAITOK | M_ZERO);
3788 reg->ushm_refcnt = 1;
3789 bcopy(key, &reg->ushm_key, sizeof(*key));
3790 reg->ushm_obj = shm_alloc(td->td_ucred, O_RDWR);
3791 reg->ushm_cred = crhold(cred);
3792 error = shm_dotruncate(reg->ushm_obj, PAGE_SIZE);
3793 if (error != 0) {
3794 umtx_shm_free_reg(reg);
3795 return (error);
3796 }
3797 mtx_lock(&umtx_shm_lock);
3798 reg1 = umtx_shm_find_reg_locked(key);
3799 if (reg1 != NULL) {
3800 mtx_unlock(&umtx_shm_lock);
3801 umtx_shm_free_reg(reg);
3802 *res = reg1;
3803 return (0);
3804 }
3805 reg->ushm_refcnt++;
3806 TAILQ_INSERT_TAIL(&umtx_shm_registry[key->hash], reg, ushm_reg_link);
3807 LIST_INSERT_HEAD(USHM_OBJ_UMTX(key->info.shared.object), reg,
3808 ushm_obj_link);
3809 reg->ushm_flags = USHMF_REG_LINKED | USHMF_OBJ_LINKED;
3810 mtx_unlock(&umtx_shm_lock);
3811 *res = reg;
3812 return (0);
3813 }
3814
3815 static int
3816 umtx_shm_alive(struct thread *td, void *addr)
3817 {
3818 vm_map_t map;
3819 vm_map_entry_t entry;
3820 vm_object_t object;
3821 vm_pindex_t pindex;
3822 vm_prot_t prot;
3823 int res, ret;
3824 boolean_t wired;
3825
3826 map = &td->td_proc->p_vmspace->vm_map;
3827 res = vm_map_lookup(&map, (uintptr_t)addr, VM_PROT_READ, &entry,
3828 &object, &pindex, &prot, &wired);
3829 if (res != KERN_SUCCESS)
3830 return (EFAULT);
3831 if (object == NULL)
3832 ret = EINVAL;
3833 else
3834 ret = (object->flags & OBJ_UMTXDEAD) != 0 ?
ENOTTY : 0;
3835 vm_map_lookup_done(map, entry);
3836 return (ret);
3837 }
3838
3839 static void
3840 umtx_shm_init(void)
3841 {
3842 int i;
3843
3844 umtx_shm_reg_zone = uma_zcreate("umtx_shm", sizeof(struct umtx_shm_reg),
3845 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
3846 mtx_init(&umtx_shm_lock, "umtxshm", NULL, MTX_DEF);
3847 for (i = 0; i < nitems(umtx_shm_registry); i++)
3848 TAILQ_INIT(&umtx_shm_registry[i]);
3849 }
3850
3851 static int
3852 umtx_shm(struct thread *td, void *addr, u_int flags)
3853 {
3854 struct umtx_key key;
3855 struct umtx_shm_reg *reg;
3856 struct file *fp;
3857 int error, fd;
3858
3859 if (__bitcount(flags & (UMTX_SHM_CREAT | UMTX_SHM_LOOKUP |
3860 UMTX_SHM_DESTROY| UMTX_SHM_ALIVE)) != 1)
3861 return (EINVAL);
3862 if ((flags & UMTX_SHM_ALIVE) != 0)
3863 return (umtx_shm_alive(td, addr));
3864 error = umtx_key_get(addr, TYPE_SHM, PROCESS_SHARE, &key);
3865 if (error != 0)
3866 return (error);
3867 KASSERT(key.shared == 1, ("non-shared key"));
3868 if ((flags & UMTX_SHM_CREAT) != 0) {
3869 error = umtx_shm_create_reg(td, &key, &reg);
3870 } else {
3871 reg = umtx_shm_find_reg(&key);
3872 if (reg == NULL)
3873 error = ESRCH;
3874 }
3875 umtx_key_release(&key);
3876 if (error != 0)
3877 return (error);
3878 KASSERT(reg != NULL, ("no reg"));
3879 if ((flags & UMTX_SHM_DESTROY) != 0) {
3880 umtx_shm_unref_reg(reg, true);
3881 } else {
3882 #if 0
3883 #ifdef MAC
3884 error = mac_posixshm_check_open(td->td_ucred,
3885 reg->ushm_obj, FFLAGS(O_RDWR));
3886 if (error == 0)
3887 #endif
3888 error = shm_access(reg->ushm_obj, td->td_ucred,
3889 FFLAGS(O_RDWR));
3890 if (error == 0)
3891 #endif
3892 error = falloc_caps(td, &fp, &fd, O_CLOEXEC, NULL);
3893 if (error == 0) {
3894 shm_hold(reg->ushm_obj);
3895 finit(fp, FFLAGS(O_RDWR), DTYPE_SHM, reg->ushm_obj,
3896 &shm_ops);
3897 td->td_retval[0] = fd;
3898 fdrop(fp, td);
3899 }
3900 }
3901 umtx_shm_unref_reg(reg, false);
3902 return (error);
3903 }
3904
3905 static int
3906 __umtx_op_shm(struct thread *td, struct _umtx_op_args *uap)
3907 {
3908
3909 return (umtx_shm(td, uap->uaddr1, uap->val));
3910 }
3911
3912 static int
3913 umtx_robust_lists(struct thread *td, struct umtx_robust_lists_params *rbp)
3914 {
3915
3916 td->td_rb_list = rbp->robust_list_offset;
3917 td->td_rbp_list = rbp->robust_priv_list_offset;
3918 td->td_rb_inact = rbp->robust_inact_offset;
3919 return (0);
3920 }
3921
3922 static int
3923 __umtx_op_robust_lists(struct thread *td, struct _umtx_op_args *uap)
3924 {
3925 struct umtx_robust_lists_params rb;
3926 int error;
3927
3928 if (uap->val > sizeof(rb))
3929 return (EINVAL);
3930 bzero(&rb, sizeof(rb));
3931 error = copyin(uap->uaddr1, &rb, uap->val);
3932 if (error != 0)
3933 return (error);
3934 return (umtx_robust_lists(td, &rb));
3935 }
3936
3937 typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);
3938
3939 static const _umtx_op_func op_table[] = {
3940 [UMTX_OP_RESERVED0] = __umtx_op_unimpl,
3941 [UMTX_OP_RESERVED1] = __umtx_op_unimpl,
3942 [UMTX_OP_WAIT] = __umtx_op_wait,
3943 [UMTX_OP_WAKE] = __umtx_op_wake,
3944 [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex,
3945 [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex,
3946 [UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex,
3947 [UMTX_OP_SET_CEILING] = __umtx_op_set_ceiling,
3948 [UMTX_OP_CV_WAIT] = __umtx_op_cv_wait,
3949 [UMTX_OP_CV_SIGNAL] = __umtx_op_cv_signal,
3950 [UMTX_OP_CV_BROADCAST] = __umtx_op_cv_broadcast,
3951 [UMTX_OP_WAIT_UINT] = __umtx_op_wait_uint,
3952 [UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock,
3953
[UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock, 3954 [UMTX_OP_RW_UNLOCK] = __umtx_op_rw_unlock, 3955 [UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private, 3956 [UMTX_OP_WAKE_PRIVATE] = __umtx_op_wake_private, 3957 [UMTX_OP_MUTEX_WAIT] = __umtx_op_wait_umutex, 3958 [UMTX_OP_MUTEX_WAKE] = __umtx_op_wake_umutex, 3959 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) 3960 [UMTX_OP_SEM_WAIT] = __umtx_op_sem_wait, 3961 [UMTX_OP_SEM_WAKE] = __umtx_op_sem_wake, 3962 #else 3963 [UMTX_OP_SEM_WAIT] = __umtx_op_unimpl, 3964 [UMTX_OP_SEM_WAKE] = __umtx_op_unimpl, 3965 #endif 3966 [UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private, 3967 [UMTX_OP_MUTEX_WAKE2] = __umtx_op_wake2_umutex, 3968 [UMTX_OP_SEM2_WAIT] = __umtx_op_sem2_wait, 3969 [UMTX_OP_SEM2_WAKE] = __umtx_op_sem2_wake, 3970 [UMTX_OP_SHM] = __umtx_op_shm, 3971 [UMTX_OP_ROBUST_LISTS] = __umtx_op_robust_lists, 3972 }; 3973 3974 int 3975 sys__umtx_op(struct thread *td, struct _umtx_op_args *uap) 3976 { 3977 3978 if ((unsigned)uap->op < nitems(op_table)) 3979 return (*op_table[uap->op])(td, uap); 3980 return (EINVAL); 3981 } 3982 3983 #ifdef COMPAT_FREEBSD32 3984 3985 struct timespec32 { 3986 int32_t tv_sec; 3987 int32_t tv_nsec; 3988 }; 3989 3990 struct umtx_time32 { 3991 struct timespec32 timeout; 3992 uint32_t flags; 3993 uint32_t clockid; 3994 }; 3995 3996 static inline int 3997 umtx_copyin_timeout32(void *addr, struct timespec *tsp) 3998 { 3999 struct timespec32 ts32; 4000 int error; 4001 4002 error = copyin(addr, &ts32, sizeof(struct timespec32)); 4003 if (error == 0) { 4004 if (ts32.tv_sec < 0 || 4005 ts32.tv_nsec >= 1000000000 || 4006 ts32.tv_nsec < 0) 4007 error = EINVAL; 4008 else { 4009 tsp->tv_sec = ts32.tv_sec; 4010 tsp->tv_nsec = ts32.tv_nsec; 4011 } 4012 } 4013 return (error); 4014 } 4015 4016 static inline int 4017 umtx_copyin_umtx_time32(const void *addr, size_t size, struct _umtx_time *tp) 4018 { 4019 struct umtx_time32 t32; 4020 int error; 4021 4022 t32.clockid = CLOCK_REALTIME; 4023 t32.flags = 0; 4024 if (size <= sizeof(struct timespec32)) 4025 error = copyin(addr, &t32.timeout, sizeof(struct timespec32)); 4026 else 4027 error = copyin(addr, &t32, sizeof(struct umtx_time32)); 4028 if (error != 0) 4029 return (error); 4030 if (t32.timeout.tv_sec < 0 || 4031 t32.timeout.tv_nsec >= 1000000000 || t32.timeout.tv_nsec < 0) 4032 return (EINVAL); 4033 tp->_timeout.tv_sec = t32.timeout.tv_sec; 4034 tp->_timeout.tv_nsec = t32.timeout.tv_nsec; 4035 tp->_flags = t32.flags; 4036 tp->_clockid = t32.clockid; 4037 return (0); 4038 } 4039 4040 static int 4041 __umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap) 4042 { 4043 struct _umtx_time *tm_p, timeout; 4044 int error; 4045 4046 if (uap->uaddr2 == NULL) 4047 tm_p = NULL; 4048 else { 4049 error = umtx_copyin_umtx_time32(uap->uaddr2, 4050 (size_t)uap->uaddr1, &timeout); 4051 if (error != 0) 4052 return (error); 4053 tm_p = &timeout; 4054 } 4055 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); 4056 } 4057 4058 static int 4059 __umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap) 4060 { 4061 struct _umtx_time *tm_p, timeout; 4062 int error; 4063 4064 /* Allow a null timespec (wait forever). 
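The timeout, if present, must be converted from the 32-bit umtx_time layout.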
*/
4065 if (uap->uaddr2 == NULL)
4066 tm_p = NULL;
4067 else {
4068 error = umtx_copyin_umtx_time32(uap->uaddr2,
4069 (size_t)uap->uaddr1, &timeout);
4070 if (error != 0)
4071 return (error);
4072 tm_p = &timeout;
4073 }
4074 return (do_lock_umutex(td, uap->obj, tm_p, 0));
4075 }
4076
4077 static int
4078 __umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
4079 {
4080 struct _umtx_time *tm_p, timeout;
4081 int error;
4082
4083 /* Allow a null timespec (wait forever). */
4084 if (uap->uaddr2 == NULL)
4085 tm_p = NULL;
4086 else {
4087 error = umtx_copyin_umtx_time32(uap->uaddr2,
4088 (size_t)uap->uaddr1, &timeout);
4089 if (error != 0)
4090 return (error);
4091 tm_p = &timeout;
4092 }
4093 return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT));
4094 }
4095
4096 static int
4097 __umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
4098 {
4099 struct timespec *ts, timeout;
4100 int error;
4101
4102 /* Allow a null timespec (wait forever). */
4103 if (uap->uaddr2 == NULL)
4104 ts = NULL;
4105 else {
4106 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
4107 if (error != 0)
4108 return (error);
4109 ts = &timeout;
4110 }
4111 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
4112 }
4113
4114 static int
4115 __umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap)
4116 {
4117 struct _umtx_time timeout;
4118 int error;
4119
4120 /* Allow a null timespec (wait forever). */
4121 if (uap->uaddr2 == NULL) {
4122 error = do_rw_rdlock(td, uap->obj, uap->val, 0);
4123 } else {
4124 error = umtx_copyin_umtx_time32(uap->uaddr2,
4125 (size_t)uap->uaddr1, &timeout);
4126 if (error != 0)
4127 return (error);
4128 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout);
4129 }
4130 return (error);
4131 }
4132
4133 static int
4134 __umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap)
4135 {
4136 struct _umtx_time timeout;
4137 int error;
4138
4139 /* Allow a null timespec (wait forever). */
4140 if (uap->uaddr2 == NULL) {
4141 error = do_rw_wrlock(td, uap->obj, 0);
4142 } else {
4143 error = umtx_copyin_umtx_time32(uap->uaddr2,
4144 (size_t)uap->uaddr1, &timeout);
4145 if (error != 0)
4146 return (error);
4147 error = do_rw_wrlock(td, uap->obj, &timeout);
4148 }
4149 return (error);
4150 }
4151
4152 static int
4153 __umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap)
4154 {
4155 struct _umtx_time *tm_p, timeout;
4156 int error;
4157
4158 if (uap->uaddr2 == NULL)
4159 tm_p = NULL;
4160 else {
4161 error = umtx_copyin_umtx_time32(
4162 uap->uaddr2, (size_t)uap->uaddr1, &timeout);
4163 if (error != 0)
4164 return (error);
4165 tm_p = &timeout;
4166 }
4167 return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1));
4168 }
4169
4170 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
4171 static int
4172 __umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
4173 {
4174 struct _umtx_time *tm_p, timeout;
4175 int error;
4176
4177 /* Allow a null timespec (wait forever). */
4178 if (uap->uaddr2 == NULL)
4179 tm_p = NULL;
4180 else {
4181 error = umtx_copyin_umtx_time32(uap->uaddr2,
4182 (size_t)uap->uaddr1, &timeout);
4183 if (error != 0)
4184 return (error);
4185 tm_p = &timeout;
4186 }
4187 return (do_sem_wait(td, uap->obj, tm_p));
4188 }
4189 #endif
4190
4191 static int
4192 __umtx_op_sem2_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
4193 {
4194 struct _umtx_time *tm_p, timeout;
4195 int error;
4196
4197 /* Allow a null timespec (wait forever).
*/
4198 if (uap->uaddr2 == NULL)
4199 tm_p = NULL;
4200 else {
4201 error = umtx_copyin_umtx_time32(uap->uaddr2,
4202 (size_t)uap->uaddr1, &timeout);
4203 if (error != 0)
4204 return (error);
4205 tm_p = &timeout;
4206 }
4207 return (do_sem2_wait(td, uap->obj, tm_p));
4208 }
4209
4210 static int
4211 __umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap)
4212 {
4213 uint32_t uaddrs[BATCH_SIZE], *upp;
4214 int count, error, i, pos, tocopy;
4215
4216 upp = (uint32_t *)uap->obj;
4217 error = 0;
4218 for (count = uap->val, pos = 0; count > 0; count -= tocopy,
4219 pos += tocopy) {
4220 tocopy = MIN(count, BATCH_SIZE);
4221 error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t));
4222 if (error != 0)
4223 break;
4224 for (i = 0; i < tocopy; ++i)
4225 kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i],
4226 INT_MAX, 1);
4227 maybe_yield();
4228 }
4229 return (error);
4230 }
4231
4232 struct umtx_robust_lists_params_compat32 {
4233 uint32_t robust_list_offset;
4234 uint32_t robust_priv_list_offset;
4235 uint32_t robust_inact_offset;
4236 };
4237
4238 static int
4239 __umtx_op_robust_lists_compat32(struct thread *td, struct _umtx_op_args *uap)
4240 {
4241 struct umtx_robust_lists_params rb;
4242 struct umtx_robust_lists_params_compat32 rb32;
4243 int error;
4244
4245 if (uap->val > sizeof(rb32))
4246 return (EINVAL);
4247 bzero(&rb, sizeof(rb));
4248 bzero(&rb32, sizeof(rb32));
4249 error = copyin(uap->uaddr1, &rb32, uap->val);
4250 if (error != 0)
4251 return (error);
4252 rb.robust_list_offset = rb32.robust_list_offset;
4253 rb.robust_priv_list_offset = rb32.robust_priv_list_offset;
4254 rb.robust_inact_offset = rb32.robust_inact_offset;
4255 return (umtx_robust_lists(td, &rb));
4256 }
4257
4258 static const _umtx_op_func op_table_compat32[] = {
4259 [UMTX_OP_RESERVED0] = __umtx_op_unimpl,
4260 [UMTX_OP_RESERVED1] = __umtx_op_unimpl,
4261 [UMTX_OP_WAIT] = __umtx_op_wait_compat32,
4262 [UMTX_OP_WAKE] = __umtx_op_wake,
4263 [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex,
4264 [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex_compat32,
4265 [UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex,
4266 [UMTX_OP_SET_CEILING] = __umtx_op_set_ceiling,
4267 [UMTX_OP_CV_WAIT] = __umtx_op_cv_wait_compat32,
4268 [UMTX_OP_CV_SIGNAL] = __umtx_op_cv_signal,
4269 [UMTX_OP_CV_BROADCAST] = __umtx_op_cv_broadcast,
4270 [UMTX_OP_WAIT_UINT] = __umtx_op_wait_compat32,
4271 [UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock_compat32,
4272 [UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock_compat32,
4273 [UMTX_OP_RW_UNLOCK] = __umtx_op_rw_unlock,
4274 [UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private_compat32,
4275 [UMTX_OP_WAKE_PRIVATE] = __umtx_op_wake_private,
4276 [UMTX_OP_MUTEX_WAIT] = __umtx_op_wait_umutex_compat32,
4277 [UMTX_OP_MUTEX_WAKE] = __umtx_op_wake_umutex,
4278 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
4279 [UMTX_OP_SEM_WAIT] = __umtx_op_sem_wait_compat32,
4280 [UMTX_OP_SEM_WAKE] = __umtx_op_sem_wake,
4281 #else
4282 [UMTX_OP_SEM_WAIT] = __umtx_op_unimpl,
4283 [UMTX_OP_SEM_WAKE] = __umtx_op_unimpl,
4284 #endif
4285 [UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private32,
4286 [UMTX_OP_MUTEX_WAKE2] = __umtx_op_wake2_umutex,
4287 [UMTX_OP_SEM2_WAIT] = __umtx_op_sem2_wait_compat32,
4288 [UMTX_OP_SEM2_WAKE] = __umtx_op_sem2_wake,
4289 [UMTX_OP_SHM] = __umtx_op_shm,
4290 [UMTX_OP_ROBUST_LISTS] = __umtx_op_robust_lists_compat32,
4291 };
4292
4293 int
4294 freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
4295 {
4296
4297 if ((unsigned)uap->op < nitems(op_table_compat32))
{
4298 return (*op_table_compat32[uap->op])(td,
4299 (struct _umtx_op_args *)uap);
4300 }
4301 return (EINVAL);
4302 }
4303 #endif
4304
4305 void
4306 umtx_thread_init(struct thread *td)
4307 {
4308
4309 td->td_umtxq = umtxq_alloc();
4310 td->td_umtxq->uq_thread = td;
4311 }
4312
4313 void
4314 umtx_thread_fini(struct thread *td)
4315 {
4316
4317 umtxq_free(td->td_umtxq);
4318 }
4319
4320 /*
4321 * Called when a new thread is created, e.g. by fork().
4322 */
4323 void
4324 umtx_thread_alloc(struct thread *td)
4325 {
4326 struct umtx_q *uq;
4327
4328 uq = td->td_umtxq;
4329 uq->uq_inherited_pri = PRI_MAX;
4330
4331 KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
4332 KASSERT(uq->uq_thread == td, ("uq_thread != td"));
4333 KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
4334 KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
4335 }
4336
4337 /*
4338 * exec() hook.
4339 *
4340 * Clear the robust lists for all of the process's threads, without
4341 * delaying the cleanup to the thread_exit hook, since the relevant
4342 * address space is destroyed right now.
4343 */
4344 static void
4345 umtx_exec_hook(void *arg __unused, struct proc *p,
4346 struct image_params *imgp __unused)
4347 {
4348 struct thread *td;
4349
4350 KASSERT(p == curproc, ("need curproc"));
4351 PROC_LOCK(p);
4352 KASSERT((p->p_flag & P_HADTHREADS) == 0 ||
4353 (p->p_flag & P_STOPPED_SINGLE) != 0,
4354 ("curproc must be single-threaded"));
4355 FOREACH_THREAD_IN_PROC(p, td) {
4356 KASSERT(td == curthread ||
4357 ((td->td_flags & TDF_BOUNDARY) != 0 && TD_IS_SUSPENDED(td)),
4358 ("running thread %p %p", p, td));
4359 PROC_UNLOCK(p);
4360 umtx_thread_cleanup(td);
4361 PROC_LOCK(p);
4362 td->td_rb_list = td->td_rbp_list = td->td_rb_inact = 0;
4363 }
4364 PROC_UNLOCK(p);
4365 }
4366
4367 /*
4368 * thread_exit() hook.
4369 */
4370 void
4371 umtx_thread_exit(struct thread *td)
4372 {
4373
4374 umtx_thread_cleanup(td);
4375 }
4376
4377 static int
4378 umtx_read_uptr(struct thread *td, uintptr_t ptr, uintptr_t *res)
4379 {
4380 u_long res1;
4381 #ifdef COMPAT_FREEBSD32
4382 uint32_t res32;
4383 #endif
4384 int error;
4385
4386 #ifdef COMPAT_FREEBSD32
4387 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
4388 error = fueword32((void *)ptr, &res32);
4389 if (error == 0)
4390 res1 = res32;
4391 } else
4392 #endif
4393 {
4394 error = fueword((void *)ptr, &res1);
4395 }
4396 if (error == 0)
4397 *res = res1;
4398 else
4399 error = EFAULT;
4400 return (error);
4401 }
4402
4403 static void
4404 umtx_read_rb_list(struct thread *td, struct umutex *m, uintptr_t *rb_list)
4405 {
4406 #ifdef COMPAT_FREEBSD32
4407 struct umutex32 m32;
4408
4409 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
4410 memcpy(&m32, m, sizeof(m32));
4411 *rb_list = m32.m_rb_lnk;
4412 } else
4413 #endif
4414 *rb_list = m->m_rb_lnk;
4415 }
4416
4417 static int
4418 umtx_handle_rb(struct thread *td, uintptr_t rbp, uintptr_t *rb_list, bool inact)
4419 {
4420 struct umutex m;
4421 int error;
4422
4423 KASSERT(td->td_proc == curproc, ("need current vmspace"));
4424 error = copyin((void *)rbp, &m, sizeof(m));
4425 if (error != 0)
4426 return (error);
4427 if (rb_list != NULL)
4428 umtx_read_rb_list(td, &m, rb_list);
4429 if ((m.m_flags & UMUTEX_ROBUST) == 0)
4430 return (EINVAL);
4431 if ((m.m_owner & ~UMUTEX_CONTESTED) != td->td_tid)
4432 /* inact is cleared after unlock, allow the inconsistency */
4433 return (inact ?
0 : EINVAL); 4434 return (do_unlock_umutex(td, (struct umutex *)rbp, true)); 4435 } 4436 4437 static void 4438 umtx_cleanup_rb_list(struct thread *td, uintptr_t rb_list, uintptr_t *rb_inact, 4439 const char *name) 4440 { 4441 int error, i; 4442 uintptr_t rbp; 4443 bool inact; 4444 4445 if (rb_list == 0) 4446 return; 4447 error = umtx_read_uptr(td, rb_list, &rbp); 4448 for (i = 0; error == 0 && rbp != 0 && i < umtx_max_rb; i++) { 4449 if (rbp == *rb_inact) { 4450 inact = true; 4451 *rb_inact = 0; 4452 } else 4453 inact = false; 4454 error = umtx_handle_rb(td, rbp, &rbp, inact); 4455 } 4456 if (i == umtx_max_rb && umtx_verbose_rb) { 4457 uprintf("comm %s pid %d: reached umtx %smax rb %d\n", 4458 td->td_proc->p_comm, td->td_proc->p_pid, name, umtx_max_rb); 4459 } 4460 if (error != 0 && umtx_verbose_rb) { 4461 uprintf("comm %s pid %d: handling %srb error %d\n", 4462 td->td_proc->p_comm, td->td_proc->p_pid, name, error); 4463 } 4464 } 4465 4466 /* 4467 * Clean up umtx data. 4468 */ 4469 static void 4470 umtx_thread_cleanup(struct thread *td) 4471 { 4472 struct umtx_q *uq; 4473 struct umtx_pi *pi; 4474 uintptr_t rb_inact; 4475 4476 /* 4477 * Disown pi mutexes. 4478 */ 4479 uq = td->td_umtxq; 4480 if (uq != NULL) { 4481 mtx_lock(&umtx_lock); 4482 uq->uq_inherited_pri = PRI_MAX; 4483 while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) { 4484 pi->pi_owner = NULL; 4485 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link); 4486 } 4487 mtx_unlock(&umtx_lock); 4488 thread_lock(td); 4489 sched_lend_user_prio(td, PRI_MAX); 4490 thread_unlock(td); 4491 } 4492 4493 /* 4494 * Handle terminated robust mutexes. Must be done after 4495 * robust pi disown, otherwise unlock could see unowned 4496 * entries. 4497 */ 4498 rb_inact = td->td_rb_inact; 4499 if (rb_inact != 0) 4500 (void)umtx_read_uptr(td, rb_inact, &rb_inact); 4501 umtx_cleanup_rb_list(td, td->td_rb_list, &rb_inact, ""); 4502 umtx_cleanup_rb_list(td, td->td_rbp_list, &rb_inact, "priv "); 4503 if (rb_inact != 0) 4504 (void)umtx_handle_rb(td, rb_inact, NULL, true); 4505 } 4506