1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2009-2021 Dmitry Chagin <dchagin@FreeBSD.org> 5 * Copyright (c) 2008 Roman Divacky 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/param.h> 30 #include <sys/imgact.h> 31 #include <sys/imgact_elf.h> 32 #include <sys/ktr.h> 33 #include <sys/lock.h> 34 #include <sys/mutex.h> 35 #include <sys/priv.h> 36 #include <sys/proc.h> 37 #include <sys/sched.h> 38 #include <sys/sysent.h> 39 #include <sys/vnode.h> 40 #include <sys/umtxvar.h> 41 42 #ifdef COMPAT_LINUX32 43 #include <machine/../linux32/linux.h> 44 #include <machine/../linux32/linux32_proto.h> 45 #else 46 #include <machine/../linux/linux.h> 47 #include <machine/../linux/linux_proto.h> 48 #endif 49 #include <compat/linux/linux_emul.h> 50 #include <compat/linux/linux_futex.h> 51 #include <compat/linux/linux_misc.h> 52 #include <compat/linux/linux_time.h> 53 #include <compat/linux/linux_util.h> 54 55 #define FUTEX_SHARED 0x8 /* shared futex */ 56 #define FUTEX_UNOWNED 0 57 58 #define GET_SHARED(a) (a->flags & FUTEX_SHARED) ? AUTO_SHARE : THREAD_SHARE 59 60 static int futex_atomic_op(struct thread *, int, uint32_t *, int *); 61 static int handle_futex_death(struct thread *td, struct linux_emuldata *, 62 uint32_t *, unsigned int, bool); 63 static int fetch_robust_entry(struct linux_robust_list **, 64 struct linux_robust_list **, unsigned int *); 65 66 struct linux_futex_args { 67 uint32_t *uaddr; 68 int32_t op; 69 uint32_t flags; 70 bool clockrt; 71 uint32_t val; 72 struct timespec *ts; 73 uint32_t *uaddr2; 74 uint32_t val3; 75 bool val3_compare; 76 struct timespec kts; 77 }; 78 79 static inline int futex_key_get(const void *, int, int, struct umtx_key *); 80 static void linux_umtx_abs_timeout_init(struct umtx_abs_timeout *, 81 struct linux_futex_args *); 82 static int linux_futex(struct thread *, struct linux_futex_args *); 83 static int linux_futex_wait(struct thread *, struct linux_futex_args *); 84 static int linux_futex_wake(struct thread *, struct linux_futex_args *); 85 static int linux_futex_requeue(struct thread *, struct linux_futex_args *); 86 static int linux_futex_wakeop(struct thread *, struct linux_futex_args *); 87 static int linux_futex_lock_pi(struct thread *, bool, struct linux_futex_args *); 88 static int linux_futex_unlock_pi(struct thread *, bool, 89 struct linux_futex_args *); 90 static int futex_wake_pi(struct thread *, uint32_t *, bool); 91 92 static int 93 futex_key_get(const void *uaddr, int type, int share, struct umtx_key *key) 94 { 95 96 /* Check that futex address is a 32bit aligned. */ 97 if (!__is_aligned(uaddr, sizeof(uint32_t))) 98 return (EINVAL); 99 return (umtx_key_get(uaddr, type, share, key)); 100 } 101 102 int 103 futex_wake(struct thread *td, uint32_t *uaddr, int val, bool shared) 104 { 105 struct linux_futex_args args; 106 107 bzero(&args, sizeof(args)); 108 args.op = LINUX_FUTEX_WAKE; 109 args.uaddr = uaddr; 110 args.flags = shared == true ? FUTEX_SHARED : 0; 111 args.val = val; 112 args.val3 = FUTEX_BITSET_MATCH_ANY; 113 114 return (linux_futex_wake(td, &args)); 115 } 116 117 static int 118 futex_wake_pi(struct thread *td, uint32_t *uaddr, bool shared) 119 { 120 struct linux_futex_args args; 121 122 bzero(&args, sizeof(args)); 123 args.op = LINUX_FUTEX_UNLOCK_PI; 124 args.uaddr = uaddr; 125 args.flags = shared == true ? FUTEX_SHARED : 0; 126 127 return (linux_futex_unlock_pi(td, true, &args)); 128 } 129 130 static int 131 futex_atomic_op(struct thread *td, int encoded_op, uint32_t *uaddr, 132 int *res) 133 { 134 int op = (encoded_op >> 28) & 7; 135 int cmp = (encoded_op >> 24) & 15; 136 int oparg = (encoded_op << 8) >> 20; 137 int cmparg = (encoded_op << 20) >> 20; 138 int oldval = 0, ret; 139 140 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 141 oparg = 1 << oparg; 142 143 switch (op) { 144 case FUTEX_OP_SET: 145 ret = futex_xchgl(oparg, uaddr, &oldval); 146 break; 147 case FUTEX_OP_ADD: 148 ret = futex_addl(oparg, uaddr, &oldval); 149 break; 150 case FUTEX_OP_OR: 151 ret = futex_orl(oparg, uaddr, &oldval); 152 break; 153 case FUTEX_OP_ANDN: 154 ret = futex_andl(~oparg, uaddr, &oldval); 155 break; 156 case FUTEX_OP_XOR: 157 ret = futex_xorl(oparg, uaddr, &oldval); 158 break; 159 default: 160 ret = ENOSYS; 161 break; 162 } 163 164 if (ret != 0) 165 return (ret); 166 167 switch (cmp) { 168 case FUTEX_OP_CMP_EQ: 169 *res = (oldval == cmparg); 170 break; 171 case FUTEX_OP_CMP_NE: 172 *res = (oldval != cmparg); 173 break; 174 case FUTEX_OP_CMP_LT: 175 *res = (oldval < cmparg); 176 break; 177 case FUTEX_OP_CMP_GE: 178 *res = (oldval >= cmparg); 179 break; 180 case FUTEX_OP_CMP_LE: 181 *res = (oldval <= cmparg); 182 break; 183 case FUTEX_OP_CMP_GT: 184 *res = (oldval > cmparg); 185 break; 186 default: 187 ret = ENOSYS; 188 } 189 190 return (ret); 191 } 192 193 static int 194 linux_futex(struct thread *td, struct linux_futex_args *args) 195 { 196 struct linux_pemuldata *pem; 197 struct proc *p; 198 199 if (args->op & LINUX_FUTEX_PRIVATE_FLAG) { 200 args->flags = 0; 201 args->op &= ~LINUX_FUTEX_PRIVATE_FLAG; 202 } else 203 args->flags = FUTEX_SHARED; 204 205 args->clockrt = args->op & LINUX_FUTEX_CLOCK_REALTIME; 206 args->op = args->op & ~LINUX_FUTEX_CLOCK_REALTIME; 207 208 if (args->clockrt && 209 args->op != LINUX_FUTEX_WAIT_BITSET && 210 args->op != LINUX_FUTEX_WAIT_REQUEUE_PI && 211 args->op != LINUX_FUTEX_LOCK_PI2) 212 return (ENOSYS); 213 214 switch (args->op) { 215 case LINUX_FUTEX_WAIT: 216 args->val3 = FUTEX_BITSET_MATCH_ANY; 217 /* FALLTHROUGH */ 218 219 case LINUX_FUTEX_WAIT_BITSET: 220 LINUX_CTR3(sys_futex, "WAIT uaddr %p val 0x%x bitset 0x%x", 221 args->uaddr, args->val, args->val3); 222 223 return (linux_futex_wait(td, args)); 224 225 case LINUX_FUTEX_WAKE: 226 args->val3 = FUTEX_BITSET_MATCH_ANY; 227 /* FALLTHROUGH */ 228 229 case LINUX_FUTEX_WAKE_BITSET: 230 LINUX_CTR3(sys_futex, "WAKE uaddr %p nrwake 0x%x bitset 0x%x", 231 args->uaddr, args->val, args->val3); 232 233 return (linux_futex_wake(td, args)); 234 235 case LINUX_FUTEX_REQUEUE: 236 /* 237 * Glibc does not use this operation since version 2.3.3, 238 * as it is racy and replaced by FUTEX_CMP_REQUEUE operation. 239 * Glibc versions prior to 2.3.3 fall back to FUTEX_WAKE when 240 * FUTEX_REQUEUE returned EINVAL. 241 */ 242 pem = pem_find(td->td_proc); 243 if ((pem->flags & LINUX_XDEPR_REQUEUEOP) == 0) { 244 linux_msg(td, "unsupported FUTEX_REQUEUE"); 245 pem->flags |= LINUX_XDEPR_REQUEUEOP; 246 } 247 248 /* 249 * The above is true, however musl libc does make use of the 250 * futex requeue operation, allow operation for brands which 251 * set LINUX_BI_FUTEX_REQUEUE bit of Brandinfo flags. 252 */ 253 p = td->td_proc; 254 Elf_Brandinfo *bi = p->p_elf_brandinfo; 255 if (bi == NULL || ((bi->flags & LINUX_BI_FUTEX_REQUEUE)) == 0) 256 return (EINVAL); 257 args->val3_compare = false; 258 /* FALLTHROUGH */ 259 260 case LINUX_FUTEX_CMP_REQUEUE: 261 LINUX_CTR5(sys_futex, "CMP_REQUEUE uaddr %p " 262 "nrwake 0x%x uval 0x%x uaddr2 %p nrequeue 0x%x", 263 args->uaddr, args->val, args->val3, args->uaddr2, 264 args->ts); 265 266 return (linux_futex_requeue(td, args)); 267 268 case LINUX_FUTEX_WAKE_OP: 269 LINUX_CTR5(sys_futex, "WAKE_OP " 270 "uaddr %p nrwake 0x%x uaddr2 %p op 0x%x nrwake2 0x%x", 271 args->uaddr, args->val, args->uaddr2, args->val3, 272 args->ts); 273 274 return (linux_futex_wakeop(td, args)); 275 276 case LINUX_FUTEX_LOCK_PI: 277 args->clockrt = true; 278 /* FALLTHROUGH */ 279 280 case LINUX_FUTEX_LOCK_PI2: 281 LINUX_CTR2(sys_futex, "LOCKPI uaddr %p val 0x%x", 282 args->uaddr, args->val); 283 284 return (linux_futex_lock_pi(td, false, args)); 285 286 case LINUX_FUTEX_UNLOCK_PI: 287 LINUX_CTR1(sys_futex, "UNLOCKPI uaddr %p", 288 args->uaddr); 289 290 return (linux_futex_unlock_pi(td, false, args)); 291 292 case LINUX_FUTEX_TRYLOCK_PI: 293 LINUX_CTR1(sys_futex, "TRYLOCKPI uaddr %p", 294 args->uaddr); 295 296 return (linux_futex_lock_pi(td, true, args)); 297 298 /* 299 * Current implementation of FUTEX_WAIT_REQUEUE_PI and FUTEX_CMP_REQUEUE_PI 300 * can't be used anymore to implement conditional variables. 301 * A detailed explanation can be found here: 302 * 303 * https://sourceware.org/bugzilla/show_bug.cgi?id=13165 304 * and here http://austingroupbugs.net/view.php?id=609 305 * 306 * And since commit 307 * https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=ed19993b5b0d05d62cc883571519a67dae481a14 308 * glibc does not use them. 309 */ 310 case LINUX_FUTEX_WAIT_REQUEUE_PI: 311 /* not yet implemented */ 312 pem = pem_find(td->td_proc); 313 if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) { 314 linux_msg(td, "unsupported FUTEX_WAIT_REQUEUE_PI"); 315 pem->flags |= LINUX_XUNSUP_FUTEXPIOP; 316 } 317 return (ENOSYS); 318 319 case LINUX_FUTEX_CMP_REQUEUE_PI: 320 /* not yet implemented */ 321 pem = pem_find(td->td_proc); 322 if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) { 323 linux_msg(td, "unsupported FUTEX_CMP_REQUEUE_PI"); 324 pem->flags |= LINUX_XUNSUP_FUTEXPIOP; 325 } 326 return (ENOSYS); 327 328 default: 329 linux_msg(td, "unsupported futex op %d", args->op); 330 return (ENOSYS); 331 } 332 } 333 334 /* 335 * pi protocol: 336 * - 0 futex word value means unlocked. 337 * - TID futex word value means locked. 338 * Userspace uses atomic ops to lock/unlock these futexes without entering the 339 * kernel. If the lock-acquire fastpath fails, (transition from 0 to TID fails), 340 * then FUTEX_LOCK_PI is called. 341 * The kernel atomically set FUTEX_WAITERS bit in the futex word value, if no 342 * other waiters exists looks up the thread that owns the futex (it has put its 343 * own TID into the futex value) and made this thread the owner of the internal 344 * pi-aware lock object (mutex). Then the kernel tries to lock the internal lock 345 * object, on which it blocks. Once it returns, it has the mutex acquired, and it 346 * sets the futex value to its own TID and returns (futex value contains 347 * FUTEX_WAITERS|TID). 348 * The unlock fastpath would fail (because the FUTEX_WAITERS bit is set) and 349 * FUTEX_UNLOCK_PI will be called. 350 * If a futex is found to be held at exit time, the kernel sets the OWNER_DIED 351 * bit of the futex word and wakes up the next futex waiter (if any), WAITERS 352 * bit is preserved (if any). 353 * If OWNER_DIED bit is set the kernel sanity checks the futex word value against 354 * the internal futex state and if correct, acquire futex. 355 */ 356 static int 357 linux_futex_lock_pi(struct thread *td, bool try, struct linux_futex_args *args) 358 { 359 struct umtx_abs_timeout timo; 360 struct linux_emuldata *em; 361 struct umtx_pi *pi, *new_pi; 362 struct thread *td1; 363 struct umtx_q *uq; 364 int error, rv; 365 uint32_t owner, old_owner; 366 367 em = em_find(td); 368 uq = td->td_umtxq; 369 error = futex_key_get(args->uaddr, TYPE_PI_FUTEX, GET_SHARED(args), 370 &uq->uq_key); 371 if (error != 0) 372 return (error); 373 if (args->ts != NULL) 374 linux_umtx_abs_timeout_init(&timo, args); 375 376 umtxq_lock(&uq->uq_key); 377 pi = umtx_pi_lookup(&uq->uq_key); 378 if (pi == NULL) { 379 new_pi = umtx_pi_alloc(M_NOWAIT); 380 if (new_pi == NULL) { 381 umtxq_unlock(&uq->uq_key); 382 new_pi = umtx_pi_alloc(M_WAITOK); 383 umtxq_lock(&uq->uq_key); 384 pi = umtx_pi_lookup(&uq->uq_key); 385 if (pi != NULL) { 386 umtx_pi_free(new_pi); 387 new_pi = NULL; 388 } 389 } 390 if (new_pi != NULL) { 391 new_pi->pi_key = uq->uq_key; 392 umtx_pi_insert(new_pi); 393 pi = new_pi; 394 } 395 } 396 umtx_pi_ref(pi); 397 umtxq_unlock(&uq->uq_key); 398 for (;;) { 399 /* Try uncontested case first. */ 400 rv = casueword32(args->uaddr, FUTEX_UNOWNED, &owner, em->em_tid); 401 /* The acquire succeeded. */ 402 if (rv == 0) { 403 error = 0; 404 break; 405 } 406 if (rv == -1) { 407 error = EFAULT; 408 break; 409 } 410 411 /* 412 * Nobody owns it, but the acquire failed. This can happen 413 * with ll/sc atomic. 414 */ 415 if (owner == FUTEX_UNOWNED) { 416 error = thread_check_susp(td, true); 417 if (error != 0) 418 break; 419 continue; 420 } 421 422 /* 423 * Avoid overwriting a possible error from sleep due 424 * to the pending signal with suspension check result. 425 */ 426 if (error == 0) { 427 error = thread_check_susp(td, true); 428 if (error != 0) 429 break; 430 } 431 432 /* The futex word at *uaddr is already locked by the caller. */ 433 if ((owner & FUTEX_TID_MASK) == em->em_tid) { 434 error = EDEADLK; 435 break; 436 } 437 438 /* 439 * Futex owner died, handle_futex_death() set the OWNER_DIED bit 440 * and clear tid. Try to acquire it. 441 */ 442 if ((owner & FUTEX_TID_MASK) == FUTEX_UNOWNED) { 443 old_owner = owner; 444 owner = owner & (FUTEX_WAITERS | FUTEX_OWNER_DIED); 445 owner |= em->em_tid; 446 rv = casueword32(args->uaddr, old_owner, &owner, owner); 447 if (rv == -1) { 448 error = EFAULT; 449 break; 450 } 451 if (rv == 1) { 452 if (error == 0) { 453 error = thread_check_susp(td, true); 454 if (error != 0) 455 break; 456 } 457 458 /* 459 * If this failed the lock could 460 * changed, restart. 461 */ 462 continue; 463 } 464 465 umtxq_lock(&uq->uq_key); 466 umtxq_busy(&uq->uq_key); 467 error = umtx_pi_claim(pi, td); 468 umtxq_unbusy(&uq->uq_key); 469 umtxq_unlock(&uq->uq_key); 470 if (error != 0) { 471 /* 472 * Since we're going to return an 473 * error, restore the futex to its 474 * previous, unowned state to avoid 475 * compounding the problem. 476 */ 477 (void)casuword32(args->uaddr, owner, old_owner); 478 } 479 break; 480 } 481 482 /* 483 * Inconsistent state: OWNER_DIED is set and tid is not 0. 484 * Linux does some checks of futex state, we return EINVAL, 485 * as the user space can take care of this. 486 */ 487 if ((owner & FUTEX_OWNER_DIED) != FUTEX_UNOWNED) { 488 error = EINVAL; 489 break; 490 } 491 492 if (try != 0) { 493 error = EBUSY; 494 break; 495 } 496 497 /* 498 * If we caught a signal, we have retried and now 499 * exit immediately. 500 */ 501 if (error != 0) 502 break; 503 504 umtxq_lock(&uq->uq_key); 505 umtxq_busy(&uq->uq_key); 506 umtxq_unlock(&uq->uq_key); 507 508 /* 509 * Set the contested bit so that a release in user space knows 510 * to use the system call for unlock. If this fails either some 511 * one else has acquired the lock or it has been released. 512 */ 513 rv = casueword32(args->uaddr, owner, &owner, 514 owner | FUTEX_WAITERS); 515 if (rv == -1) { 516 umtxq_unbusy_unlocked(&uq->uq_key); 517 error = EFAULT; 518 break; 519 } 520 if (rv == 1) { 521 umtxq_unbusy_unlocked(&uq->uq_key); 522 error = thread_check_susp(td, true); 523 if (error != 0) 524 break; 525 526 /* 527 * The lock changed and we need to retry or we 528 * lost a race to the thread unlocking the umtx. 529 */ 530 continue; 531 } 532 533 /* 534 * Substitute Linux thread id by native thread id to 535 * avoid refactoring code of umtxq_sleep_pi(). 536 */ 537 td1 = linux_tdfind(td, owner & FUTEX_TID_MASK, -1); 538 if (td1 != NULL) { 539 owner = td1->td_tid; 540 PROC_UNLOCK(td1->td_proc); 541 } else { 542 umtxq_unbusy_unlocked(&uq->uq_key); 543 error = EINVAL; 544 break; 545 } 546 547 umtxq_lock(&uq->uq_key); 548 549 /* We set the contested bit, sleep. */ 550 error = umtxq_sleep_pi(uq, pi, owner, "futexp", 551 args->ts == NULL ? NULL : &timo, 552 (args->flags & FUTEX_SHARED) != 0); 553 if (error != 0) 554 continue; 555 556 error = thread_check_susp(td, false); 557 if (error != 0) 558 break; 559 } 560 561 umtxq_lock(&uq->uq_key); 562 umtx_pi_unref(pi); 563 umtxq_unlock(&uq->uq_key); 564 umtx_key_release(&uq->uq_key); 565 return (error); 566 } 567 568 static int 569 linux_futex_unlock_pi(struct thread *td, bool rb, struct linux_futex_args *args) 570 { 571 struct linux_emuldata *em; 572 struct umtx_key key; 573 uint32_t old, owner, new_owner; 574 int count, error; 575 576 em = em_find(td); 577 578 /* 579 * Make sure we own this mtx. 580 */ 581 error = fueword32(args->uaddr, &owner); 582 if (error == -1) 583 return (EFAULT); 584 if (!rb && (owner & FUTEX_TID_MASK) != em->em_tid) 585 return (EPERM); 586 587 error = futex_key_get(args->uaddr, TYPE_PI_FUTEX, GET_SHARED(args), &key); 588 if (error != 0) 589 return (error); 590 umtxq_lock(&key); 591 umtxq_busy(&key); 592 error = umtx_pi_drop(td, &key, rb, &count); 593 if (error != 0 || rb) { 594 umtxq_unbusy(&key); 595 umtxq_unlock(&key); 596 umtx_key_release(&key); 597 return (error); 598 } 599 umtxq_unlock(&key); 600 601 /* 602 * When unlocking the futex, it must be marked as unowned if 603 * there is zero or one thread only waiting for it. 604 * Otherwise, it must be marked as contested. 605 */ 606 if (count > 1) 607 new_owner = FUTEX_WAITERS; 608 else 609 new_owner = FUTEX_UNOWNED; 610 611 again: 612 error = casueword32(args->uaddr, owner, &old, new_owner); 613 if (error == 1) { 614 error = thread_check_susp(td, false); 615 if (error == 0) 616 goto again; 617 } 618 umtxq_unbusy_unlocked(&key); 619 umtx_key_release(&key); 620 if (error == -1) 621 return (EFAULT); 622 if (error == 0 && old != owner) 623 return (EINVAL); 624 return (error); 625 } 626 627 static int 628 linux_futex_wakeop(struct thread *td, struct linux_futex_args *args) 629 { 630 struct umtx_key key, key2; 631 int nrwake, op_ret, ret; 632 int error, count; 633 634 if (args->uaddr == args->uaddr2) 635 return (EINVAL); 636 637 error = futex_key_get(args->uaddr, TYPE_FUTEX, GET_SHARED(args), &key); 638 if (error != 0) 639 return (error); 640 error = futex_key_get(args->uaddr2, TYPE_FUTEX, GET_SHARED(args), &key2); 641 if (error != 0) { 642 umtx_key_release(&key); 643 return (error); 644 } 645 umtxq_lock(&key); 646 umtxq_busy(&key); 647 umtxq_unlock(&key); 648 error = futex_atomic_op(td, args->val3, args->uaddr2, &op_ret); 649 umtxq_lock(&key); 650 umtxq_unbusy(&key); 651 if (error != 0) 652 goto out; 653 ret = umtxq_signal_mask(&key, args->val, args->val3); 654 if (op_ret > 0) { 655 nrwake = (int)(unsigned long)args->ts; 656 umtxq_lock(&key2); 657 count = umtxq_count(&key2); 658 if (count > 0) 659 ret += umtxq_signal_mask(&key2, nrwake, args->val3); 660 else 661 ret += umtxq_signal_mask(&key, nrwake, args->val3); 662 umtxq_unlock(&key2); 663 } 664 td->td_retval[0] = ret; 665 out: 666 umtxq_unlock(&key); 667 umtx_key_release(&key2); 668 umtx_key_release(&key); 669 return (error); 670 } 671 672 static int 673 linux_futex_requeue(struct thread *td, struct linux_futex_args *args) 674 { 675 int nrwake, nrrequeue; 676 struct umtx_key key, key2; 677 int error; 678 uint32_t uval; 679 680 /* 681 * Linux allows this, we would not, it is an incorrect 682 * usage of declared ABI, so return EINVAL. 683 */ 684 if (args->uaddr == args->uaddr2) 685 return (EINVAL); 686 687 nrrequeue = (int)(unsigned long)args->ts; 688 nrwake = args->val; 689 /* 690 * Sanity check to prevent signed integer overflow, 691 * see Linux CVE-2018-6927 692 */ 693 if (nrwake < 0 || nrrequeue < 0) 694 return (EINVAL); 695 696 error = futex_key_get(args->uaddr, TYPE_FUTEX, GET_SHARED(args), &key); 697 if (error != 0) 698 return (error); 699 error = futex_key_get(args->uaddr2, TYPE_FUTEX, GET_SHARED(args), &key2); 700 if (error != 0) { 701 umtx_key_release(&key); 702 return (error); 703 } 704 umtxq_lock(&key); 705 umtxq_busy(&key); 706 umtxq_unlock(&key); 707 error = fueword32(args->uaddr, &uval); 708 if (error != 0) 709 error = EFAULT; 710 else if (args->val3_compare == true && uval != args->val3) 711 error = EWOULDBLOCK; 712 umtxq_lock(&key); 713 umtxq_unbusy(&key); 714 if (error == 0) { 715 umtxq_lock(&key2); 716 td->td_retval[0] = umtxq_requeue(&key, nrwake, &key2, nrrequeue); 717 umtxq_unlock(&key2); 718 } 719 umtxq_unlock(&key); 720 umtx_key_release(&key2); 721 umtx_key_release(&key); 722 return (error); 723 } 724 725 static int 726 linux_futex_wake(struct thread *td, struct linux_futex_args *args) 727 { 728 struct umtx_key key; 729 int error; 730 731 if (args->val3 == 0) 732 return (EINVAL); 733 734 error = futex_key_get(args->uaddr, TYPE_FUTEX, GET_SHARED(args), &key); 735 if (error != 0) 736 return (error); 737 umtxq_lock(&key); 738 td->td_retval[0] = umtxq_signal_mask(&key, args->val, args->val3); 739 umtxq_unlock(&key); 740 umtx_key_release(&key); 741 return (0); 742 } 743 744 static int 745 linux_futex_wait(struct thread *td, struct linux_futex_args *args) 746 { 747 struct umtx_abs_timeout timo; 748 struct umtx_q *uq; 749 uint32_t uval; 750 int error; 751 752 if (args->val3 == 0) 753 error = EINVAL; 754 755 uq = td->td_umtxq; 756 error = futex_key_get(args->uaddr, TYPE_FUTEX, GET_SHARED(args), 757 &uq->uq_key); 758 if (error != 0) 759 return (error); 760 if (args->ts != NULL) 761 linux_umtx_abs_timeout_init(&timo, args); 762 umtxq_lock(&uq->uq_key); 763 umtxq_busy(&uq->uq_key); 764 uq->uq_bitset = args->val3; 765 umtxq_insert(uq); 766 umtxq_unlock(&uq->uq_key); 767 error = fueword32(args->uaddr, &uval); 768 if (error != 0) 769 error = EFAULT; 770 else if (uval != args->val) 771 error = EWOULDBLOCK; 772 umtxq_lock(&uq->uq_key); 773 umtxq_unbusy(&uq->uq_key); 774 if (error == 0) { 775 error = umtxq_sleep(uq, "futex", 776 args->ts == NULL ? NULL : &timo); 777 if ((uq->uq_flags & UQF_UMTXQ) == 0) 778 error = 0; 779 else 780 umtxq_remove(uq); 781 } else if ((uq->uq_flags & UQF_UMTXQ) != 0) { 782 umtxq_remove(uq); 783 } 784 umtxq_unlock(&uq->uq_key); 785 umtx_key_release(&uq->uq_key); 786 if (error == ERESTART) 787 error = EINTR; 788 return (error); 789 } 790 791 static void 792 linux_umtx_abs_timeout_init(struct umtx_abs_timeout *timo, 793 struct linux_futex_args *args) 794 { 795 int clockid, absolute; 796 797 /* 798 * The FUTEX_CLOCK_REALTIME option bit can be employed only with the 799 * FUTEX_WAIT_BITSET, FUTEX_WAIT_REQUEUE_PI, FUTEX_LOCK_PI2. 800 * For FUTEX_WAIT, timeout is interpreted as a relative value, for other 801 * futex operations timeout is interpreted as an absolute value. 802 * If FUTEX_CLOCK_REALTIME option bit is set, the Linux kernel measures 803 * the timeout against the CLOCK_REALTIME clock, otherwise the kernel 804 * measures the timeout against the CLOCK_MONOTONIC clock. 805 */ 806 clockid = args->clockrt ? CLOCK_REALTIME : CLOCK_MONOTONIC; 807 absolute = args->op == LINUX_FUTEX_WAIT ? false : true; 808 umtx_abs_timeout_init(timo, clockid, absolute, args->ts); 809 } 810 811 int 812 linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args) 813 { 814 struct linux_futex_args fargs = { 815 .uaddr = args->uaddr, 816 .op = args->op, 817 .val = args->val, 818 .ts = NULL, 819 .uaddr2 = args->uaddr2, 820 .val3 = args->val3, 821 .val3_compare = true, 822 }; 823 int error; 824 825 switch (args->op & LINUX_FUTEX_CMD_MASK) { 826 case LINUX_FUTEX_WAIT: 827 case LINUX_FUTEX_WAIT_BITSET: 828 case LINUX_FUTEX_LOCK_PI: 829 case LINUX_FUTEX_LOCK_PI2: 830 if (args->timeout != NULL) { 831 error = linux_get_timespec(&fargs.kts, args->timeout); 832 if (error != 0) 833 return (error); 834 fargs.ts = &fargs.kts; 835 } 836 break; 837 default: 838 fargs.ts = PTRIN(args->timeout); 839 } 840 return (linux_futex(td, &fargs)); 841 } 842 843 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 844 int 845 linux_sys_futex_time64(struct thread *td, 846 struct linux_sys_futex_time64_args *args) 847 { 848 struct linux_futex_args fargs = { 849 .uaddr = args->uaddr, 850 .op = args->op, 851 .val = args->val, 852 .ts = NULL, 853 .uaddr2 = args->uaddr2, 854 .val3 = args->val3, 855 .val3_compare = true, 856 }; 857 int error; 858 859 switch (args->op & LINUX_FUTEX_CMD_MASK) { 860 case LINUX_FUTEX_WAIT: 861 case LINUX_FUTEX_WAIT_BITSET: 862 case LINUX_FUTEX_LOCK_PI: 863 case LINUX_FUTEX_LOCK_PI2: 864 if (args->timeout != NULL) { 865 error = linux_get_timespec64(&fargs.kts, args->timeout); 866 if (error != 0) 867 return (error); 868 fargs.ts = &fargs.kts; 869 } 870 break; 871 default: 872 fargs.ts = PTRIN(args->timeout); 873 } 874 return (linux_futex(td, &fargs)); 875 } 876 #endif 877 878 int 879 linux_set_robust_list(struct thread *td, struct linux_set_robust_list_args *args) 880 { 881 struct linux_emuldata *em; 882 883 if (args->len != sizeof(struct linux_robust_list_head)) 884 return (EINVAL); 885 886 em = em_find(td); 887 em->robust_futexes = args->head; 888 889 return (0); 890 } 891 892 int 893 linux_get_robust_list(struct thread *td, struct linux_get_robust_list_args *args) 894 { 895 struct linux_emuldata *em; 896 struct linux_robust_list_head *head; 897 l_size_t len; 898 struct thread *td2; 899 int error; 900 901 if (!args->pid) { 902 em = em_find(td); 903 KASSERT(em != NULL, ("get_robust_list: emuldata notfound.\n")); 904 head = em->robust_futexes; 905 } else { 906 td2 = linux_tdfind(td, args->pid, -1); 907 if (td2 == NULL) 908 return (ESRCH); 909 if (SV_PROC_ABI(td2->td_proc) != SV_ABI_LINUX) { 910 PROC_UNLOCK(td2->td_proc); 911 return (EPERM); 912 } 913 914 em = em_find(td2); 915 KASSERT(em != NULL, ("get_robust_list: emuldata notfound.\n")); 916 /* XXX: ptrace? */ 917 if (priv_check(td, PRIV_CRED_SETUID) || 918 priv_check(td, PRIV_CRED_SETEUID) || 919 p_candebug(td, td2->td_proc)) { 920 PROC_UNLOCK(td2->td_proc); 921 return (EPERM); 922 } 923 head = em->robust_futexes; 924 925 PROC_UNLOCK(td2->td_proc); 926 } 927 928 len = sizeof(struct linux_robust_list_head); 929 error = copyout(&len, args->len, sizeof(l_size_t)); 930 if (error != 0) 931 return (EFAULT); 932 933 return (copyout(&head, args->head, sizeof(l_uintptr_t))); 934 } 935 936 static int 937 handle_futex_death(struct thread *td, struct linux_emuldata *em, uint32_t *uaddr, 938 unsigned int pi, bool pending_op) 939 { 940 uint32_t uval, nval, mval; 941 int error; 942 943 retry: 944 error = fueword32(uaddr, &uval); 945 if (error != 0) 946 return (EFAULT); 947 948 /* 949 * Special case for regular (non PI) futexes. The unlock path in 950 * user space has two race scenarios: 951 * 952 * 1. The unlock path releases the user space futex value and 953 * before it can execute the futex() syscall to wake up 954 * waiters it is killed. 955 * 956 * 2. A woken up waiter is killed before it can acquire the 957 * futex in user space. 958 * 959 * In both cases the TID validation below prevents a wakeup of 960 * potential waiters which can cause these waiters to block 961 * forever. 962 * 963 * In both cases it is safe to attempt waking up a potential 964 * waiter without touching the user space futex value and trying 965 * to set the OWNER_DIED bit. 966 */ 967 if (pending_op && !pi && !uval) { 968 (void)futex_wake(td, uaddr, 1, true); 969 return (0); 970 } 971 972 if ((uval & FUTEX_TID_MASK) == em->em_tid) { 973 mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED; 974 error = casueword32(uaddr, uval, &nval, mval); 975 if (error == -1) 976 return (EFAULT); 977 if (error == 1) { 978 error = thread_check_susp(td, false); 979 if (error != 0) 980 return (error); 981 goto retry; 982 } 983 984 if (!pi && (uval & FUTEX_WAITERS)) { 985 error = futex_wake(td, uaddr, 1, true); 986 if (error != 0) 987 return (error); 988 } else if (pi && (uval & FUTEX_WAITERS)) { 989 error = futex_wake_pi(td, uaddr, true); 990 if (error != 0) 991 return (error); 992 } 993 } 994 995 return (0); 996 } 997 998 static int 999 fetch_robust_entry(struct linux_robust_list **entry, 1000 struct linux_robust_list **head, unsigned int *pi) 1001 { 1002 l_ulong uentry; 1003 int error; 1004 1005 error = copyin((const void *)head, &uentry, sizeof(uentry)); 1006 if (error != 0) 1007 return (EFAULT); 1008 1009 *entry = (void *)(uentry & ~1UL); 1010 *pi = uentry & 1; 1011 1012 return (0); 1013 } 1014 1015 #define LINUX_HANDLE_DEATH_PENDING true 1016 #define LINUX_HANDLE_DEATH_LIST false 1017 1018 /* This walks the list of robust futexes releasing them. */ 1019 void 1020 release_futexes(struct thread *td, struct linux_emuldata *em) 1021 { 1022 struct linux_robust_list_head *head; 1023 struct linux_robust_list *entry, *next_entry, *pending; 1024 unsigned int limit = 2048, pi, next_pi, pip; 1025 uint32_t *uaddr; 1026 l_long futex_offset; 1027 int error; 1028 1029 head = em->robust_futexes; 1030 if (head == NULL) 1031 return; 1032 1033 if (fetch_robust_entry(&entry, PTRIN(&head->list.next), &pi)) 1034 return; 1035 1036 error = copyin(&head->futex_offset, &futex_offset, 1037 sizeof(futex_offset)); 1038 if (error != 0) 1039 return; 1040 1041 if (fetch_robust_entry(&pending, PTRIN(&head->pending_list), &pip)) 1042 return; 1043 1044 while (entry != &head->list) { 1045 error = fetch_robust_entry(&next_entry, PTRIN(&entry->next), 1046 &next_pi); 1047 1048 /* 1049 * A pending lock might already be on the list, so 1050 * don't process it twice. 1051 */ 1052 if (entry != pending) { 1053 uaddr = (uint32_t *)((caddr_t)entry + futex_offset); 1054 if (handle_futex_death(td, em, uaddr, pi, 1055 LINUX_HANDLE_DEATH_LIST)) 1056 return; 1057 } 1058 if (error != 0) 1059 return; 1060 1061 entry = next_entry; 1062 pi = next_pi; 1063 1064 if (!--limit) 1065 break; 1066 1067 sched_relinquish(curthread); 1068 } 1069 1070 if (pending) { 1071 uaddr = (uint32_t *)((caddr_t)pending + futex_offset); 1072 (void)handle_futex_death(td, em, uaddr, pip, 1073 LINUX_HANDLE_DEATH_PENDING); 1074 } 1075 } 1076