/* $NetBSD: linux_futex.c,v 1.7 2006/07/24 19:01:49 manu Exp $ */

/*-
 * Copyright (c) 2005 Emmanuel Dreyfus, all rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Emmanuel Dreyfus
 * 4. The name of the author may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#if 0
__KERNEL_RCSID(1, "$NetBSD: linux_futex.c,v 1.7 2006/07/24 19:01:49 manu Exp $");
#endif

#include "opt_compat.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/imgact.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sched.h>
#include <sys/sx.h>

#ifdef COMPAT_LINUX32
#include <machine/../linux32/linux.h>
#include <machine/../linux32/linux32_proto.h>
#else
#include <machine/../linux/linux.h>
#include <machine/../linux/linux_proto.h>
#endif
#include <compat/linux/linux_futex.h>
#include <compat/linux/linux_emul.h>

MALLOC_DEFINE(M_FUTEX, "futex", "Linux futexes");
MALLOC_DEFINE(M_FUTEX_WP, "futex wp", "Linux futexes wp");

struct futex;

struct waiting_proc {
	uint32_t	wp_flags;
	struct futex	*wp_futex;
	TAILQ_ENTRY(waiting_proc) wp_list;
};

struct futex {
	struct sx	f_lck;
	uint32_t	*f_uaddr;
	uint32_t	f_refcount;
	LIST_ENTRY(futex) f_list;
	TAILQ_HEAD(lf_waiting_proc, waiting_proc) f_waiting_proc;
};

struct futex_list futex_list;

#define FUTEX_LOCK(f)		sx_xlock(&(f)->f_lck)
#define FUTEX_UNLOCK(f)		sx_xunlock(&(f)->f_lck)
#define FUTEX_INIT(f)		sx_init_flags(&(f)->f_lck, "ftlk", 0)
#define FUTEX_DESTROY(f)	sx_destroy(&(f)->f_lck)
#define FUTEX_ASSERT_LOCKED(f)	sx_assert(&(f)->f_lck, SA_XLOCKED)

struct mtx futex_mtx;			/* protects the futex list */
#define FUTEXES_LOCK		mtx_lock(&futex_mtx)
#define FUTEXES_UNLOCK		mtx_unlock(&futex_mtx)

/* flags for futex_get() */
#define FUTEX_CREATE_WP		0x1	/* create a waiting_proc */
#define FUTEX_DONTCREATE	0x2	/* don't create the futex if it does not exist */
#define FUTEX_DONTEXISTS	0x4	/* return EINVAL if the futex already exists */

/* wp_flags */
#define FUTEX_WP_REQUEUED	0x1	/* wp requeued - wp moved from the wp_list
					 * of the futex where the thread sleeps to
					 * the wp_list of another futex.
					 */
#define FUTEX_WP_REMOVED	0x2	/* wp was woken up and removed from the
					 * futex wp_list to prevent a double wakeup.
					 */

/* support.s */
int futex_xchgl(int oparg, uint32_t *uaddr, int *oldval);
int futex_addl(int oparg, uint32_t *uaddr, int *oldval);
int futex_orl(int oparg, uint32_t *uaddr, int *oldval);
int futex_andl(int oparg, uint32_t *uaddr, int *oldval);
int futex_xorl(int oparg, uint32_t *uaddr, int *oldval);

static void
futex_put(struct futex *f, struct waiting_proc *wp)
{

	FUTEX_ASSERT_LOCKED(f);
	if (wp != NULL) {
		if ((wp->wp_flags & FUTEX_WP_REMOVED) == 0)
			TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
		free(wp, M_FUTEX_WP);
	}

	FUTEXES_LOCK;
	if (--f->f_refcount == 0) {
		LIST_REMOVE(f, f_list);
		FUTEXES_UNLOCK;
		FUTEX_UNLOCK(f);

		FUTEX_DESTROY(f);
		free(f, M_FUTEX);
		return;
	}

	FUTEXES_UNLOCK;
	FUTEX_UNLOCK(f);
}

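/*
 * Locking notes (editorial summary, derived from the code below):
 * futex_mtx protects futex_list and every f_refcount, while the
 * per-futex sx lock f_lck serializes the waiter list and is held
 * across futex_wake()/futex_sleep().  futex_get() and futex_get0()
 * return with the futex locked and referenced; futex_put() drops
 * both the reference and the lock.
 */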
static int
futex_get0(uint32_t *uaddr, struct futex **newf, uint32_t flags)
{
	struct futex *f, *tmpf;

	*newf = tmpf = NULL;

retry:
	FUTEXES_LOCK;
	LIST_FOREACH(f, &futex_list, f_list) {
		if (f->f_uaddr == uaddr) {
			if (tmpf != NULL) {
				FUTEX_UNLOCK(tmpf);
				FUTEX_DESTROY(tmpf);
				free(tmpf, M_FUTEX);
			}
			if (flags & FUTEX_DONTEXISTS) {
				FUTEXES_UNLOCK;
				return (EINVAL);
			}

			/*
			 * Increment the refcount of the found futex to
			 * prevent it from being deallocated before
			 * FUTEX_LOCK().
			 */
			++f->f_refcount;
			FUTEXES_UNLOCK;

			FUTEX_LOCK(f);
			*newf = f;
			return (0);
		}
	}

	if (flags & FUTEX_DONTCREATE) {
		FUTEXES_UNLOCK;
		return (0);
	}

	if (tmpf == NULL) {
		FUTEXES_UNLOCK;
		tmpf = malloc(sizeof(*tmpf), M_FUTEX, M_WAITOK | M_ZERO);
		tmpf->f_uaddr = uaddr;
		tmpf->f_refcount = 1;
		FUTEX_INIT(tmpf);
		TAILQ_INIT(&tmpf->f_waiting_proc);

		/*
		 * Lock the new futex before inserting it into futex_list
		 * to prevent it from being used by others.
		 */
		FUTEX_LOCK(tmpf);
		goto retry;
	}

	LIST_INSERT_HEAD(&futex_list, tmpf, f_list);
	FUTEXES_UNLOCK;

	*newf = tmpf;
	return (0);
}

static int
futex_get(uint32_t *uaddr, struct waiting_proc **wp, struct futex **f,
    uint32_t flags)
{
	int error;

	if (flags & FUTEX_CREATE_WP) {
		*wp = malloc(sizeof(struct waiting_proc), M_FUTEX_WP, M_WAITOK);
		(*wp)->wp_flags = 0;
	}
	error = futex_get0(uaddr, f, flags);
	if (error) {
		if (flags & FUTEX_CREATE_WP)
			free(*wp, M_FUTEX_WP);
		return (error);
	}
	if (flags & FUTEX_CREATE_WP) {
		TAILQ_INSERT_HEAD(&(*f)->f_waiting_proc, *wp, wp_list);
		(*wp)->wp_futex = *f;
	}

	return (error);
}

static int
futex_sleep(struct futex *f, struct waiting_proc *wp, unsigned long timeout)
{
	int error;

	FUTEX_ASSERT_LOCKED(f);
	error = sx_sleep(wp, &f->f_lck, PCATCH, "futex", timeout);
	if (wp->wp_flags & FUTEX_WP_REQUEUED) {
		KASSERT(f != wp->wp_futex, ("futex != wp_futex"));
		futex_put(f, NULL);
		f = wp->wp_futex;
		FUTEX_LOCK(f);
	}

	futex_put(f, wp);
	return (error);
}

static int
futex_wake(struct futex *f, int n)
{
	struct waiting_proc *wp, *wpt;
	int count = 0;

	FUTEX_ASSERT_LOCKED(f);
	TAILQ_FOREACH_SAFE(wp, &f->f_waiting_proc, wp_list, wpt) {
		wp->wp_flags |= FUTEX_WP_REMOVED;
		TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
		wakeup_one(wp);
		if (++count == n)
			break;
	}

	return (count);
}

static int
futex_requeue(struct futex *f, int n, struct futex *f2, int n2)
{
	struct waiting_proc *wp, *wpt;
	int count = 0;

	FUTEX_ASSERT_LOCKED(f);
	FUTEX_ASSERT_LOCKED(f2);

	TAILQ_FOREACH_SAFE(wp, &f->f_waiting_proc, wp_list, wpt) {
		if (++count <= n) {
			wp->wp_flags |= FUTEX_WP_REMOVED;
			TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
			wakeup_one(wp);
		} else {
			wp->wp_flags |= FUTEX_WP_REQUEUED;
			/* Move wp to the wp_list of the f2 futex. */
			TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
			TAILQ_INSERT_HEAD(&f2->f_waiting_proc, wp, wp_list);

			/*
			 * A thread that sleeps on wp must acquire the f2
			 * lock after waking, so increment the refcount of
			 * f2 to prevent its premature deallocation.
			 */
			wp->wp_futex = f2;
			FUTEXES_LOCK;
			++f2->f_refcount;
			FUTEXES_UNLOCK;
			if (count - n >= n2)
				break;
		}
	}

	return (count);
}

static int
futex_wait(struct futex *f, struct waiting_proc *wp, struct l_timespec *ts)
{
	struct l_timespec timeout = {0, 0};
	struct timeval tv = {0, 0};
	int timeout_hz;
	int error;

	if (ts != NULL) {
		error = copyin(ts, &timeout, sizeof(timeout));
		if (error)
			return (error);
	}

	tv.tv_usec = timeout.tv_sec * 1000000 + timeout.tv_nsec / 1000;
	timeout_hz = tvtohz(&tv);

	if (timeout.tv_sec == 0 && timeout.tv_nsec == 0)
		timeout_hz = 0;

	/*
	 * If the user process requests a non-null timeout, make sure
	 * we do not turn it into an infinite timeout because timeout_hz
	 * rounds down to zero.
	 *
	 * We use a minimal timeout of 1/hz. Maybe it would make sense
	 * to just return ETIMEDOUT without sleeping.
	 */
	if (((timeout.tv_sec != 0) || (timeout.tv_nsec != 0)) &&
	    (timeout_hz == 0))
		timeout_hz = 1;

	error = futex_sleep(f, wp, timeout_hz);
	if (error == EWOULDBLOCK)
		error = ETIMEDOUT;

	return (error);
}

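/*
 * FUTEX_WAKE_OP operation encoding, as implied by the shifts in
 * futex_atomic_op() below (editorial note; the layout follows the
 * Linux ABI):
 *
 *	bit  31    : FUTEX_OP_OPARG_SHIFT - use (1 << oparg) instead of oparg
 *	bits 28-30 : op      (SET, ADD, OR, ANDN, XOR)
 *	bits 24-27 : cmp     (EQ, NE, LT, GE, LE, GT)
 *	bits 12-23 : oparg   (signed 12-bit argument of op)
 *	bits  0-11 : cmparg  (signed 12-bit argument of cmp)
 *
 * For example, the encoding that Linux's FUTEX_OP(FUTEX_OP_SET, 0,
 * FUTEX_OP_CMP_GT, 1) macro produces roughly means: atomically store
 * 0 at uaddr2 and report success only if the old value was greater
 * than 1, which the caller uses to decide whether to wake uaddr2
 * waiters as well.
 */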
static int
futex_atomic_op(struct thread *td, int encoded_op, uint32_t *uaddr)
{
	int op = (encoded_op >> 28) & 7;
	int cmp = (encoded_op >> 24) & 15;
	int oparg = (encoded_op << 8) >> 20;
	int cmparg = (encoded_op << 20) >> 20;
	int oldval = 0, ret;

	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
		oparg = 1 << oparg;

#ifdef DEBUG
	if (ldebug(sys_futex))
		printf("futex_atomic_op: op = %d, cmp = %d, oparg = %x, "
		    "cmparg = %x, uaddr = %p\n",
		    op, cmp, oparg, cmparg, uaddr);
#endif
	/* XXX: Linux verifies access here and returns EFAULT. */

	switch (op) {
	case FUTEX_OP_SET:
		ret = futex_xchgl(oparg, uaddr, &oldval);
		break;
	case FUTEX_OP_ADD:
		ret = futex_addl(oparg, uaddr, &oldval);
		break;
	case FUTEX_OP_OR:
		ret = futex_orl(oparg, uaddr, &oldval);
		break;
	case FUTEX_OP_ANDN:
		ret = futex_andl(~oparg, uaddr, &oldval);
		break;
	case FUTEX_OP_XOR:
		ret = futex_xorl(oparg, uaddr, &oldval);
		break;
	default:
		ret = -ENOSYS;
		break;
	}

	if (ret)
		return (ret);

	switch (cmp) {
	case FUTEX_OP_CMP_EQ:
		return (oldval == cmparg);
	case FUTEX_OP_CMP_NE:
		return (oldval != cmparg);
	case FUTEX_OP_CMP_LT:
		return (oldval < cmparg);
	case FUTEX_OP_CMP_GE:
		return (oldval >= cmparg);
	case FUTEX_OP_CMP_LE:
		return (oldval <= cmparg);
	case FUTEX_OP_CMP_GT:
		return (oldval > cmparg);
	default:
		return (-ENOSYS);
	}
}

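/*
 * Editorial sketch of the userland side, for context only (not part of
 * this file's ABI): a Linux mutex built on futexes typically looks like
 * the classic scheme below, with atomic_cmpxchg()/atomic_xchg() standing
 * in for the architecture's atomics.
 *
 *	lock:	if (atomic_cmpxchg(&m, 0, 1) != 0)
 *			while (atomic_xchg(&m, 2) != 0)
 *				syscall(SYS_futex, &m, FUTEX_WAIT, 2, NULL, NULL, 0);
 *	unlock:	if (atomic_xchg(&m, 0) == 2)
 *			syscall(SYS_futex, &m, FUTEX_WAKE, 1, NULL, NULL, 0);
 *
 * FUTEX_WAIT must therefore re-read the word and return EWOULDBLOCK
 * when it no longer equals the expected value, which is exactly what
 * the LINUX_FUTEX_WAIT case below does.
 */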
int
linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args)
{
	int op_ret, val, ret, nrwake;
	struct linux_emuldata *em;
	struct waiting_proc *wp;
	struct futex *f, *f2;
	int error = 0;

	/*
	 * Our implementation provides only private futexes.  Most
	 * applications should use private futexes but don't claim so.
	 * Therefore we treat all futexes as private by clearing the
	 * FUTEX_PRIVATE_FLAG.  This works in most cases (i.e. when
	 * futexes are not shared through a file descriptor or between
	 * different processes).
	 */
	args->op = (args->op & ~LINUX_FUTEX_PRIVATE_FLAG);

	switch (args->op) {
	case LINUX_FUTEX_WAIT:

#ifdef DEBUG
		if (ldebug(sys_futex))
			printf(ARGS(sys_futex, "futex_wait val %d uaddr %p"),
			    args->val, args->uaddr);
#endif
		error = futex_get(args->uaddr, &wp, &f, FUTEX_CREATE_WP);
		if (error)
			return (error);
		error = copyin(args->uaddr, &val, sizeof(val));
		if (error) {
			futex_put(f, wp);
			return (error);
		}
		if (val != args->val) {
#ifdef DEBUG
			if (ldebug(sys_futex))
				printf(ARGS(sys_futex,
				    "futex_wait uaddr %p WHOOPS %d != %d"),
				    args->uaddr, args->val, val);
#endif
			futex_put(f, wp);
			return (EWOULDBLOCK);
		}

		error = futex_wait(f, wp, args->timeout);
		break;

	case LINUX_FUTEX_WAKE:

		/*
		 * XXX: Linux is able to cope with different addresses
		 * corresponding to the same mapped memory in the sleeping
		 * and the waker process(es).
		 */
#ifdef DEBUG
		if (ldebug(sys_futex))
			printf(ARGS(sys_futex, "futex_wake val %d uaddr %p"),
			    args->val, args->uaddr);
#endif
		error = futex_get(args->uaddr, NULL, &f, FUTEX_DONTCREATE);
		if (error)
			return (error);
		if (f == NULL) {
			td->td_retval[0] = 0;
			return (error);
		}
		td->td_retval[0] = futex_wake(f, args->val);
		futex_put(f, NULL);
		break;

	case LINUX_FUTEX_CMP_REQUEUE:

#ifdef DEBUG
		if (ldebug(sys_futex))
			printf(ARGS(sys_futex, "futex_cmp_requeue uaddr %p "
			    "val %d val3 %d uaddr2 %p val2 %d"),
			    args->uaddr, args->val, args->val3, args->uaddr2,
			    (int)(unsigned long)args->timeout);
#endif

		/*
		 * Linux allows this, but we do not: it is an incorrect
		 * use of the declared ABI, so return EINVAL.
		 */
		if (args->uaddr == args->uaddr2)
			return (EINVAL);
		error = futex_get0(args->uaddr, &f, 0);
		if (error)
			return (error);

		/*
		 * To avoid deadlocks, return EINVAL if the second futex
		 * already exists at this time.  Otherwise create the new
		 * futex and ignore the false positive LOR which then
		 * happens.
		 *
		 * Glibc falls back to FUTEX_WAKE in case of any error
		 * returned by FUTEX_CMP_REQUEUE.
		 */
		error = futex_get0(args->uaddr2, &f2, FUTEX_DONTEXISTS);
		if (error) {
			futex_put(f, NULL);
			return (error);
		}
		error = copyin(args->uaddr, &val, sizeof(val));
		if (error) {
			futex_put(f2, NULL);
			futex_put(f, NULL);
			return (error);
		}
		if (val != args->val3) {
#ifdef DEBUG
			if (ldebug(sys_futex))
				printf(ARGS(sys_futex, "futex_cmp_requeue WHOOPS"
				    " VAL %d != UVAL %d"), args->val, val);
#endif
			futex_put(f2, NULL);
			futex_put(f, NULL);
			return (EAGAIN);
		}

		nrwake = (int)(unsigned long)args->timeout;
		td->td_retval[0] = futex_requeue(f, args->val, f2, nrwake);
		futex_put(f2, NULL);
		futex_put(f, NULL);
		break;

	case LINUX_FUTEX_WAKE_OP:

#ifdef DEBUG
		if (ldebug(sys_futex))
			printf(ARGS(sys_futex, "futex_wake_op "
			    "uaddr %p op %d val %x uaddr2 %p val3 %x"),
			    args->uaddr, args->op, args->val,
			    args->uaddr2, args->val3);
#endif
		error = futex_get0(args->uaddr, &f, 0);
		if (error)
			return (error);
		/* Keep f2 NULL when both addresses match so the checks below work. */
		f2 = NULL;
		if (args->uaddr != args->uaddr2)
			error = futex_get0(args->uaddr2, &f2, 0);
		if (error) {
			futex_put(f, NULL);
			return (error);
		}

		/*
		 * This function returns positive numbers as results and
		 * negative numbers as errors.
		 */
		op_ret = futex_atomic_op(td, args->val3, args->uaddr2);

		if (op_ret < 0) {
			/* XXX: We don't handle the EFAULT yet. */
			if (op_ret != -EFAULT) {
				if (f2 != NULL)
					futex_put(f2, NULL);
				futex_put(f, NULL);
				return (-op_ret);
			}
			if (f2 != NULL)
				futex_put(f2, NULL);
			futex_put(f, NULL);
			return (EFAULT);
		}

		ret = futex_wake(f, args->val);

		if (op_ret > 0) {
			op_ret = 0;
			nrwake = (int)(unsigned long)args->timeout;

			if (f2 != NULL)
				op_ret += futex_wake(f2, nrwake);
			else
				op_ret += futex_wake(f, nrwake);
			ret += op_ret;
		}
		if (f2 != NULL)
			futex_put(f2, NULL);
		futex_put(f, NULL);
		td->td_retval[0] = ret;
		break;

	case LINUX_FUTEX_LOCK_PI:
		/* not yet implemented */
		return (ENOSYS);

	case LINUX_FUTEX_UNLOCK_PI:
		/* not yet implemented */
		return (ENOSYS);

	case LINUX_FUTEX_TRYLOCK_PI:
		/* not yet implemented */
		return (ENOSYS);

	case LINUX_FUTEX_REQUEUE:

		/*
		 * Glibc does not use this operation since version 2.3.3,
		 * as it is racy and was replaced by the FUTEX_CMP_REQUEUE
		 * operation.  Glibc versions prior to 2.3.3 fall back to
		 * FUTEX_WAKE when FUTEX_REQUEUE returns EINVAL.
		 */
		em = em_find(td->td_proc, EMUL_DONTLOCK);
		if (em->used_requeue == 0) {
			printf("linux(%s (%d)) sys_futex: "
			    "unsupported futex_requeue op\n",
			    td->td_proc->p_comm, td->td_proc->p_pid);
			em->used_requeue = 1;
		}
		return (EINVAL);

	default:
		printf("linux_sys_futex: unknown op %d\n", args->op);
		return (ENOSYS);
	}

	return (error);
}

int
linux_set_robust_list(struct thread *td, struct linux_set_robust_list_args *args)
{
	struct linux_emuldata *em;

#ifdef DEBUG
	if (ldebug(set_robust_list))
		printf(ARGS(set_robust_list, "head %p len %d"),
		    args->head, args->len);
#endif

	if (args->len != sizeof(struct linux_robust_list_head))
		return (EINVAL);

	em = em_find(td->td_proc, EMUL_DOLOCK);
	em->robust_futexes = args->head;
	EMUL_UNLOCK(&emul_lock);

	return (0);
}

int
linux_get_robust_list(struct thread *td, struct linux_get_robust_list_args *args)
{
	struct linux_emuldata *em;
	struct linux_robust_list_head *head;
	l_size_t len = sizeof(struct linux_robust_list_head);
	int error = 0;

#ifdef DEBUG
	if (ldebug(get_robust_list))
		printf(ARGS(get_robust_list, ""));
#endif

	if (!args->pid) {
		em = em_find(td->td_proc, EMUL_DONTLOCK);
		head = em->robust_futexes;
	} else {
		struct proc *p;

		p = pfind(args->pid);
		if (p == NULL)
			return (ESRCH);

		em = em_find(p, EMUL_DONTLOCK);
		/* XXX: ptrace? */
		if (priv_check(td, PRIV_CRED_SETUID) ||
		    priv_check(td, PRIV_CRED_SETEUID) ||
		    p_candebug(td, p)) {
			PROC_UNLOCK(p);
			return (EPERM);
		}
		head = em->robust_futexes;

		PROC_UNLOCK(p);
	}

	error = copyout(&len, args->len, sizeof(l_size_t));
	if (error)
		return (EFAULT);

	error = copyout(head, args->head, sizeof(struct linux_robust_list_head));

	return (error);
}

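/*
 * Editorial note on the robust futex protocol implemented below: the
 * futex word holds the owner TID in its low bits (FUTEX_TID_MASK) plus
 * the FUTEX_WAITERS and FUTEX_OWNER_DIED flag bits.  When a process
 * exits, every lock it still owns is marked FUTEX_OWNER_DIED with a
 * compare-and-swap, and one waiter (if any) is woken so that it can
 * observe the flag and recover the lock.
 */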
static int
handle_futex_death(struct proc *p, uint32_t *uaddr, int pi)
{
	uint32_t uval, nval, mval;
	struct futex *f;
	int error;

retry:
	if (copyin(uaddr, &uval, 4))
		return (EFAULT);
	if ((uval & FUTEX_TID_MASK) == p->p_pid) {
		mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
		nval = casuword32(uaddr, uval, mval);

		if (nval == -1)
			return (EFAULT);

		if (nval != uval)
			goto retry;

		if (!pi && (uval & FUTEX_WAITERS)) {
			error = futex_get(uaddr, NULL, &f,
			    FUTEX_DONTCREATE);
			if (error)
				return (error);
			if (f != NULL) {
				futex_wake(f, 1);
				futex_put(f, NULL);
			}
		}
	}

	return (0);
}

static int
fetch_robust_entry(struct linux_robust_list **entry,
    struct linux_robust_list **head, int *pi)
{
	l_ulong uentry;

	if (copyin((const void *)head, &uentry, sizeof(l_ulong)))
		return (EFAULT);

	*entry = (void *)(uentry & ~1UL);
	*pi = uentry & 1;

	return (0);
}

/* This walks the list of robust futexes, releasing them. */
void
release_futexes(struct proc *p)
{
	struct linux_robust_list_head *head = NULL;
	struct linux_robust_list *entry, *next_entry, *pending;
	unsigned int limit = 2048, pi, next_pi, pip;
	struct linux_emuldata *em;
	l_long futex_offset;
	int rc;

	em = em_find(p, EMUL_DONTLOCK);
	head = em->robust_futexes;

	if (head == NULL)
		return;

	if (fetch_robust_entry(&entry, PTRIN(&head->list.next), &pi))
		return;

	if (copyin(&head->futex_offset, &futex_offset, sizeof(futex_offset)))
		return;

	if (fetch_robust_entry(&pending, PTRIN(&head->pending_list), &pip))
		return;

	while (entry != &head->list) {
		rc = fetch_robust_entry(&next_entry, PTRIN(&entry->next), &next_pi);

		/* futex_offset is a byte offset from the list entry. */
		if (entry != pending)
			if (handle_futex_death(p,
			    (uint32_t *)((char *)entry + futex_offset), pi))
				return;
		if (rc)
			return;

		entry = next_entry;
		pi = next_pi;

		if (!--limit)
			break;

		sched_relinquish(curthread);
	}

	if (pending)
		handle_futex_death(p,
		    (uint32_t *)((char *)pending + futex_offset), pip);
}