1 /* $NetBSD: linux_futex.c,v 1.7 2006/07/24 19:01:49 manu Exp $ */ 2 3 /*- 4 * SPDX-License-Identifier: BSD-4-Clause 5 * 6 * Copyright (c) 2009-2016 Dmitry Chagin 7 * Copyright (c) 2005 Emmanuel Dreyfus 8 * All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by Emmanuel Dreyfus 21 * 4. The name of the author may not be used to endorse or promote 22 * products derived from this software without specific prior written 23 * permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE THE AUTHOR AND CONTRIBUTORS ``AS IS'' 26 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 27 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS 29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 35 * POSSIBILITY OF SUCH DAMAGE. 36 */ 37 38 #include <sys/cdefs.h> 39 __FBSDID("$FreeBSD$"); 40 #if 0 41 __KERNEL_RCSID(1, "$NetBSD: linux_futex.c,v 1.7 2006/07/24 19:01:49 manu Exp $"); 42 #endif 43 44 #include "opt_compat.h" 45 46 #include <sys/param.h> 47 #include <sys/systm.h> 48 #include <sys/imgact.h> 49 #include <sys/kernel.h> 50 #include <sys/ktr.h> 51 #include <sys/lock.h> 52 #include <sys/malloc.h> 53 #include <sys/mutex.h> 54 #include <sys/priv.h> 55 #include <sys/proc.h> 56 #include <sys/queue.h> 57 #include <sys/sched.h> 58 #include <sys/sdt.h> 59 #include <sys/umtx.h> 60 61 #include <vm/vm_extern.h> 62 63 #ifdef COMPAT_LINUX32 64 #include <machine/../linux32/linux.h> 65 #include <machine/../linux32/linux32_proto.h> 66 #else 67 #include <machine/../linux/linux.h> 68 #include <machine/../linux/linux_proto.h> 69 #endif 70 #include <compat/linux/linux_dtrace.h> 71 #include <compat/linux/linux_emul.h> 72 #include <compat/linux/linux_futex.h> 73 #include <compat/linux/linux_timer.h> 74 #include <compat/linux/linux_util.h> 75 76 /* DTrace init */ 77 LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE); 78 79 /** 80 * Futex part for the special DTrace module "locks". 81 */ 82 LIN_SDT_PROBE_DEFINE1(locks, futex_mtx, locked, "struct mtx *"); 83 LIN_SDT_PROBE_DEFINE1(locks, futex_mtx, unlock, "struct mtx *"); 84 85 /** 86 * Per futex probes. 87 */ 88 LIN_SDT_PROBE_DEFINE1(futex, futex, create, "struct sx *"); 89 LIN_SDT_PROBE_DEFINE1(futex, futex, destroy, "struct sx *"); 90 91 /** 92 * DTrace probes in this module. 93 */ 94 LIN_SDT_PROBE_DEFINE2(futex, futex_put, entry, "struct futex *", 95 "struct waiting_proc *"); 96 LIN_SDT_PROBE_DEFINE3(futex, futex_put, destroy, "uint32_t *", "uint32_t", 97 "int"); 98 LIN_SDT_PROBE_DEFINE3(futex, futex_put, unlock, "uint32_t *", "uint32_t", 99 "int"); 100 LIN_SDT_PROBE_DEFINE0(futex, futex_put, return); 101 LIN_SDT_PROBE_DEFINE3(futex, futex_get0, entry, "uint32_t *", "struct futex **", 102 "uint32_t"); 103 LIN_SDT_PROBE_DEFINE1(futex, futex_get0, umtx_key_get_error, "int"); 104 LIN_SDT_PROBE_DEFINE3(futex, futex_get0, shared, "uint32_t *", "uint32_t", 105 "int"); 106 LIN_SDT_PROBE_DEFINE1(futex, futex_get0, null, "uint32_t *"); 107 LIN_SDT_PROBE_DEFINE3(futex, futex_get0, new, "uint32_t *", "uint32_t", "int"); 108 LIN_SDT_PROBE_DEFINE1(futex, futex_get0, return, "int"); 109 LIN_SDT_PROBE_DEFINE3(futex, futex_get, entry, "uint32_t *", 110 "struct waiting_proc **", "struct futex **"); 111 LIN_SDT_PROBE_DEFINE0(futex, futex_get, error); 112 LIN_SDT_PROBE_DEFINE1(futex, futex_get, return, "int"); 113 LIN_SDT_PROBE_DEFINE3(futex, futex_sleep, entry, "struct futex *", 114 "struct waiting_proc **", "struct timespec *"); 115 LIN_SDT_PROBE_DEFINE5(futex, futex_sleep, requeue_error, "int", "uint32_t *", 116 "struct waiting_proc *", "uint32_t *", "uint32_t"); 117 LIN_SDT_PROBE_DEFINE3(futex, futex_sleep, sleep_error, "int", "uint32_t *", 118 "struct waiting_proc *"); 119 LIN_SDT_PROBE_DEFINE1(futex, futex_sleep, return, "int"); 120 LIN_SDT_PROBE_DEFINE3(futex, futex_wake, entry, "struct futex *", "int", 121 "uint32_t"); 122 LIN_SDT_PROBE_DEFINE3(futex, futex_wake, iterate, "uint32_t", 123 "struct waiting_proc *", "uint32_t"); 124 LIN_SDT_PROBE_DEFINE1(futex, futex_wake, wakeup, "struct waiting_proc *"); 125 LIN_SDT_PROBE_DEFINE1(futex, futex_wake, return, "int"); 126 LIN_SDT_PROBE_DEFINE4(futex, futex_requeue, entry, "struct futex *", "int", 127 "struct futex *", "int"); 128 LIN_SDT_PROBE_DEFINE1(futex, futex_requeue, wakeup, "struct waiting_proc *"); 129 LIN_SDT_PROBE_DEFINE3(futex, futex_requeue, requeue, "uint32_t *", 130 "struct waiting_proc *", "uint32_t"); 131 LIN_SDT_PROBE_DEFINE1(futex, futex_requeue, return, "int"); 132 LIN_SDT_PROBE_DEFINE4(futex, futex_wait, entry, "struct futex *", 133 "struct waiting_proc **", "struct timespec *", "uint32_t"); 134 LIN_SDT_PROBE_DEFINE1(futex, futex_wait, sleep_error, "int"); 135 LIN_SDT_PROBE_DEFINE1(futex, futex_wait, return, "int"); 136 LIN_SDT_PROBE_DEFINE3(futex, futex_atomic_op, entry, "struct thread *", 137 "int", "uint32_t"); 138 LIN_SDT_PROBE_DEFINE4(futex, futex_atomic_op, decoded_op, "int", "int", "int", 139 "int"); 140 LIN_SDT_PROBE_DEFINE0(futex, futex_atomic_op, missing_access_check); 141 LIN_SDT_PROBE_DEFINE1(futex, futex_atomic_op, unimplemented_op, "int"); 142 LIN_SDT_PROBE_DEFINE1(futex, futex_atomic_op, unimplemented_cmp, "int"); 143 LIN_SDT_PROBE_DEFINE1(futex, futex_atomic_op, return, "int"); 144 LIN_SDT_PROBE_DEFINE2(futex, linux_sys_futex, entry, "struct thread *", 145 "struct linux_sys_futex_args *"); 146 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_clockswitch); 147 LIN_SDT_PROBE_DEFINE1(futex, linux_sys_futex, copyin_error, "int"); 148 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, invalid_cmp_requeue_use); 149 LIN_SDT_PROBE_DEFINE3(futex, linux_sys_futex, debug_wait, "uint32_t *", 150 "uint32_t", "uint32_t"); 151 LIN_SDT_PROBE_DEFINE4(futex, linux_sys_futex, debug_wait_value_neq, 152 "uint32_t *", "uint32_t", "int", "uint32_t"); 153 LIN_SDT_PROBE_DEFINE3(futex, linux_sys_futex, debug_wake, "uint32_t *", 154 "uint32_t", "uint32_t"); 155 LIN_SDT_PROBE_DEFINE5(futex, linux_sys_futex, debug_cmp_requeue, "uint32_t *", 156 "uint32_t", "uint32_t", "uint32_t *", "struct l_timespec *"); 157 LIN_SDT_PROBE_DEFINE2(futex, linux_sys_futex, debug_cmp_requeue_value_neq, 158 "uint32_t", "int"); 159 LIN_SDT_PROBE_DEFINE5(futex, linux_sys_futex, debug_wake_op, "uint32_t *", 160 "int", "uint32_t", "uint32_t *", "uint32_t"); 161 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unhandled_efault); 162 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_lock_pi); 163 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_unlock_pi); 164 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_trylock_pi); 165 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, deprecated_requeue); 166 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_wait_requeue_pi); 167 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_cmp_requeue_pi); 168 LIN_SDT_PROBE_DEFINE1(futex, linux_sys_futex, unknown_operation, "int"); 169 LIN_SDT_PROBE_DEFINE1(futex, linux_sys_futex, return, "int"); 170 LIN_SDT_PROBE_DEFINE2(futex, linux_set_robust_list, entry, "struct thread *", 171 "struct linux_set_robust_list_args *"); 172 LIN_SDT_PROBE_DEFINE0(futex, linux_set_robust_list, size_error); 173 LIN_SDT_PROBE_DEFINE1(futex, linux_set_robust_list, return, "int"); 174 LIN_SDT_PROBE_DEFINE2(futex, linux_get_robust_list, entry, "struct thread *", 175 "struct linux_get_robust_list_args *"); 176 LIN_SDT_PROBE_DEFINE1(futex, linux_get_robust_list, copyout_error, "int"); 177 LIN_SDT_PROBE_DEFINE1(futex, linux_get_robust_list, return, "int"); 178 LIN_SDT_PROBE_DEFINE3(futex, handle_futex_death, entry, 179 "struct linux_emuldata *", "uint32_t *", "unsigned int"); 180 LIN_SDT_PROBE_DEFINE1(futex, handle_futex_death, copyin_error, "int"); 181 LIN_SDT_PROBE_DEFINE1(futex, handle_futex_death, return, "int"); 182 LIN_SDT_PROBE_DEFINE3(futex, fetch_robust_entry, entry, 183 "struct linux_robust_list **", "struct linux_robust_list **", 184 "unsigned int *"); 185 LIN_SDT_PROBE_DEFINE1(futex, fetch_robust_entry, copyin_error, "int"); 186 LIN_SDT_PROBE_DEFINE1(futex, fetch_robust_entry, return, "int"); 187 LIN_SDT_PROBE_DEFINE2(futex, release_futexes, entry, "struct thread *", 188 "struct linux_emuldata *"); 189 LIN_SDT_PROBE_DEFINE1(futex, release_futexes, copyin_error, "int"); 190 LIN_SDT_PROBE_DEFINE0(futex, release_futexes, return); 191 192 struct futex; 193 194 struct waiting_proc { 195 uint32_t wp_flags; 196 struct futex *wp_futex; 197 TAILQ_ENTRY(waiting_proc) wp_list; 198 }; 199 200 struct futex { 201 struct mtx f_lck; 202 uint32_t *f_uaddr; /* user-supplied value, for debug */ 203 struct umtx_key f_key; 204 uint32_t f_refcount; 205 uint32_t f_bitset; 206 LIST_ENTRY(futex) f_list; 207 TAILQ_HEAD(lf_waiting_proc, waiting_proc) f_waiting_proc; 208 }; 209 210 struct futex_list futex_list; 211 212 #define FUTEX_LOCK(f) mtx_lock(&(f)->f_lck) 213 #define FUTEX_LOCKED(f) mtx_owned(&(f)->f_lck) 214 #define FUTEX_UNLOCK(f) mtx_unlock(&(f)->f_lck) 215 #define FUTEX_INIT(f) do { \ 216 mtx_init(&(f)->f_lck, "ftlk", NULL, \ 217 MTX_DUPOK); \ 218 LIN_SDT_PROBE1(futex, futex, create, \ 219 &(f)->f_lck); \ 220 } while (0) 221 #define FUTEX_DESTROY(f) do { \ 222 LIN_SDT_PROBE1(futex, futex, destroy, \ 223 &(f)->f_lck); \ 224 mtx_destroy(&(f)->f_lck); \ 225 } while (0) 226 #define FUTEX_ASSERT_LOCKED(f) mtx_assert(&(f)->f_lck, MA_OWNED) 227 #define FUTEX_ASSERT_UNLOCKED(f) mtx_assert(&(f)->f_lck, MA_NOTOWNED) 228 229 struct mtx futex_mtx; /* protects the futex list */ 230 #define FUTEXES_LOCK do { \ 231 mtx_lock(&futex_mtx); \ 232 LIN_SDT_PROBE1(locks, futex_mtx, \ 233 locked, &futex_mtx); \ 234 } while (0) 235 #define FUTEXES_UNLOCK do { \ 236 LIN_SDT_PROBE1(locks, futex_mtx, \ 237 unlock, &futex_mtx); \ 238 mtx_unlock(&futex_mtx); \ 239 } while (0) 240 241 /* flags for futex_get() */ 242 #define FUTEX_CREATE_WP 0x1 /* create waiting_proc */ 243 #define FUTEX_DONTCREATE 0x2 /* don't create futex if not exists */ 244 #define FUTEX_DONTEXISTS 0x4 /* return EINVAL if futex exists */ 245 #define FUTEX_SHARED 0x8 /* shared futex */ 246 #define FUTEX_DONTLOCK 0x10 /* don't lock futex */ 247 248 /* wp_flags */ 249 #define FUTEX_WP_REQUEUED 0x1 /* wp requeued - wp moved from wp_list 250 * of futex where thread sleep to wp_list 251 * of another futex. 252 */ 253 #define FUTEX_WP_REMOVED 0x2 /* wp is woken up and removed from futex 254 * wp_list to prevent double wakeup. 255 */ 256 257 static void futex_put(struct futex *, struct waiting_proc *); 258 static int futex_get0(uint32_t *, struct futex **f, uint32_t); 259 static int futex_get(uint32_t *, struct waiting_proc **, struct futex **, 260 uint32_t); 261 static int futex_sleep(struct futex *, struct waiting_proc *, struct timespec *); 262 static int futex_wake(struct futex *, int, uint32_t); 263 static int futex_requeue(struct futex *, int, struct futex *, int); 264 static int futex_copyin_timeout(int, struct l_timespec *, int, 265 struct timespec *); 266 static int futex_wait(struct futex *, struct waiting_proc *, struct timespec *, 267 uint32_t); 268 static void futex_lock(struct futex *); 269 static void futex_unlock(struct futex *); 270 static int futex_atomic_op(struct thread *, int, uint32_t *); 271 static int handle_futex_death(struct linux_emuldata *, uint32_t *, 272 unsigned int); 273 static int fetch_robust_entry(struct linux_robust_list **, 274 struct linux_robust_list **, unsigned int *); 275 276 /* support.s */ 277 int futex_xchgl(int oparg, uint32_t *uaddr, int *oldval); 278 int futex_addl(int oparg, uint32_t *uaddr, int *oldval); 279 int futex_orl(int oparg, uint32_t *uaddr, int *oldval); 280 int futex_andl(int oparg, uint32_t *uaddr, int *oldval); 281 int futex_xorl(int oparg, uint32_t *uaddr, int *oldval); 282 283 284 static int 285 futex_copyin_timeout(int op, struct l_timespec *luts, int clockrt, 286 struct timespec *ts) 287 { 288 struct l_timespec lts; 289 struct timespec kts; 290 int error; 291 292 error = copyin(luts, <s, sizeof(lts)); 293 if (error) 294 return (error); 295 296 error = linux_to_native_timespec(ts, <s); 297 if (error) 298 return (error); 299 if (clockrt) { 300 nanotime(&kts); 301 timespecsub(ts, &kts); 302 } else if (op == LINUX_FUTEX_WAIT_BITSET) { 303 nanouptime(&kts); 304 timespecsub(ts, &kts); 305 } 306 return (error); 307 } 308 309 static void 310 futex_put(struct futex *f, struct waiting_proc *wp) 311 { 312 LIN_SDT_PROBE2(futex, futex_put, entry, f, wp); 313 314 if (wp != NULL) { 315 if ((wp->wp_flags & FUTEX_WP_REMOVED) == 0) 316 TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list); 317 free(wp, M_FUTEX_WP); 318 } 319 320 FUTEXES_LOCK; 321 if (--f->f_refcount == 0) { 322 LIST_REMOVE(f, f_list); 323 FUTEXES_UNLOCK; 324 if (FUTEX_LOCKED(f)) 325 futex_unlock(f); 326 327 LIN_SDT_PROBE3(futex, futex_put, destroy, f->f_uaddr, 328 f->f_refcount, f->f_key.shared); 329 LINUX_CTR3(sys_futex, "futex_put destroy uaddr %p ref %d " 330 "shared %d", f->f_uaddr, f->f_refcount, f->f_key.shared); 331 umtx_key_release(&f->f_key); 332 FUTEX_DESTROY(f); 333 free(f, M_FUTEX); 334 335 LIN_SDT_PROBE0(futex, futex_put, return); 336 return; 337 } 338 339 LIN_SDT_PROBE3(futex, futex_put, unlock, f->f_uaddr, f->f_refcount, 340 f->f_key.shared); 341 LINUX_CTR3(sys_futex, "futex_put uaddr %p ref %d shared %d", 342 f->f_uaddr, f->f_refcount, f->f_key.shared); 343 FUTEXES_UNLOCK; 344 if (FUTEX_LOCKED(f)) 345 futex_unlock(f); 346 347 LIN_SDT_PROBE0(futex, futex_put, return); 348 } 349 350 static int 351 futex_get0(uint32_t *uaddr, struct futex **newf, uint32_t flags) 352 { 353 struct futex *f, *tmpf; 354 struct umtx_key key; 355 int error; 356 357 LIN_SDT_PROBE3(futex, futex_get0, entry, uaddr, newf, flags); 358 359 *newf = tmpf = NULL; 360 361 error = umtx_key_get(uaddr, TYPE_FUTEX, (flags & FUTEX_SHARED) ? 362 AUTO_SHARE : THREAD_SHARE, &key); 363 if (error) { 364 LIN_SDT_PROBE1(futex, futex_get0, umtx_key_get_error, error); 365 LIN_SDT_PROBE1(futex, futex_get0, return, error); 366 return (error); 367 } 368 retry: 369 FUTEXES_LOCK; 370 LIST_FOREACH(f, &futex_list, f_list) { 371 if (umtx_key_match(&f->f_key, &key)) { 372 if (tmpf != NULL) { 373 if (FUTEX_LOCKED(tmpf)) 374 futex_unlock(tmpf); 375 FUTEX_DESTROY(tmpf); 376 free(tmpf, M_FUTEX); 377 } 378 if (flags & FUTEX_DONTEXISTS) { 379 FUTEXES_UNLOCK; 380 umtx_key_release(&key); 381 382 LIN_SDT_PROBE1(futex, futex_get0, return, 383 EINVAL); 384 return (EINVAL); 385 } 386 387 /* 388 * Increment refcount of the found futex to 389 * prevent it from deallocation before FUTEX_LOCK() 390 */ 391 ++f->f_refcount; 392 FUTEXES_UNLOCK; 393 umtx_key_release(&key); 394 395 if ((flags & FUTEX_DONTLOCK) == 0) 396 futex_lock(f); 397 *newf = f; 398 LIN_SDT_PROBE3(futex, futex_get0, shared, uaddr, 399 f->f_refcount, f->f_key.shared); 400 LINUX_CTR3(sys_futex, "futex_get uaddr %p ref %d shared %d", 401 uaddr, f->f_refcount, f->f_key.shared); 402 403 LIN_SDT_PROBE1(futex, futex_get0, return, 0); 404 return (0); 405 } 406 } 407 408 if (flags & FUTEX_DONTCREATE) { 409 FUTEXES_UNLOCK; 410 umtx_key_release(&key); 411 LIN_SDT_PROBE1(futex, futex_get0, null, uaddr); 412 LINUX_CTR1(sys_futex, "futex_get uaddr %p null", uaddr); 413 414 LIN_SDT_PROBE1(futex, futex_get0, return, 0); 415 return (0); 416 } 417 418 if (tmpf == NULL) { 419 FUTEXES_UNLOCK; 420 tmpf = malloc(sizeof(*tmpf), M_FUTEX, M_WAITOK | M_ZERO); 421 tmpf->f_uaddr = uaddr; 422 tmpf->f_key = key; 423 tmpf->f_refcount = 1; 424 tmpf->f_bitset = FUTEX_BITSET_MATCH_ANY; 425 FUTEX_INIT(tmpf); 426 TAILQ_INIT(&tmpf->f_waiting_proc); 427 428 /* 429 * Lock the new futex before an insert into the futex_list 430 * to prevent futex usage by other. 431 */ 432 if ((flags & FUTEX_DONTLOCK) == 0) 433 futex_lock(tmpf); 434 goto retry; 435 } 436 437 LIST_INSERT_HEAD(&futex_list, tmpf, f_list); 438 FUTEXES_UNLOCK; 439 440 LIN_SDT_PROBE3(futex, futex_get0, new, uaddr, tmpf->f_refcount, 441 tmpf->f_key.shared); 442 LINUX_CTR3(sys_futex, "futex_get uaddr %p ref %d shared %d new", 443 uaddr, tmpf->f_refcount, tmpf->f_key.shared); 444 *newf = tmpf; 445 446 LIN_SDT_PROBE1(futex, futex_get0, return, 0); 447 return (0); 448 } 449 450 static int 451 futex_get(uint32_t *uaddr, struct waiting_proc **wp, struct futex **f, 452 uint32_t flags) 453 { 454 int error; 455 456 LIN_SDT_PROBE3(futex, futex_get, entry, uaddr, wp, f); 457 458 if (flags & FUTEX_CREATE_WP) { 459 *wp = malloc(sizeof(struct waiting_proc), M_FUTEX_WP, M_WAITOK); 460 (*wp)->wp_flags = 0; 461 } 462 error = futex_get0(uaddr, f, flags); 463 if (error) { 464 LIN_SDT_PROBE0(futex, futex_get, error); 465 466 if (flags & FUTEX_CREATE_WP) 467 free(*wp, M_FUTEX_WP); 468 469 LIN_SDT_PROBE1(futex, futex_get, return, error); 470 return (error); 471 } 472 if (flags & FUTEX_CREATE_WP) { 473 TAILQ_INSERT_HEAD(&(*f)->f_waiting_proc, *wp, wp_list); 474 (*wp)->wp_futex = *f; 475 } 476 477 LIN_SDT_PROBE1(futex, futex_get, return, error); 478 return (error); 479 } 480 481 static inline void 482 futex_lock(struct futex *f) 483 { 484 485 LINUX_CTR3(sys_futex, "futex_lock uaddr %p ref %d shared %d", 486 f->f_uaddr, f->f_refcount, f->f_key.shared); 487 FUTEX_ASSERT_UNLOCKED(f); 488 FUTEX_LOCK(f); 489 } 490 491 static inline void 492 futex_unlock(struct futex *f) 493 { 494 495 LINUX_CTR3(sys_futex, "futex_unlock uaddr %p ref %d shared %d", 496 f->f_uaddr, f->f_refcount, f->f_key.shared); 497 FUTEX_ASSERT_LOCKED(f); 498 FUTEX_UNLOCK(f); 499 } 500 501 static int 502 futex_sleep(struct futex *f, struct waiting_proc *wp, struct timespec *ts) 503 { 504 struct timespec uts; 505 sbintime_t sbt, prec, tmp; 506 time_t over; 507 int error; 508 509 FUTEX_ASSERT_LOCKED(f); 510 if (ts != NULL) { 511 uts = *ts; 512 if (uts.tv_sec > INT32_MAX / 2) { 513 over = uts.tv_sec - INT32_MAX / 2; 514 uts.tv_sec -= over; 515 } 516 tmp = tstosbt(uts); 517 if (TIMESEL(&sbt, tmp)) 518 sbt += tc_tick_sbt; 519 sbt += tmp; 520 prec = tmp; 521 prec >>= tc_precexp; 522 } else { 523 sbt = 0; 524 prec = 0; 525 } 526 LIN_SDT_PROBE3(futex, futex_sleep, entry, f, wp, sbt); 527 LINUX_CTR4(sys_futex, "futex_sleep enter uaddr %p wp %p timo %ld ref %d", 528 f->f_uaddr, wp, sbt, f->f_refcount); 529 530 error = msleep_sbt(wp, &f->f_lck, PCATCH, "futex", sbt, prec, C_ABSOLUTE); 531 if (wp->wp_flags & FUTEX_WP_REQUEUED) { 532 KASSERT(f != wp->wp_futex, ("futex != wp_futex")); 533 534 if (error) { 535 LIN_SDT_PROBE5(futex, futex_sleep, requeue_error, error, 536 f->f_uaddr, wp, wp->wp_futex->f_uaddr, 537 wp->wp_futex->f_refcount); 538 } 539 540 LINUX_CTR5(sys_futex, "futex_sleep out error %d uaddr %p wp" 541 " %p requeued uaddr %p ref %d", 542 error, f->f_uaddr, wp, wp->wp_futex->f_uaddr, 543 wp->wp_futex->f_refcount); 544 futex_put(f, NULL); 545 f = wp->wp_futex; 546 futex_lock(f); 547 } else { 548 if (error) { 549 LIN_SDT_PROBE3(futex, futex_sleep, sleep_error, error, 550 f->f_uaddr, wp); 551 } 552 LINUX_CTR3(sys_futex, "futex_sleep out error %d uaddr %p wp %p", 553 error, f->f_uaddr, wp); 554 } 555 556 futex_put(f, wp); 557 558 LIN_SDT_PROBE1(futex, futex_sleep, return, error); 559 return (error); 560 } 561 562 static int 563 futex_wake(struct futex *f, int n, uint32_t bitset) 564 { 565 struct waiting_proc *wp, *wpt; 566 int count = 0; 567 568 LIN_SDT_PROBE3(futex, futex_wake, entry, f, n, bitset); 569 570 if (bitset == 0) { 571 LIN_SDT_PROBE1(futex, futex_wake, return, EINVAL); 572 return (EINVAL); 573 } 574 575 FUTEX_ASSERT_LOCKED(f); 576 TAILQ_FOREACH_SAFE(wp, &f->f_waiting_proc, wp_list, wpt) { 577 LIN_SDT_PROBE3(futex, futex_wake, iterate, f->f_uaddr, wp, 578 f->f_refcount); 579 LINUX_CTR3(sys_futex, "futex_wake uaddr %p wp %p ref %d", 580 f->f_uaddr, wp, f->f_refcount); 581 /* 582 * Unless we find a matching bit in 583 * the bitset, continue searching. 584 */ 585 if (!(wp->wp_futex->f_bitset & bitset)) 586 continue; 587 588 wp->wp_flags |= FUTEX_WP_REMOVED; 589 TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list); 590 LIN_SDT_PROBE1(futex, futex_wake, wakeup, wp); 591 wakeup_one(wp); 592 if (++count == n) 593 break; 594 } 595 596 LIN_SDT_PROBE1(futex, futex_wake, return, count); 597 return (count); 598 } 599 600 static int 601 futex_requeue(struct futex *f, int n, struct futex *f2, int n2) 602 { 603 struct waiting_proc *wp, *wpt; 604 int count = 0; 605 606 LIN_SDT_PROBE4(futex, futex_requeue, entry, f, n, f2, n2); 607 608 FUTEX_ASSERT_LOCKED(f); 609 FUTEX_ASSERT_LOCKED(f2); 610 611 TAILQ_FOREACH_SAFE(wp, &f->f_waiting_proc, wp_list, wpt) { 612 if (++count <= n) { 613 LINUX_CTR2(sys_futex, "futex_req_wake uaddr %p wp %p", 614 f->f_uaddr, wp); 615 wp->wp_flags |= FUTEX_WP_REMOVED; 616 TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list); 617 LIN_SDT_PROBE1(futex, futex_requeue, wakeup, wp); 618 wakeup_one(wp); 619 } else { 620 LIN_SDT_PROBE3(futex, futex_requeue, requeue, 621 f->f_uaddr, wp, f2->f_uaddr); 622 LINUX_CTR3(sys_futex, "futex_requeue uaddr %p wp %p to %p", 623 f->f_uaddr, wp, f2->f_uaddr); 624 wp->wp_flags |= FUTEX_WP_REQUEUED; 625 /* Move wp to wp_list of f2 futex */ 626 TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list); 627 TAILQ_INSERT_HEAD(&f2->f_waiting_proc, wp, wp_list); 628 629 /* 630 * Thread which sleeps on wp after waking should 631 * acquire f2 lock, so increment refcount of f2 to 632 * prevent it from premature deallocation. 633 */ 634 wp->wp_futex = f2; 635 FUTEXES_LOCK; 636 ++f2->f_refcount; 637 FUTEXES_UNLOCK; 638 if (count - n >= n2) 639 break; 640 } 641 } 642 643 LIN_SDT_PROBE1(futex, futex_requeue, return, count); 644 return (count); 645 } 646 647 static int 648 futex_wait(struct futex *f, struct waiting_proc *wp, struct timespec *ts, 649 uint32_t bitset) 650 { 651 int error; 652 653 LIN_SDT_PROBE4(futex, futex_wait, entry, f, wp, ts, bitset); 654 655 if (bitset == 0) { 656 LIN_SDT_PROBE1(futex, futex_wait, return, EINVAL); 657 return (EINVAL); 658 } 659 660 f->f_bitset = bitset; 661 error = futex_sleep(f, wp, ts); 662 if (error) 663 LIN_SDT_PROBE1(futex, futex_wait, sleep_error, error); 664 if (error == EWOULDBLOCK) 665 error = ETIMEDOUT; 666 667 LIN_SDT_PROBE1(futex, futex_wait, return, error); 668 return (error); 669 } 670 671 static int 672 futex_atomic_op(struct thread *td, int encoded_op, uint32_t *uaddr) 673 { 674 int op = (encoded_op >> 28) & 7; 675 int cmp = (encoded_op >> 24) & 15; 676 int oparg = (encoded_op << 8) >> 20; 677 int cmparg = (encoded_op << 20) >> 20; 678 int oldval = 0, ret; 679 680 LIN_SDT_PROBE3(futex, futex_atomic_op, entry, td, encoded_op, uaddr); 681 682 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 683 oparg = 1 << oparg; 684 685 LIN_SDT_PROBE4(futex, futex_atomic_op, decoded_op, op, cmp, oparg, 686 cmparg); 687 688 /* XXX: Linux verifies access here and returns EFAULT */ 689 LIN_SDT_PROBE0(futex, futex_atomic_op, missing_access_check); 690 691 switch (op) { 692 case FUTEX_OP_SET: 693 ret = futex_xchgl(oparg, uaddr, &oldval); 694 break; 695 case FUTEX_OP_ADD: 696 ret = futex_addl(oparg, uaddr, &oldval); 697 break; 698 case FUTEX_OP_OR: 699 ret = futex_orl(oparg, uaddr, &oldval); 700 break; 701 case FUTEX_OP_ANDN: 702 ret = futex_andl(~oparg, uaddr, &oldval); 703 break; 704 case FUTEX_OP_XOR: 705 ret = futex_xorl(oparg, uaddr, &oldval); 706 break; 707 default: 708 LIN_SDT_PROBE1(futex, futex_atomic_op, unimplemented_op, op); 709 ret = -ENOSYS; 710 break; 711 } 712 713 if (ret) { 714 LIN_SDT_PROBE1(futex, futex_atomic_op, return, ret); 715 return (ret); 716 } 717 718 switch (cmp) { 719 case FUTEX_OP_CMP_EQ: 720 ret = (oldval == cmparg); 721 break; 722 case FUTEX_OP_CMP_NE: 723 ret = (oldval != cmparg); 724 break; 725 case FUTEX_OP_CMP_LT: 726 ret = (oldval < cmparg); 727 break; 728 case FUTEX_OP_CMP_GE: 729 ret = (oldval >= cmparg); 730 break; 731 case FUTEX_OP_CMP_LE: 732 ret = (oldval <= cmparg); 733 break; 734 case FUTEX_OP_CMP_GT: 735 ret = (oldval > cmparg); 736 break; 737 default: 738 LIN_SDT_PROBE1(futex, futex_atomic_op, unimplemented_cmp, cmp); 739 ret = -ENOSYS; 740 } 741 742 LIN_SDT_PROBE1(futex, futex_atomic_op, return, ret); 743 return (ret); 744 } 745 746 int 747 linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args) 748 { 749 int clockrt, nrwake, op_ret, ret; 750 struct linux_pemuldata *pem; 751 struct waiting_proc *wp; 752 struct futex *f, *f2; 753 struct timespec uts, *ts; 754 int error, save; 755 uint32_t flags, val; 756 757 LIN_SDT_PROBE2(futex, linux_sys_futex, entry, td, args); 758 759 if (args->op & LINUX_FUTEX_PRIVATE_FLAG) { 760 flags = 0; 761 args->op &= ~LINUX_FUTEX_PRIVATE_FLAG; 762 } else 763 flags = FUTEX_SHARED; 764 765 /* 766 * Currently support for switching between CLOCK_MONOTONIC and 767 * CLOCK_REALTIME is not present. However Linux forbids the use of 768 * FUTEX_CLOCK_REALTIME with any op except FUTEX_WAIT_BITSET and 769 * FUTEX_WAIT_REQUEUE_PI. 770 */ 771 clockrt = args->op & LINUX_FUTEX_CLOCK_REALTIME; 772 args->op = args->op & ~LINUX_FUTEX_CLOCK_REALTIME; 773 if (clockrt && args->op != LINUX_FUTEX_WAIT_BITSET && 774 args->op != LINUX_FUTEX_WAIT_REQUEUE_PI) { 775 LIN_SDT_PROBE0(futex, linux_sys_futex, 776 unimplemented_clockswitch); 777 LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); 778 return (ENOSYS); 779 } 780 781 error = 0; 782 f = f2 = NULL; 783 784 switch (args->op) { 785 case LINUX_FUTEX_WAIT: 786 args->val3 = FUTEX_BITSET_MATCH_ANY; 787 /* FALLTHROUGH */ 788 789 case LINUX_FUTEX_WAIT_BITSET: 790 LIN_SDT_PROBE3(futex, linux_sys_futex, debug_wait, args->uaddr, 791 args->val, args->val3); 792 LINUX_CTR3(sys_futex, "WAIT uaddr %p val 0x%x bitset 0x%x", 793 args->uaddr, args->val, args->val3); 794 795 if (args->timeout != NULL) { 796 error = futex_copyin_timeout(args->op, args->timeout, 797 clockrt, &uts); 798 if (error) { 799 LIN_SDT_PROBE1(futex, linux_sys_futex, copyin_error, 800 error); 801 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 802 return (error); 803 } 804 ts = &uts; 805 } else 806 ts = NULL; 807 808 retry0: 809 error = futex_get(args->uaddr, &wp, &f, 810 flags | FUTEX_CREATE_WP); 811 if (error) { 812 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 813 return (error); 814 } 815 816 error = copyin_nofault(args->uaddr, &val, sizeof(val)); 817 if (error) { 818 futex_put(f, wp); 819 error = copyin(args->uaddr, &val, sizeof(val)); 820 if (error == 0) 821 goto retry0; 822 LIN_SDT_PROBE1(futex, linux_sys_futex, copyin_error, 823 error); 824 LINUX_CTR1(sys_futex, "WAIT copyin failed %d", 825 error); 826 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 827 return (error); 828 } 829 if (val != args->val) { 830 LIN_SDT_PROBE4(futex, linux_sys_futex, 831 debug_wait_value_neq, args->uaddr, args->val, val, 832 args->val3); 833 LINUX_CTR3(sys_futex, 834 "WAIT uaddr %p val 0x%x != uval 0x%x", 835 args->uaddr, args->val, val); 836 futex_put(f, wp); 837 838 LIN_SDT_PROBE1(futex, linux_sys_futex, return, 839 EWOULDBLOCK); 840 return (EWOULDBLOCK); 841 } 842 843 error = futex_wait(f, wp, ts, args->val3); 844 break; 845 846 case LINUX_FUTEX_WAKE: 847 args->val3 = FUTEX_BITSET_MATCH_ANY; 848 /* FALLTHROUGH */ 849 850 case LINUX_FUTEX_WAKE_BITSET: 851 LIN_SDT_PROBE3(futex, linux_sys_futex, debug_wake, args->uaddr, 852 args->val, args->val3); 853 LINUX_CTR3(sys_futex, "WAKE uaddr %p nrwake 0x%x bitset 0x%x", 854 args->uaddr, args->val, args->val3); 855 856 error = futex_get(args->uaddr, NULL, &f, 857 flags | FUTEX_DONTCREATE); 858 if (error) { 859 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 860 return (error); 861 } 862 863 if (f == NULL) { 864 td->td_retval[0] = 0; 865 866 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 867 return (error); 868 } 869 td->td_retval[0] = futex_wake(f, args->val, args->val3); 870 futex_put(f, NULL); 871 break; 872 873 case LINUX_FUTEX_CMP_REQUEUE: 874 LIN_SDT_PROBE5(futex, linux_sys_futex, debug_cmp_requeue, 875 args->uaddr, args->val, args->val3, args->uaddr2, 876 args->timeout); 877 LINUX_CTR5(sys_futex, "CMP_REQUEUE uaddr %p " 878 "nrwake 0x%x uval 0x%x uaddr2 %p nrequeue 0x%x", 879 args->uaddr, args->val, args->val3, args->uaddr2, 880 args->timeout); 881 882 /* 883 * Linux allows this, we would not, it is an incorrect 884 * usage of declared ABI, so return EINVAL. 885 */ 886 if (args->uaddr == args->uaddr2) { 887 LIN_SDT_PROBE0(futex, linux_sys_futex, 888 invalid_cmp_requeue_use); 889 LIN_SDT_PROBE1(futex, linux_sys_futex, return, EINVAL); 890 return (EINVAL); 891 } 892 893 retry1: 894 error = futex_get(args->uaddr, NULL, &f, flags | FUTEX_DONTLOCK); 895 if (error) { 896 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 897 return (error); 898 } 899 900 /* 901 * To avoid deadlocks return EINVAL if second futex 902 * exists at this time. 903 * 904 * Glibc fall back to FUTEX_WAKE in case of any error 905 * returned by FUTEX_CMP_REQUEUE. 906 */ 907 error = futex_get(args->uaddr2, NULL, &f2, 908 flags | FUTEX_DONTEXISTS | FUTEX_DONTLOCK); 909 if (error) { 910 futex_put(f, NULL); 911 912 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 913 return (error); 914 } 915 futex_lock(f); 916 futex_lock(f2); 917 error = copyin_nofault(args->uaddr, &val, sizeof(val)); 918 if (error) { 919 futex_put(f2, NULL); 920 futex_put(f, NULL); 921 error = copyin(args->uaddr, &val, sizeof(val)); 922 if (error == 0) 923 goto retry1; 924 LIN_SDT_PROBE1(futex, linux_sys_futex, copyin_error, 925 error); 926 LINUX_CTR1(sys_futex, "CMP_REQUEUE copyin failed %d", 927 error); 928 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 929 return (error); 930 } 931 if (val != args->val3) { 932 LIN_SDT_PROBE2(futex, linux_sys_futex, 933 debug_cmp_requeue_value_neq, args->val, val); 934 LINUX_CTR2(sys_futex, "CMP_REQUEUE val 0x%x != uval 0x%x", 935 args->val, val); 936 futex_put(f2, NULL); 937 futex_put(f, NULL); 938 939 LIN_SDT_PROBE1(futex, linux_sys_futex, return, EAGAIN); 940 return (EAGAIN); 941 } 942 943 nrwake = (int)(unsigned long)args->timeout; 944 td->td_retval[0] = futex_requeue(f, args->val, f2, nrwake); 945 futex_put(f2, NULL); 946 futex_put(f, NULL); 947 break; 948 949 case LINUX_FUTEX_WAKE_OP: 950 LIN_SDT_PROBE5(futex, linux_sys_futex, debug_wake_op, 951 args->uaddr, args->op, args->val, args->uaddr2, args->val3); 952 LINUX_CTR5(sys_futex, "WAKE_OP " 953 "uaddr %p nrwake 0x%x uaddr2 %p op 0x%x nrwake2 0x%x", 954 args->uaddr, args->val, args->uaddr2, args->val3, 955 args->timeout); 956 957 if (args->uaddr == args->uaddr2) { 958 LIN_SDT_PROBE1(futex, linux_sys_futex, return, EINVAL); 959 return (EINVAL); 960 } 961 962 retry2: 963 error = futex_get(args->uaddr, NULL, &f, flags | FUTEX_DONTLOCK); 964 if (error) { 965 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 966 return (error); 967 } 968 969 error = futex_get(args->uaddr2, NULL, &f2, flags | FUTEX_DONTLOCK); 970 if (error) { 971 futex_put(f, NULL); 972 973 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 974 return (error); 975 } 976 futex_lock(f); 977 futex_lock(f2); 978 979 /* 980 * This function returns positive number as results and 981 * negative as errors 982 */ 983 save = vm_fault_disable_pagefaults(); 984 op_ret = futex_atomic_op(td, args->val3, args->uaddr2); 985 vm_fault_enable_pagefaults(save); 986 987 LINUX_CTR2(sys_futex, "WAKE_OP atomic_op uaddr %p ret 0x%x", 988 args->uaddr, op_ret); 989 990 if (op_ret < 0) { 991 if (f2 != NULL) 992 futex_put(f2, NULL); 993 futex_put(f, NULL); 994 error = copyin(args->uaddr2, &val, sizeof(val)); 995 if (error == 0) 996 goto retry2; 997 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 998 return (error); 999 } 1000 1001 ret = futex_wake(f, args->val, args->val3); 1002 1003 if (op_ret > 0) { 1004 op_ret = 0; 1005 nrwake = (int)(unsigned long)args->timeout; 1006 1007 if (f2 != NULL) 1008 op_ret += futex_wake(f2, nrwake, args->val3); 1009 else 1010 op_ret += futex_wake(f, nrwake, args->val3); 1011 ret += op_ret; 1012 1013 } 1014 if (f2 != NULL) 1015 futex_put(f2, NULL); 1016 futex_put(f, NULL); 1017 td->td_retval[0] = ret; 1018 break; 1019 1020 case LINUX_FUTEX_LOCK_PI: 1021 /* not yet implemented */ 1022 pem = pem_find(td->td_proc); 1023 if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) { 1024 linux_msg(td, 1025 "linux_sys_futex: " 1026 "unsupported futex_pi op\n"); 1027 pem->flags |= LINUX_XUNSUP_FUTEXPIOP; 1028 LIN_SDT_PROBE0(futex, linux_sys_futex, 1029 unimplemented_lock_pi); 1030 } 1031 LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); 1032 return (ENOSYS); 1033 1034 case LINUX_FUTEX_UNLOCK_PI: 1035 /* not yet implemented */ 1036 pem = pem_find(td->td_proc); 1037 if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) { 1038 linux_msg(td, 1039 "linux_sys_futex: " 1040 "unsupported futex_pi op\n"); 1041 pem->flags |= LINUX_XUNSUP_FUTEXPIOP; 1042 LIN_SDT_PROBE0(futex, linux_sys_futex, 1043 unimplemented_unlock_pi); 1044 } 1045 LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); 1046 return (ENOSYS); 1047 1048 case LINUX_FUTEX_TRYLOCK_PI: 1049 /* not yet implemented */ 1050 pem = pem_find(td->td_proc); 1051 if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) { 1052 linux_msg(td, 1053 "linux_sys_futex: " 1054 "unsupported futex_pi op\n"); 1055 pem->flags |= LINUX_XUNSUP_FUTEXPIOP; 1056 LIN_SDT_PROBE0(futex, linux_sys_futex, 1057 unimplemented_trylock_pi); 1058 } 1059 LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); 1060 return (ENOSYS); 1061 1062 case LINUX_FUTEX_REQUEUE: 1063 /* 1064 * Glibc does not use this operation since version 2.3.3, 1065 * as it is racy and replaced by FUTEX_CMP_REQUEUE operation. 1066 * Glibc versions prior to 2.3.3 fall back to FUTEX_WAKE when 1067 * FUTEX_REQUEUE returned EINVAL. 1068 */ 1069 pem = pem_find(td->td_proc); 1070 if ((pem->flags & LINUX_XDEPR_REQUEUEOP) == 0) { 1071 linux_msg(td, 1072 "linux_sys_futex: " 1073 "unsupported futex_requeue op\n"); 1074 pem->flags |= LINUX_XDEPR_REQUEUEOP; 1075 LIN_SDT_PROBE0(futex, linux_sys_futex, 1076 deprecated_requeue); 1077 } 1078 1079 LIN_SDT_PROBE1(futex, linux_sys_futex, return, EINVAL); 1080 return (EINVAL); 1081 1082 case LINUX_FUTEX_WAIT_REQUEUE_PI: 1083 /* not yet implemented */ 1084 pem = pem_find(td->td_proc); 1085 if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) { 1086 linux_msg(td, 1087 "linux_sys_futex: " 1088 "unsupported futex_pi op\n"); 1089 pem->flags |= LINUX_XUNSUP_FUTEXPIOP; 1090 LIN_SDT_PROBE0(futex, linux_sys_futex, 1091 unimplemented_wait_requeue_pi); 1092 } 1093 LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); 1094 return (ENOSYS); 1095 1096 case LINUX_FUTEX_CMP_REQUEUE_PI: 1097 /* not yet implemented */ 1098 pem = pem_find(td->td_proc); 1099 if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) { 1100 linux_msg(td, 1101 "linux_sys_futex: " 1102 "unsupported futex_pi op\n"); 1103 pem->flags |= LINUX_XUNSUP_FUTEXPIOP; 1104 LIN_SDT_PROBE0(futex, linux_sys_futex, 1105 unimplemented_cmp_requeue_pi); 1106 } 1107 LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); 1108 return (ENOSYS); 1109 1110 default: 1111 linux_msg(td, 1112 "linux_sys_futex: unknown op %d\n", args->op); 1113 LIN_SDT_PROBE1(futex, linux_sys_futex, unknown_operation, 1114 args->op); 1115 LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); 1116 return (ENOSYS); 1117 } 1118 1119 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 1120 return (error); 1121 } 1122 1123 int 1124 linux_set_robust_list(struct thread *td, struct linux_set_robust_list_args *args) 1125 { 1126 struct linux_emuldata *em; 1127 1128 LIN_SDT_PROBE2(futex, linux_set_robust_list, entry, td, args); 1129 1130 if (args->len != sizeof(struct linux_robust_list_head)) { 1131 LIN_SDT_PROBE0(futex, linux_set_robust_list, size_error); 1132 LIN_SDT_PROBE1(futex, linux_set_robust_list, return, EINVAL); 1133 return (EINVAL); 1134 } 1135 1136 em = em_find(td); 1137 em->robust_futexes = args->head; 1138 1139 LIN_SDT_PROBE1(futex, linux_set_robust_list, return, 0); 1140 return (0); 1141 } 1142 1143 int 1144 linux_get_robust_list(struct thread *td, struct linux_get_robust_list_args *args) 1145 { 1146 struct linux_emuldata *em; 1147 struct linux_robust_list_head *head; 1148 l_size_t len = sizeof(struct linux_robust_list_head); 1149 struct thread *td2; 1150 int error = 0; 1151 1152 LIN_SDT_PROBE2(futex, linux_get_robust_list, entry, td, args); 1153 1154 if (!args->pid) { 1155 em = em_find(td); 1156 KASSERT(em != NULL, ("get_robust_list: emuldata notfound.\n")); 1157 head = em->robust_futexes; 1158 } else { 1159 td2 = tdfind(args->pid, -1); 1160 if (td2 == NULL) { 1161 LIN_SDT_PROBE1(futex, linux_get_robust_list, return, 1162 ESRCH); 1163 return (ESRCH); 1164 } 1165 if (SV_PROC_ABI(td2->td_proc) != SV_ABI_LINUX) { 1166 LIN_SDT_PROBE1(futex, linux_get_robust_list, return, 1167 EPERM); 1168 PROC_UNLOCK(td2->td_proc); 1169 return (EPERM); 1170 } 1171 1172 em = em_find(td2); 1173 KASSERT(em != NULL, ("get_robust_list: emuldata notfound.\n")); 1174 /* XXX: ptrace? */ 1175 if (priv_check(td, PRIV_CRED_SETUID) || 1176 priv_check(td, PRIV_CRED_SETEUID) || 1177 p_candebug(td, td2->td_proc)) { 1178 PROC_UNLOCK(td2->td_proc); 1179 1180 LIN_SDT_PROBE1(futex, linux_get_robust_list, return, 1181 EPERM); 1182 return (EPERM); 1183 } 1184 head = em->robust_futexes; 1185 1186 PROC_UNLOCK(td2->td_proc); 1187 } 1188 1189 error = copyout(&len, args->len, sizeof(l_size_t)); 1190 if (error) { 1191 LIN_SDT_PROBE1(futex, linux_get_robust_list, copyout_error, 1192 error); 1193 LIN_SDT_PROBE1(futex, linux_get_robust_list, return, EFAULT); 1194 return (EFAULT); 1195 } 1196 1197 error = copyout(&head, args->head, sizeof(head)); 1198 if (error) { 1199 LIN_SDT_PROBE1(futex, linux_get_robust_list, copyout_error, 1200 error); 1201 } 1202 1203 LIN_SDT_PROBE1(futex, linux_get_robust_list, return, error); 1204 return (error); 1205 } 1206 1207 static int 1208 handle_futex_death(struct linux_emuldata *em, uint32_t *uaddr, 1209 unsigned int pi) 1210 { 1211 uint32_t uval, nval, mval; 1212 struct futex *f; 1213 int error; 1214 1215 LIN_SDT_PROBE3(futex, handle_futex_death, entry, em, uaddr, pi); 1216 1217 retry: 1218 error = copyin(uaddr, &uval, 4); 1219 if (error) { 1220 LIN_SDT_PROBE1(futex, handle_futex_death, copyin_error, error); 1221 LIN_SDT_PROBE1(futex, handle_futex_death, return, EFAULT); 1222 return (EFAULT); 1223 } 1224 if ((uval & FUTEX_TID_MASK) == em->em_tid) { 1225 mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED; 1226 nval = casuword32(uaddr, uval, mval); 1227 1228 if (nval == -1) { 1229 LIN_SDT_PROBE1(futex, handle_futex_death, return, 1230 EFAULT); 1231 return (EFAULT); 1232 } 1233 1234 if (nval != uval) 1235 goto retry; 1236 1237 if (!pi && (uval & FUTEX_WAITERS)) { 1238 error = futex_get(uaddr, NULL, &f, 1239 FUTEX_DONTCREATE | FUTEX_SHARED); 1240 if (error) { 1241 LIN_SDT_PROBE1(futex, handle_futex_death, 1242 return, error); 1243 return (error); 1244 } 1245 if (f != NULL) { 1246 futex_wake(f, 1, FUTEX_BITSET_MATCH_ANY); 1247 futex_put(f, NULL); 1248 } 1249 } 1250 } 1251 1252 LIN_SDT_PROBE1(futex, handle_futex_death, return, 0); 1253 return (0); 1254 } 1255 1256 static int 1257 fetch_robust_entry(struct linux_robust_list **entry, 1258 struct linux_robust_list **head, unsigned int *pi) 1259 { 1260 l_ulong uentry; 1261 int error; 1262 1263 LIN_SDT_PROBE3(futex, fetch_robust_entry, entry, entry, head, pi); 1264 1265 error = copyin((const void *)head, &uentry, sizeof(l_ulong)); 1266 if (error) { 1267 LIN_SDT_PROBE1(futex, fetch_robust_entry, copyin_error, error); 1268 LIN_SDT_PROBE1(futex, fetch_robust_entry, return, EFAULT); 1269 return (EFAULT); 1270 } 1271 1272 *entry = (void *)(uentry & ~1UL); 1273 *pi = uentry & 1; 1274 1275 LIN_SDT_PROBE1(futex, fetch_robust_entry, return, 0); 1276 return (0); 1277 } 1278 1279 /* This walks the list of robust futexes releasing them. */ 1280 void 1281 release_futexes(struct thread *td, struct linux_emuldata *em) 1282 { 1283 struct linux_robust_list_head *head = NULL; 1284 struct linux_robust_list *entry, *next_entry, *pending; 1285 unsigned int limit = 2048, pi, next_pi, pip; 1286 l_long futex_offset; 1287 int rc, error; 1288 1289 LIN_SDT_PROBE2(futex, release_futexes, entry, td, em); 1290 1291 head = em->robust_futexes; 1292 1293 if (head == NULL) { 1294 LIN_SDT_PROBE0(futex, release_futexes, return); 1295 return; 1296 } 1297 1298 if (fetch_robust_entry(&entry, PTRIN(&head->list.next), &pi)) { 1299 LIN_SDT_PROBE0(futex, release_futexes, return); 1300 return; 1301 } 1302 1303 error = copyin(&head->futex_offset, &futex_offset, 1304 sizeof(futex_offset)); 1305 if (error) { 1306 LIN_SDT_PROBE1(futex, release_futexes, copyin_error, error); 1307 LIN_SDT_PROBE0(futex, release_futexes, return); 1308 return; 1309 } 1310 1311 if (fetch_robust_entry(&pending, PTRIN(&head->pending_list), &pip)) { 1312 LIN_SDT_PROBE0(futex, release_futexes, return); 1313 return; 1314 } 1315 1316 while (entry != &head->list) { 1317 rc = fetch_robust_entry(&next_entry, PTRIN(&entry->next), &next_pi); 1318 1319 if (entry != pending) 1320 if (handle_futex_death(em, 1321 (uint32_t *)((caddr_t)entry + futex_offset), pi)) { 1322 LIN_SDT_PROBE0(futex, release_futexes, return); 1323 return; 1324 } 1325 if (rc) { 1326 LIN_SDT_PROBE0(futex, release_futexes, return); 1327 return; 1328 } 1329 1330 entry = next_entry; 1331 pi = next_pi; 1332 1333 if (!--limit) 1334 break; 1335 1336 sched_relinquish(curthread); 1337 } 1338 1339 if (pending) 1340 handle_futex_death(em, (uint32_t *)((caddr_t)pending + futex_offset), pip); 1341 1342 LIN_SDT_PROBE0(futex, release_futexes, return); 1343 } 1344