1 /* $NetBSD: linux_futex.c,v 1.7 2006/07/24 19:01:49 manu Exp $ */ 2 3 /*- 4 * Copyright (c) 2009-2016 Dmitry Chagin 5 * Copyright (c) 2005 Emmanuel Dreyfus 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by Emmanuel Dreyfus 19 * 4. The name of the author may not be used to endorse or promote 20 * products derived from this software without specific prior written 21 * permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE THE AUTHOR AND CONTRIBUTORS ``AS IS'' 24 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 25 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 26 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS 27 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 30 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 31 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 32 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 33 * POSSIBILITY OF SUCH DAMAGE. 34 */ 35 36 #include <sys/cdefs.h> 37 __FBSDID("$FreeBSD$"); 38 #if 0 39 __KERNEL_RCSID(1, "$NetBSD: linux_futex.c,v 1.7 2006/07/24 19:01:49 manu Exp $"); 40 #endif 41 42 #include "opt_compat.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/imgact.h> 47 #include <sys/kernel.h> 48 #include <sys/ktr.h> 49 #include <sys/lock.h> 50 #include <sys/malloc.h> 51 #include <sys/mutex.h> 52 #include <sys/priv.h> 53 #include <sys/proc.h> 54 #include <sys/queue.h> 55 #include <sys/sched.h> 56 #include <sys/sdt.h> 57 #include <sys/umtx.h> 58 59 #include <vm/vm_extern.h> 60 61 #ifdef COMPAT_LINUX32 62 #include <machine/../linux32/linux.h> 63 #include <machine/../linux32/linux32_proto.h> 64 #else 65 #include <machine/../linux/linux.h> 66 #include <machine/../linux/linux_proto.h> 67 #endif 68 #include <compat/linux/linux_dtrace.h> 69 #include <compat/linux/linux_emul.h> 70 #include <compat/linux/linux_futex.h> 71 #include <compat/linux/linux_timer.h> 72 #include <compat/linux/linux_util.h> 73 74 /* DTrace init */ 75 LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE); 76 77 /** 78 * Futex part for the special DTrace module "locks". 79 */ 80 LIN_SDT_PROBE_DEFINE1(locks, futex_mtx, locked, "struct mtx *"); 81 LIN_SDT_PROBE_DEFINE1(locks, futex_mtx, unlock, "struct mtx *"); 82 83 /** 84 * Per futex probes. 85 */ 86 LIN_SDT_PROBE_DEFINE1(futex, futex, create, "struct sx *"); 87 LIN_SDT_PROBE_DEFINE1(futex, futex, destroy, "struct sx *"); 88 89 /** 90 * DTrace probes in this module. 91 */ 92 LIN_SDT_PROBE_DEFINE2(futex, futex_put, entry, "struct futex *", 93 "struct waiting_proc *"); 94 LIN_SDT_PROBE_DEFINE3(futex, futex_put, destroy, "uint32_t *", "uint32_t", 95 "int"); 96 LIN_SDT_PROBE_DEFINE3(futex, futex_put, unlock, "uint32_t *", "uint32_t", 97 "int"); 98 LIN_SDT_PROBE_DEFINE0(futex, futex_put, return); 99 LIN_SDT_PROBE_DEFINE3(futex, futex_get0, entry, "uint32_t *", "struct futex **", 100 "uint32_t"); 101 LIN_SDT_PROBE_DEFINE1(futex, futex_get0, umtx_key_get_error, "int"); 102 LIN_SDT_PROBE_DEFINE3(futex, futex_get0, shared, "uint32_t *", "uint32_t", 103 "int"); 104 LIN_SDT_PROBE_DEFINE1(futex, futex_get0, null, "uint32_t *"); 105 LIN_SDT_PROBE_DEFINE3(futex, futex_get0, new, "uint32_t *", "uint32_t", "int"); 106 LIN_SDT_PROBE_DEFINE1(futex, futex_get0, return, "int"); 107 LIN_SDT_PROBE_DEFINE3(futex, futex_get, entry, "uint32_t *", 108 "struct waiting_proc **", "struct futex **"); 109 LIN_SDT_PROBE_DEFINE0(futex, futex_get, error); 110 LIN_SDT_PROBE_DEFINE1(futex, futex_get, return, "int"); 111 LIN_SDT_PROBE_DEFINE3(futex, futex_sleep, entry, "struct futex *", 112 "struct waiting_proc **", "struct timespec *"); 113 LIN_SDT_PROBE_DEFINE5(futex, futex_sleep, requeue_error, "int", "uint32_t *", 114 "struct waiting_proc *", "uint32_t *", "uint32_t"); 115 LIN_SDT_PROBE_DEFINE3(futex, futex_sleep, sleep_error, "int", "uint32_t *", 116 "struct waiting_proc *"); 117 LIN_SDT_PROBE_DEFINE1(futex, futex_sleep, return, "int"); 118 LIN_SDT_PROBE_DEFINE3(futex, futex_wake, entry, "struct futex *", "int", 119 "uint32_t"); 120 LIN_SDT_PROBE_DEFINE3(futex, futex_wake, iterate, "uint32_t", 121 "struct waiting_proc *", "uint32_t"); 122 LIN_SDT_PROBE_DEFINE1(futex, futex_wake, wakeup, "struct waiting_proc *"); 123 LIN_SDT_PROBE_DEFINE1(futex, futex_wake, return, "int"); 124 LIN_SDT_PROBE_DEFINE4(futex, futex_requeue, entry, "struct futex *", "int", 125 "struct futex *", "int"); 126 LIN_SDT_PROBE_DEFINE1(futex, futex_requeue, wakeup, "struct waiting_proc *"); 127 LIN_SDT_PROBE_DEFINE3(futex, futex_requeue, requeue, "uint32_t *", 128 "struct waiting_proc *", "uint32_t"); 129 LIN_SDT_PROBE_DEFINE1(futex, futex_requeue, return, "int"); 130 LIN_SDT_PROBE_DEFINE4(futex, futex_wait, entry, "struct futex *", 131 "struct waiting_proc **", "struct timespec *", "uint32_t"); 132 LIN_SDT_PROBE_DEFINE1(futex, futex_wait, sleep_error, "int"); 133 LIN_SDT_PROBE_DEFINE1(futex, futex_wait, return, "int"); 134 LIN_SDT_PROBE_DEFINE3(futex, futex_atomic_op, entry, "struct thread *", 135 "int", "uint32_t"); 136 LIN_SDT_PROBE_DEFINE4(futex, futex_atomic_op, decoded_op, "int", "int", "int", 137 "int"); 138 LIN_SDT_PROBE_DEFINE0(futex, futex_atomic_op, missing_access_check); 139 LIN_SDT_PROBE_DEFINE1(futex, futex_atomic_op, unimplemented_op, "int"); 140 LIN_SDT_PROBE_DEFINE1(futex, futex_atomic_op, unimplemented_cmp, "int"); 141 LIN_SDT_PROBE_DEFINE1(futex, futex_atomic_op, return, "int"); 142 LIN_SDT_PROBE_DEFINE2(futex, linux_sys_futex, entry, "struct thread *", 143 "struct linux_sys_futex_args *"); 144 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_clockswitch); 145 LIN_SDT_PROBE_DEFINE1(futex, linux_sys_futex, copyin_error, "int"); 146 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, invalid_cmp_requeue_use); 147 LIN_SDT_PROBE_DEFINE3(futex, linux_sys_futex, debug_wait, "uint32_t *", 148 "uint32_t", "uint32_t"); 149 LIN_SDT_PROBE_DEFINE4(futex, linux_sys_futex, debug_wait_value_neq, 150 "uint32_t *", "uint32_t", "int", "uint32_t"); 151 LIN_SDT_PROBE_DEFINE3(futex, linux_sys_futex, debug_wake, "uint32_t *", 152 "uint32_t", "uint32_t"); 153 LIN_SDT_PROBE_DEFINE5(futex, linux_sys_futex, debug_cmp_requeue, "uint32_t *", 154 "uint32_t", "uint32_t", "uint32_t *", "struct l_timespec *"); 155 LIN_SDT_PROBE_DEFINE2(futex, linux_sys_futex, debug_cmp_requeue_value_neq, 156 "uint32_t", "int"); 157 LIN_SDT_PROBE_DEFINE5(futex, linux_sys_futex, debug_wake_op, "uint32_t *", 158 "int", "uint32_t", "uint32_t *", "uint32_t"); 159 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unhandled_efault); 160 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_lock_pi); 161 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_unlock_pi); 162 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_trylock_pi); 163 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, deprecated_requeue); 164 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_wait_requeue_pi); 165 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_cmp_requeue_pi); 166 LIN_SDT_PROBE_DEFINE1(futex, linux_sys_futex, unknown_operation, "int"); 167 LIN_SDT_PROBE_DEFINE1(futex, linux_sys_futex, return, "int"); 168 LIN_SDT_PROBE_DEFINE2(futex, linux_set_robust_list, entry, "struct thread *", 169 "struct linux_set_robust_list_args *"); 170 LIN_SDT_PROBE_DEFINE0(futex, linux_set_robust_list, size_error); 171 LIN_SDT_PROBE_DEFINE1(futex, linux_set_robust_list, return, "int"); 172 LIN_SDT_PROBE_DEFINE2(futex, linux_get_robust_list, entry, "struct thread *", 173 "struct linux_get_robust_list_args *"); 174 LIN_SDT_PROBE_DEFINE1(futex, linux_get_robust_list, copyout_error, "int"); 175 LIN_SDT_PROBE_DEFINE1(futex, linux_get_robust_list, return, "int"); 176 LIN_SDT_PROBE_DEFINE3(futex, handle_futex_death, entry, 177 "struct linux_emuldata *", "uint32_t *", "unsigned int"); 178 LIN_SDT_PROBE_DEFINE1(futex, handle_futex_death, copyin_error, "int"); 179 LIN_SDT_PROBE_DEFINE1(futex, handle_futex_death, return, "int"); 180 LIN_SDT_PROBE_DEFINE3(futex, fetch_robust_entry, entry, 181 "struct linux_robust_list **", "struct linux_robust_list **", 182 "unsigned int *"); 183 LIN_SDT_PROBE_DEFINE1(futex, fetch_robust_entry, copyin_error, "int"); 184 LIN_SDT_PROBE_DEFINE1(futex, fetch_robust_entry, return, "int"); 185 LIN_SDT_PROBE_DEFINE2(futex, release_futexes, entry, "struct thread *", 186 "struct linux_emuldata *"); 187 LIN_SDT_PROBE_DEFINE1(futex, release_futexes, copyin_error, "int"); 188 LIN_SDT_PROBE_DEFINE0(futex, release_futexes, return); 189 190 struct futex; 191 192 struct waiting_proc { 193 uint32_t wp_flags; 194 struct futex *wp_futex; 195 TAILQ_ENTRY(waiting_proc) wp_list; 196 }; 197 198 struct futex { 199 struct mtx f_lck; 200 uint32_t *f_uaddr; /* user-supplied value, for debug */ 201 struct umtx_key f_key; 202 uint32_t f_refcount; 203 uint32_t f_bitset; 204 LIST_ENTRY(futex) f_list; 205 TAILQ_HEAD(lf_waiting_proc, waiting_proc) f_waiting_proc; 206 }; 207 208 struct futex_list futex_list; 209 210 #define FUTEX_LOCK(f) mtx_lock(&(f)->f_lck) 211 #define FUTEX_LOCKED(f) mtx_owned(&(f)->f_lck) 212 #define FUTEX_UNLOCK(f) mtx_unlock(&(f)->f_lck) 213 #define FUTEX_INIT(f) do { \ 214 mtx_init(&(f)->f_lck, "ftlk", NULL, \ 215 MTX_DUPOK); \ 216 LIN_SDT_PROBE1(futex, futex, create, \ 217 &(f)->f_lck); \ 218 } while (0) 219 #define FUTEX_DESTROY(f) do { \ 220 LIN_SDT_PROBE1(futex, futex, destroy, \ 221 &(f)->f_lck); \ 222 mtx_destroy(&(f)->f_lck); \ 223 } while (0) 224 #define FUTEX_ASSERT_LOCKED(f) mtx_assert(&(f)->f_lck, MA_OWNED) 225 #define FUTEX_ASSERT_UNLOCKED(f) mtx_assert(&(f)->f_lck, MA_NOTOWNED) 226 227 struct mtx futex_mtx; /* protects the futex list */ 228 #define FUTEXES_LOCK do { \ 229 mtx_lock(&futex_mtx); \ 230 LIN_SDT_PROBE1(locks, futex_mtx, \ 231 locked, &futex_mtx); \ 232 } while (0) 233 #define FUTEXES_UNLOCK do { \ 234 LIN_SDT_PROBE1(locks, futex_mtx, \ 235 unlock, &futex_mtx); \ 236 mtx_unlock(&futex_mtx); \ 237 } while (0) 238 239 /* flags for futex_get() */ 240 #define FUTEX_CREATE_WP 0x1 /* create waiting_proc */ 241 #define FUTEX_DONTCREATE 0x2 /* don't create futex if not exists */ 242 #define FUTEX_DONTEXISTS 0x4 /* return EINVAL if futex exists */ 243 #define FUTEX_SHARED 0x8 /* shared futex */ 244 #define FUTEX_DONTLOCK 0x10 /* don't lock futex */ 245 246 /* wp_flags */ 247 #define FUTEX_WP_REQUEUED 0x1 /* wp requeued - wp moved from wp_list 248 * of futex where thread sleep to wp_list 249 * of another futex. 250 */ 251 #define FUTEX_WP_REMOVED 0x2 /* wp is woken up and removed from futex 252 * wp_list to prevent double wakeup. 253 */ 254 255 static void futex_put(struct futex *, struct waiting_proc *); 256 static int futex_get0(uint32_t *, struct futex **f, uint32_t); 257 static int futex_get(uint32_t *, struct waiting_proc **, struct futex **, 258 uint32_t); 259 static int futex_sleep(struct futex *, struct waiting_proc *, struct timespec *); 260 static int futex_wake(struct futex *, int, uint32_t); 261 static int futex_requeue(struct futex *, int, struct futex *, int); 262 static int futex_copyin_timeout(int, struct l_timespec *, int, 263 struct timespec *); 264 static int futex_wait(struct futex *, struct waiting_proc *, struct timespec *, 265 uint32_t); 266 static void futex_lock(struct futex *); 267 static void futex_unlock(struct futex *); 268 static int futex_atomic_op(struct thread *, int, uint32_t *); 269 static int handle_futex_death(struct linux_emuldata *, uint32_t *, 270 unsigned int); 271 static int fetch_robust_entry(struct linux_robust_list **, 272 struct linux_robust_list **, unsigned int *); 273 274 /* support.s */ 275 int futex_xchgl(int oparg, uint32_t *uaddr, int *oldval); 276 int futex_addl(int oparg, uint32_t *uaddr, int *oldval); 277 int futex_orl(int oparg, uint32_t *uaddr, int *oldval); 278 int futex_andl(int oparg, uint32_t *uaddr, int *oldval); 279 int futex_xorl(int oparg, uint32_t *uaddr, int *oldval); 280 281 282 static int 283 futex_copyin_timeout(int op, struct l_timespec *luts, int clockrt, 284 struct timespec *ts) 285 { 286 struct l_timespec lts; 287 struct timespec kts; 288 int error; 289 290 error = copyin(luts, <s, sizeof(lts)); 291 if (error) 292 return (error); 293 294 error = linux_to_native_timespec(ts, <s); 295 if (error) 296 return (error); 297 if (ts->tv_nsec < 0 || ts->tv_nsec >= 1000000000) 298 return (EINVAL); 299 300 if (clockrt) { 301 nanotime(&kts); 302 timespecsub(ts, &kts); 303 } else if (op == LINUX_FUTEX_WAIT_BITSET) { 304 nanouptime(&kts); 305 timespecsub(ts, &kts); 306 } 307 return (error); 308 } 309 310 static void 311 futex_put(struct futex *f, struct waiting_proc *wp) 312 { 313 LIN_SDT_PROBE2(futex, futex_put, entry, f, wp); 314 315 if (wp != NULL) { 316 if ((wp->wp_flags & FUTEX_WP_REMOVED) == 0) 317 TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list); 318 free(wp, M_FUTEX_WP); 319 } 320 321 FUTEXES_LOCK; 322 if (--f->f_refcount == 0) { 323 LIST_REMOVE(f, f_list); 324 FUTEXES_UNLOCK; 325 if (FUTEX_LOCKED(f)) 326 futex_unlock(f); 327 328 LIN_SDT_PROBE3(futex, futex_put, destroy, f->f_uaddr, 329 f->f_refcount, f->f_key.shared); 330 LINUX_CTR3(sys_futex, "futex_put destroy uaddr %p ref %d " 331 "shared %d", f->f_uaddr, f->f_refcount, f->f_key.shared); 332 umtx_key_release(&f->f_key); 333 FUTEX_DESTROY(f); 334 free(f, M_FUTEX); 335 336 LIN_SDT_PROBE0(futex, futex_put, return); 337 return; 338 } 339 340 LIN_SDT_PROBE3(futex, futex_put, unlock, f->f_uaddr, f->f_refcount, 341 f->f_key.shared); 342 LINUX_CTR3(sys_futex, "futex_put uaddr %p ref %d shared %d", 343 f->f_uaddr, f->f_refcount, f->f_key.shared); 344 FUTEXES_UNLOCK; 345 if (FUTEX_LOCKED(f)) 346 futex_unlock(f); 347 348 LIN_SDT_PROBE0(futex, futex_put, return); 349 } 350 351 static int 352 futex_get0(uint32_t *uaddr, struct futex **newf, uint32_t flags) 353 { 354 struct futex *f, *tmpf; 355 struct umtx_key key; 356 int error; 357 358 LIN_SDT_PROBE3(futex, futex_get0, entry, uaddr, newf, flags); 359 360 *newf = tmpf = NULL; 361 362 error = umtx_key_get(uaddr, TYPE_FUTEX, (flags & FUTEX_SHARED) ? 363 AUTO_SHARE : THREAD_SHARE, &key); 364 if (error) { 365 LIN_SDT_PROBE1(futex, futex_get0, umtx_key_get_error, error); 366 LIN_SDT_PROBE1(futex, futex_get0, return, error); 367 return (error); 368 } 369 retry: 370 FUTEXES_LOCK; 371 LIST_FOREACH(f, &futex_list, f_list) { 372 if (umtx_key_match(&f->f_key, &key)) { 373 if (tmpf != NULL) { 374 if (FUTEX_LOCKED(tmpf)) 375 futex_unlock(tmpf); 376 FUTEX_DESTROY(tmpf); 377 free(tmpf, M_FUTEX); 378 } 379 if (flags & FUTEX_DONTEXISTS) { 380 FUTEXES_UNLOCK; 381 umtx_key_release(&key); 382 383 LIN_SDT_PROBE1(futex, futex_get0, return, 384 EINVAL); 385 return (EINVAL); 386 } 387 388 /* 389 * Increment refcount of the found futex to 390 * prevent it from deallocation before FUTEX_LOCK() 391 */ 392 ++f->f_refcount; 393 FUTEXES_UNLOCK; 394 umtx_key_release(&key); 395 396 if ((flags & FUTEX_DONTLOCK) == 0) 397 futex_lock(f); 398 *newf = f; 399 LIN_SDT_PROBE3(futex, futex_get0, shared, uaddr, 400 f->f_refcount, f->f_key.shared); 401 LINUX_CTR3(sys_futex, "futex_get uaddr %p ref %d shared %d", 402 uaddr, f->f_refcount, f->f_key.shared); 403 404 LIN_SDT_PROBE1(futex, futex_get0, return, 0); 405 return (0); 406 } 407 } 408 409 if (flags & FUTEX_DONTCREATE) { 410 FUTEXES_UNLOCK; 411 umtx_key_release(&key); 412 LIN_SDT_PROBE1(futex, futex_get0, null, uaddr); 413 LINUX_CTR1(sys_futex, "futex_get uaddr %p null", uaddr); 414 415 LIN_SDT_PROBE1(futex, futex_get0, return, 0); 416 return (0); 417 } 418 419 if (tmpf == NULL) { 420 FUTEXES_UNLOCK; 421 tmpf = malloc(sizeof(*tmpf), M_FUTEX, M_WAITOK | M_ZERO); 422 tmpf->f_uaddr = uaddr; 423 tmpf->f_key = key; 424 tmpf->f_refcount = 1; 425 tmpf->f_bitset = FUTEX_BITSET_MATCH_ANY; 426 FUTEX_INIT(tmpf); 427 TAILQ_INIT(&tmpf->f_waiting_proc); 428 429 /* 430 * Lock the new futex before an insert into the futex_list 431 * to prevent futex usage by other. 432 */ 433 if ((flags & FUTEX_DONTLOCK) == 0) 434 futex_lock(tmpf); 435 goto retry; 436 } 437 438 LIST_INSERT_HEAD(&futex_list, tmpf, f_list); 439 FUTEXES_UNLOCK; 440 441 LIN_SDT_PROBE3(futex, futex_get0, new, uaddr, tmpf->f_refcount, 442 tmpf->f_key.shared); 443 LINUX_CTR3(sys_futex, "futex_get uaddr %p ref %d shared %d new", 444 uaddr, tmpf->f_refcount, tmpf->f_key.shared); 445 *newf = tmpf; 446 447 LIN_SDT_PROBE1(futex, futex_get0, return, 0); 448 return (0); 449 } 450 451 static int 452 futex_get(uint32_t *uaddr, struct waiting_proc **wp, struct futex **f, 453 uint32_t flags) 454 { 455 int error; 456 457 LIN_SDT_PROBE3(futex, futex_get, entry, uaddr, wp, f); 458 459 if (flags & FUTEX_CREATE_WP) { 460 *wp = malloc(sizeof(struct waiting_proc), M_FUTEX_WP, M_WAITOK); 461 (*wp)->wp_flags = 0; 462 } 463 error = futex_get0(uaddr, f, flags); 464 if (error) { 465 LIN_SDT_PROBE0(futex, futex_get, error); 466 467 if (flags & FUTEX_CREATE_WP) 468 free(*wp, M_FUTEX_WP); 469 470 LIN_SDT_PROBE1(futex, futex_get, return, error); 471 return (error); 472 } 473 if (flags & FUTEX_CREATE_WP) { 474 TAILQ_INSERT_HEAD(&(*f)->f_waiting_proc, *wp, wp_list); 475 (*wp)->wp_futex = *f; 476 } 477 478 LIN_SDT_PROBE1(futex, futex_get, return, error); 479 return (error); 480 } 481 482 static inline void 483 futex_lock(struct futex *f) 484 { 485 486 LINUX_CTR3(sys_futex, "futex_lock uaddr %p ref %d shared %d", 487 f->f_uaddr, f->f_refcount, f->f_key.shared); 488 FUTEX_ASSERT_UNLOCKED(f); 489 FUTEX_LOCK(f); 490 } 491 492 static inline void 493 futex_unlock(struct futex *f) 494 { 495 496 LINUX_CTR3(sys_futex, "futex_unlock uaddr %p ref %d shared %d", 497 f->f_uaddr, f->f_refcount, f->f_key.shared); 498 FUTEX_ASSERT_LOCKED(f); 499 FUTEX_UNLOCK(f); 500 } 501 502 static int 503 futex_sleep(struct futex *f, struct waiting_proc *wp, struct timespec *ts) 504 { 505 struct timespec uts; 506 sbintime_t sbt, prec, tmp; 507 time_t over; 508 int error; 509 510 FUTEX_ASSERT_LOCKED(f); 511 if (ts != NULL) { 512 uts = *ts; 513 if (uts.tv_sec > INT32_MAX / 2) { 514 over = uts.tv_sec - INT32_MAX / 2; 515 uts.tv_sec -= over; 516 } 517 tmp = tstosbt(uts); 518 if (TIMESEL(&sbt, tmp)) 519 sbt += tc_tick_sbt; 520 sbt += tmp; 521 prec = tmp; 522 prec >>= tc_precexp; 523 } else { 524 sbt = 0; 525 prec = 0; 526 } 527 LIN_SDT_PROBE3(futex, futex_sleep, entry, f, wp, sbt); 528 LINUX_CTR4(sys_futex, "futex_sleep enter uaddr %p wp %p timo %ld ref %d", 529 f->f_uaddr, wp, sbt, f->f_refcount); 530 531 error = msleep_sbt(wp, &f->f_lck, PCATCH, "futex", sbt, prec, C_ABSOLUTE); 532 if (wp->wp_flags & FUTEX_WP_REQUEUED) { 533 KASSERT(f != wp->wp_futex, ("futex != wp_futex")); 534 535 if (error) { 536 LIN_SDT_PROBE5(futex, futex_sleep, requeue_error, error, 537 f->f_uaddr, wp, wp->wp_futex->f_uaddr, 538 wp->wp_futex->f_refcount); 539 } 540 541 LINUX_CTR5(sys_futex, "futex_sleep out error %d uaddr %p wp" 542 " %p requeued uaddr %p ref %d", 543 error, f->f_uaddr, wp, wp->wp_futex->f_uaddr, 544 wp->wp_futex->f_refcount); 545 futex_put(f, NULL); 546 f = wp->wp_futex; 547 futex_lock(f); 548 } else { 549 if (error) { 550 LIN_SDT_PROBE3(futex, futex_sleep, sleep_error, error, 551 f->f_uaddr, wp); 552 } 553 LINUX_CTR3(sys_futex, "futex_sleep out error %d uaddr %p wp %p", 554 error, f->f_uaddr, wp); 555 } 556 557 futex_put(f, wp); 558 559 LIN_SDT_PROBE1(futex, futex_sleep, return, error); 560 return (error); 561 } 562 563 static int 564 futex_wake(struct futex *f, int n, uint32_t bitset) 565 { 566 struct waiting_proc *wp, *wpt; 567 int count = 0; 568 569 LIN_SDT_PROBE3(futex, futex_wake, entry, f, n, bitset); 570 571 if (bitset == 0) { 572 LIN_SDT_PROBE1(futex, futex_wake, return, EINVAL); 573 return (EINVAL); 574 } 575 576 FUTEX_ASSERT_LOCKED(f); 577 TAILQ_FOREACH_SAFE(wp, &f->f_waiting_proc, wp_list, wpt) { 578 LIN_SDT_PROBE3(futex, futex_wake, iterate, f->f_uaddr, wp, 579 f->f_refcount); 580 LINUX_CTR3(sys_futex, "futex_wake uaddr %p wp %p ref %d", 581 f->f_uaddr, wp, f->f_refcount); 582 /* 583 * Unless we find a matching bit in 584 * the bitset, continue searching. 585 */ 586 if (!(wp->wp_futex->f_bitset & bitset)) 587 continue; 588 589 wp->wp_flags |= FUTEX_WP_REMOVED; 590 TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list); 591 LIN_SDT_PROBE1(futex, futex_wake, wakeup, wp); 592 wakeup_one(wp); 593 if (++count == n) 594 break; 595 } 596 597 LIN_SDT_PROBE1(futex, futex_wake, return, count); 598 return (count); 599 } 600 601 static int 602 futex_requeue(struct futex *f, int n, struct futex *f2, int n2) 603 { 604 struct waiting_proc *wp, *wpt; 605 int count = 0; 606 607 LIN_SDT_PROBE4(futex, futex_requeue, entry, f, n, f2, n2); 608 609 FUTEX_ASSERT_LOCKED(f); 610 FUTEX_ASSERT_LOCKED(f2); 611 612 TAILQ_FOREACH_SAFE(wp, &f->f_waiting_proc, wp_list, wpt) { 613 if (++count <= n) { 614 LINUX_CTR2(sys_futex, "futex_req_wake uaddr %p wp %p", 615 f->f_uaddr, wp); 616 wp->wp_flags |= FUTEX_WP_REMOVED; 617 TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list); 618 LIN_SDT_PROBE1(futex, futex_requeue, wakeup, wp); 619 wakeup_one(wp); 620 } else { 621 LIN_SDT_PROBE3(futex, futex_requeue, requeue, 622 f->f_uaddr, wp, f2->f_uaddr); 623 LINUX_CTR3(sys_futex, "futex_requeue uaddr %p wp %p to %p", 624 f->f_uaddr, wp, f2->f_uaddr); 625 wp->wp_flags |= FUTEX_WP_REQUEUED; 626 /* Move wp to wp_list of f2 futex */ 627 TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list); 628 TAILQ_INSERT_HEAD(&f2->f_waiting_proc, wp, wp_list); 629 630 /* 631 * Thread which sleeps on wp after waking should 632 * acquire f2 lock, so increment refcount of f2 to 633 * prevent it from premature deallocation. 634 */ 635 wp->wp_futex = f2; 636 FUTEXES_LOCK; 637 ++f2->f_refcount; 638 FUTEXES_UNLOCK; 639 if (count - n >= n2) 640 break; 641 } 642 } 643 644 LIN_SDT_PROBE1(futex, futex_requeue, return, count); 645 return (count); 646 } 647 648 static int 649 futex_wait(struct futex *f, struct waiting_proc *wp, struct timespec *ts, 650 uint32_t bitset) 651 { 652 int error; 653 654 LIN_SDT_PROBE4(futex, futex_wait, entry, f, wp, ts, bitset); 655 656 if (bitset == 0) { 657 LIN_SDT_PROBE1(futex, futex_wait, return, EINVAL); 658 return (EINVAL); 659 } 660 661 f->f_bitset = bitset; 662 error = futex_sleep(f, wp, ts); 663 if (error) 664 LIN_SDT_PROBE1(futex, futex_wait, sleep_error, error); 665 if (error == EWOULDBLOCK) 666 error = ETIMEDOUT; 667 668 LIN_SDT_PROBE1(futex, futex_wait, return, error); 669 return (error); 670 } 671 672 static int 673 futex_atomic_op(struct thread *td, int encoded_op, uint32_t *uaddr) 674 { 675 int op = (encoded_op >> 28) & 7; 676 int cmp = (encoded_op >> 24) & 15; 677 int oparg = (encoded_op << 8) >> 20; 678 int cmparg = (encoded_op << 20) >> 20; 679 int oldval = 0, ret; 680 681 LIN_SDT_PROBE3(futex, futex_atomic_op, entry, td, encoded_op, uaddr); 682 683 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 684 oparg = 1 << oparg; 685 686 LIN_SDT_PROBE4(futex, futex_atomic_op, decoded_op, op, cmp, oparg, 687 cmparg); 688 689 /* XXX: Linux verifies access here and returns EFAULT */ 690 LIN_SDT_PROBE0(futex, futex_atomic_op, missing_access_check); 691 692 switch (op) { 693 case FUTEX_OP_SET: 694 ret = futex_xchgl(oparg, uaddr, &oldval); 695 break; 696 case FUTEX_OP_ADD: 697 ret = futex_addl(oparg, uaddr, &oldval); 698 break; 699 case FUTEX_OP_OR: 700 ret = futex_orl(oparg, uaddr, &oldval); 701 break; 702 case FUTEX_OP_ANDN: 703 ret = futex_andl(~oparg, uaddr, &oldval); 704 break; 705 case FUTEX_OP_XOR: 706 ret = futex_xorl(oparg, uaddr, &oldval); 707 break; 708 default: 709 LIN_SDT_PROBE1(futex, futex_atomic_op, unimplemented_op, op); 710 ret = -ENOSYS; 711 break; 712 } 713 714 if (ret) { 715 LIN_SDT_PROBE1(futex, futex_atomic_op, return, ret); 716 return (ret); 717 } 718 719 switch (cmp) { 720 case FUTEX_OP_CMP_EQ: 721 ret = (oldval == cmparg); 722 break; 723 case FUTEX_OP_CMP_NE: 724 ret = (oldval != cmparg); 725 break; 726 case FUTEX_OP_CMP_LT: 727 ret = (oldval < cmparg); 728 break; 729 case FUTEX_OP_CMP_GE: 730 ret = (oldval >= cmparg); 731 break; 732 case FUTEX_OP_CMP_LE: 733 ret = (oldval <= cmparg); 734 break; 735 case FUTEX_OP_CMP_GT: 736 ret = (oldval > cmparg); 737 break; 738 default: 739 LIN_SDT_PROBE1(futex, futex_atomic_op, unimplemented_cmp, cmp); 740 ret = -ENOSYS; 741 } 742 743 LIN_SDT_PROBE1(futex, futex_atomic_op, return, ret); 744 return (ret); 745 } 746 747 int 748 linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args) 749 { 750 int clockrt, nrwake, op_ret, ret; 751 struct linux_pemuldata *pem; 752 struct waiting_proc *wp; 753 struct futex *f, *f2; 754 struct timespec uts, *ts; 755 int error, save; 756 uint32_t flags, val; 757 758 LIN_SDT_PROBE2(futex, linux_sys_futex, entry, td, args); 759 760 if (args->op & LINUX_FUTEX_PRIVATE_FLAG) { 761 flags = 0; 762 args->op &= ~LINUX_FUTEX_PRIVATE_FLAG; 763 } else 764 flags = FUTEX_SHARED; 765 766 /* 767 * Currently support for switching between CLOCK_MONOTONIC and 768 * CLOCK_REALTIME is not present. However Linux forbids the use of 769 * FUTEX_CLOCK_REALTIME with any op except FUTEX_WAIT_BITSET and 770 * FUTEX_WAIT_REQUEUE_PI. 771 */ 772 clockrt = args->op & LINUX_FUTEX_CLOCK_REALTIME; 773 args->op = args->op & ~LINUX_FUTEX_CLOCK_REALTIME; 774 if (clockrt && args->op != LINUX_FUTEX_WAIT_BITSET && 775 args->op != LINUX_FUTEX_WAIT_REQUEUE_PI) { 776 LIN_SDT_PROBE0(futex, linux_sys_futex, 777 unimplemented_clockswitch); 778 LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); 779 return (ENOSYS); 780 } 781 782 error = 0; 783 f = f2 = NULL; 784 785 switch (args->op) { 786 case LINUX_FUTEX_WAIT: 787 args->val3 = FUTEX_BITSET_MATCH_ANY; 788 /* FALLTHROUGH */ 789 790 case LINUX_FUTEX_WAIT_BITSET: 791 LIN_SDT_PROBE3(futex, linux_sys_futex, debug_wait, args->uaddr, 792 args->val, args->val3); 793 LINUX_CTR3(sys_futex, "WAIT uaddr %p val 0x%x bitset 0x%x", 794 args->uaddr, args->val, args->val3); 795 796 if (args->timeout != NULL) { 797 error = futex_copyin_timeout(args->op, args->timeout, 798 clockrt, &uts); 799 if (error) { 800 LIN_SDT_PROBE1(futex, linux_sys_futex, copyin_error, 801 error); 802 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 803 return (error); 804 } 805 ts = &uts; 806 } else 807 ts = NULL; 808 809 retry0: 810 error = futex_get(args->uaddr, &wp, &f, 811 flags | FUTEX_CREATE_WP); 812 if (error) { 813 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 814 return (error); 815 } 816 817 error = copyin_nofault(args->uaddr, &val, sizeof(val)); 818 if (error) { 819 futex_put(f, wp); 820 error = copyin(args->uaddr, &val, sizeof(val)); 821 if (error == 0) 822 goto retry0; 823 LIN_SDT_PROBE1(futex, linux_sys_futex, copyin_error, 824 error); 825 LINUX_CTR1(sys_futex, "WAIT copyin failed %d", 826 error); 827 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 828 return (error); 829 } 830 if (val != args->val) { 831 LIN_SDT_PROBE4(futex, linux_sys_futex, 832 debug_wait_value_neq, args->uaddr, args->val, val, 833 args->val3); 834 LINUX_CTR3(sys_futex, 835 "WAIT uaddr %p val 0x%x != uval 0x%x", 836 args->uaddr, args->val, val); 837 futex_put(f, wp); 838 839 LIN_SDT_PROBE1(futex, linux_sys_futex, return, 840 EWOULDBLOCK); 841 return (EWOULDBLOCK); 842 } 843 844 error = futex_wait(f, wp, ts, args->val3); 845 break; 846 847 case LINUX_FUTEX_WAKE: 848 args->val3 = FUTEX_BITSET_MATCH_ANY; 849 /* FALLTHROUGH */ 850 851 case LINUX_FUTEX_WAKE_BITSET: 852 LIN_SDT_PROBE3(futex, linux_sys_futex, debug_wake, args->uaddr, 853 args->val, args->val3); 854 LINUX_CTR3(sys_futex, "WAKE uaddr %p nrwake 0x%x bitset 0x%x", 855 args->uaddr, args->val, args->val3); 856 857 error = futex_get(args->uaddr, NULL, &f, 858 flags | FUTEX_DONTCREATE); 859 if (error) { 860 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 861 return (error); 862 } 863 864 if (f == NULL) { 865 td->td_retval[0] = 0; 866 867 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 868 return (error); 869 } 870 td->td_retval[0] = futex_wake(f, args->val, args->val3); 871 futex_put(f, NULL); 872 break; 873 874 case LINUX_FUTEX_CMP_REQUEUE: 875 LIN_SDT_PROBE5(futex, linux_sys_futex, debug_cmp_requeue, 876 args->uaddr, args->val, args->val3, args->uaddr2, 877 args->timeout); 878 LINUX_CTR5(sys_futex, "CMP_REQUEUE uaddr %p " 879 "nrwake 0x%x uval 0x%x uaddr2 %p nrequeue 0x%x", 880 args->uaddr, args->val, args->val3, args->uaddr2, 881 args->timeout); 882 883 /* 884 * Linux allows this, we would not, it is an incorrect 885 * usage of declared ABI, so return EINVAL. 886 */ 887 if (args->uaddr == args->uaddr2) { 888 LIN_SDT_PROBE0(futex, linux_sys_futex, 889 invalid_cmp_requeue_use); 890 LIN_SDT_PROBE1(futex, linux_sys_futex, return, EINVAL); 891 return (EINVAL); 892 } 893 894 retry1: 895 error = futex_get(args->uaddr, NULL, &f, flags | FUTEX_DONTLOCK); 896 if (error) { 897 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 898 return (error); 899 } 900 901 /* 902 * To avoid deadlocks return EINVAL if second futex 903 * exists at this time. 904 * 905 * Glibc fall back to FUTEX_WAKE in case of any error 906 * returned by FUTEX_CMP_REQUEUE. 907 */ 908 error = futex_get(args->uaddr2, NULL, &f2, 909 flags | FUTEX_DONTEXISTS | FUTEX_DONTLOCK); 910 if (error) { 911 futex_put(f, NULL); 912 913 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 914 return (error); 915 } 916 futex_lock(f); 917 futex_lock(f2); 918 error = copyin_nofault(args->uaddr, &val, sizeof(val)); 919 if (error) { 920 futex_put(f2, NULL); 921 futex_put(f, NULL); 922 error = copyin(args->uaddr, &val, sizeof(val)); 923 if (error == 0) 924 goto retry1; 925 LIN_SDT_PROBE1(futex, linux_sys_futex, copyin_error, 926 error); 927 LINUX_CTR1(sys_futex, "CMP_REQUEUE copyin failed %d", 928 error); 929 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 930 return (error); 931 } 932 if (val != args->val3) { 933 LIN_SDT_PROBE2(futex, linux_sys_futex, 934 debug_cmp_requeue_value_neq, args->val, val); 935 LINUX_CTR2(sys_futex, "CMP_REQUEUE val 0x%x != uval 0x%x", 936 args->val, val); 937 futex_put(f2, NULL); 938 futex_put(f, NULL); 939 940 LIN_SDT_PROBE1(futex, linux_sys_futex, return, EAGAIN); 941 return (EAGAIN); 942 } 943 944 nrwake = (int)(unsigned long)args->timeout; 945 td->td_retval[0] = futex_requeue(f, args->val, f2, nrwake); 946 futex_put(f2, NULL); 947 futex_put(f, NULL); 948 break; 949 950 case LINUX_FUTEX_WAKE_OP: 951 LIN_SDT_PROBE5(futex, linux_sys_futex, debug_wake_op, 952 args->uaddr, args->op, args->val, args->uaddr2, args->val3); 953 LINUX_CTR5(sys_futex, "WAKE_OP " 954 "uaddr %p nrwake 0x%x uaddr2 %p op 0x%x nrwake2 0x%x", 955 args->uaddr, args->val, args->uaddr2, args->val3, 956 args->timeout); 957 958 retry2: 959 error = futex_get(args->uaddr, NULL, &f, flags | FUTEX_DONTLOCK); 960 if (error) { 961 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 962 return (error); 963 } 964 965 if (args->uaddr != args->uaddr2) 966 error = futex_get(args->uaddr2, NULL, &f2, 967 flags | FUTEX_DONTLOCK); 968 if (error) { 969 futex_put(f, NULL); 970 971 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 972 return (error); 973 } 974 futex_lock(f); 975 futex_lock(f2); 976 977 /* 978 * This function returns positive number as results and 979 * negative as errors 980 */ 981 save = vm_fault_disable_pagefaults(); 982 op_ret = futex_atomic_op(td, args->val3, args->uaddr2); 983 vm_fault_enable_pagefaults(save); 984 985 LINUX_CTR2(sys_futex, "WAKE_OP atomic_op uaddr %p ret 0x%x", 986 args->uaddr, op_ret); 987 988 if (op_ret < 0) { 989 if (f2 != NULL) 990 futex_put(f2, NULL); 991 futex_put(f, NULL); 992 error = copyin(args->uaddr2, &val, sizeof(val)); 993 if (error == 0) 994 goto retry2; 995 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 996 return (error); 997 } 998 999 ret = futex_wake(f, args->val, args->val3); 1000 1001 if (op_ret > 0) { 1002 op_ret = 0; 1003 nrwake = (int)(unsigned long)args->timeout; 1004 1005 if (f2 != NULL) 1006 op_ret += futex_wake(f2, nrwake, args->val3); 1007 else 1008 op_ret += futex_wake(f, nrwake, args->val3); 1009 ret += op_ret; 1010 1011 } 1012 if (f2 != NULL) 1013 futex_put(f2, NULL); 1014 futex_put(f, NULL); 1015 td->td_retval[0] = ret; 1016 break; 1017 1018 case LINUX_FUTEX_LOCK_PI: 1019 /* not yet implemented */ 1020 pem = pem_find(td->td_proc); 1021 if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) { 1022 linux_msg(td, 1023 "linux_sys_futex: " 1024 "unsupported futex_pi op\n"); 1025 pem->flags |= LINUX_XUNSUP_FUTEXPIOP; 1026 LIN_SDT_PROBE0(futex, linux_sys_futex, 1027 unimplemented_lock_pi); 1028 } 1029 LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); 1030 return (ENOSYS); 1031 1032 case LINUX_FUTEX_UNLOCK_PI: 1033 /* not yet implemented */ 1034 pem = pem_find(td->td_proc); 1035 if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) { 1036 linux_msg(td, 1037 "linux_sys_futex: " 1038 "unsupported futex_pi op\n"); 1039 pem->flags |= LINUX_XUNSUP_FUTEXPIOP; 1040 LIN_SDT_PROBE0(futex, linux_sys_futex, 1041 unimplemented_unlock_pi); 1042 } 1043 LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); 1044 return (ENOSYS); 1045 1046 case LINUX_FUTEX_TRYLOCK_PI: 1047 /* not yet implemented */ 1048 pem = pem_find(td->td_proc); 1049 if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) { 1050 linux_msg(td, 1051 "linux_sys_futex: " 1052 "unsupported futex_pi op\n"); 1053 pem->flags |= LINUX_XUNSUP_FUTEXPIOP; 1054 LIN_SDT_PROBE0(futex, linux_sys_futex, 1055 unimplemented_trylock_pi); 1056 } 1057 LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); 1058 return (ENOSYS); 1059 1060 case LINUX_FUTEX_REQUEUE: 1061 /* 1062 * Glibc does not use this operation since version 2.3.3, 1063 * as it is racy and replaced by FUTEX_CMP_REQUEUE operation. 1064 * Glibc versions prior to 2.3.3 fall back to FUTEX_WAKE when 1065 * FUTEX_REQUEUE returned EINVAL. 1066 */ 1067 pem = pem_find(td->td_proc); 1068 if ((pem->flags & LINUX_XDEPR_REQUEUEOP) == 0) { 1069 linux_msg(td, 1070 "linux_sys_futex: " 1071 "unsupported futex_requeue op\n"); 1072 pem->flags |= LINUX_XDEPR_REQUEUEOP; 1073 LIN_SDT_PROBE0(futex, linux_sys_futex, 1074 deprecated_requeue); 1075 } 1076 1077 LIN_SDT_PROBE1(futex, linux_sys_futex, return, EINVAL); 1078 return (EINVAL); 1079 1080 case LINUX_FUTEX_WAIT_REQUEUE_PI: 1081 /* not yet implemented */ 1082 pem = pem_find(td->td_proc); 1083 if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) { 1084 linux_msg(td, 1085 "linux_sys_futex: " 1086 "unsupported futex_pi op\n"); 1087 pem->flags |= LINUX_XUNSUP_FUTEXPIOP; 1088 LIN_SDT_PROBE0(futex, linux_sys_futex, 1089 unimplemented_wait_requeue_pi); 1090 } 1091 LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); 1092 return (ENOSYS); 1093 1094 case LINUX_FUTEX_CMP_REQUEUE_PI: 1095 /* not yet implemented */ 1096 pem = pem_find(td->td_proc); 1097 if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) { 1098 linux_msg(td, 1099 "linux_sys_futex: " 1100 "unsupported futex_pi op\n"); 1101 pem->flags |= LINUX_XUNSUP_FUTEXPIOP; 1102 LIN_SDT_PROBE0(futex, linux_sys_futex, 1103 unimplemented_cmp_requeue_pi); 1104 } 1105 LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); 1106 return (ENOSYS); 1107 1108 default: 1109 linux_msg(td, 1110 "linux_sys_futex: unknown op %d\n", args->op); 1111 LIN_SDT_PROBE1(futex, linux_sys_futex, unknown_operation, 1112 args->op); 1113 LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); 1114 return (ENOSYS); 1115 } 1116 1117 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 1118 return (error); 1119 } 1120 1121 int 1122 linux_set_robust_list(struct thread *td, struct linux_set_robust_list_args *args) 1123 { 1124 struct linux_emuldata *em; 1125 1126 LIN_SDT_PROBE2(futex, linux_set_robust_list, entry, td, args); 1127 1128 if (args->len != sizeof(struct linux_robust_list_head)) { 1129 LIN_SDT_PROBE0(futex, linux_set_robust_list, size_error); 1130 LIN_SDT_PROBE1(futex, linux_set_robust_list, return, EINVAL); 1131 return (EINVAL); 1132 } 1133 1134 em = em_find(td); 1135 em->robust_futexes = args->head; 1136 1137 LIN_SDT_PROBE1(futex, linux_set_robust_list, return, 0); 1138 return (0); 1139 } 1140 1141 int 1142 linux_get_robust_list(struct thread *td, struct linux_get_robust_list_args *args) 1143 { 1144 struct linux_emuldata *em; 1145 struct linux_robust_list_head *head; 1146 l_size_t len = sizeof(struct linux_robust_list_head); 1147 struct thread *td2; 1148 int error = 0; 1149 1150 LIN_SDT_PROBE2(futex, linux_get_robust_list, entry, td, args); 1151 1152 if (!args->pid) { 1153 em = em_find(td); 1154 KASSERT(em != NULL, ("get_robust_list: emuldata notfound.\n")); 1155 head = em->robust_futexes; 1156 } else { 1157 td2 = tdfind(args->pid, -1); 1158 if (td2 == NULL) { 1159 LIN_SDT_PROBE1(futex, linux_get_robust_list, return, 1160 ESRCH); 1161 return (ESRCH); 1162 } 1163 if (SV_PROC_ABI(td2->td_proc) != SV_ABI_LINUX) { 1164 LIN_SDT_PROBE1(futex, linux_get_robust_list, return, 1165 EPERM); 1166 PROC_UNLOCK(td2->td_proc); 1167 return (EPERM); 1168 } 1169 1170 em = em_find(td2); 1171 KASSERT(em != NULL, ("get_robust_list: emuldata notfound.\n")); 1172 /* XXX: ptrace? */ 1173 if (priv_check(td, PRIV_CRED_SETUID) || 1174 priv_check(td, PRIV_CRED_SETEUID) || 1175 p_candebug(td, td2->td_proc)) { 1176 PROC_UNLOCK(td2->td_proc); 1177 1178 LIN_SDT_PROBE1(futex, linux_get_robust_list, return, 1179 EPERM); 1180 return (EPERM); 1181 } 1182 head = em->robust_futexes; 1183 1184 PROC_UNLOCK(td2->td_proc); 1185 } 1186 1187 error = copyout(&len, args->len, sizeof(l_size_t)); 1188 if (error) { 1189 LIN_SDT_PROBE1(futex, linux_get_robust_list, copyout_error, 1190 error); 1191 LIN_SDT_PROBE1(futex, linux_get_robust_list, return, EFAULT); 1192 return (EFAULT); 1193 } 1194 1195 error = copyout(&head, args->head, sizeof(head)); 1196 if (error) { 1197 LIN_SDT_PROBE1(futex, linux_get_robust_list, copyout_error, 1198 error); 1199 } 1200 1201 LIN_SDT_PROBE1(futex, linux_get_robust_list, return, error); 1202 return (error); 1203 } 1204 1205 static int 1206 handle_futex_death(struct linux_emuldata *em, uint32_t *uaddr, 1207 unsigned int pi) 1208 { 1209 uint32_t uval, nval, mval; 1210 struct futex *f; 1211 int error; 1212 1213 LIN_SDT_PROBE3(futex, handle_futex_death, entry, em, uaddr, pi); 1214 1215 retry: 1216 error = copyin(uaddr, &uval, 4); 1217 if (error) { 1218 LIN_SDT_PROBE1(futex, handle_futex_death, copyin_error, error); 1219 LIN_SDT_PROBE1(futex, handle_futex_death, return, EFAULT); 1220 return (EFAULT); 1221 } 1222 if ((uval & FUTEX_TID_MASK) == em->em_tid) { 1223 mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED; 1224 nval = casuword32(uaddr, uval, mval); 1225 1226 if (nval == -1) { 1227 LIN_SDT_PROBE1(futex, handle_futex_death, return, 1228 EFAULT); 1229 return (EFAULT); 1230 } 1231 1232 if (nval != uval) 1233 goto retry; 1234 1235 if (!pi && (uval & FUTEX_WAITERS)) { 1236 error = futex_get(uaddr, NULL, &f, 1237 FUTEX_DONTCREATE | FUTEX_SHARED); 1238 if (error) { 1239 LIN_SDT_PROBE1(futex, handle_futex_death, 1240 return, error); 1241 return (error); 1242 } 1243 if (f != NULL) { 1244 futex_wake(f, 1, FUTEX_BITSET_MATCH_ANY); 1245 futex_put(f, NULL); 1246 } 1247 } 1248 } 1249 1250 LIN_SDT_PROBE1(futex, handle_futex_death, return, 0); 1251 return (0); 1252 } 1253 1254 static int 1255 fetch_robust_entry(struct linux_robust_list **entry, 1256 struct linux_robust_list **head, unsigned int *pi) 1257 { 1258 l_ulong uentry; 1259 int error; 1260 1261 LIN_SDT_PROBE3(futex, fetch_robust_entry, entry, entry, head, pi); 1262 1263 error = copyin((const void *)head, &uentry, sizeof(l_ulong)); 1264 if (error) { 1265 LIN_SDT_PROBE1(futex, fetch_robust_entry, copyin_error, error); 1266 LIN_SDT_PROBE1(futex, fetch_robust_entry, return, EFAULT); 1267 return (EFAULT); 1268 } 1269 1270 *entry = (void *)(uentry & ~1UL); 1271 *pi = uentry & 1; 1272 1273 LIN_SDT_PROBE1(futex, fetch_robust_entry, return, 0); 1274 return (0); 1275 } 1276 1277 /* This walks the list of robust futexes releasing them. */ 1278 void 1279 release_futexes(struct thread *td, struct linux_emuldata *em) 1280 { 1281 struct linux_robust_list_head *head = NULL; 1282 struct linux_robust_list *entry, *next_entry, *pending; 1283 unsigned int limit = 2048, pi, next_pi, pip; 1284 l_long futex_offset; 1285 int rc, error; 1286 1287 LIN_SDT_PROBE2(futex, release_futexes, entry, td, em); 1288 1289 head = em->robust_futexes; 1290 1291 if (head == NULL) { 1292 LIN_SDT_PROBE0(futex, release_futexes, return); 1293 return; 1294 } 1295 1296 if (fetch_robust_entry(&entry, PTRIN(&head->list.next), &pi)) { 1297 LIN_SDT_PROBE0(futex, release_futexes, return); 1298 return; 1299 } 1300 1301 error = copyin(&head->futex_offset, &futex_offset, 1302 sizeof(futex_offset)); 1303 if (error) { 1304 LIN_SDT_PROBE1(futex, release_futexes, copyin_error, error); 1305 LIN_SDT_PROBE0(futex, release_futexes, return); 1306 return; 1307 } 1308 1309 if (fetch_robust_entry(&pending, PTRIN(&head->pending_list), &pip)) { 1310 LIN_SDT_PROBE0(futex, release_futexes, return); 1311 return; 1312 } 1313 1314 while (entry != &head->list) { 1315 rc = fetch_robust_entry(&next_entry, PTRIN(&entry->next), &next_pi); 1316 1317 if (entry != pending) 1318 if (handle_futex_death(em, 1319 (uint32_t *)((caddr_t)entry + futex_offset), pi)) { 1320 LIN_SDT_PROBE0(futex, release_futexes, return); 1321 return; 1322 } 1323 if (rc) { 1324 LIN_SDT_PROBE0(futex, release_futexes, return); 1325 return; 1326 } 1327 1328 entry = next_entry; 1329 pi = next_pi; 1330 1331 if (!--limit) 1332 break; 1333 1334 sched_relinquish(curthread); 1335 } 1336 1337 if (pending) 1338 handle_futex_death(em, (uint32_t *)((caddr_t)pending + futex_offset), pip); 1339 1340 LIN_SDT_PROBE0(futex, release_futexes, return); 1341 } 1342