1 /* $NetBSD: linux_futex.c,v 1.7 2006/07/24 19:01:49 manu Exp $ */ 2 3 /*- 4 * Copyright (c) 2009-2016 Dmitry Chagin 5 * Copyright (c) 2005 Emmanuel Dreyfus 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by Emmanuel Dreyfus 19 * 4. The name of the author may not be used to endorse or promote 20 * products derived from this software without specific prior written 21 * permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE THE AUTHOR AND CONTRIBUTORS ``AS IS'' 24 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 25 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 26 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS 27 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 30 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 31 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 32 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 33 * POSSIBILITY OF SUCH DAMAGE. 34 */ 35 36 #include <sys/cdefs.h> 37 __FBSDID("$FreeBSD$"); 38 #if 0 39 __KERNEL_RCSID(1, "$NetBSD: linux_futex.c,v 1.7 2006/07/24 19:01:49 manu Exp $"); 40 #endif 41 42 #include "opt_compat.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/imgact.h> 47 #include <sys/kernel.h> 48 #include <sys/ktr.h> 49 #include <sys/lock.h> 50 #include <sys/malloc.h> 51 #include <sys/mutex.h> 52 #include <sys/priv.h> 53 #include <sys/proc.h> 54 #include <sys/queue.h> 55 #include <sys/sched.h> 56 #include <sys/sdt.h> 57 #include <sys/umtx.h> 58 59 #include <vm/vm_extern.h> 60 61 #ifdef COMPAT_LINUX32 62 #include <machine/../linux32/linux.h> 63 #include <machine/../linux32/linux32_proto.h> 64 #else 65 #include <machine/../linux/linux.h> 66 #include <machine/../linux/linux_proto.h> 67 #endif 68 #include <compat/linux/linux_dtrace.h> 69 #include <compat/linux/linux_emul.h> 70 #include <compat/linux/linux_futex.h> 71 #include <compat/linux/linux_timer.h> 72 #include <compat/linux/linux_util.h> 73 74 /* DTrace init */ 75 LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE); 76 77 /** 78 * Futex part for the special DTrace module "locks". 79 */ 80 LIN_SDT_PROBE_DEFINE1(locks, futex_mtx, locked, "struct mtx *"); 81 LIN_SDT_PROBE_DEFINE1(locks, futex_mtx, unlock, "struct mtx *"); 82 83 /** 84 * Per futex probes. 85 */ 86 LIN_SDT_PROBE_DEFINE1(futex, futex, create, "struct sx *"); 87 LIN_SDT_PROBE_DEFINE1(futex, futex, destroy, "struct sx *"); 88 89 /** 90 * DTrace probes in this module. 91 */ 92 LIN_SDT_PROBE_DEFINE2(futex, futex_put, entry, "struct futex *", 93 "struct waiting_proc *"); 94 LIN_SDT_PROBE_DEFINE3(futex, futex_put, destroy, "uint32_t *", "uint32_t", 95 "int"); 96 LIN_SDT_PROBE_DEFINE3(futex, futex_put, unlock, "uint32_t *", "uint32_t", 97 "int"); 98 LIN_SDT_PROBE_DEFINE0(futex, futex_put, return); 99 LIN_SDT_PROBE_DEFINE3(futex, futex_get0, entry, "uint32_t *", "struct futex **", 100 "uint32_t"); 101 LIN_SDT_PROBE_DEFINE1(futex, futex_get0, umtx_key_get_error, "int"); 102 LIN_SDT_PROBE_DEFINE3(futex, futex_get0, shared, "uint32_t *", "uint32_t", 103 "int"); 104 LIN_SDT_PROBE_DEFINE1(futex, futex_get0, null, "uint32_t *"); 105 LIN_SDT_PROBE_DEFINE3(futex, futex_get0, new, "uint32_t *", "uint32_t", "int"); 106 LIN_SDT_PROBE_DEFINE1(futex, futex_get0, return, "int"); 107 LIN_SDT_PROBE_DEFINE3(futex, futex_get, entry, "uint32_t *", 108 "struct waiting_proc **", "struct futex **"); 109 LIN_SDT_PROBE_DEFINE0(futex, futex_get, error); 110 LIN_SDT_PROBE_DEFINE1(futex, futex_get, return, "int"); 111 LIN_SDT_PROBE_DEFINE3(futex, futex_sleep, entry, "struct futex *", 112 "struct waiting_proc **", "struct timespec *"); 113 LIN_SDT_PROBE_DEFINE5(futex, futex_sleep, requeue_error, "int", "uint32_t *", 114 "struct waiting_proc *", "uint32_t *", "uint32_t"); 115 LIN_SDT_PROBE_DEFINE3(futex, futex_sleep, sleep_error, "int", "uint32_t *", 116 "struct waiting_proc *"); 117 LIN_SDT_PROBE_DEFINE1(futex, futex_sleep, return, "int"); 118 LIN_SDT_PROBE_DEFINE3(futex, futex_wake, entry, "struct futex *", "int", 119 "uint32_t"); 120 LIN_SDT_PROBE_DEFINE3(futex, futex_wake, iterate, "uint32_t", 121 "struct waiting_proc *", "uint32_t"); 122 LIN_SDT_PROBE_DEFINE1(futex, futex_wake, wakeup, "struct waiting_proc *"); 123 LIN_SDT_PROBE_DEFINE1(futex, futex_wake, return, "int"); 124 LIN_SDT_PROBE_DEFINE4(futex, futex_requeue, entry, "struct futex *", "int", 125 "struct futex *", "int"); 126 LIN_SDT_PROBE_DEFINE1(futex, futex_requeue, wakeup, "struct waiting_proc *"); 127 LIN_SDT_PROBE_DEFINE3(futex, futex_requeue, requeue, "uint32_t *", 128 "struct waiting_proc *", "uint32_t"); 129 LIN_SDT_PROBE_DEFINE1(futex, futex_requeue, return, "int"); 130 LIN_SDT_PROBE_DEFINE4(futex, futex_wait, entry, "struct futex *", 131 "struct waiting_proc **", "struct timespec *", "uint32_t"); 132 LIN_SDT_PROBE_DEFINE1(futex, futex_wait, sleep_error, "int"); 133 LIN_SDT_PROBE_DEFINE1(futex, futex_wait, return, "int"); 134 LIN_SDT_PROBE_DEFINE3(futex, futex_atomic_op, entry, "struct thread *", 135 "int", "uint32_t"); 136 LIN_SDT_PROBE_DEFINE4(futex, futex_atomic_op, decoded_op, "int", "int", "int", 137 "int"); 138 LIN_SDT_PROBE_DEFINE0(futex, futex_atomic_op, missing_access_check); 139 LIN_SDT_PROBE_DEFINE1(futex, futex_atomic_op, unimplemented_op, "int"); 140 LIN_SDT_PROBE_DEFINE1(futex, futex_atomic_op, unimplemented_cmp, "int"); 141 LIN_SDT_PROBE_DEFINE1(futex, futex_atomic_op, return, "int"); 142 LIN_SDT_PROBE_DEFINE2(futex, linux_sys_futex, entry, "struct thread *", 143 "struct linux_sys_futex_args *"); 144 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_clockswitch); 145 LIN_SDT_PROBE_DEFINE1(futex, linux_sys_futex, copyin_error, "int"); 146 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, invalid_cmp_requeue_use); 147 LIN_SDT_PROBE_DEFINE3(futex, linux_sys_futex, debug_wait, "uint32_t *", 148 "uint32_t", "uint32_t"); 149 LIN_SDT_PROBE_DEFINE4(futex, linux_sys_futex, debug_wait_value_neq, 150 "uint32_t *", "uint32_t", "int", "uint32_t"); 151 LIN_SDT_PROBE_DEFINE3(futex, linux_sys_futex, debug_wake, "uint32_t *", 152 "uint32_t", "uint32_t"); 153 LIN_SDT_PROBE_DEFINE5(futex, linux_sys_futex, debug_cmp_requeue, "uint32_t *", 154 "uint32_t", "uint32_t", "uint32_t *", "struct l_timespec *"); 155 LIN_SDT_PROBE_DEFINE2(futex, linux_sys_futex, debug_cmp_requeue_value_neq, 156 "uint32_t", "int"); 157 LIN_SDT_PROBE_DEFINE5(futex, linux_sys_futex, debug_wake_op, "uint32_t *", 158 "int", "uint32_t", "uint32_t *", "uint32_t"); 159 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unhandled_efault); 160 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_lock_pi); 161 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_unlock_pi); 162 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_trylock_pi); 163 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, deprecated_requeue); 164 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_wait_requeue_pi); 165 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_cmp_requeue_pi); 166 LIN_SDT_PROBE_DEFINE1(futex, linux_sys_futex, unknown_operation, "int"); 167 LIN_SDT_PROBE_DEFINE1(futex, linux_sys_futex, return, "int"); 168 LIN_SDT_PROBE_DEFINE2(futex, linux_set_robust_list, entry, "struct thread *", 169 "struct linux_set_robust_list_args *"); 170 LIN_SDT_PROBE_DEFINE0(futex, linux_set_robust_list, size_error); 171 LIN_SDT_PROBE_DEFINE1(futex, linux_set_robust_list, return, "int"); 172 LIN_SDT_PROBE_DEFINE2(futex, linux_get_robust_list, entry, "struct thread *", 173 "struct linux_get_robust_list_args *"); 174 LIN_SDT_PROBE_DEFINE1(futex, linux_get_robust_list, copyout_error, "int"); 175 LIN_SDT_PROBE_DEFINE1(futex, linux_get_robust_list, return, "int"); 176 LIN_SDT_PROBE_DEFINE3(futex, handle_futex_death, entry, 177 "struct linux_emuldata *", "uint32_t *", "unsigned int"); 178 LIN_SDT_PROBE_DEFINE1(futex, handle_futex_death, copyin_error, "int"); 179 LIN_SDT_PROBE_DEFINE1(futex, handle_futex_death, return, "int"); 180 LIN_SDT_PROBE_DEFINE3(futex, fetch_robust_entry, entry, 181 "struct linux_robust_list **", "struct linux_robust_list **", 182 "unsigned int *"); 183 LIN_SDT_PROBE_DEFINE1(futex, fetch_robust_entry, copyin_error, "int"); 184 LIN_SDT_PROBE_DEFINE1(futex, fetch_robust_entry, return, "int"); 185 LIN_SDT_PROBE_DEFINE2(futex, release_futexes, entry, "struct thread *", 186 "struct linux_emuldata *"); 187 LIN_SDT_PROBE_DEFINE1(futex, release_futexes, copyin_error, "int"); 188 LIN_SDT_PROBE_DEFINE0(futex, release_futexes, return); 189 190 struct futex; 191 192 struct waiting_proc { 193 uint32_t wp_flags; 194 struct futex *wp_futex; 195 TAILQ_ENTRY(waiting_proc) wp_list; 196 }; 197 198 struct futex { 199 struct mtx f_lck; 200 uint32_t *f_uaddr; /* user-supplied value, for debug */ 201 struct umtx_key f_key; 202 uint32_t f_refcount; 203 uint32_t f_bitset; 204 LIST_ENTRY(futex) f_list; 205 TAILQ_HEAD(lf_waiting_proc, waiting_proc) f_waiting_proc; 206 }; 207 208 struct futex_list futex_list; 209 210 #define FUTEX_LOCK(f) mtx_lock(&(f)->f_lck) 211 #define FUTEX_LOCKED(f) mtx_owned(&(f)->f_lck) 212 #define FUTEX_UNLOCK(f) mtx_unlock(&(f)->f_lck) 213 #define FUTEX_INIT(f) do { \ 214 mtx_init(&(f)->f_lck, "ftlk", NULL, \ 215 MTX_DUPOK); \ 216 LIN_SDT_PROBE1(futex, futex, create, \ 217 &(f)->f_lck); \ 218 } while (0) 219 #define FUTEX_DESTROY(f) do { \ 220 LIN_SDT_PROBE1(futex, futex, destroy, \ 221 &(f)->f_lck); \ 222 mtx_destroy(&(f)->f_lck); \ 223 } while (0) 224 #define FUTEX_ASSERT_LOCKED(f) mtx_assert(&(f)->f_lck, MA_OWNED) 225 #define FUTEX_ASSERT_UNLOCKED(f) mtx_assert(&(f)->f_lck, MA_NOTOWNED) 226 227 struct mtx futex_mtx; /* protects the futex list */ 228 #define FUTEXES_LOCK do { \ 229 mtx_lock(&futex_mtx); \ 230 LIN_SDT_PROBE1(locks, futex_mtx, \ 231 locked, &futex_mtx); \ 232 } while (0) 233 #define FUTEXES_UNLOCK do { \ 234 LIN_SDT_PROBE1(locks, futex_mtx, \ 235 unlock, &futex_mtx); \ 236 mtx_unlock(&futex_mtx); \ 237 } while (0) 238 239 /* flags for futex_get() */ 240 #define FUTEX_CREATE_WP 0x1 /* create waiting_proc */ 241 #define FUTEX_DONTCREATE 0x2 /* don't create futex if not exists */ 242 #define FUTEX_DONTEXISTS 0x4 /* return EINVAL if futex exists */ 243 #define FUTEX_SHARED 0x8 /* shared futex */ 244 #define FUTEX_DONTLOCK 0x10 /* don't lock futex */ 245 246 /* wp_flags */ 247 #define FUTEX_WP_REQUEUED 0x1 /* wp requeued - wp moved from wp_list 248 * of futex where thread sleep to wp_list 249 * of another futex. 250 */ 251 #define FUTEX_WP_REMOVED 0x2 /* wp is woken up and removed from futex 252 * wp_list to prevent double wakeup. 253 */ 254 255 static void futex_put(struct futex *, struct waiting_proc *); 256 static int futex_get0(uint32_t *, struct futex **f, uint32_t); 257 static int futex_get(uint32_t *, struct waiting_proc **, struct futex **, 258 uint32_t); 259 static int futex_sleep(struct futex *, struct waiting_proc *, struct timespec *); 260 static int futex_wake(struct futex *, int, uint32_t); 261 static int futex_requeue(struct futex *, int, struct futex *, int); 262 static int futex_copyin_timeout(int, struct l_timespec *, int, 263 struct timespec *); 264 static int futex_wait(struct futex *, struct waiting_proc *, struct timespec *, 265 uint32_t); 266 static void futex_lock(struct futex *); 267 static void futex_unlock(struct futex *); 268 static int futex_atomic_op(struct thread *, int, uint32_t *); 269 static int handle_futex_death(struct linux_emuldata *, uint32_t *, 270 unsigned int); 271 static int fetch_robust_entry(struct linux_robust_list **, 272 struct linux_robust_list **, unsigned int *); 273 274 /* support.s */ 275 int futex_xchgl(int oparg, uint32_t *uaddr, int *oldval); 276 int futex_addl(int oparg, uint32_t *uaddr, int *oldval); 277 int futex_orl(int oparg, uint32_t *uaddr, int *oldval); 278 int futex_andl(int oparg, uint32_t *uaddr, int *oldval); 279 int futex_xorl(int oparg, uint32_t *uaddr, int *oldval); 280 281 282 static int 283 futex_copyin_timeout(int op, struct l_timespec *luts, int clockrt, 284 struct timespec *ts) 285 { 286 struct l_timespec lts; 287 struct timespec kts; 288 int error; 289 290 error = copyin(luts, <s, sizeof(lts)); 291 if (error) 292 return (error); 293 294 error = linux_to_native_timespec(ts, <s); 295 if (error) 296 return (error); 297 if (clockrt) { 298 nanotime(&kts); 299 timespecsub(ts, &kts); 300 } else if (op == LINUX_FUTEX_WAIT_BITSET) { 301 nanouptime(&kts); 302 timespecsub(ts, &kts); 303 } 304 return (error); 305 } 306 307 static void 308 futex_put(struct futex *f, struct waiting_proc *wp) 309 { 310 LIN_SDT_PROBE2(futex, futex_put, entry, f, wp); 311 312 if (wp != NULL) { 313 if ((wp->wp_flags & FUTEX_WP_REMOVED) == 0) 314 TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list); 315 free(wp, M_FUTEX_WP); 316 } 317 318 FUTEXES_LOCK; 319 if (--f->f_refcount == 0) { 320 LIST_REMOVE(f, f_list); 321 FUTEXES_UNLOCK; 322 if (FUTEX_LOCKED(f)) 323 futex_unlock(f); 324 325 LIN_SDT_PROBE3(futex, futex_put, destroy, f->f_uaddr, 326 f->f_refcount, f->f_key.shared); 327 LINUX_CTR3(sys_futex, "futex_put destroy uaddr %p ref %d " 328 "shared %d", f->f_uaddr, f->f_refcount, f->f_key.shared); 329 umtx_key_release(&f->f_key); 330 FUTEX_DESTROY(f); 331 free(f, M_FUTEX); 332 333 LIN_SDT_PROBE0(futex, futex_put, return); 334 return; 335 } 336 337 LIN_SDT_PROBE3(futex, futex_put, unlock, f->f_uaddr, f->f_refcount, 338 f->f_key.shared); 339 LINUX_CTR3(sys_futex, "futex_put uaddr %p ref %d shared %d", 340 f->f_uaddr, f->f_refcount, f->f_key.shared); 341 FUTEXES_UNLOCK; 342 if (FUTEX_LOCKED(f)) 343 futex_unlock(f); 344 345 LIN_SDT_PROBE0(futex, futex_put, return); 346 } 347 348 static int 349 futex_get0(uint32_t *uaddr, struct futex **newf, uint32_t flags) 350 { 351 struct futex *f, *tmpf; 352 struct umtx_key key; 353 int error; 354 355 LIN_SDT_PROBE3(futex, futex_get0, entry, uaddr, newf, flags); 356 357 *newf = tmpf = NULL; 358 359 error = umtx_key_get(uaddr, TYPE_FUTEX, (flags & FUTEX_SHARED) ? 360 AUTO_SHARE : THREAD_SHARE, &key); 361 if (error) { 362 LIN_SDT_PROBE1(futex, futex_get0, umtx_key_get_error, error); 363 LIN_SDT_PROBE1(futex, futex_get0, return, error); 364 return (error); 365 } 366 retry: 367 FUTEXES_LOCK; 368 LIST_FOREACH(f, &futex_list, f_list) { 369 if (umtx_key_match(&f->f_key, &key)) { 370 if (tmpf != NULL) { 371 if (FUTEX_LOCKED(tmpf)) 372 futex_unlock(tmpf); 373 FUTEX_DESTROY(tmpf); 374 free(tmpf, M_FUTEX); 375 } 376 if (flags & FUTEX_DONTEXISTS) { 377 FUTEXES_UNLOCK; 378 umtx_key_release(&key); 379 380 LIN_SDT_PROBE1(futex, futex_get0, return, 381 EINVAL); 382 return (EINVAL); 383 } 384 385 /* 386 * Increment refcount of the found futex to 387 * prevent it from deallocation before FUTEX_LOCK() 388 */ 389 ++f->f_refcount; 390 FUTEXES_UNLOCK; 391 umtx_key_release(&key); 392 393 if ((flags & FUTEX_DONTLOCK) == 0) 394 futex_lock(f); 395 *newf = f; 396 LIN_SDT_PROBE3(futex, futex_get0, shared, uaddr, 397 f->f_refcount, f->f_key.shared); 398 LINUX_CTR3(sys_futex, "futex_get uaddr %p ref %d shared %d", 399 uaddr, f->f_refcount, f->f_key.shared); 400 401 LIN_SDT_PROBE1(futex, futex_get0, return, 0); 402 return (0); 403 } 404 } 405 406 if (flags & FUTEX_DONTCREATE) { 407 FUTEXES_UNLOCK; 408 umtx_key_release(&key); 409 LIN_SDT_PROBE1(futex, futex_get0, null, uaddr); 410 LINUX_CTR1(sys_futex, "futex_get uaddr %p null", uaddr); 411 412 LIN_SDT_PROBE1(futex, futex_get0, return, 0); 413 return (0); 414 } 415 416 if (tmpf == NULL) { 417 FUTEXES_UNLOCK; 418 tmpf = malloc(sizeof(*tmpf), M_FUTEX, M_WAITOK | M_ZERO); 419 tmpf->f_uaddr = uaddr; 420 tmpf->f_key = key; 421 tmpf->f_refcount = 1; 422 tmpf->f_bitset = FUTEX_BITSET_MATCH_ANY; 423 FUTEX_INIT(tmpf); 424 TAILQ_INIT(&tmpf->f_waiting_proc); 425 426 /* 427 * Lock the new futex before an insert into the futex_list 428 * to prevent futex usage by other. 429 */ 430 if ((flags & FUTEX_DONTLOCK) == 0) 431 futex_lock(tmpf); 432 goto retry; 433 } 434 435 LIST_INSERT_HEAD(&futex_list, tmpf, f_list); 436 FUTEXES_UNLOCK; 437 438 LIN_SDT_PROBE3(futex, futex_get0, new, uaddr, tmpf->f_refcount, 439 tmpf->f_key.shared); 440 LINUX_CTR3(sys_futex, "futex_get uaddr %p ref %d shared %d new", 441 uaddr, tmpf->f_refcount, tmpf->f_key.shared); 442 *newf = tmpf; 443 444 LIN_SDT_PROBE1(futex, futex_get0, return, 0); 445 return (0); 446 } 447 448 static int 449 futex_get(uint32_t *uaddr, struct waiting_proc **wp, struct futex **f, 450 uint32_t flags) 451 { 452 int error; 453 454 LIN_SDT_PROBE3(futex, futex_get, entry, uaddr, wp, f); 455 456 if (flags & FUTEX_CREATE_WP) { 457 *wp = malloc(sizeof(struct waiting_proc), M_FUTEX_WP, M_WAITOK); 458 (*wp)->wp_flags = 0; 459 } 460 error = futex_get0(uaddr, f, flags); 461 if (error) { 462 LIN_SDT_PROBE0(futex, futex_get, error); 463 464 if (flags & FUTEX_CREATE_WP) 465 free(*wp, M_FUTEX_WP); 466 467 LIN_SDT_PROBE1(futex, futex_get, return, error); 468 return (error); 469 } 470 if (flags & FUTEX_CREATE_WP) { 471 TAILQ_INSERT_HEAD(&(*f)->f_waiting_proc, *wp, wp_list); 472 (*wp)->wp_futex = *f; 473 } 474 475 LIN_SDT_PROBE1(futex, futex_get, return, error); 476 return (error); 477 } 478 479 static inline void 480 futex_lock(struct futex *f) 481 { 482 483 LINUX_CTR3(sys_futex, "futex_lock uaddr %p ref %d shared %d", 484 f->f_uaddr, f->f_refcount, f->f_key.shared); 485 FUTEX_ASSERT_UNLOCKED(f); 486 FUTEX_LOCK(f); 487 } 488 489 static inline void 490 futex_unlock(struct futex *f) 491 { 492 493 LINUX_CTR3(sys_futex, "futex_unlock uaddr %p ref %d shared %d", 494 f->f_uaddr, f->f_refcount, f->f_key.shared); 495 FUTEX_ASSERT_LOCKED(f); 496 FUTEX_UNLOCK(f); 497 } 498 499 static int 500 futex_sleep(struct futex *f, struct waiting_proc *wp, struct timespec *ts) 501 { 502 struct timespec uts; 503 sbintime_t sbt, prec, tmp; 504 time_t over; 505 int error; 506 507 FUTEX_ASSERT_LOCKED(f); 508 if (ts != NULL) { 509 uts = *ts; 510 if (uts.tv_sec > INT32_MAX / 2) { 511 over = uts.tv_sec - INT32_MAX / 2; 512 uts.tv_sec -= over; 513 } 514 tmp = tstosbt(uts); 515 if (TIMESEL(&sbt, tmp)) 516 sbt += tc_tick_sbt; 517 sbt += tmp; 518 prec = tmp; 519 prec >>= tc_precexp; 520 } else { 521 sbt = 0; 522 prec = 0; 523 } 524 LIN_SDT_PROBE3(futex, futex_sleep, entry, f, wp, sbt); 525 LINUX_CTR4(sys_futex, "futex_sleep enter uaddr %p wp %p timo %ld ref %d", 526 f->f_uaddr, wp, sbt, f->f_refcount); 527 528 error = msleep_sbt(wp, &f->f_lck, PCATCH, "futex", sbt, prec, C_ABSOLUTE); 529 if (wp->wp_flags & FUTEX_WP_REQUEUED) { 530 KASSERT(f != wp->wp_futex, ("futex != wp_futex")); 531 532 if (error) { 533 LIN_SDT_PROBE5(futex, futex_sleep, requeue_error, error, 534 f->f_uaddr, wp, wp->wp_futex->f_uaddr, 535 wp->wp_futex->f_refcount); 536 } 537 538 LINUX_CTR5(sys_futex, "futex_sleep out error %d uaddr %p wp" 539 " %p requeued uaddr %p ref %d", 540 error, f->f_uaddr, wp, wp->wp_futex->f_uaddr, 541 wp->wp_futex->f_refcount); 542 futex_put(f, NULL); 543 f = wp->wp_futex; 544 futex_lock(f); 545 } else { 546 if (error) { 547 LIN_SDT_PROBE3(futex, futex_sleep, sleep_error, error, 548 f->f_uaddr, wp); 549 } 550 LINUX_CTR3(sys_futex, "futex_sleep out error %d uaddr %p wp %p", 551 error, f->f_uaddr, wp); 552 } 553 554 futex_put(f, wp); 555 556 LIN_SDT_PROBE1(futex, futex_sleep, return, error); 557 return (error); 558 } 559 560 static int 561 futex_wake(struct futex *f, int n, uint32_t bitset) 562 { 563 struct waiting_proc *wp, *wpt; 564 int count = 0; 565 566 LIN_SDT_PROBE3(futex, futex_wake, entry, f, n, bitset); 567 568 if (bitset == 0) { 569 LIN_SDT_PROBE1(futex, futex_wake, return, EINVAL); 570 return (EINVAL); 571 } 572 573 FUTEX_ASSERT_LOCKED(f); 574 TAILQ_FOREACH_SAFE(wp, &f->f_waiting_proc, wp_list, wpt) { 575 LIN_SDT_PROBE3(futex, futex_wake, iterate, f->f_uaddr, wp, 576 f->f_refcount); 577 LINUX_CTR3(sys_futex, "futex_wake uaddr %p wp %p ref %d", 578 f->f_uaddr, wp, f->f_refcount); 579 /* 580 * Unless we find a matching bit in 581 * the bitset, continue searching. 582 */ 583 if (!(wp->wp_futex->f_bitset & bitset)) 584 continue; 585 586 wp->wp_flags |= FUTEX_WP_REMOVED; 587 TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list); 588 LIN_SDT_PROBE1(futex, futex_wake, wakeup, wp); 589 wakeup_one(wp); 590 if (++count == n) 591 break; 592 } 593 594 LIN_SDT_PROBE1(futex, futex_wake, return, count); 595 return (count); 596 } 597 598 static int 599 futex_requeue(struct futex *f, int n, struct futex *f2, int n2) 600 { 601 struct waiting_proc *wp, *wpt; 602 int count = 0; 603 604 LIN_SDT_PROBE4(futex, futex_requeue, entry, f, n, f2, n2); 605 606 FUTEX_ASSERT_LOCKED(f); 607 FUTEX_ASSERT_LOCKED(f2); 608 609 TAILQ_FOREACH_SAFE(wp, &f->f_waiting_proc, wp_list, wpt) { 610 if (++count <= n) { 611 LINUX_CTR2(sys_futex, "futex_req_wake uaddr %p wp %p", 612 f->f_uaddr, wp); 613 wp->wp_flags |= FUTEX_WP_REMOVED; 614 TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list); 615 LIN_SDT_PROBE1(futex, futex_requeue, wakeup, wp); 616 wakeup_one(wp); 617 } else { 618 LIN_SDT_PROBE3(futex, futex_requeue, requeue, 619 f->f_uaddr, wp, f2->f_uaddr); 620 LINUX_CTR3(sys_futex, "futex_requeue uaddr %p wp %p to %p", 621 f->f_uaddr, wp, f2->f_uaddr); 622 wp->wp_flags |= FUTEX_WP_REQUEUED; 623 /* Move wp to wp_list of f2 futex */ 624 TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list); 625 TAILQ_INSERT_HEAD(&f2->f_waiting_proc, wp, wp_list); 626 627 /* 628 * Thread which sleeps on wp after waking should 629 * acquire f2 lock, so increment refcount of f2 to 630 * prevent it from premature deallocation. 631 */ 632 wp->wp_futex = f2; 633 FUTEXES_LOCK; 634 ++f2->f_refcount; 635 FUTEXES_UNLOCK; 636 if (count - n >= n2) 637 break; 638 } 639 } 640 641 LIN_SDT_PROBE1(futex, futex_requeue, return, count); 642 return (count); 643 } 644 645 static int 646 futex_wait(struct futex *f, struct waiting_proc *wp, struct timespec *ts, 647 uint32_t bitset) 648 { 649 int error; 650 651 LIN_SDT_PROBE4(futex, futex_wait, entry, f, wp, ts, bitset); 652 653 if (bitset == 0) { 654 LIN_SDT_PROBE1(futex, futex_wait, return, EINVAL); 655 return (EINVAL); 656 } 657 658 f->f_bitset = bitset; 659 error = futex_sleep(f, wp, ts); 660 if (error) 661 LIN_SDT_PROBE1(futex, futex_wait, sleep_error, error); 662 if (error == EWOULDBLOCK) 663 error = ETIMEDOUT; 664 665 LIN_SDT_PROBE1(futex, futex_wait, return, error); 666 return (error); 667 } 668 669 static int 670 futex_atomic_op(struct thread *td, int encoded_op, uint32_t *uaddr) 671 { 672 int op = (encoded_op >> 28) & 7; 673 int cmp = (encoded_op >> 24) & 15; 674 int oparg = (encoded_op << 8) >> 20; 675 int cmparg = (encoded_op << 20) >> 20; 676 int oldval = 0, ret; 677 678 LIN_SDT_PROBE3(futex, futex_atomic_op, entry, td, encoded_op, uaddr); 679 680 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 681 oparg = 1 << oparg; 682 683 LIN_SDT_PROBE4(futex, futex_atomic_op, decoded_op, op, cmp, oparg, 684 cmparg); 685 686 /* XXX: Linux verifies access here and returns EFAULT */ 687 LIN_SDT_PROBE0(futex, futex_atomic_op, missing_access_check); 688 689 switch (op) { 690 case FUTEX_OP_SET: 691 ret = futex_xchgl(oparg, uaddr, &oldval); 692 break; 693 case FUTEX_OP_ADD: 694 ret = futex_addl(oparg, uaddr, &oldval); 695 break; 696 case FUTEX_OP_OR: 697 ret = futex_orl(oparg, uaddr, &oldval); 698 break; 699 case FUTEX_OP_ANDN: 700 ret = futex_andl(~oparg, uaddr, &oldval); 701 break; 702 case FUTEX_OP_XOR: 703 ret = futex_xorl(oparg, uaddr, &oldval); 704 break; 705 default: 706 LIN_SDT_PROBE1(futex, futex_atomic_op, unimplemented_op, op); 707 ret = -ENOSYS; 708 break; 709 } 710 711 if (ret) { 712 LIN_SDT_PROBE1(futex, futex_atomic_op, return, ret); 713 return (ret); 714 } 715 716 switch (cmp) { 717 case FUTEX_OP_CMP_EQ: 718 ret = (oldval == cmparg); 719 break; 720 case FUTEX_OP_CMP_NE: 721 ret = (oldval != cmparg); 722 break; 723 case FUTEX_OP_CMP_LT: 724 ret = (oldval < cmparg); 725 break; 726 case FUTEX_OP_CMP_GE: 727 ret = (oldval >= cmparg); 728 break; 729 case FUTEX_OP_CMP_LE: 730 ret = (oldval <= cmparg); 731 break; 732 case FUTEX_OP_CMP_GT: 733 ret = (oldval > cmparg); 734 break; 735 default: 736 LIN_SDT_PROBE1(futex, futex_atomic_op, unimplemented_cmp, cmp); 737 ret = -ENOSYS; 738 } 739 740 LIN_SDT_PROBE1(futex, futex_atomic_op, return, ret); 741 return (ret); 742 } 743 744 int 745 linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args) 746 { 747 int clockrt, nrwake, op_ret, ret; 748 struct linux_pemuldata *pem; 749 struct waiting_proc *wp; 750 struct futex *f, *f2; 751 struct timespec uts, *ts; 752 int error, save; 753 uint32_t flags, val; 754 755 LIN_SDT_PROBE2(futex, linux_sys_futex, entry, td, args); 756 757 if (args->op & LINUX_FUTEX_PRIVATE_FLAG) { 758 flags = 0; 759 args->op &= ~LINUX_FUTEX_PRIVATE_FLAG; 760 } else 761 flags = FUTEX_SHARED; 762 763 /* 764 * Currently support for switching between CLOCK_MONOTONIC and 765 * CLOCK_REALTIME is not present. However Linux forbids the use of 766 * FUTEX_CLOCK_REALTIME with any op except FUTEX_WAIT_BITSET and 767 * FUTEX_WAIT_REQUEUE_PI. 768 */ 769 clockrt = args->op & LINUX_FUTEX_CLOCK_REALTIME; 770 args->op = args->op & ~LINUX_FUTEX_CLOCK_REALTIME; 771 if (clockrt && args->op != LINUX_FUTEX_WAIT_BITSET && 772 args->op != LINUX_FUTEX_WAIT_REQUEUE_PI) { 773 LIN_SDT_PROBE0(futex, linux_sys_futex, 774 unimplemented_clockswitch); 775 LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); 776 return (ENOSYS); 777 } 778 779 error = 0; 780 f = f2 = NULL; 781 782 switch (args->op) { 783 case LINUX_FUTEX_WAIT: 784 args->val3 = FUTEX_BITSET_MATCH_ANY; 785 /* FALLTHROUGH */ 786 787 case LINUX_FUTEX_WAIT_BITSET: 788 LIN_SDT_PROBE3(futex, linux_sys_futex, debug_wait, args->uaddr, 789 args->val, args->val3); 790 LINUX_CTR3(sys_futex, "WAIT uaddr %p val 0x%x bitset 0x%x", 791 args->uaddr, args->val, args->val3); 792 793 if (args->timeout != NULL) { 794 error = futex_copyin_timeout(args->op, args->timeout, 795 clockrt, &uts); 796 if (error) { 797 LIN_SDT_PROBE1(futex, linux_sys_futex, copyin_error, 798 error); 799 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 800 return (error); 801 } 802 ts = &uts; 803 } else 804 ts = NULL; 805 806 retry0: 807 error = futex_get(args->uaddr, &wp, &f, 808 flags | FUTEX_CREATE_WP); 809 if (error) { 810 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 811 return (error); 812 } 813 814 error = copyin_nofault(args->uaddr, &val, sizeof(val)); 815 if (error) { 816 futex_put(f, wp); 817 error = copyin(args->uaddr, &val, sizeof(val)); 818 if (error == 0) 819 goto retry0; 820 LIN_SDT_PROBE1(futex, linux_sys_futex, copyin_error, 821 error); 822 LINUX_CTR1(sys_futex, "WAIT copyin failed %d", 823 error); 824 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 825 return (error); 826 } 827 if (val != args->val) { 828 LIN_SDT_PROBE4(futex, linux_sys_futex, 829 debug_wait_value_neq, args->uaddr, args->val, val, 830 args->val3); 831 LINUX_CTR3(sys_futex, 832 "WAIT uaddr %p val 0x%x != uval 0x%x", 833 args->uaddr, args->val, val); 834 futex_put(f, wp); 835 836 LIN_SDT_PROBE1(futex, linux_sys_futex, return, 837 EWOULDBLOCK); 838 return (EWOULDBLOCK); 839 } 840 841 error = futex_wait(f, wp, ts, args->val3); 842 break; 843 844 case LINUX_FUTEX_WAKE: 845 args->val3 = FUTEX_BITSET_MATCH_ANY; 846 /* FALLTHROUGH */ 847 848 case LINUX_FUTEX_WAKE_BITSET: 849 LIN_SDT_PROBE3(futex, linux_sys_futex, debug_wake, args->uaddr, 850 args->val, args->val3); 851 LINUX_CTR3(sys_futex, "WAKE uaddr %p nrwake 0x%x bitset 0x%x", 852 args->uaddr, args->val, args->val3); 853 854 error = futex_get(args->uaddr, NULL, &f, 855 flags | FUTEX_DONTCREATE); 856 if (error) { 857 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 858 return (error); 859 } 860 861 if (f == NULL) { 862 td->td_retval[0] = 0; 863 864 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 865 return (error); 866 } 867 td->td_retval[0] = futex_wake(f, args->val, args->val3); 868 futex_put(f, NULL); 869 break; 870 871 case LINUX_FUTEX_CMP_REQUEUE: 872 LIN_SDT_PROBE5(futex, linux_sys_futex, debug_cmp_requeue, 873 args->uaddr, args->val, args->val3, args->uaddr2, 874 args->timeout); 875 LINUX_CTR5(sys_futex, "CMP_REQUEUE uaddr %p " 876 "nrwake 0x%x uval 0x%x uaddr2 %p nrequeue 0x%x", 877 args->uaddr, args->val, args->val3, args->uaddr2, 878 args->timeout); 879 880 /* 881 * Linux allows this, we would not, it is an incorrect 882 * usage of declared ABI, so return EINVAL. 883 */ 884 if (args->uaddr == args->uaddr2) { 885 LIN_SDT_PROBE0(futex, linux_sys_futex, 886 invalid_cmp_requeue_use); 887 LIN_SDT_PROBE1(futex, linux_sys_futex, return, EINVAL); 888 return (EINVAL); 889 } 890 891 retry1: 892 error = futex_get(args->uaddr, NULL, &f, flags | FUTEX_DONTLOCK); 893 if (error) { 894 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 895 return (error); 896 } 897 898 /* 899 * To avoid deadlocks return EINVAL if second futex 900 * exists at this time. 901 * 902 * Glibc fall back to FUTEX_WAKE in case of any error 903 * returned by FUTEX_CMP_REQUEUE. 904 */ 905 error = futex_get(args->uaddr2, NULL, &f2, 906 flags | FUTEX_DONTEXISTS | FUTEX_DONTLOCK); 907 if (error) { 908 futex_put(f, NULL); 909 910 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 911 return (error); 912 } 913 futex_lock(f); 914 futex_lock(f2); 915 error = copyin_nofault(args->uaddr, &val, sizeof(val)); 916 if (error) { 917 futex_put(f2, NULL); 918 futex_put(f, NULL); 919 error = copyin(args->uaddr, &val, sizeof(val)); 920 if (error == 0) 921 goto retry1; 922 LIN_SDT_PROBE1(futex, linux_sys_futex, copyin_error, 923 error); 924 LINUX_CTR1(sys_futex, "CMP_REQUEUE copyin failed %d", 925 error); 926 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 927 return (error); 928 } 929 if (val != args->val3) { 930 LIN_SDT_PROBE2(futex, linux_sys_futex, 931 debug_cmp_requeue_value_neq, args->val, val); 932 LINUX_CTR2(sys_futex, "CMP_REQUEUE val 0x%x != uval 0x%x", 933 args->val, val); 934 futex_put(f2, NULL); 935 futex_put(f, NULL); 936 937 LIN_SDT_PROBE1(futex, linux_sys_futex, return, EAGAIN); 938 return (EAGAIN); 939 } 940 941 nrwake = (int)(unsigned long)args->timeout; 942 td->td_retval[0] = futex_requeue(f, args->val, f2, nrwake); 943 futex_put(f2, NULL); 944 futex_put(f, NULL); 945 break; 946 947 case LINUX_FUTEX_WAKE_OP: 948 LIN_SDT_PROBE5(futex, linux_sys_futex, debug_wake_op, 949 args->uaddr, args->op, args->val, args->uaddr2, args->val3); 950 LINUX_CTR5(sys_futex, "WAKE_OP " 951 "uaddr %p nrwake 0x%x uaddr2 %p op 0x%x nrwake2 0x%x", 952 args->uaddr, args->val, args->uaddr2, args->val3, 953 args->timeout); 954 955 if (args->uaddr == args->uaddr2) { 956 LIN_SDT_PROBE1(futex, linux_sys_futex, return, EINVAL); 957 return (EINVAL); 958 } 959 960 retry2: 961 error = futex_get(args->uaddr, NULL, &f, flags | FUTEX_DONTLOCK); 962 if (error) { 963 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 964 return (error); 965 } 966 967 error = futex_get(args->uaddr2, NULL, &f2, flags | FUTEX_DONTLOCK); 968 if (error) { 969 futex_put(f, NULL); 970 971 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 972 return (error); 973 } 974 futex_lock(f); 975 futex_lock(f2); 976 977 /* 978 * This function returns positive number as results and 979 * negative as errors 980 */ 981 save = vm_fault_disable_pagefaults(); 982 op_ret = futex_atomic_op(td, args->val3, args->uaddr2); 983 vm_fault_enable_pagefaults(save); 984 985 LINUX_CTR2(sys_futex, "WAKE_OP atomic_op uaddr %p ret 0x%x", 986 args->uaddr, op_ret); 987 988 if (op_ret < 0) { 989 if (f2 != NULL) 990 futex_put(f2, NULL); 991 futex_put(f, NULL); 992 error = copyin(args->uaddr2, &val, sizeof(val)); 993 if (error == 0) 994 goto retry2; 995 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 996 return (error); 997 } 998 999 ret = futex_wake(f, args->val, args->val3); 1000 1001 if (op_ret > 0) { 1002 op_ret = 0; 1003 nrwake = (int)(unsigned long)args->timeout; 1004 1005 if (f2 != NULL) 1006 op_ret += futex_wake(f2, nrwake, args->val3); 1007 else 1008 op_ret += futex_wake(f, nrwake, args->val3); 1009 ret += op_ret; 1010 1011 } 1012 if (f2 != NULL) 1013 futex_put(f2, NULL); 1014 futex_put(f, NULL); 1015 td->td_retval[0] = ret; 1016 break; 1017 1018 case LINUX_FUTEX_LOCK_PI: 1019 /* not yet implemented */ 1020 pem = pem_find(td->td_proc); 1021 if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) { 1022 linux_msg(td, 1023 "linux_sys_futex: " 1024 "unsupported futex_pi op\n"); 1025 pem->flags |= LINUX_XUNSUP_FUTEXPIOP; 1026 LIN_SDT_PROBE0(futex, linux_sys_futex, 1027 unimplemented_lock_pi); 1028 } 1029 LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); 1030 return (ENOSYS); 1031 1032 case LINUX_FUTEX_UNLOCK_PI: 1033 /* not yet implemented */ 1034 pem = pem_find(td->td_proc); 1035 if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) { 1036 linux_msg(td, 1037 "linux_sys_futex: " 1038 "unsupported futex_pi op\n"); 1039 pem->flags |= LINUX_XUNSUP_FUTEXPIOP; 1040 LIN_SDT_PROBE0(futex, linux_sys_futex, 1041 unimplemented_unlock_pi); 1042 } 1043 LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); 1044 return (ENOSYS); 1045 1046 case LINUX_FUTEX_TRYLOCK_PI: 1047 /* not yet implemented */ 1048 pem = pem_find(td->td_proc); 1049 if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) { 1050 linux_msg(td, 1051 "linux_sys_futex: " 1052 "unsupported futex_pi op\n"); 1053 pem->flags |= LINUX_XUNSUP_FUTEXPIOP; 1054 LIN_SDT_PROBE0(futex, linux_sys_futex, 1055 unimplemented_trylock_pi); 1056 } 1057 LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); 1058 return (ENOSYS); 1059 1060 case LINUX_FUTEX_REQUEUE: 1061 /* 1062 * Glibc does not use this operation since version 2.3.3, 1063 * as it is racy and replaced by FUTEX_CMP_REQUEUE operation. 1064 * Glibc versions prior to 2.3.3 fall back to FUTEX_WAKE when 1065 * FUTEX_REQUEUE returned EINVAL. 1066 */ 1067 pem = pem_find(td->td_proc); 1068 if ((pem->flags & LINUX_XDEPR_REQUEUEOP) == 0) { 1069 linux_msg(td, 1070 "linux_sys_futex: " 1071 "unsupported futex_requeue op\n"); 1072 pem->flags |= LINUX_XDEPR_REQUEUEOP; 1073 LIN_SDT_PROBE0(futex, linux_sys_futex, 1074 deprecated_requeue); 1075 } 1076 1077 LIN_SDT_PROBE1(futex, linux_sys_futex, return, EINVAL); 1078 return (EINVAL); 1079 1080 case LINUX_FUTEX_WAIT_REQUEUE_PI: 1081 /* not yet implemented */ 1082 pem = pem_find(td->td_proc); 1083 if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) { 1084 linux_msg(td, 1085 "linux_sys_futex: " 1086 "unsupported futex_pi op\n"); 1087 pem->flags |= LINUX_XUNSUP_FUTEXPIOP; 1088 LIN_SDT_PROBE0(futex, linux_sys_futex, 1089 unimplemented_wait_requeue_pi); 1090 } 1091 LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); 1092 return (ENOSYS); 1093 1094 case LINUX_FUTEX_CMP_REQUEUE_PI: 1095 /* not yet implemented */ 1096 pem = pem_find(td->td_proc); 1097 if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) { 1098 linux_msg(td, 1099 "linux_sys_futex: " 1100 "unsupported futex_pi op\n"); 1101 pem->flags |= LINUX_XUNSUP_FUTEXPIOP; 1102 LIN_SDT_PROBE0(futex, linux_sys_futex, 1103 unimplemented_cmp_requeue_pi); 1104 } 1105 LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); 1106 return (ENOSYS); 1107 1108 default: 1109 linux_msg(td, 1110 "linux_sys_futex: unknown op %d\n", args->op); 1111 LIN_SDT_PROBE1(futex, linux_sys_futex, unknown_operation, 1112 args->op); 1113 LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); 1114 return (ENOSYS); 1115 } 1116 1117 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 1118 return (error); 1119 } 1120 1121 int 1122 linux_set_robust_list(struct thread *td, struct linux_set_robust_list_args *args) 1123 { 1124 struct linux_emuldata *em; 1125 1126 LIN_SDT_PROBE2(futex, linux_set_robust_list, entry, td, args); 1127 1128 if (args->len != sizeof(struct linux_robust_list_head)) { 1129 LIN_SDT_PROBE0(futex, linux_set_robust_list, size_error); 1130 LIN_SDT_PROBE1(futex, linux_set_robust_list, return, EINVAL); 1131 return (EINVAL); 1132 } 1133 1134 em = em_find(td); 1135 em->robust_futexes = args->head; 1136 1137 LIN_SDT_PROBE1(futex, linux_set_robust_list, return, 0); 1138 return (0); 1139 } 1140 1141 int 1142 linux_get_robust_list(struct thread *td, struct linux_get_robust_list_args *args) 1143 { 1144 struct linux_emuldata *em; 1145 struct linux_robust_list_head *head; 1146 l_size_t len = sizeof(struct linux_robust_list_head); 1147 struct thread *td2; 1148 int error = 0; 1149 1150 LIN_SDT_PROBE2(futex, linux_get_robust_list, entry, td, args); 1151 1152 if (!args->pid) { 1153 em = em_find(td); 1154 KASSERT(em != NULL, ("get_robust_list: emuldata notfound.\n")); 1155 head = em->robust_futexes; 1156 } else { 1157 td2 = tdfind(args->pid, -1); 1158 if (td2 == NULL) { 1159 LIN_SDT_PROBE1(futex, linux_get_robust_list, return, 1160 ESRCH); 1161 return (ESRCH); 1162 } 1163 if (SV_PROC_ABI(td2->td_proc) != SV_ABI_LINUX) { 1164 LIN_SDT_PROBE1(futex, linux_get_robust_list, return, 1165 EPERM); 1166 PROC_UNLOCK(td2->td_proc); 1167 return (EPERM); 1168 } 1169 1170 em = em_find(td2); 1171 KASSERT(em != NULL, ("get_robust_list: emuldata notfound.\n")); 1172 /* XXX: ptrace? */ 1173 if (priv_check(td, PRIV_CRED_SETUID) || 1174 priv_check(td, PRIV_CRED_SETEUID) || 1175 p_candebug(td, td2->td_proc)) { 1176 PROC_UNLOCK(td2->td_proc); 1177 1178 LIN_SDT_PROBE1(futex, linux_get_robust_list, return, 1179 EPERM); 1180 return (EPERM); 1181 } 1182 head = em->robust_futexes; 1183 1184 PROC_UNLOCK(td2->td_proc); 1185 } 1186 1187 error = copyout(&len, args->len, sizeof(l_size_t)); 1188 if (error) { 1189 LIN_SDT_PROBE1(futex, linux_get_robust_list, copyout_error, 1190 error); 1191 LIN_SDT_PROBE1(futex, linux_get_robust_list, return, EFAULT); 1192 return (EFAULT); 1193 } 1194 1195 error = copyout(&head, args->head, sizeof(head)); 1196 if (error) { 1197 LIN_SDT_PROBE1(futex, linux_get_robust_list, copyout_error, 1198 error); 1199 } 1200 1201 LIN_SDT_PROBE1(futex, linux_get_robust_list, return, error); 1202 return (error); 1203 } 1204 1205 static int 1206 handle_futex_death(struct linux_emuldata *em, uint32_t *uaddr, 1207 unsigned int pi) 1208 { 1209 uint32_t uval, nval, mval; 1210 struct futex *f; 1211 int error; 1212 1213 LIN_SDT_PROBE3(futex, handle_futex_death, entry, em, uaddr, pi); 1214 1215 retry: 1216 error = copyin(uaddr, &uval, 4); 1217 if (error) { 1218 LIN_SDT_PROBE1(futex, handle_futex_death, copyin_error, error); 1219 LIN_SDT_PROBE1(futex, handle_futex_death, return, EFAULT); 1220 return (EFAULT); 1221 } 1222 if ((uval & FUTEX_TID_MASK) == em->em_tid) { 1223 mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED; 1224 nval = casuword32(uaddr, uval, mval); 1225 1226 if (nval == -1) { 1227 LIN_SDT_PROBE1(futex, handle_futex_death, return, 1228 EFAULT); 1229 return (EFAULT); 1230 } 1231 1232 if (nval != uval) 1233 goto retry; 1234 1235 if (!pi && (uval & FUTEX_WAITERS)) { 1236 error = futex_get(uaddr, NULL, &f, 1237 FUTEX_DONTCREATE | FUTEX_SHARED); 1238 if (error) { 1239 LIN_SDT_PROBE1(futex, handle_futex_death, 1240 return, error); 1241 return (error); 1242 } 1243 if (f != NULL) { 1244 futex_wake(f, 1, FUTEX_BITSET_MATCH_ANY); 1245 futex_put(f, NULL); 1246 } 1247 } 1248 } 1249 1250 LIN_SDT_PROBE1(futex, handle_futex_death, return, 0); 1251 return (0); 1252 } 1253 1254 static int 1255 fetch_robust_entry(struct linux_robust_list **entry, 1256 struct linux_robust_list **head, unsigned int *pi) 1257 { 1258 l_ulong uentry; 1259 int error; 1260 1261 LIN_SDT_PROBE3(futex, fetch_robust_entry, entry, entry, head, pi); 1262 1263 error = copyin((const void *)head, &uentry, sizeof(l_ulong)); 1264 if (error) { 1265 LIN_SDT_PROBE1(futex, fetch_robust_entry, copyin_error, error); 1266 LIN_SDT_PROBE1(futex, fetch_robust_entry, return, EFAULT); 1267 return (EFAULT); 1268 } 1269 1270 *entry = (void *)(uentry & ~1UL); 1271 *pi = uentry & 1; 1272 1273 LIN_SDT_PROBE1(futex, fetch_robust_entry, return, 0); 1274 return (0); 1275 } 1276 1277 /* This walks the list of robust futexes releasing them. */ 1278 void 1279 release_futexes(struct thread *td, struct linux_emuldata *em) 1280 { 1281 struct linux_robust_list_head *head = NULL; 1282 struct linux_robust_list *entry, *next_entry, *pending; 1283 unsigned int limit = 2048, pi, next_pi, pip; 1284 l_long futex_offset; 1285 int rc, error; 1286 1287 LIN_SDT_PROBE2(futex, release_futexes, entry, td, em); 1288 1289 head = em->robust_futexes; 1290 1291 if (head == NULL) { 1292 LIN_SDT_PROBE0(futex, release_futexes, return); 1293 return; 1294 } 1295 1296 if (fetch_robust_entry(&entry, PTRIN(&head->list.next), &pi)) { 1297 LIN_SDT_PROBE0(futex, release_futexes, return); 1298 return; 1299 } 1300 1301 error = copyin(&head->futex_offset, &futex_offset, 1302 sizeof(futex_offset)); 1303 if (error) { 1304 LIN_SDT_PROBE1(futex, release_futexes, copyin_error, error); 1305 LIN_SDT_PROBE0(futex, release_futexes, return); 1306 return; 1307 } 1308 1309 if (fetch_robust_entry(&pending, PTRIN(&head->pending_list), &pip)) { 1310 LIN_SDT_PROBE0(futex, release_futexes, return); 1311 return; 1312 } 1313 1314 while (entry != &head->list) { 1315 rc = fetch_robust_entry(&next_entry, PTRIN(&entry->next), &next_pi); 1316 1317 if (entry != pending) 1318 if (handle_futex_death(em, 1319 (uint32_t *)((caddr_t)entry + futex_offset), pi)) { 1320 LIN_SDT_PROBE0(futex, release_futexes, return); 1321 return; 1322 } 1323 if (rc) { 1324 LIN_SDT_PROBE0(futex, release_futexes, return); 1325 return; 1326 } 1327 1328 entry = next_entry; 1329 pi = next_pi; 1330 1331 if (!--limit) 1332 break; 1333 1334 sched_relinquish(curthread); 1335 } 1336 1337 if (pending) 1338 handle_futex_death(em, (uint32_t *)((caddr_t)pending + futex_offset), pip); 1339 1340 LIN_SDT_PROBE0(futex, release_futexes, return); 1341 } 1342