1 /* $NetBSD: linux_futex.c,v 1.7 2006/07/24 19:01:49 manu Exp $ */ 2 3 /*- 4 * Copyright (c) 2009-2016 Dmitry Chagin 5 * Copyright (c) 2005 Emmanuel Dreyfus 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by Emmanuel Dreyfus 19 * 4. The name of the author may not be used to endorse or promote 20 * products derived from this software without specific prior written 21 * permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE THE AUTHOR AND CONTRIBUTORS ``AS IS'' 24 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 25 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 26 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS 27 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 30 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 31 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 32 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 33 * POSSIBILITY OF SUCH DAMAGE. 34 */ 35 36 #include <sys/cdefs.h> 37 __FBSDID("$FreeBSD$"); 38 #if 0 39 __KERNEL_RCSID(1, "$NetBSD: linux_futex.c,v 1.7 2006/07/24 19:01:49 manu Exp $"); 40 #endif 41 42 #include "opt_compat.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/imgact.h> 47 #include <sys/kernel.h> 48 #include <sys/ktr.h> 49 #include <sys/lock.h> 50 #include <sys/malloc.h> 51 #include <sys/mutex.h> 52 #include <sys/priv.h> 53 #include <sys/proc.h> 54 #include <sys/queue.h> 55 #include <sys/sched.h> 56 #include <sys/sdt.h> 57 #include <sys/umtx.h> 58 59 #include <vm/vm_extern.h> 60 61 #ifdef COMPAT_LINUX32 62 #include <machine/../linux32/linux.h> 63 #include <machine/../linux32/linux32_proto.h> 64 #else 65 #include <machine/../linux/linux.h> 66 #include <machine/../linux/linux_proto.h> 67 #endif 68 #include <compat/linux/linux_dtrace.h> 69 #include <compat/linux/linux_emul.h> 70 #include <compat/linux/linux_futex.h> 71 #include <compat/linux/linux_timer.h> 72 #include <compat/linux/linux_util.h> 73 74 /* DTrace init */ 75 LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE); 76 77 /** 78 * Futex part for the special DTrace module "locks". 79 */ 80 LIN_SDT_PROBE_DEFINE1(locks, futex_mtx, locked, "struct mtx *"); 81 LIN_SDT_PROBE_DEFINE1(locks, futex_mtx, unlock, "struct mtx *"); 82 83 /** 84 * Per futex probes. 85 */ 86 LIN_SDT_PROBE_DEFINE1(futex, futex, create, "struct sx *"); 87 LIN_SDT_PROBE_DEFINE1(futex, futex, destroy, "struct sx *"); 88 89 /** 90 * DTrace probes in this module. 91 */ 92 LIN_SDT_PROBE_DEFINE2(futex, futex_put, entry, "struct futex *", 93 "struct waiting_proc *"); 94 LIN_SDT_PROBE_DEFINE3(futex, futex_put, destroy, "uint32_t *", "uint32_t", 95 "int"); 96 LIN_SDT_PROBE_DEFINE3(futex, futex_put, unlock, "uint32_t *", "uint32_t", 97 "int"); 98 LIN_SDT_PROBE_DEFINE0(futex, futex_put, return); 99 LIN_SDT_PROBE_DEFINE3(futex, futex_get0, entry, "uint32_t *", "struct futex **", 100 "uint32_t"); 101 LIN_SDT_PROBE_DEFINE1(futex, futex_get0, umtx_key_get_error, "int"); 102 LIN_SDT_PROBE_DEFINE3(futex, futex_get0, shared, "uint32_t *", "uint32_t", 103 "int"); 104 LIN_SDT_PROBE_DEFINE1(futex, futex_get0, null, "uint32_t *"); 105 LIN_SDT_PROBE_DEFINE3(futex, futex_get0, new, "uint32_t *", "uint32_t", "int"); 106 LIN_SDT_PROBE_DEFINE1(futex, futex_get0, return, "int"); 107 LIN_SDT_PROBE_DEFINE3(futex, futex_get, entry, "uint32_t *", 108 "struct waiting_proc **", "struct futex **"); 109 LIN_SDT_PROBE_DEFINE0(futex, futex_get, error); 110 LIN_SDT_PROBE_DEFINE1(futex, futex_get, return, "int"); 111 LIN_SDT_PROBE_DEFINE3(futex, futex_sleep, entry, "struct futex *", 112 "struct waiting_proc **", "struct timespec *"); 113 LIN_SDT_PROBE_DEFINE5(futex, futex_sleep, requeue_error, "int", "uint32_t *", 114 "struct waiting_proc *", "uint32_t *", "uint32_t"); 115 LIN_SDT_PROBE_DEFINE3(futex, futex_sleep, sleep_error, "int", "uint32_t *", 116 "struct waiting_proc *"); 117 LIN_SDT_PROBE_DEFINE1(futex, futex_sleep, return, "int"); 118 LIN_SDT_PROBE_DEFINE3(futex, futex_wake, entry, "struct futex *", "int", 119 "uint32_t"); 120 LIN_SDT_PROBE_DEFINE3(futex, futex_wake, iterate, "uint32_t", 121 "struct waiting_proc *", "uint32_t"); 122 LIN_SDT_PROBE_DEFINE1(futex, futex_wake, wakeup, "struct waiting_proc *"); 123 LIN_SDT_PROBE_DEFINE1(futex, futex_wake, return, "int"); 124 LIN_SDT_PROBE_DEFINE4(futex, futex_requeue, entry, "struct futex *", "int", 125 "struct futex *", "int"); 126 LIN_SDT_PROBE_DEFINE1(futex, futex_requeue, wakeup, "struct waiting_proc *"); 127 LIN_SDT_PROBE_DEFINE3(futex, futex_requeue, requeue, "uint32_t *", 128 "struct waiting_proc *", "uint32_t"); 129 LIN_SDT_PROBE_DEFINE1(futex, futex_requeue, return, "int"); 130 LIN_SDT_PROBE_DEFINE4(futex, futex_wait, entry, "struct futex *", 131 "struct waiting_proc **", "struct timespec *", "uint32_t"); 132 LIN_SDT_PROBE_DEFINE1(futex, futex_wait, sleep_error, "int"); 133 LIN_SDT_PROBE_DEFINE1(futex, futex_wait, return, "int"); 134 LIN_SDT_PROBE_DEFINE3(futex, futex_atomic_op, entry, "struct thread *", 135 "int", "uint32_t"); 136 LIN_SDT_PROBE_DEFINE4(futex, futex_atomic_op, decoded_op, "int", "int", "int", 137 "int"); 138 LIN_SDT_PROBE_DEFINE0(futex, futex_atomic_op, missing_access_check); 139 LIN_SDT_PROBE_DEFINE1(futex, futex_atomic_op, unimplemented_op, "int"); 140 LIN_SDT_PROBE_DEFINE1(futex, futex_atomic_op, unimplemented_cmp, "int"); 141 LIN_SDT_PROBE_DEFINE1(futex, futex_atomic_op, return, "int"); 142 LIN_SDT_PROBE_DEFINE2(futex, linux_sys_futex, entry, "struct thread *", 143 "struct linux_sys_futex_args *"); 144 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_clockswitch); 145 LIN_SDT_PROBE_DEFINE1(futex, linux_sys_futex, copyin_error, "int"); 146 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, invalid_cmp_requeue_use); 147 LIN_SDT_PROBE_DEFINE3(futex, linux_sys_futex, debug_wait, "uint32_t *", 148 "uint32_t", "uint32_t"); 149 LIN_SDT_PROBE_DEFINE4(futex, linux_sys_futex, debug_wait_value_neq, 150 "uint32_t *", "uint32_t", "int", "uint32_t"); 151 LIN_SDT_PROBE_DEFINE3(futex, linux_sys_futex, debug_wake, "uint32_t *", 152 "uint32_t", "uint32_t"); 153 LIN_SDT_PROBE_DEFINE5(futex, linux_sys_futex, debug_cmp_requeue, "uint32_t *", 154 "uint32_t", "uint32_t", "uint32_t *", "struct l_timespec *"); 155 LIN_SDT_PROBE_DEFINE2(futex, linux_sys_futex, debug_cmp_requeue_value_neq, 156 "uint32_t", "int"); 157 LIN_SDT_PROBE_DEFINE5(futex, linux_sys_futex, debug_wake_op, "uint32_t *", 158 "int", "uint32_t", "uint32_t *", "uint32_t"); 159 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unhandled_efault); 160 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_lock_pi); 161 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_unlock_pi); 162 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_trylock_pi); 163 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, deprecated_requeue); 164 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_wait_requeue_pi); 165 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_cmp_requeue_pi); 166 LIN_SDT_PROBE_DEFINE1(futex, linux_sys_futex, unknown_operation, "int"); 167 LIN_SDT_PROBE_DEFINE1(futex, linux_sys_futex, return, "int"); 168 LIN_SDT_PROBE_DEFINE2(futex, linux_set_robust_list, entry, "struct thread *", 169 "struct linux_set_robust_list_args *"); 170 LIN_SDT_PROBE_DEFINE0(futex, linux_set_robust_list, size_error); 171 LIN_SDT_PROBE_DEFINE1(futex, linux_set_robust_list, return, "int"); 172 LIN_SDT_PROBE_DEFINE2(futex, linux_get_robust_list, entry, "struct thread *", 173 "struct linux_get_robust_list_args *"); 174 LIN_SDT_PROBE_DEFINE1(futex, linux_get_robust_list, copyout_error, "int"); 175 LIN_SDT_PROBE_DEFINE1(futex, linux_get_robust_list, return, "int"); 176 LIN_SDT_PROBE_DEFINE3(futex, handle_futex_death, entry, 177 "struct linux_emuldata *", "uint32_t *", "unsigned int"); 178 LIN_SDT_PROBE_DEFINE1(futex, handle_futex_death, copyin_error, "int"); 179 LIN_SDT_PROBE_DEFINE1(futex, handle_futex_death, return, "int"); 180 LIN_SDT_PROBE_DEFINE3(futex, fetch_robust_entry, entry, 181 "struct linux_robust_list **", "struct linux_robust_list **", 182 "unsigned int *"); 183 LIN_SDT_PROBE_DEFINE1(futex, fetch_robust_entry, copyin_error, "int"); 184 LIN_SDT_PROBE_DEFINE1(futex, fetch_robust_entry, return, "int"); 185 LIN_SDT_PROBE_DEFINE2(futex, release_futexes, entry, "struct thread *", 186 "struct linux_emuldata *"); 187 LIN_SDT_PROBE_DEFINE1(futex, release_futexes, copyin_error, "int"); 188 LIN_SDT_PROBE_DEFINE0(futex, release_futexes, return); 189 190 struct futex; 191 192 struct waiting_proc { 193 uint32_t wp_flags; 194 struct futex *wp_futex; 195 TAILQ_ENTRY(waiting_proc) wp_list; 196 }; 197 198 struct futex { 199 struct mtx f_lck; 200 uint32_t *f_uaddr; /* user-supplied value, for debug */ 201 struct umtx_key f_key; 202 uint32_t f_refcount; 203 uint32_t f_bitset; 204 LIST_ENTRY(futex) f_list; 205 TAILQ_HEAD(lf_waiting_proc, waiting_proc) f_waiting_proc; 206 }; 207 208 struct futex_list futex_list; 209 210 #define FUTEX_LOCK(f) mtx_lock(&(f)->f_lck) 211 #define FUTEX_LOCKED(f) mtx_owned(&(f)->f_lck) 212 #define FUTEX_UNLOCK(f) mtx_unlock(&(f)->f_lck) 213 #define FUTEX_INIT(f) do { \ 214 mtx_init(&(f)->f_lck, "ftlk", NULL, \ 215 MTX_DUPOK); \ 216 LIN_SDT_PROBE1(futex, futex, create, \ 217 &(f)->f_lck); \ 218 } while (0) 219 #define FUTEX_DESTROY(f) do { \ 220 LIN_SDT_PROBE1(futex, futex, destroy, \ 221 &(f)->f_lck); \ 222 mtx_destroy(&(f)->f_lck); \ 223 } while (0) 224 #define FUTEX_ASSERT_LOCKED(f) mtx_assert(&(f)->f_lck, MA_OWNED) 225 #define FUTEX_ASSERT_UNLOCKED(f) mtx_assert(&(f)->f_lck, MA_NOTOWNED) 226 227 struct mtx futex_mtx; /* protects the futex list */ 228 #define FUTEXES_LOCK do { \ 229 mtx_lock(&futex_mtx); \ 230 LIN_SDT_PROBE1(locks, futex_mtx, \ 231 locked, &futex_mtx); \ 232 } while (0) 233 #define FUTEXES_UNLOCK do { \ 234 LIN_SDT_PROBE1(locks, futex_mtx, \ 235 unlock, &futex_mtx); \ 236 mtx_unlock(&futex_mtx); \ 237 } while (0) 238 239 /* flags for futex_get() */ 240 #define FUTEX_CREATE_WP 0x1 /* create waiting_proc */ 241 #define FUTEX_DONTCREATE 0x2 /* don't create futex if not exists */ 242 #define FUTEX_DONTEXISTS 0x4 /* return EINVAL if futex exists */ 243 #define FUTEX_SHARED 0x8 /* shared futex */ 244 #define FUTEX_DONTLOCK 0x10 /* don't lock futex */ 245 246 /* wp_flags */ 247 #define FUTEX_WP_REQUEUED 0x1 /* wp requeued - wp moved from wp_list 248 * of futex where thread sleep to wp_list 249 * of another futex. 250 */ 251 #define FUTEX_WP_REMOVED 0x2 /* wp is woken up and removed from futex 252 * wp_list to prevent double wakeup. 253 */ 254 255 static void futex_put(struct futex *, struct waiting_proc *); 256 static int futex_get0(uint32_t *, struct futex **f, uint32_t); 257 static int futex_get(uint32_t *, struct waiting_proc **, struct futex **, 258 uint32_t); 259 static int futex_sleep(struct futex *, struct waiting_proc *, struct timespec *); 260 static int futex_wake(struct futex *, int, uint32_t); 261 static int futex_requeue(struct futex *, int, struct futex *, int); 262 static int futex_copyin_timeout(int, struct l_timespec *, int, 263 struct timespec *); 264 static int futex_wait(struct futex *, struct waiting_proc *, struct timespec *, 265 uint32_t); 266 static void futex_lock(struct futex *); 267 static void futex_unlock(struct futex *); 268 static int futex_atomic_op(struct thread *, int, uint32_t *); 269 static int handle_futex_death(struct linux_emuldata *, uint32_t *, 270 unsigned int); 271 static int fetch_robust_entry(struct linux_robust_list **, 272 struct linux_robust_list **, unsigned int *); 273 274 /* support.s */ 275 int futex_xchgl(int oparg, uint32_t *uaddr, int *oldval); 276 int futex_addl(int oparg, uint32_t *uaddr, int *oldval); 277 int futex_orl(int oparg, uint32_t *uaddr, int *oldval); 278 int futex_andl(int oparg, uint32_t *uaddr, int *oldval); 279 int futex_xorl(int oparg, uint32_t *uaddr, int *oldval); 280 281 282 static int 283 futex_copyin_timeout(int op, struct l_timespec *luts, int clockrt, 284 struct timespec *ts) 285 { 286 struct l_timespec lts; 287 struct timespec kts; 288 int error; 289 290 error = copyin(luts, <s, sizeof(lts)); 291 if (error) 292 return (error); 293 294 error = linux_to_native_timespec(ts, <s); 295 if (error) 296 return (error); 297 if (clockrt) { 298 nanotime(&kts); 299 timespecsub(ts, &kts); 300 } else if (op == LINUX_FUTEX_WAIT_BITSET) { 301 nanouptime(&kts); 302 timespecsub(ts, &kts); 303 } 304 return (error); 305 } 306 307 static void 308 futex_put(struct futex *f, struct waiting_proc *wp) 309 { 310 LIN_SDT_PROBE2(futex, futex_put, entry, f, wp); 311 312 if (wp != NULL) { 313 if ((wp->wp_flags & FUTEX_WP_REMOVED) == 0) 314 TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list); 315 free(wp, M_FUTEX_WP); 316 } 317 318 FUTEXES_LOCK; 319 if (--f->f_refcount == 0) { 320 LIST_REMOVE(f, f_list); 321 FUTEXES_UNLOCK; 322 if (FUTEX_LOCKED(f)) 323 futex_unlock(f); 324 325 LIN_SDT_PROBE3(futex, futex_put, destroy, f->f_uaddr, 326 f->f_refcount, f->f_key.shared); 327 LINUX_CTR3(sys_futex, "futex_put destroy uaddr %p ref %d " 328 "shared %d", f->f_uaddr, f->f_refcount, f->f_key.shared); 329 umtx_key_release(&f->f_key); 330 FUTEX_DESTROY(f); 331 free(f, M_FUTEX); 332 333 LIN_SDT_PROBE0(futex, futex_put, return); 334 return; 335 } 336 337 LIN_SDT_PROBE3(futex, futex_put, unlock, f->f_uaddr, f->f_refcount, 338 f->f_key.shared); 339 LINUX_CTR3(sys_futex, "futex_put uaddr %p ref %d shared %d", 340 f->f_uaddr, f->f_refcount, f->f_key.shared); 341 FUTEXES_UNLOCK; 342 if (FUTEX_LOCKED(f)) 343 futex_unlock(f); 344 345 LIN_SDT_PROBE0(futex, futex_put, return); 346 } 347 348 static int 349 futex_get0(uint32_t *uaddr, struct futex **newf, uint32_t flags) 350 { 351 struct futex *f, *tmpf; 352 struct umtx_key key; 353 int error; 354 355 LIN_SDT_PROBE3(futex, futex_get0, entry, uaddr, newf, flags); 356 357 *newf = tmpf = NULL; 358 359 error = umtx_key_get(uaddr, TYPE_FUTEX, (flags & FUTEX_SHARED) ? 360 AUTO_SHARE : THREAD_SHARE, &key); 361 if (error) { 362 LIN_SDT_PROBE1(futex, futex_get0, umtx_key_get_error, error); 363 LIN_SDT_PROBE1(futex, futex_get0, return, error); 364 return (error); 365 } 366 retry: 367 FUTEXES_LOCK; 368 LIST_FOREACH(f, &futex_list, f_list) { 369 if (umtx_key_match(&f->f_key, &key)) { 370 if (tmpf != NULL) { 371 if (FUTEX_LOCKED(tmpf)) 372 futex_unlock(tmpf); 373 FUTEX_DESTROY(tmpf); 374 free(tmpf, M_FUTEX); 375 } 376 if (flags & FUTEX_DONTEXISTS) { 377 FUTEXES_UNLOCK; 378 umtx_key_release(&key); 379 380 LIN_SDT_PROBE1(futex, futex_get0, return, 381 EINVAL); 382 return (EINVAL); 383 } 384 385 /* 386 * Increment refcount of the found futex to 387 * prevent it from deallocation before FUTEX_LOCK() 388 */ 389 ++f->f_refcount; 390 FUTEXES_UNLOCK; 391 umtx_key_release(&key); 392 393 if ((flags & FUTEX_DONTLOCK) == 0) 394 futex_lock(f); 395 *newf = f; 396 LIN_SDT_PROBE3(futex, futex_get0, shared, uaddr, 397 f->f_refcount, f->f_key.shared); 398 LINUX_CTR3(sys_futex, "futex_get uaddr %p ref %d shared %d", 399 uaddr, f->f_refcount, f->f_key.shared); 400 401 LIN_SDT_PROBE1(futex, futex_get0, return, 0); 402 return (0); 403 } 404 } 405 406 if (flags & FUTEX_DONTCREATE) { 407 FUTEXES_UNLOCK; 408 umtx_key_release(&key); 409 LIN_SDT_PROBE1(futex, futex_get0, null, uaddr); 410 LINUX_CTR1(sys_futex, "futex_get uaddr %p null", uaddr); 411 412 LIN_SDT_PROBE1(futex, futex_get0, return, 0); 413 return (0); 414 } 415 416 if (tmpf == NULL) { 417 FUTEXES_UNLOCK; 418 tmpf = malloc(sizeof(*tmpf), M_FUTEX, M_WAITOK | M_ZERO); 419 tmpf->f_uaddr = uaddr; 420 tmpf->f_key = key; 421 tmpf->f_refcount = 1; 422 tmpf->f_bitset = FUTEX_BITSET_MATCH_ANY; 423 FUTEX_INIT(tmpf); 424 TAILQ_INIT(&tmpf->f_waiting_proc); 425 426 /* 427 * Lock the new futex before an insert into the futex_list 428 * to prevent futex usage by other. 429 */ 430 if ((flags & FUTEX_DONTLOCK) == 0) 431 futex_lock(tmpf); 432 goto retry; 433 } 434 435 LIST_INSERT_HEAD(&futex_list, tmpf, f_list); 436 FUTEXES_UNLOCK; 437 438 LIN_SDT_PROBE3(futex, futex_get0, new, uaddr, tmpf->f_refcount, 439 tmpf->f_key.shared); 440 LINUX_CTR3(sys_futex, "futex_get uaddr %p ref %d shared %d new", 441 uaddr, tmpf->f_refcount, tmpf->f_key.shared); 442 *newf = tmpf; 443 444 LIN_SDT_PROBE1(futex, futex_get0, return, 0); 445 return (0); 446 } 447 448 static int 449 futex_get(uint32_t *uaddr, struct waiting_proc **wp, struct futex **f, 450 uint32_t flags) 451 { 452 int error; 453 454 LIN_SDT_PROBE3(futex, futex_get, entry, uaddr, wp, f); 455 456 if (flags & FUTEX_CREATE_WP) { 457 *wp = malloc(sizeof(struct waiting_proc), M_FUTEX_WP, M_WAITOK); 458 (*wp)->wp_flags = 0; 459 } 460 error = futex_get0(uaddr, f, flags); 461 if (error) { 462 LIN_SDT_PROBE0(futex, futex_get, error); 463 464 if (flags & FUTEX_CREATE_WP) 465 free(*wp, M_FUTEX_WP); 466 467 LIN_SDT_PROBE1(futex, futex_get, return, error); 468 return (error); 469 } 470 if (flags & FUTEX_CREATE_WP) { 471 TAILQ_INSERT_HEAD(&(*f)->f_waiting_proc, *wp, wp_list); 472 (*wp)->wp_futex = *f; 473 } 474 475 LIN_SDT_PROBE1(futex, futex_get, return, error); 476 return (error); 477 } 478 479 static inline void 480 futex_lock(struct futex *f) 481 { 482 483 LINUX_CTR3(sys_futex, "futex_lock uaddr %p ref %d shared %d", 484 f->f_uaddr, f->f_refcount, f->f_key.shared); 485 FUTEX_ASSERT_UNLOCKED(f); 486 FUTEX_LOCK(f); 487 } 488 489 static inline void 490 futex_unlock(struct futex *f) 491 { 492 493 LINUX_CTR3(sys_futex, "futex_unlock uaddr %p ref %d shared %d", 494 f->f_uaddr, f->f_refcount, f->f_key.shared); 495 FUTEX_ASSERT_LOCKED(f); 496 FUTEX_UNLOCK(f); 497 } 498 499 static int 500 futex_sleep(struct futex *f, struct waiting_proc *wp, struct timespec *ts) 501 { 502 struct timespec uts; 503 sbintime_t sbt, prec, tmp; 504 time_t over; 505 int error; 506 507 FUTEX_ASSERT_LOCKED(f); 508 if (ts != NULL) { 509 uts = *ts; 510 if (uts.tv_sec > INT32_MAX / 2) { 511 over = uts.tv_sec - INT32_MAX / 2; 512 uts.tv_sec -= over; 513 } 514 tmp = tstosbt(uts); 515 if (TIMESEL(&sbt, tmp)) 516 sbt += tc_tick_sbt; 517 sbt += tmp; 518 prec = tmp; 519 prec >>= tc_precexp; 520 } else { 521 sbt = 0; 522 prec = 0; 523 } 524 LIN_SDT_PROBE3(futex, futex_sleep, entry, f, wp, sbt); 525 LINUX_CTR4(sys_futex, "futex_sleep enter uaddr %p wp %p timo %ld ref %d", 526 f->f_uaddr, wp, sbt, f->f_refcount); 527 528 error = msleep_sbt(wp, &f->f_lck, PCATCH, "futex", sbt, prec, C_ABSOLUTE); 529 if (wp->wp_flags & FUTEX_WP_REQUEUED) { 530 KASSERT(f != wp->wp_futex, ("futex != wp_futex")); 531 532 if (error) { 533 LIN_SDT_PROBE5(futex, futex_sleep, requeue_error, error, 534 f->f_uaddr, wp, wp->wp_futex->f_uaddr, 535 wp->wp_futex->f_refcount); 536 } 537 538 LINUX_CTR5(sys_futex, "futex_sleep out error %d uaddr %p wp" 539 " %p requeued uaddr %p ref %d", 540 error, f->f_uaddr, wp, wp->wp_futex->f_uaddr, 541 wp->wp_futex->f_refcount); 542 futex_put(f, NULL); 543 f = wp->wp_futex; 544 futex_lock(f); 545 } else { 546 if (error) { 547 LIN_SDT_PROBE3(futex, futex_sleep, sleep_error, error, 548 f->f_uaddr, wp); 549 } 550 LINUX_CTR3(sys_futex, "futex_sleep out error %d uaddr %p wp %p", 551 error, f->f_uaddr, wp); 552 } 553 554 futex_put(f, wp); 555 556 LIN_SDT_PROBE1(futex, futex_sleep, return, error); 557 return (error); 558 } 559 560 static int 561 futex_wake(struct futex *f, int n, uint32_t bitset) 562 { 563 struct waiting_proc *wp, *wpt; 564 int count = 0; 565 566 LIN_SDT_PROBE3(futex, futex_wake, entry, f, n, bitset); 567 568 if (bitset == 0) { 569 LIN_SDT_PROBE1(futex, futex_wake, return, EINVAL); 570 return (EINVAL); 571 } 572 573 FUTEX_ASSERT_LOCKED(f); 574 TAILQ_FOREACH_SAFE(wp, &f->f_waiting_proc, wp_list, wpt) { 575 LIN_SDT_PROBE3(futex, futex_wake, iterate, f->f_uaddr, wp, 576 f->f_refcount); 577 LINUX_CTR3(sys_futex, "futex_wake uaddr %p wp %p ref %d", 578 f->f_uaddr, wp, f->f_refcount); 579 /* 580 * Unless we find a matching bit in 581 * the bitset, continue searching. 582 */ 583 if (!(wp->wp_futex->f_bitset & bitset)) 584 continue; 585 586 wp->wp_flags |= FUTEX_WP_REMOVED; 587 TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list); 588 LIN_SDT_PROBE1(futex, futex_wake, wakeup, wp); 589 wakeup_one(wp); 590 if (++count == n) 591 break; 592 } 593 594 LIN_SDT_PROBE1(futex, futex_wake, return, count); 595 return (count); 596 } 597 598 static int 599 futex_requeue(struct futex *f, int n, struct futex *f2, int n2) 600 { 601 struct waiting_proc *wp, *wpt; 602 int count = 0; 603 604 LIN_SDT_PROBE4(futex, futex_requeue, entry, f, n, f2, n2); 605 606 FUTEX_ASSERT_LOCKED(f); 607 FUTEX_ASSERT_LOCKED(f2); 608 609 TAILQ_FOREACH_SAFE(wp, &f->f_waiting_proc, wp_list, wpt) { 610 if (++count <= n) { 611 LINUX_CTR2(sys_futex, "futex_req_wake uaddr %p wp %p", 612 f->f_uaddr, wp); 613 wp->wp_flags |= FUTEX_WP_REMOVED; 614 TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list); 615 LIN_SDT_PROBE1(futex, futex_requeue, wakeup, wp); 616 wakeup_one(wp); 617 } else { 618 LIN_SDT_PROBE3(futex, futex_requeue, requeue, 619 f->f_uaddr, wp, f2->f_uaddr); 620 LINUX_CTR3(sys_futex, "futex_requeue uaddr %p wp %p to %p", 621 f->f_uaddr, wp, f2->f_uaddr); 622 wp->wp_flags |= FUTEX_WP_REQUEUED; 623 /* Move wp to wp_list of f2 futex */ 624 TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list); 625 TAILQ_INSERT_HEAD(&f2->f_waiting_proc, wp, wp_list); 626 627 /* 628 * Thread which sleeps on wp after waking should 629 * acquire f2 lock, so increment refcount of f2 to 630 * prevent it from premature deallocation. 631 */ 632 wp->wp_futex = f2; 633 FUTEXES_LOCK; 634 ++f2->f_refcount; 635 FUTEXES_UNLOCK; 636 if (count - n >= n2) 637 break; 638 } 639 } 640 641 LIN_SDT_PROBE1(futex, futex_requeue, return, count); 642 return (count); 643 } 644 645 static int 646 futex_wait(struct futex *f, struct waiting_proc *wp, struct timespec *ts, 647 uint32_t bitset) 648 { 649 int error; 650 651 LIN_SDT_PROBE4(futex, futex_wait, entry, f, wp, ts, bitset); 652 653 if (bitset == 0) { 654 LIN_SDT_PROBE1(futex, futex_wait, return, EINVAL); 655 return (EINVAL); 656 } 657 658 f->f_bitset = bitset; 659 error = futex_sleep(f, wp, ts); 660 if (error) 661 LIN_SDT_PROBE1(futex, futex_wait, sleep_error, error); 662 if (error == EWOULDBLOCK) 663 error = ETIMEDOUT; 664 665 LIN_SDT_PROBE1(futex, futex_wait, return, error); 666 return (error); 667 } 668 669 static int 670 futex_atomic_op(struct thread *td, int encoded_op, uint32_t *uaddr) 671 { 672 int op = (encoded_op >> 28) & 7; 673 int cmp = (encoded_op >> 24) & 15; 674 int oparg = (encoded_op << 8) >> 20; 675 int cmparg = (encoded_op << 20) >> 20; 676 int oldval = 0, ret; 677 678 LIN_SDT_PROBE3(futex, futex_atomic_op, entry, td, encoded_op, uaddr); 679 680 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 681 oparg = 1 << oparg; 682 683 LIN_SDT_PROBE4(futex, futex_atomic_op, decoded_op, op, cmp, oparg, 684 cmparg); 685 686 /* XXX: Linux verifies access here and returns EFAULT */ 687 LIN_SDT_PROBE0(futex, futex_atomic_op, missing_access_check); 688 689 switch (op) { 690 case FUTEX_OP_SET: 691 ret = futex_xchgl(oparg, uaddr, &oldval); 692 break; 693 case FUTEX_OP_ADD: 694 ret = futex_addl(oparg, uaddr, &oldval); 695 break; 696 case FUTEX_OP_OR: 697 ret = futex_orl(oparg, uaddr, &oldval); 698 break; 699 case FUTEX_OP_ANDN: 700 ret = futex_andl(~oparg, uaddr, &oldval); 701 break; 702 case FUTEX_OP_XOR: 703 ret = futex_xorl(oparg, uaddr, &oldval); 704 break; 705 default: 706 LIN_SDT_PROBE1(futex, futex_atomic_op, unimplemented_op, op); 707 ret = -ENOSYS; 708 break; 709 } 710 711 if (ret) { 712 LIN_SDT_PROBE1(futex, futex_atomic_op, return, ret); 713 return (ret); 714 } 715 716 switch (cmp) { 717 case FUTEX_OP_CMP_EQ: 718 ret = (oldval == cmparg); 719 break; 720 case FUTEX_OP_CMP_NE: 721 ret = (oldval != cmparg); 722 break; 723 case FUTEX_OP_CMP_LT: 724 ret = (oldval < cmparg); 725 break; 726 case FUTEX_OP_CMP_GE: 727 ret = (oldval >= cmparg); 728 break; 729 case FUTEX_OP_CMP_LE: 730 ret = (oldval <= cmparg); 731 break; 732 case FUTEX_OP_CMP_GT: 733 ret = (oldval > cmparg); 734 break; 735 default: 736 LIN_SDT_PROBE1(futex, futex_atomic_op, unimplemented_cmp, cmp); 737 ret = -ENOSYS; 738 } 739 740 LIN_SDT_PROBE1(futex, futex_atomic_op, return, ret); 741 return (ret); 742 } 743 744 int 745 linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args) 746 { 747 int clockrt, nrwake, op_ret, ret; 748 struct linux_pemuldata *pem; 749 struct waiting_proc *wp; 750 struct futex *f, *f2; 751 struct timespec uts, *ts; 752 int error, save; 753 uint32_t flags, val; 754 755 LIN_SDT_PROBE2(futex, linux_sys_futex, entry, td, args); 756 757 if (args->op & LINUX_FUTEX_PRIVATE_FLAG) { 758 flags = 0; 759 args->op &= ~LINUX_FUTEX_PRIVATE_FLAG; 760 } else 761 flags = FUTEX_SHARED; 762 763 /* 764 * Currently support for switching between CLOCK_MONOTONIC and 765 * CLOCK_REALTIME is not present. However Linux forbids the use of 766 * FUTEX_CLOCK_REALTIME with any op except FUTEX_WAIT_BITSET and 767 * FUTEX_WAIT_REQUEUE_PI. 768 */ 769 clockrt = args->op & LINUX_FUTEX_CLOCK_REALTIME; 770 args->op = args->op & ~LINUX_FUTEX_CLOCK_REALTIME; 771 if (clockrt && args->op != LINUX_FUTEX_WAIT_BITSET && 772 args->op != LINUX_FUTEX_WAIT_REQUEUE_PI) { 773 LIN_SDT_PROBE0(futex, linux_sys_futex, 774 unimplemented_clockswitch); 775 LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); 776 return (ENOSYS); 777 } 778 779 error = 0; 780 f = f2 = NULL; 781 782 switch (args->op) { 783 case LINUX_FUTEX_WAIT: 784 args->val3 = FUTEX_BITSET_MATCH_ANY; 785 /* FALLTHROUGH */ 786 787 case LINUX_FUTEX_WAIT_BITSET: 788 LIN_SDT_PROBE3(futex, linux_sys_futex, debug_wait, args->uaddr, 789 args->val, args->val3); 790 LINUX_CTR3(sys_futex, "WAIT uaddr %p val 0x%x bitset 0x%x", 791 args->uaddr, args->val, args->val3); 792 793 if (args->timeout != NULL) { 794 error = futex_copyin_timeout(args->op, args->timeout, 795 clockrt, &uts); 796 if (error) { 797 LIN_SDT_PROBE1(futex, linux_sys_futex, copyin_error, 798 error); 799 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 800 return (error); 801 } 802 ts = &uts; 803 } else 804 ts = NULL; 805 806 retry0: 807 error = futex_get(args->uaddr, &wp, &f, 808 flags | FUTEX_CREATE_WP); 809 if (error) { 810 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 811 return (error); 812 } 813 814 error = copyin_nofault(args->uaddr, &val, sizeof(val)); 815 if (error) { 816 futex_put(f, wp); 817 error = copyin(args->uaddr, &val, sizeof(val)); 818 if (error == 0) 819 goto retry0; 820 LIN_SDT_PROBE1(futex, linux_sys_futex, copyin_error, 821 error); 822 LINUX_CTR1(sys_futex, "WAIT copyin failed %d", 823 error); 824 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 825 return (error); 826 } 827 if (val != args->val) { 828 LIN_SDT_PROBE4(futex, linux_sys_futex, 829 debug_wait_value_neq, args->uaddr, args->val, val, 830 args->val3); 831 LINUX_CTR3(sys_futex, 832 "WAIT uaddr %p val 0x%x != uval 0x%x", 833 args->uaddr, args->val, val); 834 futex_put(f, wp); 835 836 LIN_SDT_PROBE1(futex, linux_sys_futex, return, 837 EWOULDBLOCK); 838 return (EWOULDBLOCK); 839 } 840 841 error = futex_wait(f, wp, ts, args->val3); 842 break; 843 844 case LINUX_FUTEX_WAKE: 845 args->val3 = FUTEX_BITSET_MATCH_ANY; 846 /* FALLTHROUGH */ 847 848 case LINUX_FUTEX_WAKE_BITSET: 849 LIN_SDT_PROBE3(futex, linux_sys_futex, debug_wake, args->uaddr, 850 args->val, args->val3); 851 LINUX_CTR3(sys_futex, "WAKE uaddr %p nrwake 0x%x bitset 0x%x", 852 args->uaddr, args->val, args->val3); 853 854 error = futex_get(args->uaddr, NULL, &f, 855 flags | FUTEX_DONTCREATE); 856 if (error) { 857 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 858 return (error); 859 } 860 861 if (f == NULL) { 862 td->td_retval[0] = 0; 863 864 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 865 return (error); 866 } 867 td->td_retval[0] = futex_wake(f, args->val, args->val3); 868 futex_put(f, NULL); 869 break; 870 871 case LINUX_FUTEX_CMP_REQUEUE: 872 LIN_SDT_PROBE5(futex, linux_sys_futex, debug_cmp_requeue, 873 args->uaddr, args->val, args->val3, args->uaddr2, 874 args->timeout); 875 LINUX_CTR5(sys_futex, "CMP_REQUEUE uaddr %p " 876 "nrwake 0x%x uval 0x%x uaddr2 %p nrequeue 0x%x", 877 args->uaddr, args->val, args->val3, args->uaddr2, 878 args->timeout); 879 880 /* 881 * Linux allows this, we would not, it is an incorrect 882 * usage of declared ABI, so return EINVAL. 883 */ 884 if (args->uaddr == args->uaddr2) { 885 LIN_SDT_PROBE0(futex, linux_sys_futex, 886 invalid_cmp_requeue_use); 887 LIN_SDT_PROBE1(futex, linux_sys_futex, return, EINVAL); 888 return (EINVAL); 889 } 890 891 retry1: 892 error = futex_get(args->uaddr, NULL, &f, flags | FUTEX_DONTLOCK); 893 if (error) { 894 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 895 return (error); 896 } 897 898 /* 899 * To avoid deadlocks return EINVAL if second futex 900 * exists at this time. 901 * 902 * Glibc fall back to FUTEX_WAKE in case of any error 903 * returned by FUTEX_CMP_REQUEUE. 904 */ 905 error = futex_get(args->uaddr2, NULL, &f2, 906 flags | FUTEX_DONTEXISTS | FUTEX_DONTLOCK); 907 if (error) { 908 futex_put(f, NULL); 909 910 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 911 return (error); 912 } 913 futex_lock(f); 914 futex_lock(f2); 915 error = copyin_nofault(args->uaddr, &val, sizeof(val)); 916 if (error) { 917 futex_put(f2, NULL); 918 futex_put(f, NULL); 919 error = copyin(args->uaddr, &val, sizeof(val)); 920 if (error == 0) 921 goto retry1; 922 LIN_SDT_PROBE1(futex, linux_sys_futex, copyin_error, 923 error); 924 LINUX_CTR1(sys_futex, "CMP_REQUEUE copyin failed %d", 925 error); 926 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 927 return (error); 928 } 929 if (val != args->val3) { 930 LIN_SDT_PROBE2(futex, linux_sys_futex, 931 debug_cmp_requeue_value_neq, args->val, val); 932 LINUX_CTR2(sys_futex, "CMP_REQUEUE val 0x%x != uval 0x%x", 933 args->val, val); 934 futex_put(f2, NULL); 935 futex_put(f, NULL); 936 937 LIN_SDT_PROBE1(futex, linux_sys_futex, return, EAGAIN); 938 return (EAGAIN); 939 } 940 941 nrwake = (int)(unsigned long)args->timeout; 942 td->td_retval[0] = futex_requeue(f, args->val, f2, nrwake); 943 futex_put(f2, NULL); 944 futex_put(f, NULL); 945 break; 946 947 case LINUX_FUTEX_WAKE_OP: 948 LIN_SDT_PROBE5(futex, linux_sys_futex, debug_wake_op, 949 args->uaddr, args->op, args->val, args->uaddr2, args->val3); 950 LINUX_CTR5(sys_futex, "WAKE_OP " 951 "uaddr %p nrwake 0x%x uaddr2 %p op 0x%x nrwake2 0x%x", 952 args->uaddr, args->val, args->uaddr2, args->val3, 953 args->timeout); 954 955 retry2: 956 error = futex_get(args->uaddr, NULL, &f, flags | FUTEX_DONTLOCK); 957 if (error) { 958 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 959 return (error); 960 } 961 962 if (args->uaddr != args->uaddr2) 963 error = futex_get(args->uaddr2, NULL, &f2, 964 flags | FUTEX_DONTLOCK); 965 if (error) { 966 futex_put(f, NULL); 967 968 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 969 return (error); 970 } 971 futex_lock(f); 972 futex_lock(f2); 973 974 /* 975 * This function returns positive number as results and 976 * negative as errors 977 */ 978 save = vm_fault_disable_pagefaults(); 979 op_ret = futex_atomic_op(td, args->val3, args->uaddr2); 980 vm_fault_enable_pagefaults(save); 981 982 LINUX_CTR2(sys_futex, "WAKE_OP atomic_op uaddr %p ret 0x%x", 983 args->uaddr, op_ret); 984 985 if (op_ret < 0) { 986 if (f2 != NULL) 987 futex_put(f2, NULL); 988 futex_put(f, NULL); 989 error = copyin(args->uaddr2, &val, sizeof(val)); 990 if (error == 0) 991 goto retry2; 992 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 993 return (error); 994 } 995 996 ret = futex_wake(f, args->val, args->val3); 997 998 if (op_ret > 0) { 999 op_ret = 0; 1000 nrwake = (int)(unsigned long)args->timeout; 1001 1002 if (f2 != NULL) 1003 op_ret += futex_wake(f2, nrwake, args->val3); 1004 else 1005 op_ret += futex_wake(f, nrwake, args->val3); 1006 ret += op_ret; 1007 1008 } 1009 if (f2 != NULL) 1010 futex_put(f2, NULL); 1011 futex_put(f, NULL); 1012 td->td_retval[0] = ret; 1013 break; 1014 1015 case LINUX_FUTEX_LOCK_PI: 1016 /* not yet implemented */ 1017 pem = pem_find(td->td_proc); 1018 if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) { 1019 linux_msg(td, 1020 "linux_sys_futex: " 1021 "unsupported futex_pi op\n"); 1022 pem->flags |= LINUX_XUNSUP_FUTEXPIOP; 1023 LIN_SDT_PROBE0(futex, linux_sys_futex, 1024 unimplemented_lock_pi); 1025 } 1026 LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); 1027 return (ENOSYS); 1028 1029 case LINUX_FUTEX_UNLOCK_PI: 1030 /* not yet implemented */ 1031 pem = pem_find(td->td_proc); 1032 if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) { 1033 linux_msg(td, 1034 "linux_sys_futex: " 1035 "unsupported futex_pi op\n"); 1036 pem->flags |= LINUX_XUNSUP_FUTEXPIOP; 1037 LIN_SDT_PROBE0(futex, linux_sys_futex, 1038 unimplemented_unlock_pi); 1039 } 1040 LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); 1041 return (ENOSYS); 1042 1043 case LINUX_FUTEX_TRYLOCK_PI: 1044 /* not yet implemented */ 1045 pem = pem_find(td->td_proc); 1046 if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) { 1047 linux_msg(td, 1048 "linux_sys_futex: " 1049 "unsupported futex_pi op\n"); 1050 pem->flags |= LINUX_XUNSUP_FUTEXPIOP; 1051 LIN_SDT_PROBE0(futex, linux_sys_futex, 1052 unimplemented_trylock_pi); 1053 } 1054 LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); 1055 return (ENOSYS); 1056 1057 case LINUX_FUTEX_REQUEUE: 1058 /* 1059 * Glibc does not use this operation since version 2.3.3, 1060 * as it is racy and replaced by FUTEX_CMP_REQUEUE operation. 1061 * Glibc versions prior to 2.3.3 fall back to FUTEX_WAKE when 1062 * FUTEX_REQUEUE returned EINVAL. 1063 */ 1064 pem = pem_find(td->td_proc); 1065 if ((pem->flags & LINUX_XDEPR_REQUEUEOP) == 0) { 1066 linux_msg(td, 1067 "linux_sys_futex: " 1068 "unsupported futex_requeue op\n"); 1069 pem->flags |= LINUX_XDEPR_REQUEUEOP; 1070 LIN_SDT_PROBE0(futex, linux_sys_futex, 1071 deprecated_requeue); 1072 } 1073 1074 LIN_SDT_PROBE1(futex, linux_sys_futex, return, EINVAL); 1075 return (EINVAL); 1076 1077 case LINUX_FUTEX_WAIT_REQUEUE_PI: 1078 /* not yet implemented */ 1079 pem = pem_find(td->td_proc); 1080 if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) { 1081 linux_msg(td, 1082 "linux_sys_futex: " 1083 "unsupported futex_pi op\n"); 1084 pem->flags |= LINUX_XUNSUP_FUTEXPIOP; 1085 LIN_SDT_PROBE0(futex, linux_sys_futex, 1086 unimplemented_wait_requeue_pi); 1087 } 1088 LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); 1089 return (ENOSYS); 1090 1091 case LINUX_FUTEX_CMP_REQUEUE_PI: 1092 /* not yet implemented */ 1093 pem = pem_find(td->td_proc); 1094 if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) { 1095 linux_msg(td, 1096 "linux_sys_futex: " 1097 "unsupported futex_pi op\n"); 1098 pem->flags |= LINUX_XUNSUP_FUTEXPIOP; 1099 LIN_SDT_PROBE0(futex, linux_sys_futex, 1100 unimplemented_cmp_requeue_pi); 1101 } 1102 LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); 1103 return (ENOSYS); 1104 1105 default: 1106 linux_msg(td, 1107 "linux_sys_futex: unknown op %d\n", args->op); 1108 LIN_SDT_PROBE1(futex, linux_sys_futex, unknown_operation, 1109 args->op); 1110 LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); 1111 return (ENOSYS); 1112 } 1113 1114 LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); 1115 return (error); 1116 } 1117 1118 int 1119 linux_set_robust_list(struct thread *td, struct linux_set_robust_list_args *args) 1120 { 1121 struct linux_emuldata *em; 1122 1123 LIN_SDT_PROBE2(futex, linux_set_robust_list, entry, td, args); 1124 1125 if (args->len != sizeof(struct linux_robust_list_head)) { 1126 LIN_SDT_PROBE0(futex, linux_set_robust_list, size_error); 1127 LIN_SDT_PROBE1(futex, linux_set_robust_list, return, EINVAL); 1128 return (EINVAL); 1129 } 1130 1131 em = em_find(td); 1132 em->robust_futexes = args->head; 1133 1134 LIN_SDT_PROBE1(futex, linux_set_robust_list, return, 0); 1135 return (0); 1136 } 1137 1138 int 1139 linux_get_robust_list(struct thread *td, struct linux_get_robust_list_args *args) 1140 { 1141 struct linux_emuldata *em; 1142 struct linux_robust_list_head *head; 1143 l_size_t len = sizeof(struct linux_robust_list_head); 1144 struct thread *td2; 1145 int error = 0; 1146 1147 LIN_SDT_PROBE2(futex, linux_get_robust_list, entry, td, args); 1148 1149 if (!args->pid) { 1150 em = em_find(td); 1151 KASSERT(em != NULL, ("get_robust_list: emuldata notfound.\n")); 1152 head = em->robust_futexes; 1153 } else { 1154 td2 = tdfind(args->pid, -1); 1155 if (td2 == NULL) { 1156 LIN_SDT_PROBE1(futex, linux_get_robust_list, return, 1157 ESRCH); 1158 return (ESRCH); 1159 } 1160 if (SV_PROC_ABI(td2->td_proc) != SV_ABI_LINUX) { 1161 LIN_SDT_PROBE1(futex, linux_get_robust_list, return, 1162 EPERM); 1163 PROC_UNLOCK(td2->td_proc); 1164 return (EPERM); 1165 } 1166 1167 em = em_find(td2); 1168 KASSERT(em != NULL, ("get_robust_list: emuldata notfound.\n")); 1169 /* XXX: ptrace? */ 1170 if (priv_check(td, PRIV_CRED_SETUID) || 1171 priv_check(td, PRIV_CRED_SETEUID) || 1172 p_candebug(td, td2->td_proc)) { 1173 PROC_UNLOCK(td2->td_proc); 1174 1175 LIN_SDT_PROBE1(futex, linux_get_robust_list, return, 1176 EPERM); 1177 return (EPERM); 1178 } 1179 head = em->robust_futexes; 1180 1181 PROC_UNLOCK(td2->td_proc); 1182 } 1183 1184 error = copyout(&len, args->len, sizeof(l_size_t)); 1185 if (error) { 1186 LIN_SDT_PROBE1(futex, linux_get_robust_list, copyout_error, 1187 error); 1188 LIN_SDT_PROBE1(futex, linux_get_robust_list, return, EFAULT); 1189 return (EFAULT); 1190 } 1191 1192 error = copyout(&head, args->head, sizeof(head)); 1193 if (error) { 1194 LIN_SDT_PROBE1(futex, linux_get_robust_list, copyout_error, 1195 error); 1196 } 1197 1198 LIN_SDT_PROBE1(futex, linux_get_robust_list, return, error); 1199 return (error); 1200 } 1201 1202 static int 1203 handle_futex_death(struct linux_emuldata *em, uint32_t *uaddr, 1204 unsigned int pi) 1205 { 1206 uint32_t uval, nval, mval; 1207 struct futex *f; 1208 int error; 1209 1210 LIN_SDT_PROBE3(futex, handle_futex_death, entry, em, uaddr, pi); 1211 1212 retry: 1213 error = copyin(uaddr, &uval, 4); 1214 if (error) { 1215 LIN_SDT_PROBE1(futex, handle_futex_death, copyin_error, error); 1216 LIN_SDT_PROBE1(futex, handle_futex_death, return, EFAULT); 1217 return (EFAULT); 1218 } 1219 if ((uval & FUTEX_TID_MASK) == em->em_tid) { 1220 mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED; 1221 nval = casuword32(uaddr, uval, mval); 1222 1223 if (nval == -1) { 1224 LIN_SDT_PROBE1(futex, handle_futex_death, return, 1225 EFAULT); 1226 return (EFAULT); 1227 } 1228 1229 if (nval != uval) 1230 goto retry; 1231 1232 if (!pi && (uval & FUTEX_WAITERS)) { 1233 error = futex_get(uaddr, NULL, &f, 1234 FUTEX_DONTCREATE | FUTEX_SHARED); 1235 if (error) { 1236 LIN_SDT_PROBE1(futex, handle_futex_death, 1237 return, error); 1238 return (error); 1239 } 1240 if (f != NULL) { 1241 futex_wake(f, 1, FUTEX_BITSET_MATCH_ANY); 1242 futex_put(f, NULL); 1243 } 1244 } 1245 } 1246 1247 LIN_SDT_PROBE1(futex, handle_futex_death, return, 0); 1248 return (0); 1249 } 1250 1251 static int 1252 fetch_robust_entry(struct linux_robust_list **entry, 1253 struct linux_robust_list **head, unsigned int *pi) 1254 { 1255 l_ulong uentry; 1256 int error; 1257 1258 LIN_SDT_PROBE3(futex, fetch_robust_entry, entry, entry, head, pi); 1259 1260 error = copyin((const void *)head, &uentry, sizeof(l_ulong)); 1261 if (error) { 1262 LIN_SDT_PROBE1(futex, fetch_robust_entry, copyin_error, error); 1263 LIN_SDT_PROBE1(futex, fetch_robust_entry, return, EFAULT); 1264 return (EFAULT); 1265 } 1266 1267 *entry = (void *)(uentry & ~1UL); 1268 *pi = uentry & 1; 1269 1270 LIN_SDT_PROBE1(futex, fetch_robust_entry, return, 0); 1271 return (0); 1272 } 1273 1274 /* This walks the list of robust futexes releasing them. */ 1275 void 1276 release_futexes(struct thread *td, struct linux_emuldata *em) 1277 { 1278 struct linux_robust_list_head *head = NULL; 1279 struct linux_robust_list *entry, *next_entry, *pending; 1280 unsigned int limit = 2048, pi, next_pi, pip; 1281 l_long futex_offset; 1282 int rc, error; 1283 1284 LIN_SDT_PROBE2(futex, release_futexes, entry, td, em); 1285 1286 head = em->robust_futexes; 1287 1288 if (head == NULL) { 1289 LIN_SDT_PROBE0(futex, release_futexes, return); 1290 return; 1291 } 1292 1293 if (fetch_robust_entry(&entry, PTRIN(&head->list.next), &pi)) { 1294 LIN_SDT_PROBE0(futex, release_futexes, return); 1295 return; 1296 } 1297 1298 error = copyin(&head->futex_offset, &futex_offset, 1299 sizeof(futex_offset)); 1300 if (error) { 1301 LIN_SDT_PROBE1(futex, release_futexes, copyin_error, error); 1302 LIN_SDT_PROBE0(futex, release_futexes, return); 1303 return; 1304 } 1305 1306 if (fetch_robust_entry(&pending, PTRIN(&head->pending_list), &pip)) { 1307 LIN_SDT_PROBE0(futex, release_futexes, return); 1308 return; 1309 } 1310 1311 while (entry != &head->list) { 1312 rc = fetch_robust_entry(&next_entry, PTRIN(&entry->next), &next_pi); 1313 1314 if (entry != pending) 1315 if (handle_futex_death(em, 1316 (uint32_t *)((caddr_t)entry + futex_offset), pi)) { 1317 LIN_SDT_PROBE0(futex, release_futexes, return); 1318 return; 1319 } 1320 if (rc) { 1321 LIN_SDT_PROBE0(futex, release_futexes, return); 1322 return; 1323 } 1324 1325 entry = next_entry; 1326 pi = next_pi; 1327 1328 if (!--limit) 1329 break; 1330 1331 sched_relinquish(curthread); 1332 } 1333 1334 if (pending) 1335 handle_futex_death(em, (uint32_t *)((caddr_t)pending + futex_offset), pip); 1336 1337 LIN_SDT_PROBE0(futex, release_futexes, return); 1338 } 1339