1 /* $NetBSD: linux_futex.c,v 1.7 2006/07/24 19:01:49 manu Exp $ */ 2 3 /*- 4 * Copyright (c) 2005 Emmanuel Dreyfus, all rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. All advertising materials mentioning features or use of this software 15 * must display the following acknowledgement: 16 * This product includes software developed by Emmanuel Dreyfus 17 * 4. The name of the author may not be used to endorse or promote 18 * products derived from this software without specific prior written 19 * permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE THE AUTHOR AND CONTRIBUTORS ``AS IS'' 22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 23 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS 25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 
*/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#if 0
__KERNEL_RCSID(1, "$NetBSD: linux_futex.c,v 1.7 2006/07/24 19:01:49 manu Exp $");
#endif

#include "opt_compat.h"

#include <sys/param.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sx.h>
#include <sys/malloc.h>

#ifdef COMPAT_LINUX32
#include <machine/../linux32/linux.h>
#include <machine/../linux32/linux32_proto.h>
#else
#include <machine/../linux/linux.h>
#include <machine/../linux/linux_proto.h>
#endif
#include <compat/linux/linux_futex.h>

struct futex;

/*
 * One sleeping thread.  An instance is allocated by futex_sleep() for the
 * duration of the sleep; its address is also the tsleep()/wakeup_one()
 * wait channel.
 */
struct waiting_proc {
	struct thread *wp_t;		/* the sleeping thread */
	struct futex *wp_new_futex;	/* set by futex_wake() when the waiter
					 * is requeued; NULL otherwise */
	TAILQ_ENTRY(waiting_proc) wp_list; /* linkage on futex.f_waiting_proc */
};

/*
 * Kernel-side state for one futex word, looked up by its user address.
 */
struct futex {
	void *f_uaddr;			/* user address identifying the futex */
	int f_refcount;			/* references handed out by futex_get();
					 * the futex is freed when futex_put()
					 * drops the last one */
	LIST_ENTRY(futex) f_list;	/* linkage on the global futex_list */
	TAILQ_HEAD(lf_waiting_proc, waiting_proc) f_waiting_proc;
					/* queue of sleepers, in arrival order */
};

/* All futexes currently referenced, in no particular order. */
LIST_HEAD(futex_list, futex) futex_list;
struct sx futex_sx; /* this protects the LIST of futexes */

/* Take / release futex_sx exclusively. */
#define FUTEX_LOCK sx_xlock(&futex_sx)
#define FUTEX_UNLOCK sx_xunlock(&futex_sx)

/*
 * Values for the second argument of futex_get(): whether the caller
 * already holds FUTEX_LOCK (FUTEX_LOCKED) or futex_get() has to take it
 * itself (FUTEX_UNLOCKED).
 */
#define FUTEX_LOCKED 1
#define FUTEX_UNLOCKED 0

/* Every futex operation is serialized by Giant. */
#define FUTEX_SYSTEM_LOCK mtx_lock(&Giant)
#define FUTEX_SYSTEM_UNLOCK mtx_unlock(&Giant)

static struct futex *futex_get(void *, int);
static void futex_put(struct futex *);
static int futex_sleep(struct futex *, struct thread *, unsigned long);
static int futex_wake(struct futex *, int, struct futex *, int);
static int futex_atomic_op(struct thread *td, int encoded_op, caddr_t uaddr);

/*
 * Helpers used by futex_atomic_op() to modify the user word at uaddr and
 * hand back its previous value through *oldval (support.s).
 */
int futex_xchgl(int oparg, caddr_t uaddr, int *oldval);
int futex_addl(int oparg, caddr_t uaddr, int *oldval);
int futex_orl(int oparg, caddr_t uaddr, int *oldval);
int futex_andl(int oparg, caddr_t uaddr, int *oldval);
int futex_xorl(int oparg, caddr_t uaddr, int *oldval);
100 101 int 102 linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args) 103 { 104 int val; 105 int ret; 106 struct l_timespec timeout = {0, 0}; 107 int error = 0; 108 struct futex *f; 109 struct futex *newf; 110 int timeout_hz; 111 struct timeval tv = {0, 0}; 112 struct futex *f2; 113 int op_ret; 114 115 #ifdef DEBUG 116 if (ldebug(sys_futex)) 117 printf(ARGS(futex, "%p, %i, %i, *, %p, %i"), args->uaddr, args->op, 118 args->val, args->uaddr2, args->val3); 119 #endif 120 121 /* 122 * Our implementation provides only privates futexes. Most of the apps 123 * should use private futexes but don't claim so. Therefore we treat 124 * all futexes as private by clearing the FUTEX_PRIVATE_FLAG. It works 125 * in most cases (ie. when futexes are not shared on file descriptor 126 * or between different processes.). 127 */ 128 args->op = (args->op & ~LINUX_FUTEX_PRIVATE_FLAG); 129 130 switch (args->op) { 131 case LINUX_FUTEX_WAIT: 132 FUTEX_SYSTEM_LOCK; 133 134 if ((error = copyin(args->uaddr, 135 &val, sizeof(val))) != 0) { 136 FUTEX_SYSTEM_UNLOCK; 137 return error; 138 } 139 140 if (val != args->val) { 141 FUTEX_SYSTEM_UNLOCK; 142 return EWOULDBLOCK; 143 } 144 145 if (args->timeout != NULL) { 146 if ((error = copyin(args->timeout, 147 &timeout, sizeof(timeout))) != 0) { 148 FUTEX_SYSTEM_UNLOCK; 149 return error; 150 } 151 } 152 153 #ifdef DEBUG 154 if (ldebug(sys_futex)) 155 printf("FUTEX_WAIT %d: val = %d, uaddr = %p, " 156 "*uaddr = %d, timeout = %d.%09lu\n", 157 td->td_proc->p_pid, args->val, 158 args->uaddr, val, timeout.tv_sec, 159 (unsigned long)timeout.tv_nsec); 160 #endif 161 tv.tv_usec = timeout.tv_sec * 1000000 + timeout.tv_nsec / 1000; 162 timeout_hz = tvtohz(&tv); 163 164 if (timeout.tv_sec == 0 && timeout.tv_nsec == 0) 165 timeout_hz = 0; 166 /* 167 * If the user process requests a non null timeout, 168 * make sure we do not turn it into an infinite 169 * timeout because timeout_hz gets null. 170 * 171 * We use a minimal timeout of 1/hz. 
Maybe it would 172 * make sense to just return ETIMEDOUT without sleeping. 173 */ 174 if (((timeout.tv_sec != 0) || (timeout.tv_nsec != 0)) && 175 (timeout_hz == 0)) 176 timeout_hz = 1; 177 178 179 f = futex_get(args->uaddr, FUTEX_UNLOCKED); 180 ret = futex_sleep(f, td, timeout_hz); 181 futex_put(f); 182 183 #ifdef DEBUG 184 if (ldebug(sys_futex)) 185 printf("FUTEX_WAIT %d: uaddr = %p, " 186 "ret = %d\n", td->td_proc->p_pid, args->uaddr, ret); 187 #endif 188 189 FUTEX_SYSTEM_UNLOCK; 190 switch (ret) { 191 case EWOULDBLOCK: /* timeout */ 192 return ETIMEDOUT; 193 break; 194 case EINTR: /* signal */ 195 return EINTR; 196 break; 197 case 0: /* FUTEX_WAKE received */ 198 #ifdef DEBUG 199 if (ldebug(sys_futex)) 200 printf("FUTEX_WAIT %d: uaddr = %p, " 201 "got FUTEX_WAKE\n", 202 td->td_proc->p_pid, args->uaddr); 203 #endif 204 return 0; 205 break; 206 default: 207 #ifdef DEBUG 208 if (ldebug(sys_futex)) 209 printf("FUTEX_WAIT: unexpected ret = %d\n", 210 ret); 211 #endif 212 break; 213 } 214 215 /* NOTREACHED */ 216 break; 217 218 case LINUX_FUTEX_WAKE: 219 FUTEX_SYSTEM_LOCK; 220 221 /* 222 * XXX: Linux is able to cope with different addresses 223 * corresponding to the same mapped memory in the sleeping 224 * and waker process(es). 
225 */ 226 #ifdef DEBUG 227 if (ldebug(sys_futex)) 228 printf("FUTEX_WAKE %d: uaddr = %p, val = %d\n", 229 td->td_proc->p_pid, args->uaddr, args->val); 230 #endif 231 f = futex_get(args->uaddr, FUTEX_UNLOCKED); 232 td->td_retval[0] = futex_wake(f, args->val, NULL, 0); 233 futex_put(f); 234 235 FUTEX_SYSTEM_UNLOCK; 236 break; 237 238 case LINUX_FUTEX_CMP_REQUEUE: 239 FUTEX_SYSTEM_LOCK; 240 241 if ((error = copyin(args->uaddr, 242 &val, sizeof(val))) != 0) { 243 FUTEX_SYSTEM_UNLOCK; 244 return error; 245 } 246 247 if (val != args->val3) { 248 FUTEX_SYSTEM_UNLOCK; 249 return EAGAIN; 250 } 251 252 f = futex_get(args->uaddr, FUTEX_UNLOCKED); 253 newf = futex_get(args->uaddr2, FUTEX_UNLOCKED); 254 td->td_retval[0] = futex_wake(f, args->val, newf, 255 (int)(unsigned long)args->timeout); 256 futex_put(f); 257 futex_put(newf); 258 259 FUTEX_SYSTEM_UNLOCK; 260 break; 261 262 case LINUX_FUTEX_REQUEUE: 263 FUTEX_SYSTEM_LOCK; 264 265 f = futex_get(args->uaddr, FUTEX_UNLOCKED); 266 newf = futex_get(args->uaddr2, FUTEX_UNLOCKED); 267 td->td_retval[0] = futex_wake(f, args->val, newf, 268 (int)(unsigned long)args->timeout); 269 futex_put(f); 270 futex_put(newf); 271 272 FUTEX_SYSTEM_UNLOCK; 273 break; 274 275 case LINUX_FUTEX_FD: 276 #ifdef DEBUG 277 printf("linux_sys_futex: unimplemented op %d\n", 278 args->op); 279 #endif 280 return (ENOSYS); 281 282 case LINUX_FUTEX_WAKE_OP: 283 FUTEX_SYSTEM_LOCK; 284 #ifdef DEBUG 285 if (ldebug(sys_futex)) 286 printf("FUTEX_WAKE_OP: %d: uaddr = %p, op = %d, " 287 "val = %x, uaddr2 = %p, val3 = %x\n", 288 td->td_proc->p_pid, args->uaddr, args->op, 289 args->val, args->uaddr2, args->val3); 290 #endif 291 f = futex_get(args->uaddr, FUTEX_UNLOCKED); 292 f2 = futex_get(args->uaddr2, FUTEX_UNLOCKED); 293 294 /* 295 * This function returns positive number as results and 296 * negative as errors 297 */ 298 op_ret = futex_atomic_op(td, args->val3, args->uaddr2); 299 #ifdef DEBUG 300 if (ldebug(sys_futex)) 301 printf("futex_atomic_op ret %d\n", op_ret); 
302 #endif 303 if (op_ret < 0) { 304 /* XXX: We don't handle the EFAULT yet. */ 305 if (op_ret != -EFAULT) { 306 futex_put(f); 307 futex_put(f2); 308 FUTEX_SYSTEM_UNLOCK; 309 return (-op_ret); 310 } 311 312 futex_put(f); 313 futex_put(f2); 314 315 FUTEX_SYSTEM_UNLOCK; 316 return (EFAULT); 317 } 318 319 ret = futex_wake(f, args->val, NULL, 0); 320 futex_put(f); 321 if (op_ret > 0) { 322 op_ret = 0; 323 /* 324 * Linux abuses the address of the timespec parameter 325 * as the number of retries. 326 */ 327 op_ret += futex_wake(f2, 328 (int)(unsigned long)args->timeout, NULL, 0); 329 ret += op_ret; 330 } 331 futex_put(f2); 332 td->td_retval[0] = ret; 333 334 FUTEX_SYSTEM_UNLOCK; 335 break; 336 337 case LINUX_FUTEX_LOCK_PI: 338 /* not yet implemented */ 339 return (ENOSYS); 340 341 case LINUX_FUTEX_UNLOCK_PI: 342 /* not yet implemented */ 343 return (ENOSYS); 344 345 case LINUX_FUTEX_TRYLOCK_PI: 346 /* not yet implemented */ 347 return (ENOSYS); 348 349 default: 350 printf("linux_sys_futex: unknown op %d\n", 351 args->op); 352 return (ENOSYS); 353 } 354 return (0); 355 } 356 357 static struct futex * 358 futex_get(void *uaddr, int locked) 359 { 360 struct futex *f; 361 362 if (locked == FUTEX_UNLOCKED) 363 FUTEX_LOCK; 364 LIST_FOREACH(f, &futex_list, f_list) { 365 if (f->f_uaddr == uaddr) { 366 f->f_refcount++; 367 if (locked == FUTEX_UNLOCKED) 368 FUTEX_UNLOCK; 369 return f; 370 } 371 } 372 373 f = malloc(sizeof(*f), M_LINUX, M_WAITOK); 374 f->f_uaddr = uaddr; 375 f->f_refcount = 1; 376 TAILQ_INIT(&f->f_waiting_proc); 377 LIST_INSERT_HEAD(&futex_list, f, f_list); 378 if (locked == FUTEX_UNLOCKED) 379 FUTEX_UNLOCK; 380 381 return f; 382 } 383 384 static void 385 futex_put(f) 386 struct futex *f; 387 { 388 FUTEX_LOCK; 389 f->f_refcount--; 390 if (f->f_refcount == 0) { 391 LIST_REMOVE(f, f_list); 392 free(f, M_LINUX); 393 } 394 FUTEX_UNLOCK; 395 396 return; 397 } 398 399 static int 400 futex_sleep(struct futex *f, struct thread *td, unsigned long timeout) 401 { 402 struct 
waiting_proc *wp; 403 int ret; 404 405 wp = malloc(sizeof(*wp), M_LINUX, M_WAITOK); 406 wp->wp_t = td; 407 wp->wp_new_futex = NULL; 408 FUTEX_LOCK; 409 TAILQ_INSERT_TAIL(&f->f_waiting_proc, wp, wp_list); 410 FUTEX_UNLOCK; 411 412 #ifdef DEBUG 413 if (ldebug(sys_futex)) 414 printf("FUTEX --> %d tlseep timeout = %ld\n", 415 td->td_proc->p_pid, timeout); 416 #endif 417 ret = tsleep(wp, PCATCH | PZERO, "linuxfutex", timeout); 418 #ifdef DEBUG 419 if (ldebug(sys_futex)) 420 printf("FUTEX -> %d tsleep returns %d\n", 421 td->td_proc->p_pid, ret); 422 #endif 423 424 FUTEX_LOCK; 425 TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list); 426 FUTEX_UNLOCK; 427 428 /* if we got woken up in futex_wake */ 429 if ((ret == 0) && (wp->wp_new_futex != NULL)) { 430 /* suspend us on the new futex */ 431 ret = futex_sleep(wp->wp_new_futex, td, timeout); 432 /* and release the old one */ 433 futex_put(wp->wp_new_futex); 434 } 435 436 free(wp, M_LINUX); 437 438 return ret; 439 } 440 441 static int 442 futex_wake(struct futex *f, int n, struct futex *newf, int n2) 443 { 444 struct waiting_proc *wp; 445 int count; 446 447 /* 448 * Linux is very strange it wakes up N threads for 449 * all operations BUT requeue ones where its N+1 450 * mimic this. 451 */ 452 count = newf ? 
0 : 1; 453 454 FUTEX_LOCK; 455 TAILQ_FOREACH(wp, &f->f_waiting_proc, wp_list) { 456 if (count <= n) { 457 wakeup_one(wp); 458 count++; 459 } else { 460 if (newf != NULL) { 461 /* futex_put called after tsleep */ 462 wp->wp_new_futex = futex_get(newf->f_uaddr, 463 FUTEX_LOCKED); 464 wakeup_one(wp); 465 if (count - n >= n2) 466 break; 467 } 468 } 469 } 470 FUTEX_UNLOCK; 471 472 return count; 473 } 474 475 static int 476 futex_atomic_op(struct thread *td, int encoded_op, caddr_t uaddr) 477 { 478 int op = (encoded_op >> 28) & 7; 479 int cmp = (encoded_op >> 24) & 15; 480 int oparg = (encoded_op << 8) >> 20; 481 int cmparg = (encoded_op << 20) >> 20; 482 int oldval = 0, ret; 483 484 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 485 oparg = 1 << oparg; 486 487 #ifdef DEBUG 488 if (ldebug(sys_futex)) 489 printf("futex_atomic_op: op = %d, cmp = %d, oparg = %x, " 490 "cmparg = %x, uaddr = %p\n", 491 op, cmp, oparg, cmparg, uaddr); 492 #endif 493 /* XXX: linux verifies access here and returns EFAULT */ 494 495 switch (op) { 496 case FUTEX_OP_SET: 497 ret = futex_xchgl(oparg, uaddr, &oldval); 498 break; 499 case FUTEX_OP_ADD: 500 ret = futex_addl(oparg, uaddr, &oldval); 501 break; 502 case FUTEX_OP_OR: 503 ret = futex_orl(oparg, uaddr, &oldval); 504 break; 505 case FUTEX_OP_ANDN: 506 ret = futex_andl(~oparg, uaddr, &oldval); 507 break; 508 case FUTEX_OP_XOR: 509 ret = futex_xorl(oparg, uaddr, &oldval); 510 break; 511 default: 512 ret = -ENOSYS; 513 break; 514 } 515 516 if (ret) 517 return (ret); 518 519 switch (cmp) { 520 case FUTEX_OP_CMP_EQ: 521 return (oldval == cmparg); 522 case FUTEX_OP_CMP_NE: 523 return (oldval != cmparg); 524 case FUTEX_OP_CMP_LT: 525 return (oldval < cmparg); 526 case FUTEX_OP_CMP_GE: 527 return (oldval >= cmparg); 528 case FUTEX_OP_CMP_LE: 529 return (oldval <= cmparg); 530 case FUTEX_OP_CMP_GT: 531 return (oldval > cmparg); 532 default: 533 return (-ENOSYS); 534 } 535 } 536