1 /* $NetBSD: linux_futex.c,v 1.7 2006/07/24 19:01:49 manu Exp $ */ 2 3 /*- 4 * Copyright (c) 2005 Emmanuel Dreyfus, all rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. All advertising materials mentioning features or use of this software 15 * must display the following acknowledgement: 16 * This product includes software developed by Emmanuel Dreyfus 17 * 4. The name of the author may not be used to endorse or promote 18 * products derived from this software without specific prior written 19 * permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE THE AUTHOR AND CONTRIBUTORS ``AS IS'' 22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 23 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS 25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 #if 0 37 __KERNEL_RCSID(1, "$NetBSD: linux_futex.c,v 1.7 2006/07/24 19:01:49 manu Exp $"); 38 #endif 39 40 #include "opt_compat.h" 41 42 #include <sys/param.h> 43 #include <sys/types.h> 44 #include <sys/time.h> 45 #include <sys/systm.h> 46 #include <sys/proc.h> 47 #include <sys/queue.h> 48 #include <sys/lock.h> 49 #include <sys/mutex.h> 50 #include <sys/sx.h> 51 #include <sys/malloc.h> 52 53 #ifdef COMPAT_LINUX32 54 #include <machine/../linux32/linux.h> 55 #include <machine/../linux32/linux32_proto.h> 56 #else 57 #include <machine/../linux/linux.h> 58 #include <machine/../linux/linux_proto.h> 59 #endif 60 #include <compat/linux/linux_futex.h> 61 62 struct futex; 63 64 struct waiting_proc { 65 struct thread *wp_t; 66 struct futex *wp_new_futex; 67 TAILQ_ENTRY(waiting_proc) wp_list; 68 }; 69 struct futex { 70 void *f_uaddr; 71 int f_refcount; 72 LIST_ENTRY(futex) f_list; 73 TAILQ_HEAD(lf_waiting_proc, waiting_proc) f_waiting_proc; 74 }; 75 76 LIST_HEAD(futex_list, futex) futex_list; 77 struct sx futex_sx; /* this protects the LIST of futexes */ 78 79 #define FUTEX_LOCK sx_xlock(&futex_sx) 80 #define FUTEX_UNLOCK sx_xunlock(&futex_sx) 81 82 #define FUTEX_LOCKED 1 83 #define FUTEX_UNLOCKED 0 84 85 #define FUTEX_SYSTEM_LOCK mtx_lock(&Giant) 86 #define FUTEX_SYSTEM_UNLOCK mtx_unlock(&Giant) 87 88 static struct futex *futex_get(void *, int); 89 static void futex_put(struct futex *); 90 static int futex_sleep(struct futex *, struct thread *, unsigned long); 91 static int futex_wake(struct futex *, int, struct futex *, int); 92 static int futex_atomic_op(struct thread *td, int encoded_op, caddr_t uaddr); 93 static int futex_orl(int oparg, caddr_t uaddr, int *oldval); 94 static int futex_andl(int oparg, caddr_t uaddr, int *oldval); 95 static int futex_xorl(int oparg, caddr_t uaddr, int *oldval); 96 97 /* support.s */ 98 int futex_xchgl(int oparg, caddr_t uaddr, int *oldval); 99 int futex_addl(int oparg, caddr_t uaddr, int *oldval); 100 101 int 102 linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args) 103 { 104 int val; 105 int ret; 106 struct l_timespec timeout = {0, 0}; 107 int error = 0; 108 struct futex *f; 109 struct futex *newf; 110 int timeout_hz; 111 struct timeval tv = {0, 0}; 112 struct futex *f2; 113 int op_ret; 114 115 #ifdef DEBUG 116 if (ldebug(sys_futex)) 117 printf(ARGS(futex, "%p, %i, %i"), args->uaddr, args->op, 118 args->val); 119 #endif 120 121 switch (args->op) { 122 case LINUX_FUTEX_WAIT: 123 FUTEX_SYSTEM_LOCK; 124 125 if ((error = copyin(args->uaddr, 126 &val, sizeof(val))) != 0) { 127 FUTEX_SYSTEM_UNLOCK; 128 return error; 129 } 130 131 if (val != args->val) { 132 FUTEX_SYSTEM_UNLOCK; 133 return EWOULDBLOCK; 134 } 135 136 if (args->timeout != NULL) { 137 if ((error = copyin(args->timeout, 138 &timeout, sizeof(timeout))) != 0) { 139 FUTEX_SYSTEM_UNLOCK; 140 return error; 141 } 142 } 143 144 #ifdef DEBUG 145 if (ldebug(sys_futex)) 146 printf("FUTEX_WAIT %d: val = %d, uaddr = %p, " 147 "*uaddr = %d, timeout = %d.%09lu\n", 148 td->td_proc->p_pid, args->val, 149 args->uaddr, val, timeout.tv_sec, 150 (unsigned long)timeout.tv_nsec); 151 #endif 152 tv.tv_usec = timeout.tv_sec * 1000000 + timeout.tv_nsec / 1000; 153 timeout_hz = tvtohz(&tv); 154 155 if (timeout.tv_sec == 0 && timeout.tv_nsec == 0) 156 timeout_hz = 0; 157 /* 158 * If the user process requests a non null timeout, 159 * make sure we do not turn it into an infinite 160 * timeout because timeout_hz gets null. 161 * 162 * We use a minimal timeout of 1/hz. Maybe it would 163 * make sense to just return ETIMEDOUT without sleeping. 164 */ 165 if (((timeout.tv_sec != 0) || (timeout.tv_nsec != 0)) && 166 (timeout_hz == 0)) 167 timeout_hz = 1; 168 169 170 f = futex_get(args->uaddr, FUTEX_UNLOCKED); 171 ret = futex_sleep(f, td, timeout_hz); 172 futex_put(f); 173 174 #ifdef DEBUG 175 if (ldebug(sys_futex)) 176 printf("FUTEX_WAIT %d: uaddr = %p, " 177 "ret = %d\n", td->td_proc->p_pid, args->uaddr, ret); 178 #endif 179 180 FUTEX_SYSTEM_UNLOCK; 181 switch (ret) { 182 case EWOULDBLOCK: /* timeout */ 183 return ETIMEDOUT; 184 break; 185 case EINTR: /* signal */ 186 return EINTR; 187 break; 188 case 0: /* FUTEX_WAKE received */ 189 #ifdef DEBUG 190 if (ldebug(sys_futex)) 191 printf("FUTEX_WAIT %d: uaddr = %p, " 192 "got FUTEX_WAKE\n", 193 td->td_proc->p_pid, args->uaddr); 194 #endif 195 return 0; 196 break; 197 default: 198 #ifdef DEBUG 199 if (ldebug(sys_futex)) 200 printf("FUTEX_WAIT: unexpected ret = %d\n", 201 ret); 202 #endif 203 break; 204 } 205 206 /* NOTREACHED */ 207 break; 208 209 case LINUX_FUTEX_WAKE: 210 FUTEX_SYSTEM_LOCK; 211 212 /* 213 * XXX: Linux is able to cope with different addresses 214 * corresponding to the same mapped memory in the sleeping 215 * and waker process(es). 216 */ 217 #ifdef DEBUG 218 if (ldebug(sys_futex)) 219 printf("FUTEX_WAKE %d: uaddr = %p, val = %d\n", 220 td->td_proc->p_pid, args->uaddr, args->val); 221 #endif 222 f = futex_get(args->uaddr, FUTEX_UNLOCKED); 223 td->td_retval[0] = futex_wake(f, args->val, NULL, 0); 224 futex_put(f); 225 226 FUTEX_SYSTEM_UNLOCK; 227 break; 228 229 case LINUX_FUTEX_CMP_REQUEUE: 230 FUTEX_SYSTEM_LOCK; 231 232 if ((error = copyin(args->uaddr, 233 &val, sizeof(val))) != 0) { 234 FUTEX_SYSTEM_UNLOCK; 235 return error; 236 } 237 238 if (val != args->val3) { 239 FUTEX_SYSTEM_UNLOCK; 240 return EAGAIN; 241 } 242 243 f = futex_get(args->uaddr, FUTEX_UNLOCKED); 244 newf = futex_get(args->uaddr2, FUTEX_UNLOCKED); 245 td->td_retval[0] = futex_wake(f, args->val, newf, 246 (int)(unsigned long)args->timeout); 247 futex_put(f); 248 futex_put(newf); 249 250 FUTEX_SYSTEM_UNLOCK; 251 break; 252 253 case LINUX_FUTEX_REQUEUE: 254 FUTEX_SYSTEM_LOCK; 255 256 f = futex_get(args->uaddr, FUTEX_UNLOCKED); 257 newf = futex_get(args->uaddr2, FUTEX_UNLOCKED); 258 td->td_retval[0] = futex_wake(f, args->val, newf, 259 (int)(unsigned long)args->timeout); 260 futex_put(f); 261 futex_put(newf); 262 263 FUTEX_SYSTEM_UNLOCK; 264 break; 265 266 case LINUX_FUTEX_FD: 267 /* XXX: Linux plans to remove this operation */ 268 printf("linux_sys_futex: unimplemented op %d\n", 269 args->op); 270 break; 271 272 case LINUX_FUTEX_WAKE_OP: 273 FUTEX_SYSTEM_LOCK; 274 #ifdef DEBUG 275 if (ldebug(sys_futex)) 276 printf("FUTEX_WAKE_OP: %d: uaddr = %p, op = %d, " 277 "val = %d, uaddr2 = %p, val3 = %d\n", 278 td->td_proc->p_pid, args->uaddr, args->op, 279 args->val, args->uaddr2, args->val3); 280 #endif 281 f = futex_get(args->uaddr, FUTEX_UNLOCKED); 282 f2 = futex_get(args->uaddr2, FUTEX_UNLOCKED); 283 284 /* 285 * This function returns positive number as results and 286 * negative as errors 287 */ 288 op_ret = futex_atomic_op(td, args->val3, args->uaddr2); 289 if (op_ret < 0) { 290 291 /* XXX: We don't handle the EFAULT yet. */ 292 if (op_ret != -EFAULT) { 293 futex_put(f); 294 futex_put(f2); 295 FUTEX_SYSTEM_UNLOCK; 296 return (-op_ret); 297 } 298 299 futex_put(f); 300 futex_put(f2); 301 302 FUTEX_SYSTEM_UNLOCK; 303 return (EFAULT); 304 305 } 306 307 ret = futex_wake(f, args->val, NULL, 0); 308 futex_put(f); 309 if (op_ret > 0) { 310 op_ret = 0; 311 /* 312 * Linux abuses the address of the timespec parameter 313 * as the number of retries. 314 */ 315 op_ret += futex_wake(f2, 316 (int)(unsigned long)args->timeout, NULL, 0); 317 ret += op_ret; 318 } 319 futex_put(f2); 320 td->td_retval[0] = ret; 321 322 FUTEX_SYSTEM_UNLOCK; 323 break; 324 325 default: 326 printf("linux_sys_futex: unknown op %d\n", 327 args->op); 328 break; 329 } 330 return 0; 331 } 332 333 static struct futex * 334 futex_get(void *uaddr, int locked) 335 { 336 struct futex *f; 337 338 if (locked == FUTEX_UNLOCKED) 339 FUTEX_LOCK; 340 LIST_FOREACH(f, &futex_list, f_list) { 341 if (f->f_uaddr == uaddr) { 342 f->f_refcount++; 343 if (locked == FUTEX_UNLOCKED) 344 FUTEX_UNLOCK; 345 return f; 346 } 347 } 348 349 f = malloc(sizeof(*f), M_LINUX, M_WAITOK); 350 f->f_uaddr = uaddr; 351 f->f_refcount = 1; 352 TAILQ_INIT(&f->f_waiting_proc); 353 LIST_INSERT_HEAD(&futex_list, f, f_list); 354 if (locked == FUTEX_UNLOCKED) 355 FUTEX_UNLOCK; 356 357 return f; 358 } 359 360 static void 361 futex_put(f) 362 struct futex *f; 363 { 364 FUTEX_LOCK; 365 f->f_refcount--; 366 if (f->f_refcount == 0) { 367 LIST_REMOVE(f, f_list); 368 free(f, M_LINUX); 369 } 370 FUTEX_UNLOCK; 371 372 return; 373 } 374 375 static int 376 futex_sleep(struct futex *f, struct thread *td, unsigned long timeout) 377 { 378 struct waiting_proc *wp; 379 int ret; 380 381 wp = malloc(sizeof(*wp), M_LINUX, M_WAITOK); 382 wp->wp_t = td; 383 wp->wp_new_futex = NULL; 384 FUTEX_LOCK; 385 TAILQ_INSERT_TAIL(&f->f_waiting_proc, wp, wp_list); 386 FUTEX_UNLOCK; 387 388 #ifdef DEBUG 389 if (ldebug(sys_futex)) 390 printf("FUTEX --> %d tlseep timeout = %ld\n", 391 td->td_proc->p_pid, timeout); 392 #endif 393 ret = tsleep(wp, PCATCH | PZERO, "linuxfutex", timeout); 394 #ifdef DEBUG 395 if (ldebug(sys_futex)) 396 printf("FUTEX -> %d tsleep returns %d\n", 397 td->td_proc->p_pid, ret); 398 #endif 399 400 FUTEX_LOCK; 401 TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list); 402 FUTEX_UNLOCK; 403 404 if ((ret == 0) && (wp->wp_new_futex != NULL)) { 405 ret = futex_sleep(wp->wp_new_futex, td, timeout); 406 futex_put(wp->wp_new_futex); /* futex_get called in wakeup */ 407 } 408 409 free(wp, M_LINUX); 410 411 return ret; 412 } 413 414 static int 415 futex_wake(struct futex *f, int n, struct futex *newf, int n2) 416 { 417 struct waiting_proc *wp; 418 int count; 419 420 /* 421 * Linux is very strange it wakes up N threads for 422 * all operations BUT requeue ones where its N+1 423 * mimic this. 424 */ 425 count = newf ? 0 : 1; 426 427 FUTEX_LOCK; 428 TAILQ_FOREACH(wp, &f->f_waiting_proc, wp_list) { 429 if (count <= n) { 430 wakeup_one(wp); 431 count++; 432 } else { 433 if (newf != NULL) { 434 /* futex_put called after tsleep */ 435 wp->wp_new_futex = futex_get(newf->f_uaddr, 436 FUTEX_LOCKED); 437 wakeup_one(wp); 438 if (count - n >= n2) 439 break; 440 } 441 } 442 } 443 FUTEX_UNLOCK; 444 445 return count; 446 } 447 448 static int 449 futex_atomic_op(struct thread *td, int encoded_op, caddr_t uaddr) 450 { 451 int op = (encoded_op >> 28) & 7; 452 int cmp = (encoded_op >> 24) & 15; 453 int oparg = (encoded_op << 8) >> 20; 454 int cmparg = (encoded_op << 20) >> 20; 455 int oldval = 0, ret; 456 457 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 458 oparg = 1 << oparg; 459 460 #ifdef DEBUG 461 printf("futex_atomic_op: op = %d, cmp = %d, oparg = %d, cmparg = %d, " 462 "uaddr = %p\n", op, cmp, oparg, cmparg, uaddr); 463 #endif 464 /* XXX: linux verifies access here and returns EFAULT */ 465 466 switch (op) { 467 case FUTEX_OP_SET: 468 ret = futex_xchgl(oparg, uaddr, &oldval); 469 break; 470 case FUTEX_OP_ADD: 471 ret = futex_addl(oparg, uaddr, &oldval); 472 break; 473 case FUTEX_OP_OR: 474 ret = futex_orl(oparg, uaddr, &oldval); 475 break; 476 case FUTEX_OP_ANDN: 477 ret = futex_andl(~oparg, uaddr, &oldval); 478 break; 479 case FUTEX_OP_XOR: 480 ret = futex_xorl(oparg, uaddr, &oldval); 481 break; 482 default: 483 ret = -ENOSYS; 484 } 485 486 if (!ret) 487 switch (cmp) { 488 case FUTEX_OP_CMP_EQ: 489 ret = (oldval == cmparg); 490 break; 491 case FUTEX_OP_CMP_NE: 492 ret = (oldval != cmparg); 493 break; 494 case FUTEX_OP_CMP_LT: 495 ret = (oldval < cmparg); 496 break; 497 case FUTEX_OP_CMP_GE: 498 ret = (oldval >= cmparg); 499 break; 500 case FUTEX_OP_CMP_LE: 501 ret = (oldval <= cmparg); 502 break; 503 case FUTEX_OP_CMP_GT: 504 ret = (oldval > cmparg); 505 break; 506 default: 507 ret = -ENOSYS; 508 } 509 510 return (ret); 511 } 512 513 static int 514 futex_orl(int oparg, caddr_t uaddr, int *oldval) 515 { 516 uint32_t ua, ua_old; 517 518 for (;;) { 519 ua = ua_old = fuword32(uaddr); 520 ua |= oparg; 521 if (casuword32((void *)uaddr, ua_old, ua) == ua_old) 522 return ua_old; 523 } 524 } 525 526 static int 527 futex_andl(int oparg, caddr_t uaddr, int *oldval) 528 { 529 uint32_t ua, ua_old; 530 531 for (;;) { 532 ua = ua_old = fuword32(uaddr); 533 ua &= oparg; 534 if (casuword32((void *)uaddr, ua_old, ua) == ua_old) 535 return ua_old; 536 } 537 } 538 539 static int 540 futex_xorl(int oparg, caddr_t uaddr, int *oldval) 541 { 542 uint32_t ua, ua_old; 543 544 for (;;) { 545 ua = ua_old = fuword32(uaddr); 546 ua ^= oparg; 547 if (casuword32((void *)uaddr, ua_old, ua) == ua_old) 548 return ua_old; 549 } 550 } 551