1 /* $NetBSD: linux_futex.c,v 1.7 2006/07/24 19:01:49 manu Exp $ */ 2 3 /*- 4 * Copyright (c) 2005 Emmanuel Dreyfus, all rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. All advertising materials mentioning features or use of this software 15 * must display the following acknowledgement: 16 * This product includes software developed by Emmanuel Dreyfus 17 * 4. The name of the author may not be used to endorse or promote 18 * products derived from this software without specific prior written 19 * permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE THE AUTHOR AND CONTRIBUTORS ``AS IS'' 22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 23 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS 25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 #if 0 37 __KERNEL_RCSID(1, "$NetBSD: linux_futex.c,v 1.7 2006/07/24 19:01:49 manu Exp $"); 38 #endif 39 40 #include "opt_compat.h" 41 42 #include <sys/param.h> 43 #include <sys/types.h> 44 #include <sys/time.h> 45 #include <sys/systm.h> 46 #include <sys/proc.h> 47 #include <sys/queue.h> 48 #include <sys/lock.h> 49 #include <sys/mutex.h> 50 #include <sys/sx.h> 51 #include <sys/malloc.h> 52 53 #ifdef COMPAT_LINUX32 54 #include <machine/../linux32/linux.h> 55 #include <machine/../linux32/linux32_proto.h> 56 #else 57 #include <machine/../linux/linux.h> 58 #include <machine/../linux/linux_proto.h> 59 #endif 60 #include <compat/linux/linux_futex.h> 61 62 struct futex; 63 64 struct waiting_proc { 65 struct thread *wp_t; 66 struct futex *wp_new_futex; 67 TAILQ_ENTRY(waiting_proc) wp_list; 68 }; 69 struct futex { 70 void *f_uaddr; 71 int f_refcount; 72 LIST_ENTRY(futex) f_list; 73 TAILQ_HEAD(lf_waiting_proc, waiting_proc) f_waiting_proc; 74 }; 75 76 LIST_HEAD(futex_list, futex) futex_list; 77 struct sx futex_sx; /* this protects the LIST of futexes */ 78 79 #define FUTEX_LOCK sx_xlock(&futex_sx) 80 #define FUTEX_UNLOCK sx_xunlock(&futex_sx) 81 82 #define FUTEX_LOCKED 1 83 #define FUTEX_UNLOCKED 0 84 85 #define FUTEX_SYSTEM_LOCK mtx_lock(&Giant) 86 #define FUTEX_SYSTEM_UNLOCK mtx_unlock(&Giant) 87 88 static struct futex *futex_get(void *, int); 89 static void futex_put(struct futex *); 90 static int futex_sleep(struct futex *, struct thread *, unsigned long); 91 static int futex_wake(struct futex *, int, struct futex *); 92 #ifdef __i386__ 93 static int futex_atomic_op(struct thread *td, int encoded_op, caddr_t uaddr); 94 #endif 95 96 /* support.s */ 97 int futex_xchgl(int oparg, caddr_t uaddr, int *oldval); 98 int futex_addl(int oparg, caddr_t uaddr, int *oldval); 99 int futex_orl(int oparg, caddr_t uaddr, int *oldval); 100 int futex_andnl(int oparg, caddr_t uaddr, int *oldval); 101 int futex_xorl(int oparg, caddr_t uaddr, int *oldval); 102 103 int 104 linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args) 105 { 106 int val; 107 int ret; 108 struct l_timespec timeout = {0, 0}; 109 int error = 0; 110 struct futex *f; 111 struct futex *newf; 112 int timeout_hz; 113 struct timeval tv = {0, 0}; 114 #ifdef __i386__ 115 struct futex *f2; 116 int op_ret; 117 #endif 118 119 #ifdef DEBUG 120 if (ldebug(sys_futex)) 121 printf(ARGS(futex, "%p, %i, %i"), args->uaddr, args->op, 122 args->val); 123 #endif 124 125 switch (args->op) { 126 case LINUX_FUTEX_WAIT: 127 FUTEX_SYSTEM_LOCK; 128 129 if ((error = copyin(args->uaddr, 130 &val, sizeof(val))) != 0) { 131 FUTEX_SYSTEM_UNLOCK; 132 return error; 133 } 134 135 if (val != args->val) { 136 FUTEX_SYSTEM_UNLOCK; 137 return EWOULDBLOCK; 138 } 139 140 if (args->timeout != NULL) { 141 if ((error = copyin(args->timeout, 142 &timeout, sizeof(timeout))) != 0) { 143 FUTEX_SYSTEM_UNLOCK; 144 return error; 145 } 146 } 147 148 #ifdef DEBUG 149 if (ldebug(sys_futex)) 150 printf("FUTEX_WAIT %d: val = %d, uaddr = %p, " 151 "*uaddr = %d, timeout = %d.%09lu\n", 152 td->td_proc->p_pid, args->val, 153 args->uaddr, val, timeout.tv_sec, 154 (unsigned long)timeout.tv_nsec); 155 #endif 156 tv.tv_usec = timeout.tv_sec * 1000000 + timeout.tv_nsec / 1000; 157 timeout_hz = tvtohz(&tv); 158 159 if (timeout.tv_sec == 0 && timeout.tv_nsec == 0) 160 timeout_hz = 0; 161 /* 162 * If the user process requests a non null timeout, 163 * make sure we do not turn it into an infinite 164 * timeout because timeout_hz gets null. 165 * 166 * We use a minimal timeout of 1/hz. Maybe it would 167 * make sense to just return ETIMEDOUT without sleeping. 168 */ 169 if (((timeout.tv_sec != 0) || (timeout.tv_nsec != 0)) && 170 (timeout_hz == 0)) 171 timeout_hz = 1; 172 173 174 f = futex_get(args->uaddr, FUTEX_UNLOCKED); 175 ret = futex_sleep(f, td, timeout_hz); 176 futex_put(f); 177 178 #ifdef DEBUG 179 if (ldebug(sys_futex)) 180 printf("FUTEX_WAIT %d: uaddr = %p, " 181 "ret = %d\n", td->td_proc->p_pid, args->uaddr, ret); 182 #endif 183 184 FUTEX_SYSTEM_UNLOCK; 185 switch (ret) { 186 case EWOULDBLOCK: /* timeout */ 187 return ETIMEDOUT; 188 break; 189 case EINTR: /* signal */ 190 return EINTR; 191 break; 192 case 0: /* FUTEX_WAKE received */ 193 #ifdef DEBUG 194 if (ldebug(sys_futex)) 195 printf("FUTEX_WAIT %d: uaddr = %p, got FUTEX_WAKE\n", 196 td->td_proc->p_pid, args->uaddr); 197 #endif 198 return 0; 199 break; 200 default: 201 #ifdef DEBUG 202 if (ldebug(sys_futex)) 203 printf("FUTEX_WAIT: unexpected ret = %d\n", ret); 204 #endif 205 break; 206 } 207 208 /* NOTREACHED */ 209 break; 210 211 case LINUX_FUTEX_WAKE: 212 FUTEX_SYSTEM_LOCK; 213 214 /* 215 * XXX: Linux is able cope with different addresses 216 * corresponding to the same mapped memory in the sleeping 217 * and the waker process. 218 */ 219 #ifdef DEBUG 220 if (ldebug(sys_futex)) 221 printf("FUTEX_WAKE %d: uaddr = %p, val = %d\n", 222 td->td_proc->p_pid, args->uaddr, args->val); 223 #endif 224 f = futex_get(args->uaddr, FUTEX_UNLOCKED); 225 td->td_retval[0] = futex_wake(f, args->val, NULL); 226 futex_put(f); 227 228 FUTEX_SYSTEM_UNLOCK; 229 break; 230 231 case LINUX_FUTEX_CMP_REQUEUE: 232 FUTEX_SYSTEM_LOCK; 233 234 if ((error = copyin(args->uaddr, 235 &val, sizeof(val))) != 0) { 236 FUTEX_SYSTEM_UNLOCK; 237 return error; 238 } 239 240 if (val != args->val3) { 241 FUTEX_SYSTEM_UNLOCK; 242 return EAGAIN; 243 } 244 245 f = futex_get(args->uaddr, FUTEX_UNLOCKED); 246 newf = futex_get(args->uaddr2, FUTEX_UNLOCKED); 247 td->td_retval[0] = futex_wake(f, args->val, newf); 248 futex_put(f); 249 futex_put(newf); 250 251 FUTEX_SYSTEM_UNLOCK; 252 break; 253 254 case LINUX_FUTEX_REQUEUE: 255 FUTEX_SYSTEM_LOCK; 256 257 f = futex_get(args->uaddr, FUTEX_UNLOCKED); 258 newf = futex_get(args->uaddr2, FUTEX_UNLOCKED); 259 td->td_retval[0] = futex_wake(f, args->val, newf); 260 futex_put(f); 261 futex_put(newf); 262 263 FUTEX_SYSTEM_UNLOCK; 264 break; 265 266 case LINUX_FUTEX_FD: 267 printf("linux_sys_futex: unimplemented op %d\n", 268 args->op); 269 break; 270 271 case LINUX_FUTEX_WAKE_OP: 272 #ifdef __i386__ 273 FUTEX_SYSTEM_LOCK; 274 #ifdef DEBUG 275 if (ldebug(sys_futex)) 276 printf("FUTEX_WAKE_OP: %d: uaddr = %p, op = %d, val = %d, uaddr2 = %p, val3 = %d\n", 277 td->td_proc->p_pid, args->uaddr, args->op, args->val, 278 args->uaddr2, args->val3); 279 #endif 280 f = futex_get(args->uaddr, FUTEX_UNLOCKED); 281 f2 = futex_get(args->uaddr2, FUTEX_UNLOCKED); 282 283 /* 284 * This function returns positive number as results and 285 * negative as errors 286 */ 287 op_ret = futex_atomic_op(td, args->val3, args->uaddr2); 288 if (op_ret < 0) { 289 290 /* XXX: we dont handle the EFAULT yet */ 291 if (op_ret != -EFAULT) { 292 futex_put(f); 293 futex_put(f2); 294 FUTEX_SYSTEM_UNLOCK; 295 return (-op_ret); 296 } 297 298 futex_put(f); 299 futex_put(f2); 300 301 FUTEX_SYSTEM_UNLOCK; 302 return (EFAULT); 303 304 } 305 306 ret = futex_wake(f, args->val, NULL); 307 futex_put(f); 308 if (op_ret > 0) { 309 op_ret = 0; 310 /* 311 * Linux uses the address of the timespec parameter 312 * as the number of retries, so any large number will 313 * be ok. 314 */ 315 op_ret += futex_wake(f2, 0x7fffffff, NULL); 316 ret += op_ret; 317 } 318 futex_put(f2); 319 td->td_retval[0] = ret; 320 321 FUTEX_SYSTEM_UNLOCK; 322 #else 323 printf("linux_sys_futex: wake_op not implemented"); 324 #endif 325 break; 326 327 default: 328 printf("linux_sys_futex: unknown op %d\n", 329 args->op); 330 break; 331 } 332 return 0; 333 } 334 335 static struct futex * 336 futex_get(void *uaddr, int locked) 337 { 338 struct futex *f; 339 340 if (locked == FUTEX_UNLOCKED) 341 FUTEX_LOCK; 342 LIST_FOREACH(f, &futex_list, f_list) { 343 if (f->f_uaddr == uaddr) { 344 f->f_refcount++; 345 if (locked == FUTEX_UNLOCKED) 346 FUTEX_UNLOCK; 347 return f; 348 } 349 } 350 351 f = malloc(sizeof(*f), M_LINUX, M_WAITOK); 352 f->f_uaddr = uaddr; 353 f->f_refcount = 1; 354 TAILQ_INIT(&f->f_waiting_proc); 355 LIST_INSERT_HEAD(&futex_list, f, f_list); 356 if (locked == FUTEX_UNLOCKED) 357 FUTEX_UNLOCK; 358 359 return f; 360 } 361 362 static void 363 futex_put(f) 364 struct futex *f; 365 { 366 FUTEX_LOCK; 367 f->f_refcount--; 368 if (f->f_refcount == 0) { 369 LIST_REMOVE(f, f_list); 370 free(f, M_LINUX); 371 } 372 FUTEX_UNLOCK; 373 374 return; 375 } 376 377 static int 378 futex_sleep(struct futex *f, struct thread *td, unsigned long timeout) 379 { 380 struct waiting_proc *wp; 381 int ret; 382 383 wp = malloc(sizeof(*wp), M_LINUX, M_WAITOK); 384 wp->wp_t = td; 385 wp->wp_new_futex = NULL; 386 FUTEX_LOCK; 387 TAILQ_INSERT_TAIL(&f->f_waiting_proc, wp, wp_list); 388 FUTEX_UNLOCK; 389 390 #ifdef DEBUG 391 if (ldebug(sys_futex)) 392 printf("FUTEX --> %d tlseep timeout = %ld\n", td->td_proc->p_pid, 393 timeout); 394 #endif 395 ret = tsleep(wp, PCATCH | PZERO, "linuxfutex", timeout); 396 #ifdef DEBUG 397 if (ldebug(sys_futex)) 398 printf("FUTEX -> %d tsleep returns %d\n", 399 td->td_proc->p_pid, ret); 400 #endif 401 402 FUTEX_LOCK; 403 TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list); 404 FUTEX_UNLOCK; 405 406 if ((ret == 0) && (wp->wp_new_futex != NULL)) { 407 ret = futex_sleep(wp->wp_new_futex, td, timeout); 408 futex_put(wp->wp_new_futex); /* futex_get called in wakeup */ 409 } 410 411 free(wp, M_LINUX); 412 413 return ret; 414 } 415 416 static int 417 futex_wake(struct futex *f, int n, struct futex *newf) 418 { 419 struct waiting_proc *wp; 420 int count = 0; 421 422 FUTEX_LOCK; 423 TAILQ_FOREACH(wp, &f->f_waiting_proc, wp_list) { 424 if (count <= n) { 425 wakeup_one(wp); 426 count++; 427 } else { 428 if (newf != NULL) { 429 /* futex_put called after tsleep */ 430 wp->wp_new_futex = futex_get(newf->f_uaddr, FUTEX_LOCKED); 431 wakeup_one(wp); 432 } 433 } 434 } 435 FUTEX_UNLOCK; 436 437 return count; 438 } 439 440 #ifdef __i386__ 441 static int 442 futex_atomic_op(struct thread *td, int encoded_op, caddr_t uaddr) 443 { 444 int op = (encoded_op >> 28) & 7; 445 int cmp = (encoded_op >> 24) & 15; 446 int oparg = (encoded_op << 8) >> 20; 447 int cmparg = (encoded_op << 20) >> 20; 448 int oldval = 0, ret; 449 450 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 451 oparg = 1 << oparg; 452 453 #ifdef DEBUG 454 printf("futex_atomic_op: op = %d, cmp = %d, oparg = %d, cmparg = %d, uaddr = %p\n", 455 op, cmp, oparg, cmparg, uaddr); 456 #endif 457 /* XXX: linux verifies access here and returns EFAULT */ 458 459 critical_enter(); 460 461 switch (op) { 462 case FUTEX_OP_SET: 463 ret = futex_xchgl(oparg, uaddr, &oldval); 464 break; 465 case FUTEX_OP_ADD: 466 ret = futex_addl(oparg, uaddr, &oldval); 467 break; 468 case FUTEX_OP_OR: 469 ret = futex_orl(oparg, uaddr, &oldval); 470 break; 471 case FUTEX_OP_ANDN: 472 ret = futex_andnl(oparg, uaddr, &oldval); 473 break; 474 case FUTEX_OP_XOR: 475 ret = futex_xorl(oparg, uaddr, &oldval); 476 break; 477 default: 478 ret = -ENOSYS; 479 } 480 481 critical_exit(); 482 483 if (!ret) 484 switch (cmp) { 485 case FUTEX_OP_CMP_EQ: 486 ret = (oldval == cmparg); 487 break; 488 case FUTEX_OP_CMP_NE: 489 ret = (oldval != cmparg); 490 break; 491 case FUTEX_OP_CMP_LT: 492 ret = (oldval < cmparg); 493 break; 494 case FUTEX_OP_CMP_GE: 495 ret = (oldval >= cmparg); 496 break; 497 case FUTEX_OP_CMP_LE: 498 ret = (oldval <= cmparg); 499 break; 500 case FUTEX_OP_CMP_GT: 501 ret = (oldval > cmparg); 502 break; 503 default: ret = -ENOSYS; 504 } 505 506 return (ret); 507 } 508 #endif 509