1 /* $NetBSD: linux_futex.c,v 1.5 2005/11/23 16:14:57 manu Exp $ */ 2 3 /*- 4 * Copyright (c) 2005 Emmanuel Dreyfus, all rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. All advertising materials mentioning features or use of this software 15 * must display the following acknowledgement: 16 * This product includes software developed by Emmanuel Dreyfus 17 * 4. The name of the author may not be used to endorse or promote 18 * products derived from this software without specific prior written 19 * permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE THE AUTHOR AND CONTRIBUTORS ``AS IS'' 22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 23 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS 25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 #if 0 37 __KERNEL_RCSID(1, "$NetBSD: linux_futex.c,v 1.5 2005/11/23 16:14:57 manu Exp $"); 38 #endif 39 40 #include "opt_compat.h" 41 42 #include <sys/param.h> 43 #include <sys/types.h> 44 #include <sys/time.h> 45 #include <sys/systm.h> 46 #include <sys/proc.h> 47 #include <sys/queue.h> 48 #include <sys/lock.h> 49 #include <sys/mutex.h> 50 #include <sys/sx.h> 51 #include <sys/malloc.h> 52 53 #ifdef COMPAT_LINUX32 54 #include <machine/../linux32/linux.h> 55 #include <machine/../linux32/linux32_proto.h> 56 #else 57 #include <machine/../linux/linux.h> 58 #include <machine/../linux/linux_proto.h> 59 #endif 60 #include <compat/linux/linux_futex.h> 61 62 struct futex; 63 64 struct waiting_proc { 65 struct thread *wp_t; 66 struct futex *wp_new_futex; 67 TAILQ_ENTRY(waiting_proc) wp_list; 68 }; 69 struct futex { 70 void *f_uaddr; 71 int f_refcount; 72 LIST_ENTRY(futex) f_list; 73 TAILQ_HEAD(lf_waiting_proc, waiting_proc) f_waiting_proc; 74 }; 75 76 LIST_HEAD(futex_list, futex) futex_list; 77 struct sx futex_sx; /* this protects the LIST of futexes */ 78 79 #define FUTEX_LOCK sx_xlock(&futex_sx) 80 #define FUTEX_UNLOCK sx_xunlock(&futex_sx) 81 82 #define FUTEX_LOCKED 1 83 #define FUTEX_UNLOCKED 0 84 85 #define FUTEX_SYSTEM_LOCK mtx_lock(&Giant) 86 #define FUTEX_SYSTEM_UNLOCK mtx_unlock(&Giant) 87 88 static struct futex *futex_get(void *, int); 89 static void futex_put(struct futex *); 90 static int futex_sleep(struct futex *, struct thread *, unsigned long); 91 static int futex_wake(struct futex *, int, struct futex *); 92 #ifdef __i386__ 93 static int futex_atomic_op(struct thread *td, int encoded_op, caddr_t uaddr); 94 #endif 95 96 /* support.s */ 97 int futex_xchgl(int oparg, caddr_t uaddr, int *oldval); 98 int futex_addl(int oparg, caddr_t uaddr, int *oldval); 99 int futex_orl(int oparg, caddr_t uaddr, int *oldval); 100 int futex_andnl(int oparg, caddr_t uaddr, int *oldval); 101 int futex_xorl(int oparg, caddr_t uaddr, int *oldval); 102 103 int 104 linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args) 105 { 106 int val; 107 int ret; 108 struct l_timespec timeout = { 0, 0 }; 109 int error = 0; 110 struct futex *f; 111 struct futex *newf; 112 int timeout_hz; 113 struct timeval tv = {0, 0}; 114 #ifdef __i386__ 115 struct futex *f2; 116 int op_ret; 117 #endif 118 119 #ifdef DEBUG 120 if (ldebug(sys_futex)) 121 printf(ARGS(futex,"%p, %i, %i"), args->uaddr, args->op, args->val); 122 #endif 123 124 switch (args->op) { 125 case LINUX_FUTEX_WAIT: 126 FUTEX_SYSTEM_LOCK; 127 128 if ((error = copyin(args->uaddr, 129 &val, sizeof(val))) != 0) { 130 FUTEX_SYSTEM_UNLOCK; 131 return error; 132 } 133 134 if (val != args->val) { 135 FUTEX_SYSTEM_UNLOCK; 136 return EWOULDBLOCK; 137 } 138 139 if (args->timeout != NULL) { 140 if ((error = copyin(args->timeout, 141 &timeout, sizeof(timeout))) != 0) { 142 FUTEX_SYSTEM_UNLOCK; 143 return error; 144 } 145 } 146 147 #ifdef DEBUG 148 if (ldebug(sys_futex)) 149 printf("FUTEX_WAIT %d: val = %d, uaddr = %p, " 150 "*uaddr = %d, timeout = %d.%09lu\n", 151 td->td_proc->p_pid, args->val, 152 args->uaddr, val, timeout.tv_sec, 153 (unsigned long)timeout.tv_nsec); 154 #endif 155 tv.tv_usec = timeout.tv_sec * 1000000 + timeout.tv_nsec / 1000; 156 timeout_hz = tvtohz(&tv); 157 158 if (timeout.tv_sec == 0 && timeout.tv_nsec == 0) 159 timeout_hz = 0; 160 /* 161 * If the user process requests a non null timeout, 162 * make sure we do not turn it into an infinite 163 * timeout because timeout_hz gets null. 164 * 165 * We use a minimal timeout of 1/hz. Maybe it would 166 * make sense to just return ETIMEDOUT without sleeping. 167 */ 168 if (((timeout.tv_sec != 0) || (timeout.tv_nsec != 0)) && 169 (timeout_hz == 0)) 170 timeout_hz = 1; 171 172 173 f = futex_get(args->uaddr, FUTEX_UNLOCKED); 174 ret = futex_sleep(f, td, timeout_hz); 175 futex_put(f); 176 177 #ifdef DEBUG 178 if (ldebug(sys_futex)) 179 printf("FUTEX_WAIT %d: uaddr = %p, " 180 "ret = %d\n", td->td_proc->p_pid, args->uaddr, ret); 181 #endif 182 183 FUTEX_SYSTEM_UNLOCK; 184 switch (ret) { 185 case EWOULDBLOCK: /* timeout */ 186 return ETIMEDOUT; 187 break; 188 case EINTR: /* signal */ 189 return EINTR; 190 break; 191 case 0: /* FUTEX_WAKE received */ 192 #ifdef DEBUG 193 if (ldebug(sys_futex)) 194 printf("FUTEX_WAIT %d: uaddr = %p, got FUTEX_WAKE\n", 195 td->td_proc->p_pid, args->uaddr); 196 #endif 197 return 0; 198 break; 199 default: 200 #ifdef DEBUG 201 if (ldebug(sys_futex)) 202 printf("FUTEX_WAIT: unexpected ret = %d\n", ret); 203 #endif 204 break; 205 } 206 207 /* NOTREACHED */ 208 break; 209 210 case LINUX_FUTEX_WAKE: 211 FUTEX_SYSTEM_LOCK; 212 213 /* 214 * XXX: Linux is able cope with different addresses 215 * corresponding to the same mapped memory in the sleeping 216 * and the waker process. 217 */ 218 #ifdef DEBUG 219 if (ldebug(sys_futex)) 220 printf("FUTEX_WAKE %d: uaddr = %p, val = %d\n", 221 td->td_proc->p_pid, args->uaddr, args->val); 222 #endif 223 f = futex_get(args->uaddr, FUTEX_UNLOCKED); 224 td->td_retval[0] = futex_wake(f, args->val, NULL); 225 futex_put(f); 226 227 FUTEX_SYSTEM_UNLOCK; 228 break; 229 230 case LINUX_FUTEX_CMP_REQUEUE: 231 FUTEX_SYSTEM_LOCK; 232 233 if ((error = copyin(args->uaddr, 234 &val, sizeof(val))) != 0) { 235 FUTEX_SYSTEM_UNLOCK; 236 return error; 237 } 238 239 if (val != args->val3) { 240 FUTEX_SYSTEM_UNLOCK; 241 return EAGAIN; 242 } 243 244 f = futex_get(args->uaddr, FUTEX_UNLOCKED); 245 newf = futex_get(args->uaddr2, FUTEX_UNLOCKED); 246 td->td_retval[0] = futex_wake(f, args->val, newf); 247 futex_put(f); 248 futex_put(newf); 249 250 FUTEX_SYSTEM_UNLOCK; 251 break; 252 253 case LINUX_FUTEX_REQUEUE: 254 FUTEX_SYSTEM_LOCK; 255 256 f = futex_get(args->uaddr, FUTEX_UNLOCKED); 257 newf = futex_get(args->uaddr2, FUTEX_UNLOCKED); 258 td->td_retval[0] = futex_wake(f, args->val, newf); 259 futex_put(f); 260 futex_put(newf); 261 262 FUTEX_SYSTEM_UNLOCK; 263 break; 264 265 case LINUX_FUTEX_FD: 266 printf("linux_sys_futex: unimplemented op %d\n", 267 args->op); 268 break; 269 270 case LINUX_FUTEX_WAKE_OP: 271 #ifdef __i386__ 272 FUTEX_SYSTEM_LOCK; 273 #ifdef DEBUG 274 if (ldebug(sys_futex)) 275 printf("FUTEX_WAKE_OP: %d: uaddr = %p, op = %d, val = %d, uaddr2 = %p, val3 = %d\n", 276 td->td_proc->p_pid, args->uaddr, args->op, args->val, args->uaddr2, args->val3); 277 #endif 278 f = futex_get(args->uaddr, FUTEX_UNLOCKED); 279 f2 = futex_get(args->uaddr2, FUTEX_UNLOCKED); 280 281 /* This function returns positive number as results 282 * and negative as errors 283 */ 284 op_ret = futex_atomic_op(td, args->val3, args->uaddr2); 285 if (op_ret < 0) { 286 287 /* XXX: we dont handle the EFAULT yet */ 288 if (op_ret != -EFAULT) { 289 futex_put(f); 290 futex_put(f2); 291 FUTEX_SYSTEM_UNLOCK; 292 return (-op_ret); 293 } 294 295 futex_put(f); 296 futex_put(f2); 297 298 FUTEX_SYSTEM_UNLOCK; 299 return (EFAULT); 300 301 } 302 303 ret = futex_wake(f, args->val, NULL); 304 futex_put(f); 305 if (op_ret > 0) { 306 op_ret = 0; 307 /* 308 * Linux uses the address of the timespec parameter 309 * as the number of retries, so any large number will 310 * be ok. 311 */ 312 op_ret += futex_wake(f2, 0x7fffffff, NULL); 313 ret += op_ret; 314 } 315 futex_put(f2); 316 td->td_retval[0] = ret; 317 318 FUTEX_SYSTEM_UNLOCK; 319 #else 320 printf("linux_sys_futex: wake_op not implemented"); 321 #endif 322 break; 323 324 default: 325 printf("linux_sys_futex: unknown op %d\n", 326 args->op); 327 break; 328 } 329 return 0; 330 } 331 332 static struct futex * 333 futex_get(void *uaddr, int locked) 334 { 335 struct futex *f; 336 337 if (locked == FUTEX_UNLOCKED) 338 FUTEX_LOCK; 339 LIST_FOREACH(f, &futex_list, f_list) { 340 if (f->f_uaddr == uaddr) { 341 f->f_refcount++; 342 if (locked == FUTEX_UNLOCKED) 343 FUTEX_UNLOCK; 344 return f; 345 } 346 } 347 348 f = malloc(sizeof(*f), M_LINUX, M_WAITOK); 349 f->f_uaddr = uaddr; 350 f->f_refcount = 1; 351 TAILQ_INIT(&f->f_waiting_proc); 352 LIST_INSERT_HEAD(&futex_list, f, f_list); 353 if (locked == FUTEX_UNLOCKED) 354 FUTEX_UNLOCK; 355 356 return f; 357 } 358 359 static void 360 futex_put(f) 361 struct futex *f; 362 { 363 FUTEX_LOCK; 364 f->f_refcount--; 365 if (f->f_refcount == 0) { 366 LIST_REMOVE(f, f_list); 367 free(f, M_LINUX); 368 } 369 FUTEX_UNLOCK; 370 371 return; 372 } 373 374 static int 375 futex_sleep(struct futex *f, struct thread *td, unsigned long timeout) 376 { 377 struct waiting_proc *wp; 378 int ret; 379 380 wp = malloc(sizeof(*wp), M_LINUX, M_WAITOK); 381 wp->wp_t = td; 382 wp->wp_new_futex = NULL; 383 FUTEX_LOCK; 384 TAILQ_INSERT_TAIL(&f->f_waiting_proc, wp, wp_list); 385 FUTEX_UNLOCK; 386 387 #ifdef DEBUG 388 if (ldebug(sys_futex)) 389 printf("FUTEX --> %d tlseep timeout = %ld\n", td->td_proc->p_pid, 390 timeout); 391 #endif 392 ret = tsleep(wp, PCATCH|PZERO, "linuxfutex", timeout); 393 394 FUTEX_LOCK; 395 TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list); 396 FUTEX_UNLOCK; 397 398 if ((ret == 0) && (wp->wp_new_futex != NULL)) { 399 ret = futex_sleep(wp->wp_new_futex, td, timeout); 400 futex_put(wp->wp_new_futex); /* futex_get called in wakeup */ 401 } 402 403 free(wp, M_LINUX); 404 405 return ret; 406 } 407 408 static int 409 futex_wake(struct futex *f, int n, struct futex *newf) 410 { 411 struct waiting_proc *wp; 412 int count = 0; 413 414 FUTEX_LOCK; 415 TAILQ_FOREACH(wp, &f->f_waiting_proc, wp_list) { 416 if (count <= n) { 417 wakeup_one(wp); 418 count++; 419 } else { 420 if (newf != NULL) { 421 /* futex_put called after tsleep */ 422 wp->wp_new_futex = futex_get(newf->f_uaddr, FUTEX_LOCKED); 423 wakeup_one(wp); 424 } 425 } 426 } 427 FUTEX_UNLOCK; 428 429 return count; 430 } 431 432 #ifdef __i386__ 433 static int 434 futex_atomic_op(struct thread *td, int encoded_op, caddr_t uaddr) 435 { 436 int op = (encoded_op >> 28) & 7; 437 int cmp = (encoded_op >> 24) & 15; 438 int oparg = (encoded_op << 8) >> 20; 439 int cmparg = (encoded_op << 20) >> 20; 440 int oldval = 0, ret; 441 442 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 443 oparg = 1 << oparg; 444 445 #ifdef DEBUG 446 printf("futex_atomic_op: op = %d, cmp = %d, oparg = %d, cmparg = %d, uaddr = %p\n", 447 op, cmp, oparg, cmparg, uaddr); 448 #endif 449 /* XXX: linux verifies access here and returns EFAULT */ 450 451 critical_enter(); 452 453 switch (op) { 454 case FUTEX_OP_SET: 455 ret = futex_xchgl(oparg, uaddr, &oldval); 456 break; 457 case FUTEX_OP_ADD: 458 ret = futex_addl(oparg, uaddr, &oldval); 459 break; 460 case FUTEX_OP_OR: 461 ret = futex_orl(oparg, uaddr, &oldval); 462 break; 463 case FUTEX_OP_ANDN: 464 ret = futex_andnl(oparg, uaddr, &oldval); 465 break; 466 case FUTEX_OP_XOR: 467 ret = futex_xorl(oparg, uaddr, &oldval); 468 break; 469 default: 470 ret = -ENOSYS; 471 } 472 473 critical_exit(); 474 475 if (!ret) 476 switch (cmp) { 477 case FUTEX_OP_CMP_EQ: 478 ret = (oldval == cmparg); 479 break; 480 case FUTEX_OP_CMP_NE: 481 ret = (oldval != cmparg); 482 break; 483 case FUTEX_OP_CMP_LT: 484 ret = (oldval < cmparg); 485 break; 486 case FUTEX_OP_CMP_GE: 487 ret = (oldval >= cmparg); 488 break; 489 case FUTEX_OP_CMP_LE: 490 ret = (oldval <= cmparg); 491 break; 492 case FUTEX_OP_CMP_GT: 493 ret = (oldval > cmparg); 494 break; 495 default: ret = -ENOSYS; 496 } 497 498 return (ret); 499 } 500 #endif 501