1 /* $NetBSD: linux_futex.c,v 1.5 2005/11/23 16:14:57 manu Exp $ */ 2 3 /*- 4 * Copyright (c) 2005 Emmanuel Dreyfus, all rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. All advertising materials mentioning features or use of this software 15 * must display the following acknowledgement: 16 * This product includes software developed by Emmanuel Dreyfus 17 * 4. The name of the author may not be used to endorse or promote 18 * products derived from this software without specific prior written 19 * permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE THE AUTHOR AND CONTRIBUTORS ``AS IS'' 22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 23 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS 25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 #if 0 37 __KERNEL_RCSID(1, "$NetBSD: linux_futex.c,v 1.5 2005/11/23 16:14:57 manu Exp $"); 38 #endif 39 40 #include "opt_compat.h" 41 42 #include <sys/param.h> 43 #include <sys/types.h> 44 #include <sys/time.h> 45 #include <sys/systm.h> 46 #include <sys/proc.h> 47 #include <sys/queue.h> 48 #include <sys/lock.h> 49 #include <sys/mutex.h> 50 #include <sys/malloc.h> 51 52 #ifdef COMPAT_LINUX32 53 #include <machine/../linux32/linux.h> 54 #include <machine/../linux32/linux32_proto.h> 55 #else 56 #include <machine/../linux/linux.h> 57 #include <machine/../linux/linux_proto.h> 58 #endif 59 #include <compat/linux/linux_futex.h> 60 61 struct futex; 62 63 struct waiting_proc { 64 struct thread *wp_t; 65 struct futex *wp_new_futex; 66 TAILQ_ENTRY(waiting_proc) wp_list; 67 }; 68 struct futex { 69 void *f_uaddr; 70 int f_refcount; 71 LIST_ENTRY(futex) f_list; 72 TAILQ_HEAD(lf_waiting_proc, waiting_proc) f_waiting_proc; 73 }; 74 75 LIST_HEAD(futex_list, futex) futex_list; 76 struct mtx futex_mtx; /* this protects the LIST of futexes */ 77 78 #define FUTEX_LOCK mtx_lock(&futex_mtx) 79 #define FUTEX_UNLOCK mtx_unlock(&futex_mtx) 80 81 #define FUTEX_LOCKED 1 82 #define FUTEX_UNLOCKED 0 83 84 #define FUTEX_SYSTEM_LOCK mtx_lock(&Giant) 85 #define FUTEX_SYSTEM_UNLOCK mtx_unlock(&Giant) 86 87 static struct futex *futex_get(void *, int); 88 static void futex_put(struct futex *); 89 static int futex_sleep(struct futex *, struct thread *, unsigned long); 90 static int futex_wake(struct futex *, int, struct futex *); 91 #ifdef __i386__ 92 static int futex_atomic_op(struct thread *td, int encoded_op, caddr_t uaddr); 93 #endif 94 95 /* support.s */ 96 int futex_xchgl(int oparg, caddr_t uaddr, int *oldval); 97 int futex_addl(int oparg, caddr_t uaddr, int *oldval); 98 int futex_orl(int oparg, caddr_t uaddr, int *oldval); 99 int futex_andnl(int oparg, caddr_t uaddr, int *oldval); 100 int futex_xorl(int oparg, caddr_t uaddr, int *oldval); 101 102 int 103 linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args) 104 { 105 int val; 106 int ret; 107 struct l_timespec timeout = { 0, 0 }; 108 int error = 0; 109 struct futex *f; 110 struct futex *newf; 111 int timeout_hz; 112 struct timeval tv = {0, 0}; 113 #ifdef __i386__ 114 struct futex *f2; 115 int op_ret; 116 #endif 117 118 #ifdef DEBUG 119 if (ldebug(sys_futex)) 120 printf(ARGS(futex,"%p, %i, %i"), args->uaddr, args->op, args->val); 121 #endif 122 123 switch (args->op) { 124 case LINUX_FUTEX_WAIT: 125 FUTEX_SYSTEM_LOCK; 126 127 if ((error = copyin(args->uaddr, 128 &val, sizeof(val))) != 0) { 129 FUTEX_SYSTEM_UNLOCK; 130 return error; 131 } 132 133 if (val != args->val) { 134 FUTEX_SYSTEM_UNLOCK; 135 return EWOULDBLOCK; 136 } 137 138 if (args->timeout != NULL) { 139 if ((error = copyin(args->timeout, 140 &timeout, sizeof(timeout))) != 0) { 141 FUTEX_SYSTEM_UNLOCK; 142 return error; 143 } 144 } 145 146 #ifdef DEBUG 147 if (ldebug(sys_futex)) 148 printf("FUTEX_WAIT %d: val = %d, uaddr = %p, " 149 "*uaddr = %d, timeout = %d.%09lu\n", 150 td->td_proc->p_pid, args->val, 151 args->uaddr, val, timeout.tv_sec, 152 (unsigned long)timeout.tv_nsec); 153 #endif 154 tv.tv_usec = timeout.tv_sec * 1000000 + timeout.tv_nsec / 1000; 155 timeout_hz = tvtohz(&tv); 156 157 if (timeout.tv_sec == 0 && timeout.tv_nsec == 0) 158 timeout_hz = 0; 159 /* 160 * If the user process requests a non null timeout, 161 * make sure we do not turn it into an infinite 162 * timeout because timeout_hz gets null. 163 * 164 * We use a minimal timeout of 1/hz. Maybe it would 165 * make sense to just return ETIMEDOUT without sleeping. 166 */ 167 if (((timeout.tv_sec != 0) || (timeout.tv_nsec != 0)) && 168 (timeout_hz == 0)) 169 timeout_hz = 1; 170 171 172 f = futex_get(args->uaddr, FUTEX_UNLOCKED); 173 ret = futex_sleep(f, td, timeout_hz); 174 futex_put(f); 175 176 #ifdef DEBUG 177 if (ldebug(sys_futex)) 178 printf("FUTEX_WAIT %d: uaddr = %p, " 179 "ret = %d\n", td->td_proc->p_pid, args->uaddr, ret); 180 #endif 181 182 FUTEX_SYSTEM_UNLOCK; 183 switch (ret) { 184 case EWOULDBLOCK: /* timeout */ 185 return ETIMEDOUT; 186 break; 187 case EINTR: /* signal */ 188 return EINTR; 189 break; 190 case 0: /* FUTEX_WAKE received */ 191 #ifdef DEBUG 192 if (ldebug(sys_futex)) 193 printf("FUTEX_WAIT %d: uaddr = %p, got FUTEX_WAKE\n", 194 td->td_proc->p_pid, args->uaddr); 195 #endif 196 return 0; 197 break; 198 default: 199 #ifdef DEBUG 200 if (ldebug(sys_futex)) 201 printf("FUTEX_WAIT: unexpected ret = %d\n", ret); 202 #endif 203 break; 204 } 205 206 /* NOTREACHED */ 207 break; 208 209 case LINUX_FUTEX_WAKE: 210 FUTEX_SYSTEM_LOCK; 211 212 /* 213 * XXX: Linux is able cope with different addresses 214 * corresponding to the same mapped memory in the sleeping 215 * and the waker process. 216 */ 217 #ifdef DEBUG 218 if (ldebug(sys_futex)) 219 printf("FUTEX_WAKE %d: uaddr = %p, val = %d\n", 220 td->td_proc->p_pid, args->uaddr, args->val); 221 #endif 222 f = futex_get(args->uaddr, FUTEX_UNLOCKED); 223 td->td_retval[0] = futex_wake(f, args->val, NULL); 224 futex_put(f); 225 226 FUTEX_SYSTEM_UNLOCK; 227 break; 228 229 case LINUX_FUTEX_CMP_REQUEUE: 230 FUTEX_SYSTEM_LOCK; 231 232 if ((error = copyin(args->uaddr, 233 &val, sizeof(val))) != 0) { 234 FUTEX_SYSTEM_UNLOCK; 235 return error; 236 } 237 238 if (val != args->val3) { 239 FUTEX_SYSTEM_UNLOCK; 240 return EAGAIN; 241 } 242 243 f = futex_get(args->uaddr, FUTEX_UNLOCKED); 244 newf = futex_get(args->uaddr2, FUTEX_UNLOCKED); 245 td->td_retval[0] = futex_wake(f, args->val, newf); 246 futex_put(f); 247 futex_put(newf); 248 249 FUTEX_SYSTEM_UNLOCK; 250 break; 251 252 case LINUX_FUTEX_REQUEUE: 253 FUTEX_SYSTEM_LOCK; 254 255 f = futex_get(args->uaddr, FUTEX_UNLOCKED); 256 newf = futex_get(args->uaddr2, FUTEX_UNLOCKED); 257 td->td_retval[0] = futex_wake(f, args->val, newf); 258 futex_put(f); 259 futex_put(newf); 260 261 FUTEX_SYSTEM_UNLOCK; 262 break; 263 264 case LINUX_FUTEX_FD: 265 printf("linux_sys_futex: unimplemented op %d\n", 266 args->op); 267 break; 268 269 case LINUX_FUTEX_WAKE_OP: 270 #ifdef __i386__ 271 FUTEX_SYSTEM_LOCK; 272 #ifdef DEBUG 273 if (ldebug(sys_futex)) 274 printf("FUTEX_WAKE_OP: %d: uaddr = %p, op = %d, val = %d, uaddr2 = %p, val3 = %d\n", 275 td->td_proc->p_pid, args->uaddr, args->op, args->val, args->uaddr2, args->val3); 276 #endif 277 f = futex_get(args->uaddr, FUTEX_UNLOCKED); 278 f2 = futex_get(args->uaddr2, FUTEX_UNLOCKED); 279 280 /* This function returns positive number as results 281 * and negative as errors 282 */ 283 op_ret = futex_atomic_op(td, args->val3, args->uaddr2); 284 if (op_ret < 0) { 285 286 /* XXX: we dont handle the EFAULT yet */ 287 if (op_ret != -EFAULT) { 288 futex_put(f); 289 futex_put(f2); 290 FUTEX_SYSTEM_UNLOCK; 291 return (-op_ret); 292 } 293 294 futex_put(f); 295 futex_put(f2); 296 297 FUTEX_SYSTEM_UNLOCK; 298 return (EFAULT); 299 300 } 301 302 ret = futex_wake(f, args->val, NULL); 303 futex_put(f); 304 if (op_ret > 0) { 305 #ifdef DEBUG 306 printf("second wakeup\n"); 307 #endif 308 op_ret = 0; 309 /* 310 * Linux uses the address of the timespec parameter 311 * as the number of retries, so any large number will 312 * be ok. 313 */ 314 op_ret += futex_wake(f2, 0x7fffffff, NULL); 315 ret += op_ret; 316 } 317 futex_put(f2); 318 td->td_retval[0] = ret; 319 320 FUTEX_SYSTEM_UNLOCK; 321 #else 322 printf("linux_sys_futex: wake_op not implemented"); 323 #endif 324 break; 325 326 default: 327 printf("linux_sys_futex: unknown op %d\n", 328 args->op); 329 break; 330 } 331 return 0; 332 } 333 334 static struct futex * 335 futex_get(void *uaddr, int locked) 336 { 337 struct futex *f; 338 339 if (locked == FUTEX_UNLOCKED) 340 FUTEX_LOCK; 341 LIST_FOREACH(f, &futex_list, f_list) { 342 if (f->f_uaddr == uaddr) { 343 f->f_refcount++; 344 if (locked == FUTEX_UNLOCKED) 345 FUTEX_UNLOCK; 346 return f; 347 } 348 } 349 if (locked == FUTEX_UNLOCKED) 350 FUTEX_UNLOCK; 351 352 /* Not found, create it */ 353 f = malloc(sizeof(*f), M_LINUX, M_WAITOK); 354 f->f_uaddr = uaddr; 355 f->f_refcount = 1; 356 TAILQ_INIT(&f->f_waiting_proc); 357 if (locked == FUTEX_UNLOCKED) 358 FUTEX_LOCK; 359 LIST_INSERT_HEAD(&futex_list, f, f_list); 360 if (locked == FUTEX_UNLOCKED) 361 FUTEX_UNLOCK; 362 363 return f; 364 } 365 366 static void 367 futex_put(f) 368 struct futex *f; 369 { 370 FUTEX_LOCK; 371 f->f_refcount--; 372 if (f->f_refcount == 0) { 373 LIST_REMOVE(f, f_list); 374 free(f, M_LINUX); 375 } 376 FUTEX_UNLOCK; 377 378 return; 379 } 380 381 static int 382 futex_sleep(struct futex *f, struct thread *td, unsigned long timeout) 383 { 384 struct waiting_proc *wp; 385 int ret; 386 387 wp = malloc(sizeof(*wp), M_LINUX, M_WAITOK); 388 wp->wp_t = td; 389 wp->wp_new_futex = NULL; 390 FUTEX_LOCK; 391 TAILQ_INSERT_TAIL(&f->f_waiting_proc, wp, wp_list); 392 FUTEX_UNLOCK; 393 394 #ifdef DEBUG 395 if (ldebug(sys_futex)) 396 printf("FUTEX --> %d tlseep timeout = %ld\n", td->td_proc->p_pid, 397 timeout); 398 #endif 399 ret = tsleep(wp, PCATCH|PZERO, "linuxfutex", timeout); 400 401 FUTEX_LOCK; 402 TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list); 403 FUTEX_UNLOCK; 404 405 if ((ret == 0) && (wp->wp_new_futex != NULL)) { 406 ret = futex_sleep(wp->wp_new_futex, td, timeout); 407 futex_put(wp->wp_new_futex); /* futex_get called in wakeup */ 408 } 409 410 free(wp, M_LINUX); 411 412 return ret; 413 } 414 415 static int 416 futex_wake(struct futex *f, int n, struct futex *newf) 417 { 418 struct waiting_proc *wp; 419 int count = 0; 420 421 FUTEX_LOCK; 422 TAILQ_FOREACH(wp, &f->f_waiting_proc, wp_list) { 423 if (count <= n) { 424 wakeup(wp); 425 count++; 426 } else { 427 if (newf != NULL) { 428 /* futex_put called after tsleep */ 429 wp->wp_new_futex = futex_get(newf->f_uaddr, FUTEX_LOCKED); 430 wakeup(wp); 431 } 432 } 433 } 434 FUTEX_UNLOCK; 435 436 return count; 437 } 438 439 #ifdef __i386__ 440 static int 441 futex_atomic_op(struct thread *td, int encoded_op, caddr_t uaddr) 442 { 443 int op = (encoded_op >> 28) & 7; 444 int cmp = (encoded_op >> 24) & 15; 445 int oparg = (encoded_op << 8) >> 20; 446 int cmparg = (encoded_op << 20) >> 20; 447 int oldval = 0, ret; 448 449 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 450 oparg = 1 << oparg; 451 452 #ifdef DEBUG 453 printf("futex_atomic_op: op = %d, cmp = %d, oparg = %d, cmparg = %d, uaddr = %p\n", 454 op, cmp, oparg, cmparg, uaddr); 455 #endif 456 /* XXX: linux verifies access here and returns EFAULT */ 457 458 critical_enter(); 459 460 switch (op) { 461 case FUTEX_OP_SET: 462 ret = futex_xchgl(oparg, uaddr, &oldval); 463 break; 464 case FUTEX_OP_ADD: 465 ret = futex_addl(oparg, uaddr, &oldval); 466 break; 467 case FUTEX_OP_OR: 468 ret = futex_orl(oparg, uaddr, &oldval); 469 break; 470 case FUTEX_OP_ANDN: 471 ret = futex_andnl(oparg, uaddr, &oldval); 472 break; 473 case FUTEX_OP_XOR: 474 ret = futex_xorl(oparg, uaddr, &oldval); 475 break; 476 default: 477 ret = -ENOSYS; 478 } 479 480 critical_exit(); 481 482 if (!ret) 483 switch (cmp) { 484 case FUTEX_OP_CMP_EQ: 485 ret = (oldval == cmparg); 486 break; 487 case FUTEX_OP_CMP_NE: 488 ret = (oldval != cmparg); 489 break; 490 case FUTEX_OP_CMP_LT: 491 ret = (oldval < cmparg); 492 break; 493 case FUTEX_OP_CMP_GE: 494 ret = (oldval >= cmparg); 495 break; 496 case FUTEX_OP_CMP_LE: 497 ret = (oldval <= cmparg); 498 break; 499 case FUTEX_OP_CMP_GT: 500 ret = (oldval > cmparg); 501 break; 502 default: ret = -ENOSYS; 503 } 504 505 return (ret); 506 } 507 #endif 508