1 /* $NetBSD: linux_futex.c,v 1.5 2005/11/23 16:14:57 manu Exp $ */ 2 3 /*- 4 * Copyright (c) 2005 Emmanuel Dreyfus, all rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. All advertising materials mentioning features or use of this software 15 * must display the following acknowledgement: 16 * This product includes software developed by Emmanuel Dreyfus 17 * 4. The name of the author may not be used to endorse or promote 18 * products derived from this software without specific prior written 19 * permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE THE AUTHOR AND CONTRIBUTORS ``AS IS'' 22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 23 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS 25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 #if 0 37 __KERNEL_RCSID(1, "$NetBSD: linux_futex.c,v 1.5 2005/11/23 16:14:57 manu Exp $"); 38 #endif 39 40 #include "opt_compat.h" 41 42 #include <sys/param.h> 43 #include <sys/types.h> 44 #include <sys/time.h> 45 #include <sys/systm.h> 46 #include <sys/proc.h> 47 #include <sys/queue.h> 48 #include <sys/lock.h> 49 #include <sys/mutex.h> 50 #include <sys/malloc.h> 51 52 #ifdef COMPAT_LINUX32 53 #include <machine/../linux32/linux.h> 54 #include <machine/../linux32/linux32_proto.h> 55 #else 56 #include <machine/../linux/linux.h> 57 #include <machine/../linux/linux_proto.h> 58 #endif 59 #include <compat/linux/linux_futex.h> 60 61 struct futex; 62 63 struct waiting_proc { 64 struct thread *wp_t; 65 struct futex *wp_new_futex; 66 TAILQ_ENTRY(waiting_proc) wp_list; 67 }; 68 struct futex { 69 void *f_uaddr; 70 int f_refcount; 71 LIST_ENTRY(futex) f_list; 72 TAILQ_HEAD(lf_waiting_proc, waiting_proc) f_waiting_proc; 73 }; 74 75 LIST_HEAD(futex_list, futex) futex_list; 76 struct mtx futex_mtx; /* this protects the LIST of futexes */ 77 78 #define FUTEX_LOCK mtx_lock(&futex_mtx) 79 #define FUTEX_UNLOCK mtx_unlock(&futex_mtx) 80 81 #define FUTEX_LOCKED 1 82 #define FUTEX_UNLOCKED 0 83 84 #define FUTEX_SYSTEM_LOCK mtx_lock(&Giant) 85 #define FUTEX_SYSTEM_UNLOCK mtx_unlock(&Giant) 86 87 static struct futex *futex_get(void *, int); 88 static void futex_put(struct futex *); 89 static int futex_sleep(struct futex *, struct thread *, unsigned long); 90 static int futex_wake(struct futex *, int, struct futex *); 91 #ifdef __i386__ 92 static int futex_atomic_op(struct thread *td, int encoded_op, caddr_t uaddr); 93 #endif 94 95 /* support.s */ 96 int futex_xchgl(int oparg, caddr_t uaddr, int *oldval); 97 int futex_addl(int oparg, caddr_t uaddr, int *oldval); 98 int futex_orl(int oparg, caddr_t uaddr, int *oldval); 99 int futex_andnl(int oparg, caddr_t uaddr, int *oldval); 100 int futex_xorl(int oparg, caddr_t uaddr, int *oldval); 101 102 int 103 linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args) 104 { 105 int val; 106 int ret; 107 struct l_timespec timeout = { 0, 0 }; 108 int error = 0; 109 struct futex *f; 110 struct futex *newf; 111 int timeout_hz; 112 struct timeval tv = {0, 0}; 113 #ifdef __i386__ 114 struct futex *f2; 115 int op_ret; 116 #endif 117 118 #ifdef DEBUG 119 if (ldebug(sys_futex)) 120 printf(ARGS(futex,"%p, %i, %i"), args->uaddr, args->op, args->val); 121 #endif 122 123 switch (args->op) { 124 case LINUX_FUTEX_WAIT: 125 FUTEX_SYSTEM_LOCK; 126 127 if ((error = copyin(args->uaddr, 128 &val, sizeof(val))) != 0) { 129 FUTEX_SYSTEM_UNLOCK; 130 return error; 131 } 132 133 if (val != args->val) { 134 FUTEX_SYSTEM_UNLOCK; 135 return EWOULDBLOCK; 136 } 137 138 if (args->timeout != NULL) { 139 if ((error = copyin(args->timeout, 140 &timeout, sizeof(timeout))) != 0) { 141 FUTEX_SYSTEM_UNLOCK; 142 return error; 143 } 144 } 145 146 #ifdef DEBUG 147 if (ldebug(sys_futex)) 148 printf("FUTEX_WAIT %d: val = %d, uaddr = %p, " 149 "*uaddr = %d, timeout = %d.%09ld\n", 150 td->td_proc->p_pid, args->val, 151 args->uaddr, val, timeout.tv_sec, timeout.tv_nsec); 152 #endif 153 tv.tv_usec = timeout.tv_sec * 1000000 + timeout.tv_nsec / 1000; 154 timeout_hz = tvtohz(&tv); 155 156 if (timeout.tv_sec == 0 && timeout.tv_nsec == 0) 157 timeout_hz = 0; 158 /* 159 * If the user process requests a non null timeout, 160 * make sure we do not turn it into an infinite 161 * timeout because timeout_hz gets null. 162 * 163 * We use a minimal timeout of 1/hz. Maybe it would 164 * make sense to just return ETIMEDOUT without sleeping. 165 */ 166 if (((timeout.tv_sec != 0) || (timeout.tv_nsec != 0)) && 167 (timeout_hz == 0)) 168 timeout_hz = 1; 169 170 171 f = futex_get(args->uaddr, FUTEX_UNLOCKED); 172 ret = futex_sleep(f, td, timeout_hz); 173 futex_put(f); 174 175 #ifdef DEBUG 176 if (ldebug(sys_futex)) 177 printf("FUTEX_WAIT %d: uaddr = %p, " 178 "ret = %d\n", td->td_proc->p_pid, args->uaddr, ret); 179 #endif 180 181 FUTEX_SYSTEM_UNLOCK; 182 switch (ret) { 183 case EWOULDBLOCK: /* timeout */ 184 return ETIMEDOUT; 185 break; 186 case EINTR: /* signal */ 187 return EINTR; 188 break; 189 case 0: /* FUTEX_WAKE received */ 190 #ifdef DEBUG 191 if (ldebug(sys_futex)) 192 printf("FUTEX_WAIT %d: uaddr = %p, got FUTEX_WAKE\n", 193 td->td_proc->p_pid, args->uaddr); 194 #endif 195 return 0; 196 break; 197 default: 198 #ifdef DEBUG 199 if (ldebug(sys_futex)) 200 printf("FUTEX_WAIT: unexpected ret = %d\n", ret); 201 #endif 202 break; 203 } 204 205 /* NOTREACHED */ 206 break; 207 208 case LINUX_FUTEX_WAKE: 209 FUTEX_SYSTEM_LOCK; 210 211 /* 212 * XXX: Linux is able cope with different addresses 213 * corresponding to the same mapped memory in the sleeping 214 * and the waker process. 215 */ 216 #ifdef DEBUG 217 if (ldebug(sys_futex)) 218 printf("FUTEX_WAKE %d: uaddr = %p, val = %d\n", 219 td->td_proc->p_pid, args->uaddr, args->val); 220 #endif 221 f = futex_get(args->uaddr, FUTEX_UNLOCKED); 222 td->td_retval[0] = futex_wake(f, args->val, NULL); 223 futex_put(f); 224 225 FUTEX_SYSTEM_UNLOCK; 226 break; 227 228 case LINUX_FUTEX_CMP_REQUEUE: 229 FUTEX_SYSTEM_LOCK; 230 231 if ((error = copyin(args->uaddr, 232 &val, sizeof(val))) != 0) { 233 FUTEX_SYSTEM_UNLOCK; 234 return error; 235 } 236 237 if (val != args->val3) { 238 FUTEX_SYSTEM_UNLOCK; 239 return EAGAIN; 240 } 241 242 f = futex_get(args->uaddr, FUTEX_UNLOCKED); 243 newf = futex_get(args->uaddr2, FUTEX_UNLOCKED); 244 td->td_retval[0] = futex_wake(f, args->val, newf); 245 futex_put(f); 246 futex_put(newf); 247 248 FUTEX_SYSTEM_UNLOCK; 249 break; 250 251 case LINUX_FUTEX_REQUEUE: 252 FUTEX_SYSTEM_LOCK; 253 254 f = futex_get(args->uaddr, FUTEX_UNLOCKED); 255 newf = futex_get(args->uaddr2, FUTEX_UNLOCKED); 256 td->td_retval[0] = futex_wake(f, args->val, newf); 257 futex_put(f); 258 futex_put(newf); 259 260 FUTEX_SYSTEM_UNLOCK; 261 break; 262 263 case LINUX_FUTEX_FD: 264 printf("linux_sys_futex: unimplemented op %d\n", 265 args->op); 266 break; 267 268 case LINUX_FUTEX_WAKE_OP: 269 #ifdef __i386__ 270 FUTEX_SYSTEM_LOCK; 271 #ifdef DEBUG 272 if (ldebug(sys_futex)) 273 printf("FUTEX_WAKE_OP: %d: uaddr = %p, op = %d, val = %d, uaddr2 = %p, val3 = %d\n", 274 td->td_proc->p_pid, args->uaddr, args->op, args->val, args->uaddr2, args->val3); 275 #endif 276 f = futex_get(args->uaddr, FUTEX_UNLOCKED); 277 f2 = futex_get(args->uaddr2, FUTEX_UNLOCKED); 278 279 /* This function returns positive number as results 280 * and negative as errors 281 */ 282 op_ret = futex_atomic_op(td, args->val3, args->uaddr2); 283 if (op_ret < 0) { 284 285 /* XXX: we dont handle the EFAULT yet */ 286 if (op_ret != -EFAULT) { 287 futex_put(f); 288 futex_put(f2); 289 FUTEX_SYSTEM_UNLOCK; 290 return (-op_ret); 291 } 292 293 futex_put(f); 294 futex_put(f2); 295 296 FUTEX_SYSTEM_UNLOCK; 297 return (EFAULT); 298 299 } 300 301 ret = futex_wake(f, args->val, NULL); 302 futex_put(f); 303 if (op_ret > 0) { 304 printf("second wakeup\n"); 305 op_ret = 0; 306 /* Linux always puts there 0 retries */ 307 op_ret += futex_wake(f2, 0, NULL); 308 ret += op_ret; 309 } 310 futex_put(f2); 311 td->td_retval[0] = ret; 312 313 FUTEX_SYSTEM_UNLOCK; 314 #else 315 printf("linux_sys_futex: wake_op not implemented"); 316 #endif 317 break; 318 319 default: 320 printf("linux_sys_futex: unknown op %d\n", 321 args->op); 322 break; 323 } 324 return 0; 325 } 326 327 static struct futex * 328 futex_get(void *uaddr, int locked) 329 { 330 struct futex *f; 331 332 if (locked == FUTEX_UNLOCKED) 333 FUTEX_LOCK; 334 LIST_FOREACH(f, &futex_list, f_list) { 335 if (f->f_uaddr == uaddr) { 336 f->f_refcount++; 337 if (locked == FUTEX_UNLOCKED) 338 FUTEX_UNLOCK; 339 return f; 340 } 341 } 342 if (locked == FUTEX_UNLOCKED) 343 FUTEX_UNLOCK; 344 345 /* Not found, create it */ 346 f = malloc(sizeof(*f), M_LINUX, M_WAITOK); 347 f->f_uaddr = uaddr; 348 f->f_refcount = 1; 349 TAILQ_INIT(&f->f_waiting_proc); 350 if (locked == FUTEX_UNLOCKED) 351 FUTEX_LOCK; 352 LIST_INSERT_HEAD(&futex_list, f, f_list); 353 if (locked == FUTEX_UNLOCKED) 354 FUTEX_UNLOCK; 355 356 return f; 357 } 358 359 static void 360 futex_put(f) 361 struct futex *f; 362 { 363 FUTEX_LOCK; 364 f->f_refcount--; 365 if (f->f_refcount == 0) { 366 LIST_REMOVE(f, f_list); 367 free(f, M_LINUX); 368 } 369 FUTEX_UNLOCK; 370 371 return; 372 } 373 374 static int 375 futex_sleep(struct futex *f, struct thread *td, unsigned long timeout) 376 { 377 struct waiting_proc *wp; 378 int ret; 379 380 wp = malloc(sizeof(*wp), M_LINUX, M_WAITOK); 381 wp->wp_t = td; 382 wp->wp_new_futex = NULL; 383 FUTEX_LOCK; 384 TAILQ_INSERT_TAIL(&f->f_waiting_proc, wp, wp_list); 385 FUTEX_UNLOCK; 386 387 #ifdef DEBUG 388 if (ldebug(sys_futex)) 389 printf("FUTEX --> %d tlseep timeout = %ld\n", td->td_proc->p_pid, 390 timeout); 391 #endif 392 ret = tsleep(wp, PCATCH|PZERO, "linuxfutex", timeout); 393 394 FUTEX_LOCK; 395 TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list); 396 FUTEX_UNLOCK; 397 398 if ((ret == 0) && (wp->wp_new_futex != NULL)) { 399 ret = futex_sleep(wp->wp_new_futex, td, timeout); 400 futex_put(wp->wp_new_futex); /* futex_get called in wakeup */ 401 } 402 403 free(wp, M_LINUX); 404 405 return ret; 406 } 407 408 static int 409 futex_wake(struct futex *f, int n, struct futex *newf) 410 { 411 struct waiting_proc *wp; 412 int count = 0; 413 414 FUTEX_LOCK; 415 TAILQ_FOREACH(wp, &f->f_waiting_proc, wp_list) { 416 if (count <= n) { 417 wakeup(wp); 418 count++; 419 } else { 420 if (newf != NULL) { 421 /* futex_put called after tsleep */ 422 wp->wp_new_futex = futex_get(newf->f_uaddr, FUTEX_LOCKED); 423 wakeup(wp); 424 } 425 } 426 } 427 FUTEX_UNLOCK; 428 429 return count; 430 } 431 432 #ifdef __i386__ 433 static int 434 futex_atomic_op(struct thread *td, int encoded_op, caddr_t uaddr) 435 { 436 int op = (encoded_op >> 28) & 7; 437 int cmp = (encoded_op >> 24) & 15; 438 int oparg = (encoded_op << 8) >> 20; 439 int cmparg = (encoded_op << 20) >> 20; 440 int oldval = 0, ret; 441 442 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 443 oparg = 1 << oparg; 444 445 #ifdef DEBUG 446 printf("futex_atomic_op: op = %d, cmp = %d, oparg = %d, cmparg = %d, uaddr = %p\n", 447 op, cmp, oparg, cmparg, uaddr); 448 #endif 449 /* XXX: linux verifies access here and returns EFAULT */ 450 451 critical_enter(); 452 453 switch (op) { 454 case FUTEX_OP_SET: 455 ret = futex_xchgl(oparg, uaddr, &oldval); 456 break; 457 case FUTEX_OP_ADD: 458 ret = futex_addl(oparg, uaddr, &oldval); 459 break; 460 case FUTEX_OP_OR: 461 ret = futex_orl(oparg, uaddr, &oldval); 462 break; 463 case FUTEX_OP_ANDN: 464 ret = futex_andnl(oparg, uaddr, &oldval); 465 break; 466 case FUTEX_OP_XOR: 467 ret = futex_xorl(oparg, uaddr, &oldval); 468 break; 469 default: 470 ret = -ENOSYS; 471 } 472 473 critical_exit(); 474 475 if (!ret) 476 switch (cmp) { 477 case FUTEX_OP_CMP_EQ: 478 ret = (oldval == cmparg); 479 break; 480 case FUTEX_OP_CMP_NE: 481 ret = (oldval != cmparg); 482 break; 483 case FUTEX_OP_CMP_LT: 484 ret = (oldval < cmparg); 485 break; 486 case FUTEX_OP_CMP_GE: 487 ret = (oldval >= cmparg); 488 break; 489 case FUTEX_OP_CMP_LE: 490 ret = (oldval <= cmparg); 491 break; 492 case FUTEX_OP_CMP_GT: 493 ret = (oldval > cmparg); 494 break; 495 default: ret = -ENOSYS; 496 } 497 498 return (ret); 499 } 500 #endif 501