// SPDX-License-Identifier: GPL-2.0-or-later

#include <linux/syscalls.h>
#include <linux/time_namespace.h>

#include "futex.h"

/*
 * Support for robust futexes: the kernel cleans up held futexes at
 * thread exit time.
 *
 * Implementation: user-space maintains a per-thread list of locks it
 * is holding. Upon do_exit(), the kernel carefully walks this list,
 * and marks all locks that are owned by this thread with the
 * FUTEX_OWNER_DIED bit, and wakes up a waiter (if any). The list is
 * always manipulated with the lock held, so the list is private and
 * per-thread. Userspace also maintains a per-thread 'list_op_pending'
 * field, to allow the kernel to clean up if the thread dies after
 * acquiring the lock, but just before it could have added itself to
 * the list. There can only be one such pending lock.
 */

/**
 * sys_set_robust_list() - Set the robust-futex list head of a task
 * @head:	pointer to the list-head
 * @len:	length of the list-head, as userspace expects
 *
 * Records @head as the robust list head of the calling task.
 *
 * Return: 0 on success, -EINVAL if @len differs from sizeof(*head).
 */
SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head, size_t, len)
{
	/* The kernel knows only one size for now. */
	if (unlikely(len != sizeof(*head)))
		return -EINVAL;

	current->futex.robust_list = head;
	return 0;
}

/*
 * Return the robust list head pointer stored for task @p.
 *
 * With CONFIG_COMPAT enabled and @compat true, the compat pointer is
 * returned; in every other case the native pointer is returned.
 */
static inline void __user *futex_task_robust_list(struct task_struct *p, bool compat)
{
#ifdef CONFIG_COMPAT
	if (compat)
		return p->futex.compat_robust_list;
#endif
	return p->futex.robust_list;
}

/*
 * Common part of the native and compat get_robust_list() syscalls.
 *
 * @pid:	task to query; 0 selects the calling task
 * @compat:	select the compat list head instead of the native one
 *
 * Returns the user-space list head pointer on success. On failure an
 * ERR_PTR()-encoded error is returned:
 *  -ESRCH	find_task_by_vpid() found no task for @pid
 *  -EPERM	ptrace_may_access(PTRACE_MODE_READ_REALCREDS) refused access
 *  other	whatever down_read_killable() returned
 */
static void __user *futex_get_robust_list_common(int pid, bool compat)
{
	struct task_struct *p = current;
	void __user *head;
	int ret;

	scoped_guard(rcu) {
		if (pid) {
			p = find_task_by_vpid(pid);
			if (!p)
				return (void __user *)ERR_PTR(-ESRCH);
		}
		/*
		 * Take the task reference while still inside the RCU guard;
		 * it is dropped again on every exit path below.
		 */
		get_task_struct(p);
	}

	/*
	 * Hold exec_update_lock to serialize with concurrent exec()
	 * so ptrace_may_access() is checked against stable credentials
	 */
	ret = down_read_killable(&p->signal->exec_update_lock);
	if (ret)
		goto err_put;

	/* Preset the error so the failure branch only needs to jump. */
	ret = -EPERM;
	if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
		goto err_unlock;

	/* Read the pointer while exec_update_lock is still held. */
	head = futex_task_robust_list(p, compat);

	up_read(&p->signal->exec_update_lock);
	put_task_struct(p);

	return head;

err_unlock:
	up_read(&p->signal->exec_update_lock);
err_put:
	put_task_struct(p);
	return (void __user *)ERR_PTR(ret);
}

/**
 * sys_get_robust_list() - Get the robust-futex list head of a task
 * @pid:	pid of the process [zero for current task]
 * @head_ptr:	pointer to a list-head pointer, the kernel fills it in
 * @len_ptr:	pointer to a length field, the kernel fills in the header size
 *
 * Return: the error from futex_get_robust_list_common() if the lookup
 * fails, -EFAULT if writing the length to @len_ptr fails, otherwise the
 * result of put_user() on @head_ptr.
 */
SYSCALL_DEFINE3(get_robust_list, int, pid,
		struct robust_list_head __user * __user *, head_ptr,
		size_t __user *, len_ptr)
{
	struct robust_list_head __user *head = futex_get_robust_list_common(pid, false);

	if (IS_ERR(head))
		return PTR_ERR(head);

	/* The length is written first; the head pointer is written last. */
	if (put_user(sizeof(*head), len_ptr))
		return -EFAULT;
	return put_user(head, head_ptr);
}

/*
 * do_futex() - Dispatch a futex operation to its handler
 * @uaddr:	first futex address, passed to every handler
 * @op:		operation word; split into the command (op & FUTEX_CMD_MASK)
 *		and the flags derived by futex_to_flags()
 * @val:	command specific value
 * @timeout:	timeout for the waiting/locking commands, or NULL
 * @uaddr2:	second futex address for the commands that take one
 * @val2:	second command specific value; for the requeue commands and
 *		FUTEX_WAKE_OP it is passed right after @val
 * @val3:	third command specific value; overwritten with
 *		FUTEX_BITSET_MATCH_ANY for FUTEX_WAIT, FUTEX_WAKE and
 *		FUTEX_WAIT_REQUEUE_PI
 *
 * Returns the handler's result, or -ENOSYS for an unknown command or for
 * a flag that is not accepted with the given command.
 */
long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
	      u32 __user *uaddr2, u32 val2, u32 val3)
{
	unsigned int flags = futex_to_flags(op);
	int cmd = op & FUTEX_CMD_MASK;

	/* FLAGS_CLOCKRT is only accepted together with these commands. */
	if (flags & FLAGS_CLOCKRT) {
		if (cmd != FUTEX_WAIT_BITSET &&
		    cmd != FUTEX_WAIT_REQUEUE_PI &&
		    cmd != FUTEX_LOCK_PI2)
			return -ENOSYS;
	}

	/*
	 * FLAGS_ROBUST_UNLOCK is only accepted for the wake and PI unlock
	 * commands, i.e. the handlers below that are handed @uaddr2.
	 */
	if (flags & FLAGS_ROBUST_UNLOCK) {
		if (cmd != FUTEX_WAKE &&
		    cmd != FUTEX_WAKE_BITSET &&
		    cmd != FUTEX_UNLOCK_PI)
			return -ENOSYS;
	}

	switch (cmd) {
	case FUTEX_WAIT:
		/* Plain wait is the bitset wait with the match-any bitset. */
		val3 = FUTEX_BITSET_MATCH_ANY;
		fallthrough;
	case FUTEX_WAIT_BITSET:
		return futex_wait(uaddr, flags, val, timeout, val3);
	case FUTEX_WAKE:
		/* Plain wake is the bitset wake with the match-any bitset. */
		val3 = FUTEX_BITSET_MATCH_ANY;
		fallthrough;
	case FUTEX_WAKE_BITSET:
		return futex_wake(uaddr, flags, uaddr2, val, val3);
	case FUTEX_REQUEUE:
		/* No compare value for the non-CMP variant. */
		return futex_requeue(uaddr, flags, uaddr2, flags, val, val2, NULL, 0);
	case FUTEX_CMP_REQUEUE:
		return futex_requeue(uaddr, flags, uaddr2, flags, val, val2, &val3, 0);
	case FUTEX_WAKE_OP:
		return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
	case FUTEX_LOCK_PI:
		/*
		 * FUTEX_LOCK_PI always runs with FLAGS_CLOCKRT set; only
		 * FUTEX_LOCK_PI2 leaves the choice to the caller's flags.
		 */
		flags |= FLAGS_CLOCKRT;
		fallthrough;
	case FUTEX_LOCK_PI2:
		return futex_lock_pi(uaddr, flags, timeout, 0);
	case FUTEX_UNLOCK_PI:
		return futex_unlock_pi(uaddr, flags, uaddr2);
	case FUTEX_TRYLOCK_PI:
		/* Same handler as the lock commands: no timeout, last arg 1. */
		return futex_lock_pi(uaddr, flags, NULL, 1);
	case FUTEX_WAIT_REQUEUE_PI:
		val3 = FUTEX_BITSET_MATCH_ANY;
		return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3,
					     uaddr2);
	case FUTEX_CMP_REQUEUE_PI:
		return futex_requeue(uaddr, flags, uaddr2, flags, val, val2, &val3, 1);
	}
	return -ENOSYS;
}

/*
 * Tell whether @cmd is one of the commands for which the futex syscalls
 * in this file interpret their 'utime' argument as a timeout. For every
 * command, those syscalls additionally pass the raw 'utime' value to
 * do_futex() as val2.
 */
static __always_inline bool futex_cmd_has_timeout(u32 cmd)
{
	switch (cmd) {
	case FUTEX_WAIT:
	case FUTEX_LOCK_PI:
	case FUTEX_LOCK_PI2:
	case FUTEX_WAIT_BITSET:
	case FUTEX_WAIT_REQUEUE_PI:
		return true;
	}
	return false;
}

/*
 * Convert a user supplied timespec into the ktime_t handed to do_futex().
 *
 * @cmd:	futex command the timeout belongs to
 * @op:		operation word; only FUTEX_CLOCK_REALTIME is inspected
 * @ts:		timeout already copied into kernel memory
 * @t:		result
 *
 * For FUTEX_WAIT the value is added to the current ktime_get() time.
 * For the other commands the value is used as is, except that it is run
 * through timens_ktime_to_host(CLOCK_MONOTONIC, ...) unless @cmd is
 * FUTEX_LOCK_PI or @op has FUTEX_CLOCK_REALTIME set.
 *
 * Returns 0 on success, -EINVAL if timespec64_valid() rejects @ts.
 */
static __always_inline int
futex_init_timeout(u32 cmd, u32 op, struct timespec64 *ts, ktime_t *t)
{
	if (!timespec64_valid(ts))
		return -EINVAL;

	*t = timespec64_to_ktime(*ts);
	if (cmd == FUTEX_WAIT)
		*t = ktime_add_safe(ktime_get(), *t);
	else if (cmd != FUTEX_LOCK_PI && !(op & FUTEX_CLOCK_REALTIME))
		*t = timens_ktime_to_host(CLOCK_MONOTONIC, *t);
	return 0;
}

/*
 * sys_futex - The futex system call
 * @uaddr:	first futex address
 * @op:		operation word (command plus flag bits)
 * @val:	command specific value
 * @utime:	timeout for the commands accepted by futex_cmd_has_timeout();
 *		its raw value is also passed to do_futex() as val2
 * @uaddr2:	second futex address
 * @val3:	third command specific value
 *
 * Returns -EFAULT if the timeout cannot be copied in (or if
 * should_fail_futex() triggers), the error from futex_init_timeout(), or
 * otherwise the result of do_futex().
 */
SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
		const struct __kernel_timespec __user *, utime,
		u32 __user *, uaddr2, u32, val3)
{
	int ret, cmd = op & FUTEX_CMD_MASK;
	ktime_t t, *tp = NULL;
	struct timespec64 ts;

	if (utime && futex_cmd_has_timeout(cmd)) {
		/*
		 * NOTE(review): should_fail_futex() is defined outside this
		 * file; presumably a fault-injection hook - confirm.
		 */
		if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG))))
			return -EFAULT;
		if (get_timespec64(&ts, utime))
			return -EFAULT;
		ret = futex_init_timeout(cmd, op, &ts, &t);
		if (ret)
			return ret;
		tp = &t;
	}

	/* utime doubles as val2 for the commands that do not take a timeout. */
	return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3);
}

/**
 * futex_parse_waitv - Parse a waitv array from userspace
 * @futexv:	Kernel side list of waiters to be filled
 * @uwaitv:     Userspace list to be parsed
 * @nr_futexes: Length of futexv
 * @wake:	Wake to call when futex is woken
 * @wake_data:	Data for the wake handler
 *
 * Entries are validated and filled in order; on failure the entries
 * before the failing one have already been written to @futexv.
 *
 * Return: Error code on failure, 0 on success
 */
int futex_parse_waitv(struct futex_vector *futexv,
		      struct futex_waitv __user *uwaitv,
		      unsigned int nr_futexes, futex_wake_fn *wake,
		      void *wake_data)
{
	struct futex_waitv aux;
	unsigned int i;

	for (i = 0; i < nr_futexes; i++) {
		unsigned int flags;

		if (copy_from_user(&aux, &uwaitv[i], sizeof(aux)))
			return -EFAULT;

		/* Reject unknown flag bits and a non-zero reserved field. */
		if ((aux.flags & ~FUTEX2_VALID_MASK) || aux.__reserved)
			return -EINVAL;

		/* Translate the futex2 flags to the internal flag format. */
		flags = futex2_to_flags(aux.flags);
		if (!futex_flags_valid(flags))
			return -EINVAL;

		if (!futex_validate_input(flags, aux.val))
			return -EINVAL;

		futexv[i].w.flags = flags;
		futexv[i].w.val = aux.val;
		futexv[i].w.uaddr = aux.uaddr;
		/* Start from the template queue entry, then set the wake hook. */
		futexv[i].q = futex_q_init;
		futexv[i].q.wake = wake;
		futexv[i].q.wake_data = wake_data;
	}

	return 0;
}

/*
 * Set up the timer for the futex2 syscalls that take an absolute timeout.
 *
 * @timeout:	user pointer to the timeout; NULL means no timeout
 * @clockid:	CLOCK_REALTIME or CLOCK_MONOTONIC
 * @to:		caller provided sleeper to be set up
 *
 * Returns 0 on success and also when @timeout is NULL, in which case @to
 * is left untouched. Returns -EINVAL for any other @clockid, -EFAULT if
 * the timeout cannot be copied in, or the error from futex_init_timeout().
 * @to is only set up on the successful, non-NULL @timeout path.
 */
static int futex2_setup_timeout(struct __kernel_timespec __user *timeout,
				clockid_t clockid, struct hrtimer_sleeper *to)
{
	int flag_clkid = 0, flag_init = 0;
	struct timespec64 ts;
	ktime_t time;
	int ret;

	if (!timeout)
		return 0;

	/*
	 * flag_clkid goes to futex_setup_timer(), flag_init takes the place
	 * of the 'op' argument of futex_init_timeout(). Both stay 0 for
	 * CLOCK_MONOTONIC.
	 */
	if (clockid == CLOCK_REALTIME) {
		flag_clkid = FLAGS_CLOCKRT;
		flag_init = FUTEX_CLOCK_REALTIME;
	}

	if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC)
		return -EINVAL;

	if (get_timespec64(&ts, timeout))
		return -EFAULT;

	/*
	 * Since there's no opcode for futex_waitv, use
	 * FUTEX_WAIT_BITSET that uses absolute timeout as well
	 */
	ret = futex_init_timeout(FUTEX_WAIT_BITSET, flag_init, &ts, &time);
	if (ret)
		return ret;

	futex_setup_timer(&time, to, flag_clkid, 0);
	return 0;
}

/*
 * Cancel and destroy the timer of a sleeper that was successfully set up
 * by futex2_setup_timeout(). The callers in this file keep the sleeper on
 * their stack and only call this when a timeout was supplied.
 */
static inline void futex2_destroy_timeout(struct hrtimer_sleeper *to)
{
	hrtimer_cancel(&to->timer);
	destroy_hrtimer_on_stack(&to->timer);
}

/**
 * sys_futex_waitv - Wait on a list of futexes
 * @waiters:    List of futexes to wait on
 * @nr_futexes: Length of @waiters
 * @flags:      Must be 0, this syscall supports no flags for now
 * @timeout:    Optional absolute timeout.
 * @clockid:    Clock to be used for the timeout, realtime or monotonic.
 *
 * Given an array of `struct futex_waitv`, wait on each uaddr. The thread wakes
 * if a futex_wake() is performed at any uaddr. The syscall returns immediately
 * if any waiter has *uaddr != val. *timeout is an optional timeout value for
 * the operation. Each waiter has individual flags. The clock used for the
 * timeout is selected with @clockid, not with @flags. Flags for private
 * futexes, sizes, etc. should be used on the individual flags of each waiter.
 *
 * Returns the array index of one of the woken futexes. No further information
 * is provided: any number of other futexes may also have been woken by the
 * same event, and if more than one futex was woken, the returned index may
 * refer to any one of them. (It is not necessarily the futex with the
 * smallest index, nor the one most recently woken, nor...)
 *
 * Fails with -EINVAL if @flags is not 0, @nr_futexes is 0 or larger than
 * FUTEX_WAITV_MAX, or @waiters is NULL, and with -ENOMEM if the kernel side
 * array cannot be allocated. Errors from setting up the timeout and from
 * parsing @waiters are returned as well.
 */

SYSCALL_DEFINE5(futex_waitv, struct futex_waitv __user *, waiters,
		unsigned int, nr_futexes, unsigned int, flags,
		struct __kernel_timespec __user *, timeout, clockid_t, clockid)
{
	struct hrtimer_sleeper to;
	struct futex_vector *futexv;
	int ret;

	/* This syscall supports no flags for now */
	if (flags)
		return -EINVAL;

	if (!nr_futexes || nr_futexes > FUTEX_WAITV_MAX || !waiters)
		return -EINVAL;

	/*
	 * The timer is set up before the allocation; a setup failure
	 * returns directly because there is nothing to undo yet.
	 */
	if (timeout && (ret = futex2_setup_timeout(timeout, clockid, &to)))
		return ret;

	/*
	 * NOTE(review): kzalloc_objs() is defined outside this file;
	 * presumably allocates a zeroed array of nr_futexes elements - confirm.
	 */
	futexv = kzalloc_objs(*futexv, nr_futexes);
	if (!futexv) {
		ret = -ENOMEM;
		goto destroy_timer;
	}

	ret = futex_parse_waitv(futexv, waiters, nr_futexes, futex_wake_mark,
				NULL);
	if (!ret)
		ret = futex_wait_multiple(futexv, nr_futexes, timeout ? &to : NULL);

	kfree(futexv);

destroy_timer:
	/* 'to' was only set up when a timeout was supplied. */
	if (timeout)
		futex2_destroy_timeout(&to);
	return ret;
}

/*
 * sys_futex_wake - Wake a number of futexes
 * @uaddr:	Address of the futex(es) to wake
 * @mask:	bitmask
 * @nr:		Number of the futexes to wake
 * @flags:	FUTEX2 flags
 *
 * Identical to the traditional FUTEX_WAKE_BITSET op, except it is part of the
 * futex2 family of calls.
 *
 * Fails with -EINVAL if @flags has bits outside FUTEX2_VALID_MASK, if the
 * translated flags are rejected by futex_flags_valid(), or if @mask is
 * rejected by futex_validate_input().
 */

SYSCALL_DEFINE4(futex_wake,
		void __user *, uaddr,
		unsigned long, mask,
		int, nr,
		unsigned int, flags)
{
	if (flags & ~FUTEX2_VALID_MASK)
		return -EINVAL;

	/* From here on 'flags' holds the internal flag representation. */
	flags = futex2_to_flags(flags);
	if (!futex_flags_valid(flags))
		return -EINVAL;

	if (!futex_validate_input(flags, mask))
		return -EINVAL;

	/*
	 * Unlike the FUTEX_WAKE path in do_futex(), this entry point always
	 * adds FLAGS_STRICT and passes no second address.
	 */
	return futex_wake(uaddr, FLAGS_STRICT | flags, NULL, nr, mask);
}

/*
 * sys_futex_wait - Wait on a futex
 * @uaddr:	Address of the futex to wait on
 * @val:	Value of @uaddr
 * @mask:	bitmask
 * @flags:	FUTEX2 flags
 * @timeout:	Optional absolute timeout
 * @clockid:	Clock to be used for the timeout, realtime or monotonic
 *
 * Identical to the traditional FUTEX_WAIT_BITSET op, except it is part of the
 * futex2 family of calls.
 *
 * Fails with -EINVAL if @flags has bits outside FUTEX2_VALID_MASK, if the
 * translated flags are rejected by futex_flags_valid(), or if @val or @mask
 * is rejected by futex_validate_input(). Errors from setting up the timeout
 * are returned as well.
 */

SYSCALL_DEFINE6(futex_wait,
		void __user *, uaddr,
		unsigned long, val,
		unsigned long, mask,
		unsigned int, flags,
		struct __kernel_timespec __user *, timeout,
		clockid_t, clockid)
{
	struct hrtimer_sleeper to;
	int ret;

	if (flags & ~FUTEX2_VALID_MASK)
		return -EINVAL;

	/* From here on 'flags' holds the internal flag representation. */
	flags = futex2_to_flags(flags);
	if (!futex_flags_valid(flags))
		return -EINVAL;

	if (!futex_validate_input(flags, val) ||
	    !futex_validate_input(flags, mask))
		return -EINVAL;

	if (timeout && (ret = futex2_setup_timeout(timeout, clockid, &to)))
		return ret;

	ret = __futex_wait(uaddr, flags, val, timeout ? &to : NULL, mask);

	/* 'to' was only set up when a timeout was supplied. */
	if (timeout)
		futex2_destroy_timeout(&to);

	return ret;
}

/*
 * sys_futex_requeue - Requeue a waiter from one futex to another
 * @waiters:	array describing the source and destination futex; exactly
 *		two entries are read, [0] is the source and [1] the destination
 * @flags:	must be 0
 * @nr_wake:	number of futexes to wake
 * @nr_requeue:	number of futexes to requeue
 *
 * Identical to the traditional FUTEX_CMP_REQUEUE op, except it is part of the
 * futex2 family of calls.
 *
 * The value of the source entry is used as the compare value. Fails with
 * -EINVAL if @flags is not 0, @waiters is NULL or the flags of both entries
 * differ; errors from parsing @waiters are returned as well.
 */

SYSCALL_DEFINE4(futex_requeue,
		struct futex_waitv __user *, waiters,
		unsigned int, flags,
		int, nr_wake,
		int, nr_requeue)
{
	struct futex_vector futexes[2];
	u32 cmpval;
	int ret;

	if (flags)
		return -EINVAL;

	if (!waiters)
		return -EINVAL;

	ret = futex_parse_waitv(futexes, waiters, 2, futex_wake_mark, NULL);
	if (ret)
		return ret;

	/*
	 * For now mandate both flags are identical, like the sys_futex()
	 * interface has. If/when we merge the variable sized futex support,
	 * that patch can modify this test to allow a difference in size.
	 */
	if (futexes[0].w.flags != futexes[1].w.flags)
		return -EINVAL;

	/* The compare value is taken from the source entry. */
	cmpval = futexes[0].w.val;

	return futex_requeue(u64_to_user_ptr(futexes[0].w.uaddr), futexes[0].w.flags,
			     u64_to_user_ptr(futexes[1].w.uaddr), futexes[1].w.flags,
			     nr_wake, nr_requeue, &cmpval, 0);
}

#ifdef CONFIG_COMPAT
/*
 * Compat variant of set_robust_list(): records @head as the compat robust
 * list head of the calling task. Returns 0 on success, -EINVAL if @len
 * differs from the size of struct compat_robust_list_head.
 */
COMPAT_SYSCALL_DEFINE2(set_robust_list, struct compat_robust_list_head __user *, head,
		       compat_size_t, len)
{
	if (unlikely(len != sizeof(*head)))
		return -EINVAL;

	current->futex.compat_robust_list = head;
	return 0;
}

/*
 * Compat variant of get_robust_list(): same lookup as the native syscall,
 * but it reads the compat list head and stores it to @head_ptr converted
 * with ptr_to_compat(). The length written to @len_ptr is the size of
 * struct compat_robust_list_head.
 */
COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid,
		       compat_uptr_t __user *, head_ptr,
		       compat_size_t __user *, len_ptr)
{
	struct compat_robust_list_head __user *head = futex_get_robust_list_common(pid, true);

	if (IS_ERR(head))
		return PTR_ERR(head);

	if (put_user(sizeof(*head), len_ptr))
		return -EFAULT;
	return put_user(ptr_to_compat(head), head_ptr);
}
#endif /* CONFIG_COMPAT */

#ifdef CONFIG_COMPAT_32BIT_TIME
/*
 * Variant of the futex syscall whose timeout is a struct old_timespec32.
 * It follows the same steps as the futex syscall above, except that the
 * timeout is read with get_old_timespec32() and there is no
 * should_fail_futex() check.
 */
SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val,
		const struct old_timespec32 __user *, utime, u32 __user *, uaddr2,
		u32, val3)
{
	int ret, cmd = op & FUTEX_CMD_MASK;
	ktime_t t, *tp = NULL;
	struct timespec64 ts;

	if (utime && futex_cmd_has_timeout(cmd)) {
		if (get_old_timespec32(&ts, utime))
			return -EFAULT;
		ret = futex_init_timeout(cmd, op, &ts, &t);
		if (ret)
			return ret;
		tp = &t;
	}

	/* utime doubles as val2 for the commands that do not take a timeout. */
	return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3);
}
#endif /* CONFIG_COMPAT_32BIT_TIME */