1 // SPDX-License-Identifier: GPL-2.0-or-later 2 3 #include <linux/syscalls.h> 4 #include <linux/time_namespace.h> 5 6 #include "futex.h" 7 8 /* 9 * Support for robust futexes: the kernel cleans up held futexes at 10 * thread exit time. 11 * 12 * Implementation: user-space maintains a per-thread list of locks it 13 * is holding. Upon do_exit(), the kernel carefully walks this list, 14 * and marks all locks that are owned by this thread with the 15 * FUTEX_OWNER_DIED bit, and wakes up a waiter (if any). The list is 16 * always manipulated with the lock held, so the list is private and 17 * per-thread. Userspace also maintains a per-thread 'list_op_pending' 18 * field, to allow the kernel to clean up if the thread dies after 19 * acquiring the lock, but just before it could have added itself to 20 * the list. There can only be one such pending lock. 21 */ 22 23 /** 24 * sys_set_robust_list() - Set the robust-futex list head of a task 25 * @head: pointer to the list-head 26 * @len: length of the list-head, as userspace expects 27 */ 28 SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head, 29 size_t, len) 30 { 31 /* 32 * The kernel knows only one size for now: 33 */ 34 if (unlikely(len != sizeof(*head))) 35 return -EINVAL; 36 37 current->robust_list = head; 38 39 return 0; 40 } 41 42 static inline void __user *futex_task_robust_list(struct task_struct *p, bool compat) 43 { 44 #ifdef CONFIG_COMPAT 45 if (compat) 46 return p->compat_robust_list; 47 #endif 48 return p->robust_list; 49 } 50 51 static void __user *futex_get_robust_list_common(int pid, bool compat) 52 { 53 struct task_struct *p = current; 54 void __user *head; 55 int ret; 56 57 scoped_guard(rcu) { 58 if (pid) { 59 p = find_task_by_vpid(pid); 60 if (!p) 61 return (void __user *)ERR_PTR(-ESRCH); 62 } 63 get_task_struct(p); 64 } 65 66 /* 67 * Hold exec_update_lock to serialize with concurrent exec() 68 * so ptrace_may_access() is checked against stable credentials 69 */ 70 ret = down_read_killable(&p->signal->exec_update_lock); 71 if (ret) 72 goto err_put; 73 74 ret = -EPERM; 75 if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS)) 76 goto err_unlock; 77 78 head = futex_task_robust_list(p, compat); 79 80 up_read(&p->signal->exec_update_lock); 81 put_task_struct(p); 82 83 return head; 84 85 err_unlock: 86 up_read(&p->signal->exec_update_lock); 87 err_put: 88 put_task_struct(p); 89 return (void __user *)ERR_PTR(ret); 90 } 91 92 /** 93 * sys_get_robust_list() - Get the robust-futex list head of a task 94 * @pid: pid of the process [zero for current task] 95 * @head_ptr: pointer to a list-head pointer, the kernel fills it in 96 * @len_ptr: pointer to a length field, the kernel fills in the header size 97 */ 98 SYSCALL_DEFINE3(get_robust_list, int, pid, 99 struct robust_list_head __user * __user *, head_ptr, 100 size_t __user *, len_ptr) 101 { 102 struct robust_list_head __user *head = futex_get_robust_list_common(pid, false); 103 104 if (IS_ERR(head)) 105 return PTR_ERR(head); 106 107 if (put_user(sizeof(*head), len_ptr)) 108 return -EFAULT; 109 return put_user(head, head_ptr); 110 } 111 112 long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, 113 u32 __user *uaddr2, u32 val2, u32 val3) 114 { 115 unsigned int flags = futex_to_flags(op); 116 int cmd = op & FUTEX_CMD_MASK; 117 118 if (flags & FLAGS_CLOCKRT) { 119 if (cmd != FUTEX_WAIT_BITSET && 120 cmd != FUTEX_WAIT_REQUEUE_PI && 121 cmd != FUTEX_LOCK_PI2) 122 return -ENOSYS; 123 } 124 125 switch (cmd) { 126 case FUTEX_WAIT: 127 val3 = FUTEX_BITSET_MATCH_ANY; 128 fallthrough; 129 case FUTEX_WAIT_BITSET: 130 return futex_wait(uaddr, flags, val, timeout, val3); 131 case FUTEX_WAKE: 132 val3 = FUTEX_BITSET_MATCH_ANY; 133 fallthrough; 134 case FUTEX_WAKE_BITSET: 135 return futex_wake(uaddr, flags, val, val3); 136 case FUTEX_REQUEUE: 137 return futex_requeue(uaddr, flags, uaddr2, flags, val, val2, NULL, 0); 138 case FUTEX_CMP_REQUEUE: 139 return futex_requeue(uaddr, flags, uaddr2, flags, val, val2, &val3, 0); 140 case FUTEX_WAKE_OP: 141 return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3); 142 case FUTEX_LOCK_PI: 143 flags |= FLAGS_CLOCKRT; 144 fallthrough; 145 case FUTEX_LOCK_PI2: 146 return futex_lock_pi(uaddr, flags, timeout, 0); 147 case FUTEX_UNLOCK_PI: 148 return futex_unlock_pi(uaddr, flags); 149 case FUTEX_TRYLOCK_PI: 150 return futex_lock_pi(uaddr, flags, NULL, 1); 151 case FUTEX_WAIT_REQUEUE_PI: 152 val3 = FUTEX_BITSET_MATCH_ANY; 153 return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3, 154 uaddr2); 155 case FUTEX_CMP_REQUEUE_PI: 156 return futex_requeue(uaddr, flags, uaddr2, flags, val, val2, &val3, 1); 157 } 158 return -ENOSYS; 159 } 160 161 static __always_inline bool futex_cmd_has_timeout(u32 cmd) 162 { 163 switch (cmd) { 164 case FUTEX_WAIT: 165 case FUTEX_LOCK_PI: 166 case FUTEX_LOCK_PI2: 167 case FUTEX_WAIT_BITSET: 168 case FUTEX_WAIT_REQUEUE_PI: 169 return true; 170 } 171 return false; 172 } 173 174 static __always_inline int 175 futex_init_timeout(u32 cmd, u32 op, struct timespec64 *ts, ktime_t *t) 176 { 177 if (!timespec64_valid(ts)) 178 return -EINVAL; 179 180 *t = timespec64_to_ktime(*ts); 181 if (cmd == FUTEX_WAIT) 182 *t = ktime_add_safe(ktime_get(), *t); 183 else if (cmd != FUTEX_LOCK_PI && !(op & FUTEX_CLOCK_REALTIME)) 184 *t = timens_ktime_to_host(CLOCK_MONOTONIC, *t); 185 return 0; 186 } 187 188 SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, 189 const struct __kernel_timespec __user *, utime, 190 u32 __user *, uaddr2, u32, val3) 191 { 192 int ret, cmd = op & FUTEX_CMD_MASK; 193 ktime_t t, *tp = NULL; 194 struct timespec64 ts; 195 196 if (utime && futex_cmd_has_timeout(cmd)) { 197 if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG)))) 198 return -EFAULT; 199 if (get_timespec64(&ts, utime)) 200 return -EFAULT; 201 ret = futex_init_timeout(cmd, op, &ts, &t); 202 if (ret) 203 return ret; 204 tp = &t; 205 } 206 207 return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3); 208 } 209 210 /** 211 * futex_parse_waitv - Parse a waitv array from userspace 212 * @futexv: Kernel side list of waiters to be filled 213 * @uwaitv: Userspace list to be parsed 214 * @nr_futexes: Length of futexv 215 * @wake: Wake to call when futex is woken 216 * @wake_data: Data for the wake handler 217 * 218 * Return: Error code on failure, 0 on success 219 */ 220 int futex_parse_waitv(struct futex_vector *futexv, 221 struct futex_waitv __user *uwaitv, 222 unsigned int nr_futexes, futex_wake_fn *wake, 223 void *wake_data) 224 { 225 struct futex_waitv aux; 226 unsigned int i; 227 228 for (i = 0; i < nr_futexes; i++) { 229 unsigned int flags; 230 231 if (copy_from_user(&aux, &uwaitv[i], sizeof(aux))) 232 return -EFAULT; 233 234 if ((aux.flags & ~FUTEX2_VALID_MASK) || aux.__reserved) 235 return -EINVAL; 236 237 flags = futex2_to_flags(aux.flags); 238 if (!futex_flags_valid(flags)) 239 return -EINVAL; 240 241 if (!futex_validate_input(flags, aux.val)) 242 return -EINVAL; 243 244 futexv[i].w.flags = flags; 245 futexv[i].w.val = aux.val; 246 futexv[i].w.uaddr = aux.uaddr; 247 futexv[i].q = futex_q_init; 248 futexv[i].q.wake = wake; 249 futexv[i].q.wake_data = wake_data; 250 } 251 252 return 0; 253 } 254 255 static int futex2_setup_timeout(struct __kernel_timespec __user *timeout, 256 clockid_t clockid, struct hrtimer_sleeper *to) 257 { 258 int flag_clkid = 0, flag_init = 0; 259 struct timespec64 ts; 260 ktime_t time; 261 int ret; 262 263 if (!timeout) 264 return 0; 265 266 if (clockid == CLOCK_REALTIME) { 267 flag_clkid = FLAGS_CLOCKRT; 268 flag_init = FUTEX_CLOCK_REALTIME; 269 } 270 271 if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC) 272 return -EINVAL; 273 274 if (get_timespec64(&ts, timeout)) 275 return -EFAULT; 276 277 /* 278 * Since there's no opcode for futex_waitv, use 279 * FUTEX_WAIT_BITSET that uses absolute timeout as well 280 */ 281 ret = futex_init_timeout(FUTEX_WAIT_BITSET, flag_init, &ts, &time); 282 if (ret) 283 return ret; 284 285 futex_setup_timer(&time, to, flag_clkid, 0); 286 return 0; 287 } 288 289 static inline void futex2_destroy_timeout(struct hrtimer_sleeper *to) 290 { 291 hrtimer_cancel(&to->timer); 292 destroy_hrtimer_on_stack(&to->timer); 293 } 294 295 /** 296 * sys_futex_waitv - Wait on a list of futexes 297 * @waiters: List of futexes to wait on 298 * @nr_futexes: Length of futexv 299 * @flags: Flag for timeout (monotonic/realtime) 300 * @timeout: Optional absolute timeout. 301 * @clockid: Clock to be used for the timeout, realtime or monotonic. 302 * 303 * Given an array of `struct futex_waitv`, wait on each uaddr. The thread wakes 304 * if a futex_wake() is performed at any uaddr. The syscall returns immediately 305 * if any waiter has *uaddr != val. *timeout is an optional timeout value for 306 * the operation. Each waiter has individual flags. The `flags` argument for 307 * the syscall should be used solely for specifying the timeout as realtime, if 308 * needed. Flags for private futexes, sizes, etc. should be used on the 309 * individual flags of each waiter. 310 * 311 * Returns the array index of one of the woken futexes. No further information 312 * is provided: any number of other futexes may also have been woken by the 313 * same event, and if more than one futex was woken, the retrned index may 314 * refer to any one of them. (It is not necessaryily the futex with the 315 * smallest index, nor the one most recently woken, nor...) 316 */ 317 318 SYSCALL_DEFINE5(futex_waitv, struct futex_waitv __user *, waiters, 319 unsigned int, nr_futexes, unsigned int, flags, 320 struct __kernel_timespec __user *, timeout, clockid_t, clockid) 321 { 322 struct hrtimer_sleeper to; 323 struct futex_vector *futexv; 324 int ret; 325 326 /* This syscall supports no flags for now */ 327 if (flags) 328 return -EINVAL; 329 330 if (!nr_futexes || nr_futexes > FUTEX_WAITV_MAX || !waiters) 331 return -EINVAL; 332 333 if (timeout && (ret = futex2_setup_timeout(timeout, clockid, &to))) 334 return ret; 335 336 futexv = kcalloc(nr_futexes, sizeof(*futexv), GFP_KERNEL); 337 if (!futexv) { 338 ret = -ENOMEM; 339 goto destroy_timer; 340 } 341 342 ret = futex_parse_waitv(futexv, waiters, nr_futexes, futex_wake_mark, 343 NULL); 344 if (!ret) 345 ret = futex_wait_multiple(futexv, nr_futexes, timeout ? &to : NULL); 346 347 kfree(futexv); 348 349 destroy_timer: 350 if (timeout) 351 futex2_destroy_timeout(&to); 352 return ret; 353 } 354 355 /* 356 * sys_futex_wake - Wake a number of futexes 357 * @uaddr: Address of the futex(es) to wake 358 * @mask: bitmask 359 * @nr: Number of the futexes to wake 360 * @flags: FUTEX2 flags 361 * 362 * Identical to the traditional FUTEX_WAKE_BITSET op, except it is part of the 363 * futex2 family of calls. 364 */ 365 366 SYSCALL_DEFINE4(futex_wake, 367 void __user *, uaddr, 368 unsigned long, mask, 369 int, nr, 370 unsigned int, flags) 371 { 372 if (flags & ~FUTEX2_VALID_MASK) 373 return -EINVAL; 374 375 flags = futex2_to_flags(flags); 376 if (!futex_flags_valid(flags)) 377 return -EINVAL; 378 379 if (!futex_validate_input(flags, mask)) 380 return -EINVAL; 381 382 return futex_wake(uaddr, FLAGS_STRICT | flags, nr, mask); 383 } 384 385 /* 386 * sys_futex_wait - Wait on a futex 387 * @uaddr: Address of the futex to wait on 388 * @val: Value of @uaddr 389 * @mask: bitmask 390 * @flags: FUTEX2 flags 391 * @timeout: Optional absolute timeout 392 * @clockid: Clock to be used for the timeout, realtime or monotonic 393 * 394 * Identical to the traditional FUTEX_WAIT_BITSET op, except it is part of the 395 * futex2 familiy of calls. 396 */ 397 398 SYSCALL_DEFINE6(futex_wait, 399 void __user *, uaddr, 400 unsigned long, val, 401 unsigned long, mask, 402 unsigned int, flags, 403 struct __kernel_timespec __user *, timeout, 404 clockid_t, clockid) 405 { 406 struct hrtimer_sleeper to; 407 int ret; 408 409 if (flags & ~FUTEX2_VALID_MASK) 410 return -EINVAL; 411 412 flags = futex2_to_flags(flags); 413 if (!futex_flags_valid(flags)) 414 return -EINVAL; 415 416 if (!futex_validate_input(flags, val) || 417 !futex_validate_input(flags, mask)) 418 return -EINVAL; 419 420 if (timeout && (ret = futex2_setup_timeout(timeout, clockid, &to))) 421 return ret; 422 423 ret = __futex_wait(uaddr, flags, val, timeout ? &to : NULL, mask); 424 425 if (timeout) 426 futex2_destroy_timeout(&to); 427 428 return ret; 429 } 430 431 /* 432 * sys_futex_requeue - Requeue a waiter from one futex to another 433 * @waiters: array describing the source and destination futex 434 * @flags: unused 435 * @nr_wake: number of futexes to wake 436 * @nr_requeue: number of futexes to requeue 437 * 438 * Identical to the traditional FUTEX_CMP_REQUEUE op, except it is part of the 439 * futex2 family of calls. 440 */ 441 442 SYSCALL_DEFINE4(futex_requeue, 443 struct futex_waitv __user *, waiters, 444 unsigned int, flags, 445 int, nr_wake, 446 int, nr_requeue) 447 { 448 struct futex_vector futexes[2]; 449 u32 cmpval; 450 int ret; 451 452 if (flags) 453 return -EINVAL; 454 455 if (!waiters) 456 return -EINVAL; 457 458 ret = futex_parse_waitv(futexes, waiters, 2, futex_wake_mark, NULL); 459 if (ret) 460 return ret; 461 462 cmpval = futexes[0].w.val; 463 464 return futex_requeue(u64_to_user_ptr(futexes[0].w.uaddr), futexes[0].w.flags, 465 u64_to_user_ptr(futexes[1].w.uaddr), futexes[1].w.flags, 466 nr_wake, nr_requeue, &cmpval, 0); 467 } 468 469 #ifdef CONFIG_COMPAT 470 COMPAT_SYSCALL_DEFINE2(set_robust_list, 471 struct compat_robust_list_head __user *, head, 472 compat_size_t, len) 473 { 474 if (unlikely(len != sizeof(*head))) 475 return -EINVAL; 476 477 current->compat_robust_list = head; 478 479 return 0; 480 } 481 482 COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid, 483 compat_uptr_t __user *, head_ptr, 484 compat_size_t __user *, len_ptr) 485 { 486 struct compat_robust_list_head __user *head = futex_get_robust_list_common(pid, true); 487 488 if (IS_ERR(head)) 489 return PTR_ERR(head); 490 491 if (put_user(sizeof(*head), len_ptr)) 492 return -EFAULT; 493 return put_user(ptr_to_compat(head), head_ptr); 494 } 495 #endif /* CONFIG_COMPAT */ 496 497 #ifdef CONFIG_COMPAT_32BIT_TIME 498 SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, 499 const struct old_timespec32 __user *, utime, u32 __user *, uaddr2, 500 u32, val3) 501 { 502 int ret, cmd = op & FUTEX_CMD_MASK; 503 ktime_t t, *tp = NULL; 504 struct timespec64 ts; 505 506 if (utime && futex_cmd_has_timeout(cmd)) { 507 if (get_old_timespec32(&ts, utime)) 508 return -EFAULT; 509 ret = futex_init_timeout(cmd, op, &ts, &t); 510 if (ret) 511 return ret; 512 tp = &t; 513 } 514 515 return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3); 516 } 517 #endif /* CONFIG_COMPAT_32BIT_TIME */ 518 519