/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _FUTEX_H
#define _FUTEX_H

#include <linux/futex.h>
#include <linux/rtmutex.h>
#include <linux/sched/wake_q.h>
#include <linux/compat.h>
#include <linux/uaccess.h>
#include <linux/cleanup.h>

#ifdef CONFIG_PREEMPT_RT
#include <linux/rcuwait.h>
#endif

#include <asm/futex.h>

/*
 * Futex flags used to encode options to functions and preserve them across
 * restarts.
 */
#define FLAGS_SIZE_8		0x0000
#define FLAGS_SIZE_16		0x0001
#define FLAGS_SIZE_32		0x0002
#define FLAGS_SIZE_64		0x0003

#define FLAGS_SIZE_MASK		0x0003

#ifdef CONFIG_MMU
# define FLAGS_SHARED		0x0010
#else
/*
 * NOMMU does not have per process address space. Let the compiler optimize
 * code away.
 */
# define FLAGS_SHARED		0x0000
#endif
#define FLAGS_CLOCKRT		0x0020
#define FLAGS_HAS_TIMEOUT	0x0040
#define FLAGS_NUMA		0x0080
#define FLAGS_STRICT		0x0100

/* FUTEX_ to FLAGS_ */
static inline unsigned int futex_to_flags(unsigned int op)
{
	unsigned int flags = FLAGS_SIZE_32;

	if (!(op & FUTEX_PRIVATE_FLAG))
		flags |= FLAGS_SHARED;

	if (op & FUTEX_CLOCK_REALTIME)
		flags |= FLAGS_CLOCKRT;

	return flags;
}

#define FUTEX2_VALID_MASK (FUTEX2_SIZE_MASK | FUTEX2_NUMA | FUTEX2_PRIVATE)

/* FUTEX2_ to FLAGS_ */
static inline unsigned int futex2_to_flags(unsigned int flags2)
{
	unsigned int flags = flags2 & FUTEX2_SIZE_MASK;

	if (!(flags2 & FUTEX2_PRIVATE))
		flags |= FLAGS_SHARED;

	if (flags2 & FUTEX2_NUMA)
		flags |= FLAGS_NUMA;

	return flags;
}

static inline unsigned int futex_size(unsigned int flags)
{
	return 1 << (flags & FLAGS_SIZE_MASK);
}

static inline bool futex_flags_valid(unsigned int flags)
{
	/* Only 64bit futexes for 64bit code */
	if (!IS_ENABLED(CONFIG_64BIT) || in_compat_syscall()) {
		if ((flags & FLAGS_SIZE_MASK) == FLAGS_SIZE_64)
			return false;
	}

	/* Only 32bit futexes are implemented -- for now */
	if ((flags & FLAGS_SIZE_MASK) != FLAGS_SIZE_32)
		return false;

	/*
	 * Must be able to represent both FUTEX_NO_NODE and every valid nodeid
	 * in a futex word.
	 */
	if (flags & FLAGS_NUMA) {
		int bits = 8 * futex_size(flags);
		u64 max = ~0ULL;

		max >>= 64 - bits;
		if (nr_node_ids >= max)
			return false;
	}

	return true;
}

static inline bool futex_validate_input(unsigned int flags, u64 val)
{
	int bits = 8 * futex_size(flags);

	if (bits < 64 && (val >> bits))
		return false;

	return true;
}

#ifdef CONFIG_FAIL_FUTEX
extern bool should_fail_futex(bool fshared);
#else
static inline bool should_fail_futex(bool fshared)
{
	return false;
}
#endif
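
/*
 * Illustrative sketch (not part of the API): how the conversion and
 * validation helpers above compose for a hypothetical private 32bit
 * futex2 operation. All values here are made up for the example:
 *
 *	unsigned int flags = futex2_to_flags(FUTEX2_SIZE_U32 | FUTEX2_PRIVATE);
 *
 *	futex_size(flags);			 // 1 << FLAGS_SIZE_32 == 4 bytes
 *	futex_flags_valid(flags);		 // true: 32bit is implemented
 *	futex_validate_input(flags, 1ULL << 40); // false: value needs > 32 bits
 */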
/*
 * Hash buckets are shared by all the futex_keys that hash to the same
 * location. Each key may have multiple futex_q structures, one for each task
 * waiting on a futex.
 */
struct futex_hash_bucket {
	atomic_t waiters;
	spinlock_t lock;
	struct plist_head chain;
	struct futex_private_hash *priv;
} ____cacheline_aligned_in_smp;

/*
 * Priority Inheritance state:
 */
struct futex_pi_state {
	/*
	 * list of 'owned' pi_state instances - these have to be
	 * cleaned up in do_exit() if the task exits prematurely:
	 */
	struct list_head list;

	/*
	 * The PI object:
	 */
	struct rt_mutex_base pi_mutex;

	struct task_struct *owner;
	refcount_t refcount;

	union futex_key key;
} __randomize_layout;

struct futex_q;
typedef void (futex_wake_fn)(struct wake_q_head *wake_q, struct futex_q *q);

/**
 * struct futex_q - The hashed futex queue entry, one per waiting task
 * @list:		priority-sorted list of tasks waiting on this futex
 * @task:		the task waiting on the futex
 * @lock_ptr:		the hash bucket lock
 * @wake:		the wake handler for this queue
 * @wake_data:		data associated with the wake handler
 * @key:		the key the futex is hashed on
 * @pi_state:		optional priority inheritance state
 * @rt_waiter:		rt_waiter storage for use with requeue_pi
 * @requeue_pi_key:	the requeue_pi target futex key
 * @bitset:		bitset for the optional bitmasked wakeup
 * @requeue_state:	State field for futex_requeue_pi()
 * @drop_hb_ref:	Waiter should drop the extra hash bucket reference if true
 * @requeue_wait:	RCU wait for futex_requeue_pi() (RT only)
 *
 * We use this hashed waitqueue, instead of a normal wait_queue_entry_t, so
 * we can wake only the relevant ones (hashed queues may be shared).
 *
 * A futex_q has a woken state, just like tasks have TASK_RUNNING.
 * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0.
 * The order of wakeup is always to make the first condition true, then
 * the second.
 *
 * PI futexes are typically woken before they are removed from the hash list via
 * the rt_mutex code. See futex_unqueue_pi().
 */
struct futex_q {
	struct plist_node list;

	struct task_struct *task;
	spinlock_t *lock_ptr;
	futex_wake_fn *wake;
	void *wake_data;
	union futex_key key;
	struct futex_pi_state *pi_state;
	struct rt_mutex_waiter *rt_waiter;
	union futex_key *requeue_pi_key;
	u32 bitset;
	atomic_t requeue_state;
	bool drop_hb_ref;
#ifdef CONFIG_PREEMPT_RT
	struct rcuwait requeue_wait;
#endif
} __randomize_layout;

extern const struct futex_q futex_q_init;

enum futex_access {
	FUTEX_READ,
	FUTEX_WRITE
};
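
/*
 * Illustrative sketch of the "woken" test that the struct futex_q comment
 * above describes; example_futex_q_woken() is a hypothetical name, not a
 * helper this header provides. Wakers first unlink @q from the plist and
 * only then clear ->lock_ptr, so observing either condition means a wakeup
 * happened or is in progress:
 *
 *	static inline bool example_futex_q_woken(struct futex_q *q)
 *	{
 *		return plist_node_empty(&q->list) || !READ_ONCE(q->lock_ptr);
 *	}
 */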
extern int get_futex_key(u32 __user *uaddr, unsigned int flags, union futex_key *key,
			 enum futex_access rw);
extern void futex_q_lockptr_lock(struct futex_q *q);
extern struct hrtimer_sleeper *
futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
		  int flags, u64 range_ns);

extern struct futex_hash_bucket *futex_hash(union futex_key *key);
#ifdef CONFIG_FUTEX_PRIVATE_HASH
extern void futex_hash_get(struct futex_hash_bucket *hb);
extern void futex_hash_put(struct futex_hash_bucket *hb);

extern struct futex_private_hash *futex_private_hash(void);
extern bool futex_private_hash_get(struct futex_private_hash *fph);
extern void futex_private_hash_put(struct futex_private_hash *fph);

#else /* !CONFIG_FUTEX_PRIVATE_HASH */
static inline void futex_hash_get(struct futex_hash_bucket *hb) { }
static inline void futex_hash_put(struct futex_hash_bucket *hb) { }
static inline struct futex_private_hash *futex_private_hash(void) { return NULL; }
static inline bool futex_private_hash_get(void) { return false; }
static inline void futex_private_hash_put(struct futex_private_hash *fph) { }
#endif

DEFINE_CLASS(hb, struct futex_hash_bucket *,
	     if (_T) futex_hash_put(_T),
	     futex_hash(key), union futex_key *key);

DEFINE_CLASS(private_hash, struct futex_private_hash *,
	     if (_T) futex_private_hash_put(_T),
	     futex_private_hash(), void);

/**
 * futex_match - Check whether two futex keys are equal
 * @key1:	Pointer to key1
 * @key2:	Pointer to key2
 *
 * Return 1 if two futex_keys are equal, 0 otherwise.
 */
static inline int futex_match(union futex_key *key1, union futex_key *key2)
{
	return (key1 && key2
		&& key1->both.word == key2->both.word
		&& key1->both.ptr == key2->both.ptr
		&& key1->both.offset == key2->both.offset);
}
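
/*
 * Illustrative sketch (hypothetical caller, not kernel code as-is): the
 * hb/private_hash classes above enable scope-based acquire/release via
 * CLASS() from <linux/cleanup.h>:
 *
 *	union futex_key key = FUTEX_KEY_INIT;
 *
 *	// ... get_futex_key() fills @key ...
 *
 *	{
 *		CLASS(hb, bucket)(&key);	// bucket = futex_hash(&key)
 *
 *		spin_lock(&bucket->lock);
 *		// operate on bucket->chain ...
 *		spin_unlock(&bucket->lock);
 *	}	// futex_hash_put(bucket) runs automatically on scope exit
 */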
extern int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
			    struct futex_q *q, union futex_key *key2,
			    struct task_struct *task);
extern void futex_do_wait(struct futex_q *q, struct hrtimer_sleeper *timeout);
extern bool __futex_wake_mark(struct futex_q *q);
extern void futex_wake_mark(struct wake_q_head *wake_q, struct futex_q *q);

extern int fault_in_user_writeable(u32 __user *uaddr);
extern struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb, union futex_key *key);

static inline int futex_cmpxchg_value_locked(u32 *curval, u32 __user *uaddr, u32 uval, u32 newval)
{
	int ret;

	pagefault_disable();
	ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
	pagefault_enable();

	return ret;
}

/*
 * This does a plain atomic user space read, and the user pointer has
 * already been verified earlier by get_futex_key() to be both aligned
 * and actually in user space, just like futex_atomic_cmpxchg_inatomic().
 *
 * We still want to avoid any speculation, and while __get_user() is
 * the traditional model for this, it's actually slower than doing
 * this manually these days.
 *
 * We could just have a per-architecture special function for it,
 * the same way we do futex_atomic_cmpxchg_inatomic(), but rather
 * than force everybody to do that, write it out long-hand using
 * the low-level user-access infrastructure.
 *
 * This looks a bit overkill, but generally just results in a couple
 * of instructions.
 */
static __always_inline int futex_get_value(u32 *dest, u32 __user *from)
{
	u32 val;

	if (can_do_masked_user_access())
		from = masked_user_access_begin(from);
	else if (!user_read_access_begin(from, sizeof(*from)))
		return -EFAULT;
	unsafe_get_user(val, from, Efault);
	user_read_access_end();
	*dest = val;
	return 0;
Efault:
	user_read_access_end();
	return -EFAULT;
}

static __always_inline int futex_put_value(u32 val, u32 __user *to)
{
	if (can_do_masked_user_access())
		to = masked_user_access_begin(to);
	else if (!user_write_access_begin(to, sizeof(*to)))
		return -EFAULT;
	unsafe_put_user(val, to, Efault);
	user_write_access_end();
	return 0;
Efault:
	user_write_access_end();
	return -EFAULT;
}

static inline int futex_get_value_locked(u32 *dest, u32 __user *from)
{
	int ret;

	pagefault_disable();
	ret = futex_get_value(dest, from);
	pagefault_enable();

	return ret;
}
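
/*
 * Illustrative sketch of the usual calling convention for the _locked
 * helpers above (hypothetical snippet, locking context abbreviated):
 * with a hash bucket lock held, page faults cannot be serviced, so a
 * fault is handled by dropping the lock, faulting the page in, and
 * retrying:
 *
 *	spin_lock(&hb->lock);
 *	ret = futex_get_value_locked(&uval, uaddr);
 *	spin_unlock(&hb->lock);
 *
 *	if (ret) {
 *		if (fault_in_user_writeable(uaddr))
 *			return -EFAULT;	// unresolvable fault
 *		goto retry;		// page is now present, try again
 *	}
 */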
extern void __futex_unqueue(struct futex_q *q);
extern void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb,
			  struct task_struct *task);
extern int futex_unqueue(struct futex_q *q);

/**
 * futex_queue() - Enqueue the futex_q on the futex_hash_bucket
 * @q:		The futex_q to enqueue
 * @hb:		The destination hash bucket
 * @task:	Task queueing this futex
 *
 * The hb->lock must be held by the caller, and is released here. A call to
 * futex_queue() is typically paired with exactly one call to futex_unqueue(). The
 * exceptions involve the PI related operations, which may use futex_unqueue_pi()
 * or nothing if the unqueue is done as part of the wake process and the unqueue
 * state is implicit in the state of woken task (see futex_wait_requeue_pi() for
 * an example).
 *
 * Note that @task may be NULL, for async usage of futexes.
 */
static inline void futex_queue(struct futex_q *q, struct futex_hash_bucket *hb,
			       struct task_struct *task)
	__releases(&hb->lock)
{
	__futex_queue(q, hb, task);
	spin_unlock(&hb->lock);
}

extern void futex_unqueue_pi(struct futex_q *q);

extern void wait_for_owner_exiting(int ret, struct task_struct *exiting);

/*
 * Reflects a new waiter being added to the waitqueue.
 */
static inline void futex_hb_waiters_inc(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	atomic_inc(&hb->waiters);
	/*
	 * Full barrier (A), see the ordering comment above.
	 */
	smp_mb__after_atomic();
#endif
}

/*
 * Reflects a waiter being removed from the waitqueue by wakeup
 * paths.
 */
static inline void futex_hb_waiters_dec(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	atomic_dec(&hb->waiters);
#endif
}

static inline int futex_hb_waiters_pending(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	/*
	 * Full barrier (B), see the ordering comment above.
	 */
	smp_mb();
	return atomic_read(&hb->waiters);
#else
	return 1;
#endif
}

extern void futex_q_lock(struct futex_q *q, struct futex_hash_bucket *hb);
extern void futex_q_unlock(struct futex_hash_bucket *hb);


extern int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
				union futex_key *key,
				struct futex_pi_state **ps,
				struct task_struct *task,
				struct task_struct **exiting,
				int set_waiters);

extern int refill_pi_state_cache(void);
extern void get_pi_state(struct futex_pi_state *pi_state);
extern void put_pi_state(struct futex_pi_state *pi_state);
extern int fixup_pi_owner(u32 __user *uaddr, struct futex_q *q, int locked);

/*
 * Express the locking dependencies for lockdep:
 */
static inline void
double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
{
	if (hb1 > hb2)
		swap(hb1, hb2);

	spin_lock(&hb1->lock);
	if (hb1 != hb2)
		spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING);
}

static inline void
double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
{
	spin_unlock(&hb1->lock);
	if (hb1 != hb2)
		spin_unlock(&hb2->lock);
}

/* syscalls */

extern int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, u32 val,
				 ktime_t *abs_time, u32 bitset, u32 __user *uaddr2);

extern int futex_requeue(u32 __user *uaddr1, unsigned int flags1,
			 u32 __user *uaddr2, unsigned int flags2,
			 int nr_wake, int nr_requeue,
			 u32 *cmpval, int requeue_pi);

extern int __futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
			struct hrtimer_sleeper *to, u32 bitset);

extern int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
		      ktime_t *abs_time, u32 bitset);

/**
 * struct futex_vector - Auxiliary struct for futex_waitv()
 * @w:	Userspace provided data
 * @q:	Kernel side data
 *
 * Struct used to build an array with all data needed for futex_waitv()
 */
struct futex_vector {
	struct futex_waitv w;
	struct futex_q q;
};

extern int futex_parse_waitv(struct futex_vector *futexv,
			     struct futex_waitv __user *uwaitv,
			     unsigned int nr_futexes, futex_wake_fn *wake,
			     void *wake_data);

extern int futex_wait_multiple_setup(struct futex_vector *vs, int count,
				     int *woken);

extern int futex_unqueue_multiple(struct futex_vector *v, int count);

extern int futex_wait_multiple(struct futex_vector *vs, unsigned int count,
			       struct hrtimer_sleeper *to);

extern int futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset);

extern int futex_wake_op(u32 __user *uaddr1, unsigned int flags,
			 u32 __user *uaddr2, int nr_wake, int nr_wake2, int op);

extern int futex_unlock_pi(u32 __user *uaddr, unsigned int flags);

extern int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int trylock);

#endif /* _FUTEX_H */