1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #ifndef _FUTEX_H 3 #define _FUTEX_H 4 5 #include <linux/futex.h> 6 #include <linux/rtmutex.h> 7 #include <linux/sched/wake_q.h> 8 #include <linux/compat.h> 9 #include <linux/uaccess.h> 10 11 #ifdef CONFIG_PREEMPT_RT 12 #include <linux/rcuwait.h> 13 #endif 14 15 #include <asm/futex.h> 16 17 /* 18 * Futex flags used to encode options to functions and preserve them across 19 * restarts. 20 */ 21 #define FLAGS_SIZE_8 0x0000 22 #define FLAGS_SIZE_16 0x0001 23 #define FLAGS_SIZE_32 0x0002 24 #define FLAGS_SIZE_64 0x0003 25 26 #define FLAGS_SIZE_MASK 0x0003 27 28 #ifdef CONFIG_MMU 29 # define FLAGS_SHARED 0x0010 30 #else 31 /* 32 * NOMMU does not have per process address space. Let the compiler optimize 33 * code away. 34 */ 35 # define FLAGS_SHARED 0x0000 36 #endif 37 #define FLAGS_CLOCKRT 0x0020 38 #define FLAGS_HAS_TIMEOUT 0x0040 39 #define FLAGS_NUMA 0x0080 40 #define FLAGS_STRICT 0x0100 41 42 /* FUTEX_ to FLAGS_ */ 43 static inline unsigned int futex_to_flags(unsigned int op) 44 { 45 unsigned int flags = FLAGS_SIZE_32; 46 47 if (!(op & FUTEX_PRIVATE_FLAG)) 48 flags |= FLAGS_SHARED; 49 50 if (op & FUTEX_CLOCK_REALTIME) 51 flags |= FLAGS_CLOCKRT; 52 53 return flags; 54 } 55 56 #define FUTEX2_VALID_MASK (FUTEX2_SIZE_MASK | FUTEX2_PRIVATE) 57 58 /* FUTEX2_ to FLAGS_ */ 59 static inline unsigned int futex2_to_flags(unsigned int flags2) 60 { 61 unsigned int flags = flags2 & FUTEX2_SIZE_MASK; 62 63 if (!(flags2 & FUTEX2_PRIVATE)) 64 flags |= FLAGS_SHARED; 65 66 if (flags2 & FUTEX2_NUMA) 67 flags |= FLAGS_NUMA; 68 69 return flags; 70 } 71 72 static inline unsigned int futex_size(unsigned int flags) 73 { 74 return 1 << (flags & FLAGS_SIZE_MASK); 75 } 76 77 static inline bool futex_flags_valid(unsigned int flags) 78 { 79 /* Only 64bit futexes for 64bit code */ 80 if (!IS_ENABLED(CONFIG_64BIT) || in_compat_syscall()) { 81 if ((flags & FLAGS_SIZE_MASK) == FLAGS_SIZE_64) 82 return false; 83 } 84 85 /* Only 32bit futexes are implemented -- for now */ 86 if ((flags & FLAGS_SIZE_MASK) != FLAGS_SIZE_32) 87 return false; 88 89 return true; 90 } 91 92 static inline bool futex_validate_input(unsigned int flags, u64 val) 93 { 94 int bits = 8 * futex_size(flags); 95 96 if (bits < 64 && (val >> bits)) 97 return false; 98 99 return true; 100 } 101 102 #ifdef CONFIG_FAIL_FUTEX 103 extern bool should_fail_futex(bool fshared); 104 #else 105 static inline bool should_fail_futex(bool fshared) 106 { 107 return false; 108 } 109 #endif 110 111 /* 112 * Hash buckets are shared by all the futex_keys that hash to the same 113 * location. Each key may have multiple futex_q structures, one for each task 114 * waiting on a futex. 115 */ 116 struct futex_hash_bucket { 117 atomic_t waiters; 118 spinlock_t lock; 119 struct plist_head chain; 120 } ____cacheline_aligned_in_smp; 121 122 /* 123 * Priority Inheritance state: 124 */ 125 struct futex_pi_state { 126 /* 127 * list of 'owned' pi_state instances - these have to be 128 * cleaned up in do_exit() if the task exits prematurely: 129 */ 130 struct list_head list; 131 132 /* 133 * The PI object: 134 */ 135 struct rt_mutex_base pi_mutex; 136 137 struct task_struct *owner; 138 refcount_t refcount; 139 140 union futex_key key; 141 } __randomize_layout; 142 143 struct futex_q; 144 typedef void (futex_wake_fn)(struct wake_q_head *wake_q, struct futex_q *q); 145 146 /** 147 * struct futex_q - The hashed futex queue entry, one per waiting task 148 * @list: priority-sorted list of tasks waiting on this futex 149 * @task: the task waiting on the futex 150 * @lock_ptr: the hash bucket lock 151 * @wake: the wake handler for this queue 152 * @wake_data: data associated with the wake handler 153 * @key: the key the futex is hashed on 154 * @pi_state: optional priority inheritance state 155 * @rt_waiter: rt_waiter storage for use with requeue_pi 156 * @requeue_pi_key: the requeue_pi target futex key 157 * @bitset: bitset for the optional bitmasked wakeup 158 * @requeue_state: State field for futex_requeue_pi() 159 * @requeue_wait: RCU wait for futex_requeue_pi() (RT only) 160 * 161 * We use this hashed waitqueue, instead of a normal wait_queue_entry_t, so 162 * we can wake only the relevant ones (hashed queues may be shared). 163 * 164 * A futex_q has a woken state, just like tasks have TASK_RUNNING. 165 * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0. 166 * The order of wakeup is always to make the first condition true, then 167 * the second. 168 * 169 * PI futexes are typically woken before they are removed from the hash list via 170 * the rt_mutex code. See futex_unqueue_pi(). 171 */ 172 struct futex_q { 173 struct plist_node list; 174 175 struct task_struct *task; 176 spinlock_t *lock_ptr; 177 futex_wake_fn *wake; 178 void *wake_data; 179 union futex_key key; 180 struct futex_pi_state *pi_state; 181 struct rt_mutex_waiter *rt_waiter; 182 union futex_key *requeue_pi_key; 183 u32 bitset; 184 atomic_t requeue_state; 185 #ifdef CONFIG_PREEMPT_RT 186 struct rcuwait requeue_wait; 187 #endif 188 } __randomize_layout; 189 190 extern const struct futex_q futex_q_init; 191 192 enum futex_access { 193 FUTEX_READ, 194 FUTEX_WRITE 195 }; 196 197 extern int get_futex_key(u32 __user *uaddr, unsigned int flags, union futex_key *key, 198 enum futex_access rw); 199 200 extern struct hrtimer_sleeper * 201 futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout, 202 int flags, u64 range_ns); 203 204 extern struct futex_hash_bucket *futex_hash(union futex_key *key); 205 206 /** 207 * futex_match - Check whether two futex keys are equal 208 * @key1: Pointer to key1 209 * @key2: Pointer to key2 210 * 211 * Return 1 if two futex_keys are equal, 0 otherwise. 212 */ 213 static inline int futex_match(union futex_key *key1, union futex_key *key2) 214 { 215 return (key1 && key2 216 && key1->both.word == key2->both.word 217 && key1->both.ptr == key2->both.ptr 218 && key1->both.offset == key2->both.offset); 219 } 220 221 extern int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, 222 struct futex_q *q, struct futex_hash_bucket **hb); 223 extern void futex_wait_queue(struct futex_hash_bucket *hb, struct futex_q *q, 224 struct hrtimer_sleeper *timeout); 225 extern bool __futex_wake_mark(struct futex_q *q); 226 extern void futex_wake_mark(struct wake_q_head *wake_q, struct futex_q *q); 227 228 extern int fault_in_user_writeable(u32 __user *uaddr); 229 extern struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb, union futex_key *key); 230 231 static inline int futex_cmpxchg_value_locked(u32 *curval, u32 __user *uaddr, u32 uval, u32 newval) 232 { 233 int ret; 234 235 pagefault_disable(); 236 ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval); 237 pagefault_enable(); 238 239 return ret; 240 } 241 242 /* 243 * This does a plain atomic user space read, and the user pointer has 244 * already been verified earlier by get_futex_key() to be both aligned 245 * and actually in user space, just like futex_atomic_cmpxchg_inatomic(). 246 * 247 * We still want to avoid any speculation, and while __get_user() is 248 * the traditional model for this, it's actually slower than doing 249 * this manually these days. 250 * 251 * We could just have a per-architecture special function for it, 252 * the same way we do futex_atomic_cmpxchg_inatomic(), but rather 253 * than force everybody to do that, write it out long-hand using 254 * the low-level user-access infrastructure. 255 * 256 * This looks a bit overkill, but generally just results in a couple 257 * of instructions. 258 */ 259 static __always_inline int futex_read_inatomic(u32 *dest, u32 __user *from) 260 { 261 u32 val; 262 263 if (can_do_masked_user_access()) 264 from = masked_user_access_begin(from); 265 else if (!user_read_access_begin(from, sizeof(*from))) 266 return -EFAULT; 267 unsafe_get_user(val, from, Efault); 268 user_access_end(); 269 *dest = val; 270 return 0; 271 Efault: 272 user_access_end(); 273 return -EFAULT; 274 } 275 276 static inline int futex_get_value_locked(u32 *dest, u32 __user *from) 277 { 278 int ret; 279 280 pagefault_disable(); 281 ret = futex_read_inatomic(dest, from); 282 pagefault_enable(); 283 284 return ret; 285 } 286 287 extern void __futex_unqueue(struct futex_q *q); 288 extern void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb); 289 extern int futex_unqueue(struct futex_q *q); 290 291 /** 292 * futex_queue() - Enqueue the futex_q on the futex_hash_bucket 293 * @q: The futex_q to enqueue 294 * @hb: The destination hash bucket 295 * 296 * The hb->lock must be held by the caller, and is released here. A call to 297 * futex_queue() is typically paired with exactly one call to futex_unqueue(). The 298 * exceptions involve the PI related operations, which may use futex_unqueue_pi() 299 * or nothing if the unqueue is done as part of the wake process and the unqueue 300 * state is implicit in the state of woken task (see futex_wait_requeue_pi() for 301 * an example). 302 */ 303 static inline void futex_queue(struct futex_q *q, struct futex_hash_bucket *hb) 304 __releases(&hb->lock) 305 { 306 __futex_queue(q, hb); 307 spin_unlock(&hb->lock); 308 } 309 310 extern void futex_unqueue_pi(struct futex_q *q); 311 312 extern void wait_for_owner_exiting(int ret, struct task_struct *exiting); 313 314 /* 315 * Reflects a new waiter being added to the waitqueue. 316 */ 317 static inline void futex_hb_waiters_inc(struct futex_hash_bucket *hb) 318 { 319 #ifdef CONFIG_SMP 320 atomic_inc(&hb->waiters); 321 /* 322 * Full barrier (A), see the ordering comment above. 323 */ 324 smp_mb__after_atomic(); 325 #endif 326 } 327 328 /* 329 * Reflects a waiter being removed from the waitqueue by wakeup 330 * paths. 331 */ 332 static inline void futex_hb_waiters_dec(struct futex_hash_bucket *hb) 333 { 334 #ifdef CONFIG_SMP 335 atomic_dec(&hb->waiters); 336 #endif 337 } 338 339 static inline int futex_hb_waiters_pending(struct futex_hash_bucket *hb) 340 { 341 #ifdef CONFIG_SMP 342 /* 343 * Full barrier (B), see the ordering comment above. 344 */ 345 smp_mb(); 346 return atomic_read(&hb->waiters); 347 #else 348 return 1; 349 #endif 350 } 351 352 extern struct futex_hash_bucket *futex_q_lock(struct futex_q *q); 353 extern void futex_q_unlock(struct futex_hash_bucket *hb); 354 355 356 extern int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, 357 union futex_key *key, 358 struct futex_pi_state **ps, 359 struct task_struct *task, 360 struct task_struct **exiting, 361 int set_waiters); 362 363 extern int refill_pi_state_cache(void); 364 extern void get_pi_state(struct futex_pi_state *pi_state); 365 extern void put_pi_state(struct futex_pi_state *pi_state); 366 extern int fixup_pi_owner(u32 __user *uaddr, struct futex_q *q, int locked); 367 368 /* 369 * Express the locking dependencies for lockdep: 370 */ 371 static inline void 372 double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2) 373 { 374 if (hb1 > hb2) 375 swap(hb1, hb2); 376 377 spin_lock(&hb1->lock); 378 if (hb1 != hb2) 379 spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING); 380 } 381 382 static inline void 383 double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2) 384 { 385 spin_unlock(&hb1->lock); 386 if (hb1 != hb2) 387 spin_unlock(&hb2->lock); 388 } 389 390 /* syscalls */ 391 392 extern int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, u32 393 val, ktime_t *abs_time, u32 bitset, u32 __user 394 *uaddr2); 395 396 extern int futex_requeue(u32 __user *uaddr1, unsigned int flags1, 397 u32 __user *uaddr2, unsigned int flags2, 398 int nr_wake, int nr_requeue, 399 u32 *cmpval, int requeue_pi); 400 401 extern int __futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, 402 struct hrtimer_sleeper *to, u32 bitset); 403 404 extern int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, 405 ktime_t *abs_time, u32 bitset); 406 407 /** 408 * struct futex_vector - Auxiliary struct for futex_waitv() 409 * @w: Userspace provided data 410 * @q: Kernel side data 411 * 412 * Struct used to build an array with all data need for futex_waitv() 413 */ 414 struct futex_vector { 415 struct futex_waitv w; 416 struct futex_q q; 417 }; 418 419 extern int futex_parse_waitv(struct futex_vector *futexv, 420 struct futex_waitv __user *uwaitv, 421 unsigned int nr_futexes, futex_wake_fn *wake, 422 void *wake_data); 423 424 extern int futex_wait_multiple_setup(struct futex_vector *vs, int count, 425 int *woken); 426 427 extern int futex_unqueue_multiple(struct futex_vector *v, int count); 428 429 extern int futex_wait_multiple(struct futex_vector *vs, unsigned int count, 430 struct hrtimer_sleeper *to); 431 432 extern int futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset); 433 434 extern int futex_wake_op(u32 __user *uaddr1, unsigned int flags, 435 u32 __user *uaddr2, int nr_wake, int nr_wake2, int op); 436 437 extern int futex_unlock_pi(u32 __user *uaddr, unsigned int flags); 438 439 extern int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int trylock); 440 441 #endif /* _FUTEX_H */ 442