xref: /linux/kernel/futex/futex.h (revision fa76887bb72ae11347730271e6a04c147b7527e6)
1  /* SPDX-License-Identifier: GPL-2.0 */
2  #ifndef _FUTEX_H
3  #define _FUTEX_H
4  
5  #include <linux/futex.h>
6  #include <linux/rtmutex.h>
7  #include <linux/sched/wake_q.h>
8  #include <linux/compat.h>
9  #include <linux/uaccess.h>
10  
11  #ifdef CONFIG_PREEMPT_RT
12  #include <linux/rcuwait.h>
13  #endif
14  
15  #include <asm/futex.h>
16  
17  /*
18   * Futex flags used to encode options to functions and preserve them across
19   * restarts.
20   */
21  #define FLAGS_SIZE_8		0x0000
22  #define FLAGS_SIZE_16		0x0001
23  #define FLAGS_SIZE_32		0x0002
24  #define FLAGS_SIZE_64		0x0003
25  
26  #define FLAGS_SIZE_MASK		0x0003
27  
28  #ifdef CONFIG_MMU
29  # define FLAGS_SHARED		0x0010
30  #else
31  /*
32   * NOMMU does not have per process address space. Let the compiler optimize
33   * code away.
34   */
35  # define FLAGS_SHARED		0x0000
36  #endif
37  #define FLAGS_CLOCKRT		0x0020
38  #define FLAGS_HAS_TIMEOUT	0x0040
39  #define FLAGS_NUMA		0x0080
40  #define FLAGS_STRICT		0x0100
41  
42  /* FUTEX_ to FLAGS_ */
futex_to_flags(unsigned int op)43  static inline unsigned int futex_to_flags(unsigned int op)
44  {
45  	unsigned int flags = FLAGS_SIZE_32;
46  
47  	if (!(op & FUTEX_PRIVATE_FLAG))
48  		flags |= FLAGS_SHARED;
49  
50  	if (op & FUTEX_CLOCK_REALTIME)
51  		flags |= FLAGS_CLOCKRT;
52  
53  	return flags;
54  }
55  
56  #define FUTEX2_VALID_MASK (FUTEX2_SIZE_MASK | FUTEX2_PRIVATE)
57  
58  /* FUTEX2_ to FLAGS_ */
futex2_to_flags(unsigned int flags2)59  static inline unsigned int futex2_to_flags(unsigned int flags2)
60  {
61  	unsigned int flags = flags2 & FUTEX2_SIZE_MASK;
62  
63  	if (!(flags2 & FUTEX2_PRIVATE))
64  		flags |= FLAGS_SHARED;
65  
66  	if (flags2 & FUTEX2_NUMA)
67  		flags |= FLAGS_NUMA;
68  
69  	return flags;
70  }
71  
futex_size(unsigned int flags)72  static inline unsigned int futex_size(unsigned int flags)
73  {
74  	return 1 << (flags & FLAGS_SIZE_MASK);
75  }
76  
futex_flags_valid(unsigned int flags)77  static inline bool futex_flags_valid(unsigned int flags)
78  {
79  	/* Only 64bit futexes for 64bit code */
80  	if (!IS_ENABLED(CONFIG_64BIT) || in_compat_syscall()) {
81  		if ((flags & FLAGS_SIZE_MASK) == FLAGS_SIZE_64)
82  			return false;
83  	}
84  
85  	/* Only 32bit futexes are implemented -- for now */
86  	if ((flags & FLAGS_SIZE_MASK) != FLAGS_SIZE_32)
87  		return false;
88  
89  	return true;
90  }
91  
futex_validate_input(unsigned int flags,u64 val)92  static inline bool futex_validate_input(unsigned int flags, u64 val)
93  {
94  	int bits = 8 * futex_size(flags);
95  
96  	if (bits < 64 && (val >> bits))
97  		return false;
98  
99  	return true;
100  }
101  
#ifdef CONFIG_FAIL_FUTEX
extern bool should_fail_futex(bool fshared);
#else
/* Fault injection disabled: never simulate a futex failure. */
static inline bool should_fail_futex(bool fshared)
{
	return false;
}
#endif
110  
/*
 * Hash buckets are shared by all the futex_keys that hash to the same
 * location.  Each key may have multiple futex_q structures, one for each task
 * waiting on a futex.
 */
struct futex_hash_bucket {
	atomic_t waiters;		/* waiter count, see futex_hb_waiters_*() */
	spinlock_t lock;		/* protects @chain */
	struct plist_head chain;	/* priority-sorted list of futex_q entries */
} ____cacheline_aligned_in_smp;
121  
/*
 * Priority Inheritance state:
 */
struct futex_pi_state {
	/*
	 * list of 'owned' pi_state instances - these have to be
	 * cleaned up in do_exit() if the task exits prematurely:
	 */
	struct list_head list;

	/*
	 * The PI object:
	 */
	struct rt_mutex_base pi_mutex;

	struct task_struct *owner;	/* owner task of this PI state */
	refcount_t refcount;		/* see get_pi_state()/put_pi_state() */

	union futex_key key;		/* futex this state is attached to */
} __randomize_layout;
142  
struct futex_q;
typedef void (futex_wake_fn)(struct wake_q_head *wake_q, struct futex_q *q);

/**
 * struct futex_q - The hashed futex queue entry, one per waiting task
 * @list:		priority-sorted list of tasks waiting on this futex
 * @task:		the task waiting on the futex
 * @lock_ptr:		the hash bucket lock
 * @wake:		the wake handler for this queue
 * @wake_data:		data associated with the wake handler
 * @key:		the key the futex is hashed on
 * @pi_state:		optional priority inheritance state
 * @rt_waiter:		rt_waiter storage for use with requeue_pi
 * @requeue_pi_key:	the requeue_pi target futex key
 * @bitset:		bitset for the optional bitmasked wakeup
 * @requeue_state:	State field for futex_requeue_pi()
 * @requeue_wait:	RCU wait for futex_requeue_pi() (RT only)
 *
 * We use this hashed waitqueue, instead of a normal wait_queue_entry_t, so
 * we can wake only the relevant ones (hashed queues may be shared).
 *
 * A futex_q has a woken state, just like tasks have TASK_RUNNING.
 * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0.
 * The order of wakeup is always to make the first condition true, then
 * the second.
 *
 * PI futexes are typically woken before they are removed from the hash list via
 * the rt_mutex code. See futex_unqueue_pi().
 */
struct futex_q {
	struct plist_node list;

	struct task_struct *task;
	spinlock_t *lock_ptr;
	futex_wake_fn *wake;
	void *wake_data;
	union futex_key key;
	struct futex_pi_state *pi_state;
	struct rt_mutex_waiter *rt_waiter;
	union futex_key *requeue_pi_key;
	u32 bitset;
	atomic_t requeue_state;		/* see futex_requeue_pi() */
#ifdef CONFIG_PREEMPT_RT
	struct rcuwait requeue_wait;
#endif
} __randomize_layout;
189  
190  extern const struct futex_q futex_q_init;
191  
192  enum futex_access {
193  	FUTEX_READ,
194  	FUTEX_WRITE
195  };
196  
197  extern int get_futex_key(u32 __user *uaddr, unsigned int flags, union futex_key *key,
198  			 enum futex_access rw);
199  
200  extern struct hrtimer_sleeper *
201  futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
202  		  int flags, u64 range_ns);
203  
204  extern struct futex_hash_bucket *futex_hash(union futex_key *key);
205  
206  /**
207   * futex_match - Check whether two futex keys are equal
208   * @key1:	Pointer to key1
209   * @key2:	Pointer to key2
210   *
211   * Return 1 if two futex_keys are equal, 0 otherwise.
212   */
futex_match(union futex_key * key1,union futex_key * key2)213  static inline int futex_match(union futex_key *key1, union futex_key *key2)
214  {
215  	return (key1 && key2
216  		&& key1->both.word == key2->both.word
217  		&& key1->both.ptr == key2->both.ptr
218  		&& key1->both.offset == key2->both.offset);
219  }
220  
221  extern int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
222  			    struct futex_q *q, struct futex_hash_bucket **hb);
223  extern void futex_wait_queue(struct futex_hash_bucket *hb, struct futex_q *q,
224  				   struct hrtimer_sleeper *timeout);
225  extern bool __futex_wake_mark(struct futex_q *q);
226  extern void futex_wake_mark(struct wake_q_head *wake_q, struct futex_q *q);
227  
228  extern int fault_in_user_writeable(u32 __user *uaddr);
229  extern struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb, union futex_key *key);
230  
/*
 * cmpxchg the user space futex word with page faults disabled: a fault
 * makes futex_atomic_cmpxchg_inatomic() report an error instead of being
 * handled in place (callers presumably hold a hash bucket lock, hence
 * the "_locked" suffix).
 */
static inline int futex_cmpxchg_value_locked(u32 *curval, u32 __user *uaddr, u32 uval, u32 newval)
{
	int ret;

	pagefault_disable();
	ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
	pagefault_enable();

	return ret;
}
241  
242  /*
243   * This does a plain atomic user space read, and the user pointer has
244   * already been verified earlier by get_futex_key() to be both aligned
245   * and actually in user space, just like futex_atomic_cmpxchg_inatomic().
246   *
247   * We still want to avoid any speculation, and while __get_user() is
248   * the traditional model for this, it's actually slower than doing
249   * this manually these days.
250   *
251   * We could just have a per-architecture special function for it,
252   * the same way we do futex_atomic_cmpxchg_inatomic(), but rather
253   * than force everybody to do that, write it out long-hand using
254   * the low-level user-access infrastructure.
255   *
256   * This looks a bit overkill, but generally just results in a couple
257   * of instructions.
258   */
static __always_inline int futex_read_inatomic(u32 *dest, u32 __user *from)
{
	u32 val;

	/* Prefer address masking over an explicit access_ok() check. */
	if (can_do_masked_user_access())
		from = masked_user_access_begin(from);
	else if (!user_read_access_begin(from, sizeof(*from)))
		return -EFAULT;
	/* Jumps to Efault on a faulting access. */
	unsafe_get_user(val, from, Efault);
	user_read_access_end();
	*dest = val;
	return 0;
Efault:
	user_read_access_end();
	return -EFAULT;
}
275  
/*
 * Read the user space futex word with page faults disabled: a fault makes
 * futex_read_inatomic() return -EFAULT instead of being handled in place.
 */
static inline int futex_get_value_locked(u32 *dest, u32 __user *from)
{
	int ret;

	pagefault_disable();
	ret = futex_read_inatomic(dest, from);
	pagefault_enable();

	return ret;
}
286  
287  extern void __futex_unqueue(struct futex_q *q);
288  extern void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb,
289  				struct task_struct *task);
290  extern int futex_unqueue(struct futex_q *q);
291  
/**
 * futex_queue() - Enqueue the futex_q on the futex_hash_bucket
 * @q:	The futex_q to enqueue
 * @hb:	The destination hash bucket
 * @task: Task queueing this futex
 *
 * The hb->lock must be held by the caller, and is released here. A call to
 * futex_queue() is typically paired with exactly one call to futex_unqueue().  The
 * exceptions involve the PI related operations, which may use futex_unqueue_pi()
 * or nothing if the unqueue is done as part of the wake process and the unqueue
 * state is implicit in the state of woken task (see futex_wait_requeue_pi() for
 * an example).
 *
 * Note that @task may be NULL, for async usage of futexes.
 */
static inline void futex_queue(struct futex_q *q, struct futex_hash_bucket *hb,
			       struct task_struct *task)
	__releases(&hb->lock)
{
	/* Enqueue under the caller-held bucket lock, then drop it. */
	__futex_queue(q, hb, task);
	spin_unlock(&hb->lock);
}
314  
315  extern void futex_unqueue_pi(struct futex_q *q);
316  
317  extern void wait_for_owner_exiting(int ret, struct task_struct *exiting);
318  
/*
 * Reflects a new waiter being added to the waitqueue.
 *
 * On !SMP the counter is not maintained; futex_hb_waiters_pending()
 * unconditionally returns 1 there.
 */
static inline void futex_hb_waiters_inc(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	atomic_inc(&hb->waiters);
	/*
	 * Full barrier (A), see the ordering comment above.
	 */
	smp_mb__after_atomic();
#endif
}
332  
/*
 * Reflects a waiter being removed from the waitqueue by wakeup
 * paths. Counterpart of futex_hb_waiters_inc(); no barrier needed here.
 */
static inline void futex_hb_waiters_dec(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	atomic_dec(&hb->waiters);
#endif
}
343  
/*
 * Number of waiters queued on @hb. On !SMP the counter is not maintained,
 * so conservatively report 1 (a waiter may be pending).
 */
static inline int futex_hb_waiters_pending(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	/*
	 * Full barrier (B), see the ordering comment above.
	 */
	smp_mb();
	return atomic_read(&hb->waiters);
#else
	return 1;
#endif
}
356  
357  extern struct futex_hash_bucket *futex_q_lock(struct futex_q *q);
358  extern void futex_q_unlock(struct futex_hash_bucket *hb);
359  
360  
361  extern int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
362  				union futex_key *key,
363  				struct futex_pi_state **ps,
364  				struct task_struct *task,
365  				struct task_struct **exiting,
366  				int set_waiters);
367  
368  extern int refill_pi_state_cache(void);
369  extern void get_pi_state(struct futex_pi_state *pi_state);
370  extern void put_pi_state(struct futex_pi_state *pi_state);
371  extern int fixup_pi_owner(u32 __user *uaddr, struct futex_q *q, int locked);
372  
/*
 * Express the locking dependencies for lockdep:
 */
static inline void
double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
{
	/* Lock in ascending address order to avoid ABBA deadlock. */
	if (hb1 > hb2)
		swap(hb1, hb2);

	spin_lock(&hb1->lock);
	if (hb1 != hb2)
		spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING);
}
386  
/* Release both bucket locks; both may refer to the same bucket. */
static inline void
double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
{
	spin_unlock(&hb1->lock);
	if (hb1 != hb2)
		spin_unlock(&hb2->lock);
}
394  
395  /* syscalls */
396  
397  extern int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, u32
398  				 val, ktime_t *abs_time, u32 bitset, u32 __user
399  				 *uaddr2);
400  
401  extern int futex_requeue(u32 __user *uaddr1, unsigned int flags1,
402  			 u32 __user *uaddr2, unsigned int flags2,
403  			 int nr_wake, int nr_requeue,
404  			 u32 *cmpval, int requeue_pi);
405  
406  extern int __futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
407  			struct hrtimer_sleeper *to, u32 bitset);
408  
409  extern int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
410  		      ktime_t *abs_time, u32 bitset);
411  
/**
 * struct futex_vector - Auxiliary struct for futex_waitv()
 * @w: Userspace provided data
 * @q: Kernel side data
 *
 * Struct used to build an array with all the data needed for futex_waitv()
 */
struct futex_vector {
	struct futex_waitv w;
	struct futex_q q;
};
423  
424  extern int futex_parse_waitv(struct futex_vector *futexv,
425  			     struct futex_waitv __user *uwaitv,
426  			     unsigned int nr_futexes, futex_wake_fn *wake,
427  			     void *wake_data);
428  
429  extern int futex_wait_multiple_setup(struct futex_vector *vs, int count,
430  				     int *woken);
431  
432  extern int futex_unqueue_multiple(struct futex_vector *v, int count);
433  
434  extern int futex_wait_multiple(struct futex_vector *vs, unsigned int count,
435  			       struct hrtimer_sleeper *to);
436  
437  extern int futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset);
438  
439  extern int futex_wake_op(u32 __user *uaddr1, unsigned int flags,
440  			 u32 __user *uaddr2, int nr_wake, int nr_wake2, int op);
441  
442  extern int futex_unlock_pi(u32 __user *uaddr, unsigned int flags);
443  
444  extern int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int trylock);
445  
446  #endif /* _FUTEX_H */
447