xref: /linux/kernel/futex/futex.h (revision 79d2e1919a2728ef49d938eb20ebd5903c14dfb0)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 #ifndef _FUTEX_H
3 #define _FUTEX_H
4 
5 #include <linux/futex.h>
6 #include <linux/rtmutex.h>
7 #include <linux/sched/wake_q.h>
8 #include <linux/compat.h>
9 #include <linux/uaccess.h>
10 
11 #ifdef CONFIG_PREEMPT_RT
12 #include <linux/rcuwait.h>
13 #endif
14 
15 #include <asm/futex.h>
16 
/*
 * Futex flags used to encode options to functions and preserve them across
 * restarts.
 *
 * The low two bits (FLAGS_SIZE_MASK) select the futex size; the encoded value
 * is the log2 of the size in bytes, see futex_size().
 */
#define FLAGS_SIZE_8		0x0000
#define FLAGS_SIZE_16		0x0001
#define FLAGS_SIZE_32		0x0002
#define FLAGS_SIZE_64		0x0003

#define FLAGS_SIZE_MASK		0x0003

/* Set by futex_to_flags()/futex2_to_flags() when the PRIVATE bit is absent */
#ifdef CONFIG_MMU
# define FLAGS_SHARED		0x0010
#else
/*
 * NOMMU does not have per process address space. Let the compiler optimize
 * code away.
 */
# define FLAGS_SHARED		0x0000
#endif
#define FLAGS_CLOCKRT		0x0020	/* set for FUTEX_CLOCK_REALTIME */
#define FLAGS_HAS_TIMEOUT	0x0040
#define FLAGS_NUMA		0x0080	/* set for FUTEX2_NUMA */
#define FLAGS_STRICT		0x0100
41 
42 /* FUTEX_ to FLAGS_ */
43 static inline unsigned int futex_to_flags(unsigned int op)
44 {
45 	unsigned int flags = FLAGS_SIZE_32;
46 
47 	if (!(op & FUTEX_PRIVATE_FLAG))
48 		flags |= FLAGS_SHARED;
49 
50 	if (op & FUTEX_CLOCK_REALTIME)
51 		flags |= FLAGS_CLOCKRT;
52 
53 	return flags;
54 }
55 
/*
 * FUTEX2 flag bits covered by this mask: the size bits and FUTEX2_PRIVATE.
 * Note that FUTEX2_NUMA is not part of it, although futex2_to_flags()
 * translates that bit.
 */
#define FUTEX2_VALID_MASK (FUTEX2_SIZE_MASK | FUTEX2_PRIVATE)
57 
58 /* FUTEX2_ to FLAGS_ */
59 static inline unsigned int futex2_to_flags(unsigned int flags2)
60 {
61 	unsigned int flags = flags2 & FUTEX2_SIZE_MASK;
62 
63 	if (!(flags2 & FUTEX2_PRIVATE))
64 		flags |= FLAGS_SHARED;
65 
66 	if (flags2 & FUTEX2_NUMA)
67 		flags |= FLAGS_NUMA;
68 
69 	return flags;
70 }
71 
72 static inline unsigned int futex_size(unsigned int flags)
73 {
74 	return 1 << (flags & FLAGS_SIZE_MASK);
75 }
76 
77 static inline bool futex_flags_valid(unsigned int flags)
78 {
79 	/* Only 64bit futexes for 64bit code */
80 	if (!IS_ENABLED(CONFIG_64BIT) || in_compat_syscall()) {
81 		if ((flags & FLAGS_SIZE_MASK) == FLAGS_SIZE_64)
82 			return false;
83 	}
84 
85 	/* Only 32bit futexes are implemented -- for now */
86 	if ((flags & FLAGS_SIZE_MASK) != FLAGS_SIZE_32)
87 		return false;
88 
89 	return true;
90 }
91 
92 static inline bool futex_validate_input(unsigned int flags, u64 val)
93 {
94 	int bits = 8 * futex_size(flags);
95 
96 	if (bits < 64 && (val >> bits))
97 		return false;
98 
99 	return true;
100 }
101 
/*
 * should_fail_futex() is provided outside this header when CONFIG_FAIL_FUTEX
 * is set; otherwise the stub below makes it a constant false.
 */
#ifdef CONFIG_FAIL_FUTEX
extern bool should_fail_futex(bool fshared);
#else
static inline bool should_fail_futex(bool fshared)
{
	return false;
}
#endif
110 
/*
 * Hash buckets are shared by all the futex_keys that hash to the same
 * location.  Each key may have multiple futex_q structures, one for each task
 * waiting on a futex.
 *
 * @waiters is adjusted by futex_hb_waiters_inc()/futex_hb_waiters_dec() and
 * sampled by futex_hb_waiters_pending(); those helpers only touch it on
 * CONFIG_SMP builds.  @lock is the per-bucket spinlock taken by
 * double_lock_hb() and dropped by futex_queue().
 */
struct futex_hash_bucket {
	atomic_t waiters;
	spinlock_t lock;
	/* NOTE(review): presumably links the futex_q::list nodes - confirm */
	struct plist_head chain;
} ____cacheline_aligned_in_smp;
121 
/*
 * Priority Inheritance state:
 */
struct futex_pi_state {
	/*
	 * list of 'owned' pi_state instances - these have to be
	 * cleaned up in do_exit() if the task exits prematurely:
	 */
	struct list_head list;

	/*
	 * The PI object:
	 */
	struct rt_mutex_base pi_mutex;

	struct task_struct *owner;
	/* NOTE(review): presumably managed via get_pi_state()/put_pi_state() - confirm */
	refcount_t refcount;

	union futex_key key;
} __randomize_layout;
142 
struct futex_q;
/*
 * Signature of the wake handler stored in futex_q::wake. It receives a wake
 * queue head and the futex_q it is invoked for; futex_wake_mark() has this
 * signature.
 */
typedef void (futex_wake_fn)(struct wake_q_head *wake_q, struct futex_q *q);
145 
/**
 * struct futex_q - The hashed futex queue entry, one per waiting task
 * @list:		priority-sorted list of tasks waiting on this futex
 * @task:		the task waiting on the futex
 * @lock_ptr:		the hash bucket lock
 * @wake:		the wake handler for this queue (a futex_wake_fn)
 * @wake_data:		data associated with the wake handler
 * @key:		the key the futex is hashed on
 * @pi_state:		optional priority inheritance state
 * @rt_waiter:		rt_waiter storage for use with requeue_pi
 * @requeue_pi_key:	the requeue_pi target futex key
 * @bitset:		bitset for the optional bitmasked wakeup
 * @requeue_state:	State field for futex_requeue_pi()
 * @requeue_wait:	RCU wait for futex_requeue_pi() (RT only, the member
 *			exists only with CONFIG_PREEMPT_RT)
 *
 * We use this hashed waitqueue, instead of a normal wait_queue_entry_t, so
 * we can wake only the relevant ones (hashed queues may be shared).
 *
 * A futex_q has a woken state, just like tasks have TASK_RUNNING.
 * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == NULL.
 * The order of wakeup is always to make the first condition true, then
 * the second.
 *
 * PI futexes are typically woken before they are removed from the hash list via
 * the rt_mutex code. See futex_unqueue_pi().
 */
struct futex_q {
	struct plist_node list;

	struct task_struct *task;
	spinlock_t *lock_ptr;
	futex_wake_fn *wake;
	void *wake_data;
	union futex_key key;
	struct futex_pi_state *pi_state;
	struct rt_mutex_waiter *rt_waiter;
	union futex_key *requeue_pi_key;
	u32 bitset;
	atomic_t requeue_state;
#ifdef CONFIG_PREEMPT_RT
	struct rcuwait requeue_wait;
#endif
} __randomize_layout;
189 
/*
 * Constant futex_q object defined outside this header.
 * NOTE(review): presumably the template used to initialise new futex_q
 * instances - confirm against the definition.
 */
extern const struct futex_q futex_q_init;
191 
/* Access mode, passed as the @rw argument of get_futex_key() */
enum futex_access {
	FUTEX_READ,
	FUTEX_WRITE
};
196 
/*
 * Key lookup, timer setup and hashing helpers; implemented outside this
 * header.
 */

/* Fills @key for the user address @uaddr; @rw is the intended access mode */
extern int get_futex_key(u32 __user *uaddr, unsigned int flags, union futex_key *key,
			 enum futex_access rw);

/*
 * NOTE(review): @flags is a plain int here while the other prototypes in this
 * header take unsigned int flags - confirm this is intended.
 */
extern struct hrtimer_sleeper *
futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
		  int flags, u64 range_ns);

/* Maps @key to a futex_hash_bucket */
extern struct futex_hash_bucket *futex_hash(union futex_key *key);
205 
206 /**
207  * futex_match - Check whether two futex keys are equal
208  * @key1:	Pointer to key1
209  * @key2:	Pointer to key2
210  *
211  * Return 1 if two futex_keys are equal, 0 otherwise.
212  */
213 static inline int futex_match(union futex_key *key1, union futex_key *key2)
214 {
215 	return (key1 && key2
216 		&& key1->both.word == key2->both.word
217 		&& key1->both.ptr == key2->both.ptr
218 		&& key1->both.offset == key2->both.offset);
219 }
220 
/* Wait/wake building blocks; implemented outside this header. */
extern int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
			    struct futex_q *q, struct futex_hash_bucket **hb);
extern void futex_wait_queue(struct futex_hash_bucket *hb, struct futex_q *q,
				   struct hrtimer_sleeper *timeout);
extern bool __futex_wake_mark(struct futex_q *q);
/* Has the futex_wake_fn signature, so it can be stored in futex_q::wake */
extern void futex_wake_mark(struct wake_q_head *wake_q, struct futex_q *q);

extern int fault_in_user_writeable(u32 __user *uaddr);
extern struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb, union futex_key *key);
230 
231 static inline int futex_cmpxchg_value_locked(u32 *curval, u32 __user *uaddr, u32 uval, u32 newval)
232 {
233 	int ret;
234 
235 	pagefault_disable();
236 	ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
237 	pagefault_enable();
238 
239 	return ret;
240 }
241 
/*
 * This does a plain atomic user space read, and the user pointer has
 * already been verified earlier by get_futex_key() to be both aligned
 * and actually in user space, just like futex_atomic_cmpxchg_inatomic().
 *
 * We still want to avoid any speculation, and while __get_user() is
 * the traditional model for this, it's actually slower than doing
 * this manually these days.
 *
 * We could just have a per-architecture special function for it,
 * the same way we do futex_atomic_cmpxchg_inatomic(), but rather
 * than force everybody to do that, write it out long-hand using
 * the low-level user-access infrastructure.
 *
 * This looks a bit overkill, but generally just results in a couple
 * of instructions.
 *
 * Returns 0 with the value stored in *@dest, or -EFAULT in which case
 * *@dest is left untouched.
 */
static __always_inline int futex_read_inatomic(u32 *dest, u32 __user *from)
{
	u32 val;

	/*
	 * Open the user access window: either through the masked variant,
	 * which hands back the pointer to use from here on, or through
	 * user_read_access_begin(), bailing out if that refuses the range.
	 */
	if (can_do_masked_user_access())
		from = masked_user_access_begin(from);
	else if (!user_read_access_begin(from, sizeof(*from)))
		return -EFAULT;
	/* Branches to the Efault label below when the read fails */
	unsafe_get_user(val, from, Efault);
	user_read_access_end();
	*dest = val;
	return 0;
Efault:
	/* The access window is still open on this path, close it as well */
	user_read_access_end();
	return -EFAULT;
}
275 
276 static inline int futex_get_value_locked(u32 *dest, u32 __user *from)
277 {
278 	int ret;
279 
280 	pagefault_disable();
281 	ret = futex_read_inatomic(dest, from);
282 	pagefault_enable();
283 
284 	return ret;
285 }
286 
/*
 * Queue/unqueue primitives; implemented outside this header.
 * __futex_queue() is what futex_queue() calls before dropping hb->lock.
 */
extern void __futex_unqueue(struct futex_q *q);
extern void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb);
extern int futex_unqueue(struct futex_q *q);
290 
/**
 * futex_queue() - Enqueue the futex_q on the futex_hash_bucket
 * @q:	The futex_q to enqueue
 * @hb:	The destination hash bucket
 *
 * The hb->lock must be held by the caller, and is released here. A call to
 * futex_queue() is typically paired with exactly one call to futex_unqueue().  The
 * exceptions involve the PI related operations, which may use futex_unqueue_pi()
 * or nothing if the unqueue is done as part of the wake process and the unqueue
 * state is implicit in the state of woken task (see futex_wait_requeue_pi() for
 * an example).
 */
static inline void futex_queue(struct futex_q *q, struct futex_hash_bucket *hb)
	__releases(&hb->lock)
{
	/* Enqueue first, while hb->lock is still held ... */
	__futex_queue(q, hb);
	/* ... then drop the lock the caller took, matching __releases() above */
	spin_unlock(&hb->lock);
}
309 
/* Implemented outside this header. */
extern void futex_unqueue_pi(struct futex_q *q);

extern void wait_for_owner_exiting(int ret, struct task_struct *exiting);
313 
/*
 * Reflects a new waiter being added to the waitqueue.
 *
 * Increments hb->waiters and follows it with a barrier; compiles to nothing
 * without CONFIG_SMP.
 */
static inline void futex_hb_waiters_inc(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	atomic_inc(&hb->waiters);
	/*
	 * Full barrier (A), see the ordering comment above.
	 *
	 * NOTE(review): the ordering comment referred to is not part of this
	 * header - presumably it lives with the futex wait/wake code; confirm.
	 */
	smp_mb__after_atomic();
#endif
}
327 
/*
 * Reflects a waiter being removed from the waitqueue by wakeup
 * paths.
 *
 * Decrements hb->waiters; unlike futex_hb_waiters_inc() no barrier is issued
 * here. Compiles to nothing without CONFIG_SMP.
 */
static inline void futex_hb_waiters_dec(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	atomic_dec(&hb->waiters);
#endif
}
338 
/*
 * Report the current hb->waiters count, read after a full barrier.
 *
 * Without CONFIG_SMP the counter is not maintained by the helpers above, so
 * this unconditionally returns 1.
 */
static inline int futex_hb_waiters_pending(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	/*
	 * Full barrier (B), see the ordering comment above.
	 *
	 * NOTE(review): the ordering comment referred to is not part of this
	 * header - presumably it lives with the futex wait/wake code; confirm.
	 */
	smp_mb();
	return atomic_read(&hb->waiters);
#else
	return 1;
#endif
}
351 
/* Bucket lock helpers for a futex_q; implemented outside this header. */
extern struct futex_hash_bucket *futex_q_lock(struct futex_q *q);
extern void futex_q_unlock(struct futex_hash_bucket *hb);


/* Priority inheritance helpers; implemented outside this header. */
extern int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
				union futex_key *key,
				struct futex_pi_state **ps,
				struct task_struct *task,
				struct task_struct **exiting,
				int set_waiters);

extern int refill_pi_state_cache(void);
extern void get_pi_state(struct futex_pi_state *pi_state);
extern void put_pi_state(struct futex_pi_state *pi_state);
extern int fixup_pi_owner(u32 __user *uaddr, struct futex_q *q, int locked);
367 
/*
 * Express the locking dependencies for lockdep:
 *
 * Take the locks of two hash buckets. The bucket with the lower address is
 * always locked first, whatever the argument order; the second lock is taken
 * with SINGLE_DEPTH_NESTING. When both arguments are the same bucket only one
 * lock is taken.
 */
static inline void
double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
{
	/* Only the local copies are swapped; the caller's pointers are untouched */
	if (hb1 > hb2)
		swap(hb1, hb2);

	spin_lock(&hb1->lock);
	if (hb1 != hb2)
		spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING);
}
381 
/*
 * Counterpart of double_lock_hb(): release @hb1's lock, then @hb2's unless
 * both arguments are the same bucket. The locks are dropped in argument
 * order; no address ordering is applied here.
 */
static inline void
double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
{
	spin_unlock(&hb1->lock);
	if (hb1 != hb2)
		spin_unlock(&hb2->lock);
}
389 
/* syscalls */

/* Entry points for the futex operations; implemented outside this header. */

extern int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, u32
				 val, ktime_t *abs_time, u32 bitset, u32 __user
				 *uaddr2);

extern int futex_requeue(u32 __user *uaddr1, unsigned int flags1,
			 u32 __user *uaddr2, unsigned int flags2,
			 int nr_wake, int nr_requeue,
			 u32 *cmpval, int requeue_pi);

/* Variant of futex_wait() taking a hrtimer_sleeper instead of a ktime_t */
extern int __futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
			struct hrtimer_sleeper *to, u32 bitset);

extern int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
		      ktime_t *abs_time, u32 bitset);
405 		      ktime_t *abs_time, u32 bitset);
406 
/**
 * struct futex_vector - Auxiliary struct for futex_waitv()
 * @w: Userspace provided data
 * @q: Kernel side data
 *
 * Struct used to build an array with all data needed for futex_waitv()
 */
struct futex_vector {
	struct futex_waitv w;
	struct futex_q q;
};
418 
/*
 * Helpers operating on arrays of struct futex_vector; implemented outside
 * this header. @wake and @wake_data have the types of the futex_q members of
 * the same name.
 */
extern int futex_parse_waitv(struct futex_vector *futexv,
			     struct futex_waitv __user *uwaitv,
			     unsigned int nr_futexes, futex_wake_fn *wake,
			     void *wake_data);

extern int futex_wait_multiple_setup(struct futex_vector *vs, int count,
				     int *woken);

extern int futex_unqueue_multiple(struct futex_vector *v, int count);

/*
 * NOTE(review): @count is unsigned int here but int in
 * futex_wait_multiple_setup() and futex_unqueue_multiple() - confirm.
 */
extern int futex_wait_multiple(struct futex_vector *vs, unsigned int count,
			       struct hrtimer_sleeper *to);

/* Wake and PI lock/unlock entry points; implemented outside this header. */
extern int futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset);

extern int futex_wake_op(u32 __user *uaddr1, unsigned int flags,
			 u32 __user *uaddr2, int nr_wake, int nr_wake2, int op);

extern int futex_unlock_pi(u32 __user *uaddr, unsigned int flags);

extern int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int trylock);
440 
441 #endif /* _FUTEX_H */
442