xref: /linux/kernel/futex/futex.h (revision 80367ad01d93ac781b0e1df246edaf006928002f)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 #ifndef _FUTEX_H
3 #define _FUTEX_H
4 
5 #include <linux/futex.h>
6 #include <linux/rtmutex.h>
7 #include <linux/sched/wake_q.h>
8 #include <linux/compat.h>
9 #include <linux/uaccess.h>
10 #include <linux/cleanup.h>
11 
12 #ifdef CONFIG_PREEMPT_RT
13 #include <linux/rcuwait.h>
14 #endif
15 
16 #include <asm/futex.h>
17 
/*
 * Futex flags used to encode options to functions and preserve them across
 * restarts.
 */
/*
 * Futex word size. The two low bits hold log2 of the size in bytes, which
 * is how futex_size() decodes them.
 */
#define FLAGS_SIZE_8		0x0000
#define FLAGS_SIZE_16		0x0001
#define FLAGS_SIZE_32		0x0002
#define FLAGS_SIZE_64		0x0003

/* Extracts the size field from a flags word. */
#define FLAGS_SIZE_MASK		0x0003

#ifdef CONFIG_MMU
/* Set by futex_to_flags()/futex2_to_flags() when the private bit is absent. */
# define FLAGS_SHARED		0x0010
#else
/*
 * NOMMU does not have per process address space. Let the compiler optimize
 * code away.
 */
# define FLAGS_SHARED		0x0000
#endif
/* Set by futex_to_flags() for FUTEX_CLOCK_REALTIME. */
#define FLAGS_CLOCKRT		0x0020
/* Not set by either converter in this header. */
#define FLAGS_HAS_TIMEOUT	0x0040
/* Set by futex2_to_flags() for FUTEX2_NUMA. */
#define FLAGS_NUMA		0x0080
/* Not set by either converter in this header. */
#define FLAGS_STRICT		0x0100
42 
43 /* FUTEX_ to FLAGS_ */
44 static inline unsigned int futex_to_flags(unsigned int op)
45 {
46 	unsigned int flags = FLAGS_SIZE_32;
47 
48 	if (!(op & FUTEX_PRIVATE_FLAG))
49 		flags |= FLAGS_SHARED;
50 
51 	if (op & FUTEX_CLOCK_REALTIME)
52 		flags |= FLAGS_CLOCKRT;
53 
54 	return flags;
55 }
56 
/*
 * FUTEX2_* bits covered by this mask: the size field and FUTEX2_PRIVATE.
 * NOTE(review): futex2_to_flags() also translates FUTEX2_NUMA, which is not
 * part of this mask - confirm that this is intentional.
 */
#define FUTEX2_VALID_MASK (FUTEX2_SIZE_MASK | FUTEX2_PRIVATE)
58 
59 /* FUTEX2_ to FLAGS_ */
60 static inline unsigned int futex2_to_flags(unsigned int flags2)
61 {
62 	unsigned int flags = flags2 & FUTEX2_SIZE_MASK;
63 
64 	if (!(flags2 & FUTEX2_PRIVATE))
65 		flags |= FLAGS_SHARED;
66 
67 	if (flags2 & FUTEX2_NUMA)
68 		flags |= FLAGS_NUMA;
69 
70 	return flags;
71 }
72 
73 static inline unsigned int futex_size(unsigned int flags)
74 {
75 	return 1 << (flags & FLAGS_SIZE_MASK);
76 }
77 
78 static inline bool futex_flags_valid(unsigned int flags)
79 {
80 	/* Only 64bit futexes for 64bit code */
81 	if (!IS_ENABLED(CONFIG_64BIT) || in_compat_syscall()) {
82 		if ((flags & FLAGS_SIZE_MASK) == FLAGS_SIZE_64)
83 			return false;
84 	}
85 
86 	/* Only 32bit futexes are implemented -- for now */
87 	if ((flags & FLAGS_SIZE_MASK) != FLAGS_SIZE_32)
88 		return false;
89 
90 	return true;
91 }
92 
93 static inline bool futex_validate_input(unsigned int flags, u64 val)
94 {
95 	int bits = 8 * futex_size(flags);
96 
97 	if (bits < 64 && (val >> bits))
98 		return false;
99 
100 	return true;
101 }
102 
#ifdef CONFIG_FAIL_FUTEX
/* With CONFIG_FAIL_FUTEX the decision is made outside this header. */
extern bool should_fail_futex(bool fshared);
#else
/* Without CONFIG_FAIL_FUTEX the answer is always false. */
static inline bool should_fail_futex(bool fshared)
{
	return false;
}
#endif
111 
/*
 * Hash buckets are shared by all the futex_keys that hash to the same
 * location.  Each key may have multiple futex_q structures, one for each task
 * waiting on a futex.
 */
struct futex_hash_bucket {
	/* Adjusted by futex_hb_waiters_inc()/_dec() on SMP builds */
	atomic_t waiters;
	/* Taken by double_lock_hb(), dropped by futex_queue()/double_unlock_hb() */
	spinlock_t lock;
	/* Head of the priority list; futex_q::list is the matching node type */
	struct plist_head chain;
	/* NOTE(review): presumably the owning private hash, if any - confirm */
	struct futex_private_hash *priv;
} ____cacheline_aligned_in_smp;
123 
/*
 * Priority Inheritance state:
 */
struct futex_pi_state {
	/*
	 * list of 'owned' pi_state instances - these have to be
	 * cleaned up in do_exit() if the task exits prematurely:
	 */
	struct list_head list;

	/*
	 * The PI object:
	 */
	struct rt_mutex_base pi_mutex;

	/* NOTE(review): presumably the task currently owning the PI futex - confirm */
	struct task_struct *owner;
	/* NOTE(review): presumably managed via get_pi_state()/put_pi_state() - confirm */
	refcount_t refcount;

	/* Futex key this state is associated with */
	union futex_key key;
} __randomize_layout;
144 
/* Forward declaration: the wake handler type below takes a futex_q pointer. */
struct futex_q;
/* Signature of the wake handler stored in futex_q::wake. */
typedef void (futex_wake_fn)(struct wake_q_head *wake_q, struct futex_q *q);
147 
/**
 * struct futex_q - The hashed futex queue entry, one per waiting task
 * @list:		priority-sorted list of tasks waiting on this futex
 * @task:		the task waiting on the futex
 * @lock_ptr:		the hash bucket lock
 * @wake:		the wake handler for this queue
 * @wake_data:		data associated with the wake handler
 * @key:		the key the futex is hashed on
 * @pi_state:		optional priority inheritance state
 * @rt_waiter:		rt_waiter storage for use with requeue_pi
 * @requeue_pi_key:	the requeue_pi target futex key
 * @bitset:		bitset for the optional bitmasked wakeup
 * @requeue_state:	State field for futex_requeue_pi()
 * @drop_hb_ref:	NOTE(review): previously undocumented; presumably tells
 *			the waiter to drop a hash bucket reference - confirm
 * @requeue_wait:	RCU wait for futex_requeue_pi() (RT only)
 *
 * We use this hashed waitqueue, instead of a normal wait_queue_entry_t, so
 * we can wake only the relevant ones (hashed queues may be shared).
 *
 * A futex_q has a woken state, just like tasks have TASK_RUNNING.
 * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0.
 * The order of wakeup is always to make the first condition true, then
 * the second.
 *
 * PI futexes are typically woken before they are removed from the hash list via
 * the rt_mutex code. See futex_unqueue_pi().
 */
struct futex_q {
	struct plist_node list;

	struct task_struct *task;
	spinlock_t *lock_ptr;
	futex_wake_fn *wake;
	void *wake_data;
	union futex_key key;
	struct futex_pi_state *pi_state;
	struct rt_mutex_waiter *rt_waiter;
	union futex_key *requeue_pi_key;
	u32 bitset;
	atomic_t requeue_state;
	bool drop_hb_ref;
#ifdef CONFIG_PREEMPT_RT
	struct rcuwait requeue_wait;
#endif
} __randomize_layout;
192 
/* Constant futex_q instance defined outside this header. */
extern const struct futex_q futex_q_init;

/* Access type passed as the @rw argument of get_futex_key(). */
enum futex_access {
	FUTEX_READ,
	FUTEX_WRITE
};
199 
/* Core helpers implemented outside this header. */
extern int get_futex_key(u32 __user *uaddr, unsigned int flags, union futex_key *key,
			 enum futex_access rw);
extern void futex_q_lockptr_lock(struct futex_q *q);
extern struct hrtimer_sleeper *
futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
		  int flags, u64 range_ns);

/* Also the constructor of the 'hb' guard class declared in this header. */
extern struct futex_hash_bucket *futex_hash(union futex_key *key);
#ifdef CONFIG_FUTEX_PRIVATE_HASH
/* Get/put pair operating on a hash bucket; implemented outside this header. */
extern void futex_hash_get(struct futex_hash_bucket *hb);
extern void futex_hash_put(struct futex_hash_bucket *hb);

/* Lookup and get/put of the private hash; implemented outside this header. */
extern struct futex_private_hash *futex_private_hash(void);
extern bool futex_private_hash_get(struct futex_private_hash *fph);
extern void futex_private_hash_put(struct futex_private_hash *fph);

#else /* !CONFIG_FUTEX_PRIVATE_HASH */
/*
 * Without private hash support these are no-ops: there is never a private
 * hash (NULL) and acquiring one never succeeds (false).
 *
 * Each stub takes the same parameters as the real prototype above, so that a
 * call site compiles the same way with either configuration.
 */
static inline void futex_hash_get(struct futex_hash_bucket *hb) { }
static inline void futex_hash_put(struct futex_hash_bucket *hb) { }
static inline struct futex_private_hash *futex_private_hash(void) { return NULL; }
static inline bool futex_private_hash_get(struct futex_private_hash *fph) { return false; }
static inline void futex_private_hash_put(struct futex_private_hash *fph) { }
#endif
223 
/*
 * Scope-based guard 'hb': initialised with futex_hash(key); on scope exit a
 * non-NULL bucket is handed to futex_hash_put().
 */
DEFINE_CLASS(hb, struct futex_hash_bucket *,
	     if (_T) futex_hash_put(_T),
	     futex_hash(key), union futex_key *key);

/*
 * Scope-based guard 'private_hash': initialised with futex_private_hash();
 * on scope exit a non-NULL pointer is handed to futex_private_hash_put().
 */
DEFINE_CLASS(private_hash, struct futex_private_hash *,
	     if (_T) futex_private_hash_put(_T),
	     futex_private_hash(), void);
231 
232 /**
233  * futex_match - Check whether two futex keys are equal
234  * @key1:	Pointer to key1
235  * @key2:	Pointer to key2
236  *
237  * Return 1 if two futex_keys are equal, 0 otherwise.
238  */
239 static inline int futex_match(union futex_key *key1, union futex_key *key2)
240 {
241 	return (key1 && key2
242 		&& key1->both.word == key2->both.word
243 		&& key1->both.ptr == key2->both.ptr
244 		&& key1->both.offset == key2->both.offset);
245 }
246 
/* Wait/wake building blocks implemented outside this header. */
extern int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
			    struct futex_q *q, union futex_key *key2,
			    struct task_struct *task);
extern void futex_do_wait(struct futex_q *q, struct hrtimer_sleeper *timeout);
extern bool __futex_wake_mark(struct futex_q *q);
/* Has the futex_wake_fn signature, so it can be stored in futex_q::wake. */
extern void futex_wake_mark(struct wake_q_head *wake_q, struct futex_q *q);

extern int fault_in_user_writeable(u32 __user *uaddr);
extern struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb, union futex_key *key);
256 
257 static inline int futex_cmpxchg_value_locked(u32 *curval, u32 __user *uaddr, u32 uval, u32 newval)
258 {
259 	int ret;
260 
261 	pagefault_disable();
262 	ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
263 	pagefault_enable();
264 
265 	return ret;
266 }
267 
/*
 * This does a plain atomic user space read, and the user pointer has
 * already been verified earlier by get_futex_key() to be both aligned
 * and actually in user space, just like futex_atomic_cmpxchg_inatomic().
 *
 * We still want to avoid any speculation, and while __get_user() is
 * the traditional model for this, it's actually slower than doing
 * this manually these days.
 *
 * We could just have a per-architecture special function for it,
 * the same way we do futex_atomic_cmpxchg_inatomic(), but rather
 * than force everybody to do that, write it out long-hand using
 * the low-level user-access infrastructure.
 *
 * This looks a bit overkill, but generally just results in a couple
 * of instructions.
 *
 * Returns 0 with the value stored in *@dest, or -EFAULT. *@dest is only
 * written on success.
 */
static __always_inline int futex_read_inatomic(u32 *dest, u32 __user *from)
{
	u32 val;

	/*
	 * Open the user access window: use the masked variant when it is
	 * available, otherwise the checked variant, which can refuse.
	 */
	if (can_do_masked_user_access())
		from = masked_user_access_begin(from);
	else if (!user_read_access_begin(from, sizeof(*from)))
		return -EFAULT;
	/* Jumps to Efault if the read faults */
	unsafe_get_user(val, from, Efault);
	user_read_access_end();
	*dest = val;
	return 0;
Efault:
	/* Close the access window on the failure path as well */
	user_read_access_end();
	return -EFAULT;
}
301 
302 static inline int futex_get_value_locked(u32 *dest, u32 __user *from)
303 {
304 	int ret;
305 
306 	pagefault_disable();
307 	ret = futex_read_inatomic(dest, from);
308 	pagefault_enable();
309 
310 	return ret;
311 }
312 
/* Queue/unqueue primitives implemented outside this header. */
extern void __futex_unqueue(struct futex_q *q);
/* Called by futex_queue() before it drops hb->lock. */
extern void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb,
				struct task_struct *task);
extern int futex_unqueue(struct futex_q *q);
317 
318 /**
319  * futex_queue() - Enqueue the futex_q on the futex_hash_bucket
320  * @q:	The futex_q to enqueue
321  * @hb:	The destination hash bucket
322  * @task: Task queueing this futex
323  *
324  * The hb->lock must be held by the caller, and is released here. A call to
325  * futex_queue() is typically paired with exactly one call to futex_unqueue().  The
326  * exceptions involve the PI related operations, which may use futex_unqueue_pi()
327  * or nothing if the unqueue is done as part of the wake process and the unqueue
328  * state is implicit in the state of woken task (see futex_wait_requeue_pi() for
329  * an example).
330  *
331  * Note that @task may be NULL, for async usage of futexes.
332  */
333 static inline void futex_queue(struct futex_q *q, struct futex_hash_bucket *hb,
334 			       struct task_struct *task)
335 	__releases(&hb->lock)
336 {
337 	__futex_queue(q, hb, task);
338 	spin_unlock(&hb->lock);
339 }
340 
/* Implemented outside this header; see the futex_queue() notes on pairing. */
extern void futex_unqueue_pi(struct futex_q *q);

/* Implemented outside this header. */
extern void wait_for_owner_exiting(int ret, struct task_struct *exiting);
344 
/*
 * Reflects a new waiter being added to the waitqueue.
 *
 * On SMP this increments hb->waiters and then issues a barrier; without
 * CONFIG_SMP it does nothing.
 */
static inline void futex_hb_waiters_inc(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	atomic_inc(&hb->waiters);
	/*
	 * Full barrier (A), see the ordering comment above.
	 *
	 * NOTE(review): this header contains no such ordering comment;
	 * presumably it lives next to the wait/wake implementation - confirm
	 * and fix the reference.
	 */
	smp_mb__after_atomic();
#endif
}
358 
/*
 * Reflects a waiter being removed from the waitqueue by wakeup
 * paths.
 *
 * On SMP this decrements hb->waiters; without CONFIG_SMP it does nothing.
 */
static inline void futex_hb_waiters_dec(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	atomic_dec(&hb->waiters);
#endif
}
369 
/*
 * Report the waiter count of @hb.
 *
 * On SMP this returns hb->waiters, read after a full barrier. Without
 * CONFIG_SMP the counter is not maintained and 1 is returned
 * unconditionally.
 */
static inline int futex_hb_waiters_pending(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	/*
	 * Full barrier (B), see the ordering comment above.
	 *
	 * NOTE(review): this header contains no such ordering comment;
	 * presumably it lives next to the wait/wake implementation - confirm
	 * and fix the reference.
	 */
	smp_mb();
	return atomic_read(&hb->waiters);
#else
	return 1;
#endif
}
382 
/* Bucket lock helpers implemented outside this header. */
extern void futex_q_lock(struct futex_q *q, struct futex_hash_bucket *hb);
extern void futex_q_unlock(struct futex_hash_bucket *hb);


/* PI helpers implemented outside this header. */
extern int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
				union futex_key *key,
				struct futex_pi_state **ps,
				struct task_struct *task,
				struct task_struct **exiting,
				int set_waiters);

extern int refill_pi_state_cache(void);
extern void get_pi_state(struct futex_pi_state *pi_state);
extern void put_pi_state(struct futex_pi_state *pi_state);
extern int fixup_pi_owner(u32 __user *uaddr, struct futex_q *q, int locked);
398 
399 /*
400  * Express the locking dependencies for lockdep:
401  */
402 static inline void
403 double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
404 {
405 	if (hb1 > hb2)
406 		swap(hb1, hb2);
407 
408 	spin_lock(&hb1->lock);
409 	if (hb1 != hb2)
410 		spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING);
411 }
412 
413 static inline void
414 double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
415 {
416 	spin_unlock(&hb1->lock);
417 	if (hb1 != hb2)
418 		spin_unlock(&hb2->lock);
419 }
420 
/* syscalls */

/* Entry points implemented outside this header. */
extern int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, u32
				 val, ktime_t *abs_time, u32 bitset, u32 __user
				 *uaddr2);

extern int futex_requeue(u32 __user *uaddr1, unsigned int flags1,
			 u32 __user *uaddr2, unsigned int flags2,
			 int nr_wake, int nr_requeue,
			 u32 *cmpval, int requeue_pi);

/* Variant taking an hrtimer_sleeper instead of an absolute ktime_t */
extern int __futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
			struct hrtimer_sleeper *to, u32 bitset);

extern int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
		      ktime_t *abs_time, u32 bitset);
437 
/**
 * struct futex_vector - Auxiliary struct for futex_waitv()
 * @w: Userspace provided data
 * @q: Kernel side data
 *
 * Struct used to build an array with all the data needed for futex_waitv()
 */
struct futex_vector {
	struct futex_waitv w;
	struct futex_q q;
};
449 
/* Helpers operating on arrays of struct futex_vector; implemented elsewhere. */
extern int futex_parse_waitv(struct futex_vector *futexv,
			     struct futex_waitv __user *uwaitv,
			     unsigned int nr_futexes, futex_wake_fn *wake,
			     void *wake_data);

extern int futex_wait_multiple_setup(struct futex_vector *vs, int count,
				     int *woken);

extern int futex_unqueue_multiple(struct futex_vector *v, int count);

extern int futex_wait_multiple(struct futex_vector *vs, unsigned int count,
			       struct hrtimer_sleeper *to);

/* Wake and PI lock/unlock entry points; implemented outside this header. */
extern int futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset);

extern int futex_wake_op(u32 __user *uaddr1, unsigned int flags,
			 u32 __user *uaddr2, int nr_wake, int nr_wake2, int op);

extern int futex_unlock_pi(u32 __user *uaddr, unsigned int flags);

extern int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int trylock);
471 
472 #endif /* _FUTEX_H */
473