xref: /linux/kernel/futex/futex.h (revision 056e065a6b6e01ab54bb9770c0d5a15350e571e2)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 #ifndef _FUTEX_H
3 #define _FUTEX_H
4 
5 #include <linux/futex.h>
6 #include <linux/rtmutex.h>
7 #include <linux/sched/wake_q.h>
8 #include <linux/compat.h>
9 #include <linux/uaccess.h>
10 #include <linux/cleanup.h>
11 
12 #ifdef CONFIG_PREEMPT_RT
13 #include <linux/rcuwait.h>
14 #endif
15 
16 #include <asm/futex.h>
17 
/*
 * Futex flags used to encode options to functions and preserve them across
 * restarts.
 */

/*
 * Size of the futex word, encoded as log2 of the size in bytes:
 * futex_size() computes 1 << (flags & FLAGS_SIZE_MASK).
 * futex2_to_flags() copies (flags2 & FUTEX2_SIZE_MASK) into these bits
 * without translation.
 */
#define FLAGS_SIZE_8		0x0000
#define FLAGS_SIZE_16		0x0001
#define FLAGS_SIZE_32		0x0002
#define FLAGS_SIZE_64		0x0003

#define FLAGS_SIZE_MASK		0x0003

/*
 * FLAGS_SHARED is set by futex_to_flags()/futex2_to_flags() whenever the
 * caller did not pass the respective PRIVATE flag.
 */
#ifdef CONFIG_MMU
# define FLAGS_SHARED		0x0010
#else
/*
 * NOMMU does not have per process address space. Let the compiler optimize
 * code away.
 */
# define FLAGS_SHARED		0x0000
#endif
/* Set by futex_to_flags() from FUTEX_CLOCK_REALTIME */
#define FLAGS_CLOCKRT		0x0020
#define FLAGS_HAS_TIMEOUT	0x0040
/* Set by futex2_to_flags() from FUTEX2_NUMA */
#define FLAGS_NUMA		0x0080
#define FLAGS_STRICT		0x0100
/* Set by futex2_to_flags() from FUTEX2_MPOL */
#define FLAGS_MPOL		0x0200
/* Set by futex_to_flags() from FUTEX_ROBUST_UNLOCK / FUTEX_ROBUST_LIST32 */
#define FLAGS_ROBUST_UNLOCK	0x0400
#define FLAGS_ROBUST_LIST32	0x0800
45 
46 /* FUTEX_ to FLAGS_ */
47 static inline unsigned int futex_to_flags(unsigned int op)
48 {
49 	unsigned int flags = FLAGS_SIZE_32;
50 
51 	if (!(op & FUTEX_PRIVATE_FLAG))
52 		flags |= FLAGS_SHARED;
53 
54 	if (op & FUTEX_CLOCK_REALTIME)
55 		flags |= FLAGS_CLOCKRT;
56 
57 	if (op & FUTEX_ROBUST_UNLOCK)
58 		flags |= FLAGS_ROBUST_UNLOCK;
59 
60 	if (op & FUTEX_ROBUST_LIST32)
61 		flags |= FLAGS_ROBUST_LIST32;
62 
63 	return flags;
64 }
65 
/*
 * Union of the FUTEX2_* bits that futex2_to_flags() knows how to translate.
 * NOTE(review): callers presumably reject flag words with bits outside this
 * mask before converting them -- confirm at the call sites.
 */
#define FUTEX2_VALID_MASK (FUTEX2_SIZE_MASK | FUTEX2_NUMA | FUTEX2_MPOL | FUTEX2_PRIVATE)
67 
68 /* FUTEX2_ to FLAGS_ */
69 static inline unsigned int futex2_to_flags(unsigned int flags2)
70 {
71 	unsigned int flags = flags2 & FUTEX2_SIZE_MASK;
72 
73 	if (!(flags2 & FUTEX2_PRIVATE))
74 		flags |= FLAGS_SHARED;
75 
76 	if (flags2 & FUTEX2_NUMA)
77 		flags |= FLAGS_NUMA;
78 
79 	if (flags2 & FUTEX2_MPOL)
80 		flags |= FLAGS_MPOL;
81 
82 	return flags;
83 }
84 
85 static inline unsigned int futex_size(unsigned int flags)
86 {
87 	return 1 << (flags & FLAGS_SIZE_MASK);
88 }
89 
90 static inline bool futex_flags_valid(unsigned int flags)
91 {
92 	/* Only 64bit futexes for 64bit code */
93 	if (!IS_ENABLED(CONFIG_64BIT) || in_compat_syscall()) {
94 		if ((flags & FLAGS_SIZE_MASK) == FLAGS_SIZE_64)
95 			return false;
96 	}
97 
98 	/* Only 32bit futexes are implemented -- for now */
99 	if ((flags & FLAGS_SIZE_MASK) != FLAGS_SIZE_32)
100 		return false;
101 
102 	/*
103 	 * Must be able to represent both FUTEX_NO_NODE and every valid nodeid
104 	 * in a futex word.
105 	 */
106 	if (flags & FLAGS_NUMA) {
107 		int bits = 8 * futex_size(flags);
108 		u64 max = ~0ULL;
109 
110 		max >>= 64 - bits;
111 		if (nr_node_ids >= max)
112 			return false;
113 	}
114 
115 	return true;
116 }
117 
118 static inline bool futex_validate_input(unsigned int flags, u64 val)
119 {
120 	int bits = 8 * futex_size(flags);
121 
122 	if (bits < 64 && (val >> bits))
123 		return false;
124 
125 	return true;
126 }
127 
/*
 * should_fail_futex() is provided out of line when CONFIG_FAIL_FUTEX is
 * enabled. Without that option it is a stub which always returns false and
 * ignores @fshared.
 */
#ifdef CONFIG_FAIL_FUTEX
extern bool should_fail_futex(bool fshared);
#else
static inline bool should_fail_futex(bool fshared)
{
	return false;
}
#endif
136 
137 static inline bool futex_key_is_private(union futex_key *key)
138 {
139 	/*
140 	 * Relies on get_futex_key() to set either bit for shared
141 	 * futexes -- see comment with union futex_key.
142 	 */
143 	return !(key->both.offset & (FUT_OFF_INODE | FUT_OFF_MMSHARED));
144 }
145 
/*
 * Hash buckets are shared by all the futex_keys that hash to the same
 * location.  Each key may have multiple futex_q structures, one for each task
 * waiting on a futex.
 */
struct futex_hash_bucket {
	/* Waiter count, adjusted by futex_hb_waiters_inc()/_dec() on CONFIG_SMP */
	atomic_t waiters;
	/* Bucket lock; taken by double_lock_hb(), dropped by futex_queue() */
	spinlock_t lock;
	/* NOTE(review): presumably the plist which futex_q::list is queued on -- confirm */
	struct plist_head chain;
} ____cacheline_aligned_in_smp;
156 
/*
 * Priority Inheritance state:
 */
struct futex_pi_state {
	/*
	 * list of 'owned' pi_state instances - these have to be
	 * cleaned up in do_exit() if the task exits prematurely:
	 */
	struct list_head list;

	/*
	 * The PI object:
	 */
	struct rt_mutex_base pi_mutex;

	/* NOTE(review): presumably the task currently owning the PI futex -- confirm */
	struct task_struct *owner;
	/* NOTE(review): presumably managed via get_pi_state()/put_pi_state() -- confirm */
	refcount_t refcount;

	/* NOTE(review): presumably the key of the futex this state belongs to -- confirm */
	union futex_key key;
} __randomize_layout;
177 
struct futex_q;
/*
 * Signature of the per-waiter wake handler stored in futex_q::wake.
 * futex_wake_mark() declared further down has this signature.
 */
typedef void (futex_wake_fn)(struct wake_q_head *wake_q, struct futex_q *q);
180 
/**
 * struct futex_q - The hashed futex queue entry, one per waiting task
 * @list:		priority-sorted list of tasks waiting on this futex
 * @task:		the task waiting on the futex
 * @lock_ptr:		the hash bucket lock
 * @wake:		the wake handler for this queue
 * @wake_data:		data associated with the wake handler
 * @key:		the key the futex is hashed on
 * @pi_state:		optional priority inheritance state
 * @rt_waiter:		rt_waiter storage for use with requeue_pi
 * @requeue_pi_key:	the requeue_pi target futex key
 * @bitset:		bitset for the optional bitmasked wakeup
 * @requeue_state:	State field for futex_requeue_pi()
 * @drop_fph:		Waiter should drop the extra private hash reference when set
 * @requeue_wait:	RCU wait for futex_requeue_pi() (RT only)
 *
 * We use this hashed waitqueue, instead of a normal wait_queue_entry_t, so
 * we can wake only the relevant ones (hashed queues may be shared).
 *
 * A futex_q has a woken state, just like tasks have TASK_RUNNING.
 * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == NULL.
 * The order of wakeup is always to make the first condition true, then
 * the second.
 *
 * PI futexes are typically woken before they are removed from the hash list via
 * the rt_mutex code. See futex_unqueue_pi().
 */
struct futex_q {
	struct plist_node list;

	struct task_struct *task;
	spinlock_t *lock_ptr;
	futex_wake_fn *wake;
	void *wake_data;
	union futex_key key;
	struct futex_pi_state *pi_state;
	struct rt_mutex_waiter *rt_waiter;
	union futex_key *requeue_pi_key;
	u32 bitset;
	atomic_t requeue_state;
	struct futex_private_hash *drop_fph;
	/* Only present on PREEMPT_RT; <linux/rcuwait.h> is included under the same condition */
#ifdef CONFIG_PREEMPT_RT
	struct rcuwait requeue_wait;
#endif
} __randomize_layout;
226 
227 extern const struct futex_q futex_q_init;
228 
/* Access type requested for the futex word; passed as @rw to get_futex_key() */
enum futex_access {
	FUTEX_READ,
	FUTEX_WRITE
};
233 
234 extern int get_futex_key(u32 __user *uaddr, unsigned int flags, union futex_key *key,
235 			 enum futex_access rw);
236 extern void futex_q_lockptr_lock(struct futex_q *q) __acquires(q->lock_ptr);
237 extern struct hrtimer_sleeper *
238 futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
239 		  int flags, u64 range_ns);
240 
/*
 * Result of futex_hash(): the bucket a key hashes to, plus the private hash
 * it was looked up in. The hbr class cleanup hands @fph to
 * futex_private_hash_put() when it is non-NULL.
 */
struct futex_bucket_ref {
	struct futex_hash_bucket *hb;
	struct futex_private_hash *fph;
};
245 
/*
 * Private hash accessors. With CONFIG_FUTEX_PRIVATE_HASH they are provided
 * out of line; without it futex_private_hash() always returns NULL and
 * futex_private_hash_put() does nothing.
 */
#ifdef CONFIG_FUTEX_PRIVATE_HASH
extern struct futex_private_hash *futex_private_hash(struct mm_struct *mm);
extern void futex_private_hash_put(struct futex_private_hash *fph);

#else /* !CONFIG_FUTEX_PRIVATE_HASH */
static inline struct futex_private_hash *futex_private_hash(struct mm_struct *mm) { return NULL; }
static inline void futex_private_hash_put(struct futex_private_hash *fph) { }
#endif
254 
255 extern struct futex_bucket_ref futex_hash(union futex_key *key);
256 
/*
 * Scope-based class "hbr": constructed via futex_hash(key); on scope exit
 * the private hash reference is put if one was recorded in the bucket ref.
 */
DEFINE_CLASS(hbr, struct futex_bucket_ref,
	     if (_T.fph) futex_private_hash_put(_T.fph),
	     futex_hash(key), union futex_key *key);

/*
 * Scope-based class "private_hash": constructed via futex_private_hash(mm);
 * on scope exit a non-NULL result is handed to futex_private_hash_put().
 */
DEFINE_CLASS(private_hash, struct futex_private_hash *,
	     if (_T) futex_private_hash_put(_T),
	     futex_private_hash(mm), struct mm_struct *mm);
264 
265 /**
266  * futex_match - Check whether two futex keys are equal
267  * @key1:	Pointer to key1
268  * @key2:	Pointer to key2
269  *
270  * Return 1 if two futex_keys are equal, 0 otherwise.
271  */
272 static inline int futex_match(union futex_key *key1, union futex_key *key2)
273 {
274 	return (key1 && key2
275 		&& key1->both.word == key2->both.word
276 		&& key1->both.ptr == key2->both.ptr
277 		&& key1->both.offset == key2->both.offset);
278 }
279 
280 extern int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
281 			    struct futex_q *q, union futex_key *key2,
282 			    struct task_struct *task);
283 extern void futex_do_wait(struct futex_q *q, struct hrtimer_sleeper *timeout);
284 extern bool __futex_wake_mark(struct futex_q *q);
285 extern void futex_wake_mark(struct wake_q_head *wake_q, struct futex_q *q);
286 
287 extern int fault_in_user_writeable(u32 __user *uaddr);
288 extern struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb, union futex_key *key);
289 
290 static inline int futex_cmpxchg_value_locked(u32 *curval, u32 __user *uaddr, u32 uval, u32 newval)
291 {
292 	int ret;
293 
294 	pagefault_disable();
295 	ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
296 	pagefault_enable();
297 
298 	return ret;
299 }
300 
/*
 * Read from user memory with pagefaults disabled
 *
 * Stores the value read from @from in *@dest and returns the result of
 * get_user_inline().
 */
static inline int futex_get_value_locked(u32 *dest, u32 __user *from)
{
	/* Scope-based guard: pagefaults stay disabled until the function returns */
	guard(pagefault)();
	return get_user_inline(*dest, from);
}
307 
308 extern void __futex_unqueue(struct futex_q *q);
309 extern void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb,
310 				struct task_struct *task);
311 extern int futex_unqueue(struct futex_q *q);
312 
/**
 * futex_queue() - Enqueue the futex_q on the futex_hash_bucket
 * @q:	The futex_q to enqueue
 * @hb:	The destination hash bucket
 * @task: Task queueing this futex
 *
 * The hb->lock must be held by the caller, and is released here. A call to
 * futex_queue() is typically paired with exactly one call to futex_unqueue().  The
 * exceptions involve the PI related operations, which may use futex_unqueue_pi()
 * or nothing if the unqueue is done as part of the wake process and the unqueue
 * state is implicit in the state of the woken task (see futex_wait_requeue_pi()
 * for an example).
 *
 * Note that @task may be NULL, for async usage of futexes.
 */
static inline void futex_queue(struct futex_q *q, struct futex_hash_bucket *hb,
			       struct task_struct *task)
	__releases(&hb->lock)
	__releases(q->lock_ptr)
{
	__futex_queue(q, hb, task);
	spin_unlock(&hb->lock);
	/*
	 * Matches the __releases(q->lock_ptr) annotation above.
	 * NOTE(review): presumably annotation only, with q->lock_ptr aliasing
	 * &hb->lock at this point -- confirm.
	 */
	__release(q->lock_ptr);
}
337 
338 extern void futex_unqueue_pi(struct futex_q *q);
339 
340 extern void wait_for_owner_exiting(int ret, struct task_struct *exiting);
341 
/*
 * Reflects a new waiter being added to the waitqueue.
 *
 * Increments hb->waiters and follows the increment with a full barrier.
 * Does nothing when CONFIG_SMP is not set.
 */
static inline void futex_hb_waiters_inc(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	atomic_inc(&hb->waiters);
	/*
	 * Full barrier (A), see the ordering comment above.
	 *
	 * NOTE(review): that ordering comment is not part of this header; it
	 * presumably lives in one of the futex .c files -- confirm and name
	 * the file here.
	 */
	smp_mb__after_atomic();
#endif
}
355 
/*
 * Reflects a waiter being removed from the waitqueue by wakeup
 * paths.
 *
 * Decrements hb->waiters without issuing an explicit barrier. Does nothing
 * when CONFIG_SMP is not set.
 */
static inline void futex_hb_waiters_dec(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	atomic_dec(&hb->waiters);
#endif
}
366 
/*
 * Report the waiter count of @hb.
 *
 * On CONFIG_SMP this issues a full barrier and then returns the current
 * value of hb->waiters. Without CONFIG_SMP the counter is not maintained and
 * the function unconditionally returns 1.
 */
static inline int futex_hb_waiters_pending(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
	/*
	 * Full barrier (B), see the ordering comment above.
	 *
	 * NOTE(review): that ordering comment is not part of this header; it
	 * presumably lives in one of the futex .c files -- confirm and name
	 * the file here.
	 */
	smp_mb();
	return atomic_read(&hb->waiters);
#else
	return 1;
#endif
}
379 
380 extern void futex_q_lock(struct futex_q *q, struct futex_hash_bucket *hb)
381 	__acquires(&hb->lock)
382 	__acquires(q->lock_ptr);
383 
384 extern void futex_q_unlock(struct futex_hash_bucket *hb)
385 	__releases(&hb->lock);
386 
387 extern int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
388 				union futex_key *key,
389 				struct futex_pi_state **ps,
390 				struct task_struct *task,
391 				struct task_struct **exiting,
392 				int set_waiters);
393 
394 extern int refill_pi_state_cache(void);
395 extern void get_pi_state(struct futex_pi_state *pi_state);
396 extern void put_pi_state(struct futex_pi_state *pi_state);
397 extern int fixup_pi_owner(u32 __user *uaddr, struct futex_q *q, int locked);
398 
/*
 * Express the locking dependencies for lockdep:
 *
 * Lock two hash buckets. The bucket with the lower address is always locked
 * first, the other one with SINGLE_DEPTH_NESTING. If both arguments refer to
 * the same bucket, its lock is taken only once.
 */
static inline void
double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
	__acquires(&hb1->lock)
	__acquires(&hb2->lock)
	__no_context_analysis
{
	/* Only the local pointer copies are swapped; the caller's view is unchanged */
	if (hb1 > hb2)
		swap(hb1, hb2);

	spin_lock(&hb1->lock);
	if (hb1 != hb2)
		spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING);
}
415 
/*
 * Unlock two hash buckets: @hb1 first, then @hb2 unless both refer to the
 * same bucket. Unlike double_lock_hb() the arguments are not reordered by
 * address.
 */
static inline void
double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
	__releases(&hb1->lock)
	__releases(&hb2->lock)
	__no_context_analysis
{
	spin_unlock(&hb1->lock);
	if (hb1 != hb2)
		spin_unlock(&hb2->lock);
}
426 
427 /* syscalls */
428 
429 extern int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, u32
430 				 val, ktime_t *abs_time, u32 bitset, u32 __user
431 				 *uaddr2);
432 
433 extern int futex_requeue(u32 __user *uaddr1, unsigned int flags1,
434 			 u32 __user *uaddr2, unsigned int flags2,
435 			 int nr_wake, int nr_requeue,
436 			 u32 *cmpval, int requeue_pi);
437 
438 extern int __futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
439 			struct hrtimer_sleeper *to, u32 bitset);
440 
441 extern int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
442 		      ktime_t *abs_time, u32 bitset);
443 
/**
 * struct futex_vector - Auxiliary struct for futex_waitv()
 * @w: Userspace provided data
 * @q: Kernel side data
 *
 * Struct used to build an array with all the data needed for futex_waitv()
 */
struct futex_vector {
	struct futex_waitv w;
	struct futex_q q;
};
455 
456 extern int futex_parse_waitv(struct futex_vector *futexv,
457 			     struct futex_waitv __user *uwaitv,
458 			     unsigned int nr_futexes, futex_wake_fn *wake,
459 			     void *wake_data);
460 
461 extern int futex_wait_multiple_setup(struct futex_vector *vs, int count,
462 				     int *woken);
463 
464 extern int futex_unqueue_multiple(struct futex_vector *v, int count);
465 
466 extern int futex_wait_multiple(struct futex_vector *vs, unsigned int count,
467 			       struct hrtimer_sleeper *to);
468 
469 extern int futex_wake(u32 __user *uaddr, unsigned int flags, void __user *pop,
470 		      int nr_wake, u32 bitset);
471 
472 extern int futex_wake_op(u32 __user *uaddr1, unsigned int flags,
473 			 u32 __user *uaddr2, int nr_wake, int nr_wake2, int op);
474 
475 extern int futex_unlock_pi(u32 __user *uaddr, unsigned int flags, void __user *pop);
476 
477 extern int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int trylock);
478 
479 bool futex_robust_list_clear_pending(void __user *pop, unsigned int flags);
480 
481 #endif /* _FUTEX_H */
482