xref: /linux/arch/arm64/include/asm/rqspinlock.h (revision b676ac484f847bbe5c7d29603f41475b64fefe55)
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_RQSPINLOCK_H
#define _ASM_RQSPINLOCK_H

#include <asm/barrier.h>

/*
 * Hardcode the res_smp_cond_load_acquire implementation for arm64 to a custom
 * version based on [0]. In rqspinlock code, our conditional expression
 * involves checking the value _and_ additionally a timeout. However, on
 * arm64, the WFE-based implementation may never spin again if no stores
 * occur to the locked byte in the lock word. As such, we may be stuck forever
 * if event-stream based unblocking is not available on the platform for WFE
 * spin loops (arch_timer_evtstrm_available).
 *
 * Once support for smp_cond_load_acquire_timewait [0] lands, we can drop this
 * copy-paste.
 *
 * While we rely on the implementation to amortize the cost of sampling
 * cond_expr for us via WFE, that does not happen when event stream support is
 * unavailable; in that case cond_expr is sampled on every spin and only the
 * time_expr check is amortized (once every smp_cond_time_check_count
 * iterations). This is not the common case, and it would be difficult to fit
 * our logic into the time_expr_ns >= time_limit_ns comparison, hence just let
 * it be. With event stream support, the loop is woken up at microsecond
 * granularity.
 *
 * [0]: https://lore.kernel.org/lkml/20250203214911.898276-1-ankur.a.arora@oracle.com
 */
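
/*
 * For illustration (a sketch, not code from this file): with a plain
 * smp_cond_load_acquire(),
 *
 *	smp_cond_load_acquire(&lock->locked, !VAL || time_is_up());
 *
 * the CPU parks in WFE once the condition fails; if no store ever hits the
 * lock word and no event stream tick arrives, time_is_up() is never
 * re-evaluated and the loop hangs. Here, time_is_up() is a hypothetical
 * stand-in for the rqspinlock deadline check.
 */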

#ifndef smp_cond_load_acquire_timewait

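/*
 * Number of relaxed spins between successive evaluations of time_expr_ns in
 * the spin-wait fallback below; sampling the clock on every iteration would
 * be comparatively expensive.
 */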
#define smp_cond_time_check_count	200

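/*
 * Busy-wait fallback used when no event stream is available: sample the lock
 * word with a relaxed READ_ONCE() on every spin, but only re-evaluate the
 * timeout once every smp_cond_time_check_count iterations. Loads are relaxed;
 * smp_cond_load_acquire_timewait() below adds the acquire barrier.
 */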
#define __smp_cond_load_relaxed_spinwait(ptr, cond_expr, time_expr_ns,	\
					 time_limit_ns) ({		\
	typeof(ptr) __PTR = (ptr);					\
	__unqual_scalar_typeof(*ptr) VAL;				\
	unsigned int __count = 0;					\
	for (;;) {							\
		VAL = READ_ONCE(*__PTR);				\
		if (cond_expr)						\
			break;						\
		cpu_relax();						\
		if (__count++ < smp_cond_time_check_count)		\
			continue;					\
		if ((time_expr_ns) >= (time_limit_ns))			\
			break;						\
		__count = 0;						\
	}								\
	(typeof(*ptr))VAL;						\
})

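/*
 * WFE-based wait: __cmpwait_relaxed() parks the CPU in WFE until either a
 * store touches *ptr or the timer event stream generates a wakeup, so both
 * cond_expr and the timeout are re-evaluated only on wakeups (at microsecond
 * granularity when the event stream is running).
 */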
#define __smp_cond_load_acquire_timewait(ptr, cond_expr,		\
					 time_expr_ns, time_limit_ns)	\
({									\
	typeof(ptr) __PTR = (ptr);					\
	__unqual_scalar_typeof(*ptr) VAL;				\
	for (;;) {							\
		VAL = smp_load_acquire(__PTR);				\
		if (cond_expr)						\
			break;						\
		__cmpwait_relaxed(__PTR, VAL);				\
		if ((time_expr_ns) >= (time_limit_ns))			\
			break;						\
	}								\
	(typeof(*ptr))VAL;						\
})

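/*
 * Use the WFE-based variant only when the timer event stream can break the
 * CPU out of WFE; otherwise fall back to busy spinning and upgrade the
 * relaxed loads to acquire semantics with smp_acquire__after_ctrl_dep().
 */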
#define smp_cond_load_acquire_timewait(ptr, cond_expr,			\
				      time_expr_ns, time_limit_ns)	\
({									\
	__unqual_scalar_typeof(*ptr) _val;				\
	int __wfe = arch_timer_evtstrm_available();			\
									\
	if (likely(__wfe)) {						\
		_val = __smp_cond_load_acquire_timewait(ptr, cond_expr,	\
							time_expr_ns,	\
							time_limit_ns);	\
	} else {							\
		_val = __smp_cond_load_relaxed_spinwait(ptr, cond_expr,	\
							time_expr_ns,	\
							time_limit_ns);	\
		smp_acquire__after_ctrl_dep();				\
	}								\
	(typeof(*ptr))_val;						\
})

#endif

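/*
 * rqspinlock checks its timeout inside the condition expression itself (see
 * the comment at the top of this file), so pass time_expr_ns = 0 and
 * time_limit_ns = 1: the 0 >= 1 comparison never fires, and the wait is
 * bounded by cond_expr alone.
 */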
#define res_smp_cond_load_acquire(v, c) smp_cond_load_acquire_timewait(v, c, 0, 1)
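
/*
 * Illustrative only: the real call sites live in the generic rqspinlock
 * code, but a use folding the deadline into the condition looks roughly
 * like the sketch below, where check_timeout() and ret are hypothetical
 * stand-ins:
 *
 *	res_smp_cond_load_acquire(&lock->locked,
 *				  !VAL || (ret = check_timeout(...)));
 */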

#include <asm-generic/rqspinlock.h>

#endif /* _ASM_RQSPINLOCK_H */