1 /* SPDX-License-Identifier: GPL-2.0-only */
2 /*
3 * Copyright (C) 2013 ARM Ltd.
4 */
5 #ifndef __ASM_PERCPU_H
6 #define __ASM_PERCPU_H
7
8 #include <linux/preempt.h>
9
10 #include <asm/alternative.h>
11 #include <asm/cmpxchg.h>
12 #include <asm/stack_pointer.h>
13 #include <asm/sysreg.h>
14
set_my_cpu_offset(unsigned long off)15 static inline void set_my_cpu_offset(unsigned long off)
16 {
17 asm volatile(ALTERNATIVE("msr tpidr_el1, %0",
18 "msr tpidr_el2, %0",
19 ARM64_HAS_VIRT_HOST_EXTN)
20 :: "r" (off) : "memory");
21 }
22
__hyp_my_cpu_offset(void)23 static inline unsigned long __hyp_my_cpu_offset(void)
24 {
25 /*
26 * Non-VHE hyp code runs with preemption disabled. No need to hazard
27 * the register access against barrier() as in __kern_my_cpu_offset.
28 */
29 return read_sysreg(tpidr_el2);
30 }
31
__kern_my_cpu_offset(void)32 static inline unsigned long __kern_my_cpu_offset(void)
33 {
34 unsigned long off;
35
36 /*
37 * We want to allow caching the value, so avoid using volatile and
38 * instead use a fake stack read to hazard against barrier().
39 */
40 asm(ALTERNATIVE("mrs %0, tpidr_el1",
41 "mrs %0, tpidr_el2",
42 ARM64_HAS_VIRT_HOST_EXTN)
43 : "=r" (off) :
44 "Q" (*(const unsigned long *)current_stack_pointer));
45
46 return off;
47 }
48
/*
 * __my_cpu_offset: the kernel accessor by default, the hyp accessor when
 * building with __KVM_NVHE_HYPERVISOR__ defined.
 */
#ifndef __KVM_NVHE_HYPERVISOR__
#define __my_cpu_offset	__kern_my_cpu_offset()
#else
#define __my_cpu_offset	__hyp_my_cpu_offset()
#endif
54
/*
 * Generate __percpu_read_<sz>() and __percpu_write_<sz>().
 *
 * Both treat @ptr as pointing to a u<sz> object. The read helper performs
 * one READ_ONCE() and returns the value widened to unsigned long; the
 * write helper truncates @val to u<sz> and stores it with one WRITE_ONCE().
 */
#define PERCPU_RW_OPS(sz)						\
static inline unsigned long __percpu_read_##sz(void *ptr)		\
{									\
	u##sz *src = ptr;						\
									\
	return READ_ONCE(*src);						\
}									\
									\
static inline void __percpu_write_##sz(void *ptr, unsigned long val)	\
{									\
	u##sz *dst = ptr;						\
									\
	WRITE_ONCE(*dst, (u##sz)val);					\
}
65
/*
 * Generate __percpu_<name>_case_<sz>(): atomically apply an operation with
 * operand @val to the u<sz> object at @ptr, discarding the result.
 *
 * @w:       register width modifier for the data operands ("w" or empty)
 * @sfx:     size suffix appended to ldxr/stxr ("b", "h" or empty)
 * @op_llsc: ALU instruction used between ldxr and stxr in the LL/SC form
 * @op_lse:  single atomic instruction used in the LSE form
 *
 * ARM64_LSE_ATOMIC_INSN() chooses between the two sequences. The LSE form
 * is padded with three nops so both have the same number of instructions.
 */
#define __PERCPU_OP_CASE(w, sfx, name, sz, op_llsc, op_lse)		\
static inline void							\
__percpu_##name##_case_##sz(void *ptr, unsigned long val)		\
{									\
	u##sz *slot = ptr;						\
	u##sz scratch;							\
	unsigned int sc_status;						\
									\
	asm volatile (ARM64_LSE_ATOMIC_INSN(				\
	/* LL/SC: load-exclusive, modify, store-exclusive, retry */	\
	"1: ldxr" #sfx "\t%" #w "[tmp], %[ptr]\n"			\
	#op_llsc "\t%" #w "[tmp], %" #w "[tmp], %" #w "[val]\n"		\
	" stxr" #sfx "\t%w[loop], %" #w "[tmp], %[ptr]\n"		\
	" cbnz %w[loop], 1b",						\
	/* LSE atomics: one instruction, loaded value is unused */	\
	#op_lse "\t%" #w "[val], %" #w "[tmp], %[ptr]\n"		\
	__nops(3))							\
	: [loop] "=&r" (sc_status),					\
	  [tmp] "=&r" (scratch),					\
	  [ptr] "+Q" (*slot)						\
	: [val] "r" ((u##sz)(val)));					\
}
86
/*
 * Generate __percpu_<name>_return_case_<sz>(): atomically apply an
 * operation with operand @val to the u<sz> object at @ptr and return the
 * value produced by the operation.
 *
 * Parameters are as for __PERCPU_OP_CASE(). In the LSE form the atomic
 * instruction is followed by op_llsc on the register it loaded, so the
 * returned register is combined with @val in the same way as in the LL/SC
 * form. Two nops pad the LSE form to the length of the LL/SC form.
 */
#define __PERCPU_RET_OP_CASE(w, sfx, name, sz, op_llsc, op_lse)		\
static inline u##sz							\
__percpu_##name##_return_case_##sz(void *ptr, unsigned long val)	\
{									\
	u##sz *slot = ptr;						\
	u##sz updated;							\
	unsigned int sc_status;						\
									\
	asm volatile (ARM64_LSE_ATOMIC_INSN(				\
	/* LL/SC: load-exclusive, modify, store-exclusive, retry */	\
	"1: ldxr" #sfx "\t%" #w "[ret], %[ptr]\n"			\
	#op_llsc "\t%" #w "[ret], %" #w "[ret], %" #w "[val]\n"		\
	" stxr" #sfx "\t%w[loop], %" #w "[ret], %[ptr]\n"		\
	" cbnz %w[loop], 1b",						\
	/* LSE atomics: atomic op, then redo the op on the register */	\
	#op_lse "\t%" #w "[val], %" #w "[ret], %[ptr]\n"		\
	#op_llsc "\t%" #w "[ret], %" #w "[ret], %" #w "[val]\n"		\
	__nops(2))							\
	: [loop] "=&r" (sc_status),					\
	  [ret] "=&r" (updated),					\
	  [ptr] "+Q" (*slot)						\
	: [val] "r" ((u##sz)(val)));					\
									\
	return updated;							\
}
110
/*
 * Instantiate __PERCPU_OP_CASE() for the 8-, 16-, 32- and 64-bit sizes.
 * The first two arguments are the register width modifier ("w" or empty)
 * and the ldxr/stxr size suffix ("b", "h" or empty) for that size.
 */
#define PERCPU_OP(name, op_llsc, op_lse)				\
	__PERCPU_OP_CASE(w, b, name, 8, op_llsc, op_lse)		\
	__PERCPU_OP_CASE(w, h, name, 16, op_llsc, op_lse)		\
	__PERCPU_OP_CASE(w, , name, 32, op_llsc, op_lse)		\
	__PERCPU_OP_CASE( , , name, 64, op_llsc, op_lse)
116
/*
 * Instantiate __PERCPU_RET_OP_CASE() for the 8-, 16-, 32- and 64-bit
 * sizes, using the same width modifier / size suffix pairs as PERCPU_OP().
 */
#define PERCPU_RET_OP(name, op_llsc, op_lse)				\
	__PERCPU_RET_OP_CASE(w, b, name, 8, op_llsc, op_lse)		\
	__PERCPU_RET_OP_CASE(w, h, name, 16, op_llsc, op_lse)		\
	__PERCPU_RET_OP_CASE(w, , name, 32, op_llsc, op_lse)		\
	__PERCPU_RET_OP_CASE( , , name, 64, op_llsc, op_lse)
122
/* Emit __percpu_read_<sz>() and __percpu_write_<sz>() for each size. */
PERCPU_RW_OPS(8)
PERCPU_RW_OPS(16)
PERCPU_RW_OPS(32)
PERCPU_RW_OPS(64)
127
/*
 * Use value-returning atomics for CPU-local ops as they are more likely
 * to execute "near" to the CPU (e.g. in L1$).
 *
 * https://lore.kernel.org/r/e7d539ed-ced0-4b96-8ecd-048a5b803b85@paulmck-laptop
 */
PERCPU_OP(add, add, ldadd)		/* __percpu_add_case_<sz>() */
PERCPU_OP(andnot, bic, ldclr)		/* __percpu_andnot_case_<sz>() */
PERCPU_OP(or, orr, ldset)		/* __percpu_or_case_<sz>() */
PERCPU_RET_OP(add, add, ldadd)		/* __percpu_add_return_case_<sz>() */
138
/* The generator macros have been instantiated above; drop them again. */
#undef PERCPU_RW_OPS
#undef __PERCPU_OP_CASE
#undef __PERCPU_RET_OP_CASE
#undef PERCPU_OP
#undef PERCPU_RET_OP
144
145 /*
146 * It would be nice to avoid the conditional call into the scheduler when
147 * re-enabling preemption for preemptible kernels, but doing that in a way
148 * which builds inside a module would mean messing directly with the preempt
149 * count. If you do this, peterz and tglx will hunt you down.
150 *
151 * Not to mention it'll break the actual preemption model for missing a
152 * preemption point when TIF_NEED_RESCHED gets set while preemption is
153 * disabled.
154 */
155
/*
 * Run op(<pointer to this CPU's instance of pcp>, args...) with preemption
 * disabled. The pointer is obtained with raw_cpu_ptr() only after
 * preempt_disable_notrace(), and preemption is re-enabled once op returns.
 * Any value produced by op is discarded.
 */
#define _pcp_protect(op, pcp, ...)					\
({									\
	preempt_disable_notrace();					\
	op(raw_cpu_ptr(&(pcp)), __VA_ARGS__);				\
	preempt_enable_notrace();					\
})
162
/*
 * Value-returning counterpart of _pcp_protect(): call op on this CPU's
 * instance of @pcp (plus any extra arguments) with preemption disabled,
 * and evaluate to op's result cast to the type of @pcp.
 */
#define _pcp_protect_return(op, pcp, ...)				\
({									\
	typeof(pcp) __pcp_ret;						\
									\
	preempt_disable_notrace();					\
	__pcp_ret = (typeof(pcp))op(raw_cpu_ptr(&(pcp)), ##__VA_ARGS__); \
	preempt_enable_notrace();					\
									\
	__pcp_ret;							\
})
171
/*
 * this_cpu_read_<bytes>(): read this CPU's instance of @pcp through the
 * __percpu_read_<bits>() helper of matching width, under _pcp_protect_return().
 */
#define this_cpu_read_1(pcp)		\
	_pcp_protect_return(__percpu_read_8, pcp)
#define this_cpu_read_2(pcp)		\
	_pcp_protect_return(__percpu_read_16, pcp)
#define this_cpu_read_4(pcp)		\
	_pcp_protect_return(__percpu_read_32, pcp)
#define this_cpu_read_8(pcp)		\
	_pcp_protect_return(__percpu_read_64, pcp)
180
/*
 * this_cpu_write_<bytes>(): store @val to this CPU's instance of @pcp
 * through the __percpu_write_<bits>() helper of matching width, under
 * _pcp_protect().
 *
 * @val is parenthesised before the cast so that the conversion to
 * unsigned long applies to the whole argument expression. Without the
 * parentheses, an argument such as "ptr + 1" would be expanded to
 * "(unsigned long)ptr + 1" and silently lose its pointer arithmetic.
 */
#define this_cpu_write_1(pcp, val)	\
	_pcp_protect(__percpu_write_8, pcp, (unsigned long)(val))
#define this_cpu_write_2(pcp, val)	\
	_pcp_protect(__percpu_write_16, pcp, (unsigned long)(val))
#define this_cpu_write_4(pcp, val)	\
	_pcp_protect(__percpu_write_32, pcp, (unsigned long)(val))
#define this_cpu_write_8(pcp, val)	\
	_pcp_protect(__percpu_write_64, pcp, (unsigned long)(val))
189
/*
 * this_cpu_add_<bytes>(): add @val to this CPU's instance of @pcp using
 * the __percpu_add_case_<bits>() helper of matching width. No value is
 * returned.
 */
#define this_cpu_add_1(pcp, val)	\
	_pcp_protect(__percpu_add_case_8, pcp, val)
#define this_cpu_add_2(pcp, val)	\
	_pcp_protect(__percpu_add_case_16, pcp, val)
#define this_cpu_add_4(pcp, val)	\
	_pcp_protect(__percpu_add_case_32, pcp, val)
#define this_cpu_add_8(pcp, val)	\
	_pcp_protect(__percpu_add_case_64, pcp, val)
198
/*
 * this_cpu_add_return_<bytes>(): add @val to this CPU's instance of @pcp
 * using __percpu_add_return_case_<bits>() and evaluate to that helper's
 * result, cast to the type of @pcp by _pcp_protect_return().
 */
#define this_cpu_add_return_1(pcp, val)	\
	_pcp_protect_return(__percpu_add_return_case_8, pcp, val)
#define this_cpu_add_return_2(pcp, val)	\
	_pcp_protect_return(__percpu_add_return_case_16, pcp, val)
#define this_cpu_add_return_4(pcp, val)	\
	_pcp_protect_return(__percpu_add_return_case_32, pcp, val)
#define this_cpu_add_return_8(pcp, val)	\
	_pcp_protect_return(__percpu_add_return_case_64, pcp, val)
207
/*
 * this_cpu_and_<bytes>(): AND this CPU's instance of @pcp with @val.
 *
 * This is implemented with the __percpu_andnot_case_<bits>() helpers, so
 * the complement of @val is passed. @val is parenthesised before being
 * complemented: otherwise an argument such as "a | b" would expand to
 * "~a | b" and the wrong mask would be applied.
 */
#define this_cpu_and_1(pcp, val)	\
	_pcp_protect(__percpu_andnot_case_8, pcp, ~(val))
#define this_cpu_and_2(pcp, val)	\
	_pcp_protect(__percpu_andnot_case_16, pcp, ~(val))
#define this_cpu_and_4(pcp, val)	\
	_pcp_protect(__percpu_andnot_case_32, pcp, ~(val))
#define this_cpu_and_8(pcp, val)	\
	_pcp_protect(__percpu_andnot_case_64, pcp, ~(val))
216
/*
 * this_cpu_or_<bytes>(): OR @val into this CPU's instance of @pcp using
 * the __percpu_or_case_<bits>() helper of matching width. No value is
 * returned.
 */
#define this_cpu_or_1(pcp, val)		\
	_pcp_protect(__percpu_or_case_8, pcp, val)
#define this_cpu_or_2(pcp, val)		\
	_pcp_protect(__percpu_or_case_16, pcp, val)
#define this_cpu_or_4(pcp, val)		\
	_pcp_protect(__percpu_or_case_32, pcp, val)
#define this_cpu_or_8(pcp, val)		\
	_pcp_protect(__percpu_or_case_64, pcp, val)
225
/*
 * this_cpu_xchg_<bytes>(): exchange this CPU's instance of @pcp with @val
 * and evaluate to the result of xchg_relaxed(). Every size expands to the
 * same xchg_relaxed() call.
 */
#define this_cpu_xchg_1(pcp, val)	\
	_pcp_protect_return(xchg_relaxed, pcp, val)
#define this_cpu_xchg_2(pcp, val)	\
	_pcp_protect_return(xchg_relaxed, pcp, val)
#define this_cpu_xchg_4(pcp, val)	\
	_pcp_protect_return(xchg_relaxed, pcp, val)
#define this_cpu_xchg_8(pcp, val)	\
	_pcp_protect_return(xchg_relaxed, pcp, val)
234
/*
 * this_cpu_cmpxchg_<bytes>(): compare-and-exchange on this CPU's instance
 * of @pcp with old value @o and new value @n, evaluating to the result of
 * cmpxchg_relaxed(). Every size expands to the same cmpxchg_relaxed() call.
 */
#define this_cpu_cmpxchg_1(pcp, o, n)	\
	_pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
#define this_cpu_cmpxchg_2(pcp, o, n)	\
	_pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
#define this_cpu_cmpxchg_4(pcp, o, n)	\
	_pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
#define this_cpu_cmpxchg_8(pcp, o, n)	\
	_pcp_protect_return(cmpxchg_relaxed, pcp, o, n)

/* The 64-bit variant is simply the 8-byte case. */
#define this_cpu_cmpxchg64(pcp, o, n)	this_cpu_cmpxchg_8(pcp, o, n)
245
/*
 * this_cpu_cmpxchg128(): 128-bit compare-and-exchange on this CPU's
 * instance of @pcp.
 *
 * @o and @n are first converted to u128 temporaries. With preemption
 * disabled, the address of this CPU's instance is looked up and handed to
 * cmpxchg128_local(), whose result is the value of the expression.
 */
#define this_cpu_cmpxchg128(pcp, o, n)					\
({									\
	typedef typeof(pcp) pcp_op_T__;					\
	u128 expected__, desired__, prev__;				\
	pcp_op_T__ *slot__;						\
									\
	expected__ = (o);						\
	desired__ = (n);						\
									\
	preempt_disable_notrace();					\
	slot__ = raw_cpu_ptr(&(pcp));					\
	prev__ = cmpxchg128_local((void *)slot__, expected__, desired__); \
	preempt_enable_notrace();					\
									\
	prev__;								\
})
259
/*
 * nVHE hyp builds resolve per_cpu_offset() through __hyp_per_cpu_offset().
 * __per_cpu_offset is defined (as empty) alongside it.
 * NOTE(review): presumably both must be defined before the generic percpu
 * header is included so that it does not provide its own; confirm.
 */
#if defined(__KVM_NVHE_HYPERVISOR__)
extern unsigned long __hyp_per_cpu_offset(unsigned int cpu);
#define __per_cpu_offset
#define per_cpu_offset(cpu)	__hyp_per_cpu_offset(cpu)
#endif
265
266 #include <asm-generic/percpu.h>
267
/*
 * For nVHE hyp code built with CONFIG_DEBUG_PREEMPT, map this_cpu_ptr,
 * __this_cpu_read and __this_cpu_write onto their raw_cpu_* equivalents
 * to avoid the dependencies of the DEBUG_PREEMPT versions.
 */
#ifdef __KVM_NVHE_HYPERVISOR__
#ifdef CONFIG_DEBUG_PREEMPT
#undef	this_cpu_ptr
#define	this_cpu_ptr		raw_cpu_ptr
#undef	__this_cpu_read
#define	__this_cpu_read		raw_cpu_read
#undef	__this_cpu_write
#define	__this_cpu_write	raw_cpu_write
#endif /* CONFIG_DEBUG_PREEMPT */
#endif /* __KVM_NVHE_HYPERVISOR__ */
277
278 #endif /* __ASM_PERCPU_H */
279