/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2014 Regents of the University of California
 */

#ifndef _ASM_RISCV_CMPXCHG_H
#define _ASM_RISCV_CMPXCHG_H

#include <linux/bug.h>

#include <asm/alternative-macros.h>
#include <asm/fence.h>
#include <asm/hwcap.h>
#include <asm/insn-def.h>
#include <asm/cpufeature-macros.h>

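/*
 * xchg() for 8-bit and 16-bit values: use a single amoswap.{b,h} when the
 * Zabha extension is available; otherwise fall back to an LR/SC loop on the
 * aligned 32-bit word containing the value, masking the new bytes in.
 */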
#define __arch_xchg_masked(sc_sfx, swap_sfx, prepend, sc_append, \
                           swap_append, r, p, n) \
({ \
        if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) && \
            riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA)) { \
                __asm__ __volatile__ ( \
                        prepend \
                        " amoswap" swap_sfx " %0, %z2, %1\n" \
                        swap_append \
                        : "=&r" (r), "+A" (*(p)) \
                        : "rJ" (n) \
                        : "memory"); \
        } else { \
                u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \
                ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \
                ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \
                               << __s; \
                ulong __newx = (ulong)(n) << __s; \
                ulong __retx; \
                ulong __rc; \
                \
                __asm__ __volatile__ ( \
                        prepend \
                        "0: lr.w %0, %2\n" \
                        " and %1, %0, %z4\n" \
                        " or %1, %1, %z3\n" \
                        " sc.w" sc_sfx " %1, %1, %2\n" \
                        " bnez %1, 0b\n" \
                        sc_append \
                        : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
                        : "rJ" (__newx), "rJ" (~__mask) \
                        : "memory"); \
                \
                r = (__typeof__(*(p)))((__retx & __mask) >> __s); \
        } \
})
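/*
 * In effect, the LR/SC fallback above performs the following read-modify-write
 * on the containing 32-bit word, but atomically (illustrative C sketch only,
 * not used by the kernel):
 *
 *	u32 old_word = *__ptr32b;
 *	*__ptr32b    = (old_word & ~__mask) | __newx;
 *	r            = (__typeof__(*(p)))((old_word & __mask) >> __s);
 */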

#define __arch_xchg(sfx, prepend, append, r, p, n) \
({ \
        __asm__ __volatile__ ( \
                prepend \
                " amoswap" sfx " %0, %2, %1\n" \
                append \
                : "=r" (r), "+A" (*(p)) \
                : "r" (n) \
                : "memory"); \
})

#define _arch_xchg(ptr, new, sc_sfx, swap_sfx, prepend, \
                   sc_append, swap_append) \
({ \
        __typeof__(ptr) __ptr = (ptr); \
        __typeof__(*(__ptr)) __new = (new); \
        __typeof__(*(__ptr)) __ret; \
        \
        switch (sizeof(*__ptr)) { \
        case 1: \
                __arch_xchg_masked(sc_sfx, ".b" swap_sfx, \
                                   prepend, sc_append, swap_append, \
                                   __ret, __ptr, __new); \
                break; \
        case 2: \
                __arch_xchg_masked(sc_sfx, ".h" swap_sfx, \
                                   prepend, sc_append, swap_append, \
                                   __ret, __ptr, __new); \
                break; \
        case 4: \
                __arch_xchg(".w" swap_sfx, prepend, swap_append, \
                            __ret, __ptr, __new); \
                break; \
        case 8: \
                __arch_xchg(".d" swap_sfx, prepend, swap_append, \
                            __ret, __ptr, __new); \
                break; \
        default: \
                BUILD_BUG(); \
        } \
        (__typeof__(*(__ptr)))__ret; \
})

#define arch_xchg_relaxed(ptr, x) \
        _arch_xchg(ptr, x, "", "", "", "", "")

#define arch_xchg_acquire(ptr, x) \
        _arch_xchg(ptr, x, "", "", "", \
                   RISCV_ACQUIRE_BARRIER, RISCV_ACQUIRE_BARRIER)

#define arch_xchg_release(ptr, x) \
        _arch_xchg(ptr, x, "", "", RISCV_RELEASE_BARRIER, "", "")

#define arch_xchg(ptr, x) \
        _arch_xchg(ptr, x, ".rl", ".aqrl", "", RISCV_FULL_BARRIER, "")

#define xchg32(ptr, x) \
({ \
        BUILD_BUG_ON(sizeof(*(ptr)) != 4); \
        arch_xchg((ptr), (x)); \
})

#define xchg64(ptr, x) \
({ \
        BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
        arch_xchg((ptr), (x)); \
})
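/*
 * Generic code reaches these through xchg() and its ordering variants.  A
 * hypothetical caller, atomically publishing a new value with full ordering
 * while observing the previous one:
 *
 *	unsigned long prev = xchg(&flag, 1UL);
 *	if (!prev)
 *		first_setter();		// hypothetical helper
 */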

/*
 * Atomic compare and exchange.  Compare OLD with MEM; if identical,
 * store NEW in MEM.  Return the initial value in MEM.  Success is
 * indicated by comparing RETURN with OLD.
 */
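/*
 * Typical (hypothetical) caller pattern, via the generic wrappers:
 *
 *	old = READ_ONCE(*p);
 *	do {
 *		new = compute(old);		// hypothetical update
 *	} while (!try_cmpxchg(p, &old, new));
 *
 * or, with the plain form backed by the macros below:
 *
 *	prev = cmpxchg(p, old, new);
 *	success = (prev == old);
 */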
#define __arch_cmpxchg_masked(sc_sfx, cas_sfx, \
                              sc_prepend, sc_append, \
                              cas_prepend, cas_append, \
                              r, p, o, n) \
({ \
        if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) && \
            IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) && \
            riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA) && \
            riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)) { \
                r = o; \
                \
                __asm__ __volatile__ ( \
                        cas_prepend \
                        " amocas" cas_sfx " %0, %z2, %1\n" \
                        cas_append \
                        : "+&r" (r), "+A" (*(p)) \
                        : "rJ" (n) \
                        : "memory"); \
        } else { \
                u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \
                ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \
                ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \
                               << __s; \
                ulong __newx = (ulong)(n) << __s; \
                ulong __oldx = (ulong)(o) << __s; \
                ulong __retx; \
                ulong __rc; \
                \
                __asm__ __volatile__ ( \
                        sc_prepend \
                        "0: lr.w %0, %2\n" \
                        " and %1, %0, %z5\n" \
                        " bne %1, %z3, 1f\n" \
                        " and %1, %0, %z6\n" \
                        " or %1, %1, %z4\n" \
                        " sc.w" sc_sfx " %1, %1, %2\n" \
                        " bnez %1, 0b\n" \
                        sc_append \
                        "1:\n" \
                        : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
                        : "rJ" ((long)__oldx), "rJ" (__newx), \
                          "rJ" (__mask), "rJ" (~__mask) \
                        : "memory"); \
                \
                r = (__typeof__(*(p)))((__retx & __mask) >> __s); \
        } \
})

#define __arch_cmpxchg(lr_sfx, sc_sfx, cas_sfx, \
                       sc_prepend, sc_append, \
                       cas_prepend, cas_append, \
                       r, p, co, o, n) \
({ \
        if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) && \
            riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)) { \
                r = o; \
                \
                __asm__ __volatile__ ( \
                        cas_prepend \
                        " amocas" cas_sfx " %0, %z2, %1\n" \
                        cas_append \
                        : "+&r" (r), "+A" (*(p)) \
                        : "rJ" (n) \
                        : "memory"); \
        } else { \
                register unsigned int __rc; \
                \
                __asm__ __volatile__ ( \
                        sc_prepend \
                        "0: lr" lr_sfx " %0, %2\n" \
                        " bne %0, %z3, 1f\n" \
                        " sc" sc_sfx " %1, %z4, %2\n" \
                        " bnez %1, 0b\n" \
                        sc_append \
                        "1:\n" \
                        : "=&r" (r), "=&r" (__rc), "+A" (*(p)) \
                        : "rJ" (co o), "rJ" (n) \
                        : "memory"); \
        } \
})

#define _arch_cmpxchg(ptr, old, new, sc_sfx, cas_sfx, \
                      sc_prepend, sc_append, \
                      cas_prepend, cas_append) \
({ \
        __typeof__(ptr) __ptr = (ptr); \
        __typeof__(*(__ptr)) __old = (old); \
        __typeof__(*(__ptr)) __new = (new); \
        __typeof__(*(__ptr)) __ret; \
        \
        switch (sizeof(*__ptr)) { \
        case 1: \
                __arch_cmpxchg_masked(sc_sfx, ".b" cas_sfx, \
                                      sc_prepend, sc_append, \
                                      cas_prepend, cas_append, \
                                      __ret, __ptr, __old, __new); \
                break; \
        case 2: \
                __arch_cmpxchg_masked(sc_sfx, ".h" cas_sfx, \
                                      sc_prepend, sc_append, \
                                      cas_prepend, cas_append, \
                                      __ret, __ptr, __old, __new); \
                break; \
        case 4: \
                __arch_cmpxchg(".w", ".w" sc_sfx, ".w" cas_sfx, \
                               sc_prepend, sc_append, \
                               cas_prepend, cas_append, \
                               __ret, __ptr, (long), __old, __new); \
                break; \
        case 8: \
                __arch_cmpxchg(".d", ".d" sc_sfx, ".d" cas_sfx, \
                               sc_prepend, sc_append, \
                               cas_prepend, cas_append, \
                               __ret, __ptr, /**/, __old, __new); \
                break; \
        default: \
                BUILD_BUG(); \
        } \
        (__typeof__(*(__ptr)))__ret; \
})

/*
 * These macros are here to improve the readability of the arch_cmpxchg_XXX()
 * macros.
 */
#define SC_SFX(x)	x
#define CAS_SFX(x)	x
#define SC_PREPEND(x)	x
#define SC_APPEND(x)	x
#define CAS_PREPEND(x)	x
#define CAS_APPEND(x)	x

#define arch_cmpxchg_relaxed(ptr, o, n) \
        _arch_cmpxchg((ptr), (o), (n), \
                      SC_SFX(""), CAS_SFX(""), \
                      SC_PREPEND(""), SC_APPEND(""), \
                      CAS_PREPEND(""), CAS_APPEND(""))

#define arch_cmpxchg_acquire(ptr, o, n) \
        _arch_cmpxchg((ptr), (o), (n), \
                      SC_SFX(""), CAS_SFX(""), \
                      SC_PREPEND(""), SC_APPEND(RISCV_ACQUIRE_BARRIER), \
                      CAS_PREPEND(""), CAS_APPEND(RISCV_ACQUIRE_BARRIER))

#define arch_cmpxchg_release(ptr, o, n) \
        _arch_cmpxchg((ptr), (o), (n), \
                      SC_SFX(""), CAS_SFX(""), \
                      SC_PREPEND(RISCV_RELEASE_BARRIER), SC_APPEND(""), \
                      CAS_PREPEND(RISCV_RELEASE_BARRIER), CAS_APPEND(""))

#define arch_cmpxchg(ptr, o, n) \
        _arch_cmpxchg((ptr), (o), (n), \
                      SC_SFX(".rl"), CAS_SFX(".aqrl"), \
                      SC_PREPEND(""), SC_APPEND(RISCV_FULL_BARRIER), \
                      CAS_PREPEND(""), CAS_APPEND(""))
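/*
 * Note the asymmetry in the fully ordered arch_cmpxchg() above: the LR/SC
 * path combines a release-ordered sc.{w,d}.rl with a trailing full fence
 * (RISCV_FULL_BARRIER), while the Zacas path relies on the ordering of
 * amocas.{b,h,w,d}.aqrl alone.
 */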

#define arch_cmpxchg_local(ptr, o, n) \
        arch_cmpxchg_relaxed((ptr), (o), (n))

#define arch_cmpxchg64(ptr, o, n) \
({ \
        BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
        arch_cmpxchg((ptr), (o), (n)); \
})

#define arch_cmpxchg64_local(ptr, o, n) \
({ \
        BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
        arch_cmpxchg_relaxed((ptr), (o), (n)); \
})

#define arch_cmpxchg64_relaxed(ptr, o, n) \
({ \
        BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
        arch_cmpxchg_relaxed((ptr), (o), (n)); \
})

#define arch_cmpxchg64_acquire(ptr, o, n) \
({ \
        BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
        arch_cmpxchg_acquire((ptr), (o), (n)); \
})

#define arch_cmpxchg64_release(ptr, o, n) \
({ \
        BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
        arch_cmpxchg_release((ptr), (o), (n)); \
})

#if defined(CONFIG_64BIT) && defined(CONFIG_RISCV_ISA_ZACAS)

#define system_has_cmpxchg128() riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)

union __u128_halves {
        u128 full;
        struct {
                u64 low, high;
        };
};

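/*
 * amocas.q takes its 128-bit operands in even/odd register pairs (per the
 * Zacas extension), which is why the operands below are pinned to explicit
 * t1/t2 and t3/t4 bindings instead of being left to the register allocator.
 */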
#define __arch_cmpxchg128(p, o, n, cas_sfx) \
({ \
        __typeof__(*(p)) __o = (o); \
        union __u128_halves __hn = { .full = (n) }; \
        union __u128_halves __ho = { .full = (__o) }; \
        register unsigned long t1 asm ("t1") = __hn.low; \
        register unsigned long t2 asm ("t2") = __hn.high; \
        register unsigned long t3 asm ("t3") = __ho.low; \
        register unsigned long t4 asm ("t4") = __ho.high; \
        \
        __asm__ __volatile__ ( \
                " amocas.q" cas_sfx " %0, %z3, %2" \
                : "+&r" (t3), "+&r" (t4), "+A" (*(p)) \
                : "rJ" (t1), "rJ" (t2) \
                : "memory"); \
        \
        ((u128)t4 << 64) | t3; \
})

#define arch_cmpxchg128(ptr, o, n) \
        __arch_cmpxchg128((ptr), (o), (n), ".aqrl")

#define arch_cmpxchg128_local(ptr, o, n) \
        __arch_cmpxchg128((ptr), (o), (n), "")

#endif /* CONFIG_64BIT && CONFIG_RISCV_ISA_ZACAS */

#ifdef CONFIG_RISCV_ISA_ZAWRS
/*
 * Despite wrs.nto being "WRS-with-no-timeout", in the absence of changes to
 * @val we still expect it to terminate within a "reasonable" amount of time:
 * for an implementation-specific reason, because of a pending, locally-enabled
 * interrupt, or because it has been configured to raise an illegal
 * instruction exception.
 */
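/*
 * __cmpwait(): stall the hart with wrs.nto while the value at @ptr still
 * equals @val, as a power-friendly back-off for wait loops; on cores without
 * Zawrs it degrades to a single pause hint.  Callers go through the
 * __cmpwait_relaxed() wrapper defined at the end of this block.
 */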
static __always_inline void __cmpwait(volatile void *ptr,
                                      unsigned long val,
                                      int size)
{
        unsigned long tmp;

        asm goto(ALTERNATIVE("j %l[no_zawrs]", "nop",
                             0, RISCV_ISA_EXT_ZAWRS, 1)
                 : : : : no_zawrs);

        switch (size) {
        case 1:
                fallthrough;
        case 2:
                /* RISC-V doesn't have lr instructions on byte and half-word. */
                goto no_zawrs;
        case 4:
                asm volatile(
                " lr.w %0, %1\n"
                " xor %0, %0, %2\n"
                " bnez %0, 1f\n"
                ZAWRS_WRS_NTO "\n"
                "1:"
                : "=&r" (tmp), "+A" (*(u32 *)ptr)
                : "r" (val));
                break;
#if __riscv_xlen == 64
        case 8:
                asm volatile(
                " lr.d %0, %1\n"
                " xor %0, %0, %2\n"
                " bnez %0, 1f\n"
                ZAWRS_WRS_NTO "\n"
                "1:"
                : "=&r" (tmp), "+A" (*(u64 *)ptr)
                : "r" (val));
                break;
#endif
        default:
                BUILD_BUG();
        }

        return;

no_zawrs:
        asm volatile(RISCV_PAUSE : : : "memory");
}

#define __cmpwait_relaxed(ptr, val) \
        __cmpwait((ptr), (unsigned long)(val), sizeof(*(ptr)))
#endif /* CONFIG_RISCV_ISA_ZAWRS */

#endif /* _ASM_RISCV_CMPXCHG_H */