/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2014 Regents of the University of California
 */

#ifndef _ASM_RISCV_CMPXCHG_H
#define _ASM_RISCV_CMPXCHG_H

#include <linux/bug.h>

#include <asm/alternative-macros.h>
#include <asm/fence.h>
#include <asm/hwcap.h>
#include <asm/insn-def.h>
#include <asm/cpufeature-macros.h>
#include <asm/processor.h>
#include <asm/errata_list.h>

#define __arch_xchg_masked(sc_sfx, swap_sfx, prepend, sc_append,	\
			   swap_append, r, p, n)	\
({	\
	if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) &&	\
	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA)) {	\
		__asm__ __volatile__ (	\
			prepend	\
			" amoswap" swap_sfx " %0, %z2, %1\n"	\
			swap_append	\
			: "=&r" (r), "+A" (*(p))	\
			: "rJ" (n)	\
			: "memory");	\
	} else {	\
		u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3);	\
		ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE;	\
		ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0)	\
				<< __s;	\
		ulong __newx = (ulong)(n) << __s;	\
		ulong __retx;	\
		ulong __rc;	\
	\
		__asm__ __volatile__ (	\
			prepend	\
			PREFETCHW_ASM(%5)	\
			"0: lr.w %0, %2\n"	\
			" and %1, %0, %z4\n"	\
			" or %1, %1, %z3\n"	\
			" sc.w" sc_sfx " %1, %1, %2\n"	\
			" bnez %1, 0b\n"	\
			sc_append	\
			: "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b))	\
			: "rJ" (__newx), "rJ" (~__mask), "rJ" (__ptr32b)	\
			: "memory");	\
	\
		r = (__typeof__(*(p)))((__retx & __mask) >> __s);	\
	}	\
})
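
/*
 * Worked example of the LL/SC fallback above (illustrative only; the pointer
 * value is hypothetical): for a u8 at an address ending in ...3, the macro
 * computes __s = (3 & (0x4 - 1)) * 8 = 24 and __mask = GENMASK(7, 0) << 24 =
 * 0xff000000, so only the top byte of the naturally aligned 32-bit word is
 * rewritten under the lr.w/sc.w loop:
 *
 *	u8 *p = (u8 *)0x1003;			// hypothetical address
 *	u32 *word = (u32 *)((ulong)p & ~0x3);	// 0x1000
 *	ulong s = ((ulong)p & (0x4 - sizeof(*p))) * BITS_PER_BYTE;	// 24
 *	ulong mask = GENMASK(7, 0) << s;	// 0xff000000
 */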

#define __arch_xchg(sfx, prepend, append, r, p, n)	\
({	\
	__asm__ __volatile__ (	\
		prepend	\
		" amoswap" sfx " %0, %2, %1\n"	\
		append	\
		: "=r" (r), "+A" (*(p))	\
		: "r" (n)	\
		: "memory");	\
})

#define _arch_xchg(ptr, new, sc_sfx, swap_sfx, prepend,	\
		   sc_append, swap_append)	\
({	\
	__typeof__(ptr) __ptr = (ptr);	\
	__typeof__(*(__ptr)) __new = (new);	\
	__typeof__(*(__ptr)) __ret;	\
	\
	switch (sizeof(*__ptr)) {	\
	case 1:	\
		__arch_xchg_masked(sc_sfx, ".b" swap_sfx,	\
				   prepend, sc_append, swap_append,	\
				   __ret, __ptr, __new);	\
		break;	\
	case 2:	\
		__arch_xchg_masked(sc_sfx, ".h" swap_sfx,	\
				   prepend, sc_append, swap_append,	\
				   __ret, __ptr, __new);	\
		break;	\
	case 4:	\
		__arch_xchg(".w" swap_sfx, prepend, swap_append,	\
			    __ret, __ptr, __new);	\
		break;	\
	case 8:	\
		__arch_xchg(".d" swap_sfx, prepend, swap_append,	\
			    __ret, __ptr, __new);	\
		break;	\
	default:	\
		BUILD_BUG();	\
	}	\
	(__typeof__(*(__ptr)))__ret;	\
})

#define arch_xchg_relaxed(ptr, x)	\
	_arch_xchg(ptr, x, "", "", "", "", "")

#define arch_xchg_acquire(ptr, x)	\
	_arch_xchg(ptr, x, "", "", "",	\
		   RISCV_ACQUIRE_BARRIER, RISCV_ACQUIRE_BARRIER)

#define arch_xchg_release(ptr, x)	\
	_arch_xchg(ptr, x, "", "", RISCV_RELEASE_BARRIER, "", "")

#define arch_xchg(ptr, x)	\
	_arch_xchg(ptr, x, ".rl", ".aqrl", "", RISCV_FULL_BARRIER, "")

#define xchg32(ptr, x)	\
({	\
	BUILD_BUG_ON(sizeof(*(ptr)) != 4);	\
	arch_xchg((ptr), (x));	\
})

#define xchg64(ptr, x)	\
({	\
	BUILD_BUG_ON(sizeof(*(ptr)) != 8);	\
	arch_xchg((ptr), (x));	\
})
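
/*
 * Usage sketch (illustrative; the flag below is hypothetical). arch_xchg()
 * atomically stores the new value and returns the previous contents with
 * full ordering; the _relaxed/_acquire/_release variants weaken the ordering
 * accordingly. Generic code normally reaches these through the xchg() family
 * of wrappers in <linux/atomic.h> rather than calling arch_xchg() directly:
 *
 *	static unsigned long example_flag;	// hypothetical
 *
 *	if (arch_xchg(&example_flag, 1UL) == 0)
 *		;	// we were the one that set the flag
 */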

/*
 * Atomic compare and exchange. Compare OLD with MEM, if identical,
 * store NEW in MEM. Return the initial value in MEM. Success is
 * indicated by comparing RETURN with OLD.
 */
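
/*
 * Usage sketch of that contract (illustrative; the counter is hypothetical):
 *
 *	static unsigned int example_counter;	// hypothetical
 *	unsigned int old, new;
 *
 *	do {
 *		old = READ_ONCE(example_counter);
 *		new = old + 1;
 *	} while (arch_cmpxchg(&example_counter, old, new) != old);
 */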
#define __arch_cmpxchg_masked(sc_sfx, cas_sfx,	\
			      sc_prepend, sc_append,	\
			      cas_prepend, cas_append,	\
			      r, p, o, n)	\
({	\
	if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) &&	\
	    IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) &&	\
	    IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZACAS) &&	\
	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA) &&	\
	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)) {	\
		r = o;	\
	\
		__asm__ __volatile__ (	\
			cas_prepend	\
			" amocas" cas_sfx " %0, %z2, %1\n"	\
			cas_append	\
			: "+&r" (r), "+A" (*(p))	\
			: "rJ" (n)	\
			: "memory");	\
	} else {	\
		u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3);	\
		ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE;	\
		ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0)	\
				<< __s;	\
		ulong __newx = (ulong)(n) << __s;	\
		ulong __oldx = (ulong)(o) << __s;	\
		ulong __retx;	\
		ulong __rc;	\
	\
		__asm__ __volatile__ (	\
			sc_prepend	\
			"0: lr.w %0, %2\n"	\
			" and %1, %0, %z5\n"	\
			" bne %1, %z3, 1f\n"	\
			" and %1, %0, %z6\n"	\
			" or %1, %1, %z4\n"	\
			" sc.w" sc_sfx " %1, %1, %2\n"	\
			" bnez %1, 0b\n"	\
			sc_append	\
			"1:\n"	\
			: "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b))	\
			: "rJ" ((long)__oldx), "rJ" (__newx),	\
			  "rJ" (__mask), "rJ" (~__mask)	\
			: "memory");	\
	\
		r = (__typeof__(*(p)))((__retx & __mask) >> __s);	\
	}	\
})

#define __arch_cmpxchg(lr_sfx, sc_sfx, cas_sfx,	\
		       sc_prepend, sc_append,	\
		       cas_prepend, cas_append,	\
		       r, p, co, o, n)	\
({	\
	if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) &&	\
	    IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZACAS) &&	\
	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)) {	\
		r = o;	\
	\
		__asm__ __volatile__ (	\
			cas_prepend	\
			" amocas" cas_sfx " %0, %z2, %1\n"	\
			cas_append	\
			: "+&r" (r), "+A" (*(p))	\
			: "rJ" (n)	\
			: "memory");	\
	} else {	\
		register unsigned int __rc;	\
	\
		__asm__ __volatile__ (	\
			sc_prepend	\
			"0: lr" lr_sfx " %0, %2\n"	\
			" bne %0, %z3, 1f\n"	\
			" sc" sc_sfx " %1, %z4, %2\n"	\
			" bnez %1, 0b\n"	\
			sc_append	\
			"1:\n"	\
			: "=&r" (r), "=&r" (__rc), "+A" (*(p))	\
			: "rJ" (co o), "rJ" (n)	\
			: "memory");	\
	}	\
})

#define _arch_cmpxchg(ptr, old, new, sc_sfx, cas_sfx,	\
		      sc_prepend, sc_append,	\
		      cas_prepend, cas_append)	\
({	\
	__typeof__(ptr) __ptr = (ptr);	\
	__typeof__(*(__ptr)) __old = (old);	\
	__typeof__(*(__ptr)) __new = (new);	\
	__typeof__(*(__ptr)) __ret;	\
	\
	switch (sizeof(*__ptr)) {	\
	case 1:	\
		__arch_cmpxchg_masked(sc_sfx, ".b" cas_sfx,	\
				      sc_prepend, sc_append,	\
				      cas_prepend, cas_append,	\
				      __ret, __ptr, __old, __new);	\
		break;	\
	case 2:	\
		__arch_cmpxchg_masked(sc_sfx, ".h" cas_sfx,	\
				      sc_prepend, sc_append,	\
				      cas_prepend, cas_append,	\
				      __ret, __ptr, __old, __new);	\
		break;	\
	case 4:	\
		__arch_cmpxchg(".w", ".w" sc_sfx, ".w" cas_sfx,	\
			       sc_prepend, sc_append,	\
			       cas_prepend, cas_append,	\
			       __ret, __ptr, (long)(int)(long), __old, __new);	\
		break;	\
	case 8:	\
		__arch_cmpxchg(".d", ".d" sc_sfx, ".d" cas_sfx,	\
			       sc_prepend, sc_append,	\
			       cas_prepend, cas_append,	\
			       __ret, __ptr, /**/, __old, __new);	\
		break;	\
	default:	\
		BUILD_BUG();	\
	}	\
	(__typeof__(*(__ptr)))__ret;	\
})

/*
 * These macros are here to improve the readability of the arch_cmpxchg_XXX()
 * macros.
 */
#define SC_SFX(x)	x
#define CAS_SFX(x)	x
#define SC_PREPEND(x)	x
#define SC_APPEND(x)	x
#define CAS_PREPEND(x)	x
#define CAS_APPEND(x)	x

#define arch_cmpxchg_relaxed(ptr, o, n)	\
	_arch_cmpxchg((ptr), (o), (n),	\
		      SC_SFX(""), CAS_SFX(""),	\
		      SC_PREPEND(""), SC_APPEND(""),	\
		      CAS_PREPEND(""), CAS_APPEND(""))

#define arch_cmpxchg_acquire(ptr, o, n)	\
	_arch_cmpxchg((ptr), (o), (n),	\
		      SC_SFX(""), CAS_SFX(""),	\
		      SC_PREPEND(""), SC_APPEND(RISCV_ACQUIRE_BARRIER),	\
		      CAS_PREPEND(""), CAS_APPEND(RISCV_ACQUIRE_BARRIER))

#define arch_cmpxchg_release(ptr, o, n)	\
	_arch_cmpxchg((ptr), (o), (n),	\
		      SC_SFX(""), CAS_SFX(""),	\
		      SC_PREPEND(RISCV_RELEASE_BARRIER), SC_APPEND(""),	\
		      CAS_PREPEND(RISCV_RELEASE_BARRIER), CAS_APPEND(""))

#define arch_cmpxchg(ptr, o, n)	\
	_arch_cmpxchg((ptr), (o), (n),	\
		      SC_SFX(".rl"), CAS_SFX(".aqrl"),	\
		      SC_PREPEND(""), SC_APPEND(RISCV_FULL_BARRIER),	\
		      CAS_PREPEND(""), CAS_APPEND(""))

#define arch_cmpxchg_local(ptr, o, n)	\
	arch_cmpxchg_relaxed((ptr), (o), (n))
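
/*
 * Ordering sketch (illustrative; the lock word and helpers are hypothetical).
 * The _acquire form is sufficient for a test-and-set style trylock, with the
 * matching release done by an exchange or store with release semantics:
 *
 *	static unsigned int example_lock;	// hypothetical, 0 = unlocked
 *
 *	static bool example_trylock(void)
 *	{
 *		return arch_cmpxchg_acquire(&example_lock, 0, 1) == 0;
 *	}
 *
 *	static void example_unlock(void)
 *	{
 *		(void)arch_xchg_release(&example_lock, 0);
 *	}
 */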

#define arch_cmpxchg64(ptr, o, n)	\
({	\
	BUILD_BUG_ON(sizeof(*(ptr)) != 8);	\
	arch_cmpxchg((ptr), (o), (n));	\
})

#define arch_cmpxchg64_local(ptr, o, n)	\
({	\
	BUILD_BUG_ON(sizeof(*(ptr)) != 8);	\
	arch_cmpxchg_relaxed((ptr), (o), (n));	\
})

#define arch_cmpxchg64_relaxed(ptr, o, n)	\
({	\
	BUILD_BUG_ON(sizeof(*(ptr)) != 8);	\
	arch_cmpxchg_relaxed((ptr), (o), (n));	\
})

#define arch_cmpxchg64_acquire(ptr, o, n)	\
({	\
	BUILD_BUG_ON(sizeof(*(ptr)) != 8);	\
	arch_cmpxchg_acquire((ptr), (o), (n));	\
})

#define arch_cmpxchg64_release(ptr, o, n)	\
({	\
	BUILD_BUG_ON(sizeof(*(ptr)) != 8);	\
	arch_cmpxchg_release((ptr), (o), (n));	\
})

#if defined(CONFIG_64BIT) && defined(CONFIG_RISCV_ISA_ZACAS) && defined(CONFIG_TOOLCHAIN_HAS_ZACAS)

#define system_has_cmpxchg128()	riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)

union __u128_halves {
	u128 full;
	struct {
		u64 low, high;
	};
};

#define __arch_cmpxchg128(p, o, n, cas_sfx)	\
({	\
	__typeof__(*(p)) __o = (o);	\
	union __u128_halves __hn = { .full = (n) };	\
	union __u128_halves __ho = { .full = (__o) };	\
	register unsigned long t1 asm ("t1") = __hn.low;	\
	register unsigned long t2 asm ("t2") = __hn.high;	\
	register unsigned long t3 asm ("t3") = __ho.low;	\
	register unsigned long t4 asm ("t4") = __ho.high;	\
	\
	__asm__ __volatile__ (	\
		" amocas.q" cas_sfx " %0, %z3, %2"	\
		: "+&r" (t3), "+&r" (t4), "+A" (*(p))	\
		: "rJ" (t1), "rJ" (t2)	\
		: "memory");	\
	\
	((u128)t4 << 64) | t3;	\
})

#define arch_cmpxchg128(ptr, o, n)	\
	__arch_cmpxchg128((ptr), (o), (n), ".aqrl")

#define arch_cmpxchg128_local(ptr, o, n)	\
	__arch_cmpxchg128((ptr), (o), (n), "")
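
/*
 * Usage sketch (illustrative; the variable is hypothetical). amocas.q is only
 * available when Zacas is present, so callers are expected to gate on
 * system_has_cmpxchg128(), and the 16-byte operand is expected to be
 * naturally aligned:
 *
 *	static u128 example_pair __aligned(16);	// hypothetical
 *
 *	if (system_has_cmpxchg128()) {
 *		u128 old = example_pair;
 *
 *		arch_cmpxchg128(&example_pair, old, old + 1);
 *	}
 */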

#endif /* CONFIG_64BIT && CONFIG_RISCV_ISA_ZACAS && CONFIG_TOOLCHAIN_HAS_ZACAS */

#ifdef CONFIG_RISCV_ISA_ZAWRS
/*
 * Despite wrs.nto being "WRS-with-no-timeout", in the absence of changes to
 * @val we expect it to still terminate within a "reasonable" amount of time,
 * either for some other implementation-specific reason, because of a pending,
 * locally-enabled interrupt, or because it has been configured to raise an
 * illegal instruction exception.
 */
static __always_inline void __cmpwait(volatile void *ptr,
				      unsigned long val,
				      int size)
{
	unsigned long tmp;

	u32 *__ptr32b;
	ulong __s, __val, __mask;

	if (!riscv_has_extension_likely(RISCV_ISA_EXT_ZAWRS)) {
		ALT_RISCV_PAUSE();
		return;
	}

	switch (size) {
	case 1:
		__ptr32b = (u32 *)((ulong)(ptr) & ~0x3);
		__s = ((ulong)(ptr) & 0x3) * BITS_PER_BYTE;
		__val = val << __s;
		__mask = 0xff << __s;

		asm volatile(
			" lr.w %0, %1\n"
			" and %0, %0, %3\n"
			" xor %0, %0, %2\n"
			" bnez %0, 1f\n"
			ZAWRS_WRS_NTO "\n"
			"1:"
			: "=&r" (tmp), "+A" (*(__ptr32b))
			: "r" (__val), "r" (__mask)
			: "memory");
		break;
	case 2:
		__ptr32b = (u32 *)((ulong)(ptr) & ~0x3);
		__s = ((ulong)(ptr) & 0x2) * BITS_PER_BYTE;
		__val = val << __s;
		__mask = 0xffff << __s;

		asm volatile(
			" lr.w %0, %1\n"
			" and %0, %0, %3\n"
			" xor %0, %0, %2\n"
			" bnez %0, 1f\n"
			ZAWRS_WRS_NTO "\n"
			"1:"
			: "=&r" (tmp), "+A" (*(__ptr32b))
			: "r" (__val), "r" (__mask)
			: "memory");
		break;
	case 4:
		asm volatile(
			" lr.w %0, %1\n"
			" xor %0, %0, %2\n"
			" bnez %0, 1f\n"
			ZAWRS_WRS_NTO "\n"
			"1:"
			: "=&r" (tmp), "+A" (*(u32 *)ptr)
			: "r" (val));
		break;
#if __riscv_xlen == 64
	case 8:
		asm volatile(
			" lr.d %0, %1\n"
			" xor %0, %0, %2\n"
			" bnez %0, 1f\n"
			ZAWRS_WRS_NTO "\n"
			"1:"
			: "=&r" (tmp), "+A" (*(u64 *)ptr)
			: "r" (val));
		break;
#endif
	default:
		BUILD_BUG();
	}
}

#define __cmpwait_relaxed(ptr, val) \
	__cmpwait((ptr), (unsigned long)(val), sizeof(*(ptr)))
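
/*
 * Usage sketch (illustrative): __cmpwait_relaxed() is the hook behind the
 * smp_cond_load_relaxed() implementation in <asm/barrier.h>; a waiter
 * re-reads the location and only stalls the hart while the value still
 * matches the one it last observed:
 *
 *	u32 seen = READ_ONCE(*wordp);		// wordp: hypothetical u32 *
 *
 *	while (!done(seen)) {			// done(): hypothetical check
 *		__cmpwait_relaxed(wordp, seen);
 *		seen = READ_ONCE(*wordp);
 *	}
 */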
#endif /* CONFIG_RISCV_ISA_ZAWRS */

#endif /* _ASM_RISCV_CMPXCHG_H */