/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2014 Regents of the University of California
 */

#ifndef _ASM_RISCV_CMPXCHG_H
#define _ASM_RISCV_CMPXCHG_H

#include <linux/bug.h>

#include <asm/alternative-macros.h>
#include <asm/fence.h>
#include <asm/hwcap.h>
#include <asm/insn-def.h>
#include <asm/cpufeature-macros.h>
#include <asm/processor.h>

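/*
 * 8/16-bit xchg: use a Zabha amoswap.b/amoswap.h when the extension is
 * available, otherwise emulate it with an LR/SC loop on the aligned 32-bit
 * word containing the value.  In the fallback, __s is the bit offset of the
 * value within that word and __mask selects its bits; e.g. for a u16 whose
 * address has (addr & 0x3) == 2, __s = 16 and __mask = 0xffff0000, so the
 * loop rewrites only bits [31:16] of *__ptr32b.
 */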
#define __arch_xchg_masked(sc_sfx, swap_sfx, prepend, sc_append, \
			   swap_append, r, p, n) \
({ \
	if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) && \
	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA)) { \
		__asm__ __volatile__ ( \
			prepend \
			" amoswap" swap_sfx " %0, %z2, %1\n" \
			swap_append \
			: "=&r" (r), "+A" (*(p)) \
			: "rJ" (n) \
			: "memory"); \
	} else { \
		u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \
		ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \
		ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \
			       << __s; \
		ulong __newx = (ulong)(n) << __s; \
		ulong __retx; \
		ulong __rc; \
		\
		__asm__ __volatile__ ( \
			prepend \
			PREFETCHW_ASM(%5) \
			"0: lr.w %0, %2\n" \
			" and %1, %0, %z4\n" \
			" or %1, %1, %z3\n" \
			" sc.w" sc_sfx " %1, %1, %2\n" \
			" bnez %1, 0b\n" \
			sc_append \
			: "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
			: "rJ" (__newx), "rJ" (~__mask), "rJ" (__ptr32b) \
			: "memory"); \
		\
		r = (__typeof__(*(p)))((__retx & __mask) >> __s); \
	} \
})

#define __arch_xchg(sfx, prepend, append, r, p, n) \
({ \
	__asm__ __volatile__ ( \
		prepend \
		" amoswap" sfx " %0, %2, %1\n" \
		append \
		: "=r" (r), "+A" (*(p)) \
		: "r" (n) \
		: "memory"); \
})

#define _arch_xchg(ptr, new, sc_sfx, swap_sfx, prepend, \
		   sc_append, swap_append) \
({ \
	__typeof__(ptr) __ptr = (ptr); \
	__typeof__(*(__ptr)) __new = (new); \
	__typeof__(*(__ptr)) __ret; \
	\
	switch (sizeof(*__ptr)) { \
	case 1: \
		__arch_xchg_masked(sc_sfx, ".b" swap_sfx, \
				   prepend, sc_append, swap_append, \
				   __ret, __ptr, __new); \
		break; \
	case 2: \
		__arch_xchg_masked(sc_sfx, ".h" swap_sfx, \
				   prepend, sc_append, swap_append, \
				   __ret, __ptr, __new); \
		break; \
	case 4: \
		__arch_xchg(".w" swap_sfx, prepend, swap_append, \
			    __ret, __ptr, __new); \
		break; \
	case 8: \
		__arch_xchg(".d" swap_sfx, prepend, swap_append, \
			    __ret, __ptr, __new); \
		break; \
	default: \
		BUILD_BUG(); \
	} \
	(__typeof__(*(__ptr)))__ret; \
})

#define arch_xchg_relaxed(ptr, x) \
	_arch_xchg(ptr, x, "", "", "", "", "")

#define arch_xchg_acquire(ptr, x) \
	_arch_xchg(ptr, x, "", "", "", \
		   RISCV_ACQUIRE_BARRIER, RISCV_ACQUIRE_BARRIER)

#define arch_xchg_release(ptr, x) \
	_arch_xchg(ptr, x, "", "", RISCV_RELEASE_BARRIER, "", "")

#define arch_xchg(ptr, x) \
	_arch_xchg(ptr, x, ".rl", ".aqrl", "", RISCV_FULL_BARRIER, "")
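
/*
 * Illustrative sketch only (hypothetical caller, not part of this header):
 * arch_xchg() returns the previous value, so handing off a pointer and
 * collecting the old one is a single fully-ordered operation:
 *
 *	struct example_work;
 *
 *	static inline struct example_work *
 *	example_swap_pending(struct example_work **slot, struct example_work *new)
 *	{
 *		return arch_xchg(slot, new);
 *	}
 */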

#define xchg32(ptr, x) \
({ \
	BUILD_BUG_ON(sizeof(*(ptr)) != 4); \
	arch_xchg((ptr), (x)); \
})

#define xchg64(ptr, x) \
({ \
	BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
	arch_xchg((ptr), (x)); \
})

/*
 * Atomic compare and exchange. Compare OLD with MEM, if identical,
 * store NEW in MEM. Return the initial value in MEM. Success is
 * indicated by comparing RETURN with OLD.
 */
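/*
 * Illustrative sketch only (hypothetical helper, not part of this header):
 * a caller typically re-reads the location and retries until the compare
 * succeeds, e.g. an increment that saturates at a limit:
 *
 *	static inline unsigned int example_inc_below(unsigned int *p,
 *						     unsigned int limit)
 *	{
 *		unsigned int old = READ_ONCE(*p), prev;
 *
 *		while (old < limit) {
 *			prev = arch_cmpxchg(p, old, old + 1);
 *			if (prev == old)
 *				break;
 *			old = prev;
 *		}
 *		return old;
 *	}
 */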
#define __arch_cmpxchg_masked(sc_sfx, cas_sfx, \
			      sc_prepend, sc_append, \
			      cas_prepend, cas_append, \
			      r, p, o, n) \
({ \
	if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) && \
	    IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) && \
	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA) && \
	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)) { \
		r = o; \
		\
		__asm__ __volatile__ ( \
			cas_prepend \
			" amocas" cas_sfx " %0, %z2, %1\n" \
			cas_append \
			: "+&r" (r), "+A" (*(p)) \
			: "rJ" (n) \
			: "memory"); \
	} else { \
		u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \
		ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \
		ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \
			       << __s; \
		ulong __newx = (ulong)(n) << __s; \
		ulong __oldx = (ulong)(o) << __s; \
		ulong __retx; \
		ulong __rc; \
		\
		__asm__ __volatile__ ( \
			sc_prepend \
			"0: lr.w %0, %2\n" \
			" and %1, %0, %z5\n" \
			" bne %1, %z3, 1f\n" \
			" and %1, %0, %z6\n" \
			" or %1, %1, %z4\n" \
			" sc.w" sc_sfx " %1, %1, %2\n" \
			" bnez %1, 0b\n" \
			sc_append \
			"1:\n" \
			: "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
			: "rJ" ((long)__oldx), "rJ" (__newx), \
			  "rJ" (__mask), "rJ" (~__mask) \
			: "memory"); \
		\
		r = (__typeof__(*(p)))((__retx & __mask) >> __s); \
	} \
})

#define __arch_cmpxchg(lr_sfx, sc_sfx, cas_sfx, \
		       sc_prepend, sc_append, \
		       cas_prepend, cas_append, \
		       r, p, co, o, n) \
({ \
	if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) && \
	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)) { \
		r = o; \
		\
		__asm__ __volatile__ ( \
			cas_prepend \
			" amocas" cas_sfx " %0, %z2, %1\n" \
			cas_append \
			: "+&r" (r), "+A" (*(p)) \
			: "rJ" (n) \
			: "memory"); \
	} else { \
		register unsigned int __rc; \
		\
		__asm__ __volatile__ ( \
			sc_prepend \
			"0: lr" lr_sfx " %0, %2\n" \
			" bne %0, %z3, 1f\n" \
			" sc" sc_sfx " %1, %z4, %2\n" \
			" bnez %1, 0b\n" \
			sc_append \
			"1:\n" \
			: "=&r" (r), "=&r" (__rc), "+A" (*(p)) \
			: "rJ" (co o), "rJ" (n) \
			: "memory"); \
	} \
})

#define _arch_cmpxchg(ptr, old, new, sc_sfx, cas_sfx, \
		      sc_prepend, sc_append, \
		      cas_prepend, cas_append) \
({ \
	__typeof__(ptr) __ptr = (ptr); \
	__typeof__(*(__ptr)) __old = (old); \
	__typeof__(*(__ptr)) __new = (new); \
	__typeof__(*(__ptr)) __ret; \
	\
	switch (sizeof(*__ptr)) { \
	case 1: \
		__arch_cmpxchg_masked(sc_sfx, ".b" cas_sfx, \
				      sc_prepend, sc_append, \
				      cas_prepend, cas_append, \
				      __ret, __ptr, __old, __new); \
		break; \
	case 2: \
		__arch_cmpxchg_masked(sc_sfx, ".h" cas_sfx, \
				      sc_prepend, sc_append, \
				      cas_prepend, cas_append, \
				      __ret, __ptr, __old, __new); \
		break; \
	case 4: \
		__arch_cmpxchg(".w", ".w" sc_sfx, ".w" cas_sfx, \
			       sc_prepend, sc_append, \
			       cas_prepend, cas_append, \
			       __ret, __ptr, (long)(int)(long), __old, __new); \
		break; \
	case 8: \
		__arch_cmpxchg(".d", ".d" sc_sfx, ".d" cas_sfx, \
			       sc_prepend, sc_append, \
			       cas_prepend, cas_append, \
			       __ret, __ptr, /**/, __old, __new); \
		break; \
	default: \
		BUILD_BUG(); \
	} \
	(__typeof__(*(__ptr)))__ret; \
})

/*
 * These macros are here to improve the readability of the arch_cmpxchg_XXX()
 * macros.
 */
#define SC_SFX(x) x
#define CAS_SFX(x) x
#define SC_PREPEND(x) x
#define SC_APPEND(x) x
#define CAS_PREPEND(x) x
#define CAS_APPEND(x) x
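
/*
 * For example, arch_cmpxchg_acquire() below passes
 * SC_APPEND(RISCV_ACQUIRE_BARRIER) and CAS_APPEND(RISCV_ACQUIRE_BARRIER):
 * whichever implementation is selected (LR/SC loop or amocas), the acquire
 * fence is emitted after the atomic sequence (for the LR/SC fallback, only
 * on the success path).
 */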

#define arch_cmpxchg_relaxed(ptr, o, n) \
	_arch_cmpxchg((ptr), (o), (n), \
		      SC_SFX(""), CAS_SFX(""), \
		      SC_PREPEND(""), SC_APPEND(""), \
		      CAS_PREPEND(""), CAS_APPEND(""))

#define arch_cmpxchg_acquire(ptr, o, n) \
	_arch_cmpxchg((ptr), (o), (n), \
		      SC_SFX(""), CAS_SFX(""), \
		      SC_PREPEND(""), SC_APPEND(RISCV_ACQUIRE_BARRIER), \
		      CAS_PREPEND(""), CAS_APPEND(RISCV_ACQUIRE_BARRIER))

#define arch_cmpxchg_release(ptr, o, n) \
	_arch_cmpxchg((ptr), (o), (n), \
		      SC_SFX(""), CAS_SFX(""), \
		      SC_PREPEND(RISCV_RELEASE_BARRIER), SC_APPEND(""), \
		      CAS_PREPEND(RISCV_RELEASE_BARRIER), CAS_APPEND(""))

#define arch_cmpxchg(ptr, o, n) \
	_arch_cmpxchg((ptr), (o), (n), \
		      SC_SFX(".rl"), CAS_SFX(".aqrl"), \
		      SC_PREPEND(""), SC_APPEND(RISCV_FULL_BARRIER), \
		      CAS_PREPEND(""), CAS_APPEND(""))

#define arch_cmpxchg_local(ptr, o, n) \
	arch_cmpxchg_relaxed((ptr), (o), (n))

#define arch_cmpxchg64(ptr, o, n) \
({ \
	BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
	arch_cmpxchg((ptr), (o), (n)); \
})

#define arch_cmpxchg64_local(ptr, o, n) \
({ \
	BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
	arch_cmpxchg_relaxed((ptr), (o), (n)); \
})

#define arch_cmpxchg64_relaxed(ptr, o, n) \
({ \
	BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
	arch_cmpxchg_relaxed((ptr), (o), (n)); \
})

#define arch_cmpxchg64_acquire(ptr, o, n) \
({ \
	BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
	arch_cmpxchg_acquire((ptr), (o), (n)); \
})

#define arch_cmpxchg64_release(ptr, o, n) \
({ \
	BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
	arch_cmpxchg_release((ptr), (o), (n)); \
})

#if defined(CONFIG_64BIT) && defined(CONFIG_RISCV_ISA_ZACAS)

#define system_has_cmpxchg128() riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)

union __u128_halves {
	u128 full;
	struct {
		u64 low, high;
	};
};

#define __arch_cmpxchg128(p, o, n, cas_sfx) \
({ \
	__typeof__(*(p)) __o = (o); \
	union __u128_halves __hn = { .full = (n) }; \
	union __u128_halves __ho = { .full = (__o) }; \
	register unsigned long t1 asm ("t1") = __hn.low; \
	register unsigned long t2 asm ("t2") = __hn.high; \
	register unsigned long t3 asm ("t3") = __ho.low; \
	register unsigned long t4 asm ("t4") = __ho.high; \
	\
	__asm__ __volatile__ ( \
		" amocas.q" cas_sfx " %0, %z3, %2" \
		: "+&r" (t3), "+&r" (t4), "+A" (*(p)) \
		: "rJ" (t1), "rJ" (t2) \
		: "memory"); \
	\
	((u128)t4 << 64) | t3; \
})

#define arch_cmpxchg128(ptr, o, n) \
	__arch_cmpxchg128((ptr), (o), (n), ".aqrl")

#define arch_cmpxchg128_local(ptr, o, n) \
	__arch_cmpxchg128((ptr), (o), (n), "")
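
/*
 * Illustrative sketch only (hypothetical caller, not part of this header):
 * arch_cmpxchg128() returns the previous 128-bit value, so a conditional
 * update of a naturally aligned 16-byte slot looks like:
 *
 *	static inline bool example_try_update128(u128 *slot, u128 old, u128 new)
 *	{
 *		return arch_cmpxchg128(slot, old, new) == old;
 *	}
 */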

#endif /* CONFIG_64BIT && CONFIG_RISCV_ISA_ZACAS */

#ifdef CONFIG_RISCV_ISA_ZAWRS
/*
 * Despite wrs.nto being "WRS-with-no-timeout", in the absence of changes to
 * @val we expect it to still terminate within a "reasonable" amount of time
 * for an implementation-specific other reason, a pending, locally-enabled
 * interrupt, or because it has been configured to raise an illegal
 * instruction exception.
 */
static __always_inline void __cmpwait(volatile void *ptr,
				      unsigned long val,
				      int size)
{
	unsigned long tmp;

	u32 *__ptr32b;
	ulong __s, __val, __mask;

	asm goto(ALTERNATIVE("j %l[no_zawrs]", "nop",
			     0, RISCV_ISA_EXT_ZAWRS, 1)
		 : : : : no_zawrs);

	switch (size) {
	case 1:
		__ptr32b = (u32 *)((ulong)(ptr) & ~0x3);
		__s = ((ulong)(ptr) & 0x3) * BITS_PER_BYTE;
		__val = val << __s;
		__mask = 0xff << __s;

		asm volatile(
			" lr.w %0, %1\n"
			" and %0, %0, %3\n"
			" xor %0, %0, %2\n"
			" bnez %0, 1f\n"
			ZAWRS_WRS_NTO "\n"
			"1:"
			: "=&r" (tmp), "+A" (*(__ptr32b))
			: "r" (__val), "r" (__mask)
			: "memory");
		break;
	case 2:
		__ptr32b = (u32 *)((ulong)(ptr) & ~0x3);
		__s = ((ulong)(ptr) & 0x2) * BITS_PER_BYTE;
		__val = val << __s;
		__mask = 0xffff << __s;

		asm volatile(
			" lr.w %0, %1\n"
			" and %0, %0, %3\n"
			" xor %0, %0, %2\n"
			" bnez %0, 1f\n"
			ZAWRS_WRS_NTO "\n"
			"1:"
			: "=&r" (tmp), "+A" (*(__ptr32b))
			: "r" (__val), "r" (__mask)
			: "memory");
		break;
	case 4:
		asm volatile(
			" lr.w %0, %1\n"
			" xor %0, %0, %2\n"
			" bnez %0, 1f\n"
			ZAWRS_WRS_NTO "\n"
			"1:"
			: "=&r" (tmp), "+A" (*(u32 *)ptr)
			: "r" (val));
		break;
#if __riscv_xlen == 64
	case 8:
		asm volatile(
			" lr.d %0, %1\n"
			" xor %0, %0, %2\n"
			" bnez %0, 1f\n"
			ZAWRS_WRS_NTO "\n"
			"1:"
			: "=&r" (tmp), "+A" (*(u64 *)ptr)
			: "r" (val));
		break;
#endif
	default:
		BUILD_BUG();
	}

	return;

no_zawrs:
	asm volatile(RISCV_PAUSE : : : "memory");
}

#define __cmpwait_relaxed(ptr, val) \
	__cmpwait((ptr), (unsigned long)(val), sizeof(*(ptr)))
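
/*
 * Illustrative sketch only (hypothetical poll loop, not part of this header):
 * __cmpwait_relaxed() waits in wrs.nto only while the location still holds
 * the value the caller has already seen, so a typical user re-checks after
 * every wakeup:
 *
 *	static inline u32 example_wait_for_change(u32 *p, u32 seen)
 *	{
 *		u32 val;
 *
 *		while ((val = READ_ONCE(*p)) == seen)
 *			__cmpwait_relaxed(p, seen);
 *		return val;
 *	}
 */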
#endif

#endif /* _ASM_RISCV_CMPXCHG_H */