xref: /linux/arch/riscv/include/asm/cmpxchg.h (revision 7f71507851fc7764b36a3221839607d3a45c2025)
1 /* SPDX-License-Identifier: GPL-2.0-only */
2 /*
3  * Copyright (C) 2014 Regents of the University of California
4  */
5 
6 #ifndef _ASM_RISCV_CMPXCHG_H
7 #define _ASM_RISCV_CMPXCHG_H
8 
9 #include <linux/bug.h>
10 
11 #include <asm/alternative-macros.h>
12 #include <asm/fence.h>
13 #include <asm/hwcap.h>
14 #include <asm/insn-def.h>
15 #include <asm/cpufeature-macros.h>
16 
/*
 * __arch_xchg_masked() - exchange a 1- or 2-byte value.
 *
 * @sc_sfx:      suffix appended to "sc.w" in the LR/SC fallback
 * @swap_sfx:    suffix appended to "amoswap" (e.g. ".b", ".h" plus ordering)
 * @prepend:     asm text emitted before either instruction sequence
 * @sc_append:   asm text emitted after the LR/SC loop
 * @swap_append: asm text emitted after the amoswap
 * @r:           lvalue that receives the previous value of *@p
 * @p:           pointer to the 1- or 2-byte object
 * @n:           new value to store
 *
 * When CONFIG_RISCV_ISA_ZABHA is enabled and the extension is reported by
 * riscv_has_extension_unlikely(), a single sub-word amoswap is used.
 * Otherwise the exchange is emulated with an LR/SC loop on the 4-byte
 * aligned word containing *@p: the bits selected by __mask are replaced
 * with the new value and all other bits of the word are written back
 * unchanged.
 */
17 #define __arch_xchg_masked(sc_sfx, swap_sfx, prepend, sc_append,		\
18 			   swap_append, r, p, n)				\
19 ({										\
20 	if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) &&				\
21 	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA)) {		\
22 		__asm__ __volatile__ (						\
23 			prepend							\
24 			"	amoswap" swap_sfx " %0, %z2, %1\n"		\
25 			swap_append						\
26 			: "=&r" (r), "+A" (*(p))				\
27 			: "rJ" (n)						\
28 			: "memory");						\
29 	} else {								\
		/* Word containing *p, bit offset of *p in it, field mask. */	\
30 		u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3);			\
31 		ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE;	\
32 		ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0)	\
33 				<< __s;						\
		/*								\
		 * Clamp the shifted value to the field: (ulong)(n) is		\
		 * sign-extended when *p has a signed type, and the stray	\
		 * high bits would otherwise be OR-ed into the neighbouring	\
		 * bytes of the word.						\
		 */								\
34 		ulong __newx = ((ulong)(n) << __s) & __mask;			\
35 		ulong __retx;							\
36 		ulong __rc;							\
37 										\
38 		__asm__ __volatile__ (						\
39 		       prepend							\
40 		       "0:	lr.w %0, %2\n"					\
41 		       "	and  %1, %0, %z4\n"				\
42 		       "	or   %1, %1, %z3\n"				\
43 		       "	sc.w" sc_sfx " %1, %1, %2\n"			\
44 		       "	bnez %1, 0b\n"					\
45 		       sc_append						\
46 		       : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b))	\
47 		       : "rJ" (__newx), "rJ" (~__mask)				\
48 		       : "memory");						\
49 										\
		/* Extract the old field from the loaded word. */		\
50 		r = (__typeof__(*(p)))((__retx & __mask) >> __s);		\
51 	}									\
52 })
53 
/*
 * __arch_xchg() - exchange a word or double-word with one amoswap.
 *
 * @sfx:     suffix appended to "amoswap" (size and ordering)
 * @prepend: asm text emitted before the amoswap
 * @append:  asm text emitted after the amoswap
 * @r:       lvalue that receives the previous value of *@p
 * @p:       pointer to the object
 * @n:       new value to store
 */
54 #define __arch_xchg(sfx, prepend, append, r, p, n)			\
55 ({									\
56 	__asm__ __volatile__ (						\
57 		prepend							\
58 		"	amoswap" sfx " %0, %2, %1\n"			\
59 		append							\
60 		: "=r" (r), "+A" (*(p))					\
61 		: "r" (n)						\
62 		: "memory");						\
63 })
64 
/*
 * _arch_xchg() - size-dispatching exchange; evaluates to the old value.
 *
 * @ptr and @new are each copied into a local once, then the operation is
 * selected by sizeof(*ptr):
 *   1, 2 -> __arch_xchg_masked() with ".b"/".h" prefixed to @swap_sfx
 *   4, 8 -> __arch_xchg() with ".w"/".d" prefixed to @swap_sfx, using
 *           @prepend and @swap_append (@sc_sfx/@sc_append are unused here)
 *   else -> BUILD_BUG()
 *
 * The statement expression yields __ret cast to the pointee type.
 */
65 #define _arch_xchg(ptr, new, sc_sfx, swap_sfx, prepend,			\
66 		   sc_append, swap_append)				\
67 ({									\
68 	__typeof__(ptr) __ptr = (ptr);					\
69 	__typeof__(*(__ptr)) __new = (new);				\
70 	__typeof__(*(__ptr)) __ret;					\
71 									\
72 	switch (sizeof(*__ptr)) {					\
73 	case 1:								\
74 		__arch_xchg_masked(sc_sfx, ".b" swap_sfx,		\
75 				   prepend, sc_append, swap_append,	\
76 				   __ret, __ptr, __new);		\
77 		break;							\
78 	case 2:								\
79 		__arch_xchg_masked(sc_sfx, ".h" swap_sfx,		\
80 				   prepend, sc_append, swap_append,	\
81 				   __ret, __ptr, __new);		\
82 		break;							\
83 	case 4:								\
84 		__arch_xchg(".w" swap_sfx, prepend, swap_append,	\
85 			      __ret, __ptr, __new);			\
86 		break;							\
87 	case 8:								\
88 		__arch_xchg(".d" swap_sfx, prepend, swap_append,	\
89 			      __ret, __ptr, __new);			\
90 		break;							\
91 	default:							\
92 		BUILD_BUG();						\
93 	}								\
94 	(__typeof__(*(__ptr)))__ret;					\
95 })
96 
/*
 * Exchange with empty instruction suffixes and no text prepended or
 * appended, so only the bare amoswap (or LR/SC loop) is emitted.
 */
97 #define arch_xchg_relaxed(ptr, x)					\
98 	_arch_xchg(ptr, x, "", "", "", "", "")
99 
/*
 * Exchange with RISCV_ACQUIRE_BARRIER (defined outside this file) emitted
 * after the operation, for both the LR/SC and the amoswap sequence.
 */
100 #define arch_xchg_acquire(ptr, x)					\
101 	_arch_xchg(ptr, x, "", "", "",					\
102 		   RISCV_ACQUIRE_BARRIER, RISCV_ACQUIRE_BARRIER)
103 
/*
 * Exchange with RISCV_RELEASE_BARRIER (defined outside this file) emitted
 * before the operation; nothing is appended.
 */
104 #define arch_xchg_release(ptr, x)					\
105 	_arch_xchg(ptr, x, "", "", RISCV_RELEASE_BARRIER, "", "")
106 
/*
 * Exchange using the ".aqrl" suffix on amoswap.  The LR/SC fallback for
 * 1- and 2-byte objects uses "sc.w.rl" followed by RISCV_FULL_BARRIER
 * (defined outside this file); the amoswap path gets no extra text.
 */
107 #define arch_xchg(ptr, x)						\
108 	_arch_xchg(ptr, x, ".rl", ".aqrl", "", RISCV_FULL_BARRIER, "")
109 
/*
 * arch_xchg() restricted to 4-byte objects: the build fails if
 * sizeof(*ptr) != 4.  Evaluates to the value returned by arch_xchg().
 */
110 #define xchg32(ptr, x)							\
111 ({									\
112 	BUILD_BUG_ON(sizeof(*(ptr)) != 4);				\
113 	arch_xchg((ptr), (x));						\
114 })
115 
/*
 * arch_xchg() restricted to 8-byte objects: the build fails if
 * sizeof(*ptr) != 8.  Evaluates to the value returned by arch_xchg().
 */
116 #define xchg64(ptr, x)							\
117 ({									\
118 	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
119 	arch_xchg((ptr), (x));						\
120 })
121 
122 /*
123  * Atomic compare and exchange.  Compare OLD with MEM, if identical,
124  * store NEW in MEM.  Return the initial value in MEM.  Success is
125  * indicated by comparing RETURN with OLD.
126  */
/*
 * __arch_cmpxchg_masked() - compare-and-exchange a 1- or 2-byte value.
 *
 * @sc_sfx:      suffix appended to "sc.w" in the LR/SC fallback
 * @cas_sfx:     suffix appended to "amocas" (e.g. ".b", ".h" plus ordering)
 * @sc_prepend:  asm text emitted before the LR/SC loop
 * @sc_append:   asm text emitted after a successful store-conditional
 * @cas_prepend: asm text emitted before the amocas
 * @cas_append:  asm text emitted after the amocas
 * @r:           lvalue that receives the value found in *@p
 * @p:           pointer to the 1- or 2-byte object
 * @o:           expected old value
 * @n:           new value to store if *@p equals @o
 *
 * When both CONFIG_RISCV_ISA_ZABHA and CONFIG_RISCV_ISA_ZACAS are enabled
 * and both extensions are reported by riscv_has_extension_unlikely(), a
 * single sub-word amocas is used, with @r preloaded with @o.  Otherwise the
 * operation is emulated with an LR/SC loop on the 4-byte aligned word
 * containing *@p; only the bits selected by __mask are compared and
 * replaced.
 */
127 #define __arch_cmpxchg_masked(sc_sfx, cas_sfx,					\
128 			      sc_prepend, sc_append,				\
129 			      cas_prepend, cas_append,				\
130 			      r, p, o, n)					\
131 ({										\
132 	if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) &&				\
133 	    IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) &&				\
134 	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA) &&		\
135 	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)) {		\
136 		r = o;								\
137 										\
138 		__asm__ __volatile__ (						\
139 			cas_prepend							\
140 			"	amocas" cas_sfx " %0, %z2, %1\n"		\
141 			cas_append							\
142 			: "+&r" (r), "+A" (*(p))				\
143 			: "rJ" (n)						\
144 			: "memory");						\
145 	} else {								\
		/* Word containing *p, bit offset of *p in it, field mask. */	\
146 		u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3);			\
147 		ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE;	\
148 		ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0)	\
149 			       << __s;						\
		/*								\
		 * Clamp both shifted values to the field: (ulong) of a		\
		 * negative signed @o/@n is sign-extended.  Unclamped, the	\
		 * old value could never equal (__retx & __mask) and the new	\
		 * value would be OR-ed into the neighbouring bytes.		\
		 */								\
150 		ulong __newx = ((ulong)(n) << __s) & __mask;			\
151 		ulong __oldx = ((ulong)(o) << __s) & __mask;			\
152 		ulong __retx;							\
153 		ulong __rc;							\
154 										\
		/*								\
		 * On mismatch the bne skips to 1:, bypassing both the store	\
		 * and sc_append.						\
		 */								\
155 		__asm__ __volatile__ (						\
156 			sc_prepend							\
157 			"0:	lr.w %0, %2\n"					\
158 			"	and  %1, %0, %z5\n"				\
159 			"	bne  %1, %z3, 1f\n"				\
160 			"	and  %1, %0, %z6\n"				\
161 			"	or   %1, %1, %z4\n"				\
162 			"	sc.w" sc_sfx " %1, %1, %2\n"			\
163 			"	bnez %1, 0b\n"					\
164 			sc_append							\
165 			"1:\n"							\
166 			: "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b))	\
167 			: "rJ" ((long)__oldx), "rJ" (__newx),			\
168 			  "rJ" (__mask), "rJ" (~__mask)				\
169 			: "memory");						\
170 										\
		/* Extract the observed field from the loaded word. */		\
171 		r = (__typeof__(*(p)))((__retx & __mask) >> __s);		\
172 	}									\
173 })
174 
/*
 * __arch_cmpxchg() - compare-and-exchange a word or double-word.
 *
 * @lr_sfx:      suffix appended to "lr" (size)
 * @sc_sfx:      suffix appended to "sc" (size and ordering)
 * @cas_sfx:     suffix appended to "amocas" (size and ordering)
 * @sc_prepend:  asm text emitted before the LR/SC loop
 * @sc_append:   asm text emitted after a successful store-conditional
 * @cas_prepend: asm text emitted before the amocas
 * @cas_append:  asm text emitted after the amocas
 * @r:           lvalue that receives the value found in *@p
 * @p:           pointer to the object
 * @co:          cast (possibly empty) pasted in front of @o for the LR/SC
 *               comparison operand
 * @o:           expected old value
 * @n:           new value to store if *@p equals @o
 *
 * With CONFIG_RISCV_ISA_ZACAS enabled and the extension reported by
 * riscv_has_extension_unlikely(), @r is preloaded with @o and one amocas is
 * issued.  Otherwise an LR/SC loop is used; on mismatch the bne skips to
 * 1:, bypassing both the store and @sc_append.
 */
175 #define __arch_cmpxchg(lr_sfx, sc_sfx, cas_sfx,				\
176 		       sc_prepend, sc_append,				\
177 		       cas_prepend, cas_append,				\
178 		       r, p, co, o, n)					\
179 ({									\
180 	if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) &&			\
181 	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)) {	\
182 		r = o;							\
183 									\
184 		__asm__ __volatile__ (					\
185 			cas_prepend					\
186 			"	amocas" cas_sfx " %0, %z2, %1\n"	\
187 			cas_append					\
188 			: "+&r" (r), "+A" (*(p))			\
189 			: "rJ" (n)					\
190 			: "memory");					\
191 	} else {							\
192 		register unsigned int __rc;				\
193 									\
194 		__asm__ __volatile__ (					\
195 			sc_prepend					\
196 			"0:	lr" lr_sfx " %0, %2\n"			\
197 			"	bne  %0, %z3, 1f\n"			\
198 			"	sc" sc_sfx " %1, %z4, %2\n"		\
199 			"	bnez %1, 0b\n"				\
200 			sc_append					\
201 			"1:\n"						\
202 			: "=&r" (r), "=&r" (__rc), "+A" (*(p))		\
203 			: "rJ" (co o), "rJ" (n)				\
204 			: "memory");					\
205 	}								\
206 })
207 
/*
 * _arch_cmpxchg() - size-dispatching compare-and-exchange; evaluates to the
 * value found in *ptr.
 *
 * @ptr, @old and @new are each copied into a local once, then the
 * operation is selected by sizeof(*ptr):
 *   1, 2 -> __arch_cmpxchg_masked() with ".b"/".h" prefixed to @cas_sfx
 *   4    -> __arch_cmpxchg() with ".w" suffixes and a (long) cast on the
 *           old value
 *           NOTE(review): the cast presumably makes the comparison operand
 *           match the sign-extended result of lr.w on 64-bit - confirm.
 *   8    -> __arch_cmpxchg() with ".d" suffixes and no cast
 *   else -> BUILD_BUG()
 */
208 #define _arch_cmpxchg(ptr, old, new, sc_sfx, cas_sfx,			\
209 		      sc_prepend, sc_append,				\
210 		      cas_prepend, cas_append)				\
211 ({									\
212 	__typeof__(ptr) __ptr = (ptr);					\
213 	__typeof__(*(__ptr)) __old = (old);				\
214 	__typeof__(*(__ptr)) __new = (new);				\
215 	__typeof__(*(__ptr)) __ret;					\
216 									\
217 	switch (sizeof(*__ptr)) {					\
218 	case 1:								\
219 		__arch_cmpxchg_masked(sc_sfx, ".b" cas_sfx,		\
220 				      sc_prepend, sc_append,		\
221 				      cas_prepend, cas_append,		\
222 				      __ret, __ptr, __old, __new);	\
223 		break;							\
224 	case 2:								\
225 		__arch_cmpxchg_masked(sc_sfx, ".h" cas_sfx,		\
226 				      sc_prepend, sc_append,		\
227 				      cas_prepend, cas_append,		\
228 				      __ret, __ptr, __old, __new);	\
229 		break;							\
230 	case 4:								\
231 		__arch_cmpxchg(".w", ".w" sc_sfx, ".w" cas_sfx,		\
232 			       sc_prepend, sc_append,			\
233 			       cas_prepend, cas_append,			\
234 			       __ret, __ptr, (long), __old, __new);	\
235 		break;							\
236 	case 8:								\
237 		__arch_cmpxchg(".d", ".d" sc_sfx, ".d" cas_sfx,		\
238 			       sc_prepend, sc_append,			\
239 			       cas_prepend, cas_append,			\
240 			       __ret, __ptr, /**/, __old, __new);	\
241 		break;							\
242 	default:							\
243 		BUILD_BUG();						\
244 	}								\
245 	(__typeof__(*(__ptr)))__ret;					\
246 })
247 
248 /*
249  * These macros are here to improve the readability of the arch_cmpxchg_XXX()
250  * macros.
 *
 * Each one expands to its argument unchanged; it only labels which
 * positional parameter of _arch_cmpxchg() the argument is passed as.
251  */
252 #define SC_SFX(x)	x
253 #define CAS_SFX(x)	x
254 #define SC_PREPEND(x)	x
255 #define SC_APPEND(x)	x
256 #define CAS_PREPEND(x)	x
257 #define CAS_APPEND(x)	x
258 
/*
 * Compare-and-exchange with empty instruction suffixes and no text
 * prepended or appended to either the LR/SC or the amocas sequence.
 */
259 #define arch_cmpxchg_relaxed(ptr, o, n)					\
260 	_arch_cmpxchg((ptr), (o), (n),					\
261 		      SC_SFX(""), CAS_SFX(""),				\
262 		      SC_PREPEND(""), SC_APPEND(""),			\
263 		      CAS_PREPEND(""), CAS_APPEND(""))
264 
/*
 * Compare-and-exchange with RISCV_ACQUIRE_BARRIER (defined outside this
 * file) appended to both the LR/SC and the amocas sequence.
 */
265 #define arch_cmpxchg_acquire(ptr, o, n)					\
266 	_arch_cmpxchg((ptr), (o), (n),					\
267 		      SC_SFX(""), CAS_SFX(""),				\
268 		      SC_PREPEND(""), SC_APPEND(RISCV_ACQUIRE_BARRIER),	\
269 		      CAS_PREPEND(""), CAS_APPEND(RISCV_ACQUIRE_BARRIER))
270 
/*
 * Compare-and-exchange with RISCV_RELEASE_BARRIER (defined outside this
 * file) prepended to both the LR/SC and the amocas sequence.
 */
271 #define arch_cmpxchg_release(ptr, o, n)					\
272 	_arch_cmpxchg((ptr), (o), (n),					\
273 		      SC_SFX(""), CAS_SFX(""),				\
274 		      SC_PREPEND(RISCV_RELEASE_BARRIER), SC_APPEND(""),	\
275 		      CAS_PREPEND(RISCV_RELEASE_BARRIER), CAS_APPEND(""))
276 
/*
 * Compare-and-exchange using the ".aqrl" suffix on amocas with no extra
 * text.  The LR/SC fallback uses the ".rl" suffix on sc followed by
 * RISCV_FULL_BARRIER (defined outside this file).
 */
277 #define arch_cmpxchg(ptr, o, n)						\
278 	_arch_cmpxchg((ptr), (o), (n),					\
279 		      SC_SFX(".rl"), CAS_SFX(".aqrl"),			\
280 		      SC_PREPEND(""), SC_APPEND(RISCV_FULL_BARRIER),	\
281 		      CAS_PREPEND(""), CAS_APPEND(""))
282 
/* Alias of arch_cmpxchg_relaxed(). */
283 #define arch_cmpxchg_local(ptr, o, n)					\
284 	arch_cmpxchg_relaxed((ptr), (o), (n))
285 
/* arch_cmpxchg() restricted to 8-byte objects; other sizes fail the build. */
286 #define arch_cmpxchg64(ptr, o, n)					\
287 ({									\
288 	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
289 	arch_cmpxchg((ptr), (o), (n));					\
290 })
291 
/*
 * arch_cmpxchg_relaxed() restricted to 8-byte objects; other sizes fail
 * the build.
 */
292 #define arch_cmpxchg64_local(ptr, o, n)					\
293 ({									\
294 	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
295 	arch_cmpxchg_relaxed((ptr), (o), (n));				\
296 })
297 
/*
 * arch_cmpxchg_relaxed() restricted to 8-byte objects; other sizes fail
 * the build.
 */
298 #define arch_cmpxchg64_relaxed(ptr, o, n)				\
299 ({									\
300 	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
301 	arch_cmpxchg_relaxed((ptr), (o), (n));				\
302 })
303 
/*
 * arch_cmpxchg_acquire() restricted to 8-byte objects; other sizes fail
 * the build.
 */
304 #define arch_cmpxchg64_acquire(ptr, o, n)				\
305 ({									\
306 	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
307 	arch_cmpxchg_acquire((ptr), (o), (n));				\
308 })
309 
/*
 * arch_cmpxchg_release() restricted to 8-byte objects; other sizes fail
 * the build.
 */
310 #define arch_cmpxchg64_release(ptr, o, n)				\
311 ({									\
312 	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
313 	arch_cmpxchg_release((ptr), (o), (n));				\
314 })
315 
316 #if defined(CONFIG_64BIT) && defined(CONFIG_RISCV_ISA_ZACAS)
317 
/*
 * Run-time test for 128-bit cmpxchg support: true when the Zacas extension
 * is reported by riscv_has_extension_unlikely().
 */
318 #define system_has_cmpxchg128()        riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)
319 
/*
 * Overlay of a u128 with two u64 members, used by __arch_cmpxchg128() to
 * split a 128-bit value into the halves it loads into registers.  @low is
 * declared first, i.e. it occupies the lower addresses of @full.
 */
320 union __u128_halves {
321 	u128 full;
322 	struct {
323 		u64 low, high;
324 	};
325 };
326 
/*
 * __arch_cmpxchg128() - 128-bit compare-and-exchange using amocas.q.
 *
 * @p:       pointer to the 128-bit object
 * @o:       expected old value
 * @n:       new value to store if *@p equals @o
 * @cas_sfx: ordering suffix appended to "amocas.q"
 *
 * The new value is placed in t1 (low) / t2 (high) and the expected value
 * in t3 (low) / t4 (high).  Only t3 and t1 are named in the instruction;
 * t4 and t2 are listed as operands so the compiler keeps them live.  The
 * macro evaluates to the u128 rebuilt from t4:t3 after the instruction.
 *
 * NOTE(review): the fixed registers presumably satisfy a register-pair
 * requirement of amocas.q - confirm against the Zacas specification.
 */
327 #define __arch_cmpxchg128(p, o, n, cas_sfx)					\
328 ({										\
329 	__typeof__(*(p)) __o = (o);                                             \
330 	union __u128_halves __hn = { .full = (n) };				\
331 	union __u128_halves __ho = { .full = (__o) };				\
332 	register unsigned long t1 asm ("t1") = __hn.low;			\
333 	register unsigned long t2 asm ("t2") = __hn.high;			\
334 	register unsigned long t3 asm ("t3") = __ho.low;			\
335 	register unsigned long t4 asm ("t4") = __ho.high;			\
336 										\
337 	__asm__ __volatile__ (							\
338 		 "       amocas.q" cas_sfx " %0, %z3, %2"			\
339 		 : "+&r" (t3), "+&r" (t4), "+A" (*(p))				\
340 		 : "rJ" (t1), "rJ" (t2)						\
341 		 : "memory");							\
342 										\
343 		 ((u128)t4 << 64) | t3;						\
344 })
345 
/* 128-bit compare-and-exchange using the ".aqrl" suffix on amocas.q. */
346 #define arch_cmpxchg128(ptr, o, n)						\
347 	__arch_cmpxchg128((ptr), (o), (n), ".aqrl")
348 
/* 128-bit compare-and-exchange with no ordering suffix on amocas.q. */
349 #define arch_cmpxchg128_local(ptr, o, n)					\
350 	__arch_cmpxchg128((ptr), (o), (n), "")
351 
352 #endif /* CONFIG_64BIT && CONFIG_RISCV_ISA_ZACAS */
353 
354 #ifdef CONFIG_RISCV_ISA_ZAWRS
355 /*
356  * Despite wrs.nto being "WRS-with-no-timeout", in the absence of changes to
357  * @val we expect it to still terminate within a "reasonable" amount of time
358  * for an implementation-specific other reason, a pending, locally-enabled
359  * interrupt, or because it has been configured to raise an illegal
360  * instruction exception.
361  */
/*
 * __cmpwait() - wait while the object at @ptr still holds @val.
 *
 * @ptr:  address of the object to watch
 * @val:  value the object is expected to hold
 * @size: size of the object in bytes (1, 2, 4, or 8 when __riscv_xlen == 64)
 *
 * For 4- and 8-byte objects the value is loaded with lr.w/lr.d and compared
 * with @val; ZAWRS_WRS_NTO is executed only if they are equal.  For 1- and
 * 2-byte objects, or when the initial jump to no_zawrs is taken,
 * RISCV_PAUSE is executed instead.
 */
362 static __always_inline void __cmpwait(volatile void *ptr,
363 				      unsigned long val,
364 				      int size)
365 {
366 	unsigned long tmp;
367 
	/*
	 * The default instruction jumps to no_zawrs; the alternative, keyed
	 * on RISCV_ISA_EXT_ZAWRS, is a nop that falls through to the switch.
	 */
368 	asm goto(ALTERNATIVE("j %l[no_zawrs]", "nop",
369 			     0, RISCV_ISA_EXT_ZAWRS, 1)
370 		 : : : : no_zawrs);
371 
372 	switch (size) {
373 	case 1:
374 		fallthrough;
375 	case 2:
376 		/* RISC-V doesn't have lr instructions on byte and half-word. */
377 		goto no_zawrs;
378 	case 4:
		/* tmp = *ptr ^ val; a non-zero result skips the wait. */
379 		asm volatile(
380 		"	lr.w	%0, %1\n"
381 		"	xor	%0, %0, %2\n"
382 		"	bnez	%0, 1f\n"
383 			ZAWRS_WRS_NTO "\n"
384 		"1:"
385 		: "=&r" (tmp), "+A" (*(u32 *)ptr)
386 		: "r" (val));
387 		break;
388 #if __riscv_xlen == 64
389 	case 8:
		/* Same sequence as the 4-byte case, using lr.d. */
390 		asm volatile(
391 		"	lr.d	%0, %1\n"
392 		"	xor	%0, %0, %2\n"
393 		"	bnez	%0, 1f\n"
394 			ZAWRS_WRS_NTO "\n"
395 		"1:"
396 		: "=&r" (tmp), "+A" (*(u64 *)ptr)
397 		: "r" (val));
398 		break;
399 #endif
400 	default:
		/* Any other size is rejected at build time. */
401 		BUILD_BUG();
402 	}
403 
404 	return;
405 
406 no_zawrs:
	/* Fallback: a single RISCV_PAUSE. */
407 	asm volatile(RISCV_PAUSE : : : "memory");
408 }
409 
/*
 * Call __cmpwait() on @ptr with @val cast to unsigned long and the size
 * taken from the pointee type.
 */
410 #define __cmpwait_relaxed(ptr, val) \
411 	__cmpwait((ptr), (unsigned long)(val), sizeof(*(ptr)))
412 #endif
413 
414 #endif /* _ASM_RISCV_CMPXCHG_H */
415