/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2014 Regents of the University of California
 */

#ifndef _ASM_RISCV_CMPXCHG_H
#define _ASM_RISCV_CMPXCHG_H

#include <linux/bug.h>

#include <asm/alternative-macros.h>
#include <asm/fence.h>
#include <asm/hwcap.h>
#include <asm/insn-def.h>
#include <asm/cpufeature-macros.h>
#include <asm/processor.h>
#include <asm/errata_list.h>

#define __arch_xchg_masked(sc_sfx, swap_sfx, prepend, sc_append,		\
			   swap_append, r, p, n)				\
({										\
	if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) &&				\
	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA)) {		\
		__asm__ __volatile__ (						\
			prepend							\
			"	amoswap" swap_sfx " %0, %z2, %1\n"		\
			swap_append						\
			: "=&r" (r), "+A" (*(p))				\
			: "rJ" (n)						\
			: "memory");						\
	} else {								\
		u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3);			\
		ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE;	\
		ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0)	\
				<< __s;						\
		ulong __newx = (ulong)(n) << __s;				\
		ulong __retx;							\
		ulong __rc;							\
										\
		__asm__ __volatile__ (						\
		       prepend							\
		       PREFETCHW_ASM(%5)					\
		       "0:	lr.w %0, %2\n"					\
		       "	and  %1, %0, %z4\n"				\
		       "	or   %1, %1, %z3\n"				\
		       "	sc.w" sc_sfx " %1, %1, %2\n"			\
		       "	bnez %1, 0b\n"					\
		       sc_append						\
		       : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b))	\
		       : "rJ" (__newx), "rJ" (~__mask), "rJ" (__ptr32b)		\
		       : "memory");						\
										\
		r = (__typeof__(*(p)))((__retx & __mask) >> __s);		\
	}									\
})
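
/*
 * Illustrative sketch (not part of the header): for a byte-sized xchg on a
 * pointer p whose low two address bits are 3, the lr/sc fallback above
 * operates on the aligned 32-bit word containing *p:
 *
 *	u32 *word  = (u32 *)((ulong)p & ~0x3);	// p rounded down to 4 bytes
 *	ulong s    = 3 * BITS_PER_BYTE;		// byte lives in bits 31..24 (LE)
 *	ulong mask = GENMASK(7, 0) << s;	// 0xff000000
 *
 * The lr.w/sc.w loop swaps only the masked lane and shifts the old word
 * back down by s to recover the previous byte value.
 */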

#define __arch_xchg(sfx, prepend, append, r, p, n)			\
({									\
	__asm__ __volatile__ (						\
		prepend							\
		"	amoswap" sfx " %0, %2, %1\n"			\
		append							\
		: "=r" (r), "+A" (*(p))					\
		: "r" (n)						\
		: "memory");						\
})

#define _arch_xchg(ptr, new, sc_sfx, swap_sfx, prepend,			\
		   sc_append, swap_append)				\
({									\
	__typeof__(ptr) __ptr = (ptr);					\
	__typeof__(*(__ptr)) __new = (new);				\
	__typeof__(*(__ptr)) __ret;					\
									\
	switch (sizeof(*__ptr)) {					\
	case 1:								\
		__arch_xchg_masked(sc_sfx, ".b" swap_sfx,		\
				   prepend, sc_append, swap_append,	\
				   __ret, __ptr, __new);		\
		break;							\
	case 2:								\
		__arch_xchg_masked(sc_sfx, ".h" swap_sfx,		\
				   prepend, sc_append, swap_append,	\
				   __ret, __ptr, __new);		\
		break;							\
	case 4:								\
		__arch_xchg(".w" swap_sfx, prepend, swap_append,	\
			      __ret, __ptr, __new);			\
		break;							\
	case 8:								\
		__arch_xchg(".d" swap_sfx, prepend, swap_append,	\
			      __ret, __ptr, __new);			\
		break;							\
	default:							\
		BUILD_BUG();						\
	}								\
	(__typeof__(*(__ptr)))__ret;					\
})

#define arch_xchg_relaxed(ptr, x)					\
	_arch_xchg(ptr, x, "", "", "", "", "")

#define arch_xchg_acquire(ptr, x)					\
	_arch_xchg(ptr, x, "", "", "",					\
		   RISCV_ACQUIRE_BARRIER, RISCV_ACQUIRE_BARRIER)

#define arch_xchg_release(ptr, x)					\
	_arch_xchg(ptr, x, "", "", RISCV_RELEASE_BARRIER, "", "")

#define arch_xchg(ptr, x)						\
	_arch_xchg(ptr, x, ".rl", ".aqrl", "", RISCV_FULL_BARRIER, "")

#define xchg32(ptr, x)							\
({									\
	BUILD_BUG_ON(sizeof(*(ptr)) != 4);				\
	arch_xchg((ptr), (x));						\
})

#define xchg64(ptr, x)							\
({									\
	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
	arch_xchg((ptr), (x));						\
})
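
/*
 * Illustrative usage sketch (hypothetical caller, not part of the header):
 *
 *	unsigned long flags = 0;
 *	unsigned long old;
 *
 *	old = arch_xchg(&flags, 1UL);		// fully ordered swap
 *	old = arch_xchg_relaxed(&flags, 0UL);	// no ordering guarantees
 *
 * Both return the value that was in @flags before the swap; the _acquire
 * and _release variants order only one direction of surrounding accesses.
 */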

/*
 * Atomic compare and exchange.  Compare OLD with MEM, if identical,
 * store NEW in MEM.  Return the initial value in MEM.  Success is
 * indicated by comparing RETURN with OLD.
 */
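/*
 * Illustrative sketch of those semantics (hypothetical caller, not part of
 * the header):
 *
 *	int v = 0;
 *	int old;
 *
 *	old = arch_cmpxchg(&v, 0, 42);
 *	if (old == 0) {
 *		// success: the store happened, v is now 42
 *	} else {
 *		// failure: old holds the value observed in memory
 *	}
 */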
#define __arch_cmpxchg_masked(sc_sfx, cas_sfx,					\
			      sc_prepend, sc_append,				\
			      cas_prepend, cas_append,				\
			      r, p, o, n)					\
({										\
	if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) &&				\
	    IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) &&				\
	    IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZACAS) &&				\
	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA) &&		\
	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)) {		\
		r = o;								\
										\
		__asm__ __volatile__ (						\
			cas_prepend							\
			"	amocas" cas_sfx " %0, %z2, %1\n"		\
			cas_append							\
			: "+&r" (r), "+A" (*(p))				\
			: "rJ" (n)						\
			: "memory");						\
	} else {								\
		u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3);			\
		ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE;	\
		ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0)	\
			       << __s;						\
		ulong __newx = (ulong)(n) << __s;				\
		ulong __oldx = (ulong)(o) << __s;				\
		ulong __retx;							\
		ulong __rc;							\
										\
		__asm__ __volatile__ (						\
			sc_prepend							\
			"0:	lr.w %0, %2\n"					\
			"	and  %1, %0, %z5\n"				\
			"	bne  %1, %z3, 1f\n"				\
			"	and  %1, %0, %z6\n"				\
			"	or   %1, %1, %z4\n"				\
			"	sc.w" sc_sfx " %1, %1, %2\n"			\
			"	bnez %1, 0b\n"					\
			sc_append							\
			"1:\n"							\
			: "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b))	\
			: "rJ" ((long)__oldx), "rJ" (__newx),			\
			  "rJ" (__mask), "rJ" (~__mask)				\
			: "memory");						\
										\
		r = (__typeof__(*(p)))((__retx & __mask) >> __s);		\
	}									\
})

#define __arch_cmpxchg(lr_sfx, sc_sfx, cas_sfx,				\
		       sc_prepend, sc_append,				\
		       cas_prepend, cas_append,				\
		       r, p, co, o, n)					\
({									\
	if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) &&			\
	    IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZACAS) &&			\
	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)) {	\
		r = o;							\
									\
		__asm__ __volatile__ (					\
			cas_prepend					\
			"	amocas" cas_sfx " %0, %z2, %1\n"	\
			cas_append					\
			: "+&r" (r), "+A" (*(p))			\
			: "rJ" (n)					\
			: "memory");					\
	} else {							\
		register unsigned int __rc;				\
									\
		__asm__ __volatile__ (					\
			sc_prepend					\
			"0:	lr" lr_sfx " %0, %2\n"			\
			"	bne  %0, %z3, 1f\n"			\
			"	sc" sc_sfx " %1, %z4, %2\n"		\
			"	bnez %1, 0b\n"				\
			sc_append					\
			"1:\n"						\
			: "=&r" (r), "=&r" (__rc), "+A" (*(p))		\
			: "rJ" (co o), "rJ" (n)				\
			: "memory");					\
	}								\
})

#define _arch_cmpxchg(ptr, old, new, sc_sfx, cas_sfx,			\
		      sc_prepend, sc_append,				\
		      cas_prepend, cas_append)				\
({									\
	__typeof__(ptr) __ptr = (ptr);					\
	__typeof__(*(__ptr)) __old = (old);				\
	__typeof__(*(__ptr)) __new = (new);				\
	__typeof__(*(__ptr)) __ret;					\
									\
	switch (sizeof(*__ptr)) {					\
	case 1:								\
		__arch_cmpxchg_masked(sc_sfx, ".b" cas_sfx,		\
				      sc_prepend, sc_append,		\
				      cas_prepend, cas_append,		\
				      __ret, __ptr, __old, __new);	\
		break;							\
	case 2:								\
		__arch_cmpxchg_masked(sc_sfx, ".h" cas_sfx,		\
				      sc_prepend, sc_append,		\
				      cas_prepend, cas_append,		\
				      __ret, __ptr, __old, __new);	\
		break;							\
	case 4:								\
		__arch_cmpxchg(".w", ".w" sc_sfx, ".w" cas_sfx,		\
			       sc_prepend, sc_append,			\
			       cas_prepend, cas_append,			\
			       __ret, __ptr, (long)(int)(long), __old, __new);	\
		break;							\
	case 8:								\
		__arch_cmpxchg(".d", ".d" sc_sfx, ".d" cas_sfx,		\
			       sc_prepend, sc_append,			\
			       cas_prepend, cas_append,			\
			       __ret, __ptr, /**/, __old, __new);	\
		break;							\
	default:							\
		BUILD_BUG();						\
	}								\
	(__typeof__(*(__ptr)))__ret;					\
})

/*
 * These macros are here to improve the readability of the arch_cmpxchg_XXX()
 * macros.
 */
#define SC_SFX(x)	x
#define CAS_SFX(x)	x
#define SC_PREPEND(x)	x
#define SC_APPEND(x)	x
#define CAS_PREPEND(x)	x
#define CAS_APPEND(x)	x

#define arch_cmpxchg_relaxed(ptr, o, n)					\
	_arch_cmpxchg((ptr), (o), (n),					\
		      SC_SFX(""), CAS_SFX(""),				\
		      SC_PREPEND(""), SC_APPEND(""),			\
		      CAS_PREPEND(""), CAS_APPEND(""))

#define arch_cmpxchg_acquire(ptr, o, n)					\
	_arch_cmpxchg((ptr), (o), (n),					\
		      SC_SFX(""), CAS_SFX(""),				\
		      SC_PREPEND(""), SC_APPEND(RISCV_ACQUIRE_BARRIER),	\
		      CAS_PREPEND(""), CAS_APPEND(RISCV_ACQUIRE_BARRIER))

#define arch_cmpxchg_release(ptr, o, n)					\
	_arch_cmpxchg((ptr), (o), (n),					\
		      SC_SFX(""), CAS_SFX(""),				\
		      SC_PREPEND(RISCV_RELEASE_BARRIER), SC_APPEND(""),	\
		      CAS_PREPEND(RISCV_RELEASE_BARRIER), CAS_APPEND(""))

#define arch_cmpxchg(ptr, o, n)						\
	_arch_cmpxchg((ptr), (o), (n),					\
		      SC_SFX(".rl"), CAS_SFX(".aqrl"),			\
		      SC_PREPEND(""), SC_APPEND(RISCV_FULL_BARRIER),	\
		      CAS_PREPEND(""), CAS_APPEND(""))
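
/*
 * Illustrative sketch of the usual retry loop built on these primitives
 * (hypothetical caller and helper name, not part of the header):
 *
 *	static inline int example_add_if_positive(int *p, int delta)
 *	{
 *		int old = READ_ONCE(*p), prev;
 *
 *		while (old > 0) {
 *			prev = arch_cmpxchg(p, old, old + delta);
 *			if (prev == old)
 *				return 1;	// updated
 *			old = prev;		// lost the race, retry with the new value
 *		}
 *		return 0;			// never updated
 *	}
 */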

#define arch_cmpxchg_local(ptr, o, n)					\
	arch_cmpxchg_relaxed((ptr), (o), (n))

#define arch_cmpxchg64(ptr, o, n)					\
({									\
	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
	arch_cmpxchg((ptr), (o), (n));					\
})

#define arch_cmpxchg64_local(ptr, o, n)					\
({									\
	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
	arch_cmpxchg_relaxed((ptr), (o), (n));				\
})

#define arch_cmpxchg64_relaxed(ptr, o, n)				\
({									\
	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
	arch_cmpxchg_relaxed((ptr), (o), (n));				\
})

#define arch_cmpxchg64_acquire(ptr, o, n)				\
({									\
	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
	arch_cmpxchg_acquire((ptr), (o), (n));				\
})

#define arch_cmpxchg64_release(ptr, o, n)				\
({									\
	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
	arch_cmpxchg_release((ptr), (o), (n));				\
})

#if defined(CONFIG_64BIT) && defined(CONFIG_RISCV_ISA_ZACAS) && defined(CONFIG_TOOLCHAIN_HAS_ZACAS)

#define system_has_cmpxchg128()        riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)

union __u128_halves {
	u128 full;
	struct {
		u64 low, high;
	};
};

#define __arch_cmpxchg128(p, o, n, cas_sfx)					\
({										\
	__typeof__(*(p)) __o = (o);						\
	union __u128_halves __hn = { .full = (n) };				\
	union __u128_halves __ho = { .full = (__o) };				\
	register unsigned long t1 asm ("t1") = __hn.low;			\
	register unsigned long t2 asm ("t2") = __hn.high;			\
	register unsigned long t3 asm ("t3") = __ho.low;			\
	register unsigned long t4 asm ("t4") = __ho.high;			\
										\
	__asm__ __volatile__ (							\
		 "       amocas.q" cas_sfx " %0, %z3, %2"			\
		 : "+&r" (t3), "+&r" (t4), "+A" (*(p))				\
		 : "rJ" (t1), "rJ" (t2)						\
		 : "memory");							\
										\
	((u128)t4 << 64) | t3;							\
})

#define arch_cmpxchg128(ptr, o, n)						\
	__arch_cmpxchg128((ptr), (o), (n), ".aqrl")

#define arch_cmpxchg128_local(ptr, o, n)					\
	__arch_cmpxchg128((ptr), (o), (n), "")
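
/*
 * Illustrative sketch (hypothetical caller and variable, not part of the
 * header): the 128-bit cmpxchg is only usable after a runtime check, since
 * Zacas may be absent on the running hart:
 *
 *	u128 old = 0, newv = ((u128)1 << 64) | 2;
 *
 *	if (system_has_cmpxchg128())
 *		old = arch_cmpxchg128(&some_u128, old, newv);
 *	// old now holds the value observed in memory; compare it with the
 *	// expected value to tell success from failure.
 */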

#endif /* CONFIG_64BIT && CONFIG_RISCV_ISA_ZACAS && CONFIG_TOOLCHAIN_HAS_ZACAS */

#ifdef CONFIG_RISCV_ISA_ZAWRS
/*
 * Despite wrs.nto being "WRS-with-no-timeout", in the absence of changes to
 * @val we still expect it to terminate within a "reasonable" amount of time:
 * for an implementation-specific reason, because of a pending locally-enabled
 * interrupt, or because it has been configured to raise an illegal
 * instruction exception.
 */
static __always_inline void __cmpwait(volatile void *ptr,
				      unsigned long val,
				      int size)
{
	unsigned long tmp;

	u32 *__ptr32b;
	ulong __s, __val, __mask;

	asm goto(ALTERNATIVE("j %l[no_zawrs]", "nop",
			     0, RISCV_ISA_EXT_ZAWRS, 1)
		 : : : : no_zawrs);

	switch (size) {
	case 1:
		__ptr32b = (u32 *)((ulong)(ptr) & ~0x3);
		__s = ((ulong)(ptr) & 0x3) * BITS_PER_BYTE;
		__val = val << __s;
		__mask = 0xff << __s;

		asm volatile(
		"	lr.w	%0, %1\n"
		"	and	%0, %0, %3\n"
		"	xor	%0, %0, %2\n"
		"	bnez	%0, 1f\n"
			ZAWRS_WRS_NTO "\n"
		"1:"
		: "=&r" (tmp), "+A" (*(__ptr32b))
		: "r" (__val), "r" (__mask)
		: "memory");
		break;
	case 2:
		__ptr32b = (u32 *)((ulong)(ptr) & ~0x3);
		__s = ((ulong)(ptr) & 0x2) * BITS_PER_BYTE;
		__val = val << __s;
		__mask = 0xffff << __s;

		asm volatile(
		"	lr.w	%0, %1\n"
		"	and	%0, %0, %3\n"
		"	xor	%0, %0, %2\n"
		"	bnez	%0, 1f\n"
			ZAWRS_WRS_NTO "\n"
		"1:"
		: "=&r" (tmp), "+A" (*(__ptr32b))
		: "r" (__val), "r" (__mask)
		: "memory");
		break;
	case 4:
		asm volatile(
		"	lr.w	%0, %1\n"
		"	xor	%0, %0, %2\n"
		"	bnez	%0, 1f\n"
			ZAWRS_WRS_NTO "\n"
		"1:"
		: "=&r" (tmp), "+A" (*(u32 *)ptr)
		: "r" (val));
		break;
#if __riscv_xlen == 64
	case 8:
		asm volatile(
		"	lr.d	%0, %1\n"
		"	xor	%0, %0, %2\n"
		"	bnez	%0, 1f\n"
			ZAWRS_WRS_NTO "\n"
		"1:"
		: "=&r" (tmp), "+A" (*(u64 *)ptr)
		: "r" (val));
		break;
#endif
	default:
		BUILD_BUG();
	}

	return;

no_zawrs:
	ALT_RISCV_PAUSE();
}

#define __cmpwait_relaxed(ptr, val) \
	__cmpwait((ptr), (unsigned long)(val), sizeof(*(ptr)))
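
/*
 * Illustrative sketch (hypothetical caller and variable, not part of the
 * header): a smp_cond_load_relaxed()-style polling loop re-reads the
 * variable and, while it is unchanged, parks the hart in wrs.nto:
 *
 *	u32 v;
 *
 *	for (;;) {
 *		v = READ_ONCE(*flag);
 *		if (v != 0)
 *			break;
 *		__cmpwait_relaxed(flag, v);	// wait while *flag still == v
 *	}
 */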
#endif /* CONFIG_RISCV_ISA_ZAWRS */

#endif /* _ASM_RISCV_CMPXCHG_H */