xref: /linux/arch/riscv/include/asm/cmpxchg.h (revision 119b1e61a769aa98e68599f44721661a4d8c55f3)
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2014 Regents of the University of California
 */

#ifndef _ASM_RISCV_CMPXCHG_H
#define _ASM_RISCV_CMPXCHG_H

#include <linux/bug.h>

#include <asm/alternative-macros.h>
#include <asm/fence.h>
#include <asm/hwcap.h>
#include <asm/insn-def.h>
#include <asm/cpufeature-macros.h>
#include <asm/processor.h>

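/*
 * Sub-word (8-bit/16-bit) exchange.  With Zabha the swap is a single
 * amoswap.b/amoswap.h; otherwise the value is exchanged through an LR/SC
 * loop on the naturally aligned 32-bit word that contains it.  As a worked
 * example of the shift/mask arithmetic below, for a u8 at an address with
 * ((ulong)p & 0x3) == 1:
 *
 *	__s    = (1 & (0x4 - 1)) * BITS_PER_BYTE = 8
 *	__mask = GENMASK(7, 0) << 8              = 0xff00
 *	__newx = (ulong)(n) << 8
 *
 * so only bits 15..8 of the containing word are modified, and the previous
 * byte is recovered with (__retx & __mask) >> __s.
 */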
#define __arch_xchg_masked(sc_sfx, swap_sfx, prepend, sc_append,		\
			   swap_append, r, p, n)				\
({										\
	if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) &&				\
	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA)) {		\
		__asm__ __volatile__ (						\
			prepend							\
			"	amoswap" swap_sfx " %0, %z2, %1\n"		\
			swap_append						\
			: "=&r" (r), "+A" (*(p))				\
			: "rJ" (n)						\
			: "memory");						\
	} else {								\
		u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3);			\
		ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE;	\
		ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0)	\
				<< __s;						\
		ulong __newx = (ulong)(n) << __s;				\
		ulong __retx;							\
		ulong __rc;							\
										\
		__asm__ __volatile__ (						\
		       prepend							\
		       PREFETCHW_ASM(%5)					\
		       "0:	lr.w %0, %2\n"					\
		       "	and  %1, %0, %z4\n"				\
		       "	or   %1, %1, %z3\n"				\
		       "	sc.w" sc_sfx " %1, %1, %2\n"			\
		       "	bnez %1, 0b\n"					\
		       sc_append						\
		       : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b))	\
		       : "rJ" (__newx), "rJ" (~__mask), "rJ" (__ptr32b)		\
		       : "memory");						\
										\
		r = (__typeof__(*(p)))((__retx & __mask) >> __s);		\
	}									\
})

#define __arch_xchg(sfx, prepend, append, r, p, n)			\
({									\
	__asm__ __volatile__ (						\
		prepend							\
		"	amoswap" sfx " %0, %2, %1\n"			\
		append							\
		: "=r" (r), "+A" (*(p))					\
		: "r" (n)						\
		: "memory");						\
})

#define _arch_xchg(ptr, new, sc_sfx, swap_sfx, prepend,			\
		   sc_append, swap_append)				\
({									\
	__typeof__(ptr) __ptr = (ptr);					\
	__typeof__(*(__ptr)) __new = (new);				\
	__typeof__(*(__ptr)) __ret;					\
									\
	switch (sizeof(*__ptr)) {					\
	case 1:								\
		__arch_xchg_masked(sc_sfx, ".b" swap_sfx,		\
				   prepend, sc_append, swap_append,	\
				   __ret, __ptr, __new);		\
		break;							\
	case 2:								\
		__arch_xchg_masked(sc_sfx, ".h" swap_sfx,		\
				   prepend, sc_append, swap_append,	\
				   __ret, __ptr, __new);		\
		break;							\
	case 4:								\
		__arch_xchg(".w" swap_sfx, prepend, swap_append,	\
			      __ret, __ptr, __new);			\
		break;							\
	case 8:								\
		__arch_xchg(".d" swap_sfx, prepend, swap_append,	\
			      __ret, __ptr, __new);			\
		break;							\
	default:							\
		BUILD_BUG();						\
	}								\
	(__typeof__(*(__ptr)))__ret;					\
})

#define arch_xchg_relaxed(ptr, x)					\
	_arch_xchg(ptr, x, "", "", "", "", "")

#define arch_xchg_acquire(ptr, x)					\
	_arch_xchg(ptr, x, "", "", "",					\
		   RISCV_ACQUIRE_BARRIER, RISCV_ACQUIRE_BARRIER)

#define arch_xchg_release(ptr, x)					\
	_arch_xchg(ptr, x, "", "", RISCV_RELEASE_BARRIER, "", "")

#define arch_xchg(ptr, x)						\
	_arch_xchg(ptr, x, ".rl", ".aqrl", "", RISCV_FULL_BARRIER, "")

#define xchg32(ptr, x)							\
({									\
	BUILD_BUG_ON(sizeof(*(ptr)) != 4);				\
	arch_xchg((ptr), (x));						\
})

#define xchg64(ptr, x)							\
({									\
	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
	arch_xchg((ptr), (x));						\
})
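
/*
 * Illustrative usage (example only, not taken from this header): the
 * arch_xchg*() macros above back the generic xchg() family.  A minimal
 * sketch with a hypothetical variable:
 *
 *	static unsigned long pending;	// hypothetical example state
 *	...
 *	// atomically grab and clear everything pending, fully ordered
 *	unsigned long old = arch_xchg(&pending, 0UL);
 *
 * The _relaxed form adds no ordering, _acquire/_release append or prepend
 * the corresponding fences, and the fully-ordered form uses .aqrl AMOs
 * (or sc.rl plus a trailing full fence on the masked LR/SC path).
 */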

/*
 * Atomic compare and exchange.  Compare OLD with MEM, if identical,
 * store NEW in MEM.  Return the initial value in MEM.  Success is
 * indicated by comparing RETURN with OLD.
 */
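/*
 * A typical retry loop built on that contract (illustrative only, for a
 * hypothetical counter at *p):
 *
 *	unsigned long old = READ_ONCE(*p), prev;
 *
 *	for (;;) {
 *		prev = arch_cmpxchg(p, old, old + 1);
 *		if (prev == old)	// success: *p held 'old', now holds old + 1
 *			break;
 *		old = prev;		// lost the race: retry with the fresh value
 *	}
 */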
#define __arch_cmpxchg_masked(sc_sfx, cas_sfx,					\
			      sc_prepend, sc_append,				\
			      cas_prepend, cas_append,				\
			      r, p, o, n)					\
({										\
	if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) &&				\
	    IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) &&				\
	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA) &&		\
	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)) {		\
		r = o;								\
										\
		__asm__ __volatile__ (						\
			cas_prepend							\
			"	amocas" cas_sfx " %0, %z2, %1\n"		\
			cas_append							\
			: "+&r" (r), "+A" (*(p))				\
			: "rJ" (n)						\
			: "memory");						\
	} else {								\
		u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3);			\
		ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE;	\
		ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0)	\
			       << __s;						\
		ulong __newx = (ulong)(n) << __s;				\
		ulong __oldx = (ulong)(o) << __s;				\
		ulong __retx;							\
		ulong __rc;							\
										\
		__asm__ __volatile__ (						\
			sc_prepend							\
			"0:	lr.w %0, %2\n"					\
			"	and  %1, %0, %z5\n"				\
			"	bne  %1, %z3, 1f\n"				\
			"	and  %1, %0, %z6\n"				\
			"	or   %1, %1, %z4\n"				\
			"	sc.w" sc_sfx " %1, %1, %2\n"			\
			"	bnez %1, 0b\n"					\
			sc_append							\
			"1:\n"							\
			: "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b))	\
			: "rJ" ((long)__oldx), "rJ" (__newx),			\
			  "rJ" (__mask), "rJ" (~__mask)				\
			: "memory");						\
										\
		r = (__typeof__(*(p)))((__retx & __mask) >> __s);		\
	}									\
})

#define __arch_cmpxchg(lr_sfx, sc_sfx, cas_sfx,				\
		       sc_prepend, sc_append,				\
		       cas_prepend, cas_append,				\
		       r, p, co, o, n)					\
({									\
	if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) &&			\
	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)) {	\
		r = o;							\
									\
		__asm__ __volatile__ (					\
			cas_prepend					\
			"	amocas" cas_sfx " %0, %z2, %1\n"	\
			cas_append					\
			: "+&r" (r), "+A" (*(p))			\
			: "rJ" (n)					\
			: "memory");					\
	} else {							\
		register unsigned int __rc;				\
									\
		__asm__ __volatile__ (					\
			sc_prepend					\
			"0:	lr" lr_sfx " %0, %2\n"			\
			"	bne  %0, %z3, 1f\n"			\
			"	sc" sc_sfx " %1, %z4, %2\n"		\
			"	bnez %1, 0b\n"				\
			sc_append					\
			"1:\n"						\
			: "=&r" (r), "=&r" (__rc), "+A" (*(p))		\
			: "rJ" (co o), "rJ" (n)				\
			: "memory");					\
	}								\
})

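/*
 * Note on the "co" argument of __arch_cmpxchg(): lr.w sign-extends the
 * loaded 32-bit value into its destination register, so the 4-byte case
 * below passes a (long)(int)(long) cast sequence to sign-extend the
 * expected old value the same way before the "bne" comparison; the 8-byte
 * case needs no cast and passes an empty token.
 */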
#define _arch_cmpxchg(ptr, old, new, sc_sfx, cas_sfx,			\
		      sc_prepend, sc_append,				\
		      cas_prepend, cas_append)				\
({									\
	__typeof__(ptr) __ptr = (ptr);					\
	__typeof__(*(__ptr)) __old = (old);				\
	__typeof__(*(__ptr)) __new = (new);				\
	__typeof__(*(__ptr)) __ret;					\
									\
	switch (sizeof(*__ptr)) {					\
	case 1:								\
		__arch_cmpxchg_masked(sc_sfx, ".b" cas_sfx,		\
				      sc_prepend, sc_append,		\
				      cas_prepend, cas_append,		\
				      __ret, __ptr, __old, __new);	\
		break;							\
	case 2:								\
		__arch_cmpxchg_masked(sc_sfx, ".h" cas_sfx,		\
				      sc_prepend, sc_append,		\
				      cas_prepend, cas_append,		\
				      __ret, __ptr, __old, __new);	\
		break;							\
	case 4:								\
		__arch_cmpxchg(".w", ".w" sc_sfx, ".w" cas_sfx,		\
			       sc_prepend, sc_append,			\
			       cas_prepend, cas_append,			\
			       __ret, __ptr, (long)(int)(long), __old, __new);	\
		break;							\
	case 8:								\
		__arch_cmpxchg(".d", ".d" sc_sfx, ".d" cas_sfx,		\
			       sc_prepend, sc_append,			\
			       cas_prepend, cas_append,			\
			       __ret, __ptr, /**/, __old, __new);	\
		break;							\
	default:							\
		BUILD_BUG();						\
	}								\
	(__typeof__(*(__ptr)))__ret;					\
})

/*
 * These macros are here to improve the readability of the arch_cmpxchg_XXX()
 * macros.
 */
#define SC_SFX(x)	x
#define CAS_SFX(x)	x
#define SC_PREPEND(x)	x
#define SC_APPEND(x)	x
#define CAS_PREPEND(x)	x
#define CAS_APPEND(x)	x

#define arch_cmpxchg_relaxed(ptr, o, n)					\
	_arch_cmpxchg((ptr), (o), (n),					\
		      SC_SFX(""), CAS_SFX(""),				\
		      SC_PREPEND(""), SC_APPEND(""),			\
		      CAS_PREPEND(""), CAS_APPEND(""))

#define arch_cmpxchg_acquire(ptr, o, n)					\
	_arch_cmpxchg((ptr), (o), (n),					\
		      SC_SFX(""), CAS_SFX(""),				\
		      SC_PREPEND(""), SC_APPEND(RISCV_ACQUIRE_BARRIER),	\
		      CAS_PREPEND(""), CAS_APPEND(RISCV_ACQUIRE_BARRIER))

#define arch_cmpxchg_release(ptr, o, n)					\
	_arch_cmpxchg((ptr), (o), (n),					\
		      SC_SFX(""), CAS_SFX(""),				\
		      SC_PREPEND(RISCV_RELEASE_BARRIER), SC_APPEND(""),	\
		      CAS_PREPEND(RISCV_RELEASE_BARRIER), CAS_APPEND(""))

#define arch_cmpxchg(ptr, o, n)						\
	_arch_cmpxchg((ptr), (o), (n),					\
		      SC_SFX(".rl"), CAS_SFX(".aqrl"),			\
		      SC_PREPEND(""), SC_APPEND(RISCV_FULL_BARRIER),	\
		      CAS_PREPEND(""), CAS_APPEND(""))
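
/*
 * Ordering note: on the LR/SC path the fully-ordered arch_cmpxchg() uses
 * "sc.rl" plus a trailing full fence (RISCV_FULL_BARRIER), while the Zacas
 * path relies on "amocas.aqrl" alone, which already provides the required
 * full ordering; hence CAS_APPEND("") above.
 */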

#define arch_cmpxchg_local(ptr, o, n)					\
	arch_cmpxchg_relaxed((ptr), (o), (n))

#define arch_cmpxchg64(ptr, o, n)					\
({									\
	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
	arch_cmpxchg((ptr), (o), (n));					\
})

#define arch_cmpxchg64_local(ptr, o, n)					\
({									\
	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
	arch_cmpxchg_relaxed((ptr), (o), (n));				\
})

#define arch_cmpxchg64_relaxed(ptr, o, n)				\
({									\
	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
	arch_cmpxchg_relaxed((ptr), (o), (n));				\
})

#define arch_cmpxchg64_acquire(ptr, o, n)				\
({									\
	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
	arch_cmpxchg_acquire((ptr), (o), (n));				\
})

#define arch_cmpxchg64_release(ptr, o, n)				\
({									\
	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
	arch_cmpxchg_release((ptr), (o), (n));				\
})

#if defined(CONFIG_64BIT) && defined(CONFIG_RISCV_ISA_ZACAS)

#define system_has_cmpxchg128()        riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)

union __u128_halves {
	u128 full;
	struct {
		u64 low, high;
	};
};

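/*
 * amocas.q operates on even/odd register pairs, which is why the macro
 * below pins its operands to explicit registers: the new value lives in
 * t1/t2 (x6/x7) and the expected value in t3/t4 (x28/x29); the old value
 * comes back in the t3/t4 pair and is reassembled into a u128.
 */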
#define __arch_cmpxchg128(p, o, n, cas_sfx)					\
({										\
	__typeof__(*(p)) __o = (o);						\
	union __u128_halves __hn = { .full = (n) };				\
	union __u128_halves __ho = { .full = (__o) };				\
	register unsigned long t1 asm ("t1") = __hn.low;			\
	register unsigned long t2 asm ("t2") = __hn.high;			\
	register unsigned long t3 asm ("t3") = __ho.low;			\
	register unsigned long t4 asm ("t4") = __ho.high;			\
										\
	__asm__ __volatile__ (							\
		 "       amocas.q" cas_sfx " %0, %z3, %2"			\
		 : "+&r" (t3), "+&r" (t4), "+A" (*(p))				\
		 : "rJ" (t1), "rJ" (t2)						\
		 : "memory");							\
										\
		 ((u128)t4 << 64) | t3;						\
})

#define arch_cmpxchg128(ptr, o, n)						\
	__arch_cmpxchg128((ptr), (o), (n), ".aqrl")

#define arch_cmpxchg128_local(ptr, o, n)					\
	__arch_cmpxchg128((ptr), (o), (n), "")

#endif /* CONFIG_64BIT && CONFIG_RISCV_ISA_ZACAS */

#ifdef CONFIG_RISCV_ISA_ZAWRS
/*
 * Despite wrs.nto being "WRS-with-no-timeout", even in the absence of changes
 * to @val we expect it to still terminate within a "reasonable" amount of
 * time: for some implementation-specific reason, because of a pending,
 * locally-enabled interrupt, or because it has been configured to raise an
 * illegal instruction exception.
 */
static __always_inline void __cmpwait(volatile void *ptr,
				      unsigned long val,
				      int size)
{
	unsigned long tmp;

	u32 *__ptr32b;
	ulong __s, __val, __mask;

	asm goto(ALTERNATIVE("j %l[no_zawrs]", "nop",
			     0, RISCV_ISA_EXT_ZAWRS, 1)
		 : : : : no_zawrs);

	switch (size) {
	case 1:
		__ptr32b = (u32 *)((ulong)(ptr) & ~0x3);
		__s = ((ulong)(ptr) & 0x3) * BITS_PER_BYTE;
		__val = val << __s;
		__mask = 0xff << __s;

		asm volatile(
		"	lr.w	%0, %1\n"
		"	and	%0, %0, %3\n"
		"	xor	%0, %0, %2\n"
		"	bnez	%0, 1f\n"
			ZAWRS_WRS_NTO "\n"
		"1:"
		: "=&r" (tmp), "+A" (*(__ptr32b))
		: "r" (__val), "r" (__mask)
		: "memory");
		break;
	case 2:
		__ptr32b = (u32 *)((ulong)(ptr) & ~0x3);
		__s = ((ulong)(ptr) & 0x2) * BITS_PER_BYTE;
		__val = val << __s;
		__mask = 0xffff << __s;

		asm volatile(
		"	lr.w	%0, %1\n"
		"	and	%0, %0, %3\n"
		"	xor	%0, %0, %2\n"
		"	bnez	%0, 1f\n"
			ZAWRS_WRS_NTO "\n"
		"1:"
		: "=&r" (tmp), "+A" (*(__ptr32b))
		: "r" (__val), "r" (__mask)
		: "memory");
		break;
	case 4:
		asm volatile(
		"	lr.w	%0, %1\n"
		"	xor	%0, %0, %2\n"
		"	bnez	%0, 1f\n"
			ZAWRS_WRS_NTO "\n"
		"1:"
		: "=&r" (tmp), "+A" (*(u32 *)ptr)
		: "r" (val));
		break;
#if __riscv_xlen == 64
	case 8:
		asm volatile(
		"	lr.d	%0, %1\n"
		"	xor	%0, %0, %2\n"
		"	bnez	%0, 1f\n"
			ZAWRS_WRS_NTO "\n"
		"1:"
		: "=&r" (tmp), "+A" (*(u64 *)ptr)
		: "r" (val));
		break;
#endif
	default:
		BUILD_BUG();
	}

	return;

no_zawrs:
	asm volatile(RISCV_PAUSE : : : "memory");
}

#define __cmpwait_relaxed(ptr, val) \
	__cmpwait((ptr), (unsigned long)(val), sizeof(*(ptr)))
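
/*
 * __cmpwait_relaxed() is intended as the back end for conditional-wait
 * loops (e.g. a smp_cond_load_relaxed() implementation): it stalls in
 * wrs.nto until the word containing *ptr may have been written or an
 * interrupt becomes pending, and it degrades to a plain RISCV_PAUSE when
 * Zawrs is not available.
 */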
#endif /* CONFIG_RISCV_ISA_ZAWRS */

#endif /* _ASM_RISCV_CMPXCHG_H */