/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2014 Regents of the University of California
 */

#ifndef _ASM_RISCV_CMPXCHG_H
#define _ASM_RISCV_CMPXCHG_H

#include <linux/bug.h>

#include <asm/alternative-macros.h>
#include <asm/fence.h>
#include <asm/hwcap.h>
#include <asm/insn-def.h>
#include <asm/cpufeature-macros.h>

#define __arch_xchg_masked(sc_sfx, swap_sfx, prepend, sc_append,	\
			   swap_append, r, p, n)			\
({									\
	if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) &&			\
	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA)) {	\
		__asm__ __volatile__ (					\
			prepend						\
			"	amoswap" swap_sfx " %0, %z2, %1\n"	\
			swap_append					\
			: "=&r" (r), "+A" (*(p))			\
			: "rJ" (n)					\
			: "memory");					\
	} else {							\
		u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3);		\
		ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \
		ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \
				<< __s;					\
		ulong __newx = (ulong)(n) << __s;			\
		ulong __retx;						\
		ulong __rc;						\
									\
		__asm__ __volatile__ (					\
			prepend						\
			"0:	lr.w %0, %2\n"				\
			"	and %1, %0, %z4\n"			\
			"	or %1, %1, %z3\n"			\
			"	sc.w" sc_sfx " %1, %1, %2\n"		\
			"	bnez %1, 0b\n"				\
			sc_append					\
			: "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
			: "rJ" (__newx), "rJ" (~__mask)			\
			: "memory");					\
									\
		r = (__typeof__(*(p)))((__retx & __mask) >> __s);	\
	}								\
})

#define __arch_xchg(sfx, prepend, append, r, p, n)			\
({									\
	__asm__ __volatile__ (						\
		prepend							\
		"	amoswap" sfx " %0, %2, %1\n"			\
		append							\
		: "=r" (r), "+A" (*(p))					\
		: "r" (n)						\
		: "memory");						\
})

#define _arch_xchg(ptr, new, sc_sfx, swap_sfx, prepend,			\
		   sc_append, swap_append)				\
({									\
	__typeof__(ptr) __ptr = (ptr);					\
	__typeof__(*(__ptr)) __new = (new);				\
	__typeof__(*(__ptr)) __ret;					\
									\
	switch (sizeof(*__ptr)) {					\
	case 1:								\
		__arch_xchg_masked(sc_sfx, ".b" swap_sfx,		\
				   prepend, sc_append, swap_append,	\
				   __ret, __ptr, __new);		\
		break;							\
	case 2:								\
		__arch_xchg_masked(sc_sfx, ".h" swap_sfx,		\
				   prepend, sc_append, swap_append,	\
				   __ret, __ptr, __new);		\
		break;							\
	case 4:								\
		__arch_xchg(".w" swap_sfx, prepend, swap_append,	\
			    __ret, __ptr, __new);			\
		break;							\
	case 8:								\
		__arch_xchg(".d" swap_sfx, prepend, swap_append,	\
			    __ret, __ptr, __new);			\
		break;							\
	default:							\
		BUILD_BUG();						\
	}								\
	(__typeof__(*(__ptr)))__ret;					\
})

#define arch_xchg_relaxed(ptr, x)					\
	_arch_xchg(ptr, x, "", "", "", "", "")

#define arch_xchg_acquire(ptr, x)					\
	_arch_xchg(ptr, x, "", "", "",					\
		   RISCV_ACQUIRE_BARRIER, RISCV_ACQUIRE_BARRIER)

#define arch_xchg_release(ptr, x)					\
	_arch_xchg(ptr, x, "", "", RISCV_RELEASE_BARRIER, "", "")

#define arch_xchg(ptr, x)						\
	_arch_xchg(ptr, x, ".rl", ".aqrl", "", RISCV_FULL_BARRIER, "")

#define xchg32(ptr, x)							\
({									\
	BUILD_BUG_ON(sizeof(*(ptr)) != 4);				\
	arch_xchg((ptr), (x));						\
})

#define xchg64(ptr, x)							\
({									\
	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
	arch_xchg((ptr), (x));						\
})
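
/*
 * Usage sketch: arch_xchg() atomically stores a new value and returns
 * whatever the location held beforehand.  The variable names below are
 * hypothetical; most callers reach this through the generic xchg() wrapper
 * rather than calling arch_xchg() directly.
 *
 *	unsigned long owner = 0;
 *	unsigned long prev;
 *
 *	prev = arch_xchg(&owner, 1UL);	// fully-ordered variant
 *	if (prev == 0)
 *		;			// the slot was free and is now ours
 */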

/*
 * Atomic compare and exchange. Compare OLD with MEM, if identical,
 * store NEW in MEM. Return the initial value in MEM. Success is
 * indicated by comparing RETURN with OLD.
 */
#define __arch_cmpxchg_masked(sc_sfx, cas_sfx,				\
			      sc_prepend, sc_append,			\
			      cas_prepend, cas_append,			\
			      r, p, o, n)				\
({									\
	if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) &&			\
	    IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) &&			\
	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA) &&	\
	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)) {	\
		r = o;							\
									\
		__asm__ __volatile__ (					\
			cas_prepend					\
			"	amocas" cas_sfx " %0, %z2, %1\n"	\
			cas_append					\
			: "+&r" (r), "+A" (*(p))			\
			: "rJ" (n)					\
			: "memory");					\
	} else {							\
		u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3);		\
		ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \
		ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \
				<< __s;					\
		ulong __newx = (ulong)(n) << __s;			\
		ulong __oldx = (ulong)(o) << __s;			\
		ulong __retx;						\
		ulong __rc;						\
									\
		__asm__ __volatile__ (					\
			sc_prepend					\
			"0:	lr.w %0, %2\n"				\
			"	and %1, %0, %z5\n"			\
			"	bne %1, %z3, 1f\n"			\
			"	and %1, %0, %z6\n"			\
			"	or %1, %1, %z4\n"			\
			"	sc.w" sc_sfx " %1, %1, %2\n"		\
			"	bnez %1, 0b\n"				\
			sc_append					\
			"1:\n"						\
			: "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
			: "rJ" ((long)__oldx), "rJ" (__newx),		\
			  "rJ" (__mask), "rJ" (~__mask)			\
			: "memory");					\
									\
		r = (__typeof__(*(p)))((__retx & __mask) >> __s);	\
	}								\
})

#define __arch_cmpxchg(lr_sfx, sc_sfx, cas_sfx,				\
		       sc_prepend, sc_append,				\
		       cas_prepend, cas_append,				\
		       r, p, co, o, n)					\
({									\
	if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) &&			\
	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)) {	\
		r = o;							\
									\
		__asm__ __volatile__ (					\
			cas_prepend					\
			"	amocas" cas_sfx " %0, %z2, %1\n"	\
			cas_append					\
			: "+&r" (r), "+A" (*(p))			\
			: "rJ" (n)					\
			: "memory");					\
	} else {							\
		register unsigned int __rc;				\
									\
		__asm__ __volatile__ (					\
			sc_prepend					\
			"0:	lr" lr_sfx " %0, %2\n"			\
			"	bne %0, %z3, 1f\n"			\
			"	sc" sc_sfx " %1, %z4, %2\n"		\
			"	bnez %1, 0b\n"				\
			sc_append					\
			"1:\n"						\
			: "=&r" (r), "=&r" (__rc), "+A" (*(p))		\
			: "rJ" (co o), "rJ" (n)				\
			: "memory");					\
	}								\
})

#define _arch_cmpxchg(ptr, old, new, sc_sfx, cas_sfx,			\
		      sc_prepend, sc_append,				\
		      cas_prepend, cas_append)				\
({									\
	__typeof__(ptr) __ptr = (ptr);					\
	__typeof__(*(__ptr)) __old = (old);				\
	__typeof__(*(__ptr)) __new = (new);				\
	__typeof__(*(__ptr)) __ret;					\
									\
	switch (sizeof(*__ptr)) {					\
	case 1:								\
		__arch_cmpxchg_masked(sc_sfx, ".b" cas_sfx,		\
				      sc_prepend, sc_append,		\
				      cas_prepend, cas_append,		\
				      __ret, __ptr, __old, __new);	\
		break;							\
	case 2:								\
		__arch_cmpxchg_masked(sc_sfx, ".h" cas_sfx,		\
				      sc_prepend, sc_append,		\
				      cas_prepend, cas_append,		\
				      __ret, __ptr, __old, __new);	\
		break;							\
	case 4:								\
		__arch_cmpxchg(".w", ".w" sc_sfx, ".w" cas_sfx,		\
			       sc_prepend, sc_append,			\
			       cas_prepend, cas_append,			\
			       __ret, __ptr, (long), __old, __new);	\
		break;							\
	case 8:								\
		__arch_cmpxchg(".d", ".d" sc_sfx, ".d" cas_sfx,		\
			       sc_prepend, sc_append,			\
			       cas_prepend, cas_append,			\
			       __ret, __ptr, /**/, __old, __new);	\
		break;							\
	default:							\
		BUILD_BUG();						\
	}								\
	(__typeof__(*(__ptr)))__ret;					\
})
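
/*
 * Worked example for the sub-word LR/SC fallbacks above
 * (__arch_xchg_masked() and __arch_cmpxchg_masked()), using a made-up
 * address.  For a u8 at address 0x1006:
 *
 *	__ptr32b = (u32 *)0x1004		aligned containing word
 *	__s      = (0x1006 & 0x3) * 8 = 16	bit offset within that word
 *	__mask   = 0xff << 16 = 0x00ff0000	bits that belong to the u8
 *
 * The loop loads the whole 32-bit word with lr.w, masks out (and, for
 * cmpxchg, compares) the byte of interest, merges in the shifted new value,
 * and retries sc.w until it succeeds; the old byte is recovered with
 * (__retx & __mask) >> __s.
 */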

/*
 * These macros are here to improve the readability of the arch_cmpxchg_XXX()
 * macros.
 */
#define SC_SFX(x)	x
#define CAS_SFX(x)	x
#define SC_PREPEND(x)	x
#define SC_APPEND(x)	x
#define CAS_PREPEND(x)	x
#define CAS_APPEND(x)	x

#define arch_cmpxchg_relaxed(ptr, o, n)					\
	_arch_cmpxchg((ptr), (o), (n),					\
		      SC_SFX(""), CAS_SFX(""),				\
		      SC_PREPEND(""), SC_APPEND(""),			\
		      CAS_PREPEND(""), CAS_APPEND(""))

#define arch_cmpxchg_acquire(ptr, o, n)					\
	_arch_cmpxchg((ptr), (o), (n),					\
		      SC_SFX(""), CAS_SFX(""),				\
		      SC_PREPEND(""), SC_APPEND(RISCV_ACQUIRE_BARRIER),	\
		      CAS_PREPEND(""), CAS_APPEND(RISCV_ACQUIRE_BARRIER))

#define arch_cmpxchg_release(ptr, o, n)					\
	_arch_cmpxchg((ptr), (o), (n),					\
		      SC_SFX(""), CAS_SFX(""),				\
		      SC_PREPEND(RISCV_RELEASE_BARRIER), SC_APPEND(""),	\
		      CAS_PREPEND(RISCV_RELEASE_BARRIER), CAS_APPEND(""))

#define arch_cmpxchg(ptr, o, n)						\
	_arch_cmpxchg((ptr), (o), (n),					\
		      SC_SFX(".rl"), CAS_SFX(".aqrl"),			\
		      SC_PREPEND(""), SC_APPEND(RISCV_FULL_BARRIER),	\
		      CAS_PREPEND(""), CAS_APPEND(""))

#define arch_cmpxchg_local(ptr, o, n)					\
	arch_cmpxchg_relaxed((ptr), (o), (n))

#define arch_cmpxchg64(ptr, o, n)					\
({									\
	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
	arch_cmpxchg((ptr), (o), (n));					\
})

#define arch_cmpxchg64_local(ptr, o, n)					\
({									\
	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
	arch_cmpxchg_relaxed((ptr), (o), (n));				\
})

#define arch_cmpxchg64_relaxed(ptr, o, n)				\
({									\
	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
	arch_cmpxchg_relaxed((ptr), (o), (n));				\
})

#define arch_cmpxchg64_acquire(ptr, o, n)				\
({									\
	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
	arch_cmpxchg_acquire((ptr), (o), (n));				\
})

#define arch_cmpxchg64_release(ptr, o, n)				\
({									\
	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
	arch_cmpxchg_release((ptr), (o), (n));				\
})

#if defined(CONFIG_64BIT) && defined(CONFIG_RISCV_ISA_ZACAS)

#define system_has_cmpxchg128()	riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)

union __u128_halves {
	u128 full;
	struct {
		u64 low, high;
	};
};

#define __arch_cmpxchg128(p, o, n, cas_sfx)				\
({									\
	__typeof__(*(p)) __o = (o);					\
	union __u128_halves __hn = { .full = (n) };			\
	union __u128_halves __ho = { .full = (__o) };			\
	register unsigned long t1 asm ("t1") = __hn.low;		\
	register unsigned long t2 asm ("t2") = __hn.high;		\
	register unsigned long t3 asm ("t3") = __ho.low;		\
	register unsigned long t4 asm ("t4") = __ho.high;		\
									\
	__asm__ __volatile__ (						\
		"	amocas.q" cas_sfx " %0, %z3, %2"		\
		: "+&r" (t3), "+&r" (t4), "+A" (*(p))			\
		: "rJ" (t1), "rJ" (t2)					\
		: "memory");						\
									\
	((u128)t4 << 64) | t3;						\
})

#define arch_cmpxchg128(ptr, o, n)					\
	__arch_cmpxchg128((ptr), (o), (n), ".aqrl")

#define arch_cmpxchg128_local(ptr, o, n)				\
	__arch_cmpxchg128((ptr), (o), (n), "")

#endif /* CONFIG_64BIT && CONFIG_RISCV_ISA_ZACAS */
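
/*
 * Usage sketch for the "compare RETURN with OLD" convention documented
 * above: a typical retry loop built on arch_cmpxchg().  The helper and
 * variable names are hypothetical; real kernel code would usually use the
 * generic cmpxchg()/try_cmpxchg() wrappers instead.
 *
 *	static inline void add_two(unsigned long *p)
 *	{
 *		unsigned long old = READ_ONCE(*p), seen;
 *
 *		do {
 *			seen = old;
 *			old = arch_cmpxchg(p, seen, seen + 2);
 *		} while (old != seen);	// success iff RETURN == OLD
 *	}
 */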

#ifdef CONFIG_RISCV_ISA_ZAWRS
/*
 * Despite wrs.nto being "WRS-with-no-timeout", in the absence of changes to
 * @val we expect it to still terminate within a "reasonable" amount of time
 * for an implementation-specific other reason, a pending, locally-enabled
 * interrupt, or because it has been configured to raise an illegal
 * instruction exception.
 */
static __always_inline void __cmpwait(volatile void *ptr,
				      unsigned long val,
				      int size)
{
	unsigned long tmp;

	asm goto(ALTERNATIVE("j %l[no_zawrs]", "nop",
			     0, RISCV_ISA_EXT_ZAWRS, 1)
		 : : : : no_zawrs);

	switch (size) {
	case 1:
		fallthrough;
	case 2:
		/* RISC-V doesn't have lr instructions on byte and half-word. */
		goto no_zawrs;
	case 4:
		asm volatile(
		"	lr.w	%0, %1\n"
		"	xor	%0, %0, %2\n"
		"	bnez	%0, 1f\n"
			ZAWRS_WRS_NTO "\n"
		"1:"
		: "=&r" (tmp), "+A" (*(u32 *)ptr)
		: "r" (val));
		break;
#if __riscv_xlen == 64
	case 8:
		asm volatile(
		"	lr.d	%0, %1\n"
		"	xor	%0, %0, %2\n"
		"	bnez	%0, 1f\n"
			ZAWRS_WRS_NTO "\n"
		"1:"
		: "=&r" (tmp), "+A" (*(u64 *)ptr)
		: "r" (val));
		break;
#endif
	default:
		BUILD_BUG();
	}

	return;

no_zawrs:
	asm volatile(RISCV_PAUSE : : : "memory");
}

#define __cmpwait_relaxed(ptr, val) \
	__cmpwait((ptr), (unsigned long)(val), sizeof(*(ptr)))
#endif

#endif /* _ASM_RISCV_CMPXCHG_H */