/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2014 Regents of the University of California
 */

#ifndef _ASM_RISCV_CMPXCHG_H
#define _ASM_RISCV_CMPXCHG_H

#include <linux/bug.h>

#include <asm/alternative-macros.h>
#include <asm/fence.h>
#include <asm/hwcap.h>
#include <asm/insn-def.h>
#include <asm/cpufeature-macros.h>
#include <asm/processor.h>
#include <asm/errata_list.h>

#define __arch_xchg_masked(sc_sfx, swap_sfx, prepend, sc_append,	\
			   swap_append, r, p, n)			\
({									\
	if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) &&			\
	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA)) {	\
		__asm__ __volatile__ (					\
			prepend						\
			"	amoswap" swap_sfx " %0, %z2, %1\n"	\
			swap_append					\
			: "=&r" (r), "+A" (*(p))			\
			: "rJ" (n)					\
			: "memory");					\
	} else {							\
		u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3);		\
		ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \
		ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \
				<< __s;					\
		ulong __newx = (ulong)(n) << __s;			\
		ulong __retx;						\
		ulong __rc;						\
									\
		__asm__ __volatile__ (					\
			prepend						\
			PREFETCHW_ASM(%5)				\
			"0:	lr.w %0, %2\n"				\
			"	and  %1, %0, %z4\n"			\
			"	or   %1, %1, %z3\n"			\
			"	sc.w" sc_sfx " %1, %1, %2\n"		\
			"	bnez %1, 0b\n"				\
			sc_append					\
			: "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
			: "rJ" (__newx), "rJ" (~__mask), "rJ" (__ptr32b) \
			: "memory");					\
									\
		r = (__typeof__(*(p)))((__retx & __mask) >> __s);	\
	}								\
})

#define __arch_xchg(sfx, prepend, append, r, p, n)			\
({									\
	__asm__ __volatile__ (						\
		prepend							\
		"	amoswap" sfx " %0, %2, %1\n"			\
		append							\
		: "=r" (r), "+A" (*(p))					\
		: "r" (n)						\
		: "memory");						\
})

#define _arch_xchg(ptr, new, sc_sfx, swap_sfx, prepend,			\
		   sc_append, swap_append)				\
({									\
	__typeof__(ptr) __ptr = (ptr);					\
	__typeof__(*(__ptr)) __new = (new);				\
	__typeof__(*(__ptr)) __ret;					\
									\
	switch (sizeof(*__ptr)) {					\
	case 1:								\
		__arch_xchg_masked(sc_sfx, ".b" swap_sfx,		\
				   prepend, sc_append, swap_append,	\
				   __ret, __ptr, __new);		\
		break;							\
	case 2:								\
		__arch_xchg_masked(sc_sfx, ".h" swap_sfx,		\
				   prepend, sc_append, swap_append,	\
				   __ret, __ptr, __new);		\
		break;							\
	case 4:								\
		__arch_xchg(".w" swap_sfx, prepend, swap_append,	\
			    __ret, __ptr, __new);			\
		break;							\
	case 8:								\
		__arch_xchg(".d" swap_sfx, prepend, swap_append,	\
			    __ret, __ptr, __new);			\
		break;							\
	default:							\
		BUILD_BUG();						\
	}								\
	(__typeof__(*(__ptr)))__ret;					\
})

#define arch_xchg_relaxed(ptr, x)					\
	_arch_xchg(ptr, x, "", "", "", "", "")

#define arch_xchg_acquire(ptr, x)					\
	_arch_xchg(ptr, x, "", "", "",					\
		   RISCV_ACQUIRE_BARRIER, RISCV_ACQUIRE_BARRIER)

#define arch_xchg_release(ptr, x)					\
	_arch_xchg(ptr, x, "", "", RISCV_RELEASE_BARRIER, "", "")

#define arch_xchg(ptr, x)						\
	_arch_xchg(ptr, x, ".rl", ".aqrl", "", RISCV_FULL_BARRIER, "")

#define xchg32(ptr, x)							\
({									\
	BUILD_BUG_ON(sizeof(*(ptr)) != 4);				\
	arch_xchg((ptr), (x));						\
})

#define xchg64(ptr, x)							\
({									\
	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
	arch_xchg((ptr), (x));						\
})
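/*
 * Illustrative sketch (comment only, not compiled): how a caller uses the
 * xchg() family above, and how the masked fallback derives its container
 * word, shift and mask for a sub-word swap. The variable names below are
 * hypothetical and exist only for this example.
 *
 *	u16 *p = ..., old;
 *
 *	old = arch_xchg(p, (u16)0xabcd);	// fully ordered swap
 *
 *	// Without Zabha, the masked path rewrites the 2-byte access as an
 *	// lr.w/sc.w loop on the naturally aligned 32-bit word holding *p:
 *	u32 *word = (u32 *)((unsigned long)p & ~0x3UL);
 *	unsigned long shift = ((unsigned long)p & 0x2UL) * BITS_PER_BYTE;
 *	unsigned long mask  = 0xffffUL << shift;
 *	// The new value is shifted into place, merged under ~mask, and the
 *	// previous bytes are recovered with (ret & mask) >> shift.
 */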
/*
 * Atomic compare and exchange. Compare OLD with MEM, if identical,
 * store NEW in MEM. Return the initial value in MEM. Success is
 * indicated by comparing RETURN with OLD.
 */
#define __arch_cmpxchg_masked(sc_sfx, cas_sfx,				\
			      sc_prepend, sc_append,			\
			      cas_prepend, cas_append,			\
			      r, p, o, n)				\
({									\
	if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) &&			\
	    IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) &&			\
	    IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZACAS) &&			\
	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA) &&	\
	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)) {	\
		r = o;							\
									\
		__asm__ __volatile__ (					\
			cas_prepend					\
			"	amocas" cas_sfx " %0, %z2, %1\n"	\
			cas_append					\
			: "+&r" (r), "+A" (*(p))			\
			: "rJ" (n)					\
			: "memory");					\
	} else {							\
		u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3);		\
		ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \
		ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \
				<< __s;					\
		ulong __newx = (ulong)(n) << __s;			\
		ulong __oldx = (ulong)(o) << __s;			\
		ulong __retx;						\
		ulong __rc;						\
									\
		__asm__ __volatile__ (					\
			sc_prepend					\
			"0:	lr.w %0, %2\n"				\
			"	and  %1, %0, %z5\n"			\
			"	bne  %1, %z3, 1f\n"			\
			"	and  %1, %0, %z6\n"			\
			"	or   %1, %1, %z4\n"			\
			"	sc.w" sc_sfx " %1, %1, %2\n"		\
			"	bnez %1, 0b\n"				\
			sc_append					\
			"1:\n"						\
			: "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
			: "rJ" ((long)__oldx), "rJ" (__newx),		\
			  "rJ" (__mask), "rJ" (~__mask)			\
			: "memory");					\
									\
		r = (__typeof__(*(p)))((__retx & __mask) >> __s);	\
	}								\
})

#define __arch_cmpxchg(lr_sfx, sc_sfx, cas_sfx,				\
		       sc_prepend, sc_append,				\
		       cas_prepend, cas_append,				\
		       r, p, co, o, n)					\
({									\
	if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) &&			\
	    IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZACAS) &&			\
	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)) {	\
		r = o;							\
									\
		__asm__ __volatile__ (					\
			cas_prepend					\
			"	amocas" cas_sfx " %0, %z2, %1\n"	\
			cas_append					\
			: "+&r" (r), "+A" (*(p))			\
			: "rJ" (n)					\
			: "memory");					\
	} else {							\
		register unsigned int __rc;				\
									\
		__asm__ __volatile__ (					\
			sc_prepend					\
			"0:	lr" lr_sfx " %0, %2\n"			\
			"	bne  %0, %z3, 1f\n"			\
			"	sc" sc_sfx " %1, %z4, %2\n"		\
			"	bnez %1, 0b\n"				\
			sc_append					\
			"1:\n"						\
			: "=&r" (r), "=&r" (__rc), "+A" (*(p))		\
			: "rJ" (co o), "rJ" (n)				\
			: "memory");					\
	}								\
})

#define _arch_cmpxchg(ptr, old, new, sc_sfx, cas_sfx,			\
		      sc_prepend, sc_append,				\
		      cas_prepend, cas_append)				\
({									\
	__typeof__(ptr) __ptr = (ptr);					\
	__typeof__(*(__ptr)) __old = (old);				\
	__typeof__(*(__ptr)) __new = (new);				\
	__typeof__(*(__ptr)) __ret;					\
									\
	switch (sizeof(*__ptr)) {					\
	case 1:								\
		__arch_cmpxchg_masked(sc_sfx, ".b" cas_sfx,		\
				      sc_prepend, sc_append,		\
				      cas_prepend, cas_append,		\
				      __ret, __ptr, __old, __new);	\
		break;							\
	case 2:								\
		__arch_cmpxchg_masked(sc_sfx, ".h" cas_sfx,		\
				      sc_prepend, sc_append,		\
				      cas_prepend, cas_append,		\
				      __ret, __ptr, __old, __new);	\
		break;							\
	case 4:								\
		__arch_cmpxchg(".w", ".w" sc_sfx, ".w" cas_sfx,		\
			       sc_prepend, sc_append,			\
			       cas_prepend, cas_append,			\
			       __ret, __ptr, (long)(int)(long), __old, __new); \
		break;							\
	case 8:								\
		__arch_cmpxchg(".d", ".d" sc_sfx, ".d" cas_sfx,		\
			       sc_prepend, sc_append,			\
			       cas_prepend, cas_append,			\
			       __ret, __ptr, /**/, __old, __new);	\
		break;							\
	default:							\
		BUILD_BUG();						\
	}								\
	(__typeof__(*(__ptr)))__ret;					\
})
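/*
 * Illustrative sketch (comment only, not compiled): the cmpxchg() contract
 * implemented above. The helper name is hypothetical; it only shows that
 * success is detected by comparing the returned value with the expected
 * old value.
 *
 *	static bool example_try_claim(int *flag)
 *	{
 *		// Atomically: if (*flag == 0) *flag = 1;
 *		// Returns the value *flag held before the operation, so the
 *		// claim succeeded iff that value was 0.
 *		return arch_cmpxchg(flag, 0, 1) == 0;
 *	}
 *
 * For 1- and 2-byte objects the implementation uses amocas.b/amocas.h when
 * both Zabha and Zacas are available, and otherwise falls back to an
 * lr.w/sc.w loop on the aligned 32-bit word, using the same shift/mask
 * technique as the masked xchg() above.
 */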
/*
 * These macros are here to improve the readability of the arch_cmpxchg_XXX()
 * macros.
 */
#define SC_SFX(x)	x
#define CAS_SFX(x)	x
#define SC_PREPEND(x)	x
#define SC_APPEND(x)	x
#define CAS_PREPEND(x)	x
#define CAS_APPEND(x)	x

#define arch_cmpxchg_relaxed(ptr, o, n)					\
	_arch_cmpxchg((ptr), (o), (n),					\
		      SC_SFX(""), CAS_SFX(""),				\
		      SC_PREPEND(""), SC_APPEND(""),			\
		      CAS_PREPEND(""), CAS_APPEND(""))

#define arch_cmpxchg_acquire(ptr, o, n)					\
	_arch_cmpxchg((ptr), (o), (n),					\
		      SC_SFX(""), CAS_SFX(""),				\
		      SC_PREPEND(""), SC_APPEND(RISCV_ACQUIRE_BARRIER),	\
		      CAS_PREPEND(""), CAS_APPEND(RISCV_ACQUIRE_BARRIER))

#define arch_cmpxchg_release(ptr, o, n)					\
	_arch_cmpxchg((ptr), (o), (n),					\
		      SC_SFX(""), CAS_SFX(""),				\
		      SC_PREPEND(RISCV_RELEASE_BARRIER), SC_APPEND(""),	\
		      CAS_PREPEND(RISCV_RELEASE_BARRIER), CAS_APPEND(""))

#define arch_cmpxchg(ptr, o, n)						\
	_arch_cmpxchg((ptr), (o), (n),					\
		      SC_SFX(".rl"), CAS_SFX(".aqrl"),			\
		      SC_PREPEND(""), SC_APPEND(RISCV_FULL_BARRIER),	\
		      CAS_PREPEND(""), CAS_APPEND(""))

#define arch_cmpxchg_local(ptr, o, n)					\
	arch_cmpxchg_relaxed((ptr), (o), (n))

#define arch_cmpxchg64(ptr, o, n)					\
({									\
	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
	arch_cmpxchg((ptr), (o), (n));					\
})

#define arch_cmpxchg64_local(ptr, o, n)					\
({									\
	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
	arch_cmpxchg_relaxed((ptr), (o), (n));				\
})

#define arch_cmpxchg64_relaxed(ptr, o, n)				\
({									\
	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
	arch_cmpxchg_relaxed((ptr), (o), (n));				\
})

#define arch_cmpxchg64_acquire(ptr, o, n)				\
({									\
	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
	arch_cmpxchg_acquire((ptr), (o), (n));				\
})

#define arch_cmpxchg64_release(ptr, o, n)				\
({									\
	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
	arch_cmpxchg_release((ptr), (o), (n));				\
})

#if defined(CONFIG_64BIT) && defined(CONFIG_RISCV_ISA_ZACAS) && defined(CONFIG_TOOLCHAIN_HAS_ZACAS)

#define system_has_cmpxchg128()	riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)

union __u128_halves {
	u128 full;
	struct {
		u64 low, high;
	};
};

#define __arch_cmpxchg128(p, o, n, cas_sfx)				\
({									\
	__typeof__(*(p)) __o = (o);					\
	union __u128_halves __hn = { .full = (n) };			\
	union __u128_halves __ho = { .full = (__o) };			\
	register unsigned long t1 asm ("t1") = __hn.low;		\
	register unsigned long t2 asm ("t2") = __hn.high;		\
	register unsigned long t3 asm ("t3") = __ho.low;		\
	register unsigned long t4 asm ("t4") = __ho.high;		\
									\
	__asm__ __volatile__ (						\
		"	amocas.q" cas_sfx " %0, %z3, %2"		\
		: "+&r" (t3), "+&r" (t4), "+A" (*(p))			\
		: "rJ" (t1), "rJ" (t2)					\
		: "memory");						\
									\
	((u128)t4 << 64) | t3;						\
})

#define arch_cmpxchg128(ptr, o, n)					\
	__arch_cmpxchg128((ptr), (o), (n), ".aqrl")

#define arch_cmpxchg128_local(ptr, o, n)				\
	__arch_cmpxchg128((ptr), (o), (n), "")

#endif /* CONFIG_64BIT && CONFIG_RISCV_ISA_ZACAS && CONFIG_TOOLCHAIN_HAS_ZACAS */
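/*
 * Illustrative sketch (comment only, not compiled): double-width cmpxchg on
 * the amocas.q path above. amocas.q operates on register pairs, which is why
 * the old and new values are split into the fixed t1/t2 and t3/t4 halves via
 * union __u128_halves and the result is reassembled as ((u128)t4 << 64) | t3.
 * The variables below are hypothetical.
 *
 *	u128 *p = ..., old, new;
 *
 *	if (system_has_cmpxchg128()) {
 *		old = *p;
 *		new = old + 1;
 *		old = arch_cmpxchg128(p, old, new);	// fully ordered (.aqrl)
 *	}
 *	// arch_cmpxchg128_local() performs the same operation without any
 *	// ordering suffix.
 */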
#ifdef CONFIG_RISCV_ISA_ZAWRS
/*
 * Despite wrs.nto being "WRS-with-no-timeout", in the absence of changes to
 * @val we expect it to still terminate within a "reasonable" amount of time
 * for an implementation-specific other reason, a pending, locally-enabled
 * interrupt, or because it has been configured to raise an illegal
 * instruction exception.
 */
static __always_inline void __cmpwait(volatile void *ptr,
				      unsigned long val,
				      int size)
{
	unsigned long tmp;

	u32 *__ptr32b;
	ulong __s, __val, __mask;

	asm goto(ALTERNATIVE("j %l[no_zawrs]", "nop",
			     0, RISCV_ISA_EXT_ZAWRS, 1)
		 : : : : no_zawrs);

	switch (size) {
	case 1:
		__ptr32b = (u32 *)((ulong)(ptr) & ~0x3);
		__s = ((ulong)(ptr) & 0x3) * BITS_PER_BYTE;
		__val = val << __s;
		__mask = 0xff << __s;

		asm volatile(
		"	lr.w	%0, %1\n"
		"	and	%0, %0, %3\n"
		"	xor	%0, %0, %2\n"
		"	bnez	%0, 1f\n"
			ZAWRS_WRS_NTO "\n"
		"1:"
		: "=&r" (tmp), "+A" (*(__ptr32b))
		: "r" (__val), "r" (__mask)
		: "memory");
		break;
	case 2:
		__ptr32b = (u32 *)((ulong)(ptr) & ~0x3);
		__s = ((ulong)(ptr) & 0x2) * BITS_PER_BYTE;
		__val = val << __s;
		__mask = 0xffff << __s;

		asm volatile(
		"	lr.w	%0, %1\n"
		"	and	%0, %0, %3\n"
		"	xor	%0, %0, %2\n"
		"	bnez	%0, 1f\n"
			ZAWRS_WRS_NTO "\n"
		"1:"
		: "=&r" (tmp), "+A" (*(__ptr32b))
		: "r" (__val), "r" (__mask)
		: "memory");
		break;
	case 4:
		asm volatile(
		"	lr.w	%0, %1\n"
		"	xor	%0, %0, %2\n"
		"	bnez	%0, 1f\n"
			ZAWRS_WRS_NTO "\n"
		"1:"
		: "=&r" (tmp), "+A" (*(u32 *)ptr)
		: "r" (val));
		break;
#if __riscv_xlen == 64
	case 8:
		asm volatile(
		"	lr.d	%0, %1\n"
		"	xor	%0, %0, %2\n"
		"	bnez	%0, 1f\n"
			ZAWRS_WRS_NTO "\n"
		"1:"
		: "=&r" (tmp), "+A" (*(u64 *)ptr)
		: "r" (val));
		break;
#endif
	default:
		BUILD_BUG();
	}

	return;

no_zawrs:
	ALT_RISCV_PAUSE();
}

#define __cmpwait_relaxed(ptr, val) \
	__cmpwait((ptr), (unsigned long)(val), sizeof(*(ptr)))
#endif

#endif /* _ASM_RISCV_CMPXCHG_H */
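/*
 * Illustrative sketch (comment only, not compiled): __cmpwait_relaxed() is
 * the building block for "wait until this location changes" loops, e.g. the
 * smp_cond_load_relaxed() flavour in <asm/barrier.h>. A polling loop built
 * on it looks roughly like the hypothetical helper below:
 *
 *	static inline u32 example_wait_for_nonzero(u32 *p)
 *	{
 *		u32 val;
 *
 *		for (;;) {
 *			val = READ_ONCE(*p);
 *			if (val)
 *				break;
 *			// Re-reads *p under an lr reservation and, if it still
 *			// equals val, stalls in wrs.nto until the reservation
 *			// set is written to (or an interrupt, etc.); without
 *			// Zawrs it degrades to a single pause hint.
 *			__cmpwait_relaxed(p, val);
 *		}
 *		return val;
 *	}
 */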