/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2014 Regents of the University of California
 */

#ifndef _ASM_RISCV_CMPXCHG_H
#define _ASM_RISCV_CMPXCHG_H

#include <linux/bug.h>

#include <asm/alternative-macros.h>
#include <asm/fence.h>
#include <asm/hwcap.h>
#include <asm/insn-def.h>
#include <asm/cpufeature-macros.h>
#include <asm/processor.h>

#define __arch_xchg_masked(sc_sfx, swap_sfx, prepend, sc_append,	\
			   swap_append, r, p, n)			\
({									\
	if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) &&			\
	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA)) {	\
		__asm__ __volatile__ (					\
			prepend						\
			"	amoswap" swap_sfx " %0, %z2, %1\n"	\
			swap_append					\
			: "=&r" (r), "+A" (*(p))			\
			: "rJ" (n)					\
			: "memory");					\
	} else {							\
		u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3);		\
		ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \
		ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \
			       << __s;					\
		ulong __newx = (ulong)(n) << __s;			\
		ulong __retx;						\
		ulong __rc;						\
									\
		__asm__ __volatile__ (					\
			prepend						\
			PREFETCHW_ASM(%5)				\
			"0:	lr.w %0, %2\n"				\
			"	and  %1, %0, %z4\n"			\
			"	or   %1, %1, %z3\n"			\
			"	sc.w" sc_sfx " %1, %1, %2\n"		\
			"	bnez %1, 0b\n"				\
			sc_append					\
			: "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
			: "rJ" (__newx), "rJ" (~__mask), "rJ" (__ptr32b) \
			: "memory");					\
									\
		r = (__typeof__(*(p)))((__retx & __mask) >> __s);	\
	}								\
})

#define __arch_xchg(sfx, prepend, append, r, p, n)			\
({									\
	__asm__ __volatile__ (						\
		prepend							\
		"	amoswap" sfx " %0, %2, %1\n"			\
		append							\
		: "=r" (r), "+A" (*(p))					\
		: "r" (n)						\
		: "memory");						\
})

#define _arch_xchg(ptr, new, sc_sfx, swap_sfx, prepend,			\
		   sc_append, swap_append)				\
({									\
	__typeof__(ptr) __ptr = (ptr);					\
	__typeof__(*(__ptr)) __new = (new);				\
	__typeof__(*(__ptr)) __ret;					\
									\
	switch (sizeof(*__ptr)) {					\
	case 1:								\
		__arch_xchg_masked(sc_sfx, ".b" swap_sfx,		\
				   prepend, sc_append, swap_append,	\
				   __ret, __ptr, __new);		\
		break;							\
	case 2:								\
		__arch_xchg_masked(sc_sfx, ".h" swap_sfx,		\
				   prepend, sc_append, swap_append,	\
				   __ret, __ptr, __new);		\
		break;							\
	case 4:								\
		__arch_xchg(".w" swap_sfx, prepend, swap_append,	\
			    __ret, __ptr, __new);			\
		break;							\
	case 8:								\
		__arch_xchg(".d" swap_sfx, prepend, swap_append,	\
			    __ret, __ptr, __new);			\
		break;							\
	default:							\
		BUILD_BUG();						\
	}								\
	(__typeof__(*(__ptr)))__ret;					\
})

#define arch_xchg_relaxed(ptr, x)					\
	_arch_xchg(ptr, x, "", "", "", "", "")

#define arch_xchg_acquire(ptr, x)					\
	_arch_xchg(ptr, x, "", "", "",					\
		   RISCV_ACQUIRE_BARRIER, RISCV_ACQUIRE_BARRIER)

#define arch_xchg_release(ptr, x)					\
	_arch_xchg(ptr, x, "", "", RISCV_RELEASE_BARRIER, "", "")

#define arch_xchg(ptr, x)						\
	_arch_xchg(ptr, x, ".rl", ".aqrl", "", RISCV_FULL_BARRIER, "")

#define xchg32(ptr, x)							\
({									\
	BUILD_BUG_ON(sizeof(*(ptr)) != 4);				\
	arch_xchg((ptr), (x));						\
})

#define xchg64(ptr, x)							\
({									\
	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
	arch_xchg((ptr), (x));						\
})

/*
 * Atomic compare and exchange.  Compare OLD with MEM, if identical,
 * store NEW in MEM.  Return the initial value in MEM.  Success is
 * indicated by comparing RETURN with OLD.
 */
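/*
 * Example (illustrative sketch only): a caller typically retries until the
 * compare succeeds, using the returned value to detect failure.  The
 * "counter" pointer below is hypothetical; any naturally aligned 1-, 2-,
 * 4- or 8-byte object works:
 *
 *	u32 old, cur = READ_ONCE(*counter);
 *
 *	do {
 *		old = cur;
 *		cur = arch_cmpxchg(counter, old, old + 1);
 *	} while (cur != old);
 */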
#define __arch_cmpxchg_masked(sc_sfx, cas_sfx,				\
			      sc_prepend, sc_append,			\
			      cas_prepend, cas_append,			\
			      r, p, o, n)				\
({									\
	if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) &&			\
	    IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) &&			\
	    IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZACAS) &&			\
	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA) &&	\
	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)) {	\
		r = o;							\
									\
		__asm__ __volatile__ (					\
			cas_prepend					\
			"	amocas" cas_sfx " %0, %z2, %1\n"	\
			cas_append					\
			: "+&r" (r), "+A" (*(p))			\
			: "rJ" (n)					\
			: "memory");					\
	} else {							\
		u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3);		\
		ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \
		ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \
			       << __s;					\
		ulong __newx = (ulong)(n) << __s;			\
		ulong __oldx = (ulong)(o) << __s;			\
		ulong __retx;						\
		ulong __rc;						\
									\
		__asm__ __volatile__ (					\
			sc_prepend					\
			"0:	lr.w %0, %2\n"				\
			"	and  %1, %0, %z5\n"			\
			"	bne  %1, %z3, 1f\n"			\
			"	and  %1, %0, %z6\n"			\
			"	or   %1, %1, %z4\n"			\
			"	sc.w" sc_sfx " %1, %1, %2\n"		\
			"	bnez %1, 0b\n"				\
			sc_append					\
			"1:\n"						\
			: "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
			: "rJ" ((long)__oldx), "rJ" (__newx),		\
			  "rJ" (__mask), "rJ" (~__mask)			\
			: "memory");					\
									\
		r = (__typeof__(*(p)))((__retx & __mask) >> __s);	\
	}								\
})

#define __arch_cmpxchg(lr_sfx, sc_sfx, cas_sfx,				\
		       sc_prepend, sc_append,				\
		       cas_prepend, cas_append,				\
		       r, p, co, o, n)					\
({									\
	if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) &&			\
	    IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZACAS) &&			\
	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)) {	\
		r = o;							\
									\
		__asm__ __volatile__ (					\
			cas_prepend					\
			"	amocas" cas_sfx " %0, %z2, %1\n"	\
			cas_append					\
			: "+&r" (r), "+A" (*(p))			\
			: "rJ" (n)					\
			: "memory");					\
	} else {							\
		register unsigned int __rc;				\
									\
		__asm__ __volatile__ (					\
			sc_prepend					\
			"0:	lr" lr_sfx " %0, %2\n"			\
			"	bne  %0, %z3, 1f\n"			\
			"	sc" sc_sfx " %1, %z4, %2\n"		\
			"	bnez %1, 0b\n"				\
			sc_append					\
			"1:\n"						\
			: "=&r" (r), "=&r" (__rc), "+A" (*(p))		\
			: "rJ" (co o), "rJ" (n)				\
			: "memory");					\
	}								\
})

#define _arch_cmpxchg(ptr, old, new, sc_sfx, cas_sfx,			\
		      sc_prepend, sc_append,				\
		      cas_prepend, cas_append)				\
({									\
	__typeof__(ptr) __ptr = (ptr);					\
	__typeof__(*(__ptr)) __old = (old);				\
	__typeof__(*(__ptr)) __new = (new);				\
	__typeof__(*(__ptr)) __ret;					\
									\
	switch (sizeof(*__ptr)) {					\
	case 1:								\
		__arch_cmpxchg_masked(sc_sfx, ".b" cas_sfx,		\
				      sc_prepend, sc_append,		\
				      cas_prepend, cas_append,		\
				      __ret, __ptr, __old, __new);	\
		break;							\
	case 2:								\
		__arch_cmpxchg_masked(sc_sfx, ".h" cas_sfx,		\
				      sc_prepend, sc_append,		\
				      cas_prepend, cas_append,		\
				      __ret, __ptr, __old, __new);	\
		break;							\
	case 4:								\
		__arch_cmpxchg(".w", ".w" sc_sfx, ".w" cas_sfx,		\
			       sc_prepend, sc_append,			\
			       cas_prepend, cas_append,			\
			       __ret, __ptr, (long)(int)(long), __old, __new); \
		break;							\
	case 8:								\
		__arch_cmpxchg(".d", ".d" sc_sfx, ".d" cas_sfx,		\
			       sc_prepend, sc_append,			\
			       cas_prepend, cas_append,			\
			       __ret, __ptr, /**/, __old, __new);	\
		break;							\
	default:							\
		BUILD_BUG();						\
	}								\
	(__typeof__(*(__ptr)))__ret;					\
})

/*
 * These macros are here to improve the readability of the arch_cmpxchg_XXX()
 * macros.
 */
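/*
 * Note on naming (illustrative): the SC_*() arguments wrap the LR/SC
 * fallback sequence, while the CAS_*() arguments wrap the single AMOCAS
 * instruction used when Zacas (and, for sub-word sizes, Zabha) is
 * available.  For example, the acquire variant appends
 * RISCV_ACQUIRE_BARRIER on both paths:
 *
 *	_arch_cmpxchg((ptr), (o), (n),
 *		      SC_SFX(""), CAS_SFX(""),
 *		      SC_PREPEND(""), SC_APPEND(RISCV_ACQUIRE_BARRIER),
 *		      CAS_PREPEND(""), CAS_APPEND(RISCV_ACQUIRE_BARRIER))
 */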
#define SC_SFX(x)	x
#define CAS_SFX(x)	x
#define SC_PREPEND(x)	x
#define SC_APPEND(x)	x
#define CAS_PREPEND(x)	x
#define CAS_APPEND(x)	x

#define arch_cmpxchg_relaxed(ptr, o, n)					\
	_arch_cmpxchg((ptr), (o), (n),					\
		      SC_SFX(""), CAS_SFX(""),				\
		      SC_PREPEND(""), SC_APPEND(""),			\
		      CAS_PREPEND(""), CAS_APPEND(""))

#define arch_cmpxchg_acquire(ptr, o, n)					\
	_arch_cmpxchg((ptr), (o), (n),					\
		      SC_SFX(""), CAS_SFX(""),				\
		      SC_PREPEND(""), SC_APPEND(RISCV_ACQUIRE_BARRIER),	\
		      CAS_PREPEND(""), CAS_APPEND(RISCV_ACQUIRE_BARRIER))

#define arch_cmpxchg_release(ptr, o, n)					\
	_arch_cmpxchg((ptr), (o), (n),					\
		      SC_SFX(""), CAS_SFX(""),				\
		      SC_PREPEND(RISCV_RELEASE_BARRIER), SC_APPEND(""),	\
		      CAS_PREPEND(RISCV_RELEASE_BARRIER), CAS_APPEND(""))

#define arch_cmpxchg(ptr, o, n)						\
	_arch_cmpxchg((ptr), (o), (n),					\
		      SC_SFX(".rl"), CAS_SFX(".aqrl"),			\
		      SC_PREPEND(""), SC_APPEND(RISCV_FULL_BARRIER),	\
		      CAS_PREPEND(""), CAS_APPEND(""))

#define arch_cmpxchg_local(ptr, o, n)					\
	arch_cmpxchg_relaxed((ptr), (o), (n))

#define arch_cmpxchg64(ptr, o, n)					\
({									\
	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
	arch_cmpxchg((ptr), (o), (n));					\
})

#define arch_cmpxchg64_local(ptr, o, n)					\
({									\
	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
	arch_cmpxchg_relaxed((ptr), (o), (n));				\
})

#define arch_cmpxchg64_relaxed(ptr, o, n)				\
({									\
	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
	arch_cmpxchg_relaxed((ptr), (o), (n));				\
})

#define arch_cmpxchg64_acquire(ptr, o, n)				\
({									\
	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
	arch_cmpxchg_acquire((ptr), (o), (n));				\
})

#define arch_cmpxchg64_release(ptr, o, n)				\
({									\
	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
	arch_cmpxchg_release((ptr), (o), (n));				\
})

#if defined(CONFIG_64BIT) && defined(CONFIG_RISCV_ISA_ZACAS) && defined(CONFIG_TOOLCHAIN_HAS_ZACAS)

#define system_has_cmpxchg128()	riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)

union __u128_halves {
	u128 full;
	struct {
		u64 low, high;
	};
};

#define __arch_cmpxchg128(p, o, n, cas_sfx)				\
({									\
	__typeof__(*(p)) __o = (o);					\
	union __u128_halves __hn = { .full = (n) };			\
	union __u128_halves __ho = { .full = (__o) };			\
	register unsigned long t1 asm ("t1") = __hn.low;		\
	register unsigned long t2 asm ("t2") = __hn.high;		\
	register unsigned long t3 asm ("t3") = __ho.low;		\
	register unsigned long t4 asm ("t4") = __ho.high;		\
									\
	__asm__ __volatile__ (						\
		"	amocas.q" cas_sfx " %0, %z3, %2"		\
		: "+&r" (t3), "+&r" (t4), "+A" (*(p))			\
		: "rJ" (t1), "rJ" (t2)					\
		: "memory");						\
									\
	((u128)t4 << 64) | t3;						\
})

#define arch_cmpxchg128(ptr, o, n)					\
	__arch_cmpxchg128((ptr), (o), (n), ".aqrl")

#define arch_cmpxchg128_local(ptr, o, n)				\
	__arch_cmpxchg128((ptr), (o), (n), "")

#endif /* CONFIG_64BIT && CONFIG_RISCV_ISA_ZACAS && CONFIG_TOOLCHAIN_HAS_ZACAS */

#ifdef CONFIG_RISCV_ISA_ZAWRS
/*
 * Despite wrs.nto being "WRS-with-no-timeout", in the absence of changes to
 * @val we still expect it to terminate within a "reasonable" amount of time:
 * either for some other, implementation-specific reason, because of a
 * pending, locally-enabled interrupt, or because it has been configured to
 * raise an illegal instruction exception.
 */
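/*
 * Illustrative usage sketch (hypothetical caller): a polling loop can use
 * __cmpwait_relaxed(), defined below, to stall until the watched location
 * may have changed instead of spinning, e.g.:
 *
 *	u32 v;
 *
 *	while ((v = READ_ONCE(*p)) != expected)
 *		__cmpwait_relaxed(p, v);
 *
 * "p" and "expected" are placeholder names; the _relaxed suffix means no
 * memory ordering is implied beyond the load itself.
 */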
static __always_inline void __cmpwait(volatile void *ptr,
				      unsigned long val,
				      int size)
{
	unsigned long tmp;

	u32 *__ptr32b;
	ulong __s, __val, __mask;

	asm goto(ALTERNATIVE("j %l[no_zawrs]", "nop",
			     0, RISCV_ISA_EXT_ZAWRS, 1)
		 : : : : no_zawrs);

	switch (size) {
	case 1:
		__ptr32b = (u32 *)((ulong)(ptr) & ~0x3);
		__s = ((ulong)(ptr) & 0x3) * BITS_PER_BYTE;
		__val = val << __s;
		__mask = 0xff << __s;

		asm volatile(
		"	lr.w	%0, %1\n"
		"	and	%0, %0, %3\n"
		"	xor	%0, %0, %2\n"
		"	bnez	%0, 1f\n"
			ZAWRS_WRS_NTO "\n"
		"1:"
		: "=&r" (tmp), "+A" (*(__ptr32b))
		: "r" (__val), "r" (__mask)
		: "memory");
		break;
	case 2:
		__ptr32b = (u32 *)((ulong)(ptr) & ~0x3);
		__s = ((ulong)(ptr) & 0x2) * BITS_PER_BYTE;
		__val = val << __s;
		__mask = 0xffff << __s;

		asm volatile(
		"	lr.w	%0, %1\n"
		"	and	%0, %0, %3\n"
		"	xor	%0, %0, %2\n"
		"	bnez	%0, 1f\n"
			ZAWRS_WRS_NTO "\n"
		"1:"
		: "=&r" (tmp), "+A" (*(__ptr32b))
		: "r" (__val), "r" (__mask)
		: "memory");
		break;
	case 4:
		asm volatile(
		"	lr.w	%0, %1\n"
		"	xor	%0, %0, %2\n"
		"	bnez	%0, 1f\n"
			ZAWRS_WRS_NTO "\n"
		"1:"
		: "=&r" (tmp), "+A" (*(u32 *)ptr)
		: "r" (val));
		break;
#if __riscv_xlen == 64
	case 8:
		asm volatile(
		"	lr.d	%0, %1\n"
		"	xor	%0, %0, %2\n"
		"	bnez	%0, 1f\n"
			ZAWRS_WRS_NTO "\n"
		"1:"
		: "=&r" (tmp), "+A" (*(u64 *)ptr)
		: "r" (val));
		break;
#endif
	default:
		BUILD_BUG();
	}

	return;

no_zawrs:
	asm volatile(RISCV_PAUSE : : : "memory");
}

#define __cmpwait_relaxed(ptr, val) \
	__cmpwait((ptr), (unsigned long)(val), sizeof(*(ptr)))
#endif

#endif /* _ASM_RISCV_CMPXCHG_H */