/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2014 Regents of the University of California
 */

#ifndef _ASM_RISCV_CMPXCHG_H
#define _ASM_RISCV_CMPXCHG_H

#include <linux/bug.h>

#include <asm/alternative-macros.h>
#include <asm/fence.h>
#include <asm/hwcap.h>
#include <asm/insn-def.h>
#include <asm/cpufeature-macros.h>
#include <asm/processor.h>

#define __arch_xchg_masked(sc_sfx, swap_sfx, prepend, sc_append,	\
			   swap_append, r, p, n)	\
({	\
	if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) &&	\
	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA)) {	\
		__asm__ __volatile__ (	\
			prepend	\
			" amoswap" swap_sfx " %0, %z2, %1\n"	\
			swap_append	\
			: "=&r" (r), "+A" (*(p))	\
			: "rJ" (n)	\
			: "memory");	\
	} else {	\
		u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3);	\
		ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE;	\
		ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0)	\
				<< __s;	\
		ulong __newx = (ulong)(n) << __s;	\
		ulong __retx;	\
		ulong __rc;	\
	\
		__asm__ __volatile__ (	\
			prepend	\
			PREFETCHW_ASM(%5)	\
			"0: lr.w %0, %2\n"	\
			" and %1, %0, %z4\n"	\
			" or %1, %1, %z3\n"	\
			" sc.w" sc_sfx " %1, %1, %2\n"	\
			" bnez %1, 0b\n"	\
			sc_append	\
			: "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b))	\
			: "rJ" (__newx), "rJ" (~__mask), "rJ" (__ptr32b)	\
			: "memory");	\
	\
		r = (__typeof__(*(p)))((__retx & __mask) >> __s);	\
	}	\
})

#define __arch_xchg(sfx, prepend, append, r, p, n)	\
({	\
	__asm__ __volatile__ (	\
		prepend	\
		" amoswap" sfx " %0, %2, %1\n"	\
		append	\
		: "=r" (r), "+A" (*(p))	\
		: "r" (n)	\
		: "memory");	\
})

#define _arch_xchg(ptr, new, sc_sfx, swap_sfx, prepend,	\
		   sc_append, swap_append)	\
({	\
	__typeof__(ptr) __ptr = (ptr);	\
	__typeof__(*(__ptr)) __new = (new);	\
	__typeof__(*(__ptr)) __ret;	\
	\
	switch (sizeof(*__ptr)) {	\
	case 1:	\
		__arch_xchg_masked(sc_sfx, ".b" swap_sfx,	\
				   prepend, sc_append, swap_append,	\
				   __ret, __ptr, __new);	\
		break;	\
	case 2:	\
		__arch_xchg_masked(sc_sfx, ".h" swap_sfx,	\
				   prepend, sc_append, swap_append,	\
				   __ret, __ptr, __new);	\
		break;	\
	case 4:	\
		__arch_xchg(".w" swap_sfx, prepend, swap_append,	\
			    __ret, __ptr, __new);	\
		break;	\
	case 8:	\
		__arch_xchg(".d" swap_sfx, prepend, swap_append,	\
			    __ret, __ptr, __new);	\
		break;	\
	default:	\
		BUILD_BUG();	\
	}	\
	(__typeof__(*(__ptr)))__ret;	\
})

#define arch_xchg_relaxed(ptr, x)	\
	_arch_xchg(ptr, x, "", "", "", "", "")

#define arch_xchg_acquire(ptr, x)	\
	_arch_xchg(ptr, x, "", "", "",	\
		   RISCV_ACQUIRE_BARRIER, RISCV_ACQUIRE_BARRIER)

#define arch_xchg_release(ptr, x)	\
	_arch_xchg(ptr, x, "", "", RISCV_RELEASE_BARRIER, "", "")

#define arch_xchg(ptr, x)	\
	_arch_xchg(ptr, x, ".rl", ".aqrl", "", RISCV_FULL_BARRIER, "")

#define xchg32(ptr, x)	\
({	\
	BUILD_BUG_ON(sizeof(*(ptr)) != 4);	\
	arch_xchg((ptr), (x));	\
})

#define xchg64(ptr, x)	\
({	\
	BUILD_BUG_ON(sizeof(*(ptr)) != 8);	\
	arch_xchg((ptr), (x));	\
})
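/*
 * Illustrative usage sketch (hypothetical helper, not part of this header;
 * kernel code normally reaches these operations through the generic xchg()
 * wrapper).  arch_xchg() stores the new value unconditionally, returns the
 * previous contents, and is fully ordered, while arch_xchg_relaxed() gives
 * no ordering guarantees:
 *
 *	static void *publish_pointer(void **slot, void *new)
 *	{
 *		// Fully ordered swap: the returned old pointer is ordered
 *		// against accesses before and after the call.
 *		return arch_xchg(slot, new);
 *	}
 */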
/*
 * Atomic compare and exchange.  Compare OLD with MEM, if identical,
 * store NEW in MEM.  Return the initial value in MEM.  Success is
 * indicated by comparing RETURN with OLD.
 */
#define __arch_cmpxchg_masked(sc_sfx, cas_sfx,	\
			      sc_prepend, sc_append,	\
			      cas_prepend, cas_append,	\
			      r, p, o, n)	\
({	\
	if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) &&	\
	    IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) &&	\
	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA) &&	\
	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)) {	\
		r = o;	\
	\
		__asm__ __volatile__ (	\
			cas_prepend	\
			" amocas" cas_sfx " %0, %z2, %1\n"	\
			cas_append	\
			: "+&r" (r), "+A" (*(p))	\
			: "rJ" (n)	\
			: "memory");	\
	} else {	\
		u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3);	\
		ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE;	\
		ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0)	\
			       << __s;	\
		ulong __newx = (ulong)(n) << __s;	\
		ulong __oldx = (ulong)(o) << __s;	\
		ulong __retx;	\
		ulong __rc;	\
	\
		__asm__ __volatile__ (	\
			sc_prepend	\
			"0: lr.w %0, %2\n"	\
			" and %1, %0, %z5\n"	\
			" bne %1, %z3, 1f\n"	\
			" and %1, %0, %z6\n"	\
			" or %1, %1, %z4\n"	\
			" sc.w" sc_sfx " %1, %1, %2\n"	\
			" bnez %1, 0b\n"	\
			sc_append	\
			"1:\n"	\
			: "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b))	\
			: "rJ" ((long)__oldx), "rJ" (__newx),	\
			  "rJ" (__mask), "rJ" (~__mask)	\
			: "memory");	\
	\
		r = (__typeof__(*(p)))((__retx & __mask) >> __s);	\
	}	\
})

#define __arch_cmpxchg(lr_sfx, sc_sfx, cas_sfx,	\
		       sc_prepend, sc_append,	\
		       cas_prepend, cas_append,	\
		       r, p, co, o, n)	\
({	\
	if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) &&	\
	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)) {	\
		r = o;	\
	\
		__asm__ __volatile__ (	\
			cas_prepend	\
			" amocas" cas_sfx " %0, %z2, %1\n"	\
			cas_append	\
			: "+&r" (r), "+A" (*(p))	\
			: "rJ" (n)	\
			: "memory");	\
	} else {	\
		register unsigned int __rc;	\
	\
		__asm__ __volatile__ (	\
			sc_prepend	\
			"0: lr" lr_sfx " %0, %2\n"	\
			" bne %0, %z3, 1f\n"	\
			" sc" sc_sfx " %1, %z4, %2\n"	\
			" bnez %1, 0b\n"	\
			sc_append	\
			"1:\n"	\
			: "=&r" (r), "=&r" (__rc), "+A" (*(p))	\
			: "rJ" (co o), "rJ" (n)	\
			: "memory");	\
	}	\
})

#define _arch_cmpxchg(ptr, old, new, sc_sfx, cas_sfx,	\
		      sc_prepend, sc_append,	\
		      cas_prepend, cas_append)	\
({	\
	__typeof__(ptr) __ptr = (ptr);	\
	__typeof__(*(__ptr)) __old = (old);	\
	__typeof__(*(__ptr)) __new = (new);	\
	__typeof__(*(__ptr)) __ret;	\
	\
	switch (sizeof(*__ptr)) {	\
	case 1:	\
		__arch_cmpxchg_masked(sc_sfx, ".b" cas_sfx,	\
				      sc_prepend, sc_append,	\
				      cas_prepend, cas_append,	\
				      __ret, __ptr, __old, __new);	\
		break;	\
	case 2:	\
		__arch_cmpxchg_masked(sc_sfx, ".h" cas_sfx,	\
				      sc_prepend, sc_append,	\
				      cas_prepend, cas_append,	\
				      __ret, __ptr, __old, __new);	\
		break;	\
	case 4:	\
		__arch_cmpxchg(".w", ".w" sc_sfx, ".w" cas_sfx,	\
			       sc_prepend, sc_append,	\
			       cas_prepend, cas_append,	\
			       __ret, __ptr, (long)(int)(long), __old, __new);	\
		break;	\
	case 8:	\
		__arch_cmpxchg(".d", ".d" sc_sfx, ".d" cas_sfx,	\
			       sc_prepend, sc_append,	\
			       cas_prepend, cas_append,	\
			       __ret, __ptr, /**/, __old, __new);	\
		break;	\
	default:	\
		BUILD_BUG();	\
	}	\
	(__typeof__(*(__ptr)))__ret;	\
})
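/*
 * Illustrative usage sketch (hypothetical helper, not part of this header;
 * kernel code normally uses the generic cmpxchg() wrapper).  As the comment
 * above notes, success is detected by comparing the returned value with the
 * expected old value, which yields the usual compare-and-swap retry loop on
 * top of the arch_cmpxchg*() variants defined below:
 *
 *	static void add_sketch(unsigned int *p, unsigned int inc)
 *	{
 *		unsigned int cur = READ_ONCE(*p), old;
 *
 *		do {
 *			old = cur;
 *			cur = arch_cmpxchg(p, old, old + inc);
 *		} while (cur != old);	// cur == old means the store took place
 *	}
 */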
/*
 * These macros are here to improve the readability of the arch_cmpxchg_XXX()
 * macros.
 */
#define SC_SFX(x)	x
#define CAS_SFX(x)	x
#define SC_PREPEND(x)	x
#define SC_APPEND(x)	x
#define CAS_PREPEND(x)	x
#define CAS_APPEND(x)	x

#define arch_cmpxchg_relaxed(ptr, o, n)	\
	_arch_cmpxchg((ptr), (o), (n),	\
		      SC_SFX(""), CAS_SFX(""),	\
		      SC_PREPEND(""), SC_APPEND(""),	\
		      CAS_PREPEND(""), CAS_APPEND(""))

#define arch_cmpxchg_acquire(ptr, o, n)	\
	_arch_cmpxchg((ptr), (o), (n),	\
		      SC_SFX(""), CAS_SFX(""),	\
		      SC_PREPEND(""), SC_APPEND(RISCV_ACQUIRE_BARRIER),	\
		      CAS_PREPEND(""), CAS_APPEND(RISCV_ACQUIRE_BARRIER))

#define arch_cmpxchg_release(ptr, o, n)	\
	_arch_cmpxchg((ptr), (o), (n),	\
		      SC_SFX(""), CAS_SFX(""),	\
		      SC_PREPEND(RISCV_RELEASE_BARRIER), SC_APPEND(""),	\
		      CAS_PREPEND(RISCV_RELEASE_BARRIER), CAS_APPEND(""))

#define arch_cmpxchg(ptr, o, n)	\
	_arch_cmpxchg((ptr), (o), (n),	\
		      SC_SFX(".rl"), CAS_SFX(".aqrl"),	\
		      SC_PREPEND(""), SC_APPEND(RISCV_FULL_BARRIER),	\
		      CAS_PREPEND(""), CAS_APPEND(""))

#define arch_cmpxchg_local(ptr, o, n)	\
	arch_cmpxchg_relaxed((ptr), (o), (n))

#define arch_cmpxchg64(ptr, o, n)	\
({	\
	BUILD_BUG_ON(sizeof(*(ptr)) != 8);	\
	arch_cmpxchg((ptr), (o), (n));	\
})

#define arch_cmpxchg64_local(ptr, o, n)	\
({	\
	BUILD_BUG_ON(sizeof(*(ptr)) != 8);	\
	arch_cmpxchg_relaxed((ptr), (o), (n));	\
})

#define arch_cmpxchg64_relaxed(ptr, o, n)	\
({	\
	BUILD_BUG_ON(sizeof(*(ptr)) != 8);	\
	arch_cmpxchg_relaxed((ptr), (o), (n));	\
})

#define arch_cmpxchg64_acquire(ptr, o, n)	\
({	\
	BUILD_BUG_ON(sizeof(*(ptr)) != 8);	\
	arch_cmpxchg_acquire((ptr), (o), (n));	\
})

#define arch_cmpxchg64_release(ptr, o, n)	\
({	\
	BUILD_BUG_ON(sizeof(*(ptr)) != 8);	\
	arch_cmpxchg_release((ptr), (o), (n));	\
})

#if defined(CONFIG_64BIT) && defined(CONFIG_RISCV_ISA_ZACAS)

#define system_has_cmpxchg128()	riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)

union __u128_halves {
	u128 full;
	struct {
		u64 low, high;
	};
};

#define __arch_cmpxchg128(p, o, n, cas_sfx)	\
({	\
	__typeof__(*(p)) __o = (o);	\
	union __u128_halves __hn = { .full = (n) };	\
	union __u128_halves __ho = { .full = (__o) };	\
	register unsigned long t1 asm ("t1") = __hn.low;	\
	register unsigned long t2 asm ("t2") = __hn.high;	\
	register unsigned long t3 asm ("t3") = __ho.low;	\
	register unsigned long t4 asm ("t4") = __ho.high;	\
	\
	__asm__ __volatile__ (	\
		" amocas.q" cas_sfx " %0, %z3, %2"	\
		: "+&r" (t3), "+&r" (t4), "+A" (*(p))	\
		: "rJ" (t1), "rJ" (t2)	\
		: "memory");	\
	\
	((u128)t4 << 64) | t3;	\
})

#define arch_cmpxchg128(ptr, o, n)	\
	__arch_cmpxchg128((ptr), (o), (n), ".aqrl")

#define arch_cmpxchg128_local(ptr, o, n)	\
	__arch_cmpxchg128((ptr), (o), (n), "")

#endif /* CONFIG_64BIT && CONFIG_RISCV_ISA_ZACAS */
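/*
 * Illustrative usage sketch (hypothetical helper, not part of this header).
 * When Zacas is available, system_has_cmpxchg128() is true and a naturally
 * aligned 128-bit value can be swapped atomically via amocas.q; as with the
 * narrower variants, the call returns the previous contents, so success is
 * detected by comparison:
 *
 *	static bool try_update_pair(u128 *p, u128 old, u128 new)
 *	{
 *		if (!system_has_cmpxchg128())
 *			return false;	// caller must fall back to a lock
 *		return arch_cmpxchg128(p, old, new) == old;
 *	}
 */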
#ifdef CONFIG_RISCV_ISA_ZAWRS
/*
 * Despite wrs.nto being "WRS-with-no-timeout", in the absence of changes to
 * @val we expect it to still terminate within a "reasonable" amount of time
 * for an implementation-specific other reason, a pending, locally-enabled
 * interrupt, or because it has been configured to raise an illegal
 * instruction exception.
 */
static __always_inline void __cmpwait(volatile void *ptr,
				      unsigned long val,
				      int size)
{
	unsigned long tmp;

	u32 *__ptr32b;
	ulong __s, __val, __mask;

	asm goto(ALTERNATIVE("j %l[no_zawrs]", "nop",
			     0, RISCV_ISA_EXT_ZAWRS, 1)
		 : : : : no_zawrs);

	switch (size) {
	case 1:
		__ptr32b = (u32 *)((ulong)(ptr) & ~0x3);
		__s = ((ulong)(ptr) & 0x3) * BITS_PER_BYTE;
		__val = val << __s;
		__mask = 0xff << __s;

		asm volatile(
		" lr.w %0, %1\n"
		" and %0, %0, %3\n"
		" xor %0, %0, %2\n"
		" bnez %0, 1f\n"
		ZAWRS_WRS_NTO "\n"
		"1:"
		: "=&r" (tmp), "+A" (*(__ptr32b))
		: "r" (__val), "r" (__mask)
		: "memory");
		break;
	case 2:
		__ptr32b = (u32 *)((ulong)(ptr) & ~0x3);
		__s = ((ulong)(ptr) & 0x2) * BITS_PER_BYTE;
		__val = val << __s;
		__mask = 0xffff << __s;

		asm volatile(
		" lr.w %0, %1\n"
		" and %0, %0, %3\n"
		" xor %0, %0, %2\n"
		" bnez %0, 1f\n"
		ZAWRS_WRS_NTO "\n"
		"1:"
		: "=&r" (tmp), "+A" (*(__ptr32b))
		: "r" (__val), "r" (__mask)
		: "memory");
		break;
	case 4:
		asm volatile(
		" lr.w %0, %1\n"
		" xor %0, %0, %2\n"
		" bnez %0, 1f\n"
		ZAWRS_WRS_NTO "\n"
		"1:"
		: "=&r" (tmp), "+A" (*(u32 *)ptr)
		: "r" (val));
		break;
#if __riscv_xlen == 64
	case 8:
		asm volatile(
		" lr.d %0, %1\n"
		" xor %0, %0, %2\n"
		" bnez %0, 1f\n"
		ZAWRS_WRS_NTO "\n"
		"1:"
		: "=&r" (tmp), "+A" (*(u64 *)ptr)
		: "r" (val));
		break;
#endif
	default:
		BUILD_BUG();
	}

	return;

no_zawrs:
	asm volatile(RISCV_PAUSE : : : "memory");
}

#define __cmpwait_relaxed(ptr, val) \
	__cmpwait((ptr), (unsigned long)(val), sizeof(*(ptr)))
#endif

#endif /* _ASM_RISCV_CMPXCHG_H */