/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2014 Regents of the University of California
 */

#ifndef _ASM_RISCV_CMPXCHG_H
#define _ASM_RISCV_CMPXCHG_H

#include <linux/bug.h>

#include <asm/alternative-macros.h>
#include <asm/fence.h>
#include <asm/hwcap.h>
#include <asm/insn-def.h>
#include <asm/cpufeature-macros.h>
#include <asm/processor.h>
#include <asm/errata_list.h>

#define __arch_xchg_masked(sc_sfx, swap_sfx, prepend, sc_append,	\
			   swap_append, r, p, n)			\
({									\
	if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) &&			\
	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA)) {	\
		__asm__ __volatile__ (					\
			prepend						\
			"	amoswap" swap_sfx " %0, %z2, %1\n"	\
			swap_append					\
			: "=&r" (r), "+A" (*(p))			\
			: "rJ" (n)					\
			: "memory");					\
	} else {							\
		u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3);		\
		ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \
		ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \
			       << __s;					\
		ulong __newx = (ulong)(n) << __s;			\
		ulong __retx;						\
		ulong __rc;						\
									\
		__asm__ __volatile__ (					\
			prepend						\
			PREFETCHW_ASM(%5)				\
			"0:	lr.w %0, %2\n"				\
			"	and  %1, %0, %z4\n"			\
			"	or   %1, %1, %z3\n"			\
			"	sc.w" sc_sfx " %1, %1, %2\n"		\
			"	bnez %1, 0b\n"				\
			sc_append					\
			: "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
			: "rJ" (__newx), "rJ" (~__mask), "rJ" (__ptr32b) \
			: "memory");					\
									\
		r = (__typeof__(*(p)))((__retx & __mask) >> __s);	\
	}								\
})

#define __arch_xchg(sfx, prepend, append, r, p, n)			\
({									\
	__asm__ __volatile__ (						\
		prepend							\
		"	amoswap" sfx " %0, %2, %1\n"			\
		append							\
		: "=r" (r), "+A" (*(p))					\
		: "r" (n)						\
		: "memory");						\
})

#define _arch_xchg(ptr, new, sc_sfx, swap_sfx, prepend,			\
		   sc_append, swap_append)				\
({									\
	__typeof__(ptr) __ptr = (ptr);					\
	__typeof__(*(__ptr)) __new = (new);				\
	__typeof__(*(__ptr)) __ret;					\
									\
	switch (sizeof(*__ptr)) {					\
	case 1:								\
		__arch_xchg_masked(sc_sfx, ".b" swap_sfx,		\
				   prepend, sc_append, swap_append,	\
				   __ret, __ptr, __new);		\
		break;							\
	case 2:								\
		__arch_xchg_masked(sc_sfx, ".h" swap_sfx,		\
				   prepend, sc_append, swap_append,	\
				   __ret, __ptr, __new);		\
		break;							\
	case 4:								\
		__arch_xchg(".w" swap_sfx, prepend, swap_append,	\
			    __ret, __ptr, __new);			\
		break;							\
	case 8:								\
		__arch_xchg(".d" swap_sfx, prepend, swap_append,	\
			    __ret, __ptr, __new);			\
		break;							\
	default:							\
		BUILD_BUG();						\
	}								\
	(__typeof__(*(__ptr)))__ret;					\
})

#define arch_xchg_relaxed(ptr, x)					\
	_arch_xchg(ptr, x, "", "", "", "", "")

#define arch_xchg_acquire(ptr, x)					\
	_arch_xchg(ptr, x, "", "", "",					\
		   RISCV_ACQUIRE_BARRIER, RISCV_ACQUIRE_BARRIER)

#define arch_xchg_release(ptr, x)					\
	_arch_xchg(ptr, x, "", "", RISCV_RELEASE_BARRIER, "", "")

#define arch_xchg(ptr, x)						\
	_arch_xchg(ptr, x, ".rl", ".aqrl", "", RISCV_FULL_BARRIER, "")

#define xchg32(ptr, x)							\
({									\
	BUILD_BUG_ON(sizeof(*(ptr)) != 4);				\
	arch_xchg((ptr), (x));						\
})

#define xchg64(ptr, x)							\
({									\
	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
	arch_xchg((ptr), (x));						\
})

/*
 * Atomic compare and exchange.  Compare OLD with MEM, if identical,
 * store NEW in MEM.  Return the initial value in MEM.  Success is
 * indicated by comparing RETURN with OLD.
 */
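/*
 * Illustrative usage sketch (not part of this header): the hypothetical
 * helper below shows the convention described above, i.e. the operation
 * succeeded iff the value returned by arch_cmpxchg() equals the expected
 * old value.  Generic code would normally go through the cmpxchg() /
 * try_cmpxchg() wrappers rather than calling arch_cmpxchg() directly.
 *
 *	static inline void example_or_u32(u32 *word, u32 bits)
 *	{
 *		u32 old = READ_ONCE(*word);
 *		u32 prev;
 *
 *		// Lost the race: retry with the freshly observed value.
 *		while ((prev = arch_cmpxchg(word, old, old | bits)) != old)
 *			old = prev;
 *	}
 */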
#define __arch_cmpxchg_masked(sc_sfx, cas_sfx,				\
			      sc_prepend, sc_append,			\
			      cas_prepend, cas_append,			\
			      r, p, o, n)				\
({									\
	if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) &&			\
	    IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) &&			\
	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA) &&	\
	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)) {	\
		r = o;							\
									\
		__asm__ __volatile__ (					\
			cas_prepend					\
			"	amocas" cas_sfx " %0, %z2, %1\n"	\
			cas_append					\
			: "+&r" (r), "+A" (*(p))			\
			: "rJ" (n)					\
			: "memory");					\
	} else {							\
		u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3);		\
		ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \
		ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \
			       << __s;					\
		ulong __newx = (ulong)(n) << __s;			\
		ulong __oldx = (ulong)(o) << __s;			\
		ulong __retx;						\
		ulong __rc;						\
									\
		__asm__ __volatile__ (					\
			sc_prepend					\
			"0:	lr.w %0, %2\n"				\
			"	and  %1, %0, %z5\n"			\
			"	bne  %1, %z3, 1f\n"			\
			"	and  %1, %0, %z6\n"			\
			"	or   %1, %1, %z4\n"			\
			"	sc.w" sc_sfx " %1, %1, %2\n"		\
			"	bnez %1, 0b\n"				\
			sc_append					\
			"1:\n"						\
			: "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
			: "rJ" ((long)__oldx), "rJ" (__newx),		\
			  "rJ" (__mask), "rJ" (~__mask)			\
			: "memory");					\
									\
		r = (__typeof__(*(p)))((__retx & __mask) >> __s);	\
	}								\
})

#define __arch_cmpxchg(lr_sfx, sc_sfx, cas_sfx,				\
		       sc_prepend, sc_append,				\
		       cas_prepend, cas_append,				\
		       r, p, co, o, n)					\
({									\
	if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) &&			\
	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)) {	\
		r = o;							\
									\
		__asm__ __volatile__ (					\
			cas_prepend					\
			"	amocas" cas_sfx " %0, %z2, %1\n"	\
			cas_append					\
			: "+&r" (r), "+A" (*(p))			\
			: "rJ" (n)					\
			: "memory");					\
	} else {							\
		register unsigned int __rc;				\
									\
		__asm__ __volatile__ (					\
			sc_prepend					\
			"0:	lr" lr_sfx " %0, %2\n"			\
			"	bne  %0, %z3, 1f\n"			\
			"	sc" sc_sfx " %1, %z4, %2\n"		\
			"	bnez %1, 0b\n"				\
			sc_append					\
			"1:\n"						\
			: "=&r" (r), "=&r" (__rc), "+A" (*(p))		\
			: "rJ" (co o), "rJ" (n)				\
			: "memory");					\
	}								\
})

#define _arch_cmpxchg(ptr, old, new, sc_sfx, cas_sfx,			\
		      sc_prepend, sc_append,				\
		      cas_prepend, cas_append)				\
({									\
	__typeof__(ptr) __ptr = (ptr);					\
	__typeof__(*(__ptr)) __old = (old);				\
	__typeof__(*(__ptr)) __new = (new);				\
	__typeof__(*(__ptr)) __ret;					\
									\
	switch (sizeof(*__ptr)) {					\
	case 1:								\
		__arch_cmpxchg_masked(sc_sfx, ".b" cas_sfx,		\
				      sc_prepend, sc_append,		\
				      cas_prepend, cas_append,		\
				      __ret, __ptr, __old, __new);	\
		break;							\
	case 2:								\
		__arch_cmpxchg_masked(sc_sfx, ".h" cas_sfx,		\
				      sc_prepend, sc_append,		\
				      cas_prepend, cas_append,		\
				      __ret, __ptr, __old, __new);	\
		break;							\
	case 4:								\
		__arch_cmpxchg(".w", ".w" sc_sfx, ".w" cas_sfx,		\
			       sc_prepend, sc_append,			\
			       cas_prepend, cas_append,			\
			       __ret, __ptr, (long)(int)(long), __old, __new); \
		break;							\
	case 8:								\
		__arch_cmpxchg(".d", ".d" sc_sfx, ".d" cas_sfx,		\
			       sc_prepend, sc_append,			\
			       cas_prepend, cas_append,			\
			       __ret, __ptr, /**/, __old, __new);	\
		break;							\
	default:							\
		BUILD_BUG();						\
	}								\
	(__typeof__(*(__ptr)))__ret;					\
})

/*
 * These macros are here to improve the readability of the arch_cmpxchg_XXX()
 * macros.
 */
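/*
 * For example (an illustrative expansion, assuming the LR/SC fallback path
 * is taken), arch_cmpxchg_acquire() below passes
 * SC_APPEND(RISCV_ACQUIRE_BARRIER) and CAS_APPEND(RISCV_ACQUIRE_BARRIER),
 * so the LR/SC sequence becomes roughly (register names chosen arbitrarily):
 *
 *	0:	lr.w	a0, (ptr)
 *		bne	a0, old, 1f
 *		sc.w	a1, new, (ptr)
 *		bnez	a1, 0b
 *		fence	r, rw		# RISCV_ACQUIRE_BARRIER
 *	1:
 *
 * while arch_cmpxchg_relaxed() passes empty strings for every slot and
 * emits no fence at all.
 */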
#define SC_SFX(x)	x
#define CAS_SFX(x)	x
#define SC_PREPEND(x)	x
#define SC_APPEND(x)	x
#define CAS_PREPEND(x)	x
#define CAS_APPEND(x)	x

#define arch_cmpxchg_relaxed(ptr, o, n)					\
	_arch_cmpxchg((ptr), (o), (n),					\
		      SC_SFX(""), CAS_SFX(""),				\
		      SC_PREPEND(""), SC_APPEND(""),			\
		      CAS_PREPEND(""), CAS_APPEND(""))

#define arch_cmpxchg_acquire(ptr, o, n)					\
	_arch_cmpxchg((ptr), (o), (n),					\
		      SC_SFX(""), CAS_SFX(""),				\
		      SC_PREPEND(""), SC_APPEND(RISCV_ACQUIRE_BARRIER),	\
		      CAS_PREPEND(""), CAS_APPEND(RISCV_ACQUIRE_BARRIER))

#define arch_cmpxchg_release(ptr, o, n)					\
	_arch_cmpxchg((ptr), (o), (n),					\
		      SC_SFX(""), CAS_SFX(""),				\
		      SC_PREPEND(RISCV_RELEASE_BARRIER), SC_APPEND(""),	\
		      CAS_PREPEND(RISCV_RELEASE_BARRIER), CAS_APPEND(""))

#define arch_cmpxchg(ptr, o, n)						\
	_arch_cmpxchg((ptr), (o), (n),					\
		      SC_SFX(".rl"), CAS_SFX(".aqrl"),			\
		      SC_PREPEND(""), SC_APPEND(RISCV_FULL_BARRIER),	\
		      CAS_PREPEND(""), CAS_APPEND(""))

#define arch_cmpxchg_local(ptr, o, n)					\
	arch_cmpxchg_relaxed((ptr), (o), (n))

#define arch_cmpxchg64(ptr, o, n)					\
({									\
	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
	arch_cmpxchg((ptr), (o), (n));					\
})

#define arch_cmpxchg64_local(ptr, o, n)					\
({									\
	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
	arch_cmpxchg_relaxed((ptr), (o), (n));				\
})

#define arch_cmpxchg64_relaxed(ptr, o, n)				\
({									\
	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
	arch_cmpxchg_relaxed((ptr), (o), (n));				\
})

#define arch_cmpxchg64_acquire(ptr, o, n)				\
({									\
	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
	arch_cmpxchg_acquire((ptr), (o), (n));				\
})

#define arch_cmpxchg64_release(ptr, o, n)				\
({									\
	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
	arch_cmpxchg_release((ptr), (o), (n));				\
})

#if defined(CONFIG_64BIT) && defined(CONFIG_RISCV_ISA_ZACAS)

#define system_has_cmpxchg128()	riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)

union __u128_halves {
	u128 full;
	struct {
		u64 low, high;
	};
};

#define __arch_cmpxchg128(p, o, n, cas_sfx)				\
({									\
	__typeof__(*(p)) __o = (o);					\
	union __u128_halves __hn = { .full = (n) };			\
	union __u128_halves __ho = { .full = (__o) };			\
	register unsigned long t1 asm ("t1") = __hn.low;		\
	register unsigned long t2 asm ("t2") = __hn.high;		\
	register unsigned long t3 asm ("t3") = __ho.low;		\
	register unsigned long t4 asm ("t4") = __ho.high;		\
									\
	__asm__ __volatile__ (						\
		"	amocas.q" cas_sfx " %0, %z3, %2"		\
		: "+&r" (t3), "+&r" (t4), "+A" (*(p))			\
		: "rJ" (t1), "rJ" (t2)					\
		: "memory");						\
									\
	((u128)t4 << 64) | t3;						\
})

#define arch_cmpxchg128(ptr, o, n)					\
	__arch_cmpxchg128((ptr), (o), (n), ".aqrl")

#define arch_cmpxchg128_local(ptr, o, n)				\
	__arch_cmpxchg128((ptr), (o), (n), "")

#endif /* CONFIG_64BIT && CONFIG_RISCV_ISA_ZACAS */

#ifdef CONFIG_RISCV_ISA_ZAWRS
/*
 * Despite wrs.nto being "WRS-with-no-timeout", in the absence of changes to
 * @val we expect it to still terminate within a "reasonable" amount of time
 * for an implementation-specific other reason, a pending, locally-enabled
 * interrupt, or because it has been configured to raise an illegal
 * instruction exception.
 */
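/*
 * Illustrative use (a sketch, assuming a smp_cond_load_relaxed()-style
 * caller polling a hypothetical u32 *flag): re-read the variable and only
 * wait while it still holds the last observed value, e.g.
 *
 *	u32 seen;
 *
 *	while (!(seen = READ_ONCE(*flag)))
 *		__cmpwait_relaxed(flag, seen);
 *
 * Instead of spinning, wrs.nto then stalls the hart until the reservation
 * on the word containing *flag is lost (or one of the conditions above
 * ends the wait), after which the loop re-checks the value.
 */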
static __always_inline void __cmpwait(volatile void *ptr,
				      unsigned long val,
				      int size)
{
	unsigned long tmp;

	u32 *__ptr32b;
	ulong __s, __val, __mask;

	asm goto(ALTERNATIVE("j %l[no_zawrs]", "nop",
			     0, RISCV_ISA_EXT_ZAWRS, 1)
		 : : : : no_zawrs);

	switch (size) {
	case 1:
		__ptr32b = (u32 *)((ulong)(ptr) & ~0x3);
		__s = ((ulong)(ptr) & 0x3) * BITS_PER_BYTE;
		__val = val << __s;
		__mask = 0xff << __s;

		asm volatile(
		"	lr.w	%0, %1\n"
		"	and	%0, %0, %3\n"
		"	xor	%0, %0, %2\n"
		"	bnez	%0, 1f\n"
			ZAWRS_WRS_NTO "\n"
		"1:"
		: "=&r" (tmp), "+A" (*(__ptr32b))
		: "r" (__val), "r" (__mask)
		: "memory");
		break;
	case 2:
		__ptr32b = (u32 *)((ulong)(ptr) & ~0x3);
		__s = ((ulong)(ptr) & 0x2) * BITS_PER_BYTE;
		__val = val << __s;
		__mask = 0xffff << __s;

		asm volatile(
		"	lr.w	%0, %1\n"
		"	and	%0, %0, %3\n"
		"	xor	%0, %0, %2\n"
		"	bnez	%0, 1f\n"
			ZAWRS_WRS_NTO "\n"
		"1:"
		: "=&r" (tmp), "+A" (*(__ptr32b))
		: "r" (__val), "r" (__mask)
		: "memory");
		break;
	case 4:
		asm volatile(
		"	lr.w	%0, %1\n"
		"	xor	%0, %0, %2\n"
		"	bnez	%0, 1f\n"
			ZAWRS_WRS_NTO "\n"
		"1:"
		: "=&r" (tmp), "+A" (*(u32 *)ptr)
		: "r" (val));
		break;
#if __riscv_xlen == 64
	case 8:
		asm volatile(
		"	lr.d	%0, %1\n"
		"	xor	%0, %0, %2\n"
		"	bnez	%0, 1f\n"
			ZAWRS_WRS_NTO "\n"
		"1:"
		: "=&r" (tmp), "+A" (*(u64 *)ptr)
		: "r" (val));
		break;
#endif
	default:
		BUILD_BUG();
	}

	return;

no_zawrs:
	ALT_RISCV_PAUSE();
}

#define __cmpwait_relaxed(ptr, val) \
	__cmpwait((ptr), (unsigned long)(val), sizeof(*(ptr)))
#endif

#endif /* _ASM_RISCV_CMPXCHG_H */