/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef __MIPS_ASM_SYNC_H__
#define __MIPS_ASM_SYNC_H__

#include <linux/stringify.h>

/*
 * sync types are defined by the MIPS64 Instruction Set documentation in Volume
 * II-A of the MIPS Architecture Reference Manual, which can be found here:
 *
 *   https://www.mips.com/?do-download=the-mips64-instruction-set-v6-06
 *
 * Two types of barrier are provided:
 *
 * 1) Completion barriers, which ensure that a memory operation has actually
 *    completed & often involve stalling the CPU pipeline to do so.
 *
 * 2) Ordering barriers, which only ensure that affected memory operations
 *    won't be reordered in the CPU pipeline in a manner that violates the
 *    restrictions imposed by the barrier.
 *
 * Ordering barriers can be more efficient than completion barriers, since:
 *
 * a) Ordering barriers only require memory access instructions which precede
 *    them in program order (older instructions) to reach a point in the
 *    load/store datapath beyond which reordering is not possible before
 *    allowing memory access instructions which follow them (younger
 *    instructions) to be performed. That is, older instructions don't
 *    actually need to complete - they just need to get far enough that all
 *    other coherent CPUs will observe their completion before they observe
 *    the effects of younger instructions.
 *
 * b) Multiple variants of ordering barrier are provided which allow the
 *    effects to be restricted to different combinations of older or younger
 *    loads or stores. By way of example, if we only care that stores older
 *    than a barrier are observed prior to stores that are younger than a
 *    barrier & don't care about the ordering of loads then the 'wmb'
 *    ordering barrier can be used. Limiting the barrier's effects to stores
 *    allows loads to continue unaffected & potentially allows the CPU to
 *    make progress faster than if younger loads had to wait for older stores
 *    to complete.
 */

/*
 * No sync instruction at all; used to allow code to nullify the effect of the
 * __SYNC() macro without needing lots of #ifdefery.
 */
#define __SYNC_none	-1

/*
 * A full completion barrier; all memory accesses appearing prior to this sync
 * instruction in program order must complete before any memory accesses
 * appearing after this sync instruction in program order.
 */
#define __SYNC_full	0x00
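
/*
 * Illustration (not used by this header): `sync` with no operand assembles
 * to `sync 0`, i.e. a __SYNC_full barrier, so a full completion barrier
 * could be emitted from C with a GNU-compatible toolchain as:
 *
 *	asm volatile("sync" ::: "memory");
 *
 * Kernel code should use the __SYNC() macro defined below instead, which
 * also handles CPUs without a sync instruction.
 */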

/*
 * For now we use a full completion barrier to implement all sync types, until
 * we're satisfied that lightweight ordering barriers defined by MIPSr6 are
 * sufficient to uphold our desired memory model.
 */
#define __SYNC_aq	__SYNC_full
#define __SYNC_rl	__SYNC_full
#define __SYNC_mb	__SYNC_full

/*
 * ...except on Cavium Octeon CPUs, which have been using the 'wmb' ordering
 * barrier since 2010 & omit 'rmb' barriers because the CPUs don't perform
 * speculative reads.
 */
#ifdef CONFIG_CPU_CAVIUM_OCTEON
# define __SYNC_rmb	__SYNC_none
# define __SYNC_wmb	0x04
#else
# define __SYNC_rmb	__SYNC_full
# define __SYNC_wmb	__SYNC_full
#endif

/*
 * A GINV sync is a little different; it doesn't relate directly to loads or
 * stores, but instead causes synchronization of an icache or TLB global
 * invalidation operation triggered by the ginvi or ginvt instructions
 * respectively. In cases where we need to know that a ginvi or ginvt operation
 * has been performed by all coherent CPUs, we must issue a sync instruction of
 * this type. Once this instruction graduates, all coherent CPUs will have
 * observed the invalidation.
 */
#define __SYNC_ginv	0x14

/* Trivial; indicate that we always need this sync instruction. */
#define __SYNC_always	(1 << 0)

/*
 * Indicate that we need this sync instruction only on systems with weakly
 * ordered memory access. In general this is most MIPS systems, but there are
 * exceptions which provide strongly ordered memory.
 */
#ifdef CONFIG_WEAK_ORDERING
# define __SYNC_weak_ordering	(1 << 1)
#else
# define __SYNC_weak_ordering	0
#endif

/*
 * Indicate that we need this sync instruction only on systems where LL/SC
 * don't implicitly provide a memory barrier. In general this is most MIPS
 * systems.
 */
#ifdef CONFIG_WEAK_REORDERING_BEYOND_LLSC
# define __SYNC_weak_llsc	(1 << 2)
#else
# define __SYNC_weak_llsc	0
#endif
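
/*
 * Illustrative sketch of how the reason flags above are consumed (this
 * mirrors the style of asm/barrier.h; the exact definitions there may
 * differ): a barrier needed only on weakly ordered systems can be written
 * as
 *
 *	asm volatile(__SYNC(mb, weak_ordering) ::: "memory");
 *
 * which emits no sync instruction at all when CONFIG_WEAK_ORDERING=n,
 * since the reason operand then evaluates to 0.
 */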

/*
 * Some Loongson 3 CPUs have a bug wherein execution of a memory access (load,
 * store or prefetch) in between an LL & SC can cause the SC instruction to
 * erroneously succeed, breaking atomicity. Whilst it's unusual to write code
 * containing such sequences, this bug bites harder than we might otherwise
 * expect due to reordering & speculation:
 *
 * 1) A memory access appearing prior to the LL in program order may actually
 *    be executed after the LL - this is the reordering case.
 *
 *    In order to avoid this we need to place a memory barrier (i.e. a SYNC
 *    instruction) prior to every LL instruction, in between it and any earlier
 *    memory access instructions.
 *
 *    This reordering case is fixed by 3A R2 CPUs, i.e. 3A2000 models and
 *    later.
 *
 * 2) If a conditional branch exists between an LL & SC with a target outside
 *    of the LL-SC loop, for example an exit upon value mismatch in cmpxchg()
 *    or similar, then misprediction of the branch may allow speculative
 *    execution of memory accesses from outside of the LL-SC loop.
 *
 *    In order to avoid this we need a memory barrier (i.e. a SYNC instruction)
 *    at each affected branch target.
 *
 *    This case affects all current Loongson 3 CPUs.
 *
 * The cases described above cause an error in the cache coherence protocol:
 * the Invalidate for a competing LL-SC sequence goes 'missing', so the SC
 * erroneously observes that its core still holds the cache line in the
 * Exclusive state and allows the SC to proceed.
 *
 * The error therefore only occurs on SMP systems.
 */
#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS
# define __SYNC_loongson3_war	(1 << 31)
#else
# define __SYNC_loongson3_war	0
#endif

/*
 * Some Cavium Octeon CPUs suffer from a bug that causes a single wmb ordering
 * barrier to be ineffective, requiring the use of 2 in sequence to provide an
 * effective barrier as noted by commit 6b07d38aaa52 ("MIPS: Octeon: Use
 * optimized memory barrier primitives."). Here we specify that the affected
 * sync instructions should be emitted twice.
 * Note that this expression is evaluated by the assembler (not the compiler),
 * and that the assembler evaluates '==' as 0 or -1, not 0 or 1.
 */
#ifdef CONFIG_CPU_CAVIUM_OCTEON
# define __SYNC_rpt(type)	(1 - (type == __SYNC_wmb))
#else
# define __SYNC_rpt(type)	1
#endif
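
/*
 * Worked example, using the GNU as semantics noted above: when type is
 * __SYNC_wmb the comparison evaluates to -1, so __SYNC_rpt(type) is
 * 1 - (-1) = 2 and the affected sync is emitted twice; for every other
 * type the comparison evaluates to 0 and a single sync is emitted.
 */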

/*
 * The main event. Here we actually emit a sync instruction of a given type, if
 * the reason is non-zero.
 *
 * In future we have the option of emitting entries in a fixups-style table
 * here that would allow us to opportunistically remove some sync instructions
 * when we detect at runtime that we're running on a CPU that doesn't need
 * them.
 */
#ifdef CONFIG_CPU_HAS_SYNC
# define ____SYNC(_type, _reason, _else)			\
	.if	(( _type ) != -1) && ( _reason );		\
	.set	push;						\
	.set	MIPS_ISA_LEVEL_RAW;				\
	.rept	__SYNC_rpt(_type);				\
	sync	_type;						\
	.endr;							\
	.set	pop;						\
	.else;							\
	_else;							\
	.endif
#else
# define ____SYNC(_type, _reason, _else)
#endif

/*
 * Preprocessor magic to expand macros used as arguments before we insert them
 * into assembly code.
 */
#ifdef __ASSEMBLY__
# define ___SYNC(type, reason, else)				\
	____SYNC(type, reason, else)
#else
# define ___SYNC(type, reason, else)				\
	__stringify(____SYNC(type, reason, else))
#endif

#define __SYNC(type, reason)					\
	___SYNC(__SYNC_##type, __SYNC_##reason, )
#define __SYNC_ELSE(type, reason, else)				\
	___SYNC(__SYNC_##type, __SYNC_##reason, else)
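
/*
 * Usage sketch (illustrative; asm/barrier.h and the LL/SC helpers use the
 * same pattern, though their exact definitions may differ). From C the
 * macro expands to a string suitable for inline assembly:
 *
 *	asm volatile(__SYNC(mb, always) ::: "memory");
 *
 * From assembly (.S) files it expands directly to instructions, e.g. to
 * order memory accesses ahead of an LL on affected Loongson 3 CPUs:
 *
 *	__SYNC(full, loongson3_war)
 *	ll	t0, 0(a0)
 */

#endif /* __MIPS_ASM_SYNC_H__ */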