/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef __MIPS_ASM_SYNC_H__
#define __MIPS_ASM_SYNC_H__

/*
 * sync types are defined by the MIPS64 Instruction Set documentation in Volume
 * II-A of the MIPS Architecture Reference Manual, which can be found here:
 *
 *   https://www.mips.com/?do-download=the-mips64-instruction-set-v6-06
 *
 * Two types of barrier are provided:
 *
 *   1) Completion barriers, which ensure that a memory operation has actually
 *      completed & often involve stalling the CPU pipeline to do so.
 *
 *   2) Ordering barriers, which only ensure that affected memory operations
 *      won't be reordered in the CPU pipeline in a manner that violates the
 *      restrictions imposed by the barrier.
 *
 * Ordering barriers can be more efficient than completion barriers, since:
 *
 *   a) Ordering barriers only require memory access instructions which precede
 *      them in program order (older instructions) to reach a point in the
 *      load/store datapath beyond which reordering is not possible before
 *      allowing memory access instructions which follow them (younger
 *      instructions) to be performed. That is, older instructions don't
 *      actually need to complete - they just need to get far enough that all
 *      other coherent CPUs will observe their completion before they observe
 *      the effects of younger instructions.
 *
 *   b) Multiple variants of ordering barrier are provided which allow the
 *      effects to be restricted to different combinations of older or younger
 *      loads or stores. By way of example, if we only care that stores older
 *      than a barrier are observed prior to stores that are younger than a
 *      barrier & don't care about the ordering of loads then the 'wmb'
 *      ordering barrier can be used (see the sketch following this comment).
 *      Limiting the barrier's effects to stores allows loads to continue
 *      unaffected & potentially allows the CPU to make progress faster than
 *      if younger loads had to wait for older stores to complete.
 */

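/*
 * As an illustration of case (b) above, consider a producer which publishes
 * some data and then sets a flag that readers poll. This is only a minimal
 * sketch: the structure & field names are made up for the example, and wmb()
 * is the generic kernel write barrier, which asm/barrier.h builds on top of
 * the definitions in this header.
 *
 *	buf->data = value;	// older store: the payload
 *	wmb();			// order the payload store before the flag store
 *	buf->ready = 1;		// younger store: the flag readers poll
 *
 * Only store/store ordering is enforced here; loads issued around the wmb()
 * remain free to be reordered, which is what makes the 'wmb' ordering barrier
 * cheaper than a full completion barrier.
 */
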
/*
 * No sync instruction at all; used to allow code to nullify the effect of the
 * __SYNC() macro without needing lots of #ifdefery.
 */
#define __SYNC_none	-1

/*
 * A full completion barrier; all memory accesses appearing prior to this sync
 * instruction in program order must complete before any memory accesses
 * appearing after this sync instruction in program order.
 */
#define __SYNC_full	0x00

/*
 * For now we use a full completion barrier to implement all sync types, until
 * we're satisfied that lightweight ordering barriers defined by MIPSr6 are
 * sufficient to uphold our desired memory model.
 */
#define __SYNC_aq	__SYNC_full
#define __SYNC_rl	__SYNC_full
#define __SYNC_mb	__SYNC_full

/*
 * ...except on Cavium Octeon CPUs, which have been using the 'wmb' ordering
 * barrier since 2010 & omit 'rmb' barriers because the CPUs don't perform
 * speculative reads.
 */
#ifdef CONFIG_CPU_CAVIUM_OCTEON
# define __SYNC_rmb	__SYNC_none
# define __SYNC_wmb	0x04
#else
# define __SYNC_rmb	__SYNC_full
# define __SYNC_wmb	__SYNC_full
#endif

/*
 * A GINV sync is a little different; it doesn't relate directly to loads or
 * stores, but instead causes synchronization of an icache or TLB global
 * invalidation operation triggered by the ginvi or ginvt instructions
 * respectively. In cases where we need to know that a ginvi or ginvt operation
 * has been performed by all coherent CPUs, we must issue a sync instruction of
 * this type. Once this instruction graduates all coherent CPUs will have
 * observed the invalidation.
 */
#define __SYNC_ginv	0x14

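/*
 * A rough sketch of the intended pattern (illustrative only; the kernel's
 * real helpers for this live in asm/ginvt.h & asm/barrier.h):
 *
 *	# ... issue a ginvi/ginvt to start the global invalidation ...
 *	sync	0x14		# __SYNC_ginv: don't proceed until every
 *				# coherent CPU has observed the invalidation
 */
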
/* Trivial; indicate that we always need this sync instruction. */
#define __SYNC_always	(1 << 0)

/*
 * Indicate that we need this sync instruction only on systems with weakly
 * ordered memory access. In general this is most MIPS systems, but there are
 * exceptions which provide strongly ordered memory.
 */
#ifdef CONFIG_WEAK_ORDERING
# define __SYNC_weak_ordering	(1 << 1)
#else
# define __SYNC_weak_ordering	0
#endif

/*
 * Indicate that we need this sync instruction only on systems where LL/SC
 * don't implicitly provide a memory barrier. In general this is most MIPS
 * systems.
 */
#ifdef CONFIG_WEAK_REORDERING_BEYOND_LLSC
# define __SYNC_weak_llsc	(1 << 2)
#else
# define __SYNC_weak_llsc	0
#endif

/*
 * Some Loongson 3 CPUs have a bug wherein execution of a memory access (load,
 * store or prefetch) in between an LL & SC can cause the SC instruction to
 * erroneously succeed, breaking atomicity. Whilst it's unusual to write code
 * containing such sequences, this bug bites harder than we might otherwise
 * expect due to reordering & speculation:
 *
 * 1) A memory access appearing prior to the LL in program order may actually
 *    be executed after the LL - this is the reordering case.
 *
 *    In order to avoid this we need to place a memory barrier (ie. a SYNC
 *    instruction) prior to every LL instruction, in between it and any earlier
 *    memory access instructions.
 *
 *    This reordering case is fixed by 3A R2 CPUs, ie. 3A2000 models and later.
 *
 * 2) If a conditional branch exists between an LL & SC with a target outside
 *    of the LL-SC loop, for example an exit upon value mismatch in cmpxchg()
 *    or similar, then misprediction of the branch may allow speculative
 *    execution of memory accesses from outside of the LL-SC loop.
 *
 *    In order to avoid this we need a memory barrier (ie. a SYNC instruction)
 *    at each affected branch target.
 *
 *    This case affects all current Loongson 3 CPUs.
 *
 * Both cases cause an error in the cache coherence protocol: the Invalidate
 * from a competing LL-SC sequence goes 'missing', so the SC erroneously
 * observes that its core still holds the line in the Exclusive state and
 * allows the SC to proceed.
 *
 * Therefore the error only occurs on SMP systems. A sketch of the resulting
 * barrier placement follows the definitions below.
 */
#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS
# define __SYNC_loongson3_war	(1 << 31)
#else
# define __SYNC_loongson3_war	0
#endif

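/*
 * The resulting shape of an LL/SC loop with the workaround applied, e.g. a
 * cmpxchg()-style sequence, is roughly the following. This is only a sketch
 * with made-up register choices - the real loops are generated from
 * asm/cmpxchg.h & friends using __SYNC(full, loongson3_war):
 *
 *	   sync			# case 1: order earlier accesses before the LL
 *	1: ll	t0, 0(a0)	# load linked the current value
 *	   bne	t0, a1, 2f	# mismatch - leave the loop
 *	   move	t1, a2
 *	   sc	t1, 0(a0)	# attempt the store conditional
 *	   beqz	t1, 1b		# retry if we lost the reservation
 *	   nop
 *	2: sync			# case 2: barrier at the out-of-loop branch target
 */
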
/*
 * Some Cavium Octeon CPUs suffer from a bug that causes a single wmb ordering
 * barrier to be ineffective, requiring the use of 2 in sequence to provide an
 * effective barrier as noted by commit 6b07d38aaa52 ("MIPS: Octeon: Use
 * optimized memory barrier primitives."). Here we specify that the affected
 * sync instructions should be emitted twice.
 * Note that this expression is evaluated by the assembler (not the compiler),
 * and that the assembler evaluates '==' as 0 or -1, not 0 or 1.
 */
#ifdef CONFIG_CPU_CAVIUM_OCTEON
# define __SYNC_rpt(type)	(1 - (type == __SYNC_wmb))
#else
# define __SYNC_rpt(type)	1
#endif

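/*
 * Worked example of the assembler arithmetic above, for the Octeon case: for
 * a wmb-type sync, 'type == __SYNC_wmb' evaluates to -1, so the repeat count
 * is 1 - (-1) = 2 and the sync is emitted twice; for every other type the
 * comparison evaluates to 0 and the repeat count is 1 - 0 = 1. On all other
 * CPUs the repeat count is simply 1.
 */
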
/*
 * The main event. Here we actually emit a sync instruction of a given type, if
 * reason is non-zero.
 *
 * In future we have the option of emitting entries in a fixups-style table
 * here that would allow us to opportunistically remove some sync instructions
 * when we detect at runtime that we're running on a CPU that doesn't need
 * them.
 */
#ifdef CONFIG_CPU_HAS_SYNC
# define ____SYNC(_type, _reason, _else)			\
	.if	(( _type ) != -1) && ( _reason );		\
	.set	push;						\
	.set	MIPS_ISA_LEVEL_RAW;				\
	.rept	__SYNC_rpt(_type);				\
	sync	_type;						\
	.endr;							\
	.set	pop;						\
	.else;							\
	_else;							\
	.endif
#else
# define ____SYNC(_type, _reason, _else)
#endif

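/*
 * For example, ____SYNC(__SYNC_full, __SYNC_always, ) on a non-Octeon kernel
 * with CONFIG_CPU_HAS_SYNC=y expands to roughly the following (a sketch:
 * MIPS_ISA_LEVEL_RAW is left symbolic & the ';' statement separators are
 * shown as line breaks):
 *
 *	.if	((0x00) != -1) && ((1 << 0))
 *	.set	push
 *	.set	MIPS_ISA_LEVEL_RAW
 *	.rept	1
 *	sync	0x00
 *	.endr
 *	.set	pop
 *	.else
 *	.endif
 *
 * ie. a single 'sync 0' instruction. If _reason evaluates to zero (e.g.
 * __SYNC_weak_ordering on a strongly ordered kernel) the assembler instead
 * emits the _else argument, which is empty here.
 */
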
/*
 * Preprocessor magic to expand macros used as arguments before we insert them
 * into assembly code.
 */
#ifdef __ASSEMBLY__
# define ___SYNC(type, reason, else)				\
	____SYNC(type, reason, else)
#else
# define ___SYNC(type, reason, else)				\
	__stringify(____SYNC(type, reason, else))
#endif

#define __SYNC(type, reason)					\
	___SYNC(__SYNC_##type, __SYNC_##reason, )
#define __SYNC_ELSE(type, reason, else)				\
	___SYNC(__SYNC_##type, __SYNC_##reason, else)

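/*
 * Typical usage from C, as a sketch (the real users are asm/barrier.h, the
 * atomics & the LL/SC loops; the exact statements below are illustrative
 * rather than copied from them):
 *
 *	// Unconditionally emit a full completion barrier.
 *	asm volatile(__SYNC(full, always) ::: "memory");
 *
 *	// Emit a write barrier only on weakly ordered systems; on strongly
 *	// ordered systems this expands to nothing.
 *	asm volatile(__SYNC(wmb, weak_ordering) ::: "memory");
 *
 *	// Emit a barrier when the Loongson 3 workaround is enabled, or a
 *	// nop placeholder otherwise.
 *	asm volatile(__SYNC_ELSE(full, loongson3_war, nop) ::: "memory");
 *
 * From assembly (__ASSEMBLY__) the same macros emit the directives directly
 * rather than a string, so they can be used as plain statements.
 */
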
#endif /* __MIPS_ASM_SYNC_H__ */