xref: /linux/arch/mips/include/asm/barrier.h (revision 2b64b2ed277ff23e785fbdb65098ee7e1252d64f)
1 /*
2  * This file is subject to the terms and conditions of the GNU General Public
3  * License.  See the file "COPYING" in the main directory of this archive
4  * for more details.
5  *
6  * Copyright (C) 2006 by Ralf Baechle (ralf@linux-mips.org)
7  */
8 #ifndef __ASM_BARRIER_H
9 #define __ASM_BARRIER_H
10 
11 #include <asm/addrspace.h>
12 
13 /*
14  * Sync types defined by the MIPS architecture (document MD00087 table 6.5)
15  * These values are used with the sync instruction to perform memory barriers.
16  * Types of ordering guarantees available through the SYNC instruction:
17  * - Completion Barriers
18  * - Ordering Barriers
19  * As compared to the completion barrier, the ordering barrier is a
20  * lighter-weight operation as it does not require the specified instructions
21  * before the SYNC to be already completed. Instead it only requires that those
22  * specified instructions which are subsequent to the SYNC in the instruction
23  * stream are never re-ordered for processing ahead of the specified
24  * instructions which are before the SYNC in the instruction stream.
25  * This potentially reduces how many cycles the barrier instruction must stall
26  * before it completes.
27  * Implementations that do not use any of the non-zero values of stype to define
28  * different barriers, such as ordering barriers, must make those stype values
29  * act the same as stype zero.
30  */
31 
32 /*
33  * Completion barriers:
34  * - Every synchronizable specified memory instruction (loads or stores or both)
35  *   that occurs in the instruction stream before the SYNC instruction must be
36  *   already globally performed before any synchronizable specified memory
37  *   instructions that occur after the SYNC are allowed to be performed, with
38  *   respect to any other processor or coherent I/O module.
39  *
40  * - The barrier does not guarantee the order in which instruction fetches are
41  *   performed.
42  *
43  * - A stype value of zero will always be defined such that it performs the most
44  *   complete set of synchronization operations that are defined. This means
45  *   stype zero always does a completion barrier that affects both loads and
46  *   stores preceding the SYNC instruction and both loads and stores that are
47  *   subsequent to the SYNC instruction. Non-zero values of stype may be defined
48  *   by the architecture or specific implementations to perform synchronization
49  *   behaviors that are less complete than that of stype zero. If an
50  *   implementation does not use one of these non-zero values to define a
51  *   different synchronization behavior, then that non-zero value of stype must
52  *   act the same as stype zero completion barrier. This allows software written
53  *   for an implementation with a lighter-weight barrier to work on another
54  *   implementation which only implements the stype zero completion barrier.
55  *
56  * - A completion barrier is required, potentially in conjunction with SSNOP (in
57  *   Release 1 of the Architecture) or EHB (in Release 2 of the Architecture),
58  *   to guarantee that memory reference results are visible across operating
59  *   mode changes. For example, a completion barrier is required on some
60  *   implementations on entry to and exit from Debug Mode to guarantee that
61  *   memory effects are handled correctly.
62  */
63 
64 /*
65  * stype 0 - A completion barrier that affects preceding loads and stores and
66  * subsequent loads and stores.
67  * - Older instructions which must reach the load/store ordering point before
68  *   the SYNC instruction completes: Loads, Stores
69  * - Younger instructions which must reach the load/store ordering point only
70  *   after the SYNC instruction completes: Loads, Stores
71  * - Older instructions which must be globally performed when the SYNC
72  *   instruction completes: Loads, Stores
73  */
74 #define STYPE_SYNC 0x0
75 
76 /*
77  * Ordering barriers:
78  * - Every synchronizable specified memory instruction (loads or stores or both)
79  *   that occurs in the instruction stream before the SYNC instruction must
80  *   reach a stage in the load/store datapath after which no instruction
81  *   re-ordering is possible before any synchronizable specified memory
82  *   instruction which occurs after the SYNC instruction in the instruction
83  *   stream reaches the same stage in the load/store datapath.
84  *
85  * - If any memory instruction before the SYNC instruction in program order
86  *   generates a memory request to external memory and any memory
87  *   instruction after the SYNC instruction in program order also generates a
88  *   memory request to external memory, the memory request belonging to the
89  *   older instruction must be globally performed before the time the memory
90  *   request belonging to the younger instruction is globally performed.
91  *
92  * - The barrier does not guarantee the order in which instruction fetches are
93  *   performed.
94  */
95 
96 /*
97  * stype 0x10 - An ordering barrier that affects preceding loads and stores and
98  * subsequent loads and stores.
99  * - Older instructions which must reach the load/store ordering point before
100  *   the SYNC instruction completes: Loads, Stores
101  * - Younger instructions which must reach the load/store ordering point only
102  *   after the SYNC instruction completes: Loads, Stores
103  * - Older instructions which must be globally performed when the SYNC
104  *   instruction completes: N/A
105  */
106 #define STYPE_SYNC_MB 0x10
107 
108 /*
109  * stype 0x14 - A completion barrier specific to global invalidations
110  *
111  * When a sync instruction of this type completes, any preceding GINVI or GINVT
112  * operation has been globalized & completed on all coherent CPUs. Anything
113  * that the GINV* instruction should invalidate will have been invalidated on
114  * all coherent CPUs when this instruction completes. It is implementation
115  * specific whether the GINV* instructions themselves will ensure completion,
116  * or this sync type will. This stype is issued by sync_ginv().
117  *
118  * In systems implementing global invalidates (i.e. with Config5.GI == 2 or 3)
119  * this sync type also requires that previous SYNCI operations have completed.
120  */
121 #define STYPE_GINV	0x14
122 
/*
 * __sync() - emit a bare "sync" instruction (no stype operand).
 *
 * With CONFIG_CPU_HAS_SYNC the instruction is assembled under ".set mips2"
 * inside a ".set push"/".set pop" pair, presumably so that the assembler
 * accepts it whatever ISA level the rest of the file is built for. The
 * "memory" clobber additionally makes the statement a compiler barrier.
 *
 * Without CONFIG_CPU_HAS_SYNC the macro expands to an empty statement: no
 * instruction is emitted and there is no compiler barrier either.
 */
123 #ifdef CONFIG_CPU_HAS_SYNC
124 #define __sync()				\
125 	__asm__ __volatile__(			\
126 		".set	push\n\t"		\
127 		".set	noreorder\n\t"		\
128 		".set	mips2\n\t"		\
129 		"sync\n\t"			\
130 		".set	pop"			\
131 		: /* no output */		\
132 		: /* no input */		\
133 		: "memory")
134 #else
135 #define __sync()	do { } while(0)
136 #endif
137 
/*
 * __fast_iob() - dummy word load from the address CKSEG1 into register $0,
 * followed by a nop, assembled with ".set noreorder".
 *
 * The loaded value is discarded; only the access itself matters.
 * NOTE(review): CKSEG1 comes from <asm/addrspace.h> and is presumably the
 * base of the uncached kernel segment, which would force the load out to the
 * bus - confirm against that header.
 * The "memory" clobber makes the statement a compiler barrier as well.
 */
138 #define __fast_iob()				\
139 	__asm__ __volatile__(			\
140 		".set	push\n\t"		\
141 		".set	noreorder\n\t"		\
142 		"lw	$0,%0\n\t"		\
143 		"nop\n\t"			\
144 		".set	pop"			\
145 		: /* no output */		\
146 		: "m" (*(int *)CKSEG1)		\
147 		: "memory")
/*
 * fast_wmb() / fast_rmb() / fast_mb() / fast_iob() - per-CPU barrier
 * primitives.
 *
 * Cavium Octeon:
 *   - fast_wmb() is __syncw(), i.e. two back-to-back "syncw" instructions
 *     assembled under ".set arch=octeon".
 *     NOTE(review): the reason for issuing syncw twice is not stated here;
 *     presumably an erratum workaround - confirm before changing it.
 *   - fast_rmb() is only a compiler barrier().
 *   - fast_mb() is __sync().
 *   - fast_iob() is an empty statement.
 *
 * All other CPUs:
 *   - fast_wmb(), fast_rmb() and fast_mb() are all __sync().
 *   - fast_iob() is __sync() followed by __fast_iob(), except with
 *     CONFIG_SGI_IP28, where it is a load, a "sync" and a second load of the
 *     same word at CKSEG1ADDR(0x1fa00004), all under ".set noreorder".
 */
148 #ifdef CONFIG_CPU_CAVIUM_OCTEON
149 # define OCTEON_SYNCW_STR	".set push\n.set arch=octeon\nsyncw\nsyncw\n.set pop\n"
150 # define __syncw()	__asm__ __volatile__(OCTEON_SYNCW_STR : : : "memory")
151 
152 # define fast_wmb()	__syncw()
153 # define fast_rmb()	barrier()
154 # define fast_mb()	__sync()
155 # define fast_iob()	do { } while (0)
156 #else /* ! CONFIG_CPU_CAVIUM_OCTEON */
157 # define fast_wmb()	__sync()
158 # define fast_rmb()	__sync()
159 # define fast_mb()	__sync()
160 # ifdef CONFIG_SGI_IP28
161 #  define fast_iob()				\
162 	__asm__ __volatile__(			\
163 		".set	push\n\t"		\
164 		".set	noreorder\n\t"		\
165 		"lw	$0,%0\n\t"		\
166 		"sync\n\t"			\
167 		"lw	$0,%0\n\t"		\
168 		".set	pop"			\
169 		: /* no output */		\
170 		: "m" (*(int *)CKSEG1ADDR(0x1fa00004)) \
171 		: "memory")
172 # else
173 #  define fast_iob()				\
174 	do {					\
175 		__sync();			\
176 		__fast_iob();			\
177 	} while (0)
178 # endif
179 #endif /* CONFIG_CPU_CAVIUM_OCTEON */
180 
/*
 * mb(), iob(), wmb() and rmb().
 *
 * With CONFIG_CPU_HAS_WB both mb() and iob() call wbflush() from
 * <asm/wbflush.h>; otherwise they map to fast_mb() and fast_iob().
 * wmb() and rmb() always map to fast_wmb() and fast_rmb(), independent of
 * CONFIG_CPU_HAS_WB.
 */
181 #ifdef CONFIG_CPU_HAS_WB
182 
183 #include <asm/wbflush.h>
184 
185 #define mb()		wbflush()
186 #define iob()		wbflush()
187 
188 #else /* !CONFIG_CPU_HAS_WB */
189 
190 #define mb()		fast_mb()
191 #define iob()		fast_iob()
192 
193 #endif /* !CONFIG_CPU_HAS_WB */
194 
195 #define wmb()		fast_wmb()
196 #define rmb()		fast_rmb()
197 
/*
 * __smp_mb() / __smp_rmb() / __smp_wmb().
 *
 * Without CONFIG_WEAK_ORDERING all three are just a compiler barrier().
 *
 * With CONFIG_WEAK_ORDERING:
 *   - Cavium Octeon: __sync() for the full barrier, barrier() for reads and
 *     __syncw() for writes.
 *   - everything else: a raw "sync" with a "memory" clobber for all three.
 *     Note that this does not go through __sync(), so it carries no
 *     ".set mips2" and does not depend on CONFIG_CPU_HAS_SYNC.
 *
 * NOTE(review): these names are presumably picked up by
 * <asm-generic/barrier.h>, which this header includes at its end, to build
 * the smp_*() barriers - confirm there.
 */
198 #if defined(CONFIG_WEAK_ORDERING)
199 # ifdef CONFIG_CPU_CAVIUM_OCTEON
200 #  define __smp_mb()	__sync()
201 #  define __smp_rmb()	barrier()
202 #  define __smp_wmb()	__syncw()
203 # else
204 #  define __smp_mb()	__asm__ __volatile__("sync" : : :"memory")
205 #  define __smp_rmb()	__asm__ __volatile__("sync" : : :"memory")
206 #  define __smp_wmb()	__asm__ __volatile__("sync" : : :"memory")
207 # endif
208 #else
209 #define __smp_mb()	barrier()
210 #define __smp_rmb()	barrier()
211 #define __smp_wmb()	barrier()
212 #endif
213 
/*
 * __WEAK_LLSC_MB is an assembler string fragment: it contains a "sync" only
 * when both CONFIG_WEAK_REORDERING_BEYOND_LLSC and CONFIG_SMP are set, and
 * nothing but whitespace otherwise.
 *
 * smp_llsc_mb() wraps that fragment in an asm statement with a "memory"
 * clobber, so it is always a compiler barrier and becomes a hardware barrier
 * only in the configuration above.
 */
214 #if defined(CONFIG_WEAK_REORDERING_BEYOND_LLSC) && defined(CONFIG_SMP)
215 #define __WEAK_LLSC_MB		"	sync	\n"
216 #else
217 #define __WEAK_LLSC_MB		"		\n"
218 #endif
219 
220 #define smp_llsc_mb()	__asm__ __volatile__(__WEAK_LLSC_MB : : :"memory")
221 
/*
 * Barriers placed around ll/sc based atomics, plus nudge_writes().
 *
 * Cavium Octeon: the barrier before an ll/sc sequence is a write barrier
 * (smp_wmb() / __smp_wmb()), and nudge_writes() issues a single "syncw" -
 * unlike __syncw(), which issues two.
 *
 * Other CPUs: the barrier before an ll/sc sequence is smp_llsc_mb() and
 * nudge_writes() is a full mb().
 *
 * In both cases __smp_mb__before_atomic() maps to __smp_mb__before_llsc()
 * and __smp_mb__after_atomic() maps to smp_llsc_mb().
 *
 * NOTE(review): smp_wmb() is not defined in this header; presumably it comes
 * from <asm-generic/barrier.h> - confirm.
 */
222 #ifdef CONFIG_CPU_CAVIUM_OCTEON
223 #define smp_mb__before_llsc() smp_wmb()
224 #define __smp_mb__before_llsc() __smp_wmb()
225 /* Cause previous writes to become visible on all CPUs as soon as possible */
226 #define nudge_writes() __asm__ __volatile__(".set push\n\t"		\
227 					    ".set arch=octeon\n\t"	\
228 					    "syncw\n\t"			\
229 					    ".set pop" : : : "memory")
230 #else
231 #define smp_mb__before_llsc() smp_llsc_mb()
232 #define __smp_mb__before_llsc() smp_llsc_mb()
233 #define nudge_writes() mb()
234 #endif
235 
236 #define __smp_mb__before_atomic()	__smp_mb__before_llsc()
237 #define __smp_mb__after_atomic()	smp_llsc_mb()
238 
239 /*
240  * Some Loongson 3 CPUs have a bug wherein execution of a memory access (load,
241  * store or pref) in between an ll & sc can cause the sc instruction to
242  * erroneously succeed, breaking atomicity. Whilst it's unusual to write code
243  * containing such sequences, this bug bites harder than we might otherwise
244  * expect due to reordering & speculation:
245  *
246  * 1) A memory access appearing prior to the ll in program order may actually
247  *    be executed after the ll - this is the reordering case.
248  *
249  *    In order to avoid this we need to place a memory barrier (ie. a sync
250  *    instruction) prior to every ll instruction, in between it & any earlier
251  *    memory access instructions. Many of these cases are already covered by
252  *    smp_mb__before_llsc() but for the remaining cases, typically ones in
253  *    which multiple CPUs may operate on a memory location but ordering is not
254  *    usually guaranteed, we use loongson_llsc_mb() below.
255  *
256  *    This reordering case is fixed by 3A R2 CPUs, ie. 3A2000 models and later.
257  *
258  * 2) If a conditional branch exists between an ll & sc with a target outside
259  *    of the ll-sc loop, for example an exit upon value mismatch in cmpxchg()
260  *    or similar, then misprediction of the branch may allow speculative
261  *    execution of memory accesses from outside of the ll-sc loop.
262  *
263  *    In order to avoid this we need a memory barrier (ie. a sync instruction)
264  *    at each affected branch target, for which we also use loongson_llsc_mb()
265  *    defined below.
266  *
267  *    This case affects all current Loongson 3 CPUs.
268  */
269 #ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS /* Loongson-3's LLSC workaround */
/*
 * The barrier body is __WEAK_LLSC_MB, so a "sync" is emitted only when
 * CONFIG_WEAK_REORDERING_BEYOND_LLSC and CONFIG_SMP are both set; otherwise
 * this is just a compiler barrier.
 * NOTE(review): confirm that every configuration enabling
 * CONFIG_CPU_LOONGSON3_WORKAROUNDS also enables both of those options.
 */
270 #define loongson_llsc_mb()	__asm__ __volatile__(__WEAK_LLSC_MB : : :"memory")
271 #else
/* No workaround configured: expands to an empty statement. */
272 #define loongson_llsc_mb()	do { } while (0)
273 #endif
274 
/*
 * sync_ginv() - issue a SYNC instruction with stype STYPE_GINV (0x14).
 *
 * The stype is passed to the assembler as an immediate ("i") operand.
 * The asm statement has no "memory" clobber, so unlike the barrier macros in
 * this file it is not a compiler barrier for ordinary memory accesses.
 */
275 static inline void sync_ginv(void)
276 {
277 	asm volatile("sync\t%0" :: "i"(STYPE_GINV));
278 }
279 
280 #include <asm-generic/barrier.h>
281 
282 #endif /* __ASM_BARRIER_H */
283