xref: /linux/arch/s390/include/asm/percpu.h (revision bba2c3615bd6cfee7456d1130f2e6b01b3f4e9ba)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 #ifndef __ARCH_S390_PERCPU__
3 #define __ARCH_S390_PERCPU__
4 
5 #include <linux/preempt.h>
6 #include <asm/cmpxchg.h>
7 #include <asm/march.h>
8 
9 /*
10  * s390 uses its own implementation for per cpu data, the offset of
11  * the cpu local data area is cached in the cpu's lowcore memory.
12  */
/* Expands to the percpu_offset field of the lowcore returned by get_lowcore(). */
13 #define __my_cpu_offset get_lowcore()->percpu_offset
14 
/*
 * arch_raw_cpu_ptr - add the percpu offset from lowcore to a percpu address
 * @_ptr: percpu pointer; expanded twice, once for its value and once inside
 *        TYPEOF_UNQUAL() for the type it points to
 *
 * The integer value of @_ptr is placed in a register and the percpu_offset
 * field of struct lowcore is added to it with a single "ag" instruction.
 * The field is addressed by its offsetof() displacement. ALTERNATIVE()
 * provides a second form of the instruction, for
 * ALT_FEATURE(MFEATURE_LOWCORE), which uses the displacement with
 * LOWCORE_ALT_ADDRESS added.
 *
 * The unnamed "m" input is not referenced by the instruction template;
 * presumably it only tells the compiler that percpu_offset is read.
 *
 * Evaluates to the sum, cast to an unqualified __kernel pointer to the type
 * @_ptr points to. The condition code is declared as clobbered.
 */
15 #define arch_raw_cpu_ptr(_ptr)						\
16 ({									\
17 	unsigned long lc_percpu, tcp_ptr__;				\
18 									\
19 	tcp_ptr__ = (__force unsigned long)(_ptr);			\
20 	lc_percpu = offsetof(struct lowcore, percpu_offset);		\
21 	asm_inline volatile(						\
22 	ALTERNATIVE("ag		%[__ptr__],%[offzero](%%r0)\n",		\
23 		    "ag		%[__ptr__],%[offalt](%%r0)\n",		\
24 		    ALT_FEATURE(MFEATURE_LOWCORE))			\
25 	: [__ptr__] "+d" (tcp_ptr__)					\
26 	: [offzero] "i" (lc_percpu),					\
27 	  [offalt] "i" (lc_percpu + LOWCORE_ALT_ADDRESS),		\
28 	  "m" (((struct lowcore *)0)->percpu_offset)			\
29 	: "cc");							\
30 	(TYPEOF_UNQUAL(*(_ptr)) __force __kernel *)tcp_ptr__;		\
31 })
32 
33 /*
34  * We use a compare-and-swap loop since that uses less cpu cycles than
35  * disabling and enabling interrupts like the generic variant would do.
36  */
/*
 * arch_this_cpu_to_op_simple - apply a binary operator to a percpu variable
 * @pcp: percpu variable
 * @val: right hand operand; it is expanded inside the retry loop, so it is
 *       evaluated again each time the cmpxchg() has to be repeated
 * @op:  C binary operator token, e.g. +, & or |
 *
 * With preemption disabled (notrace variants), the address of this cpu's
 * instance is obtained with raw_cpu_ptr() and read once. "old op val" is
 * then computed and stored with cmpxchg(); the loop repeats with the value
 * returned by cmpxchg() until that value equals the expected old value.
 *
 * Evaluates to the new value that was stored.
 */
37 #define arch_this_cpu_to_op_simple(pcp, val, op)			\
38 ({									\
39 	typedef typeof(pcp) pcp_op_T__;					\
40 	pcp_op_T__ old__, new__, prev__;				\
41 	pcp_op_T__ *ptr__;						\
42 	preempt_disable_notrace();					\
43 	ptr__ = raw_cpu_ptr(&(pcp));					\
44 	prev__ = READ_ONCE(*ptr__);					\
45 	do {								\
46 		old__ = prev__;						\
47 		new__ = old__ op (val);					\
48 		prev__ = cmpxchg(ptr__, old__, new__);			\
49 	} while (prev__ != old__);					\
50 	preempt_enable_notrace();					\
51 	new__;								\
52 })
53 
/*
 * The _1 and _2 variants of add, add_return, and and or all map to the
 * cmpxchg() loop of arch_this_cpu_to_op_simple(). The numeric suffix is
 * presumably the size of the percpu variable in bytes, as selected by the
 * generic percpu code - confirm against asm-generic/percpu.h.
 */
54 #define this_cpu_add_1(pcp, val)	arch_this_cpu_to_op_simple(pcp, val, +)
55 #define this_cpu_add_2(pcp, val)	arch_this_cpu_to_op_simple(pcp, val, +)
56 #define this_cpu_add_return_1(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
57 #define this_cpu_add_return_2(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
58 #define this_cpu_and_1(pcp, val)	arch_this_cpu_to_op_simple(pcp, val, &)
59 #define this_cpu_and_2(pcp, val)	arch_this_cpu_to_op_simple(pcp, val, &)
60 #define this_cpu_or_1(pcp, val)		arch_this_cpu_to_op_simple(pcp, val, |)
61 #define this_cpu_or_2(pcp, val)		arch_this_cpu_to_op_simple(pcp, val, |)
62 
63 /*
64  * Macros to be used for percpu code section based on atomic instructions.
65  *
66  * Avoid the need to use preempt_disable() / preempt_enable() pairs and the
67  * conditional preempt_schedule_notrace() function calls which come with
68  * this. The idea is that this_cpu operations based on atomic instructions are
69  * guarded with mviy instructions:
70  *
71  * - The first mviy instruction writes the number of the register which
72  *   contains the percpu variable address to lowcore. This also indicates
73  *   that a percpu code section is being executed.
74  *
75  * - The first instruction following the first mviy instruction must be the
76  *   ag instruction which adds the percpu offset to the percpu address register.
77  *
78  * - Afterwards the atomic percpu operation follows.
79  *
80  * - Then a second mviy instruction writes a zero to lowcore, which indicates
81  *   the end of the percpu code section.
82  *
83  * - In case of an interrupt/exception/nmi the register number which was
84  *   written to lowcore is copied to the exception frame (pt_regs), and a zero
85  *   is written to lowcore.
86  *
87  * - On return to the previous context it is checked if a percpu code section
88  *   was executed (saved register number not zero), and if the process was
89  *   migrated to a different cpu. If the percpu offset was already added to
90  *   the percpu address register (instruction address does _not_ point to the
91  *   ag instruction) the content of the percpu address register is adjusted so
92  *   it points to percpu variable of the new cpu.
93  *
94  * Inline assemblies making use of this typically have a code sequence like:
95  *
96  *   MVIY_PERCPU(...) <- start of percpu code section
97  *   AG_ALT(...)      <- add percpu offset; must be the second instruction
98  *   atomic_op	      <- atomic op
99  *   MVIY_ALT(...)    <- end of percpu code section
100  */
101 
/*
 * MVIY_PERCPU - emit the mviy instruction that opens a percpu code section
 * @disp:    lowcore displacement used by the default instruction
 * @dispalt: lowcore displacement used by the MFEATURE_LOWCORE alternative
 * @reg:     asm operand which is replaced by a register name (%r0..%r15)
 *
 * A temporary assembler macro GEN_MVIY is defined. It iterates over the
 * numbers 0..15 with .irp and compares @reg textually (.ifc) against
 * "%r<number>"; for the matching number it emits
 * "mviy <disp>(%r0),<number>", i.e. the register number becomes the
 * immediate operand. GEN_MVIY is invoked once for each ALTERNATIVE()
 * variant and removed again with .purgem, so that the next expansion of
 * this macro can define it anew.
 */
102 #define MVIY_PERCPU(disp, dispalt, reg)						\
103 	".macro GEN_MVIY disp reg\n"						\
104 	".irp	rs,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15\n"			\
105 	"	.ifc \\reg,%%r\\rs\n"						\
106 	"	mviy	\\disp(%%r0),\\rs\n"					\
107 	"	.endif\n"							\
108 	".endr\n"								\
109 	".endm\n"								\
110 	ALTERNATIVE("GEN_MVIY " __stringify(disp)    " " __stringify(reg) "\n",	\
111 		    "GEN_MVIY " __stringify(dispalt) " " __stringify(reg) "\n",	\
112 		    ALT_FEATURE(MFEATURE_LOWCORE))				\
113 	".purgem GEN_MVIY\n"
114 
/*
 * MVIY_ALT - emit the mviy instruction that closes a percpu code section
 * @disp:    string with the lowcore displacement of the default instruction
 * @dispalt: string with the displacement of the MFEATURE_LOWCORE alternative
 *
 * Emits "mviy <disp>(%r0),0", which stores the immediate value zero.
 */
115 #define MVIY_ALT(disp, dispalt)							\
116 	ALTERNATIVE("	mviy	" disp	  "(%%r0),0\n",				\
117 		    "	mviy	" dispalt "(%%r0),0\n",				\
118 		    ALT_FEATURE(MFEATURE_LOWCORE))
119 
/*
 * AG_ALT - emit the ag instruction of a percpu code section
 * @disp:    string with the lowcore displacement of the default instruction
 * @dispalt: string with the displacement of the MFEATURE_LOWCORE alternative
 * @reg:     string naming the register operand the value is added to
 *
 * Emits "ag <reg>, <disp>(%r0)". The users in this file pass the
 * displacement of the lowcore percpu_offset field and the register which
 * holds the percpu address.
 */
120 #define AG_ALT(disp, dispalt, reg)						\
121 	ALTERNATIVE("	ag	" reg ", " disp	   "(%%r0)\n",			\
122 		    "	ag	" reg ", " dispalt "(%%r0)\n",			\
123 		    ALT_FEATURE(MFEATURE_LOWCORE))
124 
125 #ifndef MARCH_HAS_Z196_FEATURES
126 
/*
 * Without MARCH_HAS_Z196_FEATURES the _4 and _8 variants of add,
 * add_return, and and or use the cmpxchg() loop of
 * arch_this_cpu_to_op_simple() as well.
 */
127 #define this_cpu_add_4(pcp, val)	arch_this_cpu_to_op_simple(pcp, val, +)
128 #define this_cpu_add_8(pcp, val)	arch_this_cpu_to_op_simple(pcp, val, +)
129 #define this_cpu_add_return_4(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
130 #define this_cpu_add_return_8(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
131 #define this_cpu_and_4(pcp, val)	arch_this_cpu_to_op_simple(pcp, val, &)
132 #define this_cpu_and_8(pcp, val)	arch_this_cpu_to_op_simple(pcp, val, &)
133 #define this_cpu_or_4(pcp, val)		arch_this_cpu_to_op_simple(pcp, val, |)
134 #define this_cpu_or_8(pcp, val)		arch_this_cpu_to_op_simple(pcp, val, |)
135 
136 #else /* MARCH_HAS_Z196_FEATURES */
137 
/*
 * arch_this_cpu_add - add a value to a percpu variable in a percpu code section
 * @pcp:    percpu variable
 * @val:    value to add; converted to the type of @pcp
 * @op1:    mnemonic of the instruction used when @val is passed in a register
 * @op2:    mnemonic of the instruction used when @val is passed as immediate
 * @szcast: type @val is cast to for the range check and the immediate operand
 *          (int and long for the users in this file)
 *
 * ptr__ starts out as PERCPU_PTR(&(pcp)) (defined elsewhere). Both branches
 * emit the same sequence: MVIY_PERCPU with the lowcore percpu_register
 * displacement, AG_ALT with the lowcore percpu_offset displacement, the
 * actual operation on 0(ptr__), and MVIY_ALT.
 *
 * If @val is a compile time constant which, cast to @szcast, lies within
 * -128..127, @op2 is used with the value as immediate operand. Otherwise
 * @op1 is used with the value in a register; the value it places in old__
 * is not used.
 *
 * Statement macro, does not yield a value. The condition code is declared
 * as clobbered.
 */
138 #define arch_this_cpu_add(pcp, val, op1, op2, szcast)				\
139 do {										\
140 	unsigned long lc_pcpr, lc_pcpo;						\
141 	typedef typeof(pcp) pcp_op_T__;						\
142 	pcp_op_T__ val__ = (val);						\
143 	pcp_op_T__ old__, *ptr__;						\
144 										\
145 	lc_pcpr = offsetof(struct lowcore, percpu_register);			\
146 	lc_pcpo = offsetof(struct lowcore, percpu_offset);			\
147 	ptr__ = PERCPU_PTR(&(pcp));						\
148 	if (__builtin_constant_p(val__) &&					\
149 	    ((szcast)val__ > -129) && ((szcast)val__ < 128)) {			\
150 		asm volatile(							\
151 			MVIY_PERCPU("%[disppcpr]", "%[dispaltpcpr]", "%[ptr__]")\
152 			AG_ALT("%[disppcpo]", "%[dispaltpcpo]", "%[ptr__]")	\
153 			op2 "   0(%[ptr__]),%[val__]\n"				\
154 			MVIY_ALT("%[disppcpr]", "%[dispaltpcpr]")		\
155 			: [ptr__] "+&a" (ptr__), "+m" (*ptr__),			\
156 			  "=m" (((struct lowcore *)0)->percpu_register)		\
157 			: [val__] "i" ((szcast)val__),				\
158 			  [disppcpr] "i" (lc_pcpr),				\
159 			  [disppcpo] "i" (lc_pcpo),				\
160 			  [dispaltpcpr] "i" (lc_pcpr + LOWCORE_ALT_ADDRESS),	\
161 			  [dispaltpcpo] "i" (lc_pcpo + LOWCORE_ALT_ADDRESS),	\
162 			  "m" (((struct lowcore *)0)->percpu_offset)		\
163 			: "cc");						\
164 	} else {								\
165 		asm volatile(							\
166 			MVIY_PERCPU("%[disppcpr]", "%[dispaltpcpr]", "%[ptr__]")\
167 			AG_ALT("%[disppcpo]", "%[dispaltpcpo]", "%[ptr__]")	\
168 			op1 "   %[old__],%[val__],0(%[ptr__])\n"		\
169 			MVIY_ALT("%[disppcpr]", "%[dispaltpcpr]")		\
170 			: [old__] "=&d" (old__),				\
171 			  [ptr__] "+&a" (ptr__),  "+m" (*ptr__),		\
172 			  "=m" (((struct lowcore *)0)->percpu_register)		\
173 			: [val__] "d" (val__),					\
174 			  [disppcpr] "i" (lc_pcpr),				\
175 			  [disppcpo] "i" (lc_pcpo),				\
176 			  [dispaltpcpr] "i" (lc_pcpr + LOWCORE_ALT_ADDRESS),	\
177 			  [dispaltpcpo] "i" (lc_pcpo + LOWCORE_ALT_ADDRESS),	\
178 			  "m" (((struct lowcore *)0)->percpu_offset)		\
179 			: "cc");						\
180 	}									\
181 } while (0)
182 
/*
 * The _4 variant uses "laa" (register operand) or "asi" (immediate operand)
 * with int as range check type; the _8 variant uses "laag" or "agsi" with
 * long as range check type.
 */
183 #define this_cpu_add_4(pcp, val) arch_this_cpu_add(pcp, val, "laa", "asi", int)
184 #define this_cpu_add_8(pcp, val) arch_this_cpu_add(pcp, val, "laag", "agsi", long)
185 
/*
 * arch_this_cpu_add_return - add a value to a percpu variable, yield the sum
 * @pcp: percpu variable
 * @val: value to add; converted to the type of @pcp
 * @op:  mnemonic of the instruction which performs the addition and places
 *       the previous content of the variable in old__
 *
 * ptr__ starts out as PERCPU_PTR(&(pcp)) (defined elsewhere). The asm emits
 * MVIY_PERCPU, AG_ALT, @op on 0(ptr__) and MVIY_ALT, in this order.
 *
 * Evaluates to old__ + val__, i.e. the new value is recomputed in C from
 * the old value returned by the instruction. The condition code is
 * declared as clobbered.
 */
186 #define arch_this_cpu_add_return(pcp, val, op)				\
187 ({									\
188 	unsigned long lc_pcpr, lc_pcpo;					\
189 	typedef typeof(pcp) pcp_op_T__; 				\
190 	pcp_op_T__ val__ = (val);					\
191 	pcp_op_T__ old__, *ptr__;					\
192 									\
193 	lc_pcpr = offsetof(struct lowcore, percpu_register);		\
194 	lc_pcpo = offsetof(struct lowcore, percpu_offset);		\
195 	ptr__ = PERCPU_PTR(&(pcp));					\
196 	asm_inline volatile(						\
197 		MVIY_PERCPU("%[disppcpr]", "%[dispaltpcpr]", "%[ptr__]")\
198 		AG_ALT("%[disppcpo]", "%[dispaltpcpo]", "%[ptr__]")	\
199 		op "	%[old__],%[val__],0(%[ptr__])\n"		\
200 		MVIY_ALT("%[disppcpr]", "%[dispaltpcpr]")		\
201 		: [old__] "=&d" (old__),				\
202 		  [ptr__] "+&a" (ptr__), "+m" (*ptr__),			\
203 		  "=m" (((struct lowcore *)0)->percpu_register)		\
204 		: [val__] "d" (val__),					\
205 		  [disppcpr] "i" (lc_pcpr),				\
206 		  [disppcpo] "i" (lc_pcpo),				\
207 		  [dispaltpcpr] "i" (lc_pcpr + LOWCORE_ALT_ADDRESS),	\
208 		  [dispaltpcpo] "i" (lc_pcpo + LOWCORE_ALT_ADDRESS),	\
209 		  "m" (((struct lowcore *)0)->percpu_offset)		\
210 		: "cc");						\
211 	old__ + val__;							\
212 })
213 
/* add_return: the _4 variant uses "laa", the _8 variant uses "laag". */
214 #define this_cpu_add_return_4(pcp, val) arch_this_cpu_add_return(pcp, val, "laa")
215 #define this_cpu_add_return_8(pcp, val) arch_this_cpu_add_return(pcp, val, "laag")
216 
/*
 * arch_this_cpu_to_op - apply an interlocked operation to a percpu variable
 * @pcp: percpu variable
 * @val: second operand; converted to the type of @pcp
 * @op:  mnemonic of the instruction which combines @val with the variable
 *
 * ptr__ starts out as PERCPU_PTR(&(pcp)) (defined elsewhere). The asm emits
 * MVIY_PERCPU, AG_ALT, @op on 0(ptr__) and MVIY_ALT, in this order. The
 * value the instruction places in old__ is not used.
 *
 * Statement macro, does not yield a value. The condition code is declared
 * as clobbered.
 */
217 #define arch_this_cpu_to_op(pcp, val, op)				\
218 do {									\
219 	unsigned long lc_pcpr, lc_pcpo;					\
220 	typedef typeof(pcp) pcp_op_T__; 				\
221 	pcp_op_T__ val__ = (val);					\
222 	pcp_op_T__ old__, *ptr__;					\
223 									\
224 	lc_pcpr = offsetof(struct lowcore, percpu_register);		\
225 	lc_pcpo = offsetof(struct lowcore, percpu_offset);		\
226 	ptr__ = PERCPU_PTR(&(pcp));					\
227 	asm_inline volatile(						\
228 		MVIY_PERCPU("%[disppcpr]", "%[dispaltpcpr]", "%[ptr__]")\
229 		AG_ALT("%[disppcpo]", "%[dispaltpcpo]", "%[ptr__]")	\
230 		op "    %[old__],%[val__],0(%[ptr__])\n"		\
231 		MVIY_ALT("%[disppcpr]", "%[dispaltpcpr]")		\
232 		: [old__] "=&d" (old__),				\
233 		  [ptr__] "+&a" (ptr__), "+m" (*ptr__),			\
234 		  "=m" (((struct lowcore *)0)->percpu_register)		\
235 		: [val__] "d" (val__),					\
236 		  [disppcpr] "i" (lc_pcpr),				\
237 		  [disppcpo] "i" (lc_pcpo),				\
238 		  [dispaltpcpr] "i" (lc_pcpr + LOWCORE_ALT_ADDRESS),	\
239 		  [dispaltpcpo] "i" (lc_pcpo + LOWCORE_ALT_ADDRESS),	\
240 		  "m" (((struct lowcore *)0)->percpu_offset)		\
241 		: "cc");						\
242 } while (0)
243 
/*
 * and uses "lan" (_4) and "lang" (_8); or uses "lao" (_4) and "laog" (_8),
 * each through arch_this_cpu_to_op().
 */
244 #define this_cpu_and_4(pcp, val)	arch_this_cpu_to_op(pcp, val, "lan")
245 #define this_cpu_and_8(pcp, val)	arch_this_cpu_to_op(pcp, val, "lang")
246 #define this_cpu_or_4(pcp, val)		arch_this_cpu_to_op(pcp, val, "lao")
247 #define this_cpu_or_8(pcp, val)		arch_this_cpu_to_op(pcp, val, "laog")
248 
249 #endif /* MARCH_HAS_Z196_FEATURES */
250 
/*
 * arch_this_cpu_read - read a percpu variable inside a percpu code section
 * @pcp: percpu variable
 * @op:  mnemonic of the load instruction
 *
 * ptr__ starts out as PERCPU_PTR(&(pcp)) (defined elsewhere). The asm emits
 * MVIY_PERCPU, AG_ALT, @op loading from 0(ptr__) into res__, and MVIY_ALT,
 * in this order. *ptr__ is passed as memory input only.
 *
 * res__ is an unsigned long; the macro evaluates to res__ cast to the type
 * of @pcp. The condition code is declared as clobbered.
 */
251 #define arch_this_cpu_read(pcp, op)					\
252 ({									\
253 	unsigned long lc_pcpr, lc_pcpo, res__;				\
254 	typedef typeof(pcp) pcp_op_T__;					\
255 	pcp_op_T__ *ptr__;						\
256 									\
257 	lc_pcpr = offsetof(struct lowcore, percpu_register);		\
258 	lc_pcpo = offsetof(struct lowcore, percpu_offset);		\
259 	ptr__ = PERCPU_PTR(&(pcp));					\
260 	asm_inline volatile(						\
261 		MVIY_PERCPU("%[disppcpr]", "%[dispaltpcpr]", "%[ptr__]")\
262 		AG_ALT("%[disppcpo]", "%[dispaltpcpo]", "%[ptr__]")	\
263 		op "	%[res__],0(%[ptr__])\n"				\
264 		MVIY_ALT("%[disppcpr]", "%[dispaltpcpr]")		\
265 		: [res__] "=&d" (res__), [ptr__] "+&a" (ptr__),		\
266 		  "=m" (((struct lowcore *)0)->percpu_register)		\
267 		: [disppcpr] "i" (lc_pcpr),				\
268 		  [disppcpo] "i" (lc_pcpo),				\
269 		  [dispaltpcpr] "i" (lc_pcpr + LOWCORE_ALT_ADDRESS),	\
270 		  [dispaltpcpo] "i" (lc_pcpo + LOWCORE_ALT_ADDRESS),	\
271 		  "m" (*ptr__),						\
272 		  "m" (((struct lowcore *)0)->percpu_offset)		\
273 		: "cc");						\
274 	(pcp_op_T__)res__;						\
275 })
276 
/* Load instruction per variant: "llgc" (_1), "llgh" (_2), "llgf" (_4), "lg" (_8). */
277 #define this_cpu_read_1(pcp) arch_this_cpu_read(pcp, "llgc")
278 #define this_cpu_read_2(pcp) arch_this_cpu_read(pcp, "llgh")
279 #define this_cpu_read_4(pcp) arch_this_cpu_read(pcp, "llgf")
280 #define this_cpu_read_8(pcp) arch_this_cpu_read(pcp, "lg")
281 
/*
 * arch_this_cpu_write - write a percpu variable inside a percpu code section
 * @pcp: percpu variable
 * @val: value to store; converted to the type of @pcp
 * @op:  mnemonic of the store instruction
 *
 * ptr__ starts out as PERCPU_PTR(&(pcp)) (defined elsewhere). The asm emits
 * MVIY_PERCPU, AG_ALT, @op storing val__ to 0(ptr__), and MVIY_ALT, in this
 * order. *ptr__ is passed as memory output only.
 *
 * Statement macro, does not yield a value. The condition code is declared
 * as clobbered.
 */
282 #define arch_this_cpu_write(pcp, val, op)				\
283 do {									\
284 	unsigned long lc_pcpr, lc_pcpo;					\
285 	typedef typeof(pcp) pcp_op_T__;					\
286 	pcp_op_T__ *ptr__, val__ = (val);				\
287 									\
288 	lc_pcpr = offsetof(struct lowcore, percpu_register);		\
289 	lc_pcpo = offsetof(struct lowcore, percpu_offset);		\
290 	ptr__ = PERCPU_PTR(&(pcp));					\
291 	asm_inline volatile(						\
292 		MVIY_PERCPU("%[disppcpr]", "%[dispaltpcpr]", "%[ptr__]")\
293 		AG_ALT("%[disppcpo]", "%[dispaltpcpo]", "%[ptr__]")	\
294 		op "    %[val__],0(%[ptr__])\n"				\
295 		MVIY_ALT("%[disppcpr]", "%[dispaltpcpr]")		\
296 		: [ptr__] "+&a" (ptr__), "=m" (*ptr__),			\
297 		  "=m" (((struct lowcore *)0)->percpu_register)		\
298 		: [val__] "d" (val__),					\
299 		  [disppcpr] "i" (lc_pcpr),				\
300 		  [disppcpo] "i" (lc_pcpo),				\
301 		  [dispaltpcpr] "i" (lc_pcpr + LOWCORE_ALT_ADDRESS),	\
302 		  [dispaltpcpo] "i" (lc_pcpo + LOWCORE_ALT_ADDRESS),	\
303 		  "m" (((struct lowcore *)0)->percpu_offset)		\
304 		: "cc");						\
305 } while (0)
306 
/* Store instruction per variant: "stc" (_1), "sth" (_2), "st" (_4), "stg" (_8). */
307 #define this_cpu_write_1(pcp, val) arch_this_cpu_write(pcp, val, "stc")
308 #define this_cpu_write_2(pcp, val) arch_this_cpu_write(pcp, val, "sth")
309 #define this_cpu_write_4(pcp, val) arch_this_cpu_write(pcp, val, "st")
310 #define this_cpu_write_8(pcp, val) arch_this_cpu_write(pcp, val, "stg")
311 
/*
 * arch_this_cpu_cmpxchg - compare and exchange on a percpu variable
 * @pcp:  percpu variable
 * @oval: expected old value
 * @nval: new value
 *
 * With preemption disabled (notrace variants), the address of this cpu's
 * instance is obtained with raw_cpu_ptr() and passed to cmpxchg() together
 * with @oval and @nval. @oval and @nval are expanded while preemption is
 * disabled.
 *
 * Evaluates to the value returned by cmpxchg().
 */
312 #define arch_this_cpu_cmpxchg(pcp, oval, nval)				\
313 ({									\
314 	typedef typeof(pcp) pcp_op_T__;					\
315 	pcp_op_T__ ret__;						\
316 	pcp_op_T__ *ptr__;						\
317 	preempt_disable_notrace();					\
318 	ptr__ = raw_cpu_ptr(&(pcp));					\
319 	ret__ = cmpxchg(ptr__, oval, nval);				\
320 	preempt_enable_notrace();					\
321 	ret__;								\
322 })
323 
/*
 * All cmpxchg variants map to arch_this_cpu_cmpxchg();
 * this_cpu_cmpxchg64() is an alias for the _8 variant.
 */
324 #define this_cpu_cmpxchg_1(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval)
325 #define this_cpu_cmpxchg_2(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval)
326 #define this_cpu_cmpxchg_4(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval)
327 #define this_cpu_cmpxchg_8(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval)
328 
329 #define this_cpu_cmpxchg64(pcp, o, n)	this_cpu_cmpxchg_8(pcp, o, n)
330 
/*
 * this_cpu_cmpxchg128 - 128 bit compare and exchange on a percpu variable
 * @pcp:  percpu variable
 * @oval: expected old value
 * @nval: new value
 *
 * @oval and @nval are copied to u128 locals before preemption is disabled.
 * With preemption disabled (notrace variants), the address of this cpu's
 * instance is obtained with raw_cpu_ptr() and passed, cast to void *, to
 * cmpxchg128().
 *
 * Evaluates to the u128 value returned by cmpxchg128().
 */
331 #define this_cpu_cmpxchg128(pcp, oval, nval)				\
332 ({									\
333 	typedef typeof(pcp) pcp_op_T__;					\
334 	u128 old__, new__, ret__;					\
335 	pcp_op_T__ *ptr__;						\
336 	old__ = oval;							\
337 	new__ = nval;							\
338 	preempt_disable_notrace();					\
339 	ptr__ = raw_cpu_ptr(&(pcp));					\
340 	ret__ = cmpxchg128((void *)ptr__, old__, new__);		\
341 	preempt_enable_notrace();					\
342 	ret__;								\
343 })
344 
/*
 * arch_this_cpu_xchg - exchange the value of a percpu variable
 * @pcp:  percpu variable
 * @nval: new value; expanded while preemption is disabled
 *
 * With preemption disabled (notrace variants), the address of this cpu's
 * instance is obtained with raw_cpu_ptr() and passed to xchg() together
 * with @nval.
 *
 * Evaluates to the value returned by xchg().
 */
345 #define arch_this_cpu_xchg(pcp, nval)					\
346 ({									\
347 	typeof(pcp) *ptr__;						\
348 	typeof(pcp) ret__;						\
349 	preempt_disable_notrace();					\
350 	ptr__ = raw_cpu_ptr(&(pcp));					\
351 	ret__ = xchg(ptr__, nval);					\
352 	preempt_enable_notrace();					\
353 	ret__;								\
354 })
355 
/* All xchg variants map to arch_this_cpu_xchg(). */
356 #define this_cpu_xchg_1(pcp, nval) arch_this_cpu_xchg(pcp, nval)
357 #define this_cpu_xchg_2(pcp, nval) arch_this_cpu_xchg(pcp, nval)
358 #define this_cpu_xchg_4(pcp, nval) arch_this_cpu_xchg(pcp, nval)
359 #define this_cpu_xchg_8(pcp, nval) arch_this_cpu_xchg(pcp, nval)
360 
361 #include <asm-generic/percpu.h>
362 
363 #endif /* __ARCH_S390_PERCPU__ */
364