xref: /linux/arch/arm64/include/asm/fpsimdmacros.h (revision f217d94fc632fece2a41030c2eebc4ed34a48b2a)
1 /* SPDX-License-Identifier: GPL-2.0-only */
2 /*
3  * FP/SIMD state saving and restoring macros
4  *
5  * Copyright (C) 2012 ARM Ltd.
6  * Author: Catalin Marinas <catalin.marinas@arm.com>
7  */
8 
9 #include <asm/assembler.h>
10 
/*
 * fpsimd_save - store q0-q31, FPSR and FPCR to memory.
 *
 *   \state: X register holding the base address of the save area.
 *           Clobbered: the last STP uses pre-index writeback, leaving the
 *           register advanced by 16 * 30 bytes.
 *   \tmpnr: number (not name) of a scratch general-purpose register; it is
 *           used as both x\tmpnr and w\tmpnr.
 *
 * Layout relative to the original base: qN at 16 * N, the low 32 bits of
 * FPSR at 16 * 32 and the low 32 bits of FPCR at 16 * 32 + 4.
 */
.macro fpsimd_save state, tmpnr
	stp	q0,  q1,  [\state, #16 * 0]
	stp	q2,  q3,  [\state, #16 * 2]
	stp	q4,  q5,  [\state, #16 * 4]
	stp	q6,  q7,  [\state, #16 * 6]
	stp	q8,  q9,  [\state, #16 * 8]
	stp	q10, q11, [\state, #16 * 10]
	stp	q12, q13, [\state, #16 * 12]
	stp	q14, q15, [\state, #16 * 14]
	stp	q16, q17, [\state, #16 * 16]
	stp	q18, q19, [\state, #16 * 18]
	stp	q20, q21, [\state, #16 * 20]
	stp	q22, q23, [\state, #16 * 22]
	stp	q24, q25, [\state, #16 * 24]
	stp	q26, q27, [\state, #16 * 26]
	stp	q28, q29, [\state, #16 * 28]
	stp	q30, q31, [\state, #16 * 30]!	// writeback: base += 16 * 30

	/* Offsets from here on are relative to the advanced base. */
	mrs	x\tmpnr, fpsr
	str	w\tmpnr, [\state, #16 * 32 - 16 * 30]
	mrs	x\tmpnr, fpcr
	str	w\tmpnr, [\state, #16 * 32 - 16 * 30 + 4]
.endm
33 
/*
 * fpsimd_restore_fpcr - install a new FPCR value, skipping redundant writes.
 *
 *   \state: X register holding the FPCR value to install (the value itself,
 *           not a pointer to saved state).
 *   \tmp:   scratch X register; overwritten with the FPCR value read back.
 *
 * Writes to fpcr may be self-synchronising, so the register is only written
 * when the requested value differs from the current one.
 *
 * Clobbers the condition flags and uses the numeric local label 9999.
 */
.macro fpsimd_restore_fpcr state, tmp
	mrs	\tmp, fpcr
	cmp	\tmp, \state
	b.eq	9999f			// already up to date, skip the write
	msr	fpcr, \state
9999:
.endm
45 
/*
 * fpsimd_restore - reload q0-q31, FPSR and FPCR from memory.
 *
 *   \state: X register holding the base address of the save area, laid out
 *           as qN at 16 * N, FPSR at 16 * 32 and FPCR at 16 * 32 + 4.
 *   \tmpnr: number (not name) of a scratch general-purpose register.
 *
 * Clobbers \state: the last LDP advances it by 16 * 30 bytes via pre-index
 * writeback, and it is then handed to fpsimd_restore_fpcr as the scratch
 * register.
 */
.macro fpsimd_restore state, tmpnr
	ldp	q0,  q1,  [\state, #16 * 0]
	ldp	q2,  q3,  [\state, #16 * 2]
	ldp	q4,  q5,  [\state, #16 * 4]
	ldp	q6,  q7,  [\state, #16 * 6]
	ldp	q8,  q9,  [\state, #16 * 8]
	ldp	q10, q11, [\state, #16 * 10]
	ldp	q12, q13, [\state, #16 * 12]
	ldp	q14, q15, [\state, #16 * 14]
	ldp	q16, q17, [\state, #16 * 16]
	ldp	q18, q19, [\state, #16 * 18]
	ldp	q20, q21, [\state, #16 * 20]
	ldp	q22, q23, [\state, #16 * 22]
	ldp	q24, q25, [\state, #16 * 24]
	ldp	q26, q27, [\state, #16 * 26]
	ldp	q28, q29, [\state, #16 * 28]
	ldp	q30, q31, [\state, #16 * 30]!	// writeback: base += 16 * 30

	/* Offsets from here on are relative to the advanced base. */
	ldr	w\tmpnr, [\state, #16 * 32 - 16 * 30]
	msr	fpsr, x\tmpnr
	ldr	w\tmpnr, [\state, #16 * 32 - 16 * 30 + 4]
	fpsimd_restore_fpcr x\tmpnr, \state
.endm
69 
70 /* Sanity-check macros to help avoid encoding garbage instructions */
71 
/*
 * Fail the build unless \nr is a valid general-purpose register number,
 * i.e. lies in the range 0-30.
 */
.macro _check_general_reg nr
	.if (\nr) > 30 || (\nr) < 0
		.error "Bad register number \nr."
	.endif
.endm
77 
/* Fail the build unless \znr is a valid SVE vector register number (0-31). */
.macro _sve_check_zreg znr
	.if (\znr) > 31 || (\znr) < 0
		.error "Bad Scalable Vector Extension vector register number \znr."
	.endif
.endm
83 
/* Fail the build unless \pnr is a valid SVE predicate register number (0-15). */
.macro _sve_check_preg pnr
	.if (\pnr) > 15 || (\pnr) < 0
		.error "Bad Scalable Vector Extension predicate register number \pnr."
	.endif
.endm
89 
/*
 * Fail the build unless \min <= \n <= \max. All three arguments are
 * assembly-time constant expressions.
 */
.macro _check_num n, min, max
	.if (\n) > (\max) || (\n) < (\min)
		.error "Number \n out of range [\min,\max]"
	.endif
.endm
95 
/*
 * Fail the build unless \v is a register number usable as an SME vector
 * select register, i.e. lies in the range 12-15.
 */
.macro _sme_check_wv v
	.if (\v) > 15 || (\v) < 12
		.error "Bad vector select register \v."
	.endif
.endm
101 
102 /* SVE instruction encodings for non-SVE-capable assemblers */
103 /* (pre binutils 2.28, all kernel capable clang versions support SVE) */
104 
/*
 * STR (vector): STR Z\nz, [X\nxbase, #\offset, MUL VL]
 *
 * Emitted as a raw .inst word. All arguments are register numbers or
 * constants and are range-checked at assembly time. Field placement:
 *	bits [21:16]	upper six bits of the signed 9-bit \offset
 *	bits [12:10]	lower three bits of \offset
 *	bits [9:5]	\nxbase
 *	bits [4:0]	\nz
 */
.macro _sve_str_v nz, nxbase, offset=0
	_sve_check_zreg \nz
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0xe5804000			\
		| (((\offset) & 0x1f8) << 13)	\
		| (((\offset) & 7) << 10)	\
		| ((\nxbase) << 5)		\
		| (\nz)
.endm
116 
/*
 * LDR (vector): LDR Z\nz, [X\nxbase, #\offset, MUL VL]
 *
 * Emitted as a raw .inst word. All arguments are register numbers or
 * constants and are range-checked at assembly time. Field placement:
 *	bits [21:16]	upper six bits of the signed 9-bit \offset
 *	bits [12:10]	lower three bits of \offset
 *	bits [9:5]	\nxbase
 *	bits [4:0]	\nz
 */
.macro _sve_ldr_v nz, nxbase, offset=0
	_sve_check_zreg \nz
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0x85804000			\
		| (((\offset) & 0x1f8) << 13)	\
		| (((\offset) & 7) << 10)	\
		| ((\nxbase) << 5)		\
		| (\nz)
.endm
128 
/*
 * STR (predicate): STR P\np, [X\nxbase, #\offset, MUL VL]
 *
 * Emitted as a raw .inst word. All arguments are register numbers or
 * constants and are range-checked at assembly time. Field placement:
 *	bits [21:16]	upper six bits of the signed 9-bit \offset
 *	bits [12:10]	lower three bits of \offset
 *	bits [9:5]	\nxbase
 *	bits [3:0]	\np
 */
.macro _sve_str_p np, nxbase, offset=0
	_sve_check_preg \np
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0xe5800000			\
		| (((\offset) & 0x1f8) << 13)	\
		| (((\offset) & 7) << 10)	\
		| ((\nxbase) << 5)		\
		| (\np)
.endm
140 
/*
 * LDR (predicate): LDR P\np, [X\nxbase, #\offset, MUL VL]
 *
 * Emitted as a raw .inst word. All arguments are register numbers or
 * constants and are range-checked at assembly time. Field placement:
 *	bits [21:16]	upper six bits of the signed 9-bit \offset
 *	bits [12:10]	lower three bits of \offset
 *	bits [9:5]	\nxbase
 *	bits [3:0]	\np
 */
.macro _sve_ldr_p np, nxbase, offset=0
	_sve_check_preg \np
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0x85800000			\
		| (((\offset) & 0x1f8) << 13)	\
		| (((\offset) & 7) << 10)	\
		| ((\nxbase) << 5)		\
		| (\np)
.endm
152 
/*
 * RDVL X\nx, #\imm
 *
 * Emitted as a raw .inst word: the signed 6-bit \imm (-32..31) occupies
 * bits [10:5] and the destination register number \nx bits [4:0].
 */
.macro _sve_rdvl nx, imm
	_check_general_reg \nx
	_check_num (\imm), -0x20, 0x1f
	.inst	0x04bf5000			\
		| (((\imm) & 0x3f) << 5)	\
		| (\nx)
.endm
161 
/*
 * RDFFR (unpredicated): RDFFR P\np.B
 *
 * Emitted as a raw .inst word with the predicate register number \np
 * (0-15) in the low bits.
 */
.macro _sve_rdffr np
	_sve_check_preg \np
	.inst	0x2519f000 | (\np)
.endm
168 
/*
 * WRFFR P\np.B
 *
 * Emitted as a raw .inst word; the source predicate register number \np
 * (0-15) is placed starting at bit 5.
 */
.macro _sve_wrffr np
	_sve_check_preg \np
	.inst	0x25289000 | ((\np) << 5)
.endm
175 
/*
 * PFALSE P\np.B
 *
 * Emitted as a raw .inst word with the destination predicate register
 * number \np (0-15) in the low bits.
 */
.macro _sve_pfalse np
	_sve_check_preg \np
	.inst	0x2518e400 | (\np)
.endm
182 
183 /* SME instruction encodings for non-SME-capable assemblers */
184 /* (pre binutils 2.38/LLVM 13) */
185 
/*
 * RDSVL X\nx, #\imm
 *
 * Emitted as a raw .inst word: the signed 6-bit \imm (-32..31) occupies
 * bits [10:5] and the destination register number \nx bits [4:0].
 */
.macro _sme_rdsvl nx, imm
	_check_general_reg \nx
	_check_num (\imm), -0x20, 0x1f
	.inst	0x04bf5800			\
		| (((\imm) & 0x3f) << 5)	\
		| (\nx)
.endm
194 
/*
 * STR (vector from ZA array):
 *	STR ZA[W\nw, #\offset], [X\nxbase, #\offset, MUL VL]
 *
 * Emitted as a raw .inst word. Field placement:
 *	bits [14:13]	low two bits of the vector select register number
 *			\nw (12-15)
 *	bits [9:5]	\nxbase
 *	bits [3:0]	\offset
 *
 * \offset is an unsigned 4-bit immediate (0-15), unlike the signed 9-bit
 * offsets of the SVE LDR/STR forms, so it is checked and masked as such.
 * Out-of-range values are rejected at assembly time instead of being
 * silently truncated into a different instruction.
 */
.macro _sme_str_zav nw, nxbase, offset=0
	_sme_check_wv \nw
	_check_general_reg \nxbase
	_check_num (\offset), 0, 15
	.inst	0xe1200000			\
		| (((\nw) & 3) << 13)		\
		| ((\nxbase) << 5)		\
		| ((\offset) & 0xf)
.endm
208 
/*
 * LDR (vector to ZA array):
 *	LDR ZA[W\nw, #\offset], [X\nxbase, #\offset, MUL VL]
 *
 * Emitted as a raw .inst word. Field placement:
 *	bits [14:13]	low two bits of the vector select register number
 *			\nw (12-15)
 *	bits [9:5]	\nxbase
 *	bits [3:0]	\offset
 *
 * \offset is an unsigned 4-bit immediate (0-15), unlike the signed 9-bit
 * offsets of the SVE LDR/STR forms, so it is checked and masked as such.
 * Out-of-range values are rejected at assembly time instead of being
 * silently truncated into a different instruction.
 */
.macro _sme_ldr_zav nw, nxbase, offset=0
	_sme_check_wv \nw
	_check_general_reg \nxbase
	_check_num (\offset), 0, 15
	.inst	0xe1000000			\
		| (((\nw) & 3) << 13)		\
		| ((\nxbase) << 5)		\
		| ((\offset) & 0xf)
.endm
222 
/*
 * LDR (ZT0)
 *
 *	LDR ZT0, [X\nx]
 *
 * Emitted as a raw .inst word with the base register number \nx (0-30) in
 * bits [9:5]. The argument is parenthesised before shifting, as in the
 * other encoders in this file, so that an expression argument cannot be
 * regrouped by operator precedence.
 */
.macro _ldr_zt nx
	_check_general_reg \nx
	.inst	0xe11f8000		\
		| ((\nx) << 5)
.endm
233 
/*
 * STR (ZT0)
 *
 *	STR ZT0, [X\nx]
 *
 * Emitted as a raw .inst word with the base register number \nx (0-30) in
 * bits [9:5]. The argument is parenthesised before shifting, as in the
 * other encoders in this file, so that an expression argument cannot be
 * regrouped by operator precedence.
 */
.macro _str_zt nx
	_check_general_reg \nx
	.inst	0xe13f8000		\
		| ((\nx) << 5)
.endm
244 
/*
 * Zero the entire ZA array
 *	ZERO ZA
 *
 * Emitted as a raw .inst word. NOTE(review): the all-ones low byte is
 * presumably the tile mask selecting every tile; confirm against the
 * architecture manual.
 */
.macro zero_za
	.inst	0xc0080000 | 0xff
.endm
252 
/*
 * __for - recursive worker behind _for.
 *
 * Expands _for__body once for every integer in the inclusive range given by
 * its two arguments, in ascending order. The range is split in half at each
 * level, so the macro nesting depth grows with the logarithm of the range
 * size rather than with the size itself.
 *
 * Must be expanded in .altmacro mode: the % prefix evaluates each argument
 * expression to a number before it is passed down.
 *
 * Comments are deliberately kept outside the macro body, because altmacro
 * mode also substitutes bare parameter names.
 */
.macro __for from:req, to:req
	.if (\to) - (\from)
		__for %\from, %((\from) + ((\to) - (\from)) / 2)
		__for %((\from) + ((\to) - (\from)) / 2 + 1), %\to
	.else
		_for__body %\from
	.endif
.endm

/*
 * _for - assemble an instruction or macro invocation once per value of a
 * counter running over an inclusive range, lowest value first.
 *
 * The first argument names the counter and the trailing vararg is the text
 * to assemble, in which the counter appears as a backslash-escaped
 * reference. The text is captured in a temporary macro, _for__body, whose
 * single parameter is the counter. That macro drops back to .noaltmacro
 * around the text and is purged once the range has been walked. Altmacro
 * mode is enabled only around the __for expansion and is switched off
 * again afterwards.
 */
.macro _for var:req, from:req, to:req, insn:vararg
	.macro _for__body \var:req
		.noaltmacro
		\insn
		.altmacro
	.endm

	.altmacro
	__for \from, \to
	.noaltmacro

	.purgem _for__body
.endm
275 
/*
 * sve_load_vq - update ZCR_EL1.LEN with the new VQ.
 *
 *   \xvqminus1: X register holding the new LEN value (VQ - 1). It is ORed
 *               in unmasked, so it presumably must not have bits set
 *               outside ZCR_ELx_LEN_MASK; verify against callers.
 *   \xtmp:      scratch X register; receives the current ZCR_EL1 value.
 *   \xtmp2:     scratch X register; receives the updated value.
 *
 * The system register is written only when the value actually changes.
 * Clobbers the condition flags and uses the numeric local label 921.
 */
.macro sve_load_vq xvqminus1, xtmp, xtmp2
	mrs_s	\xtmp, SYS_ZCR_EL1
	bic	\xtmp2, \xtmp, ZCR_ELx_LEN_MASK
	orr	\xtmp2, \xtmp2, \xvqminus1
	cmp	\xtmp2, \xtmp
	b.eq	921f
	msr_s	SYS_ZCR_EL1, \xtmp2	//self-synchronising
921:
.endm
286 
/*
 * sme_load_vq - update SMCR_EL1.LEN with the new VQ.
 *
 *   \xvqminus1: X register holding the new LEN value (VQ - 1). It is ORed
 *               in unmasked, so it presumably must not have bits set
 *               outside SMCR_ELx_LEN_MASK; verify against callers.
 *   \xtmp:      scratch X register; receives the current SMCR_EL1 value.
 *   \xtmp2:     scratch X register; receives the updated value.
 *
 * The system register is written only when the value actually changes.
 * Clobbers the condition flags and uses the numeric local label 921.
 */
.macro sme_load_vq xvqminus1, xtmp, xtmp2
	mrs_s	\xtmp, SYS_SMCR_EL1
	bic	\xtmp2, \xtmp, SMCR_ELx_LEN_MASK
	orr	\xtmp2, \xtmp2, \xvqminus1
	cmp	\xtmp2, \xtmp
	b.eq	921f
	msr_s	SYS_SMCR_EL1, \xtmp2	//self-synchronising
921:
.endm
297 
/*
 * Preserve the first 128 bits of Z\nz and zero the rest.
 *
 * This is done by copying V\nz onto itself; \nz is a vector register
 * number (0-31), checked at assembly time.
 */
.macro _sve_flush_z nz
	_sve_check_zreg \nz
	mov	v\nz\().16b, v\nz\().16b
.endm
303 
/* Apply _sve_flush_z to every vector register, Z0 through Z31. */
.macro sve_flush_z
	_for n, 0, 31, _sve_flush_z	\n
.endm
/* Set every predicate register, P0 through P15, to all-false. */
.macro sve_flush_p
	_for n, 0, 15, _sve_pfalse	\n
.endm
/*
 * Write the contents of P0 to FFR. The caller is responsible for what P0
 * holds at this point.
 */
.macro sve_flush_ffr
	_sve_wrffr	0
.endm
313 
/*
 * sve_save - store the SVE register state, FPSR and FPCR to memory.
 *
 *   \nxbase:   number of the X register pointing at the FFR slot. Zn is
 *              stored at vector offset n - 34, Pn at predicate offset
 *              n - 16, and FFR at offset 0 (all "MUL VL" relative to the
 *              base).
 *   \xpfpsr:   X register pointing at the FPSR word; FPCR goes 4 bytes
 *              after it.
 *   \save_ffr: register; non-zero to save the live FFR, zero to store an
 *              all-false FFR image instead.
 *   \nxtmp:    number of a scratch general-purpose register.
 *
 * FFR can only be stored by way of a predicate register, so P0 is used as
 * staging after its own value has been saved, then reloaded from its slot.
 * The "no FFR" path stores an all-false predicate through the same
 * STR (predicate) instruction rather than a fixed-size store of xzr, so
 * exactly one predicate-length of zeroes is written whatever the current
 * vector length is.
 *
 * Uses the numeric local labels 921 and 922.
 */
.macro sve_save nxbase, xpfpsr, save_ffr, nxtmp
 _for n, 0, 31,	_sve_str_v	\n, \nxbase, \n - 34
 _for n, 0, 15,	_sve_str_p	\n, \nxbase, \n - 16
		cbz		\save_ffr, 921f
		_sve_rdffr	0
		b		922f
921:
		_sve_pfalse	0			// Zero out FFR
922:
		_sve_str_p	0, \nxbase
		_sve_ldr_p	0, \nxbase, -16
		mrs		x\nxtmp, fpsr
		str		w\nxtmp, [\xpfpsr]
		mrs		x\nxtmp, fpcr
		str		w\nxtmp, [\xpfpsr, #4]
.endm
330 
/*
 * sve_load - reload the SVE register state, FPSR and FPCR from memory.
 *
 *   \nxbase:      number of the X register pointing at the FFR slot. Zn is
 *                 read from vector offset n - 34 and Pn from predicate
 *                 offset n - 16 (both "MUL VL" relative to the base).
 *   \xpfpsr:      X register pointing at the FPSR word; FPCR is read from
 *                 4 bytes after it.
 *   \restore_ffr: register; when zero, FFR is left untouched.
 *   \nxtmp:       number of a scratch general-purpose register.
 *
 * FFR is restored by way of P0, which is why the predicate registers are
 * loaded only afterwards. Uses the numeric local label 921.
 */
.macro sve_load nxbase, xpfpsr, restore_ffr, nxtmp
	_for n, 0, 31,	_sve_ldr_v	\n, \nxbase, \n - 34

	cbz	\restore_ffr, 921f
	_sve_ldr_p	0, \nxbase
	_sve_wrffr	0
921:
	_for n, 0, 15,	_sve_ldr_p	\n, \nxbase, \n - 16

	ldr	w\nxtmp, [\xpfpsr]
	msr	fpsr, x\nxtmp
	ldr	w\nxtmp, [\xpfpsr, #4]
	msr	fpcr, x\nxtmp
.endm
344 
/*
 * sme_save_za - store ZA array vectors to memory, one per loop iteration.
 *
 *   \nxbase: number of the X register holding the destination address.
 *            Clobbered: advanced by the value of \xvl after every vector.
 *   \xvl:    X register used both as the per-vector address stride and as
 *            the number of vectors to store. The loop exits only on
 *            equality with the counter, so it must be non-zero.
 *   \nw:     number of the vector select register (12-15). Clobbered: it
 *            counts from 0 up to the value of \xvl.
 *
 * Clobbers the condition flags and uses the numeric local label 423.
 */
.macro sme_save_za nxbase, xvl, nw
	mov	w\nw, #0
423:
	_sme_str_zav \nw, \nxbase
	add	x\nxbase, x\nxbase, \xvl
	add	x\nw, x\nw, #1
	cmp	\xvl, x\nw
	b.ne	423b
.endm
355 
/*
 * sme_load_za - load ZA array vectors from memory, one per loop iteration.
 *
 *   \nxbase: number of the X register holding the source address.
 *            Clobbered: advanced by the value of \xvl after every vector.
 *   \xvl:    X register used both as the per-vector address stride and as
 *            the number of vectors to load. The loop exits only on
 *            equality with the counter, so it must be non-zero.
 *   \nw:     number of the vector select register (12-15). Clobbered: it
 *            counts from 0 up to the value of \xvl.
 *
 * Clobbers the condition flags and uses the numeric local label 423.
 */
.macro sme_load_za nxbase, xvl, nw
	mov	w\nw, #0
423:
	_sme_ldr_zav \nw, \nxbase
	add	x\nxbase, x\nxbase, \xvl
	add	x\nw, x\nw, #1
	cmp	\xvl, x\nw
	b.ne	423b
.endm
366