xref: /linux/arch/arm64/include/asm/fpsimd.h (revision bba2c3615bd6cfee7456d1130f2e6b01b3f4e9ba)
1 /* SPDX-License-Identifier: GPL-2.0-only */
2 /*
3  * Copyright (C) 2012 ARM Ltd.
4  */
5 #ifndef __ASM_FP_H
6 #define __ASM_FP_H
7 
8 #include <asm/errno.h>
9 #include <asm/percpu.h>
10 #include <asm/ptrace.h>
11 #include <asm/processor.h>
12 #include <asm/sigcontext.h>
13 #include <asm/sysreg.h>
14 
15 #ifndef __ASSEMBLER__
16 
17 #include <linux/bitmap.h>
18 #include <linux/build_bug.h>
19 #include <linux/bug.h>
20 #include <linux/cache.h>
21 #include <linux/init.h>
22 #include <linux/stddef.h>
23 #include <linux/types.h>
24 
/*
 * Assembler directive prefixes placed at the start of the inline asm
 * statements in this header.  Each string emits ".arch_extension" for the
 * named extension(s) ahead of the instructions that follow it in the same
 * asm statement.
 */
#define __FPSIMD_PREAMBLE	".arch_extension fp\n" \
				".arch_extension simd\n"
#define __SVE_PREAMBLE		".arch_extension sve\n"
#define __SME_PREAMBLE		".arch_extension sme\n"
29 
/*
 * Masks for extracting the FPSR and FPCR from the FPSCR.
 * The two masks select disjoint sets of bits.
 * NOTE(review): presumably STAT selects the FPSR bits and CTRL the FPCR
 * bits -- confirm against the users of these masks.
 */
#define VFP_FPSCR_STAT_MASK	0xf800009f
#define VFP_FPSCR_CTRL_MASK	0x07f79f00
/*
 * The VFP state has 32x64-bit registers and a single 32-bit
 * control/status register, i.e. (32 * 8) bytes of registers followed by
 * 4 bytes for the control/status word.
 */
#define VFP_STATE_SIZE		((32 * 8) + 4)
38 
39 static inline unsigned long cpacr_save_enable_kernel_sve(void)
40 {
41 	unsigned long old = read_sysreg(cpacr_el1);
42 	unsigned long set = CPACR_EL1_FPEN_EL1EN | CPACR_EL1_ZEN_EL1EN;
43 
44 	write_sysreg(old | set, cpacr_el1);
45 	isb();
46 	return old;
47 }
48 
49 static inline unsigned long cpacr_save_enable_kernel_sme(void)
50 {
51 	unsigned long old = read_sysreg(cpacr_el1);
52 	unsigned long set = CPACR_EL1_FPEN_EL1EN | CPACR_EL1_SMEN_EL1EN;
53 
54 	write_sysreg(old | set, cpacr_el1);
55 	isb();
56 	return old;
57 }
58 
/*
 * Write @cpacr to CPACR_EL1 verbatim and follow the write with an isb().
 * Counterpart to the cpacr_save_enable_kernel_*() helpers, which return the
 * previous register value.
 */
static inline void cpacr_restore(unsigned long cpacr)
{
	write_sysreg(cpacr, cpacr_el1);
	isb();
}
64 
/*
 * When we defined the maximum SVE vector length we defined the ABI so
 * that the maximum vector length included all the bits in ZCR that are
 * reserved for future expansion, rather than just those currently
 * defined by the architecture.  Using this length to allocate worst-case
 * buffers results in excessively large allocations, and this effect is
 * even more pronounced for SME due to ZA.  Define more suitable limits
 * for these situations: each is the largest value the corresponding LEN
 * field can hold, plus one.
 */
#define ARCH_SVE_VQ_MAX ((ZCR_ELx_LEN_MASK >> ZCR_ELx_LEN_SHIFT) + 1)
#define SME_VQ_MAX	((SMCR_ELx_LEN_MASK >> SMCR_ELx_LEN_SHIFT) + 1)
76 
77 struct task_struct;
78 
/*
 * Read SYS_FPSR and SYS_FPCR and record them in state->fpsr and
 * state->fpcr.  state->vregs is not touched.
 */
static inline void fpsimd_save_common(struct user_fpsimd_state *state)
{
	state->fpsr = read_sysreg_s(SYS_FPSR);
	state->fpcr = read_sysreg_s(SYS_FPCR);
}
84 
/*
 * Write state->fpsr and state->fpcr to SYS_FPSR and SYS_FPCR
 * respectively.  state->vregs is not read.
 */
static inline void fpsimd_load_common(const struct user_fpsimd_state *state)
{
	write_sysreg_s(state->fpsr, SYS_FPSR);
	write_sysreg_s(state->fpcr, SYS_FPCR);
}
90 
/*
 * Store Q0-Q31 into state->vregs in register order: sixteen STP
 * instructions, each writing a pair of 16-byte registers at offset
 * 16 * <index of the first register of the pair>.
 */
static inline void fpsimd_save_vregs(struct user_fpsimd_state *state)
{
	/* Report the write to the instrumentation hooks before the asm runs. */
	instrument_write(state->vregs, sizeof(state->vregs));
	asm volatile(
	__FPSIMD_PREAMBLE
	"	stp	q0,  q1,  [%[vregs], #16 * 0]\n"
	"	stp	q2,  q3,  [%[vregs], #16 * 2]\n"
	"	stp	q4,  q5,  [%[vregs], #16 * 4]\n"
	"	stp	q6,  q7,  [%[vregs], #16 * 6]\n"
	"	stp	q8,  q9,  [%[vregs], #16 * 8]\n"
	"	stp	q10, q11, [%[vregs], #16 * 10]\n"
	"	stp	q12, q13, [%[vregs], #16 * 12]\n"
	"	stp	q14, q15, [%[vregs], #16 * 14]\n"
	"	stp	q16, q17, [%[vregs], #16 * 16]\n"
	"	stp	q18, q19, [%[vregs], #16 * 18]\n"
	"	stp	q20, q21, [%[vregs], #16 * 20]\n"
	"	stp	q22, q23, [%[vregs], #16 * 22]\n"
	"	stp	q24, q25, [%[vregs], #16 * 24]\n"
	"	stp	q26, q27, [%[vregs], #16 * 26]\n"
	"	stp	q28, q29, [%[vregs], #16 * 28]\n"
	"	stp	q30, q31, [%[vregs], #16 * 30]\n"
	/*
	 * The "=Q" memory output is not referenced by the template; it tells
	 * the compiler that state->vregs is written.  The base address used
	 * by the template is passed separately in a register.
	 */
	: "=Q" (state->vregs)
	: [vregs] "r" (state->vregs)
	);
}
116 
/*
 * Load Q0-Q31 from state->vregs in register order: sixteen LDP
 * instructions, each reading a pair of 16-byte registers at offset
 * 16 * <index of the first register of the pair>.
 */
static inline void fpsimd_load_vregs(const struct user_fpsimd_state *state)
{
	/* Report the read to the instrumentation hooks before the asm runs. */
	instrument_read(state->vregs, sizeof(state->vregs));
	asm volatile(
	__FPSIMD_PREAMBLE
	"	ldp	q0,  q1,  [%[vregs], #16 * 0]\n"
	"	ldp	q2,  q3,  [%[vregs], #16 * 2]\n"
	"	ldp	q4,  q5,  [%[vregs], #16 * 4]\n"
	"	ldp	q6,  q7,  [%[vregs], #16 * 6]\n"
	"	ldp	q8,  q9,  [%[vregs], #16 * 8]\n"
	"	ldp	q10, q11, [%[vregs], #16 * 10]\n"
	"	ldp	q12, q13, [%[vregs], #16 * 12]\n"
	"	ldp	q14, q15, [%[vregs], #16 * 14]\n"
	"	ldp	q16, q17, [%[vregs], #16 * 16]\n"
	"	ldp	q18, q19, [%[vregs], #16 * 18]\n"
	"	ldp	q20, q21, [%[vregs], #16 * 20]\n"
	"	ldp	q22, q23, [%[vregs], #16 * 22]\n"
	"	ldp	q24, q25, [%[vregs], #16 * 24]\n"
	"	ldp	q26, q27, [%[vregs], #16 * 26]\n"
	"	ldp	q28, q29, [%[vregs], #16 * 28]\n"
	"	ldp	q30, q31, [%[vregs], #16 * 30]\n"
	:
	/*
	 * The "Q" memory input is not referenced by the template; it tells
	 * the compiler that state->vregs is read.  The base address used by
	 * the template is passed separately in a register.
	 */
	: "Q" (state->vregs),
	  [vregs] "r" (state->vregs)
	);
}
143 
/*
 * Save the complete FPSIMD state into @state: the vector registers via
 * fpsimd_save_vregs(), then FPSR/FPCR via fpsimd_save_common().
 */
static inline void fpsimd_save_state(struct user_fpsimd_state *state)
{
	fpsimd_save_vregs(state);
	fpsimd_save_common(state);
}
149 
/*
 * Load the complete FPSIMD state from @state: the vector registers via
 * fpsimd_load_vregs(), then FPSR/FPCR via fpsimd_load_common().
 */
static inline void fpsimd_load_state(const struct user_fpsimd_state *state)
{
	fpsimd_load_vregs(state);
	fpsimd_load_common(state);
}
155 
/*
 * FPSIMD state management entry points.  Only the prototypes are visible
 * in this header; the implementations are provided elsewhere.
 */
extern void fpsimd_thread_switch(struct task_struct *next);
extern void fpsimd_flush_thread(void);

extern void fpsimd_preserve_current_state(void);
extern void fpsimd_restore_current_state(void);
extern void fpsimd_update_current_state(struct user_fpsimd_state const *state);
162 
/*
 * Collection of pointers and vector lengths describing one set of
 * floating-point/vector state.  An instance is kept per CPU as
 * fpsimd_last_state, and a pointer to one is what
 * fpsimd_bind_state_to_cpu() takes.
 *
 * NOTE(review): the meaning of the individual fields is not established
 * by this header; the names suggest the FPSIMD/SVE/SME save areas, the
 * saved SVCR and FPMR values, the SVE and SME vector lengths and the
 * current/requested state type -- confirm against the implementation.
 */
struct cpu_fp_state {
	struct user_fpsimd_state *st;
	struct arm64_sve_state *sve_state;
	struct arm64_sme_state *sme_state;
	u64 *svcr;
	u64 *fpmr;
	unsigned int sve_vl;
	unsigned int sme_vl;
	enum fp_type *fp_type;
	enum fp_type to_save;
};
174 
/* Per-CPU struct cpu_fp_state instance; defined outside this header. */
DECLARE_PER_CPU(struct cpu_fp_state, fpsimd_last_state);

extern void fpsimd_bind_state_to_cpu(struct cpu_fp_state *fp_state);

/* Prototypes only; the implementations are provided elsewhere. */
extern void fpsimd_flush_task_state(struct task_struct *target);
extern void fpsimd_save_and_flush_current_state(void);
extern void fpsimd_save_and_flush_cpu_state(void);
182 
183 static inline bool thread_sm_enabled(struct thread_struct *thread)
184 {
185 	return system_supports_sme() && (thread->svcr & SVCR_SM_MASK);
186 }
187 
188 static inline bool thread_za_enabled(struct thread_struct *thread)
189 {
190 	return system_supports_sme() && (thread->svcr & SVCR_ZA_MASK);
191 }
192 
/* Prototype only; the implementation is provided elsewhere. */
extern void task_smstop_sm(struct task_struct *task);

/*
 * Maximum VL that SVE/SME VL-agnostic software can transparently support
 * (0x100 == 256).
 * NOTE(review): presumably expressed in bytes, like the other VL values
 * in this header -- confirm.
 */
#define VL_ARCH_MAX 0x100
197 
198 static inline void *thread_zt_state(struct thread_struct *thread)
199 {
200 	/* The ZT register state is stored immediately after the ZA state */
201 	unsigned int sme_vq = sve_vq_from_vl(thread_get_sme_vl(thread));
202 	return (void *)thread->sme_state + ZA_SIG_REGS_SIZE(sme_vq);
203 }
204 
/*
 * Return the value produced by "rdvl <reg>, #1".  Callers in this file use
 * the result as the size in bytes of one Z register slot.
 */
static inline unsigned int sve_get_vl(void)
{
	unsigned int vl;

	asm volatile(
	__SVE_PREAMBLE
	/* %x forces the 64-bit (Xn) name of the output register. */
	"	rdvl %x[vl], #1\n"
	: [vl] "=r" (vl)
	);

	return vl;
}
217 
/*
 * Build an assembler ".irp" loop that repeats @asm_str once for every
 * register index: 0-31 for FOR_EACH_Z_REG and 0-15 for FOR_EACH_P_REG.
 * @idx_str is the name of the .irp symbol; @asm_str refers to the current
 * index as "\<idx_str>" (written "\\<idx_str>" in a C string literal).
 * Both arguments must be string literals, since the result is formed by
 * string literal concatenation.
 */
#define FOR_EACH_Z_REG(idx_str, asm_str)											\
	"	.irp " idx_str ",0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31\n"	\
	asm_str	"\n"														\
	"	.endr\n"

#define FOR_EACH_P_REG(idx_str, asm_str)											\
	"	.irp " idx_str ",0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15\n"	\
	asm_str	"\n"								\
	"	.endr\n"
227 
/*
 * Store Z0-Z31 to consecutive slots starting at @state, register n going
 * to slot n.
 *
 * @vl is used only to size the region reported to instrument_write()
 * (SVE_NUM_ZREGS * vl bytes); the STR instructions address their slots
 * with "#n, MUL VL" and do not take @vl as an operand.
 */
static inline void __sve_save_z(struct arm64_sve_state *state, unsigned long vl)
{
	instrument_write(state, SVE_NUM_ZREGS * vl);
	asm volatile(
	__SVE_PREAMBLE
	FOR_EACH_Z_REG("n", "str	z\\n, [%[zregs], #\\n, MUL VL]")
	:
	: [zregs] "r" (state)
	/* The stores are not described by an operand, hence the clobber. */
	: "memory"
	);
}
239 
/*
 * Load Z0-Z31 from consecutive slots starting at @state, register n coming
 * from slot n.
 *
 * @vl is used only to size the region reported to instrument_read()
 * (SVE_NUM_ZREGS * vl bytes); the LDR instructions address their slots
 * with "#n, MUL VL" and do not take @vl as an operand.
 */
static inline void __sve_load_z(const struct arm64_sve_state *state, unsigned long vl)
{
	instrument_read(state, SVE_NUM_ZREGS * vl);
	asm volatile(
	__SVE_PREAMBLE
	FOR_EACH_Z_REG("n", "ldr	z\\n, [%[zregs], #\\n, MUL VL]")
	:
	: [zregs] "r" (state)
	/* The loads are not described by an operand, hence the clobber. */
	: "memory"
	);
}
251 
/*
 * Store P0-P15 and an FFR slot into the buffer at @state.
 *
 * Layout used here: the predicate slots start SVE_NUM_ZREGS * vl bytes
 * into the buffer, each slot is vl / 8 bytes, and the FFR slot follows
 * the SVE_NUM_PREGS predicate slots.
 *
 * @ffr: when true the FFR slot receives the value read with RDFFR; when
 *       false it receives an all-false predicate (PFALSE).
 */
static inline void __sve_save_p(struct arm64_sve_state *state, unsigned long vl, bool ffr)
{
	void *pregs = (void *)state + SVE_NUM_ZREGS * vl;
	unsigned long pl = vl / 8;
	void *pffr = pregs + SVE_NUM_PREGS * pl;

	instrument_write(pregs, SVE_NUM_PREGS * pl);
	asm volatile(
	__SVE_PREAMBLE
	FOR_EACH_P_REG("n", "str	p\\n, [%[pregs], #\\n, MUL VL]\n")
	:
	: [pregs] "r" (pregs)
	: "memory"
	);

	instrument_write(pffr, pl);
	/*
	 * Both branches use P0 as scratch to produce the FFR slot contents,
	 * then reload P0 from predicate slot 0, which was written above, so
	 * that P0 holds its original value again on return.
	 */
	if (ffr) {
		asm volatile(
		__SVE_PREAMBLE
		"	rdffr	p0.b\n"
		"	str	p0, [%[pffr]]\n"
		"	ldr	p0, [%[pregs]]\n"
		:
		: [pregs] "r" (pregs),
		  [pffr] "r" (pffr)
		: "memory"
		);
	} else {
		asm volatile(
		__SVE_PREAMBLE
		"	pfalse	p0.b\n"
		"	str	p0, [%[pffr]]\n"
		"	ldr	p0, [%[pregs]]\n"
		:
		: [pregs] "r" (pregs),
		  [pffr] "r" (pffr)
		: "memory"
		);
	}
}
292 
/*
 * Load P0-P15, and optionally FFR, from the buffer at @state.
 *
 * Layout used here: the predicate slots start SVE_NUM_ZREGS * vl bytes
 * into the buffer, each slot is vl / 8 bytes, and the FFR slot follows
 * the SVE_NUM_PREGS predicate slots.
 *
 * @ffr: when true FFR is written (WRFFR) from the FFR slot; when false
 *       the FFR slot is not read and FFR is left alone.
 */
static inline void __sve_load_p(const struct arm64_sve_state *state, unsigned long vl, bool ffr)
{
	const void *pregs = (const void *)state + SVE_NUM_ZREGS * vl;
	unsigned long pl = vl / 8;
	const void *pffr = pregs + SVE_NUM_PREGS * pl;

	/*
	 * FFR is restored first because it goes through P0 as scratch; the
	 * predicate loads below then give P0 its final value.
	 */
	if (ffr) {
		instrument_read(pffr, pl);
		asm volatile(
		__SVE_PREAMBLE
		"	ldr	p0, [%[pffr]]\n"
		"	wrffr	p0.b\n"
		:
		: [pffr] "r" (pffr)
		: "memory"
		);
	}

	instrument_read(pregs, SVE_NUM_PREGS * pl);
	asm volatile(
	__SVE_PREAMBLE
	FOR_EACH_P_REG("n", "ldr	p\\n, [%[pregs], #\\n, MUL VL]\n")
	:
	: [pregs] "r" (pregs)
	: "memory"
	);
}
320 
/*
 * Save the Z registers, the P registers and an FFR slot into @state,
 * using the vector length reported by sve_get_vl().  @ffr is forwarded to
 * __sve_save_p() and selects whether the FFR slot holds the real FFR
 * value or an all-false predicate.
 */
static inline void sve_save_state(struct arm64_sve_state *state, bool ffr)
{
	unsigned long vl = sve_get_vl();
	__sve_save_z(state, vl);
	__sve_save_p(state, vl, ffr);
}
327 
/*
 * Load the Z registers, the P registers and optionally FFR from @state,
 * using the vector length reported by sve_get_vl().  @ffr is forwarded to
 * __sve_load_p() and selects whether FFR is restored.
 */
static inline void sve_load_state(const struct arm64_sve_state *state, bool ffr)
{
	unsigned long vl = sve_get_vl();
	__sve_load_z(state, vl);
	__sve_load_p(state, vl, ffr);
}
334 
/*
 * Zero all SVE registers except for the first 128 bits of each vector.
 *
 * The caller must ensure that the VL has been configured and the CPU must be
 * in non-streaming mode.
 */
static inline void sve_flush_live(void)
{
	unsigned long vl = sve_get_vl();

	/*
	 * With a VL of sizeof(__uint128_t) bytes or less there is nothing
	 * beyond the first 128 bits to clear.  Otherwise each Vn is moved
	 * onto itself.
	 * NOTE(review): this presumably relies on a write to Vn zeroing the
	 * remaining bits of Zn -- confirm against the architecture manual.
	 */
	if (vl > sizeof(__uint128_t)) {
		asm volatile(
		__FPSIMD_PREAMBLE
		FOR_EACH_Z_REG("n", "mov	v\\n\\().16b, v\\n\\().16b")
		);
	}

	/* Set P0-P15 to all-false, then write FFR from the now-false P0. */
	asm volatile(
	__SVE_PREAMBLE
	FOR_EACH_P_REG("n", "pfalse	p\\n\\().b")
	"	wrffr	p0.b\n"
	);
}
358 
/* Forward declaration; only pointers to the type are used below. */
struct arm64_cpu_capabilities;
/*
 * Per-feature enable hooks.  Prototypes only; the implementations are
 * provided elsewhere.  The parameter is named __unused in every
 * prototype.
 */
extern void cpu_enable_fpsimd(const struct arm64_cpu_capabilities *__unused);
extern void cpu_enable_sve(const struct arm64_cpu_capabilities *__unused);
extern void cpu_enable_sme(const struct arm64_cpu_capabilities *__unused);
extern void cpu_enable_sme2(const struct arm64_cpu_capabilities *__unused);
extern void cpu_enable_fa64(const struct arm64_cpu_capabilities *__unused);
extern void cpu_enable_fpmr(const struct arm64_cpu_capabilities *__unused);
366 
/*
 * Helpers to translate bit indices in sve_vq_map to VQ values (and
 * vice versa).  This allows find_next_bit() to be used to find the
 * _maximum_ VQ not exceeding a certain value.
 *
 * The mapping is SVE_VQ_MAX - x in both directions, so it is its own
 * inverse and larger VQs map to smaller bit indices.
 */
static inline unsigned int __vq_to_bit(unsigned int vq)
{
	return SVE_VQ_MAX - vq;
}
376 
/* Inverse of __vq_to_bit(): map a bitmap index back to its VQ value. */
static inline unsigned int __bit_to_vq(unsigned int bit)
{
	return SVE_VQ_MAX - bit;
}
381 
382 
/*
 * Vector length information for one vector type.  The vl_info[] array
 * declared in this header holds one entry per enum vec_type value.
 */
struct vl_info {
	enum vec_type type;
	const char *name;		/* For display purposes */

	/* Minimum supported vector length across all CPUs */
	int min_vl;

	/* Maximum supported vector length across all CPUs */
	int max_vl;
	/* NOTE(review): presumably the largest VL usable by guests -- confirm */
	int max_virtualisable_vl;

	/*
	 * Set of available vector lengths,
	 * where length vq encoded as bit __vq_to_bit(vq):
	 */
	DECLARE_BITMAP(vq_map, SVE_VQ_MAX);

	/* Set of vector lengths present on at least one cpu: */
	DECLARE_BITMAP(vq_partial_map, SVE_VQ_MAX);
};
403 
404 #ifdef CONFIG_ARM64_SVE
405 
/*
 * SVE entry points available when CONFIG_ARM64_SVE is set.  Prototypes
 * only; the implementations are provided elsewhere.  Stub versions with
 * the same names are defined further down for !CONFIG_ARM64_SVE.
 */
extern void sve_alloc(struct task_struct *task, bool flush);
extern void fpsimd_release_task(struct task_struct *task);
extern void fpsimd_sync_from_effective_state(struct task_struct *task);
extern void fpsimd_sync_to_effective_state_zeropad(struct task_struct *task);

extern int vec_set_vector_length(struct task_struct *task, enum vec_type type,
				 unsigned long vl, unsigned long flags);

extern int sve_set_current_vl(unsigned long arg);
extern int sve_get_current_vl(void);
416 
/* Clear CPACR_EL1_ZEN_EL0EN in CPACR_EL1; no bits are set. */
static inline void sve_user_disable(void)
{
	sysreg_clear_set(cpacr_el1, CPACR_EL1_ZEN_EL0EN, 0);
}
421 
/* Set CPACR_EL1_ZEN_EL0EN in CPACR_EL1; no bits are cleared. */
static inline void sve_user_enable(void)
{
	sysreg_clear_set(cpacr_el1, 0, CPACR_EL1_ZEN_EL0EN);
}
426 
/*
 * Replace the LEN field of the ZCR register @reg with the LEN bits of
 * @val, leaving every other bit as read.  The register is written only
 * when the new value differs from the value just read.
 */
#define sve_cond_update_zcr_vq(val, reg)		\
	do {						\
		u64 __zcr = read_sysreg_s((reg));	\
		u64 __new = __zcr & ~ZCR_ELx_LEN_MASK;	\
		__new |= (val) & ZCR_ELx_LEN_MASK;	\
		if (__zcr != __new)			\
			write_sysreg_s(__new, (reg));	\
	} while (0)
435 
/*
 * Probing and setup functions.
 * Calls to these functions must be serialised with one another.
 *
 * Prototypes only; the implementations are provided elsewhere.
 */
enum vec_type;

extern void __init vec_init_vq_map(enum vec_type type);
extern void vec_update_vq_map(enum vec_type type);
extern int vec_verify_vq_map(enum vec_type type);
extern void __init sve_setup(void);

/* One struct vl_info per vector type, indexed by enum vec_type. */
extern __ro_after_init struct vl_info vl_info[ARM64_VEC_MAX];
448 
449 static inline void write_vl(enum vec_type type, u64 val)
450 {
451 	u64 tmp;
452 
453 	switch (type) {
454 #ifdef CONFIG_ARM64_SVE
455 	case ARM64_VEC_SVE:
456 		tmp = read_sysreg_s(SYS_ZCR_EL1) & ~ZCR_ELx_LEN_MASK;
457 		write_sysreg_s(tmp | val, SYS_ZCR_EL1);
458 		break;
459 #endif
460 #ifdef CONFIG_ARM64_SME
461 	case ARM64_VEC_SME:
462 		tmp = read_sysreg_s(SYS_SMCR_EL1) & ~SMCR_ELx_LEN_MASK;
463 		write_sysreg_s(tmp | val, SYS_SMCR_EL1);
464 		break;
465 #endif
466 	default:
467 		WARN_ON_ONCE(1);
468 		break;
469 	}
470 }
471 
472 static inline int vec_max_vl(enum vec_type type)
473 {
474 	return vl_info[type].max_vl;
475 }
476 
477 static inline int vec_max_virtualisable_vl(enum vec_type type)
478 {
479 	return vl_info[type].max_virtualisable_vl;
480 }
481 
/* vec_max_vl() for ARM64_VEC_SVE. */
static inline int sve_max_vl(void)
{
	return vec_max_vl(ARM64_VEC_SVE);
}
486 
/* vec_max_virtualisable_vl() for ARM64_VEC_SVE. */
static inline int sve_max_virtualisable_vl(void)
{
	return vec_max_virtualisable_vl(ARM64_VEC_SVE);
}
491 
492 /* Ensure vq >= SVE_VQ_MIN && vq <= SVE_VQ_MAX before calling this function */
493 static inline bool vq_available(enum vec_type type, unsigned int vq)
494 {
495 	return test_bit(__vq_to_bit(vq), vl_info[type].vq_map);
496 }
497 
/*
 * vq_available() for ARM64_VEC_SVE.  The same precondition on @vq applies:
 * it must lie within SVE_VQ_MIN..SVE_VQ_MAX.
 */
static inline bool sve_vq_available(unsigned int vq)
{
	return vq_available(ARM64_VEC_SVE, vq);
}
502 
503 static inline size_t __sve_state_size(unsigned int sve_vl, unsigned int sme_vl)
504 {
505 	unsigned int vl = max(sve_vl, sme_vl);
506 	return SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl));
507 }
508 
509 /*
510  * Return how many bytes of memory are required to store the full SVE
511  * state for task, given task's currently configured vector length.
512  */
513 static inline size_t sve_state_size(struct task_struct const *task)
514 {
515 	unsigned int sve_vl = task_get_sve_vl(task);
516 	unsigned int sme_vl = task_get_sme_vl(task);
517 	return __sve_state_size(sve_vl, sme_vl);
518 }
519 
520 #else /* ! CONFIG_ARM64_SVE */
521 
/* !CONFIG_ARM64_SVE: these operations are no-ops. */
static inline void sve_alloc(struct task_struct *task, bool flush) { }
static inline void fpsimd_release_task(struct task_struct *task) { }
static inline void fpsimd_sync_from_effective_state(struct task_struct *task) { }
static inline void fpsimd_sync_to_effective_state_zeropad(struct task_struct *task) { }
526 
/*
 * !CONFIG_ARM64_SVE: fixed-result stubs.
 *
 * NOTE(review): sve_max_virtualisable_vl() reports 0 whereas sve_max_vl()
 * reports -EINVAL; callers must not assume the two stubs agree.
 */
static inline int sve_max_virtualisable_vl(void)
{
	return 0;
}

/* Always fails with -EINVAL; @arg is ignored. */
static inline int sve_set_current_vl(unsigned long arg)
{
	return -EINVAL;
}

/* Always fails with -EINVAL. */
static inline int sve_get_current_vl(void)
{
	return -EINVAL;
}

/* Always -EINVAL. */
static inline int sve_max_vl(void)
{
	return -EINVAL;
}

/* No vector length is ever available; @vq is ignored. */
static inline bool sve_vq_available(unsigned int vq) { return false; }
548 
/*
 * !CONFIG_ARM64_SVE: these bodies are BUILD_BUG().
 * NOTE(review): presumably any call that is not eliminated at compile
 * time breaks the build -- confirm BUILD_BUG() semantics.
 */
static inline void sve_user_disable(void) { BUILD_BUG(); }
static inline void sve_user_enable(void) { BUILD_BUG(); }

/* Expands to an empty statement; neither argument is evaluated. */
#define sve_cond_update_zcr_vq(val, reg) do { } while (0)

/* Probing and setup are no-ops; vec_verify_vq_map() always returns 0. */
static inline void vec_init_vq_map(enum vec_type t) { }
static inline void vec_update_vq_map(enum vec_type t) { }
static inline int vec_verify_vq_map(enum vec_type t) { return 0; }
static inline void sve_setup(void) { }
558 
/* !CONFIG_ARM64_SVE: the SVE state size is always 0; arguments are ignored. */
static inline size_t __sve_state_size(unsigned int sve_vl, unsigned int sme_vl)
{
	return 0;
}

/* !CONFIG_ARM64_SVE: the SVE state size is always 0; @task is ignored. */
static inline size_t sve_state_size(struct task_struct const *task)
{
	return 0;
}
568 
569 #endif /* ! CONFIG_ARM64_SVE */
570 
571 #ifdef CONFIG_ARM64_SME
572 
/* Clear CPACR_EL1_SMEN_EL0EN in CPACR_EL1; no bits are set. */
static inline void sme_user_disable(void)
{
	sysreg_clear_set(cpacr_el1, CPACR_EL1_SMEN_EL0EN, 0);
}
577 
/* Set CPACR_EL1_SMEN_EL0EN in CPACR_EL1; no bits are cleared. */
static inline void sme_user_enable(void)
{
	sysreg_clear_set(cpacr_el1, 0, CPACR_EL1_SMEN_EL0EN);
}
582 
/*
 * Each helper below issues a single MSR, with xzr as the source operand,
 * to the system register encoding named in its body.
 */

/* MSR to SYS_SVCR_SMSTART_SM_EL0. */
static inline void sme_smstart_sm(void)
{
	asm volatile(__msr_s(SYS_SVCR_SMSTART_SM_EL0, "xzr"));
}

/* MSR to SYS_SVCR_SMSTOP_SM_EL0. */
static inline void sme_smstop_sm(void)
{
	asm volatile(__msr_s(SYS_SVCR_SMSTOP_SM_EL0, "xzr"));
}

/* MSR to SYS_SVCR_SMSTOP_SMZA_EL0. */
static inline void sme_smstop(void)
{
	asm volatile(__msr_s(SYS_SVCR_SMSTOP_SMZA_EL0, "xzr"));
}
597 
/* Prototype only; the implementation is provided elsewhere. */
extern void __init sme_setup(void);

/* vec_max_vl() for ARM64_VEC_SME. */
static inline int sme_max_vl(void)
{
	return vec_max_vl(ARM64_VEC_SME);
}

/* vec_max_virtualisable_vl() for ARM64_VEC_SME. */
static inline int sme_max_virtualisable_vl(void)
{
	return vec_max_virtualisable_vl(ARM64_VEC_SME);
}
609 
/*
 * Return the value produced by "rdsvl <reg>, #1".  Callers in this file
 * use the result as both the number of ZA vectors and the size in bytes
 * of each one.
 */
static inline unsigned int sme_get_vl(void)
{
	unsigned int vl;

	asm volatile(
	__SME_PREAMBLE
	/* %x forces the 64-bit (Xn) name of the output register. */
	"	rdsvl %x[vl], #1\n"
	: [vl] "=r" (vl)
	);

	return vl;
}
622 
/*
 * SME entry points available when CONFIG_ARM64_SME is set.  Prototypes
 * only; the implementations are provided elsewhere.
 */
extern void sme_alloc(struct task_struct *task, bool flush);
extern int sme_set_current_vl(unsigned long arg);
extern int sme_get_current_vl(void);
extern void sme_suspend_exit(void);
627 
628 static inline size_t __sme_state_size(unsigned int sme_vl)
629 {
630 	size_t size = ZA_SIG_REGS_SIZE(sve_vq_from_vl(sme_vl));
631 
632 	if (system_supports_sme2())
633 		size += ZT_SIG_REG_SIZE;
634 
635 	return size;
636 }
637 
/*
 * Store the ZA array to @state as @svl consecutive vectors of @svl bytes
 * each (svl * svl bytes in total), one STR (array vector) per loop
 * iteration.
 */
static inline void __sme_save_za(struct arm64_sme_state *state, unsigned long svl)
{
	/*
	 * The <Wv> argument to LDR/STR (array vector) can only encode W12-W15.
	 * The "Ucj" constraint exists for this, but is only supported by GCC
	 * 14.1.0+ and LLVM 18.1.0+.
	 *
	 * The loop counter is therefore pinned to w12 and used directly as
	 * the vector select operand.
	 */
	register unsigned int v asm ("w12");

	instrument_write(state, svl * svl);
	for (v = 0; v < svl; v++) {
		/* Destination of ZA vector v. */
		void *pav = (void *)state + v * svl;

		asm volatile(
		__SME_PREAMBLE
		"	str	za[%w[v], #0], [%[pav]]\n"
		:
		: [v] "r" (v),
		  [pav] "r" (pav)
		: "memory"
		);
	}
}
661 
662 static inline void __sme_load_za(const struct arm64_sme_state *state, unsigned long svl)
663 {
664 	/* See comment in __sme_save_za */
665 	register unsigned int v asm ("w12");
666 
667 	instrument_read(state, svl * svl);
668 	for (v = 0; v < svl; v++) {
669 		void *pav = (void *)state + v * svl;
670 
671 		asm volatile(
672 		__SME_PREAMBLE
673 		"	ldr	za[%w[v], #0], [%[pav]]\n"
674 		:
675 		: [v] "r" (v),
676 		  [pav] "r" (pav)
677 		: "memory"
678 		);
679 	}
680 }
681 
/*
 * Store ZT0 into the buffer at @state.  The ZT0 slot starts svl * svl
 * bytes into the buffer, i.e. directly after the region written by
 * __sme_save_za(), and 64 bytes are reported as written.
 */
static inline void __sme_save_zt(struct arm64_sme_state *state, unsigned long svl)
{
	void *pzt = (void *)state + svl * svl;

	instrument_write(pzt, 64);
	asm volatile(
	__DEFINE_ASM_GPR_NUMS
	/*
	 * STR ZT0, [<Xn|SP>]
	 * Supported by binutils 2.41+.
	 * Supported by LLVM 16+
	 *
	 * Emitted as a raw encoding so that older toolchains can build
	 * it; the number of the register holding pzt is placed at bit 5.
	 */
	"	.inst	0xe13f8000 | ((.L__gpr_num_%[pzt]) << 5)\n"
	:
	: [pzt] "r" (pzt)
	: "memory"
	);
}
700 
701 static inline void __sme_load_zt(const struct arm64_sme_state *state, unsigned long svl)
702 {
703 	void *pzt = (void *)state + svl * svl;
704 
705 	instrument_read(pzt, 64);
706 	asm volatile(
707 	__DEFINE_ASM_GPR_NUMS
708 	/*
709 	 * LDR ZT0, [<Xn|SP>]
710 	 * Supported by binutils 2.41+.
711 	 * Supported by LLVM 16+
712 	 */
713 	"	.inst	0xe11f8000 | ((.L__gpr_num_%[pzt]) << 5)\n"
714 	:
715 	: [pzt] "r" (pzt)
716 	: "memory"
717 	);
718 }
719 
/*
 * Save ZA, and ZT0 when @zt is true, into @state using the vector length
 * reported by sme_get_vl().
 */
static inline void sme_save_state(struct arm64_sme_state *state, bool zt)
{
	unsigned long svl = sme_get_vl();

	__sme_save_za(state, svl);
	if (zt)
		__sme_save_zt(state, svl);
}
728 
/*
 * Load ZA, and ZT0 when @zt is true, from @state using the vector length
 * reported by sme_get_vl().
 */
static inline void sme_load_state(const struct arm64_sme_state *state, bool zt)
{
	unsigned long svl = sme_get_vl();

	__sme_load_za(state, svl);
	if (zt)
		__sme_load_zt(state, svl);
}
737 
/*
 * Return how many bytes of memory are required to store the full SME
 * specific state for task, given task's currently configured vector
 * length.  The size is __sme_state_size() of the task's SME vector
 * length.
 */
static inline size_t sme_state_size(struct task_struct const *task)
{
	return __sme_state_size(task_get_sme_vl(task));
}
747 
/*
 * Prototypes only; the implementations are provided elsewhere.
 * sme_set_active() and sme_clear_active() are called from
 * sme_exit_to_user_mode() and sme_enter_from_user_mode() respectively.
 */
void sme_enable_dvmsync(void);
void sme_set_active(void);
void sme_clear_active(void);
751 
752 static inline void sme_enter_from_user_mode(void)
753 {
754 	if (alternative_has_cap_unlikely(ARM64_WORKAROUND_4193714) &&
755 	    test_thread_flag(TIF_SME))
756 		sme_clear_active();
757 }
758 
759 static inline void sme_exit_to_user_mode(void)
760 {
761 	if (alternative_has_cap_unlikely(ARM64_WORKAROUND_4193714) &&
762 	    test_thread_flag(TIF_SME))
763 		sme_set_active();
764 }
765 
766 #else
767 
/*
 * !CONFIG_ARM64_SME: these bodies are BUILD_BUG().
 * NOTE(review): presumably any call that is not eliminated at compile
 * time breaks the build -- confirm BUILD_BUG() semantics.
 */
static inline void sme_user_disable(void) { BUILD_BUG(); }
static inline void sme_user_enable(void) { BUILD_BUG(); }

/* !CONFIG_ARM64_SME: the streaming mode / ZA controls are no-ops. */
static inline void sme_smstart_sm(void) { }
static inline void sme_smstop_sm(void) { }
static inline void sme_smstop(void) { }

/*
 * !CONFIG_ARM64_SME: allocation, setup and suspend handling are no-ops,
 * every vector length query reports 0, and getting or setting the current
 * vector length fails with -EINVAL.
 */
static inline void sme_alloc(struct task_struct *task, bool flush) { }
static inline void sme_setup(void) { }
static inline unsigned int sme_get_vl(void) { return 0; }
static inline int sme_max_vl(void) { return 0; }
static inline int sme_max_virtualisable_vl(void) { return 0; }
static inline int sme_set_current_vl(unsigned long arg) { return -EINVAL; }
static inline int sme_get_current_vl(void) { return -EINVAL; }
static inline void sme_suspend_exit(void) { }
783 
/* !CONFIG_ARM64_SME: the SME state size is always 0; @sme_vl is ignored. */
static inline size_t __sme_state_size(unsigned int sme_vl)
{
	return 0;
}

/* !CONFIG_ARM64_SME: the SME state size is always 0; @task is ignored. */
static inline size_t sme_state_size(struct task_struct const *task)
{
	return 0;
}
793 
/*
 * !CONFIG_ARM64_SME: saving or loading SME state is BUILD_BUG().
 * NOTE(review): presumably any call that is not eliminated at compile
 * time breaks the build -- confirm BUILD_BUG() semantics.
 */
static inline void sme_save_state(struct arm64_sme_state *state, bool zt) { BUILD_BUG(); }
static inline void sme_load_state(const struct arm64_sme_state *state, bool zt) { BUILD_BUG(); }

/* !CONFIG_ARM64_SME: the user mode entry/exit hooks are no-ops. */
static inline void sme_enter_from_user_mode(void) { }
static inline void sme_exit_to_user_mode(void) { }
799 
800 #endif /* ! CONFIG_ARM64_SME */
801 
/*
 * For use by EFI runtime services calls only.
 * Prototypes only; the implementations are provided elsewhere.
 */
extern void __efi_fpsimd_begin(void);
extern void __efi_fpsimd_end(void);
805 
806 #endif
807 
808 #endif
809