xref: /freebsd/sys/arm64/arm64/vfp.c (revision 9ff643a8da476c38b29c071d00805d52b851ee03)
/*-
 * Copyright (c) 2015-2016 The FreeBSD Foundation
 *
 * This software was developed by Andrew Turner under
 * sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#ifdef VFP
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/elf.h>
#include <sys/eventhandler.h>
#include <sys/limits.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/reg.h>
#include <sys/smp.h>

#include <vm/uma.h>

#include <machine/armreg.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
#include <machine/vfp.h>

/* Sanity check we can store all the VFP registers (32 Q regs, 16 bytes each) */
CTASSERT(sizeof(((struct pcb *)0)->pcb_fpustate.vfp_regs) == 16 * 32);

static MALLOC_DEFINE(M_FPUKERN_CTX, "fpukern_ctx",
    "Kernel contexts for VFP state");

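/*
 * Context for kernel FPU users: it records the previous save-area pointer
 * and provides a private save area for code bracketed by fpu_kern_enter()
 * and fpu_kern_leave().
 */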
struct fpu_kern_ctx {
	struct vfpstate	*prev;
#define	FPU_KERN_CTX_DUMMY	0x01	/* avoided save for the kern thread */
#define	FPU_KERN_CTX_INUSE	0x02
	uint32_t	 flags;
	struct vfpstate	 state;
};

static uma_zone_t fpu_save_area_zone;
static struct vfpstate *fpu_initialstate;

static u_int sve_max_vector_len;

static size_t
_sve_buf_size(u_int sve_len)
{
	size_t len;

	/* 32 vector registers */
	len = (size_t)sve_len * 32;
	/*
	 * 16 predicate registers and the First Fault Register (FFR), each
	 * 1/8th the size of a vector register.
	 */
	len += ((size_t)sve_len * 17) / 8;
	/*
	 * FPSR and FPCR
	 */
	len += sizeof(uint64_t) * 2;

	return (len);
}
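
/*
 * For example, with a 64-byte (512-bit) vector length this works out to
 * 64 * 32 = 2048 bytes of Z registers, (64 * 17) / 8 = 136 bytes of
 * predicate registers plus the FFR, and 16 bytes of FPSR/FPCR, for a
 * total of 2200 bytes.
 */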

size_t
sve_max_buf_size(void)
{
	MPASS(sve_max_vector_len > 0);
	return (_sve_buf_size(sve_max_vector_len));
}

size_t
sve_buf_size(struct thread *td)
{
	struct pcb *pcb;

	pcb = td->td_pcb;
	MPASS(pcb->pcb_svesaved != NULL);
	MPASS(pcb->pcb_sve_len > 0);

	return (_sve_buf_size(pcb->pcb_sve_len));
}

static void *
sve_alloc(void)
{
	void *buf;

	buf = malloc(sve_max_buf_size(), M_FPUKERN_CTX, M_WAITOK | M_ZERO);

	return (buf);
}

static void
sve_free(void *buf)
{
	free(buf, M_FPUKERN_CTX);
}

void
vfp_enable(void)
{
	uint32_t cpacr;

	cpacr = READ_SPECIALREG(cpacr_el1);
	cpacr = (cpacr & ~CPACR_FPEN_MASK) | CPACR_FPEN_TRAP_NONE;
	WRITE_SPECIALREG(cpacr_el1, cpacr);
	isb();
}

static void
sve_enable(void)
{
	uint32_t cpacr;

	cpacr = READ_SPECIALREG(cpacr_el1);
	/* Enable FP */
	cpacr = (cpacr & ~CPACR_FPEN_MASK) | CPACR_FPEN_TRAP_NONE;
	/* Enable SVE */
	cpacr = (cpacr & ~CPACR_ZEN_MASK) | CPACR_ZEN_TRAP_NONE;
	WRITE_SPECIALREG(cpacr_el1, cpacr);
	isb();
}

void
vfp_disable(void)
{
	uint32_t cpacr;

	cpacr = READ_SPECIALREG(cpacr_el1);
	/* Disable FP */
	cpacr = (cpacr & ~CPACR_FPEN_MASK) | CPACR_FPEN_TRAP_ALL1;
	/* Disable SVE */
	cpacr = (cpacr & ~CPACR_ZEN_MASK) | CPACR_ZEN_TRAP_ALL1;
	WRITE_SPECIALREG(cpacr_el1, cpacr);
	isb();
}

/*
 * Called when the thread is dying or when discarding the kernel VFP state.
 * If the thread was the last to use the VFP unit, mark it as unused to tell
 * the kernel the FP state is unowned. Ensure the VFP unit is off so we get
 * an exception on the next access.
 */
void
vfp_discard(struct thread *td)
{

#ifdef INVARIANTS
	if (td != NULL)
		CRITICAL_ASSERT(td);
#endif
	if (PCPU_GET(fpcurthread) == td)
		PCPU_SET(fpcurthread, NULL);

	vfp_disable();
}

void
vfp_store(struct vfpstate *state)
{
	__uint128_t *vfp_state;
	uint64_t fpcr, fpsr;

	vfp_state = state->vfp_regs;
	__asm __volatile(
	    ".arch_extension fp\n"
	    "mrs	%0, fpcr		\n"
	    "mrs	%1, fpsr		\n"
	    "stp	q0,  q1,  [%2, #16 *  0]\n"
	    "stp	q2,  q3,  [%2, #16 *  2]\n"
	    "stp	q4,  q5,  [%2, #16 *  4]\n"
	    "stp	q6,  q7,  [%2, #16 *  6]\n"
	    "stp	q8,  q9,  [%2, #16 *  8]\n"
	    "stp	q10, q11, [%2, #16 * 10]\n"
	    "stp	q12, q13, [%2, #16 * 12]\n"
	    "stp	q14, q15, [%2, #16 * 14]\n"
	    "stp	q16, q17, [%2, #16 * 16]\n"
	    "stp	q18, q19, [%2, #16 * 18]\n"
	    "stp	q20, q21, [%2, #16 * 20]\n"
	    "stp	q22, q23, [%2, #16 * 22]\n"
	    "stp	q24, q25, [%2, #16 * 24]\n"
	    "stp	q26, q27, [%2, #16 * 26]\n"
	    "stp	q28, q29, [%2, #16 * 28]\n"
	    "stp	q30, q31, [%2, #16 * 30]\n"
	    ".arch_extension nofp\n"
	    : "=&r"(fpcr), "=&r"(fpsr) : "r"(vfp_state));

	state->vfp_fpcr = fpcr;
	state->vfp_fpsr = fpsr;
}

void
vfp_restore(struct vfpstate *state)
{
	__uint128_t *vfp_state;
	uint64_t fpcr, fpsr;

	vfp_state = state->vfp_regs;
	fpcr = state->vfp_fpcr;
	fpsr = state->vfp_fpsr;

	__asm __volatile(
	    ".arch_extension fp\n"
	    "ldp	q0,  q1,  [%2, #16 *  0]\n"
	    "ldp	q2,  q3,  [%2, #16 *  2]\n"
	    "ldp	q4,  q5,  [%2, #16 *  4]\n"
	    "ldp	q6,  q7,  [%2, #16 *  6]\n"
	    "ldp	q8,  q9,  [%2, #16 *  8]\n"
	    "ldp	q10, q11, [%2, #16 * 10]\n"
	    "ldp	q12, q13, [%2, #16 * 12]\n"
	    "ldp	q14, q15, [%2, #16 * 14]\n"
	    "ldp	q16, q17, [%2, #16 * 16]\n"
	    "ldp	q18, q19, [%2, #16 * 18]\n"
	    "ldp	q20, q21, [%2, #16 * 20]\n"
	    "ldp	q22, q23, [%2, #16 * 22]\n"
	    "ldp	q24, q25, [%2, #16 * 24]\n"
	    "ldp	q26, q27, [%2, #16 * 26]\n"
	    "ldp	q28, q29, [%2, #16 * 28]\n"
	    "ldp	q30, q31, [%2, #16 * 30]\n"
	    "msr	fpcr, %0		\n"
	    "msr	fpsr, %1		\n"
	    ".arch_extension nofp\n"
	    : : "r"(fpcr), "r"(fpsr), "r"(vfp_state));
}

static void
sve_store(void *state, u_int sve_len)
{
	vm_offset_t f_start, p_start, z_start;
	uint64_t fpcr, fpsr;

	/*
	 * Calculate the start of each register group. There are three
	 * groups depending on size, with the First Fault Register (FFR)
	 * stored with the predicate registers as we use one of them to
	 * temporarily hold it.
	 *
	 *                 +-------------------------+-------------------+
	 *                 | Contents                | Register size     |
	 *      z_start -> +-------------------------+-------------------+
	 *                 |                         |                   |
	 *                 | 32 Z regs               | sve_len           |
	 *                 |                         |                   |
	 *      p_start -> +-------------------------+-------------------+
	 *                 |                         |                   |
	 *                 | 16 Predicate registers  | 1/8 size of Z reg |
	 *                 |  1 First Fault register |                   |
	 *                 |                         |                   |
	 *      f_start -> +-------------------------+-------------------+
	 *                 |                         |                   |
	 *                 | FPSR/FPCR               | 32 bit            |
	 *                 |                         |                   |
	 *                 +-------------------------+-------------------+
	 */
	z_start = (vm_offset_t)state;
	p_start = z_start + sve_len * 32;
	f_start = p_start + (sve_len / 8) * 17;

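	/*
	 * Each "#imm, MUL VL" operand below scales the immediate by the
	 * current vector (or predicate) length, so register n lands in
	 * the n-th VL-sized slot past the base pointer.
	 */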
	__asm __volatile(
	    ".arch_extension sve				\n"
	    "str	z0, [%0, #0, MUL VL]			\n"
	    "str	z1, [%0, #1, MUL VL]			\n"
	    "str	z2, [%0, #2, MUL VL]			\n"
	    "str	z3, [%0, #3, MUL VL]			\n"
	    "str	z4, [%0, #4, MUL VL]			\n"
	    "str	z5, [%0, #5, MUL VL]			\n"
	    "str	z6, [%0, #6, MUL VL]			\n"
	    "str	z7, [%0, #7, MUL VL]			\n"
	    "str	z8, [%0, #8, MUL VL]			\n"
	    "str	z9, [%0, #9, MUL VL]			\n"
	    "str	z10, [%0, #10, MUL VL]			\n"
	    "str	z11, [%0, #11, MUL VL]			\n"
	    "str	z12, [%0, #12, MUL VL]			\n"
	    "str	z13, [%0, #13, MUL VL]			\n"
	    "str	z14, [%0, #14, MUL VL]			\n"
	    "str	z15, [%0, #15, MUL VL]			\n"
	    "str	z16, [%0, #16, MUL VL]			\n"
	    "str	z17, [%0, #17, MUL VL]			\n"
	    "str	z18, [%0, #18, MUL VL]			\n"
	    "str	z19, [%0, #19, MUL VL]			\n"
	    "str	z20, [%0, #20, MUL VL]			\n"
	    "str	z21, [%0, #21, MUL VL]			\n"
	    "str	z22, [%0, #22, MUL VL]			\n"
	    "str	z23, [%0, #23, MUL VL]			\n"
	    "str	z24, [%0, #24, MUL VL]			\n"
	    "str	z25, [%0, #25, MUL VL]			\n"
	    "str	z26, [%0, #26, MUL VL]			\n"
	    "str	z27, [%0, #27, MUL VL]			\n"
	    "str	z28, [%0, #28, MUL VL]			\n"
	    "str	z29, [%0, #29, MUL VL]			\n"
	    "str	z30, [%0, #30, MUL VL]			\n"
	    "str	z31, [%0, #31, MUL VL]			\n"
	    /* Store the predicate registers */
	    "str	p0, [%1, #0, MUL VL]			\n"
	    "str	p1, [%1, #1, MUL VL]			\n"
	    "str	p2, [%1, #2, MUL VL]			\n"
	    "str	p3, [%1, #3, MUL VL]			\n"
	    "str	p4, [%1, #4, MUL VL]			\n"
	    "str	p5, [%1, #5, MUL VL]			\n"
	    "str	p6, [%1, #6, MUL VL]			\n"
	    "str	p7, [%1, #7, MUL VL]			\n"
	    "str	p8, [%1, #8, MUL VL]			\n"
	    "str	p9, [%1, #9, MUL VL]			\n"
	    "str	p10, [%1, #10, MUL VL]			\n"
	    "str	p11, [%1, #11, MUL VL]			\n"
	    "str	p12, [%1, #12, MUL VL]			\n"
	    "str	p13, [%1, #13, MUL VL]			\n"
	    "str	p14, [%1, #14, MUL VL]			\n"
	    "str	p15, [%1, #15, MUL VL]			\n"
	    ".arch_extension nosve				\n"
	    : : "r"(z_start), "r"(p_start));

	/* Save the FFR if needed */
	/* TODO: Skip if in SME streaming mode (when supported) */
	__asm __volatile(
	    ".arch_extension sve				\n"
	    "rdffr	p0.b					\n"
	    "str	p0, [%0, #16, MUL VL]			\n"
	/*
	 * Load the old p0 value to ensure it is consistent if we enable
	 * without calling sve_restore, e.g. switch to a kernel thread and
	 * back.
	 */
	    "ldr	p0, [%0, #0, MUL VL]			\n"
	    ".arch_extension nosve				\n"
	    : : "r"(p_start));

	__asm __volatile(
	    ".arch_extension fp					\n"
	    "mrs	%0, fpsr				\n"
	    "mrs	%1, fpcr				\n"
	    "stp	%w0, %w1, [%2]				\n"
	    ".arch_extension nofp				\n"
	    : "=&r"(fpsr), "=&r"(fpcr) : "r"(f_start));
}

static void
sve_restore(void *state, u_int sve_len)
{
	vm_offset_t f_start, p_start, z_start;
	uint64_t fpcr, fpsr;

	/* See sve_store for the layout of the state buffer */
	z_start = (vm_offset_t)state;
	p_start = z_start + sve_len * 32;
	f_start = p_start + (sve_len / 8) * 17;

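	/*
	 * Restore the FFR first, using p0 as scratch; p0 itself is
	 * reloaded with its saved value in the block below.
	 */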
	__asm __volatile(
	    ".arch_extension sve				\n"
	    "ldr	p0, [%0, #16, MUL VL]			\n"
	    "wrffr	p0.b					\n"
	    ".arch_extension nosve				\n"
	    : : "r"(p_start));

	__asm __volatile(
	    ".arch_extension sve				\n"
	    "ldr	z0, [%0, #0, MUL VL]			\n"
	    "ldr	z1, [%0, #1, MUL VL]			\n"
	    "ldr	z2, [%0, #2, MUL VL]			\n"
	    "ldr	z3, [%0, #3, MUL VL]			\n"
	    "ldr	z4, [%0, #4, MUL VL]			\n"
	    "ldr	z5, [%0, #5, MUL VL]			\n"
	    "ldr	z6, [%0, #6, MUL VL]			\n"
	    "ldr	z7, [%0, #7, MUL VL]			\n"
	    "ldr	z8, [%0, #8, MUL VL]			\n"
	    "ldr	z9, [%0, #9, MUL VL]			\n"
	    "ldr	z10, [%0, #10, MUL VL]			\n"
	    "ldr	z11, [%0, #11, MUL VL]			\n"
	    "ldr	z12, [%0, #12, MUL VL]			\n"
	    "ldr	z13, [%0, #13, MUL VL]			\n"
	    "ldr	z14, [%0, #14, MUL VL]			\n"
	    "ldr	z15, [%0, #15, MUL VL]			\n"
	    "ldr	z16, [%0, #16, MUL VL]			\n"
	    "ldr	z17, [%0, #17, MUL VL]			\n"
	    "ldr	z18, [%0, #18, MUL VL]			\n"
	    "ldr	z19, [%0, #19, MUL VL]			\n"
	    "ldr	z20, [%0, #20, MUL VL]			\n"
	    "ldr	z21, [%0, #21, MUL VL]			\n"
	    "ldr	z22, [%0, #22, MUL VL]			\n"
	    "ldr	z23, [%0, #23, MUL VL]			\n"
	    "ldr	z24, [%0, #24, MUL VL]			\n"
	    "ldr	z25, [%0, #25, MUL VL]			\n"
	    "ldr	z26, [%0, #26, MUL VL]			\n"
	    "ldr	z27, [%0, #27, MUL VL]			\n"
	    "ldr	z28, [%0, #28, MUL VL]			\n"
	    "ldr	z29, [%0, #29, MUL VL]			\n"
	    "ldr	z30, [%0, #30, MUL VL]			\n"
	    "ldr	z31, [%0, #31, MUL VL]			\n"
	    /* Load the predicate registers */
	    "ldr	p0, [%1, #0, MUL VL]			\n"
	    "ldr	p1, [%1, #1, MUL VL]			\n"
	    "ldr	p2, [%1, #2, MUL VL]			\n"
	    "ldr	p3, [%1, #3, MUL VL]			\n"
	    "ldr	p4, [%1, #4, MUL VL]			\n"
	    "ldr	p5, [%1, #5, MUL VL]			\n"
	    "ldr	p6, [%1, #6, MUL VL]			\n"
	    "ldr	p7, [%1, #7, MUL VL]			\n"
	    "ldr	p8, [%1, #8, MUL VL]			\n"
	    "ldr	p9, [%1, #9, MUL VL]			\n"
	    "ldr	p10, [%1, #10, MUL VL]			\n"
	    "ldr	p11, [%1, #11, MUL VL]			\n"
	    "ldr	p12, [%1, #12, MUL VL]			\n"
	    "ldr	p13, [%1, #13, MUL VL]			\n"
	    "ldr	p14, [%1, #14, MUL VL]			\n"
	    "ldr	p15, [%1, #15, MUL VL]			\n"
	    ".arch_extension nosve				\n"
	    : : "r"(z_start), "r"(p_start));

	__asm __volatile(
	    ".arch_extension fp					\n"
	    "ldp	%w0, %w1, [%2]				\n"
	    "msr	fpsr, %0				\n"
	    "msr	fpcr, %1				\n"
	    ".arch_extension nofp				\n"
	    : "=&r"(fpsr), "=&r"(fpcr) : "r"(f_start));
}

/*
 * Sync the VFP registers to the SVE register state, e.g. in signal return
 * when userspace may have changed the VFP register values and expects them
 * to be used when the signal handler returns.
 */
void
vfp_to_sve_sync(struct thread *td)
{
	struct pcb *pcb;
	uint32_t *fpxr;

	pcb = td->td_pcb;
	if (pcb->pcb_svesaved == NULL)
		return;

	MPASS(pcb->pcb_fpusaved != NULL);

	/* Copy the VFP registers to the SVE region */
	for (int i = 0; i < nitems(pcb->pcb_fpusaved->vfp_regs); i++) {
		__uint128_t *sve_reg;

		sve_reg = (__uint128_t *)((uintptr_t)pcb->pcb_svesaved +
		    i * pcb->pcb_sve_len);
		*sve_reg = pcb->pcb_fpusaved->vfp_regs[i];
	}

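	/*
	 * FPSR and FPCR sit after the 32 Z registers and the 17
	 * predicate/FFR slots; this mirrors f_start in sve_store().
	 */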
	fpxr = (uint32_t *)((uintptr_t)pcb->pcb_svesaved +
	    (32 * pcb->pcb_sve_len) + (17 * pcb->pcb_sve_len / 8));
	fpxr[0] = pcb->pcb_fpusaved->vfp_fpsr;
	fpxr[1] = pcb->pcb_fpusaved->vfp_fpcr;
}

/*
 * Sync the SVE registers to the VFP register state.
 */
void
sve_to_vfp_sync(struct thread *td)
{
	struct pcb *pcb;
	uint32_t *fpxr;

	pcb = td->td_pcb;
	if (pcb->pcb_svesaved == NULL)
		return;

	MPASS(pcb->pcb_fpusaved == &pcb->pcb_fpustate);

	/* Copy the SVE registers to the VFP saved state */
	for (int i = 0; i < nitems(pcb->pcb_fpusaved->vfp_regs); i++) {
		__uint128_t *sve_reg;

		sve_reg = (__uint128_t *)((uintptr_t)pcb->pcb_svesaved +
		    i * pcb->pcb_sve_len);
		pcb->pcb_fpusaved->vfp_regs[i] = *sve_reg;
	}

	fpxr = (uint32_t *)((uintptr_t)pcb->pcb_svesaved +
	    (32 * pcb->pcb_sve_len) + (17 * pcb->pcb_sve_len / 8));
	pcb->pcb_fpusaved->vfp_fpsr = fpxr[0];
	pcb->pcb_fpusaved->vfp_fpcr = fpxr[1];
}

static void
vfp_save_state_common(struct thread *td, struct pcb *pcb, bool full_save)
{
	uint32_t cpacr;
	bool save_sve;

	save_sve = false;

	critical_enter();
	/*
	 * Only store the registers if the VFP is enabled,
	 * i.e. return if we are trapping on FP access.
	 */
	cpacr = READ_SPECIALREG(cpacr_el1);
	if ((cpacr & CPACR_FPEN_MASK) != CPACR_FPEN_TRAP_NONE)
		goto done;

	KASSERT(PCPU_GET(fpcurthread) == td,
	    ("Storing an invalid VFP state"));

	/*
	 * Also save the SVE state. As SVE depends on the VFP being
	 * enabled we can rely on only needing to check this when
	 * the VFP unit has been enabled.
	 */
	if ((cpacr & CPACR_ZEN_MASK) == CPACR_ZEN_TRAP_NONE) {
		/* If SVE is enabled, it should be valid */
		MPASS((pcb->pcb_fpflags & PCB_FP_SVEVALID) != 0);

		/*
		 * If we are switching while in a system call, skip saving
		 * the SVE registers. The ABI allows us to drop them over
		 * any system call, however doing so is expensive in
		 * SVE-heavy userspace code. That would require us to
		 * disable SVE on every system call and trap the next use.
		 * As an optimisation only disable SVE on context switch.
		 */
		if (td->td_frame == NULL ||
		    (ESR_ELx_EXCEPTION(td->td_frame->tf_esr) != EXCP_SVC64 &&
		    td->td_sa.code != (u_int)-1))
			save_sve = true;
	}

	if (save_sve) {
		KASSERT(pcb->pcb_svesaved != NULL,
		    ("Storing to a NULL SVE state"));
		sve_store(pcb->pcb_svesaved, pcb->pcb_sve_len);
		if (full_save)
			sve_to_vfp_sync(td);
	} else {
		pcb->pcb_fpflags &= ~PCB_FP_SVEVALID;
		vfp_store(pcb->pcb_fpusaved);
	}
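	/* Ensure the register state has been written out before disabling */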
	dsb(ish);
	vfp_disable();

done:
	critical_exit();
}

void
vfp_save_state(struct thread *td, struct pcb *pcb)
{
	KASSERT(td != NULL, ("NULL vfp thread"));
	KASSERT(pcb != NULL, ("NULL vfp pcb"));
	KASSERT(td->td_pcb == pcb, ("Invalid vfp pcb"));

	vfp_save_state_common(td, pcb, true);
}

void
vfp_save_state_savectx(struct pcb *pcb)
{
	/*
	 * savectx() will be called on panic with dumppcb as an argument;
	 * dumppcb either has no pcb_fpusaved set or it was previously set
	 * to its own fpu state.
	 *
	 * In both cases we can set it here to the pcb fpu state.
	 */
	MPASS(pcb->pcb_fpusaved == NULL ||
	    pcb->pcb_fpusaved == &pcb->pcb_fpustate);
	pcb->pcb_fpusaved = &pcb->pcb_fpustate;

	vfp_save_state_common(curthread, pcb, true);
}

void
vfp_save_state_switch(struct thread *td)
{
	KASSERT(td != NULL, ("NULL vfp thread"));

	vfp_save_state_common(td, td->td_pcb, false);
}

/*
 * Update the VFP state for a forked process or new thread. The PCB will
 * have been copied from the old thread.
 */
void
vfp_new_thread(struct thread *newtd, struct thread *oldtd, bool fork)
{
	struct pcb *newpcb, *oldpcb;

	newpcb = newtd->td_pcb;
	oldpcb = oldtd->td_pcb;

	/* Kernel threads start with clean VFP */
	if ((oldtd->td_pflags & TDP_KTHREAD) != 0) {
		newpcb->pcb_fpflags &=
		    ~(PCB_FP_STARTED | PCB_FP_SVEVALID | PCB_FP_KERN |
		      PCB_FP_NOSAVE);
	} else {
		MPASS((newpcb->pcb_fpflags & (PCB_FP_KERN|PCB_FP_NOSAVE)) == 0);

		/*
		 * The only SVE register state guaranteed to be preserved
		 * across a system call is the lower bits of the Z registers,
		 * as these are aliased with the existing FP registers. Because
		 * we can only create a new thread or fork through a system
		 * call it is safe to drop the SVE state in the new thread.
		 */
		newpcb->pcb_fpflags &= ~PCB_FP_SVEVALID;
		if (!fork) {
			newpcb->pcb_fpflags &= ~PCB_FP_STARTED;
		}
	}

	newpcb->pcb_svesaved = NULL;
	if (oldpcb->pcb_svesaved == NULL)
		newpcb->pcb_sve_len = sve_max_vector_len;
	else
		KASSERT(newpcb->pcb_sve_len == oldpcb->pcb_sve_len,
		    ("%s: pcb sve vector length differs: %x != %x", __func__,
		    newpcb->pcb_sve_len, oldpcb->pcb_sve_len));

	newpcb->pcb_fpusaved = &newpcb->pcb_fpustate;
	newpcb->pcb_vfpcpu = UINT_MAX;
}

/*
 * Reset the FP state to avoid leaking state from the parent process across
 * execve() (and to ensure that we get a consistent floating point environment
 * in every new process).
 */
void
vfp_reset_state(struct thread *td, struct pcb *pcb)
{
	/* Discard the thread's VFP state before resetting it */
	critical_enter();
	vfp_discard(td);
	critical_exit();

	/*
	 * Clear the thread state. The VFP is disabled and is not the current
	 * VFP thread so we won't change any of these on context switch.
	 */
	bzero(&pcb->pcb_fpustate.vfp_regs, sizeof(pcb->pcb_fpustate.vfp_regs));
	KASSERT(pcb->pcb_fpusaved == &pcb->pcb_fpustate,
	    ("pcb_fpusaved should point to pcb_fpustate."));
	pcb->pcb_fpustate.vfp_fpcr = VFPCR_INIT;
	pcb->pcb_fpustate.vfp_fpsr = 0;
	/* XXX: Memory leak when using SVE between fork & exec? */
	pcb->pcb_svesaved = NULL;
	pcb->pcb_vfpcpu = UINT_MAX;
	pcb->pcb_fpflags = 0;
}

static void
vfp_restore_state_common(struct thread *td, int flags)
{
	struct pcb *curpcb;
	u_int cpu;
	bool restore_sve;

	KASSERT(td == curthread, ("%s: Called with non-current thread",
	    __func__));

	critical_enter();

	cpu = PCPU_GET(cpuid);
	curpcb = td->td_pcb;

	/*
	 * If SVE has been used and the base VFP state is in use then
	 * restore the SVE registers. A non-base VFP state should only
	 * be used by the kernel and SVE should only be used by userspace.
	 */
	restore_sve = false;
	if ((curpcb->pcb_fpflags & PCB_FP_SVEVALID) != 0 &&
	    curpcb->pcb_fpusaved == &curpcb->pcb_fpustate) {
		MPASS(curpcb->pcb_svesaved != NULL);
		/* SVE shouldn't be enabled in the kernel */
		MPASS((flags & PCB_FP_KERN) == 0);
		restore_sve = true;
	}

	if (restore_sve) {
		MPASS((curpcb->pcb_fpflags & PCB_FP_SVEVALID) != 0);
		sve_enable();
	} else {
		curpcb->pcb_fpflags |= PCB_FP_STARTED;
		vfp_enable();
	}

	/*
	 * If the previous thread on this cpu to use the VFP was not the
	 * current thread, or the current thread last used it on a different
	 * cpu we need to restore the old state.
	 */
	if (PCPU_GET(fpcurthread) != curthread || cpu != curpcb->pcb_vfpcpu) {
		/*
		 * The VFP registers are the lower 128 bits of the SVE
		 * registers. Restore from the saved SVE state if SVE was
		 * previously enabled.
		 */
		if (restore_sve) {
			MPASS(td->td_pcb->pcb_svesaved != NULL);
			sve_restore(td->td_pcb->pcb_svesaved,
			    td->td_pcb->pcb_sve_len);
		} else {
			vfp_restore(td->td_pcb->pcb_fpusaved);
		}
		PCPU_SET(fpcurthread, td);
		curpcb->pcb_vfpcpu = cpu;
	}

	critical_exit();
}

void
vfp_restore_state(void)
{
	struct thread *td;

	td = curthread;
	vfp_restore_state_common(td, td->td_pcb->pcb_fpflags);
}

bool
sve_restore_state(struct thread *td)
{
	struct pcb *curpcb;
	void *svesaved;
	uint64_t cpacr;

	KASSERT(td == curthread, ("%s: Called with non-current thread",
	    __func__));

	curpcb = td->td_pcb;

	/* The SVE state should alias the base VFP state */
	MPASS(curpcb->pcb_fpusaved == &curpcb->pcb_fpustate);

	/* SVE not enabled, tell the caller to raise a fault */
	if (curpcb->pcb_sve_len == 0) {
		/*
		 * The init pcb is created before we read the vector length.
		 * Set it to the default length.
		 */
		if (sve_max_vector_len == 0)
			return (false);

		MPASS(curpcb->pcb_svesaved == NULL);
		curpcb->pcb_sve_len = sve_max_vector_len;
	}

	if (curpcb->pcb_svesaved == NULL) {
		/* SVE should be disabled so will be invalid */
		MPASS((curpcb->pcb_fpflags & PCB_FP_SVEVALID) == 0);

		/*
		 * Allocate the SVE buffer for this thread before entering
		 * the critical section so the allocation can sleep.
		 */
		svesaved = sve_alloc();

		critical_enter();

		/* Restore the VFP state if needed */
		cpacr = READ_SPECIALREG(cpacr_el1);
		if ((cpacr & CPACR_FPEN_MASK) != CPACR_FPEN_TRAP_NONE) {
			vfp_restore_state_common(td, curpcb->pcb_fpflags);
		}

		/*
		 * Set the flags after enabling the VFP as the SVE saved
		 * state will be invalid.
		 */
		curpcb->pcb_svesaved = svesaved;
		curpcb->pcb_fpflags |= PCB_FP_SVEVALID;
		sve_enable();

		critical_exit();
	} else {
		vfp_restore_state_common(td, curpcb->pcb_fpflags);

		/* Enable SVE if it wasn't previously enabled */
		if ((curpcb->pcb_fpflags & PCB_FP_SVEVALID) == 0) {
			critical_enter();
			sve_enable();
			curpcb->pcb_fpflags |= PCB_FP_SVEVALID;
			critical_exit();
		}
	}

	return (true);
}

void
vfp_init_secondary(void)
{
	uint64_t pfr;

	/* Check if there is a vfp unit present */
	pfr = READ_SPECIALREG(id_aa64pfr0_el1);
	if ((pfr & ID_AA64PFR0_FP_MASK) == ID_AA64PFR0_FP_NONE)
		return;

	/* Disable to be enabled when it's used */
	vfp_disable();
}

static void
vfp_init(const void *dummy __unused)
{
	uint64_t pfr;

	/* Check if there is a vfp unit present */
	pfr = READ_SPECIALREG(id_aa64pfr0_el1);
	if ((pfr & ID_AA64PFR0_FP_MASK) == ID_AA64PFR0_FP_NONE)
		return;

	fpu_save_area_zone = uma_zcreate("VFP_save_area",
	    sizeof(struct vfpstate), NULL, NULL, NULL, NULL,
	    _Alignof(struct vfpstate) - 1, 0);
	fpu_initialstate = uma_zalloc(fpu_save_area_zone, M_WAITOK | M_ZERO);

	/* Ensure the VFP is enabled before accessing it in vfp_store */
	vfp_enable();
	vfp_store(fpu_initialstate);

	/* Disable to be enabled when it's used */
	vfp_disable();

	/* Zero the VFP registers but keep fpcr and fpsr */
	bzero(fpu_initialstate->vfp_regs, sizeof(fpu_initialstate->vfp_regs));

	thread0.td_pcb->pcb_fpusaved->vfp_fpcr = VFPCR_INIT;
}

SYSINIT(vfp, SI_SUB_CPU, SI_ORDER_ANY, vfp_init, NULL);

static void
sve_thread_dtor(void *arg __unused, struct thread *td)
{
	sve_free(td->td_pcb->pcb_svesaved);
}

static void
sve_pcpu_read(void *arg)
{
	u_int *len;
	uint64_t vl;

	len = arg;

	/* Enable SVE to read zcr_el1 and VFP for rdvl */
	sve_enable();

	/* Request the longest supported vector length */
	WRITE_SPECIALREG(ZCR_EL1_REG, ZCR_LEN_MASK);
	isb();

	/* Read back the vector length the hardware granted, in bytes */
	__asm __volatile(
	    ".arch_extension sve	\n"
	    "rdvl	%0, #1		\n"
	    ".arch_extension nosve	\n"
	    : "=&r"(vl));

	vfp_disable();

	len[PCPU_GET(cpuid)] = vl;
}

static void
sve_init(const void *dummy __unused)
{
	u_int *len_list;
	uint64_t reg;
	int i;

	if (!get_kernel_reg(ID_AA64PFR0_EL1, &reg))
		return;

	if (ID_AA64PFR0_SVE_VAL(reg) == ID_AA64PFR0_SVE_NONE)
		return;

	len_list = malloc(sizeof(*len_list) * (mp_maxid + 1), M_TEMP,
	    M_WAITOK | M_ZERO);
	smp_rendezvous(NULL, sve_pcpu_read, NULL, len_list);

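	/*
	 * Start from the architectural maximum and clamp down to the
	 * smallest vector length supported by any CPU.
	 */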
	sve_max_vector_len = ZCR_LEN_BYTES(ZCR_LEN_MASK);
	CPU_FOREACH(i) {
		if (bootverbose)
			printf("CPU%d SVE vector length: %u\n", i, len_list[i]);
		sve_max_vector_len = MIN(sve_max_vector_len, len_list[i]);
	}
	free(len_list, M_TEMP);

	if (bootverbose)
		printf("SVE with %u byte vectors\n", sve_max_vector_len);

	if (sve_max_vector_len > 0) {
		EVENTHANDLER_REGISTER(thread_dtor, sve_thread_dtor, NULL,
		    EVENTHANDLER_PRI_ANY);
	}
}
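
/*
 * Run after all CPUs are up (SI_SUB_SMP): sve_init() uses
 * smp_rendezvous() to read each CPU's supported vector length.
 */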
SYSINIT(sve, SI_SUB_SMP, SI_ORDER_ANY, sve_init, NULL);

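/*
 * regset handlers for the NT_ARM_SVE ELF note, used to read and write a
 * thread's SVE state, e.g. from ptrace(2) or when writing core dumps.
 */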
static bool
get_arm64_sve(struct regset *rs, struct thread *td, void *buf,
    size_t *sizep)
{
	struct svereg_header *header;
	struct pcb *pcb;
	size_t buf_size;
	uint16_t sve_flags;

	pcb = td->td_pcb;

	/* If there is no SVE support in HW then we don't support NT_ARM_SVE */
	if (pcb->pcb_sve_len == 0)
		return (false);

	sve_flags = 0;
	if ((pcb->pcb_fpflags & PCB_FP_SVEVALID) == 0) {
		/* If SVE hasn't been used yet, provide the VFP registers */
		buf_size = sizeof(struct fpreg);
		sve_flags |= SVEREG_FLAG_FP;
	} else {
		/* We have SVE registers */
		buf_size = sve_buf_size(td);
		sve_flags |= SVEREG_FLAG_SVE;
		KASSERT(pcb->pcb_svesaved != NULL, ("%s: no saved sve",
		    __func__));
	}

	if (buf != NULL) {
		KASSERT(*sizep == sizeof(struct svereg_header) + buf_size,
		    ("%s: invalid size", __func__));

		if (td == curthread && (pcb->pcb_fpflags & PCB_FP_STARTED) != 0)
			vfp_save_state(td, pcb);

		header = buf;
		memset(header, 0, sizeof(*header));

		header->sve_size = sizeof(struct svereg_header) + buf_size;
		header->sve_maxsize = sizeof(struct svereg_header) +
		    sve_max_buf_size();
		header->sve_vec_len = pcb->pcb_sve_len;
		header->sve_max_vec_len = sve_max_vector_len;
		header->sve_flags = sve_flags;

		if ((sve_flags & SVEREG_FLAG_REGS_MASK) == SVEREG_FLAG_FP) {
			struct fpreg *fpregs;

			fpregs = (void *)(&header[1]);
			memcpy(fpregs->fp_q, pcb->pcb_fpustate.vfp_regs,
			    sizeof(fpregs->fp_q));
			fpregs->fp_cr = pcb->pcb_fpustate.vfp_fpcr;
			fpregs->fp_sr = pcb->pcb_fpustate.vfp_fpsr;
		} else {
			memcpy((void *)(&header[1]), pcb->pcb_svesaved,
			    buf_size);
		}
	}
	*sizep = sizeof(struct svereg_header) + buf_size;

	return (true);
}

static bool
set_arm64_sve(struct regset *rs, struct thread *td, void *buf, size_t size)
{
	struct svereg_header *header;
	struct pcb *pcb;
	size_t buf_size;
	uint16_t sve_flags;

	pcb = td->td_pcb;

	/* If there is no SVE support in HW then we don't support NT_ARM_SVE */
	if (pcb->pcb_sve_len == 0)
		return (false);

	sve_flags = 0;
	if ((pcb->pcb_fpflags & PCB_FP_SVEVALID) == 0) {
		/*
		 * If the SVE state is invalid, provide the FP registers.
		 * This may be because SVE hasn't been used, or because the
		 * state was dropped when switching out in a system call.
		 */
		buf_size = sizeof(struct fpreg);
		sve_flags |= SVEREG_FLAG_FP;
	} else {
		/* We have SVE registers */
		MPASS(pcb->pcb_svesaved != NULL);
		buf_size = sve_buf_size(td);
		sve_flags |= SVEREG_FLAG_SVE;
		KASSERT(pcb->pcb_svesaved != NULL, ("%s: no saved sve",
		    __func__));
	}

	if (size != sizeof(struct svereg_header) + buf_size)
		return (false);

	header = buf;
	/* Sanity checks on the header */
	if (header->sve_size != sizeof(struct svereg_header) + buf_size)
		return (false);

	if (header->sve_maxsize != sizeof(struct svereg_header) +
	    sve_max_buf_size())
		return (false);

	if (header->sve_vec_len != pcb->pcb_sve_len)
		return (false);

	if (header->sve_max_vec_len != sve_max_vector_len)
		return (false);

	if (header->sve_flags != sve_flags)
		return (false);

	if ((sve_flags & SVEREG_FLAG_REGS_MASK) == SVEREG_FLAG_FP) {
		struct fpreg *fpregs;

		fpregs = (void *)(&header[1]);
		memcpy(pcb->pcb_fpustate.vfp_regs, fpregs->fp_q,
		    sizeof(fpregs->fp_q));
		pcb->pcb_fpustate.vfp_fpcr = fpregs->fp_cr;
		pcb->pcb_fpustate.vfp_fpsr = fpregs->fp_sr;
	} else {
		/* Restore the SVE registers */
		memcpy(pcb->pcb_svesaved, (void *)(&header[1]), buf_size);
	}

	return (true);
}

static struct regset regset_arm64_sve = {
	.note = NT_ARM_SVE,
	.get = get_arm64_sve,
	.set = set_arm64_sve,
};
ELF_REGSET(regset_arm64_sve);

struct fpu_kern_ctx *
fpu_kern_alloc_ctx(u_int flags)
{
	struct fpu_kern_ctx *res;
	size_t sz;

	sz = sizeof(struct fpu_kern_ctx);
	res = malloc(sz, M_FPUKERN_CTX, ((flags & FPU_KERN_NOWAIT) ?
	    M_NOWAIT : M_WAITOK) | M_ZERO);
	return (res);
}

void
fpu_kern_free_ctx(struct fpu_kern_ctx *ctx)
{

	KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) == 0, ("free'ing inuse ctx"));
	/* XXXAndrew clear the memory ? */
	free(ctx, M_FPUKERN_CTX);
}

void
fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags)
{
	struct pcb *pcb;

	pcb = td->td_pcb;
	KASSERT((flags & FPU_KERN_NOCTX) != 0 || ctx != NULL,
	    ("ctx is required when !FPU_KERN_NOCTX"));
	KASSERT(ctx == NULL || (ctx->flags & FPU_KERN_CTX_INUSE) == 0,
	    ("using inuse ctx"));
	KASSERT((pcb->pcb_fpflags & PCB_FP_NOSAVE) == 0,
	    ("recursive fpu_kern_enter while in PCB_FP_NOSAVE state"));

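	/*
	 * With FPU_KERN_NOCTX the caller uses the FPU directly inside a
	 * critical section with no separate save area; any live state is
	 * first saved to the current thread's PCB.
	 */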
	if ((flags & FPU_KERN_NOCTX) != 0) {
		critical_enter();
		if (curthread == PCPU_GET(fpcurthread)) {
			vfp_save_state(curthread, pcb);
		}
		PCPU_SET(fpcurthread, NULL);

		vfp_enable();
		pcb->pcb_fpflags |= PCB_FP_KERN | PCB_FP_NOSAVE |
		    PCB_FP_STARTED;
		return;
	}

	if ((flags & FPU_KERN_KTHR) != 0 && is_fpu_kern_thread(0)) {
		ctx->flags = FPU_KERN_CTX_DUMMY | FPU_KERN_CTX_INUSE;
		return;
	}
	/*
	 * Check that either we are already using the VFP in the kernel, or
	 * the saved state points to the default user space state.
	 */
	KASSERT((pcb->pcb_fpflags & PCB_FP_KERN) != 0 ||
	    pcb->pcb_fpusaved == &pcb->pcb_fpustate,
	    ("Mangled pcb_fpusaved %x %p %p", pcb->pcb_fpflags,
	    pcb->pcb_fpusaved, &pcb->pcb_fpustate));
	ctx->flags = FPU_KERN_CTX_INUSE;
	vfp_save_state(curthread, pcb);
	ctx->prev = pcb->pcb_fpusaved;
	pcb->pcb_fpusaved = &ctx->state;
	pcb->pcb_fpflags |= PCB_FP_KERN;
	pcb->pcb_fpflags &= ~PCB_FP_STARTED;

	return;
}

int
fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx)
{
	struct pcb *pcb;

	pcb = td->td_pcb;

	if ((pcb->pcb_fpflags & PCB_FP_NOSAVE) != 0) {
		KASSERT(ctx == NULL, ("non-null ctx after FPU_KERN_NOCTX"));
		KASSERT(PCPU_GET(fpcurthread) == NULL,
		    ("non-NULL fpcurthread for PCB_FP_NOSAVE"));
		CRITICAL_ASSERT(td);

		vfp_disable();
		pcb->pcb_fpflags &= ~(PCB_FP_NOSAVE | PCB_FP_STARTED);
		critical_exit();
	} else {
		KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) != 0,
		    ("FPU context not inuse"));
		ctx->flags &= ~FPU_KERN_CTX_INUSE;

		if (is_fpu_kern_thread(0) &&
		    (ctx->flags & FPU_KERN_CTX_DUMMY) != 0)
			return (0);
		KASSERT((ctx->flags & FPU_KERN_CTX_DUMMY) == 0, ("dummy ctx"));
		critical_enter();
		vfp_discard(td);
		critical_exit();
		pcb->pcb_fpflags &= ~PCB_FP_STARTED;
		pcb->pcb_fpusaved = ctx->prev;
	}

	if (pcb->pcb_fpusaved == &pcb->pcb_fpustate) {
		pcb->pcb_fpflags &= ~PCB_FP_KERN;
	} else {
		KASSERT((pcb->pcb_fpflags & PCB_FP_KERN) != 0,
		    ("unpaired fpu_kern_leave"));
	}

	return (0);
}

int
fpu_kern_thread(u_int flags __unused)
{
	struct pcb *pcb = curthread->td_pcb;

	KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0,
	    ("Only kthread may use fpu_kern_thread"));
	KASSERT(pcb->pcb_fpusaved == &pcb->pcb_fpustate,
	    ("Mangled pcb_fpusaved"));
	KASSERT((pcb->pcb_fpflags & PCB_FP_KERN) == 0,
	    ("Thread already setup for the VFP"));
	pcb->pcb_fpflags |= PCB_FP_KERN;
	return (0);
}

int
is_fpu_kern_thread(u_int flags __unused)
{
	struct pcb *curpcb;

	if ((curthread->td_pflags & TDP_KTHREAD) == 0)
		return (0);
	curpcb = curthread->td_pcb;
	return ((curpcb->pcb_fpflags & PCB_FP_KERN) != 0);
}

/*
 * FPU save area alloc/free/init utility routines
 */
struct vfpstate *
fpu_save_area_alloc(void)
{
	return (uma_zalloc(fpu_save_area_zone, M_WAITOK));
}

void
fpu_save_area_free(struct vfpstate *fsa)
{
	uma_zfree(fpu_save_area_zone, fsa);
}

void
fpu_save_area_reset(struct vfpstate *fsa)
{
	memcpy(fsa, fpu_initialstate, sizeof(*fsa));
}
#endif