1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2014 Ian Lepore <ian@freebsd.org>
5 * Copyright (c) 2012 Mark Tinguely
6 *
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/kernel.h>
34 #include <sys/limits.h>
35 #include <sys/malloc.h>
36 #include <sys/proc.h>
37
38 #include <machine/armreg.h>
39 #include <machine/elf.h>
40 #include <machine/frame.h>
41 #include <machine/md_var.h>
42 #include <machine/pcb.h>
43 #include <machine/undefined.h>
44 #include <machine/vfp.h>
45
/* function prototypes */
static int vfp_bounce(u_int, u_int, struct trapframe *, int);
static void vfp_restore(struct vfp_state *);

/* Defined elsewhere; set to 1 by vfp_init() when VFP hardware is found. */
extern int vfp_exists;
/* Undefined-instruction handlers installed for coprocessors 10 and 11. */
static struct undefined_handler vfp10_uh, vfp11_uh;
/* If true the VFP unit has 32 double registers, otherwise it has 16 */
static int is_d32;

/* Allocation tag for fpu_kern_alloc_ctx() contexts. */
static MALLOC_DEFINE(M_FPUKERN_CTX, "fpukern_ctx",
    "Kernel contexts for VFP state");
57
/*
 * Context used to track in-kernel VFP usage; see fpu_kern_enter() and
 * fpu_kern_leave().
 */
struct fpu_kern_ctx {
	struct vfp_state *prev;		/* saved pcb_vfpsaved, restored on leave */
#define	FPU_KERN_CTX_DUMMY	0x01	/* avoided save for the kern thread */
#define	FPU_KERN_CTX_INUSE	0x02
	uint32_t flags;			/* FPU_KERN_CTX_* flags */
	struct vfp_state state;		/* register save area for kernel use */
};
65
66 /*
67 * About .fpu directives in this file...
68 *
69 * We should need simply .fpu vfpv3, but clang 3.5 has a quirk where setting
70 * vfpv3 doesn't imply that vfp2 features are also available -- both have to be
71 * explicitly set to get all the features of both. This is probably a bug in
72 * clang, so it may get fixed and require changes here some day. Other changes
73 * are probably coming in clang too, because there is email and open PRs
74 * indicating they want to completely disable the ability to use .fpu and
75 * similar directives in inline asm. That would be catastrophic for us,
 * hopefully they come to their senses. There was also some discussion of a new
77 * syntax such as .push fpu=vfpv3; ...; .pop fpu; and that would be ideal for
78 * us, better than what we have now really.
79 *
80 * For gcc, each .fpu directive completely overrides the prior directive, unlike
81 * with clang, but luckily on gcc saying v3 implies all the v2 features as well.
82 */
83
/*
 * fmxr/fmrx: write/read a VFP system register (fpexc, fpscr, fpsid, ...)
 * via the vmsr/vmrs instructions. The .fpu directives make the assembler
 * accept those mnemonics; see the comment above for why both are needed.
 */
#define fmxr(reg, val) \
    __asm __volatile("	.fpu vfpv2\n .fpu vfpv3\n" \
	"		vmsr	" __STRING(reg) ", %0" :: "r"(val));

#define fmrx(reg) \
({ u_int val = 0;\
    __asm __volatile("	.fpu vfpv2\n .fpu vfpv3\n" \
	"		vmrs	%0, " __STRING(reg) : "=r"(val)); \
    val; \
})
94
/* Read the CP15 coprocessor access control register (c1, c0, 2). */
static u_int
get_coprocessorACR(void)
{
	u_int val;
	__asm __volatile("mrc p15, 0, %0, c1, c0, 2" : "=r" (val) : : "cc");
	return val;
}
102
/* Write the CP15 coprocessor access control register and synchronize. */
static void
set_coprocessorACR(u_int val)
{
	__asm __volatile("mcr p15, 0, %0, c1, c0, 2\n\t"
	 : : "r" (val) : "cc");
	isb();	/* ensure the new access permissions take effect */
}
110
111 static void
vfp_enable(void)112 vfp_enable(void)
113 {
114 uint32_t fpexc;
115
116 fpexc = fmrx(fpexc);
117 fmxr(fpexc, fpexc | VFPEXC_EN);
118 isb();
119 }
120
121 static void
vfp_disable(void)122 vfp_disable(void)
123 {
124 uint32_t fpexc;
125
126 fpexc = fmrx(fpexc);
127 fmxr(fpexc, fpexc & ~VFPEXC_EN);
128 isb();
129 }
130
131 /* called for each cpu */
void
vfp_init(void)
{
	u_int fpsid, tmp;
	u_int coproc, vfp_arch;

	/* Grant access to coprocessors 10 and 11 (the VFP/NEON unit). */
	coproc = get_coprocessorACR();
	coproc |= COPROC10 | COPROC11;
	set_coprocessorACR(coproc);

	fpsid = fmrx(fpsid);		/* read the vfp system id */

	/* A clear HARDSOFT bit indicates a hardware implementation. */
	if (!(fpsid & VFPSID_HARDSOFT_IMP)) {
		vfp_exists = 1;
		is_d32 = 0;
		PCPU_SET(vfpsid, fpsid);	/* save the fpsid */
		elf_hwcap |= HWCAP_VFP;

		vfp_arch =
		    (fpsid & VFPSID_SUBVERSION2_MASK) >> VFPSID_SUBVERSION_OFF;

		if (vfp_arch >= VFP_ARCH3) {
			tmp = fmrx(mvfr0);
			PCPU_SET(vfpmvfr0, tmp);
			elf_hwcap |= HWCAP_VFPv3;

			/* Register-bank field == 2 means 32 double regs. */
			if ((tmp & VMVFR0_RB_MASK) == 2) {
				elf_hwcap |= HWCAP_VFPD32;
				is_d32 = 1;
			} else
				elf_hwcap |= HWCAP_VFPv3D16;

			tmp = fmrx(mvfr1);
			PCPU_SET(vfpmvfr1, tmp);

			if (PCPU_GET(cpuid) == 0) {
				if ((tmp & VMVFR1_FZ_MASK) == 0x1) {
					/* Denormals arithmetic support */
					initial_fpscr &= ~VFPSCR_FZ;
					thread0.td_pcb->pcb_vfpstate.fpscr =
					    initial_fpscr;
				}
			}

			/*
			 * MVFR1 load/store, integer and single-precision
			 * fields all reading 1 indicate NEON support.
			 */
			if ((tmp & VMVFR1_LS_MASK) >> VMVFR1_LS_OFF == 1 &&
			    (tmp & VMVFR1_I_MASK) >> VMVFR1_I_OFF == 1 &&
			    (tmp & VMVFR1_SP_MASK) >> VMVFR1_SP_OFF == 1)
				elf_hwcap |= HWCAP_NEON;
			if ((tmp & VMVFR1_FMAC_MASK) >> VMVFR1_FMAC_OFF == 1)
				elf_hwcap |= HWCAP_VFPv4;
		}

		/* Leave the unit disabled until a thread faults on it. */
		vfp_disable();

		/* initialize the coprocessor 10 and 11 calls
		 * These are called to restore the registers and enable
		 * the VFP hardware.
		 */
		if (vfp10_uh.uh_handler == NULL) {
			vfp10_uh.uh_handler = vfp_bounce;
			vfp11_uh.uh_handler = vfp_bounce;
			install_coproc_handler_static(10, &vfp10_uh);
			install_coproc_handler_static(11, &vfp11_uh);
		}
	}
}

SYSINIT(vfp, SI_SUB_CPU, SI_ORDER_ANY, vfp_init, NULL);
200
201 /*
202 * Start the VFP unit, restore the VFP registers from the PCB and retry
203 * the instruction.
204 */
static int
vfp_bounce(u_int addr, u_int insn, struct trapframe *frame, int code)
{
	u_int cpu, fpexc;
	struct pcb *curpcb;
	ksiginfo_t ksi;

	critical_enter();

	/*
	 * If the VFP is already on and we got an undefined instruction, then
	 * something tried to execute a truly invalid instruction that maps to
	 * the VFP.
	 */
	fpexc = fmrx(fpexc);
	if (fpexc & VFPEXC_EN) {
		/* Clear any exceptions */
		fmxr(fpexc, fpexc & ~(VFPEXC_EX | VFPEXC_FP2V));

		/* kill the process - we do not handle emulation */
		critical_exit();

		if (fpexc & VFPEXC_EX) {
			/* We have an exception, signal a SIGFPE */
			ksiginfo_init_trap(&ksi);
			ksi.ksi_signo = SIGFPE;
			if (fpexc & VFPEXC_UFC)
				ksi.ksi_code = FPE_FLTUND;
			else if (fpexc & VFPEXC_OFC)
				ksi.ksi_code = FPE_FLTOVF;
			else if (fpexc & VFPEXC_IOC)
				ksi.ksi_code = FPE_FLTINV;
			ksi.ksi_addr = (void *)addr;
			trapsignal(curthread, &ksi);
			/* 0 = handled; the fault was turned into a signal */
			return 0;
		}

		/* Nonzero = not handled; caller treats it as undefined. */
		return 1;
	}

	/*
	 * Kernel-mode VFP faults are only legitimate when kernel FP use
	 * was declared via fpu_kern_enter() (PCB_FP_KERN set).
	 */
	curpcb = curthread->td_pcb;
	if ((code & FAULT_USER) == 0 &&
	    (curpcb->pcb_fpflags & PCB_FP_KERN) == 0) {
		critical_exit();
		return (1);
	}

	/*
	 * If the last time this thread used the VFP it was on this core, and
	 * the last thread to use the VFP on this core was this thread, then the
	 * VFP state is valid, otherwise restore this thread's state to the VFP.
	 */
	fmxr(fpexc, fpexc | VFPEXC_EN);
	cpu = PCPU_GET(cpuid);
	if (curpcb->pcb_vfpcpu != cpu || curthread != PCPU_GET(fpcurthread)) {
		vfp_restore(curpcb->pcb_vfpsaved);
		curpcb->pcb_vfpcpu = cpu;
		PCPU_SET(fpcurthread, curthread);
	}

	critical_exit();

	KASSERT((code & FAULT_USER) == 0 ||
	    curpcb->pcb_vfpsaved == &curpcb->pcb_vfpstate,
	    ("Kernel VFP state in use when entering userspace"));

	return (0);
}
273
274 /*
275 * Update the VFP state for a forked process or new thread. The PCB will
276 * have been copied from the old thread.
277 * The code is heavily based on arm64 logic.
278 */
279 void
vfp_new_thread(struct thread * newtd,struct thread * oldtd,bool fork)280 vfp_new_thread(struct thread *newtd, struct thread *oldtd, bool fork)
281 {
282 struct pcb *newpcb;
283
284 newpcb = newtd->td_pcb;
285
286 /* Kernel threads start with clean VFP */
287 if ((oldtd->td_pflags & TDP_KTHREAD) != 0) {
288 newpcb->pcb_fpflags &=
289 ~(PCB_FP_STARTED | PCB_FP_KERN | PCB_FP_NOSAVE);
290 } else {
291 MPASS((newpcb->pcb_fpflags & (PCB_FP_KERN|PCB_FP_NOSAVE)) == 0);
292 if (!fork) {
293 newpcb->pcb_fpflags &= ~PCB_FP_STARTED;
294 }
295 }
296
297 newpcb->pcb_vfpsaved = &newpcb->pcb_vfpstate;
298 newpcb->pcb_vfpcpu = UINT_MAX;
299 }
300 /*
301 * Restore the given state to the VFP hardware.
302 */
static void
vfp_restore(struct vfp_state *vfpsave)
{
	uint32_t fpexc;

	/* On vfpv3 we may need to restore FPINST and FPINST2 */
	fpexc = vfpsave->fpexec;
	if (fpexc & VFPEXC_EX) {
		fmxr(fpinst, vfpsave->fpinst);
		if (fpexc & VFPEXC_FP2V)
			fmxr(fpinst2, vfpsave->fpinst2);
	}
	fmxr(fpscr, vfpsave->fpscr);

	/*
	 * Reload d0-d15 always, and d16-d31 only when the unit has them
	 * (is_d32 != 0); otherwise step the pointer over the unused half
	 * of the save area (16 regs * 8 bytes = 128).
	 */
	__asm __volatile(
	    " .fpu	vfpv2\n"
	    " .fpu	vfpv3\n"
	    " vldmia	%0!, {d0-d15}\n"	/* d0-d15 */
	    " cmp	%1, #0\n"		/* -D16 or -D32? */
	    " vldmiane	%0!, {d16-d31}\n"	/* d16-d31 */
	    " addeq	%0, %0, #128\n"		/* skip missing regs */
	    : "+&r" (vfpsave) : "r" (is_d32) : "cc"
	    );

	/* Write the saved FPEXC last, restoring enable/exception bits. */
	fmxr(fpexc, fpexc);
}
329
330 /*
331 * If the VFP is on, save its current state and turn it off if requested to do
332 * so. If the VFP is not on, does not change the values at *vfpsave. Caller is
333 * responsible for preventing a context switch while this is running.
334 */
void
vfp_store(struct vfp_state *vfpsave, boolean_t disable_vfp)
{
	uint32_t fpexc;

	fpexc = fmrx(fpexc);		/* Is the vfp enabled? */
	if (fpexc & VFPEXC_EN) {
		vfpsave->fpexec = fpexc;
		vfpsave->fpscr = fmrx(fpscr);

		/* On vfpv3 we may need to save FPINST and FPINST2 */
		if (fpexc & VFPEXC_EX) {
			vfpsave->fpinst = fmrx(fpinst);
			if (fpexc & VFPEXC_FP2V)
				vfpsave->fpinst2 = fmrx(fpinst2);
			/* Clear EX so the local copy no longer traps. */
			fpexc &= ~VFPEXC_EX;
		}

		/*
		 * Store d0-d15 always, and d16-d31 only on a -D32 unit;
		 * otherwise skip the unused 128 bytes of the save area.
		 */
		__asm __volatile(
		    " .fpu	vfpv2\n"
		    " .fpu	vfpv3\n"
		    " vstmia	%0!, {d0-d15}\n"	/* d0-d15 */
		    " cmp	%1, #0\n"		/* -D16 or -D32? */
		    " vstmiane	%0!, {d16-d31}\n"	/* d16-d31 */
		    " addeq	%0, %0, #128\n"		/* skip missing regs */
		    : "+&r" (vfpsave) : "r" (is_d32) : "cc"
		    );

		if (disable_vfp)
			fmxr(fpexc , fpexc & ~VFPEXC_EN);
	}
}
367
368 /*
369 * The current thread is dying. If the state currently in the hardware belongs
370 * to the current thread, set fpcurthread to NULL to indicate that the VFP
371 * hardware state does not belong to any thread. If the VFP is on, turn it off.
372 */
373 void
vfp_discard(struct thread * td)374 vfp_discard(struct thread *td)
375 {
376 u_int tmp;
377
378 if (PCPU_GET(fpcurthread) == td)
379 PCPU_SET(fpcurthread, NULL);
380
381 tmp = fmrx(fpexc);
382 if (tmp & VFPEXC_EN)
383 fmxr(fpexc, tmp & ~VFPEXC_EN);
384 }
385
386 void
vfp_save_state(struct thread * td,struct pcb * pcb)387 vfp_save_state(struct thread *td, struct pcb *pcb)
388 {
389 int32_t fpexc;
390
391 KASSERT(pcb != NULL, ("NULL vfp pcb"));
392 KASSERT(td == NULL || td->td_pcb == pcb, ("Invalid vfp pcb"));
393
394 /*
395 * savectx() will be called on panic with dumppcb as an argument,
396 * dumppcb doesn't have pcb_vfpsaved set, so set it to save
397 * the VFP registers.
398 */
399 if (pcb->pcb_vfpsaved == NULL)
400 pcb->pcb_vfpsaved = &pcb->pcb_vfpstate;
401
402 if (td == NULL)
403 td = curthread;
404
405 critical_enter();
406 /*
407 * Only store the registers if the VFP is enabled,
408 * i.e. return if we are trapping on FP access.
409 */
410 fpexc = fmrx(fpexc);
411 if (fpexc & VFPEXC_EN) {
412 KASSERT(PCPU_GET(fpcurthread) == td,
413 ("Storing an invalid VFP state"));
414
415 vfp_store(pcb->pcb_vfpsaved, true);
416 }
417 critical_exit();
418 }
419
420 struct fpu_kern_ctx *
fpu_kern_alloc_ctx(u_int flags)421 fpu_kern_alloc_ctx(u_int flags)
422 {
423 return (malloc(sizeof(struct fpu_kern_ctx), M_FPUKERN_CTX,
424 ((flags & FPU_KERN_NOWAIT) ? M_NOWAIT : M_WAITOK) | M_ZERO));
425 }
426
/* Free a context from fpu_kern_alloc_ctx(); it must not be in use. */
void
fpu_kern_free_ctx(struct fpu_kern_ctx *ctx)
{
	KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) == 0, ("freeing in-use ctx"));

	free(ctx, M_FPUKERN_CTX);
}
434
/*
 * Begin a region of kernel VFP use on td (must be curthread's pcb).
 * With FPU_KERN_NOCTX the thread keeps the unit exclusively inside a
 * critical section and nothing is saved on context switch; otherwise
 * the user state is parked and ctx->state becomes the save area until
 * fpu_kern_leave().
 */
void
fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags)
{
	struct pcb *pcb;

	pcb = td->td_pcb;
	KASSERT((flags & FPU_KERN_NOCTX) != 0 || ctx != NULL,
	    ("ctx is required when !FPU_KERN_NOCTX"));
	KASSERT(ctx == NULL || (ctx->flags & FPU_KERN_CTX_INUSE) == 0,
	    ("using inuse ctx"));
	KASSERT((pcb->pcb_fpflags & PCB_FP_NOSAVE) == 0,
	    ("recursive fpu_kern_enter while in PCB_FP_NOSAVE state"));

	if ((flags & FPU_KERN_NOCTX) != 0) {
		critical_enter();
		/* Save any live state before taking over the hardware. */
		if (curthread == PCPU_GET(fpcurthread)) {
			vfp_save_state(curthread, pcb);
		}
		PCPU_SET(fpcurthread, NULL);

		vfp_enable();
		pcb->pcb_fpflags |= PCB_FP_KERN | PCB_FP_NOSAVE |
		    PCB_FP_STARTED;
		return;
	}

	/* A dedicated FPU kernel thread needs no save/restore dance. */
	if ((flags & FPU_KERN_KTHR) != 0 && is_fpu_kern_thread(0)) {
		ctx->flags = FPU_KERN_CTX_DUMMY | FPU_KERN_CTX_INUSE;
		return;
	}
	/*
	 * Check that either we are already using the VFP in the kernel, or
	 * the saved state points to the default user space state.
	 */
	KASSERT((pcb->pcb_fpflags & PCB_FP_KERN) != 0 ||
	    pcb->pcb_vfpsaved == &pcb->pcb_vfpstate,
	    ("Mangled pcb_vfpsaved %x %p %p", pcb->pcb_fpflags, pcb->pcb_vfpsaved,
	    &pcb->pcb_vfpstate));
	ctx->flags = FPU_KERN_CTX_INUSE;
	/* Park the current (user) state, then redirect saves to ctx. */
	vfp_save_state(curthread, pcb);
	ctx->prev = pcb->pcb_vfpsaved;
	pcb->pcb_vfpsaved = &ctx->state;
	pcb->pcb_fpflags |= PCB_FP_KERN;
	pcb->pcb_fpflags &= ~PCB_FP_STARTED;

	return;
}
482
/*
 * End a region of kernel VFP use started by fpu_kern_enter(), restoring
 * the previous save-area pointer and flags. Always returns 0.
 */
int
fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx)
{
	struct pcb *pcb;

	pcb = td->td_pcb;

	if ((pcb->pcb_fpflags & PCB_FP_NOSAVE) != 0) {
		/* Matching fpu_kern_enter() used FPU_KERN_NOCTX. */
		KASSERT(ctx == NULL, ("non-null ctx after FPU_KERN_NOCTX"));
		KASSERT(PCPU_GET(fpcurthread) == NULL,
		    ("non-NULL fpcurthread for PCB_FP_NOSAVE"));
		CRITICAL_ASSERT(td);

		vfp_disable();
		pcb->pcb_fpflags &= ~(PCB_FP_NOSAVE | PCB_FP_STARTED);
		/* Exit the critical section entered by fpu_kern_enter(). */
		critical_exit();
	} else {
		KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) != 0,
		    ("FPU context not inuse"));
		ctx->flags &= ~FPU_KERN_CTX_INUSE;

		/* Dummy contexts (FPU_KERN_KTHR fast path) need no restore. */
		if (is_fpu_kern_thread(0) &&
		    (ctx->flags & FPU_KERN_CTX_DUMMY) != 0)
			return (0);
		KASSERT((ctx->flags & FPU_KERN_CTX_DUMMY) == 0, ("dummy ctx"));
		critical_enter();
		/* Throw away the kernel-use hardware state. */
		vfp_discard(td);
		critical_exit();
		pcb->pcb_fpflags &= ~PCB_FP_STARTED;
		pcb->pcb_vfpsaved = ctx->prev;
	}

	if (pcb->pcb_vfpsaved == &pcb->pcb_vfpstate) {
		/* Back to plain user state: no longer in kernel-FP mode. */
		pcb->pcb_fpflags &= ~PCB_FP_KERN;
	} else {
		KASSERT((pcb->pcb_fpflags & PCB_FP_KERN) != 0,
		    ("unpaired fpu_kern_leave"));
	}

	return (0);
}
524
525 int
fpu_kern_thread(u_int flags __unused)526 fpu_kern_thread(u_int flags __unused)
527 {
528 struct pcb *pcb = curthread->td_pcb;
529
530 KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0,
531 ("Only kthread may use fpu_kern_thread"));
532 KASSERT(pcb->pcb_vfpsaved == &pcb->pcb_vfpstate,
533 ("Mangled pcb_vfpsaved"));
534 KASSERT((pcb->pcb_fpflags & PCB_FP_KERN) == 0,
535 ("Thread already setup for the VFP"));
536 pcb->pcb_fpflags |= PCB_FP_KERN;
537 return (0);
538 }
539
540 int
is_fpu_kern_thread(u_int flags __unused)541 is_fpu_kern_thread(u_int flags __unused)
542 {
543 struct pcb *curpcb;
544
545 if ((curthread->td_pflags & TDP_KTHREAD) == 0)
546 return (0);
547 curpcb = curthread->td_pcb;
548 return ((curpcb->pcb_fpflags & PCB_FP_KERN) != 0);
549 }
550