/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright (c) 2018, Joyent, Inc.
 * Copyright 2022 Oxide Computer Company
 */

/*
 * This implements the hypervisor multiplexor FPU API. Its purpose is to make it
 * easy to switch between the host and guest hypervisor while hiding all the
 * details about CR0.TS and how to save the host's state as required.
 */

#include <sys/pcb.h>
#include <sys/kmem.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/hma.h>
#include <sys/x86_archext.h>
#include <sys/archsystm.h>
#include <sys/controlregs.h>
#include <sys/sysmacros.h>
#include <sys/stdbool.h>
#include <sys/ontrap.h>
#include <sys/cpuvar.h>
#include <sys/disp.h>

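/*
 * State tracked for each guest FPU context: the saved guest FPU contents, the
 * host thread currently in guest FPU context (if any), and whether the guest
 * state is presently loaded into the hardware FPU.
 */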
struct hma_fpu {
	fpu_ctx_t	hf_guest_fpu;
	kthread_t	*hf_curthread;
	boolean_t	hf_inguest;
};

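/*
 * Initialize the guest FPU area to the architectural default state (the
 * legacy x87/SSE initial values), marking it enabled and valid so it can
 * subsequently be loaded by hma_fpu_start_guest().
 */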
int
hma_fpu_init(hma_fpu_t *fpu)
{
	struct xsave_state *xs;

	ASSERT0(fpu->hf_inguest);

	switch (fp_save_mech) {
	case FP_FXSAVE:
		bcopy(&sse_initial, fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_fx,
		    sizeof (struct fxsave_state));
		fpu->hf_guest_fpu.fpu_xsave_mask = 0;
		break;
	case FP_XSAVE:
		/*
		 * Zero everything in the xsave case as we may have data in
		 * the structure that's not part of the initial value (which
		 * only really deals with a small portion of the xsave state).
		 */
		xs = fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_xs;
		bzero(xs, cpuid_get_xsave_size());
		bcopy(&avx_initial, xs, sizeof (*xs));
		xs->xs_header.xsh_xstate_bv = XFEATURE_LEGACY_FP | XFEATURE_SSE;
		fpu->hf_guest_fpu.fpu_xsave_mask = XFEATURE_FP_ALL;
		break;
	default:
		panic("Invalid fp_save_mech");
	}

	fpu->hf_guest_fpu.fpu_flags = FPU_EN | FPU_VALID;

	return (0);
}

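/*
 * Free an hma_fpu_t and its associated FPU save area. A NULL pointer is
 * tolerated.
 */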
void
hma_fpu_free(hma_fpu_t *fpu)
{
	if (fpu == NULL)
		return;

	ASSERT3P(fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_generic, !=, NULL);
	kmem_cache_free(fpsave_cachep,
	    fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_generic);
	kmem_free(fpu, sizeof (*fpu));
}

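/*
 * Allocate and zero an hma_fpu_t along with its FPU save area, using the
 * caller-provided kmem flag (KM_SLEEP/KM_NOSLEEP). Returns NULL on allocation
 * failure.
 */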
hma_fpu_t *
hma_fpu_alloc(int kmflag)
{
	hma_fpu_t *fpu;

	fpu = kmem_zalloc(sizeof (hma_fpu_t), kmflag);
	if (fpu == NULL)
		return (NULL);

	fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_generic =
	    kmem_cache_alloc(fpsave_cachep, kmflag);
	if (fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_generic == NULL) {
		kmem_free(fpu, sizeof (hma_fpu_t));
		return (NULL);
	}
	fpu->hf_inguest = B_FALSE;

	/*
	 * Make sure the entire structure is zero.
	 */
	switch (fp_save_mech) {
	case FP_FXSAVE:
		bzero(fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_generic,
		    sizeof (struct fxsave_state));
		break;
	case FP_XSAVE:
		bzero(fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_generic,
		    cpuid_get_xsave_size());
		break;
	default:
		panic("Invalid fp_save_mech");
	}

	return (fpu);
}

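/*
 * Enter guest FPU context: save the current thread's FPU state into its PCB
 * and load the guest's saved state into the hardware FPU. The guest FPU must
 * be initialized (FPU_EN and FPU_VALID set) and must not already be in guest
 * context.
 */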
void
hma_fpu_start_guest(hma_fpu_t *fpu)
{
	/*
	 * Note, we don't check or assert whether or not t_preempt is true,
	 * because there are contexts where this is safe to call (from a
	 * context op) where t_preempt may not be set.
	 */
	ASSERT3S(fpu->hf_inguest, ==, B_FALSE);
	ASSERT3P(fpu->hf_curthread, ==, NULL);
	ASSERT3P(curthread->t_lwp, !=, NULL);
	ASSERT3U(fpu->hf_guest_fpu.fpu_flags & FPU_EN, !=, 0);
	ASSERT3U(fpu->hf_guest_fpu.fpu_flags & FPU_VALID, !=, 0);

	fpu->hf_inguest = B_TRUE;
	fpu->hf_curthread = curthread;

	fp_save(&curthread->t_lwp->lwp_pcb.pcb_fpu);
	fp_restore(&fpu->hf_guest_fpu);
	fpu->hf_guest_fpu.fpu_flags &= ~FPU_VALID;
}

/*
 * Since fp_save() assumes a thread-centric view of the FPU usage -- it will
 * assert if attempting to save elsewhere than the thread PCB, and will elide
 * action if the FPU is not enabled -- we cannot use it for the manual saving
 * of FPU contents. To work around that, we call the save mechanism directly.
 */
static void
do_fp_save(fpu_ctx_t *fpu)
{
	/*
	 * For our manual saving, we expect that the thread PCB is never the
	 * landing zone for the data.
	 */
	ASSERT(curthread->t_lwp == NULL ||
	    fpu != &curthread->t_lwp->lwp_pcb.pcb_fpu);

	switch (fp_save_mech) {
	case FP_FXSAVE:
		fpxsave(fpu->fpu_regs.kfpu_u.kfpu_fx);
		break;
	case FP_XSAVE:
		xsavep(fpu->fpu_regs.kfpu_u.kfpu_xs, fpu->fpu_xsave_mask);
		break;
	default:
		panic("Invalid fp_save_mech");
	}
	fpu->fpu_flags |= FPU_VALID;
}

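/*
 * Exit guest FPU context: save the guest's FPU state back into its context
 * area and restore the host thread's state from the PCB. Must be called by
 * the same thread that called hma_fpu_start_guest().
 */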
void
hma_fpu_stop_guest(hma_fpu_t *fpu)
{
	ASSERT3S(fpu->hf_inguest, ==, B_TRUE);
	ASSERT3P(fpu->hf_curthread, ==, curthread);
	ASSERT3U(fpu->hf_guest_fpu.fpu_flags & FPU_EN, !=, 0);
	ASSERT3U(fpu->hf_guest_fpu.fpu_flags & FPU_VALID, ==, 0);

	do_fp_save(&fpu->hf_guest_fpu);

	fp_restore(&curthread->t_lwp->lwp_pcb.pcb_fpu);

	fpu->hf_inguest = B_FALSE;
	fpu->hf_curthread = NULL;
}

/*
 * Will output up to `ndesc` records into `descp`. The required size for an
 * XSAVE area containing all of the data fields supported by the host will be
 * placed in `req_sizep` (if non-NULL). Returns the number of feature bits
 * supported by the host.
 */
uint_t
hma_fpu_describe_xsave_state(hma_xsave_state_desc_t *descp, uint_t ndesc,
    size_t *req_sizep)
{
	uint64_t features;

	switch (fp_save_mech) {
	case FP_FXSAVE:
		/*
		 * Even without xsave support, the FPU will have legacy x87
		 * float and SSE state contained within.
		 */
		features = XFEATURE_LEGACY_FP | XFEATURE_SSE;
		break;
	case FP_XSAVE:
		features = get_xcr(XFEATURE_ENABLED_MASK);
		break;
	default:
		panic("Invalid fp_save_mech");
	}

	uint_t count, pos;
	uint_t max_size = MIN_XSAVE_SIZE;
	for (count = 0, pos = 0; pos <= 63; pos++) {
		const uint64_t bit = ((uint64_t)1 << pos);
		uint32_t size, off;

		if ((features & bit) == 0) {
			continue;
		}

		if (bit == XFEATURE_LEGACY_FP || bit == XFEATURE_SSE) {
			size = sizeof (struct fxsave_state);
			off = 0;
		} else {
			/*
			 * The size and position of data types within the
			 * XSAVE area are described by leaf 0xD in the
			 * subfunction corresponding to the bit position (for
			 * pos > 1).
			 */
			struct cpuid_regs regs = {
				.cp_eax = 0xD,
				.cp_ecx = pos,
			};

			ASSERT3U(pos, >, 1);

			(void) __cpuid_insn(&regs);
			size = regs.cp_eax;
			off = regs.cp_ebx;
		}
		max_size = MAX(max_size, off + size);

		if (count < ndesc) {
			hma_xsave_state_desc_t *desc = &descp[count];

			desc->hxsd_bit = bit;
			desc->hxsd_size = size;
			desc->hxsd_off = off;
		}
		count++;
	}
	if (req_sizep != NULL) {
		*req_sizep = max_size;
	}
	return (count);
}

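/*
 * Copy the guest FPU state into the caller-provided buffer, formatted as an
 * XSAVE area. The guest FPU must not currently be in guest context. Returns
 * HFXR_NO_SPACE if `len` is too small to hold the valid state.
 */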
hma_fpu_xsave_result_t
hma_fpu_get_xsave_state(const hma_fpu_t *fpu, void *buf, size_t len)
{
	ASSERT(!fpu->hf_inguest);

	size_t valid_len;
	switch (fp_save_mech) {
	case FP_FXSAVE: {
		if (len < MIN_XSAVE_SIZE) {
			return (HFXR_NO_SPACE);
		}
		bcopy(fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_generic, buf,
		    sizeof (struct fxsave_state));

		struct xsave_header hdr = {
			.xsh_xstate_bv = XFEATURE_LEGACY_FP | XFEATURE_SSE,
		};
		bcopy(&hdr, buf + sizeof (struct fxsave_state), sizeof (hdr));

		break;
	}
	case FP_XSAVE:
		(void) hma_fpu_describe_xsave_state(NULL, 0, &valid_len);
		if (len < valid_len) {
			return (HFXR_NO_SPACE);
		}
		bcopy(fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_generic, buf,
		    valid_len);
		break;
	default:
		panic("Invalid fp_save_mech");
	}

	return (HFXR_OK);
}

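/*
 * Load guest FPU state from a caller-provided XSAVE-formatted buffer. The
 * incoming data is validated by loading it into the hardware FPU (with the
 * existing FPU contents preserved around the operation), so invalid contents
 * are rejected with HFXR_INVALID_DATA rather than causing a #GP later. The
 * guest FPU must not currently be in guest context.
 */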
hma_fpu_xsave_result_t
hma_fpu_set_xsave_state(hma_fpu_t *fpu, void *buf, size_t len)
{
	ASSERT(!fpu->hf_inguest);

	if (len < MIN_XSAVE_SIZE) {
		return (HFXR_NO_SPACE);
	}
	/* 64-byte alignment is demanded of the FPU-related operations */
	if (((uintptr_t)buf & 63) != 0) {
		return (HFXR_BAD_ALIGN);
	}

	struct xsave_header *hdr = buf + sizeof (struct fxsave_state);
	if (hdr->xsh_xcomp_bv != 0) {
		/* XSAVEC formatting not supported at this time */
		return (HFXR_UNSUP_FMT);
	}

	uint64_t allowed_bits;
	size_t save_area_size;
	switch (fp_save_mech) {
	case FP_FXSAVE:
		allowed_bits = XFEATURE_LEGACY_FP | XFEATURE_SSE;
		save_area_size = sizeof (struct fxsave_state);
		break;
	case FP_XSAVE:
		allowed_bits = get_xcr(XFEATURE_ENABLED_MASK);
		save_area_size = cpuid_get_xsave_size();
		break;
	default:
		panic("Invalid fp_save_mech");
	}
	if ((hdr->xsh_xstate_bv & ~(allowed_bits)) != 0) {
		return (HFXR_UNSUP_FEAT);
	}

	/*
	 * We validate the incoming state with the FPU itself prior to saving
	 * it into the guest FPU context area. In order to preserve any state
	 * currently housed in the FPU, we save it to a temporarily allocated
	 * FPU context. It is important to note that we are not following the
	 * normal rules around state management detailed in
	 * uts/intel/os/fpu.c. This saving is unconditional, regardless of the
	 * state in the FPU or the value of CR0_TS, simplifying our process
	 * before returning to the caller (without needing to check for an
	 * lwp, etc.). To prevent interrupting threads from encountering this
	 * unusual FPU state, we keep interrupts disabled for the duration.
	 */
	fpu_ctx_t temp_ctx = {
		.fpu_xsave_mask = XFEATURE_FP_ALL,
	};
	temp_ctx.fpu_regs.kfpu_u.kfpu_generic =
	    kmem_cache_alloc(fpsave_cachep, KM_SLEEP);
	bzero(temp_ctx.fpu_regs.kfpu_u.kfpu_generic, save_area_size);

	ulong_t iflag;
	iflag = intr_clear();
	bool disable_when_done = (getcr0() & CR0_TS) != 0;
	do_fp_save(&temp_ctx);

	/*
	 * If the provided data is invalid, it will cause a #GP when we
	 * attempt to load it into the FPU, so protect against that with
	 * on_trap(). Should the data load successfully, we can then be
	 * confident that its later use via hma_fpu_start_guest() will be
	 * safe.
	 */
	on_trap_data_t otd;
	volatile hma_fpu_xsave_result_t res = HFXR_OK;
	if (on_trap(&otd, OT_DATA_EC) != 0) {
		res = HFXR_INVALID_DATA;
		goto done;
	}

	switch (fp_save_mech) {
	case FP_FXSAVE:
		if (hdr->xsh_xstate_bv == 0) {
			/*
			 * An empty xstate_bv means we can simply load the
			 * legacy FP/SSE area with its initial state.
			 */
			bcopy(&sse_initial,
			    fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_fx,
			    sizeof (sse_initial));
		} else {
			fpxrestore(buf);
			fpxsave(fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_fx);
		}
		break;
	case FP_XSAVE:
		xrestore(buf, XFEATURE_FP_ALL);
		xsavep(fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_xs,
		    fpu->hf_guest_fpu.fpu_xsave_mask);
		break;
	default:
		panic("Invalid fp_save_mech");
	}

done:
	no_trap();
	fp_restore(&temp_ctx);
	if (disable_when_done) {
		fpdisable();
	}
	intr_restore(iflag);
	kmem_cache_free(fpsave_cachep, temp_ctx.fpu_regs.kfpu_u.kfpu_generic);

	return (res);
}

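/*
 * Copy the legacy fxsave portion of the guest FPU state into the provided
 * structure. The guest FPU must not currently be in guest context.
 */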
void
hma_fpu_get_fxsave_state(const hma_fpu_t *fpu, struct fxsave_state *fx)
{
	const struct fxsave_state *guest;

	ASSERT3S(fpu->hf_inguest, ==, B_FALSE);

	guest = fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_fx;
	bcopy(guest, fx, sizeof (*fx));
}

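/*
 * Set the guest FPU state from the provided legacy fxsave structure. Returns
 * EINVAL if reserved bits are set in fx_mxcsr, otherwise 0. The guest FPU
 * must not currently be in guest context.
 */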
int
hma_fpu_set_fxsave_state(hma_fpu_t *fpu, const struct fxsave_state *fx)
{
	struct fxsave_state *gfx;
	struct xsave_state *gxs;

	ASSERT3S(fpu->hf_inguest, ==, B_FALSE);

	/*
	 * If reserved bits are set in fx_mxcsr, then we will take a #GP when
	 * we restore them. Reject this outright.
	 *
	 * We do not need to check whether we are dealing with state that has
	 * pending exceptions. That was only necessary with the original FPU
	 * save and restore mechanisms (fsave/frstor). When using
	 * fxsave/fxrstor and xsave/xrstor, such exceptions are deferred until
	 * the FPU is next used, which is what we'd want here (they'd be taken
	 * in guest context).
	 */
	if ((fx->fx_mxcsr & ~sse_mxcsr_mask) != 0)
		return (EINVAL);

	switch (fp_save_mech) {
	case FP_FXSAVE:
		gfx = fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_fx;
		bcopy(fx, gfx, sizeof (*fx));
		break;
	case FP_XSAVE:
		gxs = fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_xs;
		bzero(gxs, cpuid_get_xsave_size());
		bcopy(fx, &gxs->xs_fxsave, sizeof (*fx));
		gxs->xs_header.xsh_xstate_bv =
		    XFEATURE_LEGACY_FP | XFEATURE_SSE;
		break;
	default:
		panic("Invalid fp_save_mech");
	}

	return (0);
}