/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright (c) 2018, Joyent, Inc.
 * Copyright 2022 Oxide Computer Company
 */

/*
 * This implements the hypervisor multiplexor FPU API. Its purpose is to make
 * it easy to switch between the host and guest hypervisor while hiding all
 * the details about CR0.TS and how to save the host's state as required.
 */

#include <sys/pcb.h>
#include <sys/kmem.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/hma.h>
#include <sys/x86_archext.h>
#include <sys/archsystm.h>
#include <sys/controlregs.h>
#include <sys/sysmacros.h>
#include <sys/stdbool.h>
#include <sys/ontrap.h>
#include <sys/cpuvar.h>
#include <sys/disp.h>

struct hma_fpu {
	fpu_ctx_t	hf_guest_fpu;
	kthread_t	*hf_curthread;
	boolean_t	hf_inguest;
};
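
/*
 * The expected consumer lifecycle, as a hedged sketch (the control flow
 * around the calls below is hypothetical and not taken from any particular
 * caller; details such as thread and preemption context are elided):
 *
 *	hma_fpu_t *fpu = hma_fpu_alloc(KM_SLEEP);
 *	(void) hma_fpu_init(fpu);
 *	...
 *	hma_fpu_start_guest(fpu);	(host state saved, guest state loaded)
 *	... enter and exit the guest ...
 *	hma_fpu_stop_guest(fpu);	(guest state saved, host state restored)
 *	...
 *	hma_fpu_free(fpu);
 */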

int
hma_fpu_init(hma_fpu_t *fpu)
{
	struct xsave_state *xs;

	ASSERT0(fpu->hf_inguest);

	switch (fp_save_mech) {
	case FP_FXSAVE:
		bcopy(&sse_initial, fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_fx,
		    sizeof (struct fxsave_state));
		fpu->hf_guest_fpu.fpu_xsave_mask = 0;
		break;
	case FP_XSAVE:
		/*
		 * Zero everything in the xsave case as we may have data in
		 * the structure that's not part of the initial value (which
		 * only really deals with a small portion of the xsave state).
		 */
		xs = fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_xs;
		bzero(xs, cpuid_get_xsave_size());
		bcopy(&avx_initial, xs, sizeof (*xs));
		xs->xs_header.xsh_xstate_bv = XFEATURE_LEGACY_FP | XFEATURE_SSE;
		fpu->hf_guest_fpu.fpu_xsave_mask = XFEATURE_FP_ALL;
		break;
	default:
		panic("Invalid fp_save_mech");
	}

	fpu->hf_guest_fpu.fpu_flags = FPU_EN | FPU_VALID;

	return (0);
}

void
hma_fpu_free(hma_fpu_t *fpu)
{
	if (fpu == NULL)
		return;

	ASSERT3P(fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_generic, !=, NULL);
	kmem_cache_free(fpsave_cachep,
	    fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_generic);
	kmem_free(fpu, sizeof (*fpu));
}

hma_fpu_t *
hma_fpu_alloc(int kmflag)
{
	hma_fpu_t *fpu;

	fpu = kmem_zalloc(sizeof (hma_fpu_t), kmflag);
	if (fpu == NULL)
		return (NULL);

	fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_generic =
	    kmem_cache_alloc(fpsave_cachep, kmflag);
	if (fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_generic == NULL) {
		kmem_free(fpu, sizeof (hma_fpu_t));
		return (NULL);
	}
	fpu->hf_inguest = B_FALSE;

	/*
	 * Make sure the entire structure is zero.
	 */
	switch (fp_save_mech) {
	case FP_FXSAVE:
		bzero(fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_generic,
		    sizeof (struct fxsave_state));
		break;
	case FP_XSAVE:
		bzero(fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_generic,
		    cpuid_get_xsave_size());
		break;
	default:
		panic("Invalid fp_save_mech");
	}

	return (fpu);
}

void
hma_fpu_start_guest(hma_fpu_t *fpu)
{
	/*
	 * Note, we don't check / assert whether or not t_preempt is true,
	 * because there are contexts where this is safe to call (from a
	 * context op) where t_preempt may not be set.
	 */
	ASSERT3S(fpu->hf_inguest, ==, B_FALSE);
	ASSERT3P(fpu->hf_curthread, ==, NULL);
	ASSERT3P(curthread->t_lwp, !=, NULL);
	ASSERT3U(fpu->hf_guest_fpu.fpu_flags & FPU_EN, !=, 0);
	ASSERT3U(fpu->hf_guest_fpu.fpu_flags & FPU_VALID, !=, 0);

	fpu->hf_inguest = B_TRUE;
	fpu->hf_curthread = curthread;

	fp_save(&curthread->t_lwp->lwp_pcb.pcb_fpu);
	fp_restore(&fpu->hf_guest_fpu);
	fpu->hf_guest_fpu.fpu_flags &= ~FPU_VALID;
}

/*
 * Since fp_save() assumes a thread-centric view of the FPU usage -- it will
 * assert if attempting to save elsewhere than the thread PCB, and will elide
 * action if the FPU is not enabled -- we cannot use it for the manual saving
 * of FPU contents. To work around that, we call the save mechanism directly.
 */
static void
do_fp_save(fpu_ctx_t *fpu)
{
	/*
	 * For our manual saving, we expect that the thread PCB never be the
	 * landing zone for the data.
	 */
	ASSERT(curthread->t_lwp == NULL ||
	    fpu != &curthread->t_lwp->lwp_pcb.pcb_fpu);

	switch (fp_save_mech) {
	case FP_FXSAVE:
		fpxsave(fpu->fpu_regs.kfpu_u.kfpu_fx);
		break;
	case FP_XSAVE:
		xsavep(fpu->fpu_regs.kfpu_u.kfpu_xs, fpu->fpu_xsave_mask);
		break;
	default:
		panic("Invalid fp_save_mech");
	}
	fpu->fpu_flags |= FPU_VALID;
}

void
hma_fpu_stop_guest(hma_fpu_t *fpu)
{
	ASSERT3S(fpu->hf_inguest, ==, B_TRUE);
	ASSERT3P(fpu->hf_curthread, ==, curthread);
	ASSERT3U(fpu->hf_guest_fpu.fpu_flags & FPU_EN, !=, 0);
	ASSERT3U(fpu->hf_guest_fpu.fpu_flags & FPU_VALID, ==, 0);

	do_fp_save(&fpu->hf_guest_fpu);

	fp_restore(&curthread->t_lwp->lwp_pcb.pcb_fpu);

	fpu->hf_inguest = B_FALSE;
	fpu->hf_curthread = NULL;
}

/*
 * Will output up to `ndesc` records into `descp`. The required size for an
 * XSAVE area containing all of the data fields supported by the host will be
 * placed in `req_sizep` (if non-NULL). Returns the number of feature bits
 * supported by the host.
 */
uint_t
hma_fpu_describe_xsave_state(hma_xsave_state_desc_t *descp, uint_t ndesc,
    size_t *req_sizep)
{
	uint64_t features;

	switch (fp_save_mech) {
	case FP_FXSAVE:
		/*
		 * Even without xsave support, the FPU will have legacy x87
		 * float and SSE state contained within.
		 */
		features = XFEATURE_LEGACY_FP | XFEATURE_SSE;
		break;
	case FP_XSAVE:
		features = get_xcr(XFEATURE_ENABLED_MASK);
		break;
	default:
		panic("Invalid fp_save_mech");
	}

	uint_t count, pos;
	uint_t max_size = MIN_XSAVE_SIZE;
	for (count = 0, pos = 0; pos <= 63; pos++) {
		const uint64_t bit = ((uint64_t)1 << pos);
		uint32_t size, off;

		if ((features & bit) == 0) {
			continue;
		}

		if (bit == XFEATURE_LEGACY_FP || bit == XFEATURE_SSE) {
			size = sizeof (struct fxsave_state);
			off = 0;
		} else {
			/*
			 * The size and position of data types within the
			 * XSAVE area are described in leaf 0xD, in the
			 * subfunction corresponding to the bit position (for
			 * pos > 1).
			 */
			struct cpuid_regs regs = {
				.cp_eax = 0xD,
				.cp_ecx = pos,
			};

			ASSERT3U(pos, >, 1);

			(void) __cpuid_insn(&regs);
			size = regs.cp_eax;
			off = regs.cp_ebx;
		}
		max_size = MAX(max_size, off + size);

		if (count < ndesc) {
			hma_xsave_state_desc_t *desc = &descp[count];

			desc->hxsd_bit = bit;
			desc->hxsd_size = size;
			desc->hxsd_off = off;
		}
		count++;
	}
	if (req_sizep != NULL) {
		*req_sizep = max_size;
	}
	return (count);
}
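
/*
 * A sketch of the intended two-call pattern for the function above: query the
 * feature count first, then fetch the descriptions (the caller and its
 * variable names here are hypothetical):
 *
 *	size_t req_size;
 *	uint_t nfeat = hma_fpu_describe_xsave_state(NULL, 0, &req_size);
 *	hma_xsave_state_desc_t *descs =
 *	    kmem_alloc(nfeat * sizeof (*descs), KM_SLEEP);
 *	(void) hma_fpu_describe_xsave_state(descs, nfeat, NULL);
 */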

hma_fpu_xsave_result_t
hma_fpu_get_xsave_state(const hma_fpu_t *fpu, void *buf, size_t len)
{
	ASSERT(!fpu->hf_inguest);

	size_t valid_len;
	switch (fp_save_mech) {
	case FP_FXSAVE: {
		if (len < MIN_XSAVE_SIZE) {
			return (HFXR_NO_SPACE);
		}
		bcopy(fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_generic, buf,
		    sizeof (struct fxsave_state));

		struct xsave_header hdr = {
			.xsh_xstate_bv = XFEATURE_LEGACY_FP | XFEATURE_SSE,
		};
		bcopy(&hdr, buf + sizeof (struct fxsave_state), sizeof (hdr));

		break;
	}
	case FP_XSAVE:
		(void) hma_fpu_describe_xsave_state(NULL, 0, &valid_len);
		if (len < valid_len) {
			return (HFXR_NO_SPACE);
		}
		bcopy(fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_generic, buf,
		    valid_len);
		break;
	default:
		panic("Invalid fp_save_mech");
	}

	return (HFXR_OK);
}
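
/*
 * A hypothetical caller of hma_fpu_get_xsave_state() would size its buffer
 * via hma_fpu_describe_xsave_state(), along these lines (sketch only; error
 * handling and buffer cleanup are elided):
 *
 *	size_t len;
 *	(void) hma_fpu_describe_xsave_state(NULL, 0, &len);
 *	void *buf = kmem_zalloc(len, KM_SLEEP);
 *	if (hma_fpu_get_xsave_state(fpu, buf, len) != HFXR_OK)
 *		... handle failure ...
 */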

hma_fpu_xsave_result_t
hma_fpu_set_xsave_state(hma_fpu_t *fpu, void *buf, size_t len)
{
	ASSERT(!fpu->hf_inguest);

	if (len < MIN_XSAVE_SIZE) {
		return (HFXR_NO_SPACE);
	}
	/* 64-byte alignment is demanded of the FPU-related operations */
	if (((uintptr_t)buf & 63) != 0) {
		return (HFXR_BAD_ALIGN);
	}

	struct xsave_header *hdr = buf + sizeof (struct fxsave_state);
	if (hdr->xsh_xcomp_bv != 0) {
		/* XSAVEC formatting not supported at this time */
		return (HFXR_UNSUP_FMT);
	}

	uint64_t allowed_bits;
	size_t save_area_size;
	switch (fp_save_mech) {
	case FP_FXSAVE:
		allowed_bits = XFEATURE_LEGACY_FP | XFEATURE_SSE;
		save_area_size = sizeof (struct fxsave_state);
		break;
	case FP_XSAVE:
		allowed_bits = get_xcr(XFEATURE_ENABLED_MASK);
		save_area_size = cpuid_get_xsave_size();
		break;
	default:
		panic("Invalid fp_save_mech");
	}
	if ((hdr->xsh_xstate_bv & ~(allowed_bits)) != 0) {
		return (HFXR_UNSUP_FEAT);
	}

	/*
	 * We validate the incoming state with the FPU itself prior to saving
	 * it into the guest FPU context area. In order to preserve any state
	 * currently housed in the FPU, we save it to a temporarily allocated
	 * FPU context. It is important to note that we are not following the
	 * normal rules around state management detailed in uts/intel/os/fpu.c.
	 * This saving is unconditional, regardless of the state in the FPU or
	 * the value of CR0_TS, simplifying our process before returning to
	 * the caller (without needing to check for an lwp, etc). To prevent
	 * interrupting threads from encountering this unusual FPU state, we
	 * keep interrupts disabled for the duration.
	 */
	fpu_ctx_t temp_ctx = {
		.fpu_xsave_mask = XFEATURE_FP_ALL,
	};
	temp_ctx.fpu_regs.kfpu_u.kfpu_generic =
	    kmem_cache_alloc(fpsave_cachep, KM_SLEEP);
	bzero(temp_ctx.fpu_regs.kfpu_u.kfpu_generic, save_area_size);

	ulong_t iflag;
	iflag = intr_clear();
	bool disable_when_done = (getcr0() & CR0_TS) != 0;
	do_fp_save(&temp_ctx);

	/*
	 * If the provided data is invalid, it will cause a #GP when we
	 * attempt to load it into the FPU, so protect against that with
	 * on_trap(). Should the data load successfully, we can then be
	 * confident that its later use via hma_fpu_start_guest() will be
	 * safe.
	 */
	on_trap_data_t otd;
	volatile hma_fpu_xsave_result_t res = HFXR_OK;
	if (on_trap(&otd, OT_DATA_EC) != 0) {
		res = HFXR_INVALID_DATA;
		goto done;
	}

	switch (fp_save_mech) {
	case FP_FXSAVE:
		if (hdr->xsh_xstate_bv == 0) {
			/*
			 * An empty xstate_bv means we can simply load the
			 * legacy FP/SSE area with their initial state.
			 */
			bcopy(&sse_initial,
			    fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_fx,
			    sizeof (sse_initial));
		} else {
			fpxrestore(buf);
			fpxsave(fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_fx);
		}
		break;
	case FP_XSAVE:
		xrestore(buf, XFEATURE_FP_ALL);
		xsavep(fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_xs,
		    fpu->hf_guest_fpu.fpu_xsave_mask);
		break;
	default:
		panic("Invalid fp_save_mech");
	}

done:
	no_trap();
	fp_restore(&temp_ctx);
	if (disable_when_done) {
		fpdisable();
	}
	intr_restore(iflag);
	kmem_cache_free(fpsave_cachep, temp_ctx.fpu_regs.kfpu_u.kfpu_generic);

	return (res);
}
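
/*
 * One plausible mapping of the result codes above to errnos at a consumer
 * boundary (illustrative only; not prescribed by this file):
 *
 *	switch (hma_fpu_set_xsave_state(fpu, buf, len)) {
 *	case HFXR_OK:
 *		return (0);
 *	case HFXR_NO_SPACE:
 *	case HFXR_BAD_ALIGN:
 *	case HFXR_UNSUP_FMT:
 *	case HFXR_UNSUP_FEAT:
 *	case HFXR_INVALID_DATA:
 *		return (EINVAL);
 *	}
 */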

void
hma_fpu_get_fxsave_state(const hma_fpu_t *fpu, struct fxsave_state *fx)
{
	const struct fxsave_state *guest;

	ASSERT3S(fpu->hf_inguest, ==, B_FALSE);

	guest = fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_fx;
	bcopy(guest, fx, sizeof (*fx));
}

int
hma_fpu_set_fxsave_state(hma_fpu_t *fpu, const struct fxsave_state *fx)
{
	struct fxsave_state *gfx;
	struct xsave_state *gxs;

	ASSERT3S(fpu->hf_inguest, ==, B_FALSE);

	/*
	 * If reserved bits are set in fx_mxcsr, then we will take a #GP when
	 * we restore them. Reject this outright.
	 *
	 * We do not need to check if we are dealing with state that has
	 * pending exceptions. This was only the case with the original FPU
	 * save and restore mechanisms (fsave/frstor). When using
	 * fxsave/fxrstor and xsave/xrstor, they will be deferred to the next
	 * user of the FPU, which is what we'd want here (they'd be taken in
	 * guest context).
	 */
	if ((fx->fx_mxcsr & ~sse_mxcsr_mask) != 0)
		return (EINVAL);

	switch (fp_save_mech) {
	case FP_FXSAVE:
		gfx = fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_fx;
		bcopy(fx, gfx, sizeof (*fx));
		break;
	case FP_XSAVE:
		gxs = fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_xs;
		bzero(gxs, cpuid_get_xsave_size());
		bcopy(fx, &gxs->xs_fxsave, sizeof (*fx));
		gxs->xs_header.xsh_xstate_bv =
		    XFEATURE_LEGACY_FP | XFEATURE_SSE;
		break;
	default:
		panic("Invalid fp_save_mech");
	}

	return (0);
}