1 // SPDX-License-Identifier: GPL-2.0 2 /* Copyright(c) 2021 Intel Corporation. */ 3 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 4 5 #include <asm/sgx.h> 6 7 #include "cpuid.h" 8 #include "kvm_cache_regs.h" 9 #include "nested.h" 10 #include "sgx.h" 11 #include "vmx.h" 12 #include "x86.h" 13 14 bool __read_mostly enable_sgx = 1; 15 module_param_named(sgx, enable_sgx, bool, 0444); 16 17 /* Initial value of guest's virtual SGX_LEPUBKEYHASHn MSRs */ 18 static u64 sgx_pubkey_hash[4] __ro_after_init; 19 20 /* 21 * ENCLS's memory operands use a fixed segment (DS) and a fixed 22 * address size based on the mode. Related prefixes are ignored. 23 */ 24 static int sgx_get_encls_gva(struct kvm_vcpu *vcpu, unsigned long offset, 25 int size, int alignment, gva_t *gva) 26 { 27 struct kvm_segment s; 28 bool fault; 29 30 /* Skip vmcs.GUEST_DS retrieval for 64-bit mode to avoid VMREADs. */ 31 *gva = offset; 32 if (!is_64_bit_mode(vcpu)) { 33 vmx_get_segment(vcpu, &s, VCPU_SREG_DS); 34 *gva += s.base; 35 } 36 37 if (!IS_ALIGNED(*gva, alignment)) { 38 fault = true; 39 } else if (likely(is_64_bit_mode(vcpu))) { 40 fault = is_noncanonical_address(*gva, vcpu); 41 } else { 42 *gva &= 0xffffffff; 43 fault = (s.unusable) || 44 (s.type != 2 && s.type != 3) || 45 (*gva > s.limit) || 46 ((s.base != 0 || s.limit != 0xffffffff) && 47 (((u64)*gva + size - 1) > s.limit + 1)); 48 } 49 if (fault) 50 kvm_inject_gp(vcpu, 0); 51 return fault ? -EINVAL : 0; 52 } 53 54 static void sgx_handle_emulation_failure(struct kvm_vcpu *vcpu, u64 addr, 55 unsigned int size) 56 { 57 uint64_t data[2] = { addr, size }; 58 59 __kvm_prepare_emulation_failure_exit(vcpu, data, ARRAY_SIZE(data)); 60 } 61 62 static int sgx_read_hva(struct kvm_vcpu *vcpu, unsigned long hva, void *data, 63 unsigned int size) 64 { 65 if (__copy_from_user(data, (void __user *)hva, size)) { 66 sgx_handle_emulation_failure(vcpu, hva, size); 67 return -EFAULT; 68 } 69 70 return 0; 71 } 72 73 static int sgx_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t gva, bool write, 74 gpa_t *gpa) 75 { 76 struct x86_exception ex; 77 78 if (write) 79 *gpa = kvm_mmu_gva_to_gpa_write(vcpu, gva, &ex); 80 else 81 *gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, &ex); 82 83 if (*gpa == INVALID_GPA) { 84 kvm_inject_emulated_page_fault(vcpu, &ex); 85 return -EFAULT; 86 } 87 88 return 0; 89 } 90 91 static int sgx_gpa_to_hva(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned long *hva) 92 { 93 *hva = kvm_vcpu_gfn_to_hva(vcpu, PFN_DOWN(gpa)); 94 if (kvm_is_error_hva(*hva)) { 95 sgx_handle_emulation_failure(vcpu, gpa, 1); 96 return -EFAULT; 97 } 98 99 *hva |= gpa & ~PAGE_MASK; 100 101 return 0; 102 } 103 104 static int sgx_inject_fault(struct kvm_vcpu *vcpu, gva_t gva, int trapnr) 105 { 106 struct x86_exception ex; 107 108 /* 109 * A non-EPCM #PF indicates a bad userspace HVA. This *should* check 110 * for PFEC.SGX and not assume any #PF on SGX2 originated in the EPC, 111 * but the error code isn't (yet) plumbed through the ENCLS helpers. 112 */ 113 if (trapnr == PF_VECTOR && !boot_cpu_has(X86_FEATURE_SGX2)) { 114 kvm_prepare_emulation_failure_exit(vcpu); 115 return 0; 116 } 117 118 /* 119 * If the guest thinks it's running on SGX2 hardware, inject an SGX 120 * #PF if the fault matches an EPCM fault signature (#GP on SGX1, 121 * #PF on SGX2). The assumption is that EPCM faults are much more 122 * likely than a bad userspace address. 123 */ 124 if ((trapnr == PF_VECTOR || !boot_cpu_has(X86_FEATURE_SGX2)) && 125 guest_cpuid_has(vcpu, X86_FEATURE_SGX2)) { 126 memset(&ex, 0, sizeof(ex)); 127 ex.vector = PF_VECTOR; 128 ex.error_code = PFERR_PRESENT_MASK | PFERR_WRITE_MASK | 129 PFERR_SGX_MASK; 130 ex.address = gva; 131 ex.error_code_valid = true; 132 ex.nested_page_fault = false; 133 kvm_inject_emulated_page_fault(vcpu, &ex); 134 } else { 135 kvm_inject_gp(vcpu, 0); 136 } 137 return 1; 138 } 139 140 static int __handle_encls_ecreate(struct kvm_vcpu *vcpu, 141 struct sgx_pageinfo *pageinfo, 142 unsigned long secs_hva, 143 gva_t secs_gva) 144 { 145 struct sgx_secs *contents = (struct sgx_secs *)pageinfo->contents; 146 struct kvm_cpuid_entry2 *sgx_12_0, *sgx_12_1; 147 u64 attributes, xfrm, size; 148 u32 miscselect; 149 u8 max_size_log2; 150 int trapnr, ret; 151 152 sgx_12_0 = kvm_find_cpuid_entry_index(vcpu, 0x12, 0); 153 sgx_12_1 = kvm_find_cpuid_entry_index(vcpu, 0x12, 1); 154 if (!sgx_12_0 || !sgx_12_1) { 155 kvm_prepare_emulation_failure_exit(vcpu); 156 return 0; 157 } 158 159 miscselect = contents->miscselect; 160 attributes = contents->attributes; 161 xfrm = contents->xfrm; 162 size = contents->size; 163 164 /* Enforce restriction of access to the PROVISIONKEY. */ 165 if (!vcpu->kvm->arch.sgx_provisioning_allowed && 166 (attributes & SGX_ATTR_PROVISIONKEY)) { 167 if (sgx_12_1->eax & SGX_ATTR_PROVISIONKEY) 168 pr_warn_once("SGX PROVISIONKEY advertised but not allowed\n"); 169 kvm_inject_gp(vcpu, 0); 170 return 1; 171 } 172 173 /* 174 * Enforce CPUID restrictions on MISCSELECT, ATTRIBUTES and XFRM. Note 175 * that the allowed XFRM (XFeature Request Mask) isn't strictly bound 176 * by the supported XCR0. FP+SSE *must* be set in XFRM, even if XSAVE 177 * is unsupported, i.e. even if XCR0 itself is completely unsupported. 178 */ 179 if ((u32)miscselect & ~sgx_12_0->ebx || 180 (u32)attributes & ~sgx_12_1->eax || 181 (u32)(attributes >> 32) & ~sgx_12_1->ebx || 182 (u32)xfrm & ~sgx_12_1->ecx || 183 (u32)(xfrm >> 32) & ~sgx_12_1->edx || 184 xfrm & ~(vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FPSSE) || 185 (xfrm & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) { 186 kvm_inject_gp(vcpu, 0); 187 return 1; 188 } 189 190 /* Enforce CPUID restriction on max enclave size. */ 191 max_size_log2 = (attributes & SGX_ATTR_MODE64BIT) ? sgx_12_0->edx >> 8 : 192 sgx_12_0->edx; 193 if (size >= BIT_ULL(max_size_log2)) { 194 kvm_inject_gp(vcpu, 0); 195 return 1; 196 } 197 198 /* 199 * sgx_virt_ecreate() returns: 200 * 1) 0: ECREATE was successful 201 * 2) -EFAULT: ECREATE was run but faulted, and trapnr was set to the 202 * exception number. 203 * 3) -EINVAL: access_ok() on @secs_hva failed. This should never 204 * happen as KVM checks host addresses at memslot creation. 205 * sgx_virt_ecreate() has already warned in this case. 206 */ 207 ret = sgx_virt_ecreate(pageinfo, (void __user *)secs_hva, &trapnr); 208 if (!ret) 209 return kvm_skip_emulated_instruction(vcpu); 210 if (ret == -EFAULT) 211 return sgx_inject_fault(vcpu, secs_gva, trapnr); 212 213 return ret; 214 } 215 216 static int handle_encls_ecreate(struct kvm_vcpu *vcpu) 217 { 218 gva_t pageinfo_gva, secs_gva; 219 gva_t metadata_gva, contents_gva; 220 gpa_t metadata_gpa, contents_gpa, secs_gpa; 221 unsigned long metadata_hva, contents_hva, secs_hva; 222 struct sgx_pageinfo pageinfo; 223 struct sgx_secs *contents; 224 struct x86_exception ex; 225 int r; 226 227 if (sgx_get_encls_gva(vcpu, kvm_rbx_read(vcpu), 32, 32, &pageinfo_gva) || 228 sgx_get_encls_gva(vcpu, kvm_rcx_read(vcpu), 4096, 4096, &secs_gva)) 229 return 1; 230 231 /* 232 * Copy the PAGEINFO to local memory, its pointers need to be 233 * translated, i.e. we need to do a deep copy/translate. 234 */ 235 r = kvm_read_guest_virt(vcpu, pageinfo_gva, &pageinfo, 236 sizeof(pageinfo), &ex); 237 if (r == X86EMUL_PROPAGATE_FAULT) { 238 kvm_inject_emulated_page_fault(vcpu, &ex); 239 return 1; 240 } else if (r != X86EMUL_CONTINUE) { 241 sgx_handle_emulation_failure(vcpu, pageinfo_gva, 242 sizeof(pageinfo)); 243 return 0; 244 } 245 246 if (sgx_get_encls_gva(vcpu, pageinfo.metadata, 64, 64, &metadata_gva) || 247 sgx_get_encls_gva(vcpu, pageinfo.contents, 4096, 4096, 248 &contents_gva)) 249 return 1; 250 251 /* 252 * Translate the SECINFO, SOURCE and SECS pointers from GVA to GPA. 253 * Resume the guest on failure to inject a #PF. 254 */ 255 if (sgx_gva_to_gpa(vcpu, metadata_gva, false, &metadata_gpa) || 256 sgx_gva_to_gpa(vcpu, contents_gva, false, &contents_gpa) || 257 sgx_gva_to_gpa(vcpu, secs_gva, true, &secs_gpa)) 258 return 1; 259 260 /* 261 * ...and then to HVA. The order of accesses isn't architectural, i.e. 262 * KVM doesn't have to fully process one address at a time. Exit to 263 * userspace if a GPA is invalid. 264 */ 265 if (sgx_gpa_to_hva(vcpu, metadata_gpa, &metadata_hva) || 266 sgx_gpa_to_hva(vcpu, contents_gpa, &contents_hva) || 267 sgx_gpa_to_hva(vcpu, secs_gpa, &secs_hva)) 268 return 0; 269 270 /* 271 * Copy contents into kernel memory to prevent TOCTOU attack. E.g. the 272 * guest could do ECREATE w/ SECS.SGX_ATTR_PROVISIONKEY=0, and 273 * simultaneously set SGX_ATTR_PROVISIONKEY to bypass the check to 274 * enforce restriction of access to the PROVISIONKEY. 275 */ 276 contents = (struct sgx_secs *)__get_free_page(GFP_KERNEL_ACCOUNT); 277 if (!contents) 278 return -ENOMEM; 279 280 /* Exit to userspace if copying from a host userspace address fails. */ 281 if (sgx_read_hva(vcpu, contents_hva, (void *)contents, PAGE_SIZE)) { 282 free_page((unsigned long)contents); 283 return 0; 284 } 285 286 pageinfo.metadata = metadata_hva; 287 pageinfo.contents = (u64)contents; 288 289 r = __handle_encls_ecreate(vcpu, &pageinfo, secs_hva, secs_gva); 290 291 free_page((unsigned long)contents); 292 293 return r; 294 } 295 296 static int handle_encls_einit(struct kvm_vcpu *vcpu) 297 { 298 unsigned long sig_hva, secs_hva, token_hva, rflags; 299 struct vcpu_vmx *vmx = to_vmx(vcpu); 300 gva_t sig_gva, secs_gva, token_gva; 301 gpa_t sig_gpa, secs_gpa, token_gpa; 302 int ret, trapnr; 303 304 if (sgx_get_encls_gva(vcpu, kvm_rbx_read(vcpu), 1808, 4096, &sig_gva) || 305 sgx_get_encls_gva(vcpu, kvm_rcx_read(vcpu), 4096, 4096, &secs_gva) || 306 sgx_get_encls_gva(vcpu, kvm_rdx_read(vcpu), 304, 512, &token_gva)) 307 return 1; 308 309 /* 310 * Translate the SIGSTRUCT, SECS and TOKEN pointers from GVA to GPA. 311 * Resume the guest on failure to inject a #PF. 312 */ 313 if (sgx_gva_to_gpa(vcpu, sig_gva, false, &sig_gpa) || 314 sgx_gva_to_gpa(vcpu, secs_gva, true, &secs_gpa) || 315 sgx_gva_to_gpa(vcpu, token_gva, false, &token_gpa)) 316 return 1; 317 318 /* 319 * ...and then to HVA. The order of accesses isn't architectural, i.e. 320 * KVM doesn't have to fully process one address at a time. Exit to 321 * userspace if a GPA is invalid. Note, all structures are aligned and 322 * cannot split pages. 323 */ 324 if (sgx_gpa_to_hva(vcpu, sig_gpa, &sig_hva) || 325 sgx_gpa_to_hva(vcpu, secs_gpa, &secs_hva) || 326 sgx_gpa_to_hva(vcpu, token_gpa, &token_hva)) 327 return 0; 328 329 ret = sgx_virt_einit((void __user *)sig_hva, (void __user *)token_hva, 330 (void __user *)secs_hva, 331 vmx->msr_ia32_sgxlepubkeyhash, &trapnr); 332 333 if (ret == -EFAULT) 334 return sgx_inject_fault(vcpu, secs_gva, trapnr); 335 336 /* 337 * sgx_virt_einit() returns -EINVAL when access_ok() fails on @sig_hva, 338 * @token_hva or @secs_hva. This should never happen as KVM checks host 339 * addresses at memslot creation. sgx_virt_einit() has already warned 340 * in this case, so just return. 341 */ 342 if (ret < 0) 343 return ret; 344 345 rflags = vmx_get_rflags(vcpu) & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | 346 X86_EFLAGS_AF | X86_EFLAGS_SF | 347 X86_EFLAGS_OF); 348 if (ret) 349 rflags |= X86_EFLAGS_ZF; 350 else 351 rflags &= ~X86_EFLAGS_ZF; 352 vmx_set_rflags(vcpu, rflags); 353 354 kvm_rax_write(vcpu, ret); 355 return kvm_skip_emulated_instruction(vcpu); 356 } 357 358 static inline bool encls_leaf_enabled_in_guest(struct kvm_vcpu *vcpu, u32 leaf) 359 { 360 /* 361 * ENCLS generates a #UD if SGX1 isn't supported, i.e. this point will 362 * be reached if and only if the SGX1 leafs are enabled. 363 */ 364 if (leaf >= ECREATE && leaf <= ETRACK) 365 return true; 366 367 if (leaf >= EAUG && leaf <= EMODT) 368 return guest_cpuid_has(vcpu, X86_FEATURE_SGX2); 369 370 return false; 371 } 372 373 static inline bool sgx_enabled_in_guest_bios(struct kvm_vcpu *vcpu) 374 { 375 const u64 bits = FEAT_CTL_SGX_ENABLED | FEAT_CTL_LOCKED; 376 377 return (to_vmx(vcpu)->msr_ia32_feature_control & bits) == bits; 378 } 379 380 int handle_encls(struct kvm_vcpu *vcpu) 381 { 382 u32 leaf = (u32)kvm_rax_read(vcpu); 383 384 if (!enable_sgx || !guest_cpuid_has(vcpu, X86_FEATURE_SGX) || 385 !guest_cpuid_has(vcpu, X86_FEATURE_SGX1)) { 386 kvm_queue_exception(vcpu, UD_VECTOR); 387 } else if (!encls_leaf_enabled_in_guest(vcpu, leaf) || 388 !sgx_enabled_in_guest_bios(vcpu) || !is_paging(vcpu)) { 389 kvm_inject_gp(vcpu, 0); 390 } else { 391 if (leaf == ECREATE) 392 return handle_encls_ecreate(vcpu); 393 if (leaf == EINIT) 394 return handle_encls_einit(vcpu); 395 WARN_ONCE(1, "unexpected exit on ENCLS[%u]", leaf); 396 vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; 397 vcpu->run->hw.hardware_exit_reason = EXIT_REASON_ENCLS; 398 return 0; 399 } 400 return 1; 401 } 402 403 void setup_default_sgx_lepubkeyhash(void) 404 { 405 /* 406 * Use Intel's default value for Skylake hardware if Launch Control is 407 * not supported, i.e. Intel's hash is hardcoded into silicon, or if 408 * Launch Control is supported and enabled, i.e. mimic the reset value 409 * and let the guest write the MSRs at will. If Launch Control is 410 * supported but disabled, then use the current MSR values as the hash 411 * MSRs exist but are read-only (locked and not writable). 412 */ 413 if (!enable_sgx || boot_cpu_has(X86_FEATURE_SGX_LC) || 414 rdmsrl_safe(MSR_IA32_SGXLEPUBKEYHASH0, &sgx_pubkey_hash[0])) { 415 sgx_pubkey_hash[0] = 0xa6053e051270b7acULL; 416 sgx_pubkey_hash[1] = 0x6cfbe8ba8b3b413dULL; 417 sgx_pubkey_hash[2] = 0xc4916d99f2b3735dULL; 418 sgx_pubkey_hash[3] = 0xd4f8c05909f9bb3bULL; 419 } else { 420 /* MSR_IA32_SGXLEPUBKEYHASH0 is read above */ 421 rdmsrl(MSR_IA32_SGXLEPUBKEYHASH1, sgx_pubkey_hash[1]); 422 rdmsrl(MSR_IA32_SGXLEPUBKEYHASH2, sgx_pubkey_hash[2]); 423 rdmsrl(MSR_IA32_SGXLEPUBKEYHASH3, sgx_pubkey_hash[3]); 424 } 425 } 426 427 void vcpu_setup_sgx_lepubkeyhash(struct kvm_vcpu *vcpu) 428 { 429 struct vcpu_vmx *vmx = to_vmx(vcpu); 430 431 memcpy(vmx->msr_ia32_sgxlepubkeyhash, sgx_pubkey_hash, 432 sizeof(sgx_pubkey_hash)); 433 } 434 435 /* 436 * ECREATE must be intercepted to enforce MISCSELECT, ATTRIBUTES and XFRM 437 * restrictions if the guest's allowed-1 settings diverge from hardware. 438 */ 439 static bool sgx_intercept_encls_ecreate(struct kvm_vcpu *vcpu) 440 { 441 struct kvm_cpuid_entry2 *guest_cpuid; 442 u32 eax, ebx, ecx, edx; 443 444 if (!vcpu->kvm->arch.sgx_provisioning_allowed) 445 return true; 446 447 guest_cpuid = kvm_find_cpuid_entry_index(vcpu, 0x12, 0); 448 if (!guest_cpuid) 449 return true; 450 451 cpuid_count(0x12, 0, &eax, &ebx, &ecx, &edx); 452 if (guest_cpuid->ebx != ebx || guest_cpuid->edx != edx) 453 return true; 454 455 guest_cpuid = kvm_find_cpuid_entry_index(vcpu, 0x12, 1); 456 if (!guest_cpuid) 457 return true; 458 459 cpuid_count(0x12, 1, &eax, &ebx, &ecx, &edx); 460 if (guest_cpuid->eax != eax || guest_cpuid->ebx != ebx || 461 guest_cpuid->ecx != ecx || guest_cpuid->edx != edx) 462 return true; 463 464 return false; 465 } 466 467 void vmx_write_encls_bitmap(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) 468 { 469 /* 470 * There is no software enable bit for SGX that is virtualized by 471 * hardware, e.g. there's no CR4.SGXE, so when SGX is disabled in the 472 * guest (either by the host or by the guest's BIOS) but enabled in the 473 * host, trap all ENCLS leafs and inject #UD/#GP as needed to emulate 474 * the expected system behavior for ENCLS. 475 */ 476 u64 bitmap = -1ull; 477 478 /* Nothing to do if hardware doesn't support SGX */ 479 if (!cpu_has_vmx_encls_vmexit()) 480 return; 481 482 if (guest_cpuid_has(vcpu, X86_FEATURE_SGX) && 483 sgx_enabled_in_guest_bios(vcpu)) { 484 if (guest_cpuid_has(vcpu, X86_FEATURE_SGX1)) { 485 bitmap &= ~GENMASK_ULL(ETRACK, ECREATE); 486 if (sgx_intercept_encls_ecreate(vcpu)) 487 bitmap |= (1 << ECREATE); 488 } 489 490 if (guest_cpuid_has(vcpu, X86_FEATURE_SGX2)) 491 bitmap &= ~GENMASK_ULL(EMODT, EAUG); 492 493 /* 494 * Trap and execute EINIT if launch control is enabled in the 495 * host using the guest's values for launch control MSRs, even 496 * if the guest's values are fixed to hardware default values. 497 * The MSRs are not loaded/saved on VM-Enter/VM-Exit as writing 498 * the MSRs is extraordinarily expensive. 499 */ 500 if (boot_cpu_has(X86_FEATURE_SGX_LC)) 501 bitmap |= (1 << EINIT); 502 503 if (!vmcs12 && is_guest_mode(vcpu)) 504 vmcs12 = get_vmcs12(vcpu); 505 if (vmcs12 && nested_cpu_has_encls_exit(vmcs12)) 506 bitmap |= vmcs12->encls_exiting_bitmap; 507 } 508 vmcs_write64(ENCLS_EXITING_BITMAP, bitmap); 509 } 510