// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2021 Intel Corporation. */

#include <asm/sgx.h>

#include "cpuid.h"
#include "kvm_cache_regs.h"
#include "nested.h"
#include "sgx.h"
#include "vmx.h"
#include "x86.h"

bool __read_mostly enable_sgx = 1;
module_param_named(sgx, enable_sgx, bool, 0444);

/* Initial value of guest's virtual SGX_LEPUBKEYHASHn MSRs */
static u64 sgx_pubkey_hash[4] __ro_after_init;

/*
 * ENCLS's memory operands use a fixed segment (DS) and a fixed
 * address size based on the mode.  Related prefixes are ignored.
 */
static int sgx_get_encls_gva(struct kvm_vcpu *vcpu, unsigned long offset,
			     int size, int alignment, gva_t *gva)
{
	struct kvm_segment s;
	bool fault;

	/* Skip vmcs.GUEST_DS retrieval for 64-bit mode to avoid VMREADs. */
	*gva = offset;
	if (!is_long_mode(vcpu)) {
		vmx_get_segment(vcpu, &s, VCPU_SREG_DS);
		*gva += s.base;
	}

	if (!IS_ALIGNED(*gva, alignment)) {
		fault = true;
	} else if (likely(is_long_mode(vcpu))) {
		fault = is_noncanonical_address(*gva, vcpu);
	} else {
		*gva &= 0xffffffff;
		fault = (s.unusable) ||
			(s.type != 2 && s.type != 3) ||
			(*gva > s.limit) ||
			((s.base != 0 || s.limit != 0xffffffff) &&
			(((u64)*gva + size - 1) > s.limit + 1));
	}
	if (fault)
		kvm_inject_gp(vcpu, 0);
	return fault ? -EINVAL : 0;
}

static void sgx_handle_emulation_failure(struct kvm_vcpu *vcpu, u64 addr,
					 unsigned int size)
{
	uint64_t data[2] = { addr, size };

	__kvm_prepare_emulation_failure_exit(vcpu, data, ARRAY_SIZE(data));
}

static int sgx_read_hva(struct kvm_vcpu *vcpu, unsigned long hva, void *data,
			unsigned int size)
{
	if (__copy_from_user(data, (void __user *)hva, size)) {
		sgx_handle_emulation_failure(vcpu, hva, size);
		return -EFAULT;
	}

	return 0;
}

static int sgx_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t gva, bool write,
			  gpa_t *gpa)
{
	struct x86_exception ex;

	if (write)
		*gpa = kvm_mmu_gva_to_gpa_write(vcpu, gva, &ex);
	else
		*gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, &ex);

	if (*gpa == INVALID_GPA) {
		kvm_inject_emulated_page_fault(vcpu, &ex);
		return -EFAULT;
	}

	return 0;
}

static int sgx_gpa_to_hva(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned long *hva)
{
	*hva = kvm_vcpu_gfn_to_hva(vcpu, PFN_DOWN(gpa));
	if (kvm_is_error_hva(*hva)) {
		sgx_handle_emulation_failure(vcpu, gpa, 1);
		return -EFAULT;
	}

	*hva |= gpa & ~PAGE_MASK;

	return 0;
}

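/*
 * Convert a fault taken while executing ENCLS on the guest's behalf into the
 * architecturally expected guest exception (SGX #PF or #GP), or exit to
 * userspace when the fault indicates a bad host userspace address.
 */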
static int sgx_inject_fault(struct kvm_vcpu *vcpu, gva_t gva, int trapnr)
{
	struct x86_exception ex;

	/*
	 * A non-EPCM #PF indicates a bad userspace HVA.  This *should* check
	 * for PFEC.SGX and not assume any #PF on SGX2 originated in the EPC,
	 * but the error code isn't (yet) plumbed through the ENCLS helpers.
	 */
	if (trapnr == PF_VECTOR && !boot_cpu_has(X86_FEATURE_SGX2)) {
		kvm_prepare_emulation_failure_exit(vcpu);
		return 0;
	}

	/*
	 * If the guest thinks it's running on SGX2 hardware, inject an SGX
	 * #PF if the fault matches an EPCM fault signature (#GP on SGX1,
	 * #PF on SGX2).  The assumption is that EPCM faults are much more
	 * likely than a bad userspace address.
	 */
	if ((trapnr == PF_VECTOR || !boot_cpu_has(X86_FEATURE_SGX2)) &&
	    guest_cpuid_has(vcpu, X86_FEATURE_SGX2)) {
		memset(&ex, 0, sizeof(ex));
		ex.vector = PF_VECTOR;
		ex.error_code = PFERR_PRESENT_MASK | PFERR_WRITE_MASK |
				PFERR_SGX_MASK;
		ex.address = gva;
		ex.error_code_valid = true;
		ex.nested_page_fault = false;
		kvm_inject_emulated_page_fault(vcpu, &ex);
	} else {
		kvm_inject_gp(vcpu, 0);
	}
	return 1;
}

static int __handle_encls_ecreate(struct kvm_vcpu *vcpu,
				  struct sgx_pageinfo *pageinfo,
				  unsigned long secs_hva,
				  gva_t secs_gva)
{
	struct sgx_secs *contents = (struct sgx_secs *)pageinfo->contents;
	struct kvm_cpuid_entry2 *sgx_12_0, *sgx_12_1;
	u64 attributes, xfrm, size;
	u32 miscselect;
	u8 max_size_log2;
	int trapnr, ret;

	sgx_12_0 = kvm_find_cpuid_entry_index(vcpu, 0x12, 0);
	sgx_12_1 = kvm_find_cpuid_entry_index(vcpu, 0x12, 1);
	if (!sgx_12_0 || !sgx_12_1) {
		kvm_prepare_emulation_failure_exit(vcpu);
		return 0;
	}

	miscselect = contents->miscselect;
	attributes = contents->attributes;
	xfrm = contents->xfrm;
	size = contents->size;

	/* Enforce restriction of access to the PROVISIONKEY. */
	if (!vcpu->kvm->arch.sgx_provisioning_allowed &&
	    (attributes & SGX_ATTR_PROVISIONKEY)) {
		if (sgx_12_1->eax & SGX_ATTR_PROVISIONKEY)
			pr_warn_once("KVM: SGX PROVISIONKEY advertised but not allowed\n");
		kvm_inject_gp(vcpu, 0);
		return 1;
	}

	/* Enforce CPUID restrictions on MISCSELECT, ATTRIBUTES and XFRM. */
	if ((u32)miscselect & ~sgx_12_0->ebx ||
	    (u32)attributes & ~sgx_12_1->eax ||
	    (u32)(attributes >> 32) & ~sgx_12_1->ebx ||
	    (u32)xfrm & ~sgx_12_1->ecx ||
	    (u32)(xfrm >> 32) & ~sgx_12_1->edx) {
		kvm_inject_gp(vcpu, 0);
		return 1;
	}

	/* Enforce CPUID restriction on max enclave size. */
	max_size_log2 = (attributes & SGX_ATTR_MODE64BIT) ? sgx_12_0->edx >> 8 :
							    sgx_12_0->edx;
	if (size >= BIT_ULL(max_size_log2)) {
		kvm_inject_gp(vcpu, 0);
		return 1;
	}

	/*
	 * sgx_virt_ecreate() returns:
	 *  1) 0:	ECREATE was successful
	 *  2) -EFAULT:	ECREATE was run but faulted, and trapnr was set to the
	 *		exception number.
	 *  3) -EINVAL:	access_ok() on @secs_hva failed.  This should never
	 *		happen as KVM checks host addresses at memslot creation.
	 *		sgx_virt_ecreate() has already warned in this case.
	 */
	ret = sgx_virt_ecreate(pageinfo, (void __user *)secs_hva, &trapnr);
	if (!ret)
		return kvm_skip_emulated_instruction(vcpu);
	if (ret == -EFAULT)
		return sgx_inject_fault(vcpu, secs_gva, trapnr);

	return ret;
}

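/*
 * Emulate ENCLS[ECREATE]: walk the guest's PAGEINFO, SECINFO, SOURCE and SECS
 * pointers down to host virtual addresses, copy the SECS contents into kernel
 * memory to avoid TOCTOU races, and run ECREATE on the guest's behalf.
 */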
static int handle_encls_ecreate(struct kvm_vcpu *vcpu)
{
	gva_t pageinfo_gva, secs_gva;
	gva_t metadata_gva, contents_gva;
	gpa_t metadata_gpa, contents_gpa, secs_gpa;
	unsigned long metadata_hva, contents_hva, secs_hva;
	struct sgx_pageinfo pageinfo;
	struct sgx_secs *contents;
	struct x86_exception ex;
	int r;

	if (sgx_get_encls_gva(vcpu, kvm_rbx_read(vcpu), 32, 32, &pageinfo_gva) ||
	    sgx_get_encls_gva(vcpu, kvm_rcx_read(vcpu), 4096, 4096, &secs_gva))
		return 1;

	/*
	 * Copy the PAGEINFO to local memory, its pointers need to be
	 * translated, i.e. we need to do a deep copy/translate.
	 */
	r = kvm_read_guest_virt(vcpu, pageinfo_gva, &pageinfo,
				sizeof(pageinfo), &ex);
	if (r == X86EMUL_PROPAGATE_FAULT) {
		kvm_inject_emulated_page_fault(vcpu, &ex);
		return 1;
	} else if (r != X86EMUL_CONTINUE) {
		sgx_handle_emulation_failure(vcpu, pageinfo_gva,
					     sizeof(pageinfo));
		return 0;
	}

	if (sgx_get_encls_gva(vcpu, pageinfo.metadata, 64, 64, &metadata_gva) ||
	    sgx_get_encls_gva(vcpu, pageinfo.contents, 4096, 4096,
			      &contents_gva))
		return 1;

	/*
	 * Translate the SECINFO, SOURCE and SECS pointers from GVA to GPA.
	 * Resume the guest on failure to inject a #PF.
	 */
	if (sgx_gva_to_gpa(vcpu, metadata_gva, false, &metadata_gpa) ||
	    sgx_gva_to_gpa(vcpu, contents_gva, false, &contents_gpa) ||
	    sgx_gva_to_gpa(vcpu, secs_gva, true, &secs_gpa))
		return 1;

	/*
	 * ...and then to HVA.  The order of accesses isn't architectural, i.e.
	 * KVM doesn't have to fully process one address at a time.  Exit to
	 * userspace if a GPA is invalid.
	 */
	if (sgx_gpa_to_hva(vcpu, metadata_gpa, &metadata_hva) ||
	    sgx_gpa_to_hva(vcpu, contents_gpa, &contents_hva) ||
	    sgx_gpa_to_hva(vcpu, secs_gpa, &secs_hva))
		return 0;

	/*
	 * Copy contents into kernel memory to prevent TOCTOU attack.  E.g. the
	 * guest could do ECREATE w/ SECS.SGX_ATTR_PROVISIONKEY=0, and
	 * simultaneously set SGX_ATTR_PROVISIONKEY to bypass the check to
	 * enforce restriction of access to the PROVISIONKEY.
	 */
	contents = (struct sgx_secs *)__get_free_page(GFP_KERNEL_ACCOUNT);
	if (!contents)
		return -ENOMEM;

	/* Exit to userspace if copying from a host userspace address fails. */
	if (sgx_read_hva(vcpu, contents_hva, (void *)contents, PAGE_SIZE)) {
		free_page((unsigned long)contents);
		return 0;
	}

	pageinfo.metadata = metadata_hva;
	pageinfo.contents = (u64)contents;

	r = __handle_encls_ecreate(vcpu, &pageinfo, secs_hva, secs_gva);

	free_page((unsigned long)contents);

	return r;
}

static int handle_encls_einit(struct kvm_vcpu *vcpu)
{
	unsigned long sig_hva, secs_hva, token_hva, rflags;
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	gva_t sig_gva, secs_gva, token_gva;
	gpa_t sig_gpa, secs_gpa, token_gpa;
	int ret, trapnr;

	if (sgx_get_encls_gva(vcpu, kvm_rbx_read(vcpu), 1808, 4096, &sig_gva) ||
	    sgx_get_encls_gva(vcpu, kvm_rcx_read(vcpu), 4096, 4096, &secs_gva) ||
	    sgx_get_encls_gva(vcpu, kvm_rdx_read(vcpu), 304, 512, &token_gva))
		return 1;

	/*
	 * Translate the SIGSTRUCT, SECS and TOKEN pointers from GVA to GPA.
	 * Resume the guest on failure to inject a #PF.
	 */
	if (sgx_gva_to_gpa(vcpu, sig_gva, false, &sig_gpa) ||
	    sgx_gva_to_gpa(vcpu, secs_gva, true, &secs_gpa) ||
	    sgx_gva_to_gpa(vcpu, token_gva, false, &token_gpa))
		return 1;

	/*
	 * ...and then to HVA.  The order of accesses isn't architectural, i.e.
	 * KVM doesn't have to fully process one address at a time.  Exit to
	 * userspace if a GPA is invalid.  Note, all structures are aligned and
	 * cannot split pages.
	 */
	if (sgx_gpa_to_hva(vcpu, sig_gpa, &sig_hva) ||
	    sgx_gpa_to_hva(vcpu, secs_gpa, &secs_hva) ||
	    sgx_gpa_to_hva(vcpu, token_gpa, &token_hva))
		return 0;

	ret = sgx_virt_einit((void __user *)sig_hva, (void __user *)token_hva,
			     (void __user *)secs_hva,
			     vmx->msr_ia32_sgxlepubkeyhash, &trapnr);

	if (ret == -EFAULT)
		return sgx_inject_fault(vcpu, secs_gva, trapnr);

	/*
	 * sgx_virt_einit() returns -EINVAL when access_ok() fails on @sig_hva,
	 * @token_hva or @secs_hva.  This should never happen as KVM checks
	 * host addresses at memslot creation.  sgx_virt_einit() has already
	 * warned in this case, so just return.
	 */
	if (ret < 0)
		return ret;

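	/*
	 * Mirror bare-metal EINIT behavior for the guest: the error code (or
	 * zero on success) is reported in RAX with ZF set on failure, and the
	 * remaining arithmetic flags are cleared.
	 */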
	rflags = vmx_get_rflags(vcpu) & ~(X86_EFLAGS_CF | X86_EFLAGS_PF |
					  X86_EFLAGS_AF | X86_EFLAGS_SF |
					  X86_EFLAGS_OF);
	if (ret)
		rflags |= X86_EFLAGS_ZF;
	else
		rflags &= ~X86_EFLAGS_ZF;
	vmx_set_rflags(vcpu, rflags);

	kvm_rax_write(vcpu, ret);
	return kvm_skip_emulated_instruction(vcpu);
}

static inline bool encls_leaf_enabled_in_guest(struct kvm_vcpu *vcpu, u32 leaf)
{
	if (!enable_sgx || !guest_cpuid_has(vcpu, X86_FEATURE_SGX))
		return false;

	if (leaf >= ECREATE && leaf <= ETRACK)
		return guest_cpuid_has(vcpu, X86_FEATURE_SGX1);

	if (leaf >= EAUG && leaf <= EMODT)
		return guest_cpuid_has(vcpu, X86_FEATURE_SGX2);

	return false;
}

static inline bool sgx_enabled_in_guest_bios(struct kvm_vcpu *vcpu)
{
	const u64 bits = FEAT_CTL_SGX_ENABLED | FEAT_CTL_LOCKED;

	return (to_vmx(vcpu)->msr_ia32_feature_control & bits) == bits;
}

int handle_encls(struct kvm_vcpu *vcpu)
{
	u32 leaf = (u32)kvm_rax_read(vcpu);

	if (!encls_leaf_enabled_in_guest(vcpu, leaf)) {
		kvm_queue_exception(vcpu, UD_VECTOR);
	} else if (!sgx_enabled_in_guest_bios(vcpu)) {
		kvm_inject_gp(vcpu, 0);
	} else {
		if (leaf == ECREATE)
			return handle_encls_ecreate(vcpu);
		if (leaf == EINIT)
			return handle_encls_einit(vcpu);
		WARN(1, "KVM: unexpected exit on ENCLS[%u]", leaf);
		vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
		vcpu->run->hw.hardware_exit_reason = EXIT_REASON_ENCLS;
		return 0;
	}
	return 1;
}

void setup_default_sgx_lepubkeyhash(void)
{
	/*
	 * Use Intel's default value for Skylake hardware if Launch Control is
	 * not supported, i.e. Intel's hash is hardcoded into silicon, or if
	 * Launch Control is supported and enabled, i.e. mimic the reset value
	 * and let the guest write the MSRs at will.  If Launch Control is
	 * supported but disabled, then use the current MSR values as the hash
	 * MSRs exist but are read-only (locked and not writable).
	 */
	if (!enable_sgx || boot_cpu_has(X86_FEATURE_SGX_LC) ||
	    rdmsrl_safe(MSR_IA32_SGXLEPUBKEYHASH0, &sgx_pubkey_hash[0])) {
		sgx_pubkey_hash[0] = 0xa6053e051270b7acULL;
		sgx_pubkey_hash[1] = 0x6cfbe8ba8b3b413dULL;
		sgx_pubkey_hash[2] = 0xc4916d99f2b3735dULL;
		sgx_pubkey_hash[3] = 0xd4f8c05909f9bb3bULL;
	} else {
		/* MSR_IA32_SGXLEPUBKEYHASH0 is read above */
		rdmsrl(MSR_IA32_SGXLEPUBKEYHASH1, sgx_pubkey_hash[1]);
		rdmsrl(MSR_IA32_SGXLEPUBKEYHASH2, sgx_pubkey_hash[2]);
		rdmsrl(MSR_IA32_SGXLEPUBKEYHASH3, sgx_pubkey_hash[3]);
	}
}

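/* Seed the vCPU's virtual SGX_LEPUBKEYHASHn MSRs with the host/default hash. */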
void vcpu_setup_sgx_lepubkeyhash(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	memcpy(vmx->msr_ia32_sgxlepubkeyhash, sgx_pubkey_hash,
	       sizeof(sgx_pubkey_hash));
}

/*
 * ECREATE must be intercepted to enforce MISCSELECT, ATTRIBUTES and XFRM
 * restrictions if the guest's allowed-1 settings diverge from hardware.
 */
static bool sgx_intercept_encls_ecreate(struct kvm_vcpu *vcpu)
{
	struct kvm_cpuid_entry2 *guest_cpuid;
	u32 eax, ebx, ecx, edx;

	if (!vcpu->kvm->arch.sgx_provisioning_allowed)
		return true;

	guest_cpuid = kvm_find_cpuid_entry_index(vcpu, 0x12, 0);
	if (!guest_cpuid)
		return true;

	cpuid_count(0x12, 0, &eax, &ebx, &ecx, &edx);
	if (guest_cpuid->ebx != ebx || guest_cpuid->edx != edx)
		return true;

	guest_cpuid = kvm_find_cpuid_entry_index(vcpu, 0x12, 1);
	if (!guest_cpuid)
		return true;

	cpuid_count(0x12, 1, &eax, &ebx, &ecx, &edx);
	if (guest_cpuid->eax != eax || guest_cpuid->ebx != ebx ||
	    guest_cpuid->ecx != ecx || guest_cpuid->edx != edx)
		return true;

	return false;
}

void vmx_write_encls_bitmap(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
{
	/*
	 * There is no software enable bit for SGX that is virtualized by
	 * hardware, e.g. there's no CR4.SGXE, so when SGX is disabled in the
	 * guest (either by the host or by the guest's BIOS) but enabled in the
	 * host, trap all ENCLS leafs and inject #UD/#GP as needed to emulate
	 * the expected system behavior for ENCLS.
	 */
	u64 bitmap = -1ull;

	/* Nothing to do if hardware doesn't support SGX */
	if (!cpu_has_vmx_encls_vmexit())
		return;

	if (guest_cpuid_has(vcpu, X86_FEATURE_SGX) &&
	    sgx_enabled_in_guest_bios(vcpu)) {
		if (guest_cpuid_has(vcpu, X86_FEATURE_SGX1)) {
			bitmap &= ~GENMASK_ULL(ETRACK, ECREATE);
			if (sgx_intercept_encls_ecreate(vcpu))
				bitmap |= (1 << ECREATE);
		}

		if (guest_cpuid_has(vcpu, X86_FEATURE_SGX2))
			bitmap &= ~GENMASK_ULL(EMODT, EAUG);

		/*
		 * Trap and execute EINIT if launch control is enabled in the
		 * host using the guest's values for launch control MSRs, even
		 * if the guest's values are fixed to hardware default values.
		 * The MSRs are not loaded/saved on VM-Enter/VM-Exit as writing
		 * the MSRs is extraordinarily expensive.
		 */
		if (boot_cpu_has(X86_FEATURE_SGX_LC))
			bitmap |= (1 << EINIT);

		if (!vmcs12 && is_guest_mode(vcpu))
			vmcs12 = get_vmcs12(vcpu);
		if (vmcs12 && nested_cpu_has_encls_exit(vmcs12))
			bitmap |= vmcs12->encls_exiting_bitmap;
	}
	vmcs_write64(ENCLS_EXITING_BITMAP, bitmap);
}