xref: /linux/arch/x86/kvm/vmx/sgx.c (revision 343d59119e776af3060000f7af70553fc531230e)
1 // SPDX-License-Identifier: GPL-2.0
2 /*  Copyright(c) 2021 Intel Corporation. */
3 
4 #include <asm/sgx.h>
5 
6 #include "cpuid.h"
7 #include "kvm_cache_regs.h"
8 #include "nested.h"
9 #include "sgx.h"
10 #include "vmx.h"
11 #include "x86.h"
12 
13 bool __read_mostly enable_sgx = 1;
14 module_param_named(sgx, enable_sgx, bool, 0444);
15 
16 /* Initial value of guest's virtual SGX_LEPUBKEYHASHn MSRs */
17 static u64 sgx_pubkey_hash[4] __ro_after_init;
18 
19 /*
20  * ENCLS's memory operands use a fixed segment (DS) and a fixed
21  * address size based on the mode.  Related prefixes are ignored.
22  */
23 static int sgx_get_encls_gva(struct kvm_vcpu *vcpu, unsigned long offset,
24 			     int size, int alignment, gva_t *gva)
25 {
26 	struct kvm_segment s;
27 	bool fault;
28 
29 	/* Skip vmcs.GUEST_DS retrieval for 64-bit mode to avoid VMREADs. */
30 	*gva = offset;
31 	if (!is_long_mode(vcpu)) {
32 		vmx_get_segment(vcpu, &s, VCPU_SREG_DS);
33 		*gva += s.base;
34 	}
35 
36 	if (!IS_ALIGNED(*gva, alignment)) {
37 		fault = true;
38 	} else if (likely(is_long_mode(vcpu))) {
39 		fault = is_noncanonical_address(*gva, vcpu);
40 	} else {
41 		*gva &= 0xffffffff;
42 		fault = (s.unusable) ||
43 			(s.type != 2 && s.type != 3) ||
44 			(*gva > s.limit) ||
45 			((s.base != 0 || s.limit != 0xffffffff) &&
46 			(((u64)*gva + size - 1) > s.limit + 1));
47 	}
48 	if (fault)
49 		kvm_inject_gp(vcpu, 0);
50 	return fault ? -EINVAL : 0;
51 }
52 
53 static void sgx_handle_emulation_failure(struct kvm_vcpu *vcpu, u64 addr,
54 					 unsigned int size)
55 {
56 	uint64_t data[2] = { addr, size };
57 
58 	__kvm_prepare_emulation_failure_exit(vcpu, data, ARRAY_SIZE(data));
59 }
60 
61 static int sgx_read_hva(struct kvm_vcpu *vcpu, unsigned long hva, void *data,
62 			unsigned int size)
63 {
64 	if (__copy_from_user(data, (void __user *)hva, size)) {
65 		sgx_handle_emulation_failure(vcpu, hva, size);
66 		return -EFAULT;
67 	}
68 
69 	return 0;
70 }
71 
72 static int sgx_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t gva, bool write,
73 			  gpa_t *gpa)
74 {
75 	struct x86_exception ex;
76 
77 	if (write)
78 		*gpa = kvm_mmu_gva_to_gpa_write(vcpu, gva, &ex);
79 	else
80 		*gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, &ex);
81 
82 	if (*gpa == INVALID_GPA) {
83 		kvm_inject_emulated_page_fault(vcpu, &ex);
84 		return -EFAULT;
85 	}
86 
87 	return 0;
88 }
89 
90 static int sgx_gpa_to_hva(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned long *hva)
91 {
92 	*hva = kvm_vcpu_gfn_to_hva(vcpu, PFN_DOWN(gpa));
93 	if (kvm_is_error_hva(*hva)) {
94 		sgx_handle_emulation_failure(vcpu, gpa, 1);
95 		return -EFAULT;
96 	}
97 
98 	*hva |= gpa & ~PAGE_MASK;
99 
100 	return 0;
101 }
102 
103 static int sgx_inject_fault(struct kvm_vcpu *vcpu, gva_t gva, int trapnr)
104 {
105 	struct x86_exception ex;
106 
107 	/*
108 	 * A non-EPCM #PF indicates a bad userspace HVA.  This *should* check
109 	 * for PFEC.SGX and not assume any #PF on SGX2 originated in the EPC,
110 	 * but the error code isn't (yet) plumbed through the ENCLS helpers.
111 	 */
112 	if (trapnr == PF_VECTOR && !boot_cpu_has(X86_FEATURE_SGX2)) {
113 		kvm_prepare_emulation_failure_exit(vcpu);
114 		return 0;
115 	}
116 
117 	/*
118 	 * If the guest thinks it's running on SGX2 hardware, inject an SGX
119 	 * #PF if the fault matches an EPCM fault signature (#GP on SGX1,
120 	 * #PF on SGX2).  The assumption is that EPCM faults are much more
121 	 * likely than a bad userspace address.
122 	 */
123 	if ((trapnr == PF_VECTOR || !boot_cpu_has(X86_FEATURE_SGX2)) &&
124 	    guest_cpuid_has(vcpu, X86_FEATURE_SGX2)) {
125 		memset(&ex, 0, sizeof(ex));
126 		ex.vector = PF_VECTOR;
127 		ex.error_code = PFERR_PRESENT_MASK | PFERR_WRITE_MASK |
128 				PFERR_SGX_MASK;
129 		ex.address = gva;
130 		ex.error_code_valid = true;
131 		ex.nested_page_fault = false;
132 		kvm_inject_emulated_page_fault(vcpu, &ex);
133 	} else {
134 		kvm_inject_gp(vcpu, 0);
135 	}
136 	return 1;
137 }
138 
139 static int __handle_encls_ecreate(struct kvm_vcpu *vcpu,
140 				  struct sgx_pageinfo *pageinfo,
141 				  unsigned long secs_hva,
142 				  gva_t secs_gva)
143 {
144 	struct sgx_secs *contents = (struct sgx_secs *)pageinfo->contents;
145 	struct kvm_cpuid_entry2 *sgx_12_0, *sgx_12_1;
146 	u64 attributes, xfrm, size;
147 	u32 miscselect;
148 	u8 max_size_log2;
149 	int trapnr, ret;
150 
151 	sgx_12_0 = kvm_find_cpuid_entry_index(vcpu, 0x12, 0);
152 	sgx_12_1 = kvm_find_cpuid_entry_index(vcpu, 0x12, 1);
153 	if (!sgx_12_0 || !sgx_12_1) {
154 		kvm_prepare_emulation_failure_exit(vcpu);
155 		return 0;
156 	}
157 
158 	miscselect = contents->miscselect;
159 	attributes = contents->attributes;
160 	xfrm = contents->xfrm;
161 	size = contents->size;
162 
163 	/* Enforce restriction of access to the PROVISIONKEY. */
164 	if (!vcpu->kvm->arch.sgx_provisioning_allowed &&
165 	    (attributes & SGX_ATTR_PROVISIONKEY)) {
166 		if (sgx_12_1->eax & SGX_ATTR_PROVISIONKEY)
167 			pr_warn_once("KVM: SGX PROVISIONKEY advertised but not allowed\n");
168 		kvm_inject_gp(vcpu, 0);
169 		return 1;
170 	}
171 
172 	/* Enforce CPUID restrictions on MISCSELECT, ATTRIBUTES and XFRM. */
173 	if ((u32)miscselect & ~sgx_12_0->ebx ||
174 	    (u32)attributes & ~sgx_12_1->eax ||
175 	    (u32)(attributes >> 32) & ~sgx_12_1->ebx ||
176 	    (u32)xfrm & ~sgx_12_1->ecx ||
177 	    (u32)(xfrm >> 32) & ~sgx_12_1->edx) {
178 		kvm_inject_gp(vcpu, 0);
179 		return 1;
180 	}
181 
182 	/* Enforce CPUID restriction on max enclave size. */
183 	max_size_log2 = (attributes & SGX_ATTR_MODE64BIT) ? sgx_12_0->edx >> 8 :
184 							    sgx_12_0->edx;
185 	if (size >= BIT_ULL(max_size_log2)) {
186 		kvm_inject_gp(vcpu, 0);
187 		return 1;
188 	}
189 
190 	/*
191 	 * sgx_virt_ecreate() returns:
192 	 *  1) 0:	ECREATE was successful
193 	 *  2) -EFAULT:	ECREATE was run but faulted, and trapnr was set to the
194 	 *		exception number.
195 	 *  3) -EINVAL:	access_ok() on @secs_hva failed. This should never
196 	 *		happen as KVM checks host addresses at memslot creation.
197 	 *		sgx_virt_ecreate() has already warned in this case.
198 	 */
199 	ret = sgx_virt_ecreate(pageinfo, (void __user *)secs_hva, &trapnr);
200 	if (!ret)
201 		return kvm_skip_emulated_instruction(vcpu);
202 	if (ret == -EFAULT)
203 		return sgx_inject_fault(vcpu, secs_gva, trapnr);
204 
205 	return ret;
206 }
207 
208 static int handle_encls_ecreate(struct kvm_vcpu *vcpu)
209 {
210 	gva_t pageinfo_gva, secs_gva;
211 	gva_t metadata_gva, contents_gva;
212 	gpa_t metadata_gpa, contents_gpa, secs_gpa;
213 	unsigned long metadata_hva, contents_hva, secs_hva;
214 	struct sgx_pageinfo pageinfo;
215 	struct sgx_secs *contents;
216 	struct x86_exception ex;
217 	int r;
218 
219 	if (sgx_get_encls_gva(vcpu, kvm_rbx_read(vcpu), 32, 32, &pageinfo_gva) ||
220 	    sgx_get_encls_gva(vcpu, kvm_rcx_read(vcpu), 4096, 4096, &secs_gva))
221 		return 1;
222 
223 	/*
224 	 * Copy the PAGEINFO to local memory, its pointers need to be
225 	 * translated, i.e. we need to do a deep copy/translate.
226 	 */
227 	r = kvm_read_guest_virt(vcpu, pageinfo_gva, &pageinfo,
228 				sizeof(pageinfo), &ex);
229 	if (r == X86EMUL_PROPAGATE_FAULT) {
230 		kvm_inject_emulated_page_fault(vcpu, &ex);
231 		return 1;
232 	} else if (r != X86EMUL_CONTINUE) {
233 		sgx_handle_emulation_failure(vcpu, pageinfo_gva,
234 					     sizeof(pageinfo));
235 		return 0;
236 	}
237 
238 	if (sgx_get_encls_gva(vcpu, pageinfo.metadata, 64, 64, &metadata_gva) ||
239 	    sgx_get_encls_gva(vcpu, pageinfo.contents, 4096, 4096,
240 			      &contents_gva))
241 		return 1;
242 
243 	/*
244 	 * Translate the SECINFO, SOURCE and SECS pointers from GVA to GPA.
245 	 * Resume the guest on failure to inject a #PF.
246 	 */
247 	if (sgx_gva_to_gpa(vcpu, metadata_gva, false, &metadata_gpa) ||
248 	    sgx_gva_to_gpa(vcpu, contents_gva, false, &contents_gpa) ||
249 	    sgx_gva_to_gpa(vcpu, secs_gva, true, &secs_gpa))
250 		return 1;
251 
252 	/*
253 	 * ...and then to HVA.  The order of accesses isn't architectural, i.e.
254 	 * KVM doesn't have to fully process one address at a time.  Exit to
255 	 * userspace if a GPA is invalid.
256 	 */
257 	if (sgx_gpa_to_hva(vcpu, metadata_gpa, &metadata_hva) ||
258 	    sgx_gpa_to_hva(vcpu, contents_gpa, &contents_hva) ||
259 	    sgx_gpa_to_hva(vcpu, secs_gpa, &secs_hva))
260 		return 0;
261 
262 	/*
263 	 * Copy contents into kernel memory to prevent TOCTOU attack. E.g. the
264 	 * guest could do ECREATE w/ SECS.SGX_ATTR_PROVISIONKEY=0, and
265 	 * simultaneously set SGX_ATTR_PROVISIONKEY to bypass the check to
266 	 * enforce restriction of access to the PROVISIONKEY.
267 	 */
268 	contents = (struct sgx_secs *)__get_free_page(GFP_KERNEL_ACCOUNT);
269 	if (!contents)
270 		return -ENOMEM;
271 
272 	/* Exit to userspace if copying from a host userspace address fails. */
273 	if (sgx_read_hva(vcpu, contents_hva, (void *)contents, PAGE_SIZE)) {
274 		free_page((unsigned long)contents);
275 		return 0;
276 	}
277 
278 	pageinfo.metadata = metadata_hva;
279 	pageinfo.contents = (u64)contents;
280 
281 	r = __handle_encls_ecreate(vcpu, &pageinfo, secs_hva, secs_gva);
282 
283 	free_page((unsigned long)contents);
284 
285 	return r;
286 }
287 
288 static int handle_encls_einit(struct kvm_vcpu *vcpu)
289 {
290 	unsigned long sig_hva, secs_hva, token_hva, rflags;
291 	struct vcpu_vmx *vmx = to_vmx(vcpu);
292 	gva_t sig_gva, secs_gva, token_gva;
293 	gpa_t sig_gpa, secs_gpa, token_gpa;
294 	int ret, trapnr;
295 
296 	if (sgx_get_encls_gva(vcpu, kvm_rbx_read(vcpu), 1808, 4096, &sig_gva) ||
297 	    sgx_get_encls_gva(vcpu, kvm_rcx_read(vcpu), 4096, 4096, &secs_gva) ||
298 	    sgx_get_encls_gva(vcpu, kvm_rdx_read(vcpu), 304, 512, &token_gva))
299 		return 1;
300 
301 	/*
302 	 * Translate the SIGSTRUCT, SECS and TOKEN pointers from GVA to GPA.
303 	 * Resume the guest on failure to inject a #PF.
304 	 */
305 	if (sgx_gva_to_gpa(vcpu, sig_gva, false, &sig_gpa) ||
306 	    sgx_gva_to_gpa(vcpu, secs_gva, true, &secs_gpa) ||
307 	    sgx_gva_to_gpa(vcpu, token_gva, false, &token_gpa))
308 		return 1;
309 
310 	/*
311 	 * ...and then to HVA.  The order of accesses isn't architectural, i.e.
312 	 * KVM doesn't have to fully process one address at a time.  Exit to
313 	 * userspace if a GPA is invalid.  Note, all structures are aligned and
314 	 * cannot split pages.
315 	 */
316 	if (sgx_gpa_to_hva(vcpu, sig_gpa, &sig_hva) ||
317 	    sgx_gpa_to_hva(vcpu, secs_gpa, &secs_hva) ||
318 	    sgx_gpa_to_hva(vcpu, token_gpa, &token_hva))
319 		return 0;
320 
321 	ret = sgx_virt_einit((void __user *)sig_hva, (void __user *)token_hva,
322 			     (void __user *)secs_hva,
323 			     vmx->msr_ia32_sgxlepubkeyhash, &trapnr);
324 
325 	if (ret == -EFAULT)
326 		return sgx_inject_fault(vcpu, secs_gva, trapnr);
327 
328 	/*
329 	 * sgx_virt_einit() returns -EINVAL when access_ok() fails on @sig_hva,
330 	 * @token_hva or @secs_hva. This should never happen as KVM checks host
331 	 * addresses at memslot creation. sgx_virt_einit() has already warned
332 	 * in this case, so just return.
333 	 */
334 	if (ret < 0)
335 		return ret;
336 
337 	rflags = vmx_get_rflags(vcpu) & ~(X86_EFLAGS_CF | X86_EFLAGS_PF |
338 					  X86_EFLAGS_AF | X86_EFLAGS_SF |
339 					  X86_EFLAGS_OF);
340 	if (ret)
341 		rflags |= X86_EFLAGS_ZF;
342 	else
343 		rflags &= ~X86_EFLAGS_ZF;
344 	vmx_set_rflags(vcpu, rflags);
345 
346 	kvm_rax_write(vcpu, ret);
347 	return kvm_skip_emulated_instruction(vcpu);
348 }
349 
350 static inline bool encls_leaf_enabled_in_guest(struct kvm_vcpu *vcpu, u32 leaf)
351 {
352 	if (!enable_sgx || !guest_cpuid_has(vcpu, X86_FEATURE_SGX))
353 		return false;
354 
355 	if (leaf >= ECREATE && leaf <= ETRACK)
356 		return guest_cpuid_has(vcpu, X86_FEATURE_SGX1);
357 
358 	if (leaf >= EAUG && leaf <= EMODT)
359 		return guest_cpuid_has(vcpu, X86_FEATURE_SGX2);
360 
361 	return false;
362 }
363 
364 static inline bool sgx_enabled_in_guest_bios(struct kvm_vcpu *vcpu)
365 {
366 	const u64 bits = FEAT_CTL_SGX_ENABLED | FEAT_CTL_LOCKED;
367 
368 	return (to_vmx(vcpu)->msr_ia32_feature_control & bits) == bits;
369 }
370 
371 int handle_encls(struct kvm_vcpu *vcpu)
372 {
373 	u32 leaf = (u32)kvm_rax_read(vcpu);
374 
375 	if (!encls_leaf_enabled_in_guest(vcpu, leaf)) {
376 		kvm_queue_exception(vcpu, UD_VECTOR);
377 	} else if (!sgx_enabled_in_guest_bios(vcpu)) {
378 		kvm_inject_gp(vcpu, 0);
379 	} else {
380 		if (leaf == ECREATE)
381 			return handle_encls_ecreate(vcpu);
382 		if (leaf == EINIT)
383 			return handle_encls_einit(vcpu);
384 		WARN(1, "KVM: unexpected exit on ENCLS[%u]", leaf);
385 		vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
386 		vcpu->run->hw.hardware_exit_reason = EXIT_REASON_ENCLS;
387 		return 0;
388 	}
389 	return 1;
390 }
391 
392 void setup_default_sgx_lepubkeyhash(void)
393 {
394 	/*
395 	 * Use Intel's default value for Skylake hardware if Launch Control is
396 	 * not supported, i.e. Intel's hash is hardcoded into silicon, or if
397 	 * Launch Control is supported and enabled, i.e. mimic the reset value
398 	 * and let the guest write the MSRs at will.  If Launch Control is
399 	 * supported but disabled, then use the current MSR values as the hash
400 	 * MSRs exist but are read-only (locked and not writable).
401 	 */
402 	if (!enable_sgx || boot_cpu_has(X86_FEATURE_SGX_LC) ||
403 	    rdmsrl_safe(MSR_IA32_SGXLEPUBKEYHASH0, &sgx_pubkey_hash[0])) {
404 		sgx_pubkey_hash[0] = 0xa6053e051270b7acULL;
405 		sgx_pubkey_hash[1] = 0x6cfbe8ba8b3b413dULL;
406 		sgx_pubkey_hash[2] = 0xc4916d99f2b3735dULL;
407 		sgx_pubkey_hash[3] = 0xd4f8c05909f9bb3bULL;
408 	} else {
409 		/* MSR_IA32_SGXLEPUBKEYHASH0 is read above */
410 		rdmsrl(MSR_IA32_SGXLEPUBKEYHASH1, sgx_pubkey_hash[1]);
411 		rdmsrl(MSR_IA32_SGXLEPUBKEYHASH2, sgx_pubkey_hash[2]);
412 		rdmsrl(MSR_IA32_SGXLEPUBKEYHASH3, sgx_pubkey_hash[3]);
413 	}
414 }
415 
416 void vcpu_setup_sgx_lepubkeyhash(struct kvm_vcpu *vcpu)
417 {
418 	struct vcpu_vmx *vmx = to_vmx(vcpu);
419 
420 	memcpy(vmx->msr_ia32_sgxlepubkeyhash, sgx_pubkey_hash,
421 	       sizeof(sgx_pubkey_hash));
422 }
423 
424 /*
425  * ECREATE must be intercepted to enforce MISCSELECT, ATTRIBUTES and XFRM
426  * restrictions if the guest's allowed-1 settings diverge from hardware.
427  */
428 static bool sgx_intercept_encls_ecreate(struct kvm_vcpu *vcpu)
429 {
430 	struct kvm_cpuid_entry2 *guest_cpuid;
431 	u32 eax, ebx, ecx, edx;
432 
433 	if (!vcpu->kvm->arch.sgx_provisioning_allowed)
434 		return true;
435 
436 	guest_cpuid = kvm_find_cpuid_entry_index(vcpu, 0x12, 0);
437 	if (!guest_cpuid)
438 		return true;
439 
440 	cpuid_count(0x12, 0, &eax, &ebx, &ecx, &edx);
441 	if (guest_cpuid->ebx != ebx || guest_cpuid->edx != edx)
442 		return true;
443 
444 	guest_cpuid = kvm_find_cpuid_entry_index(vcpu, 0x12, 1);
445 	if (!guest_cpuid)
446 		return true;
447 
448 	cpuid_count(0x12, 1, &eax, &ebx, &ecx, &edx);
449 	if (guest_cpuid->eax != eax || guest_cpuid->ebx != ebx ||
450 	    guest_cpuid->ecx != ecx || guest_cpuid->edx != edx)
451 		return true;
452 
453 	return false;
454 }
455 
456 void vmx_write_encls_bitmap(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
457 {
458 	/*
459 	 * There is no software enable bit for SGX that is virtualized by
460 	 * hardware, e.g. there's no CR4.SGXE, so when SGX is disabled in the
461 	 * guest (either by the host or by the guest's BIOS) but enabled in the
462 	 * host, trap all ENCLS leafs and inject #UD/#GP as needed to emulate
463 	 * the expected system behavior for ENCLS.
464 	 */
465 	u64 bitmap = -1ull;
466 
467 	/* Nothing to do if hardware doesn't support SGX */
468 	if (!cpu_has_vmx_encls_vmexit())
469 		return;
470 
471 	if (guest_cpuid_has(vcpu, X86_FEATURE_SGX) &&
472 	    sgx_enabled_in_guest_bios(vcpu)) {
473 		if (guest_cpuid_has(vcpu, X86_FEATURE_SGX1)) {
474 			bitmap &= ~GENMASK_ULL(ETRACK, ECREATE);
475 			if (sgx_intercept_encls_ecreate(vcpu))
476 				bitmap |= (1 << ECREATE);
477 		}
478 
479 		if (guest_cpuid_has(vcpu, X86_FEATURE_SGX2))
480 			bitmap &= ~GENMASK_ULL(EMODT, EAUG);
481 
482 		/*
483 		 * Trap and execute EINIT if launch control is enabled in the
484 		 * host using the guest's values for launch control MSRs, even
485 		 * if the guest's values are fixed to hardware default values.
486 		 * The MSRs are not loaded/saved on VM-Enter/VM-Exit as writing
487 		 * the MSRs is extraordinarily expensive.
488 		 */
489 		if (boot_cpu_has(X86_FEATURE_SGX_LC))
490 			bitmap |= (1 << EINIT);
491 
492 		if (!vmcs12 && is_guest_mode(vcpu))
493 			vmcs12 = get_vmcs12(vcpu);
494 		if (vmcs12 && nested_cpu_has_encls_exit(vmcs12))
495 			bitmap |= vmcs12->encls_exiting_bitmap;
496 	}
497 	vmcs_write64(ENCLS_EXITING_BITMAP, bitmap);
498 }
499