xref: /linux/arch/x86/kvm/svm/sev.c (revision 9009b455811b0fa1f6b0adfa94db136984db5a38)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Kernel-based Virtual Machine driver for Linux
4  *
5  * AMD SVM-SEV support
6  *
7  * Copyright 2010 Red Hat, Inc. and/or its affiliates.
8  */
9 
10 #include <linux/kvm_types.h>
11 #include <linux/kvm_host.h>
12 #include <linux/kernel.h>
13 #include <linux/highmem.h>
14 #include <linux/psp-sev.h>
15 #include <linux/pagemap.h>
16 #include <linux/swap.h>
17 #include <linux/misc_cgroup.h>
18 #include <linux/processor.h>
19 #include <linux/trace_events.h>
20 #include <asm/fpu/internal.h>
21 
22 #include <asm/trapnr.h>
23 
24 #include "x86.h"
25 #include "svm.h"
26 #include "svm_ops.h"
27 #include "cpuid.h"
28 #include "trace.h"
29 
30 #define __ex(x) __kvm_handle_fault_on_reboot(x)
31 
32 #ifndef CONFIG_KVM_AMD_SEV
33 /*
34  * When this config is not defined, the SEV feature is not supported and the
35  * APIs in this file are not used, but this file still gets compiled into the
36  * KVM AMD module.
37  *
38  * There will be no MISC_CG_RES_SEV and MISC_CG_RES_SEV_ES entries in the enum
39  * misc_res_type {} defined in linux/misc_cgroup.h.
40  *
41  * The macros below allow compilation to succeed.
42  */
43 #define MISC_CG_RES_SEV MISC_CG_RES_TYPES
44 #define MISC_CG_RES_SEV_ES MISC_CG_RES_TYPES
45 #endif
46 
47 static u8 sev_enc_bit;
48 static int sev_flush_asids(void);
49 static DECLARE_RWSEM(sev_deactivate_lock);
50 static DEFINE_MUTEX(sev_bitmap_lock);
51 unsigned int max_sev_asid;
52 static unsigned int min_sev_asid;
53 static unsigned long *sev_asid_bitmap;
54 static unsigned long *sev_reclaim_asid_bitmap;
55 
56 struct enc_region {
57 	struct list_head list;
58 	unsigned long npages;
59 	struct page **pages;
60 	unsigned long uaddr;
61 	unsigned long size;
62 };
63 
64 static int sev_flush_asids(void)
65 {
66 	int ret, error = 0;
67 
68 	/*
69 	 * DEACTIVATE will clear the WBINVD indicator causing DF_FLUSH to fail,
70 	 * so it must be guarded.
71 	 */
72 	down_write(&sev_deactivate_lock);
73 
74 	wbinvd_on_all_cpus();
75 	ret = sev_guest_df_flush(&error);
76 
77 	up_write(&sev_deactivate_lock);
78 
79 	if (ret)
80 		pr_err("SEV: DF_FLUSH failed, ret=%d, error=%#x\n", ret, error);
81 
82 	return ret;
83 }
84 
85 /* Must be called with the sev_bitmap_lock held */
86 static bool __sev_recycle_asids(int min_asid, int max_asid)
87 {
88 	int pos;
89 
90 	/* Check if there are any ASIDs to reclaim before performing a flush */
91 	pos = find_next_bit(sev_reclaim_asid_bitmap, max_sev_asid, min_asid);
92 	if (pos >= max_asid)
93 		return false;
94 
95 	if (sev_flush_asids())
96 		return false;
97 
98 	/* The flush process will flush all reclaimable SEV and SEV-ES ASIDs */
99 	bitmap_xor(sev_asid_bitmap, sev_asid_bitmap, sev_reclaim_asid_bitmap,
100 		   max_sev_asid);
101 	bitmap_zero(sev_reclaim_asid_bitmap, max_sev_asid);
102 
103 	return true;
104 }
105 
106 static int sev_asid_new(struct kvm_sev_info *sev)
107 {
108 	int pos, min_asid, max_asid, ret;
109 	bool retry = true;
110 	enum misc_res_type type;
111 
112 	type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV;
113 	WARN_ON(sev->misc_cg);
114 	sev->misc_cg = get_current_misc_cg();
115 	ret = misc_cg_try_charge(type, sev->misc_cg, 1);
116 	if (ret) {
117 		put_misc_cg(sev->misc_cg);
118 		sev->misc_cg = NULL;
119 		return ret;
120 	}
121 
122 	mutex_lock(&sev_bitmap_lock);
123 
124 	/*
125 	 * SEV-enabled guests must use ASIDs from min_sev_asid to max_sev_asid.
126 	 * SEV-ES-enabled guests can use ASIDs from 1 to min_sev_asid - 1.
127 	 */
128 	min_asid = sev->es_active ? 0 : min_sev_asid - 1;
129 	max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid;
130 again:
131 	pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_asid);
132 	if (pos >= max_asid) {
133 		if (retry && __sev_recycle_asids(min_asid, max_asid)) {
134 			retry = false;
135 			goto again;
136 		}
137 		mutex_unlock(&sev_bitmap_lock);
138 		ret = -EBUSY;
139 		goto e_uncharge;
140 	}
141 
142 	__set_bit(pos, sev_asid_bitmap);
143 
144 	mutex_unlock(&sev_bitmap_lock);
145 
146 	return pos + 1;
147 e_uncharge:
148 	misc_cg_uncharge(type, sev->misc_cg, 1);
149 	put_misc_cg(sev->misc_cg);
150 	sev->misc_cg = NULL;
151 	return ret;
152 }
153 
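/* Return the ASID assigned to this SEV guest's encryption context. */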
154 static int sev_get_asid(struct kvm *kvm)
155 {
156 	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
157 
158 	return sev->asid;
159 }
160 
161 static void sev_asid_free(struct kvm_sev_info *sev)
162 {
163 	struct svm_cpu_data *sd;
164 	int cpu, pos;
165 	enum misc_res_type type;
166 
167 	mutex_lock(&sev_bitmap_lock);
168 
169 	pos = sev->asid - 1;
170 	__set_bit(pos, sev_reclaim_asid_bitmap);
171 
172 	for_each_possible_cpu(cpu) {
173 		sd = per_cpu(svm_data, cpu);
174 		sd->sev_vmcbs[pos] = NULL;
175 	}
176 
177 	mutex_unlock(&sev_bitmap_lock);
178 
179 	type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV;
180 	misc_cg_uncharge(type, sev->misc_cg, 1);
181 	put_misc_cg(sev->misc_cg);
182 	sev->misc_cg = NULL;
183 }
184 
185 static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
186 {
187 	struct sev_data_decommission *decommission;
188 	struct sev_data_deactivate *data;
189 
190 	if (!handle)
191 		return;
192 
193 	data = kzalloc(sizeof(*data), GFP_KERNEL);
194 	if (!data)
195 		return;
196 
197 	/* deactivate handle */
198 	data->handle = handle;
199 
200 	/* Guard DEACTIVATE against WBINVD/DF_FLUSH used in ASID recycling */
201 	down_read(&sev_deactivate_lock);
202 	sev_guest_deactivate(data, NULL);
203 	up_read(&sev_deactivate_lock);
204 
205 	kfree(data);
206 
207 	decommission = kzalloc(sizeof(*decommission), GFP_KERNEL);
208 	if (!decommission)
209 		return;
210 
211 	/* decommission handle */
212 	decommission->handle = handle;
213 	sev_guest_decommission(decommission, NULL);
214 
215 	kfree(decommission);
216 }
217 
218 static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
219 {
220 	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
221 	int asid, ret;
222 
223 	ret = -EBUSY;
224 	if (unlikely(sev->active))
225 		return ret;
226 
227 	asid = sev_asid_new(sev);
228 	if (asid < 0)
229 		return ret;
230 	sev->asid = asid;
231 
232 	ret = sev_platform_init(&argp->error);
233 	if (ret)
234 		goto e_free;
235 
236 	sev->active = true;
237 	INIT_LIST_HEAD(&sev->regions_list);
238 
239 	return 0;
240 
241 e_free:
242 	sev_asid_free(sev);
243 	sev->asid = 0;
244 	return ret;
245 }
246 
247 static int sev_es_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
248 {
249 	if (!sev_es)
250 		return -ENOTTY;
251 
252 	to_kvm_svm(kvm)->sev_info.es_active = true;
253 
254 	return sev_guest_init(kvm, argp);
255 }
256 
257 static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error)
258 {
259 	struct sev_data_activate *data;
260 	int asid = sev_get_asid(kvm);
261 	int ret;
262 
263 	data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
264 	if (!data)
265 		return -ENOMEM;
266 
267 	/* activate ASID on the given handle */
268 	data->handle = handle;
269 	data->asid   = asid;
270 	ret = sev_guest_activate(data, error);
271 	kfree(data);
272 
273 	return ret;
274 }
275 
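/*
 * Issue a SEV command on behalf of userspace, using the SEV device file
 * descriptor that userspace passed in.
 */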
276 static int __sev_issue_cmd(int fd, int id, void *data, int *error)
277 {
278 	struct fd f;
279 	int ret;
280 
281 	f = fdget(fd);
282 	if (!f.file)
283 		return -EBADF;
284 
285 	ret = sev_issue_cmd_external_user(f.file, id, data, error);
286 
287 	fdput(f);
288 	return ret;
289 }
290 
291 static int sev_issue_cmd(struct kvm *kvm, int id, void *data, int *error)
292 {
293 	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
294 
295 	return __sev_issue_cmd(sev->fd, id, data, error);
296 }
297 
298 static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
299 {
300 	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
301 	struct sev_data_launch_start *start;
302 	struct kvm_sev_launch_start params;
303 	void *dh_blob, *session_blob;
304 	int *error = &argp->error;
305 	int ret;
306 
307 	if (!sev_guest(kvm))
308 		return -ENOTTY;
309 
310 	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
311 		return -EFAULT;
312 
313 	start = kzalloc(sizeof(*start), GFP_KERNEL_ACCOUNT);
314 	if (!start)
315 		return -ENOMEM;
316 
317 	dh_blob = NULL;
318 	if (params.dh_uaddr) {
319 		dh_blob = psp_copy_user_blob(params.dh_uaddr, params.dh_len);
320 		if (IS_ERR(dh_blob)) {
321 			ret = PTR_ERR(dh_blob);
322 			goto e_free;
323 		}
324 
325 		start->dh_cert_address = __sme_set(__pa(dh_blob));
326 		start->dh_cert_len = params.dh_len;
327 	}
328 
329 	session_blob = NULL;
330 	if (params.session_uaddr) {
331 		session_blob = psp_copy_user_blob(params.session_uaddr, params.session_len);
332 		if (IS_ERR(session_blob)) {
333 			ret = PTR_ERR(session_blob);
334 			goto e_free_dh;
335 		}
336 
337 		start->session_address = __sme_set(__pa(session_blob));
338 		start->session_len = params.session_len;
339 	}
340 
341 	start->handle = params.handle;
342 	start->policy = params.policy;
343 
344 	/* create memory encryption context */
345 	ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_LAUNCH_START, start, error);
346 	if (ret)
347 		goto e_free_session;
348 
349 	/* Bind ASID to this guest */
350 	ret = sev_bind_asid(kvm, start->handle, error);
351 	if (ret)
352 		goto e_free_session;
353 
354 	/* return handle to userspace */
355 	params.handle = start->handle;
356 	if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params))) {
357 		sev_unbind_asid(kvm, start->handle);
358 		ret = -EFAULT;
359 		goto e_free_session;
360 	}
361 
362 	sev->handle = start->handle;
363 	sev->fd = argp->sev_fd;
364 
365 e_free_session:
366 	kfree(session_blob);
367 e_free_dh:
368 	kfree(dh_blob);
369 e_free:
370 	kfree(start);
371 	return ret;
372 }
373 
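/*
 * Pin the userspace address range [uaddr, uaddr + ulen) and charge the pages
 * against the MEMLOCK rlimit. Returns the array of pinned pages (to be
 * released with sev_unpin_memory()) or an ERR_PTR, and stores the page count
 * in *n.
 */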
374 static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
375 				    unsigned long ulen, unsigned long *n,
376 				    int write)
377 {
378 	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
379 	unsigned long npages, size;
380 	int npinned;
381 	unsigned long locked, lock_limit;
382 	struct page **pages;
383 	unsigned long first, last;
384 	int ret;
385 
386 	lockdep_assert_held(&kvm->lock);
387 
388 	if (ulen == 0 || uaddr + ulen < uaddr)
389 		return ERR_PTR(-EINVAL);
390 
391 	/* Calculate number of pages. */
392 	first = (uaddr & PAGE_MASK) >> PAGE_SHIFT;
393 	last = ((uaddr + ulen - 1) & PAGE_MASK) >> PAGE_SHIFT;
394 	npages = (last - first + 1);
395 
396 	locked = sev->pages_locked + npages;
397 	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
398 	if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
399 		pr_err("SEV: %lu locked pages exceed the lock limit of %lu.\n", locked, lock_limit);
400 		return ERR_PTR(-ENOMEM);
401 	}
402 
403 	if (WARN_ON_ONCE(npages > INT_MAX))
404 		return ERR_PTR(-EINVAL);
405 
406 	/* Avoid using vmalloc for smaller buffers. */
407 	size = npages * sizeof(struct page *);
408 	if (size > PAGE_SIZE)
409 		pages = __vmalloc(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
410 	else
411 		pages = kmalloc(size, GFP_KERNEL_ACCOUNT);
412 
413 	if (!pages)
414 		return ERR_PTR(-ENOMEM);
415 
416 	/* Pin the user virtual address. */
417 	npinned = pin_user_pages_fast(uaddr, npages, write ? FOLL_WRITE : 0, pages);
418 	if (npinned != npages) {
419 		pr_err("SEV: Failure locking %lu pages.\n", npages);
420 		ret = -ENOMEM;
421 		goto err;
422 	}
423 
424 	*n = npages;
425 	sev->pages_locked = locked;
426 
427 	return pages;
428 
429 err:
430 	if (npinned > 0)
431 		unpin_user_pages(pages, npinned);
432 
433 	kvfree(pages);
434 	return ERR_PTR(ret);
435 }
436 
437 static void sev_unpin_memory(struct kvm *kvm, struct page **pages,
438 			     unsigned long npages)
439 {
440 	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
441 
442 	unpin_user_pages(pages, npages);
443 	kvfree(pages);
444 	sev->pages_locked -= npages;
445 }
446 
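/*
 * Flush the given pages from the caches. This is a nop on CPUs that enforce
 * cache coherency for encrypted mappings (X86_FEATURE_SME_COHERENT).
 */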
447 static void sev_clflush_pages(struct page *pages[], unsigned long npages)
448 {
449 	uint8_t *page_virtual;
450 	unsigned long i;
451 
452 	if (this_cpu_has(X86_FEATURE_SME_COHERENT) || npages == 0 ||
453 	    pages == NULL)
454 		return;
455 
456 	for (i = 0; i < npages; i++) {
457 		page_virtual = kmap_atomic(pages[i]);
458 		clflush_cache_range(page_virtual, PAGE_SIZE);
459 		kunmap_atomic(page_virtual);
460 	}
461 }
462 
463 static unsigned long get_num_contig_pages(unsigned long idx,
464 				struct page **inpages, unsigned long npages)
465 {
466 	unsigned long paddr, next_paddr;
467 	unsigned long i = idx + 1, pages = 1;
468 
469 	/* find the number of contiguous pages starting from idx */
470 	paddr = __sme_page_pa(inpages[idx]);
471 	while (i < npages) {
472 		next_paddr = __sme_page_pa(inpages[i++]);
473 		if ((paddr + PAGE_SIZE) == next_paddr) {
474 			pages++;
475 			paddr = next_paddr;
476 			continue;
477 		}
478 		break;
479 	}
480 
481 	return pages;
482 }
483 
484 static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
485 {
486 	unsigned long vaddr, vaddr_end, next_vaddr, npages, pages, size, i;
487 	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
488 	struct kvm_sev_launch_update_data params;
489 	struct sev_data_launch_update_data *data;
490 	struct page **inpages;
491 	int ret;
492 
493 	if (!sev_guest(kvm))
494 		return -ENOTTY;
495 
496 	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
497 		return -EFAULT;
498 
499 	data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
500 	if (!data)
501 		return -ENOMEM;
502 
503 	vaddr = params.uaddr;
504 	size = params.len;
505 	vaddr_end = vaddr + size;
506 
507 	/* Lock the user memory. */
508 	inpages = sev_pin_memory(kvm, vaddr, size, &npages, 1);
509 	if (IS_ERR(inpages)) {
510 		ret = PTR_ERR(inpages);
511 		goto e_free;
512 	}
513 
514 	/*
515 	 * Flush (on non-coherent CPUs) before LAUNCH_UPDATE encrypts pages in
516 	 * place; the cache may contain the data that was written unencrypted.
517 	 */
518 	sev_clflush_pages(inpages, npages);
519 
520 	for (i = 0; vaddr < vaddr_end; vaddr = next_vaddr, i += pages) {
521 		int offset, len;
522 
523 		/*
524 		 * If the user buffer is not page-aligned, calculate the offset
525 		 * within the page.
526 		 */
527 		offset = vaddr & (PAGE_SIZE - 1);
528 
529 		/* Calculate the number of pages that can be encrypted in one go. */
530 		pages = get_num_contig_pages(i, inpages, npages);
531 
532 		len = min_t(size_t, ((pages * PAGE_SIZE) - offset), size);
533 
534 		data->handle = sev->handle;
535 		data->len = len;
536 		data->address = __sme_page_pa(inpages[i]) + offset;
537 		ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_DATA, data, &argp->error);
538 		if (ret)
539 			goto e_unpin;
540 
541 		size -= len;
542 		next_vaddr = vaddr + len;
543 	}
544 
545 e_unpin:
546 	/* content of memory is updated, mark pages dirty */
547 	for (i = 0; i < npages; i++) {
548 		set_page_dirty_lock(inpages[i]);
549 		mark_page_accessed(inpages[i]);
550 	}
551 	/* unlock the user pages */
552 	sev_unpin_memory(kvm, inpages, npages);
553 e_free:
554 	kfree(data);
555 	return ret;
556 }
557 
558 static int sev_es_sync_vmsa(struct vcpu_svm *svm)
559 {
560 	struct vmcb_save_area *save = &svm->vmcb->save;
561 
562 	/* Check some debug-related fields before encrypting the VMSA */
563 	if (svm->vcpu.guest_debug || (save->dr7 & ~DR7_FIXED_1))
564 		return -EINVAL;
565 
566 	/* Sync registers */
567 	save->rax = svm->vcpu.arch.regs[VCPU_REGS_RAX];
568 	save->rbx = svm->vcpu.arch.regs[VCPU_REGS_RBX];
569 	save->rcx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
570 	save->rdx = svm->vcpu.arch.regs[VCPU_REGS_RDX];
571 	save->rsp = svm->vcpu.arch.regs[VCPU_REGS_RSP];
572 	save->rbp = svm->vcpu.arch.regs[VCPU_REGS_RBP];
573 	save->rsi = svm->vcpu.arch.regs[VCPU_REGS_RSI];
574 	save->rdi = svm->vcpu.arch.regs[VCPU_REGS_RDI];
575 #ifdef CONFIG_X86_64
576 	save->r8  = svm->vcpu.arch.regs[VCPU_REGS_R8];
577 	save->r9  = svm->vcpu.arch.regs[VCPU_REGS_R9];
578 	save->r10 = svm->vcpu.arch.regs[VCPU_REGS_R10];
579 	save->r11 = svm->vcpu.arch.regs[VCPU_REGS_R11];
580 	save->r12 = svm->vcpu.arch.regs[VCPU_REGS_R12];
581 	save->r13 = svm->vcpu.arch.regs[VCPU_REGS_R13];
582 	save->r14 = svm->vcpu.arch.regs[VCPU_REGS_R14];
583 	save->r15 = svm->vcpu.arch.regs[VCPU_REGS_R15];
584 #endif
585 	save->rip = svm->vcpu.arch.regs[VCPU_REGS_RIP];
586 
587 	/* Sync some non-GPR registers before encrypting */
588 	save->xcr0 = svm->vcpu.arch.xcr0;
589 	save->pkru = svm->vcpu.arch.pkru;
590 	save->xss  = svm->vcpu.arch.ia32_xss;
591 
592 	/*
593 	 * SEV-ES will use a VMSA that is pointed to by the VMCB, not
594 	 * the traditional VMSA that is part of the VMCB. Copy the
595 	 * traditional VMSA as it has been built so far (in prep
596 	 * for LAUNCH_UPDATE_VMSA) to be the initial SEV-ES state.
597 	 */
598 	memcpy(svm->vmsa, save, sizeof(*save));
599 
600 	return 0;
601 }
602 
603 static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
604 {
605 	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
606 	struct sev_data_launch_update_vmsa *vmsa;
607 	int i, ret;
608 
609 	if (!sev_es_guest(kvm))
610 		return -ENOTTY;
611 
612 	vmsa = kzalloc(sizeof(*vmsa), GFP_KERNEL);
613 	if (!vmsa)
614 		return -ENOMEM;
615 
616 	for (i = 0; i < kvm->created_vcpus; i++) {
617 		struct vcpu_svm *svm = to_svm(kvm->vcpus[i]);
618 
619 		/* Perform some pre-encryption checks against the VMSA */
620 		ret = sev_es_sync_vmsa(svm);
621 		if (ret)
622 			goto e_free;
623 
624 		/*
625 		 * The LAUNCH_UPDATE_VMSA command will perform in-place
626 		 * encryption of the VMSA memory content (i.e. it will write
627 		 * the same memory region with the guest's key), so invalidate
628 		 * it first.
629 		 */
630 		clflush_cache_range(svm->vmsa, PAGE_SIZE);
631 
632 		vmsa->handle = sev->handle;
633 		vmsa->address = __sme_pa(svm->vmsa);
634 		vmsa->len = PAGE_SIZE;
635 		ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, vmsa,
636 				    &argp->error);
637 		if (ret)
638 			goto e_free;
639 
640 		svm->vcpu.arch.guest_state_protected = true;
641 	}
642 
643 e_free:
644 	kfree(vmsa);
645 	return ret;
646 }
647 
648 static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
649 {
650 	void __user *measure = (void __user *)(uintptr_t)argp->data;
651 	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
652 	struct sev_data_launch_measure *data;
653 	struct kvm_sev_launch_measure params;
654 	void __user *p = NULL;
655 	void *blob = NULL;
656 	int ret;
657 
658 	if (!sev_guest(kvm))
659 		return -ENOTTY;
660 
661 	if (copy_from_user(&params, measure, sizeof(params)))
662 		return -EFAULT;
663 
664 	data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
665 	if (!data)
666 		return -ENOMEM;
667 
668 	/* User wants to query the blob length */
669 	if (!params.len)
670 		goto cmd;
671 
672 	p = (void __user *)(uintptr_t)params.uaddr;
673 	if (p) {
674 		if (params.len > SEV_FW_BLOB_MAX_SIZE) {
675 			ret = -EINVAL;
676 			goto e_free;
677 		}
678 
679 		ret = -ENOMEM;
680 		blob = kmalloc(params.len, GFP_KERNEL);
681 		if (!blob)
682 			goto e_free;
683 
684 		data->address = __psp_pa(blob);
685 		data->len = params.len;
686 	}
687 
688 cmd:
689 	data->handle = sev->handle;
690 	ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_MEASURE, data, &argp->error);
691 
692 	/*
693 	 * If we only queried the blob length, the FW has filled in data->len.
694 	 */
695 	if (!params.len)
696 		goto done;
697 
698 	if (ret)
699 		goto e_free_blob;
700 
701 	if (blob) {
702 		if (copy_to_user(p, blob, params.len))
703 			ret = -EFAULT;
704 	}
705 
706 done:
707 	params.len = data->len;
708 	if (copy_to_user(measure, &params, sizeof(params)))
709 		ret = -EFAULT;
710 e_free_blob:
711 	kfree(blob);
712 e_free:
713 	kfree(data);
714 	return ret;
715 }
716 
717 static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
718 {
719 	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
720 	struct sev_data_launch_finish *data;
721 	int ret;
722 
723 	if (!sev_guest(kvm))
724 		return -ENOTTY;
725 
726 	data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
727 	if (!data)
728 		return -ENOMEM;
729 
730 	data->handle = sev->handle;
731 	ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_FINISH, data, &argp->error);
732 
733 	kfree(data);
734 	return ret;
735 }
736 
737 static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp)
738 {
739 	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
740 	struct kvm_sev_guest_status params;
741 	struct sev_data_guest_status *data;
742 	int ret;
743 
744 	if (!sev_guest(kvm))
745 		return -ENOTTY;
746 
747 	data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
748 	if (!data)
749 		return -ENOMEM;
750 
751 	data->handle = sev->handle;
752 	ret = sev_issue_cmd(kvm, SEV_CMD_GUEST_STATUS, data, &argp->error);
753 	if (ret)
754 		goto e_free;
755 
756 	params.policy = data->policy;
757 	params.state = data->state;
758 	params.handle = data->handle;
759 
760 	if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params)))
761 		ret = -EFAULT;
762 e_free:
763 	kfree(data);
764 	return ret;
765 }
766 
767 static int __sev_issue_dbg_cmd(struct kvm *kvm, unsigned long src,
768 			       unsigned long dst, int size,
769 			       int *error, bool enc)
770 {
771 	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
772 	struct sev_data_dbg *data;
773 	int ret;
774 
775 	data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
776 	if (!data)
777 		return -ENOMEM;
778 
779 	data->handle = sev->handle;
780 	data->dst_addr = dst;
781 	data->src_addr = src;
782 	data->len = size;
783 
784 	ret = sev_issue_cmd(kvm,
785 			    enc ? SEV_CMD_DBG_ENCRYPT : SEV_CMD_DBG_DECRYPT,
786 			    data, error);
787 	kfree(data);
788 	return ret;
789 }
790 
791 static int __sev_dbg_decrypt(struct kvm *kvm, unsigned long src_paddr,
792 			     unsigned long dst_paddr, int sz, int *err)
793 {
794 	int offset;
795 
796 	/*
797 	 * It's safe to read more than we are asked for; the caller should ensure
798 	 * that the destination has enough space.
799 	 */
800 	offset = src_paddr & 15;
801 	src_paddr = round_down(src_paddr, 16);
802 	sz = round_up(sz + offset, 16);
803 
804 	return __sev_issue_dbg_cmd(kvm, src_paddr, dst_paddr, sz, err, false);
805 }
806 
807 static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr,
808 				  unsigned long __user dst_uaddr,
809 				  unsigned long dst_paddr,
810 				  int size, int *err)
811 {
812 	struct page *tpage = NULL;
813 	int ret, offset;
814 
815 	/* If the inputs are not 16-byte aligned then use an intermediate buffer */
816 	if (!IS_ALIGNED(dst_paddr, 16) ||
817 	    !IS_ALIGNED(paddr,     16) ||
818 	    !IS_ALIGNED(size,      16)) {
819 		tpage = (void *)alloc_page(GFP_KERNEL);
820 		if (!tpage)
821 			return -ENOMEM;
822 
823 		dst_paddr = __sme_page_pa(tpage);
824 	}
825 
826 	ret = __sev_dbg_decrypt(kvm, paddr, dst_paddr, size, err);
827 	if (ret)
828 		goto e_free;
829 
830 	if (tpage) {
831 		offset = paddr & 15;
832 		if (copy_to_user((void __user *)(uintptr_t)dst_uaddr,
833 				 page_address(tpage) + offset, size))
834 			ret = -EFAULT;
835 	}
836 
837 e_free:
838 	if (tpage)
839 		__free_page(tpage);
840 
841 	return ret;
842 }
843 
844 static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
845 				  unsigned long __user vaddr,
846 				  unsigned long dst_paddr,
847 				  unsigned long __user dst_vaddr,
848 				  int size, int *error)
849 {
850 	struct page *src_tpage = NULL;
851 	struct page *dst_tpage = NULL;
852 	int ret, len = size;
853 
854 	/* If the source buffer is not 16-byte aligned then use an intermediate buffer */
855 	if (!IS_ALIGNED(vaddr, 16)) {
856 		src_tpage = alloc_page(GFP_KERNEL);
857 		if (!src_tpage)
858 			return -ENOMEM;
859 
860 		if (copy_from_user(page_address(src_tpage),
861 				(void __user *)(uintptr_t)vaddr, size)) {
862 			__free_page(src_tpage);
863 			return -EFAULT;
864 		}
865 
866 		paddr = __sme_page_pa(src_tpage);
867 	}
868 
869 	/*
870 	 *  If the destination buffer or length is not 16-byte aligned, do a read-modify-write:
871 	 *   - decrypt the destination into an intermediate buffer
872 	 *   - copy the source buffer into an intermediate buffer
873 	 *   - use the intermediate buffer as the source buffer
874 	 */
875 	if (!IS_ALIGNED(dst_vaddr, 16) || !IS_ALIGNED(size, 16)) {
876 		int dst_offset;
877 
878 		dst_tpage = alloc_page(GFP_KERNEL);
879 		if (!dst_tpage) {
880 			ret = -ENOMEM;
881 			goto e_free;
882 		}
883 
884 		ret = __sev_dbg_decrypt(kvm, dst_paddr,
885 					__sme_page_pa(dst_tpage), size, error);
886 		if (ret)
887 			goto e_free;
888 
889 		/*
890 		 *  If the source is a kernel buffer then use memcpy(), otherwise
891 		 *  copy_from_user().
892 		 */
893 		dst_offset = dst_paddr & 15;
894 
895 		if (src_tpage)
896 			memcpy(page_address(dst_tpage) + dst_offset,
897 			       page_address(src_tpage), size);
898 		else {
899 			if (copy_from_user(page_address(dst_tpage) + dst_offset,
900 					   (void __user *)(uintptr_t)vaddr, size)) {
901 				ret = -EFAULT;
902 				goto e_free;
903 			}
904 		}
905 
906 		paddr = __sme_page_pa(dst_tpage);
907 		dst_paddr = round_down(dst_paddr, 16);
908 		len = round_up(size, 16);
909 	}
910 
911 	ret = __sev_issue_dbg_cmd(kvm, paddr, dst_paddr, len, error, true);
912 
913 e_free:
914 	if (src_tpage)
915 		__free_page(src_tpage);
916 	if (dst_tpage)
917 		__free_page(dst_tpage);
918 	return ret;
919 }
920 
921 static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
922 {
923 	unsigned long vaddr, vaddr_end, next_vaddr;
924 	unsigned long dst_vaddr;
925 	struct page **src_p, **dst_p;
926 	struct kvm_sev_dbg debug;
927 	unsigned long n;
928 	unsigned int size;
929 	int ret;
930 
931 	if (!sev_guest(kvm))
932 		return -ENOTTY;
933 
934 	if (copy_from_user(&debug, (void __user *)(uintptr_t)argp->data, sizeof(debug)))
935 		return -EFAULT;
936 
937 	if (!debug.len || debug.src_uaddr + debug.len < debug.src_uaddr)
938 		return -EINVAL;
939 	if (!debug.dst_uaddr)
940 		return -EINVAL;
941 
942 	vaddr = debug.src_uaddr;
943 	size = debug.len;
944 	vaddr_end = vaddr + size;
945 	dst_vaddr = debug.dst_uaddr;
946 
947 	for (; vaddr < vaddr_end; vaddr = next_vaddr) {
948 		int len, s_off, d_off;
949 
950 		/* lock userspace source and destination page */
951 		src_p = sev_pin_memory(kvm, vaddr & PAGE_MASK, PAGE_SIZE, &n, 0);
952 		if (IS_ERR(src_p))
953 			return PTR_ERR(src_p);
954 
955 		dst_p = sev_pin_memory(kvm, dst_vaddr & PAGE_MASK, PAGE_SIZE, &n, 1);
956 		if (IS_ERR(dst_p)) {
957 			sev_unpin_memory(kvm, src_p, n);
958 			return PTR_ERR(dst_p);
959 		}
960 
961 		/*
962 		 * Flush (on non-coherent CPUs) before DBG_{DE,EN}CRYPT read or modify
963 		 * the pages; flush the destination too so that future accesses do not
964 		 * see stale data.
965 		 */
966 		sev_clflush_pages(src_p, 1);
967 		sev_clflush_pages(dst_p, 1);
968 
969 		/*
970 		 * Since user buffer may not be page aligned, calculate the
971 		 * offset within the page.
972 		 */
973 		s_off = vaddr & ~PAGE_MASK;
974 		d_off = dst_vaddr & ~PAGE_MASK;
975 		len = min_t(size_t, (PAGE_SIZE - s_off), size);
976 
977 		if (dec)
978 			ret = __sev_dbg_decrypt_user(kvm,
979 						     __sme_page_pa(src_p[0]) + s_off,
980 						     dst_vaddr,
981 						     __sme_page_pa(dst_p[0]) + d_off,
982 						     len, &argp->error);
983 		else
984 			ret = __sev_dbg_encrypt_user(kvm,
985 						     __sme_page_pa(src_p[0]) + s_off,
986 						     vaddr,
987 						     __sme_page_pa(dst_p[0]) + d_off,
988 						     dst_vaddr,
989 						     len, &argp->error);
990 
991 		sev_unpin_memory(kvm, src_p, n);
992 		sev_unpin_memory(kvm, dst_p, n);
993 
994 		if (ret)
995 			goto err;
996 
997 		next_vaddr = vaddr + len;
998 		dst_vaddr = dst_vaddr + len;
999 		size -= len;
1000 	}
1001 err:
1002 	return ret;
1003 }
1004 
1005 static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
1006 {
1007 	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
1008 	struct sev_data_launch_secret *data;
1009 	struct kvm_sev_launch_secret params;
1010 	struct page **pages;
1011 	void *blob, *hdr;
1012 	unsigned long n, i;
1013 	int ret, offset;
1014 
1015 	if (!sev_guest(kvm))
1016 		return -ENOTTY;
1017 
1018 	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
1019 		return -EFAULT;
1020 
1021 	pages = sev_pin_memory(kvm, params.guest_uaddr, params.guest_len, &n, 1);
1022 	if (IS_ERR(pages))
1023 		return PTR_ERR(pages);
1024 
1025 	/*
1026 	 * Flush (on non-coherent CPUs) before LAUNCH_SECRET encrypts pages in
1027 	 * place; the cache may contain the data that was written unencrypted.
1028 	 */
1029 	sev_clflush_pages(pages, n);
1030 
1031 	/*
1032 	 * The secret must be copied into a contiguous memory region; verify that
1033 	 * the userspace memory pages are contiguous before issuing the command.
1034 	 */
1035 	if (get_num_contig_pages(0, pages, n) != n) {
1036 		ret = -EINVAL;
1037 		goto e_unpin_memory;
1038 	}
1039 
1040 	ret = -ENOMEM;
1041 	data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
1042 	if (!data)
1043 		goto e_unpin_memory;
1044 
1045 	offset = params.guest_uaddr & (PAGE_SIZE - 1);
1046 	data->guest_address = __sme_page_pa(pages[0]) + offset;
1047 	data->guest_len = params.guest_len;
1048 
1049 	blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len);
1050 	if (IS_ERR(blob)) {
1051 		ret = PTR_ERR(blob);
1052 		goto e_free;
1053 	}
1054 
1055 	data->trans_address = __psp_pa(blob);
1056 	data->trans_len = params.trans_len;
1057 
1058 	hdr = psp_copy_user_blob(params.hdr_uaddr, params.hdr_len);
1059 	if (IS_ERR(hdr)) {
1060 		ret = PTR_ERR(hdr);
1061 		goto e_free_blob;
1062 	}
1063 	data->hdr_address = __psp_pa(hdr);
1064 	data->hdr_len = params.hdr_len;
1065 
1066 	data->handle = sev->handle;
1067 	ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, data, &argp->error);
1068 
1069 	kfree(hdr);
1070 
1071 e_free_blob:
1072 	kfree(blob);
1073 e_free:
1074 	kfree(data);
1075 e_unpin_memory:
1076 	/* content of memory is updated, mark pages dirty */
1077 	for (i = 0; i < n; i++) {
1078 		set_page_dirty_lock(pages[i]);
1079 		mark_page_accessed(pages[i]);
1080 	}
1081 	sev_unpin_memory(kvm, pages, n);
1082 	return ret;
1083 }
1084 
1085 static int sev_get_attestation_report(struct kvm *kvm, struct kvm_sev_cmd *argp)
1086 {
1087 	void __user *report = (void __user *)(uintptr_t)argp->data;
1088 	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
1089 	struct sev_data_attestation_report *data;
1090 	struct kvm_sev_attestation_report params;
1091 	void __user *p;
1092 	void *blob = NULL;
1093 	int ret;
1094 
1095 	if (!sev_guest(kvm))
1096 		return -ENOTTY;
1097 
1098 	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
1099 		return -EFAULT;
1100 
1101 	data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
1102 	if (!data)
1103 		return -ENOMEM;
1104 
1105 	/* User wants to query the blob length */
1106 	if (!params.len)
1107 		goto cmd;
1108 
1109 	p = (void __user *)(uintptr_t)params.uaddr;
1110 	if (p) {
1111 		if (params.len > SEV_FW_BLOB_MAX_SIZE) {
1112 			ret = -EINVAL;
1113 			goto e_free;
1114 		}
1115 
1116 		ret = -ENOMEM;
1117 		blob = kmalloc(params.len, GFP_KERNEL);
1118 		if (!blob)
1119 			goto e_free;
1120 
1121 		data->address = __psp_pa(blob);
1122 		data->len = params.len;
1123 		memcpy(data->mnonce, params.mnonce, sizeof(params.mnonce));
1124 	}
1125 cmd:
1126 	data->handle = sev->handle;
1127 	ret = sev_issue_cmd(kvm, SEV_CMD_ATTESTATION_REPORT, data, &argp->error);
1128 	/*
1129 	 * If we only queried the blob length, the FW has filled in data->len.
1130 	 */
1131 	if (!params.len)
1132 		goto done;
1133 
1134 	if (ret)
1135 		goto e_free_blob;
1136 
1137 	if (blob) {
1138 		if (copy_to_user(p, blob, params.len))
1139 			ret = -EFAULT;
1140 	}
1141 
1142 done:
1143 	params.len = data->len;
1144 	if (copy_to_user(report, &params, sizeof(params)))
1145 		ret = -EFAULT;
1146 e_free_blob:
1147 	kfree(blob);
1148 e_free:
1149 	kfree(data);
1150 	return ret;
1151 }
1152 
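/*
 * Dispatcher for the KVM_MEMORY_ENCRYPT_OP ioctl on the VM file descriptor.
 * Userspace passes a struct kvm_sev_cmd identifying the SEV command to run,
 * e.g. (illustrative userspace sketch, not part of this file):
 *
 *	struct kvm_sev_cmd cmd = { .id = KVM_SEV_INIT };
 *	ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);
 *
 * The firmware error code, if any, is returned in cmd.error.
 */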
1153 int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
1154 {
1155 	struct kvm_sev_cmd sev_cmd;
1156 	int r;
1157 
1158 	if (!svm_sev_enabled() || !sev)
1159 		return -ENOTTY;
1160 
1161 	if (!argp)
1162 		return 0;
1163 
1164 	if (copy_from_user(&sev_cmd, argp, sizeof(struct kvm_sev_cmd)))
1165 		return -EFAULT;
1166 
1167 	mutex_lock(&kvm->lock);
1168 
1169 	switch (sev_cmd.id) {
1170 	case KVM_SEV_INIT:
1171 		r = sev_guest_init(kvm, &sev_cmd);
1172 		break;
1173 	case KVM_SEV_ES_INIT:
1174 		r = sev_es_guest_init(kvm, &sev_cmd);
1175 		break;
1176 	case KVM_SEV_LAUNCH_START:
1177 		r = sev_launch_start(kvm, &sev_cmd);
1178 		break;
1179 	case KVM_SEV_LAUNCH_UPDATE_DATA:
1180 		r = sev_launch_update_data(kvm, &sev_cmd);
1181 		break;
1182 	case KVM_SEV_LAUNCH_UPDATE_VMSA:
1183 		r = sev_launch_update_vmsa(kvm, &sev_cmd);
1184 		break;
1185 	case KVM_SEV_LAUNCH_MEASURE:
1186 		r = sev_launch_measure(kvm, &sev_cmd);
1187 		break;
1188 	case KVM_SEV_LAUNCH_FINISH:
1189 		r = sev_launch_finish(kvm, &sev_cmd);
1190 		break;
1191 	case KVM_SEV_GUEST_STATUS:
1192 		r = sev_guest_status(kvm, &sev_cmd);
1193 		break;
1194 	case KVM_SEV_DBG_DECRYPT:
1195 		r = sev_dbg_crypt(kvm, &sev_cmd, true);
1196 		break;
1197 	case KVM_SEV_DBG_ENCRYPT:
1198 		r = sev_dbg_crypt(kvm, &sev_cmd, false);
1199 		break;
1200 	case KVM_SEV_LAUNCH_SECRET:
1201 		r = sev_launch_secret(kvm, &sev_cmd);
1202 		break;
1203 	case KVM_SEV_GET_ATTESTATION_REPORT:
1204 		r = sev_get_attestation_report(kvm, &sev_cmd);
1205 		break;
1206 	default:
1207 		r = -EINVAL;
1208 		goto out;
1209 	}
1210 
1211 	if (copy_to_user(argp, &sev_cmd, sizeof(struct kvm_sev_cmd)))
1212 		r = -EFAULT;
1213 
1214 out:
1215 	mutex_unlock(&kvm->lock);
1216 	return r;
1217 }
1218 
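/*
 * Back the KVM_MEMORY_ENCRYPT_REG_REGION ioctl: pin the userspace range that
 * will hold guest memory and remember it so it can be unpinned when the VM is
 * destroyed or the region is explicitly unregistered.
 */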
1219 int svm_register_enc_region(struct kvm *kvm,
1220 			    struct kvm_enc_region *range)
1221 {
1222 	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
1223 	struct enc_region *region;
1224 	int ret = 0;
1225 
1226 	if (!sev_guest(kvm))
1227 		return -ENOTTY;
1228 
1229 	if (range->addr > ULONG_MAX || range->size > ULONG_MAX)
1230 		return -EINVAL;
1231 
1232 	region = kzalloc(sizeof(*region), GFP_KERNEL_ACCOUNT);
1233 	if (!region)
1234 		return -ENOMEM;
1235 
1236 	mutex_lock(&kvm->lock);
1237 	region->pages = sev_pin_memory(kvm, range->addr, range->size, &region->npages, 1);
1238 	if (IS_ERR(region->pages)) {
1239 		ret = PTR_ERR(region->pages);
1240 		mutex_unlock(&kvm->lock);
1241 		goto e_free;
1242 	}
1243 
1244 	region->uaddr = range->addr;
1245 	region->size = range->size;
1246 
1247 	list_add_tail(&region->list, &sev->regions_list);
1248 	mutex_unlock(&kvm->lock);
1249 
1250 	/*
1251 	 * The guest may change the memory encryption attribute from C=0 -> C=1
1252 	 * or vice versa for this memory range. Make sure the caches are
1253 	 * flushed so that guest data gets written into memory with the
1254 	 * correct C-bit.
1255 	 */
1256 	sev_clflush_pages(region->pages, region->npages);
1257 
1258 	return ret;
1259 
1260 e_free:
1261 	kfree(region);
1262 	return ret;
1263 }
1264 
1265 static struct enc_region *
1266 find_enc_region(struct kvm *kvm, struct kvm_enc_region *range)
1267 {
1268 	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
1269 	struct list_head *head = &sev->regions_list;
1270 	struct enc_region *i;
1271 
1272 	list_for_each_entry(i, head, list) {
1273 		if (i->uaddr == range->addr &&
1274 		    i->size == range->size)
1275 			return i;
1276 	}
1277 
1278 	return NULL;
1279 }
1280 
1281 static void __unregister_enc_region_locked(struct kvm *kvm,
1282 					   struct enc_region *region)
1283 {
1284 	sev_unpin_memory(kvm, region->pages, region->npages);
1285 	list_del(&region->list);
1286 	kfree(region);
1287 }
1288 
1289 int svm_unregister_enc_region(struct kvm *kvm,
1290 			      struct kvm_enc_region *range)
1291 {
1292 	struct enc_region *region;
1293 	int ret;
1294 
1295 	mutex_lock(&kvm->lock);
1296 
1297 	if (!sev_guest(kvm)) {
1298 		ret = -ENOTTY;
1299 		goto failed;
1300 	}
1301 
1302 	region = find_enc_region(kvm, range);
1303 	if (!region) {
1304 		ret = -EINVAL;
1305 		goto failed;
1306 	}
1307 
1308 	/*
1309 	 * Ensure that all guest tagged cache entries are flushed before
1310 	 * releasing the pages back to the system for use. CLFLUSH will
1311 	 * not do this, so issue a WBINVD.
1312 	 */
1313 	wbinvd_on_all_cpus();
1314 
1315 	__unregister_enc_region_locked(kvm, region);
1316 
1317 	mutex_unlock(&kvm->lock);
1318 	return 0;
1319 
1320 failed:
1321 	mutex_unlock(&kvm->lock);
1322 	return ret;
1323 }
1324 
1325 void sev_vm_destroy(struct kvm *kvm)
1326 {
1327 	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
1328 	struct list_head *head = &sev->regions_list;
1329 	struct list_head *pos, *q;
1330 
1331 	if (!sev_guest(kvm))
1332 		return;
1333 
1334 	mutex_lock(&kvm->lock);
1335 
1336 	/*
1337 	 * Ensure that all guest tagged cache entries are flushed before
1338 	 * releasing the pages back to the system for use. CLFLUSH will
1339 	 * not do this, so issue a WBINVD.
1340 	 */
1341 	wbinvd_on_all_cpus();
1342 
1343 	/*
1344 	 * If userspace was terminated before unregistering the memory regions,
1345 	 * unpin all the registered memory.
1346 	 */
1347 	if (!list_empty(head)) {
1348 		list_for_each_safe(pos, q, head) {
1349 			__unregister_enc_region_locked(kvm,
1350 				list_entry(pos, struct enc_region, list));
1351 			cond_resched();
1352 		}
1353 	}
1354 
1355 	mutex_unlock(&kvm->lock);
1356 
1357 	sev_unbind_asid(kvm, sev->handle);
1358 	sev_asid_free(sev);
1359 }
1360 
1361 void __init sev_hardware_setup(void)
1362 {
1363 	unsigned int eax, ebx, ecx, edx, sev_asid_count, sev_es_asid_count;
1364 	bool sev_es_supported = false;
1365 	bool sev_supported = false;
1366 
1367 	/* Does the CPU support SEV? */
1368 	if (!boot_cpu_has(X86_FEATURE_SEV))
1369 		goto out;
1370 
1371 	/* Retrieve SEV CPUID information */
1372 	cpuid(0x8000001f, &eax, &ebx, &ecx, &edx);
1373 
1374 	/* Set encryption bit location for SEV-ES guests */
1375 	sev_enc_bit = ebx & 0x3f;
1376 
1377 	/* Maximum number of encrypted guests supported simultaneously */
1378 	max_sev_asid = ecx;
1379 
1380 	if (!svm_sev_enabled())
1381 		goto out;
1382 
1383 	/* Minimum ASID value that should be used for SEV guest */
1384 	min_sev_asid = edx;
1385 
1386 	/* Initialize SEV ASID bitmaps */
1387 	sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
1388 	if (!sev_asid_bitmap)
1389 		goto out;
1390 
1391 	sev_reclaim_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
1392 	if (!sev_reclaim_asid_bitmap)
1393 		goto out;
1394 
1395 	sev_asid_count = max_sev_asid - min_sev_asid + 1;
1396 	if (misc_cg_set_capacity(MISC_CG_RES_SEV, sev_asid_count))
1397 		goto out;
1398 
1399 	pr_info("SEV supported: %u ASIDs\n", sev_asid_count);
1400 	sev_supported = true;
1401 
1402 	/* SEV-ES support requested? */
1403 	if (!sev_es)
1404 		goto out;
1405 
1406 	/* Does the CPU support SEV-ES? */
1407 	if (!boot_cpu_has(X86_FEATURE_SEV_ES))
1408 		goto out;
1409 
1410 	/* Has the system been allocated ASIDs for SEV-ES? */
1411 	if (min_sev_asid == 1)
1412 		goto out;
1413 
1414 	sev_es_asid_count = min_sev_asid - 1;
1415 	if (misc_cg_set_capacity(MISC_CG_RES_SEV_ES, sev_es_asid_count))
1416 		goto out;
1417 
1418 	pr_info("SEV-ES supported: %u ASIDs\n", sev_es_asid_count);
1419 	sev_es_supported = true;
1420 
1421 out:
1422 	sev = sev_supported;
1423 	sev_es = sev_es_supported;
1424 }
1425 
1426 void sev_hardware_teardown(void)
1427 {
1428 	if (!svm_sev_enabled())
1429 		return;
1430 
1431 	bitmap_free(sev_asid_bitmap);
1432 	bitmap_free(sev_reclaim_asid_bitmap);
1433 	misc_cg_set_capacity(MISC_CG_RES_SEV, 0);
1434 	misc_cg_set_capacity(MISC_CG_RES_SEV_ES, 0);
1435 
1436 	sev_flush_asids();
1437 }
1438 
1439 /*
1440  * Pages used by hardware to hold guest encrypted state must be flushed before
1441  * returning them to the system.
1442  */
1443 static void sev_flush_guest_memory(struct vcpu_svm *svm, void *va,
1444 				   unsigned long len)
1445 {
1446 	/*
1447 	 * If hardware-enforced cache coherency for encrypted mappings of the
1448 	 * same physical page is supported, there is nothing to do.
1449 	 */
1450 	if (boot_cpu_has(X86_FEATURE_SME_COHERENT))
1451 		return;
1452 
1453 	/*
1454 	 * If the VM Page Flush MSR is supported, use it to flush the page
1455 	 * (using the page virtual address and the guest ASID).
1456 	 */
1457 	if (boot_cpu_has(X86_FEATURE_VM_PAGE_FLUSH)) {
1458 		struct kvm_sev_info *sev;
1459 		unsigned long va_start;
1460 		u64 start, stop;
1461 
1462 		/* Align start and stop to page boundaries. */
1463 		va_start = (unsigned long)va;
1464 		start = (u64)va_start & PAGE_MASK;
1465 		stop = PAGE_ALIGN((u64)va_start + len);
1466 
1467 		if (start < stop) {
1468 			sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info;
1469 
1470 			while (start < stop) {
1471 				wrmsrl(MSR_AMD64_VM_PAGE_FLUSH,
1472 				       start | sev->asid);
1473 
1474 				start += PAGE_SIZE;
1475 			}
1476 
1477 			return;
1478 		}
1479 
1480 		WARN(1, "Address overflow, using WBINVD\n");
1481 	}
1482 
1483 	/*
1484 	 * Hardware should always have one of the above features,
1485 	 * but if not, use WBINVD and issue a warning.
1486 	 */
1487 	WARN_ONCE(1, "Using WBINVD to flush guest memory\n");
1488 	wbinvd_on_all_cpus();
1489 }
1490 
1491 void sev_free_vcpu(struct kvm_vcpu *vcpu)
1492 {
1493 	struct vcpu_svm *svm;
1494 
1495 	if (!sev_es_guest(vcpu->kvm))
1496 		return;
1497 
1498 	svm = to_svm(vcpu);
1499 
1500 	if (vcpu->arch.guest_state_protected)
1501 		sev_flush_guest_memory(svm, svm->vmsa, PAGE_SIZE);
1502 	__free_page(virt_to_page(svm->vmsa));
1503 
1504 	if (svm->ghcb_sa_free)
1505 		kfree(svm->ghcb_sa);
1506 }
1507 
1508 static void dump_ghcb(struct vcpu_svm *svm)
1509 {
1510 	struct ghcb *ghcb = svm->ghcb;
1511 	unsigned int nbits;
1512 
1513 	/* Re-use the dump_invalid_vmcb module parameter */
1514 	if (!dump_invalid_vmcb) {
1515 		pr_warn_ratelimited("set kvm_amd.dump_invalid_vmcb=1 to dump internal KVM state.\n");
1516 		return;
1517 	}
1518 
1519 	nbits = sizeof(ghcb->save.valid_bitmap) * 8;
1520 
1521 	pr_err("GHCB (GPA=%016llx):\n", svm->vmcb->control.ghcb_gpa);
1522 	pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_code",
1523 	       ghcb->save.sw_exit_code, ghcb_sw_exit_code_is_valid(ghcb));
1524 	pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_1",
1525 	       ghcb->save.sw_exit_info_1, ghcb_sw_exit_info_1_is_valid(ghcb));
1526 	pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_2",
1527 	       ghcb->save.sw_exit_info_2, ghcb_sw_exit_info_2_is_valid(ghcb));
1528 	pr_err("%-20s%016llx is_valid: %u\n", "sw_scratch",
1529 	       ghcb->save.sw_scratch, ghcb_sw_scratch_is_valid(ghcb));
1530 	pr_err("%-20s%*pb\n", "valid_bitmap", nbits, ghcb->save.valid_bitmap);
1531 }
1532 
1533 static void sev_es_sync_to_ghcb(struct vcpu_svm *svm)
1534 {
1535 	struct kvm_vcpu *vcpu = &svm->vcpu;
1536 	struct ghcb *ghcb = svm->ghcb;
1537 
1538 	/*
1539 	 * The GHCB protocol so far allows for the following data
1540 	 * to be returned:
1541 	 *   GPRs RAX, RBX, RCX, RDX
1542 	 *
1543 	 * Copy their values, even if they may not have been written during the
1544 	 * VM-Exit.  It's the guest's responsibility to not consume random data.
1545 	 */
1546 	ghcb_set_rax(ghcb, vcpu->arch.regs[VCPU_REGS_RAX]);
1547 	ghcb_set_rbx(ghcb, vcpu->arch.regs[VCPU_REGS_RBX]);
1548 	ghcb_set_rcx(ghcb, vcpu->arch.regs[VCPU_REGS_RCX]);
1549 	ghcb_set_rdx(ghcb, vcpu->arch.regs[VCPU_REGS_RDX]);
1550 }
1551 
1552 static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
1553 {
1554 	struct vmcb_control_area *control = &svm->vmcb->control;
1555 	struct kvm_vcpu *vcpu = &svm->vcpu;
1556 	struct ghcb *ghcb = svm->ghcb;
1557 	u64 exit_code;
1558 
1559 	/*
1560 	 * The GHCB protocol so far allows for the following data
1561 	 * to be supplied:
1562 	 *   GPRs RAX, RBX, RCX, RDX
1563 	 *   XCR0
1564 	 *   CPL
1565 	 *
1566 	 * VMMCALL allows the guest to provide extra registers. KVM also
1567 	 * expects RSI for hypercalls, so include that, too.
1568 	 *
1569 	 * Copy their values to the appropriate location if supplied.
1570 	 */
1571 	memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
1572 
1573 	vcpu->arch.regs[VCPU_REGS_RAX] = ghcb_get_rax_if_valid(ghcb);
1574 	vcpu->arch.regs[VCPU_REGS_RBX] = ghcb_get_rbx_if_valid(ghcb);
1575 	vcpu->arch.regs[VCPU_REGS_RCX] = ghcb_get_rcx_if_valid(ghcb);
1576 	vcpu->arch.regs[VCPU_REGS_RDX] = ghcb_get_rdx_if_valid(ghcb);
1577 	vcpu->arch.regs[VCPU_REGS_RSI] = ghcb_get_rsi_if_valid(ghcb);
1578 
1579 	svm->vmcb->save.cpl = ghcb_get_cpl_if_valid(ghcb);
1580 
1581 	if (ghcb_xcr0_is_valid(ghcb)) {
1582 		vcpu->arch.xcr0 = ghcb_get_xcr0(ghcb);
1583 		kvm_update_cpuid_runtime(vcpu);
1584 	}
1585 
1586 	/* Copy the GHCB exit information into the VMCB fields */
1587 	exit_code = ghcb_get_sw_exit_code(ghcb);
1588 	control->exit_code = lower_32_bits(exit_code);
1589 	control->exit_code_hi = upper_32_bits(exit_code);
1590 	control->exit_info_1 = ghcb_get_sw_exit_info_1(ghcb);
1591 	control->exit_info_2 = ghcb_get_sw_exit_info_2(ghcb);
1592 
1593 	/* Clear the valid entries fields */
1594 	memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
1595 }
1596 
1597 static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
1598 {
1599 	struct kvm_vcpu *vcpu;
1600 	struct ghcb *ghcb;
1601 	u64 exit_code = 0;
1602 
1603 	ghcb = svm->ghcb;
1604 
1605 	/* Only GHCB Usage code 0 is supported */
1606 	if (ghcb->ghcb_usage)
1607 		goto vmgexit_err;
1608 
1609 	/*
1610 	 * Retrieve the exit code now even though it may not be marked valid
1611 	 * as it could help with debugging.
1612 	 */
1613 	exit_code = ghcb_get_sw_exit_code(ghcb);
1614 
1615 	if (!ghcb_sw_exit_code_is_valid(ghcb) ||
1616 	    !ghcb_sw_exit_info_1_is_valid(ghcb) ||
1617 	    !ghcb_sw_exit_info_2_is_valid(ghcb))
1618 		goto vmgexit_err;
1619 
1620 	switch (ghcb_get_sw_exit_code(ghcb)) {
1621 	case SVM_EXIT_READ_DR7:
1622 		break;
1623 	case SVM_EXIT_WRITE_DR7:
1624 		if (!ghcb_rax_is_valid(ghcb))
1625 			goto vmgexit_err;
1626 		break;
1627 	case SVM_EXIT_RDTSC:
1628 		break;
1629 	case SVM_EXIT_RDPMC:
1630 		if (!ghcb_rcx_is_valid(ghcb))
1631 			goto vmgexit_err;
1632 		break;
1633 	case SVM_EXIT_CPUID:
1634 		if (!ghcb_rax_is_valid(ghcb) ||
1635 		    !ghcb_rcx_is_valid(ghcb))
1636 			goto vmgexit_err;
1637 		if (ghcb_get_rax(ghcb) == 0xd)
1638 			if (!ghcb_xcr0_is_valid(ghcb))
1639 				goto vmgexit_err;
1640 		break;
1641 	case SVM_EXIT_INVD:
1642 		break;
1643 	case SVM_EXIT_IOIO:
1644 		if (ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_STR_MASK) {
1645 			if (!ghcb_sw_scratch_is_valid(ghcb))
1646 				goto vmgexit_err;
1647 		} else {
1648 			if (!(ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_TYPE_MASK))
1649 				if (!ghcb_rax_is_valid(ghcb))
1650 					goto vmgexit_err;
1651 		}
1652 		break;
1653 	case SVM_EXIT_MSR:
1654 		if (!ghcb_rcx_is_valid(ghcb))
1655 			goto vmgexit_err;
1656 		if (ghcb_get_sw_exit_info_1(ghcb)) {
1657 			if (!ghcb_rax_is_valid(ghcb) ||
1658 			    !ghcb_rdx_is_valid(ghcb))
1659 				goto vmgexit_err;
1660 		}
1661 		break;
1662 	case SVM_EXIT_VMMCALL:
1663 		if (!ghcb_rax_is_valid(ghcb) ||
1664 		    !ghcb_cpl_is_valid(ghcb))
1665 			goto vmgexit_err;
1666 		break;
1667 	case SVM_EXIT_RDTSCP:
1668 		break;
1669 	case SVM_EXIT_WBINVD:
1670 		break;
1671 	case SVM_EXIT_MONITOR:
1672 		if (!ghcb_rax_is_valid(ghcb) ||
1673 		    !ghcb_rcx_is_valid(ghcb) ||
1674 		    !ghcb_rdx_is_valid(ghcb))
1675 			goto vmgexit_err;
1676 		break;
1677 	case SVM_EXIT_MWAIT:
1678 		if (!ghcb_rax_is_valid(ghcb) ||
1679 		    !ghcb_rcx_is_valid(ghcb))
1680 			goto vmgexit_err;
1681 		break;
1682 	case SVM_VMGEXIT_MMIO_READ:
1683 	case SVM_VMGEXIT_MMIO_WRITE:
1684 		if (!ghcb_sw_scratch_is_valid(ghcb))
1685 			goto vmgexit_err;
1686 		break;
1687 	case SVM_VMGEXIT_NMI_COMPLETE:
1688 	case SVM_VMGEXIT_AP_HLT_LOOP:
1689 	case SVM_VMGEXIT_AP_JUMP_TABLE:
1690 	case SVM_VMGEXIT_UNSUPPORTED_EVENT:
1691 		break;
1692 	default:
1693 		goto vmgexit_err;
1694 	}
1695 
1696 	return 0;
1697 
1698 vmgexit_err:
1699 	vcpu = &svm->vcpu;
1700 
1701 	if (ghcb->ghcb_usage) {
1702 		vcpu_unimpl(vcpu, "vmgexit: ghcb usage %#x is not valid\n",
1703 			    ghcb->ghcb_usage);
1704 	} else {
1705 		vcpu_unimpl(vcpu, "vmgexit: exit reason %#llx is not valid\n",
1706 			    exit_code);
1707 		dump_ghcb(svm);
1708 	}
1709 
1710 	vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
1711 	vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
1712 	vcpu->run->internal.ndata = 2;
1713 	vcpu->run->internal.data[0] = exit_code;
1714 	vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
1715 
1716 	return -EINVAL;
1717 }
1718 
1719 static void pre_sev_es_run(struct vcpu_svm *svm)
1720 {
1721 	if (!svm->ghcb)
1722 		return;
1723 
1724 	if (svm->ghcb_sa_free) {
1725 		/*
1726 		 * The scratch area lives outside the GHCB, so there is a
1727 		 * buffer that, depending on the operation performed, may
1728 		 * need to be synced, then freed.
1729 		 */
1730 		if (svm->ghcb_sa_sync) {
1731 			kvm_write_guest(svm->vcpu.kvm,
1732 					ghcb_get_sw_scratch(svm->ghcb),
1733 					svm->ghcb_sa, svm->ghcb_sa_len);
1734 			svm->ghcb_sa_sync = false;
1735 		}
1736 
1737 		kfree(svm->ghcb_sa);
1738 		svm->ghcb_sa = NULL;
1739 		svm->ghcb_sa_free = false;
1740 	}
1741 
1742 	trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, svm->ghcb);
1743 
1744 	sev_es_sync_to_ghcb(svm);
1745 
1746 	kvm_vcpu_unmap(&svm->vcpu, &svm->ghcb_map, true);
1747 	svm->ghcb = NULL;
1748 }
1749 
1750 void pre_sev_run(struct vcpu_svm *svm, int cpu)
1751 {
1752 	struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
1753 	int asid = sev_get_asid(svm->vcpu.kvm);
1754 
1755 	/* Perform any SEV-ES pre-run actions */
1756 	pre_sev_es_run(svm);
1757 
1758 	/* Assign the ASID allocated to this SEV guest */
1759 	svm->asid = asid;
1760 
1761 	/*
1762 	 * Flush guest TLB:
1763 	 *
1764 	 * 1) when a different VMCB for the same ASID is to be run on the same host CPU, or
1765 	 * 2) this VMCB was executed on a different host CPU in previous VMRUNs.
1766 	 */
1767 	if (sd->sev_vmcbs[asid] == svm->vmcb &&
1768 	    svm->vcpu.arch.last_vmentry_cpu == cpu)
1769 		return;
1770 
1771 	sd->sev_vmcbs[asid] = svm->vmcb;
1772 	svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
1773 	vmcb_mark_dirty(svm->vmcb, VMCB_ASID);
1774 }
1775 
1776 #define GHCB_SCRATCH_AREA_LIMIT		(16ULL * PAGE_SIZE)
1777 static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
1778 {
1779 	struct vmcb_control_area *control = &svm->vmcb->control;
1780 	struct ghcb *ghcb = svm->ghcb;
1781 	u64 ghcb_scratch_beg, ghcb_scratch_end;
1782 	u64 scratch_gpa_beg, scratch_gpa_end;
1783 	void *scratch_va;
1784 
1785 	scratch_gpa_beg = ghcb_get_sw_scratch(ghcb);
1786 	if (!scratch_gpa_beg) {
1787 		pr_err("vmgexit: scratch gpa not provided\n");
1788 		return false;
1789 	}
1790 
1791 	scratch_gpa_end = scratch_gpa_beg + len;
1792 	if (scratch_gpa_end < scratch_gpa_beg) {
1793 		pr_err("vmgexit: scratch length (%#llx) not valid for scratch address (%#llx)\n",
1794 		       len, scratch_gpa_beg);
1795 		return false;
1796 	}
1797 
1798 	if ((scratch_gpa_beg & PAGE_MASK) == control->ghcb_gpa) {
1799 		/* Scratch area begins within GHCB */
1800 		ghcb_scratch_beg = control->ghcb_gpa +
1801 				   offsetof(struct ghcb, shared_buffer);
1802 		ghcb_scratch_end = control->ghcb_gpa +
1803 				   offsetof(struct ghcb, reserved_1);
1804 
1805 		/*
1806 		 * If the scratch area begins within the GHCB, it must be
1807 		 * completely contained in the GHCB shared buffer area.
1808 		 */
1809 		if (scratch_gpa_beg < ghcb_scratch_beg ||
1810 		    scratch_gpa_end > ghcb_scratch_end) {
1811 			pr_err("vmgexit: scratch area is outside of GHCB shared buffer area (%#llx - %#llx)\n",
1812 			       scratch_gpa_beg, scratch_gpa_end);
1813 			return false;
1814 		}
1815 
1816 		scratch_va = (void *)svm->ghcb;
1817 		scratch_va += (scratch_gpa_beg - control->ghcb_gpa);
1818 	} else {
1819 		/*
1820 		 * The guest memory must be read into a kernel buffer, so
1821 		 * limit the size.
1822 		 */
1823 		if (len > GHCB_SCRATCH_AREA_LIMIT) {
1824 			pr_err("vmgexit: scratch area exceeds KVM limits (%#llx requested, %#llx limit)\n",
1825 			       len, GHCB_SCRATCH_AREA_LIMIT);
1826 			return false;
1827 		}
1828 		scratch_va = kzalloc(len, GFP_KERNEL);
1829 		if (!scratch_va)
1830 			return false;
1831 
1832 		if (kvm_read_guest(svm->vcpu.kvm, scratch_gpa_beg, scratch_va, len)) {
1833 			/* Unable to copy scratch area from guest */
1834 			pr_err("vmgexit: kvm_read_guest for scratch area failed\n");
1835 
1836 			kfree(scratch_va);
1837 			return false;
1838 		}
1839 
1840 		/*
1841 		 * The scratch area is outside the GHCB. The operation will
1842 		 * dictate whether the buffer needs to be synced before running
1843 		 * the vCPU next time (i.e. a read was requested so the data
1844 		 * must be written back to the guest memory).
1845 		 */
1846 		svm->ghcb_sa_sync = sync;
1847 		svm->ghcb_sa_free = true;
1848 	}
1849 
1850 	svm->ghcb_sa = scratch_va;
1851 	svm->ghcb_sa_len = len;
1852 
1853 	return true;
1854 }
1855 
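/*
 * Helpers for the GHCB MSR protocol: get, set or update bit fields of the
 * guest's GHCB MSR value, which KVM tracks in vmcb->control.ghcb_gpa.
 */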
1856 static void set_ghcb_msr_bits(struct vcpu_svm *svm, u64 value, u64 mask,
1857 			      unsigned int pos)
1858 {
1859 	svm->vmcb->control.ghcb_gpa &= ~(mask << pos);
1860 	svm->vmcb->control.ghcb_gpa |= (value & mask) << pos;
1861 }
1862 
1863 static u64 get_ghcb_msr_bits(struct vcpu_svm *svm, u64 mask, unsigned int pos)
1864 {
1865 	return (svm->vmcb->control.ghcb_gpa >> pos) & mask;
1866 }
1867 
1868 static void set_ghcb_msr(struct vcpu_svm *svm, u64 value)
1869 {
1870 	svm->vmcb->control.ghcb_gpa = value;
1871 }
1872 
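/*
 * Handle VMGEXIT events that use the GHCB MSR protocol, i.e. requests made
 * through the GHCB MSR value rather than through a mapped GHCB page.
 */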
1873 static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
1874 {
1875 	struct vmcb_control_area *control = &svm->vmcb->control;
1876 	struct kvm_vcpu *vcpu = &svm->vcpu;
1877 	u64 ghcb_info;
1878 	int ret = 1;
1879 
1880 	ghcb_info = control->ghcb_gpa & GHCB_MSR_INFO_MASK;
1881 
1882 	trace_kvm_vmgexit_msr_protocol_enter(svm->vcpu.vcpu_id,
1883 					     control->ghcb_gpa);
1884 
1885 	switch (ghcb_info) {
1886 	case GHCB_MSR_SEV_INFO_REQ:
1887 		set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX,
1888 						    GHCB_VERSION_MIN,
1889 						    sev_enc_bit));
1890 		break;
1891 	case GHCB_MSR_CPUID_REQ: {
1892 		u64 cpuid_fn, cpuid_reg, cpuid_value;
1893 
1894 		cpuid_fn = get_ghcb_msr_bits(svm,
1895 					     GHCB_MSR_CPUID_FUNC_MASK,
1896 					     GHCB_MSR_CPUID_FUNC_POS);
1897 
1898 		/* Initialize the registers needed by the CPUID intercept */
1899 		vcpu->arch.regs[VCPU_REGS_RAX] = cpuid_fn;
1900 		vcpu->arch.regs[VCPU_REGS_RCX] = 0;
1901 
1902 		ret = svm_invoke_exit_handler(svm, SVM_EXIT_CPUID);
1903 		if (!ret) {
1904 			ret = -EINVAL;
1905 			break;
1906 		}
1907 
1908 		cpuid_reg = get_ghcb_msr_bits(svm,
1909 					      GHCB_MSR_CPUID_REG_MASK,
1910 					      GHCB_MSR_CPUID_REG_POS);
1911 		if (cpuid_reg == 0)
1912 			cpuid_value = vcpu->arch.regs[VCPU_REGS_RAX];
1913 		else if (cpuid_reg == 1)
1914 			cpuid_value = vcpu->arch.regs[VCPU_REGS_RBX];
1915 		else if (cpuid_reg == 2)
1916 			cpuid_value = vcpu->arch.regs[VCPU_REGS_RCX];
1917 		else
1918 			cpuid_value = vcpu->arch.regs[VCPU_REGS_RDX];
1919 
1920 		set_ghcb_msr_bits(svm, cpuid_value,
1921 				  GHCB_MSR_CPUID_VALUE_MASK,
1922 				  GHCB_MSR_CPUID_VALUE_POS);
1923 
1924 		set_ghcb_msr_bits(svm, GHCB_MSR_CPUID_RESP,
1925 				  GHCB_MSR_INFO_MASK,
1926 				  GHCB_MSR_INFO_POS);
1927 		break;
1928 	}
1929 	case GHCB_MSR_TERM_REQ: {
1930 		u64 reason_set, reason_code;
1931 
1932 		reason_set = get_ghcb_msr_bits(svm,
1933 					       GHCB_MSR_TERM_REASON_SET_MASK,
1934 					       GHCB_MSR_TERM_REASON_SET_POS);
1935 		reason_code = get_ghcb_msr_bits(svm,
1936 						GHCB_MSR_TERM_REASON_MASK,
1937 						GHCB_MSR_TERM_REASON_POS);
1938 		pr_info("SEV-ES guest requested termination: %#llx:%#llx\n",
1939 			reason_set, reason_code);
1940 		fallthrough;
1941 	}
1942 	default:
1943 		ret = -EINVAL;
1944 	}
1945 
1946 	trace_kvm_vmgexit_msr_protocol_exit(svm->vcpu.vcpu_id,
1947 					    control->ghcb_gpa, ret);
1948 
1949 	return ret;
1950 }
1951 
1952 int sev_handle_vmgexit(struct vcpu_svm *svm)
1953 {
1954 	struct vmcb_control_area *control = &svm->vmcb->control;
1955 	u64 ghcb_gpa, exit_code;
1956 	struct ghcb *ghcb;
1957 	int ret;
1958 
1959 	/* Validate the GHCB */
1960 	ghcb_gpa = control->ghcb_gpa;
1961 	if (ghcb_gpa & GHCB_MSR_INFO_MASK)
1962 		return sev_handle_vmgexit_msr_protocol(svm);
1963 
1964 	if (!ghcb_gpa) {
1965 		vcpu_unimpl(&svm->vcpu, "vmgexit: GHCB gpa is not set\n");
1966 		return -EINVAL;
1967 	}
1968 
1969 	if (kvm_vcpu_map(&svm->vcpu, ghcb_gpa >> PAGE_SHIFT, &svm->ghcb_map)) {
1970 		/* Unable to map GHCB from guest */
1971 		vcpu_unimpl(&svm->vcpu, "vmgexit: error mapping GHCB [%#llx] from guest\n",
1972 			    ghcb_gpa);
1973 		return -EINVAL;
1974 	}
1975 
1976 	svm->ghcb = svm->ghcb_map.hva;
1977 	ghcb = svm->ghcb_map.hva;
1978 
1979 	trace_kvm_vmgexit_enter(svm->vcpu.vcpu_id, ghcb);
1980 
1981 	exit_code = ghcb_get_sw_exit_code(ghcb);
1982 
1983 	ret = sev_es_validate_vmgexit(svm);
1984 	if (ret)
1985 		return ret;
1986 
1987 	sev_es_sync_from_ghcb(svm);
1988 	ghcb_set_sw_exit_info_1(ghcb, 0);
1989 	ghcb_set_sw_exit_info_2(ghcb, 0);
1990 
1991 	ret = -EINVAL;
1992 	switch (exit_code) {
1993 	case SVM_VMGEXIT_MMIO_READ:
1994 		if (!setup_vmgexit_scratch(svm, true, control->exit_info_2))
1995 			break;
1996 
1997 		ret = kvm_sev_es_mmio_read(&svm->vcpu,
1998 					   control->exit_info_1,
1999 					   control->exit_info_2,
2000 					   svm->ghcb_sa);
2001 		break;
2002 	case SVM_VMGEXIT_MMIO_WRITE:
2003 		if (!setup_vmgexit_scratch(svm, false, control->exit_info_2))
2004 			break;
2005 
2006 		ret = kvm_sev_es_mmio_write(&svm->vcpu,
2007 					    control->exit_info_1,
2008 					    control->exit_info_2,
2009 					    svm->ghcb_sa);
2010 		break;
2011 	case SVM_VMGEXIT_NMI_COMPLETE:
2012 		ret = svm_invoke_exit_handler(svm, SVM_EXIT_IRET);
2013 		break;
2014 	case SVM_VMGEXIT_AP_HLT_LOOP:
2015 		ret = kvm_emulate_ap_reset_hold(&svm->vcpu);
2016 		break;
2017 	case SVM_VMGEXIT_AP_JUMP_TABLE: {
2018 		struct kvm_sev_info *sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info;
2019 
2020 		switch (control->exit_info_1) {
2021 		case 0:
2022 			/* Set AP jump table address */
2023 			sev->ap_jump_table = control->exit_info_2;
2024 			break;
2025 		case 1:
2026 			/* Get AP jump table address */
2027 			ghcb_set_sw_exit_info_2(ghcb, sev->ap_jump_table);
2028 			break;
2029 		default:
2030 			pr_err("svm: vmgexit: unsupported AP jump table request - exit_info_1=%#llx\n",
2031 			       control->exit_info_1);
2032 			ghcb_set_sw_exit_info_1(ghcb, 1);
2033 			ghcb_set_sw_exit_info_2(ghcb,
2034 						X86_TRAP_UD |
2035 						SVM_EVTINJ_TYPE_EXEPT |
2036 						SVM_EVTINJ_VALID);
2037 		}
2038 
2039 		ret = 1;
2040 		break;
2041 	}
2042 	case SVM_VMGEXIT_UNSUPPORTED_EVENT:
2043 		vcpu_unimpl(&svm->vcpu,
2044 			    "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n",
2045 			    control->exit_info_1, control->exit_info_2);
2046 		break;
2047 	default:
2048 		ret = svm_invoke_exit_handler(svm, exit_code);
2049 	}
2050 
2051 	return ret;
2052 }
2053 
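/*
 * Emulate string I/O for an SEV-ES guest: the data is transferred through
 * the GHCB scratch area rather than directly through guest registers.
 */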
2054 int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
2055 {
2056 	if (!setup_vmgexit_scratch(svm, in, svm->vmcb->control.exit_info_2))
2057 		return -EINVAL;
2058 
2059 	return kvm_sev_es_string_io(&svm->vcpu, size, port,
2060 				    svm->ghcb_sa, svm->ghcb_sa_len, in);
2061 }
2062 
2063 void sev_es_init_vmcb(struct vcpu_svm *svm)
2064 {
2065 	struct kvm_vcpu *vcpu = &svm->vcpu;
2066 
2067 	svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ES_ENABLE;
2068 	svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
2069 
2070 	/*
2071 	 * An SEV-ES guest requires a VMSA area that is separate from the
2072 	 * VMCB page. Do not include the encryption mask on the VMSA physical
2073 	 * address since hardware will access it using the guest key.
2074 	 */
2075 	svm->vmcb->control.vmsa_pa = __pa(svm->vmsa);
2076 
2077 	/* Can't intercept CR register access, HV can't modify CR registers */
2078 	svm_clr_intercept(svm, INTERCEPT_CR0_READ);
2079 	svm_clr_intercept(svm, INTERCEPT_CR4_READ);
2080 	svm_clr_intercept(svm, INTERCEPT_CR8_READ);
2081 	svm_clr_intercept(svm, INTERCEPT_CR0_WRITE);
2082 	svm_clr_intercept(svm, INTERCEPT_CR4_WRITE);
2083 	svm_clr_intercept(svm, INTERCEPT_CR8_WRITE);
2084 
2085 	svm_clr_intercept(svm, INTERCEPT_SELECTIVE_CR0);
2086 
2087 	/* Track EFER/CR register changes */
2088 	svm_set_intercept(svm, TRAP_EFER_WRITE);
2089 	svm_set_intercept(svm, TRAP_CR0_WRITE);
2090 	svm_set_intercept(svm, TRAP_CR4_WRITE);
2091 	svm_set_intercept(svm, TRAP_CR8_WRITE);
2092 
2093 	/* No support for enable_vmware_backdoor */
2094 	clr_exception_intercept(svm, GP_VECTOR);
2095 
2096 	/* Can't intercept XSETBV, HV can't modify XCR0 directly */
2097 	svm_clr_intercept(svm, INTERCEPT_XSETBV);
2098 
2099 	/* Clear intercepts on selected MSRs */
2100 	set_msr_interception(vcpu, svm->msrpm, MSR_EFER, 1, 1);
2101 	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_CR_PAT, 1, 1);
2102 	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
2103 	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
2104 	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
2105 	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
2106 }
2107 
2108 void sev_es_create_vcpu(struct vcpu_svm *svm)
2109 {
2110 	/*
2111 	 * Set the GHCB MSR value as per the GHCB specification when creating
2112 	 * a vCPU for an SEV-ES guest.
2113 	 */
2114 	set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX,
2115 					    GHCB_VERSION_MIN,
2116 					    sev_enc_bit));
2117 }
2118 
2119 void sev_es_prepare_guest_switch(struct vcpu_svm *svm, unsigned int cpu)
2120 {
2121 	struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
2122 	struct vmcb_save_area *hostsa;
2123 
2124 	/*
2125 	 * For an SEV-ES guest, hardware will restore the host state on VMEXIT,
2126 	 * one step of which is to perform a VMLOAD. Since hardware does not
2127 	 * perform a VMSAVE on VMRUN, the host save area must be updated.
2128 	 */
2129 	vmsave(__sme_page_pa(sd->save_area));
2130 
2131 	/* XCR0 is restored on VMEXIT, save the current host value */
2132 	hostsa = (struct vmcb_save_area *)(page_address(sd->save_area) + 0x400);
2133 	hostsa->xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
2134 
2135 	/* PKRU is restored on VMEXIT, save the current host value */
2136 	hostsa->pkru = read_pkru();
2137 
2138 	/* MSR_IA32_XSS is restored on VMEXIT, save the current host value */
2139 	hostsa->xss = host_xss;
2140 }
2141 
2142 void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
2143 {
2144 	struct vcpu_svm *svm = to_svm(vcpu);
2145 
2146 	/* First SIPI: Use the values as initially set by the VMM */
2147 	if (!svm->received_first_sipi) {
2148 		svm->received_first_sipi = true;
2149 		return;
2150 	}
2151 
2152 	/*
2153 	 * Subsequent SIPI: Return from an AP Reset Hold VMGEXIT, where
2154 	 * the guest will set the CS and RIP. Set SW_EXIT_INFO_2 to a
2155 	 * non-zero value.
2156 	 */
2157 	ghcb_set_sw_exit_info_2(svm->ghcb, 1);
2158 }
2159