xref: /linux/arch/x86/coco/sev/vc-handle.c (revision d30c1683aaecb93d2ab95685dc4300a33d3cea7a)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * AMD Memory Encryption Support
4  *
5  * Copyright (C) 2019 SUSE
6  *
7  * Author: Joerg Roedel <jroedel@suse.de>
8  */
9 
10 #define pr_fmt(fmt)	"SEV: " fmt
11 
12 #include <linux/sched/debug.h>	/* For show_regs() */
13 #include <linux/cc_platform.h>
14 #include <linux/printk.h>
15 #include <linux/mm_types.h>
16 #include <linux/kernel.h>
17 #include <linux/mm.h>
18 #include <linux/io.h>
19 #include <linux/psp-sev.h>
20 #include <linux/efi.h>
21 #include <uapi/linux/sev-guest.h>
22 
23 #include <asm/init.h>
24 #include <asm/stacktrace.h>
25 #include <asm/sev.h>
26 #include <asm/sev-internal.h>
27 #include <asm/insn-eval.h>
28 #include <asm/fpu/xcr.h>
29 #include <asm/processor.h>
30 #include <asm/setup.h>
31 #include <asm/traps.h>
32 #include <asm/svm.h>
33 #include <asm/smp.h>
34 #include <asm/cpu.h>
35 #include <asm/apic.h>
36 #include <asm/cpuid/api.h>
37 
/*
 * Translate a virtual address to its physical address by manually walking
 * the page-tables the CPU is currently using (read from CR3).
 *
 * Returns ES_OK with *paddr filled in on success, ES_EXCEPTION (with a #PF
 * queued in ctxt->fi) when no mapping exists, or ES_UNSUPPORTED when the
 * page is mapped encrypted - emulated MMIO to/from encrypted memory is not
 * supported.
 */
static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
					   unsigned long vaddr, phys_addr_t *paddr)
{
	unsigned long va = (unsigned long)vaddr;
	unsigned int level;
	phys_addr_t pa;
	pgd_t *pgd;
	pte_t *pte;

	pgd = __va(read_cr3_pa());
	pgd = &pgd[pgd_index(va)];
	pte = lookup_address_in_pgd(pgd, va, &level);
	if (!pte) {
		/* No mapping found - report a page-fault to the caller. */
		ctxt->fi.vector     = X86_TRAP_PF;
		ctxt->fi.cr2        = vaddr;
		ctxt->fi.error_code = 0;

		if (user_mode(ctxt->regs))
			ctxt->fi.error_code |= X86_PF_USER;

		return ES_EXCEPTION;
	}

	if (WARN_ON_ONCE(pte_val(*pte) & _PAGE_ENC))
		/* Emulated MMIO to/from encrypted memory not supported */
		return ES_UNSUPPORTED;

	/* Combine PFN and page offset; 'level' accounts for large pages. */
	pa = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
	pa |= va & ~page_level_mask(level);

	*paddr = pa;

	return ES_OK;
}
72 
73 static enum es_result vc_ioio_check(struct es_em_ctxt *ctxt, u16 port, size_t size)
74 {
75 	BUG_ON(size > 4);
76 
77 	if (user_mode(ctxt->regs)) {
78 		struct thread_struct *t = &current->thread;
79 		struct io_bitmap *iobm = t->io_bitmap;
80 		size_t idx;
81 
82 		if (!iobm)
83 			goto fault;
84 
85 		for (idx = port; idx < port + size; ++idx) {
86 			if (test_bit(idx, iobm->bitmap))
87 				goto fault;
88 		}
89 	}
90 
91 	return ES_OK;
92 
93 fault:
94 	ctxt->fi.vector = X86_TRAP_GP;
95 	ctxt->fi.error_code = 0;
96 
97 	return ES_EXCEPTION;
98 }
99 
/*
 * Re-inject the exception recorded in ctxt->fi into the regular kernel
 * exception handlers, as if the emulated instruction had raised it
 * directly. Only vectors the #VC emulation can produce are supported.
 */
void vc_forward_exception(struct es_em_ctxt *ctxt)
{
	long error_code = ctxt->fi.error_code;
	int trapnr = ctxt->fi.vector;

	ctxt->regs->orig_ax = ctxt->fi.error_code;

	switch (trapnr) {
	case X86_TRAP_GP:
		exc_general_protection(ctxt->regs, error_code);
		break;
	case X86_TRAP_UD:
		exc_invalid_op(ctxt->regs);
		break;
	case X86_TRAP_PF:
		/* The #PF handler expects the faulting address in CR2. */
		write_cr2(ctxt->fi.cr2);
		exc_page_fault(ctxt->regs, error_code);
		break;
	case X86_TRAP_AC:
		exc_alignment_check(ctxt->regs, error_code);
		break;
	default:
		pr_emerg("Unsupported exception in #VC instruction emulation - can't continue\n");
		BUG();
	}
}
126 
127 static int vc_fetch_insn_kernel(struct es_em_ctxt *ctxt,
128 				unsigned char *buffer)
129 {
130 	return copy_from_kernel_nofault(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE);
131 }
132 
133 static enum es_result __vc_decode_user_insn(struct es_em_ctxt *ctxt)
134 {
135 	char buffer[MAX_INSN_SIZE];
136 	int insn_bytes;
137 
138 	insn_bytes = insn_fetch_from_user_inatomic(ctxt->regs, buffer);
139 	if (insn_bytes == 0) {
140 		/* Nothing could be copied */
141 		ctxt->fi.vector     = X86_TRAP_PF;
142 		ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER;
143 		ctxt->fi.cr2        = ctxt->regs->ip;
144 		return ES_EXCEPTION;
145 	} else if (insn_bytes == -EINVAL) {
146 		/* Effective RIP could not be calculated */
147 		ctxt->fi.vector     = X86_TRAP_GP;
148 		ctxt->fi.error_code = 0;
149 		ctxt->fi.cr2        = 0;
150 		return ES_EXCEPTION;
151 	}
152 
153 	if (!insn_decode_from_regs(&ctxt->insn, ctxt->regs, buffer, insn_bytes))
154 		return ES_DECODE_FAILED;
155 
156 	if (ctxt->insn.immediate.got)
157 		return ES_OK;
158 	else
159 		return ES_DECODE_FAILED;
160 }
161 
162 static enum es_result __vc_decode_kern_insn(struct es_em_ctxt *ctxt)
163 {
164 	char buffer[MAX_INSN_SIZE];
165 	int res, ret;
166 
167 	res = vc_fetch_insn_kernel(ctxt, buffer);
168 	if (res) {
169 		ctxt->fi.vector     = X86_TRAP_PF;
170 		ctxt->fi.error_code = X86_PF_INSTR;
171 		ctxt->fi.cr2        = ctxt->regs->ip;
172 		return ES_EXCEPTION;
173 	}
174 
175 	ret = insn_decode(&ctxt->insn, buffer, MAX_INSN_SIZE, INSN_MODE_64);
176 	if (ret < 0)
177 		return ES_DECODE_FAILED;
178 	else
179 		return ES_OK;
180 }
181 
182 /*
183  * User instruction decoding is also required for the EFI runtime. Even though
184  * the EFI runtime is running in kernel mode, it uses special EFI virtual
185  * address mappings that require the use of efi_mm to properly address and
186  * decode.
187  */
188 static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
189 {
190 	if (user_mode(ctxt->regs) || mm_is_efi(current->active_mm))
191 		return __vc_decode_user_insn(ctxt);
192 	else
193 		return __vc_decode_kern_insn(ctxt);
194 }
195 
196 static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
197 				   char *dst, char *buf, size_t size)
198 {
199 	unsigned long error_code = X86_PF_PROT | X86_PF_WRITE;
200 
201 	/*
202 	 * This function uses __put_user() independent of whether kernel or user
203 	 * memory is accessed. This works fine because __put_user() does no
204 	 * sanity checks of the pointer being accessed. All that it does is
205 	 * to report when the access failed.
206 	 *
207 	 * Also, this function runs in atomic context, so __put_user() is not
208 	 * allowed to sleep. The page-fault handler detects that it is running
209 	 * in atomic context and will not try to take mmap_sem and handle the
210 	 * fault, so additional pagefault_enable()/disable() calls are not
211 	 * needed.
212 	 *
213 	 * The access can't be done via copy_to_user() here because
214 	 * vc_write_mem() must not use string instructions to access unsafe
215 	 * memory. The reason is that MOVS is emulated by the #VC handler by
216 	 * splitting the move up into a read and a write and taking a nested #VC
217 	 * exception on whatever of them is the MMIO access. Using string
218 	 * instructions here would cause infinite nesting.
219 	 */
220 	switch (size) {
221 	case 1: {
222 		u8 d1;
223 		u8 __user *target = (u8 __user *)dst;
224 
225 		memcpy(&d1, buf, 1);
226 		if (__put_user(d1, target))
227 			goto fault;
228 		break;
229 	}
230 	case 2: {
231 		u16 d2;
232 		u16 __user *target = (u16 __user *)dst;
233 
234 		memcpy(&d2, buf, 2);
235 		if (__put_user(d2, target))
236 			goto fault;
237 		break;
238 	}
239 	case 4: {
240 		u32 d4;
241 		u32 __user *target = (u32 __user *)dst;
242 
243 		memcpy(&d4, buf, 4);
244 		if (__put_user(d4, target))
245 			goto fault;
246 		break;
247 	}
248 	case 8: {
249 		u64 d8;
250 		u64 __user *target = (u64 __user *)dst;
251 
252 		memcpy(&d8, buf, 8);
253 		if (__put_user(d8, target))
254 			goto fault;
255 		break;
256 	}
257 	default:
258 		WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
259 		return ES_UNSUPPORTED;
260 	}
261 
262 	return ES_OK;
263 
264 fault:
265 	if (user_mode(ctxt->regs))
266 		error_code |= X86_PF_USER;
267 
268 	ctxt->fi.vector = X86_TRAP_PF;
269 	ctxt->fi.error_code = error_code;
270 	ctxt->fi.cr2 = (unsigned long)dst;
271 
272 	return ES_EXCEPTION;
273 }
274 
275 static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
276 				  char *src, char *buf, size_t size)
277 {
278 	unsigned long error_code = X86_PF_PROT;
279 
280 	/*
281 	 * This function uses __get_user() independent of whether kernel or user
282 	 * memory is accessed. This works fine because __get_user() does no
283 	 * sanity checks of the pointer being accessed. All that it does is
284 	 * to report when the access failed.
285 	 *
286 	 * Also, this function runs in atomic context, so __get_user() is not
287 	 * allowed to sleep. The page-fault handler detects that it is running
288 	 * in atomic context and will not try to take mmap_sem and handle the
289 	 * fault, so additional pagefault_enable()/disable() calls are not
290 	 * needed.
291 	 *
292 	 * The access can't be done via copy_from_user() here because
293 	 * vc_read_mem() must not use string instructions to access unsafe
294 	 * memory. The reason is that MOVS is emulated by the #VC handler by
295 	 * splitting the move up into a read and a write and taking a nested #VC
296 	 * exception on whatever of them is the MMIO access. Using string
297 	 * instructions here would cause infinite nesting.
298 	 */
299 	switch (size) {
300 	case 1: {
301 		u8 d1;
302 		u8 __user *s = (u8 __user *)src;
303 
304 		if (__get_user(d1, s))
305 			goto fault;
306 		memcpy(buf, &d1, 1);
307 		break;
308 	}
309 	case 2: {
310 		u16 d2;
311 		u16 __user *s = (u16 __user *)src;
312 
313 		if (__get_user(d2, s))
314 			goto fault;
315 		memcpy(buf, &d2, 2);
316 		break;
317 	}
318 	case 4: {
319 		u32 d4;
320 		u32 __user *s = (u32 __user *)src;
321 
322 		if (__get_user(d4, s))
323 			goto fault;
324 		memcpy(buf, &d4, 4);
325 		break;
326 	}
327 	case 8: {
328 		u64 d8;
329 		u64 __user *s = (u64 __user *)src;
330 		if (__get_user(d8, s))
331 			goto fault;
332 		memcpy(buf, &d8, 8);
333 		break;
334 	}
335 	default:
336 		WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
337 		return ES_UNSUPPORTED;
338 	}
339 
340 	return ES_OK;
341 
342 fault:
343 	if (user_mode(ctxt->regs))
344 		error_code |= X86_PF_USER;
345 
346 	ctxt->fi.vector = X86_TRAP_PF;
347 	ctxt->fi.error_code = error_code;
348 	ctxt->fi.cr2 = (unsigned long)src;
349 
350 	return ES_EXCEPTION;
351 }
352 
/*
 * Shims expected by the shared #VC code: sev_printk() maps to plain
 * printk() in the runtime kernel, and error() is a no-op here
 * (presumably meaningful in the other environments that build
 * vc-shared.c - see that file).
 */
#define sev_printk(fmt, ...)		printk(fmt, ##__VA_ARGS__)
#define error(v)

#include "vc-shared.c"
357 
358 /* Writes to the SVSM CAA MSR are ignored */
359 static enum es_result __vc_handle_msr_caa(struct pt_regs *regs, bool write)
360 {
361 	if (write)
362 		return ES_OK;
363 
364 	regs->ax = lower_32_bits(this_cpu_read(svsm_caa_pa));
365 	regs->dx = upper_32_bits(this_cpu_read(svsm_caa_pa));
366 
367 	return ES_OK;
368 }
369 
370 /*
371  * TSC related accesses should not exit to the hypervisor when a guest is
372  * executing with Secure TSC enabled, so special handling is required for
373  * accesses of MSR_IA32_TSC and MSR_AMD64_GUEST_TSC_FREQ.
374  */
375 static enum es_result __vc_handle_secure_tsc_msrs(struct es_em_ctxt *ctxt, bool write)
376 {
377 	struct pt_regs *regs = ctxt->regs;
378 	u64 tsc;
379 
380 	/*
381 	 * Writing to MSR_IA32_TSC can cause subsequent reads of the TSC to
382 	 * return undefined values, and GUEST_TSC_FREQ is read-only. Generate
383 	 * a #GP on all writes.
384 	 */
385 	if (write) {
386 		ctxt->fi.vector = X86_TRAP_GP;
387 		ctxt->fi.error_code = 0;
388 		return ES_EXCEPTION;
389 	}
390 
391 	/*
392 	 * GUEST_TSC_FREQ read should not be intercepted when Secure TSC is
393 	 * enabled. Terminate the guest if a read is attempted.
394 	 */
395 	if (regs->cx == MSR_AMD64_GUEST_TSC_FREQ)
396 		return ES_VMM_ERROR;
397 
398 	/* Reads of MSR_IA32_TSC should return the current TSC value. */
399 	tsc = rdtsc_ordered();
400 	regs->ax = lower_32_bits(tsc);
401 	regs->dx = upper_32_bits(tsc);
402 
403 	return ES_OK;
404 }
405 
/*
 * Handle an intercepted RDMSR/WRMSR (@write selects the direction).
 *
 * A few MSRs require in-guest treatment and must never (or only
 * conditionally) be reflected to the hypervisor; everything else is
 * forwarded via the GHCB MSR protocol.
 */
enum es_result sev_es_ghcb_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt, bool write)
{
	struct pt_regs *regs = ctxt->regs;
	enum es_result ret;

	switch (regs->cx) {
	case MSR_SVSM_CAA:
		return __vc_handle_msr_caa(regs, write);
	case MSR_IA32_TSC:
	case MSR_AMD64_GUEST_TSC_FREQ:
		/* With Secure TSC these must not reach the hypervisor. */
		if (sev_status & MSR_AMD64_SNP_SECURE_TSC)
			return __vc_handle_secure_tsc_msrs(ctxt, write);
		break;
	case MSR_AMD64_SAVIC_CONTROL:
		/*
		 * AMD64_SAVIC_CONTROL should not be intercepted when
		 * Secure AVIC is enabled. Terminate the Secure AVIC guest
		 * if the interception is enabled.
		 */
		if (cc_platform_has(CC_ATTR_SNP_SECURE_AVIC))
			return ES_VMM_ERROR;
		break;
	default:
		break;
	}

	/* MSR number goes in RCX; on writes the value is passed in RDX:RAX. */
	ghcb_set_rcx(ghcb, regs->cx);
	if (write) {
		ghcb_set_rax(ghcb, regs->ax);
		ghcb_set_rdx(ghcb, regs->dx);
	}

	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_MSR, write, 0);

	/* Successful reads return the MSR value in RDX:RAX. */
	if ((ret == ES_OK) && !write) {
		regs->ax = ghcb->save.rax;
		regs->dx = ghcb->save.rdx;
	}

	return ret;
}
447 
448 static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
449 {
450 	return sev_es_ghcb_handle_msr(ghcb, ctxt, ctxt->insn.opcode.bytes[1] == 0x30);
451 }
452 
453 static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt)
454 {
455 	int trapnr = ctxt->fi.vector;
456 
457 	if (trapnr == X86_TRAP_PF)
458 		native_write_cr2(ctxt->fi.cr2);
459 
460 	ctxt->regs->orig_ax = ctxt->fi.error_code;
461 	do_early_exception(ctxt->regs, trapnr);
462 }
463 
464 static long *vc_insn_get_rm(struct es_em_ctxt *ctxt)
465 {
466 	long *reg_array;
467 	int offset;
468 
469 	reg_array = (long *)ctxt->regs;
470 	offset    = insn_get_modrm_rm_off(&ctxt->insn, ctxt->regs);
471 
472 	if (offset < 0)
473 		return NULL;
474 
475 	offset /= sizeof(long);
476 
477 	return reg_array + offset;
478 }
/*
 * Issue a single MMIO read or write of @bytes bytes via the GHCB. Data is
 * exchanged through the GHCB shared buffer - the caller copies it in
 * (writes) before, or out (reads) after this call. The target address is
 * taken from the memory operand of the decoded instruction in @ctxt.
 */
static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
				 unsigned int bytes, bool read)
{
	u64 exit_code, exit_info_1, exit_info_2;
	unsigned long ghcb_pa = __pa(ghcb);
	enum es_result res;
	phys_addr_t paddr;
	void __user *ref;

	ref = insn_get_addr_ref(&ctxt->insn, ctxt->regs);
	if (ref == (void __user *)-1L)
		return ES_UNSUPPORTED;

	exit_code = read ? SVM_VMGEXIT_MMIO_READ : SVM_VMGEXIT_MMIO_WRITE;

	/* The hypervisor needs the physical address of the MMIO target. */
	res = vc_slow_virt_to_phys(ghcb, ctxt, (unsigned long)ref, &paddr);
	if (res != ES_OK) {
		/* Mark a queued #PF as a write fault where appropriate. */
		if (res == ES_EXCEPTION && !read)
			ctxt->fi.error_code |= X86_PF_WRITE;

		return res;
	}

	exit_info_1 = paddr;
	/* Can never be greater than 8 */
	exit_info_2 = bytes;

	/* Point the scratch area at the GHCB's shared buffer. */
	ghcb_set_sw_scratch(ghcb, ghcb_pa + offsetof(struct ghcb, shared_buffer));

	return sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, exit_info_1, exit_info_2);
}
510 
/*
 * The MOVS instruction has two memory operands, which raises the
 * problem that it is not known whether the access to the source or the
 * destination caused the #VC exception (and hence whether an MMIO read
 * or write operation needs to be emulated).
 *
 * Instead of playing games with walking page-tables and trying to guess
 * whether the source or destination is an MMIO range, split the move
 * into two operations, a read and a write with only one memory operand.
 * This will cause a nested #VC exception on the MMIO address which can
 * then be handled.
 *
 * This implementation has the benefit that it also supports MOVS where
 * source _and_ destination are MMIO regions.
 *
 * It will slow MOVS on MMIO down a lot, but in SEV-ES guests it is a
 * rare operation. If it turns out to be a performance problem the split
 * operations can be moved to memcpy_fromio() and memcpy_toio().
 */
static enum es_result vc_handle_mmio_movs(struct es_em_ctxt *ctxt,
					  unsigned int bytes)
{
	unsigned long ds_base, es_base;
	unsigned char *src, *dst;
	unsigned char buffer[8];
	enum es_result ret;
	bool rep;
	int off;

	/* MOVS reads from DS:(R)SI and writes to ES:(R)DI. */
	ds_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_DS);
	es_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_ES);

	if (ds_base == -1L || es_base == -1L) {
		ctxt->fi.vector = X86_TRAP_GP;
		ctxt->fi.error_code = 0;
		return ES_EXCEPTION;
	}

	src = ds_base + (unsigned char *)ctxt->regs->si;
	dst = es_base + (unsigned char *)ctxt->regs->di;

	/* Each half takes its own nested #VC when it hits MMIO. */
	ret = vc_read_mem(ctxt, src, buffer, bytes);
	if (ret != ES_OK)
		return ret;

	ret = vc_write_mem(ctxt, dst, buffer, bytes);
	if (ret != ES_OK)
		return ret;

	/* The direction flag selects whether SI/DI step up or down. */
	if (ctxt->regs->flags & X86_EFLAGS_DF)
		off = -bytes;
	else
		off =  bytes;

	ctxt->regs->si += off;
	ctxt->regs->di += off;

	rep = insn_has_rep_prefix(&ctxt->insn);
	if (rep)
		ctxt->regs->cx -= 1;

	/* With a REP prefix, ES_RETRY re-runs the emulation until RCX == 0. */
	if (!rep || ctxt->regs->cx == 0)
		return ES_OK;
	else
		return ES_RETRY;
}
577 
/*
 * Emulate an instruction that took a #VC NPF exit, i.e. an MMIO access.
 * The instruction is decoded and data is moved between the register or
 * immediate operand and the GHCB shared buffer via vc_do_mmio().
 */
static enum es_result vc_handle_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
	struct insn *insn = &ctxt->insn;
	enum insn_mmio_type mmio;
	unsigned int bytes = 0;
	enum es_result ret;
	u8 sign_byte;
	long *reg_data;

	mmio = insn_decode_mmio(insn, &bytes);
	if (mmio == INSN_MMIO_DECODE_FAILED)
		return ES_DECODE_FAILED;

	/* All forms except immediate writes and MOVS have a register operand. */
	if (mmio != INSN_MMIO_WRITE_IMM && mmio != INSN_MMIO_MOVS) {
		reg_data = insn_get_modrm_reg_ptr(insn, ctxt->regs);
		if (!reg_data)
			return ES_DECODE_FAILED;
	}

	/* MMIO emulation is only supported for kernel-mode accesses. */
	if (user_mode(ctxt->regs))
		return ES_UNSUPPORTED;

	switch (mmio) {
	case INSN_MMIO_WRITE:
		memcpy(ghcb->shared_buffer, reg_data, bytes);
		ret = vc_do_mmio(ghcb, ctxt, bytes, false);
		break;
	case INSN_MMIO_WRITE_IMM:
		memcpy(ghcb->shared_buffer, insn->immediate1.bytes, bytes);
		ret = vc_do_mmio(ghcb, ctxt, bytes, false);
		break;
	case INSN_MMIO_READ:
		ret = vc_do_mmio(ghcb, ctxt, bytes, true);
		if (ret)
			break;

		/* Zero-extend for 32-bit operation */
		if (bytes == 4)
			*reg_data = 0;

		memcpy(reg_data, ghcb->shared_buffer, bytes);
		break;
	case INSN_MMIO_READ_ZERO_EXTEND:
		ret = vc_do_mmio(ghcb, ctxt, bytes, true);
		if (ret)
			break;

		/* Zero extend based on operand size */
		memset(reg_data, 0, insn->opnd_bytes);
		memcpy(reg_data, ghcb->shared_buffer, bytes);
		break;
	case INSN_MMIO_READ_SIGN_EXTEND:
		ret = vc_do_mmio(ghcb, ctxt, bytes, true);
		if (ret)
			break;

		/* The source of a sign-extending read is a byte or a word. */
		if (bytes == 1) {
			u8 *val = (u8 *)ghcb->shared_buffer;

			sign_byte = (*val & 0x80) ? 0xff : 0x00;
		} else {
			u16 *val = (u16 *)ghcb->shared_buffer;

			sign_byte = (*val & 0x8000) ? 0xff : 0x00;
		}

		/* Sign extend based on operand size */
		memset(reg_data, sign_byte, insn->opnd_bytes);
		memcpy(reg_data, ghcb->shared_buffer, bytes);
		break;
	case INSN_MMIO_MOVS:
		ret = vc_handle_mmio_movs(ctxt, bytes);
		break;
	default:
		ret = ES_UNSUPPORTED;
		break;
	}

	return ret;
}
658 
/*
 * Emulate a MOV to DR7: take the value from the instruction's r/m
 * operand, sanitize it, forward it to the hypervisor and cache it
 * per-CPU so later DR7 reads can be served locally.
 */
static enum es_result vc_handle_dr7_write(struct ghcb *ghcb,
					  struct es_em_ctxt *ctxt)
{
	struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
	long val, *reg = vc_insn_get_rm(ctxt);
	enum es_result ret;

	/* With debug-register swapping the HV must not intercept DR7. */
	if (sev_status & MSR_AMD64_SNP_DEBUG_SWAP)
		return ES_VMM_ERROR;

	if (!reg)
		return ES_DECODE_FAILED;

	val = *reg;

	/* Upper 32 bits must be written as zeroes */
	if (val >> 32) {
		ctxt->fi.vector = X86_TRAP_GP;
		ctxt->fi.error_code = 0;
		return ES_EXCEPTION;
	}

	/* Clear out other reserved bits and set bit 10 */
	val = (val & 0xffff23ffL) | BIT(10);

	/* Early non-zero writes to DR7 are not supported */
	if (!data && (val & ~DR7_RESET_VALUE))
		return ES_UNSUPPORTED;

	/* Using a value of 0 for ExitInfo1 means RAX holds the value */
	ghcb_set_rax(ghcb, val);
	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WRITE_DR7, 0, 0);
	if (ret != ES_OK)
		return ret;

	/* Cache the value so vc_handle_dr7_read() can serve it locally. */
	if (data)
		data->dr7 = val;

	return ES_OK;
}
699 
700 static enum es_result vc_handle_dr7_read(struct ghcb *ghcb,
701 					 struct es_em_ctxt *ctxt)
702 {
703 	struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
704 	long *reg = vc_insn_get_rm(ctxt);
705 
706 	if (sev_status & MSR_AMD64_SNP_DEBUG_SWAP)
707 		return ES_VMM_ERROR;
708 
709 	if (!reg)
710 		return ES_DECODE_FAILED;
711 
712 	if (data)
713 		*reg = data->dr7;
714 	else
715 		*reg = DR7_RESET_VALUE;
716 
717 	return ES_OK;
718 }
719 
720 static enum es_result vc_handle_wbinvd(struct ghcb *ghcb,
721 				       struct es_em_ctxt *ctxt)
722 {
723 	return sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WBINVD, 0, 0);
724 }
725 
726 static enum es_result vc_handle_rdpmc(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
727 {
728 	enum es_result ret;
729 
730 	ghcb_set_rcx(ghcb, ctxt->regs->cx);
731 
732 	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_RDPMC, 0, 0);
733 	if (ret != ES_OK)
734 		return ret;
735 
736 	if (!(ghcb_rax_is_valid(ghcb) && ghcb_rdx_is_valid(ghcb)))
737 		return ES_VMM_ERROR;
738 
739 	ctxt->regs->ax = ghcb->save.rax;
740 	ctxt->regs->dx = ghcb->save.rdx;
741 
742 	return ES_OK;
743 }
744 
745 static enum es_result vc_handle_monitor(struct ghcb *ghcb,
746 					struct es_em_ctxt *ctxt)
747 {
748 	/*
749 	 * Treat it as a NOP and do not leak a physical address to the
750 	 * hypervisor.
751 	 */
752 	return ES_OK;
753 }
754 
755 static enum es_result vc_handle_mwait(struct ghcb *ghcb,
756 				      struct es_em_ctxt *ctxt)
757 {
758 	/* Treat the same as MONITOR/MONITORX */
759 	return ES_OK;
760 }
761 
/*
 * Forward an intercepted VMMCALL to the hypervisor, giving any
 * hypervisor-specific hooks a chance to prepare the GHCB beforehand and
 * post-process the result afterwards.
 */
static enum es_result vc_handle_vmmcall(struct ghcb *ghcb,
					struct es_em_ctxt *ctxt)
{
	enum es_result ret;

	ghcb_set_rax(ghcb, ctxt->regs->ax);
	/* Tell the hypervisor from which CPL the VMMCALL originated. */
	ghcb_set_cpl(ghcb, user_mode(ctxt->regs) ? 3 : 0);

	if (x86_platform.hyper.sev_es_hcall_prepare)
		x86_platform.hyper.sev_es_hcall_prepare(ghcb, ctxt->regs);

	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_VMMCALL, 0, 0);
	if (ret != ES_OK)
		return ret;

	if (!ghcb_rax_is_valid(ghcb))
		return ES_VMM_ERROR;

	ctxt->regs->ax = ghcb->save.rax;

	/*
	 * Call sev_es_hcall_finish() after regs->ax is already set.
	 * This allows the hypervisor handler to overwrite it again if
	 * necessary.
	 */
	if (x86_platform.hyper.sev_es_hcall_finish &&
	    !x86_platform.hyper.sev_es_hcall_finish(ghcb, ctxt->regs))
		return ES_VMM_ERROR;

	return ES_OK;
}
793 
/* Queue an intercepted #AC for re-injection via vc_forward_exception(). */
static enum es_result vc_handle_trap_ac(struct ghcb *ghcb,
					struct es_em_ctxt *ctxt)
{
	/*
	 * Calling exc_alignment_check() directly does not work, because it
	 * enables IRQs and the GHCB is active. Forward the exception and call
	 * it later from vc_forward_exception().
	 */
	ctxt->fi.vector = X86_TRAP_AC;
	ctxt->fi.error_code = 0;
	return ES_EXCEPTION;
}
806 
/*
 * Dispatch an intercept to its specific handler based on the #VC
 * exit-code, after vc_check_opcode_bytes() has accepted the instruction
 * (defined in vc-shared.c; presumably it matches the opcode at RIP
 * against the reported exit reason - confirm there).
 */
static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt,
					 struct ghcb *ghcb,
					 unsigned long exit_code)
{
	enum es_result result = vc_check_opcode_bytes(ctxt, exit_code);

	if (result != ES_OK)
		return result;

	switch (exit_code) {
	case SVM_EXIT_READ_DR7:
		result = vc_handle_dr7_read(ghcb, ctxt);
		break;
	case SVM_EXIT_WRITE_DR7:
		result = vc_handle_dr7_write(ghcb, ctxt);
		break;
	case SVM_EXIT_EXCP_BASE + X86_TRAP_AC:
		result = vc_handle_trap_ac(ghcb, ctxt);
		break;
	case SVM_EXIT_RDTSC:
	case SVM_EXIT_RDTSCP:
		result = vc_handle_rdtsc(ghcb, ctxt, exit_code);
		break;
	case SVM_EXIT_RDPMC:
		result = vc_handle_rdpmc(ghcb, ctxt);
		break;
	case SVM_EXIT_INVD:
		pr_err_ratelimited("#VC exception for INVD??? Seriously???\n");
		result = ES_UNSUPPORTED;
		break;
	case SVM_EXIT_CPUID:
		result = vc_handle_cpuid(ghcb, ctxt);
		break;
	case SVM_EXIT_IOIO:
		result = vc_handle_ioio(ghcb, ctxt);
		break;
	case SVM_EXIT_MSR:
		result = vc_handle_msr(ghcb, ctxt);
		break;
	case SVM_EXIT_VMMCALL:
		result = vc_handle_vmmcall(ghcb, ctxt);
		break;
	case SVM_EXIT_WBINVD:
		result = vc_handle_wbinvd(ghcb, ctxt);
		break;
	case SVM_EXIT_MONITOR:
		result = vc_handle_monitor(ghcb, ctxt);
		break;
	case SVM_EXIT_MWAIT:
		result = vc_handle_mwait(ghcb, ctxt);
		break;
	case SVM_EXIT_NPF:
		result = vc_handle_mmio(ghcb, ctxt);
		break;
	default:
		/*
		 * Unexpected #VC exception
		 */
		result = ES_UNSUPPORTED;
	}

	return result;
}
870 
871 static __always_inline bool is_vc2_stack(unsigned long sp)
872 {
873 	return (sp >= __this_cpu_ist_bottom_va(VC2) && sp < __this_cpu_ist_top_va(VC2));
874 }
875 
876 static __always_inline bool vc_from_invalid_context(struct pt_regs *regs)
877 {
878 	unsigned long sp, prev_sp;
879 
880 	sp      = (unsigned long)regs;
881 	prev_sp = regs->sp;
882 
883 	/*
884 	 * If the code was already executing on the VC2 stack when the #VC
885 	 * happened, let it proceed to the normal handling routine. This way the
886 	 * code executing on the VC2 stack can cause #VC exceptions to get handled.
887 	 */
888 	return is_vc2_stack(sp) && !is_vc2_stack(prev_sp);
889 }
890 
/*
 * Emulate the intercepted instruction for a runtime #VC exception.
 * Returns true when handling succeeded (including a forwarded
 * exception), false when the caller must terminate the guest.
 */
static bool vc_raw_handle_exception(struct pt_regs *regs, unsigned long error_code)
{
	struct ghcb_state state;
	struct es_em_ctxt ctxt;
	enum es_result result;
	struct ghcb *ghcb;
	bool ret = true;

	/* Acquire the per-CPU GHCB for the duration of the emulation. */
	ghcb = __sev_get_ghcb(&state);

	vc_ghcb_invalidate(ghcb);
	result = vc_init_em_ctxt(&ctxt, regs, error_code);

	if (result == ES_OK)
		result = vc_handle_exitcode(&ctxt, ghcb, error_code);

	/* Release the GHCB before dispatching on the result. */
	__sev_put_ghcb(&state);

	/* Done - now check the result */
	switch (result) {
	case ES_OK:
		vc_finish_insn(&ctxt);
		break;
	case ES_UNSUPPORTED:
		pr_err_ratelimited("Unsupported exit-code 0x%02lx in #VC exception (IP: 0x%lx)\n",
				   error_code, regs->ip);
		ret = false;
		break;
	case ES_VMM_ERROR:
		pr_err_ratelimited("Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
				   error_code, regs->ip);
		ret = false;
		break;
	case ES_DECODE_FAILED:
		pr_err_ratelimited("Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
				   error_code, regs->ip);
		ret = false;
		break;
	case ES_EXCEPTION:
		vc_forward_exception(&ctxt);
		break;
	case ES_RETRY:
		/* Nothing to do */
		break;
	default:
		pr_emerg("Unknown result in %s():%d\n", __func__, result);
		/*
		 * Emulating the instruction which caused the #VC exception
		 * failed - can't continue so print debug information
		 */
		BUG();
	}

	return ret;
}
946 
947 static __always_inline bool vc_is_db(unsigned long error_code)
948 {
949 	return error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB;
950 }
951 
/*
 * Runtime #VC exception handler when raised from kernel mode. Runs in NMI mode
 * and will panic when an error happens.
 */
DEFINE_IDTENTRY_VC_KERNEL(exc_vmm_communication)
{
	irqentry_state_t irq_state;

	/*
	 * With the current implementation it is always possible to switch to a
	 * safe stack because #VC exceptions only happen at known places, like
	 * intercepted instructions or accesses to MMIO areas/IO ports. They can
	 * also happen with code instrumentation when the hypervisor intercepts
	 * #DB, but the critical paths are forbidden to be instrumented, so #DB
	 * exceptions currently also only happen in safe places.
	 *
	 * But keep this here in case the noinstr annotations are violated due
	 * to bug elsewhere.
	 */
	if (unlikely(vc_from_invalid_context(regs))) {
		instrumentation_begin();
		panic("Can't handle #VC exception from unsupported context\n");
		instrumentation_end();
	}

	/*
	 * Handle #DB before calling into !noinstr code to avoid recursive #DB.
	 */
	if (vc_is_db(error_code)) {
		exc_debug(regs);
		return;
	}

	irq_state = irqentry_nmi_enter(regs);

	instrumentation_begin();

	if (!vc_raw_handle_exception(regs, error_code)) {
		/* Show some debug info */
		show_regs(regs);

		/* Ask hypervisor to sev_es_terminate */
		sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);

		/* If that fails and we get here - just panic */
		panic("Returned from Terminate-Request to Hypervisor\n");
	}

	instrumentation_end();
	irqentry_nmi_exit(regs, irq_state);
}
1003 
/*
 * Runtime #VC exception handler when raised from user mode. Runs in IRQ mode
 * and will kill the current task with SIGBUS when an error happens.
 */
DEFINE_IDTENTRY_VC_USER(exc_vmm_communication)
{
	/*
	 * Handle #DB before calling into !noinstr code to avoid recursive #DB.
	 */
	if (vc_is_db(error_code)) {
		noist_exc_debug(regs);
		return;
	}

	irqentry_enter_from_user_mode(regs);
	instrumentation_begin();

	if (!vc_raw_handle_exception(regs, error_code)) {
		/*
		 * Do not kill the machine if user-space triggered the
		 * exception. Send SIGBUS instead and let user-space deal with
		 * it.
		 */
		force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0);
	}

	instrumentation_end();
	irqentry_exit_to_user_mode(regs);
}
1033 
/*
 * #VC handler used during early boot, while only the boot GHCB exists.
 * Returns true when the intercepted instruction was handled; on failure
 * the guest is asked to terminate (the terminate request is not expected
 * to return).
 */
bool __init handle_vc_boot_ghcb(struct pt_regs *regs)
{
	unsigned long exit_code = regs->orig_ax;
	struct es_em_ctxt ctxt;
	enum es_result result;

	vc_ghcb_invalidate(boot_ghcb);

	result = vc_init_em_ctxt(&ctxt, regs, exit_code);
	if (result == ES_OK)
		result = vc_handle_exitcode(&ctxt, boot_ghcb, exit_code);

	/* Done - now check the result */
	switch (result) {
	case ES_OK:
		vc_finish_insn(&ctxt);
		break;
	case ES_UNSUPPORTED:
		early_printk("PANIC: Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n",
				exit_code, regs->ip);
		goto fail;
	case ES_VMM_ERROR:
		early_printk("PANIC: Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
				exit_code, regs->ip);
		goto fail;
	case ES_DECODE_FAILED:
		early_printk("PANIC: Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
				exit_code, regs->ip);
		goto fail;
	case ES_EXCEPTION:
		vc_early_forward_exception(&ctxt);
		break;
	case ES_RETRY:
		/* Nothing to do */
		break;
	default:
		BUG();
	}

	return true;

fail:
	show_regs(regs);

	sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
}
1080 
1081