xref: /linux/arch/x86/coco/sev/vc-handle.c (revision e0c0ab04f6785abaa71b9b8dc252cb1a2072c225)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * AMD Memory Encryption Support
4  *
5  * Copyright (C) 2019 SUSE
6  *
7  * Author: Joerg Roedel <jroedel@suse.de>
8  */
9 
10 #define pr_fmt(fmt)	"SEV: " fmt
11 
12 #include <linux/sched/debug.h>	/* For show_regs() */
13 #include <linux/cc_platform.h>
14 #include <linux/printk.h>
15 #include <linux/mm_types.h>
16 #include <linux/kernel.h>
17 #include <linux/mm.h>
18 #include <linux/io.h>
19 #include <linux/psp-sev.h>
20 #include <uapi/linux/sev-guest.h>
21 
22 #include <asm/init.h>
23 #include <asm/stacktrace.h>
24 #include <asm/sev.h>
25 #include <asm/sev-internal.h>
26 #include <asm/insn-eval.h>
27 #include <asm/fpu/xcr.h>
28 #include <asm/processor.h>
29 #include <asm/setup.h>
30 #include <asm/traps.h>
31 #include <asm/svm.h>
32 #include <asm/smp.h>
33 #include <asm/cpu.h>
34 #include <asm/apic.h>
35 #include <asm/cpuid/api.h>
36 
37 static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
38 					   unsigned long vaddr, phys_addr_t *paddr)
39 {
40 	unsigned long va = (unsigned long)vaddr;
41 	unsigned int level;
42 	phys_addr_t pa;
43 	pgd_t *pgd;
44 	pte_t *pte;
45 
46 	pgd = __va(read_cr3_pa());
47 	pgd = &pgd[pgd_index(va)];
48 	pte = lookup_address_in_pgd(pgd, va, &level);
49 	if (!pte) {
50 		ctxt->fi.vector     = X86_TRAP_PF;
51 		ctxt->fi.cr2        = vaddr;
52 		ctxt->fi.error_code = 0;
53 
54 		if (user_mode(ctxt->regs))
55 			ctxt->fi.error_code |= X86_PF_USER;
56 
57 		return ES_EXCEPTION;
58 	}
59 
60 	if (WARN_ON_ONCE(pte_val(*pte) & _PAGE_ENC))
61 		/* Emulated MMIO to/from encrypted memory not supported */
62 		return ES_UNSUPPORTED;
63 
64 	pa = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
65 	pa |= va & ~page_level_mask(level);
66 
67 	*paddr = pa;
68 
69 	return ES_OK;
70 }
71 
72 static enum es_result vc_ioio_check(struct es_em_ctxt *ctxt, u16 port, size_t size)
73 {
74 	BUG_ON(size > 4);
75 
76 	if (user_mode(ctxt->regs)) {
77 		struct thread_struct *t = &current->thread;
78 		struct io_bitmap *iobm = t->io_bitmap;
79 		size_t idx;
80 
81 		if (!iobm)
82 			goto fault;
83 
84 		for (idx = port; idx < port + size; ++idx) {
85 			if (test_bit(idx, iobm->bitmap))
86 				goto fault;
87 		}
88 	}
89 
90 	return ES_OK;
91 
92 fault:
93 	ctxt->fi.vector = X86_TRAP_GP;
94 	ctxt->fi.error_code = 0;
95 
96 	return ES_EXCEPTION;
97 }
98 
99 void vc_forward_exception(struct es_em_ctxt *ctxt)
100 {
101 	long error_code = ctxt->fi.error_code;
102 	int trapnr = ctxt->fi.vector;
103 
104 	ctxt->regs->orig_ax = ctxt->fi.error_code;
105 
106 	switch (trapnr) {
107 	case X86_TRAP_GP:
108 		exc_general_protection(ctxt->regs, error_code);
109 		break;
110 	case X86_TRAP_UD:
111 		exc_invalid_op(ctxt->regs);
112 		break;
113 	case X86_TRAP_PF:
114 		write_cr2(ctxt->fi.cr2);
115 		exc_page_fault(ctxt->regs, error_code);
116 		break;
117 	case X86_TRAP_AC:
118 		exc_alignment_check(ctxt->regs, error_code);
119 		break;
120 	default:
121 		pr_emerg("Unsupported exception in #VC instruction emulation - can't continue\n");
122 		BUG();
123 	}
124 }
125 
126 static int vc_fetch_insn_kernel(struct es_em_ctxt *ctxt,
127 				unsigned char *buffer)
128 {
129 	return copy_from_kernel_nofault(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE);
130 }
131 
132 static enum es_result __vc_decode_user_insn(struct es_em_ctxt *ctxt)
133 {
134 	char buffer[MAX_INSN_SIZE];
135 	int insn_bytes;
136 
137 	insn_bytes = insn_fetch_from_user_inatomic(ctxt->regs, buffer);
138 	if (insn_bytes == 0) {
139 		/* Nothing could be copied */
140 		ctxt->fi.vector     = X86_TRAP_PF;
141 		ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER;
142 		ctxt->fi.cr2        = ctxt->regs->ip;
143 		return ES_EXCEPTION;
144 	} else if (insn_bytes == -EINVAL) {
145 		/* Effective RIP could not be calculated */
146 		ctxt->fi.vector     = X86_TRAP_GP;
147 		ctxt->fi.error_code = 0;
148 		ctxt->fi.cr2        = 0;
149 		return ES_EXCEPTION;
150 	}
151 
152 	if (!insn_decode_from_regs(&ctxt->insn, ctxt->regs, buffer, insn_bytes))
153 		return ES_DECODE_FAILED;
154 
155 	if (ctxt->insn.immediate.got)
156 		return ES_OK;
157 	else
158 		return ES_DECODE_FAILED;
159 }
160 
161 static enum es_result __vc_decode_kern_insn(struct es_em_ctxt *ctxt)
162 {
163 	char buffer[MAX_INSN_SIZE];
164 	int res, ret;
165 
166 	res = vc_fetch_insn_kernel(ctxt, buffer);
167 	if (res) {
168 		ctxt->fi.vector     = X86_TRAP_PF;
169 		ctxt->fi.error_code = X86_PF_INSTR;
170 		ctxt->fi.cr2        = ctxt->regs->ip;
171 		return ES_EXCEPTION;
172 	}
173 
174 	ret = insn_decode(&ctxt->insn, buffer, MAX_INSN_SIZE, INSN_MODE_64);
175 	if (ret < 0)
176 		return ES_DECODE_FAILED;
177 	else
178 		return ES_OK;
179 }
180 
181 static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
182 {
183 	if (user_mode(ctxt->regs))
184 		return __vc_decode_user_insn(ctxt);
185 	else
186 		return __vc_decode_kern_insn(ctxt);
187 }
188 
189 static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
190 				   char *dst, char *buf, size_t size)
191 {
192 	unsigned long error_code = X86_PF_PROT | X86_PF_WRITE;
193 
194 	/*
195 	 * This function uses __put_user() independent of whether kernel or user
196 	 * memory is accessed. This works fine because __put_user() does no
197 	 * sanity checks of the pointer being accessed. All that it does is
198 	 * to report when the access failed.
199 	 *
200 	 * Also, this function runs in atomic context, so __put_user() is not
201 	 * allowed to sleep. The page-fault handler detects that it is running
202 	 * in atomic context and will not try to take mmap_sem and handle the
203 	 * fault, so additional pagefault_enable()/disable() calls are not
204 	 * needed.
205 	 *
206 	 * The access can't be done via copy_to_user() here because
207 	 * vc_write_mem() must not use string instructions to access unsafe
208 	 * memory. The reason is that MOVS is emulated by the #VC handler by
209 	 * splitting the move up into a read and a write and taking a nested #VC
210 	 * exception on whatever of them is the MMIO access. Using string
211 	 * instructions here would cause infinite nesting.
212 	 */
213 	switch (size) {
214 	case 1: {
215 		u8 d1;
216 		u8 __user *target = (u8 __user *)dst;
217 
218 		memcpy(&d1, buf, 1);
219 		if (__put_user(d1, target))
220 			goto fault;
221 		break;
222 	}
223 	case 2: {
224 		u16 d2;
225 		u16 __user *target = (u16 __user *)dst;
226 
227 		memcpy(&d2, buf, 2);
228 		if (__put_user(d2, target))
229 			goto fault;
230 		break;
231 	}
232 	case 4: {
233 		u32 d4;
234 		u32 __user *target = (u32 __user *)dst;
235 
236 		memcpy(&d4, buf, 4);
237 		if (__put_user(d4, target))
238 			goto fault;
239 		break;
240 	}
241 	case 8: {
242 		u64 d8;
243 		u64 __user *target = (u64 __user *)dst;
244 
245 		memcpy(&d8, buf, 8);
246 		if (__put_user(d8, target))
247 			goto fault;
248 		break;
249 	}
250 	default:
251 		WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
252 		return ES_UNSUPPORTED;
253 	}
254 
255 	return ES_OK;
256 
257 fault:
258 	if (user_mode(ctxt->regs))
259 		error_code |= X86_PF_USER;
260 
261 	ctxt->fi.vector = X86_TRAP_PF;
262 	ctxt->fi.error_code = error_code;
263 	ctxt->fi.cr2 = (unsigned long)dst;
264 
265 	return ES_EXCEPTION;
266 }
267 
268 static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
269 				  char *src, char *buf, size_t size)
270 {
271 	unsigned long error_code = X86_PF_PROT;
272 
273 	/*
274 	 * This function uses __get_user() independent of whether kernel or user
275 	 * memory is accessed. This works fine because __get_user() does no
276 	 * sanity checks of the pointer being accessed. All that it does is
277 	 * to report when the access failed.
278 	 *
279 	 * Also, this function runs in atomic context, so __get_user() is not
280 	 * allowed to sleep. The page-fault handler detects that it is running
281 	 * in atomic context and will not try to take mmap_sem and handle the
282 	 * fault, so additional pagefault_enable()/disable() calls are not
283 	 * needed.
284 	 *
285 	 * The access can't be done via copy_from_user() here because
286 	 * vc_read_mem() must not use string instructions to access unsafe
287 	 * memory. The reason is that MOVS is emulated by the #VC handler by
288 	 * splitting the move up into a read and a write and taking a nested #VC
289 	 * exception on whatever of them is the MMIO access. Using string
290 	 * instructions here would cause infinite nesting.
291 	 */
292 	switch (size) {
293 	case 1: {
294 		u8 d1;
295 		u8 __user *s = (u8 __user *)src;
296 
297 		if (__get_user(d1, s))
298 			goto fault;
299 		memcpy(buf, &d1, 1);
300 		break;
301 	}
302 	case 2: {
303 		u16 d2;
304 		u16 __user *s = (u16 __user *)src;
305 
306 		if (__get_user(d2, s))
307 			goto fault;
308 		memcpy(buf, &d2, 2);
309 		break;
310 	}
311 	case 4: {
312 		u32 d4;
313 		u32 __user *s = (u32 __user *)src;
314 
315 		if (__get_user(d4, s))
316 			goto fault;
317 		memcpy(buf, &d4, 4);
318 		break;
319 	}
320 	case 8: {
321 		u64 d8;
322 		u64 __user *s = (u64 __user *)src;
323 		if (__get_user(d8, s))
324 			goto fault;
325 		memcpy(buf, &d8, 8);
326 		break;
327 	}
328 	default:
329 		WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
330 		return ES_UNSUPPORTED;
331 	}
332 
333 	return ES_OK;
334 
335 fault:
336 	if (user_mode(ctxt->regs))
337 		error_code |= X86_PF_USER;
338 
339 	ctxt->fi.vector = X86_TRAP_PF;
340 	ctxt->fi.error_code = error_code;
341 	ctxt->fi.cr2 = (unsigned long)src;
342 
343 	return ES_EXCEPTION;
344 }
345 
/* sev_printk() forwards its format string and arguments to printk(). */
#define sev_printk(format, ...)		printk(format, ##__VA_ARGS__)
347 
348 #include "vc-shared.c"
349 
350 /* Writes to the SVSM CAA MSR are ignored */
351 static enum es_result __vc_handle_msr_caa(struct pt_regs *regs, bool write)
352 {
353 	if (write)
354 		return ES_OK;
355 
356 	regs->ax = lower_32_bits(this_cpu_read(svsm_caa_pa));
357 	regs->dx = upper_32_bits(this_cpu_read(svsm_caa_pa));
358 
359 	return ES_OK;
360 }
361 
362 /*
363  * TSC related accesses should not exit to the hypervisor when a guest is
364  * executing with Secure TSC enabled, so special handling is required for
365  * accesses of MSR_IA32_TSC and MSR_AMD64_GUEST_TSC_FREQ.
366  */
367 static enum es_result __vc_handle_secure_tsc_msrs(struct pt_regs *regs, bool write)
368 {
369 	u64 tsc;
370 
371 	/*
372 	 * GUEST_TSC_FREQ should not be intercepted when Secure TSC is enabled.
373 	 * Terminate the SNP guest when the interception is enabled.
374 	 */
375 	if (regs->cx == MSR_AMD64_GUEST_TSC_FREQ)
376 		return ES_VMM_ERROR;
377 
378 	/*
379 	 * Writes: Writing to MSR_IA32_TSC can cause subsequent reads of the TSC
380 	 *         to return undefined values, so ignore all writes.
381 	 *
382 	 * Reads: Reads of MSR_IA32_TSC should return the current TSC value, use
383 	 *        the value returned by rdtsc_ordered().
384 	 */
385 	if (write) {
386 		WARN_ONCE(1, "TSC MSR writes are verboten!\n");
387 		return ES_OK;
388 	}
389 
390 	tsc = rdtsc_ordered();
391 	regs->ax = lower_32_bits(tsc);
392 	regs->dx = upper_32_bits(tsc);
393 
394 	return ES_OK;
395 }
396 
397 static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
398 {
399 	struct pt_regs *regs = ctxt->regs;
400 	enum es_result ret;
401 	bool write;
402 
403 	/* Is it a WRMSR? */
404 	write = ctxt->insn.opcode.bytes[1] == 0x30;
405 
406 	switch (regs->cx) {
407 	case MSR_SVSM_CAA:
408 		return __vc_handle_msr_caa(regs, write);
409 	case MSR_IA32_TSC:
410 	case MSR_AMD64_GUEST_TSC_FREQ:
411 		if (sev_status & MSR_AMD64_SNP_SECURE_TSC)
412 			return __vc_handle_secure_tsc_msrs(regs, write);
413 		break;
414 	default:
415 		break;
416 	}
417 
418 	ghcb_set_rcx(ghcb, regs->cx);
419 	if (write) {
420 		ghcb_set_rax(ghcb, regs->ax);
421 		ghcb_set_rdx(ghcb, regs->dx);
422 	}
423 
424 	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_MSR, write, 0);
425 
426 	if ((ret == ES_OK) && !write) {
427 		regs->ax = ghcb->save.rax;
428 		regs->dx = ghcb->save.rdx;
429 	}
430 
431 	return ret;
432 }
433 
434 static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt)
435 {
436 	int trapnr = ctxt->fi.vector;
437 
438 	if (trapnr == X86_TRAP_PF)
439 		native_write_cr2(ctxt->fi.cr2);
440 
441 	ctxt->regs->orig_ax = ctxt->fi.error_code;
442 	do_early_exception(ctxt->regs, trapnr);
443 }
444 
445 static long *vc_insn_get_rm(struct es_em_ctxt *ctxt)
446 {
447 	long *reg_array;
448 	int offset;
449 
450 	reg_array = (long *)ctxt->regs;
451 	offset    = insn_get_modrm_rm_off(&ctxt->insn, ctxt->regs);
452 
453 	if (offset < 0)
454 		return NULL;
455 
456 	offset /= sizeof(long);
457 
458 	return reg_array + offset;
459 }
460 static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
461 				 unsigned int bytes, bool read)
462 {
463 	u64 exit_code, exit_info_1, exit_info_2;
464 	unsigned long ghcb_pa = __pa(ghcb);
465 	enum es_result res;
466 	phys_addr_t paddr;
467 	void __user *ref;
468 
469 	ref = insn_get_addr_ref(&ctxt->insn, ctxt->regs);
470 	if (ref == (void __user *)-1L)
471 		return ES_UNSUPPORTED;
472 
473 	exit_code = read ? SVM_VMGEXIT_MMIO_READ : SVM_VMGEXIT_MMIO_WRITE;
474 
475 	res = vc_slow_virt_to_phys(ghcb, ctxt, (unsigned long)ref, &paddr);
476 	if (res != ES_OK) {
477 		if (res == ES_EXCEPTION && !read)
478 			ctxt->fi.error_code |= X86_PF_WRITE;
479 
480 		return res;
481 	}
482 
483 	exit_info_1 = paddr;
484 	/* Can never be greater than 8 */
485 	exit_info_2 = bytes;
486 
487 	ghcb_set_sw_scratch(ghcb, ghcb_pa + offsetof(struct ghcb, shared_buffer));
488 
489 	return sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, exit_info_1, exit_info_2);
490 }
491 
492 /*
493  * The MOVS instruction has two memory operands, which raises the
494  * problem that it is not known whether the access to the source or the
495  * destination caused the #VC exception (and hence whether an MMIO read
496  * or write operation needs to be emulated).
497  *
498  * Instead of playing games with walking page-tables and trying to guess
499  * whether the source or destination is an MMIO range, split the move
500  * into two operations, a read and a write with only one memory operand.
501  * This will cause a nested #VC exception on the MMIO address which can
502  * then be handled.
503  *
504  * This implementation has the benefit that it also supports MOVS where
505  * source _and_ destination are MMIO regions.
506  *
507  * It will slow MOVS on MMIO down a lot, but in SEV-ES guests it is a
508  * rare operation. If it turns out to be a performance problem the split
509  * operations can be moved to memcpy_fromio() and memcpy_toio().
510  */
511 static enum es_result vc_handle_mmio_movs(struct es_em_ctxt *ctxt,
512 					  unsigned int bytes)
513 {
514 	unsigned long ds_base, es_base;
515 	unsigned char *src, *dst;
516 	unsigned char buffer[8];
517 	enum es_result ret;
518 	bool rep;
519 	int off;
520 
521 	ds_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_DS);
522 	es_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_ES);
523 
524 	if (ds_base == -1L || es_base == -1L) {
525 		ctxt->fi.vector = X86_TRAP_GP;
526 		ctxt->fi.error_code = 0;
527 		return ES_EXCEPTION;
528 	}
529 
530 	src = ds_base + (unsigned char *)ctxt->regs->si;
531 	dst = es_base + (unsigned char *)ctxt->regs->di;
532 
533 	ret = vc_read_mem(ctxt, src, buffer, bytes);
534 	if (ret != ES_OK)
535 		return ret;
536 
537 	ret = vc_write_mem(ctxt, dst, buffer, bytes);
538 	if (ret != ES_OK)
539 		return ret;
540 
541 	if (ctxt->regs->flags & X86_EFLAGS_DF)
542 		off = -bytes;
543 	else
544 		off =  bytes;
545 
546 	ctxt->regs->si += off;
547 	ctxt->regs->di += off;
548 
549 	rep = insn_has_rep_prefix(&ctxt->insn);
550 	if (rep)
551 		ctxt->regs->cx -= 1;
552 
553 	if (!rep || ctxt->regs->cx == 0)
554 		return ES_OK;
555 	else
556 		return ES_RETRY;
557 }
558 
559 static enum es_result vc_handle_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
560 {
561 	struct insn *insn = &ctxt->insn;
562 	enum insn_mmio_type mmio;
563 	unsigned int bytes = 0;
564 	enum es_result ret;
565 	u8 sign_byte;
566 	long *reg_data;
567 
568 	mmio = insn_decode_mmio(insn, &bytes);
569 	if (mmio == INSN_MMIO_DECODE_FAILED)
570 		return ES_DECODE_FAILED;
571 
572 	if (mmio != INSN_MMIO_WRITE_IMM && mmio != INSN_MMIO_MOVS) {
573 		reg_data = insn_get_modrm_reg_ptr(insn, ctxt->regs);
574 		if (!reg_data)
575 			return ES_DECODE_FAILED;
576 	}
577 
578 	if (user_mode(ctxt->regs))
579 		return ES_UNSUPPORTED;
580 
581 	switch (mmio) {
582 	case INSN_MMIO_WRITE:
583 		memcpy(ghcb->shared_buffer, reg_data, bytes);
584 		ret = vc_do_mmio(ghcb, ctxt, bytes, false);
585 		break;
586 	case INSN_MMIO_WRITE_IMM:
587 		memcpy(ghcb->shared_buffer, insn->immediate1.bytes, bytes);
588 		ret = vc_do_mmio(ghcb, ctxt, bytes, false);
589 		break;
590 	case INSN_MMIO_READ:
591 		ret = vc_do_mmio(ghcb, ctxt, bytes, true);
592 		if (ret)
593 			break;
594 
595 		/* Zero-extend for 32-bit operation */
596 		if (bytes == 4)
597 			*reg_data = 0;
598 
599 		memcpy(reg_data, ghcb->shared_buffer, bytes);
600 		break;
601 	case INSN_MMIO_READ_ZERO_EXTEND:
602 		ret = vc_do_mmio(ghcb, ctxt, bytes, true);
603 		if (ret)
604 			break;
605 
606 		/* Zero extend based on operand size */
607 		memset(reg_data, 0, insn->opnd_bytes);
608 		memcpy(reg_data, ghcb->shared_buffer, bytes);
609 		break;
610 	case INSN_MMIO_READ_SIGN_EXTEND:
611 		ret = vc_do_mmio(ghcb, ctxt, bytes, true);
612 		if (ret)
613 			break;
614 
615 		if (bytes == 1) {
616 			u8 *val = (u8 *)ghcb->shared_buffer;
617 
618 			sign_byte = (*val & 0x80) ? 0xff : 0x00;
619 		} else {
620 			u16 *val = (u16 *)ghcb->shared_buffer;
621 
622 			sign_byte = (*val & 0x8000) ? 0xff : 0x00;
623 		}
624 
625 		/* Sign extend based on operand size */
626 		memset(reg_data, sign_byte, insn->opnd_bytes);
627 		memcpy(reg_data, ghcb->shared_buffer, bytes);
628 		break;
629 	case INSN_MMIO_MOVS:
630 		ret = vc_handle_mmio_movs(ctxt, bytes);
631 		break;
632 	default:
633 		ret = ES_UNSUPPORTED;
634 		break;
635 	}
636 
637 	return ret;
638 }
639 
640 static enum es_result vc_handle_dr7_write(struct ghcb *ghcb,
641 					  struct es_em_ctxt *ctxt)
642 {
643 	struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
644 	long val, *reg = vc_insn_get_rm(ctxt);
645 	enum es_result ret;
646 
647 	if (sev_status & MSR_AMD64_SNP_DEBUG_SWAP)
648 		return ES_VMM_ERROR;
649 
650 	if (!reg)
651 		return ES_DECODE_FAILED;
652 
653 	val = *reg;
654 
655 	/* Upper 32 bits must be written as zeroes */
656 	if (val >> 32) {
657 		ctxt->fi.vector = X86_TRAP_GP;
658 		ctxt->fi.error_code = 0;
659 		return ES_EXCEPTION;
660 	}
661 
662 	/* Clear out other reserved bits and set bit 10 */
663 	val = (val & 0xffff23ffL) | BIT(10);
664 
665 	/* Early non-zero writes to DR7 are not supported */
666 	if (!data && (val & ~DR7_RESET_VALUE))
667 		return ES_UNSUPPORTED;
668 
669 	/* Using a value of 0 for ExitInfo1 means RAX holds the value */
670 	ghcb_set_rax(ghcb, val);
671 	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WRITE_DR7, 0, 0);
672 	if (ret != ES_OK)
673 		return ret;
674 
675 	if (data)
676 		data->dr7 = val;
677 
678 	return ES_OK;
679 }
680 
681 static enum es_result vc_handle_dr7_read(struct ghcb *ghcb,
682 					 struct es_em_ctxt *ctxt)
683 {
684 	struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
685 	long *reg = vc_insn_get_rm(ctxt);
686 
687 	if (sev_status & MSR_AMD64_SNP_DEBUG_SWAP)
688 		return ES_VMM_ERROR;
689 
690 	if (!reg)
691 		return ES_DECODE_FAILED;
692 
693 	if (data)
694 		*reg = data->dr7;
695 	else
696 		*reg = DR7_RESET_VALUE;
697 
698 	return ES_OK;
699 }
700 
701 static enum es_result vc_handle_wbinvd(struct ghcb *ghcb,
702 				       struct es_em_ctxt *ctxt)
703 {
704 	return sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WBINVD, 0, 0);
705 }
706 
707 static enum es_result vc_handle_rdpmc(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
708 {
709 	enum es_result ret;
710 
711 	ghcb_set_rcx(ghcb, ctxt->regs->cx);
712 
713 	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_RDPMC, 0, 0);
714 	if (ret != ES_OK)
715 		return ret;
716 
717 	if (!(ghcb_rax_is_valid(ghcb) && ghcb_rdx_is_valid(ghcb)))
718 		return ES_VMM_ERROR;
719 
720 	ctxt->regs->ax = ghcb->save.rax;
721 	ctxt->regs->dx = ghcb->save.rdx;
722 
723 	return ES_OK;
724 }
725 
726 static enum es_result vc_handle_monitor(struct ghcb *ghcb,
727 					struct es_em_ctxt *ctxt)
728 {
729 	/*
730 	 * Treat it as a NOP and do not leak a physical address to the
731 	 * hypervisor.
732 	 */
733 	return ES_OK;
734 }
735 
736 static enum es_result vc_handle_mwait(struct ghcb *ghcb,
737 				      struct es_em_ctxt *ctxt)
738 {
739 	/* Treat the same as MONITOR/MONITORX */
740 	return ES_OK;
741 }
742 
743 static enum es_result vc_handle_vmmcall(struct ghcb *ghcb,
744 					struct es_em_ctxt *ctxt)
745 {
746 	enum es_result ret;
747 
748 	ghcb_set_rax(ghcb, ctxt->regs->ax);
749 	ghcb_set_cpl(ghcb, user_mode(ctxt->regs) ? 3 : 0);
750 
751 	if (x86_platform.hyper.sev_es_hcall_prepare)
752 		x86_platform.hyper.sev_es_hcall_prepare(ghcb, ctxt->regs);
753 
754 	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_VMMCALL, 0, 0);
755 	if (ret != ES_OK)
756 		return ret;
757 
758 	if (!ghcb_rax_is_valid(ghcb))
759 		return ES_VMM_ERROR;
760 
761 	ctxt->regs->ax = ghcb->save.rax;
762 
763 	/*
764 	 * Call sev_es_hcall_finish() after regs->ax is already set.
765 	 * This allows the hypervisor handler to overwrite it again if
766 	 * necessary.
767 	 */
768 	if (x86_platform.hyper.sev_es_hcall_finish &&
769 	    !x86_platform.hyper.sev_es_hcall_finish(ghcb, ctxt->regs))
770 		return ES_VMM_ERROR;
771 
772 	return ES_OK;
773 }
774 
775 static enum es_result vc_handle_trap_ac(struct ghcb *ghcb,
776 					struct es_em_ctxt *ctxt)
777 {
778 	/*
779 	 * Calling ecx_alignment_check() directly does not work, because it
780 	 * enables IRQs and the GHCB is active. Forward the exception and call
781 	 * it later from vc_forward_exception().
782 	 */
783 	ctxt->fi.vector = X86_TRAP_AC;
784 	ctxt->fi.error_code = 0;
785 	return ES_EXCEPTION;
786 }
787 
788 static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt,
789 					 struct ghcb *ghcb,
790 					 unsigned long exit_code)
791 {
792 	enum es_result result = vc_check_opcode_bytes(ctxt, exit_code);
793 
794 	if (result != ES_OK)
795 		return result;
796 
797 	switch (exit_code) {
798 	case SVM_EXIT_READ_DR7:
799 		result = vc_handle_dr7_read(ghcb, ctxt);
800 		break;
801 	case SVM_EXIT_WRITE_DR7:
802 		result = vc_handle_dr7_write(ghcb, ctxt);
803 		break;
804 	case SVM_EXIT_EXCP_BASE + X86_TRAP_AC:
805 		result = vc_handle_trap_ac(ghcb, ctxt);
806 		break;
807 	case SVM_EXIT_RDTSC:
808 	case SVM_EXIT_RDTSCP:
809 		result = vc_handle_rdtsc(ghcb, ctxt, exit_code);
810 		break;
811 	case SVM_EXIT_RDPMC:
812 		result = vc_handle_rdpmc(ghcb, ctxt);
813 		break;
814 	case SVM_EXIT_INVD:
815 		pr_err_ratelimited("#VC exception for INVD??? Seriously???\n");
816 		result = ES_UNSUPPORTED;
817 		break;
818 	case SVM_EXIT_CPUID:
819 		result = vc_handle_cpuid(ghcb, ctxt);
820 		break;
821 	case SVM_EXIT_IOIO:
822 		result = vc_handle_ioio(ghcb, ctxt);
823 		break;
824 	case SVM_EXIT_MSR:
825 		result = vc_handle_msr(ghcb, ctxt);
826 		break;
827 	case SVM_EXIT_VMMCALL:
828 		result = vc_handle_vmmcall(ghcb, ctxt);
829 		break;
830 	case SVM_EXIT_WBINVD:
831 		result = vc_handle_wbinvd(ghcb, ctxt);
832 		break;
833 	case SVM_EXIT_MONITOR:
834 		result = vc_handle_monitor(ghcb, ctxt);
835 		break;
836 	case SVM_EXIT_MWAIT:
837 		result = vc_handle_mwait(ghcb, ctxt);
838 		break;
839 	case SVM_EXIT_NPF:
840 		result = vc_handle_mmio(ghcb, ctxt);
841 		break;
842 	default:
843 		/*
844 		 * Unexpected #VC exception
845 		 */
846 		result = ES_UNSUPPORTED;
847 	}
848 
849 	return result;
850 }
851 
852 static __always_inline bool is_vc2_stack(unsigned long sp)
853 {
854 	return (sp >= __this_cpu_ist_bottom_va(VC2) && sp < __this_cpu_ist_top_va(VC2));
855 }
856 
857 static __always_inline bool vc_from_invalid_context(struct pt_regs *regs)
858 {
859 	unsigned long sp, prev_sp;
860 
861 	sp      = (unsigned long)regs;
862 	prev_sp = regs->sp;
863 
864 	/*
865 	 * If the code was already executing on the VC2 stack when the #VC
866 	 * happened, let it proceed to the normal handling routine. This way the
867 	 * code executing on the VC2 stack can cause #VC exceptions to get handled.
868 	 */
869 	return is_vc2_stack(sp) && !is_vc2_stack(prev_sp);
870 }
871 
872 static bool vc_raw_handle_exception(struct pt_regs *regs, unsigned long error_code)
873 {
874 	struct ghcb_state state;
875 	struct es_em_ctxt ctxt;
876 	enum es_result result;
877 	struct ghcb *ghcb;
878 	bool ret = true;
879 
880 	ghcb = __sev_get_ghcb(&state);
881 
882 	vc_ghcb_invalidate(ghcb);
883 	result = vc_init_em_ctxt(&ctxt, regs, error_code);
884 
885 	if (result == ES_OK)
886 		result = vc_handle_exitcode(&ctxt, ghcb, error_code);
887 
888 	__sev_put_ghcb(&state);
889 
890 	/* Done - now check the result */
891 	switch (result) {
892 	case ES_OK:
893 		vc_finish_insn(&ctxt);
894 		break;
895 	case ES_UNSUPPORTED:
896 		pr_err_ratelimited("Unsupported exit-code 0x%02lx in #VC exception (IP: 0x%lx)\n",
897 				   error_code, regs->ip);
898 		ret = false;
899 		break;
900 	case ES_VMM_ERROR:
901 		pr_err_ratelimited("Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
902 				   error_code, regs->ip);
903 		ret = false;
904 		break;
905 	case ES_DECODE_FAILED:
906 		pr_err_ratelimited("Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
907 				   error_code, regs->ip);
908 		ret = false;
909 		break;
910 	case ES_EXCEPTION:
911 		vc_forward_exception(&ctxt);
912 		break;
913 	case ES_RETRY:
914 		/* Nothing to do */
915 		break;
916 	default:
917 		pr_emerg("Unknown result in %s():%d\n", __func__, result);
918 		/*
919 		 * Emulating the instruction which caused the #VC exception
920 		 * failed - can't continue so print debug information
921 		 */
922 		BUG();
923 	}
924 
925 	return ret;
926 }
927 
928 static __always_inline bool vc_is_db(unsigned long error_code)
929 {
930 	return error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB;
931 }
932 
933 /*
934  * Runtime #VC exception handler when raised from kernel mode. Runs in NMI mode
935  * and will panic when an error happens.
936  */
937 DEFINE_IDTENTRY_VC_KERNEL(exc_vmm_communication)
938 {
939 	irqentry_state_t irq_state;
940 
941 	/*
942 	 * With the current implementation it is always possible to switch to a
943 	 * safe stack because #VC exceptions only happen at known places, like
944 	 * intercepted instructions or accesses to MMIO areas/IO ports. They can
945 	 * also happen with code instrumentation when the hypervisor intercepts
946 	 * #DB, but the critical paths are forbidden to be instrumented, so #DB
947 	 * exceptions currently also only happen in safe places.
948 	 *
949 	 * But keep this here in case the noinstr annotations are violated due
950 	 * to bug elsewhere.
951 	 */
952 	if (unlikely(vc_from_invalid_context(regs))) {
953 		instrumentation_begin();
954 		panic("Can't handle #VC exception from unsupported context\n");
955 		instrumentation_end();
956 	}
957 
958 	/*
959 	 * Handle #DB before calling into !noinstr code to avoid recursive #DB.
960 	 */
961 	if (vc_is_db(error_code)) {
962 		exc_debug(regs);
963 		return;
964 	}
965 
966 	irq_state = irqentry_nmi_enter(regs);
967 
968 	instrumentation_begin();
969 
970 	if (!vc_raw_handle_exception(regs, error_code)) {
971 		/* Show some debug info */
972 		show_regs(regs);
973 
974 		/* Ask hypervisor to sev_es_terminate */
975 		sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
976 
977 		/* If that fails and we get here - just panic */
978 		panic("Returned from Terminate-Request to Hypervisor\n");
979 	}
980 
981 	instrumentation_end();
982 	irqentry_nmi_exit(regs, irq_state);
983 }
984 
985 /*
986  * Runtime #VC exception handler when raised from user mode. Runs in IRQ mode
987  * and will kill the current task with SIGBUS when an error happens.
988  */
989 DEFINE_IDTENTRY_VC_USER(exc_vmm_communication)
990 {
991 	/*
992 	 * Handle #DB before calling into !noinstr code to avoid recursive #DB.
993 	 */
994 	if (vc_is_db(error_code)) {
995 		noist_exc_debug(regs);
996 		return;
997 	}
998 
999 	irqentry_enter_from_user_mode(regs);
1000 	instrumentation_begin();
1001 
1002 	if (!vc_raw_handle_exception(regs, error_code)) {
1003 		/*
1004 		 * Do not kill the machine if user-space triggered the
1005 		 * exception. Send SIGBUS instead and let user-space deal with
1006 		 * it.
1007 		 */
1008 		force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0);
1009 	}
1010 
1011 	instrumentation_end();
1012 	irqentry_exit_to_user_mode(regs);
1013 }
1014 
1015 bool __init handle_vc_boot_ghcb(struct pt_regs *regs)
1016 {
1017 	unsigned long exit_code = regs->orig_ax;
1018 	struct es_em_ctxt ctxt;
1019 	enum es_result result;
1020 
1021 	vc_ghcb_invalidate(boot_ghcb);
1022 
1023 	result = vc_init_em_ctxt(&ctxt, regs, exit_code);
1024 	if (result == ES_OK)
1025 		result = vc_handle_exitcode(&ctxt, boot_ghcb, exit_code);
1026 
1027 	/* Done - now check the result */
1028 	switch (result) {
1029 	case ES_OK:
1030 		vc_finish_insn(&ctxt);
1031 		break;
1032 	case ES_UNSUPPORTED:
1033 		early_printk("PANIC: Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n",
1034 				exit_code, regs->ip);
1035 		goto fail;
1036 	case ES_VMM_ERROR:
1037 		early_printk("PANIC: Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
1038 				exit_code, regs->ip);
1039 		goto fail;
1040 	case ES_DECODE_FAILED:
1041 		early_printk("PANIC: Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
1042 				exit_code, regs->ip);
1043 		goto fail;
1044 	case ES_EXCEPTION:
1045 		vc_early_forward_exception(&ctxt);
1046 		break;
1047 	case ES_RETRY:
1048 		/* Nothing to do */
1049 		break;
1050 	default:
1051 		BUG();
1052 	}
1053 
1054 	return true;
1055 
1056 fail:
1057 	show_regs(regs);
1058 
1059 	sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
1060 }
1061 
1062