xref: /freebsd/sys/riscv/vmm/vmm_riscv.c (revision 68206add1173920065092c06eea40c90bb73b5c5)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2024 Ruslan Bukin <br@bsdpad.com>
5  *
6  * This software was developed by the University of Cambridge Computer
7  * Laboratory (Department of Computer Science and Technology) under Innovate
8  * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
9  * Prototype".
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/smp.h>
36 #include <sys/kernel.h>
37 #include <sys/malloc.h>
38 #include <sys/mman.h>
39 #include <sys/pcpu.h>
40 #include <sys/proc.h>
41 #include <sys/rman.h>
42 #include <sys/sysctl.h>
43 #include <sys/lock.h>
44 #include <sys/mutex.h>
45 #include <sys/vmem.h>
46 #include <sys/bus.h>
47 
48 #include <vm/vm.h>
49 #include <vm/pmap.h>
50 #include <vm/vm_extern.h>
51 #include <vm/vm_map.h>
52 #include <vm/vm_page.h>
53 #include <vm/vm_param.h>
54 
55 #include <machine/md_var.h>
56 #include <machine/riscvreg.h>
57 #include <machine/vm.h>
58 #include <machine/cpufunc.h>
59 #include <machine/cpu.h>
60 #include <machine/machdep.h>
61 #include <machine/vmm.h>
62 #include <machine/vmm_dev.h>
63 #include <machine/atomic.h>
64 #include <machine/pmap.h>
65 #include <machine/intr.h>
66 #include <machine/encoding.h>
67 #include <machine/db_machdep.h>
68 
69 #include "riscv.h"
70 #include "vmm_aplic.h"
71 #include "vmm_fence.h"
72 #include "vmm_stat.h"
73 
74 MALLOC_DEFINE(M_HYP, "RISC-V VMM HYP", "RISC-V VMM HYP");
75 
76 DPCPU_DEFINE_STATIC(struct hypctx *, vcpu);
77 
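/*
 * Match an instruction word against an opcode pattern: return 1 if insn
 * equals 'match' in every bit position selected by 'mask', 0 otherwise.
 */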
78 static int
79 m_op(uint32_t insn, int match, int mask)
80 {
81 
82 	if (((insn ^ match) & mask) == 0)
83 		return (1);
84 
85 	return (0);
86 }
87 
88 static inline void
89 riscv_set_active_vcpu(struct hypctx *hypctx)
90 {
91 
92 	DPCPU_SET(vcpu, hypctx);
93 }
94 
95 struct hypctx *
96 riscv_get_active_vcpu(void)
97 {
98 
99 	return (DPCPU_GET(vcpu));
100 }
101 
102 int
103 vmmops_modinit(void)
104 {
105 
106 	if (!has_hyp) {
107 		printf("vmm: riscv hart doesn't support H-extension.\n");
108 		return (ENXIO);
109 	}
110 
111 	return (0);
112 }
113 
114 int
115 vmmops_modcleanup(void)
116 {
117 
118 	return (0);
119 }
120 
121 void *
122 vmmops_init(struct vm *vm, pmap_t pmap)
123 {
124 	struct hyp *hyp;
125 	vm_size_t size;
126 
127 	size = round_page(sizeof(struct hyp) +
128 	    sizeof(struct hypctx *) * vm_get_maxcpus(vm));
129 	hyp = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO);
130 	hyp->vm = vm;
131 	hyp->aplic_attached = false;
132 
133 	aplic_vminit(hyp);
134 
135 	return (hyp);
136 }
137 
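/*
 * Delegate the listed guest exceptions and the VS-level interrupts to the
 * guest (VS-mode) via the hedeleg/hideleg CSRs, so they no longer trap to
 * the hypervisor.
 */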
138 static void
139 vmmops_delegate(void)
140 {
141 	uint64_t hedeleg;
142 	uint64_t hideleg;
143 
144 	hedeleg  = (1UL << SCAUSE_INST_MISALIGNED);
145 	hedeleg |= (1UL << SCAUSE_ILLEGAL_INSTRUCTION);
146 	hedeleg |= (1UL << SCAUSE_BREAKPOINT);
147 	hedeleg |= (1UL << SCAUSE_ECALL_USER);
148 	hedeleg |= (1UL << SCAUSE_INST_PAGE_FAULT);
149 	hedeleg |= (1UL << SCAUSE_LOAD_PAGE_FAULT);
150 	hedeleg |= (1UL << SCAUSE_STORE_PAGE_FAULT);
151 	csr_write(hedeleg, hedeleg);
152 
153 	hideleg  = (1UL << IRQ_SOFTWARE_HYPERVISOR);
154 	hideleg |= (1UL << IRQ_TIMER_HYPERVISOR);
155 	hideleg |= (1UL << IRQ_EXTERNAL_HYPERVISOR);
156 	csr_write(hideleg, hideleg);
157 }
158 
159 static void
160 vmmops_vcpu_restore_csrs(struct hypctx *hypctx)
161 {
162 	struct hypcsr *csrs;
163 
164 	csrs = &hypctx->guest_csrs;
165 
166 	csr_write(vsstatus, csrs->vsstatus);
167 	csr_write(vsie, csrs->vsie);
168 	csr_write(vstvec, csrs->vstvec);
169 	csr_write(vsscratch, csrs->vsscratch);
170 	csr_write(vsepc, csrs->vsepc);
171 	csr_write(vscause, csrs->vscause);
172 	csr_write(vstval, csrs->vstval);
173 	csr_write(hvip, csrs->hvip);
174 	csr_write(vsatp, csrs->vsatp);
175 }
176 
177 static void
178 vmmops_vcpu_save_csrs(struct hypctx *hypctx)
179 {
180 	struct hypcsr *csrs;
181 
182 	csrs = &hypctx->guest_csrs;
183 
184 	csrs->vsstatus = csr_read(vsstatus);
185 	csrs->vsie = csr_read(vsie);
186 	csrs->vstvec = csr_read(vstvec);
187 	csrs->vsscratch = csr_read(vsscratch);
188 	csrs->vsepc = csr_read(vsepc);
189 	csrs->vscause = csr_read(vscause);
190 	csrs->vstval = csr_read(vstval);
191 	csrs->hvip = csr_read(hvip);
192 	csrs->vsatp = csr_read(vsatp);
193 }
194 
195 void *
196 vmmops_vcpu_init(void *vmi, struct vcpu *vcpu1, int vcpuid)
197 {
198 	struct hypctx *hypctx;
199 	struct hyp *hyp;
200 	vm_size_t size;
201 
202 	hyp = vmi;
203 
204 	dprintf("%s: hyp %p\n", __func__, hyp);
205 
206 	KASSERT(vcpuid >= 0 && vcpuid < vm_get_maxcpus(hyp->vm),
207 	    ("%s: Invalid vcpuid %d", __func__, vcpuid));
208 
209 	size = round_page(sizeof(struct hypctx));
210 
211 	hypctx = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO);
212 	hypctx->hyp = hyp;
213 	hypctx->vcpu = vcpu1;
214 	hypctx->guest_scounteren = HCOUNTEREN_CY | HCOUNTEREN_TM;
215 
216 	/* Fence queue. */
217 	hypctx->fence_queue = mallocarray(VMM_FENCE_QUEUE_SIZE,
218 	    sizeof(struct vmm_fence), M_HYP, M_WAITOK | M_ZERO);
219 	mtx_init(&hypctx->fence_queue_mtx, "fence queue", NULL, MTX_SPIN);
220 
221 	/* sstatus */
222 	hypctx->guest_regs.hyp_sstatus = SSTATUS_SPP | SSTATUS_SPIE;
223 	hypctx->guest_regs.hyp_sstatus |= SSTATUS_FS_INITIAL;
224 
225 	/* hstatus */
226 	hypctx->guest_regs.hyp_hstatus = HSTATUS_SPV | HSTATUS_VTW;
227 	hypctx->guest_regs.hyp_hstatus |= HSTATUS_SPVP;
228 
229 	hypctx->cpu_id = vcpuid;
230 	hyp->ctx[vcpuid] = hypctx;
231 
232 	aplic_cpuinit(hypctx);
233 	vtimer_cpuinit(hypctx);
234 
235 	return (hypctx);
236 }
237 
238 static int
239 riscv_vmm_pinit(pmap_t pmap)
240 {
241 
242 	dprintf("%s: pmap %p\n", __func__, pmap);
243 
244 	pmap_pinit_stage(pmap, PM_STAGE2);
245 
246 	return (1);
247 }
248 
249 struct vmspace *
250 vmmops_vmspace_alloc(vm_offset_t min, vm_offset_t max)
251 {
252 
253 	return (vmspace_alloc(min, max, riscv_vmm_pinit));
254 }
255 
256 void
257 vmmops_vmspace_free(struct vmspace *vmspace)
258 {
259 
260 	pmap_remove_pages(vmspace_pmap(vmspace));
261 	vmspace_free(vmspace);
262 }
263 
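/*
 * Read a guest instruction at guest_addr using the hlvx.hu hypervisor load,
 * fetching 16 bits first and the remaining 16 bits only for a full-length
 * instruction.  Any fault taken during the access is reported via *trap.
 */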
264 static void
265 riscv_unpriv_read(struct hypctx *hypctx, uintptr_t guest_addr, uint64_t *data,
266     struct hyptrap *trap)
267 {
268 	register struct hyptrap * htrap asm("a0");
269 	uintptr_t old_hstatus;
270 	uintptr_t old_stvec;
271 	uintptr_t entry;
272 	uint64_t val;
273 	uint64_t tmp;
274 	int intr;
275 
276 	entry = (uintptr_t)&vmm_unpriv_trap;
277 	htrap = trap;
278 
279 	intr = intr_disable();
280 
281 	old_hstatus = csr_swap(hstatus, hypctx->guest_regs.hyp_hstatus);
282 	/*
283 	 * Set up a temporary exception vector so that if hlvx.hu raises
284 	 * an exception we catch it in vmm_unpriv_trap().
285 	 */
286 	old_stvec = csr_swap(stvec, entry);
287 
288 	/*
289 	 * Read the first two bytes of the instruction, since it could be a
290 	 * compressed one.
291 	 */
292 	__asm __volatile(".option push\n"
293 			 ".option norvc\n"
294 			"hlvx.hu %[val], (%[addr])\n"
295 			".option pop\n"
296 	    : [val] "=r" (val)
297 	    : [addr] "r" (guest_addr), "r" (htrap)
298 	    : "a1", "memory");
299 
300 	/*
301 	 * If the previous hlvx.hu did not raise an exception, read the rest
302 	 * of the instruction when it is a full-length one.
303 	 */
304 	if (trap->scause == -1 && (val & 0x3) == 0x3) {
305 		guest_addr += 2;
306 		__asm __volatile(".option push\n"
307 				 ".option norvc\n"
308 				"hlvx.hu %[tmp], (%[addr])\n"
309 				".option pop\n"
310 		    : [tmp] "=r" (tmp)
311 		    : [addr] "r" (guest_addr), "r" (htrap)
312 		    : "a1", "memory");
313 		val |= (tmp << 16);
314 	}
315 
316 	csr_write(hstatus, old_hstatus);
317 	csr_write(stvec, old_stvec);
318 
319 	intr_restore(intr);
320 
321 	*data = val;
322 }
323 
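/*
 * Decode the load/store instruction that caused a guest page fault and fill
 * in the instruction-emulation descriptor: direction, register number,
 * access size, sign extension and the faulting guest physical address.
 */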
324 static int
325 riscv_gen_inst_emul_data(struct hypctx *hypctx, struct vm_exit *vme_ret,
326     struct hyptrap *trap)
327 {
328 	uintptr_t guest_addr;
329 	struct vie *vie;
330 	uint64_t insn;
331 	int reg_num;
332 	int rs2, rd;
333 	int direction;
334 	int sign_extend;
335 	int access_size;
336 
337 	guest_addr = vme_ret->sepc;
338 
339 	KASSERT(vme_ret->scause == SCAUSE_FETCH_GUEST_PAGE_FAULT ||
340 	    vme_ret->scause == SCAUSE_LOAD_GUEST_PAGE_FAULT ||
341 	    vme_ret->scause == SCAUSE_STORE_GUEST_PAGE_FAULT,
342 	    ("Invalid scause"));
343 
344 	direction = vme_ret->scause == SCAUSE_STORE_GUEST_PAGE_FAULT ?
345 	    VM_DIR_WRITE : VM_DIR_READ;
346 
347 	sign_extend = 1;
348 
349 	bzero(trap, sizeof(struct hyptrap));
350 	trap->scause = -1;
351 	riscv_unpriv_read(hypctx, guest_addr, &insn, trap);
352 	if (trap->scause != -1)
353 		return (-1);
354 
355 	if ((insn & 0x3) == 0x3) {
356 		rs2 = (insn & RS2_MASK) >> RS2_SHIFT;
357 		rd = (insn & RD_MASK) >> RD_SHIFT;
358 
359 		if (direction == VM_DIR_WRITE) {
360 			if (m_op(insn, MATCH_SB, MASK_SB))
361 				access_size = 1;
362 			else if (m_op(insn, MATCH_SH, MASK_SH))
363 				access_size = 2;
364 			else if (m_op(insn, MATCH_SW, MASK_SW))
365 				access_size = 4;
366 			else if (m_op(insn, MATCH_SD, MASK_SD))
367 				access_size = 8;
368 			else {
369 				printf("unknown store instr at %lx\n",
370 				    guest_addr);
371 				return (-2);
372 			}
373 			reg_num = rs2;
374 		} else {
375 			if (m_op(insn, MATCH_LB, MASK_LB))
376 				access_size = 1;
377 			else if (m_op(insn, MATCH_LH, MASK_LH))
378 				access_size = 2;
379 			else if (m_op(insn, MATCH_LW, MASK_LW))
380 				access_size = 4;
381 			else if (m_op(insn, MATCH_LD, MASK_LD))
382 				access_size = 8;
383 			else if (m_op(insn, MATCH_LBU, MASK_LBU)) {
384 				access_size = 1;
385 				sign_extend = 0;
386 			} else if (m_op(insn, MATCH_LHU, MASK_LHU)) {
387 				access_size = 2;
388 				sign_extend = 0;
389 			} else if (m_op(insn, MATCH_LWU, MASK_LWU)) {
390 				access_size = 4;
391 				sign_extend = 0;
392 			} else {
393 				printf("unknown load instr at %lx\n",
394 				    guest_addr);
395 				return (-3);
396 			}
397 			reg_num = rd;
398 		}
399 		vme_ret->inst_length = 4;
400 	} else {
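		/*
		 * Compressed loads/stores encode their register operands in
		 * 3-bit fields that map to x8-x15, hence the 0x8 bias below.
		 */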
401 		rs2 = (insn >> 7) & 0x7;
402 		rs2 += 0x8;
403 		rd = (insn >> 2) & 0x7;
404 		rd += 0x8;
405 
406 		if (direction == VM_DIR_WRITE) {
407 			if (m_op(insn, MATCH_C_SW, MASK_C_SW))
408 				access_size = 4;
409 			else if (m_op(insn, MATCH_C_SD, MASK_C_SD))
410 				access_size = 8;
411 			else {
412 				printf("unknown compressed store instr at %lx\n",
413 				    guest_addr);
414 				return (-4);
415 			}
416 		} else {
417 			if (m_op(insn, MATCH_C_LW, MASK_C_LW))
418 				access_size = 4;
419 			else if (m_op(insn, MATCH_C_LD, MASK_C_LD))
420 				access_size = 8;
421 			else {
422 				printf("unknown compressed load instr at %lx\n", guest_addr);
423 				return (-5);
424 			}
425 		}
426 		reg_num = rd;
427 		vme_ret->inst_length = 2;
428 	}
429 
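	/*
	 * htval holds bits XLEN-1:2 of the faulting guest physical address;
	 * the two low bits are taken from stval.
	 */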
430 	vme_ret->u.inst_emul.gpa = (vme_ret->htval << 2) |
431 	    (vme_ret->stval & 0x3);
432 
433 	dprintf("guest_addr %lx insn %lx, reg %d, gpa %lx\n", guest_addr, insn,
434 	    reg_num, vme_ret->u.inst_emul.gpa);
435 
436 	vie = &vme_ret->u.inst_emul.vie;
437 	vie->dir = direction;
438 	vie->reg = reg_num;
439 	vie->sign_extend = sign_extend;
440 	vie->access_size = access_size;
441 
442 	return (0);
443 }
444 
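/*
 * Examine the exit reason in scause.  Return true if the exit was handled
 * here and the guest can be resumed immediately; return false when the exit
 * has to be completed elsewhere, with vme->exitcode describing it.
 */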
445 static bool
446 riscv_handle_world_switch(struct hypctx *hypctx, struct vm_exit *vme,
447     pmap_t pmap)
448 {
449 	struct hyptrap trap;
450 	uint64_t insn;
451 	uint64_t gpa;
452 	bool handled;
453 	bool retu;
454 	int ret;
455 	int i;
456 
457 	handled = false;
458 
459 	if (vme->scause & SCAUSE_INTR) {
460 		/*
461 		 * Host interrupt? The host handles it outside the critical section.
462 		 */
463 		vmm_stat_incr(hypctx->vcpu, VMEXIT_IRQ, 1);
464 		vme->exitcode = VM_EXITCODE_BOGUS;
465 		vme->inst_length = 0;
466 		return (handled);
467 	}
468 
469 	switch (vme->scause) {
470 	case SCAUSE_FETCH_GUEST_PAGE_FAULT:
471 	case SCAUSE_LOAD_GUEST_PAGE_FAULT:
472 	case SCAUSE_STORE_GUEST_PAGE_FAULT:
473 		gpa = (vme->htval << 2) | (vme->stval & 0x3);
474 		if (vm_mem_allocated(hypctx->vcpu, gpa)) {
475 			vme->exitcode = VM_EXITCODE_PAGING;
476 			vme->inst_length = 0;
477 			vme->u.paging.gpa = gpa;
478 		} else {
479 			ret = riscv_gen_inst_emul_data(hypctx, vme, &trap);
480 			if (ret != 0) {
481 				vme->exitcode = VM_EXITCODE_HYP;
482 				vme->u.hyp.scause = trap.scause;
483 				break;
484 			}
485 			vme->exitcode = VM_EXITCODE_INST_EMUL;
486 		}
487 		break;
488 	case SCAUSE_ILLEGAL_INSTRUCTION:
489 		/*
490 		 * TODO: handle illegal instruction properly.
491 		 */
492 		printf("%s: Illegal instruction at %lx stval 0x%lx htval "
493 		    "0x%lx\n", __func__, vme->sepc, vme->stval, vme->htval);
494 		vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED, 1);
495 		vme->exitcode = VM_EXITCODE_BOGUS;
496 		handled = false;
497 		break;
498 	case SCAUSE_VIRTUAL_SUPERVISOR_ECALL:
499 		retu = false;
500 		vmm_sbi_ecall(hypctx->vcpu, &retu);
501 		if (retu == false) {
502 			handled = true;
503 			break;
504 		}
505 		for (i = 0; i < nitems(vme->u.ecall.args); i++)
506 			vme->u.ecall.args[i] = hypctx->guest_regs.hyp_a[i];
507 		vme->exitcode = VM_EXITCODE_ECALL;
508 		handled = false;
509 		break;
510 	case SCAUSE_VIRTUAL_INSTRUCTION:
511 		insn = vme->stval;
512 		if (m_op(insn, MATCH_WFI, MASK_WFI))
513 			vme->exitcode = VM_EXITCODE_WFI;
514 		else
515 			vme->exitcode = VM_EXITCODE_BOGUS;
516 		handled = false;
517 		break;
518 	default:
519 		printf("unknown scause %lx\n", vme->scause);
520 		vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED, 1);
521 		vme->exitcode = VM_EXITCODE_BOGUS;
522 		handled = false;
523 		break;
524 	}
525 
526 	return (handled);
527 }
528 
529 int
530 vmmops_gla2gpa(void *vcpui, struct vm_guest_paging *paging, uint64_t gla,
531     int prot, uint64_t *gpa, int *is_fault)
532 {
533 
534 	/* Implement me. */
535 
536 	return (ENOSYS);
537 }
538 
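/*
 * Post a software-interrupt IPI to a vCPU: mark it pending and notify the
 * target vCPU so the interrupt is injected on the next guest entry.
 */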
539 void
540 riscv_send_ipi(struct hypctx *hypctx, int hart_id)
541 {
542 	struct hyp *hyp;
543 	struct vm *vm;
544 
545 	hyp = hypctx->hyp;
546 	vm = hyp->vm;
547 
548 	atomic_set_32(&hypctx->ipi_pending, 1);
549 
550 	vcpu_notify_event(vm_vcpu(vm, hart_id));
551 }
552 
553 int
554 riscv_check_ipi(struct hypctx *hypctx, bool clear)
555 {
556 	int val;
557 
558 	if (clear)
559 		val = atomic_swap_32(&hypctx->ipi_pending, 0);
560 	else
561 		val = hypctx->ipi_pending;
562 
563 	return (val);
564 }
565 
566 bool
567 riscv_check_interrupts_pending(struct hypctx *hypctx)
568 {
569 
570 	if (hypctx->interrupts_pending)
571 		return (true);
572 
573 	return (false);
574 }
575 
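/*
 * Recompute the guest's pending interrupts (external from the APLIC, software
 * from IPIs, timer) and write the result to the hvip CSR.
 */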
576 static void
577 riscv_sync_interrupts(struct hypctx *hypctx)
578 {
579 	int pending;
580 
581 	pending = aplic_check_pending(hypctx);
582 	if (pending)
583 		hypctx->guest_csrs.hvip |= HVIP_VSEIP;
584 	else
585 		hypctx->guest_csrs.hvip &= ~HVIP_VSEIP;
586 
587 	/* Guest clears VSSIP bit manually. */
588 	if (riscv_check_ipi(hypctx, true))
589 		hypctx->guest_csrs.hvip |= HVIP_VSSIP;
590 
591 	if (riscv_check_interrupts_pending(hypctx))
592 		hypctx->guest_csrs.hvip |= HVIP_VSTIP;
593 	else
594 		hypctx->guest_csrs.hvip &= ~HVIP_VSTIP;
595 
596 	csr_write(hvip, hypctx->guest_csrs.hvip);
597 }
598 
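/*
 * Run loop for a vCPU: program the hypervisor CSRs, enter the guest with
 * vmm_switch(), and keep re-entering until an exit has to be completed
 * outside of this loop.
 */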
599 int
600 vmmops_run(void *vcpui, register_t pc, pmap_t pmap, struct vm_eventinfo *evinfo)
601 {
602 	struct hypctx *hypctx;
603 	struct vm_exit *vme;
604 	struct vcpu *vcpu;
605 	register_t val;
606 	uint64_t hvip;
607 	bool handled;
608 
609 	hypctx = (struct hypctx *)vcpui;
610 	vcpu = hypctx->vcpu;
611 	vme = vm_exitinfo(vcpu);
612 
613 	hypctx->guest_regs.hyp_sepc = (uint64_t)pc;
614 
615 	vmmops_delegate();
616 
617 	/*
618 	 * From The RISC-V Instruction Set Manual
619 	 * Volume II: RISC-V Privileged Architectures
620 	 *
621 	 * If the new virtual machine's guest physical page tables
622 	 * have been modified, it may be necessary to execute an HFENCE.GVMA
623 	 * instruction (see Section 5.3.2) before or after writing hgatp.
624 	 */
625 	__asm __volatile("hfence.gvma" ::: "memory");
626 
627 	csr_write(hgatp, pmap->pm_satp);
628 	if (has_sstc)
629 		csr_write(henvcfg, HENVCFG_STCE);
630 	csr_write(hie, HIE_VSEIE | HIE_VSSIE | HIE_SGEIE);
631 	/* TODO: should we trap rdcycle / rdtime? */
632 	csr_write(hcounteren, HCOUNTEREN_CY | HCOUNTEREN_TM);
633 
634 	vmmops_vcpu_restore_csrs(hypctx);
635 
636 	for (;;) {
637 		dprintf("%s: pc %lx\n", __func__, pc);
638 
639 		if (hypctx->has_exception) {
640 			hypctx->has_exception = false;
641 			/*
642 			 * TODO: implement exception injection.
643 			 */
644 		}
645 
646 		val = intr_disable();
647 		/* Check if the vcpu is suspended. */
648 		/* Check if the vcpu is suspended */
649 		if (vcpu_suspended(evinfo)) {
650 			intr_restore(val);
651 			vm_exit_suspended(vcpu, pc);
652 			break;
653 		}
654 
655 		if (vcpu_debugged(vcpu)) {
656 			intr_restore(val);
657 			vm_exit_debug(vcpu, pc);
658 			break;
659 		}
660 
661 		/*
662 		 * TODO: What happens if a timer interrupt is asserted exactly
663 		 * here, but for the previous VM?
664 		 */
665 		riscv_set_active_vcpu(hypctx);
666 		aplic_flush_hwstate(hypctx);
667 		riscv_sync_interrupts(hypctx);
668 		vmm_fence_process(hypctx);
669 
670 		dprintf("%s: Entering guest VM, vsatp %lx, ss %lx hs %lx\n",
671 		    __func__, csr_read(vsatp), hypctx->guest_regs.hyp_sstatus,
672 		    hypctx->guest_regs.hyp_hstatus);
673 
674 		vmm_switch(hypctx);
675 
676 		dprintf("%s: Leaving guest VM, hstatus %lx\n", __func__,
677 		    hypctx->guest_regs.hyp_hstatus);
678 
679 		/* Guest can clear VSSIP. It can't clear VSTIP or VSEIP. */
680 		hvip = csr_read(hvip);
681 		if ((hypctx->guest_csrs.hvip ^ hvip) & HVIP_VSSIP) {
682 			if (hvip & HVIP_VSSIP) {
683 				/* TODO: VSSIP was set by guest. */
684 			} else {
685 				/* VSSIP was cleared by guest. */
686 				hypctx->guest_csrs.hvip &= ~HVIP_VSSIP;
687 			}
688 		}
689 
690 		aplic_sync_hwstate(hypctx);
691 
692 		/*
693 		 * TODO: deactivate stage 2 pmap here if needed.
694 		 */
695 
696 		vme->scause = csr_read(scause);
697 		vme->sepc = csr_read(sepc);
698 		vme->stval = csr_read(stval);
699 		vme->htval = csr_read(htval);
700 		vme->htinst = csr_read(htinst);
701 
702 		intr_restore(val);
703 
704 		vmm_stat_incr(vcpu, VMEXIT_COUNT, 1);
705 		vme->pc = hypctx->guest_regs.hyp_sepc;
706 		vme->inst_length = INSN_SIZE;
707 
708 		handled = riscv_handle_world_switch(hypctx, vme, pmap);
709 		if (handled == false) {
710 			/* Exit loop to emulate instruction. */
711 			break;
712 		} else {
713 			/* Resume guest execution from the next instruction. */
714 			hypctx->guest_regs.hyp_sepc += vme->inst_length;
715 		}
716 	}
717 
718 	vmmops_vcpu_save_csrs(hypctx);
719 
720 	return (0);
721 }
722 
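/*
 * Executed on every CPU via smp_rendezvous(): clear the cached active-vCPU
 * pointer if it refers to the VM being destroyed.
 */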
723 static void
724 riscv_pcpu_vmcleanup(void *arg)
725 {
726 	struct hyp *hyp;
727 	int i, maxcpus;
728 
729 	hyp = arg;
730 	maxcpus = vm_get_maxcpus(hyp->vm);
731 	for (i = 0; i < maxcpus; i++) {
732 		if (riscv_get_active_vcpu() == hyp->ctx[i]) {
733 			riscv_set_active_vcpu(NULL);
734 			break;
735 		}
736 	}
737 }
738 
739 void
740 vmmops_vcpu_cleanup(void *vcpui)
741 {
742 	struct hypctx *hypctx;
743 
744 	hypctx = vcpui;
745 
746 	dprintf("%s\n", __func__);
747 
748 	aplic_cpucleanup(hypctx);
749 
750 	mtx_destroy(&hypctx->fence_queue_mtx);
751 	free(hypctx->fence_queue, M_HYP);
752 	free(hypctx, M_HYP);
753 }
754 
755 void
756 vmmops_cleanup(void *vmi)
757 {
758 	struct hyp *hyp;
759 
760 	hyp = vmi;
761 
762 	dprintf("%s\n", __func__);
763 
764 	aplic_vmcleanup(hyp);
765 
766 	smp_rendezvous(NULL, riscv_pcpu_vmcleanup, NULL, hyp);
767 
768 	free(hyp, M_HYP);
769 }
770 
771 /*
772  * Return a pointer to the register value. Registers have different sizes and
773  * an explicit cast must be made to ensure proper conversion.
774  */
775 static uint64_t *
776 hypctx_regptr(struct hypctx *hypctx, int reg)
777 {
778 
779 	switch (reg) {
780 	case VM_REG_GUEST_RA:
781 		return (&hypctx->guest_regs.hyp_ra);
782 	case VM_REG_GUEST_SP:
783 		return (&hypctx->guest_regs.hyp_sp);
784 	case VM_REG_GUEST_GP:
785 		return (&hypctx->guest_regs.hyp_gp);
786 	case VM_REG_GUEST_TP:
787 		return (&hypctx->guest_regs.hyp_tp);
788 	case VM_REG_GUEST_T0:
789 		return (&hypctx->guest_regs.hyp_t[0]);
790 	case VM_REG_GUEST_T1:
791 		return (&hypctx->guest_regs.hyp_t[1]);
792 	case VM_REG_GUEST_T2:
793 		return (&hypctx->guest_regs.hyp_t[2]);
794 	case VM_REG_GUEST_S0:
795 		return (&hypctx->guest_regs.hyp_s[0]);
796 	case VM_REG_GUEST_S1:
797 		return (&hypctx->guest_regs.hyp_s[1]);
798 	case VM_REG_GUEST_A0:
799 		return (&hypctx->guest_regs.hyp_a[0]);
800 	case VM_REG_GUEST_A1:
801 		return (&hypctx->guest_regs.hyp_a[1]);
802 	case VM_REG_GUEST_A2:
803 		return (&hypctx->guest_regs.hyp_a[2]);
804 	case VM_REG_GUEST_A3:
805 		return (&hypctx->guest_regs.hyp_a[3]);
806 	case VM_REG_GUEST_A4:
807 		return (&hypctx->guest_regs.hyp_a[4]);
808 	case VM_REG_GUEST_A5:
809 		return (&hypctx->guest_regs.hyp_a[5]);
810 	case VM_REG_GUEST_A6:
811 		return (&hypctx->guest_regs.hyp_a[6]);
812 	case VM_REG_GUEST_A7:
813 		return (&hypctx->guest_regs.hyp_a[7]);
814 	case VM_REG_GUEST_S2:
815 		return (&hypctx->guest_regs.hyp_s[2]);
816 	case VM_REG_GUEST_S3:
817 		return (&hypctx->guest_regs.hyp_s[3]);
818 	case VM_REG_GUEST_S4:
819 		return (&hypctx->guest_regs.hyp_s[4]);
820 	case VM_REG_GUEST_S5:
821 		return (&hypctx->guest_regs.hyp_s[5]);
822 	case VM_REG_GUEST_S6:
823 		return (&hypctx->guest_regs.hyp_s[6]);
824 	case VM_REG_GUEST_S7:
825 		return (&hypctx->guest_regs.hyp_s[7]);
826 	case VM_REG_GUEST_S8:
827 		return (&hypctx->guest_regs.hyp_s[8]);
828 	case VM_REG_GUEST_S9:
829 		return (&hypctx->guest_regs.hyp_s[9]);
830 	case VM_REG_GUEST_S10:
831 		return (&hypctx->guest_regs.hyp_s[10]);
832 	case VM_REG_GUEST_S11:
833 		return (&hypctx->guest_regs.hyp_s[11]);
834 	case VM_REG_GUEST_T3:
835 		return (&hypctx->guest_regs.hyp_t[3]);
836 	case VM_REG_GUEST_T4:
837 		return (&hypctx->guest_regs.hyp_t[4]);
838 	case VM_REG_GUEST_T5:
839 		return (&hypctx->guest_regs.hyp_t[5]);
840 	case VM_REG_GUEST_T6:
841 		return (&hypctx->guest_regs.hyp_t[6]);
842 	case VM_REG_GUEST_SEPC:
843 		return (&hypctx->guest_regs.hyp_sepc);
844 	default:
845 		break;
846 	}
847 
848 	return (NULL);
849 }
850 
851 int
852 vmmops_getreg(void *vcpui, int reg, uint64_t *retval)
853 {
854 	uint64_t *regp;
855 	int running, hostcpu;
856 	struct hypctx *hypctx;
857 
858 	hypctx = vcpui;
859 
860 	running = vcpu_is_running(hypctx->vcpu, &hostcpu);
861 	if (running && hostcpu != curcpu)
862 		panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm),
863 		    vcpu_vcpuid(hypctx->vcpu));
864 
865 	if (reg == VM_REG_GUEST_ZERO) {
866 		*retval = 0;
867 		return (0);
868 	}
869 
870 	regp = hypctx_regptr(hypctx, reg);
871 	if (regp == NULL)
872 		return (EINVAL);
873 
874 	*retval = *regp;
875 
876 	return (0);
877 }
878 
879 int
880 vmmops_setreg(void *vcpui, int reg, uint64_t val)
881 {
882 	struct hypctx *hypctx;
883 	int running, hostcpu;
884 	uint64_t *regp;
885 
886 	hypctx = vcpui;
887 
888 	running = vcpu_is_running(hypctx->vcpu, &hostcpu);
889 	if (running && hostcpu != curcpu)
890 		panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm),
891 		    vcpu_vcpuid(hypctx->vcpu));
892 
893 	regp = hypctx_regptr(hypctx, reg);
894 	if (regp == NULL)
895 		return (EINVAL);
896 
897 	*regp = val;
898 
899 	return (0);
900 }
901 
902 int
903 vmmops_exception(void *vcpui, uint64_t scause)
904 {
905 	struct hypctx *hypctx;
906 	int running, hostcpu;
907 
908 	hypctx = vcpui;
909 
910 	running = vcpu_is_running(hypctx->vcpu, &hostcpu);
911 	if (running && hostcpu != curcpu)
912 		panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm),
913 		    vcpu_vcpuid(hypctx->vcpu));
914 
915 	/* TODO: implement me. */
916 
917 	return (ENOSYS);
918 }
919 
920 int
921 vmmops_getcap(void *vcpui, int num, int *retval)
922 {
923 	int ret;
924 
925 	ret = ENOENT;
926 
927 	switch (num) {
928 	case VM_CAP_SSTC:
929 		*retval = has_sstc;
930 		ret = 0;
931 		break;
932 	case VM_CAP_UNRESTRICTED_GUEST:
933 		*retval = 1;
934 		ret = 0;
935 		break;
936 	default:
937 		break;
938 	}
939 
940 	return (ret);
941 }
942 
943 int
944 vmmops_setcap(void *vcpui, int num, int val)
945 {
946 
947 	return (ENOENT);
948 }
949