xref: /freebsd/sys/riscv/vmm/vmm_riscv.c (revision b196276c20b577b364372f1aa1a646b9ce34bf5c)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2024 Ruslan Bukin <br@bsdpad.com>
5  *
6  * This software was developed by the University of Cambridge Computer
7  * Laboratory (Department of Computer Science and Technology) under Innovate
8  * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
9  * Prototype".
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/smp.h>
36 #include <sys/kernel.h>
37 #include <sys/malloc.h>
38 #include <sys/mman.h>
39 #include <sys/pcpu.h>
40 #include <sys/proc.h>
41 #include <sys/rman.h>
42 #include <sys/sysctl.h>
43 #include <sys/lock.h>
44 #include <sys/mutex.h>
45 #include <sys/vmem.h>
46 #include <sys/bus.h>
47 
48 #include <vm/vm.h>
49 #include <vm/pmap.h>
50 #include <vm/vm_extern.h>
51 #include <vm/vm_map.h>
52 #include <vm/vm_page.h>
53 #include <vm/vm_param.h>
54 
55 #include <machine/md_var.h>
56 #include <machine/riscvreg.h>
57 #include <machine/vm.h>
58 #include <machine/cpufunc.h>
59 #include <machine/cpu.h>
60 #include <machine/machdep.h>
61 #include <machine/vmm.h>
62 #include <machine/vmm_dev.h>
63 #include <machine/atomic.h>
64 #include <machine/pmap.h>
65 #include <machine/intr.h>
66 #include <machine/encoding.h>
67 #include <machine/db_machdep.h>
68 
69 #include "riscv.h"
70 #include "vmm_aplic.h"
71 #include "vmm_stat.h"
72 
/* Malloc type passed to the malloc_aligned()/free() calls made in this file. */
MALLOC_DEFINE(M_HYP, "RISC-V VMM HYP", "RISC-V VMM HYP");

/*
 * Per-CPU pointer to a guest context (struct hypctx).  It is written by
 * riscv_set_active_vcpu() and read back by riscv_get_active_vcpu().
 */
DPCPU_DEFINE_STATIC(struct hypctx *, vcpu);
76 
/*
 * Test whether an instruction word matches an opcode pattern.
 *
 * Returns 1 when every bit of 'insn' selected by 'mask' equals the
 * corresponding bit of 'match', and 0 otherwise.
 */
static int
m_op(uint32_t insn, int match, int mask)
{
	uint32_t differing;

	/* Bits that disagree with the pattern, restricted to the mask. */
	differing = (insn ^ match) & mask;

	return (differing == 0 ? 1 : 0);
}
86 
87 static inline void
88 riscv_set_active_vcpu(struct hypctx *hypctx)
89 {
90 
91 	DPCPU_SET(vcpu, hypctx);
92 }
93 
94 struct hypctx *
95 riscv_get_active_vcpu(void)
96 {
97 
98 	return (DPCPU_GET(vcpu));
99 }
100 
101 int
102 vmmops_modinit(void)
103 {
104 
105 	if (!has_hyp) {
106 		printf("vmm: riscv hart doesn't support H-extension.\n");
107 		return (ENXIO);
108 	}
109 
110 	return (0);
111 }
112 
/*
 * Module cleanup hook.  There is nothing to undo, so this always
 * returns 0.
 */
int
vmmops_modcleanup(void)
{
	return (0);
}
119 
120 void *
121 vmmops_init(struct vm *vm, pmap_t pmap)
122 {
123 	struct hyp *hyp;
124 	vm_size_t size;
125 
126 	size = round_page(sizeof(struct hyp) +
127 	    sizeof(struct hypctx *) * vm_get_maxcpus(vm));
128 	hyp = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO);
129 	hyp->vm = vm;
130 	hyp->aplic_attached = false;
131 
132 	aplic_vminit(hyp);
133 
134 	return (hyp);
135 }
136 
137 static void
138 vmmops_delegate(void)
139 {
140 	uint64_t hedeleg;
141 	uint64_t hideleg;
142 
143 	hedeleg  = (1UL << SCAUSE_INST_MISALIGNED);
144 	hedeleg |= (1UL << SCAUSE_ILLEGAL_INSTRUCTION);
145 	hedeleg |= (1UL << SCAUSE_BREAKPOINT);
146 	hedeleg |= (1UL << SCAUSE_ECALL_USER);
147 	hedeleg |= (1UL << SCAUSE_INST_PAGE_FAULT);
148 	hedeleg |= (1UL << SCAUSE_LOAD_PAGE_FAULT);
149 	hedeleg |= (1UL << SCAUSE_STORE_PAGE_FAULT);
150 	csr_write(hedeleg, hedeleg);
151 
152 	hideleg  = (1UL << IRQ_SOFTWARE_HYPERVISOR);
153 	hideleg |= (1UL << IRQ_TIMER_HYPERVISOR);
154 	hideleg |= (1UL << IRQ_EXTERNAL_HYPERVISOR);
155 	csr_write(hideleg, hideleg);
156 }
157 
158 static void
159 vmmops_vcpu_restore_csrs(struct hypctx *hypctx)
160 {
161 	struct hypcsr *csrs;
162 
163 	csrs = &hypctx->guest_csrs;
164 
165 	csr_write(vsstatus, csrs->vsstatus);
166 	csr_write(vsie, csrs->vsie);
167 	csr_write(vstvec, csrs->vstvec);
168 	csr_write(vsscratch, csrs->vsscratch);
169 	csr_write(vsepc, csrs->vsepc);
170 	csr_write(vscause, csrs->vscause);
171 	csr_write(vstval, csrs->vstval);
172 	csr_write(hvip, csrs->hvip);
173 	csr_write(vsatp, csrs->vsatp);
174 }
175 
176 static void
177 vmmops_vcpu_save_csrs(struct hypctx *hypctx)
178 {
179 	struct hypcsr *csrs;
180 
181 	csrs = &hypctx->guest_csrs;
182 
183 	csrs->vsstatus = csr_read(vsstatus);
184 	csrs->vsie = csr_read(vsie);
185 	csrs->vstvec = csr_read(vstvec);
186 	csrs->vsscratch = csr_read(vsscratch);
187 	csrs->vsepc = csr_read(vsepc);
188 	csrs->vscause = csr_read(vscause);
189 	csrs->vstval = csr_read(vstval);
190 	csrs->hvip = csr_read(hvip);
191 	csrs->vsatp = csr_read(vsatp);
192 }
193 
194 void *
195 vmmops_vcpu_init(void *vmi, struct vcpu *vcpu1, int vcpuid)
196 {
197 	struct hypctx *hypctx;
198 	struct hyp *hyp;
199 	vm_size_t size;
200 
201 	hyp = vmi;
202 
203 	dprintf("%s: hyp %p\n", __func__, hyp);
204 
205 	KASSERT(vcpuid >= 0 && vcpuid < vm_get_maxcpus(hyp->vm),
206 	    ("%s: Invalid vcpuid %d", __func__, vcpuid));
207 
208 	size = round_page(sizeof(struct hypctx));
209 
210 	hypctx = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO);
211 	hypctx->hyp = hyp;
212 	hypctx->vcpu = vcpu1;
213 	hypctx->guest_scounteren = HCOUNTEREN_CY | HCOUNTEREN_TM;
214 
215 	/* sstatus */
216 	hypctx->guest_regs.hyp_sstatus = SSTATUS_SPP | SSTATUS_SPIE;
217 	hypctx->guest_regs.hyp_sstatus |= SSTATUS_FS_INITIAL;
218 
219 	/* hstatus */
220 	hypctx->guest_regs.hyp_hstatus = HSTATUS_SPV | HSTATUS_VTW;
221 	hypctx->guest_regs.hyp_hstatus |= HSTATUS_SPVP;
222 
223 	hypctx->cpu_id = vcpuid;
224 	hyp->ctx[vcpuid] = hypctx;
225 
226 	aplic_cpuinit(hypctx);
227 	vtimer_cpuinit(hypctx);
228 
229 	return (hypctx);
230 }
231 
232 static int
233 riscv_vmm_pinit(pmap_t pmap)
234 {
235 
236 	dprintf("%s: pmap %p\n", __func__, pmap);
237 
238 	pmap_pinit_stage(pmap, PM_STAGE2);
239 
240 	return (1);
241 }
242 
243 struct vmspace *
244 vmmops_vmspace_alloc(vm_offset_t min, vm_offset_t max)
245 {
246 
247 	return (vmspace_alloc(min, max, riscv_vmm_pinit));
248 }
249 
250 void
251 vmmops_vmspace_free(struct vmspace *vmspace)
252 {
253 
254 	pmap_remove_pages(vmspace_pmap(vmspace));
255 	vmspace_free(vmspace);
256 }
257 
/*
 * Read the instruction located at guest address 'guest_addr' with the
 * hlvx.hu instruction and store it in '*data'.
 *
 * hypctx     - guest context; its saved hstatus image is installed in the
 *              hstatus CSR while the access is performed.
 * guest_addr - address of the instruction to read.
 * data       - receives the 16-bit parcel, or the full 32-bit word when the
 *              low two bits of the first parcel are 0x3.
 * trap       - trap record.  This function treats trap->scause == -1 as
 *              "the first access raised no exception", so the caller is
 *              expected to preset it to -1.
 *
 * Interrupts are disabled and stvec points at vmm_unpriv_trap for the
 * duration of the access; hstatus, stvec and the interrupt state are
 * restored before returning.
 *
 * NOTE(review): '*data' is written unconditionally, so when the access
 * trapped its content should not be relied upon; check trap->scause.
 */
static void
riscv_unpriv_read(struct hypctx *hypctx, uintptr_t guest_addr, uint64_t *data,
    struct hyptrap *trap)
{
	/*
	 * The trap record pointer is pinned to a0 and passed as an input
	 * operand to the asm statements below.  Presumably vmm_unpriv_trap
	 * expects it there and uses a1 (listed as clobbered) as scratch --
	 * confirm against the trap handler.
	 */
	register struct hyptrap * htrap asm("a0");
	uintptr_t old_hstatus;
	uintptr_t old_stvec;
	uintptr_t entry;
	uint64_t val;
	uint64_t tmp;
	int intr;

	entry = (uintptr_t)&vmm_unpriv_trap;
	htrap = trap;

	intr = intr_disable();

	/* Install the guest's hstatus image, remembering the previous one. */
	old_hstatus = csr_swap(hstatus, hypctx->guest_regs.hyp_hstatus);
	/*
	 * Setup a temporary exception vector, so that if hlvx.hu raises
	 * an exception we catch it in the vmm_unpriv_trap().
	 */
	old_stvec = csr_swap(stvec, entry);

	/*
	 * Read first two bytes of instruction assuming it could be a
	 * compressed one.
	 */
	__asm __volatile(".option push\n"
			 ".option norvc\n"
			"hlvx.hu %[val], (%[addr])\n"
			".option pop\n"
	    : [val] "=r" (val)
	    : [addr] "r" (guest_addr), "r" (htrap)
	    : "a1", "memory");

	/*
	 * Check if previous hlvx.hu did not raise an exception, and then
	 * read the rest of instruction if it is a full-length one.
	 */
	if (trap->scause == -1 && (val & 0x3) == 0x3) {
		guest_addr += 2;
		__asm __volatile(".option push\n"
				 ".option norvc\n"
				"hlvx.hu %[tmp], (%[addr])\n"
				".option pop\n"
		    : [tmp] "=r" (tmp)
		    : [addr] "r" (guest_addr), "r" (htrap)
		    : "a1", "memory");
		/* The second parcel forms bits 31:16 of the instruction. */
		val |= (tmp << 16);
	}

	/* Put back the hstatus and trap vector that were in place on entry. */
	csr_write(hstatus, old_hstatus);
	csr_write(stvec, old_stvec);

	intr_restore(intr);

	*data = val;
}
317 
318 static int
319 riscv_gen_inst_emul_data(struct hypctx *hypctx, struct vm_exit *vme_ret,
320     struct hyptrap *trap)
321 {
322 	uintptr_t guest_addr;
323 	struct vie *vie;
324 	uint64_t insn;
325 	int reg_num;
326 	int rs2, rd;
327 	int direction;
328 	int sign_extend;
329 	int access_size;
330 
331 	guest_addr = vme_ret->sepc;
332 
333 	KASSERT(vme_ret->scause == SCAUSE_FETCH_GUEST_PAGE_FAULT ||
334 	    vme_ret->scause == SCAUSE_LOAD_GUEST_PAGE_FAULT ||
335 	    vme_ret->scause == SCAUSE_STORE_GUEST_PAGE_FAULT,
336 	    ("Invalid scause"));
337 
338 	direction = vme_ret->scause == SCAUSE_STORE_GUEST_PAGE_FAULT ?
339 	    VM_DIR_WRITE : VM_DIR_READ;
340 
341 	sign_extend = 1;
342 
343 	bzero(trap, sizeof(struct hyptrap));
344 	trap->scause = -1;
345 	riscv_unpriv_read(hypctx, guest_addr, &insn, trap);
346 	if (trap->scause != -1)
347 		return (-1);
348 
349 	if ((insn & 0x3) == 0x3) {
350 		rs2 = (insn & RS2_MASK) >> RS2_SHIFT;
351 		rd = (insn & RD_MASK) >> RD_SHIFT;
352 
353 		if (direction == VM_DIR_WRITE) {
354 			if (m_op(insn, MATCH_SB, MASK_SB))
355 				access_size = 1;
356 			else if (m_op(insn, MATCH_SH, MASK_SH))
357 				access_size = 2;
358 			else if (m_op(insn, MATCH_SW, MASK_SW))
359 				access_size = 4;
360 			else if (m_op(insn, MATCH_SD, MASK_SD))
361 				access_size = 8;
362 			else {
363 				printf("unknown store instr at %lx",
364 				    guest_addr);
365 				return (-2);
366 			}
367 			reg_num = rs2;
368 		} else {
369 			if (m_op(insn, MATCH_LB, MASK_LB))
370 				access_size = 1;
371 			else if (m_op(insn, MATCH_LH, MASK_LH))
372 				access_size = 2;
373 			else if (m_op(insn, MATCH_LW, MASK_LW))
374 				access_size = 4;
375 			else if (m_op(insn, MATCH_LD, MASK_LD))
376 				access_size = 8;
377 			else if (m_op(insn, MATCH_LBU, MASK_LBU)) {
378 				access_size = 1;
379 				sign_extend = 0;
380 			} else if (m_op(insn, MATCH_LHU, MASK_LHU)) {
381 				access_size = 2;
382 				sign_extend = 0;
383 			} else if (m_op(insn, MATCH_LWU, MASK_LWU)) {
384 				access_size = 4;
385 				sign_extend = 0;
386 			} else {
387 				printf("unknown load instr at %lx",
388 				    guest_addr);
389 				return (-3);
390 			}
391 			reg_num = rd;
392 		}
393 		vme_ret->inst_length = 4;
394 	} else {
395 		rs2 = (insn >> 7) & 0x7;
396 		rs2 += 0x8;
397 		rd = (insn >> 2) & 0x7;
398 		rd += 0x8;
399 
400 		if (direction == VM_DIR_WRITE) {
401 			if (m_op(insn, MATCH_C_SW, MASK_C_SW))
402 				access_size = 4;
403 			else if (m_op(insn, MATCH_C_SD, MASK_C_SD))
404 				access_size = 8;
405 			else {
406 				printf("unknown compressed store instr at %lx",
407 				    guest_addr);
408 				return (-4);
409 			}
410 		} else  {
411 			if (m_op(insn, MATCH_C_LW, MASK_C_LW))
412 				access_size = 4;
413 			else if (m_op(insn, MATCH_C_LD, MASK_C_LD))
414 				access_size = 8;
415 			else {
416 				printf("unknown load instr at %lx", guest_addr);
417 				return (-5);
418 			}
419 		}
420 		reg_num = rd;
421 		vme_ret->inst_length = 2;
422 	}
423 
424 	vme_ret->u.inst_emul.gpa = (vme_ret->htval << 2) |
425 	    (vme_ret->stval & 0x3);
426 
427 	dprintf("guest_addr %lx insn %lx, reg %d, gpa %lx\n", guest_addr, insn,
428 	    reg_num, vme_ret->u.inst_emul.gpa);
429 
430 	vie = &vme_ret->u.inst_emul.vie;
431 	vie->dir = direction;
432 	vie->reg = reg_num;
433 	vie->sign_extend = sign_extend;
434 	vie->access_size = access_size;
435 
436 	return (0);
437 }
438 
439 static bool
440 riscv_handle_world_switch(struct hypctx *hypctx, struct vm_exit *vme,
441     pmap_t pmap)
442 {
443 	struct hyptrap trap;
444 	uint64_t insn;
445 	uint64_t gpa;
446 	bool handled;
447 	bool retu;
448 	int ret;
449 	int i;
450 
451 	handled = false;
452 
453 	if (vme->scause & SCAUSE_INTR) {
454 		/*
455 		 * Host interrupt? Leave critical section to handle.
456 		 */
457 		vmm_stat_incr(hypctx->vcpu, VMEXIT_IRQ, 1);
458 		vme->exitcode = VM_EXITCODE_BOGUS;
459 		vme->inst_length = 0;
460 		return (handled);
461 	}
462 
463 	switch (vme->scause) {
464 	case SCAUSE_FETCH_GUEST_PAGE_FAULT:
465 	case SCAUSE_LOAD_GUEST_PAGE_FAULT:
466 	case SCAUSE_STORE_GUEST_PAGE_FAULT:
467 		gpa = (vme->htval << 2) | (vme->stval & 0x3);
468 		if (vm_mem_allocated(hypctx->vcpu, gpa)) {
469 			vme->exitcode = VM_EXITCODE_PAGING;
470 			vme->inst_length = 0;
471 			vme->u.paging.gpa = gpa;
472 		} else {
473 			ret = riscv_gen_inst_emul_data(hypctx, vme, &trap);
474 			if (ret != 0) {
475 				vme->exitcode = VM_EXITCODE_HYP;
476 				vme->u.hyp.scause = trap.scause;
477 				break;
478 			}
479 			vme->exitcode = VM_EXITCODE_INST_EMUL;
480 		}
481 		break;
482 	case SCAUSE_ILLEGAL_INSTRUCTION:
483 		/*
484 		 * TODO: handle illegal instruction properly.
485 		 */
486 		printf("%s: Illegal instruction at %lx stval 0x%lx htval "
487 		    "0x%lx\n", __func__, vme->sepc, vme->stval, vme->htval);
488 		vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED, 1);
489 		vme->exitcode = VM_EXITCODE_BOGUS;
490 		handled = false;
491 		break;
492 	case SCAUSE_VIRTUAL_SUPERVISOR_ECALL:
493 		retu = false;
494 		vmm_sbi_ecall(hypctx->vcpu, &retu);
495 		if (retu == false) {
496 			handled = true;
497 			break;
498 		}
499 		for (i = 0; i < nitems(vme->u.ecall.args); i++)
500 			vme->u.ecall.args[i] = hypctx->guest_regs.hyp_a[i];
501 		vme->exitcode = VM_EXITCODE_ECALL;
502 		handled = false;
503 		break;
504 	case SCAUSE_VIRTUAL_INSTRUCTION:
505 		insn = vme->stval;
506 		if (m_op(insn, MATCH_WFI, MASK_WFI))
507 			vme->exitcode = VM_EXITCODE_WFI;
508 		else
509 			vme->exitcode = VM_EXITCODE_BOGUS;
510 		handled = false;
511 		break;
512 	default:
513 		printf("unknown scause %lx\n", vme->scause);
514 		vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED, 1);
515 		vme->exitcode = VM_EXITCODE_BOGUS;
516 		handled = false;
517 		break;
518 	}
519 
520 	return (handled);
521 }
522 
/*
 * Guest linear address to guest physical address translation.
 * Not implemented: no argument is examined and ENOSYS is always returned.
 */
int
vmmops_gla2gpa(void *vcpui, struct vm_guest_paging *paging, uint64_t gla,
    int prot, uint64_t *gpa, int *is_fault)
{
	/* Implement me. */
	return (ENOSYS);
}
532 
533 void
534 riscv_send_ipi(struct hypctx *hypctx, int hart_id)
535 {
536 	struct hyp *hyp;
537 	struct vm *vm;
538 
539 	hyp = hypctx->hyp;
540 	vm = hyp->vm;
541 
542 	atomic_set_32(&hypctx->ipi_pending, 1);
543 
544 	vcpu_notify_event(vm_vcpu(vm, hart_id));
545 }
546 
547 int
548 riscv_check_ipi(struct hypctx *hypctx, bool clear)
549 {
550 	int val;
551 
552 	if (clear)
553 		val = atomic_swap_32(&hypctx->ipi_pending, 0);
554 	else
555 		val = hypctx->ipi_pending;
556 
557 	return (val);
558 }
559 
560 bool
561 riscv_check_interrupts_pending(struct hypctx *hypctx)
562 {
563 
564 	if (hypctx->interrupts_pending)
565 		return (true);
566 
567 	return (false);
568 }
569 
570 static void
571 riscv_sync_interrupts(struct hypctx *hypctx)
572 {
573 	int pending;
574 
575 	pending = aplic_check_pending(hypctx);
576 	if (pending)
577 		hypctx->guest_csrs.hvip |= HVIP_VSEIP;
578 	else
579 		hypctx->guest_csrs.hvip &= ~HVIP_VSEIP;
580 
581 	/* Guest clears VSSIP bit manually. */
582 	if (riscv_check_ipi(hypctx, true))
583 		hypctx->guest_csrs.hvip |= HVIP_VSSIP;
584 
585 	if (riscv_check_interrupts_pending(hypctx))
586 		hypctx->guest_csrs.hvip |= HVIP_VSTIP;
587 	else
588 		hypctx->guest_csrs.hvip &= ~HVIP_VSTIP;
589 
590 	csr_write(hvip, hypctx->guest_csrs.hvip);
591 }
592 
593 int
594 vmmops_run(void *vcpui, register_t pc, pmap_t pmap, struct vm_eventinfo *evinfo)
595 {
596 	struct hypctx *hypctx;
597 	struct vm_exit *vme;
598 	struct vcpu *vcpu;
599 	register_t val;
600 	uint64_t hvip;
601 	bool handled;
602 
603 	hypctx = (struct hypctx *)vcpui;
604 	vcpu = hypctx->vcpu;
605 	vme = vm_exitinfo(vcpu);
606 
607 	hypctx->guest_regs.hyp_sepc = (uint64_t)pc;
608 
609 	vmmops_delegate();
610 
611 	/*
612 	 * From The RISC-V Instruction Set Manual
613 	 * Volume II: RISC-V Privileged Architectures
614 	 *
615 	 * If the new virtual machine's guest physical page tables
616 	 * have been modified, it may be necessary to execute an HFENCE.GVMA
617 	 * instruction (see Section 5.3.2) before or after writing hgatp.
618 	 */
619 	__asm __volatile("hfence.gvma" ::: "memory");
620 
621 	csr_write(hgatp, pmap->pm_satp);
622 	if (has_sstc)
623 		csr_write(henvcfg, HENVCFG_STCE);
624 	csr_write(hie, HIE_VSEIE | HIE_VSSIE | HIE_SGEIE);
625 	/* TODO: should we trap rdcycle / rdtime? */
626 	csr_write(hcounteren, HCOUNTEREN_CY | HCOUNTEREN_TM);
627 
628 	vmmops_vcpu_restore_csrs(hypctx);
629 
630 	for (;;) {
631 		dprintf("%s: pc %lx\n", __func__, pc);
632 
633 		if (hypctx->has_exception) {
634 			hypctx->has_exception = false;
635 			/*
636 			 * TODO: implement exception injection.
637 			 */
638 		}
639 
640 		val = intr_disable();
641 
642 		/* Check if the vcpu is suspended */
643 		if (vcpu_suspended(evinfo)) {
644 			intr_restore(val);
645 			vm_exit_suspended(vcpu, pc);
646 			break;
647 		}
648 
649 		if (vcpu_debugged(vcpu)) {
650 			intr_restore(val);
651 			vm_exit_debug(vcpu, pc);
652 			break;
653 		}
654 
655 		/*
656 		 * TODO: What happens if a timer interrupt is asserted exactly
657 		 * here, but for the previous VM?
658 		 */
659 		riscv_set_active_vcpu(hypctx);
660 		aplic_flush_hwstate(hypctx);
661 		riscv_sync_interrupts(hypctx);
662 
663 		dprintf("%s: Entering guest VM, vsatp %lx, ss %lx hs %lx\n",
664 		    __func__, csr_read(vsatp), hypctx->guest_regs.hyp_sstatus,
665 		    hypctx->guest_regs.hyp_hstatus);
666 
667 		vmm_switch(hypctx);
668 
669 		dprintf("%s: Leaving guest VM, hstatus %lx\n", __func__,
670 		    hypctx->guest_regs.hyp_hstatus);
671 
672 		/* Guest can clear VSSIP. It can't clear VSTIP or VSEIP. */
673 		hvip = csr_read(hvip);
674 		if ((hypctx->guest_csrs.hvip ^ hvip) & HVIP_VSSIP) {
675 			if (hvip & HVIP_VSSIP) {
676 				/* TODO: VSSIP was set by guest. */
677 			} else {
678 				/* VSSIP was cleared by guest. */
679 				hypctx->guest_csrs.hvip &= ~HVIP_VSSIP;
680 			}
681 		}
682 
683 		aplic_sync_hwstate(hypctx);
684 
685 		/*
686 		 * TODO: deactivate stage 2 pmap here if needed.
687 		 */
688 
689 		vme->scause = csr_read(scause);
690 		vme->sepc = csr_read(sepc);
691 		vme->stval = csr_read(stval);
692 		vme->htval = csr_read(htval);
693 		vme->htinst = csr_read(htinst);
694 
695 		intr_restore(val);
696 
697 		vmm_stat_incr(vcpu, VMEXIT_COUNT, 1);
698 		vme->pc = hypctx->guest_regs.hyp_sepc;
699 		vme->inst_length = INSN_SIZE;
700 
701 		handled = riscv_handle_world_switch(hypctx, vme, pmap);
702 		if (handled == false)
703 			/* Exit loop to emulate instruction. */
704 			break;
705 		else {
706 			/* Resume guest execution from the next instruction. */
707 			hypctx->guest_regs.hyp_sepc += vme->inst_length;
708 		}
709 	}
710 
711 	vmmops_vcpu_save_csrs(hypctx);
712 
713 	return (0);
714 }
715 
716 static void
717 riscv_pcpu_vmcleanup(void *arg)
718 {
719 	struct hyp *hyp;
720 	int i, maxcpus;
721 
722 	hyp = arg;
723 	maxcpus = vm_get_maxcpus(hyp->vm);
724 	for (i = 0; i < maxcpus; i++) {
725 		if (riscv_get_active_vcpu() == hyp->ctx[i]) {
726 			riscv_set_active_vcpu(NULL);
727 			break;
728 		}
729 	}
730 }
731 
732 void
733 vmmops_vcpu_cleanup(void *vcpui)
734 {
735 	struct hypctx *hypctx;
736 
737 	hypctx = vcpui;
738 
739 	dprintf("%s\n", __func__);
740 
741 	aplic_cpucleanup(hypctx);
742 
743 	free(hypctx, M_HYP);
744 }
745 
746 void
747 vmmops_cleanup(void *vmi)
748 {
749 	struct hyp *hyp;
750 
751 	hyp = vmi;
752 
753 	dprintf("%s\n", __func__);
754 
755 	aplic_vmcleanup(hyp);
756 
757 	smp_rendezvous(NULL, riscv_pcpu_vmcleanup, NULL, hyp);
758 
759 	free(hyp, M_HYP);
760 }
761 
762 /*
763  * Return register value. Registers have different sizes and an explicit cast
764  * must be made to ensure proper conversion.
765  */
766 static uint64_t *
767 hypctx_regptr(struct hypctx *hypctx, int reg)
768 {
769 
770 	switch (reg) {
771 	case VM_REG_GUEST_RA:
772 		return (&hypctx->guest_regs.hyp_ra);
773 	case VM_REG_GUEST_SP:
774 		return (&hypctx->guest_regs.hyp_sp);
775 	case VM_REG_GUEST_GP:
776 		return (&hypctx->guest_regs.hyp_gp);
777 	case VM_REG_GUEST_TP:
778 		return (&hypctx->guest_regs.hyp_tp);
779 	case VM_REG_GUEST_T0:
780 		return (&hypctx->guest_regs.hyp_t[0]);
781 	case VM_REG_GUEST_T1:
782 		return (&hypctx->guest_regs.hyp_t[1]);
783 	case VM_REG_GUEST_T2:
784 		return (&hypctx->guest_regs.hyp_t[2]);
785 	case VM_REG_GUEST_S0:
786 		return (&hypctx->guest_regs.hyp_s[0]);
787 	case VM_REG_GUEST_S1:
788 		return (&hypctx->guest_regs.hyp_s[1]);
789 	case VM_REG_GUEST_A0:
790 		return (&hypctx->guest_regs.hyp_a[0]);
791 	case VM_REG_GUEST_A1:
792 		return (&hypctx->guest_regs.hyp_a[1]);
793 	case VM_REG_GUEST_A2:
794 		return (&hypctx->guest_regs.hyp_a[2]);
795 	case VM_REG_GUEST_A3:
796 		return (&hypctx->guest_regs.hyp_a[3]);
797 	case VM_REG_GUEST_A4:
798 		return (&hypctx->guest_regs.hyp_a[4]);
799 	case VM_REG_GUEST_A5:
800 		return (&hypctx->guest_regs.hyp_a[5]);
801 	case VM_REG_GUEST_A6:
802 		return (&hypctx->guest_regs.hyp_a[6]);
803 	case VM_REG_GUEST_A7:
804 		return (&hypctx->guest_regs.hyp_a[7]);
805 	case VM_REG_GUEST_S2:
806 		return (&hypctx->guest_regs.hyp_s[2]);
807 	case VM_REG_GUEST_S3:
808 		return (&hypctx->guest_regs.hyp_s[3]);
809 	case VM_REG_GUEST_S4:
810 		return (&hypctx->guest_regs.hyp_s[4]);
811 	case VM_REG_GUEST_S5:
812 		return (&hypctx->guest_regs.hyp_s[5]);
813 	case VM_REG_GUEST_S6:
814 		return (&hypctx->guest_regs.hyp_s[6]);
815 	case VM_REG_GUEST_S7:
816 		return (&hypctx->guest_regs.hyp_s[7]);
817 	case VM_REG_GUEST_S8:
818 		return (&hypctx->guest_regs.hyp_s[8]);
819 	case VM_REG_GUEST_S9:
820 		return (&hypctx->guest_regs.hyp_s[9]);
821 	case VM_REG_GUEST_S10:
822 		return (&hypctx->guest_regs.hyp_s[10]);
823 	case VM_REG_GUEST_S11:
824 		return (&hypctx->guest_regs.hyp_s[11]);
825 	case VM_REG_GUEST_T3:
826 		return (&hypctx->guest_regs.hyp_t[3]);
827 	case VM_REG_GUEST_T4:
828 		return (&hypctx->guest_regs.hyp_t[4]);
829 	case VM_REG_GUEST_T5:
830 		return (&hypctx->guest_regs.hyp_t[5]);
831 	case VM_REG_GUEST_T6:
832 		return (&hypctx->guest_regs.hyp_t[6]);
833 	case VM_REG_GUEST_SEPC:
834 		return (&hypctx->guest_regs.hyp_sepc);
835 	default:
836 		break;
837 	}
838 
839 	return (NULL);
840 }
841 
842 int
843 vmmops_getreg(void *vcpui, int reg, uint64_t *retval)
844 {
845 	uint64_t *regp;
846 	int running, hostcpu;
847 	struct hypctx *hypctx;
848 
849 	hypctx = vcpui;
850 
851 	running = vcpu_is_running(hypctx->vcpu, &hostcpu);
852 	if (running && hostcpu != curcpu)
853 		panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm),
854 		    vcpu_vcpuid(hypctx->vcpu));
855 
856 	if (reg == VM_REG_GUEST_ZERO) {
857 		*retval = 0;
858 		return (0);
859 	}
860 
861 	regp = hypctx_regptr(hypctx, reg);
862 	if (regp == NULL)
863 		return (EINVAL);
864 
865 	*retval = *regp;
866 
867 	return (0);
868 }
869 
870 int
871 vmmops_setreg(void *vcpui, int reg, uint64_t val)
872 {
873 	struct hypctx *hypctx;
874 	int running, hostcpu;
875 	uint64_t *regp;
876 
877 	hypctx = vcpui;
878 
879 	running = vcpu_is_running(hypctx->vcpu, &hostcpu);
880 	if (running && hostcpu != curcpu)
881 		panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm),
882 		    vcpu_vcpuid(hypctx->vcpu));
883 
884 	regp = hypctx_regptr(hypctx, reg);
885 	if (regp == NULL)
886 		return (EINVAL);
887 
888 	*regp = val;
889 
890 	return (0);
891 }
892 
893 int
894 vmmops_exception(void *vcpui, uint64_t scause)
895 {
896 	struct hypctx *hypctx;
897 	int running, hostcpu;
898 
899 	hypctx = vcpui;
900 
901 	running = vcpu_is_running(hypctx->vcpu, &hostcpu);
902 	if (running && hostcpu != curcpu)
903 		panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm),
904 		    vcpu_vcpuid(hypctx->vcpu));
905 
906 	/* TODO: implement me. */
907 
908 	return (ENOSYS);
909 }
910 
911 int
912 vmmops_getcap(void *vcpui, int num, int *retval)
913 {
914 	int ret;
915 
916 	ret = ENOENT;
917 
918 	switch (num) {
919 	case VM_CAP_SSTC:
920 		*retval = has_sstc;
921 		ret = 0;
922 		break;
923 	case VM_CAP_UNRESTRICTED_GUEST:
924 		*retval = 1;
925 		ret = 0;
926 		break;
927 	default:
928 		break;
929 	}
930 
931 	return (ret);
932 }
933 
/*
 * Set capability 'num' to 'val'.  No capability can be changed, so the
 * arguments are ignored and ENOENT is always returned.
 */
int
vmmops_setcap(void *vcpui, int num, int val)
{
	return (ENOENT);
}
940