xref: /freebsd/sys/riscv/vmm/vmm_riscv.c (revision c76c2a19ae3763d17aa6a60a5831ed24cbc16e83)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2024-2025 Ruslan Bukin <br@bsdpad.com>
5  *
6  * This software was developed by the University of Cambridge Computer
7  * Laboratory (Department of Computer Science and Technology) under Innovate
8  * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
9  * Prototype".
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/smp.h>
36 #include <sys/kernel.h>
37 #include <sys/malloc.h>
38 #include <sys/mman.h>
39 #include <sys/pcpu.h>
40 #include <sys/proc.h>
41 #include <sys/rman.h>
42 #include <sys/sysctl.h>
43 #include <sys/lock.h>
44 #include <sys/mutex.h>
45 #include <sys/vmem.h>
46 #include <sys/bus.h>
47 
48 #include <vm/vm.h>
49 #include <vm/pmap.h>
50 #include <vm/vm_extern.h>
51 #include <vm/vm_map.h>
52 #include <vm/vm_page.h>
53 #include <vm/vm_param.h>
54 
55 #include <machine/md_var.h>
56 #include <machine/riscvreg.h>
57 #include <machine/vm.h>
58 #include <machine/cpufunc.h>
59 #include <machine/cpu.h>
60 #include <machine/machdep.h>
61 #include <machine/vmm.h>
62 #include <machine/vmm_dev.h>
63 #include <machine/atomic.h>
64 #include <machine/pmap.h>
65 #include <machine/intr.h>
66 #include <machine/encoding.h>
67 #include <machine/db_machdep.h>
68 
69 #include <dev/vmm/vmm_mem.h>
70 
71 #include "riscv.h"
72 #include "vmm_aplic.h"
73 #include "vmm_fence.h"
74 #include "vmm_stat.h"
75 
76 MALLOC_DEFINE(M_HYP, "RISC-V VMM HYP", "RISC-V VMM HYP");
77 
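/* The vCPU whose state is currently loaded on this hart, if any. */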
78 DPCPU_DEFINE_STATIC(struct hypctx *, vcpu);
79 
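/*
 * Return 1 if the instruction matches the given encoding: all bits selected
 * by the mask must be equal between insn and match.
 */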
80 static int
81 m_op(uint32_t insn, int match, int mask)
82 {
83 
84 	if (((insn ^ match) & mask) == 0)
85 		return (1);
86 
87 	return (0);
88 }
89 
90 static inline void
91 riscv_set_active_vcpu(struct hypctx *hypctx)
92 {
93 
94 	DPCPU_SET(vcpu, hypctx);
95 }
96 
97 struct hypctx *
98 riscv_get_active_vcpu(void)
99 {
100 
101 	return (DPCPU_GET(vcpu));
102 }
103 
104 int
105 vmmops_modinit(void)
106 {
107 
108 	if (!has_hyp) {
109 		printf("vmm: riscv hart doesn't support H-extension.\n");
110 		return (ENXIO);
111 	}
112 
113 	return (0);
114 }
115 
116 int
117 vmmops_modcleanup(void)
118 {
119 
120 	return (0);
121 }
122 
123 void *
124 vmmops_init(struct vm *vm, pmap_t pmap)
125 {
126 	struct hyp *hyp;
127 	vm_size_t size;
128 
129 	size = round_page(sizeof(struct hyp) +
130 	    sizeof(struct hypctx *) * vm_get_maxcpus(vm));
131 	hyp = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO);
132 	hyp->vm = vm;
133 	hyp->aplic_attached = false;
134 
135 	aplic_vminit(hyp);
136 
137 	return (hyp);
138 }
139 
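/*
 * Delegate the listed guest exceptions and the VS-level interrupts directly
 * to the guest (VS-mode), so they do not trap into the host hypervisor.
 */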
140 static void
141 vmmops_delegate(void)
142 {
143 	uint64_t hedeleg;
144 	uint64_t hideleg;
145 
146 	hedeleg  = (1UL << SCAUSE_INST_MISALIGNED);
147 	hedeleg |= (1UL << SCAUSE_ILLEGAL_INSTRUCTION);
148 	hedeleg |= (1UL << SCAUSE_BREAKPOINT);
149 	hedeleg |= (1UL << SCAUSE_ECALL_USER);
150 	hedeleg |= (1UL << SCAUSE_INST_PAGE_FAULT);
151 	hedeleg |= (1UL << SCAUSE_LOAD_PAGE_FAULT);
152 	hedeleg |= (1UL << SCAUSE_STORE_PAGE_FAULT);
153 	csr_write(hedeleg, hedeleg);
154 
155 	hideleg  = (1UL << IRQ_SOFTWARE_HYPERVISOR);
156 	hideleg |= (1UL << IRQ_TIMER_HYPERVISOR);
157 	hideleg |= (1UL << IRQ_EXTERNAL_HYPERVISOR);
158 	csr_write(hideleg, hideleg);
159 }
160 
161 static void
162 vmmops_vcpu_restore_csrs(struct hypctx *hypctx)
163 {
164 	struct hypcsr *csrs;
165 
166 	csrs = &hypctx->guest_csrs;
167 
168 	csr_write(vsstatus, csrs->vsstatus);
169 	csr_write(vsie, csrs->vsie);
170 	csr_write(vstvec, csrs->vstvec);
171 	csr_write(vsscratch, csrs->vsscratch);
172 	csr_write(vsepc, csrs->vsepc);
173 	csr_write(vscause, csrs->vscause);
174 	csr_write(vstval, csrs->vstval);
175 	csr_write(hvip, csrs->hvip);
176 	csr_write(vsatp, csrs->vsatp);
177 }
178 
179 static void
180 vmmops_vcpu_save_csrs(struct hypctx *hypctx)
181 {
182 	struct hypcsr *csrs;
183 
184 	csrs = &hypctx->guest_csrs;
185 
186 	csrs->vsstatus = csr_read(vsstatus);
187 	csrs->vsie = csr_read(vsie);
188 	csrs->vstvec = csr_read(vstvec);
189 	csrs->vsscratch = csr_read(vsscratch);
190 	csrs->vsepc = csr_read(vsepc);
191 	csrs->vscause = csr_read(vscause);
192 	csrs->vstval = csr_read(vstval);
193 	csrs->hvip = csr_read(hvip);
194 	csrs->vsatp = csr_read(vsatp);
195 }
196 
197 void *
198 vmmops_vcpu_init(void *vmi, struct vcpu *vcpu1, int vcpuid)
199 {
200 	struct hypctx *hypctx;
201 	struct hyp *hyp;
202 	vm_size_t size;
203 
204 	hyp = vmi;
205 
206 	dprintf("%s: hyp %p\n", __func__, hyp);
207 
208 	KASSERT(vcpuid >= 0 && vcpuid < vm_get_maxcpus(hyp->vm),
209 	    ("%s: Invalid vcpuid %d", __func__, vcpuid));
210 
211 	size = round_page(sizeof(struct hypctx));
212 
213 	hypctx = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO);
214 	hypctx->hyp = hyp;
215 	hypctx->vcpu = vcpu1;
216 	hypctx->guest_scounteren = HCOUNTEREN_CY | HCOUNTEREN_TM;
217 
218 	/* Fence queue. */
219 	hypctx->fence_queue = mallocarray(VMM_FENCE_QUEUE_SIZE,
220 	    sizeof(struct vmm_fence), M_HYP, M_WAITOK | M_ZERO);
221 	mtx_init(&hypctx->fence_queue_mtx, "fence queue", NULL, MTX_SPIN);
222 
223 	/* sstatus */
224 	hypctx->guest_regs.hyp_sstatus = SSTATUS_SPP | SSTATUS_SPIE;
225 	hypctx->guest_regs.hyp_sstatus |= SSTATUS_FS_INITIAL;
226 
227 	/* hstatus */
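	/*
	 * SPV makes the next sret enter the guest (V=1), SPVP makes the
	 * hypervisor load instructions (hlvx.hu) access guest memory at
	 * supervisor privilege, and VTW allows a guest WFI to trap as a
	 * virtual-instruction exception (handled as VM_EXITCODE_WFI).
	 */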
228 	hypctx->guest_regs.hyp_hstatus = HSTATUS_SPV | HSTATUS_VTW;
229 	hypctx->guest_regs.hyp_hstatus |= HSTATUS_SPVP;
230 
231 	hypctx->cpu_id = vcpuid;
232 	hyp->ctx[vcpuid] = hypctx;
233 
234 	aplic_cpuinit(hypctx);
235 	vtimer_cpuinit(hypctx);
236 
237 	return (hypctx);
238 }
239 
240 static int
241 riscv_vmm_pinit(pmap_t pmap)
242 {
243 
244 	dprintf("%s: pmap %p\n", __func__, pmap);
245 
246 	pmap_pinit_stage(pmap, PM_STAGE2);
247 
248 	return (1);
249 }
250 
251 struct vmspace *
252 vmmops_vmspace_alloc(vm_offset_t min, vm_offset_t max)
253 {
254 
255 	return (vmspace_alloc(min, max, riscv_vmm_pinit));
256 }
257 
258 void
259 vmmops_vmspace_free(struct vmspace *vmspace)
260 {
261 
262 	pmap_remove_pages(vmspace_pmap(vmspace));
263 	vmspace_free(vmspace);
264 }
265 
266 static void
267 riscv_unpriv_read(struct hypctx *hypctx, uintptr_t guest_addr, uint64_t *data,
268     struct hyptrap *trap)
269 {
270 	register struct hyptrap * htrap asm("a0");
271 	uintptr_t old_hstatus;
272 	uintptr_t old_stvec;
273 	uintptr_t entry;
274 	uint64_t val;
275 	uint64_t tmp;
276 	int intr;
277 
278 	entry = (uintptr_t)&vmm_unpriv_trap;
279 	htrap = trap;
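	/*
	 * hlvx.hu reads two bytes of guest memory with execute permission
	 * through the guest's translation, which is why the guest hstatus is
	 * swapped in below. The trap struct is pinned to a0 so that, if the
	 * access faults, vmm_unpriv_trap() can presumably record the trap
	 * details there; the caller pre-sets trap->scause to detect this.
	 */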
280 
281 	intr = intr_disable();
282 
283 	old_hstatus = csr_swap(hstatus, hypctx->guest_regs.hyp_hstatus);
284 	/*
285 	 * Setup a temporary exception vector, so that if hlvx.hu raises
286 	 * an exception we catch it in the vmm_unpriv_trap().
287 	 */
288 	old_stvec = csr_swap(stvec, entry);
289 
290 	/*
291 	 * Read first two bytes of instruction assuming it could be a
292 	 * compressed one.
293 	 */
294 	__asm __volatile(".option push\n"
295 			 ".option norvc\n"
296 			"hlvx.hu %[val], (%[addr])\n"
297 			".option pop\n"
298 	    : [val] "=r" (val)
299 	    : [addr] "r" (guest_addr), "r" (htrap)
300 	    : "a1", "memory");
301 
302 	/*
303 	 * Check if previous hlvx.hu did not raise an exception, and then
304 	 * read the rest of instruction if it is a full-length one.
305 	 */
306 	if (trap->scause == -1 && (val & 0x3) == 0x3) {
307 		guest_addr += 2;
308 		__asm __volatile(".option push\n"
309 				 ".option norvc\n"
310 				"hlvx.hu %[tmp], (%[addr])\n"
311 				".option pop\n"
312 		    : [tmp] "=r" (tmp)
313 		    : [addr] "r" (guest_addr), "r" (htrap)
314 		    : "a1", "memory");
315 		val |= (tmp << 16);
316 	}
317 
318 	csr_write(hstatus, old_hstatus);
319 	csr_write(stvec, old_stvec);
320 
321 	intr_restore(intr);
322 
323 	*data = val;
324 }
325 
326 static int
327 riscv_gen_inst_emul_data(struct hypctx *hypctx, struct vm_exit *vme_ret,
328     struct hyptrap *trap)
329 {
330 	uintptr_t guest_addr;
331 	struct vie *vie;
332 	uint64_t insn;
333 	int reg_num;
334 	int rs2, rd;
335 	int direction;
336 	int sign_extend;
337 	int access_size;
338 
339 	guest_addr = vme_ret->sepc;
340 
341 	KASSERT(vme_ret->scause == SCAUSE_FETCH_GUEST_PAGE_FAULT ||
342 	    vme_ret->scause == SCAUSE_LOAD_GUEST_PAGE_FAULT ||
343 	    vme_ret->scause == SCAUSE_STORE_GUEST_PAGE_FAULT,
344 	    ("Invalid scause"));
345 
346 	direction = vme_ret->scause == SCAUSE_STORE_GUEST_PAGE_FAULT ?
347 	    VM_DIR_WRITE : VM_DIR_READ;
348 
349 	sign_extend = 1;
350 
351 	bzero(trap, sizeof(struct hyptrap));
352 	trap->scause = -1;
353 	riscv_unpriv_read(hypctx, guest_addr, &insn, trap);
354 	if (trap->scause != -1)
355 		return (-1);
356 
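	/*
	 * Standard 32-bit encodings have the two low opcode bits set to 11;
	 * compressed (RVC) instructions use 00, 01 or 10.
	 */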
357 	if ((insn & 0x3) == 0x3) {
358 		rs2 = (insn & RS2_MASK) >> RS2_SHIFT;
359 		rd = (insn & RD_MASK) >> RD_SHIFT;
360 
361 		if (direction == VM_DIR_WRITE) {
362 			if (m_op(insn, MATCH_SB, MASK_SB))
363 				access_size = 1;
364 			else if (m_op(insn, MATCH_SH, MASK_SH))
365 				access_size = 2;
366 			else if (m_op(insn, MATCH_SW, MASK_SW))
367 				access_size = 4;
368 			else if (m_op(insn, MATCH_SD, MASK_SD))
369 				access_size = 8;
370 			else {
371 				printf("unknown store instr at %lx",
372 				    guest_addr);
373 				return (-2);
374 			}
375 			reg_num = rs2;
376 		} else {
377 			if (m_op(insn, MATCH_LB, MASK_LB))
378 				access_size = 1;
379 			else if (m_op(insn, MATCH_LH, MASK_LH))
380 				access_size = 2;
381 			else if (m_op(insn, MATCH_LW, MASK_LW))
382 				access_size = 4;
383 			else if (m_op(insn, MATCH_LD, MASK_LD))
384 				access_size = 8;
385 			else if (m_op(insn, MATCH_LBU, MASK_LBU)) {
386 				access_size = 1;
387 				sign_extend = 0;
388 			} else if (m_op(insn, MATCH_LHU, MASK_LHU)) {
389 				access_size = 2;
390 				sign_extend = 0;
391 			} else if (m_op(insn, MATCH_LWU, MASK_LWU)) {
392 				access_size = 4;
393 				sign_extend = 0;
394 			} else {
395 				printf("unknown load instr at %lx",
396 				    guest_addr);
397 				return (-3);
398 			}
399 			reg_num = rd;
400 		}
401 		vme_ret->inst_length = 4;
402 	} else {
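		/*
		 * C.LW/C.LD/C.SW/C.SD encode rd'/rs2' in three bits that map
		 * to registers x8-x15, hence the addition of 8 below.
		 */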
403 		rs2 = (insn >> 7) & 0x7;
404 		rs2 += 0x8;
405 		rd = (insn >> 2) & 0x7;
406 		rd += 0x8;
407 
408 		if (direction == VM_DIR_WRITE) {
409 			if (m_op(insn, MATCH_C_SW, MASK_C_SW))
410 				access_size = 4;
411 			else if (m_op(insn, MATCH_C_SD, MASK_C_SD))
412 				access_size = 8;
413 			else {
414 				printf("unknown compressed store instr at %lx",
415 				    guest_addr);
416 				return (-4);
417 			}
418 		} else {
419 			if (m_op(insn, MATCH_C_LW, MASK_C_LW))
420 				access_size = 4;
421 			else if (m_op(insn, MATCH_C_LD, MASK_C_LD))
422 				access_size = 8;
423 			else {
424 				printf("unknown load instr at %lx", guest_addr);
425 				return (-5);
426 			}
427 		}
428 		reg_num = rd;
429 		vme_ret->inst_length = 2;
430 	}
431 
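	/*
	 * Per the privileged spec, htval holds the faulting guest physical
	 * address shifted right by two; the low two bits come from stval.
	 */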
432 	vme_ret->u.inst_emul.gpa = (vme_ret->htval << 2) |
433 	    (vme_ret->stval & 0x3);
434 
435 	dprintf("guest_addr %lx insn %lx, reg %d, gpa %lx\n", guest_addr, insn,
436 	    reg_num, vme_ret->u.inst_emul.gpa);
437 
438 	vie = &vme_ret->u.inst_emul.vie;
439 	vie->dir = direction;
440 	vie->reg = reg_num;
441 	vie->sign_extend = sign_extend;
442 	vie->access_size = access_size;
443 
444 	return (0);
445 }
446 
447 static bool
448 riscv_handle_world_switch(struct hypctx *hypctx, struct vm_exit *vme,
449     pmap_t pmap)
450 {
451 	struct hyptrap trap;
452 	uint64_t insn;
453 	uint64_t gpa;
454 	bool handled;
455 	int ret;
456 	int i;
457 
458 	handled = false;
459 
460 	if (vme->scause & SCAUSE_INTR) {
461 		/*
462 		 * Host interrupt? Leave critical section to handle.
463 		 */
464 		vmm_stat_incr(hypctx->vcpu, VMEXIT_IRQ, 1);
465 		vme->exitcode = VM_EXITCODE_BOGUS;
466 		vme->inst_length = 0;
467 		return (handled);
468 	}
469 
470 	switch (vme->scause) {
471 	case SCAUSE_FETCH_GUEST_PAGE_FAULT:
472 	case SCAUSE_LOAD_GUEST_PAGE_FAULT:
473 	case SCAUSE_STORE_GUEST_PAGE_FAULT:
474 		gpa = (vme->htval << 2) | (vme->stval & 0x3);
475 		if (vm_mem_allocated(hypctx->vcpu, gpa)) {
476 			vme->exitcode = VM_EXITCODE_PAGING;
477 			vme->inst_length = 0;
478 			vme->u.paging.gpa = gpa;
479 		} else {
480 			ret = riscv_gen_inst_emul_data(hypctx, vme, &trap);
481 			if (ret != 0) {
482 				vme->exitcode = VM_EXITCODE_HYP;
483 				vme->u.hyp.scause = trap.scause;
484 				break;
485 			}
486 			vme->exitcode = VM_EXITCODE_INST_EMUL;
487 		}
488 		break;
489 	case SCAUSE_ILLEGAL_INSTRUCTION:
490 		/*
491 		 * TODO: handle illegal instruction properly.
492 		 */
493 		printf("%s: Illegal instruction at %lx stval 0x%lx htval "
494 		    "0x%lx\n", __func__, vme->sepc, vme->stval, vme->htval);
495 		vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED, 1);
496 		vme->exitcode = VM_EXITCODE_BOGUS;
497 		handled = false;
498 		break;
499 	case SCAUSE_VIRTUAL_SUPERVISOR_ECALL:
500 		handled = vmm_sbi_ecall(hypctx->vcpu);
501 		if (handled == true)
502 			break;
503 		for (i = 0; i < nitems(vme->u.ecall.args); i++)
504 			vme->u.ecall.args[i] = hypctx->guest_regs.hyp_a[i];
505 		vme->exitcode = VM_EXITCODE_ECALL;
506 		break;
507 	case SCAUSE_VIRTUAL_INSTRUCTION:
508 		insn = vme->stval;
509 		if (m_op(insn, MATCH_WFI, MASK_WFI))
510 			vme->exitcode = VM_EXITCODE_WFI;
511 		else
512 			vme->exitcode = VM_EXITCODE_BOGUS;
513 		handled = false;
514 		break;
515 	default:
516 		printf("unknown scause %lx\n", vme->scause);
517 		vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED, 1);
518 		vme->exitcode = VM_EXITCODE_BOGUS;
519 		handled = false;
520 		break;
521 	}
522 
523 	return (handled);
524 }
525 
526 int
527 vmmops_gla2gpa(void *vcpui, struct vm_guest_paging *paging, uint64_t gla,
528     int prot, uint64_t *gpa, int *is_fault)
529 {
530 
531 	/* Implement me. */
532 
533 	return (ENOSYS);
534 }
535 
536 void
537 riscv_send_ipi(struct hyp *hyp, cpuset_t *cpus)
538 {
539 	struct hypctx *hypctx;
540 	struct vm *vm;
541 	uint16_t maxcpus;
542 	int i;
543 
544 	vm = hyp->vm;
545 
546 	maxcpus = vm_get_maxcpus(hyp->vm);
547 	for (i = 0; i < maxcpus; i++) {
548 		if (!CPU_ISSET(i, cpus))
549 			continue;
550 		hypctx = hyp->ctx[i];
551 		atomic_set_32(&hypctx->ipi_pending, 1);
552 		vcpu_notify_event(vm_vcpu(vm, i));
553 	}
554 }
555 
556 int
557 riscv_check_ipi(struct hypctx *hypctx, bool clear)
558 {
559 	int val;
560 
561 	if (clear)
562 		val = atomic_swap_32(&hypctx->ipi_pending, 0);
563 	else
564 		val = hypctx->ipi_pending;
565 
566 	return (val);
567 }
568 
569 bool
570 riscv_check_interrupts_pending(struct hypctx *hypctx)
571 {
572 
573 	if (hypctx->interrupts_pending)
574 		return (true);
575 
576 	return (false);
577 }
578 
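/*
 * Bits set in hvip are presented to the guest as pending VS-level external,
 * software and timer interrupts (VSEIP, VSSIP, VSTIP).
 */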
579 static void
580 riscv_sync_interrupts(struct hypctx *hypctx)
581 {
582 	int pending;
583 
584 	pending = aplic_check_pending(hypctx);
585 	if (pending)
586 		hypctx->guest_csrs.hvip |= HVIP_VSEIP;
587 	else
588 		hypctx->guest_csrs.hvip &= ~HVIP_VSEIP;
589 
590 	/* Guest clears VSSIP bit manually. */
591 	if (riscv_check_ipi(hypctx, true))
592 		hypctx->guest_csrs.hvip |= HVIP_VSSIP;
593 
594 	if (riscv_check_interrupts_pending(hypctx))
595 		hypctx->guest_csrs.hvip |= HVIP_VSTIP;
596 	else
597 		hypctx->guest_csrs.hvip &= ~HVIP_VSTIP;
598 
599 	csr_write(hvip, hypctx->guest_csrs.hvip);
600 }
601 
602 int
603 vmmops_run(void *vcpui, register_t pc, pmap_t pmap, struct vm_eventinfo *evinfo)
604 {
605 	struct hypctx *hypctx;
606 	struct vm_exit *vme;
607 	struct vcpu *vcpu;
608 	register_t val;
609 	uint64_t hvip;
610 	bool handled;
611 
612 	hypctx = (struct hypctx *)vcpui;
613 	vcpu = hypctx->vcpu;
614 	vme = vm_exitinfo(vcpu);
615 
616 	hypctx->guest_regs.hyp_sepc = (uint64_t)pc;
617 
618 	vmmops_delegate();
619 
620 	/*
621 	 * From The RISC-V Instruction Set Manual
622 	 * Volume II: RISC-V Privileged Architectures
623 	 *
624 	 * If the new virtual machine's guest physical page tables
625 	 * have been modified, it may be necessary to execute an HFENCE.GVMA
626 	 * instruction (see Section 5.3.2) before or after writing hgatp.
627 	 */
628 	__asm __volatile("hfence.gvma" ::: "memory");
629 
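	/* hgatp selects the stage 2 (G-stage) root page table for this VM. */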
630 	csr_write(hgatp, pmap->pm_satp);
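	/*
	 * With the Sstc extension, STCE lets the guest program its timer
	 * directly through the vstimecmp CSR.
	 */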
631 	if (has_sstc)
632 		csr_write(henvcfg, HENVCFG_STCE);
633 	csr_write(hie, HIE_VSEIE | HIE_VSSIE | HIE_SGEIE);
634 	/* TODO: should we trap rdcycle / rdtime? */
635 	csr_write(hcounteren, HCOUNTEREN_CY | HCOUNTEREN_TM);
636 
637 	vmmops_vcpu_restore_csrs(hypctx);
638 
639 	for (;;) {
640 		dprintf("%s: pc %lx\n", __func__, pc);
641 
642 		if (hypctx->has_exception) {
643 			hypctx->has_exception = false;
644 			/*
645 			 * TODO: implement exception injection.
646 			 */
647 		}
648 
649 		val = intr_disable();
650 
651 		/* Check if the vcpu is suspended */
652 		if (vcpu_suspended(evinfo)) {
653 			intr_restore(val);
654 			vm_exit_suspended(vcpu, pc);
655 			break;
656 		}
657 
658 		if (vcpu_debugged(vcpu)) {
659 			intr_restore(val);
660 			vm_exit_debug(vcpu, pc);
661 			break;
662 		}
663 
664 		/*
665 		 * TODO: What happens if a timer interrupt is asserted exactly
666 		 * here, but for the previous VM?
667 		 */
668 		riscv_set_active_vcpu(hypctx);
669 		aplic_flush_hwstate(hypctx);
670 		riscv_sync_interrupts(hypctx);
671 		vmm_fence_process(hypctx);
672 
673 		dprintf("%s: Entering guest VM, vsatp %lx, ss %lx hs %lx\n",
674 		    __func__, csr_read(vsatp), hypctx->guest_regs.hyp_sstatus,
675 		    hypctx->guest_regs.hyp_hstatus);
676 
677 		vmm_switch(hypctx);
678 
679 		dprintf("%s: Leaving guest VM, hstatus %lx\n", __func__,
680 		    hypctx->guest_regs.hyp_hstatus);
681 
682 		/* Guest can clear VSSIP. It can't clear VSTIP or VSEIP. */
683 		hvip = csr_read(hvip);
684 		if ((hypctx->guest_csrs.hvip ^ hvip) & HVIP_VSSIP) {
685 			if (hvip & HVIP_VSSIP) {
686 				/* TODO: VSSIP was set by guest. */
687 			} else {
688 				/* VSSIP was cleared by guest. */
689 				hypctx->guest_csrs.hvip &= ~HVIP_VSSIP;
690 			}
691 		}
692 
693 		aplic_sync_hwstate(hypctx);
694 
695 		/*
696 		 * TODO: deactivate stage 2 pmap here if needed.
697 		 */
698 
699 		vme->scause = csr_read(scause);
700 		vme->sepc = csr_read(sepc);
701 		vme->stval = csr_read(stval);
702 		vme->htval = csr_read(htval);
703 		vme->htinst = csr_read(htinst);
704 
705 		intr_restore(val);
706 
707 		vmm_stat_incr(vcpu, VMEXIT_COUNT, 1);
708 		vme->pc = hypctx->guest_regs.hyp_sepc;
709 		vme->inst_length = INSN_SIZE;
710 
711 		handled = riscv_handle_world_switch(hypctx, vme, pmap);
712 		if (handled == false)
713 			/* Exit loop to emulate instruction. */
714 			break;
715 		else {
716 			/* Resume guest execution from the next instruction. */
717 			hypctx->guest_regs.hyp_sepc += vme->inst_length;
718 		}
719 	}
720 
721 	vmmops_vcpu_save_csrs(hypctx);
722 
723 	return (0);
724 }
725 
726 static void
727 riscv_pcpu_vmcleanup(void *arg)
728 {
729 	struct hyp *hyp;
730 	int i, maxcpus;
731 
732 	hyp = arg;
733 	maxcpus = vm_get_maxcpus(hyp->vm);
734 	for (i = 0; i < maxcpus; i++) {
735 		if (riscv_get_active_vcpu() == hyp->ctx[i]) {
736 			riscv_set_active_vcpu(NULL);
737 			break;
738 		}
739 	}
740 }
741 
742 void
743 vmmops_vcpu_cleanup(void *vcpui)
744 {
745 	struct hypctx *hypctx;
746 
747 	hypctx = vcpui;
748 
749 	dprintf("%s\n", __func__);
750 
751 	aplic_cpucleanup(hypctx);
752 
753 	mtx_destroy(&hypctx->fence_queue_mtx);
754 	free(hypctx->fence_queue, M_HYP);
755 	free(hypctx, M_HYP);
756 }
757 
758 void
759 vmmops_cleanup(void *vmi)
760 {
761 	struct hyp *hyp;
762 
763 	hyp = vmi;
764 
765 	dprintf("%s\n", __func__);
766 
767 	aplic_vmcleanup(hyp);
768 
769 	smp_rendezvous(NULL, riscv_pcpu_vmcleanup, NULL, hyp);
770 
771 	free(hyp, M_HYP);
772 }
773 
774 /*
775  * Return a pointer to the register's storage. Registers have different sizes
776  * and an explicit cast must be made to ensure proper conversion.
777  */
778 static uint64_t *
779 hypctx_regptr(struct hypctx *hypctx, int reg)
780 {
781 
782 	switch (reg) {
783 	case VM_REG_GUEST_RA:
784 		return (&hypctx->guest_regs.hyp_ra);
785 	case VM_REG_GUEST_SP:
786 		return (&hypctx->guest_regs.hyp_sp);
787 	case VM_REG_GUEST_GP:
788 		return (&hypctx->guest_regs.hyp_gp);
789 	case VM_REG_GUEST_TP:
790 		return (&hypctx->guest_regs.hyp_tp);
791 	case VM_REG_GUEST_T0:
792 		return (&hypctx->guest_regs.hyp_t[0]);
793 	case VM_REG_GUEST_T1:
794 		return (&hypctx->guest_regs.hyp_t[1]);
795 	case VM_REG_GUEST_T2:
796 		return (&hypctx->guest_regs.hyp_t[2]);
797 	case VM_REG_GUEST_S0:
798 		return (&hypctx->guest_regs.hyp_s[0]);
799 	case VM_REG_GUEST_S1:
800 		return (&hypctx->guest_regs.hyp_s[1]);
801 	case VM_REG_GUEST_A0:
802 		return (&hypctx->guest_regs.hyp_a[0]);
803 	case VM_REG_GUEST_A1:
804 		return (&hypctx->guest_regs.hyp_a[1]);
805 	case VM_REG_GUEST_A2:
806 		return (&hypctx->guest_regs.hyp_a[2]);
807 	case VM_REG_GUEST_A3:
808 		return (&hypctx->guest_regs.hyp_a[3]);
809 	case VM_REG_GUEST_A4:
810 		return (&hypctx->guest_regs.hyp_a[4]);
811 	case VM_REG_GUEST_A5:
812 		return (&hypctx->guest_regs.hyp_a[5]);
813 	case VM_REG_GUEST_A6:
814 		return (&hypctx->guest_regs.hyp_a[6]);
815 	case VM_REG_GUEST_A7:
816 		return (&hypctx->guest_regs.hyp_a[7]);
817 	case VM_REG_GUEST_S2:
818 		return (&hypctx->guest_regs.hyp_s[2]);
819 	case VM_REG_GUEST_S3:
820 		return (&hypctx->guest_regs.hyp_s[3]);
821 	case VM_REG_GUEST_S4:
822 		return (&hypctx->guest_regs.hyp_s[4]);
823 	case VM_REG_GUEST_S5:
824 		return (&hypctx->guest_regs.hyp_s[5]);
825 	case VM_REG_GUEST_S6:
826 		return (&hypctx->guest_regs.hyp_s[6]);
827 	case VM_REG_GUEST_S7:
828 		return (&hypctx->guest_regs.hyp_s[7]);
829 	case VM_REG_GUEST_S8:
830 		return (&hypctx->guest_regs.hyp_s[8]);
831 	case VM_REG_GUEST_S9:
832 		return (&hypctx->guest_regs.hyp_s[9]);
833 	case VM_REG_GUEST_S10:
834 		return (&hypctx->guest_regs.hyp_s[10]);
835 	case VM_REG_GUEST_S11:
836 		return (&hypctx->guest_regs.hyp_s[11]);
837 	case VM_REG_GUEST_T3:
838 		return (&hypctx->guest_regs.hyp_t[3]);
839 	case VM_REG_GUEST_T4:
840 		return (&hypctx->guest_regs.hyp_t[4]);
841 	case VM_REG_GUEST_T5:
842 		return (&hypctx->guest_regs.hyp_t[5]);
843 	case VM_REG_GUEST_T6:
844 		return (&hypctx->guest_regs.hyp_t[6]);
845 	case VM_REG_GUEST_SEPC:
846 		return (&hypctx->guest_regs.hyp_sepc);
847 	default:
848 		break;
849 	}
850 
851 	return (NULL);
852 }
853 
854 int
855 vmmops_getreg(void *vcpui, int reg, uint64_t *retval)
856 {
857 	uint64_t *regp;
858 	int running, hostcpu;
859 	struct hypctx *hypctx;
860 
861 	hypctx = vcpui;
862 
863 	running = vcpu_is_running(hypctx->vcpu, &hostcpu);
864 	if (running && hostcpu != curcpu)
865 		panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm),
866 		    vcpu_vcpuid(hypctx->vcpu));
867 
868 	if (reg == VM_REG_GUEST_ZERO) {
869 		*retval = 0;
870 		return (0);
871 	}
872 
873 	regp = hypctx_regptr(hypctx, reg);
874 	if (regp == NULL)
875 		return (EINVAL);
876 
877 	*retval = *regp;
878 
879 	return (0);
880 }
881 
882 int
883 vmmops_setreg(void *vcpui, int reg, uint64_t val)
884 {
885 	struct hypctx *hypctx;
886 	int running, hostcpu;
887 	uint64_t *regp;
888 
889 	hypctx = vcpui;
890 
891 	running = vcpu_is_running(hypctx->vcpu, &hostcpu);
892 	if (running && hostcpu != curcpu)
893 		panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm),
894 		    vcpu_vcpuid(hypctx->vcpu));
895 
896 	regp = hypctx_regptr(hypctx, reg);
897 	if (regp == NULL)
898 		return (EINVAL);
899 
900 	*regp = val;
901 
902 	return (0);
903 }
904 
905 int
906 vmmops_exception(void *vcpui, uint64_t scause)
907 {
908 	struct hypctx *hypctx;
909 	int running, hostcpu;
910 
911 	hypctx = vcpui;
912 
913 	running = vcpu_is_running(hypctx->vcpu, &hostcpu);
914 	if (running && hostcpu != curcpu)
915 		panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm),
916 		    vcpu_vcpuid(hypctx->vcpu));
917 
918 	/* TODO: implement me. */
919 
920 	return (ENOSYS);
921 }
922 
923 int
924 vmmops_getcap(void *vcpui, int num, int *retval)
925 {
926 	int ret;
927 
928 	ret = ENOENT;
929 
930 	switch (num) {
931 	case VM_CAP_SSTC:
932 		*retval = has_sstc;
933 		ret = 0;
934 		break;
935 	case VM_CAP_UNRESTRICTED_GUEST:
936 		*retval = 1;
937 		ret = 0;
938 		break;
939 	default:
940 		break;
941 	}
942 
943 	return (ret);
944 }
945 
946 int
947 vmmops_setcap(void *vcpui, int num, int val)
948 {
949 
950 	return (ENOENT);
951 }
952