xref: /freebsd/sys/riscv/vmm/vmm_riscv.c (revision d3916eace506b8ab23537223f5c92924636a1c41)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2024 Ruslan Bukin <br@bsdpad.com>
5  *
6  * This software was developed by the University of Cambridge Computer
7  * Laboratory (Department of Computer Science and Technology) under Innovate
8  * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
9  * Prototype".
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/smp.h>
36 #include <sys/kernel.h>
37 #include <sys/malloc.h>
38 #include <sys/mman.h>
39 #include <sys/pcpu.h>
40 #include <sys/proc.h>
41 #include <sys/rman.h>
42 #include <sys/sysctl.h>
43 #include <sys/lock.h>
44 #include <sys/mutex.h>
45 #include <sys/vmem.h>
46 #include <sys/bus.h>
47 
48 #include <vm/vm.h>
49 #include <vm/pmap.h>
50 #include <vm/vm_extern.h>
51 #include <vm/vm_map.h>
52 #include <vm/vm_page.h>
53 #include <vm/vm_param.h>
54 
55 #include <machine/md_var.h>
56 #include <machine/riscvreg.h>
57 #include <machine/vm.h>
58 #include <machine/cpufunc.h>
59 #include <machine/cpu.h>
60 #include <machine/machdep.h>
61 #include <machine/vmm.h>
62 #include <machine/vmm_dev.h>
63 #include <machine/atomic.h>
64 #include <machine/pmap.h>
65 #include <machine/intr.h>
66 #include <machine/encoding.h>
67 #include <machine/db_machdep.h>
68 
69 #include "riscv.h"
70 #include "vmm_aplic.h"
71 #include "vmm_stat.h"
72 
73 MALLOC_DEFINE(M_HYP, "RISC-V VMM HYP", "RISC-V VMM HYP");
74 
75 DPCPU_DEFINE_STATIC(struct hypctx *, vcpu);
76 
/*
 * Opcode matcher: return 1 when the bits of 'insn' selected by 'mask'
 * equal the corresponding bits of 'match', 0 otherwise.
 */
static int
m_op(uint32_t insn, int match, int mask)
{

	return ((insn & mask) == (match & mask) ? 1 : 0);
}
86 
/*
 * Record the vCPU context currently loaded on this physical hart.
 * Kept in per-CPU storage so trap handling code can locate the active
 * guest context without locking.
 */
static inline void
riscv_set_active_vcpu(struct hypctx *hypctx)
{

	DPCPU_SET(vcpu, hypctx);
}
93 
/*
 * Return the vCPU context currently loaded on this physical hart, or
 * NULL if none (cleared during VM teardown by riscv_pcpu_vmcleanup()).
 */
struct hypctx *
riscv_get_active_vcpu(void)
{

	return (DPCPU_GET(vcpu));
}
100 
101 int
vmmops_modinit(void)102 vmmops_modinit(void)
103 {
104 
105 	if (!has_hyp) {
106 		printf("vmm: riscv hart doesn't support H-extension.\n");
107 		return (ENXIO);
108 	}
109 
110 	if (!has_sstc) {
111 		printf("vmm: riscv hart doesn't support SSTC extension.\n");
112 		return (ENXIO);
113 	}
114 
115 	return (0);
116 }
117 
/*
 * Module-wide teardown.  Nothing to undo from vmmops_modinit().
 */
int
vmmops_modcleanup(void)
{

	return (0);
}
124 
/*
 * Allocate and initialize the per-VM hypervisor state.  The structure
 * is sized to hold one hypctx pointer per possible vCPU, page-aligned.
 * Returns the opaque per-VM cookie passed back into the other vmmops.
 */
void *
vmmops_init(struct vm *vm, pmap_t pmap)
{
	struct hyp *hyp;
	vm_size_t size;

	size = round_page(sizeof(struct hyp) +
	    sizeof(struct hypctx *) * vm_get_maxcpus(vm));
	hyp = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO);
	hyp->vm = vm;
	hyp->aplic_attached = false;

	/* Set up the per-VM interrupt controller (APLIC) state. */
	aplic_vminit(hyp);

	return (hyp);
}
141 
142 static void
vmmops_delegate(void)143 vmmops_delegate(void)
144 {
145 	uint64_t hedeleg;
146 	uint64_t hideleg;
147 
148 	hedeleg  = (1UL << SCAUSE_INST_MISALIGNED);
149 	hedeleg |= (1UL << SCAUSE_ILLEGAL_INSTRUCTION);
150 	hedeleg |= (1UL << SCAUSE_BREAKPOINT);
151 	hedeleg |= (1UL << SCAUSE_ECALL_USER);
152 	hedeleg |= (1UL << SCAUSE_INST_PAGE_FAULT);
153 	hedeleg |= (1UL << SCAUSE_LOAD_PAGE_FAULT);
154 	hedeleg |= (1UL << SCAUSE_STORE_PAGE_FAULT);
155 	csr_write(hedeleg, hedeleg);
156 
157 	hideleg  = (1UL << IRQ_SOFTWARE_HYPERVISOR);
158 	hideleg |= (1UL << IRQ_TIMER_HYPERVISOR);
159 	hideleg |= (1UL << IRQ_EXTERNAL_HYPERVISOR);
160 	csr_write(hideleg, hideleg);
161 }
162 
/*
 * Load the guest's virtual-supervisor CSR file onto the hart before
 * entering the guest.  Counterpart of vmmops_vcpu_save_csrs().
 */
static void
vmmops_vcpu_restore_csrs(struct hypctx *hypctx)
{
	struct hypcsr *csrs;

	csrs = &hypctx->guest_csrs;

	csr_write(vsstatus, csrs->vsstatus);
	csr_write(vsie, csrs->vsie);
	csr_write(vstvec, csrs->vstvec);
	csr_write(vsscratch, csrs->vsscratch);
	csr_write(vsepc, csrs->vsepc);
	csr_write(vscause, csrs->vscause);
	csr_write(vstval, csrs->vstval);
	csr_write(hvip, csrs->hvip);
	csr_write(vsatp, csrs->vsatp);
}
180 
/*
 * Snapshot the guest's virtual-supervisor CSR file from the hart after
 * running the guest, so another vCPU (or VM) may use this hart.
 * Counterpart of vmmops_vcpu_restore_csrs().
 */
static void
vmmops_vcpu_save_csrs(struct hypctx *hypctx)
{
	struct hypcsr *csrs;

	csrs = &hypctx->guest_csrs;

	csrs->vsstatus = csr_read(vsstatus);
	csrs->vsie = csr_read(vsie);
	csrs->vstvec = csr_read(vstvec);
	csrs->vsscratch = csr_read(vsscratch);
	csrs->vsepc = csr_read(vsepc);
	csrs->vscause = csr_read(vscause);
	csrs->vstval = csr_read(vstval);
	csrs->hvip = csr_read(hvip);
	csrs->vsatp = csr_read(vsatp);
}
198 
/*
 * Allocate and initialize the per-vCPU hypervisor context.  Called once
 * per vCPU; the returned pointer is the opaque 'vcpui' cookie passed
 * back into the other vmmops_* entry points.
 */
void *
vmmops_vcpu_init(void *vmi, struct vcpu *vcpu1, int vcpuid)
{
	struct hypctx *hypctx;
	struct hyp *hyp;
	vm_size_t size;

	hyp = vmi;

	dprintf("%s: hyp %p\n", __func__, hyp);

	KASSERT(vcpuid >= 0 && vcpuid < vm_get_maxcpus(hyp->vm),
	    ("%s: Invalid vcpuid %d", __func__, vcpuid));

	size = round_page(sizeof(struct hypctx));

	hypctx = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO);
	hypctx->hyp = hyp;
	hypctx->vcpu = vcpu1;
	/* Allow the guest to read the cycle and time counters. */
	hypctx->guest_scounteren = HCOUNTEREN_CY | HCOUNTEREN_TM;

	/*
	 * sstatus: SPP and SPIE so that the first sret enters the guest
	 * in supervisor mode with interrupts enabled; floating-point
	 * state starts as "Initial".
	 */
	hypctx->guest_regs.hyp_sstatus = SSTATUS_SPP | SSTATUS_SPIE;
	hypctx->guest_regs.hyp_sstatus |= SSTATUS_FS_INITIAL;

	/*
	 * hstatus: SPV/SPVP mark the previous privilege as VS-mode so
	 * sret enters virtualized supervisor mode; VTW makes a guest WFI
	 * trap to the hypervisor (seen as SCAUSE_VIRTUAL_INSTRUCTION in
	 * riscv_handle_world_switch()).
	 */
	hypctx->guest_regs.hyp_hstatus = HSTATUS_SPV | HSTATUS_VTW;
	hypctx->guest_regs.hyp_hstatus |= HSTATUS_SPVP;

	hypctx->cpu_id = vcpuid;
	hyp->ctx[vcpuid] = hypctx;

	/* Per-vCPU interrupt controller (APLIC) state. */
	aplic_cpuinit(hypctx);

	return (hypctx);
}
235 
/*
 * pmap init callback for guest physical (stage 2) address spaces;
 * passed to vmspace_alloc() by vmmops_vmspace_alloc().  Returns 1
 * to indicate success.
 */
static int
riscv_vmm_pinit(pmap_t pmap)
{

	dprintf("%s: pmap %p\n", __func__, pmap);

	pmap_pinit_stage(pmap, PM_STAGE2);

	return (1);
}
246 
/*
 * Create the guest physical address space, backed by a stage-2 pmap.
 */
struct vmspace *
vmmops_vmspace_alloc(vm_offset_t min, vm_offset_t max)
{

	return (vmspace_alloc(min, max, riscv_vmm_pinit));
}
253 
/*
 * Destroy the guest physical address space, tearing down any remaining
 * stage-2 mappings before releasing the vmspace.
 */
void
vmmops_vmspace_free(struct vmspace *vmspace)
{

	pmap_remove_pages(vmspace_pmap(vmspace));
	vmspace_free(vmspace);
}
261 
/*
 * Read a (possibly compressed) instruction from guest memory at
 * 'guest_addr' using the hlvx.hu hypervisor load, which performs the
 * access through the guest's translation with execute permission.
 *
 * A temporary exception vector (vmm_unpriv_trap) catches any fault the
 * access raises and records it in *trap; trap->scause is left at -1 by
 * the caller and stays -1 on success.  The hyptrap pointer is pinned in
 * register a0 for the trap handler (which also clobbers a1).  Runs with
 * interrupts disabled for the duration of the swapped stvec/hstatus.
 */
static void
riscv_unpriv_read(struct hypctx *hypctx, uintptr_t guest_addr, uint64_t *data,
    struct hyptrap *trap)
{
	register struct hyptrap * htrap asm("a0");
	uintptr_t old_hstatus;
	uintptr_t old_stvec;
	uintptr_t entry;
	uint64_t val;
	uint64_t tmp;
	int intr;

	entry = (uintptr_t)&vmm_unpriv_trap;
	htrap = trap;

	intr = intr_disable();

	old_hstatus = csr_swap(hstatus, hypctx->guest_regs.hyp_hstatus);
	/*
	 * Setup a temporary exception vector, so that if hlvx.hu raises
	 * an exception we catch it in the vmm_unpriv_trap().
	 */
	old_stvec = csr_swap(stvec, entry);

	/*
	 * Read first two bytes of instruction assuming it could be a
	 * compressed one.
	 */
	__asm __volatile(".option push\n"
			 ".option norvc\n"
			"hlvx.hu %[val], (%[addr])\n"
			".option pop\n"
	    : [val] "=r" (val)
	    : [addr] "r" (guest_addr), "r" (htrap)
	    : "a1", "memory");

	/*
	 * Check if previous hlvx.hu did not raise an exception, and then
	 * read the rest of instruction if it is a full-length one.
	 */
	if (trap->scause == -1 && (val & 0x3) == 0x3) {
		guest_addr += 2;
		__asm __volatile(".option push\n"
				 ".option norvc\n"
				"hlvx.hu %[tmp], (%[addr])\n"
				".option pop\n"
		    : [tmp] "=r" (tmp)
		    : [addr] "r" (guest_addr), "r" (htrap)
		    : "a1", "memory");
		val |= (tmp << 16);
	}

	/* Restore the host's trap vector and hstatus. */
	csr_write(hstatus, old_hstatus);
	csr_write(stvec, old_stvec);

	intr_restore(intr);

	*data = val;
}
321 
322 static int
riscv_gen_inst_emul_data(struct hypctx * hypctx,struct vm_exit * vme_ret,struct hyptrap * trap)323 riscv_gen_inst_emul_data(struct hypctx *hypctx, struct vm_exit *vme_ret,
324     struct hyptrap *trap)
325 {
326 	uintptr_t guest_addr;
327 	struct vie *vie;
328 	uint64_t insn;
329 	int reg_num;
330 	int rs2, rd;
331 	int direction;
332 	int sign_extend;
333 	int access_size;
334 
335 	guest_addr = vme_ret->sepc;
336 
337 	KASSERT(vme_ret->scause == SCAUSE_FETCH_GUEST_PAGE_FAULT ||
338 	    vme_ret->scause == SCAUSE_LOAD_GUEST_PAGE_FAULT ||
339 	    vme_ret->scause == SCAUSE_STORE_GUEST_PAGE_FAULT,
340 	    ("Invalid scause"));
341 
342 	direction = vme_ret->scause == SCAUSE_STORE_GUEST_PAGE_FAULT ?
343 	    VM_DIR_WRITE : VM_DIR_READ;
344 
345 	sign_extend = 1;
346 
347 	bzero(trap, sizeof(struct hyptrap));
348 	trap->scause = -1;
349 	riscv_unpriv_read(hypctx, guest_addr, &insn, trap);
350 	if (trap->scause != -1)
351 		return (-1);
352 
353 	if ((insn & 0x3) == 0x3) {
354 		rs2 = (insn & RS2_MASK) >> RS2_SHIFT;
355 		rd = (insn & RD_MASK) >> RD_SHIFT;
356 
357 		if (direction == VM_DIR_WRITE) {
358 			if (m_op(insn, MATCH_SB, MASK_SB))
359 				access_size = 1;
360 			else if (m_op(insn, MATCH_SH, MASK_SH))
361 				access_size = 2;
362 			else if (m_op(insn, MATCH_SW, MASK_SW))
363 				access_size = 4;
364 			else if (m_op(insn, MATCH_SD, MASK_SD))
365 				access_size = 8;
366 			else {
367 				printf("unknown store instr at %lx",
368 				    guest_addr);
369 				return (-2);
370 			}
371 			reg_num = rs2;
372 		} else {
373 			if (m_op(insn, MATCH_LB, MASK_LB))
374 				access_size = 1;
375 			else if (m_op(insn, MATCH_LH, MASK_LH))
376 				access_size = 2;
377 			else if (m_op(insn, MATCH_LW, MASK_LW))
378 				access_size = 4;
379 			else if (m_op(insn, MATCH_LD, MASK_LD))
380 				access_size = 8;
381 			else if (m_op(insn, MATCH_LBU, MASK_LBU)) {
382 				access_size = 1;
383 				sign_extend = 0;
384 			} else if (m_op(insn, MATCH_LHU, MASK_LHU)) {
385 				access_size = 2;
386 				sign_extend = 0;
387 			} else if (m_op(insn, MATCH_LWU, MASK_LWU)) {
388 				access_size = 4;
389 				sign_extend = 0;
390 			} else {
391 				printf("unknown load instr at %lx",
392 				    guest_addr);
393 				return (-3);
394 			}
395 			reg_num = rd;
396 		}
397 		vme_ret->inst_length = 4;
398 	} else {
399 		rs2 = (insn >> 7) & 0x7;
400 		rs2 += 0x8;
401 		rd = (insn >> 2) & 0x7;
402 		rd += 0x8;
403 
404 		if (direction == VM_DIR_WRITE) {
405 			if (m_op(insn, MATCH_C_SW, MASK_C_SW))
406 				access_size = 4;
407 			else if (m_op(insn, MATCH_C_SD, MASK_C_SD))
408 				access_size = 8;
409 			else {
410 				printf("unknown compressed store instr at %lx",
411 				    guest_addr);
412 				return (-4);
413 			}
414 		} else  {
415 			if (m_op(insn, MATCH_C_LW, MASK_C_LW))
416 				access_size = 4;
417 			else if (m_op(insn, MATCH_C_LD, MASK_C_LD))
418 				access_size = 8;
419 			else {
420 				printf("unknown load instr at %lx", guest_addr);
421 				return (-5);
422 			}
423 		}
424 		reg_num = rd;
425 		vme_ret->inst_length = 2;
426 	}
427 
428 	vme_ret->u.inst_emul.gpa = (vme_ret->htval << 2) |
429 	    (vme_ret->stval & 0x3);
430 
431 	dprintf("guest_addr %lx insn %lx, reg %d, gpa %lx\n", guest_addr, insn,
432 	    reg_num, vme_ret->u.inst_emul.gpa);
433 
434 	vie = &vme_ret->u.inst_emul.vie;
435 	vie->dir = direction;
436 	vie->reg = reg_num;
437 	vie->sign_extend = sign_extend;
438 	vie->access_size = access_size;
439 
440 	return (0);
441 }
442 
/*
 * Examine the cause of the most recent guest exit and either handle it
 * here (returning true so the run loop resumes the guest at the next
 * instruction) or fill in the vm_exit for the upper layers / userland
 * and return false to leave the run loop.
 */
static bool
riscv_handle_world_switch(struct hypctx *hypctx, struct vm_exit *vme,
    pmap_t pmap)
{
	struct hyptrap trap;
	uint64_t insn;
	uint64_t gpa;
	bool handled;
	bool retu;
	int ret;
	int i;

	handled = false;

	if (vme->scause & SCAUSE_INTR) {
		/*
		 * Host interrupt? Leave critical section to handle.
		 */
		vmm_stat_incr(hypctx->vcpu, VMEXIT_IRQ, 1);
		vme->exitcode = VM_EXITCODE_BOGUS;
		vme->inst_length = 0;
		return (handled);
	}

	switch (vme->scause) {
	case SCAUSE_FETCH_GUEST_PAGE_FAULT:
	case SCAUSE_LOAD_GUEST_PAGE_FAULT:
	case SCAUSE_STORE_GUEST_PAGE_FAULT:
		/* GPA = htval[XLEN-1:2] << 2 | stval[1:0]. */
		gpa = (vme->htval << 2) | (vme->stval & 0x3);
		if (vm_mem_allocated(hypctx->vcpu, gpa)) {
			/* Fault in a mapped region: stage-2 paging exit. */
			vme->exitcode = VM_EXITCODE_PAGING;
			vme->inst_length = 0;
			vme->u.paging.gpa = gpa;
		} else {
			/* MMIO: decode the instruction for emulation. */
			ret = riscv_gen_inst_emul_data(hypctx, vme, &trap);
			if (ret != 0) {
				vme->exitcode = VM_EXITCODE_HYP;
				vme->u.hyp.scause = trap.scause;
				break;
			}
			vme->exitcode = VM_EXITCODE_INST_EMUL;
		}
		break;
	case SCAUSE_ILLEGAL_INSTRUCTION:
		/*
		 * TODO: handle illegal instruction properly.
		 */
		printf("%s: Illegal instruction at %lx stval 0x%lx htval "
		    "0x%lx\n", __func__, vme->sepc, vme->stval, vme->htval);
		vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED, 1);
		vme->exitcode = VM_EXITCODE_BOGUS;
		handled = false;
		break;
	case SCAUSE_VIRTUAL_SUPERVISOR_ECALL:
		/* SBI call from the guest kernel. */
		retu = false;
		vmm_sbi_ecall(hypctx->vcpu, &retu);
		if (retu == false) {
			handled = true;
			break;
		}
		/* Pass a0-a7 to userland for SBI emulation there. */
		for (i = 0; i < nitems(vme->u.ecall.args); i++)
			vme->u.ecall.args[i] = hypctx->guest_regs.hyp_a[i];
		vme->exitcode = VM_EXITCODE_ECALL;
		handled = false;
		break;
	case SCAUSE_VIRTUAL_INSTRUCTION:
		/* Trapped by HSTATUS_VTW; stval holds the instruction. */
		insn = vme->stval;
		if (m_op(insn, MATCH_WFI, MASK_WFI))
			vme->exitcode = VM_EXITCODE_WFI;
		else
			vme->exitcode = VM_EXITCODE_BOGUS;
		handled = false;
		break;
	default:
		printf("unknown scause %lx\n", vme->scause);
		vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED, 1);
		vme->exitcode = VM_EXITCODE_BOGUS;
		handled = false;
		break;
	}

	return (handled);
}
526 
/*
 * Translate a guest linear address to a guest physical address.
 * Not yet implemented on RISC-V; returns ENOSYS.
 */
int
vmmops_gla2gpa(void *vcpui, struct vm_guest_paging *paging, uint64_t gla,
    int prot, uint64_t *gpa, int *is_fault)
{

	/* Implement me. */

	return (ENOSYS);
}
536 
537 void
riscv_send_ipi(struct hypctx * hypctx,int hart_id)538 riscv_send_ipi(struct hypctx *hypctx, int hart_id)
539 {
540 	struct hyp *hyp;
541 	struct vm *vm;
542 
543 	hyp = hypctx->hyp;
544 	vm = hyp->vm;
545 
546 	atomic_set_32(&hypctx->ipi_pending, 1);
547 
548 	vcpu_notify_event(vm_vcpu(vm, hart_id));
549 }
550 
551 int
riscv_check_ipi(struct hypctx * hypctx,bool clear)552 riscv_check_ipi(struct hypctx *hypctx, bool clear)
553 {
554 	int val;
555 
556 	if (clear)
557 		val = atomic_swap_32(&hypctx->ipi_pending, 0);
558 	else
559 		val = hypctx->ipi_pending;
560 
561 	return (val);
562 }
563 
/*
 * Reflect the APLIC pending-interrupt state into the guest by asserting
 * or clearing the virtual supervisor external interrupt (VSEIP) in the
 * hvip CSR.
 */
static void
riscv_sync_interrupts(struct hypctx *hypctx)
{
	int pending;

	pending = aplic_check_pending(hypctx);

	if (pending)
		hypctx->guest_csrs.hvip |= HVIP_VSEIP;
	else
		hypctx->guest_csrs.hvip &= ~HVIP_VSEIP;

	csr_write(hvip, hypctx->guest_csrs.hvip);
}
578 
/*
 * Deliver a pending IPI to the guest by raising the virtual supervisor
 * software interrupt (VSSIP) in hvip.  The pending flag is consumed
 * here; the guest clears the VSSIP bit itself.
 */
static void
riscv_sync_ipi(struct hypctx *hypctx)
{

	/* Guest clears VSSIP bit manually. */
	if (riscv_check_ipi(hypctx, true))
		hypctx->guest_csrs.hvip |= HVIP_VSSIP;

	csr_write(hvip, hypctx->guest_csrs.hvip);
}
589 
/*
 * Run a vCPU.  Programs the hypervisor CSRs for this guest and then
 * loops: enter the guest via vmm_switch(), and on each world switch
 * either resume the guest or break out so the exit can be serviced by
 * the upper layers / userland.
 *
 * 'pc' is the guest sepc to resume from.  Always returns 0; the exit
 * reason is delivered through the vm_exit obtained from vm_exitinfo().
 */
int
vmmops_run(void *vcpui, register_t pc, pmap_t pmap, struct vm_eventinfo *evinfo)
{
	struct hypctx *hypctx;
	struct vm_exit *vme;
	struct vcpu *vcpu;
	register_t val;
	bool handled;

	hypctx = (struct hypctx *)vcpui;
	vcpu = hypctx->vcpu;
	vme = vm_exitinfo(vcpu);

	hypctx->guest_regs.hyp_sepc = (uint64_t)pc;

	/* Delegate guest exceptions/interrupts directly to the guest. */
	vmmops_delegate();

	/*
	 * From The RISC-V Instruction Set Manual
	 * Volume II: RISC-V Privileged Architectures
	 *
	 * If the new virtual machine's guest physical page tables
	 * have been modified, it may be necessary to execute an HFENCE.GVMA
	 * instruction (see Section 5.3.2) before or after writing hgatp.
	 */
	__asm __volatile("hfence.gvma" ::: "memory");

	/* Load this guest's stage-2 page-table base. */
	csr_write(hgatp, pmap->pm_satp);
	/* Expose the Sstc stimecmp facility to the guest. */
	csr_write(henvcfg, HENVCFG_STCE);
	csr_write(hie, HIE_VSEIE | HIE_VSSIE | HIE_SGEIE);
	/* TODO: should we trap rdcycle / rdtime? */
	csr_write(hcounteren, HCOUNTEREN_CY | HCOUNTEREN_TM);

	vmmops_vcpu_restore_csrs(hypctx);

	for (;;) {
		dprintf("%s: pc %lx\n", __func__, pc);

		if (hypctx->has_exception) {
			hypctx->has_exception = false;
			/*
			 * TODO: implement exception injection.
			 */
		}

		val = intr_disable();

		/* Check if the vcpu is suspended */
		if (vcpu_suspended(evinfo)) {
			intr_restore(val);
			vm_exit_suspended(vcpu, pc);
			break;
		}

		if (vcpu_debugged(vcpu)) {
			intr_restore(val);
			vm_exit_debug(vcpu, pc);
			break;
		}

		/*
		 * TODO: What happens if a timer interrupt is asserted exactly
		 * here, but for the previous VM?
		 */
		riscv_set_active_vcpu(hypctx);
		aplic_flush_hwstate(hypctx);

		riscv_sync_interrupts(hypctx);
		riscv_sync_ipi(hypctx);

		dprintf("%s: Entering guest VM, vsatp %lx, ss %lx hs %lx\n",
		    __func__, csr_read(vsatp), hypctx->guest_regs.hyp_sstatus,
		    hypctx->guest_regs.hyp_hstatus);

		/* Enter the guest; returns on the next world switch. */
		vmm_switch(hypctx);

		dprintf("%s: Leaving guest VM, hstatus %lx\n", __func__,
		    hypctx->guest_regs.hyp_hstatus);

		aplic_sync_hwstate(hypctx);
		riscv_sync_interrupts(hypctx);

		/*
		 * TODO: deactivate stage 2 pmap here if needed.
		 */

		/* Capture the exit cause while interrupts are still off. */
		vme->scause = csr_read(scause);
		vme->sepc = csr_read(sepc);
		vme->stval = csr_read(stval);
		vme->htval = csr_read(htval);
		vme->htinst = csr_read(htinst);

		intr_restore(val);

		vmm_stat_incr(vcpu, VMEXIT_COUNT, 1);
		vme->pc = hypctx->guest_regs.hyp_sepc;
		vme->inst_length = INSN_SIZE;

		handled = riscv_handle_world_switch(hypctx, vme, pmap);
		if (handled == false)
			/* Exit loop to emulate instruction. */
			break;
		else {
			/* Resume guest execution from the next instruction. */
			hypctx->guest_regs.hyp_sepc += vme->inst_length;
		}
	}

	vmmops_vcpu_save_csrs(hypctx);

	return (0);
}
702 
703 static void
riscv_pcpu_vmcleanup(void * arg)704 riscv_pcpu_vmcleanup(void *arg)
705 {
706 	struct hyp *hyp;
707 	int i, maxcpus;
708 
709 	hyp = arg;
710 	maxcpus = vm_get_maxcpus(hyp->vm);
711 	for (i = 0; i < maxcpus; i++) {
712 		if (riscv_get_active_vcpu() == hyp->ctx[i]) {
713 			riscv_set_active_vcpu(NULL);
714 			break;
715 		}
716 	}
717 }
718 
/*
 * Free the per-vCPU state allocated by vmmops_vcpu_init().
 */
void
vmmops_vcpu_cleanup(void *vcpui)
{
	struct hypctx *hypctx;

	hypctx = vcpui;

	dprintf("%s\n", __func__);

	aplic_cpucleanup(hypctx);

	free(hypctx, M_HYP);
}
732 
/*
 * Free the per-VM state allocated by vmmops_init().  Clears any stale
 * active-vCPU pointers on every hart via smp_rendezvous() before
 * releasing the hyp structure.
 */
void
vmmops_cleanup(void *vmi)
{
	struct hyp *hyp;

	hyp = vmi;

	dprintf("%s\n", __func__);

	aplic_vmcleanup(hyp);

	smp_rendezvous(NULL, riscv_pcpu_vmcleanup, NULL, hyp);

	free(hyp, M_HYP);
}
748 
/*
 * Return a pointer to the hypctx backing store for guest register 'reg',
 * or NULL if the register is not represented here.  All guest registers
 * are stored as uint64_t; callers cast values explicitly where needed.
 */
static uint64_t *
hypctx_regptr(struct hypctx *hypctx, int reg)
{

	switch (reg) {
	case VM_REG_GUEST_RA:
		return (&hypctx->guest_regs.hyp_ra);
	case VM_REG_GUEST_SP:
		return (&hypctx->guest_regs.hyp_sp);
	case VM_REG_GUEST_GP:
		return (&hypctx->guest_regs.hyp_gp);
	case VM_REG_GUEST_TP:
		return (&hypctx->guest_regs.hyp_tp);
	case VM_REG_GUEST_T0:
		return (&hypctx->guest_regs.hyp_t[0]);
	case VM_REG_GUEST_T1:
		return (&hypctx->guest_regs.hyp_t[1]);
	case VM_REG_GUEST_T2:
		return (&hypctx->guest_regs.hyp_t[2]);
	case VM_REG_GUEST_S0:
		return (&hypctx->guest_regs.hyp_s[0]);
	case VM_REG_GUEST_S1:
		return (&hypctx->guest_regs.hyp_s[1]);
	case VM_REG_GUEST_A0:
		return (&hypctx->guest_regs.hyp_a[0]);
	case VM_REG_GUEST_A1:
		return (&hypctx->guest_regs.hyp_a[1]);
	case VM_REG_GUEST_A2:
		return (&hypctx->guest_regs.hyp_a[2]);
	case VM_REG_GUEST_A3:
		return (&hypctx->guest_regs.hyp_a[3]);
	case VM_REG_GUEST_A4:
		return (&hypctx->guest_regs.hyp_a[4]);
	case VM_REG_GUEST_A5:
		return (&hypctx->guest_regs.hyp_a[5]);
	case VM_REG_GUEST_A6:
		return (&hypctx->guest_regs.hyp_a[6]);
	case VM_REG_GUEST_A7:
		return (&hypctx->guest_regs.hyp_a[7]);
	case VM_REG_GUEST_S2:
		return (&hypctx->guest_regs.hyp_s[2]);
	case VM_REG_GUEST_S3:
		return (&hypctx->guest_regs.hyp_s[3]);
	case VM_REG_GUEST_S4:
		return (&hypctx->guest_regs.hyp_s[4]);
	case VM_REG_GUEST_S5:
		return (&hypctx->guest_regs.hyp_s[5]);
	case VM_REG_GUEST_S6:
		return (&hypctx->guest_regs.hyp_s[6]);
	case VM_REG_GUEST_S7:
		return (&hypctx->guest_regs.hyp_s[7]);
	case VM_REG_GUEST_S8:
		return (&hypctx->guest_regs.hyp_s[8]);
	case VM_REG_GUEST_S9:
		return (&hypctx->guest_regs.hyp_s[9]);
	case VM_REG_GUEST_S10:
		return (&hypctx->guest_regs.hyp_s[10]);
	case VM_REG_GUEST_S11:
		return (&hypctx->guest_regs.hyp_s[11]);
	case VM_REG_GUEST_T3:
		return (&hypctx->guest_regs.hyp_t[3]);
	case VM_REG_GUEST_T4:
		return (&hypctx->guest_regs.hyp_t[4]);
	case VM_REG_GUEST_T5:
		return (&hypctx->guest_regs.hyp_t[5]);
	case VM_REG_GUEST_T6:
		return (&hypctx->guest_regs.hyp_t[6]);
	case VM_REG_GUEST_SEPC:
		return (&hypctx->guest_regs.hyp_sepc);
	default:
		break;
	}

	return (NULL);
}
828 
/*
 * Fetch the value of guest register 'reg' into *retval.  Must not be
 * called while the vCPU is running on another hart (panics if so).
 * VM_REG_GUEST_ZERO always reads as 0; returns EINVAL for registers
 * not handled by hypctx_regptr().
 */
int
vmmops_getreg(void *vcpui, int reg, uint64_t *retval)
{
	uint64_t *regp;
	int running, hostcpu;
	struct hypctx *hypctx;

	hypctx = vcpui;

	running = vcpu_is_running(hypctx->vcpu, &hostcpu);
	if (running && hostcpu != curcpu)
		panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm),
		    vcpu_vcpuid(hypctx->vcpu));

	if (reg == VM_REG_GUEST_ZERO) {
		*retval = 0;
		return (0);
	}

	regp = hypctx_regptr(hypctx, reg);
	if (regp == NULL)
		return (EINVAL);

	*retval = *regp;

	return (0);
}
856 
/*
 * Set guest register 'reg' to 'val'.  Must not be called while the vCPU
 * is running on another hart (panics if so).  Returns EINVAL for
 * registers not handled by hypctx_regptr() (including the zero
 * register, which cannot be written).
 */
int
vmmops_setreg(void *vcpui, int reg, uint64_t val)
{
	struct hypctx *hypctx;
	int running, hostcpu;
	uint64_t *regp;

	hypctx = vcpui;

	running = vcpu_is_running(hypctx->vcpu, &hostcpu);
	if (running && hostcpu != curcpu)
		panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm),
		    vcpu_vcpuid(hypctx->vcpu));

	regp = hypctx_regptr(hypctx, reg);
	if (regp == NULL)
		return (EINVAL);

	*regp = val;

	return (0);
}
879 
/*
 * Inject an exception with cause 'scause' into the guest.
 * Not implemented yet; returns ENOSYS.
 */
int
vmmops_exception(void *vcpui, uint64_t scause)
{
	struct hypctx *hypctx;
	int running, hostcpu;

	hypctx = vcpui;

	running = vcpu_is_running(hypctx->vcpu, &hostcpu);
	if (running && hostcpu != curcpu)
		panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm),
		    vcpu_vcpuid(hypctx->vcpu));

	/* TODO: implement me. */

	return (ENOSYS);
}
897 
898 int
vmmops_getcap(void * vcpui,int num,int * retval)899 vmmops_getcap(void *vcpui, int num, int *retval)
900 {
901 	int ret;
902 
903 	ret = ENOENT;
904 
905 	switch (num) {
906 	case VM_CAP_UNRESTRICTED_GUEST:
907 		*retval = 1;
908 		ret = 0;
909 		break;
910 	default:
911 		break;
912 	}
913 
914 	return (ret);
915 }
916 
/*
 * Set a capability.  No runtime-settable capabilities are supported.
 */
int
vmmops_setcap(void *vcpui, int num, int val)
{

	return (ENOENT);
}
923