/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2024 Ruslan Bukin <br@bsdpad.com>
 *
 * This software was developed by the University of Cambridge Computer
 * Laboratory (Department of Computer Science and Technology) under Innovate
 * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
 * Prototype".
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/smp.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/rman.h>
#include <sys/sysctl.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/vmem.h>
#include <sys/bus.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_extern.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_param.h>

#include <machine/md_var.h>
#include <machine/riscvreg.h>
#include <machine/vm.h>
#include <machine/cpufunc.h>
#include <machine/cpu.h>
#include <machine/machdep.h>
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <machine/atomic.h>
#include <machine/pmap.h>
#include <machine/intr.h>
#include <machine/encoding.h>
#include <machine/db_machdep.h>

#include "riscv.h"
#include "vmm_aplic.h"
#include "vmm_stat.h"

MALLOC_DEFINE(M_HYP, "RISC-V VMM HYP", "RISC-V VMM HYP");

DPCPU_DEFINE_STATIC(struct hypctx *, vcpu);

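/*
 * Return 1 if the instruction matches the given opcode pattern under the
 * mask, 0 otherwise.
 */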
static int
m_op(uint32_t insn, int match, int mask)
{

	if (((insn ^ match) & mask) == 0)
		return (1);

	return (0);
}

static inline void
riscv_set_active_vcpu(struct hypctx *hypctx)
{

	DPCPU_SET(vcpu, hypctx);
}

struct hypctx *
riscv_get_active_vcpu(void)
{

	return (DPCPU_GET(vcpu));
}

int
vmmops_modinit(void)
{

	if (!has_hyp) {
		printf("vmm: riscv hart doesn't support H-extension.\n");
		return (ENXIO);
	}

	return (0);
}

int
vmmops_modcleanup(void)
{

	return (0);
}

void *
vmmops_init(struct vm *vm, pmap_t pmap)
{
	struct hyp *hyp;
	vm_size_t size;

	size = round_page(sizeof(struct hyp) +
	    sizeof(struct hypctx *) * vm_get_maxcpus(vm));
	hyp = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO);
	hyp->vm = vm;
	hyp->aplic_attached = false;

	aplic_vminit(hyp);

	return (hyp);
}

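/*
 * Delegate the listed guest exceptions and the VS-level interrupts to
 * VS-mode via hedeleg/hideleg, so the guest handles them without exiting
 * to the hypervisor.
 */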
static void
vmmops_delegate(void)
{
	uint64_t hedeleg;
	uint64_t hideleg;

	hedeleg = (1UL << SCAUSE_INST_MISALIGNED);
	hedeleg |= (1UL << SCAUSE_ILLEGAL_INSTRUCTION);
	hedeleg |= (1UL << SCAUSE_BREAKPOINT);
	hedeleg |= (1UL << SCAUSE_ECALL_USER);
	hedeleg |= (1UL << SCAUSE_INST_PAGE_FAULT);
	hedeleg |= (1UL << SCAUSE_LOAD_PAGE_FAULT);
	hedeleg |= (1UL << SCAUSE_STORE_PAGE_FAULT);
	csr_write(hedeleg, hedeleg);

	hideleg = (1UL << IRQ_SOFTWARE_HYPERVISOR);
	hideleg |= (1UL << IRQ_TIMER_HYPERVISOR);
	hideleg |= (1UL << IRQ_EXTERNAL_HYPERVISOR);
	csr_write(hideleg, hideleg);
}

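/*
 * The hart's VS-level CSRs hold the state of whichever vCPU is currently
 * loaded, so the guest's copies are restored before entering the run loop
 * and saved again when leaving it.
 */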
static void
vmmops_vcpu_restore_csrs(struct hypctx *hypctx)
{
	struct hypcsr *csrs;

	csrs = &hypctx->guest_csrs;

	csr_write(vsstatus, csrs->vsstatus);
	csr_write(vsie, csrs->vsie);
	csr_write(vstvec, csrs->vstvec);
	csr_write(vsscratch, csrs->vsscratch);
	csr_write(vsepc, csrs->vsepc);
	csr_write(vscause, csrs->vscause);
	csr_write(vstval, csrs->vstval);
	csr_write(hvip, csrs->hvip);
	csr_write(vsatp, csrs->vsatp);
}

static void
vmmops_vcpu_save_csrs(struct hypctx *hypctx)
{
	struct hypcsr *csrs;

	csrs = &hypctx->guest_csrs;

	csrs->vsstatus = csr_read(vsstatus);
	csrs->vsie = csr_read(vsie);
	csrs->vstvec = csr_read(vstvec);
	csrs->vsscratch = csr_read(vsscratch);
	csrs->vsepc = csr_read(vsepc);
	csrs->vscause = csr_read(vscause);
	csrs->vstval = csr_read(vstval);
	csrs->hvip = csr_read(hvip);
	csrs->vsatp = csr_read(vsatp);
}

void *
vmmops_vcpu_init(void *vmi, struct vcpu *vcpu1, int vcpuid)
{
	struct hypctx *hypctx;
	struct hyp *hyp;
	vm_size_t size;

	hyp = vmi;

	dprintf("%s: hyp %p\n", __func__, hyp);

	KASSERT(vcpuid >= 0 && vcpuid < vm_get_maxcpus(hyp->vm),
	    ("%s: Invalid vcpuid %d", __func__, vcpuid));

	size = round_page(sizeof(struct hypctx));

	hypctx = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO);
	hypctx->hyp = hyp;
	hypctx->vcpu = vcpu1;
	hypctx->guest_scounteren = HCOUNTEREN_CY | HCOUNTEREN_TM;

	/* sstatus */
	hypctx->guest_regs.hyp_sstatus = SSTATUS_SPP | SSTATUS_SPIE;
	hypctx->guest_regs.hyp_sstatus |= SSTATUS_FS_INITIAL;

	/* hstatus */
	hypctx->guest_regs.hyp_hstatus = HSTATUS_SPV | HSTATUS_VTW;
	hypctx->guest_regs.hyp_hstatus |= HSTATUS_SPVP;

	hypctx->cpu_id = vcpuid;
	hyp->ctx[vcpuid] = hypctx;

	aplic_cpuinit(hypctx);
	vtimer_cpuinit(hypctx);

	return (hypctx);
}

static int
riscv_vmm_pinit(pmap_t pmap)
{

	dprintf("%s: pmap %p\n", __func__, pmap);

	pmap_pinit_stage(pmap, PM_STAGE2);

	return (1);
}

struct vmspace *
vmmops_vmspace_alloc(vm_offset_t min, vm_offset_t max)
{

	return (vmspace_alloc(min, max, riscv_vmm_pinit));
}

void
vmmops_vmspace_free(struct vmspace *vmspace)
{

	pmap_remove_pages(vmspace_pmap(vmspace));
	vmspace_free(vmspace);
}

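/*
 * Read an instruction from guest memory using the hlvx.hu hypervisor load
 * instruction, with a temporary trap handler installed in case the guest
 * page is not accessible.
 */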
static void
riscv_unpriv_read(struct hypctx *hypctx, uintptr_t guest_addr, uint64_t *data,
    struct hyptrap *trap)
{
	register struct hyptrap * htrap asm("a0");
	uintptr_t old_hstatus;
	uintptr_t old_stvec;
	uintptr_t entry;
	uint64_t val;
	uint64_t tmp;
	int intr;

	entry = (uintptr_t)&vmm_unpriv_trap;
	htrap = trap;

	intr = intr_disable();

	old_hstatus = csr_swap(hstatus, hypctx->guest_regs.hyp_hstatus);
	/*
	 * Set up a temporary exception vector so that if hlvx.hu raises an
	 * exception we catch it in vmm_unpriv_trap().
	 */
	old_stvec = csr_swap(stvec, entry);

	/*
	 * Read the first two bytes of the instruction, assuming it could be
	 * a compressed one.
	 */
	__asm __volatile(".option push\n"
	    ".option norvc\n"
	    "hlvx.hu %[val], (%[addr])\n"
	    ".option pop\n"
	    : [val] "=r" (val)
	    : [addr] "r" (guest_addr), "r" (htrap)
	    : "a1", "memory");

	/*
	 * If the previous hlvx.hu did not raise an exception and the
	 * instruction is a full-length one, read its remaining two bytes.
	 */
	if (trap->scause == -1 && (val & 0x3) == 0x3) {
		guest_addr += 2;
		__asm __volatile(".option push\n"
		    ".option norvc\n"
		    "hlvx.hu %[tmp], (%[addr])\n"
		    ".option pop\n"
		    : [tmp] "=r" (tmp)
		    : [addr] "r" (guest_addr), "r" (htrap)
		    : "a1", "memory");
		val |= (tmp << 16);
	}

	csr_write(hstatus, old_hstatus);
	csr_write(stvec, old_stvec);

	intr_restore(intr);

	*data = val;
}

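/*
 * Decode the faulting load/store instruction in order to build the
 * instruction emulation descriptor (struct vie) for an MMIO access.
 */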
static int
riscv_gen_inst_emul_data(struct hypctx *hypctx, struct vm_exit *vme_ret,
    struct hyptrap *trap)
{
	uintptr_t guest_addr;
	struct vie *vie;
	uint64_t insn;
	int reg_num;
	int rs2, rd;
	int direction;
	int sign_extend;
	int access_size;

	guest_addr = vme_ret->sepc;

	KASSERT(vme_ret->scause == SCAUSE_FETCH_GUEST_PAGE_FAULT ||
	    vme_ret->scause == SCAUSE_LOAD_GUEST_PAGE_FAULT ||
	    vme_ret->scause == SCAUSE_STORE_GUEST_PAGE_FAULT,
	    ("Invalid scause"));

	direction = vme_ret->scause == SCAUSE_STORE_GUEST_PAGE_FAULT ?
	    VM_DIR_WRITE : VM_DIR_READ;

	sign_extend = 1;

	bzero(trap, sizeof(struct hyptrap));
	trap->scause = -1;
	riscv_unpriv_read(hypctx, guest_addr, &insn, trap);
	if (trap->scause != -1)
		return (-1);

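	/*
	 * Instructions with both low opcode bits set are full 32-bit
	 * encodings; anything else is a 16-bit compressed encoding.
	 */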
	if ((insn & 0x3) == 0x3) {
		rs2 = (insn & RS2_MASK) >> RS2_SHIFT;
		rd = (insn & RD_MASK) >> RD_SHIFT;

		if (direction == VM_DIR_WRITE) {
			if (m_op(insn, MATCH_SB, MASK_SB))
				access_size = 1;
			else if (m_op(insn, MATCH_SH, MASK_SH))
				access_size = 2;
			else if (m_op(insn, MATCH_SW, MASK_SW))
				access_size = 4;
			else if (m_op(insn, MATCH_SD, MASK_SD))
				access_size = 8;
			else {
				printf("unknown store instr at %lx\n",
				    guest_addr);
				return (-2);
			}
			reg_num = rs2;
		} else {
			if (m_op(insn, MATCH_LB, MASK_LB))
				access_size = 1;
			else if (m_op(insn, MATCH_LH, MASK_LH))
				access_size = 2;
			else if (m_op(insn, MATCH_LW, MASK_LW))
				access_size = 4;
			else if (m_op(insn, MATCH_LD, MASK_LD))
				access_size = 8;
			else if (m_op(insn, MATCH_LBU, MASK_LBU)) {
				access_size = 1;
				sign_extend = 0;
			} else if (m_op(insn, MATCH_LHU, MASK_LHU)) {
				access_size = 2;
				sign_extend = 0;
			} else if (m_op(insn, MATCH_LWU, MASK_LWU)) {
				access_size = 4;
				sign_extend = 0;
			} else {
				printf("unknown load instr at %lx\n",
				    guest_addr);
				return (-3);
			}
			reg_num = rd;
		}
		vme_ret->inst_length = 4;
	} else {
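		/*
		 * Compressed loads and stores use 3-bit register fields:
		 * rs1' in bits 9:7 and rd'/rs2' in bits 4:2, both mapping
		 * to x8..x15 (hence the +8 below).
		 */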
		rs2 = (insn >> 7) & 0x7;
		rs2 += 0x8;
		rd = (insn >> 2) & 0x7;
		rd += 0x8;

		if (direction == VM_DIR_WRITE) {
			if (m_op(insn, MATCH_C_SW, MASK_C_SW))
				access_size = 4;
			else if (m_op(insn, MATCH_C_SD, MASK_C_SD))
				access_size = 8;
			else {
				printf("unknown compressed store instr at %lx\n",
				    guest_addr);
				return (-4);
			}
		} else {
			if (m_op(insn, MATCH_C_LW, MASK_C_LW))
				access_size = 4;
			else if (m_op(insn, MATCH_C_LD, MASK_C_LD))
				access_size = 8;
			else {
				printf("unknown compressed load instr at %lx\n",
				    guest_addr);
				return (-5);
			}
		}
		reg_num = rd;
		vme_ret->inst_length = 2;
	}

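	/*
	 * htval holds bits 63:2 of the faulting guest physical address;
	 * the two low bits are recovered from stval.
	 */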
	vme_ret->u.inst_emul.gpa = (vme_ret->htval << 2) |
	    (vme_ret->stval & 0x3);

	dprintf("guest_addr %lx insn %lx, reg %d, gpa %lx\n", guest_addr, insn,
	    reg_num, vme_ret->u.inst_emul.gpa);

	vie = &vme_ret->u.inst_emul.vie;
	vie->dir = direction;
	vie->reg = reg_num;
	vie->sign_extend = sign_extend;
	vie->access_size = access_size;

	return (0);
}

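/*
 * Handle a trap taken during guest execution: classify scause and fill in
 * the vm_exit structure. Returns true if the exit was handled here and
 * guest execution can resume immediately.
 */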
static bool
riscv_handle_world_switch(struct hypctx *hypctx, struct vm_exit *vme,
    pmap_t pmap)
{
	struct hyptrap trap;
	uint64_t insn;
	uint64_t gpa;
	bool handled;
	bool retu;
	int ret;
	int i;

	handled = false;

	if (vme->scause & SCAUSE_INTR) {
		/*
		 * Host interrupt? Leave critical section to handle.
		 */
		vmm_stat_incr(hypctx->vcpu, VMEXIT_IRQ, 1);
		vme->exitcode = VM_EXITCODE_BOGUS;
		vme->inst_length = 0;
		return (handled);
	}

	switch (vme->scause) {
	case SCAUSE_FETCH_GUEST_PAGE_FAULT:
	case SCAUSE_LOAD_GUEST_PAGE_FAULT:
	case SCAUSE_STORE_GUEST_PAGE_FAULT:
		gpa = (vme->htval << 2) | (vme->stval & 0x3);
		if (vm_mem_allocated(hypctx->vcpu, gpa)) {
			vme->exitcode = VM_EXITCODE_PAGING;
			vme->inst_length = 0;
			vme->u.paging.gpa = gpa;
		} else {
			ret = riscv_gen_inst_emul_data(hypctx, vme, &trap);
			if (ret != 0) {
				vme->exitcode = VM_EXITCODE_HYP;
				vme->u.hyp.scause = trap.scause;
				break;
			}
			vme->exitcode = VM_EXITCODE_INST_EMUL;
		}
		break;
	case SCAUSE_ILLEGAL_INSTRUCTION:
		/*
		 * TODO: handle illegal instruction properly.
		 */
		printf("%s: Illegal instruction at %lx stval 0x%lx htval "
		    "0x%lx\n", __func__, vme->sepc, vme->stval, vme->htval);
		vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED, 1);
		vme->exitcode = VM_EXITCODE_BOGUS;
		handled = false;
		break;
	case SCAUSE_VIRTUAL_SUPERVISOR_ECALL:
		retu = false;
		vmm_sbi_ecall(hypctx->vcpu, &retu);
		if (retu == false) {
			handled = true;
			break;
		}
		for (i = 0; i < nitems(vme->u.ecall.args); i++)
			vme->u.ecall.args[i] = hypctx->guest_regs.hyp_a[i];
		vme->exitcode = VM_EXITCODE_ECALL;
		handled = false;
		break;
	case SCAUSE_VIRTUAL_INSTRUCTION:
		insn = vme->stval;
		if (m_op(insn, MATCH_WFI, MASK_WFI))
			vme->exitcode = VM_EXITCODE_WFI;
		else
			vme->exitcode = VM_EXITCODE_BOGUS;
		handled = false;
		break;
	default:
		printf("unknown scause %lx\n", vme->scause);
		vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED, 1);
		vme->exitcode = VM_EXITCODE_BOGUS;
		handled = false;
		break;
	}

	return (handled);
}

int
vmmops_gla2gpa(void *vcpui, struct vm_guest_paging *paging, uint64_t gla,
    int prot, uint64_t *gpa, int *is_fault)
{

	/* Implement me. */

	return (ENOSYS);
}

void
riscv_send_ipi(struct hypctx *hypctx, int hart_id)
{
	struct hyp *hyp;
	struct vm *vm;

	hyp = hypctx->hyp;
	vm = hyp->vm;

	atomic_set_32(&hypctx->ipi_pending, 1);

	vcpu_notify_event(vm_vcpu(vm, hart_id));
}

int
riscv_check_ipi(struct hypctx *hypctx, bool clear)
{
	int val;

	if (clear)
		val = atomic_swap_32(&hypctx->ipi_pending, 0);
	else
		val = hypctx->ipi_pending;

	return (val);
}

bool
riscv_check_interrupts_pending(struct hypctx *hypctx)
{

	if (hypctx->interrupts_pending)
		return (true);

	return (false);
}

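/*
 * Synthesize the guest's pending interrupts into hvip: VSEIP from the
 * emulated APLIC, VSSIP from pending IPIs and VSTIP from the pending
 * timer interrupt flag.
 */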
static void
riscv_sync_interrupts(struct hypctx *hypctx)
{
	int pending;

	pending = aplic_check_pending(hypctx);
	if (pending)
		hypctx->guest_csrs.hvip |= HVIP_VSEIP;
	else
		hypctx->guest_csrs.hvip &= ~HVIP_VSEIP;

	/* Guest clears VSSIP bit manually. */
	if (riscv_check_ipi(hypctx, true))
		hypctx->guest_csrs.hvip |= HVIP_VSSIP;

	if (riscv_check_interrupts_pending(hypctx))
		hypctx->guest_csrs.hvip |= HVIP_VSTIP;
	else
		hypctx->guest_csrs.hvip &= ~HVIP_VSTIP;

	csr_write(hvip, hypctx->guest_csrs.hvip);
}

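/*
 * Run the vCPU: program the hypervisor CSRs, enter the guest via
 * vmm_switch() and loop until an exit has to be handled outside this
 * function.
 */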
int
vmmops_run(void *vcpui, register_t pc, pmap_t pmap, struct vm_eventinfo *evinfo)
{
	struct hypctx *hypctx;
	struct vm_exit *vme;
	struct vcpu *vcpu;
	register_t val;
	uint64_t hvip;
	bool handled;

	hypctx = (struct hypctx *)vcpui;
	vcpu = hypctx->vcpu;
	vme = vm_exitinfo(vcpu);

	hypctx->guest_regs.hyp_sepc = (uint64_t)pc;

	vmmops_delegate();

	/*
	 * From The RISC-V Instruction Set Manual
	 * Volume II: RISC-V Privileged Architectures
	 *
	 * If the new virtual machine's guest physical page tables
	 * have been modified, it may be necessary to execute an HFENCE.GVMA
	 * instruction (see Section 5.3.2) before or after writing hgatp.
	 */
	__asm __volatile("hfence.gvma" ::: "memory");

	csr_write(hgatp, pmap->pm_satp);
	if (has_sstc)
		csr_write(henvcfg, HENVCFG_STCE);
	csr_write(hie, HIE_VSEIE | HIE_VSSIE | HIE_SGEIE);
	/* TODO: should we trap rdcycle / rdtime? */
	csr_write(hcounteren, HCOUNTEREN_CY | HCOUNTEREN_TM);

	vmmops_vcpu_restore_csrs(hypctx);

	for (;;) {
		dprintf("%s: pc %lx\n", __func__, pc);

		if (hypctx->has_exception) {
			hypctx->has_exception = false;
			/*
			 * TODO: implement exception injection.
			 */
		}

		val = intr_disable();

		/* Check if the vcpu is suspended */
		if (vcpu_suspended(evinfo)) {
			intr_restore(val);
			vm_exit_suspended(vcpu, pc);
			break;
		}

		if (vcpu_debugged(vcpu)) {
			intr_restore(val);
			vm_exit_debug(vcpu, pc);
			break;
		}

		/*
		 * TODO: What happens if a timer interrupt is asserted exactly
		 * here, but for the previous VM?
		 */
		riscv_set_active_vcpu(hypctx);
		aplic_flush_hwstate(hypctx);
		riscv_sync_interrupts(hypctx);

		dprintf("%s: Entering guest VM, vsatp %lx, ss %lx hs %lx\n",
		    __func__, csr_read(vsatp), hypctx->guest_regs.hyp_sstatus,
		    hypctx->guest_regs.hyp_hstatus);

		vmm_switch(hypctx);

		dprintf("%s: Leaving guest VM, hstatus %lx\n", __func__,
		    hypctx->guest_regs.hyp_hstatus);

		/* Guest can clear VSSIP. It can't clear VSTIP or VSEIP. */
		hvip = csr_read(hvip);
		if ((hypctx->guest_csrs.hvip ^ hvip) & HVIP_VSSIP) {
			if (hvip & HVIP_VSSIP) {
				/* TODO: VSSIP was set by guest. */
			} else {
				/* VSSIP was cleared by guest. */
				hypctx->guest_csrs.hvip &= ~HVIP_VSSIP;
			}
		}

		aplic_sync_hwstate(hypctx);

		/*
		 * TODO: deactivate stage 2 pmap here if needed.
		 */

		vme->scause = csr_read(scause);
		vme->sepc = csr_read(sepc);
		vme->stval = csr_read(stval);
		vme->htval = csr_read(htval);
		vme->htinst = csr_read(htinst);

		intr_restore(val);

		vmm_stat_incr(vcpu, VMEXIT_COUNT, 1);
		vme->pc = hypctx->guest_regs.hyp_sepc;
		vme->inst_length = INSN_SIZE;

		handled = riscv_handle_world_switch(hypctx, vme, pmap);
		if (handled == false) {
			/* Exit loop to emulate instruction. */
			break;
		} else {
			/* Resume guest execution from the next instruction. */
			hypctx->guest_regs.hyp_sepc += vme->inst_length;
		}
	}

	vmmops_vcpu_save_csrs(hypctx);

	return (0);
}

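/*
 * Per-CPU cleanup: clear the active-vcpu pointer if it refers to one of
 * this VM's vCPUs. Called on every CPU via smp_rendezvous().
 */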
static void
riscv_pcpu_vmcleanup(void *arg)
{
	struct hyp *hyp;
	int i, maxcpus;

	hyp = arg;
	maxcpus = vm_get_maxcpus(hyp->vm);
	for (i = 0; i < maxcpus; i++) {
		if (riscv_get_active_vcpu() == hyp->ctx[i]) {
			riscv_set_active_vcpu(NULL);
			break;
		}
	}
}

void
vmmops_vcpu_cleanup(void *vcpui)
{
	struct hypctx *hypctx;

	hypctx = vcpui;

	dprintf("%s\n", __func__);

	aplic_cpucleanup(hypctx);

	free(hypctx, M_HYP);
}

void
vmmops_cleanup(void *vmi)
{
	struct hyp *hyp;

	hyp = vmi;

	dprintf("%s\n", __func__);

	aplic_vmcleanup(hyp);

	smp_rendezvous(NULL, riscv_pcpu_vmcleanup, NULL, hyp);

	free(hyp, M_HYP);
}

/*
 * Return a pointer to the storage of the given guest register within the
 * vCPU context, or NULL if the register is not handled here.
 */
static uint64_t *
hypctx_regptr(struct hypctx *hypctx, int reg)
{

	switch (reg) {
	case VM_REG_GUEST_RA:
		return (&hypctx->guest_regs.hyp_ra);
	case VM_REG_GUEST_SP:
		return (&hypctx->guest_regs.hyp_sp);
	case VM_REG_GUEST_GP:
		return (&hypctx->guest_regs.hyp_gp);
	case VM_REG_GUEST_TP:
		return (&hypctx->guest_regs.hyp_tp);
	case VM_REG_GUEST_T0:
		return (&hypctx->guest_regs.hyp_t[0]);
	case VM_REG_GUEST_T1:
		return (&hypctx->guest_regs.hyp_t[1]);
	case VM_REG_GUEST_T2:
		return (&hypctx->guest_regs.hyp_t[2]);
	case VM_REG_GUEST_S0:
		return (&hypctx->guest_regs.hyp_s[0]);
	case VM_REG_GUEST_S1:
		return (&hypctx->guest_regs.hyp_s[1]);
	case VM_REG_GUEST_A0:
		return (&hypctx->guest_regs.hyp_a[0]);
	case VM_REG_GUEST_A1:
		return (&hypctx->guest_regs.hyp_a[1]);
	case VM_REG_GUEST_A2:
		return (&hypctx->guest_regs.hyp_a[2]);
	case VM_REG_GUEST_A3:
		return (&hypctx->guest_regs.hyp_a[3]);
	case VM_REG_GUEST_A4:
		return (&hypctx->guest_regs.hyp_a[4]);
	case VM_REG_GUEST_A5:
		return (&hypctx->guest_regs.hyp_a[5]);
	case VM_REG_GUEST_A6:
		return (&hypctx->guest_regs.hyp_a[6]);
	case VM_REG_GUEST_A7:
		return (&hypctx->guest_regs.hyp_a[7]);
	case VM_REG_GUEST_S2:
		return (&hypctx->guest_regs.hyp_s[2]);
	case VM_REG_GUEST_S3:
		return (&hypctx->guest_regs.hyp_s[3]);
	case VM_REG_GUEST_S4:
		return (&hypctx->guest_regs.hyp_s[4]);
	case VM_REG_GUEST_S5:
		return (&hypctx->guest_regs.hyp_s[5]);
	case VM_REG_GUEST_S6:
		return (&hypctx->guest_regs.hyp_s[6]);
	case VM_REG_GUEST_S7:
		return (&hypctx->guest_regs.hyp_s[7]);
	case VM_REG_GUEST_S8:
		return (&hypctx->guest_regs.hyp_s[8]);
	case VM_REG_GUEST_S9:
		return (&hypctx->guest_regs.hyp_s[9]);
	case VM_REG_GUEST_S10:
		return (&hypctx->guest_regs.hyp_s[10]);
	case VM_REG_GUEST_S11:
		return (&hypctx->guest_regs.hyp_s[11]);
	case VM_REG_GUEST_T3:
		return (&hypctx->guest_regs.hyp_t[3]);
	case VM_REG_GUEST_T4:
		return (&hypctx->guest_regs.hyp_t[4]);
	case VM_REG_GUEST_T5:
		return (&hypctx->guest_regs.hyp_t[5]);
	case VM_REG_GUEST_T6:
		return (&hypctx->guest_regs.hyp_t[6]);
	case VM_REG_GUEST_SEPC:
		return (&hypctx->guest_regs.hyp_sepc);
	default:
		break;
	}

	return (NULL);
}

int
vmmops_getreg(void *vcpui, int reg, uint64_t *retval)
{
	uint64_t *regp;
	int running, hostcpu;
	struct hypctx *hypctx;

	hypctx = vcpui;

	running = vcpu_is_running(hypctx->vcpu, &hostcpu);
	if (running && hostcpu != curcpu)
		panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm),
		    vcpu_vcpuid(hypctx->vcpu));

	if (reg == VM_REG_GUEST_ZERO) {
		*retval = 0;
		return (0);
	}

	regp = hypctx_regptr(hypctx, reg);
	if (regp == NULL)
		return (EINVAL);

	*retval = *regp;

	return (0);
}

int
vmmops_setreg(void *vcpui, int reg, uint64_t val)
{
	struct hypctx *hypctx;
	int running, hostcpu;
	uint64_t *regp;

	hypctx = vcpui;

	running = vcpu_is_running(hypctx->vcpu, &hostcpu);
	if (running && hostcpu != curcpu)
		panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm),
		    vcpu_vcpuid(hypctx->vcpu));

	regp = hypctx_regptr(hypctx, reg);
	if (regp == NULL)
		return (EINVAL);

	*regp = val;

	return (0);
}

int
vmmops_exception(void *vcpui, uint64_t scause)
{
	struct hypctx *hypctx;
	int running, hostcpu;

	hypctx = vcpui;

	running = vcpu_is_running(hypctx->vcpu, &hostcpu);
	if (running && hostcpu != curcpu)
		panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm),
		    vcpu_vcpuid(hypctx->vcpu));

	/* TODO: implement me. */

	return (ENOSYS);
}

int
vmmops_getcap(void *vcpui, int num, int *retval)
{
	int ret;

	ret = ENOENT;

	switch (num) {
	case VM_CAP_SSTC:
		*retval = has_sstc;
		ret = 0;
		break;
	case VM_CAP_UNRESTRICTED_GUEST:
		*retval = 1;
		ret = 0;
		break;
	default:
		break;
	}

	return (ret);
}

int
vmmops_setcap(void *vcpui, int num, int val)
{

	return (ENOENT);
}