1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * RISC-V code 4 * 5 * Copyright (C) 2021 Western Digital Corporation or its affiliates. 6 */ 7 8 #include <linux/compiler.h> 9 #include <assert.h> 10 11 #include "kvm_util.h" 12 #include "processor.h" 13 14 #define DEFAULT_RISCV_GUEST_STACK_VADDR_MIN 0xac0000 15 16 static vm_vaddr_t exception_handlers; 17 18 bool __vcpu_has_ext(struct kvm_vcpu *vcpu, uint64_t ext) 19 { 20 unsigned long value = 0; 21 int ret; 22 23 ret = __vcpu_get_reg(vcpu, ext, &value); 24 25 return !ret && !!value; 26 } 27 28 static uint64_t page_align(struct kvm_vm *vm, uint64_t v) 29 { 30 return (v + vm->page_size) & ~(vm->page_size - 1); 31 } 32 33 static uint64_t pte_addr(struct kvm_vm *vm, uint64_t entry) 34 { 35 return ((entry & PGTBL_PTE_ADDR_MASK) >> PGTBL_PTE_ADDR_SHIFT) << 36 PGTBL_PAGE_SIZE_SHIFT; 37 } 38 39 static uint64_t ptrs_per_pte(struct kvm_vm *vm) 40 { 41 return PGTBL_PAGE_SIZE / sizeof(uint64_t); 42 } 43 44 static uint64_t pte_index_mask[] = { 45 PGTBL_L0_INDEX_MASK, 46 PGTBL_L1_INDEX_MASK, 47 PGTBL_L2_INDEX_MASK, 48 PGTBL_L3_INDEX_MASK, 49 }; 50 51 static uint32_t pte_index_shift[] = { 52 PGTBL_L0_INDEX_SHIFT, 53 PGTBL_L1_INDEX_SHIFT, 54 PGTBL_L2_INDEX_SHIFT, 55 PGTBL_L3_INDEX_SHIFT, 56 }; 57 58 static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level) 59 { 60 TEST_ASSERT(level > -1, 61 "Negative page table level (%d) not possible", level); 62 TEST_ASSERT(level < vm->pgtable_levels, 63 "Invalid page table level (%d)", level); 64 65 return (gva & pte_index_mask[level]) >> pte_index_shift[level]; 66 } 67 68 void virt_arch_pgd_alloc(struct kvm_vm *vm) 69 { 70 size_t nr_pages = page_align(vm, ptrs_per_pte(vm) * 8) / vm->page_size; 71 72 if (vm->pgd_created) 73 return; 74 75 vm->pgd = vm_phy_pages_alloc(vm, nr_pages, 76 KVM_GUEST_PAGE_TABLE_MIN_PADDR, 77 vm->memslots[MEM_REGION_PT]); 78 vm->pgd_created = true; 79 } 80 81 void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) 82 { 83 uint64_t *ptep, next_ppn; 84 int level = vm->pgtable_levels - 1; 85 86 TEST_ASSERT((vaddr % vm->page_size) == 0, 87 "Virtual address not on page boundary,\n" 88 " vaddr: 0x%lx vm->page_size: 0x%x", vaddr, vm->page_size); 89 TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, 90 (vaddr >> vm->page_shift)), 91 "Invalid virtual address, vaddr: 0x%lx", vaddr); 92 TEST_ASSERT((paddr % vm->page_size) == 0, 93 "Physical address not on page boundary,\n" 94 " paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size); 95 TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn, 96 "Physical address beyond maximum supported,\n" 97 " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", 98 paddr, vm->max_gfn, vm->page_size); 99 100 ptep = addr_gpa2hva(vm, vm->pgd) + pte_index(vm, vaddr, level) * 8; 101 if (!*ptep) { 102 next_ppn = vm_alloc_page_table(vm) >> PGTBL_PAGE_SIZE_SHIFT; 103 *ptep = (next_ppn << PGTBL_PTE_ADDR_SHIFT) | 104 PGTBL_PTE_VALID_MASK; 105 } 106 level--; 107 108 while (level > -1) { 109 ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + 110 pte_index(vm, vaddr, level) * 8; 111 if (!*ptep && level > 0) { 112 next_ppn = vm_alloc_page_table(vm) >> 113 PGTBL_PAGE_SIZE_SHIFT; 114 *ptep = (next_ppn << PGTBL_PTE_ADDR_SHIFT) | 115 PGTBL_PTE_VALID_MASK; 116 } 117 level--; 118 } 119 120 paddr = paddr >> PGTBL_PAGE_SIZE_SHIFT; 121 *ptep = (paddr << PGTBL_PTE_ADDR_SHIFT) | 122 PGTBL_PTE_PERM_MASK | PGTBL_PTE_VALID_MASK; 123 } 124 125 vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) 126 { 127 uint64_t *ptep; 128 int level = vm->pgtable_levels - 1; 129 130 if (!vm->pgd_created) 131 goto unmapped_gva; 132 133 ptep = addr_gpa2hva(vm, vm->pgd) + pte_index(vm, gva, level) * 8; 134 if (!ptep) 135 goto unmapped_gva; 136 level--; 137 138 while (level > -1) { 139 ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + 140 pte_index(vm, gva, level) * 8; 141 if (!ptep) 142 goto unmapped_gva; 143 level--; 144 } 145 146 return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1)); 147 148 unmapped_gva: 149 TEST_FAIL("No mapping for vm virtual address gva: 0x%lx level: %d", 150 gva, level); 151 exit(1); 152 } 153 154 static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, 155 uint64_t page, int level) 156 { 157 #ifdef DEBUG 158 static const char *const type[] = { "pte", "pmd", "pud", "p4d"}; 159 uint64_t pte, *ptep; 160 161 if (level < 0) 162 return; 163 164 for (pte = page; pte < page + ptrs_per_pte(vm) * 8; pte += 8) { 165 ptep = addr_gpa2hva(vm, pte); 166 if (!*ptep) 167 continue; 168 fprintf(stream, "%*s%s: %lx: %lx at %p\n", indent, "", 169 type[level], pte, *ptep, ptep); 170 pte_dump(stream, vm, indent + 1, 171 pte_addr(vm, *ptep), level - 1); 172 } 173 #endif 174 } 175 176 void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) 177 { 178 int level = vm->pgtable_levels - 1; 179 uint64_t pgd, *ptep; 180 181 if (!vm->pgd_created) 182 return; 183 184 for (pgd = vm->pgd; pgd < vm->pgd + ptrs_per_pte(vm) * 8; pgd += 8) { 185 ptep = addr_gpa2hva(vm, pgd); 186 if (!*ptep) 187 continue; 188 fprintf(stream, "%*spgd: %lx: %lx at %p\n", indent, "", 189 pgd, *ptep, ptep); 190 pte_dump(stream, vm, indent + 1, 191 pte_addr(vm, *ptep), level - 1); 192 } 193 } 194 195 void riscv_vcpu_mmu_setup(struct kvm_vcpu *vcpu) 196 { 197 struct kvm_vm *vm = vcpu->vm; 198 unsigned long satp; 199 200 /* 201 * The RISC-V Sv48 MMU mode supports 56-bit physical address 202 * for 48-bit virtual address with 4KB last level page size. 203 */ 204 switch (vm->mode) { 205 case VM_MODE_P52V48_4K: 206 case VM_MODE_P48V48_4K: 207 case VM_MODE_P40V48_4K: 208 break; 209 default: 210 TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); 211 } 212 213 satp = (vm->pgd >> PGTBL_PAGE_SIZE_SHIFT) & SATP_PPN; 214 satp |= SATP_MODE_48; 215 216 vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(satp), satp); 217 } 218 219 void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) 220 { 221 struct kvm_riscv_core core; 222 223 vcpu_get_reg(vcpu, RISCV_CORE_REG(mode), &core.mode); 224 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.pc), &core.regs.pc); 225 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.ra), &core.regs.ra); 226 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.sp), &core.regs.sp); 227 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.gp), &core.regs.gp); 228 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.tp), &core.regs.tp); 229 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t0), &core.regs.t0); 230 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t1), &core.regs.t1); 231 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t2), &core.regs.t2); 232 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s0), &core.regs.s0); 233 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s1), &core.regs.s1); 234 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a0), &core.regs.a0); 235 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a1), &core.regs.a1); 236 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a2), &core.regs.a2); 237 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a3), &core.regs.a3); 238 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a4), &core.regs.a4); 239 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a5), &core.regs.a5); 240 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a6), &core.regs.a6); 241 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a7), &core.regs.a7); 242 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s2), &core.regs.s2); 243 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s3), &core.regs.s3); 244 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s4), &core.regs.s4); 245 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s5), &core.regs.s5); 246 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s6), &core.regs.s6); 247 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s7), &core.regs.s7); 248 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s8), &core.regs.s8); 249 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s9), &core.regs.s9); 250 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s10), &core.regs.s10); 251 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s11), &core.regs.s11); 252 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t3), &core.regs.t3); 253 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t4), &core.regs.t4); 254 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t5), &core.regs.t5); 255 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t6), &core.regs.t6); 256 257 fprintf(stream, 258 " MODE: 0x%lx\n", core.mode); 259 fprintf(stream, 260 " PC: 0x%016lx RA: 0x%016lx SP: 0x%016lx GP: 0x%016lx\n", 261 core.regs.pc, core.regs.ra, core.regs.sp, core.regs.gp); 262 fprintf(stream, 263 " TP: 0x%016lx T0: 0x%016lx T1: 0x%016lx T2: 0x%016lx\n", 264 core.regs.tp, core.regs.t0, core.regs.t1, core.regs.t2); 265 fprintf(stream, 266 " S0: 0x%016lx S1: 0x%016lx A0: 0x%016lx A1: 0x%016lx\n", 267 core.regs.s0, core.regs.s1, core.regs.a0, core.regs.a1); 268 fprintf(stream, 269 " A2: 0x%016lx A3: 0x%016lx A4: 0x%016lx A5: 0x%016lx\n", 270 core.regs.a2, core.regs.a3, core.regs.a4, core.regs.a5); 271 fprintf(stream, 272 " A6: 0x%016lx A7: 0x%016lx S2: 0x%016lx S3: 0x%016lx\n", 273 core.regs.a6, core.regs.a7, core.regs.s2, core.regs.s3); 274 fprintf(stream, 275 " S4: 0x%016lx S5: 0x%016lx S6: 0x%016lx S7: 0x%016lx\n", 276 core.regs.s4, core.regs.s5, core.regs.s6, core.regs.s7); 277 fprintf(stream, 278 " S8: 0x%016lx S9: 0x%016lx S10: 0x%016lx S11: 0x%016lx\n", 279 core.regs.s8, core.regs.s9, core.regs.s10, core.regs.s11); 280 fprintf(stream, 281 " T3: 0x%016lx T4: 0x%016lx T5: 0x%016lx T6: 0x%016lx\n", 282 core.regs.t3, core.regs.t4, core.regs.t5, core.regs.t6); 283 } 284 285 static void __aligned(16) guest_unexp_trap(void) 286 { 287 sbi_ecall(KVM_RISCV_SELFTESTS_SBI_EXT, 288 KVM_RISCV_SELFTESTS_SBI_UNEXP, 289 0, 0, 0, 0, 0, 0); 290 } 291 292 void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code) 293 { 294 vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.pc), (unsigned long)guest_code); 295 } 296 297 struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) 298 { 299 int r; 300 size_t stack_size; 301 unsigned long stack_vaddr; 302 unsigned long current_gp = 0; 303 struct kvm_mp_state mps; 304 struct kvm_vcpu *vcpu; 305 306 stack_size = vm->page_size == 4096 ? DEFAULT_STACK_PGS * vm->page_size : 307 vm->page_size; 308 stack_vaddr = __vm_vaddr_alloc(vm, stack_size, 309 DEFAULT_RISCV_GUEST_STACK_VADDR_MIN, 310 MEM_REGION_DATA); 311 312 vcpu = __vm_vcpu_add(vm, vcpu_id); 313 riscv_vcpu_mmu_setup(vcpu); 314 315 /* 316 * With SBI HSM support in KVM RISC-V, all secondary VCPUs are 317 * powered-off by default so we ensure that all secondary VCPUs 318 * are powered-on using KVM_SET_MP_STATE ioctl(). 319 */ 320 mps.mp_state = KVM_MP_STATE_RUNNABLE; 321 r = __vcpu_ioctl(vcpu, KVM_SET_MP_STATE, &mps); 322 TEST_ASSERT(!r, "IOCTL KVM_SET_MP_STATE failed (error %d)", r); 323 324 /* Setup global pointer of guest to be same as the host */ 325 asm volatile ( 326 "add %0, gp, zero" : "=r" (current_gp) : : "memory"); 327 vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.gp), current_gp); 328 329 /* Setup stack pointer and program counter of guest */ 330 vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.sp), stack_vaddr + stack_size); 331 332 /* Setup sscratch for guest_get_vcpuid() */ 333 vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(sscratch), vcpu_id); 334 335 /* Setup default exception vector of guest */ 336 vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(stvec), (unsigned long)guest_unexp_trap); 337 338 return vcpu; 339 } 340 341 void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) 342 { 343 va_list ap; 344 uint64_t id = RISCV_CORE_REG(regs.a0); 345 int i; 346 347 TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n" 348 " num: %u", num); 349 350 va_start(ap, num); 351 352 for (i = 0; i < num; i++) { 353 switch (i) { 354 case 0: 355 id = RISCV_CORE_REG(regs.a0); 356 break; 357 case 1: 358 id = RISCV_CORE_REG(regs.a1); 359 break; 360 case 2: 361 id = RISCV_CORE_REG(regs.a2); 362 break; 363 case 3: 364 id = RISCV_CORE_REG(regs.a3); 365 break; 366 case 4: 367 id = RISCV_CORE_REG(regs.a4); 368 break; 369 case 5: 370 id = RISCV_CORE_REG(regs.a5); 371 break; 372 case 6: 373 id = RISCV_CORE_REG(regs.a6); 374 break; 375 case 7: 376 id = RISCV_CORE_REG(regs.a7); 377 break; 378 } 379 vcpu_set_reg(vcpu, id, va_arg(ap, uint64_t)); 380 } 381 382 va_end(ap); 383 } 384 385 void kvm_exit_unexpected_exception(int vector, int ec) 386 { 387 ucall(UCALL_UNHANDLED, 2, vector, ec); 388 } 389 390 void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) 391 { 392 struct ucall uc; 393 394 if (get_ucall(vcpu, &uc) == UCALL_UNHANDLED) { 395 TEST_FAIL("Unexpected exception (vector:0x%lx, ec:0x%lx)", 396 uc.args[0], uc.args[1]); 397 } 398 } 399 400 struct handlers { 401 exception_handler_fn exception_handlers[NR_VECTORS][NR_EXCEPTIONS]; 402 }; 403 404 void route_exception(struct ex_regs *regs) 405 { 406 struct handlers *handlers = (struct handlers *)exception_handlers; 407 int vector = 0, ec; 408 409 ec = regs->cause & ~CAUSE_IRQ_FLAG; 410 if (ec >= NR_EXCEPTIONS) 411 goto unexpected_exception; 412 413 /* Use the same handler for all the interrupts */ 414 if (regs->cause & CAUSE_IRQ_FLAG) { 415 vector = 1; 416 ec = 0; 417 } 418 419 if (handlers && handlers->exception_handlers[vector][ec]) 420 return handlers->exception_handlers[vector][ec](regs); 421 422 unexpected_exception: 423 return kvm_exit_unexpected_exception(vector, ec); 424 } 425 426 void vcpu_init_vector_tables(struct kvm_vcpu *vcpu) 427 { 428 extern char exception_vectors; 429 430 vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(stvec), (unsigned long)&exception_vectors); 431 } 432 433 void vm_init_vector_tables(struct kvm_vm *vm) 434 { 435 vm->handlers = __vm_vaddr_alloc(vm, sizeof(struct handlers), 436 vm->page_size, MEM_REGION_DATA); 437 438 *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers; 439 } 440 441 void vm_install_exception_handler(struct kvm_vm *vm, int vector, exception_handler_fn handler) 442 { 443 struct handlers *handlers = addr_gva2hva(vm, vm->handlers); 444 445 assert(vector < NR_EXCEPTIONS); 446 handlers->exception_handlers[0][vector] = handler; 447 } 448 449 void vm_install_interrupt_handler(struct kvm_vm *vm, exception_handler_fn handler) 450 { 451 struct handlers *handlers = addr_gva2hva(vm, vm->handlers); 452 453 handlers->exception_handlers[1][0] = handler; 454 } 455 456 uint32_t guest_get_vcpuid(void) 457 { 458 return csr_read(CSR_SSCRATCH); 459 } 460 461 struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0, 462 unsigned long arg1, unsigned long arg2, 463 unsigned long arg3, unsigned long arg4, 464 unsigned long arg5) 465 { 466 register uintptr_t a0 asm ("a0") = (uintptr_t)(arg0); 467 register uintptr_t a1 asm ("a1") = (uintptr_t)(arg1); 468 register uintptr_t a2 asm ("a2") = (uintptr_t)(arg2); 469 register uintptr_t a3 asm ("a3") = (uintptr_t)(arg3); 470 register uintptr_t a4 asm ("a4") = (uintptr_t)(arg4); 471 register uintptr_t a5 asm ("a5") = (uintptr_t)(arg5); 472 register uintptr_t a6 asm ("a6") = (uintptr_t)(fid); 473 register uintptr_t a7 asm ("a7") = (uintptr_t)(ext); 474 struct sbiret ret; 475 476 asm volatile ( 477 "ecall" 478 : "+r" (a0), "+r" (a1) 479 : "r" (a2), "r" (a3), "r" (a4), "r" (a5), "r" (a6), "r" (a7) 480 : "memory"); 481 ret.error = a0; 482 ret.value = a1; 483 484 return ret; 485 } 486 487 bool guest_sbi_probe_extension(int extid, long *out_val) 488 { 489 struct sbiret ret; 490 491 ret = sbi_ecall(SBI_EXT_BASE, SBI_EXT_BASE_PROBE_EXT, extid, 492 0, 0, 0, 0, 0); 493 494 __GUEST_ASSERT(!ret.error || ret.error == SBI_ERR_NOT_SUPPORTED, 495 "ret.error=%ld, ret.value=%ld\n", ret.error, ret.value); 496 497 if (ret.error == SBI_ERR_NOT_SUPPORTED) 498 return false; 499 500 if (out_val) 501 *out_val = ret.value; 502 503 return true; 504 } 505