1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * RISC-V code 4 * 5 * Copyright (C) 2021 Western Digital Corporation or its affiliates. 6 */ 7 8 #include <linux/compiler.h> 9 #include <assert.h> 10 11 #include "kvm_util.h" 12 #include "processor.h" 13 #include "ucall_common.h" 14 15 #define DEFAULT_RISCV_GUEST_STACK_VADDR_MIN 0xac0000 16 17 static vm_vaddr_t exception_handlers; 18 19 bool __vcpu_has_ext(struct kvm_vcpu *vcpu, uint64_t ext) 20 { 21 unsigned long value = 0; 22 int ret; 23 24 ret = __vcpu_get_reg(vcpu, ext, &value); 25 26 return !ret && !!value; 27 } 28 29 static uint64_t page_align(struct kvm_vm *vm, uint64_t v) 30 { 31 return (v + vm->page_size) & ~(vm->page_size - 1); 32 } 33 34 static uint64_t pte_addr(struct kvm_vm *vm, uint64_t entry) 35 { 36 return ((entry & PGTBL_PTE_ADDR_MASK) >> PGTBL_PTE_ADDR_SHIFT) << 37 PGTBL_PAGE_SIZE_SHIFT; 38 } 39 40 static uint64_t ptrs_per_pte(struct kvm_vm *vm) 41 { 42 return PGTBL_PAGE_SIZE / sizeof(uint64_t); 43 } 44 45 static uint64_t pte_index_mask[] = { 46 PGTBL_L0_INDEX_MASK, 47 PGTBL_L1_INDEX_MASK, 48 PGTBL_L2_INDEX_MASK, 49 PGTBL_L3_INDEX_MASK, 50 }; 51 52 static uint32_t pte_index_shift[] = { 53 PGTBL_L0_INDEX_SHIFT, 54 PGTBL_L1_INDEX_SHIFT, 55 PGTBL_L2_INDEX_SHIFT, 56 PGTBL_L3_INDEX_SHIFT, 57 }; 58 59 static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level) 60 { 61 TEST_ASSERT(level > -1, 62 "Negative page table level (%d) not possible", level); 63 TEST_ASSERT(level < vm->mmu.pgtable_levels, 64 "Invalid page table level (%d)", level); 65 66 return (gva & pte_index_mask[level]) >> pte_index_shift[level]; 67 } 68 69 void virt_arch_pgd_alloc(struct kvm_vm *vm) 70 { 71 size_t nr_pages = page_align(vm, ptrs_per_pte(vm) * 8) / vm->page_size; 72 73 if (vm->mmu.pgd_created) 74 return; 75 76 vm->mmu.pgd = vm_phy_pages_alloc(vm, nr_pages, 77 KVM_GUEST_PAGE_TABLE_MIN_PADDR, 78 vm->memslots[MEM_REGION_PT]); 79 vm->mmu.pgd_created = true; 80 } 81 82 void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) 83 { 84 uint64_t *ptep, next_ppn; 85 int level = vm->mmu.pgtable_levels - 1; 86 87 TEST_ASSERT((vaddr % vm->page_size) == 0, 88 "Virtual address not on page boundary,\n" 89 " vaddr: 0x%lx vm->page_size: 0x%x", vaddr, vm->page_size); 90 TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, 91 (vaddr >> vm->page_shift)), 92 "Invalid virtual address, vaddr: 0x%lx", vaddr); 93 TEST_ASSERT((paddr % vm->page_size) == 0, 94 "Physical address not on page boundary,\n" 95 " paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size); 96 TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn, 97 "Physical address beyond maximum supported,\n" 98 " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", 99 paddr, vm->max_gfn, vm->page_size); 100 101 ptep = addr_gpa2hva(vm, vm->mmu.pgd) + pte_index(vm, vaddr, level) * 8; 102 if (!*ptep) { 103 next_ppn = vm_alloc_page_table(vm) >> PGTBL_PAGE_SIZE_SHIFT; 104 *ptep = (next_ppn << PGTBL_PTE_ADDR_SHIFT) | 105 PGTBL_PTE_VALID_MASK; 106 } 107 level--; 108 109 while (level > -1) { 110 ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + 111 pte_index(vm, vaddr, level) * 8; 112 if (!*ptep && level > 0) { 113 next_ppn = vm_alloc_page_table(vm) >> 114 PGTBL_PAGE_SIZE_SHIFT; 115 *ptep = (next_ppn << PGTBL_PTE_ADDR_SHIFT) | 116 PGTBL_PTE_VALID_MASK; 117 } 118 level--; 119 } 120 121 paddr = paddr >> PGTBL_PAGE_SIZE_SHIFT; 122 *ptep = (paddr << PGTBL_PTE_ADDR_SHIFT) | 123 PGTBL_PTE_PERM_MASK | PGTBL_PTE_VALID_MASK; 124 } 125 126 vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) 127 { 128 uint64_t *ptep; 129 int level = vm->mmu.pgtable_levels - 1; 130 131 if (!vm->mmu.pgd_created) 132 goto unmapped_gva; 133 134 ptep = addr_gpa2hva(vm, vm->mmu.pgd) + pte_index(vm, gva, level) * 8; 135 if (!ptep) 136 goto unmapped_gva; 137 level--; 138 139 while (level > -1) { 140 ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + 141 pte_index(vm, gva, level) * 8; 142 if (!ptep) 143 goto unmapped_gva; 144 level--; 145 } 146 147 return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1)); 148 149 unmapped_gva: 150 TEST_FAIL("No mapping for vm virtual address gva: 0x%lx level: %d", 151 gva, level); 152 exit(1); 153 } 154 155 static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, 156 uint64_t page, int level) 157 { 158 #ifdef DEBUG 159 static const char *const type[] = { "pte", "pmd", "pud", "p4d"}; 160 uint64_t pte, *ptep; 161 162 if (level < 0) 163 return; 164 165 for (pte = page; pte < page + ptrs_per_pte(vm) * 8; pte += 8) { 166 ptep = addr_gpa2hva(vm, pte); 167 if (!*ptep) 168 continue; 169 fprintf(stream, "%*s%s: %lx: %lx at %p\n", indent, "", 170 type[level], pte, *ptep, ptep); 171 pte_dump(stream, vm, indent + 1, 172 pte_addr(vm, *ptep), level - 1); 173 } 174 #endif 175 } 176 177 void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) 178 { 179 struct kvm_mmu *mmu = &vm->mmu; 180 int level = mmu->pgtable_levels - 1; 181 uint64_t pgd, *ptep; 182 183 if (!mmu->pgd_created) 184 return; 185 186 for (pgd = mmu->pgd; pgd < mmu->pgd + ptrs_per_pte(vm) * 8; pgd += 8) { 187 ptep = addr_gpa2hva(vm, pgd); 188 if (!*ptep) 189 continue; 190 fprintf(stream, "%*spgd: %lx: %lx at %p\n", indent, "", 191 pgd, *ptep, ptep); 192 pte_dump(stream, vm, indent + 1, 193 pte_addr(vm, *ptep), level - 1); 194 } 195 } 196 197 void riscv_vcpu_mmu_setup(struct kvm_vcpu *vcpu) 198 { 199 struct kvm_vm *vm = vcpu->vm; 200 unsigned long satp; 201 202 /* 203 * The RISC-V Sv48 MMU mode supports 56-bit physical address 204 * for 48-bit virtual address with 4KB last level page size. 205 */ 206 switch (vm->mode) { 207 case VM_MODE_P52V48_4K: 208 case VM_MODE_P48V48_4K: 209 case VM_MODE_P40V48_4K: 210 break; 211 default: 212 TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); 213 } 214 215 satp = (vm->mmu.pgd >> PGTBL_PAGE_SIZE_SHIFT) & SATP_PPN; 216 satp |= SATP_MODE_48; 217 218 vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(satp), satp); 219 } 220 221 void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) 222 { 223 struct kvm_riscv_core core; 224 225 core.mode = vcpu_get_reg(vcpu, RISCV_CORE_REG(mode)); 226 core.regs.pc = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.pc)); 227 core.regs.ra = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.ra)); 228 core.regs.sp = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.sp)); 229 core.regs.gp = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.gp)); 230 core.regs.tp = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.tp)); 231 core.regs.t0 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t0)); 232 core.regs.t1 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t1)); 233 core.regs.t2 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t2)); 234 core.regs.s0 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s0)); 235 core.regs.s1 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s1)); 236 core.regs.a0 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a0)); 237 core.regs.a1 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a1)); 238 core.regs.a2 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a2)); 239 core.regs.a3 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a3)); 240 core.regs.a4 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a4)); 241 core.regs.a5 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a5)); 242 core.regs.a6 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a6)); 243 core.regs.a7 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a7)); 244 core.regs.s2 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s2)); 245 core.regs.s3 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s3)); 246 core.regs.s4 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s4)); 247 core.regs.s5 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s5)); 248 core.regs.s6 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s6)); 249 core.regs.s7 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s7)); 250 core.regs.s8 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s8)); 251 core.regs.s9 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s9)); 252 core.regs.s10 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s10)); 253 core.regs.s11 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s11)); 254 core.regs.t3 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t3)); 255 core.regs.t4 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t4)); 256 core.regs.t5 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t5)); 257 core.regs.t6 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t6)); 258 259 fprintf(stream, 260 " MODE: 0x%lx\n", core.mode); 261 fprintf(stream, 262 " PC: 0x%016lx RA: 0x%016lx SP: 0x%016lx GP: 0x%016lx\n", 263 core.regs.pc, core.regs.ra, core.regs.sp, core.regs.gp); 264 fprintf(stream, 265 " TP: 0x%016lx T0: 0x%016lx T1: 0x%016lx T2: 0x%016lx\n", 266 core.regs.tp, core.regs.t0, core.regs.t1, core.regs.t2); 267 fprintf(stream, 268 " S0: 0x%016lx S1: 0x%016lx A0: 0x%016lx A1: 0x%016lx\n", 269 core.regs.s0, core.regs.s1, core.regs.a0, core.regs.a1); 270 fprintf(stream, 271 " A2: 0x%016lx A3: 0x%016lx A4: 0x%016lx A5: 0x%016lx\n", 272 core.regs.a2, core.regs.a3, core.regs.a4, core.regs.a5); 273 fprintf(stream, 274 " A6: 0x%016lx A7: 0x%016lx S2: 0x%016lx S3: 0x%016lx\n", 275 core.regs.a6, core.regs.a7, core.regs.s2, core.regs.s3); 276 fprintf(stream, 277 " S4: 0x%016lx S5: 0x%016lx S6: 0x%016lx S7: 0x%016lx\n", 278 core.regs.s4, core.regs.s5, core.regs.s6, core.regs.s7); 279 fprintf(stream, 280 " S8: 0x%016lx S9: 0x%016lx S10: 0x%016lx S11: 0x%016lx\n", 281 core.regs.s8, core.regs.s9, core.regs.s10, core.regs.s11); 282 fprintf(stream, 283 " T3: 0x%016lx T4: 0x%016lx T5: 0x%016lx T6: 0x%016lx\n", 284 core.regs.t3, core.regs.t4, core.regs.t5, core.regs.t6); 285 } 286 287 static void __aligned(16) guest_unexp_trap(void) 288 { 289 sbi_ecall(KVM_RISCV_SELFTESTS_SBI_EXT, 290 KVM_RISCV_SELFTESTS_SBI_UNEXP, 291 0, 0, 0, 0, 0, 0); 292 } 293 294 void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code) 295 { 296 vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.pc), (unsigned long)guest_code); 297 } 298 299 struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) 300 { 301 int r; 302 size_t stack_size; 303 unsigned long stack_vaddr; 304 unsigned long current_gp = 0; 305 struct kvm_mp_state mps; 306 struct kvm_vcpu *vcpu; 307 308 stack_size = vm->page_size == 4096 ? DEFAULT_STACK_PGS * vm->page_size : 309 vm->page_size; 310 stack_vaddr = __vm_vaddr_alloc(vm, stack_size, 311 DEFAULT_RISCV_GUEST_STACK_VADDR_MIN, 312 MEM_REGION_DATA); 313 314 vcpu = __vm_vcpu_add(vm, vcpu_id); 315 riscv_vcpu_mmu_setup(vcpu); 316 317 /* 318 * With SBI HSM support in KVM RISC-V, all secondary VCPUs are 319 * powered-off by default so we ensure that all secondary VCPUs 320 * are powered-on using KVM_SET_MP_STATE ioctl(). 321 */ 322 mps.mp_state = KVM_MP_STATE_RUNNABLE; 323 r = __vcpu_ioctl(vcpu, KVM_SET_MP_STATE, &mps); 324 TEST_ASSERT(!r, "IOCTL KVM_SET_MP_STATE failed (error %d)", r); 325 326 /* Setup global pointer of guest to be same as the host */ 327 asm volatile ( 328 "add %0, gp, zero" : "=r" (current_gp) : : "memory"); 329 vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.gp), current_gp); 330 331 /* Setup stack pointer and program counter of guest */ 332 vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.sp), stack_vaddr + stack_size); 333 334 /* Setup sscratch for guest_get_vcpuid() */ 335 vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(sscratch), vcpu_id); 336 337 /* Setup default exception vector of guest */ 338 vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(stvec), (unsigned long)guest_unexp_trap); 339 340 return vcpu; 341 } 342 343 void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) 344 { 345 va_list ap; 346 uint64_t id = RISCV_CORE_REG(regs.a0); 347 int i; 348 349 TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n" 350 " num: %u", num); 351 352 va_start(ap, num); 353 354 for (i = 0; i < num; i++) { 355 switch (i) { 356 case 0: 357 id = RISCV_CORE_REG(regs.a0); 358 break; 359 case 1: 360 id = RISCV_CORE_REG(regs.a1); 361 break; 362 case 2: 363 id = RISCV_CORE_REG(regs.a2); 364 break; 365 case 3: 366 id = RISCV_CORE_REG(regs.a3); 367 break; 368 case 4: 369 id = RISCV_CORE_REG(regs.a4); 370 break; 371 case 5: 372 id = RISCV_CORE_REG(regs.a5); 373 break; 374 case 6: 375 id = RISCV_CORE_REG(regs.a6); 376 break; 377 case 7: 378 id = RISCV_CORE_REG(regs.a7); 379 break; 380 } 381 vcpu_set_reg(vcpu, id, va_arg(ap, uint64_t)); 382 } 383 384 va_end(ap); 385 } 386 387 void kvm_exit_unexpected_exception(int vector, int ec) 388 { 389 ucall(UCALL_UNHANDLED, 2, vector, ec); 390 } 391 392 void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) 393 { 394 struct ucall uc; 395 396 if (get_ucall(vcpu, &uc) == UCALL_UNHANDLED) { 397 TEST_FAIL("Unexpected exception (vector:0x%lx, ec:0x%lx)", 398 uc.args[0], uc.args[1]); 399 } 400 } 401 402 struct handlers { 403 exception_handler_fn exception_handlers[NR_VECTORS][NR_EXCEPTIONS]; 404 }; 405 406 void route_exception(struct pt_regs *regs) 407 { 408 struct handlers *handlers = (struct handlers *)exception_handlers; 409 int vector = 0, ec; 410 411 ec = regs->cause & ~CAUSE_IRQ_FLAG; 412 if (ec >= NR_EXCEPTIONS) 413 goto unexpected_exception; 414 415 /* Use the same handler for all the interrupts */ 416 if (regs->cause & CAUSE_IRQ_FLAG) { 417 vector = 1; 418 ec = 0; 419 } 420 421 if (handlers && handlers->exception_handlers[vector][ec]) 422 return handlers->exception_handlers[vector][ec](regs); 423 424 unexpected_exception: 425 return kvm_exit_unexpected_exception(vector, ec); 426 } 427 428 void vcpu_init_vector_tables(struct kvm_vcpu *vcpu) 429 { 430 extern char exception_vectors; 431 432 vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(stvec), (unsigned long)&exception_vectors); 433 } 434 435 void vm_init_vector_tables(struct kvm_vm *vm) 436 { 437 vm->handlers = __vm_vaddr_alloc(vm, sizeof(struct handlers), 438 vm->page_size, MEM_REGION_DATA); 439 440 *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers; 441 } 442 443 void vm_install_exception_handler(struct kvm_vm *vm, int vector, exception_handler_fn handler) 444 { 445 struct handlers *handlers = addr_gva2hva(vm, vm->handlers); 446 447 assert(vector < NR_EXCEPTIONS); 448 handlers->exception_handlers[0][vector] = handler; 449 } 450 451 void vm_install_interrupt_handler(struct kvm_vm *vm, exception_handler_fn handler) 452 { 453 struct handlers *handlers = addr_gva2hva(vm, vm->handlers); 454 455 handlers->exception_handlers[1][0] = handler; 456 } 457 458 uint32_t guest_get_vcpuid(void) 459 { 460 return csr_read(CSR_SSCRATCH); 461 } 462 463 struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0, 464 unsigned long arg1, unsigned long arg2, 465 unsigned long arg3, unsigned long arg4, 466 unsigned long arg5) 467 { 468 register uintptr_t a0 asm ("a0") = (uintptr_t)(arg0); 469 register uintptr_t a1 asm ("a1") = (uintptr_t)(arg1); 470 register uintptr_t a2 asm ("a2") = (uintptr_t)(arg2); 471 register uintptr_t a3 asm ("a3") = (uintptr_t)(arg3); 472 register uintptr_t a4 asm ("a4") = (uintptr_t)(arg4); 473 register uintptr_t a5 asm ("a5") = (uintptr_t)(arg5); 474 register uintptr_t a6 asm ("a6") = (uintptr_t)(fid); 475 register uintptr_t a7 asm ("a7") = (uintptr_t)(ext); 476 struct sbiret ret; 477 478 asm volatile ( 479 "ecall" 480 : "+r" (a0), "+r" (a1) 481 : "r" (a2), "r" (a3), "r" (a4), "r" (a5), "r" (a6), "r" (a7) 482 : "memory"); 483 ret.error = a0; 484 ret.value = a1; 485 486 return ret; 487 } 488 489 bool guest_sbi_probe_extension(int extid, long *out_val) 490 { 491 struct sbiret ret; 492 493 ret = sbi_ecall(SBI_EXT_BASE, SBI_EXT_BASE_PROBE_EXT, extid, 494 0, 0, 0, 0, 0); 495 496 __GUEST_ASSERT(!ret.error || ret.error == SBI_ERR_NOT_SUPPORTED, 497 "ret.error=%ld, ret.value=%ld\n", ret.error, ret.value); 498 499 if (ret.error == SBI_ERR_NOT_SUPPORTED) 500 return false; 501 502 if (out_val) 503 *out_val = ret.value; 504 505 return true; 506 } 507 508 unsigned long get_host_sbi_spec_version(void) 509 { 510 struct sbiret ret; 511 512 ret = sbi_ecall(SBI_EXT_BASE, SBI_EXT_BASE_GET_SPEC_VERSION, 0, 513 0, 0, 0, 0, 0); 514 515 GUEST_ASSERT(!ret.error); 516 517 return ret.value; 518 } 519