1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * RISC-V code 4 * 5 * Copyright (C) 2021 Western Digital Corporation or its affiliates. 6 */ 7 8 #include <linux/compiler.h> 9 #include <assert.h> 10 11 #include "kvm_util.h" 12 #include "processor.h" 13 14 #define DEFAULT_RISCV_GUEST_STACK_VADDR_MIN 0xac0000 15 16 static uint64_t page_align(struct kvm_vm *vm, uint64_t v) 17 { 18 return (v + vm->page_size) & ~(vm->page_size - 1); 19 } 20 21 static uint64_t pte_addr(struct kvm_vm *vm, uint64_t entry) 22 { 23 return ((entry & PGTBL_PTE_ADDR_MASK) >> PGTBL_PTE_ADDR_SHIFT) << 24 PGTBL_PAGE_SIZE_SHIFT; 25 } 26 27 static uint64_t ptrs_per_pte(struct kvm_vm *vm) 28 { 29 return PGTBL_PAGE_SIZE / sizeof(uint64_t); 30 } 31 32 static uint64_t pte_index_mask[] = { 33 PGTBL_L0_INDEX_MASK, 34 PGTBL_L1_INDEX_MASK, 35 PGTBL_L2_INDEX_MASK, 36 PGTBL_L3_INDEX_MASK, 37 }; 38 39 static uint32_t pte_index_shift[] = { 40 PGTBL_L0_INDEX_SHIFT, 41 PGTBL_L1_INDEX_SHIFT, 42 PGTBL_L2_INDEX_SHIFT, 43 PGTBL_L3_INDEX_SHIFT, 44 }; 45 46 static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level) 47 { 48 TEST_ASSERT(level > -1, 49 "Negative page table level (%d) not possible", level); 50 TEST_ASSERT(level < vm->pgtable_levels, 51 "Invalid page table level (%d)", level); 52 53 return (gva & pte_index_mask[level]) >> pte_index_shift[level]; 54 } 55 56 void virt_arch_pgd_alloc(struct kvm_vm *vm) 57 { 58 size_t nr_pages = page_align(vm, ptrs_per_pte(vm) * 8) / vm->page_size; 59 60 if (vm->pgd_created) 61 return; 62 63 vm->pgd = vm_phy_pages_alloc(vm, nr_pages, 64 KVM_GUEST_PAGE_TABLE_MIN_PADDR, 65 vm->memslots[MEM_REGION_PT]); 66 vm->pgd_created = true; 67 } 68 69 void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) 70 { 71 uint64_t *ptep, next_ppn; 72 int level = vm->pgtable_levels - 1; 73 74 TEST_ASSERT((vaddr % vm->page_size) == 0, 75 "Virtual address not on page boundary,\n" 76 " vaddr: 0x%lx vm->page_size: 0x%x", vaddr, vm->page_size); 77 TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, 78 (vaddr >> vm->page_shift)), 79 "Invalid virtual address, vaddr: 0x%lx", vaddr); 80 TEST_ASSERT((paddr % vm->page_size) == 0, 81 "Physical address not on page boundary,\n" 82 " paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size); 83 TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn, 84 "Physical address beyond maximum supported,\n" 85 " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", 86 paddr, vm->max_gfn, vm->page_size); 87 88 ptep = addr_gpa2hva(vm, vm->pgd) + pte_index(vm, vaddr, level) * 8; 89 if (!*ptep) { 90 next_ppn = vm_alloc_page_table(vm) >> PGTBL_PAGE_SIZE_SHIFT; 91 *ptep = (next_ppn << PGTBL_PTE_ADDR_SHIFT) | 92 PGTBL_PTE_VALID_MASK; 93 } 94 level--; 95 96 while (level > -1) { 97 ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + 98 pte_index(vm, vaddr, level) * 8; 99 if (!*ptep && level > 0) { 100 next_ppn = vm_alloc_page_table(vm) >> 101 PGTBL_PAGE_SIZE_SHIFT; 102 *ptep = (next_ppn << PGTBL_PTE_ADDR_SHIFT) | 103 PGTBL_PTE_VALID_MASK; 104 } 105 level--; 106 } 107 108 paddr = paddr >> PGTBL_PAGE_SIZE_SHIFT; 109 *ptep = (paddr << PGTBL_PTE_ADDR_SHIFT) | 110 PGTBL_PTE_PERM_MASK | PGTBL_PTE_VALID_MASK; 111 } 112 113 vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) 114 { 115 uint64_t *ptep; 116 int level = vm->pgtable_levels - 1; 117 118 if (!vm->pgd_created) 119 goto unmapped_gva; 120 121 ptep = addr_gpa2hva(vm, vm->pgd) + pte_index(vm, gva, level) * 8; 122 if (!ptep) 123 goto unmapped_gva; 124 level--; 125 126 while (level > -1) { 127 ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + 128 pte_index(vm, gva, level) * 8; 129 if (!ptep) 130 goto unmapped_gva; 131 level--; 132 } 133 134 return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1)); 135 136 unmapped_gva: 137 TEST_FAIL("No mapping for vm virtual address gva: 0x%lx level: %d", 138 gva, level); 139 exit(1); 140 } 141 142 static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, 143 uint64_t page, int level) 144 { 145 #ifdef DEBUG 146 static const char *const type[] = { "pte", "pmd", "pud", "p4d"}; 147 uint64_t pte, *ptep; 148 149 if (level < 0) 150 return; 151 152 for (pte = page; pte < page + ptrs_per_pte(vm) * 8; pte += 8) { 153 ptep = addr_gpa2hva(vm, pte); 154 if (!*ptep) 155 continue; 156 fprintf(stream, "%*s%s: %lx: %lx at %p\n", indent, "", 157 type[level], pte, *ptep, ptep); 158 pte_dump(stream, vm, indent + 1, 159 pte_addr(vm, *ptep), level - 1); 160 } 161 #endif 162 } 163 164 void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) 165 { 166 int level = vm->pgtable_levels - 1; 167 uint64_t pgd, *ptep; 168 169 if (!vm->pgd_created) 170 return; 171 172 for (pgd = vm->pgd; pgd < vm->pgd + ptrs_per_pte(vm) * 8; pgd += 8) { 173 ptep = addr_gpa2hva(vm, pgd); 174 if (!*ptep) 175 continue; 176 fprintf(stream, "%*spgd: %lx: %lx at %p\n", indent, "", 177 pgd, *ptep, ptep); 178 pte_dump(stream, vm, indent + 1, 179 pte_addr(vm, *ptep), level - 1); 180 } 181 } 182 183 void riscv_vcpu_mmu_setup(struct kvm_vcpu *vcpu) 184 { 185 struct kvm_vm *vm = vcpu->vm; 186 unsigned long satp; 187 188 /* 189 * The RISC-V Sv48 MMU mode supports 56-bit physical address 190 * for 48-bit virtual address with 4KB last level page size. 191 */ 192 switch (vm->mode) { 193 case VM_MODE_P52V48_4K: 194 case VM_MODE_P48V48_4K: 195 case VM_MODE_P40V48_4K: 196 break; 197 default: 198 TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); 199 } 200 201 satp = (vm->pgd >> PGTBL_PAGE_SIZE_SHIFT) & SATP_PPN; 202 satp |= SATP_MODE_48; 203 204 vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(satp), satp); 205 } 206 207 void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) 208 { 209 struct kvm_riscv_core core; 210 211 vcpu_get_reg(vcpu, RISCV_CORE_REG(mode), &core.mode); 212 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.pc), &core.regs.pc); 213 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.ra), &core.regs.ra); 214 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.sp), &core.regs.sp); 215 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.gp), &core.regs.gp); 216 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.tp), &core.regs.tp); 217 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t0), &core.regs.t0); 218 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t1), &core.regs.t1); 219 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t2), &core.regs.t2); 220 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s0), &core.regs.s0); 221 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s1), &core.regs.s1); 222 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a0), &core.regs.a0); 223 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a1), &core.regs.a1); 224 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a2), &core.regs.a2); 225 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a3), &core.regs.a3); 226 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a4), &core.regs.a4); 227 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a5), &core.regs.a5); 228 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a6), &core.regs.a6); 229 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a7), &core.regs.a7); 230 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s2), &core.regs.s2); 231 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s3), &core.regs.s3); 232 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s4), &core.regs.s4); 233 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s5), &core.regs.s5); 234 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s6), &core.regs.s6); 235 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s7), &core.regs.s7); 236 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s8), &core.regs.s8); 237 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s9), &core.regs.s9); 238 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s10), &core.regs.s10); 239 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s11), &core.regs.s11); 240 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t3), &core.regs.t3); 241 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t4), &core.regs.t4); 242 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t5), &core.regs.t5); 243 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t6), &core.regs.t6); 244 245 fprintf(stream, 246 " MODE: 0x%lx\n", core.mode); 247 fprintf(stream, 248 " PC: 0x%016lx RA: 0x%016lx SP: 0x%016lx GP: 0x%016lx\n", 249 core.regs.pc, core.regs.ra, core.regs.sp, core.regs.gp); 250 fprintf(stream, 251 " TP: 0x%016lx T0: 0x%016lx T1: 0x%016lx T2: 0x%016lx\n", 252 core.regs.tp, core.regs.t0, core.regs.t1, core.regs.t2); 253 fprintf(stream, 254 " S0: 0x%016lx S1: 0x%016lx A0: 0x%016lx A1: 0x%016lx\n", 255 core.regs.s0, core.regs.s1, core.regs.a0, core.regs.a1); 256 fprintf(stream, 257 " A2: 0x%016lx A3: 0x%016lx A4: 0x%016lx A5: 0x%016lx\n", 258 core.regs.a2, core.regs.a3, core.regs.a4, core.regs.a5); 259 fprintf(stream, 260 " A6: 0x%016lx A7: 0x%016lx S2: 0x%016lx S3: 0x%016lx\n", 261 core.regs.a6, core.regs.a7, core.regs.s2, core.regs.s3); 262 fprintf(stream, 263 " S4: 0x%016lx S5: 0x%016lx S6: 0x%016lx S7: 0x%016lx\n", 264 core.regs.s4, core.regs.s5, core.regs.s6, core.regs.s7); 265 fprintf(stream, 266 " S8: 0x%016lx S9: 0x%016lx S10: 0x%016lx S11: 0x%016lx\n", 267 core.regs.s8, core.regs.s9, core.regs.s10, core.regs.s11); 268 fprintf(stream, 269 " T3: 0x%016lx T4: 0x%016lx T5: 0x%016lx T6: 0x%016lx\n", 270 core.regs.t3, core.regs.t4, core.regs.t5, core.regs.t6); 271 } 272 273 static void __aligned(16) guest_unexp_trap(void) 274 { 275 sbi_ecall(KVM_RISCV_SELFTESTS_SBI_EXT, 276 KVM_RISCV_SELFTESTS_SBI_UNEXP, 277 0, 0, 0, 0, 0, 0); 278 } 279 280 struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, 281 void *guest_code) 282 { 283 int r; 284 size_t stack_size; 285 unsigned long stack_vaddr; 286 unsigned long current_gp = 0; 287 struct kvm_mp_state mps; 288 struct kvm_vcpu *vcpu; 289 290 stack_size = vm->page_size == 4096 ? DEFAULT_STACK_PGS * vm->page_size : 291 vm->page_size; 292 stack_vaddr = __vm_vaddr_alloc(vm, stack_size, 293 DEFAULT_RISCV_GUEST_STACK_VADDR_MIN, 294 MEM_REGION_DATA); 295 296 vcpu = __vm_vcpu_add(vm, vcpu_id); 297 riscv_vcpu_mmu_setup(vcpu); 298 299 /* 300 * With SBI HSM support in KVM RISC-V, all secondary VCPUs are 301 * powered-off by default so we ensure that all secondary VCPUs 302 * are powered-on using KVM_SET_MP_STATE ioctl(). 303 */ 304 mps.mp_state = KVM_MP_STATE_RUNNABLE; 305 r = __vcpu_ioctl(vcpu, KVM_SET_MP_STATE, &mps); 306 TEST_ASSERT(!r, "IOCTL KVM_SET_MP_STATE failed (error %d)", r); 307 308 /* Setup global pointer of guest to be same as the host */ 309 asm volatile ( 310 "add %0, gp, zero" : "=r" (current_gp) : : "memory"); 311 vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.gp), current_gp); 312 313 /* Setup stack pointer and program counter of guest */ 314 vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.sp), stack_vaddr + stack_size); 315 vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.pc), (unsigned long)guest_code); 316 317 /* Setup default exception vector of guest */ 318 vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(stvec), (unsigned long)guest_unexp_trap); 319 320 return vcpu; 321 } 322 323 void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) 324 { 325 va_list ap; 326 uint64_t id = RISCV_CORE_REG(regs.a0); 327 int i; 328 329 TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n" 330 " num: %u", num); 331 332 va_start(ap, num); 333 334 for (i = 0; i < num; i++) { 335 switch (i) { 336 case 0: 337 id = RISCV_CORE_REG(regs.a0); 338 break; 339 case 1: 340 id = RISCV_CORE_REG(regs.a1); 341 break; 342 case 2: 343 id = RISCV_CORE_REG(regs.a2); 344 break; 345 case 3: 346 id = RISCV_CORE_REG(regs.a3); 347 break; 348 case 4: 349 id = RISCV_CORE_REG(regs.a4); 350 break; 351 case 5: 352 id = RISCV_CORE_REG(regs.a5); 353 break; 354 case 6: 355 id = RISCV_CORE_REG(regs.a6); 356 break; 357 case 7: 358 id = RISCV_CORE_REG(regs.a7); 359 break; 360 } 361 vcpu_set_reg(vcpu, id, va_arg(ap, uint64_t)); 362 } 363 364 va_end(ap); 365 } 366 367 void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) 368 { 369 } 370 371 struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0, 372 unsigned long arg1, unsigned long arg2, 373 unsigned long arg3, unsigned long arg4, 374 unsigned long arg5) 375 { 376 register uintptr_t a0 asm ("a0") = (uintptr_t)(arg0); 377 register uintptr_t a1 asm ("a1") = (uintptr_t)(arg1); 378 register uintptr_t a2 asm ("a2") = (uintptr_t)(arg2); 379 register uintptr_t a3 asm ("a3") = (uintptr_t)(arg3); 380 register uintptr_t a4 asm ("a4") = (uintptr_t)(arg4); 381 register uintptr_t a5 asm ("a5") = (uintptr_t)(arg5); 382 register uintptr_t a6 asm ("a6") = (uintptr_t)(fid); 383 register uintptr_t a7 asm ("a7") = (uintptr_t)(ext); 384 struct sbiret ret; 385 386 asm volatile ( 387 "ecall" 388 : "+r" (a0), "+r" (a1) 389 : "r" (a2), "r" (a3), "r" (a4), "r" (a5), "r" (a6), "r" (a7) 390 : "memory"); 391 ret.error = a0; 392 ret.value = a1; 393 394 return ret; 395 } 396 397 bool guest_sbi_probe_extension(int extid, long *out_val) 398 { 399 struct sbiret ret; 400 401 ret = sbi_ecall(SBI_EXT_BASE, SBI_EXT_BASE_PROBE_EXT, extid, 402 0, 0, 0, 0, 0); 403 404 __GUEST_ASSERT(!ret.error || ret.error == SBI_ERR_NOT_SUPPORTED, 405 "ret.error=%ld, ret.value=%ld\n", ret.error, ret.value); 406 407 if (ret.error == SBI_ERR_NOT_SUPPORTED) 408 return false; 409 410 if (out_val) 411 *out_val = ret.value; 412 413 return true; 414 } 415