// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2019 Western Digital Corporation or its affiliates.
 *
 * Authors:
 *     Anup Patel <anup.patel@wdc.com>
 */

#include <linux/bitops.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/kdebug.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/vmalloc.h>
#include <linux/sched/signal.h>
#include <linux/fs.h>
#include <linux/kvm_host.h>
#include <asm/cacheflush.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_nacl.h>
#include <asm/kvm_vcpu_vector.h>

#define CREATE_TRACE_POINTS
#include "trace.h"

/*
 * Last VCPU loaded on each host CPU; used by kvm_arch_vcpu_load() to skip
 * redundant guest CSR and HGATP restores.
 */
static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_former_vcpu);

const struct kvm_stats_desc kvm_vcpu_stats_desc[] = {
	KVM_GENERIC_VCPU_STATS(),
	STATS_DESC_COUNTER(VCPU, ecall_exit_stat),
	STATS_DESC_COUNTER(VCPU, wfi_exit_stat),
	STATS_DESC_COUNTER(VCPU, wrs_exit_stat),
	STATS_DESC_COUNTER(VCPU, mmio_exit_user),
	STATS_DESC_COUNTER(VCPU, mmio_exit_kernel),
	STATS_DESC_COUNTER(VCPU, csr_exit_user),
	STATS_DESC_COUNTER(VCPU, csr_exit_kernel),
	STATS_DESC_COUNTER(VCPU, signal_exits),
	STATS_DESC_COUNTER(VCPU, exits),
	STATS_DESC_COUNTER(VCPU, instr_illegal_exits),
	STATS_DESC_COUNTER(VCPU, load_misaligned_exits),
	STATS_DESC_COUNTER(VCPU, store_misaligned_exits),
	STATS_DESC_COUNTER(VCPU, load_access_exits),
	STATS_DESC_COUNTER(VCPU, store_access_exits),
};

const struct kvm_stats_header kvm_vcpu_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vcpu_stats_desc),
};

static void kvm_riscv_vcpu_context_reset(struct kvm_vcpu *vcpu,
					 bool kvm_sbi_reset)
{
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
	struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
	void *vector_datap = cntx->vector.datap;

	memset(cntx, 0, sizeof(*cntx));
	memset(csr, 0, sizeof(*csr));
	memset(&vcpu->arch.smstateen_csr, 0, sizeof(vcpu->arch.smstateen_csr));

	/* Restore datap as it's not a part of the guest context. */
	cntx->vector.datap = vector_datap;

	if (kvm_sbi_reset)
		kvm_riscv_vcpu_sbi_load_reset_state(vcpu);

	/* Setup reset state of shadow SSTATUS and HSTATUS CSRs */
	cntx->sstatus = SR_SPP | SR_SPIE;

	cntx->hstatus |= HSTATUS_VTW;
	cntx->hstatus |= HSTATUS_SPVP;
	cntx->hstatus |= HSTATUS_SPV;
}

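/*
 * Bring a VCPU back to its post-reset state: guest context, FP and vector
 * state, timer, AIA state, pending interrupt bitmaps, the HFENCE queue,
 * and SBI state. Used at VCPU creation time and for SBI HSM start/reset
 * requests.
 */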
static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu, bool kvm_sbi_reset)
{
	bool loaded;

	/*
	 * Preemption must be disabled here because this races with
	 * kvm_sched_out()/kvm_sched_in() (called from preempt notifiers),
	 * which also call vcpu_load()/vcpu_put().
	 */
	get_cpu();
	loaded = (vcpu->cpu != -1);
	if (loaded)
		kvm_arch_vcpu_put(vcpu);

	vcpu->arch.last_exit_cpu = -1;

	kvm_riscv_vcpu_context_reset(vcpu, kvm_sbi_reset);

	kvm_riscv_vcpu_fp_reset(vcpu);

	kvm_riscv_vcpu_vector_reset(vcpu);

	kvm_riscv_vcpu_timer_reset(vcpu);

	kvm_riscv_vcpu_aia_reset(vcpu);

	bitmap_zero(vcpu->arch.irqs_pending, KVM_RISCV_VCPU_NR_IRQS);
	bitmap_zero(vcpu->arch.irqs_pending_mask, KVM_RISCV_VCPU_NR_IRQS);

	kvm_riscv_vcpu_pmu_reset(vcpu);

	vcpu->arch.hfence_head = 0;
	vcpu->arch.hfence_tail = 0;
	memset(vcpu->arch.hfence_queue, 0, sizeof(vcpu->arch.hfence_queue));

	kvm_riscv_vcpu_sbi_reset(vcpu);

	/* Reset the guest CSRs for hotplug usecase */
	if (loaded)
		kvm_arch_vcpu_load(vcpu, smp_processor_id());
	put_cpu();
}

int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
{
	return 0;
}

int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
{
	int rc;

	spin_lock_init(&vcpu->arch.mp_state_lock);

	/* Mark this VCPU never ran */
	vcpu->arch.ran_atleast_once = false;

	vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
	bitmap_zero(vcpu->arch.isa, RISCV_ISA_EXT_MAX);

	/* Setup VCPU config */
	kvm_riscv_vcpu_config_init(vcpu);

	/* Setup ISA features available to VCPU */
	kvm_riscv_vcpu_setup_isa(vcpu);

	/* Setup vendor, arch, and implementation details */
	vcpu->arch.mvendorid = sbi_get_mvendorid();
	vcpu->arch.marchid = sbi_get_marchid();
	vcpu->arch.mimpid = sbi_get_mimpid();

	/* Setup VCPU hfence queue */
	spin_lock_init(&vcpu->arch.hfence_lock);

	spin_lock_init(&vcpu->arch.reset_state.lock);

	rc = kvm_riscv_vcpu_alloc_vector_context(vcpu);
	if (rc)
		return rc;

	/* Setup VCPU timer */
	kvm_riscv_vcpu_timer_init(vcpu);

	/* Setup performance monitoring */
	kvm_riscv_vcpu_pmu_init(vcpu);

	/* Setup VCPU AIA */
	kvm_riscv_vcpu_aia_init(vcpu);

	/*
	 * Setup SBI extensions
	 * NOTE: This must be the last thing to be initialized.
	 */
	kvm_riscv_vcpu_sbi_init(vcpu);

	/* Reset VCPU */
	kvm_riscv_reset_vcpu(vcpu, false);

	return 0;
}

void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	/*
	 * The vcpu with id 0 is the designated boot cpu.
	 * Keep all vcpus with non-zero id in power-off state so that
	 * they can be brought up using the SBI HSM extension.
	 */
	if (vcpu->vcpu_idx != 0)
		kvm_riscv_vcpu_power_off(vcpu);
}

void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	kvm_riscv_vcpu_sbi_deinit(vcpu);

	/* Cleanup VCPU AIA context */
	kvm_riscv_vcpu_aia_deinit(vcpu);

	/* Cleanup VCPU timer */
	kvm_riscv_vcpu_timer_deinit(vcpu);

	kvm_riscv_vcpu_pmu_deinit(vcpu);

	/* Free unused pages pre-allocated for G-stage page table mappings */
	kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);

	/* Free vector context space for host and guest kernel */
	kvm_riscv_vcpu_free_vector_context(vcpu);
}

int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
	return kvm_riscv_vcpu_timer_pending(vcpu);
}

int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return (kvm_riscv_vcpu_has_interrupts(vcpu, -1ULL) &&
		!kvm_riscv_vcpu_stopped(vcpu) && !vcpu->arch.pause);
}

int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
}

bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
{
	return (vcpu->arch.guest_context.sstatus & SR_SPP) ? true : false;
}

#ifdef CONFIG_GUEST_PERF_EVENTS
unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.guest_context.sepc;
}
#endif

vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
	return VM_FAULT_SIGBUS;
}

long kvm_arch_vcpu_unlocked_ioctl(struct file *filp, unsigned int ioctl,
				  unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;

	if (ioctl == KVM_INTERRUPT) {
		struct kvm_interrupt irq;

		if (copy_from_user(&irq, argp, sizeof(irq)))
			return -EFAULT;

		if (irq.irq == KVM_INTERRUPT_SET)
			return kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_VS_EXT);
		else
			return kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_EXT);
	}

	return -ENOIOCTLCMD;
}

long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	long r = -EINVAL;

	switch (ioctl) {
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;

		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_riscv_vcpu_set_reg(vcpu, &reg);
		else
			r = kvm_riscv_vcpu_get_reg(vcpu, &reg);
		break;
	}
	case KVM_GET_REG_LIST: {
		struct kvm_reg_list __user *user_list = argp;
		struct kvm_reg_list reg_list;
		unsigned int n;

		r = -EFAULT;
		if (copy_from_user(&reg_list, user_list, sizeof(reg_list)))
			break;
		n = reg_list.n;
		reg_list.n = kvm_riscv_vcpu_num_regs(vcpu);
		if (copy_to_user(user_list, &reg_list, sizeof(reg_list)))
			break;
		r = -E2BIG;
		if (n < reg_list.n)
			break;
		r = kvm_riscv_vcpu_copy_reg_indices(vcpu, user_list->reg);
		break;
	}
	default:
		break;
	}

	return r;
}

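/*
 * The legacy register, SREGS, FPU, and translation ioctls below are not
 * supported on RISC-V; VCPU state is accessed through the ONE_REG
 * interface (KVM_GET_ONE_REG / KVM_SET_ONE_REG) instead.
 */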
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	return -EINVAL;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	return -EINVAL;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	return -EINVAL;
}

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	return -EINVAL;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	return -EINVAL;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	return -EINVAL;
}

void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
	unsigned long mask, val;

	if (READ_ONCE(vcpu->arch.irqs_pending_mask[0])) {
		mask = xchg_acquire(&vcpu->arch.irqs_pending_mask[0], 0);
		val = READ_ONCE(vcpu->arch.irqs_pending[0]) & mask;

		csr->hvip &= ~mask;
		csr->hvip |= val;
	}

	/* Flush AIA high interrupts */
	kvm_riscv_vcpu_aia_flush_interrupts(vcpu);
}

void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu)
{
	unsigned long hvip;
	struct kvm_vcpu_arch *v = &vcpu->arch;
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;

	/* Read current HVIP and VSIE CSRs */
	csr->vsie = ncsr_read(CSR_VSIE);

	/* Sync-up HVIP.VSSIP bit changes done by the Guest */
	hvip = ncsr_read(CSR_HVIP);
	if ((csr->hvip ^ hvip) & (1UL << IRQ_VS_SOFT)) {
		if (hvip & (1UL << IRQ_VS_SOFT)) {
			if (!test_and_set_bit(IRQ_VS_SOFT,
					      v->irqs_pending_mask))
				set_bit(IRQ_VS_SOFT, v->irqs_pending);
		} else {
			if (!test_and_set_bit(IRQ_VS_SOFT,
					      v->irqs_pending_mask))
				clear_bit(IRQ_VS_SOFT, v->irqs_pending);
		}
	}

	/* Sync-up HVIP.LCOFIP bit changes (clear only) done by the Guest */
	if ((csr->hvip ^ hvip) & (1UL << IRQ_PMU_OVF)) {
		if (!(hvip & (1UL << IRQ_PMU_OVF)) &&
		    !test_and_set_bit(IRQ_PMU_OVF, v->irqs_pending_mask))
			clear_bit(IRQ_PMU_OVF, v->irqs_pending);
	}

	/* Sync-up AIA high interrupts */
	kvm_riscv_vcpu_aia_sync_interrupts(vcpu);

	/* Sync-up timer CSRs */
	kvm_riscv_vcpu_timer_sync(vcpu);
}

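/*
 * Local interrupt injection protocol: irqs_pending[] holds the requested
 * level of each interrupt and irqs_pending_mask[] marks which bits have
 * changed since the last flush; kvm_riscv_vcpu_flush_interrupts() folds
 * the pending changes into the shadow HVIP CSR before guest entry.
 */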
int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
{
	/*
	 * We only allow VS-mode software, timer, counter overflow, and
	 * external interrupts when irq is one of the local interrupts
	 * defined by the RISC-V privilege specification.
	 */
	if (irq < IRQ_LOCAL_MAX &&
	    irq != IRQ_VS_SOFT &&
	    irq != IRQ_VS_TIMER &&
	    irq != IRQ_VS_EXT &&
	    irq != IRQ_PMU_OVF)
		return -EINVAL;

	set_bit(irq, vcpu->arch.irqs_pending);
	smp_mb__before_atomic();
	set_bit(irq, vcpu->arch.irqs_pending_mask);

	kvm_vcpu_kick(vcpu);

	return 0;
}

int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
{
	/*
	 * We only allow VS-mode software, timer, counter overflow, and
	 * external interrupts when irq is one of the local interrupts
	 * defined by the RISC-V privilege specification.
	 */
	if (irq < IRQ_LOCAL_MAX &&
	    irq != IRQ_VS_SOFT &&
	    irq != IRQ_VS_TIMER &&
	    irq != IRQ_VS_EXT &&
	    irq != IRQ_PMU_OVF)
		return -EINVAL;

	clear_bit(irq, vcpu->arch.irqs_pending);
	smp_mb__before_atomic();
	set_bit(irq, vcpu->arch.irqs_pending_mask);

	return 0;
}

bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, u64 mask)
{
	unsigned long ie;

	ie = ((vcpu->arch.guest_csr.vsie & VSIP_VALID_MASK)
	      << VSIP_TO_HVIP_SHIFT) & (unsigned long)mask;
	ie |= vcpu->arch.guest_csr.vsie & ~IRQ_LOCAL_MASK &
	      (unsigned long)mask;
	if (READ_ONCE(vcpu->arch.irqs_pending[0]) & ie)
		return true;

	/* Check AIA high interrupts */
	return kvm_riscv_vcpu_aia_has_interrupts(vcpu, mask);
}

void __kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu)
{
	WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED);
	kvm_make_request(KVM_REQ_SLEEP, vcpu);
	kvm_vcpu_kick(vcpu);
}

void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu)
{
	spin_lock(&vcpu->arch.mp_state_lock);
	__kvm_riscv_vcpu_power_off(vcpu);
	spin_unlock(&vcpu->arch.mp_state_lock);
}

void __kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu)
{
	WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_RUNNABLE);
	kvm_vcpu_wake_up(vcpu);
}

void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu)
{
	spin_lock(&vcpu->arch.mp_state_lock);
	__kvm_riscv_vcpu_power_on(vcpu);
	spin_unlock(&vcpu->arch.mp_state_lock);
}

bool kvm_riscv_vcpu_stopped(struct kvm_vcpu *vcpu)
{
	return READ_ONCE(vcpu->arch.mp_state.mp_state) == KVM_MP_STATE_STOPPED;
}

int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	*mp_state = READ_ONCE(vcpu->arch.mp_state);

	return 0;
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int ret = 0;

	spin_lock(&vcpu->arch.mp_state_lock);

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_RUNNABLE:
		WRITE_ONCE(vcpu->arch.mp_state, *mp_state);
		break;
	case KVM_MP_STATE_STOPPED:
		__kvm_riscv_vcpu_power_off(vcpu);
		break;
	case KVM_MP_STATE_INIT_RECEIVED:
		if (vcpu->kvm->arch.mp_state_reset)
			kvm_riscv_reset_vcpu(vcpu, false);
		else
			ret = -EINVAL;
		break;
	default:
		ret = -EINVAL;
	}

	spin_unlock(&vcpu->arch.mp_state_lock);

	return ret;
}

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	if (dbg->control & KVM_GUESTDBG_ENABLE)
		vcpu->guest_debug = dbg->control;
	else
		vcpu->guest_debug = 0;

	return 0;
}

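/*
 * Restore guest VS-level CSRs, HGATP, AIA, timer, FP, and vector state on
 * the target host CPU. The CSR, HGATP, and AIA loads are skipped when this
 * VCPU is reloaded on the CPU it last exited from and no other VCPU has run
 * there in between (see kvm_former_vcpu and csr_dirty).
 */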
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	void *nsh;
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;

	/*
	 * If the VCPU is being reloaded on the same physical CPU and no
	 * other KVM VCPU has run on this CPU since it was last put,
	 * we can skip the expensive CSR and HGATP writes.
	 *
	 * Note: If a new CSR is added to this fast-path skip block,
	 * make sure that 'csr_dirty' is set to true in any
	 * ioctl (e.g., KVM_SET_ONE_REG) that modifies it.
	 */
	if (vcpu != __this_cpu_read(kvm_former_vcpu))
		__this_cpu_write(kvm_former_vcpu, vcpu);
	else if (vcpu->arch.last_exit_cpu == cpu && !vcpu->arch.csr_dirty)
		goto csr_restore_done;

	vcpu->arch.csr_dirty = false;

	/*
	 * Load VCPU config CSRs before other CSRs because
	 * the read/write behaviour of certain CSRs changes
	 * based on the VCPU config CSRs.
	 */
	kvm_riscv_vcpu_config_load(vcpu);

	if (kvm_riscv_nacl_sync_csr_available()) {
		nsh = nacl_shmem();
		nacl_csr_write(nsh, CSR_VSSTATUS, csr->vsstatus);
		nacl_csr_write(nsh, CSR_VSIE, csr->vsie);
		nacl_csr_write(nsh, CSR_VSTVEC, csr->vstvec);
		nacl_csr_write(nsh, CSR_VSSCRATCH, csr->vsscratch);
		nacl_csr_write(nsh, CSR_VSEPC, csr->vsepc);
		nacl_csr_write(nsh, CSR_VSCAUSE, csr->vscause);
		nacl_csr_write(nsh, CSR_VSTVAL, csr->vstval);
		nacl_csr_write(nsh, CSR_HVIP, csr->hvip);
		nacl_csr_write(nsh, CSR_VSATP, csr->vsatp);
	} else {
		csr_write(CSR_VSSTATUS, csr->vsstatus);
		csr_write(CSR_VSIE, csr->vsie);
		csr_write(CSR_VSTVEC, csr->vstvec);
		csr_write(CSR_VSSCRATCH, csr->vsscratch);
		csr_write(CSR_VSEPC, csr->vsepc);
		csr_write(CSR_VSCAUSE, csr->vscause);
		csr_write(CSR_VSTVAL, csr->vstval);
		csr_write(CSR_HVIP, csr->hvip);
		csr_write(CSR_VSATP, csr->vsatp);
	}

	kvm_riscv_mmu_update_hgatp(vcpu);

	kvm_riscv_vcpu_aia_load(vcpu, cpu);

csr_restore_done:
	kvm_riscv_vcpu_timer_restore(vcpu);

	kvm_riscv_vcpu_host_fp_save(&vcpu->arch.host_context);
	kvm_riscv_vcpu_guest_fp_restore(&vcpu->arch.guest_context,
					vcpu->arch.isa);
	kvm_riscv_vcpu_host_vector_save(&vcpu->arch.host_context);
	kvm_riscv_vcpu_guest_vector_restore(&vcpu->arch.guest_context,
					    vcpu->arch.isa);

	kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);

	vcpu->cpu = cpu;
}

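/*
 * Counterpart of kvm_arch_vcpu_load(): save the guest FP, vector, timer,
 * AIA, and VS-level CSR state back into the VCPU structure before the
 * VCPU is scheduled out or migrated to another host CPU.
 */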
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	void *nsh;
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;

	vcpu->cpu = -1;

	kvm_riscv_vcpu_aia_put(vcpu);

	kvm_riscv_vcpu_guest_fp_save(&vcpu->arch.guest_context,
				     vcpu->arch.isa);
	kvm_riscv_vcpu_host_fp_restore(&vcpu->arch.host_context);

	kvm_riscv_vcpu_timer_save(vcpu);
	kvm_riscv_vcpu_guest_vector_save(&vcpu->arch.guest_context,
					 vcpu->arch.isa);
	kvm_riscv_vcpu_host_vector_restore(&vcpu->arch.host_context);

	if (kvm_riscv_nacl_available()) {
		nsh = nacl_shmem();
		csr->vsstatus = nacl_csr_read(nsh, CSR_VSSTATUS);
		csr->vsie = nacl_csr_read(nsh, CSR_VSIE);
		csr->vstvec = nacl_csr_read(nsh, CSR_VSTVEC);
		csr->vsscratch = nacl_csr_read(nsh, CSR_VSSCRATCH);
		csr->vsepc = nacl_csr_read(nsh, CSR_VSEPC);
		csr->vscause = nacl_csr_read(nsh, CSR_VSCAUSE);
		csr->vstval = nacl_csr_read(nsh, CSR_VSTVAL);
		csr->hvip = nacl_csr_read(nsh, CSR_HVIP);
		csr->vsatp = nacl_csr_read(nsh, CSR_VSATP);
	} else {
		csr->vsstatus = csr_read(CSR_VSSTATUS);
		csr->vsie = csr_read(CSR_VSIE);
		csr->vstvec = csr_read(CSR_VSTVEC);
		csr->vsscratch = csr_read(CSR_VSSCRATCH);
		csr->vsepc = csr_read(CSR_VSEPC);
		csr->vscause = csr_read(CSR_VSCAUSE);
		csr->vstval = csr_read(CSR_VSTVAL);
		csr->hvip = csr_read(CSR_HVIP);
		csr->vsatp = csr_read(CSR_VSATP);
	}
}

/**
 * kvm_riscv_check_vcpu_requests - check and handle pending vCPU requests
 * @vcpu: the VCPU pointer
 *
 * Return: 1 if we should enter the guest
 *	   0 if we should exit to userspace
 */
static int kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu)
{
	struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);

	if (kvm_request_pending(vcpu)) {
		if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) {
			kvm_vcpu_srcu_read_unlock(vcpu);
			rcuwait_wait_event(wait,
				(!kvm_riscv_vcpu_stopped(vcpu)) && (!vcpu->arch.pause),
				TASK_INTERRUPTIBLE);
			kvm_vcpu_srcu_read_lock(vcpu);

			if (kvm_riscv_vcpu_stopped(vcpu) || vcpu->arch.pause) {
				/*
				 * Awakened to handle a signal; request to
				 * sleep again later.
				 */
				kvm_make_request(KVM_REQ_SLEEP, vcpu);
			}
		}

		if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu))
			kvm_riscv_reset_vcpu(vcpu, true);

		if (kvm_check_request(KVM_REQ_UPDATE_HGATP, vcpu))
			kvm_riscv_mmu_update_hgatp(vcpu);

		if (kvm_check_request(KVM_REQ_FENCE_I, vcpu))
			kvm_riscv_fence_i_process(vcpu);

		if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
			kvm_riscv_tlb_flush_process(vcpu);

		if (kvm_check_request(KVM_REQ_HFENCE_VVMA_ALL, vcpu))
			kvm_riscv_hfence_vvma_all_process(vcpu);

		if (kvm_check_request(KVM_REQ_HFENCE, vcpu))
			kvm_riscv_hfence_process(vcpu);

		if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
			kvm_riscv_vcpu_record_steal_time(vcpu);

		if (kvm_dirty_ring_check_request(vcpu))
			return 0;
	}

	return 1;
}

static void kvm_riscv_update_hvip(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;

	ncsr_write(CSR_HVIP, csr->hvip);
	kvm_riscv_vcpu_aia_update_hvip(vcpu);
}

static __always_inline void kvm_riscv_vcpu_swap_in_guest_state(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_smstateen_csr *smcsr = &vcpu->arch.smstateen_csr;
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;

	vcpu->arch.host_scounteren = csr_swap(CSR_SCOUNTEREN, csr->scounteren);
	vcpu->arch.host_senvcfg = csr_swap(CSR_SENVCFG, csr->senvcfg);
	if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN))
		vcpu->arch.host_sstateen0 = csr_swap(CSR_SSTATEEN0, smcsr->sstateen0);
}

static __always_inline void kvm_riscv_vcpu_swap_in_host_state(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_smstateen_csr *smcsr = &vcpu->arch.smstateen_csr;
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;

	csr->scounteren = csr_swap(CSR_SCOUNTEREN, vcpu->arch.host_scounteren);
	csr->senvcfg = csr_swap(CSR_SENVCFG, vcpu->arch.host_senvcfg);
	if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN))
		smcsr->sstateen0 = csr_swap(CSR_SSTATEEN0, vcpu->arch.host_sstateen0);
}

/*
 * Actually run the vCPU, entering an RCU extended quiescent state (EQS) while
 * the vCPU is running.
 *
 * This must be noinstr as instrumentation may make use of RCU, and this is not
 * safe during the EQS.
 */
static void noinstr kvm_riscv_vcpu_enter_exit(struct kvm_vcpu *vcpu,
					      struct kvm_cpu_trap *trap)
{
	void *nsh;
	struct kvm_cpu_context *gcntx = &vcpu->arch.guest_context;
	struct kvm_cpu_context *hcntx = &vcpu->arch.host_context;

	/*
	 * We save trap CSRs (such as SEPC, SCAUSE, STVAL, HTVAL, and
	 * HTINST) here because we do local_irq_enable() after this
	 * function in kvm_arch_vcpu_ioctl_run() which can result in
	 * an interrupt immediately after local_irq_enable() and can
	 * potentially change trap CSRs.
	 */

	kvm_riscv_vcpu_swap_in_guest_state(vcpu);
	guest_state_enter_irqoff();

	if (kvm_riscv_nacl_sync_sret_available()) {
		nsh = nacl_shmem();

		if (kvm_riscv_nacl_autoswap_csr_available()) {
			hcntx->hstatus =
				nacl_csr_read(nsh, CSR_HSTATUS);
			nacl_scratch_write_long(nsh,
						SBI_NACL_SHMEM_AUTOSWAP_OFFSET +
						SBI_NACL_SHMEM_AUTOSWAP_HSTATUS,
						gcntx->hstatus);
			nacl_scratch_write_long(nsh,
						SBI_NACL_SHMEM_AUTOSWAP_OFFSET,
						SBI_NACL_SHMEM_AUTOSWAP_FLAG_HSTATUS);
		} else if (kvm_riscv_nacl_sync_csr_available()) {
			hcntx->hstatus = nacl_csr_swap(nsh,
						       CSR_HSTATUS, gcntx->hstatus);
		} else {
			hcntx->hstatus = csr_swap(CSR_HSTATUS, gcntx->hstatus);
		}

		nacl_scratch_write_longs(nsh,
					 SBI_NACL_SHMEM_SRET_OFFSET +
					 SBI_NACL_SHMEM_SRET_X(1),
					 &gcntx->ra,
					 SBI_NACL_SHMEM_SRET_X_LAST);

		__kvm_riscv_nacl_switch_to(&vcpu->arch, SBI_EXT_NACL,
					   SBI_EXT_NACL_SYNC_SRET);

		if (kvm_riscv_nacl_autoswap_csr_available()) {
			nacl_scratch_write_long(nsh,
						SBI_NACL_SHMEM_AUTOSWAP_OFFSET,
						0);
			gcntx->hstatus = nacl_scratch_read_long(nsh,
								SBI_NACL_SHMEM_AUTOSWAP_OFFSET +
								SBI_NACL_SHMEM_AUTOSWAP_HSTATUS);
		} else {
			gcntx->hstatus = csr_swap(CSR_HSTATUS, hcntx->hstatus);
		}

		trap->htval = nacl_csr_read(nsh, CSR_HTVAL);
		trap->htinst = nacl_csr_read(nsh, CSR_HTINST);
	} else {
		hcntx->hstatus = csr_swap(CSR_HSTATUS, gcntx->hstatus);

		__kvm_riscv_switch_to(&vcpu->arch);

		gcntx->hstatus = csr_swap(CSR_HSTATUS, hcntx->hstatus);

		trap->htval = csr_read(CSR_HTVAL);
		trap->htinst = csr_read(CSR_HTINST);
	}

	trap->sepc = gcntx->sepc;
	trap->scause = csr_read(CSR_SCAUSE);
	trap->stval = csr_read(CSR_STVAL);

	vcpu->arch.last_exit_cpu = vcpu->cpu;
	guest_state_exit_irqoff();
	kvm_riscv_vcpu_swap_in_host_state(vcpu);
}

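/*
 * Main KVM_RUN loop: complete any exit handed back from userspace, then
 * repeatedly service VCPU requests, flush interrupt state to hardware,
 * enter the guest via kvm_riscv_vcpu_enter_exit(), and process the
 * resulting trap until an exit to userspace is required.
 */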
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
	int ret;
	struct kvm_cpu_trap trap;
	struct kvm_run *run = vcpu->run;

	if (!vcpu->arch.ran_atleast_once)
		kvm_riscv_vcpu_config_ran_once(vcpu);

	/* Mark this VCPU ran at least once */
	vcpu->arch.ran_atleast_once = true;

	kvm_vcpu_srcu_read_lock(vcpu);

	switch (run->exit_reason) {
	case KVM_EXIT_MMIO:
		/* Process MMIO value returned from user-space */
		ret = kvm_riscv_vcpu_mmio_return(vcpu, vcpu->run);
		break;
	case KVM_EXIT_RISCV_SBI:
		/* Process SBI value returned from user-space */
		ret = kvm_riscv_vcpu_sbi_return(vcpu, vcpu->run);
		break;
	case KVM_EXIT_RISCV_CSR:
		/* Process CSR value returned from user-space */
		ret = kvm_riscv_vcpu_csr_return(vcpu, vcpu->run);
		break;
	default:
		ret = 0;
		break;
	}
	if (ret) {
		kvm_vcpu_srcu_read_unlock(vcpu);
		return ret;
	}

	if (!vcpu->wants_to_run) {
		kvm_vcpu_srcu_read_unlock(vcpu);
		return -EINTR;
	}

	vcpu_load(vcpu);

	kvm_sigset_activate(vcpu);

	ret = 1;
	run->exit_reason = KVM_EXIT_UNKNOWN;
	while (ret > 0) {
		/* Check conditions before entering the guest */
		ret = kvm_xfer_to_guest_mode_handle_work(vcpu);
		if (ret)
			continue;
		ret = 1;

		kvm_riscv_gstage_vmid_update(vcpu);

		ret = kvm_riscv_check_vcpu_requests(vcpu);
		if (ret <= 0)
			continue;

		preempt_disable();

		/* Update AIA HW state before entering guest */
		ret = kvm_riscv_vcpu_aia_update(vcpu);
		if (ret <= 0) {
			preempt_enable();
			continue;
		}

		local_irq_disable();

		/*
		 * Ensure we set mode to IN_GUEST_MODE after we disable
		 * interrupts and before the final VCPU requests check.
		 * See the comment in kvm_vcpu_exiting_guest_mode() and
		 * Documentation/virt/kvm/vcpu-requests.rst
		 */
		vcpu->mode = IN_GUEST_MODE;

		kvm_vcpu_srcu_read_unlock(vcpu);
		smp_mb__after_srcu_read_unlock();

		/*
		 * VCPU interrupts may have been updated asynchronously,
		 * so reflect them in the HW state.
		 */
		kvm_riscv_vcpu_flush_interrupts(vcpu);

		/* Update HVIP CSR for current CPU */
		kvm_riscv_update_hvip(vcpu);

		if (kvm_riscv_gstage_vmid_ver_changed(&vcpu->kvm->arch.vmid) ||
		    kvm_request_pending(vcpu) ||
		    xfer_to_guest_mode_work_pending()) {
			vcpu->mode = OUTSIDE_GUEST_MODE;
			local_irq_enable();
			preempt_enable();
			kvm_vcpu_srcu_read_lock(vcpu);
			continue;
		}

		/*
		 * Sanitize VMID mappings cached (TLB) on current CPU
		 *
		 * Note: This should be done after the G-stage VMID has been
		 * updated using kvm_riscv_gstage_vmid_ver_changed()
		 */
		kvm_riscv_local_tlb_sanitize(vcpu);

		trace_kvm_entry(vcpu);

		guest_timing_enter_irqoff();

		kvm_riscv_vcpu_enter_exit(vcpu, &trap);

		vcpu->mode = OUTSIDE_GUEST_MODE;
		vcpu->stat.exits++;

		/* Sync up interrupt state with HW */
		kvm_riscv_vcpu_sync_interrupts(vcpu);

		/*
		 * We must ensure that any pending interrupts are taken before
		 * we exit guest timing so that timer ticks are accounted as
		 * guest time. Transiently unmask interrupts so that any
		 * pending interrupts are taken.
		 *
		 * There's no barrier which ensures that pending interrupts are
		 * recognised, so we just hope that the CPU takes any pending
		 * interrupts between the enable and disable.
		 */
		local_irq_enable();
		local_irq_disable();

		guest_timing_exit_irqoff();

		local_irq_enable();

		trace_kvm_exit(&trap);

		preempt_enable();

		kvm_vcpu_srcu_read_lock(vcpu);

		ret = kvm_riscv_vcpu_exit(vcpu, run, &trap);
	}

	kvm_sigset_deactivate(vcpu);

	vcpu_put(vcpu);

	kvm_vcpu_srcu_read_unlock(vcpu);

	return ret;
}