1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2019 Western Digital Corporation or its affiliates. 4 * 5 * Authors: 6 * Anup Patel <anup.patel@wdc.com> 7 */ 8 9 #include <linux/bitops.h> 10 #include <linux/errno.h> 11 #include <linux/err.h> 12 #include <linux/kdebug.h> 13 #include <linux/module.h> 14 #include <linux/percpu.h> 15 #include <linux/vmalloc.h> 16 #include <linux/sched/signal.h> 17 #include <linux/fs.h> 18 #include <linux/kvm_host.h> 19 #include <asm/cacheflush.h> 20 #include <asm/kvm_mmu.h> 21 #include <asm/kvm_nacl.h> 22 #include <asm/kvm_vcpu_vector.h> 23 24 #define CREATE_TRACE_POINTS 25 #include "trace.h" 26 27 static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_former_vcpu); 28 29 const struct kvm_stats_desc kvm_vcpu_stats_desc[] = { 30 KVM_GENERIC_VCPU_STATS(), 31 STATS_DESC_COUNTER(VCPU, ecall_exit_stat), 32 STATS_DESC_COUNTER(VCPU, wfi_exit_stat), 33 STATS_DESC_COUNTER(VCPU, wrs_exit_stat), 34 STATS_DESC_COUNTER(VCPU, mmio_exit_user), 35 STATS_DESC_COUNTER(VCPU, mmio_exit_kernel), 36 STATS_DESC_COUNTER(VCPU, csr_exit_user), 37 STATS_DESC_COUNTER(VCPU, csr_exit_kernel), 38 STATS_DESC_COUNTER(VCPU, signal_exits), 39 STATS_DESC_COUNTER(VCPU, exits), 40 STATS_DESC_COUNTER(VCPU, instr_illegal_exits), 41 STATS_DESC_COUNTER(VCPU, load_misaligned_exits), 42 STATS_DESC_COUNTER(VCPU, store_misaligned_exits), 43 STATS_DESC_COUNTER(VCPU, load_access_exits), 44 STATS_DESC_COUNTER(VCPU, store_access_exits), 45 }; 46 47 const struct kvm_stats_header kvm_vcpu_stats_header = { 48 .name_size = KVM_STATS_NAME_SIZE, 49 .num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc), 50 .id_offset = sizeof(struct kvm_stats_header), 51 .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE, 52 .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE + 53 sizeof(kvm_vcpu_stats_desc), 54 }; 55 56 static void kvm_riscv_vcpu_context_reset(struct kvm_vcpu *vcpu, 57 bool kvm_sbi_reset) 58 { 59 struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; 60 struct kvm_cpu_context *cntx = &vcpu->arch.guest_context; 61 void *vector_datap = cntx->vector.datap; 62 63 memset(cntx, 0, sizeof(*cntx)); 64 memset(csr, 0, sizeof(*csr)); 65 memset(&vcpu->arch.smstateen_csr, 0, sizeof(vcpu->arch.smstateen_csr)); 66 67 /* Restore datap as it's not a part of the guest context. */ 68 cntx->vector.datap = vector_datap; 69 70 if (kvm_sbi_reset) 71 kvm_riscv_vcpu_sbi_load_reset_state(vcpu); 72 73 /* Setup reset state of shadow SSTATUS and HSTATUS CSRs */ 74 cntx->sstatus = SR_SPP | SR_SPIE; 75 76 cntx->hstatus |= HSTATUS_VTW; 77 cntx->hstatus |= HSTATUS_SPVP; 78 cntx->hstatus |= HSTATUS_SPV; 79 } 80 81 static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu, bool kvm_sbi_reset) 82 { 83 bool loaded; 84 85 /** 86 * The preemption should be disabled here because it races with 87 * kvm_sched_out/kvm_sched_in(called from preempt notifiers) which 88 * also calls vcpu_load/put. 89 */ 90 get_cpu(); 91 loaded = (vcpu->cpu != -1); 92 if (loaded) 93 kvm_arch_vcpu_put(vcpu); 94 95 vcpu->arch.last_exit_cpu = -1; 96 97 kvm_riscv_vcpu_context_reset(vcpu, kvm_sbi_reset); 98 99 kvm_riscv_vcpu_fp_reset(vcpu); 100 101 kvm_riscv_vcpu_vector_reset(vcpu); 102 103 kvm_riscv_vcpu_timer_reset(vcpu); 104 105 kvm_riscv_vcpu_aia_reset(vcpu); 106 107 bitmap_zero(vcpu->arch.irqs_pending, KVM_RISCV_VCPU_NR_IRQS); 108 bitmap_zero(vcpu->arch.irqs_pending_mask, KVM_RISCV_VCPU_NR_IRQS); 109 110 kvm_riscv_vcpu_pmu_reset(vcpu); 111 112 vcpu->arch.hfence_head = 0; 113 vcpu->arch.hfence_tail = 0; 114 memset(vcpu->arch.hfence_queue, 0, sizeof(vcpu->arch.hfence_queue)); 115 116 kvm_riscv_vcpu_sbi_reset(vcpu); 117 118 /* Reset the guest CSRs for hotplug usecase */ 119 if (loaded) 120 kvm_arch_vcpu_load(vcpu, smp_processor_id()); 121 put_cpu(); 122 } 123 124 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) 125 { 126 return 0; 127 } 128 129 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) 130 { 131 int rc; 132 133 spin_lock_init(&vcpu->arch.mp_state_lock); 134 135 /* Mark this VCPU never ran */ 136 vcpu->arch.ran_atleast_once = false; 137 138 vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO; 139 bitmap_zero(vcpu->arch.isa, RISCV_ISA_EXT_MAX); 140 141 /* Setup VCPU config */ 142 kvm_riscv_vcpu_config_init(vcpu); 143 144 /* Setup ISA features available to VCPU */ 145 kvm_riscv_vcpu_setup_isa(vcpu); 146 147 /* Setup vendor, arch, and implementation details */ 148 vcpu->arch.mvendorid = sbi_get_mvendorid(); 149 vcpu->arch.marchid = sbi_get_marchid(); 150 vcpu->arch.mimpid = sbi_get_mimpid(); 151 152 /* Setup VCPU hfence queue */ 153 spin_lock_init(&vcpu->arch.hfence_lock); 154 155 spin_lock_init(&vcpu->arch.reset_state.lock); 156 157 rc = kvm_riscv_vcpu_alloc_vector_context(vcpu); 158 if (rc) 159 return rc; 160 161 /* Setup VCPU timer */ 162 kvm_riscv_vcpu_timer_init(vcpu); 163 164 /* setup performance monitoring */ 165 kvm_riscv_vcpu_pmu_init(vcpu); 166 167 /* Setup VCPU AIA */ 168 kvm_riscv_vcpu_aia_init(vcpu); 169 170 /* 171 * Setup SBI extensions 172 * NOTE: This must be the last thing to be initialized. 173 */ 174 kvm_riscv_vcpu_sbi_init(vcpu); 175 176 /* Reset VCPU */ 177 kvm_riscv_reset_vcpu(vcpu, false); 178 179 return 0; 180 } 181 182 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) 183 { 184 /** 185 * vcpu with id 0 is the designated boot cpu. 186 * Keep all vcpus with non-zero id in power-off state so that 187 * they can be brought up using SBI HSM extension. 188 */ 189 if (vcpu->vcpu_idx != 0) 190 kvm_riscv_vcpu_power_off(vcpu); 191 } 192 193 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) 194 { 195 kvm_riscv_vcpu_sbi_deinit(vcpu); 196 197 /* Cleanup VCPU AIA context */ 198 kvm_riscv_vcpu_aia_deinit(vcpu); 199 200 /* Cleanup VCPU timer */ 201 kvm_riscv_vcpu_timer_deinit(vcpu); 202 203 kvm_riscv_vcpu_pmu_deinit(vcpu); 204 205 /* Free unused pages pre-allocated for G-stage page table mappings */ 206 kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache); 207 208 /* Free vector context space for host and guest kernel */ 209 kvm_riscv_vcpu_free_vector_context(vcpu); 210 } 211 212 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) 213 { 214 return kvm_riscv_vcpu_timer_pending(vcpu); 215 } 216 217 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) 218 { 219 return (kvm_riscv_vcpu_has_interrupts(vcpu, -1ULL) && 220 !kvm_riscv_vcpu_stopped(vcpu) && !vcpu->arch.pause); 221 } 222 223 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) 224 { 225 return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; 226 } 227 228 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) 229 { 230 return (vcpu->arch.guest_context.sstatus & SR_SPP) ? true : false; 231 } 232 233 #ifdef CONFIG_GUEST_PERF_EVENTS 234 unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu) 235 { 236 return vcpu->arch.guest_context.sepc; 237 } 238 #endif 239 240 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) 241 { 242 return VM_FAULT_SIGBUS; 243 } 244 245 long kvm_arch_vcpu_unlocked_ioctl(struct file *filp, unsigned int ioctl, 246 unsigned long arg) 247 { 248 struct kvm_vcpu *vcpu = filp->private_data; 249 void __user *argp = (void __user *)arg; 250 251 if (ioctl == KVM_INTERRUPT) { 252 struct kvm_interrupt irq; 253 254 if (copy_from_user(&irq, argp, sizeof(irq))) 255 return -EFAULT; 256 257 if (irq.irq == KVM_INTERRUPT_SET) 258 return kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_VS_EXT); 259 else 260 return kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_EXT); 261 } 262 263 return -ENOIOCTLCMD; 264 } 265 266 long kvm_arch_vcpu_ioctl(struct file *filp, 267 unsigned int ioctl, unsigned long arg) 268 { 269 struct kvm_vcpu *vcpu = filp->private_data; 270 void __user *argp = (void __user *)arg; 271 long r = -EINVAL; 272 273 switch (ioctl) { 274 case KVM_SET_ONE_REG: 275 case KVM_GET_ONE_REG: { 276 struct kvm_one_reg reg; 277 278 r = -EFAULT; 279 if (copy_from_user(®, argp, sizeof(reg))) 280 break; 281 282 if (ioctl == KVM_SET_ONE_REG) 283 r = kvm_riscv_vcpu_set_reg(vcpu, ®); 284 else 285 r = kvm_riscv_vcpu_get_reg(vcpu, ®); 286 break; 287 } 288 case KVM_GET_REG_LIST: { 289 struct kvm_reg_list __user *user_list = argp; 290 struct kvm_reg_list reg_list; 291 unsigned int n; 292 293 r = -EFAULT; 294 if (copy_from_user(®_list, user_list, sizeof(reg_list))) 295 break; 296 n = reg_list.n; 297 reg_list.n = kvm_riscv_vcpu_num_regs(vcpu); 298 if (copy_to_user(user_list, ®_list, sizeof(reg_list))) 299 break; 300 r = -E2BIG; 301 if (n < reg_list.n) 302 break; 303 r = kvm_riscv_vcpu_copy_reg_indices(vcpu, user_list->reg); 304 break; 305 } 306 default: 307 break; 308 } 309 310 return r; 311 } 312 313 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, 314 struct kvm_sregs *sregs) 315 { 316 return -EINVAL; 317 } 318 319 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, 320 struct kvm_sregs *sregs) 321 { 322 return -EINVAL; 323 } 324 325 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 326 { 327 return -EINVAL; 328 } 329 330 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 331 { 332 return -EINVAL; 333 } 334 335 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, 336 struct kvm_translation *tr) 337 { 338 return -EINVAL; 339 } 340 341 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 342 { 343 return -EINVAL; 344 } 345 346 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) 347 { 348 return -EINVAL; 349 } 350 351 void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu) 352 { 353 struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; 354 unsigned long mask, val; 355 356 if (READ_ONCE(vcpu->arch.irqs_pending_mask[0])) { 357 mask = xchg_acquire(&vcpu->arch.irqs_pending_mask[0], 0); 358 val = READ_ONCE(vcpu->arch.irqs_pending[0]) & mask; 359 360 csr->hvip &= ~mask; 361 csr->hvip |= val; 362 } 363 364 /* Flush AIA high interrupts */ 365 kvm_riscv_vcpu_aia_flush_interrupts(vcpu); 366 } 367 368 void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu) 369 { 370 unsigned long hvip; 371 struct kvm_vcpu_arch *v = &vcpu->arch; 372 struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; 373 374 /* Read current HVIP and VSIE CSRs */ 375 csr->vsie = ncsr_read(CSR_VSIE); 376 377 /* Sync-up HVIP.VSSIP bit changes does by Guest */ 378 hvip = ncsr_read(CSR_HVIP); 379 if ((csr->hvip ^ hvip) & (1UL << IRQ_VS_SOFT)) { 380 if (hvip & (1UL << IRQ_VS_SOFT)) { 381 if (!test_and_set_bit(IRQ_VS_SOFT, 382 v->irqs_pending_mask)) 383 set_bit(IRQ_VS_SOFT, v->irqs_pending); 384 } else { 385 if (!test_and_set_bit(IRQ_VS_SOFT, 386 v->irqs_pending_mask)) 387 clear_bit(IRQ_VS_SOFT, v->irqs_pending); 388 } 389 } 390 391 /* Sync up the HVIP.LCOFIP bit changes (only clear) by the guest */ 392 if ((csr->hvip ^ hvip) & (1UL << IRQ_PMU_OVF)) { 393 if (!(hvip & (1UL << IRQ_PMU_OVF)) && 394 !test_and_set_bit(IRQ_PMU_OVF, v->irqs_pending_mask)) 395 clear_bit(IRQ_PMU_OVF, v->irqs_pending); 396 } 397 398 /* Sync-up AIA high interrupts */ 399 kvm_riscv_vcpu_aia_sync_interrupts(vcpu); 400 401 /* Sync-up timer CSRs */ 402 kvm_riscv_vcpu_timer_sync(vcpu); 403 } 404 405 int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq) 406 { 407 /* 408 * We only allow VS-mode software, timer, and external 409 * interrupts when irq is one of the local interrupts 410 * defined by RISC-V privilege specification. 411 */ 412 if (irq < IRQ_LOCAL_MAX && 413 irq != IRQ_VS_SOFT && 414 irq != IRQ_VS_TIMER && 415 irq != IRQ_VS_EXT && 416 irq != IRQ_PMU_OVF) 417 return -EINVAL; 418 419 set_bit(irq, vcpu->arch.irqs_pending); 420 smp_mb__before_atomic(); 421 set_bit(irq, vcpu->arch.irqs_pending_mask); 422 423 kvm_vcpu_kick(vcpu); 424 425 return 0; 426 } 427 428 int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq) 429 { 430 /* 431 * We only allow VS-mode software, timer, counter overflow and external 432 * interrupts when irq is one of the local interrupts 433 * defined by RISC-V privilege specification. 434 */ 435 if (irq < IRQ_LOCAL_MAX && 436 irq != IRQ_VS_SOFT && 437 irq != IRQ_VS_TIMER && 438 irq != IRQ_VS_EXT && 439 irq != IRQ_PMU_OVF) 440 return -EINVAL; 441 442 clear_bit(irq, vcpu->arch.irqs_pending); 443 smp_mb__before_atomic(); 444 set_bit(irq, vcpu->arch.irqs_pending_mask); 445 446 return 0; 447 } 448 449 bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, u64 mask) 450 { 451 unsigned long ie; 452 453 ie = ((vcpu->arch.guest_csr.vsie & VSIP_VALID_MASK) 454 << VSIP_TO_HVIP_SHIFT) & (unsigned long)mask; 455 ie |= vcpu->arch.guest_csr.vsie & ~IRQ_LOCAL_MASK & 456 (unsigned long)mask; 457 if (READ_ONCE(vcpu->arch.irqs_pending[0]) & ie) 458 return true; 459 460 /* Check AIA high interrupts */ 461 return kvm_riscv_vcpu_aia_has_interrupts(vcpu, mask); 462 } 463 464 void __kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu) 465 { 466 WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED); 467 kvm_make_request(KVM_REQ_SLEEP, vcpu); 468 kvm_vcpu_kick(vcpu); 469 } 470 471 void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu) 472 { 473 spin_lock(&vcpu->arch.mp_state_lock); 474 __kvm_riscv_vcpu_power_off(vcpu); 475 spin_unlock(&vcpu->arch.mp_state_lock); 476 } 477 478 void __kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu) 479 { 480 WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_RUNNABLE); 481 kvm_vcpu_wake_up(vcpu); 482 } 483 484 void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu) 485 { 486 spin_lock(&vcpu->arch.mp_state_lock); 487 __kvm_riscv_vcpu_power_on(vcpu); 488 spin_unlock(&vcpu->arch.mp_state_lock); 489 } 490 491 bool kvm_riscv_vcpu_stopped(struct kvm_vcpu *vcpu) 492 { 493 return READ_ONCE(vcpu->arch.mp_state.mp_state) == KVM_MP_STATE_STOPPED; 494 } 495 496 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, 497 struct kvm_mp_state *mp_state) 498 { 499 *mp_state = READ_ONCE(vcpu->arch.mp_state); 500 501 return 0; 502 } 503 504 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, 505 struct kvm_mp_state *mp_state) 506 { 507 int ret = 0; 508 509 spin_lock(&vcpu->arch.mp_state_lock); 510 511 switch (mp_state->mp_state) { 512 case KVM_MP_STATE_RUNNABLE: 513 WRITE_ONCE(vcpu->arch.mp_state, *mp_state); 514 break; 515 case KVM_MP_STATE_STOPPED: 516 __kvm_riscv_vcpu_power_off(vcpu); 517 break; 518 case KVM_MP_STATE_INIT_RECEIVED: 519 if (vcpu->kvm->arch.mp_state_reset) 520 kvm_riscv_reset_vcpu(vcpu, false); 521 else 522 ret = -EINVAL; 523 break; 524 default: 525 ret = -EINVAL; 526 } 527 528 spin_unlock(&vcpu->arch.mp_state_lock); 529 530 return ret; 531 } 532 533 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, 534 struct kvm_guest_debug *dbg) 535 { 536 if (dbg->control & KVM_GUESTDBG_ENABLE) 537 vcpu->guest_debug = dbg->control; 538 else 539 vcpu->guest_debug = 0; 540 541 kvm_riscv_vcpu_config_guest_debug(vcpu); 542 return 0; 543 } 544 545 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 546 { 547 void *nsh; 548 struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; 549 550 /* 551 * If VCPU is being reloaded on the same physical CPU and no 552 * other KVM VCPU has run on this CPU since it was last put, 553 * we can skip the expensive CSR and HGATP writes. 554 * 555 * Note: If a new CSR is added to this fast-path skip block, 556 * make sure that 'csr_dirty' is set to true in any 557 * ioctl (e.g., KVM_SET_ONE_REG) that modifies it. 558 */ 559 if (vcpu != __this_cpu_read(kvm_former_vcpu)) 560 __this_cpu_write(kvm_former_vcpu, vcpu); 561 else if (vcpu->arch.last_exit_cpu == cpu && !vcpu->arch.csr_dirty) 562 goto csr_restore_done; 563 564 vcpu->arch.csr_dirty = false; 565 566 /* 567 * Load VCPU config CSRs before other CSRs because 568 * the read/write behaviour of certain CSRs change 569 * based on VCPU config CSRs. 570 */ 571 kvm_riscv_vcpu_config_load(vcpu); 572 573 if (kvm_riscv_nacl_sync_csr_available()) { 574 nsh = nacl_shmem(); 575 nacl_csr_write(nsh, CSR_VSSTATUS, csr->vsstatus); 576 nacl_csr_write(nsh, CSR_VSIE, csr->vsie); 577 nacl_csr_write(nsh, CSR_VSTVEC, csr->vstvec); 578 nacl_csr_write(nsh, CSR_VSSCRATCH, csr->vsscratch); 579 nacl_csr_write(nsh, CSR_VSEPC, csr->vsepc); 580 nacl_csr_write(nsh, CSR_VSCAUSE, csr->vscause); 581 nacl_csr_write(nsh, CSR_VSTVAL, csr->vstval); 582 nacl_csr_write(nsh, CSR_HVIP, csr->hvip); 583 nacl_csr_write(nsh, CSR_VSATP, csr->vsatp); 584 } else { 585 csr_write(CSR_VSSTATUS, csr->vsstatus); 586 csr_write(CSR_VSIE, csr->vsie); 587 csr_write(CSR_VSTVEC, csr->vstvec); 588 csr_write(CSR_VSSCRATCH, csr->vsscratch); 589 csr_write(CSR_VSEPC, csr->vsepc); 590 csr_write(CSR_VSCAUSE, csr->vscause); 591 csr_write(CSR_VSTVAL, csr->vstval); 592 csr_write(CSR_HVIP, csr->hvip); 593 csr_write(CSR_VSATP, csr->vsatp); 594 } 595 596 kvm_riscv_mmu_update_hgatp(vcpu); 597 598 kvm_riscv_vcpu_aia_load(vcpu, cpu); 599 600 csr_restore_done: 601 kvm_riscv_vcpu_timer_restore(vcpu); 602 603 kvm_riscv_vcpu_host_fp_save(&vcpu->arch.host_context); 604 kvm_riscv_vcpu_guest_fp_restore(&vcpu->arch.guest_context, 605 vcpu->arch.isa); 606 kvm_riscv_vcpu_host_vector_save(&vcpu->arch.host_context); 607 kvm_riscv_vcpu_guest_vector_restore(&vcpu->arch.guest_context, 608 vcpu->arch.isa); 609 610 kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); 611 612 vcpu->cpu = cpu; 613 } 614 615 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 616 { 617 void *nsh; 618 struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; 619 620 vcpu->cpu = -1; 621 622 kvm_riscv_vcpu_aia_put(vcpu); 623 624 kvm_riscv_vcpu_guest_fp_save(&vcpu->arch.guest_context, 625 vcpu->arch.isa); 626 kvm_riscv_vcpu_host_fp_restore(&vcpu->arch.host_context); 627 628 kvm_riscv_vcpu_timer_save(vcpu); 629 kvm_riscv_vcpu_guest_vector_save(&vcpu->arch.guest_context, 630 vcpu->arch.isa); 631 kvm_riscv_vcpu_host_vector_restore(&vcpu->arch.host_context); 632 633 if (kvm_riscv_nacl_available()) { 634 nsh = nacl_shmem(); 635 csr->vsstatus = nacl_csr_read(nsh, CSR_VSSTATUS); 636 csr->vsie = nacl_csr_read(nsh, CSR_VSIE); 637 csr->vstvec = nacl_csr_read(nsh, CSR_VSTVEC); 638 csr->vsscratch = nacl_csr_read(nsh, CSR_VSSCRATCH); 639 csr->vsepc = nacl_csr_read(nsh, CSR_VSEPC); 640 csr->vscause = nacl_csr_read(nsh, CSR_VSCAUSE); 641 csr->vstval = nacl_csr_read(nsh, CSR_VSTVAL); 642 csr->hvip = nacl_csr_read(nsh, CSR_HVIP); 643 csr->vsatp = nacl_csr_read(nsh, CSR_VSATP); 644 } else { 645 csr->vsstatus = csr_read(CSR_VSSTATUS); 646 csr->vsie = csr_read(CSR_VSIE); 647 csr->vstvec = csr_read(CSR_VSTVEC); 648 csr->vsscratch = csr_read(CSR_VSSCRATCH); 649 csr->vsepc = csr_read(CSR_VSEPC); 650 csr->vscause = csr_read(CSR_VSCAUSE); 651 csr->vstval = csr_read(CSR_VSTVAL); 652 csr->hvip = csr_read(CSR_HVIP); 653 csr->vsatp = csr_read(CSR_VSATP); 654 } 655 } 656 657 /** 658 * kvm_riscv_check_vcpu_requests - check and handle pending vCPU requests 659 * @vcpu: the VCPU pointer 660 * 661 * Return: 1 if we should enter the guest 662 * 0 if we should exit to userspace 663 */ 664 static int kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu) 665 { 666 struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu); 667 668 if (kvm_request_pending(vcpu)) { 669 if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) { 670 kvm_vcpu_srcu_read_unlock(vcpu); 671 rcuwait_wait_event(wait, 672 (!kvm_riscv_vcpu_stopped(vcpu)) && (!vcpu->arch.pause), 673 TASK_INTERRUPTIBLE); 674 kvm_vcpu_srcu_read_lock(vcpu); 675 676 if (kvm_riscv_vcpu_stopped(vcpu) || vcpu->arch.pause) { 677 /* 678 * Awaken to handle a signal, request to 679 * sleep again later. 680 */ 681 kvm_make_request(KVM_REQ_SLEEP, vcpu); 682 } 683 } 684 685 if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu)) 686 kvm_riscv_reset_vcpu(vcpu, true); 687 688 if (kvm_check_request(KVM_REQ_UPDATE_HGATP, vcpu)) 689 kvm_riscv_mmu_update_hgatp(vcpu); 690 691 if (kvm_check_request(KVM_REQ_FENCE_I, vcpu)) 692 kvm_riscv_fence_i_process(vcpu); 693 694 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) 695 kvm_riscv_tlb_flush_process(vcpu); 696 697 if (kvm_check_request(KVM_REQ_HFENCE_VVMA_ALL, vcpu)) 698 kvm_riscv_hfence_vvma_all_process(vcpu); 699 700 if (kvm_check_request(KVM_REQ_HFENCE, vcpu)) 701 kvm_riscv_hfence_process(vcpu); 702 703 if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu)) 704 kvm_riscv_vcpu_record_steal_time(vcpu); 705 706 if (kvm_dirty_ring_check_request(vcpu)) 707 return 0; 708 } 709 710 return 1; 711 } 712 713 static void kvm_riscv_update_hvip(struct kvm_vcpu *vcpu) 714 { 715 struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; 716 717 ncsr_write(CSR_HVIP, csr->hvip); 718 kvm_riscv_vcpu_aia_update_hvip(vcpu); 719 } 720 721 static __always_inline void kvm_riscv_vcpu_swap_in_guest_state(struct kvm_vcpu *vcpu) 722 { 723 struct kvm_vcpu_smstateen_csr *smcsr = &vcpu->arch.smstateen_csr; 724 struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; 725 726 vcpu->arch.host_scounteren = csr_swap(CSR_SCOUNTEREN, csr->scounteren); 727 vcpu->arch.host_senvcfg = csr_swap(CSR_SENVCFG, csr->senvcfg); 728 if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) 729 vcpu->arch.host_sstateen0 = csr_swap(CSR_SSTATEEN0, smcsr->sstateen0); 730 } 731 732 static __always_inline void kvm_riscv_vcpu_swap_in_host_state(struct kvm_vcpu *vcpu) 733 { 734 struct kvm_vcpu_smstateen_csr *smcsr = &vcpu->arch.smstateen_csr; 735 struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; 736 737 csr->scounteren = csr_swap(CSR_SCOUNTEREN, vcpu->arch.host_scounteren); 738 csr->senvcfg = csr_swap(CSR_SENVCFG, vcpu->arch.host_senvcfg); 739 if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) 740 smcsr->sstateen0 = csr_swap(CSR_SSTATEEN0, vcpu->arch.host_sstateen0); 741 } 742 743 /* 744 * Actually run the vCPU, entering an RCU extended quiescent state (EQS) while 745 * the vCPU is running. 746 * 747 * This must be noinstr as instrumentation may make use of RCU, and this is not 748 * safe during the EQS. 749 */ 750 static void noinstr kvm_riscv_vcpu_enter_exit(struct kvm_vcpu *vcpu, 751 struct kvm_cpu_trap *trap) 752 { 753 void *nsh; 754 struct kvm_cpu_context *gcntx = &vcpu->arch.guest_context; 755 struct kvm_cpu_context *hcntx = &vcpu->arch.host_context; 756 757 /* 758 * We save trap CSRs (such as SEPC, SCAUSE, STVAL, HTVAL, and 759 * HTINST) here because we do local_irq_enable() after this 760 * function in kvm_arch_vcpu_ioctl_run() which can result in 761 * an interrupt immediately after local_irq_enable() and can 762 * potentially change trap CSRs. 763 */ 764 765 kvm_riscv_vcpu_swap_in_guest_state(vcpu); 766 guest_state_enter_irqoff(); 767 768 if (kvm_riscv_nacl_sync_sret_available()) { 769 nsh = nacl_shmem(); 770 771 if (kvm_riscv_nacl_autoswap_csr_available()) { 772 hcntx->hstatus = 773 nacl_csr_read(nsh, CSR_HSTATUS); 774 nacl_scratch_write_long(nsh, 775 SBI_NACL_SHMEM_AUTOSWAP_OFFSET + 776 SBI_NACL_SHMEM_AUTOSWAP_HSTATUS, 777 gcntx->hstatus); 778 nacl_scratch_write_long(nsh, 779 SBI_NACL_SHMEM_AUTOSWAP_OFFSET, 780 SBI_NACL_SHMEM_AUTOSWAP_FLAG_HSTATUS); 781 } else if (kvm_riscv_nacl_sync_csr_available()) { 782 hcntx->hstatus = nacl_csr_swap(nsh, 783 CSR_HSTATUS, gcntx->hstatus); 784 } else { 785 hcntx->hstatus = csr_swap(CSR_HSTATUS, gcntx->hstatus); 786 } 787 788 nacl_scratch_write_longs(nsh, 789 SBI_NACL_SHMEM_SRET_OFFSET + 790 SBI_NACL_SHMEM_SRET_X(1), 791 &gcntx->ra, 792 SBI_NACL_SHMEM_SRET_X_LAST); 793 794 __kvm_riscv_nacl_switch_to(&vcpu->arch, SBI_EXT_NACL, 795 SBI_EXT_NACL_SYNC_SRET); 796 797 if (kvm_riscv_nacl_autoswap_csr_available()) { 798 nacl_scratch_write_long(nsh, 799 SBI_NACL_SHMEM_AUTOSWAP_OFFSET, 800 0); 801 gcntx->hstatus = nacl_scratch_read_long(nsh, 802 SBI_NACL_SHMEM_AUTOSWAP_OFFSET + 803 SBI_NACL_SHMEM_AUTOSWAP_HSTATUS); 804 } else { 805 gcntx->hstatus = csr_swap(CSR_HSTATUS, hcntx->hstatus); 806 } 807 808 trap->htval = nacl_csr_read(nsh, CSR_HTVAL); 809 trap->htinst = nacl_csr_read(nsh, CSR_HTINST); 810 } else { 811 hcntx->hstatus = csr_swap(CSR_HSTATUS, gcntx->hstatus); 812 813 __kvm_riscv_switch_to(&vcpu->arch); 814 815 gcntx->hstatus = csr_swap(CSR_HSTATUS, hcntx->hstatus); 816 817 trap->htval = csr_read(CSR_HTVAL); 818 trap->htinst = csr_read(CSR_HTINST); 819 } 820 821 trap->sepc = gcntx->sepc; 822 trap->scause = csr_read(CSR_SCAUSE); 823 trap->stval = csr_read(CSR_STVAL); 824 825 vcpu->arch.last_exit_cpu = vcpu->cpu; 826 guest_state_exit_irqoff(); 827 kvm_riscv_vcpu_swap_in_host_state(vcpu); 828 } 829 830 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) 831 { 832 int ret; 833 struct kvm_cpu_trap trap; 834 struct kvm_run *run = vcpu->run; 835 836 if (!vcpu->arch.ran_atleast_once) 837 kvm_riscv_vcpu_config_ran_once(vcpu); 838 839 /* Mark this VCPU ran at least once */ 840 vcpu->arch.ran_atleast_once = true; 841 842 kvm_vcpu_srcu_read_lock(vcpu); 843 844 switch (run->exit_reason) { 845 case KVM_EXIT_MMIO: 846 /* Process MMIO value returned from user-space */ 847 ret = kvm_riscv_vcpu_mmio_return(vcpu, vcpu->run); 848 break; 849 case KVM_EXIT_RISCV_SBI: 850 /* Process SBI value returned from user-space */ 851 ret = kvm_riscv_vcpu_sbi_return(vcpu, vcpu->run); 852 break; 853 case KVM_EXIT_RISCV_CSR: 854 /* Process CSR value returned from user-space */ 855 ret = kvm_riscv_vcpu_csr_return(vcpu, vcpu->run); 856 break; 857 default: 858 ret = 0; 859 break; 860 } 861 if (ret) { 862 kvm_vcpu_srcu_read_unlock(vcpu); 863 return ret; 864 } 865 866 if (!vcpu->wants_to_run) { 867 kvm_vcpu_srcu_read_unlock(vcpu); 868 return -EINTR; 869 } 870 871 vcpu_load(vcpu); 872 873 kvm_sigset_activate(vcpu); 874 875 ret = 1; 876 run->exit_reason = KVM_EXIT_UNKNOWN; 877 while (ret > 0) { 878 /* Check conditions before entering the guest */ 879 ret = kvm_xfer_to_guest_mode_handle_work(vcpu); 880 if (ret) 881 continue; 882 ret = 1; 883 884 kvm_riscv_gstage_vmid_update(vcpu); 885 886 ret = kvm_riscv_check_vcpu_requests(vcpu); 887 if (ret <= 0) 888 continue; 889 890 preempt_disable(); 891 892 /* Update AIA HW state before entering guest */ 893 ret = kvm_riscv_vcpu_aia_update(vcpu); 894 if (ret <= 0) { 895 preempt_enable(); 896 continue; 897 } 898 899 local_irq_disable(); 900 901 /* 902 * Ensure we set mode to IN_GUEST_MODE after we disable 903 * interrupts and before the final VCPU requests check. 904 * See the comment in kvm_vcpu_exiting_guest_mode() and 905 * Documentation/virt/kvm/vcpu-requests.rst 906 */ 907 vcpu->mode = IN_GUEST_MODE; 908 909 kvm_vcpu_srcu_read_unlock(vcpu); 910 smp_mb__after_srcu_read_unlock(); 911 912 /* 913 * We might have got VCPU interrupts updated asynchronously 914 * so update it in HW. 915 */ 916 kvm_riscv_vcpu_flush_interrupts(vcpu); 917 918 /* Update HVIP CSR for current CPU */ 919 kvm_riscv_update_hvip(vcpu); 920 921 if (kvm_riscv_gstage_vmid_ver_changed(&vcpu->kvm->arch.vmid) || 922 kvm_request_pending(vcpu) || 923 xfer_to_guest_mode_work_pending()) { 924 vcpu->mode = OUTSIDE_GUEST_MODE; 925 local_irq_enable(); 926 preempt_enable(); 927 kvm_vcpu_srcu_read_lock(vcpu); 928 continue; 929 } 930 931 /* 932 * Sanitize VMID mappings cached (TLB) on current CPU 933 * 934 * Note: This should be done after G-stage VMID has been 935 * updated using kvm_riscv_gstage_vmid_ver_changed() 936 */ 937 kvm_riscv_local_tlb_sanitize(vcpu); 938 939 trace_kvm_entry(vcpu); 940 941 guest_timing_enter_irqoff(); 942 943 kvm_riscv_vcpu_enter_exit(vcpu, &trap); 944 945 vcpu->mode = OUTSIDE_GUEST_MODE; 946 vcpu->stat.exits++; 947 948 /* Syncup interrupts state with HW */ 949 kvm_riscv_vcpu_sync_interrupts(vcpu); 950 951 /* 952 * We must ensure that any pending interrupts are taken before 953 * we exit guest timing so that timer ticks are accounted as 954 * guest time. Transiently unmask interrupts so that any 955 * pending interrupts are taken. 956 * 957 * There's no barrier which ensures that pending interrupts are 958 * recognised, so we just hope that the CPU takes any pending 959 * interrupts between the enable and disable. 960 */ 961 local_irq_enable(); 962 local_irq_disable(); 963 964 guest_timing_exit_irqoff(); 965 966 local_irq_enable(); 967 968 trace_kvm_exit(&trap); 969 970 preempt_enable(); 971 972 kvm_vcpu_srcu_read_lock(vcpu); 973 974 ret = kvm_riscv_vcpu_exit(vcpu, run, &trap); 975 } 976 977 kvm_sigset_deactivate(vcpu); 978 979 vcpu_put(vcpu); 980 981 kvm_vcpu_srcu_read_unlock(vcpu); 982 983 return ret; 984 } 985