// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012 - Virtual Open Systems and Columbia University
 * Author: Christoffer Dall <c.dall@virtualopensystems.com>
 */

#include <linux/bug.h>
#include <linux/cpu_pm.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/kvm_host.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/fs.h>
#include <linux/mman.h>
#include <linux/sched.h>
#include <linux/kvm.h>
#include <linux/kvm_irqfd.h>
#include <linux/irqbypass.h>
#include <linux/sched/stat.h>
#include <trace/events/kvm.h>

#define CREATE_TRACE_POINTS
#include "trace_arm.h"

#include <linux/uaccess.h>
#include <asm/ptrace.h>
#include <asm/mman.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
#include <asm/cpufeature.h>
#include <asm/virt.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_coproc.h>
#include <asm/sections.h>

#include <kvm/arm_hypercalls.h>
#include <kvm/arm_pmu.h>
#include <kvm/arm_psci.h>

#ifdef REQUIRES_VIRT
__asm__(".arch_extension virt");
#endif

DEFINE_PER_CPU(kvm_host_data_t, kvm_host_data);
static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);

/* The VMID used in the VTTBR */
static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
static u32 kvm_next_vmid;
static DEFINE_SPINLOCK(kvm_vmid_lock);

static bool vgic_present;

static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled);
DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use);

int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
}

int kvm_arch_hardware_setup(void *opaque)
{
	return 0;
}

int kvm_arch_check_processor_compat(void *opaque)
{
	return 0;
}

int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
			    struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_ARM_NISV_TO_USER:
		r = 0;
		kvm->arch.return_nisv_io_abort_to_user = true;
		break;
	default:
		r = -EINVAL;
		break;
	}

	return r;
}

static int kvm_arm_default_max_vcpus(void)
{
	return vgic_present ? kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS;
}

/**
 * kvm_arch_init_vm - initializes a VM data structure
 * @kvm: pointer to the KVM struct
 */
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	int ret, cpu;

	ret = kvm_arm_setup_stage2(kvm, type);
	if (ret)
		return ret;

	kvm->arch.last_vcpu_ran = alloc_percpu(typeof(*kvm->arch.last_vcpu_ran));
	if (!kvm->arch.last_vcpu_ran)
		return -ENOMEM;

	for_each_possible_cpu(cpu)
		*per_cpu_ptr(kvm->arch.last_vcpu_ran, cpu) = -1;

	ret = kvm_alloc_stage2_pgd(kvm);
	if (ret)
		goto out_fail_alloc;

	ret = create_hyp_mappings(kvm, kvm + 1, PAGE_HYP);
	if (ret)
		goto out_free_stage2_pgd;

	kvm_vgic_early_init(kvm);

	/* Mark the initial VMID generation invalid */
	kvm->arch.vmid.vmid_gen = 0;

	/* The maximum number of VCPUs is limited by the host's GIC model */
	kvm->arch.max_vcpus = kvm_arm_default_max_vcpus();

	return ret;
out_free_stage2_pgd:
	kvm_free_stage2_pgd(kvm);
out_fail_alloc:
	free_percpu(kvm->arch.last_vcpu_ran);
	kvm->arch.last_vcpu_ran = NULL;
	return ret;
}

vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
	return VM_FAULT_SIGBUS;
}


/**
 * kvm_arch_destroy_vm - destroy the VM data structure
 * @kvm: pointer to the KVM struct
 */
void kvm_arch_destroy_vm(struct kvm *kvm)
{
	int i;

	kvm_vgic_destroy(kvm);

	free_percpu(kvm->arch.last_vcpu_ran);
	kvm->arch.last_vcpu_ran = NULL;

	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
		if (kvm->vcpus[i]) {
			kvm_vcpu_destroy(kvm->vcpus[i]);
			kvm->vcpus[i] = NULL;
		}
	}
	atomic_set(&kvm->online_vcpus, 0);
}

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;
	switch (ext) {
	case KVM_CAP_IRQCHIP:
		r = vgic_present;
		break;
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_USER_MEMORY:
	case KVM_CAP_SYNC_MMU:
	case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ARM_PSCI:
	case KVM_CAP_ARM_PSCI_0_2:
	case KVM_CAP_READONLY_MEM:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_VCPU_EVENTS:
	case KVM_CAP_ARM_IRQ_LINE_LAYOUT_2:
	case KVM_CAP_ARM_NISV_TO_USER:
	case KVM_CAP_ARM_INJECT_EXT_DABT:
		r = 1;
		break;
	case KVM_CAP_ARM_SET_DEVICE_ADDR:
		r = 1;
		break;
	case KVM_CAP_NR_VCPUS:
		r = num_online_cpus();
		break;
	case KVM_CAP_MAX_VCPUS:
	case KVM_CAP_MAX_VCPU_ID:
		if (kvm)
			r = kvm->arch.max_vcpus;
		else
			r = kvm_arm_default_max_vcpus();
		break;
	case KVM_CAP_MSI_DEVID:
		if (!kvm)
			r = -EINVAL;
		else
			r = kvm->arch.vgic.msis_require_devid;
		break;
	case KVM_CAP_ARM_USER_IRQ:
		/*
		 * 1: EL1_VTIMER, EL1_PTIMER, and PMU.
		 * (bump this number if adding more devices)
		 */
		r = 1;
		break;
	default:
		r = kvm_arch_vm_ioctl_check_extension(kvm, ext);
		break;
	}
	return r;
}

long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	return -EINVAL;
}

struct kvm *kvm_arch_alloc_vm(void)
{
	if (!has_vhe())
		return kzalloc(sizeof(struct kvm), GFP_KERNEL);

	return vzalloc(sizeof(struct kvm));
}

void kvm_arch_free_vm(struct kvm *kvm)
{
	if (!has_vhe())
		kfree(kvm);
	else
		vfree(kvm);
}

int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
{
	if (irqchip_in_kernel(kvm) && vgic_initialized(kvm))
		return -EBUSY;

	if (id >= kvm->arch.max_vcpus)
		return -EINVAL;

	return 0;
}

int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
{
	int err;

	/* Force users to call KVM_ARM_VCPU_INIT */
	vcpu->arch.target = -1;
	bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);

	vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;

	/* Set up the timer */
	kvm_timer_vcpu_init(vcpu);

	kvm_pmu_vcpu_init(vcpu);

	kvm_arm_reset_debug_ptr(vcpu);

	kvm_arm_pvtime_vcpu_init(&vcpu->arch);

	err = kvm_vgic_vcpu_init(vcpu);
	if (err)
		return err;

	return create_hyp_mappings(vcpu, vcpu + 1, PAGE_HYP);
}

void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
}

void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	if (vcpu->arch.has_run_once && unlikely(!irqchip_in_kernel(vcpu->kvm)))
		static_branch_dec(&userspace_irqchip_in_use);

	kvm_mmu_free_memory_caches(vcpu);
	kvm_timer_vcpu_terminate(vcpu);
	kvm_pmu_vcpu_destroy(vcpu);

	kvm_arm_vcpu_destroy(vcpu);
}

int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
	return kvm_timer_is_pending(vcpu);
}

void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
{
	/*
	 * If we're about to block (most likely because we've just hit a
	 * WFI), we need to sync back the state of the GIC CPU interface
	 * so that we have the latest PMR and group enables. This ensures
	 * that kvm_arch_vcpu_runnable has up-to-date data to decide
	 * whether we have pending interrupts.
	 *
	 * For the same reason, we want to tell GICv4 that we need
	 * doorbells to be signalled, should an interrupt become pending.
	 */
	preempt_disable();
	kvm_vgic_vmcr_sync(vcpu);
	vgic_v4_put(vcpu, true);
	preempt_enable();
}

void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
{
	preempt_disable();
	vgic_v4_load(vcpu);
	preempt_enable();
}

void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	int *last_ran;

	last_ran = this_cpu_ptr(vcpu->kvm->arch.last_vcpu_ran);

	/*
	 * We might get preempted before the vCPU actually runs, but
	 * over-invalidation doesn't affect correctness.
	 */
	if (*last_ran != vcpu->vcpu_id) {
		kvm_call_hyp(__kvm_tlb_flush_local_vmid, vcpu);
		*last_ran = vcpu->vcpu_id;
	}

	vcpu->cpu = cpu;

	kvm_vgic_load(vcpu);
	kvm_timer_vcpu_load(vcpu);
	kvm_vcpu_load_sysregs(vcpu);
	kvm_arch_vcpu_load_fp(vcpu);
	kvm_vcpu_pmu_restore_guest(vcpu);
	if (kvm_arm_is_pvtime_enabled(&vcpu->arch))
		kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu);

	if (single_task_running())
		vcpu_clear_wfx_traps(vcpu);
	else
		vcpu_set_wfx_traps(vcpu);

	if (vcpu_has_ptrauth(vcpu))
		vcpu_ptrauth_disable(vcpu);
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	kvm_arch_vcpu_put_fp(vcpu);
	kvm_vcpu_put_sysregs(vcpu);
	kvm_timer_vcpu_put(vcpu);
	kvm_vgic_put(vcpu);
	kvm_vcpu_pmu_restore_host(vcpu);

	vcpu->cpu = -1;
}

static void vcpu_power_off(struct kvm_vcpu *vcpu)
{
	vcpu->arch.power_off = true;
	kvm_make_request(KVM_REQ_SLEEP, vcpu);
	kvm_vcpu_kick(vcpu);
}

int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	if (vcpu->arch.power_off)
		mp_state->mp_state = KVM_MP_STATE_STOPPED;
	else
		mp_state->mp_state = KVM_MP_STATE_RUNNABLE;

	return 0;
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int ret = 0;

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_RUNNABLE:
		vcpu->arch.power_off = false;
		break;
	case KVM_MP_STATE_STOPPED:
		vcpu_power_off(vcpu);
		break;
	default:
		ret = -EINVAL;
	}

	return ret;
}

/**
 * kvm_arch_vcpu_runnable - determine if the vcpu can be scheduled
 * @v: The VCPU pointer
 *
 * If the guest CPU is not waiting for interrupts or an interrupt line is
 * asserted, the CPU is by definition runnable.
 */
int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
{
	bool irq_lines = *vcpu_hcr(v) & (HCR_VI | HCR_VF);
	return ((irq_lines || kvm_vgic_vcpu_pending_irq(v))
		&& !v->arch.power_off && !v->arch.pause);
}

bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
{
	return vcpu_mode_priv(vcpu);
}

/* Just ensure a guest exit from a particular CPU */
static void exit_vm_noop(void *info)
{
}

void force_vm_exit(const cpumask_t *mask)
{
	preempt_disable();
	smp_call_function_many(mask, exit_vm_noop, NULL, true);
	preempt_enable();
}

/**
 * need_new_vmid_gen - check that the VMID is still valid
 * @vmid: The VMID to check
 *
 * return true if there is a new generation of VMIDs being used
 *
 * The hardware supports a limited set of values with the value zero reserved
 * for the host, so we check if an assigned value belongs to a previous
 * generation, which requires us to assign a new value. If we're the first to
 * use a VMID for the new generation, we must flush necessary caches and TLBs
 * on all CPUs.
 */
static bool need_new_vmid_gen(struct kvm_vmid *vmid)
{
	u64 current_vmid_gen = atomic64_read(&kvm_vmid_gen);
	smp_rmb(); /* Orders read of kvm_vmid_gen and kvm->arch.vmid */
	return unlikely(READ_ONCE(vmid->vmid_gen) != current_vmid_gen);
}

/**
 * update_vmid - Update the vmid with a valid VMID for the current generation
 * @kvm: The guest that struct vmid belongs to
 * @vmid: The stage-2 VMID information struct
 */
static void update_vmid(struct kvm_vmid *vmid)
{
	if (!need_new_vmid_gen(vmid))
		return;

	spin_lock(&kvm_vmid_lock);

	/*
	 * We need to re-check the vmid_gen here to ensure that if another vcpu
	 * already allocated a valid vmid for this vm, then this vcpu should
	 * use the same vmid.
	 */
	if (!need_new_vmid_gen(vmid)) {
		spin_unlock(&kvm_vmid_lock);
		return;
	}

	/* First user of a new VMID generation? */
	if (unlikely(kvm_next_vmid == 0)) {
		atomic64_inc(&kvm_vmid_gen);
		kvm_next_vmid = 1;

		/*
		 * On SMP we know no other CPUs can use this CPU's or each
		 * other's VMID after force_vm_exit returns since the
		 * kvm_vmid_lock blocks them from reentry to the guest.
		 */
		force_vm_exit(cpu_all_mask);
		/*
		 * Now broadcast TLB + ICACHE invalidation over the inner
		 * shareable domain to make sure all data structures are
		 * clean.
		 */
		kvm_call_hyp(__kvm_flush_vm_context);
	}

	vmid->vmid = kvm_next_vmid;
	kvm_next_vmid++;
	kvm_next_vmid &= (1 << kvm_get_vmid_bits()) - 1;

	smp_wmb();
	WRITE_ONCE(vmid->vmid_gen, atomic64_read(&kvm_vmid_gen));

	spin_unlock(&kvm_vmid_lock);
}

static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
{
	struct kvm *kvm = vcpu->kvm;
	int ret = 0;

	if (likely(vcpu->arch.has_run_once))
		return 0;

	if (!kvm_arm_vcpu_is_finalized(vcpu))
		return -EPERM;

	vcpu->arch.has_run_once = true;

	if (likely(irqchip_in_kernel(kvm))) {
		/*
		 * Map the VGIC hardware resources before running a vcpu the
		 * first time on this VM.
		 */
		if (unlikely(!vgic_ready(kvm))) {
			ret = kvm_vgic_map_resources(kvm);
			if (ret)
				return ret;
		}
	} else {
		/*
		 * Tell the rest of the code that there are userspace irqchip
		 * VMs in the wild.
		 */
		static_branch_inc(&userspace_irqchip_in_use);
	}

	ret = kvm_timer_enable(vcpu);
	if (ret)
		return ret;

	ret = kvm_arm_pmu_v3_enable(vcpu);

	return ret;
}

bool kvm_arch_intc_initialized(struct kvm *kvm)
{
	return vgic_initialized(kvm);
}

void kvm_arm_halt_guest(struct kvm *kvm)
{
	int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		vcpu->arch.pause = true;
	kvm_make_all_cpus_request(kvm, KVM_REQ_SLEEP);
}

void kvm_arm_resume_guest(struct kvm *kvm)
{
	int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		vcpu->arch.pause = false;
		rcuwait_wake_up(kvm_arch_vcpu_get_wait(vcpu));
	}
}

static void vcpu_req_sleep(struct kvm_vcpu *vcpu)
{
	struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);

	rcuwait_wait_event(wait,
			   (!vcpu->arch.power_off) && (!vcpu->arch.pause),
			   TASK_INTERRUPTIBLE);

	if (vcpu->arch.power_off || vcpu->arch.pause) {
		/* Awaken to handle a signal, request we sleep again later. */
		kvm_make_request(KVM_REQ_SLEEP, vcpu);
	}

	/*
	 * Make sure we will observe a potential reset request if we've
	 * observed a change to the power state. Pairs with the smp_wmb() in
	 * kvm_psci_vcpu_on().
	 */
	smp_rmb();
}

static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.target >= 0;
}

static void check_vcpu_requests(struct kvm_vcpu *vcpu)
{
	if (kvm_request_pending(vcpu)) {
		if (kvm_check_request(KVM_REQ_SLEEP, vcpu))
			vcpu_req_sleep(vcpu);

		if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu))
			kvm_reset_vcpu(vcpu);

		/*
		 * Clear IRQ_PENDING requests that were made to guarantee
		 * that a VCPU sees new virtual interrupts.
		 */
		kvm_check_request(KVM_REQ_IRQ_PENDING, vcpu);

		if (kvm_check_request(KVM_REQ_RECORD_STEAL, vcpu))
			kvm_update_stolen_time(vcpu);

		if (kvm_check_request(KVM_REQ_RELOAD_GICv4, vcpu)) {
			/* The distributor enable bits were changed */
			preempt_disable();
			vgic_v4_put(vcpu, false);
			vgic_v4_load(vcpu);
			preempt_enable();
		}
	}
}

/**
 * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code
 * @vcpu:	The VCPU pointer
 *
 * This function is called through the VCPU_RUN ioctl called from user space. It
 * will execute VM code in a loop until the time slice for the process is used
 * or some emulation is needed from user space in which case the function will
 * return with return value 0 and with the kvm_run structure filled in with the
 * required data for the requested emulation.
 */
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
	struct kvm_run *run = vcpu->run;
	int ret;

	if (unlikely(!kvm_vcpu_initialized(vcpu)))
		return -ENOEXEC;

	ret = kvm_vcpu_first_run_init(vcpu);
	if (ret)
		return ret;

	if (run->exit_reason == KVM_EXIT_MMIO) {
		ret = kvm_handle_mmio_return(vcpu);
		if (ret)
			return ret;
	}

	if (run->immediate_exit)
		return -EINTR;

	vcpu_load(vcpu);

	kvm_sigset_activate(vcpu);

	ret = 1;
	run->exit_reason = KVM_EXIT_UNKNOWN;
	while (ret > 0) {
		/*
		 * Check conditions before entering the guest
		 */
		cond_resched();

		update_vmid(&vcpu->kvm->arch.vmid);

		check_vcpu_requests(vcpu);

		/*
		 * Preparing the interrupts to be injected also
		 * involves poking the GIC, which must be done in a
		 * non-preemptible context.
		 */
		preempt_disable();

		kvm_pmu_flush_hwstate(vcpu);

		local_irq_disable();

		kvm_vgic_flush_hwstate(vcpu);

		/*
		 * Exit if we have a signal pending so that we can deliver the
		 * signal to user space.
		 */
		if (signal_pending(current)) {
			ret = -EINTR;
			run->exit_reason = KVM_EXIT_INTR;
		}

		/*
		 * If we're using a userspace irqchip, then check if we need
		 * to tell a userspace irqchip about timer or PMU level
		 * changes and if so, exit to userspace (the actual level
		 * state gets updated in kvm_timer_update_run and
		 * kvm_pmu_update_run below).
		 */
		if (static_branch_unlikely(&userspace_irqchip_in_use)) {
			if (kvm_timer_should_notify_user(vcpu) ||
			    kvm_pmu_should_notify_user(vcpu)) {
				ret = -EINTR;
				run->exit_reason = KVM_EXIT_INTR;
			}
		}

		/*
		 * Ensure we set mode to IN_GUEST_MODE after we disable
		 * interrupts and before the final VCPU requests check.
		 * See the comment in kvm_vcpu_exiting_guest_mode() and
		 * Documentation/virt/kvm/vcpu-requests.rst
		 */
		smp_store_mb(vcpu->mode, IN_GUEST_MODE);

		if (ret <= 0 || need_new_vmid_gen(&vcpu->kvm->arch.vmid) ||
		    kvm_request_pending(vcpu)) {
			vcpu->mode = OUTSIDE_GUEST_MODE;
			isb(); /* Ensure work in x_flush_hwstate is committed */
			kvm_pmu_sync_hwstate(vcpu);
			if (static_branch_unlikely(&userspace_irqchip_in_use))
				kvm_timer_sync_hwstate(vcpu);
			kvm_vgic_sync_hwstate(vcpu);
			local_irq_enable();
			preempt_enable();
			continue;
		}

		kvm_arm_setup_debug(vcpu);

		/**************************************************************
		 * Enter the guest
		 */
		trace_kvm_entry(*vcpu_pc(vcpu));
		guest_enter_irqoff();

		if (has_vhe()) {
			ret = kvm_vcpu_run_vhe(vcpu);
		} else {
			ret = kvm_call_hyp_ret(__kvm_vcpu_run_nvhe, vcpu);
		}

		vcpu->mode = OUTSIDE_GUEST_MODE;
		vcpu->stat.exits++;
		/*
		 * Back from guest
		 *************************************************************/

		kvm_arm_clear_debug(vcpu);

		/*
		 * We must sync the PMU state before the vgic state so
		 * that the vgic can properly sample the updated state of the
		 * interrupt line.
		 */
		kvm_pmu_sync_hwstate(vcpu);

		/*
		 * Sync the vgic state before syncing the timer state because
		 * the timer code needs to know if the virtual timer
		 * interrupts are active.
		 */
		kvm_vgic_sync_hwstate(vcpu);

		/*
		 * Sync the timer hardware state before enabling interrupts as
		 * we don't want vtimer interrupts to race with syncing the
		 * timer virtual interrupt state.
		 */
		if (static_branch_unlikely(&userspace_irqchip_in_use))
			kvm_timer_sync_hwstate(vcpu);

		kvm_arch_vcpu_ctxsync_fp(vcpu);

		/*
		 * We may have taken a host interrupt in HYP mode (ie
		 * while executing the guest). This interrupt is still
		 * pending, as we haven't serviced it yet!
		 *
		 * We're now back in SVC mode, with interrupts
		 * disabled. Enabling the interrupts now will have
		 * the effect of taking the interrupt again, in SVC
		 * mode this time.
		 */
		local_irq_enable();

		/*
		 * We do local_irq_enable() before calling guest_exit() so
		 * that if a timer interrupt hits while running the guest we
		 * account that tick as being spent in the guest. We enable
		 * preemption after calling guest_exit() so that if we get
		 * preempted we make sure ticks after that is not counted as
		 * guest time.
		 */
		guest_exit();
		trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));

		/* Exit types that need handling before we can be preempted */
		handle_exit_early(vcpu, ret);

		preempt_enable();

		ret = handle_exit(vcpu, ret);
	}

	/* Tell userspace about in-kernel device output levels */
	if (unlikely(!irqchip_in_kernel(vcpu->kvm))) {
		kvm_timer_update_run(vcpu);
		kvm_pmu_update_run(vcpu);
	}

	kvm_sigset_deactivate(vcpu);

	vcpu_put(vcpu);
	return ret;
}

static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level)
{
	int bit_index;
	bool set;
	unsigned long *hcr;

	if (number == KVM_ARM_IRQ_CPU_IRQ)
		bit_index = __ffs(HCR_VI);
	else /* KVM_ARM_IRQ_CPU_FIQ */
		bit_index = __ffs(HCR_VF);

	hcr = vcpu_hcr(vcpu);
	if (level)
		set = test_and_set_bit(bit_index, hcr);
	else
		set = test_and_clear_bit(bit_index, hcr);

	/*
	 * If we didn't change anything, no need to wake up or kick other CPUs
	 */
	if (set == level)
		return 0;

	/*
	 * The vcpu irq_lines field was updated, wake up sleeping VCPUs and
	 * trigger a world-switch round on the running physical CPU to set the
	 * virtual IRQ/FIQ fields in the HCR appropriately.
	 */
	kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
	kvm_vcpu_kick(vcpu);

	return 0;
}

int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
			  bool line_status)
{
	u32 irq = irq_level->irq;
	unsigned int irq_type, vcpu_idx, irq_num;
	int nrcpus = atomic_read(&kvm->online_vcpus);
	struct kvm_vcpu *vcpu = NULL;
	bool level = irq_level->level;

	irq_type = (irq >> KVM_ARM_IRQ_TYPE_SHIFT) & KVM_ARM_IRQ_TYPE_MASK;
	vcpu_idx = (irq >> KVM_ARM_IRQ_VCPU_SHIFT) & KVM_ARM_IRQ_VCPU_MASK;
	vcpu_idx += ((irq >> KVM_ARM_IRQ_VCPU2_SHIFT) & KVM_ARM_IRQ_VCPU2_MASK) * (KVM_ARM_IRQ_VCPU_MASK + 1);
	irq_num = (irq >> KVM_ARM_IRQ_NUM_SHIFT) & KVM_ARM_IRQ_NUM_MASK;

	trace_kvm_irq_line(irq_type, vcpu_idx, irq_num, irq_level->level);

	switch (irq_type) {
	case KVM_ARM_IRQ_TYPE_CPU:
		if (irqchip_in_kernel(kvm))
			return -ENXIO;

		if (vcpu_idx >= nrcpus)
			return -EINVAL;

		vcpu = kvm_get_vcpu(kvm, vcpu_idx);
		if (!vcpu)
			return -EINVAL;

		if (irq_num > KVM_ARM_IRQ_CPU_FIQ)
			return -EINVAL;

		return vcpu_interrupt_line(vcpu, irq_num, level);
	case KVM_ARM_IRQ_TYPE_PPI:
		if (!irqchip_in_kernel(kvm))
			return -ENXIO;

		if (vcpu_idx >= nrcpus)
			return -EINVAL;

		vcpu = kvm_get_vcpu(kvm, vcpu_idx);
		if (!vcpu)
			return -EINVAL;

		if (irq_num < VGIC_NR_SGIS || irq_num >= VGIC_NR_PRIVATE_IRQS)
			return -EINVAL;

		return kvm_vgic_inject_irq(kvm, vcpu->vcpu_id, irq_num, level, NULL);
	case KVM_ARM_IRQ_TYPE_SPI:
		if (!irqchip_in_kernel(kvm))
			return -ENXIO;

		if (irq_num < VGIC_NR_PRIVATE_IRQS)
			return -EINVAL;

		return kvm_vgic_inject_irq(kvm, 0, irq_num, level, NULL);
	}

	return -EINVAL;
}

static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
			       const struct kvm_vcpu_init *init)
{
	unsigned int i, ret;
	int phys_target = kvm_target_cpu();

	if (init->target != phys_target)
		return -EINVAL;

	/*
	 * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must
	 * use the same target.
	 */
	if (vcpu->arch.target != -1 && vcpu->arch.target != init->target)
		return -EINVAL;

	/* -ENOENT for unknown features, -EINVAL for invalid combinations. */
	for (i = 0; i < sizeof(init->features) * 8; i++) {
		bool set = (init->features[i / 32] & (1 << (i % 32)));

		if (set && i >= KVM_VCPU_MAX_FEATURES)
			return -ENOENT;

		/*
		 * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must
		 * use the same feature set.
		 */
		if (vcpu->arch.target != -1 && i < KVM_VCPU_MAX_FEATURES &&
		    test_bit(i, vcpu->arch.features) != set)
			return -EINVAL;

		if (set)
			set_bit(i, vcpu->arch.features);
	}

	vcpu->arch.target = phys_target;

	/* Now we know what it is, we can reset it. */
	ret = kvm_reset_vcpu(vcpu);
	if (ret) {
		vcpu->arch.target = -1;
		bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
	}

	return ret;
}

static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
					 struct kvm_vcpu_init *init)
{
	int ret;

	ret = kvm_vcpu_set_target(vcpu, init);
	if (ret)
		return ret;

	/*
	 * Ensure a rebooted VM will fault in RAM pages and detect if the
	 * guest MMU is turned off and flush the caches as needed.
	 *
	 * S2FWB enforces all memory accesses to RAM being cacheable,
	 * ensuring that the data side is always coherent. We still
	 * need to invalidate the I-cache though, as FWB does *not*
	 * imply CTR_EL0.DIC.
	 */
	if (vcpu->arch.has_run_once) {
		if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
			stage2_unmap_vm(vcpu->kvm);
		else
			__flush_icache_all();
	}

	vcpu_reset_hcr(vcpu);

	/*
	 * Handle the "start in power-off" case.
	 */
	if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
		vcpu_power_off(vcpu);
	else
		vcpu->arch.power_off = false;

	return 0;
}

static int kvm_arm_vcpu_set_attr(struct kvm_vcpu *vcpu,
				 struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->group) {
	default:
		ret = kvm_arm_vcpu_arch_set_attr(vcpu, attr);
		break;
	}

	return ret;
}

static int kvm_arm_vcpu_get_attr(struct kvm_vcpu *vcpu,
				 struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->group) {
	default:
		ret = kvm_arm_vcpu_arch_get_attr(vcpu, attr);
		break;
	}

	return ret;
}

static int kvm_arm_vcpu_has_attr(struct kvm_vcpu *vcpu,
				 struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->group) {
	default:
		ret = kvm_arm_vcpu_arch_has_attr(vcpu, attr);
		break;
	}

	return ret;
}

static int kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
				   struct kvm_vcpu_events *events)
{
	memset(events, 0, sizeof(*events));

	return __kvm_arm_vcpu_get_events(vcpu, events);
}

static int kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
				   struct kvm_vcpu_events *events)
{
	int i;

	/* check whether the reserved field is zero */
	for (i = 0; i < ARRAY_SIZE(events->reserved); i++)
		if (events->reserved[i])
			return -EINVAL;

	/* check whether the pad field is zero */
	for (i = 0; i < ARRAY_SIZE(events->exception.pad); i++)
		if (events->exception.pad[i])
			return -EINVAL;

	return __kvm_arm_vcpu_set_events(vcpu, events);
}

long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	long r;

	switch (ioctl) {
	case KVM_ARM_VCPU_INIT: {
		struct kvm_vcpu_init init;

		r = -EFAULT;
		if (copy_from_user(&init, argp, sizeof(init)))
			break;

		r = kvm_arch_vcpu_ioctl_vcpu_init(vcpu, &init);
		break;
	}
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -ENOEXEC;
		if (unlikely(!kvm_vcpu_initialized(vcpu)))
			break;

		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;

		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arm_set_reg(vcpu, &reg);
		else
			r = kvm_arm_get_reg(vcpu, &reg);
		break;
	}
	case KVM_GET_REG_LIST: {
		struct kvm_reg_list __user *user_list = argp;
		struct kvm_reg_list reg_list;
		unsigned n;

		r = -ENOEXEC;
		if (unlikely(!kvm_vcpu_initialized(vcpu)))
			break;

		r = -EPERM;
		if (!kvm_arm_vcpu_is_finalized(vcpu))
			break;

		r = -EFAULT;
		if (copy_from_user(&reg_list, user_list, sizeof(reg_list)))
			break;
		n = reg_list.n;
		reg_list.n = kvm_arm_num_regs(vcpu);
		if (copy_to_user(user_list, &reg_list, sizeof(reg_list)))
			break;
		r = -E2BIG;
		if (n < reg_list.n)
			break;
		r = kvm_arm_copy_reg_indices(vcpu, user_list->reg);
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, argp, sizeof(attr)))
			break;
		r = kvm_arm_vcpu_set_attr(vcpu, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, argp, sizeof(attr)))
			break;
		r = kvm_arm_vcpu_get_attr(vcpu, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, argp, sizeof(attr)))
			break;
		r = kvm_arm_vcpu_has_attr(vcpu, &attr);
		break;
	}
	case KVM_GET_VCPU_EVENTS: {
		struct kvm_vcpu_events events;

		if (kvm_arm_vcpu_get_events(vcpu, &events))
			return -EINVAL;

		if (copy_to_user(argp, &events, sizeof(events)))
			return -EFAULT;

		return 0;
	}
	case KVM_SET_VCPU_EVENTS: {
		struct kvm_vcpu_events events;

		if (copy_from_user(&events, argp, sizeof(events)))
			return -EFAULT;

		return kvm_arm_vcpu_set_events(vcpu, &events);
	}
	case KVM_ARM_VCPU_FINALIZE: {
		int what;

		if (!kvm_vcpu_initialized(vcpu))
			return -ENOEXEC;

		if (get_user(what, (const int __user *)argp))
			return -EFAULT;

		return kvm_arm_vcpu_finalize(vcpu, what);
	}
	default:
		r = -EINVAL;
	}

	return r;
}

void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
{

}

void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
					struct kvm_memory_slot *memslot)
{
	kvm_flush_remote_tlbs(kvm);
}

static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
					struct kvm_arm_device_addr *dev_addr)
{
	unsigned long dev_id, type;

	dev_id = (dev_addr->id & KVM_ARM_DEVICE_ID_MASK) >>
		KVM_ARM_DEVICE_ID_SHIFT;
	type = (dev_addr->id & KVM_ARM_DEVICE_TYPE_MASK) >>
		KVM_ARM_DEVICE_TYPE_SHIFT;

	switch (dev_id) {
	case KVM_ARM_DEVICE_VGIC_V2:
		if (!vgic_present)
			return -ENXIO;
		return kvm_vgic_addr(kvm, type, &dev_addr->addr, true);
	default:
		return -ENODEV;
	}
}

long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;

	switch (ioctl) {
	case KVM_CREATE_IRQCHIP: {
		int ret;
		if (!vgic_present)
			return -ENXIO;
		mutex_lock(&kvm->lock);
		ret = kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
		mutex_unlock(&kvm->lock);
		return ret;
	}
	case KVM_ARM_SET_DEVICE_ADDR: {
		struct kvm_arm_device_addr dev_addr;

		if (copy_from_user(&dev_addr, argp, sizeof(dev_addr)))
			return -EFAULT;
		return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr);
	}
	case KVM_ARM_PREFERRED_TARGET: {
		int err;
		struct kvm_vcpu_init init;

		err = kvm_vcpu_preferred_target(&init);
		if (err)
			return err;

		if (copy_to_user(argp, &init, sizeof(init)))
			return -EFAULT;

		return 0;
	}
	default:
		return -EINVAL;
	}
}

static void cpu_init_hyp_mode(void)
{
	phys_addr_t pgd_ptr;
	unsigned long hyp_stack_ptr;
	unsigned long vector_ptr;
	unsigned long tpidr_el2;

	/* Switch from the HYP stub to our own HYP init vector */
	__hyp_set_vectors(kvm_get_idmap_vector());

	/*
	 * Calculate the raw per-cpu offset without a translation from the
	 * kernel's mapping to the linear mapping, and store it in tpidr_el2
	 * so that we can use adr_l to access per-cpu variables in EL2.
	 */
	tpidr_el2 = ((unsigned long)this_cpu_ptr(&kvm_host_data) -
		     (unsigned long)kvm_ksym_ref(kvm_host_data));

	pgd_ptr = kvm_mmu_get_httbr();
	hyp_stack_ptr = __this_cpu_read(kvm_arm_hyp_stack_page) + PAGE_SIZE;
	vector_ptr = (unsigned long)kvm_get_hyp_vector();

	/*
	 * Call initialization code, and switch to the full blown HYP code.
	 * If the cpucaps haven't been finalized yet, something has gone very
	 * wrong, and hyp will crash and burn when it uses any
	 * cpus_have_const_cap() wrapper.
	 */
	BUG_ON(!system_capabilities_finalized());
	__kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr, tpidr_el2);

	/*
	 * Disabling SSBD on a non-VHE system requires us to enable SSBS
	 * at EL2.
	 */
	if (this_cpu_has_cap(ARM64_SSBS) &&
	    arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) {
		kvm_call_hyp(__kvm_enable_ssbs);
	}
}

static void cpu_hyp_reset(void)
{
	if (!is_kernel_in_hyp_mode())
		__hyp_reset_vectors();
}

static void cpu_hyp_reinit(void)
{
	kvm_init_host_cpu_context(&this_cpu_ptr(&kvm_host_data)->host_ctxt);

	cpu_hyp_reset();

	if (is_kernel_in_hyp_mode())
		kvm_timer_init_vhe();
	else
		cpu_init_hyp_mode();

	kvm_arm_init_debug();

	if (vgic_present)
		kvm_vgic_init_cpu_hardware();
}

static void _kvm_arch_hardware_enable(void *discard)
{
	if (!__this_cpu_read(kvm_arm_hardware_enabled)) {
		cpu_hyp_reinit();
		__this_cpu_write(kvm_arm_hardware_enabled, 1);
	}
}

int kvm_arch_hardware_enable(void)
{
	_kvm_arch_hardware_enable(NULL);
	return 0;
}

static void _kvm_arch_hardware_disable(void *discard)
{
	if (__this_cpu_read(kvm_arm_hardware_enabled)) {
		cpu_hyp_reset();
		__this_cpu_write(kvm_arm_hardware_enabled, 0);
	}
}

void kvm_arch_hardware_disable(void)
{
	_kvm_arch_hardware_disable(NULL);
}

#ifdef CONFIG_CPU_PM
static int hyp_init_cpu_pm_notifier(struct notifier_block *self,
				    unsigned long cmd,
				    void *v)
{
	/*
	 * kvm_arm_hardware_enabled is left with its old value over
	 * PM_ENTER->PM_EXIT. It is used to indicate PM_EXIT should
	 * re-enable hyp.
	 */
	switch (cmd) {
	case CPU_PM_ENTER:
		if (__this_cpu_read(kvm_arm_hardware_enabled))
			/*
			 * don't update kvm_arm_hardware_enabled here
			 * so that the hardware will be re-enabled
			 * when we resume. See below.
			 */
			cpu_hyp_reset();

		return NOTIFY_OK;
	case CPU_PM_ENTER_FAILED:
	case CPU_PM_EXIT:
		if (__this_cpu_read(kvm_arm_hardware_enabled))
			/* The hardware was enabled before suspend. */
			cpu_hyp_reinit();

		return NOTIFY_OK;

	default:
		return NOTIFY_DONE;
	}
}

static struct notifier_block hyp_init_cpu_pm_nb = {
	.notifier_call = hyp_init_cpu_pm_notifier,
};

static void __init hyp_cpu_pm_init(void)
{
	cpu_pm_register_notifier(&hyp_init_cpu_pm_nb);
}
static void __init hyp_cpu_pm_exit(void)
{
	cpu_pm_unregister_notifier(&hyp_init_cpu_pm_nb);
}
#else
static inline void hyp_cpu_pm_init(void)
{
}
static inline void hyp_cpu_pm_exit(void)
{
}
#endif

static int init_common_resources(void)
{
	return kvm_set_ipa_limit();
}

static int init_subsystems(void)
{
	int err = 0;

	/*
	 * Enable hardware so that subsystem initialisation can access EL2.
	 */
	on_each_cpu(_kvm_arch_hardware_enable, NULL, 1);

	/*
	 * Register CPU lower-power notifier
	 */
	hyp_cpu_pm_init();

	/*
	 * Init HYP view of VGIC
	 */
	err = kvm_vgic_hyp_init();
	switch (err) {
	case 0:
		vgic_present = true;
		break;
	case -ENODEV:
	case -ENXIO:
		vgic_present = false;
		err = 0;
		break;
	default:
		goto out;
	}

	/*
	 * Init HYP architected timer support
	 */
	err = kvm_timer_hyp_init(vgic_present);
	if (err)
		goto out;

	kvm_perf_init();
	kvm_coproc_table_init();

out:
	on_each_cpu(_kvm_arch_hardware_disable, NULL, 1);

	return err;
}

static void teardown_hyp_mode(void)
{
	int cpu;

	free_hyp_pgds();
	for_each_possible_cpu(cpu)
		free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
}

/**
 * Inits Hyp-mode on all online CPUs
 */
static int init_hyp_mode(void)
{
	int cpu;
	int err = 0;

	/*
	 * Allocate Hyp PGD and setup Hyp identity mapping
	 */
	err = kvm_mmu_init();
	if (err)
		goto out_err;

	/*
	 * Allocate stack pages for Hypervisor-mode
	 */
	for_each_possible_cpu(cpu) {
		unsigned long stack_page;

		stack_page = __get_free_page(GFP_KERNEL);
		if (!stack_page) {
			err = -ENOMEM;
			goto out_err;
		}

		per_cpu(kvm_arm_hyp_stack_page, cpu) = stack_page;
	}

	/*
	 * Map the Hyp-code called directly from the host
	 */
	err = create_hyp_mappings(kvm_ksym_ref(__hyp_text_start),
				  kvm_ksym_ref(__hyp_text_end), PAGE_HYP_EXEC);
	if (err) {
		kvm_err("Cannot map world-switch code\n");
		goto out_err;
	}

	err = create_hyp_mappings(kvm_ksym_ref(__start_rodata),
				  kvm_ksym_ref(__end_rodata), PAGE_HYP_RO);
	if (err) {
		kvm_err("Cannot map rodata section\n");
		goto out_err;
	}

	err = create_hyp_mappings(kvm_ksym_ref(__bss_start),
				  kvm_ksym_ref(__bss_stop), PAGE_HYP_RO);
	if (err) {
		kvm_err("Cannot map bss section\n");
		goto out_err;
	}

	err = kvm_map_vectors();
	if (err) {
		kvm_err("Cannot map vectors\n");
		goto out_err;
	}

	/*
	 * Map the Hyp stack pages
	 */
	for_each_possible_cpu(cpu) {
		char *stack_page = (char *)per_cpu(kvm_arm_hyp_stack_page, cpu);
		err = create_hyp_mappings(stack_page, stack_page + PAGE_SIZE,
					  PAGE_HYP);

		if (err) {
			kvm_err("Cannot map hyp stack\n");
			goto out_err;
		}
	}

	for_each_possible_cpu(cpu) {
		kvm_host_data_t *cpu_data;

		cpu_data = per_cpu_ptr(&kvm_host_data, cpu);
		err = create_hyp_mappings(cpu_data, cpu_data + 1, PAGE_HYP);

		if (err) {
			kvm_err("Cannot map host CPU state: %d\n", err);
			goto out_err;
		}
	}

	err = hyp_map_aux_data();
	if (err)
		kvm_err("Cannot map host auxiliary data: %d\n", err);

	return 0;

out_err:
	teardown_hyp_mode();
	kvm_err("error initializing Hyp mode: %d\n", err);
	return err;
}

static void check_kvm_target_cpu(void *ret)
{
	*(int *)ret = kvm_target_cpu();
}

struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr)
{
	struct kvm_vcpu *vcpu;
	int i;

	mpidr &= MPIDR_HWID_BITMASK;
	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (mpidr == kvm_vcpu_get_mpidr_aff(vcpu))
			return vcpu;
	}
	return NULL;
}

bool kvm_arch_has_irq_bypass(void)
{
	return true;
}

int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
				     struct irq_bypass_producer *prod)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(cons, struct kvm_kernel_irqfd, consumer);

	return kvm_vgic_v4_set_forwarding(irqfd->kvm, prod->irq,
					  &irqfd->irq_entry);
}
void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
				      struct irq_bypass_producer *prod)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(cons, struct kvm_kernel_irqfd, consumer);

	kvm_vgic_v4_unset_forwarding(irqfd->kvm, prod->irq,
				     &irqfd->irq_entry);
}

void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *cons)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(cons, struct kvm_kernel_irqfd, consumer);

	kvm_arm_halt_guest(irqfd->kvm);
}

void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *cons)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(cons, struct kvm_kernel_irqfd, consumer);

	kvm_arm_resume_guest(irqfd->kvm);
}

/**
 * Initialize Hyp-mode and memory mappings on all CPUs.
 */
int kvm_arch_init(void *opaque)
{
	int err;
	int ret, cpu;
	bool in_hyp_mode;

	if (!is_hyp_mode_available()) {
		kvm_info("HYP mode not available\n");
		return -ENODEV;
	}

	in_hyp_mode = is_kernel_in_hyp_mode();

	if (!in_hyp_mode && kvm_arch_requires_vhe()) {
		kvm_pr_unimpl("CPU unsupported in non-VHE mode, not initializing\n");
		return -ENODEV;
	}

	for_each_online_cpu(cpu) {
		smp_call_function_single(cpu, check_kvm_target_cpu, &ret, 1);
		if (ret < 0) {
			kvm_err("Error, CPU %d not supported!\n", cpu);
			return -ENODEV;
		}
	}

	err = init_common_resources();
	if (err)
		return err;

	err = kvm_arm_init_sve();
	if (err)
		return err;

	if (!in_hyp_mode) {
		err = init_hyp_mode();
		if (err)
			goto out_err;
	}

	err = init_subsystems();
	if (err)
		goto out_hyp;

	if (in_hyp_mode)
		kvm_info("VHE mode initialized successfully\n");
	else
		kvm_info("Hyp mode initialized successfully\n");

	return 0;

out_hyp:
	hyp_cpu_pm_exit();
	if (!in_hyp_mode)
		teardown_hyp_mode();
out_err:
	return err;
}

/* NOP: Compiling as a module not supported */
void kvm_arch_exit(void)
{
	kvm_perf_teardown();
}

static int arm_init(void)
{
	int rc = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
	return rc;
}

module_init(arm_init);