1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2012 - Virtual Open Systems and Columbia University 4 * Author: Christoffer Dall <c.dall@virtualopensystems.com> 5 */ 6 7 #include <linux/bug.h> 8 #include <linux/cpu_pm.h> 9 #include <linux/entry-kvm.h> 10 #include <linux/errno.h> 11 #include <linux/err.h> 12 #include <linux/kvm_host.h> 13 #include <linux/list.h> 14 #include <linux/module.h> 15 #include <linux/vmalloc.h> 16 #include <linux/fs.h> 17 #include <linux/mman.h> 18 #include <linux/sched.h> 19 #include <linux/kvm.h> 20 #include <linux/kvm_irqfd.h> 21 #include <linux/irqbypass.h> 22 #include <linux/sched/stat.h> 23 #include <linux/psci.h> 24 #include <trace/events/kvm.h> 25 26 #define CREATE_TRACE_POINTS 27 #include "trace_arm.h" 28 29 #include <linux/uaccess.h> 30 #include <asm/ptrace.h> 31 #include <asm/mman.h> 32 #include <asm/tlbflush.h> 33 #include <asm/cacheflush.h> 34 #include <asm/cpufeature.h> 35 #include <asm/virt.h> 36 #include <asm/kvm_arm.h> 37 #include <asm/kvm_asm.h> 38 #include <asm/kvm_mmu.h> 39 #include <asm/kvm_nested.h> 40 #include <asm/kvm_pkvm.h> 41 #include <asm/kvm_emulate.h> 42 #include <asm/sections.h> 43 44 #include <kvm/arm_hypercalls.h> 45 #include <kvm/arm_pmu.h> 46 #include <kvm/arm_psci.h> 47 48 static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT; 49 50 DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector); 51 52 DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page); 53 DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params); 54 55 DECLARE_KVM_NVHE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt); 56 57 static bool vgic_present, kvm_arm_initialised; 58 59 static DEFINE_PER_CPU(unsigned char, kvm_hyp_initialized); 60 DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use); 61 62 bool is_kvm_arm_initialised(void) 63 { 64 return kvm_arm_initialised; 65 } 66 67 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) 68 { 69 return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; 70 } 71 72 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, 73 struct kvm_enable_cap *cap) 74 { 75 int r; 76 u64 new_cap; 77 78 if (cap->flags) 79 return -EINVAL; 80 81 switch (cap->cap) { 82 case KVM_CAP_ARM_NISV_TO_USER: 83 r = 0; 84 set_bit(KVM_ARCH_FLAG_RETURN_NISV_IO_ABORT_TO_USER, 85 &kvm->arch.flags); 86 break; 87 case KVM_CAP_ARM_MTE: 88 mutex_lock(&kvm->lock); 89 if (!system_supports_mte() || kvm->created_vcpus) { 90 r = -EINVAL; 91 } else { 92 r = 0; 93 set_bit(KVM_ARCH_FLAG_MTE_ENABLED, &kvm->arch.flags); 94 } 95 mutex_unlock(&kvm->lock); 96 break; 97 case KVM_CAP_ARM_SYSTEM_SUSPEND: 98 r = 0; 99 set_bit(KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED, &kvm->arch.flags); 100 break; 101 case KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE: 102 new_cap = cap->args[0]; 103 104 mutex_lock(&kvm->slots_lock); 105 /* 106 * To keep things simple, allow changing the chunk 107 * size only when no memory slots have been created. 108 */ 109 if (!kvm_are_all_memslots_empty(kvm)) { 110 r = -EINVAL; 111 } else if (new_cap && !kvm_is_block_size_supported(new_cap)) { 112 r = -EINVAL; 113 } else { 114 r = 0; 115 kvm->arch.mmu.split_page_chunk_size = new_cap; 116 } 117 mutex_unlock(&kvm->slots_lock); 118 break; 119 default: 120 r = -EINVAL; 121 break; 122 } 123 124 return r; 125 } 126 127 static int kvm_arm_default_max_vcpus(void) 128 { 129 return vgic_present ? kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS; 130 } 131 132 /** 133 * kvm_arch_init_vm - initializes a VM data structure 134 * @kvm: pointer to the KVM struct 135 */ 136 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) 137 { 138 int ret; 139 140 mutex_init(&kvm->arch.config_lock); 141 142 #ifdef CONFIG_LOCKDEP 143 /* Clue in lockdep that the config_lock must be taken inside kvm->lock */ 144 mutex_lock(&kvm->lock); 145 mutex_lock(&kvm->arch.config_lock); 146 mutex_unlock(&kvm->arch.config_lock); 147 mutex_unlock(&kvm->lock); 148 #endif 149 150 ret = kvm_share_hyp(kvm, kvm + 1); 151 if (ret) 152 return ret; 153 154 ret = pkvm_init_host_vm(kvm); 155 if (ret) 156 goto err_unshare_kvm; 157 158 if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL_ACCOUNT)) { 159 ret = -ENOMEM; 160 goto err_unshare_kvm; 161 } 162 cpumask_copy(kvm->arch.supported_cpus, cpu_possible_mask); 163 164 ret = kvm_init_stage2_mmu(kvm, &kvm->arch.mmu, type); 165 if (ret) 166 goto err_free_cpumask; 167 168 kvm_vgic_early_init(kvm); 169 170 kvm_timer_init_vm(kvm); 171 172 /* The maximum number of VCPUs is limited by the host's GIC model */ 173 kvm->max_vcpus = kvm_arm_default_max_vcpus(); 174 175 kvm_arm_init_hypercalls(kvm); 176 177 bitmap_zero(kvm->arch.vcpu_features, KVM_VCPU_MAX_FEATURES); 178 179 return 0; 180 181 err_free_cpumask: 182 free_cpumask_var(kvm->arch.supported_cpus); 183 err_unshare_kvm: 184 kvm_unshare_hyp(kvm, kvm + 1); 185 return ret; 186 } 187 188 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) 189 { 190 return VM_FAULT_SIGBUS; 191 } 192 193 194 /** 195 * kvm_arch_destroy_vm - destroy the VM data structure 196 * @kvm: pointer to the KVM struct 197 */ 198 void kvm_arch_destroy_vm(struct kvm *kvm) 199 { 200 bitmap_free(kvm->arch.pmu_filter); 201 free_cpumask_var(kvm->arch.supported_cpus); 202 203 kvm_vgic_destroy(kvm); 204 205 if (is_protected_kvm_enabled()) 206 pkvm_destroy_hyp_vm(kvm); 207 208 kfree(kvm->arch.mpidr_data); 209 kvm_destroy_vcpus(kvm); 210 211 kvm_unshare_hyp(kvm, kvm + 1); 212 213 kvm_arm_teardown_hypercalls(kvm); 214 } 215 216 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) 217 { 218 int r; 219 switch (ext) { 220 case KVM_CAP_IRQCHIP: 221 r = vgic_present; 222 break; 223 case KVM_CAP_IOEVENTFD: 224 case KVM_CAP_DEVICE_CTRL: 225 case KVM_CAP_USER_MEMORY: 226 case KVM_CAP_SYNC_MMU: 227 case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: 228 case KVM_CAP_ONE_REG: 229 case KVM_CAP_ARM_PSCI: 230 case KVM_CAP_ARM_PSCI_0_2: 231 case KVM_CAP_READONLY_MEM: 232 case KVM_CAP_MP_STATE: 233 case KVM_CAP_IMMEDIATE_EXIT: 234 case KVM_CAP_VCPU_EVENTS: 235 case KVM_CAP_ARM_IRQ_LINE_LAYOUT_2: 236 case KVM_CAP_ARM_NISV_TO_USER: 237 case KVM_CAP_ARM_INJECT_EXT_DABT: 238 case KVM_CAP_SET_GUEST_DEBUG: 239 case KVM_CAP_VCPU_ATTRIBUTES: 240 case KVM_CAP_PTP_KVM: 241 case KVM_CAP_ARM_SYSTEM_SUSPEND: 242 case KVM_CAP_IRQFD_RESAMPLE: 243 case KVM_CAP_COUNTER_OFFSET: 244 r = 1; 245 break; 246 case KVM_CAP_SET_GUEST_DEBUG2: 247 return KVM_GUESTDBG_VALID_MASK; 248 case KVM_CAP_ARM_SET_DEVICE_ADDR: 249 r = 1; 250 break; 251 case KVM_CAP_NR_VCPUS: 252 /* 253 * ARM64 treats KVM_CAP_NR_CPUS differently from all other 254 * architectures, as it does not always bound it to 255 * KVM_CAP_MAX_VCPUS. It should not matter much because 256 * this is just an advisory value. 257 */ 258 r = min_t(unsigned int, num_online_cpus(), 259 kvm_arm_default_max_vcpus()); 260 break; 261 case KVM_CAP_MAX_VCPUS: 262 case KVM_CAP_MAX_VCPU_ID: 263 if (kvm) 264 r = kvm->max_vcpus; 265 else 266 r = kvm_arm_default_max_vcpus(); 267 break; 268 case KVM_CAP_MSI_DEVID: 269 if (!kvm) 270 r = -EINVAL; 271 else 272 r = kvm->arch.vgic.msis_require_devid; 273 break; 274 case KVM_CAP_ARM_USER_IRQ: 275 /* 276 * 1: EL1_VTIMER, EL1_PTIMER, and PMU. 277 * (bump this number if adding more devices) 278 */ 279 r = 1; 280 break; 281 case KVM_CAP_ARM_MTE: 282 r = system_supports_mte(); 283 break; 284 case KVM_CAP_STEAL_TIME: 285 r = kvm_arm_pvtime_supported(); 286 break; 287 case KVM_CAP_ARM_EL1_32BIT: 288 r = cpus_have_final_cap(ARM64_HAS_32BIT_EL1); 289 break; 290 case KVM_CAP_GUEST_DEBUG_HW_BPS: 291 r = get_num_brps(); 292 break; 293 case KVM_CAP_GUEST_DEBUG_HW_WPS: 294 r = get_num_wrps(); 295 break; 296 case KVM_CAP_ARM_PMU_V3: 297 r = kvm_arm_support_pmu_v3(); 298 break; 299 case KVM_CAP_ARM_INJECT_SERROR_ESR: 300 r = cpus_have_final_cap(ARM64_HAS_RAS_EXTN); 301 break; 302 case KVM_CAP_ARM_VM_IPA_SIZE: 303 r = get_kvm_ipa_limit(); 304 break; 305 case KVM_CAP_ARM_SVE: 306 r = system_supports_sve(); 307 break; 308 case KVM_CAP_ARM_PTRAUTH_ADDRESS: 309 case KVM_CAP_ARM_PTRAUTH_GENERIC: 310 r = system_has_full_ptr_auth(); 311 break; 312 case KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE: 313 if (kvm) 314 r = kvm->arch.mmu.split_page_chunk_size; 315 else 316 r = KVM_ARM_EAGER_SPLIT_CHUNK_SIZE_DEFAULT; 317 break; 318 case KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES: 319 r = kvm_supported_block_sizes(); 320 break; 321 case KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES: 322 r = BIT(0); 323 break; 324 default: 325 r = 0; 326 } 327 328 return r; 329 } 330 331 long kvm_arch_dev_ioctl(struct file *filp, 332 unsigned int ioctl, unsigned long arg) 333 { 334 return -EINVAL; 335 } 336 337 struct kvm *kvm_arch_alloc_vm(void) 338 { 339 size_t sz = sizeof(struct kvm); 340 341 if (!has_vhe()) 342 return kzalloc(sz, GFP_KERNEL_ACCOUNT); 343 344 return __vmalloc(sz, GFP_KERNEL_ACCOUNT | __GFP_HIGHMEM | __GFP_ZERO); 345 } 346 347 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) 348 { 349 if (irqchip_in_kernel(kvm) && vgic_initialized(kvm)) 350 return -EBUSY; 351 352 if (id >= kvm->max_vcpus) 353 return -EINVAL; 354 355 return 0; 356 } 357 358 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) 359 { 360 int err; 361 362 spin_lock_init(&vcpu->arch.mp_state_lock); 363 364 #ifdef CONFIG_LOCKDEP 365 /* Inform lockdep that the config_lock is acquired after vcpu->mutex */ 366 mutex_lock(&vcpu->mutex); 367 mutex_lock(&vcpu->kvm->arch.config_lock); 368 mutex_unlock(&vcpu->kvm->arch.config_lock); 369 mutex_unlock(&vcpu->mutex); 370 #endif 371 372 /* Force users to call KVM_ARM_VCPU_INIT */ 373 vcpu_clear_flag(vcpu, VCPU_INITIALIZED); 374 375 vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO; 376 377 /* 378 * Default value for the FP state, will be overloaded at load 379 * time if we support FP (pretty likely) 380 */ 381 vcpu->arch.fp_state = FP_STATE_FREE; 382 383 /* Set up the timer */ 384 kvm_timer_vcpu_init(vcpu); 385 386 kvm_pmu_vcpu_init(vcpu); 387 388 kvm_arm_reset_debug_ptr(vcpu); 389 390 kvm_arm_pvtime_vcpu_init(&vcpu->arch); 391 392 vcpu->arch.hw_mmu = &vcpu->kvm->arch.mmu; 393 394 err = kvm_vgic_vcpu_init(vcpu); 395 if (err) 396 return err; 397 398 return kvm_share_hyp(vcpu, vcpu + 1); 399 } 400 401 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) 402 { 403 } 404 405 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) 406 { 407 if (vcpu_has_run_once(vcpu) && unlikely(!irqchip_in_kernel(vcpu->kvm))) 408 static_branch_dec(&userspace_irqchip_in_use); 409 410 kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache); 411 kvm_timer_vcpu_terminate(vcpu); 412 kvm_pmu_vcpu_destroy(vcpu); 413 kvm_vgic_vcpu_destroy(vcpu); 414 kvm_arm_vcpu_destroy(vcpu); 415 } 416 417 void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) 418 { 419 420 } 421 422 void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) 423 { 424 425 } 426 427 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 428 { 429 struct kvm_s2_mmu *mmu; 430 int *last_ran; 431 432 mmu = vcpu->arch.hw_mmu; 433 last_ran = this_cpu_ptr(mmu->last_vcpu_ran); 434 435 /* 436 * We guarantee that both TLBs and I-cache are private to each 437 * vcpu. If detecting that a vcpu from the same VM has 438 * previously run on the same physical CPU, call into the 439 * hypervisor code to nuke the relevant contexts. 440 * 441 * We might get preempted before the vCPU actually runs, but 442 * over-invalidation doesn't affect correctness. 443 */ 444 if (*last_ran != vcpu->vcpu_idx) { 445 kvm_call_hyp(__kvm_flush_cpu_context, mmu); 446 *last_ran = vcpu->vcpu_idx; 447 } 448 449 vcpu->cpu = cpu; 450 451 kvm_vgic_load(vcpu); 452 kvm_timer_vcpu_load(vcpu); 453 if (has_vhe()) 454 kvm_vcpu_load_vhe(vcpu); 455 kvm_arch_vcpu_load_fp(vcpu); 456 kvm_vcpu_pmu_restore_guest(vcpu); 457 if (kvm_arm_is_pvtime_enabled(&vcpu->arch)) 458 kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu); 459 460 if (single_task_running()) 461 vcpu_clear_wfx_traps(vcpu); 462 else 463 vcpu_set_wfx_traps(vcpu); 464 465 if (vcpu_has_ptrauth(vcpu)) 466 vcpu_ptrauth_disable(vcpu); 467 kvm_arch_vcpu_load_debug_state_flags(vcpu); 468 469 if (!cpumask_test_cpu(cpu, vcpu->kvm->arch.supported_cpus)) 470 vcpu_set_on_unsupported_cpu(vcpu); 471 } 472 473 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 474 { 475 kvm_arch_vcpu_put_debug_state_flags(vcpu); 476 kvm_arch_vcpu_put_fp(vcpu); 477 if (has_vhe()) 478 kvm_vcpu_put_vhe(vcpu); 479 kvm_timer_vcpu_put(vcpu); 480 kvm_vgic_put(vcpu); 481 kvm_vcpu_pmu_restore_host(vcpu); 482 kvm_arm_vmid_clear_active(); 483 484 vcpu_clear_on_unsupported_cpu(vcpu); 485 vcpu->cpu = -1; 486 } 487 488 static void __kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu) 489 { 490 WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED); 491 kvm_make_request(KVM_REQ_SLEEP, vcpu); 492 kvm_vcpu_kick(vcpu); 493 } 494 495 void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu) 496 { 497 spin_lock(&vcpu->arch.mp_state_lock); 498 __kvm_arm_vcpu_power_off(vcpu); 499 spin_unlock(&vcpu->arch.mp_state_lock); 500 } 501 502 bool kvm_arm_vcpu_stopped(struct kvm_vcpu *vcpu) 503 { 504 return READ_ONCE(vcpu->arch.mp_state.mp_state) == KVM_MP_STATE_STOPPED; 505 } 506 507 static void kvm_arm_vcpu_suspend(struct kvm_vcpu *vcpu) 508 { 509 WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_SUSPENDED); 510 kvm_make_request(KVM_REQ_SUSPEND, vcpu); 511 kvm_vcpu_kick(vcpu); 512 } 513 514 static bool kvm_arm_vcpu_suspended(struct kvm_vcpu *vcpu) 515 { 516 return READ_ONCE(vcpu->arch.mp_state.mp_state) == KVM_MP_STATE_SUSPENDED; 517 } 518 519 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, 520 struct kvm_mp_state *mp_state) 521 { 522 *mp_state = READ_ONCE(vcpu->arch.mp_state); 523 524 return 0; 525 } 526 527 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, 528 struct kvm_mp_state *mp_state) 529 { 530 int ret = 0; 531 532 spin_lock(&vcpu->arch.mp_state_lock); 533 534 switch (mp_state->mp_state) { 535 case KVM_MP_STATE_RUNNABLE: 536 WRITE_ONCE(vcpu->arch.mp_state, *mp_state); 537 break; 538 case KVM_MP_STATE_STOPPED: 539 __kvm_arm_vcpu_power_off(vcpu); 540 break; 541 case KVM_MP_STATE_SUSPENDED: 542 kvm_arm_vcpu_suspend(vcpu); 543 break; 544 default: 545 ret = -EINVAL; 546 } 547 548 spin_unlock(&vcpu->arch.mp_state_lock); 549 550 return ret; 551 } 552 553 /** 554 * kvm_arch_vcpu_runnable - determine if the vcpu can be scheduled 555 * @v: The VCPU pointer 556 * 557 * If the guest CPU is not waiting for interrupts or an interrupt line is 558 * asserted, the CPU is by definition runnable. 559 */ 560 int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) 561 { 562 bool irq_lines = *vcpu_hcr(v) & (HCR_VI | HCR_VF); 563 return ((irq_lines || kvm_vgic_vcpu_pending_irq(v)) 564 && !kvm_arm_vcpu_stopped(v) && !v->arch.pause); 565 } 566 567 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) 568 { 569 return vcpu_mode_priv(vcpu); 570 } 571 572 #ifdef CONFIG_GUEST_PERF_EVENTS 573 unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu) 574 { 575 return *vcpu_pc(vcpu); 576 } 577 #endif 578 579 static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu) 580 { 581 return vcpu_get_flag(vcpu, VCPU_INITIALIZED); 582 } 583 584 static void kvm_init_mpidr_data(struct kvm *kvm) 585 { 586 struct kvm_mpidr_data *data = NULL; 587 unsigned long c, mask, nr_entries; 588 u64 aff_set = 0, aff_clr = ~0UL; 589 struct kvm_vcpu *vcpu; 590 591 mutex_lock(&kvm->arch.config_lock); 592 593 if (kvm->arch.mpidr_data || atomic_read(&kvm->online_vcpus) == 1) 594 goto out; 595 596 kvm_for_each_vcpu(c, vcpu, kvm) { 597 u64 aff = kvm_vcpu_get_mpidr_aff(vcpu); 598 aff_set |= aff; 599 aff_clr &= aff; 600 } 601 602 /* 603 * A significant bit can be either 0 or 1, and will only appear in 604 * aff_set. Use aff_clr to weed out the useless stuff. 605 */ 606 mask = aff_set ^ aff_clr; 607 nr_entries = BIT_ULL(hweight_long(mask)); 608 609 /* 610 * Don't let userspace fool us. If we need more than a single page 611 * to describe the compressed MPIDR array, just fall back to the 612 * iterative method. Single vcpu VMs do not need this either. 613 */ 614 if (struct_size(data, cmpidr_to_idx, nr_entries) <= PAGE_SIZE) 615 data = kzalloc(struct_size(data, cmpidr_to_idx, nr_entries), 616 GFP_KERNEL_ACCOUNT); 617 618 if (!data) 619 goto out; 620 621 data->mpidr_mask = mask; 622 623 kvm_for_each_vcpu(c, vcpu, kvm) { 624 u64 aff = kvm_vcpu_get_mpidr_aff(vcpu); 625 u16 index = kvm_mpidr_index(data, aff); 626 627 data->cmpidr_to_idx[index] = c; 628 } 629 630 kvm->arch.mpidr_data = data; 631 out: 632 mutex_unlock(&kvm->arch.config_lock); 633 } 634 635 /* 636 * Handle both the initialisation that is being done when the vcpu is 637 * run for the first time, as well as the updates that must be 638 * performed each time we get a new thread dealing with this vcpu. 639 */ 640 int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) 641 { 642 struct kvm *kvm = vcpu->kvm; 643 int ret; 644 645 if (!kvm_vcpu_initialized(vcpu)) 646 return -ENOEXEC; 647 648 if (!kvm_arm_vcpu_is_finalized(vcpu)) 649 return -EPERM; 650 651 ret = kvm_arch_vcpu_run_map_fp(vcpu); 652 if (ret) 653 return ret; 654 655 if (likely(vcpu_has_run_once(vcpu))) 656 return 0; 657 658 kvm_init_mpidr_data(kvm); 659 660 kvm_arm_vcpu_init_debug(vcpu); 661 662 if (likely(irqchip_in_kernel(kvm))) { 663 /* 664 * Map the VGIC hardware resources before running a vcpu the 665 * first time on this VM. 666 */ 667 ret = kvm_vgic_map_resources(kvm); 668 if (ret) 669 return ret; 670 } 671 672 ret = kvm_timer_enable(vcpu); 673 if (ret) 674 return ret; 675 676 ret = kvm_arm_pmu_v3_enable(vcpu); 677 if (ret) 678 return ret; 679 680 if (is_protected_kvm_enabled()) { 681 ret = pkvm_create_hyp_vm(kvm); 682 if (ret) 683 return ret; 684 } 685 686 if (!irqchip_in_kernel(kvm)) { 687 /* 688 * Tell the rest of the code that there are userspace irqchip 689 * VMs in the wild. 690 */ 691 static_branch_inc(&userspace_irqchip_in_use); 692 } 693 694 /* 695 * Initialize traps for protected VMs. 696 * NOTE: Move to run in EL2 directly, rather than via a hypercall, once 697 * the code is in place for first run initialization at EL2. 698 */ 699 if (kvm_vm_is_protected(kvm)) 700 kvm_call_hyp_nvhe(__pkvm_vcpu_init_traps, vcpu); 701 702 mutex_lock(&kvm->arch.config_lock); 703 set_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags); 704 mutex_unlock(&kvm->arch.config_lock); 705 706 return ret; 707 } 708 709 bool kvm_arch_intc_initialized(struct kvm *kvm) 710 { 711 return vgic_initialized(kvm); 712 } 713 714 void kvm_arm_halt_guest(struct kvm *kvm) 715 { 716 unsigned long i; 717 struct kvm_vcpu *vcpu; 718 719 kvm_for_each_vcpu(i, vcpu, kvm) 720 vcpu->arch.pause = true; 721 kvm_make_all_cpus_request(kvm, KVM_REQ_SLEEP); 722 } 723 724 void kvm_arm_resume_guest(struct kvm *kvm) 725 { 726 unsigned long i; 727 struct kvm_vcpu *vcpu; 728 729 kvm_for_each_vcpu(i, vcpu, kvm) { 730 vcpu->arch.pause = false; 731 __kvm_vcpu_wake_up(vcpu); 732 } 733 } 734 735 static void kvm_vcpu_sleep(struct kvm_vcpu *vcpu) 736 { 737 struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu); 738 739 rcuwait_wait_event(wait, 740 (!kvm_arm_vcpu_stopped(vcpu)) && (!vcpu->arch.pause), 741 TASK_INTERRUPTIBLE); 742 743 if (kvm_arm_vcpu_stopped(vcpu) || vcpu->arch.pause) { 744 /* Awaken to handle a signal, request we sleep again later. */ 745 kvm_make_request(KVM_REQ_SLEEP, vcpu); 746 } 747 748 /* 749 * Make sure we will observe a potential reset request if we've 750 * observed a change to the power state. Pairs with the smp_wmb() in 751 * kvm_psci_vcpu_on(). 752 */ 753 smp_rmb(); 754 } 755 756 /** 757 * kvm_vcpu_wfi - emulate Wait-For-Interrupt behavior 758 * @vcpu: The VCPU pointer 759 * 760 * Suspend execution of a vCPU until a valid wake event is detected, i.e. until 761 * the vCPU is runnable. The vCPU may or may not be scheduled out, depending 762 * on when a wake event arrives, e.g. there may already be a pending wake event. 763 */ 764 void kvm_vcpu_wfi(struct kvm_vcpu *vcpu) 765 { 766 /* 767 * Sync back the state of the GIC CPU interface so that we have 768 * the latest PMR and group enables. This ensures that 769 * kvm_arch_vcpu_runnable has up-to-date data to decide whether 770 * we have pending interrupts, e.g. when determining if the 771 * vCPU should block. 772 * 773 * For the same reason, we want to tell GICv4 that we need 774 * doorbells to be signalled, should an interrupt become pending. 775 */ 776 preempt_disable(); 777 kvm_vgic_vmcr_sync(vcpu); 778 vcpu_set_flag(vcpu, IN_WFI); 779 vgic_v4_put(vcpu); 780 preempt_enable(); 781 782 kvm_vcpu_halt(vcpu); 783 vcpu_clear_flag(vcpu, IN_WFIT); 784 785 preempt_disable(); 786 vcpu_clear_flag(vcpu, IN_WFI); 787 vgic_v4_load(vcpu); 788 preempt_enable(); 789 } 790 791 static int kvm_vcpu_suspend(struct kvm_vcpu *vcpu) 792 { 793 if (!kvm_arm_vcpu_suspended(vcpu)) 794 return 1; 795 796 kvm_vcpu_wfi(vcpu); 797 798 /* 799 * The suspend state is sticky; we do not leave it until userspace 800 * explicitly marks the vCPU as runnable. Request that we suspend again 801 * later. 802 */ 803 kvm_make_request(KVM_REQ_SUSPEND, vcpu); 804 805 /* 806 * Check to make sure the vCPU is actually runnable. If so, exit to 807 * userspace informing it of the wakeup condition. 808 */ 809 if (kvm_arch_vcpu_runnable(vcpu)) { 810 memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event)); 811 vcpu->run->system_event.type = KVM_SYSTEM_EVENT_WAKEUP; 812 vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; 813 return 0; 814 } 815 816 /* 817 * Otherwise, we were unblocked to process a different event, such as a 818 * pending signal. Return 1 and allow kvm_arch_vcpu_ioctl_run() to 819 * process the event. 820 */ 821 return 1; 822 } 823 824 /** 825 * check_vcpu_requests - check and handle pending vCPU requests 826 * @vcpu: the VCPU pointer 827 * 828 * Return: 1 if we should enter the guest 829 * 0 if we should exit to userspace 830 * < 0 if we should exit to userspace, where the return value indicates 831 * an error 832 */ 833 static int check_vcpu_requests(struct kvm_vcpu *vcpu) 834 { 835 if (kvm_request_pending(vcpu)) { 836 if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) 837 kvm_vcpu_sleep(vcpu); 838 839 if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu)) 840 kvm_reset_vcpu(vcpu); 841 842 /* 843 * Clear IRQ_PENDING requests that were made to guarantee 844 * that a VCPU sees new virtual interrupts. 845 */ 846 kvm_check_request(KVM_REQ_IRQ_PENDING, vcpu); 847 848 if (kvm_check_request(KVM_REQ_RECORD_STEAL, vcpu)) 849 kvm_update_stolen_time(vcpu); 850 851 if (kvm_check_request(KVM_REQ_RELOAD_GICv4, vcpu)) { 852 /* The distributor enable bits were changed */ 853 preempt_disable(); 854 vgic_v4_put(vcpu); 855 vgic_v4_load(vcpu); 856 preempt_enable(); 857 } 858 859 if (kvm_check_request(KVM_REQ_RELOAD_PMU, vcpu)) 860 kvm_vcpu_reload_pmu(vcpu); 861 862 if (kvm_check_request(KVM_REQ_RESYNC_PMU_EL0, vcpu)) 863 kvm_vcpu_pmu_restore_guest(vcpu); 864 865 if (kvm_check_request(KVM_REQ_SUSPEND, vcpu)) 866 return kvm_vcpu_suspend(vcpu); 867 868 if (kvm_dirty_ring_check_request(vcpu)) 869 return 0; 870 } 871 872 return 1; 873 } 874 875 static bool vcpu_mode_is_bad_32bit(struct kvm_vcpu *vcpu) 876 { 877 if (likely(!vcpu_mode_is_32bit(vcpu))) 878 return false; 879 880 if (vcpu_has_nv(vcpu)) 881 return true; 882 883 return !kvm_supports_32bit_el0(); 884 } 885 886 /** 887 * kvm_vcpu_exit_request - returns true if the VCPU should *not* enter the guest 888 * @vcpu: The VCPU pointer 889 * @ret: Pointer to write optional return code 890 * 891 * Returns: true if the VCPU needs to return to a preemptible + interruptible 892 * and skip guest entry. 893 * 894 * This function disambiguates between two different types of exits: exits to a 895 * preemptible + interruptible kernel context and exits to userspace. For an 896 * exit to userspace, this function will write the return code to ret and return 897 * true. For an exit to preemptible + interruptible kernel context (i.e. check 898 * for pending work and re-enter), return true without writing to ret. 899 */ 900 static bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu, int *ret) 901 { 902 struct kvm_run *run = vcpu->run; 903 904 /* 905 * If we're using a userspace irqchip, then check if we need 906 * to tell a userspace irqchip about timer or PMU level 907 * changes and if so, exit to userspace (the actual level 908 * state gets updated in kvm_timer_update_run and 909 * kvm_pmu_update_run below). 910 */ 911 if (static_branch_unlikely(&userspace_irqchip_in_use)) { 912 if (kvm_timer_should_notify_user(vcpu) || 913 kvm_pmu_should_notify_user(vcpu)) { 914 *ret = -EINTR; 915 run->exit_reason = KVM_EXIT_INTR; 916 return true; 917 } 918 } 919 920 if (unlikely(vcpu_on_unsupported_cpu(vcpu))) { 921 run->exit_reason = KVM_EXIT_FAIL_ENTRY; 922 run->fail_entry.hardware_entry_failure_reason = KVM_EXIT_FAIL_ENTRY_CPU_UNSUPPORTED; 923 run->fail_entry.cpu = smp_processor_id(); 924 *ret = 0; 925 return true; 926 } 927 928 return kvm_request_pending(vcpu) || 929 xfer_to_guest_mode_work_pending(); 930 } 931 932 /* 933 * Actually run the vCPU, entering an RCU extended quiescent state (EQS) while 934 * the vCPU is running. 935 * 936 * This must be noinstr as instrumentation may make use of RCU, and this is not 937 * safe during the EQS. 938 */ 939 static int noinstr kvm_arm_vcpu_enter_exit(struct kvm_vcpu *vcpu) 940 { 941 int ret; 942 943 guest_state_enter_irqoff(); 944 ret = kvm_call_hyp_ret(__kvm_vcpu_run, vcpu); 945 guest_state_exit_irqoff(); 946 947 return ret; 948 } 949 950 /** 951 * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code 952 * @vcpu: The VCPU pointer 953 * 954 * This function is called through the VCPU_RUN ioctl called from user space. It 955 * will execute VM code in a loop until the time slice for the process is used 956 * or some emulation is needed from user space in which case the function will 957 * return with return value 0 and with the kvm_run structure filled in with the 958 * required data for the requested emulation. 959 */ 960 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) 961 { 962 struct kvm_run *run = vcpu->run; 963 int ret; 964 965 if (run->exit_reason == KVM_EXIT_MMIO) { 966 ret = kvm_handle_mmio_return(vcpu); 967 if (ret) 968 return ret; 969 } 970 971 vcpu_load(vcpu); 972 973 if (run->immediate_exit) { 974 ret = -EINTR; 975 goto out; 976 } 977 978 kvm_sigset_activate(vcpu); 979 980 ret = 1; 981 run->exit_reason = KVM_EXIT_UNKNOWN; 982 run->flags = 0; 983 while (ret > 0) { 984 /* 985 * Check conditions before entering the guest 986 */ 987 ret = xfer_to_guest_mode_handle_work(vcpu); 988 if (!ret) 989 ret = 1; 990 991 if (ret > 0) 992 ret = check_vcpu_requests(vcpu); 993 994 /* 995 * Preparing the interrupts to be injected also 996 * involves poking the GIC, which must be done in a 997 * non-preemptible context. 998 */ 999 preempt_disable(); 1000 1001 /* 1002 * The VMID allocator only tracks active VMIDs per 1003 * physical CPU, and therefore the VMID allocated may not be 1004 * preserved on VMID roll-over if the task was preempted, 1005 * making a thread's VMID inactive. So we need to call 1006 * kvm_arm_vmid_update() in non-premptible context. 1007 */ 1008 if (kvm_arm_vmid_update(&vcpu->arch.hw_mmu->vmid) && 1009 has_vhe()) 1010 __load_stage2(vcpu->arch.hw_mmu, 1011 vcpu->arch.hw_mmu->arch); 1012 1013 kvm_pmu_flush_hwstate(vcpu); 1014 1015 local_irq_disable(); 1016 1017 kvm_vgic_flush_hwstate(vcpu); 1018 1019 kvm_pmu_update_vcpu_events(vcpu); 1020 1021 /* 1022 * Ensure we set mode to IN_GUEST_MODE after we disable 1023 * interrupts and before the final VCPU requests check. 1024 * See the comment in kvm_vcpu_exiting_guest_mode() and 1025 * Documentation/virt/kvm/vcpu-requests.rst 1026 */ 1027 smp_store_mb(vcpu->mode, IN_GUEST_MODE); 1028 1029 if (ret <= 0 || kvm_vcpu_exit_request(vcpu, &ret)) { 1030 vcpu->mode = OUTSIDE_GUEST_MODE; 1031 isb(); /* Ensure work in x_flush_hwstate is committed */ 1032 kvm_pmu_sync_hwstate(vcpu); 1033 if (static_branch_unlikely(&userspace_irqchip_in_use)) 1034 kvm_timer_sync_user(vcpu); 1035 kvm_vgic_sync_hwstate(vcpu); 1036 local_irq_enable(); 1037 preempt_enable(); 1038 continue; 1039 } 1040 1041 kvm_arm_setup_debug(vcpu); 1042 kvm_arch_vcpu_ctxflush_fp(vcpu); 1043 1044 /************************************************************** 1045 * Enter the guest 1046 */ 1047 trace_kvm_entry(*vcpu_pc(vcpu)); 1048 guest_timing_enter_irqoff(); 1049 1050 ret = kvm_arm_vcpu_enter_exit(vcpu); 1051 1052 vcpu->mode = OUTSIDE_GUEST_MODE; 1053 vcpu->stat.exits++; 1054 /* 1055 * Back from guest 1056 *************************************************************/ 1057 1058 kvm_arm_clear_debug(vcpu); 1059 1060 /* 1061 * We must sync the PMU state before the vgic state so 1062 * that the vgic can properly sample the updated state of the 1063 * interrupt line. 1064 */ 1065 kvm_pmu_sync_hwstate(vcpu); 1066 1067 /* 1068 * Sync the vgic state before syncing the timer state because 1069 * the timer code needs to know if the virtual timer 1070 * interrupts are active. 1071 */ 1072 kvm_vgic_sync_hwstate(vcpu); 1073 1074 /* 1075 * Sync the timer hardware state before enabling interrupts as 1076 * we don't want vtimer interrupts to race with syncing the 1077 * timer virtual interrupt state. 1078 */ 1079 if (static_branch_unlikely(&userspace_irqchip_in_use)) 1080 kvm_timer_sync_user(vcpu); 1081 1082 kvm_arch_vcpu_ctxsync_fp(vcpu); 1083 1084 /* 1085 * We must ensure that any pending interrupts are taken before 1086 * we exit guest timing so that timer ticks are accounted as 1087 * guest time. Transiently unmask interrupts so that any 1088 * pending interrupts are taken. 1089 * 1090 * Per ARM DDI 0487G.b section D1.13.4, an ISB (or other 1091 * context synchronization event) is necessary to ensure that 1092 * pending interrupts are taken. 1093 */ 1094 if (ARM_EXCEPTION_CODE(ret) == ARM_EXCEPTION_IRQ) { 1095 local_irq_enable(); 1096 isb(); 1097 local_irq_disable(); 1098 } 1099 1100 guest_timing_exit_irqoff(); 1101 1102 local_irq_enable(); 1103 1104 trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu)); 1105 1106 /* Exit types that need handling before we can be preempted */ 1107 handle_exit_early(vcpu, ret); 1108 1109 preempt_enable(); 1110 1111 /* 1112 * The ARMv8 architecture doesn't give the hypervisor 1113 * a mechanism to prevent a guest from dropping to AArch32 EL0 1114 * if implemented by the CPU. If we spot the guest in such 1115 * state and that we decided it wasn't supposed to do so (like 1116 * with the asymmetric AArch32 case), return to userspace with 1117 * a fatal error. 1118 */ 1119 if (vcpu_mode_is_bad_32bit(vcpu)) { 1120 /* 1121 * As we have caught the guest red-handed, decide that 1122 * it isn't fit for purpose anymore by making the vcpu 1123 * invalid. The VMM can try and fix it by issuing a 1124 * KVM_ARM_VCPU_INIT if it really wants to. 1125 */ 1126 vcpu_clear_flag(vcpu, VCPU_INITIALIZED); 1127 ret = ARM_EXCEPTION_IL; 1128 } 1129 1130 ret = handle_exit(vcpu, ret); 1131 } 1132 1133 /* Tell userspace about in-kernel device output levels */ 1134 if (unlikely(!irqchip_in_kernel(vcpu->kvm))) { 1135 kvm_timer_update_run(vcpu); 1136 kvm_pmu_update_run(vcpu); 1137 } 1138 1139 kvm_sigset_deactivate(vcpu); 1140 1141 out: 1142 /* 1143 * In the unlikely event that we are returning to userspace 1144 * with pending exceptions or PC adjustment, commit these 1145 * adjustments in order to give userspace a consistent view of 1146 * the vcpu state. Note that this relies on __kvm_adjust_pc() 1147 * being preempt-safe on VHE. 1148 */ 1149 if (unlikely(vcpu_get_flag(vcpu, PENDING_EXCEPTION) || 1150 vcpu_get_flag(vcpu, INCREMENT_PC))) 1151 kvm_call_hyp(__kvm_adjust_pc, vcpu); 1152 1153 vcpu_put(vcpu); 1154 return ret; 1155 } 1156 1157 static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level) 1158 { 1159 int bit_index; 1160 bool set; 1161 unsigned long *hcr; 1162 1163 if (number == KVM_ARM_IRQ_CPU_IRQ) 1164 bit_index = __ffs(HCR_VI); 1165 else /* KVM_ARM_IRQ_CPU_FIQ */ 1166 bit_index = __ffs(HCR_VF); 1167 1168 hcr = vcpu_hcr(vcpu); 1169 if (level) 1170 set = test_and_set_bit(bit_index, hcr); 1171 else 1172 set = test_and_clear_bit(bit_index, hcr); 1173 1174 /* 1175 * If we didn't change anything, no need to wake up or kick other CPUs 1176 */ 1177 if (set == level) 1178 return 0; 1179 1180 /* 1181 * The vcpu irq_lines field was updated, wake up sleeping VCPUs and 1182 * trigger a world-switch round on the running physical CPU to set the 1183 * virtual IRQ/FIQ fields in the HCR appropriately. 1184 */ 1185 kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); 1186 kvm_vcpu_kick(vcpu); 1187 1188 return 0; 1189 } 1190 1191 int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, 1192 bool line_status) 1193 { 1194 u32 irq = irq_level->irq; 1195 unsigned int irq_type, vcpu_id, irq_num; 1196 struct kvm_vcpu *vcpu = NULL; 1197 bool level = irq_level->level; 1198 1199 irq_type = (irq >> KVM_ARM_IRQ_TYPE_SHIFT) & KVM_ARM_IRQ_TYPE_MASK; 1200 vcpu_id = (irq >> KVM_ARM_IRQ_VCPU_SHIFT) & KVM_ARM_IRQ_VCPU_MASK; 1201 vcpu_id += ((irq >> KVM_ARM_IRQ_VCPU2_SHIFT) & KVM_ARM_IRQ_VCPU2_MASK) * (KVM_ARM_IRQ_VCPU_MASK + 1); 1202 irq_num = (irq >> KVM_ARM_IRQ_NUM_SHIFT) & KVM_ARM_IRQ_NUM_MASK; 1203 1204 trace_kvm_irq_line(irq_type, vcpu_id, irq_num, irq_level->level); 1205 1206 switch (irq_type) { 1207 case KVM_ARM_IRQ_TYPE_CPU: 1208 if (irqchip_in_kernel(kvm)) 1209 return -ENXIO; 1210 1211 vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id); 1212 if (!vcpu) 1213 return -EINVAL; 1214 1215 if (irq_num > KVM_ARM_IRQ_CPU_FIQ) 1216 return -EINVAL; 1217 1218 return vcpu_interrupt_line(vcpu, irq_num, level); 1219 case KVM_ARM_IRQ_TYPE_PPI: 1220 if (!irqchip_in_kernel(kvm)) 1221 return -ENXIO; 1222 1223 vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id); 1224 if (!vcpu) 1225 return -EINVAL; 1226 1227 if (irq_num < VGIC_NR_SGIS || irq_num >= VGIC_NR_PRIVATE_IRQS) 1228 return -EINVAL; 1229 1230 return kvm_vgic_inject_irq(kvm, vcpu, irq_num, level, NULL); 1231 case KVM_ARM_IRQ_TYPE_SPI: 1232 if (!irqchip_in_kernel(kvm)) 1233 return -ENXIO; 1234 1235 if (irq_num < VGIC_NR_PRIVATE_IRQS) 1236 return -EINVAL; 1237 1238 return kvm_vgic_inject_irq(kvm, NULL, irq_num, level, NULL); 1239 } 1240 1241 return -EINVAL; 1242 } 1243 1244 static unsigned long system_supported_vcpu_features(void) 1245 { 1246 unsigned long features = KVM_VCPU_VALID_FEATURES; 1247 1248 if (!cpus_have_final_cap(ARM64_HAS_32BIT_EL1)) 1249 clear_bit(KVM_ARM_VCPU_EL1_32BIT, &features); 1250 1251 if (!kvm_arm_support_pmu_v3()) 1252 clear_bit(KVM_ARM_VCPU_PMU_V3, &features); 1253 1254 if (!system_supports_sve()) 1255 clear_bit(KVM_ARM_VCPU_SVE, &features); 1256 1257 if (!system_has_full_ptr_auth()) { 1258 clear_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, &features); 1259 clear_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, &features); 1260 } 1261 1262 if (!cpus_have_final_cap(ARM64_HAS_NESTED_VIRT)) 1263 clear_bit(KVM_ARM_VCPU_HAS_EL2, &features); 1264 1265 return features; 1266 } 1267 1268 static int kvm_vcpu_init_check_features(struct kvm_vcpu *vcpu, 1269 const struct kvm_vcpu_init *init) 1270 { 1271 unsigned long features = init->features[0]; 1272 int i; 1273 1274 if (features & ~KVM_VCPU_VALID_FEATURES) 1275 return -ENOENT; 1276 1277 for (i = 1; i < ARRAY_SIZE(init->features); i++) { 1278 if (init->features[i]) 1279 return -ENOENT; 1280 } 1281 1282 if (features & ~system_supported_vcpu_features()) 1283 return -EINVAL; 1284 1285 /* 1286 * For now make sure that both address/generic pointer authentication 1287 * features are requested by the userspace together. 1288 */ 1289 if (test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, &features) != 1290 test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, &features)) 1291 return -EINVAL; 1292 1293 /* Disallow NV+SVE for the time being */ 1294 if (test_bit(KVM_ARM_VCPU_HAS_EL2, &features) && 1295 test_bit(KVM_ARM_VCPU_SVE, &features)) 1296 return -EINVAL; 1297 1298 if (!test_bit(KVM_ARM_VCPU_EL1_32BIT, &features)) 1299 return 0; 1300 1301 /* MTE is incompatible with AArch32 */ 1302 if (kvm_has_mte(vcpu->kvm)) 1303 return -EINVAL; 1304 1305 /* NV is incompatible with AArch32 */ 1306 if (test_bit(KVM_ARM_VCPU_HAS_EL2, &features)) 1307 return -EINVAL; 1308 1309 return 0; 1310 } 1311 1312 static bool kvm_vcpu_init_changed(struct kvm_vcpu *vcpu, 1313 const struct kvm_vcpu_init *init) 1314 { 1315 unsigned long features = init->features[0]; 1316 1317 return !bitmap_equal(vcpu->kvm->arch.vcpu_features, &features, 1318 KVM_VCPU_MAX_FEATURES); 1319 } 1320 1321 static int kvm_setup_vcpu(struct kvm_vcpu *vcpu) 1322 { 1323 struct kvm *kvm = vcpu->kvm; 1324 int ret = 0; 1325 1326 /* 1327 * When the vCPU has a PMU, but no PMU is set for the guest 1328 * yet, set the default one. 1329 */ 1330 if (kvm_vcpu_has_pmu(vcpu) && !kvm->arch.arm_pmu) 1331 ret = kvm_arm_set_default_pmu(kvm); 1332 1333 return ret; 1334 } 1335 1336 static int __kvm_vcpu_set_target(struct kvm_vcpu *vcpu, 1337 const struct kvm_vcpu_init *init) 1338 { 1339 unsigned long features = init->features[0]; 1340 struct kvm *kvm = vcpu->kvm; 1341 int ret = -EINVAL; 1342 1343 mutex_lock(&kvm->arch.config_lock); 1344 1345 if (test_bit(KVM_ARCH_FLAG_VCPU_FEATURES_CONFIGURED, &kvm->arch.flags) && 1346 kvm_vcpu_init_changed(vcpu, init)) 1347 goto out_unlock; 1348 1349 bitmap_copy(kvm->arch.vcpu_features, &features, KVM_VCPU_MAX_FEATURES); 1350 1351 ret = kvm_setup_vcpu(vcpu); 1352 if (ret) 1353 goto out_unlock; 1354 1355 /* Now we know what it is, we can reset it. */ 1356 kvm_reset_vcpu(vcpu); 1357 1358 set_bit(KVM_ARCH_FLAG_VCPU_FEATURES_CONFIGURED, &kvm->arch.flags); 1359 vcpu_set_flag(vcpu, VCPU_INITIALIZED); 1360 ret = 0; 1361 out_unlock: 1362 mutex_unlock(&kvm->arch.config_lock); 1363 return ret; 1364 } 1365 1366 static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, 1367 const struct kvm_vcpu_init *init) 1368 { 1369 int ret; 1370 1371 if (init->target != KVM_ARM_TARGET_GENERIC_V8 && 1372 init->target != kvm_target_cpu()) 1373 return -EINVAL; 1374 1375 ret = kvm_vcpu_init_check_features(vcpu, init); 1376 if (ret) 1377 return ret; 1378 1379 if (!kvm_vcpu_initialized(vcpu)) 1380 return __kvm_vcpu_set_target(vcpu, init); 1381 1382 if (kvm_vcpu_init_changed(vcpu, init)) 1383 return -EINVAL; 1384 1385 kvm_reset_vcpu(vcpu); 1386 return 0; 1387 } 1388 1389 static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, 1390 struct kvm_vcpu_init *init) 1391 { 1392 bool power_off = false; 1393 int ret; 1394 1395 /* 1396 * Treat the power-off vCPU feature as ephemeral. Clear the bit to avoid 1397 * reflecting it in the finalized feature set, thus limiting its scope 1398 * to a single KVM_ARM_VCPU_INIT call. 1399 */ 1400 if (init->features[0] & BIT(KVM_ARM_VCPU_POWER_OFF)) { 1401 init->features[0] &= ~BIT(KVM_ARM_VCPU_POWER_OFF); 1402 power_off = true; 1403 } 1404 1405 ret = kvm_vcpu_set_target(vcpu, init); 1406 if (ret) 1407 return ret; 1408 1409 /* 1410 * Ensure a rebooted VM will fault in RAM pages and detect if the 1411 * guest MMU is turned off and flush the caches as needed. 1412 * 1413 * S2FWB enforces all memory accesses to RAM being cacheable, 1414 * ensuring that the data side is always coherent. We still 1415 * need to invalidate the I-cache though, as FWB does *not* 1416 * imply CTR_EL0.DIC. 1417 */ 1418 if (vcpu_has_run_once(vcpu)) { 1419 if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB)) 1420 stage2_unmap_vm(vcpu->kvm); 1421 else 1422 icache_inval_all_pou(); 1423 } 1424 1425 vcpu_reset_hcr(vcpu); 1426 vcpu->arch.cptr_el2 = kvm_get_reset_cptr_el2(vcpu); 1427 1428 /* 1429 * Handle the "start in power-off" case. 1430 */ 1431 spin_lock(&vcpu->arch.mp_state_lock); 1432 1433 if (power_off) 1434 __kvm_arm_vcpu_power_off(vcpu); 1435 else 1436 WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_RUNNABLE); 1437 1438 spin_unlock(&vcpu->arch.mp_state_lock); 1439 1440 return 0; 1441 } 1442 1443 static int kvm_arm_vcpu_set_attr(struct kvm_vcpu *vcpu, 1444 struct kvm_device_attr *attr) 1445 { 1446 int ret = -ENXIO; 1447 1448 switch (attr->group) { 1449 default: 1450 ret = kvm_arm_vcpu_arch_set_attr(vcpu, attr); 1451 break; 1452 } 1453 1454 return ret; 1455 } 1456 1457 static int kvm_arm_vcpu_get_attr(struct kvm_vcpu *vcpu, 1458 struct kvm_device_attr *attr) 1459 { 1460 int ret = -ENXIO; 1461 1462 switch (attr->group) { 1463 default: 1464 ret = kvm_arm_vcpu_arch_get_attr(vcpu, attr); 1465 break; 1466 } 1467 1468 return ret; 1469 } 1470 1471 static int kvm_arm_vcpu_has_attr(struct kvm_vcpu *vcpu, 1472 struct kvm_device_attr *attr) 1473 { 1474 int ret = -ENXIO; 1475 1476 switch (attr->group) { 1477 default: 1478 ret = kvm_arm_vcpu_arch_has_attr(vcpu, attr); 1479 break; 1480 } 1481 1482 return ret; 1483 } 1484 1485 static int kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu, 1486 struct kvm_vcpu_events *events) 1487 { 1488 memset(events, 0, sizeof(*events)); 1489 1490 return __kvm_arm_vcpu_get_events(vcpu, events); 1491 } 1492 1493 static int kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, 1494 struct kvm_vcpu_events *events) 1495 { 1496 int i; 1497 1498 /* check whether the reserved field is zero */ 1499 for (i = 0; i < ARRAY_SIZE(events->reserved); i++) 1500 if (events->reserved[i]) 1501 return -EINVAL; 1502 1503 /* check whether the pad field is zero */ 1504 for (i = 0; i < ARRAY_SIZE(events->exception.pad); i++) 1505 if (events->exception.pad[i]) 1506 return -EINVAL; 1507 1508 return __kvm_arm_vcpu_set_events(vcpu, events); 1509 } 1510 1511 long kvm_arch_vcpu_ioctl(struct file *filp, 1512 unsigned int ioctl, unsigned long arg) 1513 { 1514 struct kvm_vcpu *vcpu = filp->private_data; 1515 void __user *argp = (void __user *)arg; 1516 struct kvm_device_attr attr; 1517 long r; 1518 1519 switch (ioctl) { 1520 case KVM_ARM_VCPU_INIT: { 1521 struct kvm_vcpu_init init; 1522 1523 r = -EFAULT; 1524 if (copy_from_user(&init, argp, sizeof(init))) 1525 break; 1526 1527 r = kvm_arch_vcpu_ioctl_vcpu_init(vcpu, &init); 1528 break; 1529 } 1530 case KVM_SET_ONE_REG: 1531 case KVM_GET_ONE_REG: { 1532 struct kvm_one_reg reg; 1533 1534 r = -ENOEXEC; 1535 if (unlikely(!kvm_vcpu_initialized(vcpu))) 1536 break; 1537 1538 r = -EFAULT; 1539 if (copy_from_user(®, argp, sizeof(reg))) 1540 break; 1541 1542 /* 1543 * We could owe a reset due to PSCI. Handle the pending reset 1544 * here to ensure userspace register accesses are ordered after 1545 * the reset. 1546 */ 1547 if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu)) 1548 kvm_reset_vcpu(vcpu); 1549 1550 if (ioctl == KVM_SET_ONE_REG) 1551 r = kvm_arm_set_reg(vcpu, ®); 1552 else 1553 r = kvm_arm_get_reg(vcpu, ®); 1554 break; 1555 } 1556 case KVM_GET_REG_LIST: { 1557 struct kvm_reg_list __user *user_list = argp; 1558 struct kvm_reg_list reg_list; 1559 unsigned n; 1560 1561 r = -ENOEXEC; 1562 if (unlikely(!kvm_vcpu_initialized(vcpu))) 1563 break; 1564 1565 r = -EPERM; 1566 if (!kvm_arm_vcpu_is_finalized(vcpu)) 1567 break; 1568 1569 r = -EFAULT; 1570 if (copy_from_user(®_list, user_list, sizeof(reg_list))) 1571 break; 1572 n = reg_list.n; 1573 reg_list.n = kvm_arm_num_regs(vcpu); 1574 if (copy_to_user(user_list, ®_list, sizeof(reg_list))) 1575 break; 1576 r = -E2BIG; 1577 if (n < reg_list.n) 1578 break; 1579 r = kvm_arm_copy_reg_indices(vcpu, user_list->reg); 1580 break; 1581 } 1582 case KVM_SET_DEVICE_ATTR: { 1583 r = -EFAULT; 1584 if (copy_from_user(&attr, argp, sizeof(attr))) 1585 break; 1586 r = kvm_arm_vcpu_set_attr(vcpu, &attr); 1587 break; 1588 } 1589 case KVM_GET_DEVICE_ATTR: { 1590 r = -EFAULT; 1591 if (copy_from_user(&attr, argp, sizeof(attr))) 1592 break; 1593 r = kvm_arm_vcpu_get_attr(vcpu, &attr); 1594 break; 1595 } 1596 case KVM_HAS_DEVICE_ATTR: { 1597 r = -EFAULT; 1598 if (copy_from_user(&attr, argp, sizeof(attr))) 1599 break; 1600 r = kvm_arm_vcpu_has_attr(vcpu, &attr); 1601 break; 1602 } 1603 case KVM_GET_VCPU_EVENTS: { 1604 struct kvm_vcpu_events events; 1605 1606 if (kvm_arm_vcpu_get_events(vcpu, &events)) 1607 return -EINVAL; 1608 1609 if (copy_to_user(argp, &events, sizeof(events))) 1610 return -EFAULT; 1611 1612 return 0; 1613 } 1614 case KVM_SET_VCPU_EVENTS: { 1615 struct kvm_vcpu_events events; 1616 1617 if (copy_from_user(&events, argp, sizeof(events))) 1618 return -EFAULT; 1619 1620 return kvm_arm_vcpu_set_events(vcpu, &events); 1621 } 1622 case KVM_ARM_VCPU_FINALIZE: { 1623 int what; 1624 1625 if (!kvm_vcpu_initialized(vcpu)) 1626 return -ENOEXEC; 1627 1628 if (get_user(what, (const int __user *)argp)) 1629 return -EFAULT; 1630 1631 return kvm_arm_vcpu_finalize(vcpu, what); 1632 } 1633 default: 1634 r = -EINVAL; 1635 } 1636 1637 return r; 1638 } 1639 1640 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot) 1641 { 1642 1643 } 1644 1645 static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm, 1646 struct kvm_arm_device_addr *dev_addr) 1647 { 1648 switch (FIELD_GET(KVM_ARM_DEVICE_ID_MASK, dev_addr->id)) { 1649 case KVM_ARM_DEVICE_VGIC_V2: 1650 if (!vgic_present) 1651 return -ENXIO; 1652 return kvm_set_legacy_vgic_v2_addr(kvm, dev_addr); 1653 default: 1654 return -ENODEV; 1655 } 1656 } 1657 1658 static int kvm_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) 1659 { 1660 switch (attr->group) { 1661 case KVM_ARM_VM_SMCCC_CTRL: 1662 return kvm_vm_smccc_has_attr(kvm, attr); 1663 default: 1664 return -ENXIO; 1665 } 1666 } 1667 1668 static int kvm_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr) 1669 { 1670 switch (attr->group) { 1671 case KVM_ARM_VM_SMCCC_CTRL: 1672 return kvm_vm_smccc_set_attr(kvm, attr); 1673 default: 1674 return -ENXIO; 1675 } 1676 } 1677 1678 int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) 1679 { 1680 struct kvm *kvm = filp->private_data; 1681 void __user *argp = (void __user *)arg; 1682 struct kvm_device_attr attr; 1683 1684 switch (ioctl) { 1685 case KVM_CREATE_IRQCHIP: { 1686 int ret; 1687 if (!vgic_present) 1688 return -ENXIO; 1689 mutex_lock(&kvm->lock); 1690 ret = kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); 1691 mutex_unlock(&kvm->lock); 1692 return ret; 1693 } 1694 case KVM_ARM_SET_DEVICE_ADDR: { 1695 struct kvm_arm_device_addr dev_addr; 1696 1697 if (copy_from_user(&dev_addr, argp, sizeof(dev_addr))) 1698 return -EFAULT; 1699 return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr); 1700 } 1701 case KVM_ARM_PREFERRED_TARGET: { 1702 struct kvm_vcpu_init init = { 1703 .target = KVM_ARM_TARGET_GENERIC_V8, 1704 }; 1705 1706 if (copy_to_user(argp, &init, sizeof(init))) 1707 return -EFAULT; 1708 1709 return 0; 1710 } 1711 case KVM_ARM_MTE_COPY_TAGS: { 1712 struct kvm_arm_copy_mte_tags copy_tags; 1713 1714 if (copy_from_user(©_tags, argp, sizeof(copy_tags))) 1715 return -EFAULT; 1716 return kvm_vm_ioctl_mte_copy_tags(kvm, ©_tags); 1717 } 1718 case KVM_ARM_SET_COUNTER_OFFSET: { 1719 struct kvm_arm_counter_offset offset; 1720 1721 if (copy_from_user(&offset, argp, sizeof(offset))) 1722 return -EFAULT; 1723 return kvm_vm_ioctl_set_counter_offset(kvm, &offset); 1724 } 1725 case KVM_HAS_DEVICE_ATTR: { 1726 if (copy_from_user(&attr, argp, sizeof(attr))) 1727 return -EFAULT; 1728 1729 return kvm_vm_has_attr(kvm, &attr); 1730 } 1731 case KVM_SET_DEVICE_ATTR: { 1732 if (copy_from_user(&attr, argp, sizeof(attr))) 1733 return -EFAULT; 1734 1735 return kvm_vm_set_attr(kvm, &attr); 1736 } 1737 case KVM_ARM_GET_REG_WRITABLE_MASKS: { 1738 struct reg_mask_range range; 1739 1740 if (copy_from_user(&range, argp, sizeof(range))) 1741 return -EFAULT; 1742 return kvm_vm_ioctl_get_reg_writable_masks(kvm, &range); 1743 } 1744 default: 1745 return -EINVAL; 1746 } 1747 } 1748 1749 /* unlocks vcpus from @vcpu_lock_idx and smaller */ 1750 static void unlock_vcpus(struct kvm *kvm, int vcpu_lock_idx) 1751 { 1752 struct kvm_vcpu *tmp_vcpu; 1753 1754 for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) { 1755 tmp_vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx); 1756 mutex_unlock(&tmp_vcpu->mutex); 1757 } 1758 } 1759 1760 void unlock_all_vcpus(struct kvm *kvm) 1761 { 1762 lockdep_assert_held(&kvm->lock); 1763 1764 unlock_vcpus(kvm, atomic_read(&kvm->online_vcpus) - 1); 1765 } 1766 1767 /* Returns true if all vcpus were locked, false otherwise */ 1768 bool lock_all_vcpus(struct kvm *kvm) 1769 { 1770 struct kvm_vcpu *tmp_vcpu; 1771 unsigned long c; 1772 1773 lockdep_assert_held(&kvm->lock); 1774 1775 /* 1776 * Any time a vcpu is in an ioctl (including running), the 1777 * core KVM code tries to grab the vcpu->mutex. 1778 * 1779 * By grabbing the vcpu->mutex of all VCPUs we ensure that no 1780 * other VCPUs can fiddle with the state while we access it. 1781 */ 1782 kvm_for_each_vcpu(c, tmp_vcpu, kvm) { 1783 if (!mutex_trylock(&tmp_vcpu->mutex)) { 1784 unlock_vcpus(kvm, c - 1); 1785 return false; 1786 } 1787 } 1788 1789 return true; 1790 } 1791 1792 static unsigned long nvhe_percpu_size(void) 1793 { 1794 return (unsigned long)CHOOSE_NVHE_SYM(__per_cpu_end) - 1795 (unsigned long)CHOOSE_NVHE_SYM(__per_cpu_start); 1796 } 1797 1798 static unsigned long nvhe_percpu_order(void) 1799 { 1800 unsigned long size = nvhe_percpu_size(); 1801 1802 return size ? get_order(size) : 0; 1803 } 1804 1805 /* A lookup table holding the hypervisor VA for each vector slot */ 1806 static void *hyp_spectre_vector_selector[BP_HARDEN_EL2_SLOTS]; 1807 1808 static void kvm_init_vector_slot(void *base, enum arm64_hyp_spectre_vector slot) 1809 { 1810 hyp_spectre_vector_selector[slot] = __kvm_vector_slot2addr(base, slot); 1811 } 1812 1813 static int kvm_init_vector_slots(void) 1814 { 1815 int err; 1816 void *base; 1817 1818 base = kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector)); 1819 kvm_init_vector_slot(base, HYP_VECTOR_DIRECT); 1820 1821 base = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs)); 1822 kvm_init_vector_slot(base, HYP_VECTOR_SPECTRE_DIRECT); 1823 1824 if (kvm_system_needs_idmapped_vectors() && 1825 !is_protected_kvm_enabled()) { 1826 err = create_hyp_exec_mappings(__pa_symbol(__bp_harden_hyp_vecs), 1827 __BP_HARDEN_HYP_VECS_SZ, &base); 1828 if (err) 1829 return err; 1830 } 1831 1832 kvm_init_vector_slot(base, HYP_VECTOR_INDIRECT); 1833 kvm_init_vector_slot(base, HYP_VECTOR_SPECTRE_INDIRECT); 1834 return 0; 1835 } 1836 1837 static void __init cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits) 1838 { 1839 struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu); 1840 unsigned long tcr; 1841 1842 /* 1843 * Calculate the raw per-cpu offset without a translation from the 1844 * kernel's mapping to the linear mapping, and store it in tpidr_el2 1845 * so that we can use adr_l to access per-cpu variables in EL2. 1846 * Also drop the KASAN tag which gets in the way... 1847 */ 1848 params->tpidr_el2 = (unsigned long)kasan_reset_tag(per_cpu_ptr_nvhe_sym(__per_cpu_start, cpu)) - 1849 (unsigned long)kvm_ksym_ref(CHOOSE_NVHE_SYM(__per_cpu_start)); 1850 1851 params->mair_el2 = read_sysreg(mair_el1); 1852 1853 tcr = read_sysreg(tcr_el1); 1854 if (cpus_have_final_cap(ARM64_KVM_HVHE)) { 1855 tcr |= TCR_EPD1_MASK; 1856 } else { 1857 tcr &= TCR_EL2_MASK; 1858 tcr |= TCR_EL2_RES1; 1859 } 1860 tcr &= ~TCR_T0SZ_MASK; 1861 tcr |= TCR_T0SZ(hyp_va_bits); 1862 params->tcr_el2 = tcr; 1863 1864 params->pgd_pa = kvm_mmu_get_httbr(); 1865 if (is_protected_kvm_enabled()) 1866 params->hcr_el2 = HCR_HOST_NVHE_PROTECTED_FLAGS; 1867 else 1868 params->hcr_el2 = HCR_HOST_NVHE_FLAGS; 1869 if (cpus_have_final_cap(ARM64_KVM_HVHE)) 1870 params->hcr_el2 |= HCR_E2H; 1871 params->vttbr = params->vtcr = 0; 1872 1873 /* 1874 * Flush the init params from the data cache because the struct will 1875 * be read while the MMU is off. 1876 */ 1877 kvm_flush_dcache_to_poc(params, sizeof(*params)); 1878 } 1879 1880 static void hyp_install_host_vector(void) 1881 { 1882 struct kvm_nvhe_init_params *params; 1883 struct arm_smccc_res res; 1884 1885 /* Switch from the HYP stub to our own HYP init vector */ 1886 __hyp_set_vectors(kvm_get_idmap_vector()); 1887 1888 /* 1889 * Call initialization code, and switch to the full blown HYP code. 1890 * If the cpucaps haven't been finalized yet, something has gone very 1891 * wrong, and hyp will crash and burn when it uses any 1892 * cpus_have_*_cap() wrapper. 1893 */ 1894 BUG_ON(!system_capabilities_finalized()); 1895 params = this_cpu_ptr_nvhe_sym(kvm_init_params); 1896 arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(__kvm_hyp_init), virt_to_phys(params), &res); 1897 WARN_ON(res.a0 != SMCCC_RET_SUCCESS); 1898 } 1899 1900 static void cpu_init_hyp_mode(void) 1901 { 1902 hyp_install_host_vector(); 1903 1904 /* 1905 * Disabling SSBD on a non-VHE system requires us to enable SSBS 1906 * at EL2. 1907 */ 1908 if (this_cpu_has_cap(ARM64_SSBS) && 1909 arm64_get_spectre_v4_state() == SPECTRE_VULNERABLE) { 1910 kvm_call_hyp_nvhe(__kvm_enable_ssbs); 1911 } 1912 } 1913 1914 static void cpu_hyp_reset(void) 1915 { 1916 if (!is_kernel_in_hyp_mode()) 1917 __hyp_reset_vectors(); 1918 } 1919 1920 /* 1921 * EL2 vectors can be mapped and rerouted in a number of ways, 1922 * depending on the kernel configuration and CPU present: 1923 * 1924 * - If the CPU is affected by Spectre-v2, the hardening sequence is 1925 * placed in one of the vector slots, which is executed before jumping 1926 * to the real vectors. 1927 * 1928 * - If the CPU also has the ARM64_SPECTRE_V3A cap, the slot 1929 * containing the hardening sequence is mapped next to the idmap page, 1930 * and executed before jumping to the real vectors. 1931 * 1932 * - If the CPU only has the ARM64_SPECTRE_V3A cap, then an 1933 * empty slot is selected, mapped next to the idmap page, and 1934 * executed before jumping to the real vectors. 1935 * 1936 * Note that ARM64_SPECTRE_V3A is somewhat incompatible with 1937 * VHE, as we don't have hypervisor-specific mappings. If the system 1938 * is VHE and yet selects this capability, it will be ignored. 1939 */ 1940 static void cpu_set_hyp_vector(void) 1941 { 1942 struct bp_hardening_data *data = this_cpu_ptr(&bp_hardening_data); 1943 void *vector = hyp_spectre_vector_selector[data->slot]; 1944 1945 if (!is_protected_kvm_enabled()) 1946 *this_cpu_ptr_hyp_sym(kvm_hyp_vector) = (unsigned long)vector; 1947 else 1948 kvm_call_hyp_nvhe(__pkvm_cpu_set_vector, data->slot); 1949 } 1950 1951 static void cpu_hyp_init_context(void) 1952 { 1953 kvm_init_host_cpu_context(&this_cpu_ptr_hyp_sym(kvm_host_data)->host_ctxt); 1954 1955 if (!is_kernel_in_hyp_mode()) 1956 cpu_init_hyp_mode(); 1957 } 1958 1959 static void cpu_hyp_init_features(void) 1960 { 1961 cpu_set_hyp_vector(); 1962 kvm_arm_init_debug(); 1963 1964 if (is_kernel_in_hyp_mode()) 1965 kvm_timer_init_vhe(); 1966 1967 if (vgic_present) 1968 kvm_vgic_init_cpu_hardware(); 1969 } 1970 1971 static void cpu_hyp_reinit(void) 1972 { 1973 cpu_hyp_reset(); 1974 cpu_hyp_init_context(); 1975 cpu_hyp_init_features(); 1976 } 1977 1978 static void cpu_hyp_init(void *discard) 1979 { 1980 if (!__this_cpu_read(kvm_hyp_initialized)) { 1981 cpu_hyp_reinit(); 1982 __this_cpu_write(kvm_hyp_initialized, 1); 1983 } 1984 } 1985 1986 static void cpu_hyp_uninit(void *discard) 1987 { 1988 if (__this_cpu_read(kvm_hyp_initialized)) { 1989 cpu_hyp_reset(); 1990 __this_cpu_write(kvm_hyp_initialized, 0); 1991 } 1992 } 1993 1994 int kvm_arch_hardware_enable(void) 1995 { 1996 /* 1997 * Most calls to this function are made with migration 1998 * disabled, but not with preemption disabled. The former is 1999 * enough to ensure correctness, but most of the helpers 2000 * expect the later and will throw a tantrum otherwise. 2001 */ 2002 preempt_disable(); 2003 2004 cpu_hyp_init(NULL); 2005 2006 kvm_vgic_cpu_up(); 2007 kvm_timer_cpu_up(); 2008 2009 preempt_enable(); 2010 2011 return 0; 2012 } 2013 2014 void kvm_arch_hardware_disable(void) 2015 { 2016 kvm_timer_cpu_down(); 2017 kvm_vgic_cpu_down(); 2018 2019 if (!is_protected_kvm_enabled()) 2020 cpu_hyp_uninit(NULL); 2021 } 2022 2023 #ifdef CONFIG_CPU_PM 2024 static int hyp_init_cpu_pm_notifier(struct notifier_block *self, 2025 unsigned long cmd, 2026 void *v) 2027 { 2028 /* 2029 * kvm_hyp_initialized is left with its old value over 2030 * PM_ENTER->PM_EXIT. It is used to indicate PM_EXIT should 2031 * re-enable hyp. 2032 */ 2033 switch (cmd) { 2034 case CPU_PM_ENTER: 2035 if (__this_cpu_read(kvm_hyp_initialized)) 2036 /* 2037 * don't update kvm_hyp_initialized here 2038 * so that the hyp will be re-enabled 2039 * when we resume. See below. 2040 */ 2041 cpu_hyp_reset(); 2042 2043 return NOTIFY_OK; 2044 case CPU_PM_ENTER_FAILED: 2045 case CPU_PM_EXIT: 2046 if (__this_cpu_read(kvm_hyp_initialized)) 2047 /* The hyp was enabled before suspend. */ 2048 cpu_hyp_reinit(); 2049 2050 return NOTIFY_OK; 2051 2052 default: 2053 return NOTIFY_DONE; 2054 } 2055 } 2056 2057 static struct notifier_block hyp_init_cpu_pm_nb = { 2058 .notifier_call = hyp_init_cpu_pm_notifier, 2059 }; 2060 2061 static void __init hyp_cpu_pm_init(void) 2062 { 2063 if (!is_protected_kvm_enabled()) 2064 cpu_pm_register_notifier(&hyp_init_cpu_pm_nb); 2065 } 2066 static void __init hyp_cpu_pm_exit(void) 2067 { 2068 if (!is_protected_kvm_enabled()) 2069 cpu_pm_unregister_notifier(&hyp_init_cpu_pm_nb); 2070 } 2071 #else 2072 static inline void __init hyp_cpu_pm_init(void) 2073 { 2074 } 2075 static inline void __init hyp_cpu_pm_exit(void) 2076 { 2077 } 2078 #endif 2079 2080 static void __init init_cpu_logical_map(void) 2081 { 2082 unsigned int cpu; 2083 2084 /* 2085 * Copy the MPIDR <-> logical CPU ID mapping to hyp. 2086 * Only copy the set of online CPUs whose features have been checked 2087 * against the finalized system capabilities. The hypervisor will not 2088 * allow any other CPUs from the `possible` set to boot. 2089 */ 2090 for_each_online_cpu(cpu) 2091 hyp_cpu_logical_map[cpu] = cpu_logical_map(cpu); 2092 } 2093 2094 #define init_psci_0_1_impl_state(config, what) \ 2095 config.psci_0_1_ ## what ## _implemented = psci_ops.what 2096 2097 static bool __init init_psci_relay(void) 2098 { 2099 /* 2100 * If PSCI has not been initialized, protected KVM cannot install 2101 * itself on newly booted CPUs. 2102 */ 2103 if (!psci_ops.get_version) { 2104 kvm_err("Cannot initialize protected mode without PSCI\n"); 2105 return false; 2106 } 2107 2108 kvm_host_psci_config.version = psci_ops.get_version(); 2109 kvm_host_psci_config.smccc_version = arm_smccc_get_version(); 2110 2111 if (kvm_host_psci_config.version == PSCI_VERSION(0, 1)) { 2112 kvm_host_psci_config.function_ids_0_1 = get_psci_0_1_function_ids(); 2113 init_psci_0_1_impl_state(kvm_host_psci_config, cpu_suspend); 2114 init_psci_0_1_impl_state(kvm_host_psci_config, cpu_on); 2115 init_psci_0_1_impl_state(kvm_host_psci_config, cpu_off); 2116 init_psci_0_1_impl_state(kvm_host_psci_config, migrate); 2117 } 2118 return true; 2119 } 2120 2121 static int __init init_subsystems(void) 2122 { 2123 int err = 0; 2124 2125 /* 2126 * Enable hardware so that subsystem initialisation can access EL2. 2127 */ 2128 on_each_cpu(cpu_hyp_init, NULL, 1); 2129 2130 /* 2131 * Register CPU lower-power notifier 2132 */ 2133 hyp_cpu_pm_init(); 2134 2135 /* 2136 * Init HYP view of VGIC 2137 */ 2138 err = kvm_vgic_hyp_init(); 2139 switch (err) { 2140 case 0: 2141 vgic_present = true; 2142 break; 2143 case -ENODEV: 2144 case -ENXIO: 2145 vgic_present = false; 2146 err = 0; 2147 break; 2148 default: 2149 goto out; 2150 } 2151 2152 /* 2153 * Init HYP architected timer support 2154 */ 2155 err = kvm_timer_hyp_init(vgic_present); 2156 if (err) 2157 goto out; 2158 2159 kvm_register_perf_callbacks(NULL); 2160 2161 out: 2162 if (err) 2163 hyp_cpu_pm_exit(); 2164 2165 if (err || !is_protected_kvm_enabled()) 2166 on_each_cpu(cpu_hyp_uninit, NULL, 1); 2167 2168 return err; 2169 } 2170 2171 static void __init teardown_subsystems(void) 2172 { 2173 kvm_unregister_perf_callbacks(); 2174 hyp_cpu_pm_exit(); 2175 } 2176 2177 static void __init teardown_hyp_mode(void) 2178 { 2179 int cpu; 2180 2181 free_hyp_pgds(); 2182 for_each_possible_cpu(cpu) { 2183 free_page(per_cpu(kvm_arm_hyp_stack_page, cpu)); 2184 free_pages(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu], nvhe_percpu_order()); 2185 } 2186 } 2187 2188 static int __init do_pkvm_init(u32 hyp_va_bits) 2189 { 2190 void *per_cpu_base = kvm_ksym_ref(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)); 2191 int ret; 2192 2193 preempt_disable(); 2194 cpu_hyp_init_context(); 2195 ret = kvm_call_hyp_nvhe(__pkvm_init, hyp_mem_base, hyp_mem_size, 2196 num_possible_cpus(), kern_hyp_va(per_cpu_base), 2197 hyp_va_bits); 2198 cpu_hyp_init_features(); 2199 2200 /* 2201 * The stub hypercalls are now disabled, so set our local flag to 2202 * prevent a later re-init attempt in kvm_arch_hardware_enable(). 2203 */ 2204 __this_cpu_write(kvm_hyp_initialized, 1); 2205 preempt_enable(); 2206 2207 return ret; 2208 } 2209 2210 static u64 get_hyp_id_aa64pfr0_el1(void) 2211 { 2212 /* 2213 * Track whether the system isn't affected by spectre/meltdown in the 2214 * hypervisor's view of id_aa64pfr0_el1, used for protected VMs. 2215 * Although this is per-CPU, we make it global for simplicity, e.g., not 2216 * to have to worry about vcpu migration. 2217 * 2218 * Unlike for non-protected VMs, userspace cannot override this for 2219 * protected VMs. 2220 */ 2221 u64 val = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); 2222 2223 val &= ~(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2) | 2224 ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3)); 2225 2226 val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2), 2227 arm64_get_spectre_v2_state() == SPECTRE_UNAFFECTED); 2228 val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3), 2229 arm64_get_meltdown_state() == SPECTRE_UNAFFECTED); 2230 2231 return val; 2232 } 2233 2234 static void kvm_hyp_init_symbols(void) 2235 { 2236 kvm_nvhe_sym(id_aa64pfr0_el1_sys_val) = get_hyp_id_aa64pfr0_el1(); 2237 kvm_nvhe_sym(id_aa64pfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1); 2238 kvm_nvhe_sym(id_aa64isar0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR0_EL1); 2239 kvm_nvhe_sym(id_aa64isar1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR1_EL1); 2240 kvm_nvhe_sym(id_aa64isar2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR2_EL1); 2241 kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); 2242 kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); 2243 kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR2_EL1); 2244 kvm_nvhe_sym(id_aa64smfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64SMFR0_EL1); 2245 kvm_nvhe_sym(__icache_flags) = __icache_flags; 2246 kvm_nvhe_sym(kvm_arm_vmid_bits) = kvm_arm_vmid_bits; 2247 } 2248 2249 static int __init kvm_hyp_init_protection(u32 hyp_va_bits) 2250 { 2251 void *addr = phys_to_virt(hyp_mem_base); 2252 int ret; 2253 2254 ret = create_hyp_mappings(addr, addr + hyp_mem_size, PAGE_HYP); 2255 if (ret) 2256 return ret; 2257 2258 ret = do_pkvm_init(hyp_va_bits); 2259 if (ret) 2260 return ret; 2261 2262 free_hyp_pgds(); 2263 2264 return 0; 2265 } 2266 2267 static void pkvm_hyp_init_ptrauth(void) 2268 { 2269 struct kvm_cpu_context *hyp_ctxt; 2270 int cpu; 2271 2272 for_each_possible_cpu(cpu) { 2273 hyp_ctxt = per_cpu_ptr_nvhe_sym(kvm_hyp_ctxt, cpu); 2274 hyp_ctxt->sys_regs[APIAKEYLO_EL1] = get_random_long(); 2275 hyp_ctxt->sys_regs[APIAKEYHI_EL1] = get_random_long(); 2276 hyp_ctxt->sys_regs[APIBKEYLO_EL1] = get_random_long(); 2277 hyp_ctxt->sys_regs[APIBKEYHI_EL1] = get_random_long(); 2278 hyp_ctxt->sys_regs[APDAKEYLO_EL1] = get_random_long(); 2279 hyp_ctxt->sys_regs[APDAKEYHI_EL1] = get_random_long(); 2280 hyp_ctxt->sys_regs[APDBKEYLO_EL1] = get_random_long(); 2281 hyp_ctxt->sys_regs[APDBKEYHI_EL1] = get_random_long(); 2282 hyp_ctxt->sys_regs[APGAKEYLO_EL1] = get_random_long(); 2283 hyp_ctxt->sys_regs[APGAKEYHI_EL1] = get_random_long(); 2284 } 2285 } 2286 2287 /* Inits Hyp-mode on all online CPUs */ 2288 static int __init init_hyp_mode(void) 2289 { 2290 u32 hyp_va_bits; 2291 int cpu; 2292 int err = -ENOMEM; 2293 2294 /* 2295 * The protected Hyp-mode cannot be initialized if the memory pool 2296 * allocation has failed. 2297 */ 2298 if (is_protected_kvm_enabled() && !hyp_mem_base) 2299 goto out_err; 2300 2301 /* 2302 * Allocate Hyp PGD and setup Hyp identity mapping 2303 */ 2304 err = kvm_mmu_init(&hyp_va_bits); 2305 if (err) 2306 goto out_err; 2307 2308 /* 2309 * Allocate stack pages for Hypervisor-mode 2310 */ 2311 for_each_possible_cpu(cpu) { 2312 unsigned long stack_page; 2313 2314 stack_page = __get_free_page(GFP_KERNEL); 2315 if (!stack_page) { 2316 err = -ENOMEM; 2317 goto out_err; 2318 } 2319 2320 per_cpu(kvm_arm_hyp_stack_page, cpu) = stack_page; 2321 } 2322 2323 /* 2324 * Allocate and initialize pages for Hypervisor-mode percpu regions. 2325 */ 2326 for_each_possible_cpu(cpu) { 2327 struct page *page; 2328 void *page_addr; 2329 2330 page = alloc_pages(GFP_KERNEL, nvhe_percpu_order()); 2331 if (!page) { 2332 err = -ENOMEM; 2333 goto out_err; 2334 } 2335 2336 page_addr = page_address(page); 2337 memcpy(page_addr, CHOOSE_NVHE_SYM(__per_cpu_start), nvhe_percpu_size()); 2338 kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu] = (unsigned long)page_addr; 2339 } 2340 2341 /* 2342 * Map the Hyp-code called directly from the host 2343 */ 2344 err = create_hyp_mappings(kvm_ksym_ref(__hyp_text_start), 2345 kvm_ksym_ref(__hyp_text_end), PAGE_HYP_EXEC); 2346 if (err) { 2347 kvm_err("Cannot map world-switch code\n"); 2348 goto out_err; 2349 } 2350 2351 err = create_hyp_mappings(kvm_ksym_ref(__hyp_rodata_start), 2352 kvm_ksym_ref(__hyp_rodata_end), PAGE_HYP_RO); 2353 if (err) { 2354 kvm_err("Cannot map .hyp.rodata section\n"); 2355 goto out_err; 2356 } 2357 2358 err = create_hyp_mappings(kvm_ksym_ref(__start_rodata), 2359 kvm_ksym_ref(__end_rodata), PAGE_HYP_RO); 2360 if (err) { 2361 kvm_err("Cannot map rodata section\n"); 2362 goto out_err; 2363 } 2364 2365 /* 2366 * .hyp.bss is guaranteed to be placed at the beginning of the .bss 2367 * section thanks to an assertion in the linker script. Map it RW and 2368 * the rest of .bss RO. 2369 */ 2370 err = create_hyp_mappings(kvm_ksym_ref(__hyp_bss_start), 2371 kvm_ksym_ref(__hyp_bss_end), PAGE_HYP); 2372 if (err) { 2373 kvm_err("Cannot map hyp bss section: %d\n", err); 2374 goto out_err; 2375 } 2376 2377 err = create_hyp_mappings(kvm_ksym_ref(__hyp_bss_end), 2378 kvm_ksym_ref(__bss_stop), PAGE_HYP_RO); 2379 if (err) { 2380 kvm_err("Cannot map bss section\n"); 2381 goto out_err; 2382 } 2383 2384 /* 2385 * Map the Hyp stack pages 2386 */ 2387 for_each_possible_cpu(cpu) { 2388 struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu); 2389 char *stack_page = (char *)per_cpu(kvm_arm_hyp_stack_page, cpu); 2390 2391 err = create_hyp_stack(__pa(stack_page), ¶ms->stack_hyp_va); 2392 if (err) { 2393 kvm_err("Cannot map hyp stack\n"); 2394 goto out_err; 2395 } 2396 2397 /* 2398 * Save the stack PA in nvhe_init_params. This will be needed 2399 * to recreate the stack mapping in protected nVHE mode. 2400 * __hyp_pa() won't do the right thing there, since the stack 2401 * has been mapped in the flexible private VA space. 2402 */ 2403 params->stack_pa = __pa(stack_page); 2404 } 2405 2406 for_each_possible_cpu(cpu) { 2407 char *percpu_begin = (char *)kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu]; 2408 char *percpu_end = percpu_begin + nvhe_percpu_size(); 2409 2410 /* Map Hyp percpu pages */ 2411 err = create_hyp_mappings(percpu_begin, percpu_end, PAGE_HYP); 2412 if (err) { 2413 kvm_err("Cannot map hyp percpu region\n"); 2414 goto out_err; 2415 } 2416 2417 /* Prepare the CPU initialization parameters */ 2418 cpu_prepare_hyp_mode(cpu, hyp_va_bits); 2419 } 2420 2421 kvm_hyp_init_symbols(); 2422 2423 if (is_protected_kvm_enabled()) { 2424 if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) && 2425 cpus_have_final_cap(ARM64_HAS_ADDRESS_AUTH)) 2426 pkvm_hyp_init_ptrauth(); 2427 2428 init_cpu_logical_map(); 2429 2430 if (!init_psci_relay()) { 2431 err = -ENODEV; 2432 goto out_err; 2433 } 2434 2435 err = kvm_hyp_init_protection(hyp_va_bits); 2436 if (err) { 2437 kvm_err("Failed to init hyp memory protection\n"); 2438 goto out_err; 2439 } 2440 } 2441 2442 return 0; 2443 2444 out_err: 2445 teardown_hyp_mode(); 2446 kvm_err("error initializing Hyp mode: %d\n", err); 2447 return err; 2448 } 2449 2450 struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr) 2451 { 2452 struct kvm_vcpu *vcpu; 2453 unsigned long i; 2454 2455 mpidr &= MPIDR_HWID_BITMASK; 2456 2457 if (kvm->arch.mpidr_data) { 2458 u16 idx = kvm_mpidr_index(kvm->arch.mpidr_data, mpidr); 2459 2460 vcpu = kvm_get_vcpu(kvm, 2461 kvm->arch.mpidr_data->cmpidr_to_idx[idx]); 2462 if (mpidr != kvm_vcpu_get_mpidr_aff(vcpu)) 2463 vcpu = NULL; 2464 2465 return vcpu; 2466 } 2467 2468 kvm_for_each_vcpu(i, vcpu, kvm) { 2469 if (mpidr == kvm_vcpu_get_mpidr_aff(vcpu)) 2470 return vcpu; 2471 } 2472 return NULL; 2473 } 2474 2475 bool kvm_arch_irqchip_in_kernel(struct kvm *kvm) 2476 { 2477 return irqchip_in_kernel(kvm); 2478 } 2479 2480 bool kvm_arch_has_irq_bypass(void) 2481 { 2482 return true; 2483 } 2484 2485 int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons, 2486 struct irq_bypass_producer *prod) 2487 { 2488 struct kvm_kernel_irqfd *irqfd = 2489 container_of(cons, struct kvm_kernel_irqfd, consumer); 2490 2491 return kvm_vgic_v4_set_forwarding(irqfd->kvm, prod->irq, 2492 &irqfd->irq_entry); 2493 } 2494 void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, 2495 struct irq_bypass_producer *prod) 2496 { 2497 struct kvm_kernel_irqfd *irqfd = 2498 container_of(cons, struct kvm_kernel_irqfd, consumer); 2499 2500 kvm_vgic_v4_unset_forwarding(irqfd->kvm, prod->irq, 2501 &irqfd->irq_entry); 2502 } 2503 2504 void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *cons) 2505 { 2506 struct kvm_kernel_irqfd *irqfd = 2507 container_of(cons, struct kvm_kernel_irqfd, consumer); 2508 2509 kvm_arm_halt_guest(irqfd->kvm); 2510 } 2511 2512 void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *cons) 2513 { 2514 struct kvm_kernel_irqfd *irqfd = 2515 container_of(cons, struct kvm_kernel_irqfd, consumer); 2516 2517 kvm_arm_resume_guest(irqfd->kvm); 2518 } 2519 2520 /* Initialize Hyp-mode and memory mappings on all CPUs */ 2521 static __init int kvm_arm_init(void) 2522 { 2523 int err; 2524 bool in_hyp_mode; 2525 2526 if (!is_hyp_mode_available()) { 2527 kvm_info("HYP mode not available\n"); 2528 return -ENODEV; 2529 } 2530 2531 if (kvm_get_mode() == KVM_MODE_NONE) { 2532 kvm_info("KVM disabled from command line\n"); 2533 return -ENODEV; 2534 } 2535 2536 err = kvm_sys_reg_table_init(); 2537 if (err) { 2538 kvm_info("Error initializing system register tables"); 2539 return err; 2540 } 2541 2542 in_hyp_mode = is_kernel_in_hyp_mode(); 2543 2544 if (cpus_have_final_cap(ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE) || 2545 cpus_have_final_cap(ARM64_WORKAROUND_1508412)) 2546 kvm_info("Guests without required CPU erratum workarounds can deadlock system!\n" \ 2547 "Only trusted guests should be used on this system.\n"); 2548 2549 err = kvm_set_ipa_limit(); 2550 if (err) 2551 return err; 2552 2553 err = kvm_arm_init_sve(); 2554 if (err) 2555 return err; 2556 2557 err = kvm_arm_vmid_alloc_init(); 2558 if (err) { 2559 kvm_err("Failed to initialize VMID allocator.\n"); 2560 return err; 2561 } 2562 2563 if (!in_hyp_mode) { 2564 err = init_hyp_mode(); 2565 if (err) 2566 goto out_err; 2567 } 2568 2569 err = kvm_init_vector_slots(); 2570 if (err) { 2571 kvm_err("Cannot initialise vector slots\n"); 2572 goto out_hyp; 2573 } 2574 2575 err = init_subsystems(); 2576 if (err) 2577 goto out_hyp; 2578 2579 if (is_protected_kvm_enabled()) { 2580 kvm_info("Protected nVHE mode initialized successfully\n"); 2581 } else if (in_hyp_mode) { 2582 kvm_info("VHE mode initialized successfully\n"); 2583 } else { 2584 kvm_info("Hyp mode initialized successfully\n"); 2585 } 2586 2587 /* 2588 * FIXME: Do something reasonable if kvm_init() fails after pKVM 2589 * hypervisor protection is finalized. 2590 */ 2591 err = kvm_init(sizeof(struct kvm_vcpu), 0, THIS_MODULE); 2592 if (err) 2593 goto out_subs; 2594 2595 kvm_arm_initialised = true; 2596 2597 return 0; 2598 2599 out_subs: 2600 teardown_subsystems(); 2601 out_hyp: 2602 if (!in_hyp_mode) 2603 teardown_hyp_mode(); 2604 out_err: 2605 kvm_arm_vmid_alloc_free(); 2606 return err; 2607 } 2608 2609 static int __init early_kvm_mode_cfg(char *arg) 2610 { 2611 if (!arg) 2612 return -EINVAL; 2613 2614 if (strcmp(arg, "none") == 0) { 2615 kvm_mode = KVM_MODE_NONE; 2616 return 0; 2617 } 2618 2619 if (!is_hyp_mode_available()) { 2620 pr_warn_once("KVM is not available. Ignoring kvm-arm.mode\n"); 2621 return 0; 2622 } 2623 2624 if (strcmp(arg, "protected") == 0) { 2625 if (!is_kernel_in_hyp_mode()) 2626 kvm_mode = KVM_MODE_PROTECTED; 2627 else 2628 pr_warn_once("Protected KVM not available with VHE\n"); 2629 2630 return 0; 2631 } 2632 2633 if (strcmp(arg, "nvhe") == 0 && !WARN_ON(is_kernel_in_hyp_mode())) { 2634 kvm_mode = KVM_MODE_DEFAULT; 2635 return 0; 2636 } 2637 2638 if (strcmp(arg, "nested") == 0 && !WARN_ON(!is_kernel_in_hyp_mode())) { 2639 kvm_mode = KVM_MODE_NV; 2640 return 0; 2641 } 2642 2643 return -EINVAL; 2644 } 2645 early_param("kvm-arm.mode", early_kvm_mode_cfg); 2646 2647 enum kvm_mode kvm_get_mode(void) 2648 { 2649 return kvm_mode; 2650 } 2651 2652 module_init(kvm_arm_init); 2653