// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012 - Virtual Open Systems and Columbia University
 * Author: Christoffer Dall <c.dall@virtualopensystems.com>
 */

#include <linux/arm-smccc.h>
#include <linux/bug.h>
#include <linux/cpu_pm.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/kvm_host.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/fs.h>
#include <linux/mman.h>
#include <linux/sched.h>
#include <linux/kvm.h>
#include <linux/kvm_irqfd.h>
#include <linux/irqbypass.h>
#include <linux/sched/stat.h>
#include <linux/psci.h>
#include <trace/events/kvm.h>

#define CREATE_TRACE_POINTS
#include "trace_arm.h"
#include "hyp_trace.h"

#include <linux/uaccess.h>
#include <asm/ptrace.h>
#include <asm/mman.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
#include <asm/cpufeature.h>
#include <asm/virt.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_nested.h>
#include <asm/kvm_pkvm.h>
#include <asm/kvm_ptrauth.h>
#include <asm/sections.h>
#include <asm/stacktrace/nvhe.h>

#include <kvm/arm_hypercalls.h>
#include <kvm/arm_pmu.h>
#include <kvm/arm_psci.h>
#include <kvm/arm_vgic.h>

#include <linux/irqchip/arm-gic-v5.h>

#include "vgic/vgic.h"
#include "sys_regs.h"

/* Selected KVM mode; starts out as KVM_MODE_DEFAULT. */
static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT;

/*
 * Policy controlling whether the guest's WFI/WFE trap bits (HCR_TWI/HCR_TWE)
 * are set or cleared when a vCPU is loaded; see kvm_vcpu_should_clear_twi()
 * and kvm_vcpu_should_clear_twe():
 *
 * - KVM_WFX_NOTRAP_SINGLE_TASK: clear the trap bit only when
 *   single_task_running() is true (WFI additionally depends on the vgic
 *   state of the vCPU).
 * - KVM_WFX_NOTRAP: always clear the trap bit.
 * - KVM_WFX_TRAP: always set the trap bit.
 */
enum kvm_wfx_trap_policy {
	KVM_WFX_NOTRAP_SINGLE_TASK, /* Default option */
	KVM_WFX_NOTRAP,
	KVM_WFX_TRAP,
};

/* Current WFI and WFE trap policies; both default to NOTRAP_SINGLE_TASK. */
static enum kvm_wfx_trap_policy kvm_wfi_trap_policy __read_mostly = KVM_WFX_NOTRAP_SINGLE_TASK;
static enum kvm_wfx_trap_policy kvm_wfe_trap_policy __read_mostly = KVM_WFX_NOTRAP_SINGLE_TASK;

/*
 * Tracks KVM IOCTLs and their associated KVM capabilities.
 */
struct kvm_ioctl_cap_map {
	unsigned int ioctl;	/* IOCTL number used as the lookup key */
	long ext;		/* capability (KVM_CAP_*) paired with @ioctl */
};

/* Make KVM_CAP_NR_VCPUS the reference for features we always supported */
#define KVM_CAP_ARM_BASIC	KVM_CAP_NR_VCPUS

/*
 * Sorted by ioctl to allow for potential binary search,
 * though linear scan is sufficient for this size.
 */
static const struct kvm_ioctl_cap_map vm_ioctl_caps[] = {
	{ KVM_CREATE_IRQCHIP, KVM_CAP_IRQCHIP },
	{ KVM_ARM_SET_DEVICE_ADDR, KVM_CAP_ARM_SET_DEVICE_ADDR },
	{ KVM_ARM_MTE_COPY_TAGS, KVM_CAP_ARM_MTE },
	{ KVM_SET_DEVICE_ATTR, KVM_CAP_DEVICE_CTRL },
	{ KVM_GET_DEVICE_ATTR, KVM_CAP_DEVICE_CTRL },
	{ KVM_HAS_DEVICE_ATTR, KVM_CAP_DEVICE_CTRL },
	{ KVM_ARM_SET_COUNTER_OFFSET, KVM_CAP_COUNTER_OFFSET },
	{ KVM_ARM_GET_REG_WRITABLE_MASKS, KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES },
	{ KVM_ARM_PREFERRED_TARGET, KVM_CAP_ARM_BASIC },
};

/*
 * Look up @ioctl in vm_ioctl_caps[] and set *ext to the matching capability.
 * Return 0 if found, or -EINVAL if no IOCTL matches (*ext is left untouched
 * in that case).
 */
long kvm_get_cap_for_kvm_ioctl(unsigned int ioctl, long *ext)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(vm_ioctl_caps); i++) {
		if (vm_ioctl_caps[i].ioctl == ioctl) {
			*ext = vm_ioctl_caps[i].ext;
			return 0;
		}
	}

	return -EINVAL;
}

DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector);

DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_base);
DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);

DECLARE_KVM_NVHE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);

static bool vgic_present, kvm_arm_initialised;

static DEFINE_PER_CPU(unsigned char, kvm_hyp_initialized);

/* Report the current value of the file-local kvm_arm_initialised flag. */
bool is_kvm_arm_initialised(void)
{
	return kvm_arm_initialised;
}

/*
 * A kick is needed only if kvm_vcpu_exiting_guest_mode() reports that the
 * vCPU was IN_GUEST_MODE.
 */
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
}

/*
 * Enable the VM-scoped capability described by @cap.
 *
 * Any non-zero cap->flags is rejected, as is any capability that
 * kvm_pkvm_ext_allowed() refuses when protected KVM is enabled.
 *
 * Return: 0 if the capability was enabled, -EINVAL if it is unknown or its
 * precondition (documented on each case below) is not met.
 */
int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
			    struct kvm_enable_cap *cap)
{
	int r = -EINVAL;

	if (cap->flags)
		return -EINVAL;

	if (is_protected_kvm_enabled() && !kvm_pkvm_ext_allowed(kvm, cap->cap))
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_ARM_NISV_TO_USER:
		r = 0;
		set_bit(KVM_ARCH_FLAG_RETURN_NISV_IO_ABORT_TO_USER,
			&kvm->arch.flags);
		break;
	case KVM_CAP_ARM_MTE:
		/* Only allowed on MTE-capable systems, before any vCPU exists. */
		mutex_lock(&kvm->lock);
		if (system_supports_mte() && !kvm->created_vcpus) {
			r = 0;
			set_bit(KVM_ARCH_FLAG_MTE_ENABLED, &kvm->arch.flags);
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_CAP_ARM_SYSTEM_SUSPEND:
		r = 0;
		set_bit(KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED, &kvm->arch.flags);
		break;
	case KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE:
		mutex_lock(&kvm->slots_lock);
		/*
		 * To keep things simple, allow changing the chunk
		 * size only when no memory slots have been created.
		 */
		if (kvm_are_all_memslots_empty(kvm)) {
			u64 new_cap = cap->args[0];

			/* Zero is accepted as-is; otherwise it must be a supported block size. */
			if (!new_cap || kvm_is_block_size_supported(new_cap)) {
				r = 0;
				kvm->arch.mmu.split_page_chunk_size = new_cap;
			}
		}
		mutex_unlock(&kvm->slots_lock);
		break;
	case KVM_CAP_ARM_WRITABLE_IMP_ID_REGS:
		/* Only allowed before any vCPU has been created. */
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			r = 0;
			set_bit(KVM_ARCH_FLAG_WRITABLE_IMP_ID_REGS, &kvm->arch.flags);
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_CAP_ARM_SEA_TO_USER:
		r = 0;
		set_bit(KVM_ARCH_FLAG_EXIT_SEA, &kvm->arch.flags);
		break;
	default:
		break;
	}

	return r;
}

/*
 * Default vCPU limit: whatever the vgic reports when one is present,
 * KVM_MAX_VCPUS otherwise.
 */
static int kvm_arm_default_max_vcpus(void)
{
	return vgic_present ? kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS;
}

/**
 * kvm_arch_init_vm - initializes a VM data structure
 * @kvm:	pointer to the KVM struct
 * @type:	VM type flags; any bit outside KVM_VM_TYPE_ARM_MASK is rejected
 *
 * Return: 0 on success or a negative error code. On failure, everything set
 * up by this function so far is torn down again via the error labels.
 */
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	int ret;

	if (type & ~KVM_VM_TYPE_ARM_MASK)
		return -EINVAL;

	mutex_init(&kvm->arch.config_lock);

#ifdef CONFIG_LOCKDEP
	/* Clue in lockdep that the config_lock must be taken inside kvm->lock */
	mutex_lock(&kvm->lock);
	mutex_lock(&kvm->arch.config_lock);
	mutex_unlock(&kvm->arch.config_lock);
	mutex_unlock(&kvm->lock);
#endif

	kvm_init_nested(kvm);

	ret = kvm_share_hyp(kvm, kvm + 1);
	if (ret)
		return ret;

	if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL_ACCOUNT)) {
		ret = -ENOMEM;
		goto err_unshare_kvm;
	}
	cpumask_copy(kvm->arch.supported_cpus, cpu_possible_mask);

	ret = kvm_init_stage2_mmu(kvm, &kvm->arch.mmu, type);
	if (ret)
		goto err_free_cpumask;

	if (is_protected_kvm_enabled()) {
		/*
		 * If any failures occur after this is successful, make sure to
		 * call __pkvm_unreserve_vm to unreserve the VM in hyp.
		 */
		ret = pkvm_init_host_vm(kvm, type);
		if (ret)
			goto err_uninit_mmu;
	} else if (type & KVM_VM_TYPE_ARM_PROTECTED) {
		/* A protected VM was requested but protected KVM is not enabled. */
		ret = -EINVAL;
		goto err_uninit_mmu;
	}

	kvm_vgic_early_init(kvm);

	kvm_timer_init_vm(kvm);

	/* The maximum number of VCPUs is limited by the host's GIC model */
	kvm->max_vcpus = kvm_arm_default_max_vcpus();

	kvm_arm_init_hypercalls(kvm);

	bitmap_zero(kvm->arch.vcpu_features, KVM_VCPU_MAX_FEATURES);

	return 0;

err_uninit_mmu:
	kvm_uninit_stage2_mmu(kvm);
err_free_cpumask:
	free_cpumask_var(kvm->arch.supported_cpus);
err_unshare_kvm:
	kvm_unshare_hyp(kvm, kvm + 1);
	return ret;
}

/* Faults on the vCPU mapping are never serviced: always VM_FAULT_SIGBUS. */
vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
	return VM_FAULT_SIGBUS;
}

/* Create the sys_regs and stage-2 ptdump debugfs entries for @kvm. */
void kvm_arch_create_vm_debugfs(struct kvm *kvm)
{
	kvm_sys_regs_create_debugfs(kvm);
	kvm_s2_ptdump_create_debugfs(kvm);
}

/*
 * Drop the pre-computed MPIDR data of @kvm, if any.
 *
 * The pointer is cleared under config_lock and the memory is freed only
 * after synchronize_rcu(), so RCU readers that already fetched the pointer
 * can finish using it.
 */
static void kvm_destroy_mpidr_data(struct kvm *kvm)
{
	struct kvm_mpidr_data *data;

	mutex_lock(&kvm->arch.config_lock);

	data = rcu_dereference_protected(kvm->arch.mpidr_data,
					 lockdep_is_held(&kvm->arch.config_lock));
	if (data) {
		rcu_assign_pointer(kvm->arch.mpidr_data, NULL);
		synchronize_rcu();
		kfree(data);
	}

	mutex_unlock(&kvm->arch.config_lock);
}

/**
 * kvm_arch_destroy_vm - destroy the VM data structure
 * @kvm:	pointer to the KVM struct
 */
void kvm_arch_destroy_vm(struct kvm *kvm)
{
	bitmap_free(kvm->arch.pmu_filter);
	free_cpumask_var(kvm->arch.supported_cpus);

	kvm_vgic_destroy(kvm);

	if (is_protected_kvm_enabled())
		pkvm_destroy_hyp_vm(kvm);

	kvm_uninit_stage2_mmu(kvm);
	kvm_destroy_mpidr_data(kvm);

	kfree(kvm->arch.sysreg_masks);
	kvm_destroy_vcpus(kvm);

	kvm_unshare_hyp(kvm, kvm + 1);

	kvm_arm_teardown_hypercalls(kvm);
}

/*
 * Decide whether pointer authentication can be offered, based on the
 * sanitised ID_AA64ISAR1_EL1/ID_AA64ISAR2_EL1 values.
 */
static bool kvm_has_full_ptr_auth(void)
{
	bool apa, gpa, api, gpi, apa3, gpa3;
	u64 isar1, isar2, val;

	/*
	 * Check that:
	 *
	 * - both Address and Generic auth are implemented for a given
	 *   algorithm (Q5, IMPDEF or Q3)
	 * - only a single algorithm is implemented.
	 */
	if (!system_has_full_ptr_auth())
		return false;

	isar1 = read_sanitised_ftr_reg(SYS_ID_AA64ISAR1_EL1);
	isar2 = read_sanitised_ftr_reg(SYS_ID_AA64ISAR2_EL1);

	apa = !!FIELD_GET(ID_AA64ISAR1_EL1_APA_MASK, isar1);
	val = FIELD_GET(ID_AA64ISAR1_EL1_GPA_MASK, isar1);
	gpa = (val == ID_AA64ISAR1_EL1_GPA_IMP);

	api = !!FIELD_GET(ID_AA64ISAR1_EL1_API_MASK, isar1);
	val = FIELD_GET(ID_AA64ISAR1_EL1_GPI_MASK, isar1);
	gpi = (val == ID_AA64ISAR1_EL1_GPI_IMP);

	apa3 = !!FIELD_GET(ID_AA64ISAR2_EL1_APA3_MASK, isar2);
	val = FIELD_GET(ID_AA64ISAR2_EL1_GPA3_MASK, isar2);
	gpa3 = (val == ID_AA64ISAR2_EL1_GPA3_IMP);

	/* Each address/generic pair must agree, and exactly one pair is present. */
	return (apa == gpa && api == gpi && apa3 == gpa3 &&
		(apa + api + apa3) == 1);
}

/*
 * Report the level of support for capability @ext.
 *
 * @kvm may be NULL; capabilities that need a VM either fall back to a
 * default or return -EINVAL in that case. With protected KVM enabled, any
 * capability refused by kvm_pkvm_ext_allowed() is reported as 0.
 *
 * Return: 0 if unsupported, otherwise a capability-specific value.
 */
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	if (is_protected_kvm_enabled() && !kvm_pkvm_ext_allowed(kvm, ext))
		return 0;

	switch (ext) {
	case KVM_CAP_IRQCHIP:
		r = vgic_present;
		break;
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_USER_MEMORY:
	case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ARM_PSCI:
	case KVM_CAP_ARM_PSCI_0_2:
	case KVM_CAP_READONLY_MEM:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_VCPU_EVENTS:
	case KVM_CAP_ARM_IRQ_LINE_LAYOUT_2:
	case KVM_CAP_ARM_NISV_TO_USER:
	case KVM_CAP_ARM_INJECT_EXT_DABT:
	case KVM_CAP_SET_GUEST_DEBUG:
	case KVM_CAP_VCPU_ATTRIBUTES:
	case KVM_CAP_PTP_KVM:
	case KVM_CAP_ARM_SYSTEM_SUSPEND:
	case KVM_CAP_IRQFD_RESAMPLE:
	case KVM_CAP_COUNTER_OFFSET:
	case KVM_CAP_ARM_WRITABLE_IMP_ID_REGS:
	case KVM_CAP_ARM_SEA_TO_USER:
		r = 1;
		break;
	case KVM_CAP_SET_GUEST_DEBUG2:
		return KVM_GUESTDBG_VALID_MASK;
	case KVM_CAP_ARM_SET_DEVICE_ADDR:
		r = 1;
		break;
	case KVM_CAP_NR_VCPUS:
		/*
		 * ARM64 treats KVM_CAP_NR_VCPUS differently from all other
		 * architectures, as it does not always bound it to
		 * KVM_CAP_MAX_VCPUS.  It should not matter much because
		 * this is just an advisory value.
		 */
		r = min_t(unsigned int, num_online_cpus(),
			  kvm_arm_default_max_vcpus());
		break;
	case KVM_CAP_MAX_VCPUS:
	case KVM_CAP_MAX_VCPU_ID:
		if (kvm)
			r = kvm->max_vcpus;
		else
			r = kvm_arm_default_max_vcpus();
		break;
	case KVM_CAP_MSI_DEVID:
		if (!kvm)
			r = -EINVAL;
		else
			r = kvm->arch.vgic.msis_require_devid;
		break;
	case KVM_CAP_ARM_USER_IRQ:
		/*
		 * 1: EL1_VTIMER, EL1_PTIMER, and PMU.
		 * (bump this number if adding more devices)
		 */
		r = 1;
		break;
	case KVM_CAP_ARM_MTE:
		r = system_supports_mte();
		break;
	case KVM_CAP_STEAL_TIME:
		r = kvm_arm_pvtime_supported();
		break;
	case KVM_CAP_ARM_EL1_32BIT:
		r = cpus_have_final_cap(ARM64_HAS_32BIT_EL1);
		break;
	case KVM_CAP_ARM_EL2:
		r = cpus_have_final_cap(ARM64_HAS_NESTED_VIRT);
		break;
	case KVM_CAP_ARM_EL2_E2H0:
		r = cpus_have_final_cap(ARM64_HAS_HCR_NV1);
		break;
	case KVM_CAP_GUEST_DEBUG_HW_BPS:
		r = get_num_brps();
		break;
	case KVM_CAP_GUEST_DEBUG_HW_WPS:
		r = get_num_wrps();
		break;
	case KVM_CAP_ARM_PMU_V3:
		r = kvm_supports_guest_pmuv3();
		break;
	case KVM_CAP_ARM_INJECT_SERROR_ESR:
		r = cpus_have_final_cap(ARM64_HAS_RAS_EXTN);
		break;
	case KVM_CAP_ARM_VM_IPA_SIZE:
		r = get_kvm_ipa_limit();
		break;
	case KVM_CAP_ARM_SVE:
		r = system_supports_sve();
		break;
	case KVM_CAP_ARM_PTRAUTH_ADDRESS:
	case KVM_CAP_ARM_PTRAUTH_GENERIC:
		r = kvm_has_full_ptr_auth();
		break;
	case KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE:
		if (kvm)
			r = kvm->arch.mmu.split_page_chunk_size;
		else
			r = KVM_ARM_EAGER_SPLIT_CHUNK_SIZE_DEFAULT;
		break;
	case KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES:
		r = kvm_supported_block_sizes();
		break;
	case KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES:
		r = BIT(0);
		break;
	case KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED:
		if (!kvm)
			r = -EINVAL;
		else
			r = kvm_supports_cacheable_pfnmap();
		break;

	default:
		r = 0;
	}

	return r;
}

/* No arch-specific device ioctl is implemented here: always -EINVAL. */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	return -EINVAL;
}

/*
 * Allocate a zeroed struct kvm: kzalloc() when !has_vhe(), kvzalloc()
 * otherwise.
 */
struct kvm *kvm_arch_alloc_vm(void)
{
	size_t sz = sizeof(struct kvm);

	if (!has_vhe())
		return kzalloc(sz, GFP_KERNEL_ACCOUNT);

	return kvzalloc(sz, GFP_KERNEL_ACCOUNT);
}

/*
 * Check whether a vCPU with index @id may be created.
 *
 * Return: -EBUSY if an in-kernel irqchip has already been initialised,
 * -EINVAL if @id is not below kvm->max_vcpus, 0 otherwise.
 */
int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
{
	if (irqchip_in_kernel(kvm) && vgic_initialized(kvm))
		return -EBUSY;

	if (id >= kvm->max_vcpus)
		return -EINVAL;

	return 0;
}

/*
 * Arch-specific initialisation of a newly created vCPU.
 *
 * Return: 0 on success or the error from kvm_vgic_vcpu_init() or
 * kvm_share_hyp(); in both failure cases the vgic vCPU state is destroyed
 * before returning.
 */
int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
{
	int err;

	spin_lock_init(&vcpu->arch.mp_state_lock);

#ifdef CONFIG_LOCKDEP
	/* Inform lockdep that the config_lock is acquired after vcpu->mutex */
	mutex_lock(&vcpu->mutex);
	mutex_lock(&vcpu->kvm->arch.config_lock);
	mutex_unlock(&vcpu->kvm->arch.config_lock);
	mutex_unlock(&vcpu->mutex);
#endif

	/* Force users to call KVM_ARM_VCPU_INIT */
	vcpu_clear_flag(vcpu, VCPU_INITIALIZED);

	vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;

	/* Set up the timer */
	kvm_timer_vcpu_init(vcpu);

	kvm_pmu_vcpu_init(vcpu);

	kvm_arm_pvtime_vcpu_init(&vcpu->arch);

	vcpu->arch.hw_mmu = &vcpu->kvm->arch.mmu;

	/*
	 * This vCPU may have been created after mpidr_data was initialized.
	 * Throw out the pre-computed mappings if that is the case which forces
	 * KVM to fall back to iteratively searching the vCPUs.
	 */
	kvm_destroy_mpidr_data(vcpu->kvm);

	err = kvm_vgic_vcpu_init(vcpu);
	if (err) {
		kvm_vgic_vcpu_destroy(vcpu);
		return err;
	}

	err = kvm_share_hyp(vcpu, vcpu + 1);
	if (err)
		kvm_vgic_vcpu_destroy(vcpu);

	return err;
}

/* Nothing to do after vCPU creation on this architecture. */
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
}

/*
 * Release the per-vCPU resources: the page cache (or the pKVM memcache when
 * protected KVM is enabled), then the timer, PMU, vgic and remaining vCPU
 * state.
 */
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	if (!is_protected_kvm_enabled())
		kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
	else
		free_hyp_memcache(&vcpu->arch.pkvm_memcache);
	kvm_timer_vcpu_terminate(vcpu);
	kvm_pmu_vcpu_destroy(vcpu);
	kvm_vgic_vcpu_destroy(vcpu);
	kvm_arm_vcpu_destroy(vcpu);
}

/* Intentionally empty: no arch work when a vCPU starts blocking. */
void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
{

}

/* Intentionally empty: no arch work when a vCPU stops blocking. */
void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
{

}

/*
 * Program HCR_API/HCR_APK in vcpu->arch.hcr_el2 for a vCPU that has pointer
 * authentication, and save the host keys if the guest may use them.
 * Does nothing when protected KVM is enabled.
 */
static void vcpu_set_pauth_traps(struct kvm_vcpu *vcpu)
{
	if (vcpu_has_ptrauth(vcpu) && !is_protected_kvm_enabled()) {
		/*
		 * Either we're running an L2 guest, and the API/APK bits come
		 * from L1's HCR_EL2, or API/APK are both set.
		 */
		if (unlikely(is_nested_ctxt(vcpu))) {
			u64 val;

			val = __vcpu_sys_reg(vcpu, HCR_EL2);
			val &= (HCR_API | HCR_APK);
			vcpu->arch.hcr_el2 &= ~(HCR_API | HCR_APK);
			vcpu->arch.hcr_el2 |= val;
		} else {
			vcpu->arch.hcr_el2 |= (HCR_API | HCR_APK);
		}

		/*
		 * Save the host keys if there is any chance for the guest
		 * to use pauth, as the entry code will reload the guest
		 * keys in that case.
		 */
		if (vcpu->arch.hcr_el2 & (HCR_API | HCR_APK)) {
			struct kvm_cpu_context *ctxt;

			ctxt = this_cpu_ptr_hyp_sym(kvm_hyp_ctxt);
			ptrauth_save_keys(ctxt);
		}
	}
}

/*
 * Decide whether HCR_TWI should be cleared for @vcpu.
 *
 * An explicit policy (anything but NOTRAP_SINGLE_TASK) wins outright.
 * Otherwise the trap is cleared only when a single task is running and,
 * for a non-v5 vgic, the model is GICv3 and the vCPU has VLPIs mapped or
 * the vgic has nassgireq set.
 */
static bool kvm_vcpu_should_clear_twi(struct kvm_vcpu *vcpu)
{
	if (unlikely(kvm_wfi_trap_policy != KVM_WFX_NOTRAP_SINGLE_TASK))
		return kvm_wfi_trap_policy == KVM_WFX_NOTRAP;

	if (vgic_is_v5(vcpu->kvm))
		return single_task_running();

	return single_task_running() &&
	       vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3 &&
	       (atomic_read(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count) ||
		vcpu->kvm->arch.vgic.nassgireq);
}

/*
 * Decide whether HCR_TWE should be cleared for @vcpu: an explicit policy
 * wins, otherwise clear it only when a single task is running.
 */
static bool kvm_vcpu_should_clear_twe(struct kvm_vcpu *vcpu)
{
	if (unlikely(kvm_wfe_trap_policy != KVM_WFX_NOTRAP_SINGLE_TASK))
		return kvm_wfe_trap_policy == KVM_WFX_NOTRAP;

	return single_task_running();
}

/*
 * Load the state of @vcpu onto physical CPU @cpu.
 *
 * The stage-2 MMU handling (VMID update, context flush) is skipped entirely
 * when protected KVM is enabled.
 */
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	struct kvm_s2_mmu *mmu;
	int *last_ran;

	if (is_protected_kvm_enabled())
		goto nommu;

	if (vcpu_has_nv(vcpu))
		kvm_vcpu_load_hw_mmu(vcpu);

	mmu = vcpu->arch.hw_mmu;
	last_ran = this_cpu_ptr(mmu->last_vcpu_ran);

	/*
	 * Ensure a VMID is allocated for the MMU before programming VTTBR_EL2,
	 * which happens eagerly in VHE.
	 *
	 * Also, the VMID allocator only preserves VMIDs that are active at the
	 * time of rollover, so KVM might need to grab a new VMID for the MMU if
	 * this is called from kvm_sched_in().
	 */
	kvm_arm_vmid_update(&mmu->vmid);

	/*
	 * We guarantee that both TLBs and I-cache are private to each
	 * vcpu. If detecting that a vcpu from the same VM has
	 * previously run on the same physical CPU, call into the
	 * hypervisor code to nuke the relevant contexts.
	 *
	 * We might get preempted before the vCPU actually runs, but
	 * over-invalidation doesn't affect correctness.
	 */
	if (*last_ran != vcpu->vcpu_idx) {
		kvm_call_hyp(__kvm_flush_cpu_context, mmu);
		*last_ran = vcpu->vcpu_idx;
	}

nommu:
	vcpu->cpu = cpu;

	/*
	 * The timer must be loaded before the vgic to correctly set up physical
	 * interrupt deactivation in nested state (e.g. timer interrupt).
	 */
	kvm_timer_vcpu_load(vcpu);
	kvm_vgic_load(vcpu);
	kvm_vcpu_load_debug(vcpu);
	kvm_vcpu_load_fgt(vcpu);
	if (has_vhe())
		kvm_vcpu_load_vhe(vcpu);
	kvm_arch_vcpu_load_fp(vcpu);
	kvm_vcpu_pmu_restore_guest(vcpu);
	if (kvm_arm_is_pvtime_enabled(&vcpu->arch))
		kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu);

	/* Apply the WFE/WFI trap policies to this vCPU's HCR_EL2 value. */
	if (kvm_vcpu_should_clear_twe(vcpu))
		vcpu->arch.hcr_el2 &= ~HCR_TWE;
	else
		vcpu->arch.hcr_el2 |= HCR_TWE;

	if (kvm_vcpu_should_clear_twi(vcpu))
		vcpu->arch.hcr_el2 &= ~HCR_TWI;
	else
		vcpu->arch.hcr_el2 |= HCR_TWI;

	vcpu_set_pauth_traps(vcpu);

	if (is_protected_kvm_enabled()) {
		kvm_call_hyp_nvhe(__pkvm_vcpu_load,
				  vcpu->kvm->arch.pkvm.handle,
				  vcpu->vcpu_idx, vcpu->arch.hcr_el2);
		kvm_call_hyp(__vgic_v3_restore_vmcr_aprs,
			     &vcpu->arch.vgic_cpu.vgic_v3);
	}

	/* Flag the vCPU if @cpu is not in the VM's supported_cpus mask. */
	if (!cpumask_test_cpu(cpu, vcpu->kvm->arch.supported_cpus))
		vcpu_set_on_unsupported_cpu(vcpu);

	vcpu->arch.pid = pid_nr(vcpu->pid);
}

/*
 * Save the state of @vcpu off the current physical CPU; the counterpart of
 * kvm_arch_vcpu_load(). Leaves vcpu->cpu set to -1.
 */
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	if (is_protected_kvm_enabled()) {
		kvm_call_hyp(__vgic_v3_save_aprs, &vcpu->arch.vgic_cpu.vgic_v3);
		kvm_call_hyp_nvhe(__pkvm_vcpu_put);
	}

	kvm_vcpu_put_debug(vcpu);
	kvm_arch_vcpu_put_fp(vcpu);
	if (has_vhe())
		kvm_vcpu_put_vhe(vcpu);
	kvm_timer_vcpu_put(vcpu);
	kvm_vgic_put(vcpu);
	kvm_vcpu_pmu_restore_host(vcpu);
	if (vcpu_has_nv(vcpu))
		kvm_vcpu_put_hw_mmu(vcpu);
	kvm_arm_vmid_clear_active();

	vcpu_clear_on_unsupported_cpu(vcpu);
	vcpu->cpu = -1;
}

/*
 * Mark @vcpu as stopped, request that it sleeps, and kick it.
 * The callers in this file hold vcpu->arch.mp_state_lock.
 */
static void __kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu)
{
	WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED);
	kvm_make_request(KVM_REQ_SLEEP, vcpu);
	kvm_vcpu_kick(vcpu);
}

/* Locked wrapper around __kvm_arm_vcpu_power_off(). */
void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu)
{
	spin_lock(&vcpu->arch.mp_state_lock);
	__kvm_arm_vcpu_power_off(vcpu);
	spin_unlock(&vcpu->arch.mp_state_lock);
}

/* True if the vCPU's mp_state is KVM_MP_STATE_STOPPED (lockless read). */
bool kvm_arm_vcpu_stopped(struct kvm_vcpu *vcpu)
{
	return READ_ONCE(vcpu->arch.mp_state.mp_state) == KVM_MP_STATE_STOPPED;
}

/*
 * Mark @vcpu as suspended, raise KVM_REQ_SUSPEND, and kick it.
 * The caller in this file holds vcpu->arch.mp_state_lock.
 */
static void kvm_arm_vcpu_suspend(struct kvm_vcpu *vcpu)
{
	WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_SUSPENDED);
	kvm_make_request(KVM_REQ_SUSPEND, vcpu);
	kvm_vcpu_kick(vcpu);
}

/* True if the vCPU's mp_state is KVM_MP_STATE_SUSPENDED (lockless read). */
static bool kvm_arm_vcpu_suspended(struct kvm_vcpu *vcpu)
{
	return READ_ONCE(vcpu->arch.mp_state.mp_state) == KVM_MP_STATE_SUSPENDED;
}

/* Copy the vCPU's current mp_state into @mp_state; always returns 0. */
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	*mp_state = READ_ONCE(vcpu->arch.mp_state);

	return 0;
}

/*
 * Set the vCPU's mp_state under mp_state_lock.
 *
 * Only RUNNABLE, STOPPED and SUSPENDED are accepted.
 *
 * Return: 0 on success, -EINVAL for any other requested state.
 */
int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int ret = 0;

	spin_lock(&vcpu->arch.mp_state_lock);

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_RUNNABLE:
		WRITE_ONCE(vcpu->arch.mp_state, *mp_state);
		break;
	case KVM_MP_STATE_STOPPED:
		__kvm_arm_vcpu_power_off(vcpu);
		break;
	case KVM_MP_STATE_SUSPENDED:
		kvm_arm_vcpu_suspend(vcpu);
		break;
	default:
		ret = -EINVAL;
	}

	spin_unlock(&vcpu->arch.mp_state_lock);

	return ret;
}

/**
 * kvm_arch_vcpu_runnable - determine if the vcpu can be scheduled
 * @v:		The VCPU pointer
 *
 * If the guest CPU is not waiting for interrupts or an interrupt line is
 * asserted, the CPU is by definition runnable.
 */
int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
{
	bool irq_lines = *vcpu_hcr(v) & (HCR_VI | HCR_VF | HCR_VSE);

	/*
	 * With a userspace irqchip, a timer or PMU level change that
	 * userspace must be told about also counts as an asserted line.
	 */
	irq_lines |= (!irqchip_in_kernel(v->kvm) &&
		      (kvm_timer_should_notify_user(v) ||
		       kvm_pmu_should_notify_user(v)));

	return ((irq_lines || kvm_vgic_vcpu_pending_irq(v))
		&& !kvm_arm_vcpu_stopped(v) && !v->arch.pause);
}

/* Report whether the vCPU is in a privileged mode, per vcpu_mode_priv(). */
bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
{
	return vcpu_mode_priv(vcpu);
}

#ifdef CONFIG_GUEST_PERF_EVENTS
/* Return the guest program counter of @vcpu. */
unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu)
{
	return *vcpu_pc(vcpu);
}
#endif

/*
 * Build the compressed MPIDR -> vCPU index table for @kvm and publish it
 * through kvm->arch.mpidr_data, all under config_lock.
 *
 * Nothing is done if a table already exists, if only one vCPU is online,
 * if the table would not fit in a single page, or if the allocation fails.
 */
static void kvm_init_mpidr_data(struct kvm *kvm)
{
	struct kvm_mpidr_data *data = NULL;
	unsigned long c, mask, nr_entries;
	u64 aff_set = 0, aff_clr = ~0UL;
	struct kvm_vcpu *vcpu;

	mutex_lock(&kvm->arch.config_lock);

	if (rcu_access_pointer(kvm->arch.mpidr_data) ||
	    atomic_read(&kvm->online_vcpus) == 1)
		goto out;

	/* aff_set: bits set in any vCPU's affinity; aff_clr: bits set in all. */
	kvm_for_each_vcpu(c, vcpu, kvm) {
		u64 aff = kvm_vcpu_get_mpidr_aff(vcpu);
		aff_set |= aff;
		aff_clr &= aff;
	}

	/*
	 * A significant bit can be either 0 or 1, and will only appear in
	 * aff_set. Use aff_clr to weed out the useless stuff.
	 */
	mask = aff_set ^ aff_clr;
	nr_entries = BIT_ULL(hweight_long(mask));

	/*
	 * Don't let userspace fool us. If we need more than a single page
	 * to describe the compressed MPIDR array, just fall back to the
	 * iterative method. Single vcpu VMs do not need this either.
	 */
	if (struct_size(data, cmpidr_to_idx, nr_entries) <= PAGE_SIZE)
		data = kzalloc_flex(*data, cmpidr_to_idx, nr_entries,
				    GFP_KERNEL_ACCOUNT);

	if (!data)
		goto out;

	data->mpidr_mask = mask;

	kvm_for_each_vcpu(c, vcpu, kvm) {
		u64 aff = kvm_vcpu_get_mpidr_aff(vcpu);
		u16 index = kvm_mpidr_index(data, aff);

		data->cmpidr_to_idx[index] = c;
	}

	rcu_assign_pointer(kvm->arch.mpidr_data, data);
out:
	mutex_unlock(&kvm->arch.config_lock);
}

/*
 * Handle both the initialisation that is being done when the vcpu is
 * run for the first time, as well as the updates that must be
 * performed each time we get a new thread dealing with this vcpu.
 *
 * Returns -ENOEXEC if the vCPU has not been initialised, -EPERM if it has
 * not been finalized, 0 if it has already run once, and otherwise the
 * result of the first-run setup below.
 */
int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
{
	struct kvm *kvm = vcpu->kvm;
	int ret;

	if (!kvm_vcpu_initialized(vcpu))
		return -ENOEXEC;

	if (!kvm_arm_vcpu_is_finalized(vcpu))
		return -EPERM;

	if (likely(vcpu_has_run_once(vcpu)))
		return 0;

	kvm_init_mpidr_data(kvm);

	if (likely(irqchip_in_kernel(kvm))) {
		/*
		 * Map the VGIC hardware resources before running a vcpu the
		 * first time on this VM.
		 */
		ret = kvm_vgic_map_resources(kvm);
		if (ret)
			return ret;
	}

	ret = kvm_finalize_sys_regs(vcpu);
	if (ret)
		return ret;

	if (vcpu_has_nv(vcpu)) {
		ret = kvm_vcpu_allocate_vncr_tlb(vcpu);
		if (ret)
			return ret;

		ret = kvm_vgic_vcpu_nv_init(vcpu);
		if (ret)
			return ret;
	}

	/*
	 * This needs to happen after any restriction has been applied
	 * to the feature set.
	 */
	kvm_calculate_traps(vcpu);

	ret = kvm_timer_enable(vcpu);
	if (ret)
		return ret;

	if (kvm_vcpu_has_pmu(vcpu)) {
		ret = kvm_arm_pmu_v3_enable(vcpu);
		if (ret)
			return ret;
	}

	ret = vgic_v5_finalize_ppi_state(kvm);
	if (ret)
		return ret;

	if (is_protected_kvm_enabled()) {
		ret = pkvm_create_hyp_vm(kvm);
		if (ret)
			return ret;

		ret = pkvm_create_hyp_vcpu(vcpu);
		if (ret)
			return ret;
	}

	mutex_lock(&kvm->arch.config_lock);
	set_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags);
	mutex_unlock(&kvm->arch.config_lock);

	return ret;
}

/* The interrupt controller counts as initialised once the vgic is. */
bool kvm_arch_intc_initialized(struct kvm *kvm)
{
	return vgic_initialized(kvm);
}

/*
 * Set the pause flag on every vCPU of @kvm, then raise KVM_REQ_SLEEP on all
 * of them.
 */
void kvm_arm_halt_guest(struct kvm *kvm)
{
	unsigned long i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		vcpu->arch.pause = true;
	kvm_make_all_cpus_request(kvm, KVM_REQ_SLEEP);
}

/* Clear the pause flag on every vCPU of @kvm and wake each one up. */
void kvm_arm_resume_guest(struct kvm *kvm)
{
	unsigned long i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		vcpu->arch.pause = false;
		__kvm_vcpu_wake_up(vcpu);
	}
}

/*
 * Handle KVM_REQ_SLEEP: wait (interruptibly) until the vCPU is neither
 * stopped nor paused.
 */
static void kvm_vcpu_sleep(struct kvm_vcpu *vcpu)
{
	struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);

	rcuwait_wait_event(wait,
			   (!kvm_arm_vcpu_stopped(vcpu)) && (!vcpu->arch.pause),
			   TASK_INTERRUPTIBLE);

	if (kvm_arm_vcpu_stopped(vcpu) || vcpu->arch.pause) {
		/* Awaken to handle a signal, request we sleep again later. */
		kvm_make_request(KVM_REQ_SLEEP, vcpu);
	}

	/*
	 * Make sure we will observe a potential reset request if we've
	 * observed a change to the power state. Pairs with the smp_wmb() in
	 * kvm_psci_vcpu_on().
	 */
	smp_rmb();
}

/**
 * kvm_vcpu_wfi - emulate Wait-For-Interrupt behavior
 * @vcpu:	The VCPU pointer
 *
 * Suspend execution of a vCPU until a valid wake event is detected, i.e. until
 * the vCPU is runnable.  The vCPU may or may not be scheduled out, depending
 * on when a wake event arrives, e.g. there may already be a pending wake event.
 */
void kvm_vcpu_wfi(struct kvm_vcpu *vcpu)
{
	/*
	 * Sync back the state of the GIC CPU interface so that we have
	 * the latest PMR and group enables. This ensures that
	 * kvm_arch_vcpu_runnable has up-to-date data to decide whether
	 * we have pending interrupts, e.g. when determining if the
	 * vCPU should block.
	 *
	 * For the same reason, we want to tell GICv4 that we need
	 * doorbells to be signalled, should an interrupt become pending.
	 */
	preempt_disable();
	vcpu_set_flag(vcpu, IN_WFI);
	kvm_vgic_put(vcpu);
	preempt_enable();

	kvm_vcpu_halt(vcpu);
	/*
	 * NOTE(review): IN_WFIT is never set in this function; presumably it
	 * is set by the WFIT trap handling before calling in here - confirm.
	 */
	vcpu_clear_flag(vcpu, IN_WFIT);

	preempt_disable();
	vcpu_clear_flag(vcpu, IN_WFI);
	kvm_vgic_load(vcpu);
	preempt_enable();
}

/*
 * Handle KVM_REQ_SUSPEND for @vcpu.
 *
 * Return: 1 to carry on in kvm_arch_vcpu_ioctl_run() (the vCPU is no longer
 * suspended, or it was woken for something other than a wakeup event),
 * 0 to exit to userspace with a KVM_SYSTEM_EVENT_WAKEUP system event.
 */
static int kvm_vcpu_suspend(struct kvm_vcpu *vcpu)
{
	if (!kvm_arm_vcpu_suspended(vcpu))
		return 1;

	kvm_vcpu_wfi(vcpu);

	/*
	 * The suspend state is sticky; we do not leave it until userspace
	 * explicitly marks the vCPU as runnable. Request that we suspend again
	 * later.
	 */
	kvm_make_request(KVM_REQ_SUSPEND, vcpu);

	/*
	 * Check to make sure the vCPU is actually runnable. If so, exit to
	 * userspace informing it of the wakeup condition.
	 */
	if (kvm_arch_vcpu_runnable(vcpu)) {
		memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event));
		vcpu->run->system_event.type = KVM_SYSTEM_EVENT_WAKEUP;
		vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
		return 0;
	}

	/*
	 * Otherwise, we were unblocked to process a different event, such as a
	 * pending signal. Return 1 and allow kvm_arch_vcpu_ioctl_run() to
	 * process the event.
	 */
	return 1;
}

/**
 * check_vcpu_requests - check and handle pending vCPU requests
 * @vcpu:	the VCPU pointer
 *
 * Return: 1 if we should enter the guest
 *	   0 if we should exit to userspace
 *	   < 0 if we should exit to userspace, where the return value indicates
 *	   an error
 */
static int check_vcpu_requests(struct kvm_vcpu *vcpu)
{
	if (kvm_request_pending(vcpu)) {
		if (kvm_check_request(KVM_REQ_VM_DEAD, vcpu))
			return -EIO;

		if (kvm_check_request(KVM_REQ_SLEEP, vcpu))
			kvm_vcpu_sleep(vcpu);

		if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu))
			kvm_reset_vcpu(vcpu);

		/*
		 * Clear IRQ_PENDING requests that were made to guarantee
		 * that a VCPU sees new virtual interrupts.
		 */
		kvm_check_request(KVM_REQ_IRQ_PENDING, vcpu);

		/* Process interrupts deactivated through a trap */
		if (kvm_check_request(KVM_REQ_VGIC_PROCESS_UPDATE, vcpu))
			kvm_vgic_process_async_update(vcpu);

		if (kvm_check_request(KVM_REQ_RECORD_STEAL, vcpu))
			kvm_update_stolen_time(vcpu);

		if (kvm_check_request(KVM_REQ_RELOAD_GICv4, vcpu)) {
			/* The distributor enable bits were changed */
			preempt_disable();
			vgic_v4_put(vcpu);
			vgic_v4_load(vcpu);
			preempt_enable();
		}

		if (kvm_check_request(KVM_REQ_RELOAD_PMU, vcpu))
			kvm_vcpu_reload_pmu(vcpu);

		if (kvm_check_request(KVM_REQ_RESYNC_PMU_EL0, vcpu))
			kvm_vcpu_pmu_restore_guest(vcpu);

		/* Note: returns directly, skipping the checks below. */
		if (kvm_check_request(KVM_REQ_SUSPEND, vcpu))
			return kvm_vcpu_suspend(vcpu);

		if (kvm_dirty_ring_check_request(vcpu))
			return 0;

		check_nested_vcpu_requests(vcpu);
	}

	return 1;
}

/*
 * Return true if the vCPU is in a 32bit mode that must be refused: any
 * 32bit mode on a vCPU with nested virt, or 32bit when
 * kvm_supports_32bit_el0() is false.
 */
static bool vcpu_mode_is_bad_32bit(struct kvm_vcpu *vcpu)
{
	if (likely(!vcpu_mode_is_32bit(vcpu)))
		return false;

	if (vcpu_has_nv(vcpu))
		return true;

	return !kvm_supports_32bit_el0();
}

/*
 * Run both the timer and PMU run-state updates (neither is short-circuited)
 * and return true if either of them returned true.
 */
static bool kvm_irq_update_run(struct kvm_vcpu *vcpu)
{
	bool r;

	r = kvm_timer_update_run(vcpu);
	r |= kvm_pmu_update_run(vcpu);
	return r;
}

/**
 * kvm_vcpu_exit_request - returns true if the VCPU should *not* enter the guest
 * @vcpu:	The VCPU pointer
 * @ret:	Pointer to write optional return code
 *
 * Returns: true if the VCPU needs to return to a preemptible + interruptible
 *	    context and skip guest entry.
 *
 * This function disambiguates between two different types of exits: exits to a
 * preemptible + interruptible kernel context and exits to userspace. For an
 * exit to userspace, this function will write the return code to ret and return
 * true.
For an exit to preemptible + interruptible kernel context (i.e. check 1191 * for pending work and re-enter), return true without writing to ret. 1192 */ 1193 static bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu, int *ret) 1194 { 1195 struct kvm_run *run = vcpu->run; 1196 1197 /* 1198 * If we're using a userspace irqchip, then check if we need 1199 * to tell a userspace irqchip about timer or PMU level 1200 * changes and if so, exit to userspace while updating the run 1201 * state. 1202 */ 1203 if (unlikely(!irqchip_in_kernel(vcpu->kvm))) { 1204 if (unlikely(kvm_irq_update_run(vcpu))) { 1205 *ret = -EINTR; 1206 run->exit_reason = KVM_EXIT_INTR; 1207 return true; 1208 } 1209 } 1210 1211 if (unlikely(vcpu_on_unsupported_cpu(vcpu))) { 1212 run->exit_reason = KVM_EXIT_FAIL_ENTRY; 1213 run->fail_entry.hardware_entry_failure_reason = KVM_EXIT_FAIL_ENTRY_CPU_UNSUPPORTED; 1214 run->fail_entry.cpu = smp_processor_id(); 1215 *ret = 0; 1216 return true; 1217 } 1218 1219 return kvm_request_pending(vcpu) || 1220 xfer_to_guest_mode_work_pending(); 1221 } 1222 1223 /* 1224 * Actually run the vCPU, entering an RCU extended quiescent state (EQS) while 1225 * the vCPU is running. 1226 * 1227 * This must be noinstr as instrumentation may make use of RCU, and this is not 1228 * safe during the EQS. 1229 */ 1230 static int noinstr kvm_arm_vcpu_enter_exit(struct kvm_vcpu *vcpu) 1231 { 1232 int ret; 1233 1234 guest_state_enter_irqoff(); 1235 ret = kvm_call_hyp_ret(__kvm_vcpu_run, vcpu); 1236 guest_state_exit_irqoff(); 1237 1238 return ret; 1239 } 1240 1241 /** 1242 * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code 1243 * @vcpu: The VCPU pointer 1244 * 1245 * This function is called through the VCPU_RUN ioctl called from user space. 
It 1246 * will execute VM code in a loop until the time slice for the process is used 1247 * or some emulation is needed from user space in which case the function will 1248 * return with return value 0 and with the kvm_run structure filled in with the 1249 * required data for the requested emulation. 1250 */ 1251 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) 1252 { 1253 struct kvm_run *run = vcpu->run; 1254 int ret; 1255 1256 if (run->exit_reason == KVM_EXIT_MMIO) { 1257 ret = kvm_handle_mmio_return(vcpu); 1258 if (ret <= 0) 1259 return ret; 1260 } 1261 1262 vcpu_load(vcpu); 1263 1264 if (!vcpu->wants_to_run) { 1265 ret = -EINTR; 1266 goto out; 1267 } 1268 1269 kvm_sigset_activate(vcpu); 1270 1271 ret = 1; 1272 run->exit_reason = KVM_EXIT_UNKNOWN; 1273 run->flags = 0; 1274 while (ret > 0) { 1275 /* 1276 * Check conditions before entering the guest 1277 */ 1278 ret = kvm_xfer_to_guest_mode_handle_work(vcpu); 1279 if (!ret) 1280 ret = 1; 1281 1282 if (ret > 0) 1283 ret = check_vcpu_requests(vcpu); 1284 1285 /* 1286 * Preparing the interrupts to be injected also 1287 * involves poking the GIC, which must be done in a 1288 * non-preemptible context. 1289 */ 1290 preempt_disable(); 1291 1292 kvm_nested_flush_hwstate(vcpu); 1293 1294 if (kvm_vcpu_has_pmu(vcpu)) 1295 kvm_pmu_flush_hwstate(vcpu); 1296 1297 local_irq_disable(); 1298 1299 kvm_vgic_flush_hwstate(vcpu); 1300 1301 kvm_pmu_update_vcpu_events(vcpu); 1302 1303 /* 1304 * Ensure we set mode to IN_GUEST_MODE after we disable 1305 * interrupts and before the final VCPU requests check. 
1306 * See the comment in kvm_vcpu_exiting_guest_mode() and 1307 * Documentation/virt/kvm/vcpu-requests.rst 1308 */ 1309 smp_store_mb(vcpu->mode, IN_GUEST_MODE); 1310 1311 if (ret <= 0 || kvm_vcpu_exit_request(vcpu, &ret)) { 1312 vcpu->mode = OUTSIDE_GUEST_MODE; 1313 isb(); /* Ensure work in x_flush_hwstate is committed */ 1314 if (kvm_vcpu_has_pmu(vcpu)) 1315 kvm_pmu_sync_hwstate(vcpu); 1316 if (unlikely(!irqchip_in_kernel(vcpu->kvm))) 1317 kvm_timer_sync_user(vcpu); 1318 kvm_vgic_sync_hwstate(vcpu); 1319 local_irq_enable(); 1320 preempt_enable(); 1321 continue; 1322 } 1323 1324 kvm_arch_vcpu_ctxflush_fp(vcpu); 1325 1326 /************************************************************** 1327 * Enter the guest 1328 */ 1329 trace_kvm_entry(*vcpu_pc(vcpu)); 1330 guest_timing_enter_irqoff(); 1331 1332 ret = kvm_arm_vcpu_enter_exit(vcpu); 1333 1334 vcpu->mode = OUTSIDE_GUEST_MODE; 1335 vcpu->stat.exits++; 1336 /* 1337 * Back from guest 1338 *************************************************************/ 1339 1340 /* 1341 * We must sync the PMU state before the vgic state so 1342 * that the vgic can properly sample the updated state of the 1343 * interrupt line. 1344 */ 1345 if (kvm_vcpu_has_pmu(vcpu)) 1346 kvm_pmu_sync_hwstate(vcpu); 1347 1348 /* 1349 * Sync the vgic state before syncing the timer state because 1350 * the timer code needs to know if the virtual timer 1351 * interrupts are active. 1352 */ 1353 kvm_vgic_sync_hwstate(vcpu); 1354 1355 /* 1356 * Sync the timer hardware state before enabling interrupts as 1357 * we don't want vtimer interrupts to race with syncing the 1358 * timer virtual interrupt state. 1359 */ 1360 if (unlikely(!irqchip_in_kernel(vcpu->kvm))) 1361 kvm_timer_sync_user(vcpu); 1362 1363 if (is_hyp_ctxt(vcpu)) 1364 kvm_timer_sync_nested(vcpu); 1365 1366 kvm_arch_vcpu_ctxsync_fp(vcpu); 1367 1368 /* 1369 * We must ensure that any pending interrupts are taken before 1370 * we exit guest timing so that timer ticks are accounted as 1371 * guest time. 
Transiently unmask interrupts so that any 1372 * pending interrupts are taken. 1373 * 1374 * Per ARM DDI 0487G.b section D1.13.4, an ISB (or other 1375 * context synchronization event) is necessary to ensure that 1376 * pending interrupts are taken. 1377 */ 1378 if (ARM_EXCEPTION_CODE(ret) == ARM_EXCEPTION_IRQ) { 1379 local_irq_enable(); 1380 isb(); 1381 local_irq_disable(); 1382 } 1383 1384 guest_timing_exit_irqoff(); 1385 1386 local_irq_enable(); 1387 1388 trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu)); 1389 1390 /* Exit types that need handling before we can be preempted */ 1391 handle_exit_early(vcpu, ret); 1392 1393 kvm_nested_sync_hwstate(vcpu); 1394 1395 preempt_enable(); 1396 1397 /* 1398 * The ARMv8 architecture doesn't give the hypervisor 1399 * a mechanism to prevent a guest from dropping to AArch32 EL0 1400 * if implemented by the CPU. If we spot the guest in such 1401 * state and that we decided it wasn't supposed to do so (like 1402 * with the asymmetric AArch32 case), return to userspace with 1403 * a fatal error. 1404 */ 1405 if (vcpu_mode_is_bad_32bit(vcpu)) { 1406 /* 1407 * As we have caught the guest red-handed, decide that 1408 * it isn't fit for purpose anymore by making the vcpu 1409 * invalid. The VMM can try and fix it by issuing a 1410 * KVM_ARM_VCPU_INIT if it really wants to. 1411 */ 1412 vcpu_clear_flag(vcpu, VCPU_INITIALIZED); 1413 ret = ARM_EXCEPTION_IL; 1414 } 1415 1416 ret = handle_exit(vcpu, ret); 1417 } 1418 1419 if (unlikely(!irqchip_in_kernel(vcpu->kvm))) 1420 kvm_irq_update_run(vcpu); 1421 1422 kvm_sigset_deactivate(vcpu); 1423 1424 out: 1425 /* 1426 * In the unlikely event that we are returning to userspace 1427 * with pending exceptions or PC adjustment, commit these 1428 * adjustments in order to give userspace a consistent view of 1429 * the vcpu state. Note that this relies on __kvm_adjust_pc() 1430 * being preempt-safe on VHE. 
1431 */ 1432 if (unlikely(vcpu_get_flag(vcpu, PENDING_EXCEPTION) || 1433 vcpu_get_flag(vcpu, INCREMENT_PC))) 1434 kvm_call_hyp(__kvm_adjust_pc, vcpu); 1435 1436 vcpu_put(vcpu); 1437 return ret; 1438 } 1439 1440 static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level) 1441 { 1442 int bit_index; 1443 bool set; 1444 unsigned long *hcr; 1445 1446 if (number == KVM_ARM_IRQ_CPU_IRQ) 1447 bit_index = __ffs(HCR_VI); 1448 else /* KVM_ARM_IRQ_CPU_FIQ */ 1449 bit_index = __ffs(HCR_VF); 1450 1451 hcr = vcpu_hcr(vcpu); 1452 if (level) 1453 set = test_and_set_bit(bit_index, hcr); 1454 else 1455 set = test_and_clear_bit(bit_index, hcr); 1456 1457 /* 1458 * If we didn't change anything, no need to wake up or kick other CPUs 1459 */ 1460 if (set == level) 1461 return 0; 1462 1463 /* 1464 * The vcpu irq_lines field was updated, wake up sleeping VCPUs and 1465 * trigger a world-switch round on the running physical CPU to set the 1466 * virtual IRQ/FIQ fields in the HCR appropriately. 1467 */ 1468 kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); 1469 kvm_vcpu_kick(vcpu); 1470 1471 return 0; 1472 } 1473 1474 int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, 1475 bool line_status) 1476 { 1477 unsigned int irq_type, vcpu_id, irq_num; 1478 struct kvm_vcpu *vcpu = NULL; 1479 bool level = irq_level->level; 1480 u32 irq = irq_level->irq; 1481 unsigned long *mask; 1482 1483 irq_type = (irq >> KVM_ARM_IRQ_TYPE_SHIFT) & KVM_ARM_IRQ_TYPE_MASK; 1484 vcpu_id = (irq >> KVM_ARM_IRQ_VCPU_SHIFT) & KVM_ARM_IRQ_VCPU_MASK; 1485 vcpu_id += ((irq >> KVM_ARM_IRQ_VCPU2_SHIFT) & KVM_ARM_IRQ_VCPU2_MASK) * (KVM_ARM_IRQ_VCPU_MASK + 1); 1486 irq_num = (irq >> KVM_ARM_IRQ_NUM_SHIFT) & KVM_ARM_IRQ_NUM_MASK; 1487 1488 trace_kvm_irq_line(irq_type, vcpu_id, irq_num, irq_level->level); 1489 1490 switch (irq_type) { 1491 case KVM_ARM_IRQ_TYPE_CPU: 1492 if (irqchip_in_kernel(kvm)) 1493 return -ENXIO; 1494 1495 vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id); 1496 if (!vcpu) 1497 return 
-EINVAL; 1498 1499 if (irq_num > KVM_ARM_IRQ_CPU_FIQ) 1500 return -EINVAL; 1501 1502 return vcpu_interrupt_line(vcpu, irq_num, level); 1503 case KVM_ARM_IRQ_TYPE_PPI: 1504 if (irqchip_in_kernel(kvm)) { 1505 int ret = vgic_lazy_init(kvm); 1506 if (ret) 1507 return ret; 1508 } else { 1509 return -ENXIO; 1510 } 1511 1512 vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id); 1513 if (!vcpu) 1514 return -EINVAL; 1515 1516 if (vgic_is_v5(kvm)) { 1517 if (irq_num >= VGIC_V5_NR_PRIVATE_IRQS) 1518 return -EINVAL; 1519 1520 /* 1521 * Only allow PPIs that are explicitly exposed to 1522 * usespace to be driven via KVM_IRQ_LINE 1523 */ 1524 mask = kvm->arch.vgic.gicv5_vm.userspace_ppis; 1525 if (!test_bit(irq_num, mask)) 1526 return -EINVAL; 1527 1528 /* Build a GICv5-style IntID here */ 1529 irq_num = vgic_v5_make_ppi(irq_num); 1530 } else if (irq_num < VGIC_NR_SGIS || 1531 irq_num >= VGIC_NR_PRIVATE_IRQS) { 1532 return -EINVAL; 1533 } 1534 1535 return kvm_vgic_inject_irq(kvm, vcpu, irq_num, level, NULL); 1536 case KVM_ARM_IRQ_TYPE_SPI: 1537 if (irqchip_in_kernel(kvm)) { 1538 int ret = vgic_lazy_init(kvm); 1539 if (ret) 1540 return ret; 1541 } else { 1542 return -ENXIO; 1543 } 1544 1545 if (vgic_is_v5(kvm)) { 1546 /* Build a GICv5-style IntID here */ 1547 irq_num = vgic_v5_make_spi(irq_num); 1548 } else { 1549 if (irq_num < VGIC_NR_PRIVATE_IRQS) 1550 return -EINVAL; 1551 } 1552 1553 return kvm_vgic_inject_irq(kvm, NULL, irq_num, level, NULL); 1554 } 1555 1556 return -EINVAL; 1557 } 1558 1559 static unsigned long system_supported_vcpu_features(void) 1560 { 1561 unsigned long features = KVM_VCPU_VALID_FEATURES; 1562 1563 if (!cpus_have_final_cap(ARM64_HAS_32BIT_EL1)) 1564 clear_bit(KVM_ARM_VCPU_EL1_32BIT, &features); 1565 1566 if (!kvm_supports_guest_pmuv3()) 1567 clear_bit(KVM_ARM_VCPU_PMU_V3, &features); 1568 1569 if (!system_supports_sve()) 1570 clear_bit(KVM_ARM_VCPU_SVE, &features); 1571 1572 if (!kvm_has_full_ptr_auth()) { 1573 clear_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, &features); 1574 
clear_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, &features); 1575 } 1576 1577 if (!cpus_have_final_cap(ARM64_HAS_NESTED_VIRT)) 1578 clear_bit(KVM_ARM_VCPU_HAS_EL2, &features); 1579 1580 return features; 1581 } 1582 1583 static int kvm_vcpu_init_check_features(struct kvm_vcpu *vcpu, 1584 const struct kvm_vcpu_init *init) 1585 { 1586 unsigned long features = init->features[0]; 1587 int i; 1588 1589 if (features & ~KVM_VCPU_VALID_FEATURES) 1590 return -ENOENT; 1591 1592 for (i = 1; i < ARRAY_SIZE(init->features); i++) { 1593 if (init->features[i]) 1594 return -ENOENT; 1595 } 1596 1597 if (features & ~system_supported_vcpu_features()) 1598 return -EINVAL; 1599 1600 /* 1601 * For now make sure that both address/generic pointer authentication 1602 * features are requested by the userspace together. 1603 */ 1604 if (test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, &features) != 1605 test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, &features)) 1606 return -EINVAL; 1607 1608 if (!test_bit(KVM_ARM_VCPU_EL1_32BIT, &features)) 1609 return 0; 1610 1611 /* MTE is incompatible with AArch32 */ 1612 if (kvm_has_mte(vcpu->kvm)) 1613 return -EINVAL; 1614 1615 /* NV is incompatible with AArch32 */ 1616 if (test_bit(KVM_ARM_VCPU_HAS_EL2, &features)) 1617 return -EINVAL; 1618 1619 return 0; 1620 } 1621 1622 static bool kvm_vcpu_init_changed(struct kvm_vcpu *vcpu, 1623 const struct kvm_vcpu_init *init) 1624 { 1625 unsigned long features = init->features[0]; 1626 1627 return !bitmap_equal(vcpu->kvm->arch.vcpu_features, &features, 1628 KVM_VCPU_MAX_FEATURES); 1629 } 1630 1631 static int kvm_setup_vcpu(struct kvm_vcpu *vcpu) 1632 { 1633 struct kvm *kvm = vcpu->kvm; 1634 int ret = 0; 1635 1636 /* 1637 * When the vCPU has a PMU, but no PMU is set for the guest 1638 * yet, set the default one. 
1639 */ 1640 if (kvm_vcpu_has_pmu(vcpu) && !kvm->arch.arm_pmu) 1641 ret = kvm_arm_set_default_pmu(kvm); 1642 1643 /* Prepare for nested if required */ 1644 if (!ret && vcpu_has_nv(vcpu)) 1645 ret = kvm_vcpu_init_nested(vcpu); 1646 1647 return ret; 1648 } 1649 1650 static int __kvm_vcpu_set_target(struct kvm_vcpu *vcpu, 1651 const struct kvm_vcpu_init *init) 1652 { 1653 unsigned long features = init->features[0]; 1654 struct kvm *kvm = vcpu->kvm; 1655 int ret = -EINVAL; 1656 1657 mutex_lock(&kvm->arch.config_lock); 1658 1659 if (test_bit(KVM_ARCH_FLAG_VCPU_FEATURES_CONFIGURED, &kvm->arch.flags) && 1660 kvm_vcpu_init_changed(vcpu, init)) 1661 goto out_unlock; 1662 1663 bitmap_copy(kvm->arch.vcpu_features, &features, KVM_VCPU_MAX_FEATURES); 1664 1665 ret = kvm_setup_vcpu(vcpu); 1666 if (ret) 1667 goto out_unlock; 1668 1669 /* Now we know what it is, we can reset it. */ 1670 kvm_reset_vcpu(vcpu); 1671 1672 set_bit(KVM_ARCH_FLAG_VCPU_FEATURES_CONFIGURED, &kvm->arch.flags); 1673 vcpu_set_flag(vcpu, VCPU_INITIALIZED); 1674 ret = 0; 1675 out_unlock: 1676 mutex_unlock(&kvm->arch.config_lock); 1677 return ret; 1678 } 1679 1680 static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, 1681 const struct kvm_vcpu_init *init) 1682 { 1683 int ret; 1684 1685 if (init->target != KVM_ARM_TARGET_GENERIC_V8 && 1686 init->target != kvm_target_cpu()) 1687 return -EINVAL; 1688 1689 ret = kvm_vcpu_init_check_features(vcpu, init); 1690 if (ret) 1691 return ret; 1692 1693 if (!kvm_vcpu_initialized(vcpu)) 1694 return __kvm_vcpu_set_target(vcpu, init); 1695 1696 if (kvm_vcpu_init_changed(vcpu, init)) 1697 return -EINVAL; 1698 1699 kvm_reset_vcpu(vcpu); 1700 return 0; 1701 } 1702 1703 static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, 1704 struct kvm_vcpu_init *init) 1705 { 1706 bool power_off = false; 1707 int ret; 1708 1709 /* 1710 * Treat the power-off vCPU feature as ephemeral. 
Clear the bit to avoid 1711 * reflecting it in the finalized feature set, thus limiting its scope 1712 * to a single KVM_ARM_VCPU_INIT call. 1713 */ 1714 if (init->features[0] & BIT(KVM_ARM_VCPU_POWER_OFF)) { 1715 init->features[0] &= ~BIT(KVM_ARM_VCPU_POWER_OFF); 1716 power_off = true; 1717 } 1718 1719 ret = kvm_vcpu_set_target(vcpu, init); 1720 if (ret) 1721 return ret; 1722 1723 /* 1724 * Ensure a rebooted VM will fault in RAM pages and detect if the 1725 * guest MMU is turned off and flush the caches as needed. 1726 * 1727 * S2FWB enforces all memory accesses to RAM being cacheable, 1728 * ensuring that the data side is always coherent. We still 1729 * need to invalidate the I-cache though, as FWB does *not* 1730 * imply CTR_EL0.DIC. 1731 */ 1732 if (vcpu_has_run_once(vcpu)) { 1733 if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB)) 1734 stage2_unmap_vm(vcpu->kvm); 1735 else 1736 icache_inval_all_pou(); 1737 } 1738 1739 vcpu_reset_hcr(vcpu); 1740 1741 /* 1742 * Handle the "start in power-off" case. 
1743 */ 1744 spin_lock(&vcpu->arch.mp_state_lock); 1745 1746 if (power_off) 1747 __kvm_arm_vcpu_power_off(vcpu); 1748 else 1749 WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_RUNNABLE); 1750 1751 spin_unlock(&vcpu->arch.mp_state_lock); 1752 1753 return 0; 1754 } 1755 1756 static int kvm_arm_vcpu_set_attr(struct kvm_vcpu *vcpu, 1757 struct kvm_device_attr *attr) 1758 { 1759 int ret = -ENXIO; 1760 1761 switch (attr->group) { 1762 default: 1763 ret = kvm_arm_vcpu_arch_set_attr(vcpu, attr); 1764 break; 1765 } 1766 1767 return ret; 1768 } 1769 1770 static int kvm_arm_vcpu_get_attr(struct kvm_vcpu *vcpu, 1771 struct kvm_device_attr *attr) 1772 { 1773 int ret = -ENXIO; 1774 1775 switch (attr->group) { 1776 default: 1777 ret = kvm_arm_vcpu_arch_get_attr(vcpu, attr); 1778 break; 1779 } 1780 1781 return ret; 1782 } 1783 1784 static int kvm_arm_vcpu_has_attr(struct kvm_vcpu *vcpu, 1785 struct kvm_device_attr *attr) 1786 { 1787 int ret = -ENXIO; 1788 1789 switch (attr->group) { 1790 default: 1791 ret = kvm_arm_vcpu_arch_has_attr(vcpu, attr); 1792 break; 1793 } 1794 1795 return ret; 1796 } 1797 1798 static int kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu, 1799 struct kvm_vcpu_events *events) 1800 { 1801 memset(events, 0, sizeof(*events)); 1802 1803 return __kvm_arm_vcpu_get_events(vcpu, events); 1804 } 1805 1806 static int kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, 1807 struct kvm_vcpu_events *events) 1808 { 1809 int i; 1810 1811 /* check whether the reserved field is zero */ 1812 for (i = 0; i < ARRAY_SIZE(events->reserved); i++) 1813 if (events->reserved[i]) 1814 return -EINVAL; 1815 1816 /* check whether the pad field is zero */ 1817 for (i = 0; i < ARRAY_SIZE(events->exception.pad); i++) 1818 if (events->exception.pad[i]) 1819 return -EINVAL; 1820 1821 return __kvm_arm_vcpu_set_events(vcpu, events); 1822 } 1823 1824 long kvm_arch_vcpu_ioctl(struct file *filp, 1825 unsigned int ioctl, unsigned long arg) 1826 { 1827 struct kvm_vcpu *vcpu = filp->private_data; 1828 
void __user *argp = (void __user *)arg; 1829 struct kvm_device_attr attr; 1830 long r; 1831 1832 switch (ioctl) { 1833 case KVM_ARM_VCPU_INIT: { 1834 struct kvm_vcpu_init init; 1835 1836 r = -EFAULT; 1837 if (copy_from_user(&init, argp, sizeof(init))) 1838 break; 1839 1840 r = kvm_arch_vcpu_ioctl_vcpu_init(vcpu, &init); 1841 break; 1842 } 1843 case KVM_SET_ONE_REG: 1844 case KVM_GET_ONE_REG: { 1845 struct kvm_one_reg reg; 1846 1847 r = -ENOEXEC; 1848 if (unlikely(!kvm_vcpu_initialized(vcpu))) 1849 break; 1850 1851 r = -EFAULT; 1852 if (copy_from_user(®, argp, sizeof(reg))) 1853 break; 1854 1855 /* 1856 * We could owe a reset due to PSCI. Handle the pending reset 1857 * here to ensure userspace register accesses are ordered after 1858 * the reset. 1859 */ 1860 if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu)) 1861 kvm_reset_vcpu(vcpu); 1862 1863 if (ioctl == KVM_SET_ONE_REG) 1864 r = kvm_arm_set_reg(vcpu, ®); 1865 else 1866 r = kvm_arm_get_reg(vcpu, ®); 1867 break; 1868 } 1869 case KVM_GET_REG_LIST: { 1870 struct kvm_reg_list __user *user_list = argp; 1871 struct kvm_reg_list reg_list; 1872 unsigned n; 1873 1874 r = -ENOEXEC; 1875 if (unlikely(!kvm_vcpu_initialized(vcpu))) 1876 break; 1877 1878 r = -EPERM; 1879 if (!kvm_arm_vcpu_is_finalized(vcpu)) 1880 break; 1881 1882 r = -EFAULT; 1883 if (copy_from_user(®_list, user_list, sizeof(reg_list))) 1884 break; 1885 n = reg_list.n; 1886 reg_list.n = kvm_arm_num_regs(vcpu); 1887 if (copy_to_user(user_list, ®_list, sizeof(reg_list))) 1888 break; 1889 r = -E2BIG; 1890 if (n < reg_list.n) 1891 break; 1892 r = kvm_arm_copy_reg_indices(vcpu, user_list->reg); 1893 break; 1894 } 1895 case KVM_SET_DEVICE_ATTR: { 1896 r = -EFAULT; 1897 if (copy_from_user(&attr, argp, sizeof(attr))) 1898 break; 1899 r = kvm_arm_vcpu_set_attr(vcpu, &attr); 1900 break; 1901 } 1902 case KVM_GET_DEVICE_ATTR: { 1903 r = -EFAULT; 1904 if (copy_from_user(&attr, argp, sizeof(attr))) 1905 break; 1906 r = kvm_arm_vcpu_get_attr(vcpu, &attr); 1907 break; 1908 } 
1909 case KVM_HAS_DEVICE_ATTR: { 1910 r = -EFAULT; 1911 if (copy_from_user(&attr, argp, sizeof(attr))) 1912 break; 1913 r = kvm_arm_vcpu_has_attr(vcpu, &attr); 1914 break; 1915 } 1916 case KVM_GET_VCPU_EVENTS: { 1917 struct kvm_vcpu_events events; 1918 1919 if (!kvm_vcpu_initialized(vcpu)) 1920 return -ENOEXEC; 1921 1922 if (kvm_arm_vcpu_get_events(vcpu, &events)) 1923 return -EINVAL; 1924 1925 if (copy_to_user(argp, &events, sizeof(events))) 1926 return -EFAULT; 1927 1928 return 0; 1929 } 1930 case KVM_SET_VCPU_EVENTS: { 1931 struct kvm_vcpu_events events; 1932 1933 if (!kvm_vcpu_initialized(vcpu)) 1934 return -ENOEXEC; 1935 1936 if (copy_from_user(&events, argp, sizeof(events))) 1937 return -EFAULT; 1938 1939 return kvm_arm_vcpu_set_events(vcpu, &events); 1940 } 1941 case KVM_ARM_VCPU_FINALIZE: { 1942 int what; 1943 1944 if (!kvm_vcpu_initialized(vcpu)) 1945 return -ENOEXEC; 1946 1947 if (get_user(what, (const int __user *)argp)) 1948 return -EFAULT; 1949 1950 return kvm_arm_vcpu_finalize(vcpu, what); 1951 } 1952 default: 1953 r = -EINVAL; 1954 } 1955 1956 return r; 1957 } 1958 1959 long kvm_arch_vcpu_unlocked_ioctl(struct file *filp, unsigned int ioctl, 1960 unsigned long arg) 1961 { 1962 return -ENOIOCTLCMD; 1963 } 1964 1965 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot) 1966 { 1967 1968 } 1969 1970 static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm, 1971 struct kvm_arm_device_addr *dev_addr) 1972 { 1973 switch (FIELD_GET(KVM_ARM_DEVICE_ID_MASK, dev_addr->id)) { 1974 case KVM_ARM_DEVICE_VGIC_V2: 1975 if (!vgic_present) 1976 return -ENXIO; 1977 return kvm_set_legacy_vgic_v2_addr(kvm, dev_addr); 1978 default: 1979 return -ENODEV; 1980 } 1981 } 1982 1983 static int kvm_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) 1984 { 1985 switch (attr->group) { 1986 case KVM_ARM_VM_SMCCC_CTRL: 1987 return kvm_vm_smccc_has_attr(kvm, attr); 1988 default: 1989 return -ENXIO; 1990 } 1991 } 1992 1993 static int 
kvm_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr) 1994 { 1995 switch (attr->group) { 1996 case KVM_ARM_VM_SMCCC_CTRL: 1997 return kvm_vm_smccc_set_attr(kvm, attr); 1998 default: 1999 return -ENXIO; 2000 } 2001 } 2002 2003 int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) 2004 { 2005 struct kvm *kvm = filp->private_data; 2006 void __user *argp = (void __user *)arg; 2007 struct kvm_device_attr attr; 2008 2009 if (is_protected_kvm_enabled() && !kvm_pkvm_ioctl_allowed(kvm, ioctl)) 2010 return -EINVAL; 2011 2012 switch (ioctl) { 2013 case KVM_CREATE_IRQCHIP: { 2014 int ret; 2015 if (!vgic_present) 2016 return -ENXIO; 2017 mutex_lock(&kvm->lock); 2018 ret = kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); 2019 mutex_unlock(&kvm->lock); 2020 return ret; 2021 } 2022 case KVM_ARM_SET_DEVICE_ADDR: { 2023 struct kvm_arm_device_addr dev_addr; 2024 2025 if (copy_from_user(&dev_addr, argp, sizeof(dev_addr))) 2026 return -EFAULT; 2027 return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr); 2028 } 2029 case KVM_ARM_PREFERRED_TARGET: { 2030 struct kvm_vcpu_init init = { 2031 .target = KVM_ARM_TARGET_GENERIC_V8, 2032 }; 2033 2034 if (copy_to_user(argp, &init, sizeof(init))) 2035 return -EFAULT; 2036 2037 return 0; 2038 } 2039 case KVM_ARM_MTE_COPY_TAGS: { 2040 struct kvm_arm_copy_mte_tags copy_tags; 2041 2042 if (copy_from_user(©_tags, argp, sizeof(copy_tags))) 2043 return -EFAULT; 2044 return kvm_vm_ioctl_mte_copy_tags(kvm, ©_tags); 2045 } 2046 case KVM_ARM_SET_COUNTER_OFFSET: { 2047 struct kvm_arm_counter_offset offset; 2048 2049 if (copy_from_user(&offset, argp, sizeof(offset))) 2050 return -EFAULT; 2051 return kvm_vm_ioctl_set_counter_offset(kvm, &offset); 2052 } 2053 case KVM_HAS_DEVICE_ATTR: { 2054 if (copy_from_user(&attr, argp, sizeof(attr))) 2055 return -EFAULT; 2056 2057 return kvm_vm_has_attr(kvm, &attr); 2058 } 2059 case KVM_SET_DEVICE_ATTR: { 2060 if (copy_from_user(&attr, argp, sizeof(attr))) 2061 return -EFAULT; 2062 2063 return 
kvm_vm_set_attr(kvm, &attr); 2064 } 2065 case KVM_ARM_GET_REG_WRITABLE_MASKS: { 2066 struct reg_mask_range range; 2067 2068 if (copy_from_user(&range, argp, sizeof(range))) 2069 return -EFAULT; 2070 return kvm_vm_ioctl_get_reg_writable_masks(kvm, &range); 2071 } 2072 default: 2073 return -EINVAL; 2074 } 2075 } 2076 2077 static unsigned long nvhe_percpu_size(void) 2078 { 2079 return (unsigned long)CHOOSE_NVHE_SYM(__per_cpu_end) - 2080 (unsigned long)CHOOSE_NVHE_SYM(__per_cpu_start); 2081 } 2082 2083 static unsigned long nvhe_percpu_order(void) 2084 { 2085 unsigned long size = nvhe_percpu_size(); 2086 2087 return size ? get_order(size) : 0; 2088 } 2089 2090 static size_t pkvm_host_sve_state_order(void) 2091 { 2092 return get_order(pkvm_host_sve_state_size()); 2093 } 2094 2095 /* A lookup table holding the hypervisor VA for each vector slot */ 2096 static void *hyp_spectre_vector_selector[BP_HARDEN_EL2_SLOTS]; 2097 2098 static void kvm_init_vector_slot(void *base, enum arm64_hyp_spectre_vector slot) 2099 { 2100 hyp_spectre_vector_selector[slot] = __kvm_vector_slot2addr(base, slot); 2101 } 2102 2103 static int kvm_init_vector_slots(void) 2104 { 2105 int err; 2106 void *base; 2107 2108 base = kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector)); 2109 kvm_init_vector_slot(base, HYP_VECTOR_DIRECT); 2110 2111 base = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs)); 2112 kvm_init_vector_slot(base, HYP_VECTOR_SPECTRE_DIRECT); 2113 2114 if (kvm_system_needs_idmapped_vectors() && 2115 !is_protected_kvm_enabled()) { 2116 err = create_hyp_exec_mappings(__pa_symbol(__bp_harden_hyp_vecs), 2117 __BP_HARDEN_HYP_VECS_SZ, &base); 2118 if (err) 2119 return err; 2120 } 2121 2122 kvm_init_vector_slot(base, HYP_VECTOR_INDIRECT); 2123 kvm_init_vector_slot(base, HYP_VECTOR_SPECTRE_INDIRECT); 2124 return 0; 2125 } 2126 2127 static void __init cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits) 2128 { 2129 struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu); 2130 unsigned long 
tcr; 2131 2132 /* 2133 * Calculate the raw per-cpu offset without a translation from the 2134 * kernel's mapping to the linear mapping, and store it in tpidr_el2 2135 * so that we can use adr_l to access per-cpu variables in EL2. 2136 * Also drop the KASAN tag which gets in the way... 2137 */ 2138 params->tpidr_el2 = (unsigned long)kasan_reset_tag(per_cpu_ptr_nvhe_sym(__per_cpu_start, cpu)) - 2139 (unsigned long)kvm_ksym_ref(CHOOSE_NVHE_SYM(__per_cpu_start)); 2140 2141 params->mair_el2 = read_sysreg(mair_el1); 2142 2143 tcr = read_sysreg(tcr_el1); 2144 if (cpus_have_final_cap(ARM64_KVM_HVHE)) { 2145 tcr &= ~(TCR_HD | TCR_HA | TCR_A1 | TCR_T0SZ_MASK); 2146 tcr |= TCR_EPD1_MASK; 2147 } else { 2148 unsigned long ips = FIELD_GET(TCR_IPS_MASK, tcr); 2149 2150 tcr &= TCR_EL2_MASK; 2151 tcr |= TCR_EL2_RES1 | FIELD_PREP(TCR_EL2_PS_MASK, ips); 2152 if (lpa2_is_enabled()) 2153 tcr |= TCR_EL2_DS; 2154 } 2155 tcr |= TCR_T0SZ(hyp_va_bits); 2156 params->tcr_el2 = tcr; 2157 2158 params->pgd_pa = kvm_mmu_get_httbr(); 2159 if (is_protected_kvm_enabled()) 2160 params->hcr_el2 = HCR_HOST_NVHE_PROTECTED_FLAGS; 2161 else 2162 params->hcr_el2 = HCR_HOST_NVHE_FLAGS; 2163 2164 if (system_supports_mte()) 2165 params->hcr_el2 |= HCR_ATA; 2166 else 2167 params->hcr_el2 |= HCR_TID5; 2168 2169 if (cpus_have_final_cap(ARM64_KVM_HVHE)) 2170 params->hcr_el2 |= HCR_E2H; 2171 params->vttbr = params->vtcr = 0; 2172 2173 /* 2174 * Flush the init params from the data cache because the struct will 2175 * be read while the MMU is off. 2176 */ 2177 kvm_flush_dcache_to_poc(params, sizeof(*params)); 2178 } 2179 2180 static void hyp_install_host_vector(void) 2181 { 2182 struct kvm_nvhe_init_params *params; 2183 struct arm_smccc_res res; 2184 2185 /* Switch from the HYP stub to our own HYP init vector */ 2186 __hyp_set_vectors(kvm_get_idmap_vector()); 2187 2188 /* 2189 * Call initialization code, and switch to the full blown HYP code. 
2190 * If the cpucaps haven't been finalized yet, something has gone very 2191 * wrong, and hyp will crash and burn when it uses any 2192 * cpus_have_*_cap() wrapper. 2193 */ 2194 BUG_ON(!system_capabilities_finalized()); 2195 params = this_cpu_ptr_nvhe_sym(kvm_init_params); 2196 arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(__kvm_hyp_init), virt_to_phys(params), &res); 2197 WARN_ON(res.a0 != SMCCC_RET_SUCCESS); 2198 } 2199 2200 static void cpu_init_hyp_mode(void) 2201 { 2202 hyp_install_host_vector(); 2203 2204 /* 2205 * Disabling SSBD on a non-VHE system requires us to enable SSBS 2206 * at EL2. 2207 */ 2208 if (this_cpu_has_cap(ARM64_SSBS) && 2209 arm64_get_spectre_v4_state() == SPECTRE_VULNERABLE) { 2210 kvm_call_hyp_nvhe(__kvm_enable_ssbs); 2211 } 2212 } 2213 2214 static void cpu_hyp_reset(void) 2215 { 2216 if (!is_kernel_in_hyp_mode()) 2217 __hyp_reset_vectors(); 2218 } 2219 2220 /* 2221 * EL2 vectors can be mapped and rerouted in a number of ways, 2222 * depending on the kernel configuration and CPU present: 2223 * 2224 * - If the CPU is affected by Spectre-v2, the hardening sequence is 2225 * placed in one of the vector slots, which is executed before jumping 2226 * to the real vectors. 2227 * 2228 * - If the CPU also has the ARM64_SPECTRE_V3A cap, the slot 2229 * containing the hardening sequence is mapped next to the idmap page, 2230 * and executed before jumping to the real vectors. 2231 * 2232 * - If the CPU only has the ARM64_SPECTRE_V3A cap, then an 2233 * empty slot is selected, mapped next to the idmap page, and 2234 * executed before jumping to the real vectors. 2235 * 2236 * Note that ARM64_SPECTRE_V3A is somewhat incompatible with 2237 * VHE, as we don't have hypervisor-specific mappings. If the system 2238 * is VHE and yet selects this capability, it will be ignored. 
2239 */ 2240 static void cpu_set_hyp_vector(void) 2241 { 2242 struct bp_hardening_data *data = this_cpu_ptr(&bp_hardening_data); 2243 void *vector = hyp_spectre_vector_selector[data->slot]; 2244 2245 if (!is_protected_kvm_enabled()) 2246 *this_cpu_ptr_hyp_sym(kvm_hyp_vector) = (unsigned long)vector; 2247 else 2248 kvm_call_hyp_nvhe(__pkvm_cpu_set_vector, data->slot); 2249 } 2250 2251 static void cpu_hyp_init_context(void) 2252 { 2253 kvm_init_host_cpu_context(host_data_ptr(host_ctxt)); 2254 kvm_init_host_debug_data(); 2255 2256 if (!is_kernel_in_hyp_mode()) 2257 cpu_init_hyp_mode(); 2258 } 2259 2260 static void cpu_hyp_init_features(void) 2261 { 2262 cpu_set_hyp_vector(); 2263 2264 if (is_kernel_in_hyp_mode()) { 2265 kvm_timer_init_vhe(); 2266 kvm_debug_init_vhe(); 2267 } 2268 2269 if (vgic_present) 2270 kvm_vgic_init_cpu_hardware(); 2271 } 2272 2273 static void cpu_hyp_reinit(void) 2274 { 2275 cpu_hyp_reset(); 2276 cpu_hyp_init_context(); 2277 cpu_hyp_init_features(); 2278 } 2279 2280 static void cpu_hyp_init(void *discard) 2281 { 2282 if (!__this_cpu_read(kvm_hyp_initialized)) { 2283 cpu_hyp_reinit(); 2284 __this_cpu_write(kvm_hyp_initialized, 1); 2285 } 2286 } 2287 2288 static void cpu_hyp_uninit(void *discard) 2289 { 2290 if (!is_protected_kvm_enabled() && __this_cpu_read(kvm_hyp_initialized)) { 2291 cpu_hyp_reset(); 2292 __this_cpu_write(kvm_hyp_initialized, 0); 2293 } 2294 } 2295 2296 int kvm_arch_enable_virtualization_cpu(void) 2297 { 2298 /* 2299 * Most calls to this function are made with migration 2300 * disabled, but not with preemption disabled. The former is 2301 * enough to ensure correctness, but most of the helpers 2302 * expect the later and will throw a tantrum otherwise. 
2303 */ 2304 preempt_disable(); 2305 2306 cpu_hyp_init(NULL); 2307 2308 kvm_vgic_cpu_up(); 2309 kvm_timer_cpu_up(); 2310 2311 preempt_enable(); 2312 2313 return 0; 2314 } 2315 2316 void kvm_arch_disable_virtualization_cpu(void) 2317 { 2318 kvm_timer_cpu_down(); 2319 kvm_vgic_cpu_down(); 2320 2321 if (!is_protected_kvm_enabled()) 2322 cpu_hyp_uninit(NULL); 2323 } 2324 2325 #ifdef CONFIG_CPU_PM 2326 static int hyp_init_cpu_pm_notifier(struct notifier_block *self, 2327 unsigned long cmd, 2328 void *v) 2329 { 2330 /* 2331 * kvm_hyp_initialized is left with its old value over 2332 * PM_ENTER->PM_EXIT. It is used to indicate PM_EXIT should 2333 * re-enable hyp. 2334 */ 2335 switch (cmd) { 2336 case CPU_PM_ENTER: 2337 if (__this_cpu_read(kvm_hyp_initialized)) 2338 /* 2339 * don't update kvm_hyp_initialized here 2340 * so that the hyp will be re-enabled 2341 * when we resume. See below. 2342 */ 2343 cpu_hyp_reset(); 2344 2345 return NOTIFY_OK; 2346 case CPU_PM_ENTER_FAILED: 2347 case CPU_PM_EXIT: 2348 if (__this_cpu_read(kvm_hyp_initialized)) 2349 /* The hyp was enabled before suspend. */ 2350 cpu_hyp_reinit(); 2351 2352 return NOTIFY_OK; 2353 2354 default: 2355 return NOTIFY_DONE; 2356 } 2357 } 2358 2359 static struct notifier_block hyp_init_cpu_pm_nb = { 2360 .notifier_call = hyp_init_cpu_pm_notifier, 2361 }; 2362 2363 static void __init hyp_cpu_pm_init(void) 2364 { 2365 if (!is_protected_kvm_enabled()) 2366 cpu_pm_register_notifier(&hyp_init_cpu_pm_nb); 2367 } 2368 static void __init hyp_cpu_pm_exit(void) 2369 { 2370 if (!is_protected_kvm_enabled()) 2371 cpu_pm_unregister_notifier(&hyp_init_cpu_pm_nb); 2372 } 2373 #else 2374 static inline void __init hyp_cpu_pm_init(void) 2375 { 2376 } 2377 static inline void __init hyp_cpu_pm_exit(void) 2378 { 2379 } 2380 #endif 2381 2382 static void __init init_cpu_logical_map(void) 2383 { 2384 unsigned int cpu; 2385 2386 /* 2387 * Copy the MPIDR <-> logical CPU ID mapping to hyp. 
2388 * Only copy the set of online CPUs whose features have been checked 2389 * against the finalized system capabilities. The hypervisor will not 2390 * allow any other CPUs from the `possible` set to boot. 2391 */ 2392 for_each_online_cpu(cpu) 2393 hyp_cpu_logical_map[cpu] = cpu_logical_map(cpu); 2394 } 2395 2396 #define init_psci_0_1_impl_state(config, what) \ 2397 config.psci_0_1_ ## what ## _implemented = psci_ops.what 2398 2399 static bool __init init_psci_relay(void) 2400 { 2401 /* 2402 * If PSCI has not been initialized, protected KVM cannot install 2403 * itself on newly booted CPUs. 2404 */ 2405 if (!psci_ops.get_version) { 2406 kvm_err("Cannot initialize protected mode without PSCI\n"); 2407 return false; 2408 } 2409 2410 kvm_host_psci_config.version = psci_ops.get_version(); 2411 kvm_host_psci_config.smccc_version = arm_smccc_get_version(); 2412 2413 if (kvm_host_psci_config.version == PSCI_VERSION(0, 1)) { 2414 kvm_host_psci_config.function_ids_0_1 = get_psci_0_1_function_ids(); 2415 init_psci_0_1_impl_state(kvm_host_psci_config, cpu_suspend); 2416 init_psci_0_1_impl_state(kvm_host_psci_config, cpu_on); 2417 init_psci_0_1_impl_state(kvm_host_psci_config, cpu_off); 2418 init_psci_0_1_impl_state(kvm_host_psci_config, migrate); 2419 } 2420 return true; 2421 } 2422 2423 static int __init init_subsystems(void) 2424 { 2425 int err = 0; 2426 2427 /* 2428 * Enable hardware so that subsystem initialisation can access EL2. 2429 */ 2430 on_each_cpu(cpu_hyp_init, NULL, 1); 2431 2432 /* 2433 * Register CPU lower-power notifier 2434 */ 2435 hyp_cpu_pm_init(); 2436 2437 /* 2438 * Init HYP view of VGIC 2439 */ 2440 err = kvm_vgic_hyp_init(); 2441 switch (err) { 2442 case 0: 2443 vgic_present = true; 2444 if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) 2445 kvm_nvhe_sym(hyp_gicv3_nr_lr) = kvm_vgic_global_state.nr_lr; 2446 break; 2447 case -ENODEV: 2448 case -ENXIO: 2449 /* 2450 * No VGIC? No pKVM for you. 
2451 * 2452 * Protected mode assumes that VGICv3 is present, so no point 2453 * in trying to hobble along if vgic initialization fails. 2454 */ 2455 if (is_protected_kvm_enabled()) 2456 goto out; 2457 2458 /* 2459 * Otherwise, userspace could choose to implement a GIC for its 2460 * guest on non-cooperative hardware. 2461 */ 2462 vgic_present = false; 2463 err = 0; 2464 break; 2465 default: 2466 goto out; 2467 } 2468 2469 if (kvm_mode == KVM_MODE_NV && 2470 !(vgic_present && (kvm_vgic_global_state.type == VGIC_V3 || 2471 kvm_vgic_global_state.has_gcie_v3_compat))) { 2472 kvm_err("NV support requires GICv3 or GICv5 with legacy support, giving up\n"); 2473 err = -EINVAL; 2474 goto out; 2475 } 2476 2477 /* 2478 * Init HYP architected timer support 2479 */ 2480 err = kvm_timer_hyp_init(vgic_present); 2481 if (err) 2482 goto out; 2483 2484 kvm_register_perf_callbacks(); 2485 2486 err = kvm_hyp_trace_init(); 2487 if (err) 2488 kvm_err("Failed to initialize Hyp tracing\n"); 2489 2490 out: 2491 if (err) 2492 hyp_cpu_pm_exit(); 2493 2494 if (err || !is_protected_kvm_enabled()) 2495 on_each_cpu(cpu_hyp_uninit, NULL, 1); 2496 2497 return err; 2498 } 2499 2500 static void __init teardown_subsystems(void) 2501 { 2502 kvm_unregister_perf_callbacks(); 2503 hyp_cpu_pm_exit(); 2504 } 2505 2506 static void __init teardown_hyp_mode(void) 2507 { 2508 bool free_sve = system_supports_sve() && is_protected_kvm_enabled(); 2509 int cpu; 2510 2511 free_hyp_pgds(); 2512 for_each_possible_cpu(cpu) { 2513 if (per_cpu(kvm_hyp_initialized, cpu)) 2514 continue; 2515 2516 free_pages(per_cpu(kvm_arm_hyp_stack_base, cpu), NVHE_STACK_SHIFT - PAGE_SHIFT); 2517 2518 if (!kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu]) 2519 continue; 2520 2521 if (free_sve) { 2522 struct arm64_sve_state *sve_regs; 2523 2524 sve_regs = per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_regs; 2525 free_pages((unsigned long) sve_regs, pkvm_host_sve_state_order()); 2526 } 2527 2528 
free_pages(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu], nvhe_percpu_order()); 2529 2530 } 2531 } 2532 2533 static int __init do_pkvm_init(u32 hyp_va_bits) 2534 { 2535 void *per_cpu_base = kvm_ksym_ref(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)); 2536 int ret; 2537 2538 preempt_disable(); 2539 cpu_hyp_init_context(); 2540 ret = kvm_call_hyp_nvhe(__pkvm_init, hyp_mem_base, hyp_mem_size, 2541 kern_hyp_va(per_cpu_base), 2542 hyp_va_bits); 2543 cpu_hyp_init_features(); 2544 2545 /* 2546 * The stub hypercalls are now disabled, so set our local flag to 2547 * prevent a later re-init attempt in kvm_arch_enable_virtualization_cpu(). 2548 */ 2549 __this_cpu_write(kvm_hyp_initialized, 1); 2550 preempt_enable(); 2551 2552 return ret; 2553 } 2554 2555 static u64 get_hyp_id_aa64pfr0_el1(void) 2556 { 2557 /* 2558 * Track whether the system isn't affected by spectre/meltdown in the 2559 * hypervisor's view of id_aa64pfr0_el1, used for protected VMs. 2560 * Although this is per-CPU, we make it global for simplicity, e.g., not 2561 * to have to worry about vcpu migration. 2562 * 2563 * Unlike for non-protected VMs, userspace cannot override this for 2564 * protected VMs. 
2565 */ 2566 u64 val = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); 2567 2568 val &= ~(ID_AA64PFR0_EL1_CSV2 | 2569 ID_AA64PFR0_EL1_CSV3); 2570 2571 val |= FIELD_PREP(ID_AA64PFR0_EL1_CSV2, 2572 arm64_get_spectre_v2_state() == SPECTRE_UNAFFECTED); 2573 val |= FIELD_PREP(ID_AA64PFR0_EL1_CSV3, 2574 arm64_get_meltdown_state() == SPECTRE_UNAFFECTED); 2575 2576 return val; 2577 } 2578 2579 static void kvm_hyp_init_symbols(void) 2580 { 2581 kvm_nvhe_sym(id_aa64pfr0_el1_sys_val) = get_hyp_id_aa64pfr0_el1(); 2582 kvm_nvhe_sym(id_aa64pfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1); 2583 kvm_nvhe_sym(id_aa64pfr2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64PFR2_EL1); 2584 kvm_nvhe_sym(id_aa64isar0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR0_EL1); 2585 kvm_nvhe_sym(id_aa64isar1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR1_EL1); 2586 kvm_nvhe_sym(id_aa64isar2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR2_EL1); 2587 kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); 2588 kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); 2589 kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR2_EL1); 2590 kvm_nvhe_sym(id_aa64smfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64SMFR0_EL1); 2591 kvm_nvhe_sym(__icache_flags) = __icache_flags; 2592 kvm_nvhe_sym(kvm_arm_vmid_bits) = kvm_arm_vmid_bits; 2593 2594 /* Propagate the FGT state to the nVHE side */ 2595 kvm_nvhe_sym(hfgrtr_masks) = hfgrtr_masks; 2596 kvm_nvhe_sym(hfgwtr_masks) = hfgwtr_masks; 2597 kvm_nvhe_sym(hfgitr_masks) = hfgitr_masks; 2598 kvm_nvhe_sym(hdfgrtr_masks) = hdfgrtr_masks; 2599 kvm_nvhe_sym(hdfgwtr_masks) = hdfgwtr_masks; 2600 kvm_nvhe_sym(hafgrtr_masks) = hafgrtr_masks; 2601 kvm_nvhe_sym(hfgrtr2_masks) = hfgrtr2_masks; 2602 kvm_nvhe_sym(hfgwtr2_masks) = hfgwtr2_masks; 2603 kvm_nvhe_sym(hfgitr2_masks) = hfgitr2_masks; 2604 kvm_nvhe_sym(hdfgrtr2_masks)= hdfgrtr2_masks; 2605 
kvm_nvhe_sym(hdfgwtr2_masks)= hdfgwtr2_masks; 2606 kvm_nvhe_sym(ich_hfgrtr_masks) = ich_hfgrtr_masks; 2607 kvm_nvhe_sym(ich_hfgwtr_masks) = ich_hfgwtr_masks; 2608 kvm_nvhe_sym(ich_hfgitr_masks) = ich_hfgitr_masks; 2609 2610 /* 2611 * Flush entire BSS since part of its data containing init symbols is read 2612 * while the MMU is off. 2613 */ 2614 kvm_flush_dcache_to_poc(kvm_ksym_ref(__hyp_bss_start), 2615 kvm_ksym_ref(__hyp_bss_end) - kvm_ksym_ref(__hyp_bss_start)); 2616 } 2617 2618 static int __init kvm_hyp_init_protection(u32 hyp_va_bits) 2619 { 2620 void *addr = phys_to_virt(hyp_mem_base); 2621 int ret; 2622 2623 ret = create_hyp_mappings(addr, addr + hyp_mem_size, PAGE_HYP); 2624 if (ret) 2625 return ret; 2626 2627 ret = do_pkvm_init(hyp_va_bits); 2628 if (ret) 2629 return ret; 2630 2631 free_hyp_pgds(); 2632 2633 return 0; 2634 } 2635 2636 static int init_pkvm_host_sve_state(void) 2637 { 2638 int cpu; 2639 2640 if (!system_supports_sve()) 2641 return 0; 2642 2643 /* Allocate pages for host sve state in protected mode. */ 2644 for_each_possible_cpu(cpu) { 2645 struct page *page = alloc_pages(GFP_KERNEL, pkvm_host_sve_state_order()); 2646 2647 if (!page) 2648 return -ENOMEM; 2649 2650 per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_regs = page_address(page); 2651 } 2652 2653 /* 2654 * Don't map the pages in hyp since these are only used in protected 2655 * mode, which will (re)create its own mapping when initialized. 
2656 */ 2657 2658 return 0; 2659 } 2660 2661 static int pkvm_check_sme_dvmsync_fw_call(void) 2662 { 2663 struct arm_smccc_res res; 2664 2665 if (!cpus_have_final_cap(ARM64_WORKAROUND_4193714)) 2666 return 0; 2667 2668 arm_smccc_1_1_smc(ARM_SMCCC_CPU_WORKAROUND_4193714, &res); 2669 if (res.a0) { 2670 kvm_err("pKVM requires firmware support for C1-Pro erratum 4193714\n"); 2671 return -ENODEV; 2672 } 2673 2674 return 0; 2675 } 2676 2677 /* 2678 * Finalizes the initialization of hyp mode, once everything else is initialized 2679 * and the initialziation process cannot fail. 2680 */ 2681 static void finalize_init_hyp_mode(void) 2682 { 2683 int cpu; 2684 2685 if (system_supports_sve() && is_protected_kvm_enabled()) { 2686 for_each_possible_cpu(cpu) { 2687 struct arm64_sve_state *sve_regs; 2688 2689 sve_regs = per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_regs; 2690 per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_regs = 2691 kern_hyp_va(sve_regs); 2692 } 2693 } 2694 } 2695 2696 static void pkvm_hyp_init_ptrauth(void) 2697 { 2698 struct kvm_cpu_context *hyp_ctxt; 2699 int cpu; 2700 2701 for_each_possible_cpu(cpu) { 2702 hyp_ctxt = per_cpu_ptr_nvhe_sym(kvm_hyp_ctxt, cpu); 2703 hyp_ctxt->sys_regs[APIAKEYLO_EL1] = get_random_long(); 2704 hyp_ctxt->sys_regs[APIAKEYHI_EL1] = get_random_long(); 2705 hyp_ctxt->sys_regs[APIBKEYLO_EL1] = get_random_long(); 2706 hyp_ctxt->sys_regs[APIBKEYHI_EL1] = get_random_long(); 2707 hyp_ctxt->sys_regs[APDAKEYLO_EL1] = get_random_long(); 2708 hyp_ctxt->sys_regs[APDAKEYHI_EL1] = get_random_long(); 2709 hyp_ctxt->sys_regs[APDBKEYLO_EL1] = get_random_long(); 2710 hyp_ctxt->sys_regs[APDBKEYHI_EL1] = get_random_long(); 2711 hyp_ctxt->sys_regs[APGAKEYLO_EL1] = get_random_long(); 2712 hyp_ctxt->sys_regs[APGAKEYHI_EL1] = get_random_long(); 2713 } 2714 } 2715 2716 /* Inits Hyp-mode on all online CPUs */ 2717 static int __init init_hyp_mode(void) 2718 { 2719 u32 hyp_va_bits = kvm_hyp_va_bits(); 2720 int cpu; 2721 int err = -ENOMEM; 2722 2723 /* 2724 * The 
protected Hyp-mode cannot be initialized if the memory pool 2725 * allocation has failed. 2726 */ 2727 if (is_protected_kvm_enabled() && !hyp_mem_base) 2728 goto out_err; 2729 2730 /* 2731 * Allocate Hyp PGD and setup Hyp identity mapping 2732 */ 2733 err = kvm_mmu_init(hyp_va_bits); 2734 if (err) 2735 goto out_err; 2736 2737 /* 2738 * Allocate stack pages for Hypervisor-mode 2739 */ 2740 for_each_possible_cpu(cpu) { 2741 unsigned long stack_base; 2742 2743 stack_base = __get_free_pages(GFP_KERNEL, NVHE_STACK_SHIFT - PAGE_SHIFT); 2744 if (!stack_base) { 2745 err = -ENOMEM; 2746 goto out_err; 2747 } 2748 2749 per_cpu(kvm_arm_hyp_stack_base, cpu) = stack_base; 2750 } 2751 2752 /* 2753 * Allocate and initialize pages for Hypervisor-mode percpu regions. 2754 */ 2755 for_each_possible_cpu(cpu) { 2756 struct page *page; 2757 void *page_addr; 2758 2759 page = alloc_pages(GFP_KERNEL, nvhe_percpu_order()); 2760 if (!page) { 2761 err = -ENOMEM; 2762 goto out_err; 2763 } 2764 2765 page_addr = page_address(page); 2766 memcpy(page_addr, CHOOSE_NVHE_SYM(__per_cpu_start), nvhe_percpu_size()); 2767 kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu] = (unsigned long)page_addr; 2768 } 2769 2770 kvm_nvhe_sym(hyp_nr_cpus) = num_possible_cpus(); 2771 2772 /* 2773 * Map the Hyp-code called directly from the host 2774 */ 2775 err = create_hyp_mappings(kvm_ksym_ref(__hyp_text_start), 2776 kvm_ksym_ref(__hyp_text_end), PAGE_HYP_EXEC); 2777 if (err) { 2778 kvm_err("Cannot map world-switch code\n"); 2779 goto out_err; 2780 } 2781 2782 err = create_hyp_mappings(kvm_ksym_ref(__hyp_data_start), 2783 kvm_ksym_ref(__hyp_data_end), PAGE_HYP); 2784 if (err) { 2785 kvm_err("Cannot map .hyp.data section\n"); 2786 goto out_err; 2787 } 2788 2789 err = create_hyp_mappings(kvm_ksym_ref(__hyp_rodata_start), 2790 kvm_ksym_ref(__hyp_rodata_end), PAGE_HYP_RO); 2791 if (err) { 2792 kvm_err("Cannot map .hyp.rodata section\n"); 2793 goto out_err; 2794 } 2795 2796 err = create_hyp_mappings(kvm_ksym_ref(__start_rodata), 
2797 kvm_ksym_ref(__end_rodata), PAGE_HYP_RO); 2798 if (err) { 2799 kvm_err("Cannot map rodata section\n"); 2800 goto out_err; 2801 } 2802 2803 /* 2804 * .hyp.bss is guaranteed to be placed at the beginning of the .bss 2805 * section thanks to an assertion in the linker script. Map it RW and 2806 * the rest of .bss RO. 2807 */ 2808 err = create_hyp_mappings(kvm_ksym_ref(__hyp_bss_start), 2809 kvm_ksym_ref(__hyp_bss_end), PAGE_HYP); 2810 if (err) { 2811 kvm_err("Cannot map hyp bss section: %d\n", err); 2812 goto out_err; 2813 } 2814 2815 err = create_hyp_mappings(kvm_ksym_ref(__hyp_bss_end), 2816 kvm_ksym_ref(__bss_stop), PAGE_HYP_RO); 2817 if (err) { 2818 kvm_err("Cannot map bss section\n"); 2819 goto out_err; 2820 } 2821 2822 /* 2823 * Map the Hyp stack pages 2824 */ 2825 for_each_possible_cpu(cpu) { 2826 struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu); 2827 char *stack_base = (char *)per_cpu(kvm_arm_hyp_stack_base, cpu); 2828 2829 err = create_hyp_stack(__pa(stack_base), ¶ms->stack_hyp_va); 2830 if (err) { 2831 kvm_err("Cannot map hyp stack\n"); 2832 goto out_err; 2833 } 2834 2835 /* 2836 * Save the stack PA in nvhe_init_params. This will be needed 2837 * to recreate the stack mapping in protected nVHE mode. 2838 * __hyp_pa() won't do the right thing there, since the stack 2839 * has been mapped in the flexible private VA space. 
2840 */ 2841 params->stack_pa = __pa(stack_base); 2842 } 2843 2844 for_each_possible_cpu(cpu) { 2845 char *percpu_begin = (char *)kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu]; 2846 char *percpu_end = percpu_begin + nvhe_percpu_size(); 2847 2848 /* Map Hyp percpu pages */ 2849 err = create_hyp_mappings(percpu_begin, percpu_end, PAGE_HYP); 2850 if (err) { 2851 kvm_err("Cannot map hyp percpu region\n"); 2852 goto out_err; 2853 } 2854 2855 /* Prepare the CPU initialization parameters */ 2856 cpu_prepare_hyp_mode(cpu, hyp_va_bits); 2857 } 2858 2859 kvm_hyp_init_symbols(); 2860 2861 if (is_protected_kvm_enabled()) { 2862 if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) && 2863 cpus_have_final_cap(ARM64_HAS_ADDRESS_AUTH)) 2864 pkvm_hyp_init_ptrauth(); 2865 2866 init_cpu_logical_map(); 2867 2868 if (!init_psci_relay()) { 2869 err = -ENODEV; 2870 goto out_err; 2871 } 2872 2873 err = init_pkvm_host_sve_state(); 2874 if (err) 2875 goto out_err; 2876 2877 err = pkvm_check_sme_dvmsync_fw_call(); 2878 if (err) 2879 goto out_err; 2880 2881 err = kvm_hyp_init_protection(hyp_va_bits); 2882 if (err) { 2883 kvm_err("Failed to init hyp memory protection\n"); 2884 goto out_err; 2885 } 2886 } 2887 2888 return 0; 2889 2890 out_err: 2891 teardown_hyp_mode(); 2892 kvm_err("error initializing Hyp mode: %d\n", err); 2893 return err; 2894 } 2895 2896 struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr) 2897 { 2898 struct kvm_vcpu *vcpu = NULL; 2899 struct kvm_mpidr_data *data; 2900 unsigned long i; 2901 2902 mpidr &= MPIDR_HWID_BITMASK; 2903 2904 rcu_read_lock(); 2905 data = rcu_dereference(kvm->arch.mpidr_data); 2906 2907 if (data) { 2908 u16 idx = kvm_mpidr_index(data, mpidr); 2909 2910 vcpu = kvm_get_vcpu(kvm, data->cmpidr_to_idx[idx]); 2911 if (mpidr != kvm_vcpu_get_mpidr_aff(vcpu)) 2912 vcpu = NULL; 2913 } 2914 2915 rcu_read_unlock(); 2916 2917 if (vcpu) 2918 return vcpu; 2919 2920 kvm_for_each_vcpu(i, vcpu, kvm) { 2921 if (mpidr == kvm_vcpu_get_mpidr_aff(vcpu)) 2922 return 
vcpu; 2923 } 2924 return NULL; 2925 } 2926 2927 bool kvm_arch_irqchip_in_kernel(struct kvm *kvm) 2928 { 2929 return irqchip_in_kernel(kvm); 2930 } 2931 2932 int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons, 2933 struct irq_bypass_producer *prod) 2934 { 2935 struct kvm_kernel_irqfd *irqfd = 2936 container_of(cons, struct kvm_kernel_irqfd, consumer); 2937 struct kvm_kernel_irq_routing_entry *irq_entry = &irqfd->irq_entry; 2938 2939 /* 2940 * The only thing we have a chance of directly-injecting is LPIs. Maybe 2941 * one day... 2942 */ 2943 if (irq_entry->type != KVM_IRQ_ROUTING_MSI) 2944 return 0; 2945 2946 return kvm_vgic_v4_set_forwarding(irqfd->kvm, prod->irq, 2947 &irqfd->irq_entry); 2948 } 2949 2950 void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, 2951 struct irq_bypass_producer *prod) 2952 { 2953 struct kvm_kernel_irqfd *irqfd = 2954 container_of(cons, struct kvm_kernel_irqfd, consumer); 2955 struct kvm_kernel_irq_routing_entry *irq_entry = &irqfd->irq_entry; 2956 2957 if (irq_entry->type != KVM_IRQ_ROUTING_MSI) 2958 return; 2959 2960 kvm_vgic_v4_unset_forwarding(irqfd->kvm, prod->irq); 2961 } 2962 2963 void kvm_arch_update_irqfd_routing(struct kvm_kernel_irqfd *irqfd, 2964 struct kvm_kernel_irq_routing_entry *old, 2965 struct kvm_kernel_irq_routing_entry *new) 2966 { 2967 if (old->type == KVM_IRQ_ROUTING_MSI && 2968 new->type == KVM_IRQ_ROUTING_MSI && 2969 !memcmp(&old->msi, &new->msi, sizeof(new->msi))) 2970 return; 2971 2972 /* 2973 * Remapping the vLPI requires taking the its_lock mutex to resolve 2974 * the new translation. We're in spinlock land at this point, so no 2975 * chance of resolving the translation. 2976 * 2977 * Unmap the vLPI and fall back to software LPI injection. 
2978 */ 2979 return kvm_vgic_v4_unset_forwarding(irqfd->kvm, irqfd->producer->irq); 2980 } 2981 2982 void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *cons) 2983 { 2984 struct kvm_kernel_irqfd *irqfd = 2985 container_of(cons, struct kvm_kernel_irqfd, consumer); 2986 2987 kvm_arm_halt_guest(irqfd->kvm); 2988 } 2989 2990 void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *cons) 2991 { 2992 struct kvm_kernel_irqfd *irqfd = 2993 container_of(cons, struct kvm_kernel_irqfd, consumer); 2994 2995 kvm_arm_resume_guest(irqfd->kvm); 2996 } 2997 2998 /* Initialize Hyp-mode and memory mappings on all CPUs */ 2999 static __init int kvm_arm_init(void) 3000 { 3001 int err; 3002 bool in_hyp_mode; 3003 3004 if (!is_hyp_mode_available()) { 3005 kvm_info("HYP mode not available\n"); 3006 return -ENODEV; 3007 } 3008 3009 if (kvm_get_mode() == KVM_MODE_NONE) { 3010 kvm_info("KVM disabled from command line\n"); 3011 return -ENODEV; 3012 } 3013 3014 err = kvm_sys_reg_table_init(); 3015 if (err) { 3016 kvm_info("Error initializing system register tables"); 3017 return err; 3018 } 3019 3020 in_hyp_mode = is_kernel_in_hyp_mode(); 3021 3022 if (cpus_have_final_cap(ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE) || 3023 cpus_have_final_cap(ARM64_WORKAROUND_1508412)) 3024 kvm_info("Guests without required CPU erratum workarounds can deadlock system!\n" \ 3025 "Only trusted guests should be used on this system.\n"); 3026 3027 err = kvm_set_ipa_limit(); 3028 if (err) 3029 return err; 3030 3031 err = kvm_arm_init_sve(); 3032 if (err) 3033 return err; 3034 3035 err = kvm_arm_vmid_alloc_init(); 3036 if (err) { 3037 kvm_err("Failed to initialize VMID allocator.\n"); 3038 return err; 3039 } 3040 3041 if (!in_hyp_mode) { 3042 err = init_hyp_mode(); 3043 if (err) 3044 goto out_err; 3045 } 3046 3047 err = kvm_init_vector_slots(); 3048 if (err) { 3049 kvm_err("Cannot initialise vector slots\n"); 3050 goto out_hyp; 3051 } 3052 3053 err = init_subsystems(); 3054 if (err) 3055 goto out_hyp; 3056 3057 
kvm_info("%s%sVHE%s mode initialized successfully\n", 3058 in_hyp_mode ? "" : (is_protected_kvm_enabled() ? 3059 "Protected " : "Hyp "), 3060 in_hyp_mode ? "" : (cpus_have_final_cap(ARM64_KVM_HVHE) ? 3061 "h" : "n"), 3062 cpus_have_final_cap(ARM64_HAS_NESTED_VIRT) ? "+NV2": ""); 3063 3064 /* 3065 * FIXME: Do something reasonable if kvm_init() fails after pKVM 3066 * hypervisor protection is finalized. 3067 */ 3068 err = kvm_init(sizeof(struct kvm_vcpu), 0, THIS_MODULE); 3069 if (err) 3070 goto out_subs; 3071 3072 /* 3073 * This should be called after initialization is done and failure isn't 3074 * possible anymore. 3075 */ 3076 if (!in_hyp_mode) 3077 finalize_init_hyp_mode(); 3078 3079 kvm_arm_initialised = true; 3080 3081 return 0; 3082 3083 out_subs: 3084 teardown_subsystems(); 3085 out_hyp: 3086 if (!in_hyp_mode) 3087 teardown_hyp_mode(); 3088 out_err: 3089 kvm_arm_vmid_alloc_free(); 3090 return err; 3091 } 3092 3093 static int __init early_kvm_mode_cfg(char *arg) 3094 { 3095 if (!arg) 3096 return -EINVAL; 3097 3098 if (strcmp(arg, "none") == 0) { 3099 kvm_mode = KVM_MODE_NONE; 3100 return 0; 3101 } 3102 3103 if (!is_hyp_mode_available()) { 3104 pr_warn_once("KVM is not available. 
Ignoring kvm-arm.mode\n"); 3105 return 0; 3106 } 3107 3108 if (strcmp(arg, "protected") == 0) { 3109 if (!is_kernel_in_hyp_mode()) 3110 kvm_mode = KVM_MODE_PROTECTED; 3111 else 3112 pr_warn_once("Protected KVM not available with VHE\n"); 3113 3114 return 0; 3115 } 3116 3117 if (strcmp(arg, "nvhe") == 0 && !WARN_ON(is_kernel_in_hyp_mode())) { 3118 kvm_mode = KVM_MODE_DEFAULT; 3119 return 0; 3120 } 3121 3122 if (strcmp(arg, "nested") == 0 && !WARN_ON(!is_kernel_in_hyp_mode())) { 3123 kvm_mode = KVM_MODE_NV; 3124 return 0; 3125 } 3126 3127 return -EINVAL; 3128 } 3129 early_param("kvm-arm.mode", early_kvm_mode_cfg); 3130 3131 static int __init early_kvm_wfx_trap_policy_cfg(char *arg, enum kvm_wfx_trap_policy *p) 3132 { 3133 if (!arg) 3134 return -EINVAL; 3135 3136 if (strcmp(arg, "trap") == 0) { 3137 *p = KVM_WFX_TRAP; 3138 return 0; 3139 } 3140 3141 if (strcmp(arg, "notrap") == 0) { 3142 *p = KVM_WFX_NOTRAP; 3143 return 0; 3144 } 3145 3146 return -EINVAL; 3147 } 3148 3149 static int __init early_kvm_wfi_trap_policy_cfg(char *arg) 3150 { 3151 return early_kvm_wfx_trap_policy_cfg(arg, &kvm_wfi_trap_policy); 3152 } 3153 early_param("kvm-arm.wfi_trap_policy", early_kvm_wfi_trap_policy_cfg); 3154 3155 static int __init early_kvm_wfe_trap_policy_cfg(char *arg) 3156 { 3157 return early_kvm_wfx_trap_policy_cfg(arg, &kvm_wfe_trap_policy); 3158 } 3159 early_param("kvm-arm.wfe_trap_policy", early_kvm_wfe_trap_policy_cfg); 3160 3161 enum kvm_mode kvm_get_mode(void) 3162 { 3163 return kvm_mode; 3164 } 3165 3166 module_init(kvm_arm_init); 3167