/*
 * Copyright (C) 2012,2013 - ARM Ltd
 * Author: Marc Zyngier <marc.zyngier@arm.com>
 *
 * Derived from arch/arm/kvm/reset.c
 * Copyright (C) 2012 - Virtual Open Systems and Columbia University
 * Author: Christoffer Dall <c.dall@virtualopensystems.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/hw_breakpoint.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/types.h>

#include <kvm/arm_arch_timer.h>

#include <asm/cpufeature.h>
#include <asm/cputype.h>
#include <asm/fpsimd.h>
#include <asm/ptrace.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_coproc.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_mmu.h>
#include <asm/virt.h>

/* Maximum phys_shift supported for any VM on this host */
static u32 kvm_ipa_limit;

/*
 * ARMv8 Reset Values
 */
static const struct kvm_regs default_regs_reset = {
	.regs.pstate = (PSR_MODE_EL1h | PSR_A_BIT | PSR_I_BIT |
			PSR_F_BIT | PSR_D_BIT),
};

static const struct kvm_regs default_regs_reset32 = {
	.regs.pstate = (PSR_AA32_MODE_SVC | PSR_AA32_A_BIT |
			PSR_AA32_I_BIT | PSR_AA32_F_BIT),
};
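/*
 * The 0x20 check below tests bit 1 of the ID_AA64PFR0_EL1.EL1 field
 * (register bits [7:4]): a field value of 0b0010 means EL1 can also be
 * entered in AArch32 state, while 0b0001 means AArch64 only.
 */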
static bool cpu_has_32bit_el1(void)
{
	u64 pfr0;

	pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
	return !!(pfr0 & 0x20);
}

/**
 * kvm_arch_vm_ioctl_check_extension
 *
 * We currently assume that the number of HW registers is uniform
 * across all CPUs (see cpuinfo_sanity_check).
 */
int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_ARM_EL1_32BIT:
		r = cpu_has_32bit_el1();
		break;
	case KVM_CAP_GUEST_DEBUG_HW_BPS:
		r = get_num_brps();
		break;
	case KVM_CAP_GUEST_DEBUG_HW_WPS:
		r = get_num_wrps();
		break;
	case KVM_CAP_ARM_PMU_V3:
		r = kvm_arm_support_pmu_v3();
		break;
	case KVM_CAP_ARM_INJECT_SERROR_ESR:
		r = cpus_have_const_cap(ARM64_HAS_RAS_EXTN);
		break;
	case KVM_CAP_SET_GUEST_DEBUG:
	case KVM_CAP_VCPU_ATTRIBUTES:
		r = 1;
		break;
	case KVM_CAP_ARM_VM_IPA_SIZE:
		r = kvm_ipa_limit;
		break;
	case KVM_CAP_ARM_SVE:
		r = system_supports_sve();
		break;
	case KVM_CAP_ARM_PTRAUTH_ADDRESS:
	case KVM_CAP_ARM_PTRAUTH_GENERIC:
		r = has_vhe() && system_supports_address_auth() &&
			    system_supports_generic_auth();
		break;
	default:
		r = 0;
	}

	return r;
}
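/*
 * Illustrative userspace flow (a sketch, not part of this file): a VMM
 * typically probes the IPA limit reported above and feeds it back into
 * VM creation, e.g.
 *
 *	ipa = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_ARM_VM_IPA_SIZE);
 *	vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, KVM_VM_TYPE_ARM_IPA_SIZE(ipa));
 *
 * Passing a machine type of 0 keeps the default KVM_PHYS_SHIFT (40 bit)
 * IPA space, see kvm_arm_setup_stage2() below.
 */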
unsigned int kvm_sve_max_vl;

int kvm_arm_init_sve(void)
{
	if (system_supports_sve()) {
		kvm_sve_max_vl = sve_max_virtualisable_vl;

		/*
		 * The get_sve_reg()/set_sve_reg() ioctl interface will need
		 * to be extended with multiple register slice support in
		 * order to support vector lengths greater than
		 * SVE_VL_ARCH_MAX:
		 */
		if (WARN_ON(kvm_sve_max_vl > SVE_VL_ARCH_MAX))
			kvm_sve_max_vl = SVE_VL_ARCH_MAX;

		/*
		 * Don't even try to make use of vector lengths that
		 * aren't available on all CPUs, for now:
		 */
		if (kvm_sve_max_vl < sve_max_vl)
			pr_warn("KVM: SVE vector length for guests limited to %u bytes\n",
				kvm_sve_max_vl);
	}

	return 0;
}

static int kvm_vcpu_enable_sve(struct kvm_vcpu *vcpu)
{
	if (!system_supports_sve())
		return -EINVAL;

	/* Verify that KVM startup enforced this when SVE was detected: */
	if (WARN_ON(!has_vhe()))
		return -EINVAL;

	vcpu->arch.sve_max_vl = kvm_sve_max_vl;

	/*
	 * Userspace can still customize the vector lengths by writing
	 * KVM_REG_ARM64_SVE_VLS.  Allocation is deferred until
	 * kvm_arm_vcpu_finalize(), which freezes the configuration.
	 */
	vcpu->arch.flags |= KVM_ARM64_GUEST_HAS_SVE;

	return 0;
}

/*
 * Finalize vcpu's maximum SVE vector length, allocating
 * vcpu->arch.sve_state as necessary.
 */
static int kvm_vcpu_finalize_sve(struct kvm_vcpu *vcpu)
{
	void *buf;
	unsigned int vl;

	vl = vcpu->arch.sve_max_vl;

	/*
	 * Responsibility for these properties is shared between
	 * kvm_arm_init_arch_resources(), kvm_vcpu_enable_sve() and
	 * set_sve_vls().  Double-check here just to be sure:
	 */
	if (WARN_ON(!sve_vl_valid(vl) || vl > sve_max_virtualisable_vl ||
		    vl > SVE_VL_ARCH_MAX))
		return -EIO;

	buf = kzalloc(SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl)), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	vcpu->arch.sve_state = buf;
	vcpu->arch.flags |= KVM_ARM64_VCPU_SVE_FINALIZED;
	return 0;
}

int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature)
{
	switch (feature) {
	case KVM_ARM_VCPU_SVE:
		if (!vcpu_has_sve(vcpu))
			return -EINVAL;

		if (kvm_arm_vcpu_sve_finalized(vcpu))
			return -EPERM;

		return kvm_vcpu_finalize_sve(vcpu);
	}

	return -EINVAL;
}
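/*
 * Illustrative userspace ordering (a sketch, not part of this file):
 *
 *	KVM_ARM_VCPU_INIT	with KVM_ARM_VCPU_SVE in the feature set
 *	KVM_SET_ONE_REG		KVM_REG_ARM64_SVE_VLS (optional)
 *	KVM_ARM_VCPU_FINALIZE	with KVM_ARM_VCPU_SVE
 *
 * Only after finalization are the SVE Z/P/FFR registers accessible and
 * the vector length configuration frozen.
 */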
bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu)
{
	if (vcpu_has_sve(vcpu) && !kvm_arm_vcpu_sve_finalized(vcpu))
		return false;

	return true;
}

void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
	kfree(vcpu->arch.sve_state);
}

static void kvm_vcpu_reset_sve(struct kvm_vcpu *vcpu)
{
	if (vcpu_has_sve(vcpu))
		memset(vcpu->arch.sve_state, 0, vcpu_sve_state_size(vcpu));
}

static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu)
{
	/* Support ptrauth only if the system supports these capabilities. */
	if (!has_vhe())
		return -EINVAL;

	if (!system_supports_address_auth() ||
	    !system_supports_generic_auth())
		return -EINVAL;
	/*
	 * For now make sure that both address/generic pointer authentication
	 * features are requested by the userspace together.
	 */
	if (!test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, vcpu->arch.features) ||
	    !test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, vcpu->arch.features))
		return -EINVAL;

	vcpu->arch.flags |= KVM_ARM64_GUEST_HAS_PTRAUTH;
	return 0;
}

/**
 * kvm_reset_vcpu - sets core registers and sys_regs to reset value
 * @vcpu: The VCPU pointer
 *
 * This function finds the right table above and sets the registers on
 * the virtual CPU struct to their architecturally defined reset
 * values, except for registers whose reset is deferred until
 * kvm_arm_vcpu_finalize().
 *
 * Note: This function can be called from two paths: the KVM_ARM_VCPU_INIT
 * ioctl or as part of handling a request issued by another VCPU in the PSCI
 * handling code.  In the first case, the VCPU will not be loaded, and in the
 * second case the VCPU will be loaded.  Because this function operates purely
 * on the memory-backed values of system registers, we want to do a full put if
 * we were loaded (handling a request) and load the values back at the end of
 * the function.  Otherwise we leave the state alone.  In both cases, we
 * disable preemption around the vcpu reset as we would otherwise race with
 * preempt notifiers which also call put/load.
 */
int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
{
	const struct kvm_regs *cpu_reset;
	int ret = -EINVAL;
	bool loaded;

	/* Reset PMU outside of the non-preemptible section */
	kvm_pmu_vcpu_reset(vcpu);

	preempt_disable();
	loaded = (vcpu->cpu != -1);
	if (loaded)
		kvm_arch_vcpu_put(vcpu);

	if (!kvm_arm_vcpu_sve_finalized(vcpu)) {
		if (test_bit(KVM_ARM_VCPU_SVE, vcpu->arch.features)) {
			ret = kvm_vcpu_enable_sve(vcpu);
			if (ret)
				goto out;
		}
	} else {
		kvm_vcpu_reset_sve(vcpu);
	}

	if (test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, vcpu->arch.features) ||
	    test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, vcpu->arch.features)) {
		if (kvm_vcpu_enable_ptrauth(vcpu))
			goto out;
	}

	switch (vcpu->arch.target) {
	default:
		if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) {
			if (!cpu_has_32bit_el1())
				goto out;
			cpu_reset = &default_regs_reset32;
		} else {
			cpu_reset = &default_regs_reset;
		}

		break;
	}

	/* Reset core registers */
	memcpy(vcpu_gp_regs(vcpu), cpu_reset, sizeof(*cpu_reset));

	/* Reset system registers */
	kvm_reset_sys_regs(vcpu);

	/*
	 * Additional reset state handling that PSCI may have imposed on us.
	 * Must be done after all the sys_reg reset.
	 */
	if (vcpu->arch.reset_state.reset) {
		unsigned long target_pc = vcpu->arch.reset_state.pc;

		/* Gracefully handle Thumb2 entry point */
		if (vcpu_mode_is_32bit(vcpu) && (target_pc & 1)) {
			target_pc &= ~1UL;
			vcpu_set_thumb(vcpu);
		}

		/* Propagate caller endianness */
		if (vcpu->arch.reset_state.be)
			kvm_vcpu_set_be(vcpu);

		*vcpu_pc(vcpu) = target_pc;
		vcpu_set_reg(vcpu, 0, vcpu->arch.reset_state.r0);

		vcpu->arch.reset_state.reset = false;
	}

	/* Default workaround setup is enabled (if supported) */
	if (kvm_arm_have_ssbd() == KVM_SSBD_KERNEL)
		vcpu->arch.workaround_flags |= VCPU_WORKAROUND_2_FLAG;

	/* Reset timer */
	ret = kvm_timer_vcpu_reset(vcpu);
out:
	if (loaded)
		kvm_arch_vcpu_load(vcpu, smp_processor_id());
	preempt_enable();
	return ret;
}
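/*
 * Worked example for the va_max clamp below (an illustration assuming
 * 4KB pages and a 48-bit, 4-level stage-1 VA space, i.e. PGDIR_SHIFT
 * is 39): va_max = 39 + 12 - 3 = 48, plus 4 bits of entry-level table
 * concatenation gives 52 bits.
 */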
"Virtual" : "Physical"); 387 388 WARN(ipa_max < KVM_PHYS_SHIFT, 389 "KVM IPA limit (%d bit) is smaller than default size\n", ipa_max); 390 kvm_ipa_limit = ipa_max; 391 kvm_info("IPA Size Limit: %dbits\n", kvm_ipa_limit); 392 } 393 394 /* 395 * Configure the VTCR_EL2 for this VM. The VTCR value is common 396 * across all the physical CPUs on the system. We use system wide 397 * sanitised values to fill in different fields, except for Hardware 398 * Management of Access Flags. HA Flag is set unconditionally on 399 * all CPUs, as it is safe to run with or without the feature and 400 * the bit is RES0 on CPUs that don't support it. 401 */ 402 int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type) 403 { 404 u64 vtcr = VTCR_EL2_FLAGS; 405 u32 parange, phys_shift; 406 u8 lvls; 407 408 if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK) 409 return -EINVAL; 410 411 phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type); 412 if (phys_shift) { 413 if (phys_shift > kvm_ipa_limit || 414 phys_shift < 32) 415 return -EINVAL; 416 } else { 417 phys_shift = KVM_PHYS_SHIFT; 418 } 419 420 parange = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1) & 7; 421 if (parange > ID_AA64MMFR0_PARANGE_MAX) 422 parange = ID_AA64MMFR0_PARANGE_MAX; 423 vtcr |= parange << VTCR_EL2_PS_SHIFT; 424 425 vtcr |= VTCR_EL2_T0SZ(phys_shift); 426 /* 427 * Use a minimum 2 level page table to prevent splitting 428 * host PMD huge pages at stage2. 429 */ 430 lvls = stage2_pgtable_levels(phys_shift); 431 if (lvls < 2) 432 lvls = 2; 433 vtcr |= VTCR_EL2_LVLS_TO_SL0(lvls); 434 435 /* 436 * Enable the Hardware Access Flag management, unconditionally 437 * on all CPUs. The features is RES0 on CPUs without the support 438 * and must be ignored by the CPUs. 439 */ 440 vtcr |= VTCR_EL2_HA; 441 442 /* Set the vmid bits */ 443 vtcr |= (kvm_get_vmid_bits() == 16) ? 444 VTCR_EL2_VS_16BIT : 445 VTCR_EL2_VS_8BIT; 446 kvm->arch.vtcr = vtcr; 447 return 0; 448 } 449