// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2021 Google LLC
 * Author: Fuad Tabba <tabba@google.com>
 */

#include <linux/kvm_host.h>
#include <linux/mm.h>

#include <asm/kvm_emulate.h>

#include <nvhe/fixed_config.h>
#include <nvhe/mem_protect.h>
#include <nvhe/memory.h>
#include <nvhe/pkvm.h>
#include <nvhe/trap_handler.h>

/* Used by icache_is_aliasing(). */
unsigned long __icache_flags;

/* Used by kvm_get_vttbr(). */
unsigned int kvm_arm_vmid_bits;

unsigned int kvm_host_sve_max_vl;

/*
 * Set trap register values based on features in ID_AA64PFR0.
 */
static void pvm_init_traps_aa64pfr0(struct kvm_vcpu *vcpu)
{
	const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64PFR0_EL1);
	u64 hcr_set = HCR_RW;
	u64 hcr_clear = 0;
	u64 cptr_set = 0;
	u64 cptr_clear = 0;

	/* Protected KVM does not support AArch32 guests. */
	BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0),
		PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_EL1_EL0_IMP);
	BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL1),
		PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_EL1_EL1_IMP);

	/*
	 * Linux guests assume support for floating-point and Advanced SIMD. Do
	 * not change the trapping behavior for these from the KVM default.
	 */
	BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_FP),
				PVM_ID_AA64PFR0_ALLOW));
	BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AdvSIMD),
				PVM_ID_AA64PFR0_ALLOW));

	if (has_hvhe())
		hcr_set |= HCR_E2H;

	/* Trap RAS unless all current versions are supported */
	if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_RAS), feature_ids) <
	    ID_AA64PFR0_EL1_RAS_V1P1) {
		hcr_set |= HCR_TERR | HCR_TEA;
		hcr_clear |= HCR_FIEN;
	}

	/* Trap AMU */
	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AMU), feature_ids)) {
		hcr_clear |= HCR_AMVOFFEN;
		cptr_set |= CPTR_EL2_TAM;
	}

	/* Trap SVE */
	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_SVE), feature_ids)) {
		if (has_hvhe())
			cptr_clear |= CPACR_ELx_ZEN;
		else
			cptr_set |= CPTR_EL2_TZ;
	}

	vcpu->arch.hcr_el2 |= hcr_set;
	vcpu->arch.hcr_el2 &= ~hcr_clear;
	vcpu->arch.cptr_el2 |= cptr_set;
	vcpu->arch.cptr_el2 &= ~cptr_clear;
}

/*
 * Set trap register values based on features in ID_AA64PFR1.
 */
static void pvm_init_traps_aa64pfr1(struct kvm_vcpu *vcpu)
{
	const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64PFR1_EL1);
	u64 hcr_set = 0;
	u64 hcr_clear = 0;

	/* Memory Tagging: Trap and Treat as Untagged if not supported. */
	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE), feature_ids)) {
		hcr_set |= HCR_TID5;
		hcr_clear |= HCR_DCT | HCR_ATA;
	}

	vcpu->arch.hcr_el2 |= hcr_set;
	vcpu->arch.hcr_el2 &= ~hcr_clear;
}

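/*
 * Note: the pvm_init_traps_*() helpers in this file all follow the same
 * pattern: read the restricted view of one ID register via
 * pvm_read_id_reg(), accumulate 'set'/'clear' masks, and apply them to the
 * vCPU's shadow copies of HCR_EL2/CPTR_EL2/MDCR_EL2 in one go. A minimal
 * sketch of the pattern (illustrative only, not an extra helper):
 *
 *	const u64 ids = pvm_read_id_reg(vcpu, SYS_ID_AA64PFR0_EL1);
 *	u64 cptr_set = 0;
 *
 *	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AMU), ids))
 *		cptr_set |= CPTR_EL2_TAM;
 *	vcpu->arch.cptr_el2 |= cptr_set;
 */
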
/*
 * Set trap register values based on features in ID_AA64DFR0.
 */
static void pvm_init_traps_aa64dfr0(struct kvm_vcpu *vcpu)
{
	const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64DFR0_EL1);
	u64 mdcr_set = 0;
	u64 mdcr_clear = 0;
	u64 cptr_set = 0;

	/* Trap/constrain PMU */
	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), feature_ids)) {
		mdcr_set |= MDCR_EL2_TPM | MDCR_EL2_TPMCR;
		mdcr_clear |= MDCR_EL2_HPME | MDCR_EL2_MTPME |
			      MDCR_EL2_HPMN_MASK;
	}

	/* Trap Debug */
	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), feature_ids))
		mdcr_set |= MDCR_EL2_TDRA | MDCR_EL2_TDA | MDCR_EL2_TDE;

	/* Trap OS Double Lock */
	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DoubleLock), feature_ids))
		mdcr_set |= MDCR_EL2_TDOSA;

	/* Trap SPE */
	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMSVer), feature_ids)) {
		mdcr_set |= MDCR_EL2_TPMS;
		mdcr_clear |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT;
	}

	/* Trap Trace Filter */
	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_TraceFilt), feature_ids))
		mdcr_set |= MDCR_EL2_TTRF;

	/* Trap Trace */
	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_TraceVer), feature_ids)) {
		if (has_hvhe())
			cptr_set |= CPACR_EL1_TTA;
		else
			cptr_set |= CPTR_EL2_TTA;
	}

	/* Trap External Trace */
	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_ExtTrcBuff), feature_ids))
		mdcr_clear |= MDCR_EL2_E2TB_MASK << MDCR_EL2_E2TB_SHIFT;

	vcpu->arch.mdcr_el2 |= mdcr_set;
	vcpu->arch.mdcr_el2 &= ~mdcr_clear;
	vcpu->arch.cptr_el2 |= cptr_set;
}

/*
 * Set trap register values based on features in ID_AA64MMFR0.
 */
static void pvm_init_traps_aa64mmfr0(struct kvm_vcpu *vcpu)
{
	const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64MMFR0_EL1);
	u64 mdcr_set = 0;

	/* Trap Debug Communications Channel registers */
	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_FGT), feature_ids))
		mdcr_set |= MDCR_EL2_TDCC;

	vcpu->arch.mdcr_el2 |= mdcr_set;
}

/*
 * Set trap register values based on features in ID_AA64MMFR1.
 */
static void pvm_init_traps_aa64mmfr1(struct kvm_vcpu *vcpu)
{
	const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64MMFR1_EL1);
	u64 hcr_set = 0;

	/* Trap LOR */
	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_LO), feature_ids))
		hcr_set |= HCR_TLOR;

	vcpu->arch.hcr_el2 |= hcr_set;
}

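/*
 * Worked example of the FIELD_GET() checks used by the helpers above, with
 * an assumed register value: if the restricted view of ID_AA64MMFR1_EL1 has
 * its LO field equal to 0 (LORegions not implemented), then
 *
 *	FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_LO), feature_ids) == 0
 *
 * and pvm_init_traps_aa64mmfr1() sets HCR_TLOR so that guest accesses to
 * the LOR registers trap to hyp.
 */
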
/*
 * Set baseline trap register values.
 */
static void pvm_init_trap_regs(struct kvm_vcpu *vcpu)
{
	const u64 hcr_trap_feat_regs = HCR_TID3;
	const u64 hcr_trap_impdef = HCR_TACR | HCR_TIDCP | HCR_TID1;

	/*
	 * Always trap:
	 * - Feature id registers: to control features exposed to guests
	 * - Implementation-defined features
	 */
	vcpu->arch.hcr_el2 |= hcr_trap_feat_regs | hcr_trap_impdef;

	/* Clear res0 and set res1 bits to trap potential new features. */
	vcpu->arch.hcr_el2 &= ~(HCR_RES0);
	vcpu->arch.mdcr_el2 &= ~(MDCR_EL2_RES0);
	if (!has_hvhe()) {
		vcpu->arch.cptr_el2 |= CPTR_NVHE_EL2_RES1;
		vcpu->arch.cptr_el2 &= ~(CPTR_NVHE_EL2_RES0);
	}
}

static void pkvm_vcpu_reset_hcr(struct kvm_vcpu *vcpu)
{
	vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;

	if (has_hvhe())
		vcpu->arch.hcr_el2 |= HCR_E2H;

	if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) {
		/* route synchronous external abort exceptions to EL2 */
		vcpu->arch.hcr_el2 |= HCR_TEA;
		/* trap error record accesses */
		vcpu->arch.hcr_el2 |= HCR_TERR;
	}

	if (cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
		vcpu->arch.hcr_el2 |= HCR_FWB;

	if (cpus_have_final_cap(ARM64_HAS_EVT) &&
	    !cpus_have_final_cap(ARM64_MISMATCHED_CACHE_TYPE))
		vcpu->arch.hcr_el2 |= HCR_TID4;
	else
		vcpu->arch.hcr_el2 |= HCR_TID2;

	if (vcpu_has_ptrauth(vcpu))
		vcpu->arch.hcr_el2 |= (HCR_API | HCR_APK);
}

/*
 * Initialize trap register values in protected mode.
 */
static void pkvm_vcpu_init_traps(struct kvm_vcpu *vcpu)
{
	vcpu->arch.cptr_el2 = kvm_get_reset_cptr_el2(vcpu);
	vcpu->arch.mdcr_el2 = 0;

	pkvm_vcpu_reset_hcr(vcpu);

	if (!vcpu_is_protected(vcpu))
		return;

	pvm_init_trap_regs(vcpu);
	pvm_init_traps_aa64pfr0(vcpu);
	pvm_init_traps_aa64pfr1(vcpu);
	pvm_init_traps_aa64dfr0(vcpu);
	pvm_init_traps_aa64mmfr0(vcpu);
	pvm_init_traps_aa64mmfr1(vcpu);
}

/*
 * Start the VM table handle at the offset defined instead of at 0.
 * Mainly for sanity checking and debugging.
 */
#define HANDLE_OFFSET 0x1000

static unsigned int vm_handle_to_idx(pkvm_handle_t handle)
{
	return handle - HANDLE_OFFSET;
}

static pkvm_handle_t idx_to_vm_handle(unsigned int idx)
{
	return idx + HANDLE_OFFSET;
}

/*
 * Spinlock for protecting state related to the VM table. Protects writes
 * to 'vm_table' and 'nr_table_entries' as well as reads and writes to
 * 'last_hyp_vcpu_lookup'.
 */
static DEFINE_HYP_SPINLOCK(vm_table_lock);

/*
 * The table of VM entries for protected VMs in hyp.
 * Allocated at hyp initialization and setup.
 */
static struct pkvm_hyp_vm **vm_table;

void pkvm_hyp_vm_table_init(void *tbl)
{
	WARN_ON(vm_table);
	vm_table = tbl;
}

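/*
 * Handle <-> index round trip (illustrative): idx_to_vm_handle(0) is 0x1000
 * and vm_handle_to_idx(0x1000) is 0. Since handles are supplied by the
 * (untrusted) host, get_vm_by_handle() below bounds-checks the derived
 * index against KVM_MAX_PVMS before dereferencing 'vm_table'.
 */
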
/*
 * Return the hyp vm structure corresponding to the handle.
 */
static struct pkvm_hyp_vm *get_vm_by_handle(pkvm_handle_t handle)
{
	unsigned int idx = vm_handle_to_idx(handle);

	if (unlikely(idx >= KVM_MAX_PVMS))
		return NULL;

	return vm_table[idx];
}

struct pkvm_hyp_vcpu *pkvm_load_hyp_vcpu(pkvm_handle_t handle,
					 unsigned int vcpu_idx)
{
	struct pkvm_hyp_vcpu *hyp_vcpu = NULL;
	struct pkvm_hyp_vm *hyp_vm;

	hyp_spin_lock(&vm_table_lock);
	hyp_vm = get_vm_by_handle(handle);
	if (!hyp_vm || hyp_vm->nr_vcpus <= vcpu_idx)
		goto unlock;

	hyp_vcpu = hyp_vm->vcpus[vcpu_idx];
	hyp_page_ref_inc(hyp_virt_to_page(hyp_vm));
unlock:
	hyp_spin_unlock(&vm_table_lock);
	return hyp_vcpu;
}

void pkvm_put_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
{
	struct pkvm_hyp_vm *hyp_vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);

	hyp_spin_lock(&vm_table_lock);
	hyp_page_ref_dec(hyp_virt_to_page(hyp_vm));
	hyp_spin_unlock(&vm_table_lock);
}

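/*
 * Expected usage of the load/put helpers above, e.g. from the hypercall
 * handlers (a sketch; the actual callers live outside this file):
 *
 *	hyp_vcpu = pkvm_load_hyp_vcpu(handle, vcpu_idx);
 *	if (hyp_vcpu) {
 *		... operate on hyp_vcpu->vcpu ...
 *		pkvm_put_hyp_vcpu(hyp_vcpu);
 *	}
 *
 * The pair holds a reference on the owning VM's page, which makes
 * __pkvm_teardown_vm() below fail with -EBUSY while a vCPU is in use.
 */
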
static void pkvm_init_features_from_host(struct pkvm_hyp_vm *hyp_vm, const struct kvm *host_kvm)
{
	struct kvm *kvm = &hyp_vm->kvm;
	DECLARE_BITMAP(allowed_features, KVM_VCPU_MAX_FEATURES);

	/* No restrictions for non-protected VMs. */
	if (!kvm_vm_is_protected(kvm)) {
		bitmap_copy(kvm->arch.vcpu_features,
			    host_kvm->arch.vcpu_features,
			    KVM_VCPU_MAX_FEATURES);
		return;
	}

	bitmap_zero(allowed_features, KVM_VCPU_MAX_FEATURES);

	/*
	 * For protected VMs, always allow:
	 * - CPU starting in poweroff state
	 * - PSCI v0.2
	 */
	set_bit(KVM_ARM_VCPU_POWER_OFF, allowed_features);
	set_bit(KVM_ARM_VCPU_PSCI_0_2, allowed_features);

	/*
	 * Check if remaining features are allowed:
	 * - Performance Monitoring
	 * - Scalable Vectors
	 * - Pointer Authentication
	 */
	if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), PVM_ID_AA64DFR0_ALLOW))
		set_bit(KVM_ARM_VCPU_PMU_V3, allowed_features);

	if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_SVE), PVM_ID_AA64PFR0_ALLOW))
		set_bit(KVM_ARM_VCPU_SVE, allowed_features);

	if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API), PVM_ID_AA64ISAR1_RESTRICT_UNSIGNED) &&
	    FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA), PVM_ID_AA64ISAR1_RESTRICT_UNSIGNED))
		set_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, allowed_features);

	if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPI), PVM_ID_AA64ISAR1_ALLOW) &&
	    FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPA), PVM_ID_AA64ISAR1_ALLOW))
		set_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, allowed_features);

	bitmap_and(kvm->arch.vcpu_features, host_kvm->arch.vcpu_features,
		   allowed_features, KVM_VCPU_MAX_FEATURES);
}

static void pkvm_vcpu_init_ptrauth(struct pkvm_hyp_vcpu *hyp_vcpu)
{
	struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu;

	if (vcpu_has_feature(vcpu, KVM_ARM_VCPU_PTRAUTH_ADDRESS) ||
	    vcpu_has_feature(vcpu, KVM_ARM_VCPU_PTRAUTH_GENERIC)) {
		kvm_vcpu_enable_ptrauth(vcpu);
	} else {
		vcpu_clear_flag(&hyp_vcpu->vcpu, GUEST_HAS_PTRAUTH);
	}
}

static void unpin_host_vcpu(struct kvm_vcpu *host_vcpu)
{
	if (host_vcpu)
		hyp_unpin_shared_mem(host_vcpu, host_vcpu + 1);
}

static void unpin_host_vcpus(struct pkvm_hyp_vcpu *hyp_vcpus[],
			     unsigned int nr_vcpus)
{
	int i;

	for (i = 0; i < nr_vcpus; i++)
		unpin_host_vcpu(hyp_vcpus[i]->host_vcpu);
}

static void init_pkvm_hyp_vm(struct kvm *host_kvm, struct pkvm_hyp_vm *hyp_vm,
			     unsigned int nr_vcpus)
{
	hyp_vm->host_kvm = host_kvm;
	hyp_vm->kvm.created_vcpus = nr_vcpus;
	hyp_vm->kvm.arch.mmu.vtcr = host_mmu.arch.mmu.vtcr;
	hyp_vm->kvm.arch.pkvm.enabled = READ_ONCE(host_kvm->arch.pkvm.enabled);
	pkvm_init_features_from_host(hyp_vm, host_kvm);
}

static void pkvm_vcpu_init_sve(struct pkvm_hyp_vcpu *hyp_vcpu, struct kvm_vcpu *host_vcpu)
{
	struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu;

	if (!vcpu_has_feature(vcpu, KVM_ARM_VCPU_SVE)) {
		vcpu_clear_flag(vcpu, GUEST_HAS_SVE);
		vcpu_clear_flag(vcpu, VCPU_SVE_FINALIZED);
	}
}

static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu,
			      struct pkvm_hyp_vm *hyp_vm,
			      struct kvm_vcpu *host_vcpu,
			      unsigned int vcpu_idx)
{
	int ret = 0;

	if (hyp_pin_shared_mem(host_vcpu, host_vcpu + 1))
		return -EBUSY;

	if (host_vcpu->vcpu_idx != vcpu_idx) {
		ret = -EINVAL;
		goto done;
	}

	hyp_vcpu->host_vcpu = host_vcpu;

	hyp_vcpu->vcpu.kvm = &hyp_vm->kvm;
	hyp_vcpu->vcpu.vcpu_id = READ_ONCE(host_vcpu->vcpu_id);
	hyp_vcpu->vcpu.vcpu_idx = vcpu_idx;

	hyp_vcpu->vcpu.arch.hw_mmu = &hyp_vm->kvm.arch.mmu;
	hyp_vcpu->vcpu.arch.cflags = READ_ONCE(host_vcpu->arch.cflags);
	hyp_vcpu->vcpu.arch.mp_state.mp_state = KVM_MP_STATE_STOPPED;

	pkvm_vcpu_init_sve(hyp_vcpu, host_vcpu);
	pkvm_vcpu_init_ptrauth(hyp_vcpu);
	pkvm_vcpu_init_traps(&hyp_vcpu->vcpu);
done:
	if (ret)
		unpin_host_vcpu(host_vcpu);
	return ret;
}

static int find_free_vm_table_entry(struct kvm *host_kvm)
{
	int i;

	for (i = 0; i < KVM_MAX_PVMS; ++i) {
		if (!vm_table[i])
			return i;
	}

	return -ENOMEM;
}

/*
 * Allocate a VM table entry and insert a pointer to the new vm.
 *
 * Return a unique handle to the protected VM on success,
 * negative error code on failure.
 */
static pkvm_handle_t insert_vm_table_entry(struct kvm *host_kvm,
					   struct pkvm_hyp_vm *hyp_vm)
{
	struct kvm_s2_mmu *mmu = &hyp_vm->kvm.arch.mmu;
	int idx;

	hyp_assert_lock_held(&vm_table_lock);

	/*
	 * Initializing protected state might have failed, yet a malicious
	 * host could trigger this function. Thus, ensure that 'vm_table'
	 * exists.
	 */
	if (unlikely(!vm_table))
		return -EINVAL;

	idx = find_free_vm_table_entry(host_kvm);
	if (idx < 0)
		return idx;

	hyp_vm->kvm.arch.pkvm.handle = idx_to_vm_handle(idx);

	/* VMID 0 is reserved for the host */
	atomic64_set(&mmu->vmid.id, idx + 1);

	mmu->arch = &hyp_vm->kvm.arch;
	mmu->pgt = &hyp_vm->pgt;

	vm_table[idx] = hyp_vm;
	return hyp_vm->kvm.arch.pkvm.handle;
}

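/*
 * Worked example of the table bookkeeping above (values illustrative): the
 * first VM inserted lands in vm_table[0] and gets handle
 * idx_to_vm_handle(0) == 0x1000 and stage-2 VMID 1, since VMID 0 is
 * reserved for the host. Once all KVM_MAX_PVMS slots are in use,
 * find_free_vm_table_entry() fails and the caller sees -ENOMEM.
 */
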
/*
 * Deallocate and remove the VM table entry corresponding to the handle.
 */
static void remove_vm_table_entry(pkvm_handle_t handle)
{
	hyp_assert_lock_held(&vm_table_lock);
	vm_table[vm_handle_to_idx(handle)] = NULL;
}

static size_t pkvm_get_hyp_vm_size(unsigned int nr_vcpus)
{
	return size_add(sizeof(struct pkvm_hyp_vm),
		size_mul(sizeof(struct pkvm_hyp_vcpu *), nr_vcpus));
}

static void *map_donated_memory_noclear(unsigned long host_va, size_t size)
{
	void *va = (void *)kern_hyp_va(host_va);

	if (!PAGE_ALIGNED(va))
		return NULL;

	if (__pkvm_host_donate_hyp(hyp_virt_to_pfn(va),
				   PAGE_ALIGN(size) >> PAGE_SHIFT))
		return NULL;

	return va;
}

static void *map_donated_memory(unsigned long host_va, size_t size)
{
	void *va = map_donated_memory_noclear(host_va, size);

	if (va)
		memset(va, 0, size);

	return va;
}

static void __unmap_donated_memory(void *va, size_t size)
{
	kvm_flush_dcache_to_poc(va, size);
	WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(va),
				       PAGE_ALIGN(size) >> PAGE_SHIFT));
}

static void unmap_donated_memory(void *va, size_t size)
{
	if (!va)
		return;

	memset(va, 0, size);
	__unmap_donated_memory(va, size);
}

static void unmap_donated_memory_noclear(void *va, size_t size)
{
	if (!va)
		return;

	__unmap_donated_memory(va, size);
}

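/*
 * Donated-memory lifecycle implemented by the helpers above (sketch):
 *
 *	va = map_donated_memory(host_va, size);	host loses stage-2 access
 *	...hyp uses va as private memory...
 *	unmap_donated_memory(va, size);		zeroed, ownership returned
 *
 * The _noclear() variants skip the memset() for memory that is about to be
 * overwritten anyway (e.g. the stage-2 PGD) or that has already been
 * scrubbed (e.g. by teardown_donated_memory() below).
 */
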
/*
 * Initialize the hypervisor copy of the protected VM state using the
 * memory donated by the host.
 *
 * Unmaps the donated memory from the host at stage 2.
 *
 * host_kvm: A pointer to the host's struct kvm.
 * vm_hva: The host va of the area being donated for the VM state.
 *	   Must be page aligned.
 * pgd_hva: The host va of the area being donated for the stage-2 PGD for
 *	    the VM. Must be page aligned. Its size is implied by the VM's
 *	    VTCR.
 *
 * Return a unique handle to the protected VM on success,
 * negative error code on failure.
 */
int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
		   unsigned long pgd_hva)
{
	struct pkvm_hyp_vm *hyp_vm = NULL;
	size_t vm_size, pgd_size;
	unsigned int nr_vcpus;
	void *pgd = NULL;
	int ret;

	ret = hyp_pin_shared_mem(host_kvm, host_kvm + 1);
	if (ret)
		return ret;

	nr_vcpus = READ_ONCE(host_kvm->created_vcpus);
	if (nr_vcpus < 1) {
		ret = -EINVAL;
		goto err_unpin_kvm;
	}

	vm_size = pkvm_get_hyp_vm_size(nr_vcpus);
	pgd_size = kvm_pgtable_stage2_pgd_size(host_mmu.arch.mmu.vtcr);

	ret = -ENOMEM;

	hyp_vm = map_donated_memory(vm_hva, vm_size);
	if (!hyp_vm)
		goto err_remove_mappings;

	pgd = map_donated_memory_noclear(pgd_hva, pgd_size);
	if (!pgd)
		goto err_remove_mappings;

	init_pkvm_hyp_vm(host_kvm, hyp_vm, nr_vcpus);

	hyp_spin_lock(&vm_table_lock);
	ret = insert_vm_table_entry(host_kvm, hyp_vm);
	if (ret < 0)
		goto err_unlock;

	ret = kvm_guest_prepare_stage2(hyp_vm, pgd);
	if (ret)
		goto err_remove_vm_table_entry;
	hyp_spin_unlock(&vm_table_lock);

	return hyp_vm->kvm.arch.pkvm.handle;

err_remove_vm_table_entry:
	remove_vm_table_entry(hyp_vm->kvm.arch.pkvm.handle);
err_unlock:
	hyp_spin_unlock(&vm_table_lock);
err_remove_mappings:
	unmap_donated_memory(hyp_vm, vm_size);
	unmap_donated_memory(pgd, pgd_size);
err_unpin_kvm:
	hyp_unpin_shared_mem(host_kvm, host_kvm + 1);
	return ret;
}

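/*
 * The host is expected to call __pkvm_init_vm() once per protected VM and
 * then __pkvm_init_vcpu() once per vCPU, in ascending vcpu_idx order. A
 * sketch of the sequence, as issued from the host via hypercalls:
 *
 *	handle = __pkvm_init_vm(host_kvm, vm_hva, pgd_hva);
 *	for (i = 0; i < created_vcpus; i++)
 *		__pkvm_init_vcpu(handle, host_vcpu[i], vcpu_hva[i]);
 *
 * init_pkvm_hyp_vcpu() enforces the ordering by comparing
 * host_vcpu->vcpu_idx against the next free index.
 */
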
/*
 * Initialize the hypervisor copy of the protected vCPU state using the
 * memory donated by the host.
 *
 * handle: The handle for the protected vm.
 * host_vcpu: A pointer to the corresponding host vcpu.
 * vcpu_hva: The host va of the area being donated for the vcpu state.
 *	     Must be page aligned. The size of the area must be equal to
 *	     the page-aligned size of 'struct pkvm_hyp_vcpu'.
 * Return 0 on success, negative error code on failure.
 */
int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu,
		     unsigned long vcpu_hva)
{
	struct pkvm_hyp_vcpu *hyp_vcpu;
	struct pkvm_hyp_vm *hyp_vm;
	unsigned int idx;
	int ret;

	hyp_vcpu = map_donated_memory(vcpu_hva, sizeof(*hyp_vcpu));
	if (!hyp_vcpu)
		return -ENOMEM;

	hyp_spin_lock(&vm_table_lock);

	hyp_vm = get_vm_by_handle(handle);
	if (!hyp_vm) {
		ret = -ENOENT;
		goto unlock;
	}

	idx = hyp_vm->nr_vcpus;
	if (idx >= hyp_vm->kvm.created_vcpus) {
		ret = -EINVAL;
		goto unlock;
	}

	ret = init_pkvm_hyp_vcpu(hyp_vcpu, hyp_vm, host_vcpu, idx);
	if (ret)
		goto unlock;

	hyp_vm->vcpus[idx] = hyp_vcpu;
	hyp_vm->nr_vcpus++;
unlock:
	hyp_spin_unlock(&vm_table_lock);

	if (ret) {
		unmap_donated_memory(hyp_vcpu, sizeof(*hyp_vcpu));
		return ret;
	}

	hyp_vcpu->vcpu.arch.cptr_el2 = kvm_get_reset_cptr_el2(&hyp_vcpu->vcpu);

	return 0;
}

static void
teardown_donated_memory(struct kvm_hyp_memcache *mc, void *addr, size_t size)
{
	size = PAGE_ALIGN(size);
	memset(addr, 0, size);

	for (void *start = addr; start < addr + size; start += PAGE_SIZE)
		push_hyp_memcache(mc, start, hyp_virt_to_phys);

	unmap_donated_memory_noclear(addr, size);
}

int __pkvm_teardown_vm(pkvm_handle_t handle)
{
	struct kvm_hyp_memcache *mc;
	struct pkvm_hyp_vm *hyp_vm;
	struct kvm *host_kvm;
	unsigned int idx;
	size_t vm_size;
	int err;

	hyp_spin_lock(&vm_table_lock);
	hyp_vm = get_vm_by_handle(handle);
	if (!hyp_vm) {
		err = -ENOENT;
		goto err_unlock;
	}

	if (WARN_ON(hyp_page_count(hyp_vm))) {
		err = -EBUSY;
		goto err_unlock;
	}

	host_kvm = hyp_vm->host_kvm;

	/* Ensure the VMID is clean before it can be reallocated */
	__kvm_tlb_flush_vmid(&hyp_vm->kvm.arch.mmu);
	remove_vm_table_entry(handle);
	hyp_spin_unlock(&vm_table_lock);

	/* Reclaim guest pages (including page-table pages) */
	mc = &host_kvm->arch.pkvm.teardown_mc;
	reclaim_guest_pages(hyp_vm, mc);
	unpin_host_vcpus(hyp_vm->vcpus, hyp_vm->nr_vcpus);

	/* Push the metadata pages to the teardown memcache */
	for (idx = 0; idx < hyp_vm->nr_vcpus; ++idx) {
		struct pkvm_hyp_vcpu *hyp_vcpu = hyp_vm->vcpus[idx];

		teardown_donated_memory(mc, hyp_vcpu, sizeof(*hyp_vcpu));
	}

	vm_size = pkvm_get_hyp_vm_size(hyp_vm->kvm.created_vcpus);
	teardown_donated_memory(mc, hyp_vm, vm_size);
	hyp_unpin_shared_mem(host_kvm, host_kvm + 1);
	return 0;

err_unlock:
	hyp_spin_unlock(&vm_table_lock);
	return err;
}
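
/*
 * Teardown summary (sketch): __pkvm_teardown_vm() flushes the VM's TLB
 * entries so the VMID can be reused, drops the table entry, reclaims the
 * guest's stage-2 pages into the host's teardown memcache, and finally
 * scrubs and returns the donated vCPU and VM metadata pages to the host.
 */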