// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2021 Google LLC
 * Author: Fuad Tabba <tabba@google.com>
 */

#include <linux/kvm_host.h>
#include <linux/mm.h>

#include <asm/kvm_emulate.h>

#include <nvhe/fixed_config.h>
#include <nvhe/mem_protect.h>
#include <nvhe/memory.h>
#include <nvhe/pkvm.h>
#include <nvhe/trap_handler.h>

/* Used by icache_is_aliasing(). */
unsigned long __icache_flags;

/* Used by kvm_get_vttbr(). */
unsigned int kvm_arm_vmid_bits;

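/* Maximum SVE vector length for the host. */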
unsigned int kvm_host_sve_max_vl;

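/*
 * Each of the pvm_init_traps_*() helpers below reads the protected VM's
 * restricted view of an ID register via pvm_read_id_reg(), accumulates the
 * bits to set and clear in the trap control registers (HCR_EL2, CPTR_EL2
 * and MDCR_EL2), and applies them to the vcpu state at the end.
 */
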
/*
 * Set trap register values based on features in ID_AA64PFR0.
 */
static void pvm_init_traps_aa64pfr0(struct kvm_vcpu *vcpu)
{
	const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64PFR0_EL1);
	u64 hcr_set = HCR_RW;
	u64 hcr_clear = 0;
	u64 cptr_set = 0;
	u64 cptr_clear = 0;

	/* Protected KVM does not support AArch32 guests. */
	BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0),
		PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_EL1_EL0_IMP);
	BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL1),
		PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_EL1_EL1_IMP);

	/*
	 * Linux guests assume support for floating-point and Advanced SIMD. Do
	 * not change the trapping behavior for these from the KVM default.
	 */
	BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_FP),
				PVM_ID_AA64PFR0_ALLOW));
	BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AdvSIMD),
				PVM_ID_AA64PFR0_ALLOW));

	if (has_hvhe())
		hcr_set |= HCR_E2H;

	/* Trap RAS unless all current versions are supported */
	if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_RAS), feature_ids) <
	    ID_AA64PFR0_EL1_RAS_V1P1) {
		hcr_set |= HCR_TERR | HCR_TEA;
		hcr_clear |= HCR_FIEN;
	}

	/* Trap AMU */
	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AMU), feature_ids)) {
		hcr_clear |= HCR_AMVOFFEN;
		cptr_set |= CPTR_EL2_TAM;
	}

	/* Trap SVE */
	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_SVE), feature_ids)) {
		if (has_hvhe())
			cptr_clear |= CPACR_ELx_ZEN;
		else
			cptr_set |= CPTR_EL2_TZ;
	}

	vcpu->arch.hcr_el2 |= hcr_set;
	vcpu->arch.hcr_el2 &= ~hcr_clear;
	vcpu->arch.cptr_el2 |= cptr_set;
	vcpu->arch.cptr_el2 &= ~cptr_clear;
}

/*
 * Set trap register values based on features in ID_AA64PFR1.
 */
static void pvm_init_traps_aa64pfr1(struct kvm_vcpu *vcpu)
{
	const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64PFR1_EL1);
	u64 hcr_set = 0;
	u64 hcr_clear = 0;

	/* Memory Tagging: Trap and Treat as Untagged if not supported. */
	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE), feature_ids)) {
		hcr_set |= HCR_TID5;
		hcr_clear |= HCR_DCT | HCR_ATA;
	}

	vcpu->arch.hcr_el2 |= hcr_set;
	vcpu->arch.hcr_el2 &= ~hcr_clear;
}

/*
 * Set trap register values based on features in ID_AA64DFR0.
 */
static void pvm_init_traps_aa64dfr0(struct kvm_vcpu *vcpu)
{
	const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64DFR0_EL1);
	u64 mdcr_set = 0;
	u64 mdcr_clear = 0;
	u64 cptr_set = 0;

	/* Trap/constrain PMU */
	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), feature_ids)) {
		mdcr_set |= MDCR_EL2_TPM | MDCR_EL2_TPMCR;
		mdcr_clear |= MDCR_EL2_HPME | MDCR_EL2_MTPME |
			      MDCR_EL2_HPMN_MASK;
	}

	/* Trap Debug */
	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), feature_ids))
		mdcr_set |= MDCR_EL2_TDRA | MDCR_EL2_TDA | MDCR_EL2_TDE;

	/* Trap OS Double Lock */
	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DoubleLock), feature_ids))
		mdcr_set |= MDCR_EL2_TDOSA;

	/* Trap SPE */
	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMSVer), feature_ids)) {
		mdcr_set |= MDCR_EL2_TPMS;
		mdcr_clear |= MDCR_EL2_E2PB_MASK;
	}

	/* Trap Trace Filter */
	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_TraceFilt), feature_ids))
		mdcr_set |= MDCR_EL2_TTRF;

	/* Trap Trace */
	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_TraceVer), feature_ids)) {
		if (has_hvhe())
			cptr_set |= CPACR_EL1_TTA;
		else
			cptr_set |= CPTR_EL2_TTA;
	}

	/* Trap External Trace */
	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_ExtTrcBuff), feature_ids))
		mdcr_clear |= MDCR_EL2_E2TB_MASK;

	vcpu->arch.mdcr_el2 |= mdcr_set;
	vcpu->arch.mdcr_el2 &= ~mdcr_clear;
	vcpu->arch.cptr_el2 |= cptr_set;
}

/*
 * Set trap register values based on features in ID_AA64MMFR0.
 */
static void pvm_init_traps_aa64mmfr0(struct kvm_vcpu *vcpu)
{
	const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64MMFR0_EL1);
	u64 mdcr_set = 0;

	/* Trap Debug Communications Channel registers */
	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_FGT), feature_ids))
		mdcr_set |= MDCR_EL2_TDCC;

	vcpu->arch.mdcr_el2 |= mdcr_set;
}

/*
 * Set trap register values based on features in ID_AA64MMFR1.
 */
static void pvm_init_traps_aa64mmfr1(struct kvm_vcpu *vcpu)
{
	const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64MMFR1_EL1);
	u64 hcr_set = 0;

	/* Trap LOR */
	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_LO), feature_ids))
		hcr_set |= HCR_TLOR;

	vcpu->arch.hcr_el2 |= hcr_set;
}

/*
 * Set baseline trap register values.
 */
static void pvm_init_trap_regs(struct kvm_vcpu *vcpu)
{
	const u64 hcr_trap_feat_regs = HCR_TID3;
	const u64 hcr_trap_impdef = HCR_TACR | HCR_TIDCP | HCR_TID1;

	/*
	 * Always trap:
	 * - Feature id registers: to control features exposed to guests
	 * - Implementation-defined features
	 */
	vcpu->arch.hcr_el2 |= hcr_trap_feat_regs | hcr_trap_impdef;

	/* Clear res0 and set res1 bits to trap potential new features. */
	vcpu->arch.hcr_el2 &= ~(HCR_RES0);
	vcpu->arch.mdcr_el2 &= ~(MDCR_EL2_RES0);
	if (!has_hvhe()) {
		vcpu->arch.cptr_el2 |= CPTR_NVHE_EL2_RES1;
		vcpu->arch.cptr_el2 &= ~(CPTR_NVHE_EL2_RES0);
	}
}

static void pkvm_vcpu_reset_hcr(struct kvm_vcpu *vcpu)
{
	vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;

	if (has_hvhe())
		vcpu->arch.hcr_el2 |= HCR_E2H;

	if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) {
		/* route synchronous external abort exceptions to EL2 */
		vcpu->arch.hcr_el2 |= HCR_TEA;
		/* trap error record accesses */
		vcpu->arch.hcr_el2 |= HCR_TERR;
	}

	if (cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
		vcpu->arch.hcr_el2 |= HCR_FWB;

	if (cpus_have_final_cap(ARM64_HAS_EVT) &&
	    !cpus_have_final_cap(ARM64_MISMATCHED_CACHE_TYPE))
		vcpu->arch.hcr_el2 |= HCR_TID4;
	else
		vcpu->arch.hcr_el2 |= HCR_TID2;

	if (vcpu_has_ptrauth(vcpu))
		vcpu->arch.hcr_el2 |= (HCR_API | HCR_APK);
}

/*
 * Initialize trap register values in protected mode.
 */
static void pkvm_vcpu_init_traps(struct kvm_vcpu *vcpu)
{
	vcpu->arch.cptr_el2 = kvm_get_reset_cptr_el2(vcpu);
	vcpu->arch.mdcr_el2 = 0;

	pkvm_vcpu_reset_hcr(vcpu);

	if (!vcpu_is_protected(vcpu))
		return;

	pvm_init_trap_regs(vcpu);
	pvm_init_traps_aa64pfr0(vcpu);
	pvm_init_traps_aa64pfr1(vcpu);
	pvm_init_traps_aa64dfr0(vcpu);
	pvm_init_traps_aa64mmfr0(vcpu);
	pvm_init_traps_aa64mmfr1(vcpu);
}

/*
 * Start the VM table handle at the offset defined instead of at 0.
 * Mainly for sanity checking and debugging.
 */
#define HANDLE_OFFSET 0x1000

static unsigned int vm_handle_to_idx(pkvm_handle_t handle)
{
	return handle - HANDLE_OFFSET;
}

static pkvm_handle_t idx_to_vm_handle(unsigned int idx)
{
	return idx + HANDLE_OFFSET;
}
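
/*
 * For example, table index 0 corresponds to handle 0x1000 and index 1 to
 * 0x1001; a handle below HANDLE_OFFSET yields an out-of-range index and is
 * rejected by the bounds check in get_vm_by_handle().
 */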

/*
 * Spinlock for protecting state related to the VM table. Protects reads
 * from and writes to 'vm_table'.
 */
static DEFINE_HYP_SPINLOCK(vm_table_lock);

/*
 * The table of VM entries for protected VMs in hyp.
 * Allocated at hyp initialization and setup.
 */
static struct pkvm_hyp_vm **vm_table;

void pkvm_hyp_vm_table_init(void *tbl)
{
	WARN_ON(vm_table);
	vm_table = tbl;
}

/*
 * Return the hyp vm structure corresponding to the handle.
 */
static struct pkvm_hyp_vm *get_vm_by_handle(pkvm_handle_t handle)
{
	unsigned int idx = vm_handle_to_idx(handle);

	if (unlikely(idx >= KVM_MAX_PVMS))
		return NULL;

	return vm_table[idx];
}
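
/*
 * Note: all callers in this file hold vm_table_lock around the lookup and
 * any use of the returned pointer, since entries can be removed from
 * 'vm_table' concurrently (see remove_vm_table_entry()).
 */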

struct pkvm_hyp_vcpu *pkvm_load_hyp_vcpu(pkvm_handle_t handle,
					 unsigned int vcpu_idx)
{
	struct pkvm_hyp_vcpu *hyp_vcpu = NULL;
	struct pkvm_hyp_vm *hyp_vm;

	hyp_spin_lock(&vm_table_lock);
	hyp_vm = get_vm_by_handle(handle);
	if (!hyp_vm || hyp_vm->nr_vcpus <= vcpu_idx)
		goto unlock;

	hyp_vcpu = hyp_vm->vcpus[vcpu_idx];
	hyp_page_ref_inc(hyp_virt_to_page(hyp_vm));
unlock:
	hyp_spin_unlock(&vm_table_lock);
	return hyp_vcpu;
}

void pkvm_put_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
{
	struct pkvm_hyp_vm *hyp_vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);

	hyp_spin_lock(&vm_table_lock);
	hyp_page_ref_dec(hyp_virt_to_page(hyp_vm));
	hyp_spin_unlock(&vm_table_lock);
}
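
/*
 * pkvm_load_hyp_vcpu()/pkvm_put_hyp_vcpu() take and drop a reference on the
 * page backing the VM structure; __pkvm_teardown_vm() refuses to tear down
 * a VM while that count is elevated, so a loaded vcpu keeps the VM alive.
 */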

static void pkvm_init_features_from_host(struct pkvm_hyp_vm *hyp_vm, const struct kvm *host_kvm)
{
	struct kvm *kvm = &hyp_vm->kvm;
	DECLARE_BITMAP(allowed_features, KVM_VCPU_MAX_FEATURES);

	/* No restrictions for non-protected VMs. */
	if (!kvm_vm_is_protected(kvm)) {
		bitmap_copy(kvm->arch.vcpu_features,
			    host_kvm->arch.vcpu_features,
			    KVM_VCPU_MAX_FEATURES);
		return;
	}

	bitmap_zero(allowed_features, KVM_VCPU_MAX_FEATURES);

	/*
	 * For protected VMs, always allow:
	 * - CPU starting in poweroff state
	 * - PSCI v0.2
	 */
	set_bit(KVM_ARM_VCPU_POWER_OFF, allowed_features);
	set_bit(KVM_ARM_VCPU_PSCI_0_2, allowed_features);

	/*
	 * Check if remaining features are allowed:
	 * - Performance Monitoring
	 * - Scalable Vectors
	 * - Pointer Authentication
	 */
	if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), PVM_ID_AA64DFR0_ALLOW))
		set_bit(KVM_ARM_VCPU_PMU_V3, allowed_features);

	if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_SVE), PVM_ID_AA64PFR0_ALLOW))
		set_bit(KVM_ARM_VCPU_SVE, allowed_features);

	if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API), PVM_ID_AA64ISAR1_RESTRICT_UNSIGNED) &&
	    FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA), PVM_ID_AA64ISAR1_RESTRICT_UNSIGNED))
		set_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, allowed_features);

	if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPI), PVM_ID_AA64ISAR1_ALLOW) &&
	    FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPA), PVM_ID_AA64ISAR1_ALLOW))
		set_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, allowed_features);

	bitmap_and(kvm->arch.vcpu_features, host_kvm->arch.vcpu_features,
		   allowed_features, KVM_VCPU_MAX_FEATURES);
}

static void pkvm_vcpu_init_ptrauth(struct pkvm_hyp_vcpu *hyp_vcpu)
{
	struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu;

	if (vcpu_has_feature(vcpu, KVM_ARM_VCPU_PTRAUTH_ADDRESS) ||
	    vcpu_has_feature(vcpu, KVM_ARM_VCPU_PTRAUTH_GENERIC)) {
		kvm_vcpu_enable_ptrauth(vcpu);
	} else {
		vcpu_clear_flag(&hyp_vcpu->vcpu, GUEST_HAS_PTRAUTH);
	}
}

static void unpin_host_vcpu(struct kvm_vcpu *host_vcpu)
{
	if (host_vcpu)
		hyp_unpin_shared_mem(host_vcpu, host_vcpu + 1);
}

static void unpin_host_vcpus(struct pkvm_hyp_vcpu *hyp_vcpus[],
			     unsigned int nr_vcpus)
{
	int i;

	for (i = 0; i < nr_vcpus; i++)
		unpin_host_vcpu(hyp_vcpus[i]->host_vcpu);
}

static void init_pkvm_hyp_vm(struct kvm *host_kvm, struct pkvm_hyp_vm *hyp_vm,
			     unsigned int nr_vcpus)
{
	hyp_vm->host_kvm = host_kvm;
	hyp_vm->kvm.created_vcpus = nr_vcpus;
	hyp_vm->kvm.arch.mmu.vtcr = host_mmu.arch.mmu.vtcr;
	hyp_vm->kvm.arch.pkvm.enabled = READ_ONCE(host_kvm->arch.pkvm.enabled);
	pkvm_init_features_from_host(hyp_vm, host_kvm);
}

static void pkvm_vcpu_init_sve(struct pkvm_hyp_vcpu *hyp_vcpu, struct kvm_vcpu *host_vcpu)
{
	struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu;

	if (!vcpu_has_feature(vcpu, KVM_ARM_VCPU_SVE)) {
		vcpu_clear_flag(vcpu, GUEST_HAS_SVE);
		vcpu_clear_flag(vcpu, VCPU_SVE_FINALIZED);
	}
}

static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu,
			      struct pkvm_hyp_vm *hyp_vm,
			      struct kvm_vcpu *host_vcpu,
			      unsigned int vcpu_idx)
{
	int ret = 0;

	if (hyp_pin_shared_mem(host_vcpu, host_vcpu + 1))
		return -EBUSY;

	if (host_vcpu->vcpu_idx != vcpu_idx) {
		ret = -EINVAL;
		goto done;
	}

	hyp_vcpu->host_vcpu = host_vcpu;

	hyp_vcpu->vcpu.kvm = &hyp_vm->kvm;
	hyp_vcpu->vcpu.vcpu_id = READ_ONCE(host_vcpu->vcpu_id);
	hyp_vcpu->vcpu.vcpu_idx = vcpu_idx;

	hyp_vcpu->vcpu.arch.hw_mmu = &hyp_vm->kvm.arch.mmu;
	hyp_vcpu->vcpu.arch.cflags = READ_ONCE(host_vcpu->arch.cflags);
	hyp_vcpu->vcpu.arch.mp_state.mp_state = KVM_MP_STATE_STOPPED;

	pkvm_vcpu_init_sve(hyp_vcpu, host_vcpu);
	pkvm_vcpu_init_ptrauth(hyp_vcpu);
	pkvm_vcpu_init_traps(&hyp_vcpu->vcpu);
done:
	if (ret)
		unpin_host_vcpu(host_vcpu);
	return ret;
}
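
/*
 * Note: on success the host vcpu remains pinned in the hyp; the pin is
 * dropped on the error path above or by unpin_host_vcpus() at VM teardown.
 */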

static int find_free_vm_table_entry(struct kvm *host_kvm)
{
	int i;

	for (i = 0; i < KVM_MAX_PVMS; ++i) {
		if (!vm_table[i])
			return i;
	}

	return -ENOMEM;
}

/*
 * Allocate a VM table entry and insert a pointer to the new vm.
 *
 * Return a unique handle to the protected VM on success,
 * negative error code on failure.
 */
static pkvm_handle_t insert_vm_table_entry(struct kvm *host_kvm,
					   struct pkvm_hyp_vm *hyp_vm)
{
	struct kvm_s2_mmu *mmu = &hyp_vm->kvm.arch.mmu;
	int idx;

	hyp_assert_lock_held(&vm_table_lock);

	/*
	 * Initializing protected state might have failed, yet a malicious
	 * host could trigger this function. Thus, ensure that 'vm_table'
	 * exists.
	 */
	if (unlikely(!vm_table))
		return -EINVAL;

	idx = find_free_vm_table_entry(host_kvm);
	if (idx < 0)
		return idx;

	hyp_vm->kvm.arch.pkvm.handle = idx_to_vm_handle(idx);

	/* VMID 0 is reserved for the host */
	atomic64_set(&mmu->vmid.id, idx + 1);

	mmu->arch = &hyp_vm->kvm.arch;
	mmu->pgt = &hyp_vm->pgt;

	vm_table[idx] = hyp_vm;
	return hyp_vm->kvm.arch.pkvm.handle;
}

/*
 * Deallocate and remove the VM table entry corresponding to the handle.
 */
static void remove_vm_table_entry(pkvm_handle_t handle)
{
	hyp_assert_lock_held(&vm_table_lock);
	vm_table[vm_handle_to_idx(handle)] = NULL;
}

static size_t pkvm_get_hyp_vm_size(unsigned int nr_vcpus)
{
	return size_add(sizeof(struct pkvm_hyp_vm),
		size_mul(sizeof(struct pkvm_hyp_vcpu *), nr_vcpus));
}
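
/*
 * The size accounts for the trailing array of vcpu pointers that is assumed
 * to follow struct pkvm_hyp_vm (see hyp_vm->vcpus[] elsewhere in this file):
 * the base structure plus one 'struct pkvm_hyp_vcpu *' per vcpu.
 */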

static void *map_donated_memory_noclear(unsigned long host_va, size_t size)
{
	void *va = (void *)kern_hyp_va(host_va);

	if (!PAGE_ALIGNED(va))
		return NULL;

	if (__pkvm_host_donate_hyp(hyp_virt_to_pfn(va),
				   PAGE_ALIGN(size) >> PAGE_SHIFT))
		return NULL;

	return va;
}

static void *map_donated_memory(unsigned long host_va, size_t size)
{
	void *va = map_donated_memory_noclear(host_va, size);

	if (va)
		memset(va, 0, size);

	return va;
}
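
/*
 * map_donated_memory() zeroes the donated pages before use; the _noclear
 * variant skips the memset and is used for the stage-2 PGD, which is
 * presumably initialised later via kvm_guest_prepare_stage2().
 */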

static void __unmap_donated_memory(void *va, size_t size)
{
	kvm_flush_dcache_to_poc(va, size);
	WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(va),
				       PAGE_ALIGN(size) >> PAGE_SHIFT));
}

static void unmap_donated_memory(void *va, size_t size)
{
	if (!va)
		return;

	memset(va, 0, size);
	__unmap_donated_memory(va, size);
}

static void unmap_donated_memory_noclear(void *va, size_t size)
{
	if (!va)
		return;

	__unmap_donated_memory(va, size);
}
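
/*
 * unmap_donated_memory() scrubs the memory before handing it back to the
 * host so no hypervisor or guest state leaks; the _noclear variant is for
 * callers that have already zeroed the pages (see teardown_donated_memory()).
 */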

/*
 * Initialize the hypervisor copy of the protected VM state using the
 * memory donated by the host.
 *
 * Unmaps the donated memory from the host at stage 2.
 *
 * host_kvm: A pointer to the host's struct kvm.
 * vm_hva: The host va of the area being donated for the VM state.
 *	   Must be page aligned.
 * pgd_hva: The host va of the area being donated for the stage-2 PGD for
 *	    the VM. Must be page aligned. Its size is implied by the VM's
 *	    VTCR.
 *
 * Return a unique handle to the protected VM on success,
 * negative error code on failure.
 */
int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
		   unsigned long pgd_hva)
{
	struct pkvm_hyp_vm *hyp_vm = NULL;
	size_t vm_size, pgd_size;
	unsigned int nr_vcpus;
	void *pgd = NULL;
	int ret;

	ret = hyp_pin_shared_mem(host_kvm, host_kvm + 1);
	if (ret)
		return ret;

	nr_vcpus = READ_ONCE(host_kvm->created_vcpus);
	if (nr_vcpus < 1) {
		ret = -EINVAL;
		goto err_unpin_kvm;
	}

	vm_size = pkvm_get_hyp_vm_size(nr_vcpus);
	pgd_size = kvm_pgtable_stage2_pgd_size(host_mmu.arch.mmu.vtcr);

	ret = -ENOMEM;

	hyp_vm = map_donated_memory(vm_hva, vm_size);
	if (!hyp_vm)
		goto err_remove_mappings;

	pgd = map_donated_memory_noclear(pgd_hva, pgd_size);
	if (!pgd)
		goto err_remove_mappings;

	init_pkvm_hyp_vm(host_kvm, hyp_vm, nr_vcpus);

	hyp_spin_lock(&vm_table_lock);
	ret = insert_vm_table_entry(host_kvm, hyp_vm);
	if (ret < 0)
		goto err_unlock;

	ret = kvm_guest_prepare_stage2(hyp_vm, pgd);
	if (ret)
		goto err_remove_vm_table_entry;
	hyp_spin_unlock(&vm_table_lock);

	return hyp_vm->kvm.arch.pkvm.handle;

err_remove_vm_table_entry:
	remove_vm_table_entry(hyp_vm->kvm.arch.pkvm.handle);
err_unlock:
	hyp_spin_unlock(&vm_table_lock);
err_remove_mappings:
	unmap_donated_memory(hyp_vm, vm_size);
	unmap_donated_memory(pgd, pgd_size);
err_unpin_kvm:
	hyp_unpin_shared_mem(host_kvm, host_kvm + 1);
	return ret;
}
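
/*
 * Illustrative flow (a sketch, not actual host code): the host's pKVM
 * hypercalls land in the handlers in this file roughly as follows:
 *
 *	handle = __pkvm_init_vm(host_kvm, vm_hva, pgd_hva);
 *	for each vcpu:
 *		__pkvm_init_vcpu(handle, host_vcpu, vcpu_hva);
 *	...
 *	__pkvm_teardown_vm(handle);
 */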

/*
 * Initialize the hypervisor copy of the protected vCPU state using the
 * memory donated by the host.
 *
 * handle: The handle for the protected vm.
 * host_vcpu: A pointer to the corresponding host vcpu.
 * vcpu_hva: The host va of the area being donated for the vcpu state.
 *	     Must be page aligned. The size of the area must be equal to
 *	     the page-aligned size of 'struct pkvm_hyp_vcpu'.
 * Return 0 on success, negative error code on failure.
 */
int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu,
		     unsigned long vcpu_hva)
{
	struct pkvm_hyp_vcpu *hyp_vcpu;
	struct pkvm_hyp_vm *hyp_vm;
	unsigned int idx;
	int ret;

	hyp_vcpu = map_donated_memory(vcpu_hva, sizeof(*hyp_vcpu));
	if (!hyp_vcpu)
		return -ENOMEM;

	hyp_spin_lock(&vm_table_lock);

	hyp_vm = get_vm_by_handle(handle);
	if (!hyp_vm) {
		ret = -ENOENT;
		goto unlock;
	}

	idx = hyp_vm->nr_vcpus;
	if (idx >= hyp_vm->kvm.created_vcpus) {
		ret = -EINVAL;
		goto unlock;
	}

	ret = init_pkvm_hyp_vcpu(hyp_vcpu, hyp_vm, host_vcpu, idx);
	if (ret)
		goto unlock;

	hyp_vm->vcpus[idx] = hyp_vcpu;
	hyp_vm->nr_vcpus++;
unlock:
	hyp_spin_unlock(&vm_table_lock);

	if (ret) {
		unmap_donated_memory(hyp_vcpu, sizeof(*hyp_vcpu));
		return ret;
	}

	hyp_vcpu->vcpu.arch.cptr_el2 = kvm_get_reset_cptr_el2(&hyp_vcpu->vcpu);

	return 0;
}

static void
teardown_donated_memory(struct kvm_hyp_memcache *mc, void *addr, size_t size)
{
	size = PAGE_ALIGN(size);
	memset(addr, 0, size);

	for (void *start = addr; start < addr + size; start += PAGE_SIZE)
		push_hyp_memcache(mc, start, hyp_virt_to_phys);

	unmap_donated_memory_noclear(addr, size);
}
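
/*
 * Donated metadata pages are zeroed, pushed onto the teardown memcache so
 * the host can reclaim them, and then unmapped from the hypervisor.
 */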

int __pkvm_teardown_vm(pkvm_handle_t handle)
{
	struct kvm_hyp_memcache *mc;
	struct pkvm_hyp_vm *hyp_vm;
	struct kvm *host_kvm;
	unsigned int idx;
	size_t vm_size;
	int err;

	hyp_spin_lock(&vm_table_lock);
	hyp_vm = get_vm_by_handle(handle);
	if (!hyp_vm) {
		err = -ENOENT;
		goto err_unlock;
	}

	if (WARN_ON(hyp_page_count(hyp_vm))) {
		err = -EBUSY;
		goto err_unlock;
	}

	host_kvm = hyp_vm->host_kvm;

	/* Ensure the VMID is clean before it can be reallocated */
	__kvm_tlb_flush_vmid(&hyp_vm->kvm.arch.mmu);
	remove_vm_table_entry(handle);
	hyp_spin_unlock(&vm_table_lock);

	/* Reclaim guest pages (including page-table pages) */
	mc = &host_kvm->arch.pkvm.teardown_mc;
	reclaim_guest_pages(hyp_vm, mc);
	unpin_host_vcpus(hyp_vm->vcpus, hyp_vm->nr_vcpus);

	/* Push the metadata pages to the teardown memcache */
	for (idx = 0; idx < hyp_vm->nr_vcpus; ++idx) {
		struct pkvm_hyp_vcpu *hyp_vcpu = hyp_vm->vcpus[idx];

		teardown_donated_memory(mc, hyp_vcpu, sizeof(*hyp_vcpu));
	}

	vm_size = pkvm_get_hyp_vm_size(hyp_vm->kvm.created_vcpus);
	teardown_donated_memory(mc, hyp_vm, vm_size);
	hyp_unpin_shared_mem(host_kvm, host_kvm + 1);
	return 0;

err_unlock:
	hyp_spin_unlock(&vm_table_lock);
	return err;
}