// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2020 Google LLC
 * Author: Quentin Perret <qperret@google.com>
 */

#include <linux/kvm_host.h>

#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pgtable.h>
#include <asm/kvm_pkvm.h>
#include <asm/stage2_pgtable.h>

#include <hyp/fault.h>

#include <nvhe/arm-smccc.h>
#include <nvhe/gfp.h>
#include <nvhe/memory.h>
#include <nvhe/mem_protect.h>
#include <nvhe/mm.h>
#include <nvhe/trap_handler.h>

#define KVM_HOST_S2_FLAGS (KVM_PGTABLE_S2_AS_S1 | KVM_PGTABLE_S2_IDMAP)

struct host_mmu host_mmu;

static struct hyp_pool host_s2_pool;

static DEFINE_PER_CPU(struct pkvm_hyp_vm *, __current_vm);
#define current_vm (*this_cpu_ptr(&__current_vm))

static void pkvm_sme_dvmsync_fw_call(void)
{
	if (alternative_has_cap_unlikely(ARM64_WORKAROUND_4193714)) {
		struct arm_smccc_res res;

		/*
		 * Ignore the return value. Probing for the workaround
		 * availability took place in init_hyp_mode().
		 */
		hyp_smccc_1_1_smc(ARM_SMCCC_CPU_WORKAROUND_4193714, &res);
	}
}

static void guest_lock_component(struct pkvm_hyp_vm *vm)
{
	hyp_spin_lock(&vm->lock);
	current_vm = vm;
}

static void guest_unlock_component(struct pkvm_hyp_vm *vm)
{
	current_vm = NULL;
	hyp_spin_unlock(&vm->lock);
}

static void host_lock_component(void)
{
	hyp_spin_lock(&host_mmu.lock);
}

static void host_unlock_component(void)
{
	hyp_spin_unlock(&host_mmu.lock);
}

static void hyp_lock_component(void)
{
	hyp_spin_lock(&pkvm_pgd_lock);
}

static void hyp_unlock_component(void)
{
	hyp_spin_unlock(&pkvm_pgd_lock);
}

#define for_each_hyp_page(__p, __st, __sz)				\
	for (struct hyp_page *__p = hyp_phys_to_page(__st),		\
	     *__e = __p + ((__sz) >> PAGE_SHIFT);			\
	     __p < __e; __p++)

static void *host_s2_zalloc_pages_exact(size_t size)
{
	void *addr = hyp_alloc_pages(&host_s2_pool, get_order(size));

	hyp_split_page(hyp_virt_to_page(addr));

	/*
	 * The size of concatenated PGDs is always a power of two of PAGE_SIZE,
	 * so there should be no need to free any of the tail pages to make the
	 * allocation exact.
	 */
	WARN_ON(size != (PAGE_SIZE << get_order(size)));

	return addr;
}

static void *host_s2_zalloc_page(void *pool)
{
	return hyp_alloc_pages(pool, 0);
}

static void host_s2_get_page(void *addr)
{
	hyp_get_page(&host_s2_pool, addr);
}

static void host_s2_put_page(void *addr)
{
	hyp_put_page(&host_s2_pool, addr);
}

static void host_s2_free_unlinked_table(void *addr, s8 level)
{
	kvm_pgtable_stage2_free_unlinked(&host_mmu.mm_ops, addr, level);
}

static int prepare_s2_pool(void *pgt_pool_base)
{
	unsigned long nr_pages, pfn;
	int ret;

	pfn = hyp_virt_to_pfn(pgt_pool_base);
	nr_pages = host_s2_pgtable_pages();
	ret = hyp_pool_init(&host_s2_pool, pfn, nr_pages, 0);
	if (ret)
		return ret;

	host_mmu.mm_ops = (struct kvm_pgtable_mm_ops) {
		.zalloc_pages_exact = host_s2_zalloc_pages_exact,
		.zalloc_page = host_s2_zalloc_page,
		.free_unlinked_table = host_s2_free_unlinked_table,
		.phys_to_virt = hyp_phys_to_virt,
		.virt_to_phys = hyp_virt_to_phys,
		.page_count = hyp_page_count,
		.get_page = host_s2_get_page,
		.put_page = host_s2_put_page,
	};

	return 0;
}

static void prepare_host_vtcr(void)
{
	u32 parange, phys_shift;

	/* The host stage 2 is id-mapped, so use parange for T0SZ */
	parange = kvm_get_parange(id_aa64mmfr0_el1_sys_val);
	phys_shift = id_aa64mmfr0_parange_to_phys_shift(parange);

	host_mmu.arch.mmu.vtcr = kvm_get_vtcr(id_aa64mmfr0_el1_sys_val,
					      id_aa64mmfr1_el1_sys_val, phys_shift);
}

static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot);

int kvm_host_prepare_stage2(void *pgt_pool_base)
{
	struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu;
	int ret;

	prepare_host_vtcr();
	hyp_spin_lock_init(&host_mmu.lock);
	mmu->arch = &host_mmu.arch;

	ret = prepare_s2_pool(pgt_pool_base);
	if (ret)
		return ret;

	ret = __kvm_pgtable_stage2_init(&host_mmu.pgt, mmu,
					&host_mmu.mm_ops, KVM_HOST_S2_FLAGS,
					host_stage2_force_pte_cb);
	if (ret)
		return ret;

	mmu->pgd_phys = __hyp_pa(host_mmu.pgt.pgd);
	mmu->pgt = &host_mmu.pgt;
	atomic64_set(&mmu->vmid.id, 0);

	return 0;
}

static void *guest_s2_zalloc_pages_exact(size_t size)
{
	void *addr = hyp_alloc_pages(&current_vm->pool, get_order(size));

	WARN_ON(size != (PAGE_SIZE << get_order(size)));
	hyp_split_page(hyp_virt_to_page(addr));

	return addr;
}

static void guest_s2_free_pages_exact(void *addr, unsigned long size)
{
	u8 order = get_order(size);
	unsigned int i;

	for (i = 0; i < (1 << order); i++)
		hyp_put_page(&current_vm->pool, addr + (i * PAGE_SIZE));
}

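/*
 * Allocate a zeroed stage-2 page-table page for the current guest: prefer the
 * per-VM pool and fall back to the vCPU-provided memcache when the pool runs
 * dry, fixing up the struct hyp_page metadata by hand in that case.
 */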
static void *guest_s2_zalloc_page(void *mc)
{
	struct hyp_page *p;
	void *addr;

	addr = hyp_alloc_pages(&current_vm->pool, 0);
	if (addr)
		return addr;

	addr = pop_hyp_memcache(mc, hyp_phys_to_virt);
	if (!addr)
		return addr;

	memset(addr, 0, PAGE_SIZE);
	p = hyp_virt_to_page(addr);
	p->refcount = 1;
	p->order = 0;

	return addr;
}

static void guest_s2_get_page(void *addr)
{
	hyp_get_page(&current_vm->pool, addr);
}

static void guest_s2_put_page(void *addr)
{
	hyp_put_page(&current_vm->pool, addr);
}

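/*
 * Guest pages are not mapped in the hypervisor's linear map, so apply @func
 * by temporarily mapping them through the per-CPU fixmap, one page (or one
 * PMD-sized block when alignment allows) at a time.
 */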
static void __apply_guest_page(void *va, size_t size,
			       void (*func)(void *addr, size_t size))
{
	size += va - PTR_ALIGN_DOWN(va, PAGE_SIZE);
	va = PTR_ALIGN_DOWN(va, PAGE_SIZE);
	size = PAGE_ALIGN(size);

	while (size) {
		size_t map_size = PAGE_SIZE;
		void *map;

		if (IS_ALIGNED((unsigned long)va, PMD_SIZE) && size >= PMD_SIZE)
			map = hyp_fixblock_map(__hyp_pa(va), &map_size);
		else
			map = hyp_fixmap_map(__hyp_pa(va));

		func(map, map_size);

		if (map_size == PMD_SIZE)
			hyp_fixblock_unmap();
		else
			hyp_fixmap_unmap();

		size -= map_size;
		va += map_size;
	}
}

static void clean_dcache_guest_page(void *va, size_t size)
{
	__apply_guest_page(va, size, __clean_dcache_guest_page);
}

static void invalidate_icache_guest_page(void *va, size_t size)
{
	__apply_guest_page(va, size, __invalidate_icache_guest_page);
}

int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd)
{
	struct kvm_s2_mmu *mmu = &vm->kvm.arch.mmu;
	unsigned long nr_pages;
	int ret;

	nr_pages = kvm_pgtable_stage2_pgd_size(mmu->vtcr) >> PAGE_SHIFT;
	ret = hyp_pool_init(&vm->pool, hyp_virt_to_pfn(pgd), nr_pages, 0);
	if (ret)
		return ret;

	hyp_spin_lock_init(&vm->lock);
	vm->mm_ops = (struct kvm_pgtable_mm_ops) {
		.zalloc_pages_exact = guest_s2_zalloc_pages_exact,
		.free_pages_exact = guest_s2_free_pages_exact,
		.zalloc_page = guest_s2_zalloc_page,
		.phys_to_virt = hyp_phys_to_virt,
		.virt_to_phys = hyp_virt_to_phys,
		.page_count = hyp_page_count,
		.get_page = guest_s2_get_page,
		.put_page = guest_s2_put_page,
		.dcache_clean_inval_poc = clean_dcache_guest_page,
		.icache_inval_pou = invalidate_icache_guest_page,
	};

	guest_lock_component(vm);
	ret = __kvm_pgtable_stage2_init(mmu->pgt, mmu, &vm->mm_ops, 0, NULL);
	guest_unlock_component(vm);
	if (ret)
		return ret;

	vm->kvm.arch.mmu.pgd_phys = __hyp_pa(vm->pgt.pgd);

	return 0;
}

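/*
 * Tear down a guest's stage-2 page-table and hand every page backing it to
 * the host: destroy the table, drain the per-VM pool into @mc and donate each
 * page back with __pkvm_hyp_donate_host().
 */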
void reclaim_pgtable_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc)
{
	struct hyp_page *page;
	void *addr;

	/* Dump all pgtable pages in the hyp_pool */
	guest_lock_component(vm);
	kvm_pgtable_stage2_destroy(&vm->pgt);
	vm->kvm.arch.mmu.pgd_phys = 0ULL;
	guest_unlock_component(vm);

	/* Drain the hyp_pool into the memcache */
	addr = hyp_alloc_pages(&vm->pool, 0);
	while (addr) {
		page = hyp_virt_to_page(addr);
		page->refcount = 0;
		page->order = 0;
		push_hyp_memcache(mc, addr, hyp_virt_to_phys);
		WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(addr), 1));
		addr = hyp_alloc_pages(&vm->pool, 0);
	}
}

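/*
 * Enable the host stage-2 on the calling CPU: program VTTBR/VTCR, set HCR_VM
 * (and HCR_FWB when supported) and invalidate TLBs so that the host runs
 * under the identity-mapped stage-2 from here on.
 */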
int __pkvm_prot_finalize(void)
{
	struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu;
	struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);

	if (params->hcr_el2 & HCR_VM)
		return -EPERM;

	params->vttbr = kvm_get_vttbr(mmu);
	params->vtcr = mmu->vtcr;
	params->hcr_el2 |= HCR_VM;
	if (cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
		params->hcr_el2 |= HCR_FWB;

	/*
	 * The CMO below not only cleans the updated params to the
	 * PoC, but also provides the DSB that ensures ongoing
	 * page-table walks that have started before we trapped to EL2
	 * have completed.
	 */
	kvm_flush_dcache_to_poc(params, sizeof(*params));

	write_sysreg_hcr(params->hcr_el2);
	__load_stage2(&host_mmu.arch.mmu, &host_mmu.arch);

	/*
	 * Make sure to have an ISB before the TLB maintenance below but only
	 * when __load_stage2() doesn't include one already.
	 */
	asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT));

	/* Invalidate stale HCR bits that may be cached in TLBs */
	__tlbi(vmalls12e1);
	dsb(nsh);
	isb();

	return 0;
}

static int host_stage2_unmap_dev_all(void)
{
	struct kvm_pgtable *pgt = &host_mmu.pgt;
	struct memblock_region *reg;
	u64 addr = 0;
	int i, ret;

	/* Unmap all non-memory regions to recycle the pages */
	for (i = 0; i < hyp_memblock_nr; i++, addr = reg->base + reg->size) {
		reg = &hyp_memory[i];
		ret = kvm_pgtable_stage2_unmap(pgt, addr, reg->base - addr);
		if (ret)
			return ret;
	}
	return kvm_pgtable_stage2_unmap(pgt, addr, BIT(pgt->ia_bits) - addr);
}

/*
 * Ensure the PFN range is contained within PA-range.
 *
 * This check is also robust to overflows and is therefore a requirement before
 * using a pfn/nr_pages pair from an untrusted source.
 */
static bool pfn_range_is_valid(u64 pfn, u64 nr_pages)
{
	u64 limit = BIT(kvm_phys_shift(&host_mmu.arch.mmu) - PAGE_SHIFT);

	return pfn < limit && ((limit - pfn) >= nr_pages);
}

struct kvm_mem_range {
	u64 start;
	u64 end;
};

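/*
 * Binary search the sorted hyp_memory memblock list. On a hit, return the
 * region containing @addr and set @range to its bounds; on a miss, return
 * NULL and set @range to the hole between regions that contains @addr.
 */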
static struct memblock_region *find_mem_range(phys_addr_t addr, struct kvm_mem_range *range)
{
	int cur, left = 0, right = hyp_memblock_nr;
	struct memblock_region *reg;
	phys_addr_t end;

	range->start = 0;
	range->end = ULONG_MAX;

	/* The list of memblock regions is sorted, binary search it */
	while (left < right) {
		cur = (left + right) >> 1;
		reg = &hyp_memory[cur];
		end = reg->base + reg->size;
		if (addr < reg->base) {
			right = cur;
			range->end = reg->base;
		} else if (addr >= end) {
			left = cur + 1;
			range->start = end;
		} else {
			range->start = reg->base;
			range->end = end;
			return reg;
		}
	}

	return NULL;
}

bool addr_is_memory(phys_addr_t phys)
{
	struct kvm_mem_range range;

	return !!find_mem_range(phys, &range);
}

static bool is_in_mem_range(u64 addr, struct kvm_mem_range *range)
{
	return range->start <= addr && addr < range->end;
}

static int check_range_allowed_memory(u64 start, u64 end)
{
	struct memblock_region *reg;
	struct kvm_mem_range range;

	/*
	 * Callers can't check the state of a range that overlaps memory and
	 * MMIO regions, so ensure [start, end[ is in the same kvm_mem_range.
	 */
	reg = find_mem_range(start, &range);
	if (!is_in_mem_range(end - 1, &range))
		return -EINVAL;

	if (!reg || reg->flags & MEMBLOCK_NOMAP)
		return -EPERM;

	return 0;
}

static bool range_is_memory(u64 start, u64 end)
{
	struct kvm_mem_range r;

	if (!find_mem_range(start, &r))
		return false;

	return is_in_mem_range(end - 1, &r);
}

static inline int __host_stage2_idmap(u64 start, u64 end,
				      enum kvm_pgtable_prot prot)
{
	/*
	 * We don't make permission changes to the host idmap after
	 * initialisation, so we can squash -EAGAIN to save callers
	 * having to treat it like success in the case that they try to
	 * map something that is already mapped.
	 */
	return kvm_pgtable_stage2_map(&host_mmu.pgt, start, end - start, start,
				      prot, &host_s2_pool,
				      KVM_PGTABLE_WALK_IGNORE_EAGAIN);
}

/*
 * The pool has been provided with enough pages to cover all of memory with
 * page granularity, but it is difficult to know how much of the MMIO range
 * we will need to cover upfront, so we may need to 'recycle' the pages if we
 * run out.
 */
#define host_stage2_try(fn, ...)					\
	({								\
		int __ret;						\
		hyp_assert_lock_held(&host_mmu.lock);			\
		__ret = fn(__VA_ARGS__);				\
		if (__ret == -ENOMEM) {					\
			__ret = host_stage2_unmap_dev_all();		\
			if (!__ret)					\
				__ret = fn(__VA_ARGS__);		\
		}							\
		__ret;							\
	})

static inline bool range_included(struct kvm_mem_range *child,
				  struct kvm_mem_range *parent)
{
	return parent->start <= child->start && child->end <= parent->end;
}

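/*
 * Shrink @range around the faulting @addr so that it can be covered by a
 * single block mapping: pick the largest supported granule whose aligned
 * range around @addr still fits within @range, falling back to PAGE_SIZE.
 * Fails with -EEXIST if @addr is already mapped, or -EPERM if its invalid
 * PTE carries an ownership annotation.
 */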
static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
{
	struct kvm_mem_range cur;
	kvm_pte_t pte;
	u64 granule;
	s8 level;
	int ret;

	hyp_assert_lock_held(&host_mmu.lock);
	ret = kvm_pgtable_get_leaf(&host_mmu.pgt, addr, &pte, &level);
	if (ret)
		return ret;

	if (kvm_pte_valid(pte))
		return -EEXIST;

	if (pte) {
		WARN_ON(addr_is_memory(addr) &&
			get_host_state(hyp_phys_to_page(addr)) != PKVM_NOPAGE);
		return -EPERM;
	}

	for (; level <= KVM_PGTABLE_LAST_LEVEL; level++) {
		if (!kvm_level_supports_block_mapping(level))
			continue;
		granule = kvm_granule_size(level);
		cur.start = ALIGN_DOWN(addr, granule);
		cur.end = cur.start + granule;
		if (!range_included(&cur, range) && level < KVM_PGTABLE_LAST_LEVEL)
			continue;
		*range = cur;
		return 0;
	}

	WARN_ON(1);

	return -EINVAL;
}

int host_stage2_idmap_locked(phys_addr_t addr, u64 size,
			     enum kvm_pgtable_prot prot)
{
	return host_stage2_try(__host_stage2_idmap, addr, addr + size, prot);
}

static void __host_update_page_state(phys_addr_t addr, u64 size, enum pkvm_page_state state)
{
	for_each_hyp_page(page, addr, size)
		set_host_state(page, state);
}

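/*
 * Layout of the invalid-PTE annotation installed in the host stage-2 when a
 * range of memory is donated away from the host: bits [3:1] record the new
 * owner ID and bits [59:4] carry owner-specific metadata (e.g. the guest
 * handle and GFN encoded further down).
 */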
#define KVM_HOST_DONATION_PTE_OWNER_MASK GENMASK(3, 1)
#define KVM_HOST_DONATION_PTE_EXTRA_MASK GENMASK(59, 4)
static int host_stage2_set_owner_metadata_locked(phys_addr_t addr, u64 size,
						  u8 owner_id, u64 meta)
{
	kvm_pte_t annotation;
	int ret;

	if (owner_id == PKVM_ID_HOST)
		return -EINVAL;

	if (!range_is_memory(addr, addr + size))
		return -EPERM;

	if (!FIELD_FIT(KVM_HOST_DONATION_PTE_OWNER_MASK, owner_id))
		return -EINVAL;

	if (!FIELD_FIT(KVM_HOST_DONATION_PTE_EXTRA_MASK, meta))
		return -EINVAL;

	annotation = FIELD_PREP(KVM_HOST_DONATION_PTE_OWNER_MASK, owner_id) |
		     FIELD_PREP(KVM_HOST_DONATION_PTE_EXTRA_MASK, meta);
	ret = host_stage2_try(kvm_pgtable_stage2_annotate, &host_mmu.pgt,
			      addr, size, &host_s2_pool,
			      KVM_HOST_INVALID_PTE_TYPE_DONATION, annotation);
	if (!ret) {
		/*
		 * After stage2 maintenance has happened, but before the page
		 * owner has changed.
		 */
		pkvm_sme_dvmsync_fw_call();
		__host_update_page_state(addr, size, PKVM_NOPAGE);
	}

	return ret;
}

int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id)
{
	int ret = -EINVAL;

	switch (owner_id) {
	case PKVM_ID_HOST:
		if (!range_is_memory(addr, addr + size))
			return -EPERM;

		ret = host_stage2_idmap_locked(addr, size, PKVM_HOST_MEM_PROT);
		if (!ret)
			__host_update_page_state(addr, size, PKVM_PAGE_OWNED);
		break;
	case PKVM_ID_HYP:
		ret = host_stage2_set_owner_metadata_locked(addr, size,
							    owner_id, 0);
		break;
	}

	return ret;
}

#define KVM_HOST_PTE_OWNER_GUEST_HANDLE_MASK GENMASK(15, 0)
/* We need 40 bits for the GFN to cover a 52-bit IPA with 4k pages and LPA2 */
#define KVM_HOST_PTE_OWNER_GUEST_GFN_MASK GENMASK(55, 16)
static u64 host_stage2_encode_gfn_meta(struct pkvm_hyp_vm *vm, u64 gfn)
{
	pkvm_handle_t handle = vm->kvm.arch.pkvm.handle;

	BUILD_BUG_ON((pkvm_handle_t)-1 > KVM_HOST_PTE_OWNER_GUEST_HANDLE_MASK);
	WARN_ON(!FIELD_FIT(KVM_HOST_PTE_OWNER_GUEST_GFN_MASK, gfn));

	return FIELD_PREP(KVM_HOST_PTE_OWNER_GUEST_HANDLE_MASK, handle) |
	       FIELD_PREP(KVM_HOST_PTE_OWNER_GUEST_GFN_MASK, gfn);
}

static int host_stage2_decode_gfn_meta(kvm_pte_t pte, struct pkvm_hyp_vm **vm,
					u64 *gfn)
{
	pkvm_handle_t handle;
	u64 meta;

	if (WARN_ON(kvm_pte_valid(pte)))
		return -EINVAL;

	if (FIELD_GET(KVM_INVALID_PTE_TYPE_MASK, pte) !=
	    KVM_HOST_INVALID_PTE_TYPE_DONATION) {
		return -EINVAL;
	}

	if (FIELD_GET(KVM_HOST_DONATION_PTE_OWNER_MASK, pte) != PKVM_ID_GUEST)
		return -EPERM;

	meta = FIELD_GET(KVM_HOST_DONATION_PTE_EXTRA_MASK, pte);
	handle = FIELD_GET(KVM_HOST_PTE_OWNER_GUEST_HANDLE_MASK, meta);
	*vm = get_vm_by_handle(handle);
	if (!*vm) {
		/* We probably raced with teardown; try again */
		return -EAGAIN;
	}

	*gfn = FIELD_GET(KVM_HOST_PTE_OWNER_GUEST_GFN_MASK, meta);
	return 0;
}

static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot)
{
	/*
	 * Block mappings must be used with care in the host stage-2 as a
	 * kvm_pgtable_stage2_map() operation targeting a page in the range of
	 * an existing block will delete the block under the assumption that
	 * mappings in the rest of the block range can always be rebuilt lazily.
	 * That assumption is correct for the host stage-2 with RWX mappings
	 * targeting memory or RW mappings targeting MMIO ranges (see
	 * host_stage2_idmap() below which implements some of the host memory
	 * abort logic). However, this is not safe for any other mappings where
	 * the host stage-2 page-table is in fact the only place where this
	 * state is stored. In all those cases, it is safer to use page-level
	 * mappings, hence avoiding to lose the state because of side-effects in
	 * kvm_pgtable_stage2_map().
	 */
	if (range_is_memory(addr, end))
		return prot != PKVM_HOST_MEM_PROT;
	else
		return prot != PKVM_HOST_MMIO_PROT;
}

static int host_stage2_idmap(u64 addr)
{
	struct kvm_mem_range range;
	bool is_memory = !!find_mem_range(addr, &range);
	enum kvm_pgtable_prot prot;
	int ret;

	prot = is_memory ? PKVM_HOST_MEM_PROT : PKVM_HOST_MMIO_PROT;

	host_lock_component();
	ret = host_stage2_adjust_range(addr, &range);
	if (ret)
		goto unlock;

	ret = host_stage2_idmap_locked(range.start, range.end - range.start, prot);
unlock:
	host_unlock_component();

	return ret;
}

static void host_inject_mem_abort(struct kvm_cpu_context *host_ctxt)
{
	u64 ec, esr, spsr;

	esr = read_sysreg_el2(SYS_ESR);
	spsr = read_sysreg_el2(SYS_SPSR);

	/* Repaint the ESR to report a same-level fault if taken from EL1 */
	if ((spsr & PSR_MODE_MASK) != PSR_MODE_EL0t) {
		ec = ESR_ELx_EC(esr);
		if (ec == ESR_ELx_EC_DABT_LOW)
			ec = ESR_ELx_EC_DABT_CUR;
		else if (ec == ESR_ELx_EC_IABT_LOW)
			ec = ESR_ELx_EC_IABT_CUR;
		else
			WARN_ON(1);
		esr &= ~ESR_ELx_EC_MASK;
		esr |= ec << ESR_ELx_EC_SHIFT;
	}

	/*
	 * Since S1PTW should only ever be set for stage-2 faults, we're pretty
	 * much guaranteed that it won't be set in ESR_EL1 by the hardware. So,
	 * let's use that bit to allow the host abort handler to differentiate
	 * this abort from normal userspace faults.
	 *
	 * Note: although S1PTW is RES0 at EL1, it is guaranteed by the
	 * architecture to be backed by flops, so it should be safe to use.
	 */
	esr |= ESR_ELx_S1PTW;
	inject_host_exception(esr);
}

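/*
 * Host stage-2 fault handler: lazily extend the identity mapping to cover the
 * faulting PA, using RWX for memory and RW for MMIO. A -EPERM from the idmap
 * path means the host touched a page it no longer owns, in which case the
 * abort is reflected back to it.
 */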
void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
{
	struct kvm_vcpu_fault_info fault;
	u64 esr, addr;

	esr = read_sysreg_el2(SYS_ESR);
	if (!__get_fault_info(esr, &fault)) {
		/*
		 * We've presumably raced with a page-table change which caused
		 * AT to fail, try again.
		 */
		return;
	}

	/*
	 * Yikes, we couldn't resolve the fault IPA. This should reinject an
	 * abort into the host when we figure out how to do that.
	 */
	BUG_ON(!(fault.hpfar_el2 & HPFAR_EL2_NS));
	addr = FIELD_GET(HPFAR_EL2_FIPA, fault.hpfar_el2) << 12;

	switch (host_stage2_idmap(addr)) {
	case -EPERM:
		host_inject_mem_abort(host_ctxt);
		fallthrough;
	case -EEXIST:
	case 0:
		break;
	default:
		BUG();
	}
}

struct check_walk_data {
	enum pkvm_page_state desired;
	enum pkvm_page_state (*get_page_state)(kvm_pte_t pte, u64 addr);
};

static int __check_page_state_visitor(const struct kvm_pgtable_visit_ctx *ctx,
				      enum kvm_pgtable_walk_flags visit)
{
	struct check_walk_data *d = ctx->arg;

	return d->get_page_state(ctx->old, ctx->addr) == d->desired ? 0 : -EPERM;
}

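/* Check that every leaf PTE in [addr, addr + size) is in the desired state. */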
static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size,
				  struct check_walk_data *data)
{
	struct kvm_pgtable_walker walker = {
		.cb = __check_page_state_visitor,
		.arg = data,
		.flags = KVM_PGTABLE_WALK_LEAF,
	};

	return kvm_pgtable_walk(pgt, addr, size, &walker);
}

static int __host_check_page_state_range(u64 addr, u64 size,
					 enum pkvm_page_state state)
{
	int ret;

	ret = check_range_allowed_memory(addr, addr + size);
	if (ret)
		return ret;

	hyp_assert_lock_held(&host_mmu.lock);

	for_each_hyp_page(page, addr, size) {
		if (get_host_state(page) != state)
			return -EPERM;
	}

	return 0;
}

static int __host_set_page_state_range(u64 addr, u64 size,
				       enum pkvm_page_state state)
{
	if (get_host_state(hyp_phys_to_page(addr)) == PKVM_NOPAGE) {
		int ret = host_stage2_idmap_locked(addr, size, PKVM_HOST_MEM_PROT);

		if (ret)
			return ret;
	}

	__host_update_page_state(addr, size, state);

	return 0;
}

static void __hyp_set_page_state_range(phys_addr_t phys, u64 size, enum pkvm_page_state state)
{
	for_each_hyp_page(page, phys, size)
		set_hyp_state(page, state);
}

static int __hyp_check_page_state_range(phys_addr_t phys, u64 size, enum pkvm_page_state state)
{
	for_each_hyp_page(page, phys, size) {
		if (get_hyp_state(page) != state)
			return -EPERM;
	}

	return 0;
}

static bool guest_pte_is_poisoned(kvm_pte_t pte)
{
	if (kvm_pte_valid(pte))
		return false;

	return FIELD_GET(KVM_INVALID_PTE_TYPE_MASK, pte) ==
	       KVM_GUEST_INVALID_PTE_TYPE_POISONED;
}

static enum pkvm_page_state guest_get_page_state(kvm_pte_t pte, u64 addr)
{
	if (guest_pte_is_poisoned(pte))
		return PKVM_POISON;

	if (!kvm_pte_valid(pte))
		return PKVM_NOPAGE;

	return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte));
}

static int __guest_check_page_state_range(struct pkvm_hyp_vm *vm, u64 addr,
					  u64 size, enum pkvm_page_state state)
{
	struct check_walk_data d = {
		.desired = state,
		.get_page_state = guest_get_page_state,
	};

	hyp_assert_lock_held(&vm->lock);
	return check_page_state_range(&vm->pgt, addr, size, &d);
}

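/*
 * Look up @ipa in the guest stage-2 and return the PTE and PA of the backing
 * page. Fails with -EHWPOISON for poisoned entries, -ENOENT when nothing is
 * mapped, and -E2BIG when the mapping is larger than a single page.
 */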
static int get_valid_guest_pte(struct pkvm_hyp_vm *vm, u64 ipa, kvm_pte_t *ptep, u64 *physp)
{
	kvm_pte_t pte;
	u64 phys;
	s8 level;
	int ret;

	ret = kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, &level);
	if (ret)
		return ret;
	if (guest_pte_is_poisoned(pte))
		return -EHWPOISON;
	if (!kvm_pte_valid(pte))
		return -ENOENT;
	if (level != KVM_PGTABLE_LAST_LEVEL)
		return -E2BIG;

	phys = kvm_pte_to_phys(pte);
	ret = check_range_allowed_memory(phys, phys + PAGE_SIZE);
	if (WARN_ON(ret))
		return ret;

	*ptep = pte;
	*physp = phys;

	return 0;
}

int __pkvm_vcpu_in_poison_fault(struct pkvm_hyp_vcpu *hyp_vcpu)
{
	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
	kvm_pte_t pte;
	s8 level;
	u64 ipa;
	int ret;

	switch (kvm_vcpu_trap_get_class(&hyp_vcpu->vcpu)) {
	case ESR_ELx_EC_DABT_LOW:
	case ESR_ELx_EC_IABT_LOW:
		if (kvm_vcpu_trap_is_translation_fault(&hyp_vcpu->vcpu))
			break;
		fallthrough;
	default:
		return -EINVAL;
	}

	/*
	 * The host has the faulting IPA when it calls us from the guest
	 * fault handler but we retrieve it ourselves from the FAR so as
	 * to avoid exposing an "oracle" that could reveal data access
	 * patterns of the guest after initial donation of its pages.
	 */
	ipa = kvm_vcpu_get_fault_ipa(&hyp_vcpu->vcpu);
	ipa |= FAR_TO_FIPA_OFFSET(kvm_vcpu_get_hfar(&hyp_vcpu->vcpu));

	guest_lock_component(vm);
	ret = kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, &level);
	if (ret)
		goto unlock;

	if (level != KVM_PGTABLE_LAST_LEVEL) {
		ret = -EINVAL;
		goto unlock;
	}

	ret = guest_pte_is_poisoned(pte);
unlock:
	guest_unlock_component(vm);
	return ret;
}

int __pkvm_host_share_hyp(u64 pfn)
{
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 size = PAGE_SIZE;
	int ret;

	host_lock_component();
	hyp_lock_component();

	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
	if (ret)
		goto unlock;
	ret = __hyp_check_page_state_range(phys, size, PKVM_NOPAGE);
	if (ret)
		goto unlock;

	__hyp_set_page_state_range(phys, size, PKVM_PAGE_SHARED_BORROWED);
	WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED));

unlock:
	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

int __pkvm_guest_share_host(struct pkvm_hyp_vcpu *vcpu, u64 gfn)
{
	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
	u64 phys, ipa = hyp_pfn_to_phys(gfn);
	kvm_pte_t pte;
	int ret;

	host_lock_component();
	guest_lock_component(vm);

	ret = get_valid_guest_pte(vm, ipa, &pte, &phys);
	if (ret)
		goto unlock;

	ret = -EPERM;
	if (pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte)) != PKVM_PAGE_OWNED)
		goto unlock;
	if (__host_check_page_state_range(phys, PAGE_SIZE, PKVM_NOPAGE))
		goto unlock;

	ret = 0;
	WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, PAGE_SIZE, phys,
				       pkvm_mkstate(KVM_PGTABLE_PROT_RWX, PKVM_PAGE_SHARED_OWNED),
				       &vcpu->vcpu.arch.pkvm_memcache, 0));
	WARN_ON(__host_set_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_SHARED_BORROWED));
unlock:
	guest_unlock_component(vm);
	host_unlock_component();

	return ret;
}

int __pkvm_guest_unshare_host(struct pkvm_hyp_vcpu *vcpu, u64 gfn)
{
	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
	u64 meta, phys, ipa = hyp_pfn_to_phys(gfn);
	kvm_pte_t pte;
	int ret;

	host_lock_component();
	guest_lock_component(vm);

	ret = get_valid_guest_pte(vm, ipa, &pte, &phys);
	if (ret)
		goto unlock;

	ret = -EPERM;
	if (pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte)) != PKVM_PAGE_SHARED_OWNED)
		goto unlock;
	if (__host_check_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_SHARED_BORROWED))
		goto unlock;

	ret = 0;
	meta = host_stage2_encode_gfn_meta(vm, gfn);
	WARN_ON(host_stage2_set_owner_metadata_locked(phys, PAGE_SIZE,
						      PKVM_ID_GUEST, meta));
	WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, PAGE_SIZE, phys,
				       pkvm_mkstate(KVM_PGTABLE_PROT_RWX, PKVM_PAGE_OWNED),
				       &vcpu->vcpu.arch.pkvm_memcache, 0));
unlock:
	guest_unlock_component(vm);
	host_unlock_component();

	return ret;
}

int __pkvm_host_unshare_hyp(u64 pfn)
{
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 virt = (u64)__hyp_va(phys);
	u64 size = PAGE_SIZE;
	int ret;

	host_lock_component();
	hyp_lock_component();

	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
	if (ret)
		goto unlock;
	ret = __hyp_check_page_state_range(phys, size, PKVM_PAGE_SHARED_BORROWED);
	if (ret)
		goto unlock;
	if (hyp_page_count((void *)virt)) {
		ret = -EBUSY;
		goto unlock;
	}

	__hyp_set_page_state_range(phys, size, PKVM_NOPAGE);
	WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_OWNED));

unlock:
	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages)
{
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 size = PAGE_SIZE * nr_pages;
	void *virt = __hyp_va(phys);
	int ret;

	if (!pfn_range_is_valid(pfn, nr_pages))
		return -EINVAL;

	host_lock_component();
	hyp_lock_component();

	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
	if (ret)
		goto unlock;
	ret = __hyp_check_page_state_range(phys, size, PKVM_NOPAGE);
	if (ret)
		goto unlock;

	__hyp_set_page_state_range(phys, size, PKVM_PAGE_OWNED);
	WARN_ON(pkvm_create_mappings_locked(virt, virt + size, PAGE_HYP));
	WARN_ON(host_stage2_set_owner_locked(phys, size, PKVM_ID_HYP));

unlock:
	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages)
{
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 size = PAGE_SIZE * nr_pages;
	u64 virt = (u64)__hyp_va(phys);
	int ret;

	if (!pfn_range_is_valid(pfn, nr_pages))
		return -EINVAL;

	host_lock_component();
	hyp_lock_component();

	ret = __hyp_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
	if (ret)
		goto unlock;
	ret = __host_check_page_state_range(phys, size, PKVM_NOPAGE);
	if (ret)
		goto unlock;

	__hyp_set_page_state_range(phys, size, PKVM_NOPAGE);
	WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, virt, size) != size);
	WARN_ON(host_stage2_set_owner_locked(phys, size, PKVM_ID_HOST));

unlock:
	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

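/*
 * Pin a range of host-shared memory in the hypervisor: take a reference on
 * each page so the host cannot unshare it (__pkvm_host_unshare_hyp() returns
 * -EBUSY while the refcount is non-zero), mapping the page at EL2 on first
 * pin.
 */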
int hyp_pin_shared_mem(void *from, void *to)
{
	u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE);
	u64 end = PAGE_ALIGN((u64)to);
	u64 phys = __hyp_pa(start);
	u64 size = end - start;
	struct hyp_page *p;
	int ret;

	host_lock_component();
	hyp_lock_component();

	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
	if (ret)
		goto unlock;

	ret = __hyp_check_page_state_range(phys, size, PKVM_PAGE_SHARED_BORROWED);
	if (ret)
		goto unlock;

	for (cur = start; cur < end; cur += PAGE_SIZE) {
		p = hyp_virt_to_page(cur);
		hyp_page_ref_inc(p);
		if (p->refcount == 1)
			WARN_ON(pkvm_create_mappings_locked((void *)cur,
							    (void *)cur + PAGE_SIZE,
							    PAGE_HYP));
	}

unlock:
	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

void hyp_unpin_shared_mem(void *from, void *to)
{
	u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE);
	u64 end = PAGE_ALIGN((u64)to);
	struct hyp_page *p;

	host_lock_component();
	hyp_lock_component();

	for (cur = start; cur < end; cur += PAGE_SIZE) {
		p = hyp_virt_to_page(cur);
		if (p->refcount == 1)
			WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, cur, PAGE_SIZE) != PAGE_SIZE);
		hyp_page_ref_dec(p);
	}

	hyp_unlock_component();
	host_unlock_component();
}

int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages)
{
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 size = PAGE_SIZE * nr_pages;
	int ret;

	if (!pfn_range_is_valid(pfn, nr_pages))
		return -EINVAL;

	host_lock_component();
	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
	if (!ret)
		ret = __host_set_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
	host_unlock_component();

	return ret;
}

int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages)
{
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 size = PAGE_SIZE * nr_pages;
	int ret;

	if (!pfn_range_is_valid(pfn, nr_pages))
		return -EINVAL;

	host_lock_component();
	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
	if (!ret)
		ret = __host_set_page_state_range(phys, size, PKVM_PAGE_OWNED);
	host_unlock_component();

	return ret;
}

static int __guest_check_transition_size(u64 phys, u64 ipa, u64 nr_pages, u64 *size)
{
	size_t block_size;

	if (nr_pages == 1) {
		*size = PAGE_SIZE;
		return 0;
	}

	/* We solely support second to last level huge mapping */
	block_size = kvm_granule_size(KVM_PGTABLE_LAST_LEVEL - 1);

	if (nr_pages != block_size >> PAGE_SHIFT)
		return -EINVAL;

	if (!IS_ALIGNED(phys | ipa, block_size))
		return -EINVAL;

	*size = block_size;
	return 0;
}

static void hyp_poison_page(phys_addr_t phys)
{
	void *addr = hyp_fixmap_map(phys);

	memset(addr, 0, PAGE_SIZE);
	/*
	 * Prefer kvm_flush_dcache_to_poc() over __clean_dcache_guest_page()
	 * here as the latter may elide the CMO under the assumption that FWB
	 * will be enabled on CPUs that support it. This is incorrect for the
	 * host stage-2 and would otherwise lead to a malicious host potentially
	 * being able to read the contents of newly reclaimed guest pages.
	 */
	kvm_flush_dcache_to_poc(addr, PAGE_SIZE);
	hyp_fixmap_unmap();
}

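/*
 * Given a PA the host no longer owns, decode the donation annotation in the
 * host stage-2 to find which guest the page was donated to and at which GFN.
 * Returns -EAGAIN when the page is (or has become) host-accessible again.
 */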
static int host_stage2_get_guest_info(phys_addr_t phys, struct pkvm_hyp_vm **vm,
				      u64 *gfn)
{
	enum pkvm_page_state state;
	kvm_pte_t pte;
	s8 level;
	int ret;

	if (!addr_is_memory(phys))
		return -EFAULT;

	state = get_host_state(hyp_phys_to_page(phys));
	switch (state) {
	case PKVM_PAGE_OWNED:
	case PKVM_PAGE_SHARED_OWNED:
	case PKVM_PAGE_SHARED_BORROWED:
		/* The access should no longer fault; try again. */
		return -EAGAIN;
	case PKVM_NOPAGE:
		break;
	default:
		return -EPERM;
	}

	ret = kvm_pgtable_get_leaf(&host_mmu.pgt, phys, &pte, &level);
	if (ret)
		return ret;

	if (WARN_ON(level != KVM_PGTABLE_LAST_LEVEL))
		return -EINVAL;

	return host_stage2_decode_gfn_meta(pte, vm, gfn);
}

int __pkvm_host_force_reclaim_page_guest(phys_addr_t phys)
{
	struct pkvm_hyp_vm *vm;
	u64 gfn, ipa, pa;
	kvm_pte_t pte;
	int ret;

	phys &= PAGE_MASK;

	hyp_spin_lock(&vm_table_lock);
	host_lock_component();

	ret = host_stage2_get_guest_info(phys, &vm, &gfn);
	if (ret)
		goto unlock_host;

	ipa = hyp_pfn_to_phys(gfn);
	guest_lock_component(vm);
	ret = get_valid_guest_pte(vm, ipa, &pte, &pa);
	if (ret)
		goto unlock_guest;

	WARN_ON(pa != phys);
	if (guest_get_page_state(pte, ipa) != PKVM_PAGE_OWNED) {
		ret = -EPERM;
		goto unlock_guest;
	}

	/* We really shouldn't be allocating, so don't pass a memcache */
	ret = kvm_pgtable_stage2_annotate(&vm->pgt, ipa, PAGE_SIZE, NULL,
					  KVM_GUEST_INVALID_PTE_TYPE_POISONED,
					  0);
	if (ret)
		goto unlock_guest;

	hyp_poison_page(phys);
	WARN_ON(host_stage2_set_owner_locked(phys, PAGE_SIZE, PKVM_ID_HOST));
unlock_guest:
	guest_unlock_component(vm);
unlock_host:
	host_unlock_component();
	hyp_spin_unlock(&vm_table_lock);

	return ret;
}

int __pkvm_host_reclaim_page_guest(u64 gfn, struct pkvm_hyp_vm *vm)
{
	u64 ipa = hyp_pfn_to_phys(gfn);
	kvm_pte_t pte;
	u64 phys;
	int ret;

	host_lock_component();
	guest_lock_component(vm);

	ret = get_valid_guest_pte(vm, ipa, &pte, &phys);
	if (ret)
		goto unlock;

	switch (guest_get_page_state(pte, ipa)) {
	case PKVM_PAGE_OWNED:
		WARN_ON(__host_check_page_state_range(phys, PAGE_SIZE, PKVM_NOPAGE));
		hyp_poison_page(phys);
		break;
	case PKVM_PAGE_SHARED_OWNED:
		WARN_ON(__host_check_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_SHARED_BORROWED));
		break;
	default:
		ret = -EPERM;
		goto unlock;
	}

	WARN_ON(kvm_pgtable_stage2_unmap(&vm->pgt, ipa, PAGE_SIZE));
	WARN_ON(host_stage2_set_owner_locked(phys, PAGE_SIZE, PKVM_ID_HOST));

unlock:
	guest_unlock_component(vm);
	host_unlock_component();

	/*
	 * -EHWPOISON implies that the page was forcefully reclaimed already
	 * so return success for the GUP pin to be dropped.
	 */
	return ret && ret != -EHWPOISON ? ret : 0;
}

/*
 * The share/donate operations install at most one stage-2 leaf: a PAGE_SIZE
 * page, or a single KVM_PGTABLE_LAST_LEVEL - 1 block for shares.
 * kvm_mmu_cache_min_pages() bounds the worst-case table allocation for that:
 * it is exact for the PAGE_SIZE leaf and conservative by one for the block.
 */
static int __guest_check_pgtable_memcache(struct pkvm_hyp_vcpu *vcpu)
{
	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);

	if (vcpu->vcpu.arch.pkvm_memcache.nr_pages < kvm_mmu_cache_min_pages(vm->pgt.mmu))
		return -ENOMEM;

	return 0;
}

int __pkvm_host_donate_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu)
{
	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 ipa = hyp_pfn_to_phys(gfn);
	u64 meta;
	int ret;

	host_lock_component();
	guest_lock_component(vm);

	ret = __host_check_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_OWNED);
	if (ret)
		goto unlock;

	ret = __guest_check_page_state_range(vm, ipa, PAGE_SIZE, PKVM_NOPAGE);
	if (ret)
		goto unlock;

	ret = __guest_check_pgtable_memcache(vcpu);
	if (ret)
		goto unlock;

	meta = host_stage2_encode_gfn_meta(vm, gfn);
	WARN_ON(host_stage2_set_owner_metadata_locked(phys, PAGE_SIZE,
						      PKVM_ID_GUEST, meta));
	WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, PAGE_SIZE, phys,
				       pkvm_mkstate(KVM_PGTABLE_PROT_RWX, PKVM_PAGE_OWNED),
				       &vcpu->vcpu.arch.pkvm_memcache, 0));

unlock:
	guest_unlock_component(vm);
	host_unlock_component();

	return ret;
}

int __pkvm_host_share_guest(u64 pfn, u64 gfn, u64 nr_pages, struct pkvm_hyp_vcpu *vcpu,
			    enum kvm_pgtable_prot prot)
{
	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 ipa = hyp_pfn_to_phys(gfn);
	u64 size;
	int ret;

	if (prot & ~KVM_PGTABLE_PROT_RWX)
		return -EINVAL;

	if (!pfn_range_is_valid(pfn, nr_pages))
		return -EINVAL;

	ret = __guest_check_transition_size(phys, ipa, nr_pages, &size);
	if (ret)
		return ret;

	ret = check_range_allowed_memory(phys, phys + size);
	if (ret)
		return ret;

	host_lock_component();
	guest_lock_component(vm);

	ret = __guest_check_page_state_range(vm, ipa, size, PKVM_NOPAGE);
	if (ret)
		goto unlock;

	for_each_hyp_page(page, phys, size) {
		switch (get_host_state(page)) {
		case PKVM_PAGE_OWNED:
			continue;
		case PKVM_PAGE_SHARED_OWNED:
			if (page->host_share_guest_count == U32_MAX) {
				ret = -EBUSY;
				goto unlock;
			}

			/* Only host to np-guest multi-sharing is tolerated */
			if (page->host_share_guest_count)
				continue;

			fallthrough;
		default:
			ret = -EPERM;
			goto unlock;
		}
	}

	ret = __guest_check_pgtable_memcache(vcpu);
	if (ret)
		goto unlock;

	for_each_hyp_page(page, phys, size) {
		set_host_state(page, PKVM_PAGE_SHARED_OWNED);
		page->host_share_guest_count++;
	}

	WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, size, phys,
				       pkvm_mkstate(prot, PKVM_PAGE_SHARED_BORROWED),
				       &vcpu->vcpu.arch.pkvm_memcache, 0));

unlock:
	guest_unlock_component(vm);
	host_unlock_component();

	return ret;
}

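/*
 * Verify that @ipa (of @size, or of whatever granule is mapped when @size is
 * 0) is currently shared by the host with @vm: the guest mapping must be
 * SHARED_BORROWED and every backing host page SHARED_OWNED with a non-zero
 * host_share_guest_count. On success, return the PA via @__phys.
 */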
static int __check_host_shared_guest(struct pkvm_hyp_vm *vm, u64 *__phys, u64 ipa, u64 size)
{
	enum pkvm_page_state state;
	kvm_pte_t pte;
	u64 phys;
	s8 level;
	int ret;

	ret = kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, &level);
	if (ret)
		return ret;
	if (!kvm_pte_valid(pte))
		return -ENOENT;
	if (size && kvm_granule_size(level) != size)
		return -E2BIG;

	if (!size)
		size = kvm_granule_size(level);

	state = guest_get_page_state(pte, ipa);
	if (state != PKVM_PAGE_SHARED_BORROWED)
		return -EPERM;

	phys = kvm_pte_to_phys(pte);
	ret = check_range_allowed_memory(phys, phys + size);
	if (WARN_ON(ret))
		return ret;

	for_each_hyp_page(page, phys, size) {
		if (get_host_state(page) != PKVM_PAGE_SHARED_OWNED)
			return -EPERM;
		if (WARN_ON(!page->host_share_guest_count))
			return -EINVAL;
	}

	*__phys = phys;

	return 0;
}

int __pkvm_host_unshare_guest(u64 gfn, u64 nr_pages, struct pkvm_hyp_vm *vm)
{
	u64 ipa = hyp_pfn_to_phys(gfn);
	u64 size, phys;
	int ret;

	ret = __guest_check_transition_size(0, ipa, nr_pages, &size);
	if (ret)
		return ret;

	host_lock_component();
	guest_lock_component(vm);

	ret = __check_host_shared_guest(vm, &phys, ipa, size);
	if (ret)
		goto unlock;

	ret = kvm_pgtable_stage2_unmap(&vm->pgt, ipa, size);
	if (ret)
		goto unlock;

	for_each_hyp_page(page, phys, size) {
		/* __check_host_shared_guest() protects against underflow */
		page->host_share_guest_count--;
		if (!page->host_share_guest_count)
			set_host_state(page, PKVM_PAGE_OWNED);
	}

unlock:
	guest_unlock_component(vm);
	host_unlock_component();

	return ret;
}

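/*
 * Debug-only sanity check (CONFIG_NVHE_EL2_DEBUG): warn if @ipa is not in the
 * expected host-shared state before the caller modifies its mapping.
 */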
static void assert_host_shared_guest(struct pkvm_hyp_vm *vm, u64 ipa, u64 size)
{
	u64 phys;
	int ret;

	if (!IS_ENABLED(CONFIG_NVHE_EL2_DEBUG))
		return;

	host_lock_component();
	guest_lock_component(vm);

	ret = __check_host_shared_guest(vm, &phys, ipa, size);

	guest_unlock_component(vm);
	host_unlock_component();

	WARN_ON(ret && ret != -ENOENT);
}

int __pkvm_host_relax_perms_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu, enum kvm_pgtable_prot prot)
{
	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
	u64 ipa = hyp_pfn_to_phys(gfn);
	int ret;

	if (pkvm_hyp_vm_is_protected(vm))
		return -EPERM;

	if (prot & ~KVM_PGTABLE_PROT_RWX)
		return -EINVAL;

	assert_host_shared_guest(vm, ipa, 0);
	guest_lock_component(vm);
	ret = kvm_pgtable_stage2_relax_perms(&vm->pgt, ipa, prot, 0);
	guest_unlock_component(vm);

	return ret;
}

int __pkvm_host_wrprotect_guest(u64 gfn, u64 nr_pages, struct pkvm_hyp_vm *vm)
{
	u64 size, ipa = hyp_pfn_to_phys(gfn);
	int ret;

	if (pkvm_hyp_vm_is_protected(vm))
		return -EPERM;

	ret = __guest_check_transition_size(0, ipa, nr_pages, &size);
	if (ret)
		return ret;

	assert_host_shared_guest(vm, ipa, size);
	guest_lock_component(vm);
	ret = kvm_pgtable_stage2_wrprotect(&vm->pgt, ipa, size);
	guest_unlock_component(vm);

	return ret;
}

int __pkvm_host_test_clear_young_guest(u64 gfn, u64 nr_pages, bool mkold, struct pkvm_hyp_vm *vm)
{
	u64 size, ipa = hyp_pfn_to_phys(gfn);
	int ret;

	if (pkvm_hyp_vm_is_protected(vm))
		return -EPERM;

	ret = __guest_check_transition_size(0, ipa, nr_pages, &size);
	if (ret)
		return ret;

	assert_host_shared_guest(vm, ipa, size);
	guest_lock_component(vm);
	ret = kvm_pgtable_stage2_test_clear_young(&vm->pgt, ipa, size, mkold);
	guest_unlock_component(vm);

	return ret;
}

int __pkvm_host_mkyoung_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu)
{
	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
	u64 ipa = hyp_pfn_to_phys(gfn);

	if (pkvm_hyp_vm_is_protected(vm))
		return -EPERM;

	assert_host_shared_guest(vm, ipa, 0);
	guest_lock_component(vm);
	kvm_pgtable_stage2_mkyoung(&vm->pgt, ipa, 0);
	guest_unlock_component(vm);

	return 0;
}

#ifdef CONFIG_NVHE_EL2_DEBUG
struct pkvm_expected_state {
	enum pkvm_page_state host;
	enum pkvm_page_state hyp;
	enum pkvm_page_state guest[2]; /* [ gfn, gfn + 1 ] */
};

static struct pkvm_expected_state selftest_state;
static struct hyp_page *selftest_page;
static struct pkvm_hyp_vcpu *selftest_vcpu;

static u64 selftest_ipa(void)
{
	return BIT(selftest_vcpu->vcpu.arch.hw_mmu->pgt->ia_bits - 1);
}

static void assert_page_state(void)
{
	void *virt = hyp_page_to_virt(selftest_page);
	u64 size = PAGE_SIZE << selftest_page->order;
	struct pkvm_hyp_vcpu *vcpu = selftest_vcpu;
	u64 phys = hyp_virt_to_phys(virt);
	u64 ipa[2] = { selftest_ipa(), selftest_ipa() + PAGE_SIZE };
	struct pkvm_hyp_vm *vm;

	vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);

	host_lock_component();
	WARN_ON(__host_check_page_state_range(phys, size, selftest_state.host));
	host_unlock_component();

	hyp_lock_component();
	WARN_ON(__hyp_check_page_state_range(phys, size, selftest_state.hyp));
	hyp_unlock_component();

	guest_lock_component(vm);
	WARN_ON(__guest_check_page_state_range(vm, ipa[0], size, selftest_state.guest[0]));
	WARN_ON(__guest_check_page_state_range(vm, ipa[1], size, selftest_state.guest[1]));
	guest_unlock_component(vm);
}

#define assert_transition_res(res, fn, ...)				\
	do {								\
		WARN_ON(fn(__VA_ARGS__) != res);			\
		assert_page_state();					\
	} while (0)

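/*
 * Exercise the ownership state machine on a single test page: walk it through
 * the hyp/host/FF-A/guest share, unshare, donate and reclaim transitions and
 * check after every step that the tracked state matches expectations and that
 * forbidden transitions are rejected.
 */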
void pkvm_ownership_selftest(void *base)
{
	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_RWX;
	void *virt = hyp_alloc_pages(&host_s2_pool, 0);
	struct pkvm_hyp_vcpu *vcpu;
	u64 phys, size, pfn, gfn;
	struct pkvm_hyp_vm *vm;

	WARN_ON(!virt);
	selftest_page = hyp_virt_to_page(virt);
	selftest_page->refcount = 0;
	selftest_vcpu = vcpu = init_selftest_vm(base);
	vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);

	size = PAGE_SIZE << selftest_page->order;
	phys = hyp_virt_to_phys(virt);
	pfn = hyp_phys_to_pfn(phys);
	gfn = hyp_phys_to_pfn(selftest_ipa());

	selftest_state.host = PKVM_NOPAGE;
	selftest_state.hyp = PKVM_PAGE_OWNED;
	selftest_state.guest[0] = selftest_state.guest[1] = PKVM_NOPAGE;
	assert_page_state();
	assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_host_unshare_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_unshare_ffa, pfn, 1);
	assert_transition_res(-EPERM, hyp_pin_shared_mem, virt, virt + size);
	assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
	assert_transition_res(-ENOENT, __pkvm_host_unshare_guest, gfn, 1, vm);
	assert_transition_res(-EPERM, __pkvm_host_donate_guest, pfn, gfn, vcpu);

	selftest_state.host = PKVM_PAGE_OWNED;
	selftest_state.hyp = PKVM_NOPAGE;
	assert_transition_res(0, __pkvm_hyp_donate_host, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_unshare_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_host_unshare_ffa, pfn, 1);
	assert_transition_res(-ENOENT, __pkvm_host_unshare_guest, gfn, 1, vm);
	assert_transition_res(-EPERM, hyp_pin_shared_mem, virt, virt + size);

	selftest_state.host = PKVM_PAGE_SHARED_OWNED;
	selftest_state.hyp = PKVM_PAGE_SHARED_BORROWED;
	assert_transition_res(0, __pkvm_host_share_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
	assert_transition_res(-ENOENT, __pkvm_host_unshare_guest, gfn, 1, vm);
	assert_transition_res(-EPERM, __pkvm_host_donate_guest, pfn, gfn, vcpu);

	assert_transition_res(0, hyp_pin_shared_mem, virt, virt + size);
	assert_transition_res(0, hyp_pin_shared_mem, virt, virt + size);
	hyp_unpin_shared_mem(virt, virt + size);
	WARN_ON(hyp_page_count(virt) != 1);
	assert_transition_res(-EBUSY, __pkvm_host_unshare_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
	assert_transition_res(-ENOENT, __pkvm_host_unshare_guest, gfn, 1, vm);
	assert_transition_res(-EPERM, __pkvm_host_donate_guest, pfn, gfn, vcpu);

	hyp_unpin_shared_mem(virt, virt + size);
	assert_page_state();
	WARN_ON(hyp_page_count(virt));

	selftest_state.host = PKVM_PAGE_OWNED;
	selftest_state.hyp = PKVM_NOPAGE;
	assert_transition_res(0, __pkvm_host_unshare_hyp, pfn);

	selftest_state.host = PKVM_PAGE_SHARED_OWNED;
	selftest_state.hyp = PKVM_NOPAGE;
	assert_transition_res(0, __pkvm_host_share_ffa, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_host_unshare_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
	assert_transition_res(-ENOENT, __pkvm_host_unshare_guest, gfn, 1, vm);
	assert_transition_res(-EPERM, __pkvm_host_donate_guest, pfn, gfn, vcpu);
	assert_transition_res(-EPERM, hyp_pin_shared_mem, virt, virt + size);

	selftest_state.host = PKVM_PAGE_OWNED;
	selftest_state.hyp = PKVM_NOPAGE;
	assert_transition_res(0, __pkvm_host_unshare_ffa, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_unshare_ffa, pfn, 1);

	selftest_state.host = PKVM_PAGE_SHARED_OWNED;
	selftest_state.guest[0] = PKVM_PAGE_SHARED_BORROWED;
	assert_transition_res(0, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
	assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
	assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_host_unshare_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_donate_guest, pfn, gfn, vcpu);
	assert_transition_res(-EPERM, hyp_pin_shared_mem, virt, virt + size);

	selftest_state.guest[1] = PKVM_PAGE_SHARED_BORROWED;
	assert_transition_res(0, __pkvm_host_share_guest, pfn, gfn + 1, 1, vcpu, prot);
	WARN_ON(hyp_virt_to_page(virt)->host_share_guest_count != 2);

	selftest_state.guest[0] = PKVM_NOPAGE;
	assert_transition_res(0, __pkvm_host_unshare_guest, gfn, 1, vm);

	selftest_state.guest[1] = PKVM_NOPAGE;
	selftest_state.host = PKVM_PAGE_OWNED;
	assert_transition_res(0, __pkvm_host_unshare_guest, gfn + 1, 1, vm);

	selftest_state.host = PKVM_NOPAGE;
	selftest_state.guest[0] = PKVM_PAGE_OWNED;
	assert_transition_res(0, __pkvm_host_donate_guest, pfn, gfn, vcpu);
	assert_transition_res(-EPERM, __pkvm_host_donate_guest, pfn, gfn, vcpu);
	assert_transition_res(-EPERM, __pkvm_host_donate_guest, pfn, gfn + 1, vcpu);
	assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
	assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn + 1, 1, vcpu, prot);
	assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_host_unshare_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1);

	selftest_state.host = PKVM_PAGE_SHARED_BORROWED;
	selftest_state.guest[0] = PKVM_PAGE_SHARED_OWNED;
	assert_transition_res(0, __pkvm_guest_share_host, vcpu, gfn);
	assert_transition_res(-EPERM, __pkvm_guest_share_host, vcpu, gfn);
	assert_transition_res(-EPERM, __pkvm_host_donate_guest, pfn, gfn, vcpu);
	assert_transition_res(-EPERM, __pkvm_host_donate_guest, pfn, gfn + 1, vcpu);
	assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
	assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn + 1, 1, vcpu, prot);
	assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_host_unshare_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1);

	selftest_state.host = PKVM_NOPAGE;
	selftest_state.guest[0] = PKVM_PAGE_OWNED;
	assert_transition_res(0, __pkvm_guest_unshare_host, vcpu, gfn);
	assert_transition_res(-EPERM, __pkvm_guest_unshare_host, vcpu, gfn);
	assert_transition_res(-EPERM, __pkvm_host_donate_guest, pfn, gfn, vcpu);
	assert_transition_res(-EPERM, __pkvm_host_donate_guest, pfn, gfn + 1, vcpu);
	assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
	assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn + 1, 1, vcpu, prot);
	assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_host_unshare_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1);

	selftest_state.host = PKVM_PAGE_OWNED;
	selftest_state.guest[0] = PKVM_POISON;
	assert_transition_res(0, __pkvm_host_force_reclaim_page_guest, phys);
	assert_transition_res(-EPERM, __pkvm_host_donate_guest, pfn, gfn, vcpu);
	assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
	assert_transition_res(-EHWPOISON, __pkvm_guest_share_host, vcpu, gfn);
	assert_transition_res(-EHWPOISON, __pkvm_guest_unshare_host, vcpu, gfn);

	selftest_state.host = PKVM_NOPAGE;
	selftest_state.guest[1] = PKVM_PAGE_OWNED;
	assert_transition_res(0, __pkvm_host_donate_guest, pfn, gfn + 1, vcpu);

	selftest_state.host = PKVM_PAGE_OWNED;
	selftest_state.guest[1] = PKVM_NOPAGE;
	assert_transition_res(0, __pkvm_host_reclaim_page_guest, gfn + 1, vm);
	assert_transition_res(-EPERM, __pkvm_host_donate_guest, pfn, gfn, vcpu);
	assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);

	selftest_state.host = PKVM_NOPAGE;
	selftest_state.hyp = PKVM_PAGE_OWNED;
	assert_transition_res(0, __pkvm_host_donate_hyp, pfn, 1);

	teardown_selftest_vm();
	selftest_page->refcount = 1;
	hyp_put_page(&host_s2_pool, virt);
}
#endif