// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2020 Google LLC
 * Author: Quentin Perret <qperret@google.com>
 */

#include <linux/kvm_host.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pgtable.h>
#include <asm/kvm_pkvm.h>
#include <asm/stage2_pgtable.h>

#include <hyp/fault.h>

#include <nvhe/gfp.h>
#include <nvhe/memory.h>
#include <nvhe/mem_protect.h>
#include <nvhe/mm.h>
#include <nvhe/trap_handler.h>

#define KVM_HOST_S2_FLAGS (KVM_PGTABLE_S2_AS_S1 | KVM_PGTABLE_S2_IDMAP)

struct host_mmu host_mmu;

static struct hyp_pool host_s2_pool;

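/*
 * The guest stage-2 mm_ops callbacks below receive no VM context, so the
 * VM whose page-table is being operated on is tracked in a per-CPU
 * pointer, set for the duration of the guest lock (see
 * guest_lock_component()).
 */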
static DEFINE_PER_CPU(struct pkvm_hyp_vm *, __current_vm);
#define current_vm (*this_cpu_ptr(&__current_vm))

static void guest_lock_component(struct pkvm_hyp_vm *vm)
{
	hyp_spin_lock(&vm->lock);
	current_vm = vm;
}

static void guest_unlock_component(struct pkvm_hyp_vm *vm)
{
	current_vm = NULL;
	hyp_spin_unlock(&vm->lock);
}

static void host_lock_component(void)
{
	hyp_spin_lock(&host_mmu.lock);
}

static void host_unlock_component(void)
{
	hyp_spin_unlock(&host_mmu.lock);
}

static void hyp_lock_component(void)
{
	hyp_spin_lock(&pkvm_pgd_lock);
}

static void hyp_unlock_component(void)
{
	hyp_spin_unlock(&pkvm_pgd_lock);
}

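/*
 * Iterate over the struct hyp_page entries backing the physical range
 * [__st, __st + __sz), one page at a time.
 */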
#define for_each_hyp_page(__p, __st, __sz)				\
	for (struct hyp_page *__p = hyp_phys_to_page(__st),		\
	     *__e = __p + ((__sz) >> PAGE_SHIFT);			\
	     __p < __e; __p++)

static void *host_s2_zalloc_pages_exact(size_t size)
{
	void *addr = hyp_alloc_pages(&host_s2_pool, get_order(size));

	hyp_split_page(hyp_virt_to_page(addr));

	/*
	 * The size of concatenated PGDs is always a power of two of PAGE_SIZE,
	 * so there should be no need to free any of the tail pages to make the
	 * allocation exact.
	 */
	WARN_ON(size != (PAGE_SIZE << get_order(size)));

	return addr;
}

static void *host_s2_zalloc_page(void *pool)
{
	return hyp_alloc_pages(pool, 0);
}

static void host_s2_get_page(void *addr)
{
	hyp_get_page(&host_s2_pool, addr);
}

static void host_s2_put_page(void *addr)
{
	hyp_put_page(&host_s2_pool, addr);
}

static void host_s2_free_unlinked_table(void *addr, s8 level)
{
	kvm_pgtable_stage2_free_unlinked(&host_mmu.mm_ops, addr, level);
}

static int prepare_s2_pool(void *pgt_pool_base)
{
	unsigned long nr_pages, pfn;
	int ret;

	pfn = hyp_virt_to_pfn(pgt_pool_base);
	nr_pages = host_s2_pgtable_pages();
	ret = hyp_pool_init(&host_s2_pool, pfn, nr_pages, 0);
	if (ret)
		return ret;

	host_mmu.mm_ops = (struct kvm_pgtable_mm_ops) {
		.zalloc_pages_exact = host_s2_zalloc_pages_exact,
		.zalloc_page = host_s2_zalloc_page,
		.free_unlinked_table = host_s2_free_unlinked_table,
		.phys_to_virt = hyp_phys_to_virt,
		.virt_to_phys = hyp_virt_to_phys,
		.page_count = hyp_page_count,
		.get_page = host_s2_get_page,
		.put_page = host_s2_put_page,
	};

	return 0;
}

static void prepare_host_vtcr(void)
{
	u32 parange, phys_shift;

	/* The host stage 2 is id-mapped, so use parange for T0SZ */
	parange = kvm_get_parange(id_aa64mmfr0_el1_sys_val);
	phys_shift = id_aa64mmfr0_parange_to_phys_shift(parange);

	host_mmu.arch.mmu.vtcr = kvm_get_vtcr(id_aa64mmfr0_el1_sys_val,
					      id_aa64mmfr1_el1_sys_val, phys_shift);
}

static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot);

int kvm_host_prepare_stage2(void *pgt_pool_base)
{
	struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu;
	int ret;

	prepare_host_vtcr();
	hyp_spin_lock_init(&host_mmu.lock);
	mmu->arch = &host_mmu.arch;

	ret = prepare_s2_pool(pgt_pool_base);
	if (ret)
		return ret;

	ret = __kvm_pgtable_stage2_init(&host_mmu.pgt, mmu,
					&host_mmu.mm_ops, KVM_HOST_S2_FLAGS,
					host_stage2_force_pte_cb);
	if (ret)
		return ret;

	mmu->pgd_phys = __hyp_pa(host_mmu.pgt.pgd);
	mmu->pgt = &host_mmu.pgt;
	atomic64_set(&mmu->vmid.id, 0);

	return 0;
}

static void *guest_s2_zalloc_pages_exact(size_t size)
{
	void *addr = hyp_alloc_pages(&current_vm->pool, get_order(size));

	WARN_ON(size != (PAGE_SIZE << get_order(size)));
	hyp_split_page(hyp_virt_to_page(addr));

	return addr;
}

static void guest_s2_free_pages_exact(void *addr, unsigned long size)
{
	u8 order = get_order(size);
	unsigned int i;

	for (i = 0; i < (1 << order); i++)
		hyp_put_page(&current_vm->pool, addr + (i * PAGE_SIZE));
}

static void *guest_s2_zalloc_page(void *mc)
{
	struct hyp_page *p;
	void *addr;

	addr = hyp_alloc_pages(&current_vm->pool, 0);
	if (addr)
		return addr;

	addr = pop_hyp_memcache(mc, hyp_phys_to_virt);
	if (!addr)
		return addr;

	memset(addr, 0, PAGE_SIZE);
	p = hyp_virt_to_page(addr);
	p->refcount = 1;
	p->order = 0;

	return addr;
}

static void guest_s2_get_page(void *addr)
{
	hyp_get_page(&current_vm->pool, addr);
}

static void guest_s2_put_page(void *addr)
{
	hyp_put_page(&current_vm->pool, addr);
}

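/*
 * Apply @func to the guest range [va, va + size), page by page: each chunk
 * is temporarily mapped through the hyp fixmap, or through the PMD-sized
 * fixblock when the alignment and remaining size allow it.
 */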
static void __apply_guest_page(void *va, size_t size,
			       void (*func)(void *addr, size_t size))
{
	size += va - PTR_ALIGN_DOWN(va, PAGE_SIZE);
	va = PTR_ALIGN_DOWN(va, PAGE_SIZE);
	size = PAGE_ALIGN(size);

	while (size) {
		size_t map_size = PAGE_SIZE;
		void *map;

		if (IS_ALIGNED((unsigned long)va, PMD_SIZE) && size >= PMD_SIZE)
			map = hyp_fixblock_map(__hyp_pa(va), &map_size);
		else
			map = hyp_fixmap_map(__hyp_pa(va));

		func(map, map_size);

		if (map_size == PMD_SIZE)
			hyp_fixblock_unmap();
		else
			hyp_fixmap_unmap();

		size -= map_size;
		va += map_size;
	}
}

static void clean_dcache_guest_page(void *va, size_t size)
{
	__apply_guest_page(va, size, __clean_dcache_guest_page);
}

static void invalidate_icache_guest_page(void *va, size_t size)
{
	__apply_guest_page(va, size, __invalidate_icache_guest_page);
}

int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd)
{
	struct kvm_s2_mmu *mmu = &vm->kvm.arch.mmu;
	unsigned long nr_pages;
	int ret;

	nr_pages = kvm_pgtable_stage2_pgd_size(mmu->vtcr) >> PAGE_SHIFT;
	ret = hyp_pool_init(&vm->pool, hyp_virt_to_pfn(pgd), nr_pages, 0);
	if (ret)
		return ret;

	hyp_spin_lock_init(&vm->lock);
	vm->mm_ops = (struct kvm_pgtable_mm_ops) {
		.zalloc_pages_exact = guest_s2_zalloc_pages_exact,
		.free_pages_exact = guest_s2_free_pages_exact,
		.zalloc_page = guest_s2_zalloc_page,
		.phys_to_virt = hyp_phys_to_virt,
		.virt_to_phys = hyp_virt_to_phys,
		.page_count = hyp_page_count,
		.get_page = guest_s2_get_page,
		.put_page = guest_s2_put_page,
		.dcache_clean_inval_poc = clean_dcache_guest_page,
		.icache_inval_pou = invalidate_icache_guest_page,
	};

	guest_lock_component(vm);
	ret = __kvm_pgtable_stage2_init(mmu->pgt, mmu, &vm->mm_ops, 0, NULL);
	guest_unlock_component(vm);
	if (ret)
		return ret;

	vm->kvm.arch.mmu.pgd_phys = __hyp_pa(vm->pgt.pgd);

	return 0;
}

void reclaim_pgtable_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc)
{
	struct hyp_page *page;
	void *addr;

	/* Dump all pgtable pages in the hyp_pool */
	guest_lock_component(vm);
	kvm_pgtable_stage2_destroy(&vm->pgt);
	vm->kvm.arch.mmu.pgd_phys = 0ULL;
	guest_unlock_component(vm);

	/* Drain the hyp_pool into the memcache */
	addr = hyp_alloc_pages(&vm->pool, 0);
	while (addr) {
		page = hyp_virt_to_page(addr);
		page->refcount = 0;
		page->order = 0;
		push_hyp_memcache(mc, addr, hyp_virt_to_phys);
		WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(addr), 1));
		addr = hyp_alloc_pages(&vm->pool, 0);
	}
}

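/*
 * Enable the host stage-2 translation on the calling CPU: program
 * VTTBR/VTCR from the per-CPU init params and set HCR_EL2.VM, after which
 * host memory accesses are subject to the host stage-2 page-table.
 */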
int __pkvm_prot_finalize(void)
{
	struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu;
	struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);

	if (params->hcr_el2 & HCR_VM)
		return -EPERM;

	params->vttbr = kvm_get_vttbr(mmu);
	params->vtcr = mmu->vtcr;
	params->hcr_el2 |= HCR_VM;
	if (cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
		params->hcr_el2 |= HCR_FWB;

	/*
	 * The CMO below not only cleans the updated params to the
	 * PoC, but also provides the DSB that ensures ongoing
	 * page-table walks that have started before we trapped to EL2
	 * have completed.
	 */
	kvm_flush_dcache_to_poc(params, sizeof(*params));

	write_sysreg_hcr(params->hcr_el2);
	__load_stage2(&host_mmu.arch.mmu, &host_mmu.arch);

	/*
	 * Make sure to have an ISB before the TLB maintenance below but only
	 * when __load_stage2() doesn't include one already.
	 */
	asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT));

	/* Invalidate stale HCR bits that may be cached in TLBs */
	__tlbi(vmalls12e1);
	dsb(nsh);
	isb();

	return 0;
}

static int host_stage2_unmap_dev_all(void)
{
	struct kvm_pgtable *pgt = &host_mmu.pgt;
	struct memblock_region *reg;
	u64 addr = 0;
	int i, ret;

	/* Unmap all non-memory regions to recycle the pages */
	for (i = 0; i < hyp_memblock_nr; i++, addr = reg->base + reg->size) {
		reg = &hyp_memory[i];
		ret = kvm_pgtable_stage2_unmap(pgt, addr, reg->base - addr);
		if (ret)
			return ret;
	}
	return kvm_pgtable_stage2_unmap(pgt, addr, BIT(pgt->ia_bits) - addr);
}

/*
 * Ensure the PFN range is contained within PA-range.
 *
 * This check is also robust to overflows and is therefore a requirement before
 * using a pfn/nr_pages pair from an untrusted source.
 */
static bool pfn_range_is_valid(u64 pfn, u64 nr_pages)
{
	u64 limit = BIT(kvm_phys_shift(&host_mmu.arch.mmu) - PAGE_SHIFT);

	return pfn < limit && ((limit - pfn) >= nr_pages);
}

struct kvm_mem_range {
	u64 start;
	u64 end;
};

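/*
 * Find the memblock region containing @addr, if any. On success, @range is
 * set to the region's boundaries; otherwise it is set to the gap between
 * the neighbouring regions, i.e. the enclosing non-memory range.
 */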
static struct memblock_region *find_mem_range(phys_addr_t addr, struct kvm_mem_range *range)
{
	int cur, left = 0, right = hyp_memblock_nr;
	struct memblock_region *reg;
	phys_addr_t end;

	range->start = 0;
	range->end = ULONG_MAX;

	/* The list of memblock regions is sorted, binary search it */
	while (left < right) {
		cur = (left + right) >> 1;
		reg = &hyp_memory[cur];
		end = reg->base + reg->size;
		if (addr < reg->base) {
			right = cur;
			range->end = reg->base;
		} else if (addr >= end) {
			left = cur + 1;
			range->start = end;
		} else {
			range->start = reg->base;
			range->end = end;
			return reg;
		}
	}

	return NULL;
}

bool addr_is_memory(phys_addr_t phys)
{
	struct kvm_mem_range range;

	return !!find_mem_range(phys, &range);
}

static bool is_in_mem_range(u64 addr, struct kvm_mem_range *range)
{
	return range->start <= addr && addr < range->end;
}

static int check_range_allowed_memory(u64 start, u64 end)
{
	struct memblock_region *reg;
	struct kvm_mem_range range;

	/*
	 * Callers can't check the state of a range that overlaps memory and
	 * MMIO regions, so ensure [start, end[ is in the same kvm_mem_range.
	 */
	reg = find_mem_range(start, &range);
	if (!is_in_mem_range(end - 1, &range))
		return -EINVAL;

	if (!reg || reg->flags & MEMBLOCK_NOMAP)
		return -EPERM;

	return 0;
}

static bool range_is_memory(u64 start, u64 end)
{
	struct kvm_mem_range r;

	if (!find_mem_range(start, &r))
		return false;

	return is_in_mem_range(end - 1, &r);
}

static inline int __host_stage2_idmap(u64 start, u64 end,
				      enum kvm_pgtable_prot prot)
{
	/*
	 * We don't make permission changes to the host idmap after
	 * initialisation, so we can squash -EAGAIN to save callers
	 * having to treat it like success in the case that they try to
	 * map something that is already mapped.
	 */
	return kvm_pgtable_stage2_map(&host_mmu.pgt, start, end - start, start,
				      prot, &host_s2_pool,
				      KVM_PGTABLE_WALK_IGNORE_EAGAIN);
}

/*
 * The pool has been provided with enough pages to cover all of memory with
 * page granularity, but it is difficult to know how much of the MMIO range
 * we will need to cover upfront, so we may need to 'recycle' the pages if we
 * run out.
 */
#define host_stage2_try(fn, ...)					\
	({								\
		int __ret;						\
		hyp_assert_lock_held(&host_mmu.lock);			\
		__ret = fn(__VA_ARGS__);				\
		if (__ret == -ENOMEM) {					\
			__ret = host_stage2_unmap_dev_all();		\
			if (!__ret)					\
				__ret = fn(__VA_ARGS__);		\
		}							\
		__ret;							\
	})

static inline bool range_included(struct kvm_mem_range *child,
				  struct kvm_mem_range *parent)
{
	return parent->start <= child->start && child->end <= parent->end;
}

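/*
 * Shrink @range to the largest block-mapping granule that contains @addr
 * and still fits within the current @range, so that the fault handler can
 * install the biggest possible mapping in one go.
 */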
static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
{
	struct kvm_mem_range cur;
	kvm_pte_t pte;
	u64 granule;
	s8 level;
	int ret;

	hyp_assert_lock_held(&host_mmu.lock);
	ret = kvm_pgtable_get_leaf(&host_mmu.pgt, addr, &pte, &level);
	if (ret)
		return ret;

	if (kvm_pte_valid(pte))
		return -EEXIST;

	if (pte) {
		WARN_ON(addr_is_memory(addr) &&
			get_host_state(hyp_phys_to_page(addr)) != PKVM_NOPAGE);
		return -EPERM;
	}

	for (; level <= KVM_PGTABLE_LAST_LEVEL; level++) {
		if (!kvm_level_supports_block_mapping(level))
			continue;
		granule = kvm_granule_size(level);
		cur.start = ALIGN_DOWN(addr, granule);
		cur.end = cur.start + granule;
		if (!range_included(&cur, range) && level < KVM_PGTABLE_LAST_LEVEL)
			continue;
		*range = cur;
		return 0;
	}

	WARN_ON(1);

	return -EINVAL;
}

int host_stage2_idmap_locked(phys_addr_t addr, u64 size,
			     enum kvm_pgtable_prot prot)
{
	return host_stage2_try(__host_stage2_idmap, addr, addr + size, prot);
}

static void __host_update_page_state(phys_addr_t addr, u64 size, enum pkvm_page_state state)
{
	for_each_hyp_page(page, addr, size)
		set_host_state(page, state);
}

#define KVM_HOST_DONATION_PTE_OWNER_MASK	GENMASK(3, 1)
#define KVM_HOST_DONATION_PTE_EXTRA_MASK	GENMASK(59, 4)
static int host_stage2_set_owner_metadata_locked(phys_addr_t addr, u64 size,
						 u8 owner_id, u64 meta)
{
	kvm_pte_t annotation;
	int ret;

	if (owner_id == PKVM_ID_HOST)
		return -EINVAL;

	if (!range_is_memory(addr, addr + size))
		return -EPERM;

	if (!FIELD_FIT(KVM_HOST_DONATION_PTE_OWNER_MASK, owner_id))
		return -EINVAL;

	if (!FIELD_FIT(KVM_HOST_DONATION_PTE_EXTRA_MASK, meta))
		return -EINVAL;

	annotation = FIELD_PREP(KVM_HOST_DONATION_PTE_OWNER_MASK, owner_id) |
		     FIELD_PREP(KVM_HOST_DONATION_PTE_EXTRA_MASK, meta);
	ret = host_stage2_try(kvm_pgtable_stage2_annotate, &host_mmu.pgt,
			      addr, size, &host_s2_pool,
			      KVM_HOST_INVALID_PTE_TYPE_DONATION, annotation);
	if (!ret)
		__host_update_page_state(addr, size, PKVM_NOPAGE);

	return ret;
}

int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id)
{
	int ret = -EINVAL;

	switch (owner_id) {
	case PKVM_ID_HOST:
		if (!range_is_memory(addr, addr + size))
			return -EPERM;

		ret = host_stage2_idmap_locked(addr, size, PKVM_HOST_MEM_PROT);
		if (!ret)
			__host_update_page_state(addr, size, PKVM_PAGE_OWNED);
		break;
	case PKVM_ID_HYP:
		ret = host_stage2_set_owner_metadata_locked(addr, size,
							    owner_id, 0);
		break;
	}

	return ret;
}

#define KVM_HOST_PTE_OWNER_GUEST_HANDLE_MASK	GENMASK(15, 0)
/* We need 40 bits for the GFN to cover a 52-bit IPA with 4k pages and LPA2 */
#define KVM_HOST_PTE_OWNER_GUEST_GFN_MASK	GENMASK(55, 16)
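/*
 * Pack the owning guest's handle and the GFN of a donated page into the
 * metadata bits of the invalid host stage-2 PTE, so the page can later be
 * traced back to its owner from the host's view alone.
 */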
static u64 host_stage2_encode_gfn_meta(struct pkvm_hyp_vm *vm, u64 gfn)
{
	pkvm_handle_t handle = vm->kvm.arch.pkvm.handle;

	BUILD_BUG_ON((pkvm_handle_t)-1 > KVM_HOST_PTE_OWNER_GUEST_HANDLE_MASK);
	WARN_ON(!FIELD_FIT(KVM_HOST_PTE_OWNER_GUEST_GFN_MASK, gfn));

	return FIELD_PREP(KVM_HOST_PTE_OWNER_GUEST_HANDLE_MASK, handle) |
	       FIELD_PREP(KVM_HOST_PTE_OWNER_GUEST_GFN_MASK, gfn);
}

static int host_stage2_decode_gfn_meta(kvm_pte_t pte, struct pkvm_hyp_vm **vm,
				       u64 *gfn)
{
	pkvm_handle_t handle;
	u64 meta;

	if (WARN_ON(kvm_pte_valid(pte)))
		return -EINVAL;

	if (FIELD_GET(KVM_INVALID_PTE_TYPE_MASK, pte) !=
	    KVM_HOST_INVALID_PTE_TYPE_DONATION) {
		return -EINVAL;
	}

	if (FIELD_GET(KVM_HOST_DONATION_PTE_OWNER_MASK, pte) != PKVM_ID_GUEST)
		return -EPERM;

	meta = FIELD_GET(KVM_HOST_DONATION_PTE_EXTRA_MASK, pte);
	handle = FIELD_GET(KVM_HOST_PTE_OWNER_GUEST_HANDLE_MASK, meta);
	*vm = get_vm_by_handle(handle);
	if (!*vm) {
		/* We probably raced with teardown; try again */
		return -EAGAIN;
	}

	*gfn = FIELD_GET(KVM_HOST_PTE_OWNER_GUEST_GFN_MASK, meta);
	return 0;
}

static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot)
{
	/*
	 * Block mappings must be used with care in the host stage-2 as a
	 * kvm_pgtable_stage2_map() operation targeting a page in the range of
	 * an existing block will delete the block under the assumption that
	 * mappings in the rest of the block range can always be rebuilt lazily.
	 * That assumption is correct for the host stage-2 with RWX mappings
	 * targeting memory or RW mappings targeting MMIO ranges (see
	 * host_stage2_idmap() below which implements some of the host memory
	 * abort logic). However, this is not safe for any other mappings where
	 * the host stage-2 page-table is in fact the only place where this
	 * state is stored. In all those cases, it is safer to use page-level
	 * mappings, hence avoiding to lose the state because of side-effects in
	 * kvm_pgtable_stage2_map().
	 */
	if (range_is_memory(addr, end))
		return prot != PKVM_HOST_MEM_PROT;
	else
		return prot != PKVM_HOST_MMIO_PROT;
}

static int host_stage2_idmap(u64 addr)
{
	struct kvm_mem_range range;
	bool is_memory = !!find_mem_range(addr, &range);
	enum kvm_pgtable_prot prot;
	int ret;

	prot = is_memory ? PKVM_HOST_MEM_PROT : PKVM_HOST_MMIO_PROT;

	host_lock_component();
	ret = host_stage2_adjust_range(addr, &range);
	if (ret)
		goto unlock;

	ret = host_stage2_idmap_locked(range.start, range.end - range.start, prot);
unlock:
	host_unlock_component();

	return ret;
}

static void host_inject_mem_abort(struct kvm_cpu_context *host_ctxt)
{
	u64 ec, esr, spsr;

	esr = read_sysreg_el2(SYS_ESR);
	spsr = read_sysreg_el2(SYS_SPSR);

	/* Repaint the ESR to report a same-level fault if taken from EL1 */
	if ((spsr & PSR_MODE_MASK) != PSR_MODE_EL0t) {
		ec = ESR_ELx_EC(esr);
		if (ec == ESR_ELx_EC_DABT_LOW)
			ec = ESR_ELx_EC_DABT_CUR;
		else if (ec == ESR_ELx_EC_IABT_LOW)
			ec = ESR_ELx_EC_IABT_CUR;
		else
			WARN_ON(1);
		esr &= ~ESR_ELx_EC_MASK;
		esr |= ec << ESR_ELx_EC_SHIFT;
	}

	/*
	 * Since S1PTW should only ever be set for stage-2 faults, we're pretty
	 * much guaranteed that it won't be set in ESR_EL1 by the hardware. So,
	 * let's use that bit to allow the host abort handler to differentiate
	 * this abort from normal userspace faults.
	 *
	 * Note: although S1PTW is RES0 at EL1, it is guaranteed by the
	 * architecture to be backed by flops, so it should be safe to use.
	 */
	esr |= ESR_ELx_S1PTW;
	inject_host_exception(esr);
}

void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
{
	struct kvm_vcpu_fault_info fault;
	u64 esr, addr;

	esr = read_sysreg_el2(SYS_ESR);
	if (!__get_fault_info(esr, &fault)) {
		/*
		 * We've presumably raced with a page-table change which caused
		 * AT to fail, try again.
		 */
		return;
	}

	/*
	 * Yikes, we couldn't resolve the fault IPA. This should reinject an
	 * abort into the host when we figure out how to do that.
	 */
	BUG_ON(!(fault.hpfar_el2 & HPFAR_EL2_NS));
	addr = FIELD_GET(HPFAR_EL2_FIPA, fault.hpfar_el2) << 12;

	switch (host_stage2_idmap(addr)) {
	case -EPERM:
		host_inject_mem_abort(host_ctxt);
		fallthrough;
	case -EEXIST:
	case 0:
		break;
	default:
		BUG();
	}
}

struct check_walk_data {
	enum pkvm_page_state	desired;
	enum pkvm_page_state	(*get_page_state)(kvm_pte_t pte, u64 addr);
};

static int __check_page_state_visitor(const struct kvm_pgtable_visit_ctx *ctx,
				      enum kvm_pgtable_walk_flags visit)
{
	struct check_walk_data *d = ctx->arg;

	return d->get_page_state(ctx->old, ctx->addr) == d->desired ? 0 : -EPERM;
}

static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size,
				  struct check_walk_data *data)
{
	struct kvm_pgtable_walker walker = {
		.cb	= __check_page_state_visitor,
		.arg	= data,
		.flags	= KVM_PGTABLE_WALK_LEAF,
	};

	return kvm_pgtable_walk(pgt, addr, size, &walker);
}

static int __host_check_page_state_range(u64 addr, u64 size,
					 enum pkvm_page_state state)
{
	int ret;

	ret = check_range_allowed_memory(addr, addr + size);
	if (ret)
		return ret;

	hyp_assert_lock_held(&host_mmu.lock);

	for_each_hyp_page(page, addr, size) {
		if (get_host_state(page) != state)
			return -EPERM;
	}

	return 0;
}

static int __host_set_page_state_range(u64 addr, u64 size,
				       enum pkvm_page_state state)
{
	if (get_host_state(hyp_phys_to_page(addr)) == PKVM_NOPAGE) {
		int ret = host_stage2_idmap_locked(addr, size, PKVM_HOST_MEM_PROT);

		if (ret)
			return ret;
	}

	__host_update_page_state(addr, size, state);

	return 0;
}

static void __hyp_set_page_state_range(phys_addr_t phys, u64 size, enum pkvm_page_state state)
{
	for_each_hyp_page(page, phys, size)
		set_hyp_state(page, state);
}

static int __hyp_check_page_state_range(phys_addr_t phys, u64 size, enum pkvm_page_state state)
{
	for_each_hyp_page(page, phys, size) {
		if (get_hyp_state(page) != state)
			return -EPERM;
	}

	return 0;
}

static bool guest_pte_is_poisoned(kvm_pte_t pte)
{
	if (kvm_pte_valid(pte))
		return false;

	return FIELD_GET(KVM_INVALID_PTE_TYPE_MASK, pte) ==
	       KVM_GUEST_INVALID_PTE_TYPE_POISONED;
}

static enum pkvm_page_state guest_get_page_state(kvm_pte_t pte, u64 addr)
{
	if (guest_pte_is_poisoned(pte))
		return PKVM_POISON;

	if (!kvm_pte_valid(pte))
		return PKVM_NOPAGE;

	return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte));
}

static int __guest_check_page_state_range(struct pkvm_hyp_vm *vm, u64 addr,
					  u64 size, enum pkvm_page_state state)
{
	struct check_walk_data d = {
		.desired	= state,
		.get_page_state	= guest_get_page_state,
	};

	hyp_assert_lock_held(&vm->lock);
	return check_page_state_range(&vm->pgt, addr, size, &d);
}

static int get_valid_guest_pte(struct pkvm_hyp_vm *vm, u64 ipa, kvm_pte_t *ptep, u64 *physp)
{
	kvm_pte_t pte;
	u64 phys;
	s8 level;
	int ret;

	ret = kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, &level);
	if (ret)
		return ret;
	if (guest_pte_is_poisoned(pte))
		return -EHWPOISON;
	if (!kvm_pte_valid(pte))
		return -ENOENT;
	if (level != KVM_PGTABLE_LAST_LEVEL)
		return -E2BIG;

	phys = kvm_pte_to_phys(pte);
	ret = check_range_allowed_memory(phys, phys + PAGE_SIZE);
	if (WARN_ON(ret))
		return ret;

	*ptep = pte;
	*physp = phys;

	return 0;
}

int __pkvm_vcpu_in_poison_fault(struct pkvm_hyp_vcpu *hyp_vcpu)
{
	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
	kvm_pte_t pte;
	s8 level;
	u64 ipa;
	int ret;

	switch (kvm_vcpu_trap_get_class(&hyp_vcpu->vcpu)) {
	case ESR_ELx_EC_DABT_LOW:
	case ESR_ELx_EC_IABT_LOW:
		if (kvm_vcpu_trap_is_translation_fault(&hyp_vcpu->vcpu))
			break;
		fallthrough;
	default:
		return -EINVAL;
	}

	/*
	 * The host has the faulting IPA when it calls us from the guest
	 * fault handler but we retrieve it ourselves from the FAR so as
	 * to avoid exposing an "oracle" that could reveal data access
	 * patterns of the guest after initial donation of its pages.
	 */
	ipa = kvm_vcpu_get_fault_ipa(&hyp_vcpu->vcpu);
	ipa |= FAR_TO_FIPA_OFFSET(kvm_vcpu_get_hfar(&hyp_vcpu->vcpu));

	guest_lock_component(vm);
	ret = kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, &level);
	if (ret)
		goto unlock;

	if (level != KVM_PGTABLE_LAST_LEVEL) {
		ret = -EINVAL;
		goto unlock;
	}

	ret = guest_pte_is_poisoned(pte);
unlock:
	guest_unlock_component(vm);
	return ret;
}

int __pkvm_host_share_hyp(u64 pfn)
{
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 size = PAGE_SIZE;
	int ret;

	host_lock_component();
	hyp_lock_component();

	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
	if (ret)
		goto unlock;
	ret = __hyp_check_page_state_range(phys, size, PKVM_NOPAGE);
	if (ret)
		goto unlock;

	__hyp_set_page_state_range(phys, size, PKVM_PAGE_SHARED_BORROWED);
	WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED));

unlock:
	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

int __pkvm_guest_share_host(struct pkvm_hyp_vcpu *vcpu, u64 gfn)
{
	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
	u64 phys, ipa = hyp_pfn_to_phys(gfn);
	kvm_pte_t pte;
	int ret;

	host_lock_component();
	guest_lock_component(vm);

	ret = get_valid_guest_pte(vm, ipa, &pte, &phys);
	if (ret)
		goto unlock;

	ret = -EPERM;
	if (pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte)) != PKVM_PAGE_OWNED)
		goto unlock;
	if (__host_check_page_state_range(phys, PAGE_SIZE, PKVM_NOPAGE))
		goto unlock;

	ret = 0;
	WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, PAGE_SIZE, phys,
				       pkvm_mkstate(KVM_PGTABLE_PROT_RWX, PKVM_PAGE_SHARED_OWNED),
				       &vcpu->vcpu.arch.pkvm_memcache, 0));
	WARN_ON(__host_set_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_SHARED_BORROWED));
unlock:
	guest_unlock_component(vm);
	host_unlock_component();

	return ret;
}

int __pkvm_guest_unshare_host(struct pkvm_hyp_vcpu *vcpu, u64 gfn)
{
	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
	u64 meta, phys, ipa = hyp_pfn_to_phys(gfn);
	kvm_pte_t pte;
	int ret;

	host_lock_component();
	guest_lock_component(vm);

	ret = get_valid_guest_pte(vm, ipa, &pte, &phys);
	if (ret)
		goto unlock;

	ret = -EPERM;
	if (pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte)) != PKVM_PAGE_SHARED_OWNED)
		goto unlock;
	if (__host_check_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_SHARED_BORROWED))
		goto unlock;

	ret = 0;
	meta = host_stage2_encode_gfn_meta(vm, gfn);
	WARN_ON(host_stage2_set_owner_metadata_locked(phys, PAGE_SIZE,
						      PKVM_ID_GUEST, meta));
	WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, PAGE_SIZE, phys,
				       pkvm_mkstate(KVM_PGTABLE_PROT_RWX, PKVM_PAGE_OWNED),
				       &vcpu->vcpu.arch.pkvm_memcache, 0));
unlock:
	guest_unlock_component(vm);
	host_unlock_component();

	return ret;
}

int __pkvm_host_unshare_hyp(u64 pfn)
{
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 virt = (u64)__hyp_va(phys);
	u64 size = PAGE_SIZE;
	int ret;

	host_lock_component();
	hyp_lock_component();

	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
	if (ret)
		goto unlock;
	ret = __hyp_check_page_state_range(phys, size, PKVM_PAGE_SHARED_BORROWED);
	if (ret)
		goto unlock;
	if (hyp_page_count((void *)virt)) {
		ret = -EBUSY;
		goto unlock;
	}

	__hyp_set_page_state_range(phys, size, PKVM_NOPAGE);
	WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_OWNED));

unlock:
	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages)
{
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 size = PAGE_SIZE * nr_pages;
	void *virt = __hyp_va(phys);
	int ret;

	if (!pfn_range_is_valid(pfn, nr_pages))
		return -EINVAL;

	host_lock_component();
	hyp_lock_component();

	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
	if (ret)
		goto unlock;
	ret = __hyp_check_page_state_range(phys, size, PKVM_NOPAGE);
	if (ret)
		goto unlock;

	__hyp_set_page_state_range(phys, size, PKVM_PAGE_OWNED);
	WARN_ON(pkvm_create_mappings_locked(virt, virt + size, PAGE_HYP));
	WARN_ON(host_stage2_set_owner_locked(phys, size, PKVM_ID_HYP));

unlock:
	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages)
{
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 size = PAGE_SIZE * nr_pages;
	u64 virt = (u64)__hyp_va(phys);
	int ret;

	if (!pfn_range_is_valid(pfn, nr_pages))
		return -EINVAL;

	host_lock_component();
	hyp_lock_component();

	ret = __hyp_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
	if (ret)
		goto unlock;
	ret = __host_check_page_state_range(phys, size, PKVM_NOPAGE);
	if (ret)
		goto unlock;

	__hyp_set_page_state_range(phys, size, PKVM_NOPAGE);
	WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, virt, size) != size);
	WARN_ON(host_stage2_set_owner_locked(phys, size, PKVM_ID_HOST));

unlock:
	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

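/*
 * Pin a range of pages shared with the hypervisor by taking a reference on
 * each of them. A non-zero refcount makes __pkvm_host_unshare_hyp() fail
 * with -EBUSY until the range has been unpinned.
 */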
int hyp_pin_shared_mem(void *from, void *to)
{
	u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE);
	u64 end = PAGE_ALIGN((u64)to);
	u64 phys = __hyp_pa(start);
	u64 size = end - start;
	struct hyp_page *p;
	int ret;

	host_lock_component();
	hyp_lock_component();

	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
	if (ret)
		goto unlock;

	ret = __hyp_check_page_state_range(phys, size, PKVM_PAGE_SHARED_BORROWED);
	if (ret)
		goto unlock;

	for (cur = start; cur < end; cur += PAGE_SIZE) {
		p = hyp_virt_to_page(cur);
		hyp_page_ref_inc(p);
		if (p->refcount == 1)
			WARN_ON(pkvm_create_mappings_locked((void *)cur,
							    (void *)cur + PAGE_SIZE,
							    PAGE_HYP));
	}

unlock:
	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

void hyp_unpin_shared_mem(void *from, void *to)
{
	u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE);
	u64 end = PAGE_ALIGN((u64)to);
	struct hyp_page *p;

	host_lock_component();
	hyp_lock_component();

	for (cur = start; cur < end; cur += PAGE_SIZE) {
		p = hyp_virt_to_page(cur);
		if (p->refcount == 1)
			WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, cur, PAGE_SIZE) != PAGE_SIZE);
		hyp_page_ref_dec(p);
	}

	hyp_unlock_component();
	host_unlock_component();
}

int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages)
{
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 size = PAGE_SIZE * nr_pages;
	int ret;

	if (!pfn_range_is_valid(pfn, nr_pages))
		return -EINVAL;

	host_lock_component();
	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
	if (!ret)
		ret = __host_set_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
	host_unlock_component();

	return ret;
}

int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages)
{
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 size = PAGE_SIZE * nr_pages;
	int ret;

	if (!pfn_range_is_valid(pfn, nr_pages))
		return -EINVAL;

	host_lock_component();
	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
	if (!ret)
		ret = __host_set_page_state_range(phys, size, PKVM_PAGE_OWNED);
	host_unlock_component();

	return ret;
}

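/*
 * Compute the size of an ownership transition: either a single page, or
 * exactly one block at the second-to-last page-table level with @phys and
 * @ipa suitably aligned. Anything else is rejected.
 */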
static int __guest_check_transition_size(u64 phys, u64 ipa, u64 nr_pages, u64 *size)
{
	size_t block_size;

	if (nr_pages == 1) {
		*size = PAGE_SIZE;
		return 0;
	}

	/* We only support huge mappings at the second-to-last level */
	block_size = kvm_granule_size(KVM_PGTABLE_LAST_LEVEL - 1);

	if (nr_pages != block_size >> PAGE_SHIFT)
		return -EINVAL;

	if (!IS_ALIGNED(phys | ipa, block_size))
		return -EINVAL;

	*size = block_size;
	return 0;
}

static void hyp_poison_page(phys_addr_t phys)
{
	void *addr = hyp_fixmap_map(phys);

	memset(addr, 0, PAGE_SIZE);
	/*
	 * Prefer kvm_flush_dcache_to_poc() over __clean_dcache_guest_page()
	 * here as the latter may elide the CMO under the assumption that FWB
	 * will be enabled on CPUs that support it. This is incorrect for the
	 * host stage-2 and would otherwise lead to a malicious host potentially
	 * being able to read the contents of newly reclaimed guest pages.
	 */
	kvm_flush_dcache_to_poc(addr, PAGE_SIZE);
	hyp_fixmap_unmap();
}

static int host_stage2_get_guest_info(phys_addr_t phys, struct pkvm_hyp_vm **vm,
				      u64 *gfn)
{
	enum pkvm_page_state state;
	kvm_pte_t pte;
	s8 level;
	int ret;

	if (!addr_is_memory(phys))
		return -EFAULT;

	state = get_host_state(hyp_phys_to_page(phys));
	switch (state) {
	case PKVM_PAGE_OWNED:
	case PKVM_PAGE_SHARED_OWNED:
	case PKVM_PAGE_SHARED_BORROWED:
		/* The access should no longer fault; try again. */
		return -EAGAIN;
	case PKVM_NOPAGE:
		break;
	default:
		return -EPERM;
	}

	ret = kvm_pgtable_get_leaf(&host_mmu.pgt, phys, &pte, &level);
	if (ret)
		return ret;

	if (WARN_ON(level != KVM_PGTABLE_LAST_LEVEL))
		return -EINVAL;

	return host_stage2_decode_gfn_meta(pte, vm, gfn);
}

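/*
 * Reclaim a page donated to a guest without the guest's cooperation: the
 * owner is looked up via the host stage-2 metadata, the guest PTE is
 * annotated as poisoned, the page contents are scrubbed, and ownership
 * returns to the host.
 */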
int __pkvm_host_force_reclaim_page_guest(phys_addr_t phys)
{
	struct pkvm_hyp_vm *vm;
	u64 gfn, ipa, pa;
	kvm_pte_t pte;
	int ret;

	phys &= PAGE_MASK;

	hyp_spin_lock(&vm_table_lock);
	host_lock_component();

	ret = host_stage2_get_guest_info(phys, &vm, &gfn);
	if (ret)
		goto unlock_host;

	ipa = hyp_pfn_to_phys(gfn);
	guest_lock_component(vm);
	ret = get_valid_guest_pte(vm, ipa, &pte, &pa);
	if (ret)
		goto unlock_guest;

	WARN_ON(pa != phys);
	if (guest_get_page_state(pte, ipa) != PKVM_PAGE_OWNED) {
		ret = -EPERM;
		goto unlock_guest;
	}

	/* We really shouldn't be allocating, so don't pass a memcache */
	ret = kvm_pgtable_stage2_annotate(&vm->pgt, ipa, PAGE_SIZE, NULL,
					  KVM_GUEST_INVALID_PTE_TYPE_POISONED,
					  0);
	if (ret)
		goto unlock_guest;

	hyp_poison_page(phys);
	WARN_ON(host_stage2_set_owner_locked(phys, PAGE_SIZE, PKVM_ID_HOST));
unlock_guest:
	guest_unlock_component(vm);
unlock_host:
	host_unlock_component();
	hyp_spin_unlock(&vm_table_lock);

	return ret;
}

int __pkvm_host_reclaim_page_guest(u64 gfn, struct pkvm_hyp_vm *vm)
{
	u64 ipa = hyp_pfn_to_phys(gfn);
	kvm_pte_t pte;
	u64 phys;
	int ret;

	host_lock_component();
	guest_lock_component(vm);

	ret = get_valid_guest_pte(vm, ipa, &pte, &phys);
	if (ret)
		goto unlock;

	switch (guest_get_page_state(pte, ipa)) {
	case PKVM_PAGE_OWNED:
		WARN_ON(__host_check_page_state_range(phys, PAGE_SIZE, PKVM_NOPAGE));
		hyp_poison_page(phys);
		break;
	case PKVM_PAGE_SHARED_OWNED:
		WARN_ON(__host_check_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_SHARED_BORROWED));
		break;
	default:
		ret = -EPERM;
		goto unlock;
	}

	WARN_ON(kvm_pgtable_stage2_unmap(&vm->pgt, ipa, PAGE_SIZE));
	WARN_ON(host_stage2_set_owner_locked(phys, PAGE_SIZE, PKVM_ID_HOST));

unlock:
	guest_unlock_component(vm);
	host_unlock_component();

	/*
	 * -EHWPOISON implies that the page was forcefully reclaimed already
	 * so return success for the GUP pin to be dropped.
	 */
	return ret && ret != -EHWPOISON ? ret : 0;
}

int __pkvm_host_donate_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu)
{
	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 ipa = hyp_pfn_to_phys(gfn);
	u64 meta;
	int ret;

	host_lock_component();
	guest_lock_component(vm);

	ret = __host_check_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_OWNED);
	if (ret)
		goto unlock;

	ret = __guest_check_page_state_range(vm, ipa, PAGE_SIZE, PKVM_NOPAGE);
	if (ret)
		goto unlock;

	meta = host_stage2_encode_gfn_meta(vm, gfn);
	WARN_ON(host_stage2_set_owner_metadata_locked(phys, PAGE_SIZE,
						      PKVM_ID_GUEST, meta));
	WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, PAGE_SIZE, phys,
				       pkvm_mkstate(KVM_PGTABLE_PROT_RWX, PKVM_PAGE_OWNED),
				       &vcpu->vcpu.arch.pkvm_memcache, 0));

unlock:
	guest_unlock_component(vm);
	host_unlock_component();

	return ret;
}

int __pkvm_host_share_guest(u64 pfn, u64 gfn, u64 nr_pages, struct pkvm_hyp_vcpu *vcpu,
			    enum kvm_pgtable_prot prot)
{
	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 ipa = hyp_pfn_to_phys(gfn);
	u64 size;
	int ret;

	if (prot & ~KVM_PGTABLE_PROT_RWX)
		return -EINVAL;

	if (!pfn_range_is_valid(pfn, nr_pages))
		return -EINVAL;

	ret = __guest_check_transition_size(phys, ipa, nr_pages, &size);
	if (ret)
		return ret;

	ret = check_range_allowed_memory(phys, phys + size);
	if (ret)
		return ret;

	host_lock_component();
	guest_lock_component(vm);

	ret = __guest_check_page_state_range(vm, ipa, size, PKVM_NOPAGE);
	if (ret)
		goto unlock;

	for_each_hyp_page(page, phys, size) {
		switch (get_host_state(page)) {
		case PKVM_PAGE_OWNED:
			continue;
		case PKVM_PAGE_SHARED_OWNED:
			if (page->host_share_guest_count == U32_MAX) {
				ret = -EBUSY;
				goto unlock;
			}

			/* Only host to np-guest multi-sharing is tolerated */
			if (page->host_share_guest_count)
				continue;

			fallthrough;
		default:
			ret = -EPERM;
			goto unlock;
		}
	}

	for_each_hyp_page(page, phys, size) {
		set_host_state(page, PKVM_PAGE_SHARED_OWNED);
		page->host_share_guest_count++;
	}

	WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, size, phys,
				       pkvm_mkstate(prot, PKVM_PAGE_SHARED_BORROWED),
				       &vcpu->vcpu.arch.pkvm_memcache, 0));

unlock:
	guest_unlock_component(vm);
	host_unlock_component();

	return ret;
}

static int __check_host_shared_guest(struct pkvm_hyp_vm *vm, u64 *__phys, u64 ipa, u64 size)
{
	enum pkvm_page_state state;
	kvm_pte_t pte;
	u64 phys;
	s8 level;
	int ret;

	ret = kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, &level);
	if (ret)
		return ret;
	if (!kvm_pte_valid(pte))
		return -ENOENT;
	if (size && kvm_granule_size(level) != size)
		return -E2BIG;

	if (!size)
		size = kvm_granule_size(level);

	state = guest_get_page_state(pte, ipa);
	if (state != PKVM_PAGE_SHARED_BORROWED)
		return -EPERM;

	phys = kvm_pte_to_phys(pte);
	ret = check_range_allowed_memory(phys, phys + size);
	if (WARN_ON(ret))
		return ret;

	for_each_hyp_page(page, phys, size) {
		if (get_host_state(page) != PKVM_PAGE_SHARED_OWNED)
			return -EPERM;
		if (WARN_ON(!page->host_share_guest_count))
			return -EINVAL;
	}

	*__phys = phys;

	return 0;
}

int __pkvm_host_unshare_guest(u64 gfn, u64 nr_pages, struct pkvm_hyp_vm *vm)
{
	u64 ipa = hyp_pfn_to_phys(gfn);
	u64 size, phys;
	int ret;

	ret = __guest_check_transition_size(0, ipa, nr_pages, &size);
	if (ret)
		return ret;

	host_lock_component();
	guest_lock_component(vm);

	ret = __check_host_shared_guest(vm, &phys, ipa, size);
	if (ret)
		goto unlock;

	ret = kvm_pgtable_stage2_unmap(&vm->pgt, ipa, size);
	if (ret)
		goto unlock;

	for_each_hyp_page(page, phys, size) {
		/* __check_host_shared_guest() protects against underflow */
		page->host_share_guest_count--;
		if (!page->host_share_guest_count)
			set_host_state(page, PKVM_PAGE_OWNED);
	}

unlock:
	guest_unlock_component(vm);
	host_unlock_component();

	return ret;
}

static void assert_host_shared_guest(struct pkvm_hyp_vm *vm, u64 ipa, u64 size)
{
	u64 phys;
	int ret;

	if (!IS_ENABLED(CONFIG_NVHE_EL2_DEBUG))
		return;

	host_lock_component();
	guest_lock_component(vm);

	ret = __check_host_shared_guest(vm, &phys, ipa, size);

	guest_unlock_component(vm);
	host_unlock_component();

	WARN_ON(ret && ret != -ENOENT);
}

int __pkvm_host_relax_perms_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu, enum kvm_pgtable_prot prot)
{
	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
	u64 ipa = hyp_pfn_to_phys(gfn);
	int ret;

	if (pkvm_hyp_vm_is_protected(vm))
		return -EPERM;

	if (prot & ~KVM_PGTABLE_PROT_RWX)
		return -EINVAL;

	assert_host_shared_guest(vm, ipa, 0);
	guest_lock_component(vm);
	ret = kvm_pgtable_stage2_relax_perms(&vm->pgt, ipa, prot, 0);
	guest_unlock_component(vm);

	return ret;
}

int __pkvm_host_wrprotect_guest(u64 gfn, u64 nr_pages, struct pkvm_hyp_vm *vm)
{
	u64 size, ipa = hyp_pfn_to_phys(gfn);
	int ret;

	if (pkvm_hyp_vm_is_protected(vm))
		return -EPERM;

	ret = __guest_check_transition_size(0, ipa, nr_pages, &size);
	if (ret)
		return ret;

	assert_host_shared_guest(vm, ipa, size);
	guest_lock_component(vm);
	ret = kvm_pgtable_stage2_wrprotect(&vm->pgt, ipa, size);
	guest_unlock_component(vm);

	return ret;
}

int __pkvm_host_test_clear_young_guest(u64 gfn, u64 nr_pages, bool mkold, struct pkvm_hyp_vm *vm)
{
	u64 size, ipa = hyp_pfn_to_phys(gfn);
	int ret;

	if (pkvm_hyp_vm_is_protected(vm))
		return -EPERM;

	ret = __guest_check_transition_size(0, ipa, nr_pages, &size);
	if (ret)
		return ret;

	assert_host_shared_guest(vm, ipa, size);
	guest_lock_component(vm);
	ret = kvm_pgtable_stage2_test_clear_young(&vm->pgt, ipa, size, mkold);
	guest_unlock_component(vm);

	return ret;
}

int __pkvm_host_mkyoung_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu)
{
	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
	u64 ipa = hyp_pfn_to_phys(gfn);

	if (pkvm_hyp_vm_is_protected(vm))
		return -EPERM;

	assert_host_shared_guest(vm, ipa, 0);
	guest_lock_component(vm);
	kvm_pgtable_stage2_mkyoung(&vm->pgt, ipa, 0);
	guest_unlock_component(vm);

	return 0;
}

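/*
 * Page ownership selftest: drives a single page through the host/hyp/guest
 * sharing and donation transitions implemented above, checking the tracked
 * page state after every step. Only built for CONFIG_NVHE_EL2_DEBUG.
 */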
#ifdef CONFIG_NVHE_EL2_DEBUG
struct pkvm_expected_state {
	enum pkvm_page_state host;
	enum pkvm_page_state hyp;
	enum pkvm_page_state guest[2]; /* [ gfn, gfn + 1 ] */
};

static struct pkvm_expected_state selftest_state;
static struct hyp_page *selftest_page;
static struct pkvm_hyp_vcpu *selftest_vcpu;

static u64 selftest_ipa(void)
{
	return BIT(selftest_vcpu->vcpu.arch.hw_mmu->pgt->ia_bits - 1);
}

static void assert_page_state(void)
{
	void *virt = hyp_page_to_virt(selftest_page);
	u64 size = PAGE_SIZE << selftest_page->order;
	struct pkvm_hyp_vcpu *vcpu = selftest_vcpu;
	u64 phys = hyp_virt_to_phys(virt);
	u64 ipa[2] = { selftest_ipa(), selftest_ipa() + PAGE_SIZE };
	struct pkvm_hyp_vm *vm;

	vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);

	host_lock_component();
	WARN_ON(__host_check_page_state_range(phys, size, selftest_state.host));
	host_unlock_component();

	hyp_lock_component();
	WARN_ON(__hyp_check_page_state_range(phys, size, selftest_state.hyp));
	hyp_unlock_component();

	guest_lock_component(vm);
	WARN_ON(__guest_check_page_state_range(vm, ipa[0], size, selftest_state.guest[0]));
	WARN_ON(__guest_check_page_state_range(vm, ipa[1], size, selftest_state.guest[1]));
	guest_unlock_component(vm);
}

#define assert_transition_res(res, fn, ...)				\
	do {								\
		WARN_ON(fn(__VA_ARGS__) != res);			\
		assert_page_state();					\
	} while (0)

void pkvm_ownership_selftest(void *base)
{
	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_RWX;
	void *virt = hyp_alloc_pages(&host_s2_pool, 0);
	struct pkvm_hyp_vcpu *vcpu;
	u64 phys, size, pfn, gfn;
	struct pkvm_hyp_vm *vm;

	WARN_ON(!virt);
	selftest_page = hyp_virt_to_page(virt);
	selftest_page->refcount = 0;
	selftest_vcpu = vcpu = init_selftest_vm(base);
	vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);

	size = PAGE_SIZE << selftest_page->order;
	phys = hyp_virt_to_phys(virt);
	pfn = hyp_phys_to_pfn(phys);
	gfn = hyp_phys_to_pfn(selftest_ipa());

	selftest_state.host = PKVM_NOPAGE;
	selftest_state.hyp = PKVM_PAGE_OWNED;
	selftest_state.guest[0] = selftest_state.guest[1] = PKVM_NOPAGE;
	assert_page_state();
	assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_host_unshare_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_unshare_ffa, pfn, 1);
	assert_transition_res(-EPERM, hyp_pin_shared_mem, virt, virt + size);
	assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
	assert_transition_res(-ENOENT, __pkvm_host_unshare_guest, gfn, 1, vm);
	assert_transition_res(-EPERM, __pkvm_host_donate_guest, pfn, gfn, vcpu);

	selftest_state.host = PKVM_PAGE_OWNED;
	selftest_state.hyp = PKVM_NOPAGE;
	assert_transition_res(0, __pkvm_hyp_donate_host, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_unshare_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_host_unshare_ffa, pfn, 1);
	assert_transition_res(-ENOENT, __pkvm_host_unshare_guest, gfn, 1, vm);
	assert_transition_res(-EPERM, hyp_pin_shared_mem, virt, virt + size);

	selftest_state.host = PKVM_PAGE_SHARED_OWNED;
	selftest_state.hyp = PKVM_PAGE_SHARED_BORROWED;
	assert_transition_res(0, __pkvm_host_share_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
	assert_transition_res(-ENOENT, __pkvm_host_unshare_guest, gfn, 1, vm);
	assert_transition_res(-EPERM, __pkvm_host_donate_guest, pfn, gfn, vcpu);

	assert_transition_res(0, hyp_pin_shared_mem, virt, virt + size);
	assert_transition_res(0, hyp_pin_shared_mem, virt, virt + size);
	hyp_unpin_shared_mem(virt, virt + size);
	WARN_ON(hyp_page_count(virt) != 1);
	assert_transition_res(-EBUSY, __pkvm_host_unshare_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
	assert_transition_res(-ENOENT, __pkvm_host_unshare_guest, gfn, 1, vm);
	assert_transition_res(-EPERM, __pkvm_host_donate_guest, pfn, gfn, vcpu);

	hyp_unpin_shared_mem(virt, virt + size);
	assert_page_state();
	WARN_ON(hyp_page_count(virt));

	selftest_state.host = PKVM_PAGE_OWNED;
	selftest_state.hyp = PKVM_NOPAGE;
	assert_transition_res(0, __pkvm_host_unshare_hyp, pfn);

	selftest_state.host = PKVM_PAGE_SHARED_OWNED;
	selftest_state.hyp = PKVM_NOPAGE;
	assert_transition_res(0, __pkvm_host_share_ffa, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_host_unshare_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
	assert_transition_res(-ENOENT, __pkvm_host_unshare_guest, gfn, 1, vm);
	assert_transition_res(-EPERM, __pkvm_host_donate_guest, pfn, gfn, vcpu);
	assert_transition_res(-EPERM, hyp_pin_shared_mem, virt, virt + size);

	selftest_state.host = PKVM_PAGE_OWNED;
	selftest_state.hyp = PKVM_NOPAGE;
	assert_transition_res(0, __pkvm_host_unshare_ffa, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_unshare_ffa, pfn, 1);

	selftest_state.host = PKVM_PAGE_SHARED_OWNED;
	selftest_state.guest[0] = PKVM_PAGE_SHARED_BORROWED;
	assert_transition_res(0, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
	assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
	assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_host_unshare_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_donate_guest, pfn, gfn, vcpu);
	assert_transition_res(-EPERM, hyp_pin_shared_mem, virt, virt + size);

	selftest_state.guest[1] = PKVM_PAGE_SHARED_BORROWED;
	assert_transition_res(0, __pkvm_host_share_guest, pfn, gfn + 1, 1, vcpu, prot);
	WARN_ON(hyp_virt_to_page(virt)->host_share_guest_count != 2);

	selftest_state.guest[0] = PKVM_NOPAGE;
	assert_transition_res(0, __pkvm_host_unshare_guest, gfn, 1, vm);

	selftest_state.guest[1] = PKVM_NOPAGE;
	selftest_state.host = PKVM_PAGE_OWNED;
	assert_transition_res(0, __pkvm_host_unshare_guest, gfn + 1, 1, vm);

	selftest_state.host = PKVM_NOPAGE;
	selftest_state.guest[0] = PKVM_PAGE_OWNED;
	assert_transition_res(0, __pkvm_host_donate_guest, pfn, gfn, vcpu);
	assert_transition_res(-EPERM, __pkvm_host_donate_guest, pfn, gfn, vcpu);
	assert_transition_res(-EPERM, __pkvm_host_donate_guest, pfn, gfn + 1, vcpu);
	assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
	assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn + 1, 1, vcpu, prot);
	assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_host_unshare_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1);

	selftest_state.host = PKVM_PAGE_SHARED_BORROWED;
	selftest_state.guest[0] = PKVM_PAGE_SHARED_OWNED;
	assert_transition_res(0, __pkvm_guest_share_host, vcpu, gfn);
	assert_transition_res(-EPERM, __pkvm_guest_share_host, vcpu, gfn);
	assert_transition_res(-EPERM, __pkvm_host_donate_guest, pfn, gfn, vcpu);
	assert_transition_res(-EPERM, __pkvm_host_donate_guest, pfn, gfn + 1, vcpu);
	assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
	assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn + 1, 1, vcpu, prot);
	assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_host_unshare_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1);

	selftest_state.host = PKVM_NOPAGE;
	selftest_state.guest[0] = PKVM_PAGE_OWNED;
	assert_transition_res(0, __pkvm_guest_unshare_host, vcpu, gfn);
	assert_transition_res(-EPERM, __pkvm_guest_unshare_host, vcpu, gfn);
	assert_transition_res(-EPERM, __pkvm_host_donate_guest, pfn, gfn, vcpu);
	assert_transition_res(-EPERM, __pkvm_host_donate_guest, pfn, gfn + 1, vcpu);
	assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
	assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn + 1, 1, vcpu, prot);
	assert_transition_res(-EPERM, __pkvm_host_share_ffa, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_donate_hyp, pfn, 1);
	assert_transition_res(-EPERM, __pkvm_host_share_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_host_unshare_hyp, pfn);
	assert_transition_res(-EPERM, __pkvm_hyp_donate_host, pfn, 1);

	selftest_state.host = PKVM_PAGE_OWNED;
	selftest_state.guest[0] = PKVM_POISON;
	assert_transition_res(0, __pkvm_host_force_reclaim_page_guest, phys);
	assert_transition_res(-EPERM, __pkvm_host_donate_guest, pfn, gfn, vcpu);
	assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
	assert_transition_res(-EHWPOISON, __pkvm_guest_share_host, vcpu, gfn);
	assert_transition_res(-EHWPOISON, __pkvm_guest_unshare_host, vcpu, gfn);

	selftest_state.host = PKVM_NOPAGE;
	selftest_state.guest[1] = PKVM_PAGE_OWNED;
	assert_transition_res(0, __pkvm_host_donate_guest, pfn, gfn + 1, vcpu);

	selftest_state.host = PKVM_PAGE_OWNED;
	selftest_state.guest[1] = PKVM_NOPAGE;
	assert_transition_res(0, __pkvm_host_reclaim_page_guest, gfn + 1, vm);
	assert_transition_res(-EPERM, __pkvm_host_donate_guest, pfn, gfn, vcpu);
	assert_transition_res(-EPERM, __pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);

	selftest_state.host = PKVM_NOPAGE;
	selftest_state.hyp = PKVM_PAGE_OWNED;
	assert_transition_res(0, __pkvm_host_donate_hyp, pfn, 1);

	teardown_selftest_vm();
	selftest_page->refcount = 1;
	hyp_put_page(&host_s2_pool, virt);
}
#endif