// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2020 Google LLC
 * Author: Quentin Perret <qperret@google.com>
 */

#include <linux/kvm_host.h>

#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pgtable.h>
#include <asm/kvm_pkvm.h>
#include <asm/stage2_pgtable.h>

#include <hyp/fault.h>

#include <nvhe/arm-smccc.h>
#include <nvhe/gfp.h>
#include <nvhe/memory.h>
#include <nvhe/mem_protect.h>
#include <nvhe/mm.h>
#include <nvhe/trap_handler.h>

#define KVM_HOST_S2_FLAGS (KVM_PGTABLE_S2_AS_S1 | KVM_PGTABLE_S2_IDMAP)

struct host_mmu host_mmu;

static struct hyp_pool host_s2_pool;

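/*
 * Tracks which guest VM's stage-2 lock is currently held on this physical
 * CPU, so that the guest_s2_* page-table callbacks below can reach the
 * per-VM page pool while operating under that lock.
 */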
static DEFINE_PER_CPU(struct pkvm_hyp_vm *, __current_vm);
#define current_vm (*this_cpu_ptr(&__current_vm))

static void pkvm_sme_dvmsync_fw_call(void)
{
	if (alternative_has_cap_unlikely(ARM64_WORKAROUND_4193714)) {
		struct arm_smccc_res res;

		/*
		 * Ignore the return value. Probing for the workaround
		 * availability took place in init_hyp_mode().
		 */
		hyp_smccc_1_1_smc(ARM_SMCCC_CPU_WORKAROUND_4193714, &res);
	}
}

static void guest_lock_component(struct pkvm_hyp_vm *vm)
{
	hyp_spin_lock(&vm->lock);
	current_vm = vm;
}

static void guest_unlock_component(struct pkvm_hyp_vm *vm)
{
	current_vm = NULL;
	hyp_spin_unlock(&vm->lock);
}

static void host_lock_component(void)
{
	hyp_spin_lock(&host_mmu.lock);
}

static void host_unlock_component(void)
{
	hyp_spin_unlock(&host_mmu.lock);
}

static void hyp_lock_component(void)
{
	hyp_spin_lock(&pkvm_pgd_lock);
}

static void hyp_unlock_component(void)
{
	hyp_spin_unlock(&pkvm_pgd_lock);
}

#define for_each_hyp_page(__p, __st, __sz)				\
	for (struct hyp_page *__p = hyp_phys_to_page(__st),		\
			     *__e = __p + ((__sz) >> PAGE_SHIFT);	\
	     __p < __e; __p++)
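
/*
 * Example: walk the struct hyp_page metadata for a physically contiguous
 * range, as __host_update_page_state() below does:
 *
 *	for_each_hyp_page(page, phys, size)
 *		set_host_state(page, state);
 */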

static void *host_s2_zalloc_pages_exact(size_t size)
{
	void *addr = hyp_alloc_pages(&host_s2_pool, get_order(size));

	hyp_split_page(hyp_virt_to_page(addr));

	/*
	 * The size of concatenated PGDs is always PAGE_SIZE times a power of
	 * two, so there should be no need to free any of the tail pages to
	 * make the allocation exact.
	 */
	WARN_ON(size != (PAGE_SIZE << get_order(size)));

	return addr;
}

static void *host_s2_zalloc_page(void *pool)
{
	return hyp_alloc_pages(pool, 0);
}

static void host_s2_get_page(void *addr)
{
	hyp_get_page(&host_s2_pool, addr);
}

static void host_s2_put_page(void *addr)
{
	hyp_put_page(&host_s2_pool, addr);
}

static void host_s2_free_unlinked_table(void *addr, s8 level)
{
	kvm_pgtable_stage2_free_unlinked(&host_mmu.mm_ops, addr, level);
}

static int prepare_s2_pool(void *pgt_pool_base)
{
	unsigned long nr_pages, pfn;
	int ret;

	pfn = hyp_virt_to_pfn(pgt_pool_base);
	nr_pages = host_s2_pgtable_pages();
	ret = hyp_pool_init(&host_s2_pool, pfn, nr_pages, 0);
	if (ret)
		return ret;

	host_mmu.mm_ops = (struct kvm_pgtable_mm_ops) {
		.zalloc_pages_exact = host_s2_zalloc_pages_exact,
		.zalloc_page = host_s2_zalloc_page,
		.free_unlinked_table = host_s2_free_unlinked_table,
		.phys_to_virt = hyp_phys_to_virt,
		.virt_to_phys = hyp_virt_to_phys,
		.page_count = hyp_page_count,
		.get_page = host_s2_get_page,
		.put_page = host_s2_put_page,
	};

	return 0;
}

static void prepare_host_vtcr(void)
{
	u32 parange, phys_shift;

	/* The host stage 2 is id-mapped, so use parange for T0SZ */
	parange = kvm_get_parange(id_aa64mmfr0_el1_sys_val);
	phys_shift = id_aa64mmfr0_parange_to_phys_shift(parange);

	host_mmu.arch.mmu.vtcr = kvm_get_vtcr(id_aa64mmfr0_el1_sys_val,
					      id_aa64mmfr1_el1_sys_val, phys_shift);
}

static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot);

int kvm_host_prepare_stage2(void *pgt_pool_base)
{
	struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu;
	int ret;

	prepare_host_vtcr();
	hyp_spin_lock_init(&host_mmu.lock);
	mmu->arch = &host_mmu.arch;

	ret = prepare_s2_pool(pgt_pool_base);
	if (ret)
		return ret;

	ret = __kvm_pgtable_stage2_init(&host_mmu.pgt, mmu,
					&host_mmu.mm_ops, KVM_HOST_S2_FLAGS,
					host_stage2_force_pte_cb);
	if (ret)
		return ret;

	mmu->pgd_phys = __hyp_pa(host_mmu.pgt.pgd);
	mmu->pgt = &host_mmu.pgt;
	atomic64_set(&mmu->vmid.id, 0);

	return 0;
}

static void *guest_s2_zalloc_pages_exact(size_t size)
{
	void *addr = hyp_alloc_pages(&current_vm->pool, get_order(size));

	WARN_ON(size != (PAGE_SIZE << get_order(size)));
	hyp_split_page(hyp_virt_to_page(addr));

	return addr;
}

static void guest_s2_free_pages_exact(void *addr, unsigned long size)
{
	u8 order = get_order(size);
	unsigned int i;

	for (i = 0; i < (1 << order); i++)
		hyp_put_page(&current_vm->pool, addr + (i * PAGE_SIZE));
}

static void *guest_s2_zalloc_page(void *mc)
{
	struct hyp_page *p;
	void *addr;

	addr = hyp_alloc_pages(&current_vm->pool, 0);
	if (addr)
		return addr;

	addr = pop_hyp_memcache(mc, hyp_phys_to_virt);
	if (!addr)
		return addr;

	memset(addr, 0, PAGE_SIZE);
	p = hyp_virt_to_page(addr);
	p->refcount = 1;
	p->order = 0;

	return addr;
}

static void guest_s2_get_page(void *addr)
{
	hyp_get_page(&current_vm->pool, addr);
}

static void guest_s2_put_page(void *addr)
{
	hyp_put_page(&current_vm->pool, addr);
}

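/*
 * Apply @func to a range of guest memory through the hyp fixmap. The range
 * is first expanded to page boundaries, then walked in PMD_SIZE chunks
 * wherever the alignment allows it, and PAGE_SIZE chunks otherwise.
 */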
static void __apply_guest_page(void *va, size_t size,
			       void (*func)(void *addr, size_t size))
{
	size += va - PTR_ALIGN_DOWN(va, PAGE_SIZE);
	va = PTR_ALIGN_DOWN(va, PAGE_SIZE);
	size = PAGE_ALIGN(size);

	while (size) {
		size_t map_size = PAGE_SIZE;
		void *map;

		if (IS_ALIGNED((unsigned long)va, PMD_SIZE) && size >= PMD_SIZE)
			map = hyp_fixblock_map(__hyp_pa(va), &map_size);
		else
			map = hyp_fixmap_map(__hyp_pa(va));

		func(map, map_size);

		if (map_size == PMD_SIZE)
			hyp_fixblock_unmap();
		else
			hyp_fixmap_unmap();

		size -= map_size;
		va += map_size;
	}
}

static void clean_dcache_guest_page(void *va, size_t size)
{
	__apply_guest_page(va, size, __clean_dcache_guest_page);
}

static void invalidate_icache_guest_page(void *va, size_t size)
{
	__apply_guest_page(va, size, __invalidate_icache_guest_page);
}

int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd)
{
	struct kvm_s2_mmu *mmu = &vm->kvm.arch.mmu;
	unsigned long nr_pages;
	int ret;

	nr_pages = kvm_pgtable_stage2_pgd_size(mmu->vtcr) >> PAGE_SHIFT;
	ret = hyp_pool_init(&vm->pool, hyp_virt_to_pfn(pgd), nr_pages, 0);
	if (ret)
		return ret;

	hyp_spin_lock_init(&vm->lock);
	vm->mm_ops = (struct kvm_pgtable_mm_ops) {
		.zalloc_pages_exact	= guest_s2_zalloc_pages_exact,
		.free_pages_exact	= guest_s2_free_pages_exact,
		.zalloc_page		= guest_s2_zalloc_page,
		.phys_to_virt		= hyp_phys_to_virt,
		.virt_to_phys		= hyp_virt_to_phys,
		.page_count		= hyp_page_count,
		.get_page		= guest_s2_get_page,
		.put_page		= guest_s2_put_page,
		.dcache_clean_inval_poc	= clean_dcache_guest_page,
		.icache_inval_pou	= invalidate_icache_guest_page,
	};

	guest_lock_component(vm);
	ret = __kvm_pgtable_stage2_init(mmu->pgt, mmu, &vm->mm_ops, 0, NULL);
	guest_unlock_component(vm);
	if (ret)
		return ret;

	vm->kvm.arch.mmu.pgd_phys = __hyp_pa(vm->pgt.pgd);

	return 0;
}

void reclaim_pgtable_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc)
{
	struct hyp_page *page;
	void *addr;

	/* Dump all pgtable pages in the hyp_pool */
	guest_lock_component(vm);
	kvm_pgtable_stage2_destroy(&vm->pgt);
	vm->kvm.arch.mmu.pgd_phys = 0ULL;
	guest_unlock_component(vm);

	/* Drain the hyp_pool into the memcache */
	addr = hyp_alloc_pages(&vm->pool, 0);
	while (addr) {
		page = hyp_virt_to_page(addr);
		page->refcount = 0;
		page->order = 0;
		push_hyp_memcache(mc, addr, hyp_virt_to_phys);
		WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(addr), 1));
		addr = hyp_alloc_pages(&vm->pool, 0);
	}
}

int __pkvm_prot_finalize(void)
{
	struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu;
	struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);

	if (params->hcr_el2 & HCR_VM)
		return -EPERM;

	params->vttbr = kvm_get_vttbr(mmu);
	params->vtcr = mmu->vtcr;
	params->hcr_el2 |= HCR_VM;
	if (cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
		params->hcr_el2 |= HCR_FWB;

	/*
	 * The CMO below not only cleans the updated params to the
	 * PoC, but also provides the DSB that ensures ongoing
	 * page-table walks that have started before we trapped to EL2
	 * have completed.
	 */
	kvm_flush_dcache_to_poc(params, sizeof(*params));

	write_sysreg_hcr(params->hcr_el2);
	__load_stage2(&host_mmu.arch.mmu, &host_mmu.arch);

	/*
	 * Make sure to have an ISB before the TLB maintenance below but only
	 * when __load_stage2() doesn't include one already.
	 */
	asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT));

	/* Invalidate stale HCR bits that may be cached in TLBs */
	__tlbi(vmalls12e1);
	dsb(nsh);
	isb();

	return 0;
}

static int host_stage2_unmap_dev_all(void)
{
	struct kvm_pgtable *pgt = &host_mmu.pgt;
	struct memblock_region *reg;
	u64 addr = 0;
	int i, ret;

	/* Unmap all non-memory regions to recycle the pages */
	for (i = 0; i < hyp_memblock_nr; i++, addr = reg->base + reg->size) {
		reg = &hyp_memory[i];
		ret = kvm_pgtable_stage2_unmap(pgt, addr, reg->base - addr);
		if (ret)
			return ret;
	}
	return kvm_pgtable_stage2_unmap(pgt, addr, BIT(pgt->ia_bits) - addr);
}

/*
 * Ensure the PFN range is contained within PA-range.
 *
 * This check is also robust to overflows and is therefore a requirement before
 * using a pfn/nr_pages pair from an untrusted source.
 */
static bool pfn_range_is_valid(u64 pfn, u64 nr_pages)
{
	u64 limit = BIT(kvm_phys_shift(&host_mmu.arch.mmu) - PAGE_SHIFT);

	return pfn < limit && ((limit - pfn) >= nr_pages);
}
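
/*
 * The subtraction form above avoids the overflow that a naive
 * "pfn + nr_pages <= limit" check would be prone to: e.g. pfn == limit - 1
 * with nr_pages == U64_MAX would wrap pfn + nr_pages to a small value and
 * pass, whereas (limit - pfn) >= nr_pages correctly rejects it.
 */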

struct kvm_mem_range {
	u64 start;
	u64 end;
};

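/*
 * Binary search the sorted hyp_memory memblock list for @addr. On a hit,
 * return the containing region and set @range to it. On a miss, return NULL
 * and set @range to the hole surrounding @addr, i.e. the gap between the
 * two neighbouring regions (bounded by 0 and ULONG_MAX at the extremes).
 */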
static struct memblock_region *find_mem_range(phys_addr_t addr, struct kvm_mem_range *range)
{
	int cur, left = 0, right = hyp_memblock_nr;
	struct memblock_region *reg;
	phys_addr_t end;

	range->start = 0;
	range->end = ULONG_MAX;

	/* The list of memblock regions is sorted, binary search it */
	while (left < right) {
		cur = (left + right) >> 1;
		reg = &hyp_memory[cur];
		end = reg->base + reg->size;
		if (addr < reg->base) {
			right = cur;
			range->end = reg->base;
		} else if (addr >= end) {
			left = cur + 1;
			range->start = end;
		} else {
			range->start = reg->base;
			range->end = end;
			return reg;
		}
	}

	return NULL;
}

bool addr_is_memory(phys_addr_t phys)
{
	struct kvm_mem_range range;

	return !!find_mem_range(phys, &range);
}

static bool is_in_mem_range(u64 addr, struct kvm_mem_range *range)
{
	return range->start <= addr && addr < range->end;
}

static int check_range_allowed_memory(u64 start, u64 end)
{
	struct memblock_region *reg;
	struct kvm_mem_range range;

	/*
	 * Callers can't check the state of a range that overlaps memory and
	 * MMIO regions, so ensure [start, end[ is in the same kvm_mem_range.
	 */
	reg = find_mem_range(start, &range);
	if (!is_in_mem_range(end - 1, &range))
		return -EINVAL;

	if (!reg || reg->flags & MEMBLOCK_NOMAP)
		return -EPERM;

	return 0;
}

static bool range_is_memory(u64 start, u64 end)
{
	struct kvm_mem_range r;

	if (!find_mem_range(start, &r))
		return false;

	return is_in_mem_range(end - 1, &r);
}

static inline int __host_stage2_idmap(u64 start, u64 end,
				      enum kvm_pgtable_prot prot)
{
	/*
	 * We don't make permission changes to the host idmap after
	 * initialisation, so we can squash -EAGAIN to save callers
	 * having to treat it like success in the case that they try to
	 * map something that is already mapped.
	 */
	return kvm_pgtable_stage2_map(&host_mmu.pgt, start, end - start, start,
				      prot, &host_s2_pool,
				      KVM_PGTABLE_WALK_IGNORE_EAGAIN);
}

/*
 * The pool has been provided with enough pages to cover all of memory with
 * page granularity, but it is difficult to know how much of the MMIO range
 * we will need to cover upfront, so we may need to 'recycle' the pages if we
 * run out.
 */
#define host_stage2_try(fn, ...)					\
	({								\
		int __ret;						\
		hyp_assert_lock_held(&host_mmu.lock);			\
		__ret = fn(__VA_ARGS__);				\
		if (__ret == -ENOMEM) {					\
			__ret = host_stage2_unmap_dev_all();		\
			if (!__ret)					\
				__ret = fn(__VA_ARGS__);		\
		}							\
		__ret;							\
	 })
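
/*
 * Typical use, as in host_stage2_idmap_locked() below -- retry the mapping
 * operation once after tearing down all device mappings to recycle their
 * page-table pages:
 *
 *	ret = host_stage2_try(__host_stage2_idmap, addr, addr + size, prot);
 */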

static inline bool range_included(struct kvm_mem_range *child,
				  struct kvm_mem_range *parent)
{
	return parent->start <= child->start && child->end <= parent->end;
}

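/*
 * Shrink *range to the largest block-mapping-aligned range containing @addr
 * that still fits within the original *range, falling back to a single page
 * at the last level, so that the subsequent idmap can use the biggest
 * possible granule without spilling into an area with a different state.
 */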
static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
{
	struct kvm_mem_range cur;
	kvm_pte_t pte;
	u64 granule;
	s8 level;
	int ret;

	hyp_assert_lock_held(&host_mmu.lock);
	ret = kvm_pgtable_get_leaf(&host_mmu.pgt, addr, &pte, &level);
	if (ret)
		return ret;

	if (kvm_pte_valid(pte))
		return -EEXIST;

	if (pte) {
		WARN_ON(addr_is_memory(addr) &&
			get_host_state(hyp_phys_to_page(addr)) != PKVM_NOPAGE);
		return -EPERM;
	}

	for (; level <= KVM_PGTABLE_LAST_LEVEL; level++) {
		if (!kvm_level_supports_block_mapping(level))
			continue;
		granule = kvm_granule_size(level);
		cur.start = ALIGN_DOWN(addr, granule);
		cur.end = cur.start + granule;
		if (!range_included(&cur, range) && level < KVM_PGTABLE_LAST_LEVEL)
			continue;
		*range = cur;
		return 0;
	}

	WARN_ON(1);

	return -EINVAL;
}

int host_stage2_idmap_locked(phys_addr_t addr, u64 size,
			     enum kvm_pgtable_prot prot)
{
	return host_stage2_try(__host_stage2_idmap, addr, addr + size, prot);
}

static void __host_update_page_state(phys_addr_t addr, u64 size, enum pkvm_page_state state)
{
	for_each_hyp_page(page, addr, size)
		set_host_state(page, state);
}

#define KVM_HOST_DONATION_PTE_OWNER_MASK	GENMASK(3, 1)
#define KVM_HOST_DONATION_PTE_EXTRA_MASK	GENMASK(59, 4)
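
/*
 * Donated pages are tracked with an invalid host stage-2 PTE (valid bit
 * clear), carrying the invalid-PTE type bits that identify it as a donation
 * (KVM_HOST_INVALID_PTE_TYPE_DONATION) and annotated as follows:
 *
 *	[3:1]	owner_id of the new owner (e.g. PKVM_ID_HYP, PKVM_ID_GUEST)
 *	[59:4]	owner-specific metadata
 */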
static int host_stage2_set_owner_metadata_locked(phys_addr_t addr, u64 size,
						 u8 owner_id, u64 meta)
{
	kvm_pte_t annotation;
	int ret;

	if (owner_id == PKVM_ID_HOST)
		return -EINVAL;

	if (!range_is_memory(addr, addr + size))
		return -EPERM;

	if (!FIELD_FIT(KVM_HOST_DONATION_PTE_OWNER_MASK, owner_id))
		return -EINVAL;

	if (!FIELD_FIT(KVM_HOST_DONATION_PTE_EXTRA_MASK, meta))
		return -EINVAL;

	annotation = FIELD_PREP(KVM_HOST_DONATION_PTE_OWNER_MASK, owner_id) |
		     FIELD_PREP(KVM_HOST_DONATION_PTE_EXTRA_MASK, meta);
	ret = host_stage2_try(kvm_pgtable_stage2_annotate, &host_mmu.pgt,
			      addr, size, &host_s2_pool,
			      KVM_HOST_INVALID_PTE_TYPE_DONATION, annotation);
	if (!ret) {
		/*
		 * Issue the firmware call after stage-2 maintenance has
		 * happened, but before the page owner has changed.
		 */
		pkvm_sme_dvmsync_fw_call();
		__host_update_page_state(addr, size, PKVM_NOPAGE);
	}

	return ret;
}

int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id)
{
	int ret = -EINVAL;

	switch (owner_id) {
	case PKVM_ID_HOST:
		if (!range_is_memory(addr, addr + size))
			return -EPERM;

		ret = host_stage2_idmap_locked(addr, size, PKVM_HOST_MEM_PROT);
		if (!ret)
			__host_update_page_state(addr, size, PKVM_PAGE_OWNED);
		break;
	case PKVM_ID_HYP:
		ret = host_stage2_set_owner_metadata_locked(addr, size,
							    owner_id, 0);
		break;
	}

	return ret;
}

#define KVM_HOST_PTE_OWNER_GUEST_HANDLE_MASK	GENMASK(15, 0)
/* We need 40 bits for the GFN to cover a 52-bit IPA with 4k pages and LPA2 */
#define KVM_HOST_PTE_OWNER_GUEST_GFN_MASK	GENMASK(55, 16)
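
/*
 * For pages donated to a guest, the 56-bit owner-specific metadata field
 * encodes which VM owns the page and where it lives in its IPA space:
 *
 *	meta[15:0]	pkvm_handle_t of the owning VM
 *	meta[55:16]	GFN of the page in the guest
 */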
static u64 host_stage2_encode_gfn_meta(struct pkvm_hyp_vm *vm, u64 gfn)
{
	pkvm_handle_t handle = vm->kvm.arch.pkvm.handle;

	BUILD_BUG_ON((pkvm_handle_t)-1 > KVM_HOST_PTE_OWNER_GUEST_HANDLE_MASK);
	WARN_ON(!FIELD_FIT(KVM_HOST_PTE_OWNER_GUEST_GFN_MASK, gfn));

	return FIELD_PREP(KVM_HOST_PTE_OWNER_GUEST_HANDLE_MASK, handle) |
	       FIELD_PREP(KVM_HOST_PTE_OWNER_GUEST_GFN_MASK, gfn);
}

static int host_stage2_decode_gfn_meta(kvm_pte_t pte, struct pkvm_hyp_vm **vm,
				       u64 *gfn)
{
	pkvm_handle_t handle;
	u64 meta;

	if (WARN_ON(kvm_pte_valid(pte)))
		return -EINVAL;

	if (FIELD_GET(KVM_INVALID_PTE_TYPE_MASK, pte) !=
	    KVM_HOST_INVALID_PTE_TYPE_DONATION) {
		return -EINVAL;
	}

	if (FIELD_GET(KVM_HOST_DONATION_PTE_OWNER_MASK, pte) != PKVM_ID_GUEST)
		return -EPERM;

	meta = FIELD_GET(KVM_HOST_DONATION_PTE_EXTRA_MASK, pte);
	handle = FIELD_GET(KVM_HOST_PTE_OWNER_GUEST_HANDLE_MASK, meta);
	*vm = get_vm_by_handle(handle);
	if (!*vm) {
		/* We probably raced with teardown; try again */
		return -EAGAIN;
	}

	*gfn = FIELD_GET(KVM_HOST_PTE_OWNER_GUEST_GFN_MASK, meta);
	return 0;
}

static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot)
{
	/*
	 * Block mappings must be used with care in the host stage-2 as a
	 * kvm_pgtable_stage2_map() operation targeting a page in the range of
	 * an existing block will delete the block under the assumption that
	 * mappings in the rest of the block range can always be rebuilt lazily.
	 * That assumption is correct for the host stage-2 with RWX mappings
	 * targeting memory or RW mappings targeting MMIO ranges (see
	 * host_stage2_idmap() below which implements some of the host memory
	 * abort logic). However, this is not safe for any other mappings where
	 * the host stage-2 page-table is in fact the only place where this
	 * state is stored. In all those cases, it is safer to use page-level
	 * mappings, hence avoiding the loss of state caused by the
	 * side-effects of kvm_pgtable_stage2_map().
	 */
	if (range_is_memory(addr, end))
		return prot != PKVM_HOST_MEM_PROT;
	else
		return prot != PKVM_HOST_MMIO_PROT;
}

static int host_stage2_idmap(u64 addr)
{
	struct kvm_mem_range range;
	bool is_memory = !!find_mem_range(addr, &range);
	enum kvm_pgtable_prot prot;
	int ret;

	prot = is_memory ? PKVM_HOST_MEM_PROT : PKVM_HOST_MMIO_PROT;

	host_lock_component();
	ret = host_stage2_adjust_range(addr, &range);
	if (ret)
		goto unlock;

	ret = host_stage2_idmap_locked(range.start, range.end - range.start, prot);
unlock:
	host_unlock_component();

	return ret;
}

static void host_inject_mem_abort(struct kvm_cpu_context *host_ctxt)
{
	u64 ec, esr, spsr;

	esr = read_sysreg_el2(SYS_ESR);
	spsr = read_sysreg_el2(SYS_SPSR);

	/* Repaint the ESR to report a same-level fault if taken from EL1 */
	if ((spsr & PSR_MODE_MASK) != PSR_MODE_EL0t) {
		ec = ESR_ELx_EC(esr);
		if (ec == ESR_ELx_EC_DABT_LOW)
			ec = ESR_ELx_EC_DABT_CUR;
		else if (ec == ESR_ELx_EC_IABT_LOW)
			ec = ESR_ELx_EC_IABT_CUR;
		else
			WARN_ON(1);
		esr &= ~ESR_ELx_EC_MASK;
		esr |= ec << ESR_ELx_EC_SHIFT;
	}

	/*
	 * Since S1PTW should only ever be set for stage-2 faults, we're pretty
	 * much guaranteed that it won't be set in ESR_EL1 by the hardware. So,
	 * let's use that bit to allow the host abort handler to differentiate
	 * this abort from normal userspace faults.
	 *
	 * Note: although S1PTW is RES0 at EL1, it is guaranteed by the
	 * architecture to be backed by flops, so it should be safe to use.
	 */
	esr |= ESR_ELx_S1PTW;
	inject_host_exception(esr);
}

void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
{
	struct kvm_vcpu_fault_info fault;
	u64 esr, addr;

	esr = read_sysreg_el2(SYS_ESR);
	if (!__get_fault_info(esr, &fault)) {
		/*
		 * We've presumably raced with a page-table change which caused
		 * AT to fail, try again.
		 */
		return;
	}

	/*
	 * Yikes, we couldn't resolve the fault IPA. This should reinject an
	 * abort into the host when we figure out how to do that.
	 */
	BUG_ON(!(fault.hpfar_el2 & HPFAR_EL2_NS));
	addr = FIELD_GET(HPFAR_EL2_FIPA, fault.hpfar_el2) << 12;

	switch (host_stage2_idmap(addr)) {
	case -EPERM:
		host_inject_mem_abort(host_ctxt);
		fallthrough;
	case -EEXIST:
	case 0:
		break;
	default:
		BUG();
	}
}

struct check_walk_data {
	enum pkvm_page_state	desired;
	enum pkvm_page_state	(*get_page_state)(kvm_pte_t pte, u64 addr);
};

static int __check_page_state_visitor(const struct kvm_pgtable_visit_ctx *ctx,
				      enum kvm_pgtable_walk_flags visit)
{
	struct check_walk_data *d = ctx->arg;

	return d->get_page_state(ctx->old, ctx->addr) == d->desired ? 0 : -EPERM;
}

static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size,
				  struct check_walk_data *data)
{
	struct kvm_pgtable_walker walker = {
		.cb	= __check_page_state_visitor,
		.arg	= data,
		.flags	= KVM_PGTABLE_WALK_LEAF,
	};

	return kvm_pgtable_walk(pgt, addr, size, &walker);
}

static int __host_check_page_state_range(u64 addr, u64 size,
					 enum pkvm_page_state state)
{
	int ret;

	ret = check_range_allowed_memory(addr, addr + size);
	if (ret)
		return ret;

	hyp_assert_lock_held(&host_mmu.lock);

	for_each_hyp_page(page, addr, size) {
		if (get_host_state(page) != state)
			return -EPERM;
	}

	return 0;
}

static int __host_set_page_state_range(u64 addr, u64 size,
				       enum pkvm_page_state state)
{
	if (get_host_state(hyp_phys_to_page(addr)) == PKVM_NOPAGE) {
		int ret = host_stage2_idmap_locked(addr, size, PKVM_HOST_MEM_PROT);

		if (ret)
			return ret;
	}

	__host_update_page_state(addr, size, state);

	return 0;
}

static void __hyp_set_page_state_range(phys_addr_t phys, u64 size, enum pkvm_page_state state)
{
	for_each_hyp_page(page, phys, size)
		set_hyp_state(page, state);
}

static int __hyp_check_page_state_range(phys_addr_t phys, u64 size, enum pkvm_page_state state)
{
	for_each_hyp_page(page, phys, size) {
		if (get_hyp_state(page) != state)
			return -EPERM;
	}

	return 0;
}

static bool guest_pte_is_poisoned(kvm_pte_t pte)
{
	if (kvm_pte_valid(pte))
		return false;

	return FIELD_GET(KVM_INVALID_PTE_TYPE_MASK, pte) ==
	       KVM_GUEST_INVALID_PTE_TYPE_POISONED;
}

static enum pkvm_page_state guest_get_page_state(kvm_pte_t pte, u64 addr)
{
	if (guest_pte_is_poisoned(pte))
		return PKVM_POISON;

	if (!kvm_pte_valid(pte))
		return PKVM_NOPAGE;

	return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte));
}

static int __guest_check_page_state_range(struct pkvm_hyp_vm *vm, u64 addr,
					  u64 size, enum pkvm_page_state state)
{
	struct check_walk_data d = {
		.desired	= state,
		.get_page_state	= guest_get_page_state,
	};

	hyp_assert_lock_held(&vm->lock);
	return check_page_state_range(&vm->pgt, addr, size, &d);
}

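/*
 * Look up the last-level guest PTE mapping @ipa and return the PTE and the
 * physical address it points to. Fails with -EHWPOISON for poisoned pages,
 * -ENOENT when nothing valid is mapped, and -E2BIG when @ipa is covered by
 * a block mapping.
 */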
static int get_valid_guest_pte(struct pkvm_hyp_vm *vm, u64 ipa, kvm_pte_t *ptep, u64 *physp)
{
	kvm_pte_t pte;
	u64 phys;
	s8 level;
	int ret;

	ret = kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, &level);
	if (ret)
		return ret;
	if (guest_pte_is_poisoned(pte))
		return -EHWPOISON;
	if (!kvm_pte_valid(pte))
		return -ENOENT;
	if (level != KVM_PGTABLE_LAST_LEVEL)
		return -E2BIG;

	phys = kvm_pte_to_phys(pte);
	ret = check_range_allowed_memory(phys, phys + PAGE_SIZE);
	if (WARN_ON(ret))
		return ret;

	*ptep = pte;
	*physp = phys;

	return 0;
}

int __pkvm_vcpu_in_poison_fault(struct pkvm_hyp_vcpu *hyp_vcpu)
{
	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
	kvm_pte_t pte;
	s8 level;
	u64 ipa;
	int ret;

	switch (kvm_vcpu_trap_get_class(&hyp_vcpu->vcpu)) {
	case ESR_ELx_EC_DABT_LOW:
	case ESR_ELx_EC_IABT_LOW:
		if (kvm_vcpu_trap_is_translation_fault(&hyp_vcpu->vcpu))
			break;
		fallthrough;
	default:
		return -EINVAL;
	}

	/*
	 * The host has the faulting IPA when it calls us from the guest
	 * fault handler but we retrieve it ourselves from the FAR so as
	 * to avoid exposing an "oracle" that could reveal data access
	 * patterns of the guest after initial donation of its pages.
	 */
	ipa = kvm_vcpu_get_fault_ipa(&hyp_vcpu->vcpu);
	ipa |= FAR_TO_FIPA_OFFSET(kvm_vcpu_get_hfar(&hyp_vcpu->vcpu));

	guest_lock_component(vm);
	ret = kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, &level);
	if (ret)
		goto unlock;

	if (level != KVM_PGTABLE_LAST_LEVEL) {
		ret = -EINVAL;
		goto unlock;
	}

	ret = guest_pte_is_poisoned(pte);
unlock:
	guest_unlock_component(vm);
	return ret;
}

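/*
 * The ownership transitions below all follow the same pattern: take the
 * locks of every component involved, check that the page state on each side
 * matches the expected precondition, and only then update both sides. The
 * WARN_ONs on the update path are justified by the preceding checks: a
 * failure there would indicate a hypervisor bug, not a recoverable error.
 */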
int __pkvm_host_share_hyp(u64 pfn)
{
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 size = PAGE_SIZE;
	int ret;

	host_lock_component();
	hyp_lock_component();

	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
	if (ret)
		goto unlock;
	ret = __hyp_check_page_state_range(phys, size, PKVM_NOPAGE);
	if (ret)
		goto unlock;

	__hyp_set_page_state_range(phys, size, PKVM_PAGE_SHARED_BORROWED);
	WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED));

unlock:
	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

int __pkvm_guest_share_host(struct pkvm_hyp_vcpu *vcpu, u64 gfn)
{
	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
	u64 phys, ipa = hyp_pfn_to_phys(gfn);
	kvm_pte_t pte;
	int ret;

	host_lock_component();
	guest_lock_component(vm);

	ret = get_valid_guest_pte(vm, ipa, &pte, &phys);
	if (ret)
		goto unlock;

	ret = -EPERM;
	if (pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte)) != PKVM_PAGE_OWNED)
		goto unlock;
	if (__host_check_page_state_range(phys, PAGE_SIZE, PKVM_NOPAGE))
		goto unlock;

	ret = 0;
	WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, PAGE_SIZE, phys,
				       pkvm_mkstate(KVM_PGTABLE_PROT_RWX, PKVM_PAGE_SHARED_OWNED),
				       &vcpu->vcpu.arch.pkvm_memcache, 0));
	WARN_ON(__host_set_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_SHARED_BORROWED));
unlock:
	guest_unlock_component(vm);
	host_unlock_component();

	return ret;
}

int __pkvm_guest_unshare_host(struct pkvm_hyp_vcpu *vcpu, u64 gfn)
{
	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
	u64 meta, phys, ipa = hyp_pfn_to_phys(gfn);
	kvm_pte_t pte;
	int ret;

	host_lock_component();
	guest_lock_component(vm);

	ret = get_valid_guest_pte(vm, ipa, &pte, &phys);
	if (ret)
		goto unlock;

	ret = -EPERM;
	if (pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte)) != PKVM_PAGE_SHARED_OWNED)
		goto unlock;
	if (__host_check_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_SHARED_BORROWED))
		goto unlock;

	ret = 0;
	meta = host_stage2_encode_gfn_meta(vm, gfn);
	WARN_ON(host_stage2_set_owner_metadata_locked(phys, PAGE_SIZE,
						      PKVM_ID_GUEST, meta));
	WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, PAGE_SIZE, phys,
				       pkvm_mkstate(KVM_PGTABLE_PROT_RWX, PKVM_PAGE_OWNED),
				       &vcpu->vcpu.arch.pkvm_memcache, 0));
unlock:
	guest_unlock_component(vm);
	host_unlock_component();

	return ret;
}

int __pkvm_host_unshare_hyp(u64 pfn)
{
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 virt = (u64)__hyp_va(phys);
	u64 size = PAGE_SIZE;
	int ret;

	host_lock_component();
	hyp_lock_component();

	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
	if (ret)
		goto unlock;
	ret = __hyp_check_page_state_range(phys, size, PKVM_PAGE_SHARED_BORROWED);
	if (ret)
		goto unlock;
	if (hyp_page_count((void *)virt)) {
		ret = -EBUSY;
		goto unlock;
	}

	__hyp_set_page_state_range(phys, size, PKVM_NOPAGE);
	WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_OWNED));

unlock:
	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages)
{
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 size = PAGE_SIZE * nr_pages;
	void *virt = __hyp_va(phys);
	int ret;

	if (!pfn_range_is_valid(pfn, nr_pages))
		return -EINVAL;

	host_lock_component();
	hyp_lock_component();

	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
	if (ret)
		goto unlock;
	ret = __hyp_check_page_state_range(phys, size, PKVM_NOPAGE);
	if (ret)
		goto unlock;

	__hyp_set_page_state_range(phys, size, PKVM_PAGE_OWNED);
	WARN_ON(pkvm_create_mappings_locked(virt, virt + size, PAGE_HYP));
	WARN_ON(host_stage2_set_owner_locked(phys, size, PKVM_ID_HYP));

unlock:
	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages)
{
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 size = PAGE_SIZE * nr_pages;
	u64 virt = (u64)__hyp_va(phys);
	int ret;

	if (!pfn_range_is_valid(pfn, nr_pages))
		return -EINVAL;

	host_lock_component();
	hyp_lock_component();

	ret = __hyp_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
	if (ret)
		goto unlock;
	ret = __host_check_page_state_range(phys, size, PKVM_NOPAGE);
	if (ret)
		goto unlock;

	__hyp_set_page_state_range(phys, size, PKVM_NOPAGE);
	WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, virt, size) != size);
	WARN_ON(host_stage2_set_owner_locked(phys, size, PKVM_ID_HOST));

unlock:
	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

int hyp_pin_shared_mem(void *from, void *to)
{
	u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE);
	u64 end = PAGE_ALIGN((u64)to);
	u64 phys = __hyp_pa(start);
	u64 size = end - start;
	struct hyp_page *p;
	int ret;

	host_lock_component();
	hyp_lock_component();

	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
	if (ret)
		goto unlock;

	ret = __hyp_check_page_state_range(phys, size, PKVM_PAGE_SHARED_BORROWED);
	if (ret)
		goto unlock;

	for (cur = start; cur < end; cur += PAGE_SIZE) {
		p = hyp_virt_to_page(cur);
		hyp_page_ref_inc(p);
		if (p->refcount == 1)
			WARN_ON(pkvm_create_mappings_locked((void *)cur,
							    (void *)cur + PAGE_SIZE,
							    PAGE_HYP));
	}

unlock:
	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

void hyp_unpin_shared_mem(void *from, void *to)
{
	u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE);
	u64 end = PAGE_ALIGN((u64)to);
	struct hyp_page *p;

	host_lock_component();
	hyp_lock_component();

	for (cur = start; cur < end; cur += PAGE_SIZE) {
		p = hyp_virt_to_page(cur);
		if (p->refcount == 1)
			WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, cur, PAGE_SIZE) != PAGE_SIZE);
		hyp_page_ref_dec(p);
	}

	hyp_unlock_component();
	host_unlock_component();
}

int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages)
{
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 size = PAGE_SIZE * nr_pages;
	int ret;

	if (!pfn_range_is_valid(pfn, nr_pages))
		return -EINVAL;

	host_lock_component();
	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
	if (!ret)
		ret = __host_set_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
	host_unlock_component();

	return ret;
}

int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages)
{
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 size = PAGE_SIZE * nr_pages;
	int ret;

	if (!pfn_range_is_valid(pfn, nr_pages))
		return -EINVAL;

	host_lock_component();
	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
	if (!ret)
		ret = __host_set_page_state_range(phys, size, PKVM_PAGE_OWNED);
	host_unlock_component();

	return ret;
}

static int __guest_check_transition_size(u64 phys, u64 ipa, u64 nr_pages, u64 *size)
{
	size_t block_size;

	if (nr_pages == 1) {
		*size = PAGE_SIZE;
		return 0;
	}

	/* We only support huge mappings at the second-to-last level */
	block_size = kvm_granule_size(KVM_PGTABLE_LAST_LEVEL - 1);

	if (nr_pages != block_size >> PAGE_SHIFT)
		return -EINVAL;

	if (!IS_ALIGNED(phys | ipa, block_size))
		return -EINVAL;

	*size = block_size;
	return 0;
}
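
/*
 * Concretely, with 4KiB pages this accepts either a single page or one
 * PMD-level block: nr_pages must be 1, or 512 with both phys and ipa
 * 2MiB-aligned.
 */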

static void hyp_poison_page(phys_addr_t phys)
{
	void *addr = hyp_fixmap_map(phys);

	memset(addr, 0, PAGE_SIZE);
	/*
	 * Prefer kvm_flush_dcache_to_poc() over __clean_dcache_guest_page()
	 * here as the latter may elide the CMO under the assumption that FWB
	 * will be enabled on CPUs that support it. This is incorrect for the
	 * host stage-2 and would otherwise lead to a malicious host potentially
	 * being able to read the contents of newly reclaimed guest pages.
	 */
	kvm_flush_dcache_to_poc(addr, PAGE_SIZE);
	hyp_fixmap_unmap();
}

static int host_stage2_get_guest_info(phys_addr_t phys, struct pkvm_hyp_vm **vm,
				      u64 *gfn)
{
	enum pkvm_page_state state;
	kvm_pte_t pte;
	s8 level;
	int ret;

	if (!addr_is_memory(phys))
		return -EFAULT;

	state = get_host_state(hyp_phys_to_page(phys));
	switch (state) {
	case PKVM_PAGE_OWNED:
	case PKVM_PAGE_SHARED_OWNED:
	case PKVM_PAGE_SHARED_BORROWED:
		/* The access should no longer fault; try again. */
		return -EAGAIN;
	case PKVM_NOPAGE:
		break;
	default:
		return -EPERM;
	}

	ret = kvm_pgtable_get_leaf(&host_mmu.pgt, phys, &pte, &level);
	if (ret)
		return ret;

	if (WARN_ON(level != KVM_PGTABLE_LAST_LEVEL))
		return -EINVAL;

	return host_stage2_decode_gfn_meta(pte, vm, gfn);
}

int __pkvm_host_force_reclaim_page_guest(phys_addr_t phys)
{
	struct pkvm_hyp_vm *vm;
	u64 gfn, ipa, pa;
	kvm_pte_t pte;
	int ret;

	phys &= PAGE_MASK;

	hyp_spin_lock(&vm_table_lock);
	host_lock_component();

	ret = host_stage2_get_guest_info(phys, &vm, &gfn);
	if (ret)
		goto unlock_host;

	ipa = hyp_pfn_to_phys(gfn);
	guest_lock_component(vm);
	ret = get_valid_guest_pte(vm, ipa, &pte, &pa);
	if (ret)
		goto unlock_guest;

	WARN_ON(pa != phys);
	if (guest_get_page_state(pte, ipa) != PKVM_PAGE_OWNED) {
		ret = -EPERM;
		goto unlock_guest;
	}

	/* We really shouldn't be allocating, so don't pass a memcache */
	ret = kvm_pgtable_stage2_annotate(&vm->pgt, ipa, PAGE_SIZE, NULL,
					  KVM_GUEST_INVALID_PTE_TYPE_POISONED,
					  0);
	if (ret)
		goto unlock_guest;

	hyp_poison_page(phys);
	WARN_ON(host_stage2_set_owner_locked(phys, PAGE_SIZE, PKVM_ID_HOST));
unlock_guest:
	guest_unlock_component(vm);
unlock_host:
	host_unlock_component();
	hyp_spin_unlock(&vm_table_lock);

	return ret;
}

int __pkvm_host_reclaim_page_guest(u64 gfn, struct pkvm_hyp_vm *vm)
{
	u64 ipa = hyp_pfn_to_phys(gfn);
	kvm_pte_t pte;
	u64 phys;
	int ret;

	host_lock_component();
	guest_lock_component(vm);

	ret = get_valid_guest_pte(vm, ipa, &pte, &phys);
	if (ret)
		goto unlock;

	switch (guest_get_page_state(pte, ipa)) {
	case PKVM_PAGE_OWNED:
		WARN_ON(__host_check_page_state_range(phys, PAGE_SIZE, PKVM_NOPAGE));
		hyp_poison_page(phys);
		break;
	case PKVM_PAGE_SHARED_OWNED:
		WARN_ON(__host_check_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_SHARED_BORROWED));
		break;
	default:
		ret = -EPERM;
		goto unlock;
	}

	WARN_ON(kvm_pgtable_stage2_unmap(&vm->pgt, ipa, PAGE_SIZE));
	WARN_ON(host_stage2_set_owner_locked(phys, PAGE_SIZE, PKVM_ID_HOST));

unlock:
	guest_unlock_component(vm);
	host_unlock_component();

	/*
	 * -EHWPOISON implies that the page was forcefully reclaimed already
	 * so return success for the GUP pin to be dropped.
	 */
	return ret && ret != -EHWPOISON ? ret : 0;
}

/*
 * Share/donate operations install at most one stage-2 leaf: a PAGE_SIZE
 * page, or a single KVM_PGTABLE_LAST_LEVEL - 1 block in the share case.
 * kvm_mmu_cache_min_pages() therefore bounds the worst-case page-table
 * allocation: it is exact for the PAGE_SIZE leaf and conservative by one
 * page for the block.
 */
static int __guest_check_pgtable_memcache(struct pkvm_hyp_vcpu *vcpu)
{
	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);

	if (vcpu->vcpu.arch.pkvm_memcache.nr_pages < kvm_mmu_cache_min_pages(vm->pgt.mmu))
		return -ENOMEM;

	return 0;
}

int __pkvm_host_donate_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu)
{
	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 ipa = hyp_pfn_to_phys(gfn);
	u64 meta;
	int ret;

	host_lock_component();
	guest_lock_component(vm);

	ret = __host_check_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_OWNED);
	if (ret)
		goto unlock;

	ret = __guest_check_page_state_range(vm, ipa, PAGE_SIZE, PKVM_NOPAGE);
	if (ret)
		goto unlock;

	ret = __guest_check_pgtable_memcache(vcpu);
	if (ret)
		goto unlock;

	meta = host_stage2_encode_gfn_meta(vm, gfn);
	WARN_ON(host_stage2_set_owner_metadata_locked(phys, PAGE_SIZE,
						      PKVM_ID_GUEST, meta));
	WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, PAGE_SIZE, phys,
				       pkvm_mkstate(KVM_PGTABLE_PROT_RWX, PKVM_PAGE_OWNED),
				       &vcpu->vcpu.arch.pkvm_memcache, 0));

unlock:
	guest_unlock_component(vm);
	host_unlock_component();

	return ret;
}

int __pkvm_host_share_guest(u64 pfn, u64 gfn, u64 nr_pages, struct pkvm_hyp_vcpu *vcpu,
			    enum kvm_pgtable_prot prot)
{
	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 ipa = hyp_pfn_to_phys(gfn);
	u64 size;
	int ret;

	if (prot & ~KVM_PGTABLE_PROT_RWX)
		return -EINVAL;

	if (!pfn_range_is_valid(pfn, nr_pages))
		return -EINVAL;

	ret = __guest_check_transition_size(phys, ipa, nr_pages, &size);
	if (ret)
		return ret;

	ret = check_range_allowed_memory(phys, phys + size);
	if (ret)
		return ret;

	host_lock_component();
	guest_lock_component(vm);

	ret = __guest_check_page_state_range(vm, ipa, size, PKVM_NOPAGE);
	if (ret)
		goto unlock;

	for_each_hyp_page(page, phys, size) {
		switch (get_host_state(page)) {
		case PKVM_PAGE_OWNED:
			continue;
		case PKVM_PAGE_SHARED_OWNED:
			if (page->host_share_guest_count == U32_MAX) {
				ret = -EBUSY;
				goto unlock;
			}

			/* Only host to np-guest multi-sharing is tolerated */
			if (page->host_share_guest_count)
				continue;

			fallthrough;
		default:
			ret = -EPERM;
			goto unlock;
		}
	}

	ret = __guest_check_pgtable_memcache(vcpu);
	if (ret)
		goto unlock;

	for_each_hyp_page(page, phys, size) {
		set_host_state(page, PKVM_PAGE_SHARED_OWNED);
		page->host_share_guest_count++;
	}

	WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, size, phys,
				       pkvm_mkstate(prot, PKVM_PAGE_SHARED_BORROWED),
				       &vcpu->vcpu.arch.pkvm_memcache, 0));

unlock:
	guest_unlock_component(vm);
	host_unlock_component();

	return ret;
}

static int __check_host_shared_guest(struct pkvm_hyp_vm *vm, u64 *__phys, u64 ipa, u64 size)
{
	enum pkvm_page_state state;
	kvm_pte_t pte;
	u64 phys;
	s8 level;
	int ret;

	ret = kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, &level);
	if (ret)
		return ret;
	if (!kvm_pte_valid(pte))
		return -ENOENT;
	if (size && kvm_granule_size(level) != size)
		return -E2BIG;

	if (!size)
		size = kvm_granule_size(level);

	state = guest_get_page_state(pte, ipa);
	if (state != PKVM_PAGE_SHARED_BORROWED)
		return -EPERM;

	phys = kvm_pte_to_phys(pte);
	ret = check_range_allowed_memory(phys, phys + size);
	if (WARN_ON(ret))
		return ret;

	for_each_hyp_page(page, phys, size) {
		if (get_host_state(page) != PKVM_PAGE_SHARED_OWNED)
			return -EPERM;
		if (WARN_ON(!page->host_share_guest_count))
			return -EINVAL;
	}

	*__phys = phys;

	return 0;
}

int __pkvm_host_unshare_guest(u64 gfn, u64 nr_pages, struct pkvm_hyp_vm *vm)
{
	u64 ipa = hyp_pfn_to_phys(gfn);
	u64 size, phys;
	int ret;

	ret = __guest_check_transition_size(0, ipa, nr_pages, &size);
	if (ret)
		return ret;

	host_lock_component();
	guest_lock_component(vm);

	ret = __check_host_shared_guest(vm, &phys, ipa, size);
	if (ret)
		goto unlock;

	ret = kvm_pgtable_stage2_unmap(&vm->pgt, ipa, size);
	if (ret)
		goto unlock;

	for_each_hyp_page(page, phys, size) {
		/* __check_host_shared_guest() protects against underflow */
		page->host_share_guest_count--;
		if (!page->host_share_guest_count)
			set_host_state(page, PKVM_PAGE_OWNED);
	}

unlock:
	guest_unlock_component(vm);
	host_unlock_component();

	return ret;
}

static void assert_host_shared_guest(struct pkvm_hyp_vm *vm, u64 ipa, u64 size)
{
	u64 phys;
	int ret;

	if (!IS_ENABLED(CONFIG_NVHE_EL2_DEBUG))
		return;

	host_lock_component();
	guest_lock_component(vm);

	ret = __check_host_shared_guest(vm, &phys, ipa, size);

	guest_unlock_component(vm);
	host_unlock_component();

	WARN_ON(ret && ret != -ENOENT);
}

int __pkvm_host_relax_perms_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu, enum kvm_pgtable_prot prot)
{
	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
	u64 ipa = hyp_pfn_to_phys(gfn);
	int ret;

	if (pkvm_hyp_vm_is_protected(vm))
		return -EPERM;

	if (prot & ~KVM_PGTABLE_PROT_RWX)
		return -EINVAL;

	assert_host_shared_guest(vm, ipa, 0);
	guest_lock_component(vm);
	ret = kvm_pgtable_stage2_relax_perms(&vm->pgt, ipa, prot, 0);
	guest_unlock_component(vm);

	return ret;
}

int __pkvm_host_wrprotect_guest(u64 gfn, u64 nr_pages, struct pkvm_hyp_vm *vm)
{
	u64 size, ipa = hyp_pfn_to_phys(gfn);
	int ret;

	if (pkvm_hyp_vm_is_protected(vm))
		return -EPERM;

	ret = __guest_check_transition_size(0, ipa, nr_pages, &size);
	if (ret)
		return ret;

	assert_host_shared_guest(vm, ipa, size);
	guest_lock_component(vm);
	ret = kvm_pgtable_stage2_wrprotect(&vm->pgt, ipa, size);
	guest_unlock_component(vm);

	return ret;
}

int __pkvm_host_test_clear_young_guest(u64 gfn, u64 nr_pages, bool mkold, struct pkvm_hyp_vm *vm)
{
	u64 size, ipa = hyp_pfn_to_phys(gfn);
	int ret;

	if (pkvm_hyp_vm_is_protected(vm))
		return -EPERM;

	ret = __guest_check_transition_size(0, ipa, nr_pages, &size);
	if (ret)
		return ret;

	assert_host_shared_guest(vm, ipa, size);
	guest_lock_component(vm);
	ret = kvm_pgtable_stage2_test_clear_young(&vm->pgt, ipa, size, mkold);
	guest_unlock_component(vm);

	return ret;
}

int __pkvm_host_mkyoung_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu)
{
	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
	u64 ipa = hyp_pfn_to_phys(gfn);

	if (pkvm_hyp_vm_is_protected(vm))
		return -EPERM;

	assert_host_shared_guest(vm, ipa, 0);
	guest_lock_component(vm);
	kvm_pgtable_stage2_mkyoung(&vm->pgt, ipa, 0);
	guest_unlock_component(vm);

	return 0;
}

#ifdef CONFIG_NVHE_EL2_DEBUG
struct pkvm_expected_state {
	enum pkvm_page_state host;
	enum pkvm_page_state hyp;
	enum pkvm_page_state guest[2]; /* [ gfn, gfn + 1 ] */
};

static struct pkvm_expected_state selftest_state;
static struct hyp_page *selftest_page;
static struct pkvm_hyp_vcpu *selftest_vcpu;

static u64 selftest_ipa(void)
{
	return BIT(selftest_vcpu->vcpu.arch.hw_mmu->pgt->ia_bits - 1);
}

static void assert_page_state(void)
{
	void *virt = hyp_page_to_virt(selftest_page);
	u64 size = PAGE_SIZE << selftest_page->order;
	struct pkvm_hyp_vcpu *vcpu = selftest_vcpu;
	u64 phys = hyp_virt_to_phys(virt);
	u64 ipa[2] = { selftest_ipa(), selftest_ipa() + PAGE_SIZE };
	struct pkvm_hyp_vm *vm;

	vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);

	host_lock_component();
	WARN_ON(__host_check_page_state_range(phys, size, selftest_state.host));
	host_unlock_component();

	hyp_lock_component();
	WARN_ON(__hyp_check_page_state_range(phys, size, selftest_state.hyp));
	hyp_unlock_component();

	guest_lock_component(vm);
	WARN_ON(__guest_check_page_state_range(vm, ipa[0], size, selftest_state.guest[0]));
	WARN_ON(__guest_check_page_state_range(vm, ipa[1], size, selftest_state.guest[1]));
	guest_unlock_component(vm);
}

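/*
 * Assert both that the transition helper returns the expected error code
 * and that the page ends up in the globally expected state afterwards.
 */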
1728 #define assert_transition_res(res, fn, ...)		\
1729 	do {						\
1730 		WARN_ON(fn(__VA_ARGS__) != res);	\
1731 		assert_page_state();			\
1732 	} while (0)
1733 
pkvm_ownership_selftest(void * base)1734 void pkvm_ownership_selftest(void *base)
1735 {
1736 	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_RWX;
1737 	void *virt = hyp_alloc_pages(&host_s2_pool, 0);
1738 	struct pkvm_hyp_vcpu *vcpu;
1739 	u64 phys, size, pfn, gfn;
1740 	struct pkvm_hyp_vm *vm;
1741 
1742 	WARN_ON(!virt);
1743 	selftest_page = hyp_virt_to_page(virt);
1744 	selftest_page->refcount = 0;
1745 	selftest_vcpu = vcpu = init_selftest_vm(base);
1746 	vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
1747 
1748 	size = PAGE_SIZE << selftest_page->order;
1749 	phys = hyp_virt_to_phys(virt);
1750 	pfn = hyp_phys_to_pfn(phys);
1751 	gfn = hyp_phys_to_pfn(selftest_ipa());
1752 
1753 	selftest_state.host = PKVM_NOPAGE;
1754 	selftest_state.hyp = PKVM_PAGE_OWNED;
1755 	selftest_state.guest[0] = selftest_state.guest[1] = PKVM_NOPAGE;
1756 	assert_page_state();
1757 	assert_transition_res(-EPERM,	__pkvm_host_donate_hyp, pfn, 1);
1758 	assert_transition_res(-EPERM,	__pkvm_host_share_hyp, pfn);
1759 	assert_transition_res(-EPERM,	__pkvm_host_unshare_hyp, pfn);
1760 	assert_transition_res(-EPERM,	__pkvm_host_share_ffa, pfn, 1);
1761 	assert_transition_res(-EPERM,	__pkvm_host_unshare_ffa, pfn, 1);
1762 	assert_transition_res(-EPERM,	hyp_pin_shared_mem, virt, virt + size);
1763 	assert_transition_res(-EPERM,	__pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
1764 	assert_transition_res(-ENOENT,	__pkvm_host_unshare_guest, gfn, 1, vm);
1765 	assert_transition_res(-EPERM,   __pkvm_host_donate_guest, pfn, gfn, vcpu);
1766 
1767 	selftest_state.host = PKVM_PAGE_OWNED;
1768 	selftest_state.hyp = PKVM_NOPAGE;
1769 	assert_transition_res(0,	__pkvm_hyp_donate_host, pfn, 1);
1770 	assert_transition_res(-EPERM,	__pkvm_hyp_donate_host, pfn, 1);
1771 	assert_transition_res(-EPERM,	__pkvm_host_unshare_hyp, pfn);
1772 	assert_transition_res(-EPERM,	__pkvm_host_unshare_ffa, pfn, 1);
1773 	assert_transition_res(-ENOENT,	__pkvm_host_unshare_guest, gfn, 1, vm);
1774 	assert_transition_res(-EPERM,	hyp_pin_shared_mem, virt, virt + size);
1775 
1776 	selftest_state.host = PKVM_PAGE_SHARED_OWNED;
1777 	selftest_state.hyp = PKVM_PAGE_SHARED_BORROWED;
1778 	assert_transition_res(0,	__pkvm_host_share_hyp, pfn);
1779 	assert_transition_res(-EPERM,	__pkvm_host_share_hyp, pfn);
1780 	assert_transition_res(-EPERM,	__pkvm_host_donate_hyp, pfn, 1);
1781 	assert_transition_res(-EPERM,	__pkvm_host_share_ffa, pfn, 1);
1782 	assert_transition_res(-EPERM,	__pkvm_hyp_donate_host, pfn, 1);
1783 	assert_transition_res(-EPERM,	__pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
1784 	assert_transition_res(-ENOENT,	__pkvm_host_unshare_guest, gfn, 1, vm);
1785 	assert_transition_res(-EPERM,   __pkvm_host_donate_guest, pfn, gfn, vcpu);
1786 
	assert_transition_res(0,	hyp_pin_shared_mem, virt, virt + size);
	assert_transition_res(0,	hyp_pin_shared_mem, virt, virt + size);
	hyp_unpin_shared_mem(virt, virt + size);
	WARN_ON(hyp_page_count(virt) != 1);
	assert_transition_res(-EBUSY,	__pkvm_host_unshare_hyp, pfn);
	assert_transition_res(-EPERM,	__pkvm_host_share_hyp, pfn);
	assert_transition_res(-EPERM,	__pkvm_host_donate_hyp, pfn, 1);
	assert_transition_res(-EPERM,	__pkvm_host_share_ffa, pfn, 1);
	assert_transition_res(-EPERM,	__pkvm_hyp_donate_host, pfn, 1);
	assert_transition_res(-EPERM,	__pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
	assert_transition_res(-ENOENT,	__pkvm_host_unshare_guest, gfn, 1, vm);
	assert_transition_res(-EPERM,	__pkvm_host_donate_guest, pfn, gfn, vcpu);

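	/* Drop the last pin: the page count returns to zero, state unchanged. */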
	hyp_unpin_shared_mem(virt, virt + size);
	assert_page_state();
	WARN_ON(hyp_page_count(virt));

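	/* Unshare from the hyp; the host regains exclusive ownership. */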
	selftest_state.host = PKVM_PAGE_OWNED;
	selftest_state.hyp = PKVM_NOPAGE;
	assert_transition_res(0,	__pkvm_host_unshare_hyp, pfn);

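	/*
	 * Share the page with FF-A. No other transition is allowed while
	 * the page is lent out.
	 */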
	selftest_state.host = PKVM_PAGE_SHARED_OWNED;
	selftest_state.hyp = PKVM_NOPAGE;
	assert_transition_res(0,	__pkvm_host_share_ffa, pfn, 1);
	assert_transition_res(-EPERM,	__pkvm_host_share_ffa, pfn, 1);
	assert_transition_res(-EPERM,	__pkvm_host_donate_hyp, pfn, 1);
	assert_transition_res(-EPERM,	__pkvm_host_share_hyp, pfn);
	assert_transition_res(-EPERM,	__pkvm_host_unshare_hyp, pfn);
	assert_transition_res(-EPERM,	__pkvm_hyp_donate_host, pfn, 1);
	assert_transition_res(-EPERM,	__pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
	assert_transition_res(-ENOENT,	__pkvm_host_unshare_guest, gfn, 1, vm);
	assert_transition_res(-EPERM,	__pkvm_host_donate_guest, pfn, gfn, vcpu);
	assert_transition_res(-EPERM,	hyp_pin_shared_mem, virt, virt + size);

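	/* Reclaim the page from FF-A; a second unshare must fail. */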
	selftest_state.host = PKVM_PAGE_OWNED;
	selftest_state.hyp = PKVM_NOPAGE;
	assert_transition_res(0,	__pkvm_host_unshare_ffa, pfn, 1);
	assert_transition_res(-EPERM,	__pkvm_host_unshare_ffa, pfn, 1);

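	/*
	 * Share the page with the guest at gfn. The host keeps a shared
	 * mapping, so all conflicting transitions must fail.
	 */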
	selftest_state.host = PKVM_PAGE_SHARED_OWNED;
	selftest_state.guest[0] = PKVM_PAGE_SHARED_BORROWED;
	assert_transition_res(0,	__pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
	assert_transition_res(-EPERM,	__pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
	assert_transition_res(-EPERM,	__pkvm_host_share_ffa, pfn, 1);
	assert_transition_res(-EPERM,	__pkvm_host_donate_hyp, pfn, 1);
	assert_transition_res(-EPERM,	__pkvm_host_share_hyp, pfn);
	assert_transition_res(-EPERM,	__pkvm_host_unshare_hyp, pfn);
	assert_transition_res(-EPERM,	__pkvm_hyp_donate_host, pfn, 1);
	assert_transition_res(-EPERM,	__pkvm_host_donate_guest, pfn, gfn, vcpu);
	assert_transition_res(-EPERM,	hyp_pin_shared_mem, virt, virt + size);

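	/* Share the same page at a second GFN and check the share count. */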
	selftest_state.guest[1] = PKVM_PAGE_SHARED_BORROWED;
	assert_transition_res(0,	__pkvm_host_share_guest, pfn, gfn + 1, 1, vcpu, prot);
	WARN_ON(hyp_virt_to_page(virt)->host_share_guest_count != 2);

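	/* Unshare both GFNs; the host regains exclusive ownership with the last one. */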
	selftest_state.guest[0] = PKVM_NOPAGE;
	assert_transition_res(0,	__pkvm_host_unshare_guest, gfn, 1, vm);

	selftest_state.guest[1] = PKVM_NOPAGE;
	selftest_state.host = PKVM_PAGE_OWNED;
	assert_transition_res(0,	__pkvm_host_unshare_guest, gfn + 1, 1, vm);

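	/*
	 * Donate the page outright to the guest. The host loses all access,
	 * so every host-initiated transition must now fail.
	 */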
	selftest_state.host = PKVM_NOPAGE;
	selftest_state.guest[0] = PKVM_PAGE_OWNED;
	assert_transition_res(0,	__pkvm_host_donate_guest, pfn, gfn, vcpu);
	assert_transition_res(-EPERM,	__pkvm_host_donate_guest, pfn, gfn, vcpu);
	assert_transition_res(-EPERM,	__pkvm_host_donate_guest, pfn, gfn + 1, vcpu);
	assert_transition_res(-EPERM,	__pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
	assert_transition_res(-EPERM,	__pkvm_host_share_guest, pfn, gfn + 1, 1, vcpu, prot);
	assert_transition_res(-EPERM,	__pkvm_host_share_ffa, pfn, 1);
	assert_transition_res(-EPERM,	__pkvm_host_donate_hyp, pfn, 1);
	assert_transition_res(-EPERM,	__pkvm_host_share_hyp, pfn);
	assert_transition_res(-EPERM,	__pkvm_host_unshare_hyp, pfn);
	assert_transition_res(-EPERM,	__pkvm_hyp_donate_host, pfn, 1);

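	/*
	 * The guest shares the page back with the host, which borrows it;
	 * a borrowed page cannot be donated or re-shared by the host.
	 */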
	selftest_state.host = PKVM_PAGE_SHARED_BORROWED;
	selftest_state.guest[0] = PKVM_PAGE_SHARED_OWNED;
	assert_transition_res(0,	__pkvm_guest_share_host, vcpu, gfn);
	assert_transition_res(-EPERM,	__pkvm_guest_share_host, vcpu, gfn);
	assert_transition_res(-EPERM,	__pkvm_host_donate_guest, pfn, gfn, vcpu);
	assert_transition_res(-EPERM,	__pkvm_host_donate_guest, pfn, gfn + 1, vcpu);
	assert_transition_res(-EPERM,	__pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
	assert_transition_res(-EPERM,	__pkvm_host_share_guest, pfn, gfn + 1, 1, vcpu, prot);
	assert_transition_res(-EPERM,	__pkvm_host_share_ffa, pfn, 1);
	assert_transition_res(-EPERM,	__pkvm_host_donate_hyp, pfn, 1);
	assert_transition_res(-EPERM,	__pkvm_host_share_hyp, pfn);
	assert_transition_res(-EPERM,	__pkvm_host_unshare_hyp, pfn);
	assert_transition_res(-EPERM,	__pkvm_hyp_donate_host, pfn, 1);

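	/* The guest takes its page back; the host loses the borrowed mapping. */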
	selftest_state.host = PKVM_NOPAGE;
	selftest_state.guest[0] = PKVM_PAGE_OWNED;
	assert_transition_res(0,	__pkvm_guest_unshare_host, vcpu, gfn);
	assert_transition_res(-EPERM,	__pkvm_guest_unshare_host, vcpu, gfn);
	assert_transition_res(-EPERM,	__pkvm_host_donate_guest, pfn, gfn, vcpu);
	assert_transition_res(-EPERM,	__pkvm_host_donate_guest, pfn, gfn + 1, vcpu);
	assert_transition_res(-EPERM,	__pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
	assert_transition_res(-EPERM,	__pkvm_host_share_guest, pfn, gfn + 1, 1, vcpu, prot);
	assert_transition_res(-EPERM,	__pkvm_host_share_ffa, pfn, 1);
	assert_transition_res(-EPERM,	__pkvm_host_donate_hyp, pfn, 1);
	assert_transition_res(-EPERM,	__pkvm_host_share_hyp, pfn);
	assert_transition_res(-EPERM,	__pkvm_host_unshare_hyp, pfn);
	assert_transition_res(-EPERM,	__pkvm_hyp_donate_host, pfn, 1);

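	/*
	 * Forcibly reclaim the page from the guest: the host owns it again
	 * and the guest IPA is left poisoned.
	 */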
	selftest_state.host = PKVM_PAGE_OWNED;
	selftest_state.guest[0] = PKVM_POISON;
	assert_transition_res(0,	__pkvm_host_force_reclaim_page_guest, phys);
	assert_transition_res(-EPERM,	__pkvm_host_donate_guest, pfn, gfn, vcpu);
	assert_transition_res(-EPERM,	__pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);
	assert_transition_res(-EHWPOISON, __pkvm_guest_share_host, vcpu, gfn);
	assert_transition_res(-EHWPOISON, __pkvm_guest_unshare_host, vcpu, gfn);

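	/* Only the GFN is poisoned; the page itself can be donated again at another GFN. */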
	selftest_state.host = PKVM_NOPAGE;
	selftest_state.guest[1] = PKVM_PAGE_OWNED;
	assert_transition_res(0,	__pkvm_host_donate_guest, pfn, gfn + 1, vcpu);

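	/* Reclaim the page cooperatively; the poisoned GFN still refuses new donations and shares. */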
	selftest_state.host = PKVM_PAGE_OWNED;
	selftest_state.guest[1] = PKVM_NOPAGE;
	assert_transition_res(0,	__pkvm_host_reclaim_page_guest, gfn + 1, vm);
	assert_transition_res(-EPERM,	__pkvm_host_donate_guest, pfn, gfn, vcpu);
	assert_transition_res(-EPERM,	__pkvm_host_share_guest, pfn, gfn, 1, vcpu, prot);

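	/* Hand the page back to the hyp so it can be returned to its pool. */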
	selftest_state.host = PKVM_NOPAGE;
	selftest_state.hyp = PKVM_PAGE_OWNED;
	assert_transition_res(0,	__pkvm_host_donate_hyp, pfn, 1);

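	/* Tear down the test VM; restore a refcount of 1 so hyp_put_page() frees the page. */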
	teardown_selftest_vm();
	selftest_page->refcount = 1;
	hyp_put_page(&host_s2_pool, virt);
}
#endif