// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2020 Google LLC
 * Author: Quentin Perret <qperret@google.com>
 */

#include <linux/kvm_host.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pgtable.h>
#include <asm/kvm_pkvm.h>
#include <asm/stage2_pgtable.h>

#include <hyp/fault.h>

#include <nvhe/gfp.h>
#include <nvhe/memory.h>
#include <nvhe/mem_protect.h>
#include <nvhe/mm.h>

#define KVM_HOST_S2_FLAGS (KVM_PGTABLE_S2_NOFWB | KVM_PGTABLE_S2_IDMAP)

struct host_mmu host_mmu;

static struct hyp_pool host_s2_pool;

static DEFINE_PER_CPU(struct pkvm_hyp_vm *, __current_vm);
#define current_vm (*this_cpu_ptr(&__current_vm))

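/*
 * Locking helpers for the guest, host and hyp components. Taking a guest's
 * lock also records it as current_vm so that the guest_s2_* mm_ops callbacks
 * below can find the right per-VM hyp_pool without an explicit argument.
 */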
static void guest_lock_component(struct pkvm_hyp_vm *vm)
{
	hyp_spin_lock(&vm->lock);
	current_vm = vm;
}

static void guest_unlock_component(struct pkvm_hyp_vm *vm)
{
	current_vm = NULL;
	hyp_spin_unlock(&vm->lock);
}

static void host_lock_component(void)
{
	hyp_spin_lock(&host_mmu.lock);
}

static void host_unlock_component(void)
{
	hyp_spin_unlock(&host_mmu.lock);
}

static void hyp_lock_component(void)
{
	hyp_spin_lock(&pkvm_pgd_lock);
}

static void hyp_unlock_component(void)
{
	hyp_spin_unlock(&pkvm_pgd_lock);
}

static void *host_s2_zalloc_pages_exact(size_t size)
{
	void *addr = hyp_alloc_pages(&host_s2_pool, get_order(size));

	hyp_split_page(hyp_virt_to_page(addr));

	/*
	 * The size of concatenated PGDs is always a power-of-two multiple of
	 * PAGE_SIZE, so there should be no need to free any of the tail pages
	 * to make the allocation exact.
	 */
	WARN_ON(size != (PAGE_SIZE << get_order(size)));

	return addr;
}

static void *host_s2_zalloc_page(void *pool)
{
	return hyp_alloc_pages(pool, 0);
}

static void host_s2_get_page(void *addr)
{
	hyp_get_page(&host_s2_pool, addr);
}

static void host_s2_put_page(void *addr)
{
	hyp_put_page(&host_s2_pool, addr);
}

static void host_s2_free_unlinked_table(void *addr, s8 level)
{
	kvm_pgtable_stage2_free_unlinked(&host_mmu.mm_ops, addr, level);
}

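/*
 * Initialise the host stage-2 page allocator with the pages donated at setup
 * time and wire up the mm_ops used by the host's kvm_pgtable code.
 */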
static int prepare_s2_pool(void *pgt_pool_base)
{
	unsigned long nr_pages, pfn;
	int ret;

	pfn = hyp_virt_to_pfn(pgt_pool_base);
	nr_pages = host_s2_pgtable_pages();
	ret = hyp_pool_init(&host_s2_pool, pfn, nr_pages, 0);
	if (ret)
		return ret;

	host_mmu.mm_ops = (struct kvm_pgtable_mm_ops) {
		.zalloc_pages_exact = host_s2_zalloc_pages_exact,
		.zalloc_page = host_s2_zalloc_page,
		.free_unlinked_table = host_s2_free_unlinked_table,
		.phys_to_virt = hyp_phys_to_virt,
		.virt_to_phys = hyp_virt_to_phys,
		.page_count = hyp_page_count,
		.get_page = host_s2_get_page,
		.put_page = host_s2_put_page,
	};

	return 0;
}

static void prepare_host_vtcr(void)
{
	u32 parange, phys_shift;

	/* The host stage 2 is id-mapped, so use parange for T0SZ */
	parange = kvm_get_parange(id_aa64mmfr0_el1_sys_val);
	phys_shift = id_aa64mmfr0_parange_to_phys_shift(parange);

	host_mmu.arch.mmu.vtcr = kvm_get_vtcr(id_aa64mmfr0_el1_sys_val,
					      id_aa64mmfr1_el1_sys_val, phys_shift);
}

static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot);

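/*
 * Prepare the identity-mapped stage-2 page-table for the host: compute the
 * VTCR, initialise the page-table pool and the kvm_pgtable structure. The
 * table only takes effect once __pkvm_prot_finalize() enables it.
 */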
int kvm_host_prepare_stage2(void *pgt_pool_base)
{
	struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu;
	int ret;

	prepare_host_vtcr();
	hyp_spin_lock_init(&host_mmu.lock);
	mmu->arch = &host_mmu.arch;

	ret = prepare_s2_pool(pgt_pool_base);
	if (ret)
		return ret;

	ret = __kvm_pgtable_stage2_init(&host_mmu.pgt, mmu,
					&host_mmu.mm_ops, KVM_HOST_S2_FLAGS,
					host_stage2_force_pte_cb);
	if (ret)
		return ret;

	mmu->pgd_phys = __hyp_pa(host_mmu.pgt.pgd);
	mmu->pgt = &host_mmu.pgt;
	atomic64_set(&mmu->vmid.id, 0);

	return 0;
}

static bool guest_stage2_force_pte_cb(u64 addr, u64 end,
				      enum kvm_pgtable_prot prot)
{
	return true;
}

static void *guest_s2_zalloc_pages_exact(size_t size)
{
	void *addr = hyp_alloc_pages(&current_vm->pool, get_order(size));

	WARN_ON(size != (PAGE_SIZE << get_order(size)));
	hyp_split_page(hyp_virt_to_page(addr));

	return addr;
}

static void guest_s2_free_pages_exact(void *addr, unsigned long size)
{
	u8 order = get_order(size);
	unsigned int i;

	for (i = 0; i < (1 << order); i++)
		hyp_put_page(&current_vm->pool, addr + (i * PAGE_SIZE));
}

static void *guest_s2_zalloc_page(void *mc)
{
	struct hyp_page *p;
	void *addr;

	addr = hyp_alloc_pages(&current_vm->pool, 0);
	if (addr)
		return addr;

	addr = pop_hyp_memcache(mc, hyp_phys_to_virt);
	if (!addr)
		return addr;

	memset(addr, 0, PAGE_SIZE);
	p = hyp_virt_to_page(addr);
	p->refcount = 1;
	p->order = 0;

	return addr;
}

static void guest_s2_get_page(void *addr)
{
	hyp_get_page(&current_vm->pool, addr);
}

static void guest_s2_put_page(void *addr)
{
	hyp_put_page(&current_vm->pool, addr);
}

static void clean_dcache_guest_page(void *va, size_t size)
{
	__clean_dcache_guest_page(hyp_fixmap_map(__hyp_pa(va)), size);
	hyp_fixmap_unmap();
}

static void invalidate_icache_guest_page(void *va, size_t size)
{
	__invalidate_icache_guest_page(hyp_fixmap_map(__hyp_pa(va)), size);
	hyp_fixmap_unmap();
}

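/*
 * Prepare the stage-2 page-table of a guest. The donated @pgd pages seed the
 * per-VM hyp_pool from which all page-table pages for this guest are
 * subsequently allocated.
 */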
int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd)
{
	struct kvm_s2_mmu *mmu = &vm->kvm.arch.mmu;
	unsigned long nr_pages;
	int ret;

	nr_pages = kvm_pgtable_stage2_pgd_size(mmu->vtcr) >> PAGE_SHIFT;
	ret = hyp_pool_init(&vm->pool, hyp_virt_to_pfn(pgd), nr_pages, 0);
	if (ret)
		return ret;

	hyp_spin_lock_init(&vm->lock);
	vm->mm_ops = (struct kvm_pgtable_mm_ops) {
		.zalloc_pages_exact	= guest_s2_zalloc_pages_exact,
		.free_pages_exact	= guest_s2_free_pages_exact,
		.zalloc_page		= guest_s2_zalloc_page,
		.phys_to_virt		= hyp_phys_to_virt,
		.virt_to_phys		= hyp_virt_to_phys,
		.page_count		= hyp_page_count,
		.get_page		= guest_s2_get_page,
		.put_page		= guest_s2_put_page,
		.dcache_clean_inval_poc	= clean_dcache_guest_page,
		.icache_inval_pou	= invalidate_icache_guest_page,
	};

	guest_lock_component(vm);
	ret = __kvm_pgtable_stage2_init(mmu->pgt, mmu, &vm->mm_ops, 0,
					guest_stage2_force_pte_cb);
	guest_unlock_component(vm);
	if (ret)
		return ret;

	vm->kvm.arch.mmu.pgd_phys = __hyp_pa(vm->pgt.pgd);

	return 0;
}

void reclaim_pgtable_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc)
{
	struct hyp_page *page;
	void *addr;

	/* Dump all pgtable pages in the hyp_pool */
	guest_lock_component(vm);
	kvm_pgtable_stage2_destroy(&vm->pgt);
	vm->kvm.arch.mmu.pgd_phys = 0ULL;
	guest_unlock_component(vm);

	/* Drain the hyp_pool into the memcache */
	addr = hyp_alloc_pages(&vm->pool, 0);
	while (addr) {
		page = hyp_virt_to_page(addr);
		page->refcount = 0;
		page->order = 0;
		push_hyp_memcache(mc, addr, hyp_virt_to_phys);
		WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(addr), 1));
		addr = hyp_alloc_pages(&vm->pool, 0);
	}
}

int __pkvm_prot_finalize(void)
{
	struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu;
	struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);

	if (params->hcr_el2 & HCR_VM)
		return -EPERM;

	params->vttbr = kvm_get_vttbr(mmu);
	params->vtcr = mmu->vtcr;
	params->hcr_el2 |= HCR_VM;

	/*
	 * The CMO below not only cleans the updated params to the
	 * PoC, but also provides the DSB that ensures ongoing
	 * page-table walks that have started before we trapped to EL2
	 * have completed.
	 */
	kvm_flush_dcache_to_poc(params, sizeof(*params));

	write_sysreg(params->hcr_el2, hcr_el2);
	__load_stage2(&host_mmu.arch.mmu, &host_mmu.arch);

	/*
	 * Make sure to have an ISB before the TLB maintenance below but only
	 * when __load_stage2() doesn't include one already.
	 */
	asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT));

	/* Invalidate stale HCR bits that may be cached in TLBs */
	__tlbi(vmalls12e1);
	dsb(nsh);
	isb();

	return 0;
}

static int host_stage2_unmap_dev_all(void)
{
	struct kvm_pgtable *pgt = &host_mmu.pgt;
	struct memblock_region *reg;
	u64 addr = 0;
	int i, ret;

	/* Unmap all non-memory regions to recycle the pages */
	for (i = 0; i < hyp_memblock_nr; i++, addr = reg->base + reg->size) {
		reg = &hyp_memory[i];
		ret = kvm_pgtable_stage2_unmap(pgt, addr, reg->base - addr);
		if (ret)
			return ret;
	}
	return kvm_pgtable_stage2_unmap(pgt, addr, BIT(pgt->ia_bits) - addr);
}

struct kvm_mem_range {
	u64 start;
	u64 end;
};

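/*
 * Find the memblock region covering @addr, if any. On success @range is set
 * to the region boundaries; otherwise @range is narrowed to the gap in the
 * memory map containing @addr and NULL is returned.
 */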
static struct memblock_region *find_mem_range(phys_addr_t addr, struct kvm_mem_range *range)
{
	int cur, left = 0, right = hyp_memblock_nr;
	struct memblock_region *reg;
	phys_addr_t end;

	range->start = 0;
	range->end = ULONG_MAX;

	/* The list of memblock regions is sorted, binary search it */
	while (left < right) {
		cur = (left + right) >> 1;
		reg = &hyp_memory[cur];
		end = reg->base + reg->size;
		if (addr < reg->base) {
			right = cur;
			range->end = reg->base;
		} else if (addr >= end) {
			left = cur + 1;
			range->start = end;
		} else {
			range->start = reg->base;
			range->end = end;
			return reg;
		}
	}

	return NULL;
}

bool addr_is_memory(phys_addr_t phys)
{
	struct kvm_mem_range range;

	return !!find_mem_range(phys, &range);
}

static bool is_in_mem_range(u64 addr, struct kvm_mem_range *range)
{
	return range->start <= addr && addr < range->end;
}

static int check_range_allowed_memory(u64 start, u64 end)
{
	struct memblock_region *reg;
	struct kvm_mem_range range;

	/*
	 * Callers can't check the state of a range that overlaps memory and
	 * MMIO regions, so ensure [start, end[ is in the same kvm_mem_range.
	 */
	reg = find_mem_range(start, &range);
	if (!is_in_mem_range(end - 1, &range))
		return -EINVAL;

	if (!reg || reg->flags & MEMBLOCK_NOMAP)
		return -EPERM;

	return 0;
}

static bool range_is_memory(u64 start, u64 end)
{
	struct kvm_mem_range r;

	if (!find_mem_range(start, &r))
		return false;

	return is_in_mem_range(end - 1, &r);
}

static inline int __host_stage2_idmap(u64 start, u64 end,
				      enum kvm_pgtable_prot prot)
{
	return kvm_pgtable_stage2_map(&host_mmu.pgt, start, end - start, start,
				      prot, &host_s2_pool, 0);
}

/*
 * The pool has been provided with enough pages to cover all of memory with
 * page granularity, but it is difficult to know how much of the MMIO range
 * we will need to cover upfront, so we may need to 'recycle' the pages if we
 * run out.
 */
#define host_stage2_try(fn, ...)					\
	({								\
		int __ret;						\
		hyp_assert_lock_held(&host_mmu.lock);			\
		__ret = fn(__VA_ARGS__);				\
		if (__ret == -ENOMEM) {					\
			__ret = host_stage2_unmap_dev_all();		\
			if (!__ret)					\
				__ret = fn(__VA_ARGS__);		\
		}							\
		__ret;							\
	 })

static inline bool range_included(struct kvm_mem_range *child,
				  struct kvm_mem_range *parent)
{
	return parent->start <= child->start && child->end <= parent->end;
}

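/*
 * Shrink @range to the largest block-mapping-sized range around @addr that
 * both fits in the original range and is covered by a single empty entry in
 * the host stage-2, so the fault can be mapped with the biggest granule.
 * Returns -EAGAIN if a mapping already appeared, or -EPERM if the entry
 * carries an ownership annotation.
 */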
static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
{
	struct kvm_mem_range cur;
	kvm_pte_t pte;
	s8 level;
	int ret;

	hyp_assert_lock_held(&host_mmu.lock);
	ret = kvm_pgtable_get_leaf(&host_mmu.pgt, addr, &pte, &level);
	if (ret)
		return ret;

	if (kvm_pte_valid(pte))
		return -EAGAIN;

	if (pte) {
		WARN_ON(addr_is_memory(addr) && hyp_phys_to_page(addr)->host_state != PKVM_NOPAGE);
		return -EPERM;
	}

	do {
		u64 granule = kvm_granule_size(level);
		cur.start = ALIGN_DOWN(addr, granule);
		cur.end = cur.start + granule;
		level++;
	} while ((level <= KVM_PGTABLE_LAST_LEVEL) &&
			!(kvm_level_supports_block_mapping(level) &&
			  range_included(&cur, range)));

	*range = cur;

	return 0;
}

int host_stage2_idmap_locked(phys_addr_t addr, u64 size,
			     enum kvm_pgtable_prot prot)
{
	return host_stage2_try(__host_stage2_idmap, addr, addr + size, prot);
}

static void __host_update_page_state(phys_addr_t addr, u64 size, enum pkvm_page_state state)
{
	phys_addr_t end = addr + size;

	for (; addr < end; addr += PAGE_SIZE)
		hyp_phys_to_page(addr)->host_state = state;
}

int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id)
{
	int ret;

	if (!addr_is_memory(addr))
		return -EPERM;

	ret = host_stage2_try(kvm_pgtable_stage2_set_owner, &host_mmu.pgt,
			      addr, size, &host_s2_pool, owner_id);
	if (ret)
		return ret;

	/* Don't forget to update the vmemmap tracking for the host */
	if (owner_id == PKVM_ID_HOST)
		__host_update_page_state(addr, size, PKVM_PAGE_OWNED);
	else
		__host_update_page_state(addr, size, PKVM_NOPAGE);

	return 0;
}

static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot)
{
	/*
	 * Block mappings must be used with care in the host stage-2 as a
	 * kvm_pgtable_stage2_map() operation targeting a page in the range of
	 * an existing block will delete the block under the assumption that
	 * mappings in the rest of the block range can always be rebuilt lazily.
	 * That assumption is correct for the host stage-2 with RWX mappings
	 * targeting memory or RW mappings targeting MMIO ranges (see
	 * host_stage2_idmap() below which implements some of the host memory
	 * abort logic). However, this is not safe for any other mappings where
	 * the host stage-2 page-table is in fact the only place where this
	 * state is stored. In all those cases, it is safer to use page-level
	 * mappings, hence avoiding the loss of state caused by side-effects in
	 * kvm_pgtable_stage2_map().
	 */
	if (range_is_memory(addr, end))
		return prot != PKVM_HOST_MEM_PROT;
	else
		return prot != PKVM_HOST_MMIO_PROT;
}

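/*
 * Handle a host stage-2 fault by identity-mapping the faulting address with
 * default memory or MMIO permissions, using the largest possible block size.
 */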
static int host_stage2_idmap(u64 addr)
{
	struct kvm_mem_range range;
	bool is_memory = !!find_mem_range(addr, &range);
	enum kvm_pgtable_prot prot;
	int ret;

	prot = is_memory ? PKVM_HOST_MEM_PROT : PKVM_HOST_MMIO_PROT;

	host_lock_component();
	ret = host_stage2_adjust_range(addr, &range);
	if (ret)
		goto unlock;

	ret = host_stage2_idmap_locked(range.start, range.end - range.start, prot);
unlock:
	host_unlock_component();

	return ret;
}

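/*
 * Entry point for a stage-2 abort taken from the host: decode the fault and
 * lazily build the missing identity mapping.
 */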
void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
{
	struct kvm_vcpu_fault_info fault;
	u64 esr, addr;
	int ret = 0;

	esr = read_sysreg_el2(SYS_ESR);
	if (!__get_fault_info(esr, &fault)) {
		/*
		 * We've presumably raced with a page-table change which caused
		 * AT to fail, try again.
		 */
		return;
	}

	addr = (fault.hpfar_el2 & HPFAR_MASK) << 8;
	ret = host_stage2_idmap(addr);
	BUG_ON(ret && ret != -EAGAIN);
}

struct check_walk_data {
	enum pkvm_page_state	desired;
	enum pkvm_page_state	(*get_page_state)(kvm_pte_t pte, u64 addr);
};

static int __check_page_state_visitor(const struct kvm_pgtable_visit_ctx *ctx,
				      enum kvm_pgtable_walk_flags visit)
{
	struct check_walk_data *d = ctx->arg;

	return d->get_page_state(ctx->old, ctx->addr) == d->desired ? 0 : -EPERM;
}

static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size,
				  struct check_walk_data *data)
{
	struct kvm_pgtable_walker walker = {
		.cb	= __check_page_state_visitor,
		.arg	= data,
		.flags	= KVM_PGTABLE_WALK_LEAF,
	};

	return kvm_pgtable_walk(pgt, addr, size, &walker);
}

static int __host_check_page_state_range(u64 addr, u64 size,
					 enum pkvm_page_state state)
{
	u64 end = addr + size;
	int ret;

	ret = check_range_allowed_memory(addr, end);
	if (ret)
		return ret;

	hyp_assert_lock_held(&host_mmu.lock);
	for (; addr < end; addr += PAGE_SIZE) {
		if (hyp_phys_to_page(addr)->host_state != state)
			return -EPERM;
	}

	return 0;
}

static int __host_set_page_state_range(u64 addr, u64 size,
				       enum pkvm_page_state state)
{
	if (hyp_phys_to_page(addr)->host_state == PKVM_NOPAGE) {
		int ret = host_stage2_idmap_locked(addr, size, PKVM_HOST_MEM_PROT);

		if (ret)
			return ret;
	}

	__host_update_page_state(addr, size, state);

	return 0;
}

static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte, u64 addr)
{
	if (!kvm_pte_valid(pte))
		return PKVM_NOPAGE;

	return pkvm_getstate(kvm_pgtable_hyp_pte_prot(pte));
}

static int __hyp_check_page_state_range(u64 addr, u64 size,
					enum pkvm_page_state state)
{
	struct check_walk_data d = {
		.desired	= state,
		.get_page_state	= hyp_get_page_state,
	};

	hyp_assert_lock_held(&pkvm_pgd_lock);
	return check_page_state_range(&pkvm_pgtable, addr, size, &d);
}

static enum pkvm_page_state guest_get_page_state(kvm_pte_t pte, u64 addr)
{
	if (!kvm_pte_valid(pte))
		return PKVM_NOPAGE;

	return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte));
}

static int __guest_check_page_state_range(struct pkvm_hyp_vcpu *vcpu, u64 addr,
					  u64 size, enum pkvm_page_state state)
{
	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
	struct check_walk_data d = {
		.desired	= state,
		.get_page_state	= guest_get_page_state,
	};

	hyp_assert_lock_held(&vm->lock);
	return check_page_state_range(&vm->pgt, addr, size, &d);
}

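/*
 * Share a page of host memory with the hypervisor: the host keeps ownership
 * and the page is mapped into the hyp page-table as SHARED_BORROWED.
 */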
int __pkvm_host_share_hyp(u64 pfn)
{
	u64 phys = hyp_pfn_to_phys(pfn);
	void *virt = __hyp_va(phys);
	enum kvm_pgtable_prot prot;
	u64 size = PAGE_SIZE;
	int ret;

	host_lock_component();
	hyp_lock_component();

	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
	if (ret)
		goto unlock;
	if (IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) {
		ret = __hyp_check_page_state_range((u64)virt, size, PKVM_NOPAGE);
		if (ret)
			goto unlock;
	}

	prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED);
	WARN_ON(pkvm_create_mappings_locked(virt, virt + size, prot));
	WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED));

unlock:
	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

int __pkvm_host_unshare_hyp(u64 pfn)
{
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 virt = (u64)__hyp_va(phys);
	u64 size = PAGE_SIZE;
	int ret;

	host_lock_component();
	hyp_lock_component();

	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
	if (ret)
		goto unlock;
	ret = __hyp_check_page_state_range(virt, size, PKVM_PAGE_SHARED_BORROWED);
	if (ret)
		goto unlock;
	if (hyp_page_count((void *)virt)) {
		ret = -EBUSY;
		goto unlock;
	}

	WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, virt, size) != size);
	WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_OWNED));

unlock:
	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages)
{
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 size = PAGE_SIZE * nr_pages;
	void *virt = __hyp_va(phys);
	enum kvm_pgtable_prot prot;
	int ret;

	host_lock_component();
	hyp_lock_component();

	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
	if (ret)
		goto unlock;
	if (IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) {
		ret = __hyp_check_page_state_range((u64)virt, size, PKVM_NOPAGE);
		if (ret)
			goto unlock;
	}

	prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_OWNED);
	WARN_ON(pkvm_create_mappings_locked(virt, virt + size, prot));
	WARN_ON(host_stage2_set_owner_locked(phys, size, PKVM_ID_HYP));

unlock:
	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages)
{
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 size = PAGE_SIZE * nr_pages;
	u64 virt = (u64)__hyp_va(phys);
	int ret;

	host_lock_component();
	hyp_lock_component();

	ret = __hyp_check_page_state_range(virt, size, PKVM_PAGE_OWNED);
	if (ret)
		goto unlock;
	if (IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) {
		ret = __host_check_page_state_range(phys, size, PKVM_NOPAGE);
		if (ret)
			goto unlock;
	}

	WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, virt, size) != size);
	WARN_ON(host_stage2_set_owner_locked(phys, size, PKVM_ID_HOST));

unlock:
	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

int hyp_pin_shared_mem(void *from, void *to)
{
	u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE);
	u64 end = PAGE_ALIGN((u64)to);
	u64 size = end - start;
	int ret;

	host_lock_component();
	hyp_lock_component();

	ret = __host_check_page_state_range(__hyp_pa(start), size,
					    PKVM_PAGE_SHARED_OWNED);
	if (ret)
		goto unlock;

	ret = __hyp_check_page_state_range(start, size,
					   PKVM_PAGE_SHARED_BORROWED);
	if (ret)
		goto unlock;

	for (cur = start; cur < end; cur += PAGE_SIZE)
		hyp_page_ref_inc(hyp_virt_to_page(cur));

unlock:
	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

void hyp_unpin_shared_mem(void *from, void *to)
{
	u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE);
	u64 end = PAGE_ALIGN((u64)to);

	host_lock_component();
	hyp_lock_component();

	for (cur = start; cur < end; cur += PAGE_SIZE)
		hyp_page_ref_dec(hyp_virt_to_page(cur));

	hyp_unlock_component();
	host_unlock_component();
}

int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages)
{
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 size = PAGE_SIZE * nr_pages;
	int ret;

	host_lock_component();
	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
	if (!ret)
		ret = __host_set_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
	host_unlock_component();

	return ret;
}

int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages)
{
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 size = PAGE_SIZE * nr_pages;
	int ret;

	host_lock_component();
	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
	if (!ret)
		ret = __host_set_page_state_range(phys, size, PKVM_PAGE_OWNED);
	host_unlock_component();

	return ret;
}

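/*
 * Share a page of host memory with a guest at @gfn. The page is mapped
 * SHARED_BORROWED in the guest stage-2, and the host tracks how many guest
 * mappings reference it via host_share_guest_count.
 */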
int __pkvm_host_share_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu,
			    enum kvm_pgtable_prot prot)
{
	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 ipa = hyp_pfn_to_phys(gfn);
	struct hyp_page *page;
	int ret;

	if (prot & ~KVM_PGTABLE_PROT_RWX)
		return -EINVAL;

	ret = check_range_allowed_memory(phys, phys + PAGE_SIZE);
	if (ret)
		return ret;

	host_lock_component();
	guest_lock_component(vm);

	ret = __guest_check_page_state_range(vcpu, ipa, PAGE_SIZE, PKVM_NOPAGE);
	if (ret)
		goto unlock;

	page = hyp_phys_to_page(phys);
	switch (page->host_state) {
	case PKVM_PAGE_OWNED:
		WARN_ON(__host_set_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_SHARED_OWNED));
		break;
	case PKVM_PAGE_SHARED_OWNED:
		if (page->host_share_guest_count)
			break;
		/* Only host to np-guest multi-sharing is tolerated */
		WARN_ON(1);
		fallthrough;
	default:
		ret = -EPERM;
		goto unlock;
	}

	WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, PAGE_SIZE, phys,
				       pkvm_mkstate(prot, PKVM_PAGE_SHARED_BORROWED),
				       &vcpu->vcpu.arch.pkvm_memcache, 0));
	page->host_share_guest_count++;

unlock:
	guest_unlock_component(vm);
	host_unlock_component();

	return ret;
}

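/*
 * Check that @ipa is a last-level page the host has shared with @vm and, if
 * so, return its physical address via @__phys. Expects the host and guest
 * components to be locked by the caller.
 */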
static int __check_host_shared_guest(struct pkvm_hyp_vm *vm, u64 *__phys, u64 ipa)
{
	enum pkvm_page_state state;
	struct hyp_page *page;
	kvm_pte_t pte;
	u64 phys;
	s8 level;
	int ret;

	ret = kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, &level);
	if (ret)
		return ret;
	if (!kvm_pte_valid(pte))
		return -ENOENT;
	if (level != KVM_PGTABLE_LAST_LEVEL)
		return -E2BIG;

	state = guest_get_page_state(pte, ipa);
	if (state != PKVM_PAGE_SHARED_BORROWED)
		return -EPERM;

	phys = kvm_pte_to_phys(pte);
	ret = check_range_allowed_memory(phys, phys + PAGE_SIZE);
	if (WARN_ON(ret))
		return ret;

	page = hyp_phys_to_page(phys);
	if (page->host_state != PKVM_PAGE_SHARED_OWNED)
		return -EPERM;
	if (WARN_ON(!page->host_share_guest_count))
		return -EINVAL;

	*__phys = phys;

	return 0;
}

int __pkvm_host_unshare_guest(u64 gfn, struct pkvm_hyp_vm *vm)
{
	u64 ipa = hyp_pfn_to_phys(gfn);
	struct hyp_page *page;
	u64 phys;
	int ret;

	host_lock_component();
	guest_lock_component(vm);

	ret = __check_host_shared_guest(vm, &phys, ipa);
	if (ret)
		goto unlock;

	ret = kvm_pgtable_stage2_unmap(&vm->pgt, ipa, PAGE_SIZE);
	if (ret)
		goto unlock;

	page = hyp_phys_to_page(phys);
	page->host_share_guest_count--;
	if (!page->host_share_guest_count)
		WARN_ON(__host_set_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_OWNED));

unlock:
	guest_unlock_component(vm);
	host_unlock_component();

	return ret;
}

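/*
 * Debug-only sanity check (CONFIG_NVHE_EL2_DEBUG): warn if @ipa maps
 * something other than a page shared by the host. A missing mapping
 * (-ENOENT) is tolerated.
 */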
static void assert_host_shared_guest(struct pkvm_hyp_vm *vm, u64 ipa)
{
	u64 phys;
	int ret;

	if (!IS_ENABLED(CONFIG_NVHE_EL2_DEBUG))
		return;

	host_lock_component();
	guest_lock_component(vm);

	ret = __check_host_shared_guest(vm, &phys, ipa);

	guest_unlock_component(vm);
	host_unlock_component();

	WARN_ON(ret && ret != -ENOENT);
}

int __pkvm_host_relax_perms_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu, enum kvm_pgtable_prot prot)
{
	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
	u64 ipa = hyp_pfn_to_phys(gfn);
	int ret;

	if (pkvm_hyp_vm_is_protected(vm))
		return -EPERM;

	if (prot & ~KVM_PGTABLE_PROT_RWX)
		return -EINVAL;

	assert_host_shared_guest(vm, ipa);
	guest_lock_component(vm);
	ret = kvm_pgtable_stage2_relax_perms(&vm->pgt, ipa, prot, 0);
	guest_unlock_component(vm);

	return ret;
}

int __pkvm_host_wrprotect_guest(u64 gfn, struct pkvm_hyp_vm *vm)
{
	u64 ipa = hyp_pfn_to_phys(gfn);
	int ret;

	if (pkvm_hyp_vm_is_protected(vm))
		return -EPERM;

	assert_host_shared_guest(vm, ipa);
	guest_lock_component(vm);
	ret = kvm_pgtable_stage2_wrprotect(&vm->pgt, ipa, PAGE_SIZE);
	guest_unlock_component(vm);

	return ret;
}

int __pkvm_host_test_clear_young_guest(u64 gfn, bool mkold, struct pkvm_hyp_vm *vm)
{
	u64 ipa = hyp_pfn_to_phys(gfn);
	int ret;

	if (pkvm_hyp_vm_is_protected(vm))
		return -EPERM;

	assert_host_shared_guest(vm, ipa);
	guest_lock_component(vm);
	ret = kvm_pgtable_stage2_test_clear_young(&vm->pgt, ipa, PAGE_SIZE, mkold);
	guest_unlock_component(vm);

	return ret;
}

int __pkvm_host_mkyoung_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu)
{
	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
	u64 ipa = hyp_pfn_to_phys(gfn);

	if (pkvm_hyp_vm_is_protected(vm))
		return -EPERM;

	assert_host_shared_guest(vm, ipa);
	guest_lock_component(vm);
	kvm_pgtable_stage2_mkyoung(&vm->pgt, ipa, 0);
	guest_unlock_component(vm);

	return 0;
}