// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2020 Google LLC
 * Author: Quentin Perret <qperret@google.com>
 */

#include <linux/kvm_host.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pgtable.h>
#include <asm/kvm_pkvm.h>
#include <asm/stage2_pgtable.h>

#include <hyp/fault.h>

#include <nvhe/gfp.h>
#include <nvhe/memory.h>
#include <nvhe/mem_protect.h>
#include <nvhe/mm.h>

#define KVM_HOST_S2_FLAGS (KVM_PGTABLE_S2_NOFWB | KVM_PGTABLE_S2_IDMAP)

struct host_mmu host_mmu;

static struct hyp_pool host_s2_pool;

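/*
 * Per-CPU pointer to the VM whose stage-2 page-table is currently being
 * manipulated under its lock. The guest_s2_* mm_ops callbacks below take no
 * vm argument, so they rely on this to find the right per-VM hyp_pool.
 */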
static DEFINE_PER_CPU(struct pkvm_hyp_vm *, __current_vm);
#define current_vm (*this_cpu_ptr(&__current_vm))

static void guest_lock_component(struct pkvm_hyp_vm *vm)
{
        hyp_spin_lock(&vm->lock);
        current_vm = vm;
}

static void guest_unlock_component(struct pkvm_hyp_vm *vm)
{
        current_vm = NULL;
        hyp_spin_unlock(&vm->lock);
}

static void host_lock_component(void)
{
        hyp_spin_lock(&host_mmu.lock);
}

static void host_unlock_component(void)
{
        hyp_spin_unlock(&host_mmu.lock);
}

static void hyp_lock_component(void)
{
        hyp_spin_lock(&pkvm_pgd_lock);
}

static void hyp_unlock_component(void)
{
        hyp_spin_unlock(&pkvm_pgd_lock);
}

static void *host_s2_zalloc_pages_exact(size_t size)
{
        void *addr = hyp_alloc_pages(&host_s2_pool, get_order(size));

        hyp_split_page(hyp_virt_to_page(addr));

        /*
         * The size of concatenated PGDs is always a power of two of PAGE_SIZE,
         * so there should be no need to free any of the tail pages to make the
         * allocation exact.
         */
        WARN_ON(size != (PAGE_SIZE << get_order(size)));

        return addr;
}

static void *host_s2_zalloc_page(void *pool)
{
        return hyp_alloc_pages(pool, 0);
}

static void host_s2_get_page(void *addr)
{
        hyp_get_page(&host_s2_pool, addr);
}

static void host_s2_put_page(void *addr)
{
        hyp_put_page(&host_s2_pool, addr);
}

static void host_s2_free_unlinked_table(void *addr, s8 level)
{
        kvm_pgtable_stage2_free_unlinked(&host_mmu.mm_ops, addr, level);
}

static int prepare_s2_pool(void *pgt_pool_base)
{
        unsigned long nr_pages, pfn;
        int ret;

        pfn = hyp_virt_to_pfn(pgt_pool_base);
        nr_pages = host_s2_pgtable_pages();
        ret = hyp_pool_init(&host_s2_pool, pfn, nr_pages, 0);
        if (ret)
                return ret;

        host_mmu.mm_ops = (struct kvm_pgtable_mm_ops) {
                .zalloc_pages_exact = host_s2_zalloc_pages_exact,
                .zalloc_page = host_s2_zalloc_page,
                .free_unlinked_table = host_s2_free_unlinked_table,
                .phys_to_virt = hyp_phys_to_virt,
                .virt_to_phys = hyp_virt_to_phys,
                .page_count = hyp_page_count,
                .get_page = host_s2_get_page,
                .put_page = host_s2_put_page,
        };

        return 0;
}

static void prepare_host_vtcr(void)
{
        u32 parange, phys_shift;

        /* The host stage 2 is id-mapped, so use parange for T0SZ */
        parange = kvm_get_parange(id_aa64mmfr0_el1_sys_val);
        phys_shift = id_aa64mmfr0_parange_to_phys_shift(parange);

        host_mmu.arch.mmu.vtcr = kvm_get_vtcr(id_aa64mmfr0_el1_sys_val,
                                              id_aa64mmfr1_el1_sys_val, phys_shift);
}

static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot);

int kvm_host_prepare_stage2(void *pgt_pool_base)
{
        struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu;
        int ret;

        prepare_host_vtcr();
        hyp_spin_lock_init(&host_mmu.lock);
        mmu->arch = &host_mmu.arch;

        ret = prepare_s2_pool(pgt_pool_base);
        if (ret)
                return ret;

        ret = __kvm_pgtable_stage2_init(&host_mmu.pgt, mmu,
                                        &host_mmu.mm_ops, KVM_HOST_S2_FLAGS,
                                        host_stage2_force_pte_cb);
        if (ret)
                return ret;

        mmu->pgd_phys = __hyp_pa(host_mmu.pgt.pgd);
        mmu->pgt = &host_mmu.pgt;
        atomic64_set(&mmu->vmid.id, 0);

        return 0;
}

static bool guest_stage2_force_pte_cb(u64 addr, u64 end,
                                      enum kvm_pgtable_prot prot)
{
        return true;
}

static void *guest_s2_zalloc_pages_exact(size_t size)
{
        void *addr = hyp_alloc_pages(&current_vm->pool, get_order(size));

        WARN_ON(size != (PAGE_SIZE << get_order(size)));
        hyp_split_page(hyp_virt_to_page(addr));

        return addr;
}

static void guest_s2_free_pages_exact(void *addr, unsigned long size)
{
        u8 order = get_order(size);
        unsigned int i;

        for (i = 0; i < (1 << order); i++)
                hyp_put_page(&current_vm->pool, addr + (i * PAGE_SIZE));
}

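/*
 * Guest stage-2 page-table pages come from the per-VM pool first; when that
 * runs dry, fall back to the vCPU-provided memcache. Pages taken from the
 * memcache bypass the pool allocator, so their vmemmap entries are
 * initialised by hand here.
 */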
static void *guest_s2_zalloc_page(void *mc)
{
        struct hyp_page *p;
        void *addr;

        addr = hyp_alloc_pages(&current_vm->pool, 0);
        if (addr)
                return addr;

        addr = pop_hyp_memcache(mc, hyp_phys_to_virt);
        if (!addr)
                return addr;

        memset(addr, 0, PAGE_SIZE);
        p = hyp_virt_to_page(addr);
        p->refcount = 1;
        p->order = 0;

        return addr;
}

static void guest_s2_get_page(void *addr)
{
        hyp_get_page(&current_vm->pool, addr);
}

static void guest_s2_put_page(void *addr)
{
        hyp_put_page(&current_vm->pool, addr);
}

static void clean_dcache_guest_page(void *va, size_t size)
{
        __clean_dcache_guest_page(hyp_fixmap_map(__hyp_pa(va)), size);
        hyp_fixmap_unmap();
}

static void invalidate_icache_guest_page(void *va, size_t size)
{
        __invalidate_icache_guest_page(hyp_fixmap_map(__hyp_pa(va)), size);
        hyp_fixmap_unmap();
}

int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd)
{
        struct kvm_s2_mmu *mmu = &vm->kvm.arch.mmu;
        unsigned long nr_pages;
        int ret;

        nr_pages = kvm_pgtable_stage2_pgd_size(mmu->vtcr) >> PAGE_SHIFT;
        ret = hyp_pool_init(&vm->pool, hyp_virt_to_pfn(pgd), nr_pages, 0);
        if (ret)
                return ret;

        hyp_spin_lock_init(&vm->lock);
        vm->mm_ops = (struct kvm_pgtable_mm_ops) {
                .zalloc_pages_exact = guest_s2_zalloc_pages_exact,
                .free_pages_exact = guest_s2_free_pages_exact,
                .zalloc_page = guest_s2_zalloc_page,
                .phys_to_virt = hyp_phys_to_virt,
                .virt_to_phys = hyp_virt_to_phys,
                .page_count = hyp_page_count,
                .get_page = guest_s2_get_page,
                .put_page = guest_s2_put_page,
                .dcache_clean_inval_poc = clean_dcache_guest_page,
                .icache_inval_pou = invalidate_icache_guest_page,
        };

        guest_lock_component(vm);
        ret = __kvm_pgtable_stage2_init(mmu->pgt, mmu, &vm->mm_ops, 0,
                                        guest_stage2_force_pte_cb);
        guest_unlock_component(vm);
        if (ret)
                return ret;

        vm->kvm.arch.mmu.pgd_phys = __hyp_pa(vm->pgt.pgd);

        return 0;
}

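/*
 * Tear down a guest's stage-2 and hand its page-table pages back: destroy the
 * page-table, then drain every remaining page of the per-VM pool into the
 * memcache and donate it back to the host.
 */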
void reclaim_pgtable_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc)
{
        struct hyp_page *page;
        void *addr;

        /* Dump all pgtable pages in the hyp_pool */
        guest_lock_component(vm);
        kvm_pgtable_stage2_destroy(&vm->pgt);
        vm->kvm.arch.mmu.pgd_phys = 0ULL;
        guest_unlock_component(vm);

        /* Drain the hyp_pool into the memcache */
        addr = hyp_alloc_pages(&vm->pool, 0);
        while (addr) {
                page = hyp_virt_to_page(addr);
                page->refcount = 0;
                page->order = 0;
                push_hyp_memcache(mc, addr, hyp_virt_to_phys);
                WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(addr), 1));
                addr = hyp_alloc_pages(&vm->pool, 0);
        }
}

int __pkvm_prot_finalize(void)
{
        struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu;
        struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);

        if (params->hcr_el2 & HCR_VM)
                return -EPERM;

        params->vttbr = kvm_get_vttbr(mmu);
        params->vtcr = mmu->vtcr;
        params->hcr_el2 |= HCR_VM;

        /*
         * The CMO below not only cleans the updated params to the
         * PoC, but also provides the DSB that ensures ongoing
         * page-table walks that have started before we trapped to EL2
         * have completed.
         */
        kvm_flush_dcache_to_poc(params, sizeof(*params));

        write_sysreg(params->hcr_el2, hcr_el2);
        __load_stage2(&host_mmu.arch.mmu, &host_mmu.arch);

        /*
         * Make sure to have an ISB before the TLB maintenance below but only
         * when __load_stage2() doesn't include one already.
         */
        asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT));

        /* Invalidate stale HCR bits that may be cached in TLBs */
        __tlbi(vmalls12e1);
        dsb(nsh);
        isb();

        return 0;
}

static int host_stage2_unmap_dev_all(void)
{
        struct kvm_pgtable *pgt = &host_mmu.pgt;
        struct memblock_region *reg;
        u64 addr = 0;
        int i, ret;

        /* Unmap all non-memory regions to recycle the pages */
        for (i = 0; i < hyp_memblock_nr; i++, addr = reg->base + reg->size) {
                reg = &hyp_memory[i];
                ret = kvm_pgtable_stage2_unmap(pgt, addr, reg->base - addr);
                if (ret)
                        return ret;
        }
        return kvm_pgtable_stage2_unmap(pgt, addr, BIT(pgt->ia_bits) - addr);
}

struct kvm_mem_range {
        u64 start;
        u64 end;
};

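/*
 * Look up the memblock region covering @addr. On a hit, @range is set to the
 * region's [base, base + size[ and the region is returned. On a miss, NULL is
 * returned and @range is set to the gap between the neighbouring regions, so
 * the caller still learns how far the non-memory (MMIO) hole extends.
 */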
static struct memblock_region *find_mem_range(phys_addr_t addr, struct kvm_mem_range *range)
{
        int cur, left = 0, right = hyp_memblock_nr;
        struct memblock_region *reg;
        phys_addr_t end;

        range->start = 0;
        range->end = ULONG_MAX;

        /* The list of memblock regions is sorted, binary search it */
        while (left < right) {
                cur = (left + right) >> 1;
                reg = &hyp_memory[cur];
                end = reg->base + reg->size;
                if (addr < reg->base) {
                        right = cur;
                        range->end = reg->base;
                } else if (addr >= end) {
                        left = cur + 1;
                        range->start = end;
                } else {
                        range->start = reg->base;
                        range->end = end;
                        return reg;
                }
        }

        return NULL;
}

bool addr_is_memory(phys_addr_t phys)
{
        struct kvm_mem_range range;

        return !!find_mem_range(phys, &range);
}

static bool is_in_mem_range(u64 addr, struct kvm_mem_range *range)
{
        return range->start <= addr && addr < range->end;
}

static int check_range_allowed_memory(u64 start, u64 end)
{
        struct memblock_region *reg;
        struct kvm_mem_range range;

        /*
         * Callers can't check the state of a range that overlaps memory and
         * MMIO regions, so ensure [start, end[ is in the same kvm_mem_range.
         */
        reg = find_mem_range(start, &range);
        if (!is_in_mem_range(end - 1, &range))
                return -EINVAL;

        if (!reg || reg->flags & MEMBLOCK_NOMAP)
                return -EPERM;

        return 0;
}

static bool range_is_memory(u64 start, u64 end)
{
        struct kvm_mem_range r;

        if (!find_mem_range(start, &r))
                return false;

        return is_in_mem_range(end - 1, &r);
}

static inline int __host_stage2_idmap(u64 start, u64 end,
                                      enum kvm_pgtable_prot prot)
{
        return kvm_pgtable_stage2_map(&host_mmu.pgt, start, end - start, start,
                                      prot, &host_s2_pool, 0);
}

/*
 * The pool has been provided with enough pages to cover all of memory with
 * page granularity, but it is difficult to know how much of the MMIO range
 * we will need to cover upfront, so we may need to 'recycle' the pages if we
 * run out.
 */
#define host_stage2_try(fn, ...)                                        \
        ({                                                              \
                int __ret;                                              \
                hyp_assert_lock_held(&host_mmu.lock);                   \
                __ret = fn(__VA_ARGS__);                                \
                if (__ret == -ENOMEM) {                                 \
                        __ret = host_stage2_unmap_dev_all();            \
                        if (!__ret)                                     \
                                __ret = fn(__VA_ARGS__);                \
                }                                                       \
                __ret;                                                  \
        })

static inline bool range_included(struct kvm_mem_range *child,
                                  struct kvm_mem_range *parent)
{
        return parent->start <= child->start && child->end <= parent->end;
}

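/*
 * Compute the largest range around @addr that can be installed with a single
 * block (or page) mapping: starting from the level at which the walk stopped,
 * shrink the candidate range until it is block-mappable and fully contained
 * in the memory/MMIO range computed by the caller. Returns -EAGAIN if a valid
 * mapping already exists, and -EPERM if the entry is an ownership annotation
 * belonging to someone else.
 */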
static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
{
        struct kvm_mem_range cur;
        kvm_pte_t pte;
        s8 level;
        int ret;

        hyp_assert_lock_held(&host_mmu.lock);
        ret = kvm_pgtable_get_leaf(&host_mmu.pgt, addr, &pte, &level);
        if (ret)
                return ret;

        if (kvm_pte_valid(pte))
                return -EAGAIN;

        if (pte) {
                WARN_ON(addr_is_memory(addr) && hyp_phys_to_page(addr)->host_state != PKVM_NOPAGE);
                return -EPERM;
        }

        do {
                u64 granule = kvm_granule_size(level);
                cur.start = ALIGN_DOWN(addr, granule);
                cur.end = cur.start + granule;
                level++;
        } while ((level <= KVM_PGTABLE_LAST_LEVEL) &&
                 !(kvm_level_supports_block_mapping(level) &&
                   range_included(&cur, range)));

        *range = cur;

        return 0;
}

int host_stage2_idmap_locked(phys_addr_t addr, u64 size,
                             enum kvm_pgtable_prot prot)
{
        return host_stage2_try(__host_stage2_idmap, addr, addr + size, prot);
}

static void __host_update_page_state(phys_addr_t addr, u64 size, enum pkvm_page_state state)
{
        phys_addr_t end = addr + size;

        for (; addr < end; addr += PAGE_SIZE)
                hyp_phys_to_page(addr)->host_state = state;
}

int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id)
{
        int ret;

        if (!addr_is_memory(addr))
                return -EPERM;

        ret = host_stage2_try(kvm_pgtable_stage2_set_owner, &host_mmu.pgt,
                              addr, size, &host_s2_pool, owner_id);
        if (ret)
                return ret;

        /* Don't forget to update the vmemmap tracking for the host */
        if (owner_id == PKVM_ID_HOST)
                __host_update_page_state(addr, size, PKVM_PAGE_OWNED);
        else
                __host_update_page_state(addr, size, PKVM_NOPAGE);

        return 0;
}

static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot)
{
        /*
         * Block mappings must be used with care in the host stage-2 as a
         * kvm_pgtable_stage2_map() operation targeting a page in the range of
         * an existing block will delete the block under the assumption that
         * mappings in the rest of the block range can always be rebuilt lazily.
         * That assumption is correct for the host stage-2 with RWX mappings
         * targeting memory or RW mappings targeting MMIO ranges (see
         * host_stage2_idmap() below which implements some of the host memory
         * abort logic). However, this is not safe for any other mappings where
         * the host stage-2 page-table is in fact the only place where this
         * state is stored. In all those cases, it is safer to use page-level
         * mappings, hence avoiding to lose the state because of side-effects in
         * kvm_pgtable_stage2_map().
         */
        if (range_is_memory(addr, end))
                return prot != PKVM_HOST_MEM_PROT;
        else
                return prot != PKVM_HOST_MMIO_PROT;
}

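/*
 * Lazily establish the identity mapping for a faulting host address: memory
 * gets the default RWX protection, MMIO gets RW, and the mapped range is
 * expanded to the largest block that host_stage2_adjust_range() allows.
 */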
static int host_stage2_idmap(u64 addr)
{
        struct kvm_mem_range range;
        bool is_memory = !!find_mem_range(addr, &range);
        enum kvm_pgtable_prot prot;
        int ret;

        prot = is_memory ? PKVM_HOST_MEM_PROT : PKVM_HOST_MMIO_PROT;

        host_lock_component();
        ret = host_stage2_adjust_range(addr, &range);
        if (ret)
                goto unlock;

        ret = host_stage2_idmap_locked(range.start, range.end - range.start, prot);
unlock:
        host_unlock_component();

        return ret;
}

void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
{
        struct kvm_vcpu_fault_info fault;
        u64 esr, addr;
        int ret = 0;

        esr = read_sysreg_el2(SYS_ESR);
        if (!__get_fault_info(esr, &fault)) {
                /*
                 * We've presumably raced with a page-table change which caused
                 * AT to fail, try again.
                 */
                return;
        }

        addr = (fault.hpfar_el2 & HPFAR_MASK) << 8;
        ret = host_stage2_idmap(addr);
        BUG_ON(ret && ret != -EAGAIN);
}

struct check_walk_data {
        enum pkvm_page_state desired;
        enum pkvm_page_state (*get_page_state)(kvm_pte_t pte, u64 addr);
};

static int __check_page_state_visitor(const struct kvm_pgtable_visit_ctx *ctx,
                                      enum kvm_pgtable_walk_flags visit)
{
        struct check_walk_data *d = ctx->arg;

        return d->get_page_state(ctx->old, ctx->addr) == d->desired ? 0 : -EPERM;
}

static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size,
                                  struct check_walk_data *data)
{
        struct kvm_pgtable_walker walker = {
                .cb = __check_page_state_visitor,
                .arg = data,
                .flags = KVM_PGTABLE_WALK_LEAF,
        };

        return kvm_pgtable_walk(pgt, addr, size, &walker);
}

static int __host_check_page_state_range(u64 addr, u64 size,
                                         enum pkvm_page_state state)
{
        u64 end = addr + size;
        int ret;

        ret = check_range_allowed_memory(addr, end);
        if (ret)
                return ret;

        hyp_assert_lock_held(&host_mmu.lock);
        for (; addr < end; addr += PAGE_SIZE) {
                if (hyp_phys_to_page(addr)->host_state != state)
                        return -EPERM;
        }

        return 0;
}

static int __host_set_page_state_range(u64 addr, u64 size,
                                       enum pkvm_page_state state)
{
        if (hyp_phys_to_page(addr)->host_state == PKVM_NOPAGE) {
                int ret = host_stage2_idmap_locked(addr, size, PKVM_HOST_MEM_PROT);

                if (ret)
                        return ret;
        }

        __host_update_page_state(addr, size, state);

        return 0;
}

static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte, u64 addr)
{
        if (!kvm_pte_valid(pte))
                return PKVM_NOPAGE;

        return pkvm_getstate(kvm_pgtable_hyp_pte_prot(pte));
}

static int __hyp_check_page_state_range(u64 addr, u64 size,
                                        enum pkvm_page_state state)
{
        struct check_walk_data d = {
                .desired = state,
                .get_page_state = hyp_get_page_state,
        };

        hyp_assert_lock_held(&pkvm_pgd_lock);
        return check_page_state_range(&pkvm_pgtable, addr, size, &d);
}

static enum pkvm_page_state guest_get_page_state(kvm_pte_t pte, u64 addr)
{
        if (!kvm_pte_valid(pte))
                return PKVM_NOPAGE;

        return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte));
}

static int __guest_check_page_state_range(struct pkvm_hyp_vcpu *vcpu, u64 addr,
                                          u64 size, enum pkvm_page_state state)
{
        struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
        struct check_walk_data d = {
                .desired = state,
                .get_page_state = guest_get_page_state,
        };

        hyp_assert_lock_held(&vm->lock);
        return check_page_state_range(&vm->pgt, addr, size, &d);
}

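/*
 * Share a page of host memory with the hypervisor: the host must own the page
 * exclusively, after which it is mapped into the hyp VA space as
 * SHARED_BORROWED while the host's tracking moves to SHARED_OWNED. The
 * reverse operation below refuses to unshare while the page is pinned.
 */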
int __pkvm_host_share_hyp(u64 pfn)
{
        u64 phys = hyp_pfn_to_phys(pfn);
        void *virt = __hyp_va(phys);
        enum kvm_pgtable_prot prot;
        u64 size = PAGE_SIZE;
        int ret;

        host_lock_component();
        hyp_lock_component();

        ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
        if (ret)
                goto unlock;
        if (IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) {
                ret = __hyp_check_page_state_range((u64)virt, size, PKVM_NOPAGE);
                if (ret)
                        goto unlock;
        }

        prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED);
        WARN_ON(pkvm_create_mappings_locked(virt, virt + size, prot));
        WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED));

unlock:
        hyp_unlock_component();
        host_unlock_component();

        return ret;
}

int __pkvm_host_unshare_hyp(u64 pfn)
{
        u64 phys = hyp_pfn_to_phys(pfn);
        u64 virt = (u64)__hyp_va(phys);
        u64 size = PAGE_SIZE;
        int ret;

        host_lock_component();
        hyp_lock_component();

        ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
        if (ret)
                goto unlock;
        ret = __hyp_check_page_state_range(virt, size, PKVM_PAGE_SHARED_BORROWED);
        if (ret)
                goto unlock;
        if (hyp_page_count((void *)virt)) {
                ret = -EBUSY;
                goto unlock;
        }

        WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, virt, size) != size);
        WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_OWNED));

unlock:
        hyp_unlock_component();
        host_unlock_component();

        return ret;
}

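/*
 * Donation transfers ownership outright, unlike sharing: the host loses any
 * mapping of the donated range (its state becomes NOPAGE and the stage-2
 * owner becomes PKVM_ID_HYP), while the hypervisor maps it as fully OWNED.
 * __pkvm_hyp_donate_host() performs the inverse transfer.
 */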
int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages)
{
        u64 phys = hyp_pfn_to_phys(pfn);
        u64 size = PAGE_SIZE * nr_pages;
        void *virt = __hyp_va(phys);
        enum kvm_pgtable_prot prot;
        int ret;

        host_lock_component();
        hyp_lock_component();

        ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
        if (ret)
                goto unlock;
        if (IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) {
                ret = __hyp_check_page_state_range((u64)virt, size, PKVM_NOPAGE);
                if (ret)
                        goto unlock;
        }

        prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_OWNED);
        WARN_ON(pkvm_create_mappings_locked(virt, virt + size, prot));
        WARN_ON(host_stage2_set_owner_locked(phys, size, PKVM_ID_HYP));

unlock:
        hyp_unlock_component();
        host_unlock_component();

        return ret;
}

int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages)
{
        u64 phys = hyp_pfn_to_phys(pfn);
        u64 size = PAGE_SIZE * nr_pages;
        u64 virt = (u64)__hyp_va(phys);
        int ret;

        host_lock_component();
        hyp_lock_component();

        ret = __hyp_check_page_state_range(virt, size, PKVM_PAGE_OWNED);
        if (ret)
                goto unlock;
        if (IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) {
                ret = __host_check_page_state_range(phys, size, PKVM_NOPAGE);
                if (ret)
                        goto unlock;
        }

        WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, virt, size) != size);
        WARN_ON(host_stage2_set_owner_locked(phys, size, PKVM_ID_HOST));

unlock:
        hyp_unlock_component();
        host_unlock_component();

        return ret;
}

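/*
 * Pin a range of memory that the host has shared with the hypervisor by
 * taking a reference on each page. While pinned, __pkvm_host_unshare_hyp()
 * fails with -EBUSY, so the hyp can keep using the pages safely.
 */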
int hyp_pin_shared_mem(void *from, void *to)
{
        u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE);
        u64 end = PAGE_ALIGN((u64)to);
        u64 size = end - start;
        int ret;

        host_lock_component();
        hyp_lock_component();

        ret = __host_check_page_state_range(__hyp_pa(start), size,
                                            PKVM_PAGE_SHARED_OWNED);
        if (ret)
                goto unlock;

        ret = __hyp_check_page_state_range(start, size,
                                           PKVM_PAGE_SHARED_BORROWED);
        if (ret)
                goto unlock;

        for (cur = start; cur < end; cur += PAGE_SIZE)
                hyp_page_ref_inc(hyp_virt_to_page(cur));

unlock:
        hyp_unlock_component();
        host_unlock_component();

        return ret;
}

void hyp_unpin_shared_mem(void *from, void *to)
{
        u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE);
        u64 end = PAGE_ALIGN((u64)to);

        host_lock_component();
        hyp_lock_component();

        for (cur = start; cur < end; cur += PAGE_SIZE)
                hyp_page_ref_dec(hyp_virt_to_page(cur));

        hyp_unlock_component();
        host_unlock_component();
}

int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages)
{
        u64 phys = hyp_pfn_to_phys(pfn);
        u64 size = PAGE_SIZE * nr_pages;
        int ret;

        host_lock_component();
        ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
        if (!ret)
                ret = __host_set_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
        host_unlock_component();

        return ret;
}

int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages)
{
        u64 phys = hyp_pfn_to_phys(pfn);
        u64 size = PAGE_SIZE * nr_pages;
        int ret;

        host_lock_component();
        ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
        if (!ret)
                ret = __host_set_page_state_range(phys, size, PKVM_PAGE_OWNED);
        host_unlock_component();

        return ret;
}

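/*
 * Map a page of host memory into a non-protected guest's stage-2. The host
 * keeps ownership (SHARED_OWNED) and the guest gets a SHARED_BORROWED
 * mapping; host_share_guest_count tracks how many guest mappings reference
 * the page, since only host to np-guest multi-sharing is tolerated.
 */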
int __pkvm_host_share_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu,
                            enum kvm_pgtable_prot prot)
{
        struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
        u64 phys = hyp_pfn_to_phys(pfn);
        u64 ipa = hyp_pfn_to_phys(gfn);
        struct hyp_page *page;
        int ret;

        if (prot & ~KVM_PGTABLE_PROT_RWX)
                return -EINVAL;

        ret = check_range_allowed_memory(phys, phys + PAGE_SIZE);
        if (ret)
                return ret;

        host_lock_component();
        guest_lock_component(vm);

        ret = __guest_check_page_state_range(vcpu, ipa, PAGE_SIZE, PKVM_NOPAGE);
        if (ret)
                goto unlock;

        page = hyp_phys_to_page(phys);
        switch (page->host_state) {
        case PKVM_PAGE_OWNED:
                WARN_ON(__host_set_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_SHARED_OWNED));
                break;
        case PKVM_PAGE_SHARED_OWNED:
                if (page->host_share_guest_count)
                        break;
                /* Only host to np-guest multi-sharing is tolerated */
                WARN_ON(1);
                fallthrough;
        default:
                ret = -EPERM;
                goto unlock;
        }

        WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, PAGE_SIZE, phys,
                                       pkvm_mkstate(prot, PKVM_PAGE_SHARED_BORROWED),
                                       &vcpu->vcpu.arch.pkvm_memcache, 0));
        page->host_share_guest_count++;

unlock:
        guest_unlock_component(vm);
        host_unlock_component();

        return ret;
}

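/*
 * Validate that @ipa is currently a page-level host to guest share: the guest
 * PTE must be valid, mapped at the last level and in SHARED_BORROWED state,
 * and the backing page must be SHARED_OWNED by the host with a non-zero
 * share count. On success the physical address is returned via @__phys.
 */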
static int __check_host_shared_guest(struct pkvm_hyp_vm *vm, u64 *__phys, u64 ipa)
{
        enum pkvm_page_state state;
        struct hyp_page *page;
        kvm_pte_t pte;
        u64 phys;
        s8 level;
        int ret;

        ret = kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, &level);
        if (ret)
                return ret;
        if (!kvm_pte_valid(pte))
                return -ENOENT;
        if (level != KVM_PGTABLE_LAST_LEVEL)
                return -E2BIG;

        state = guest_get_page_state(pte, ipa);
        if (state != PKVM_PAGE_SHARED_BORROWED)
                return -EPERM;

        phys = kvm_pte_to_phys(pte);
        ret = check_range_allowed_memory(phys, phys + PAGE_SIZE);
        if (WARN_ON(ret))
                return ret;

        page = hyp_phys_to_page(phys);
        if (page->host_state != PKVM_PAGE_SHARED_OWNED)
                return -EPERM;
        if (WARN_ON(!page->host_share_guest_count))
                return -EINVAL;

        *__phys = phys;

        return 0;
}

int __pkvm_host_unshare_guest(u64 gfn, struct pkvm_hyp_vm *vm)
{
        u64 ipa = hyp_pfn_to_phys(gfn);
        struct hyp_page *page;
        u64 phys;
        int ret;

        host_lock_component();
        guest_lock_component(vm);

        ret = __check_host_shared_guest(vm, &phys, ipa);
        if (ret)
                goto unlock;

        ret = kvm_pgtable_stage2_unmap(&vm->pgt, ipa, PAGE_SIZE);
        if (ret)
                goto unlock;

        page = hyp_phys_to_page(phys);
        page->host_share_guest_count--;
        if (!page->host_share_guest_count)
                WARN_ON(__host_set_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_OWNED));

unlock:
        guest_unlock_component(vm);
        host_unlock_component();

        return ret;
}

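/*
 * Debug-only sanity check (CONFIG_NVHE_EL2_DEBUG) used by the gfn-based
 * helpers below, which may only operate on pages the host has already shared
 * with the guest. A missing mapping (-ENOENT) is not treated as an error.
 */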
static void assert_host_shared_guest(struct pkvm_hyp_vm *vm, u64 ipa)
{
        u64 phys;
        int ret;

        if (!IS_ENABLED(CONFIG_NVHE_EL2_DEBUG))
                return;

        host_lock_component();
        guest_lock_component(vm);

        ret = __check_host_shared_guest(vm, &phys, ipa);

        guest_unlock_component(vm);
        host_unlock_component();

        WARN_ON(ret && ret != -ENOENT);
}

int __pkvm_host_relax_perms_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu, enum kvm_pgtable_prot prot)
{
        struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
        u64 ipa = hyp_pfn_to_phys(gfn);
        int ret;

        if (pkvm_hyp_vm_is_protected(vm))
                return -EPERM;

        if (prot & ~KVM_PGTABLE_PROT_RWX)
                return -EINVAL;

        assert_host_shared_guest(vm, ipa);
        guest_lock_component(vm);
        ret = kvm_pgtable_stage2_relax_perms(&vm->pgt, ipa, prot, 0);
        guest_unlock_component(vm);

        return ret;
}

int __pkvm_host_wrprotect_guest(u64 gfn, struct pkvm_hyp_vm *vm)
{
        u64 ipa = hyp_pfn_to_phys(gfn);
        int ret;

        if (pkvm_hyp_vm_is_protected(vm))
                return -EPERM;

        assert_host_shared_guest(vm, ipa);
        guest_lock_component(vm);
        ret = kvm_pgtable_stage2_wrprotect(&vm->pgt, ipa, PAGE_SIZE);
        guest_unlock_component(vm);

        return ret;
}

int __pkvm_host_test_clear_young_guest(u64 gfn, bool mkold, struct pkvm_hyp_vm *vm)
{
        u64 ipa = hyp_pfn_to_phys(gfn);
        int ret;

        if (pkvm_hyp_vm_is_protected(vm))
                return -EPERM;

        assert_host_shared_guest(vm, ipa);
        guest_lock_component(vm);
        ret = kvm_pgtable_stage2_test_clear_young(&vm->pgt, ipa, PAGE_SIZE, mkold);
        guest_unlock_component(vm);

        return ret;
}

int __pkvm_host_mkyoung_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu)
{
        struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
        u64 ipa = hyp_pfn_to_phys(gfn);

        if (pkvm_hyp_vm_is_protected(vm))
                return -EPERM;

        assert_host_shared_guest(vm, ipa);
        guest_lock_component(vm);
        kvm_pgtable_stage2_mkyoung(&vm->pgt, ipa, 0);
        guest_unlock_component(vm);

        return 0;
}