// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2020 - Google LLC
 * Author: Quentin Perret <qperret@google.com>
 */

#include <linux/init.h>
#include <linux/kmemleak.h>
#include <linux/kvm_host.h>
#include <asm/kvm_mmu.h>
#include <linux/memblock.h>
#include <linux/mutex.h>
#include <linux/sort.h>

#include <asm/kvm_pkvm.h>

#include "hyp_constants.h"

DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);

static struct memblock_region *hyp_memory = kvm_nvhe_sym(hyp_memory);
static unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr);

phys_addr_t hyp_mem_base;
phys_addr_t hyp_mem_size;

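/*
 * sort() comparator: order the copied memblock regions by ascending base
 * address, which lets EL2 binary-search them later.
 */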
static int cmp_hyp_memblock(const void *p1, const void *p2)
{
	const struct memblock_region *r1 = p1;
	const struct memblock_region *r2 = p2;

	return r1->base < r2->base ? -1 : (r1->base > r2->base);
}

static void __init sort_memblock_regions(void)
{
	sort(hyp_memory,
	     *hyp_memblock_nr_ptr,
	     sizeof(struct memblock_region),
	     cmp_hyp_memblock,
	     NULL);
}

static int __init register_memblock_regions(void)
{
	struct memblock_region *reg;

	for_each_mem_region(reg) {
		if (*hyp_memblock_nr_ptr >= HYP_MEMBLOCK_REGIONS)
			return -ENOMEM;

		hyp_memory[*hyp_memblock_nr_ptr] = *reg;
		(*hyp_memblock_nr_ptr)++;
	}
	sort_memblock_regions();

	return 0;
}

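/*
 * Reserve, while memblock is still available, the memory EL2 will need in
 * protected mode: the hyp stage-1 page-table, the host stage-2 page-table,
 * the VM table, the hyp vmemmap and the FF-A proxy buffers.
 */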
void __init kvm_hyp_reserve(void)
{
	u64 hyp_mem_pages = 0;
	int ret;

	if (!is_hyp_mode_available() || is_kernel_in_hyp_mode())
		return;

	if (kvm_get_mode() != KVM_MODE_PROTECTED)
		return;

	ret = register_memblock_regions();
	if (ret) {
		*hyp_memblock_nr_ptr = 0;
		kvm_err("Failed to register hyp memblocks: %d\n", ret);
		return;
	}

	hyp_mem_pages += hyp_s1_pgtable_pages();
	hyp_mem_pages += host_s2_pgtable_pages();
	hyp_mem_pages += hyp_vm_table_pages();
	hyp_mem_pages += hyp_vmemmap_pages(STRUCT_HYP_PAGE_SIZE);
	hyp_mem_pages += hyp_ffa_proxy_pages();

	/*
	 * Try to allocate a PMD-aligned region to reduce TLB pressure once
	 * this is unmapped from the host stage-2, and fall back to PAGE_SIZE.
	 */
	hyp_mem_size = hyp_mem_pages << PAGE_SHIFT;
	hyp_mem_base = memblock_phys_alloc(ALIGN(hyp_mem_size, PMD_SIZE),
					   PMD_SIZE);
	if (!hyp_mem_base)
		hyp_mem_base = memblock_phys_alloc(hyp_mem_size, PAGE_SIZE);
	else
		hyp_mem_size = ALIGN(hyp_mem_size, PMD_SIZE);

	if (!hyp_mem_base) {
		kvm_err("Failed to reserve hyp memory\n");
		return;
	}

	kvm_info("Reserved %lld MiB at 0x%llx\n", hyp_mem_size >> 20,
		 hyp_mem_base);
}

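/*
 * Tear down the EL2 side of a VM: __pkvm_teardown_vm makes hyp return the
 * donated memory, which is then freed via the teardown memcache.
 */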
static void __pkvm_destroy_hyp_vm(struct kvm *host_kvm)
{
	if (host_kvm->arch.pkvm.handle) {
		WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm,
					  host_kvm->arch.pkvm.handle));
	}

	host_kvm->arch.pkvm.handle = 0;
	free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc);
}

/*
 * Allocates and donates memory for hypervisor VM structs at EL2.
 *
 * Allocates space for the VM state, which includes the hyp vm as well as
 * the hyp vcpus.
 *
 * Stores an opaque handle in the kvm struct for future reference.
 *
 * Return 0 on success, negative error code on failure.
 */
static int __pkvm_create_hyp_vm(struct kvm *host_kvm)
{
	size_t pgd_sz, hyp_vm_sz, hyp_vcpu_sz;
	struct kvm_vcpu *host_vcpu;
	pkvm_handle_t handle;
	void *pgd, *hyp_vm;
	unsigned long idx;
	int ret;

	if (host_kvm->created_vcpus < 1)
		return -EINVAL;

	pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.mmu.vtcr);

	/*
	 * The PGD pages will be reclaimed using a hyp_memcache which implies
	 * page granularity. So, use alloc_pages_exact() to get individual
	 * refcounts.
	 */
	pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL_ACCOUNT);
	if (!pgd)
		return -ENOMEM;

	/* Allocate memory to donate to hyp for vm and vcpu pointers. */
	hyp_vm_sz = PAGE_ALIGN(size_add(PKVM_HYP_VM_SIZE,
					size_mul(sizeof(void *),
						 host_kvm->created_vcpus)));
	hyp_vm = alloc_pages_exact(hyp_vm_sz, GFP_KERNEL_ACCOUNT);
	if (!hyp_vm) {
		ret = -ENOMEM;
		goto free_pgd;
	}

	/* Donate the VM memory to hyp and let hyp initialize it. */
	ret = kvm_call_hyp_nvhe(__pkvm_init_vm, host_kvm, hyp_vm, pgd);
	if (ret < 0)
		goto free_vm;

	handle = ret;

	host_kvm->arch.pkvm.handle = handle;

	/* Donate memory for the vcpus at hyp and initialize it. */
	hyp_vcpu_sz = PAGE_ALIGN(PKVM_HYP_VCPU_SIZE);
	kvm_for_each_vcpu(idx, host_vcpu, host_kvm) {
		void *hyp_vcpu;

		/* Vcpu indexing must be sequential, starting at 0. */
		if (WARN_ON(host_vcpu->vcpu_idx != idx)) {
			ret = -EINVAL;
			goto destroy_vm;
		}

		hyp_vcpu = alloc_pages_exact(hyp_vcpu_sz, GFP_KERNEL_ACCOUNT);
		if (!hyp_vcpu) {
			ret = -ENOMEM;
			goto destroy_vm;
		}

		ret = kvm_call_hyp_nvhe(__pkvm_init_vcpu, handle, host_vcpu,
					hyp_vcpu);
		if (ret) {
			free_pages_exact(hyp_vcpu, hyp_vcpu_sz);
			goto destroy_vm;
		}
	}

	return 0;

destroy_vm:
	__pkvm_destroy_hyp_vm(host_kvm);
	return ret;
free_vm:
	free_pages_exact(hyp_vm, hyp_vm_sz);
free_pgd:
	free_pages_exact(pgd, pgd_sz);
	return ret;
}

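/*
 * Create the EL2 side of the VM on first use. The config_lock serializes
 * callers, so a racer simply observes the handle already set.
 */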
int pkvm_create_hyp_vm(struct kvm *host_kvm)
{
	int ret = 0;

	mutex_lock(&host_kvm->arch.config_lock);
	if (!host_kvm->arch.pkvm.handle)
		ret = __pkvm_create_hyp_vm(host_kvm);
	mutex_unlock(&host_kvm->arch.config_lock);

	return ret;
}

void pkvm_destroy_hyp_vm(struct kvm *host_kvm)
{
	mutex_lock(&host_kvm->arch.config_lock);
	__pkvm_destroy_hyp_vm(host_kvm);
	mutex_unlock(&host_kvm->arch.config_lock);
}

int pkvm_init_host_vm(struct kvm *host_kvm)
{
	return 0;
}

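/*
 * Runs on every CPU: the __pkvm_prot_finalize hypercall has EL2 enable the
 * host stage-2 translation regime on this CPU, after which the host can no
 * longer access hyp-owned memory.
 */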
static void __init _kvm_host_prot_finalize(void *arg)
{
	int *err = arg;

	if (WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)))
		WRITE_ONCE(*err, -EINVAL);
}

static int __init pkvm_drop_host_privileges(void)
{
	int ret = 0;

	/*
	 * Flip the static key upfront as that may no longer be possible
	 * once the host stage 2 is installed.
	 */
	static_branch_enable(&kvm_protected_mode_initialized);
	on_each_cpu(_kvm_host_prot_finalize, &ret, 1);
	return ret;
}

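/*
 * De-privileging the host is deliberately one of the last things done at
 * boot (device_initcall_sync), giving earlier initcalls a chance to run
 * while the host still has full privileges.
 */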
static int __init finalize_pkvm(void)
{
	int ret;

	if (!is_protected_kvm_enabled() || !is_kvm_arm_initialised())
		return 0;

	/*
	 * Exclude HYP sections from kmemleak so that they don't get peeked
	 * at, which would end badly once inaccessible.
	 */
	kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
	kmemleak_free_part(__hyp_rodata_start, __hyp_rodata_end - __hyp_rodata_start);
	kmemleak_free_part_phys(hyp_mem_base, hyp_mem_size);

	ret = pkvm_drop_host_privileges();
	if (ret)
		pr_err("Failed to finalize Hyp protection: %d\n", ret);

	return ret;
}
device_initcall_sync(finalize_pkvm);

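/* rb_find_add() comparator: pkvm_mapping nodes are keyed by gfn. */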
static int cmp_mappings(struct rb_node *node, const struct rb_node *parent)
{
	struct pkvm_mapping *a = rb_entry(node, struct pkvm_mapping, node);
	struct pkvm_mapping *b = rb_entry(parent, struct pkvm_mapping, node);

	if (a->gfn < b->gfn)
		return -1;
	if (a->gfn > b->gfn)
		return 1;
	return 0;
}

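/*
 * Return the node matching @gfn if it exists, otherwise the last node
 * visited on the way down, which the range-walk macro below uses as its
 * starting point.
 */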
static struct rb_node *find_first_mapping_node(struct rb_root *root, u64 gfn)
{
	struct rb_node *node = root->rb_node, *prev = NULL;
	struct pkvm_mapping *mapping;

	while (node) {
		mapping = rb_entry(node, struct pkvm_mapping, node);
		if (mapping->gfn == gfn)
			return node;
		prev = node;
		node = (gfn < mapping->gfn) ? node->rb_left : node->rb_right;
	}

	return prev;
}

/*
 * __tmp is updated to rb_next(__tmp) *before* entering the body of the loop to allow freeing
 * of __map inline.
 */
#define for_each_mapping_in_range_safe(__pgt, __start, __end, __map)				\
	for (struct rb_node *__tmp = find_first_mapping_node(&(__pgt)->pkvm_mappings,		\
							     ((__start) >> PAGE_SHIFT));	\
	     __tmp && ({									\
				__map = rb_entry(__tmp, struct pkvm_mapping, node);		\
				__tmp = rb_next(__tmp);						\
				true;								\
		       });									\
	    )											\
		if (__map->gfn < ((__start) >> PAGE_SHIFT))					\
			continue;								\
		else if (__map->gfn >= ((__end) >> PAGE_SHIFT))					\
			break;									\
		else

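/*
 * With pKVM, guest stage-2 page-tables live at EL2; the host only tracks
 * which gfns it has shared with the guest in the pkvm_mappings rb-tree, so
 * there is nothing to allocate here and @mm_ops is unused.
 */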
int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
			     struct kvm_pgtable_mm_ops *mm_ops)
{
	pgt->pkvm_mappings	= RB_ROOT;
	pgt->mmu		= mmu;

	return 0;
}

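/*
 * Unshare every page still shared with the guest and free the host-side
 * tracking nodes; a VM without an EL2 handle has nothing to undo.
 */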
void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	pkvm_handle_t handle = kvm->arch.pkvm.handle;
	struct pkvm_mapping *mapping;
	struct rb_node *node;

	if (!handle)
		return;

	node = rb_first(&pgt->pkvm_mappings);
	while (node) {
		mapping = rb_entry(node, struct pkvm_mapping, node);
		kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn);
		node = rb_next(node);
		rb_erase(&mapping->node, &pgt->pkvm_mappings);
		kfree(mapping);
	}
}

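/*
 * Share a single page with the guest. The pkvm_mapping tracking node is
 * pre-allocated by the caller and arrives via the memcache, so this path
 * does not allocate.
 */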
int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
			    u64 phys, enum kvm_pgtable_prot prot,
			    void *mc, enum kvm_pgtable_walk_flags flags)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	struct pkvm_mapping *mapping = NULL;
	struct kvm_hyp_memcache *cache = mc;
	u64 gfn = addr >> PAGE_SHIFT;
	u64 pfn = phys >> PAGE_SHIFT;
	int ret;

	if (size != PAGE_SIZE)
		return -EINVAL;

	lockdep_assert_held_write(&kvm->mmu_lock);
	ret = kvm_call_hyp_nvhe(__pkvm_host_share_guest, pfn, gfn, prot);
	if (ret) {
		/* Is the gfn already mapped due to a racing vCPU? */
		if (ret == -EPERM)
			ret = -EAGAIN;

		return ret;
	}

	swap(mapping, cache->mapping);
	mapping->gfn = gfn;
	mapping->pfn = pfn;
	WARN_ON(rb_find_add(&mapping->node, &pgt->pkvm_mappings, cmp_mappings));

	return ret;
}

378 
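/*
 * Unshare all pages in [addr, addr + size) and drop their tracking nodes.
 * On a failed unshare the node is kept, so the host view stays in sync
 * with EL2.
 */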
int pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	pkvm_handle_t handle = kvm->arch.pkvm.handle;
	struct pkvm_mapping *mapping;
	int ret = 0;

	lockdep_assert_held_write(&kvm->mmu_lock);
	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
		ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn);
		if (WARN_ON(ret))
			break;
		rb_erase(&mapping->node, &pgt->pkvm_mappings);
		kfree(mapping);
	}

	return ret;
}

int pkvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	pkvm_handle_t handle = kvm->arch.pkvm.handle;
	struct pkvm_mapping *mapping;
	int ret = 0;

	lockdep_assert_held(&kvm->mmu_lock);
	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
		ret = kvm_call_hyp_nvhe(__pkvm_host_wrprotect_guest, handle, mapping->gfn);
		if (WARN_ON(ret))
			break;
	}

	return ret;
}

int pkvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	struct pkvm_mapping *mapping;

	lockdep_assert_held(&kvm->mmu_lock);
	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
		__clean_dcache_guest_page(pfn_to_kaddr(mapping->pfn), PAGE_SIZE);

	return 0;
}

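/*
 * Test, and clear if @mkold, the access flag of every page in the range;
 * hyp returns the per-page result, which is accumulated into @young.
 */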
bool pkvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, u64 size, bool mkold)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	pkvm_handle_t handle = kvm->arch.pkvm.handle;
	struct pkvm_mapping *mapping;
	bool young = false;

	lockdep_assert_held(&kvm->mmu_lock);
	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
		young |= kvm_call_hyp_nvhe(__pkvm_host_test_clear_young_guest, handle, mapping->gfn,
					   mkold);

	return young;
}

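/*
 * No VM handle is passed here: these hypercalls presumably act on the vCPU
 * context currently loaded at EL2 on this CPU.
 */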
int pkvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, enum kvm_pgtable_prot prot,
				    enum kvm_pgtable_walk_flags flags)
{
	return kvm_call_hyp_nvhe(__pkvm_host_relax_perms_guest, addr >> PAGE_SHIFT, prot);
}

void pkvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr,
				 enum kvm_pgtable_walk_flags flags)
{
	WARN_ON(kvm_call_hyp_nvhe(__pkvm_host_mkyoung_guest, addr >> PAGE_SHIFT));
}

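/*
 * The remaining kvm_pgtable operations have no pKVM equivalent: guest
 * stage-2 tables are owned by EL2 and only PAGE_SIZE mappings are used,
 * so reaching any of these stubs is a bug.
 */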
void pkvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level)
{
	WARN_ON_ONCE(1);
}

kvm_pte_t *pkvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt, u64 phys, s8 level,
					enum kvm_pgtable_prot prot, void *mc, bool force_pte)
{
	WARN_ON_ONCE(1);
	return NULL;
}

int pkvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
			      struct kvm_mmu_memory_cache *mc)
{
	WARN_ON_ONCE(1);
	return -EINVAL;
}