// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2020 - Google LLC
 * Author: Quentin Perret <qperret@google.com>
 */

#include <linux/init.h>
#include <linux/interval_tree_generic.h>
#include <linux/kmemleak.h>
#include <linux/kvm_host.h>
#include <asm/kvm_mmu.h>
#include <linux/memblock.h>
#include <linux/mutex.h>

#include <asm/kvm_pkvm.h>

#include "hyp_constants.h"

DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);

static struct memblock_region *hyp_memory = kvm_nvhe_sym(hyp_memory);
static unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr);

phys_addr_t hyp_mem_base;
phys_addr_t hyp_mem_size;

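/*
 * Snapshot the kernel's memblock regions into the hyp_memory array shared
 * with the nVHE hypervisor, so EL2 knows which ranges of memory exist.
 */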
static int __init register_memblock_regions(void)
{
	struct memblock_region *reg;

	for_each_mem_region(reg) {
		if (*hyp_memblock_nr_ptr >= HYP_MEMBLOCK_REGIONS)
			return -ENOMEM;

		hyp_memory[*hyp_memblock_nr_ptr] = *reg;
		(*hyp_memblock_nr_ptr)++;
	}

	return 0;
}

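/*
 * Carve out, at early boot and only in protected (pKVM) mode, the physical
 * memory the hypervisor will need: hyp stage-1 and host stage-2 page tables,
 * the hyp vmemmap, the VM table, selftest pages and FF-A proxy buffers.
 */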
void __init kvm_hyp_reserve(void)
{
	u64 hyp_mem_pages = 0;
	int ret;

	if (!is_hyp_mode_available() || is_kernel_in_hyp_mode())
		return;

	if (kvm_get_mode() != KVM_MODE_PROTECTED)
		return;

	ret = register_memblock_regions();
	if (ret) {
		*hyp_memblock_nr_ptr = 0;
		kvm_err("Failed to register hyp memblocks: %d\n", ret);
		return;
	}

	hyp_mem_pages += hyp_s1_pgtable_pages();
	hyp_mem_pages += host_s2_pgtable_pages();
	hyp_mem_pages += hyp_vm_table_pages();
	hyp_mem_pages += hyp_vmemmap_pages(STRUCT_HYP_PAGE_SIZE);
	hyp_mem_pages += pkvm_selftest_pages();
	hyp_mem_pages += hyp_ffa_proxy_pages();

	/*
	 * Try to allocate a PMD-aligned region to reduce TLB pressure once
	 * this is unmapped from the host stage-2, and fall back to PAGE_SIZE.
	 */
	hyp_mem_size = hyp_mem_pages << PAGE_SHIFT;
	hyp_mem_base = memblock_phys_alloc(ALIGN(hyp_mem_size, PMD_SIZE),
					   PMD_SIZE);
	if (!hyp_mem_base)
		hyp_mem_base = memblock_phys_alloc(hyp_mem_size, PAGE_SIZE);
	else
		hyp_mem_size = ALIGN(hyp_mem_size, PMD_SIZE);

	if (!hyp_mem_base) {
		kvm_err("Failed to reserve hyp memory\n");
		return;
	}

	kvm_info("Reserved %lld MiB at 0x%llx\n", hyp_mem_size >> 20,
		 hyp_mem_base);
}

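/*
 * Tear down the EL2 view of the VM (if one was created), clear the handle
 * and release any memory queued on the teardown memcaches.
 */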
static void __pkvm_destroy_hyp_vm(struct kvm *host_kvm)
{
	if (host_kvm->arch.pkvm.handle) {
		WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm,
					  host_kvm->arch.pkvm.handle));
	}

	host_kvm->arch.pkvm.handle = 0;
	free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc);
	free_hyp_memcache(&host_kvm->arch.pkvm.stage2_teardown_mc);
}

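/*
 * Allocate the EL2 copy of a vCPU and donate it to the hypervisor via the
 * __pkvm_init_vcpu hypercall. On success the vCPU is marked as finalized;
 * on failure the pages are handed back to the host allocator.
 */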
static int __pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu)
{
	size_t hyp_vcpu_sz = PAGE_ALIGN(PKVM_HYP_VCPU_SIZE);
	pkvm_handle_t handle = vcpu->kvm->arch.pkvm.handle;
	void *hyp_vcpu;
	int ret;

	vcpu->arch.pkvm_memcache.flags |= HYP_MEMCACHE_ACCOUNT_STAGE2;

	hyp_vcpu = alloc_pages_exact(hyp_vcpu_sz, GFP_KERNEL_ACCOUNT);
	if (!hyp_vcpu)
		return -ENOMEM;

	ret = kvm_call_hyp_nvhe(__pkvm_init_vcpu, handle, vcpu, hyp_vcpu);
	if (!ret)
		vcpu_set_flag(vcpu, VCPU_PKVM_FINALIZED);
	else
		free_pages_exact(hyp_vcpu, hyp_vcpu_sz);

	return ret;
}

/*
 * Allocates and donates memory for hypervisor VM structs at EL2.
 *
 * Allocates space for the VM state, which includes the hyp vm as well as
 * the hyp vcpus.
 *
 * Stores an opaque handle in the kvm struct for future reference.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int __pkvm_create_hyp_vm(struct kvm *host_kvm)
{
	size_t pgd_sz, hyp_vm_sz;
	void *pgd, *hyp_vm;
	int ret;

	if (host_kvm->created_vcpus < 1)
		return -EINVAL;

	pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.mmu.vtcr);

	/*
	 * The PGD pages will be reclaimed using a hyp_memcache which implies
	 * page granularity. So, use alloc_pages_exact() to get individual
	 * refcounts.
	 */
	pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL_ACCOUNT);
	if (!pgd)
		return -ENOMEM;

	/* Allocate memory to donate to hyp for vm and vcpu pointers. */
	hyp_vm_sz = PAGE_ALIGN(size_add(PKVM_HYP_VM_SIZE,
					size_mul(sizeof(void *),
						 host_kvm->created_vcpus)));
	hyp_vm = alloc_pages_exact(hyp_vm_sz, GFP_KERNEL_ACCOUNT);
	if (!hyp_vm) {
		ret = -ENOMEM;
		goto free_pgd;
	}

	/* Donate the VM memory to hyp and let hyp initialize it. */
	ret = kvm_call_hyp_nvhe(__pkvm_init_vm, host_kvm, hyp_vm, pgd);
	if (ret < 0)
		goto free_vm;

	host_kvm->arch.pkvm.handle = ret;
	host_kvm->arch.pkvm.stage2_teardown_mc.flags |= HYP_MEMCACHE_ACCOUNT_STAGE2;
	kvm_account_pgtable_pages(pgd, pgd_sz / PAGE_SIZE);

	return 0;
free_vm:
	free_pages_exact(hyp_vm, hyp_vm_sz);
free_pgd:
	free_pages_exact(pgd, pgd_sz);
	return ret;
}

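/*
 * Create the EL2 counterpart of the VM, taking the config_lock to serialize
 * against concurrent callers. This is a no-op if a hyp VM already exists.
 */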
int pkvm_create_hyp_vm(struct kvm *host_kvm)
{
	int ret = 0;

	mutex_lock(&host_kvm->arch.config_lock);
	if (!host_kvm->arch.pkvm.handle)
		ret = __pkvm_create_hyp_vm(host_kvm);
	mutex_unlock(&host_kvm->arch.config_lock);

	return ret;
}

int pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu)
{
	int ret = 0;

	mutex_lock(&vcpu->kvm->arch.config_lock);
	if (!vcpu_get_flag(vcpu, VCPU_PKVM_FINALIZED))
		ret = __pkvm_create_hyp_vcpu(vcpu);
	mutex_unlock(&vcpu->kvm->arch.config_lock);

	return ret;
}

void pkvm_destroy_hyp_vm(struct kvm *host_kvm)
{
	mutex_lock(&host_kvm->arch.config_lock);
	__pkvm_destroy_hyp_vm(host_kvm);
	mutex_unlock(&host_kvm->arch.config_lock);
}

int pkvm_init_host_vm(struct kvm *host_kvm)
{
	return 0;
}

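/*
 * Per-CPU callback: ask the hypervisor to install the host stage-2 on this
 * CPU, and record a failure in *arg if the hypercall does not succeed.
 */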
static void __init _kvm_host_prot_finalize(void *arg)
{
	int *err = arg;

	if (WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)))
		WRITE_ONCE(*err, -EINVAL);
}

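/*
 * Deprivilege the host on every CPU by finalizing the hyp protection; from
 * this point the host runs under the hypervisor's stage-2 translation.
 */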
static int __init pkvm_drop_host_privileges(void)
{
	int ret = 0;

	/*
	 * Flip the static key upfront as that may no longer be possible
	 * once the host stage 2 is installed.
	 */
	static_branch_enable(&kvm_protected_mode_initialized);
	on_each_cpu(_kvm_host_prot_finalize, &ret, 1);
	return ret;
}

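/*
 * Late initcall that removes the hyp sections from kmemleak and drops the
 * host's privileges, completing the hand-over to the protected hypervisor.
 */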
static int __init finalize_pkvm(void)
{
	int ret;

	if (!is_protected_kvm_enabled() || !is_kvm_arm_initialised())
		return 0;

	/*
	 * Exclude HYP sections from kmemleak so that they don't get peeked
	 * at, which would end badly once inaccessible.
	 */
	kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
	kmemleak_free_part(__hyp_data_start, __hyp_data_end - __hyp_data_start);
	kmemleak_free_part(__hyp_rodata_start, __hyp_rodata_end - __hyp_rodata_start);
	kmemleak_free_part_phys(hyp_mem_base, hyp_mem_size);

	ret = pkvm_drop_host_privileges();
	if (ret)
		pr_err("Failed to finalize Hyp protection: %d\n", ret);

	return ret;
}
device_initcall_sync(finalize_pkvm);

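/*
 * Guest mappings shared with the hypervisor are tracked host-side in an
 * interval tree keyed by IPA. The two helpers below provide the inclusive
 * [start, last] bounds of a mapping for INTERVAL_TREE_DEFINE().
 */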
static u64 __pkvm_mapping_start(struct pkvm_mapping *m)
{
	return m->gfn * PAGE_SIZE;
}

static u64 __pkvm_mapping_end(struct pkvm_mapping *m)
{
	return (m->gfn + m->nr_pages) * PAGE_SIZE - 1;
}

INTERVAL_TREE_DEFINE(struct pkvm_mapping, node, u64, __subtree_last,
		     __pkvm_mapping_start, __pkvm_mapping_end, static,
		     pkvm_mapping);

/*
 * __tmp is initialized to iter_first(pkvm_mappings) and advanced to the next
 * mapping *before* entering the body of the loop, so that __map can be freed
 * inline.
 */
#define for_each_mapping_in_range_safe(__pgt, __start, __end, __map)				\
	for (struct pkvm_mapping *__tmp = pkvm_mapping_iter_first(&(__pgt)->pkvm_mappings,	\
								  __start, __end - 1);		\
	     __tmp && ({									\
			__map = __tmp;								\
			__tmp = pkvm_mapping_iter_next(__map, __start, __end - 1);		\
			true;									\
		       });									\
	    )

int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
			     struct kvm_pgtable_mm_ops *mm_ops)
{
	pgt->pkvm_mappings = RB_ROOT_CACHED;
	pgt->mmu = mmu;

	return 0;
}

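/*
 * Walk the interval tree over [start, end) and ask the hypervisor to unshare
 * each mapping from the guest before removing and freeing the host-side
 * tracking structure. A zero handle means no hyp VM exists yet, so there is
 * nothing to do.
 */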
static int __pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 start, u64 end)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	pkvm_handle_t handle = kvm->arch.pkvm.handle;
	struct pkvm_mapping *mapping;
	int ret;

	if (!handle)
		return 0;

	for_each_mapping_in_range_safe(pgt, start, end, mapping) {
		ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn,
					mapping->nr_pages);
		if (WARN_ON(ret))
			return ret;
		pkvm_mapping_remove(mapping, &pgt->pkvm_mappings);
		kfree(mapping);
	}

	return 0;
}

void pkvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt,
				       u64 addr, u64 size)
{
	__pkvm_pgtable_stage2_unmap(pgt, addr, addr + size);
}

void pkvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt)
{
	/* Expected to be called after all pKVM mappings have been released. */
	WARN_ON_ONCE(!RB_EMPTY_ROOT(&pgt->pkvm_mappings.rb_root));
}

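/*
 * Share a page or block mapping with the guest. The mapping itself is
 * installed by the hypervisor via __pkvm_host_share_guest; the host only
 * records it in the interval tree, reusing the pkvm_mapping pre-allocated
 * in the memcache.
 */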
int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
			    u64 phys, enum kvm_pgtable_prot prot,
			    void *mc, enum kvm_pgtable_walk_flags flags)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	struct pkvm_mapping *mapping = NULL;
	struct kvm_hyp_memcache *cache = mc;
	u64 gfn = addr >> PAGE_SHIFT;
	u64 pfn = phys >> PAGE_SHIFT;
	int ret;

	if (size != PAGE_SIZE && size != PMD_SIZE)
		return -EINVAL;

	lockdep_assert_held_write(&kvm->mmu_lock);

	/*
	 * Calling stage2_map() on top of existing mappings is either happening because of a race
	 * with another vCPU, or because we're changing between page and block mappings. As per
	 * user_mem_abort(), same-size permission faults are handled in the relax_perms() path.
	 */
	mapping = pkvm_mapping_iter_first(&pgt->pkvm_mappings, addr, addr + size - 1);
	if (mapping) {
		if (size == (mapping->nr_pages * PAGE_SIZE))
			return -EAGAIN;

		/* Remove _any_ pkvm_mapping overlapping with the range, bigger or smaller. */
		ret = __pkvm_pgtable_stage2_unmap(pgt, addr, addr + size);
		if (ret)
			return ret;
		mapping = NULL;
	}

	ret = kvm_call_hyp_nvhe(__pkvm_host_share_guest, pfn, gfn, size / PAGE_SIZE, prot);
	if (WARN_ON(ret))
		return ret;

	swap(mapping, cache->mapping);
	mapping->gfn = gfn;
	mapping->pfn = pfn;
	mapping->nr_pages = size / PAGE_SIZE;
	pkvm_mapping_insert(mapping, &pgt->pkvm_mappings);

	return ret;
}

int pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	lockdep_assert_held_write(&kvm_s2_mmu_to_kvm(pgt->mmu)->mmu_lock);

	return __pkvm_pgtable_stage2_unmap(pgt, addr, addr + size);
}

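/*
 * Ask the hypervisor to write-protect every shared mapping in the range.
 */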
int pkvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	pkvm_handle_t handle = kvm->arch.pkvm.handle;
	struct pkvm_mapping *mapping;
	int ret = 0;

	lockdep_assert_held(&kvm->mmu_lock);
	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
		ret = kvm_call_hyp_nvhe(__pkvm_host_wrprotect_guest, handle, mapping->gfn,
					mapping->nr_pages);
		if (WARN_ON(ret))
			break;
	}

	return ret;
}

int pkvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	struct pkvm_mapping *mapping;

	lockdep_assert_held(&kvm->mmu_lock);
	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
		__clean_dcache_guest_page(pfn_to_kaddr(mapping->pfn),
					  PAGE_SIZE * mapping->nr_pages);

	return 0;
}

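/*
 * Aggregate the young/accessed state of every mapping in the range,
 * optionally clearing the access flag, by delegating each query to the
 * hypervisor.
 */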
bool pkvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, u64 size, bool mkold)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	pkvm_handle_t handle = kvm->arch.pkvm.handle;
	struct pkvm_mapping *mapping;
	bool young = false;

	lockdep_assert_held(&kvm->mmu_lock);
	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
		young |= kvm_call_hyp_nvhe(__pkvm_host_test_clear_young_guest, handle, mapping->gfn,
					   mapping->nr_pages, mkold);

	return young;
}

int pkvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, enum kvm_pgtable_prot prot,
				    enum kvm_pgtable_walk_flags flags)
{
	return kvm_call_hyp_nvhe(__pkvm_host_relax_perms_guest, addr >> PAGE_SHIFT, prot);
}

void pkvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr,
				 enum kvm_pgtable_walk_flags flags)
{
	WARN_ON(kvm_call_hyp_nvhe(__pkvm_host_mkyoung_guest, addr >> PAGE_SHIFT));
}

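/*
 * The remaining kvm_pgtable operations are not used for protected guests,
 * as the hypervisor owns the stage-2 page tables: warn and fail if called.
 */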
void pkvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level)
{
	WARN_ON_ONCE(1);
}

kvm_pte_t *pkvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt, u64 phys, s8 level,
					       enum kvm_pgtable_prot prot, void *mc, bool force_pte)
{
	WARN_ON_ONCE(1);
	return NULL;
}

int pkvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
			      struct kvm_mmu_memory_cache *mc)
{
	WARN_ON_ONCE(1);
	return -EINVAL;
}