// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2020 - Google LLC
 * Author: Quentin Perret <qperret@google.com>
 */

#include <linux/init.h>
#include <linux/kmemleak.h>
#include <linux/kvm_host.h>
#include <asm/kvm_mmu.h>
#include <linux/memblock.h>
#include <linux/mutex.h>
#include <linux/sort.h>

#include <asm/kvm_pkvm.h>

#include "hyp_constants.h"

DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);

static struct memblock_region *hyp_memory = kvm_nvhe_sym(hyp_memory);
static unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr);

phys_addr_t hyp_mem_base;
phys_addr_t hyp_mem_size;

static int cmp_hyp_memblock(const void *p1, const void *p2)
{
	const struct memblock_region *r1 = p1;
	const struct memblock_region *r2 = p2;

	return r1->base < r2->base ? -1 : (r1->base > r2->base);
}

static void __init sort_memblock_regions(void)
{
	sort(hyp_memory,
	     *hyp_memblock_nr_ptr,
	     sizeof(struct memblock_region),
	     cmp_hyp_memblock,
	     NULL);
}

static int __init register_memblock_regions(void)
{
	struct memblock_region *reg;

	for_each_mem_region(reg) {
		if (*hyp_memblock_nr_ptr >= HYP_MEMBLOCK_REGIONS)
			return -ENOMEM;

		hyp_memory[*hyp_memblock_nr_ptr] = *reg;
		(*hyp_memblock_nr_ptr)++;
	}
	sort_memblock_regions();

	return 0;
}
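
/*
 * Compute how many pages the nVHE hypervisor needs to run in protected
 * mode (its stage-1 page-table, the host stage-2 page-table, the VM table,
 * the hyp vmemmap and the FF-A proxy buffers) and reserve a carveout for
 * them from memblock, ideally as a single PMD-aligned region.
 */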
void __init kvm_hyp_reserve(void)
{
	u64 hyp_mem_pages = 0;
	int ret;

	if (!is_hyp_mode_available() || is_kernel_in_hyp_mode())
		return;

	if (kvm_get_mode() != KVM_MODE_PROTECTED)
		return;

	ret = register_memblock_regions();
	if (ret) {
		*hyp_memblock_nr_ptr = 0;
		kvm_err("Failed to register hyp memblocks: %d\n", ret);
		return;
	}

	hyp_mem_pages += hyp_s1_pgtable_pages();
	hyp_mem_pages += host_s2_pgtable_pages();
	hyp_mem_pages += hyp_vm_table_pages();
	hyp_mem_pages += hyp_vmemmap_pages(STRUCT_HYP_PAGE_SIZE);
	hyp_mem_pages += hyp_ffa_proxy_pages();

	/*
	 * Try to allocate a PMD-aligned region to reduce TLB pressure once
	 * this is unmapped from the host stage-2, and fall back to PAGE_SIZE.
	 */
	hyp_mem_size = hyp_mem_pages << PAGE_SHIFT;
	hyp_mem_base = memblock_phys_alloc(ALIGN(hyp_mem_size, PMD_SIZE),
					   PMD_SIZE);
	if (!hyp_mem_base)
		hyp_mem_base = memblock_phys_alloc(hyp_mem_size, PAGE_SIZE);
	else
		hyp_mem_size = ALIGN(hyp_mem_size, PMD_SIZE);

	if (!hyp_mem_base) {
		kvm_err("Failed to reserve hyp memory\n");
		return;
	}

	kvm_info("Reserved %lld MiB at 0x%llx\n", hyp_mem_size >> 20,
		 hyp_mem_base);
}

static void __pkvm_destroy_hyp_vm(struct kvm *host_kvm)
{
	if (host_kvm->arch.pkvm.handle) {
		WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm,
					  host_kvm->arch.pkvm.handle));
	}

	host_kvm->arch.pkvm.handle = 0;
	free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc);
	free_hyp_memcache(&host_kvm->arch.pkvm.stage2_teardown_mc);
}
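
/*
 * Allocate the EL2 vCPU state and donate it to the hypervisor. Once the
 * hypercall succeeds, the donated pages belong to EL2, which is why they
 * are only freed here on the error path.
 */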
static int __pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu)
{
	size_t hyp_vcpu_sz = PAGE_ALIGN(PKVM_HYP_VCPU_SIZE);
	pkvm_handle_t handle = vcpu->kvm->arch.pkvm.handle;
	void *hyp_vcpu;
	int ret;

	vcpu->arch.pkvm_memcache.flags |= HYP_MEMCACHE_ACCOUNT_STAGE2;

	hyp_vcpu = alloc_pages_exact(hyp_vcpu_sz, GFP_KERNEL_ACCOUNT);
	if (!hyp_vcpu)
		return -ENOMEM;

	ret = kvm_call_hyp_nvhe(__pkvm_init_vcpu, handle, vcpu, hyp_vcpu);
	if (!ret)
		vcpu_set_flag(vcpu, VCPU_PKVM_FINALIZED);
	else
		free_pages_exact(hyp_vcpu, hyp_vcpu_sz);

	return ret;
}

/*
 * Allocates and donates memory for hypervisor VM structs at EL2.
 *
 * Allocates space for the VM state, which includes the hyp vm as well as
 * the hyp vcpus.
 *
 * Stores an opaque handle in the kvm struct for future reference.
 *
 * Return 0 on success, negative error code on failure.
 */
static int __pkvm_create_hyp_vm(struct kvm *host_kvm)
{
	size_t pgd_sz, hyp_vm_sz;
	void *pgd, *hyp_vm;
	int ret;

	if (host_kvm->created_vcpus < 1)
		return -EINVAL;

	pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.mmu.vtcr);

	/*
	 * The PGD pages will be reclaimed using a hyp_memcache which implies
	 * page granularity. So, use alloc_pages_exact() to get individual
	 * refcounts.
	 */
	pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL_ACCOUNT);
	if (!pgd)
		return -ENOMEM;

	/* Allocate memory to donate to hyp for vm and vcpu pointers. */
	hyp_vm_sz = PAGE_ALIGN(size_add(PKVM_HYP_VM_SIZE,
					size_mul(sizeof(void *),
						 host_kvm->created_vcpus)));
	hyp_vm = alloc_pages_exact(hyp_vm_sz, GFP_KERNEL_ACCOUNT);
	if (!hyp_vm) {
		ret = -ENOMEM;
		goto free_pgd;
	}

	/* Donate the VM memory to hyp and let hyp initialize it. */
	ret = kvm_call_hyp_nvhe(__pkvm_init_vm, host_kvm, hyp_vm, pgd);
	if (ret < 0)
		goto free_vm;

	host_kvm->arch.pkvm.handle = ret;
	host_kvm->arch.pkvm.stage2_teardown_mc.flags |= HYP_MEMCACHE_ACCOUNT_STAGE2;
	kvm_account_pgtable_pages(pgd, pgd_sz / PAGE_SIZE);

	return 0;
free_vm:
	free_pages_exact(hyp_vm, hyp_vm_sz);
free_pgd:
	free_pages_exact(pgd, pgd_sz);
	return ret;
}

int pkvm_create_hyp_vm(struct kvm *host_kvm)
{
	int ret = 0;

	mutex_lock(&host_kvm->arch.config_lock);
	if (!host_kvm->arch.pkvm.handle)
		ret = __pkvm_create_hyp_vm(host_kvm);
	mutex_unlock(&host_kvm->arch.config_lock);

	return ret;
}

int pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu)
{
	int ret = 0;

	mutex_lock(&vcpu->kvm->arch.config_lock);
	if (!vcpu_get_flag(vcpu, VCPU_PKVM_FINALIZED))
		ret = __pkvm_create_hyp_vcpu(vcpu);
	mutex_unlock(&vcpu->kvm->arch.config_lock);

	return ret;
}

void pkvm_destroy_hyp_vm(struct kvm *host_kvm)
{
	mutex_lock(&host_kvm->arch.config_lock);
	__pkvm_destroy_hyp_vm(host_kvm);
	mutex_unlock(&host_kvm->arch.config_lock);
}

int pkvm_init_host_vm(struct kvm *host_kvm)
{
	return 0;
}

static void __init _kvm_host_prot_finalize(void *arg)
{
	int *err = arg;

	if (WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)))
		WRITE_ONCE(*err, -EINVAL);
}

static int __init pkvm_drop_host_privileges(void)
{
	int ret = 0;

	/*
	 * Flip the static key upfront as that may no longer be possible
	 * once the host stage 2 is installed.
	 */
	static_branch_enable(&kvm_protected_mode_initialized);
	on_each_cpu(_kvm_host_prot_finalize, &ret, 1);
	return ret;
}

static int __init finalize_pkvm(void)
{
	int ret;

	if (!is_protected_kvm_enabled() || !is_kvm_arm_initialised())
		return 0;

	/*
	 * Exclude HYP sections from kmemleak so that they don't get peeked
	 * at, which would end badly once inaccessible.
	 */
	kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
	kmemleak_free_part(__hyp_rodata_start, __hyp_rodata_end - __hyp_rodata_start);
	kmemleak_free_part_phys(hyp_mem_base, hyp_mem_size);

	ret = pkvm_drop_host_privileges();
	if (ret)
		pr_err("Failed to finalize Hyp protection: %d\n", ret);

	return ret;
}
device_initcall_sync(finalize_pkvm);

static int cmp_mappings(struct rb_node *node, const struct rb_node *parent)
{
	struct pkvm_mapping *a = rb_entry(node, struct pkvm_mapping, node);
	struct pkvm_mapping *b = rb_entry(parent, struct pkvm_mapping, node);

	if (a->gfn < b->gfn)
		return -1;
	if (a->gfn > b->gfn)
		return 1;
	return 0;
}
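
/*
 * Return the node mapping @gfn if there is one, or, if @gfn isn't mapped,
 * the last node visited on the way down the tree. Range walkers filter out
 * any leading nodes that fall before the start of the range.
 */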
static struct rb_node *find_first_mapping_node(struct rb_root *root, u64 gfn)
{
	struct rb_node *node = root->rb_node, *prev = NULL;
	struct pkvm_mapping *mapping;

	while (node) {
		mapping = rb_entry(node, struct pkvm_mapping, node);
		if (mapping->gfn == gfn)
			return node;
		prev = node;
		node = (gfn < mapping->gfn) ? node->rb_left : node->rb_right;
	}

	return prev;
}

/*
 * __tmp is updated to rb_next(__tmp) *before* entering the body of the loop
 * to allow freeing of __map inline.
 */
#define for_each_mapping_in_range_safe(__pgt, __start, __end, __map)		\
	for (struct rb_node *__tmp = find_first_mapping_node(&(__pgt)->pkvm_mappings, \
							     ((__start) >> PAGE_SHIFT)); \
	     __tmp && ({							\
			__map = rb_entry(__tmp, struct pkvm_mapping, node);	\
			__tmp = rb_next(__tmp);					\
			true;							\
		       });							\
	    )									\
		if (__map->gfn < ((__start) >> PAGE_SHIFT))			\
			continue;						\
		else if (__map->gfn >= ((__end) >> PAGE_SHIFT))			\
			break;							\
		else
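
/*
 * Stage-2 mappings for protected guests are tracked host-side in an rb-tree
 * keyed by gfn, since the guest's page-tables are managed by the hypervisor
 * at EL2. A typical walk over an IPA range looks like:
 *
 *	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
 *		... use, or erase and free, mapping ...
 *	}
 */
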
int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
			     struct kvm_pgtable_mm_ops *mm_ops)
{
	pgt->pkvm_mappings = RB_ROOT;
	pgt->mmu = mmu;

	return 0;
}

void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	pkvm_handle_t handle = kvm->arch.pkvm.handle;
	struct pkvm_mapping *mapping;
	struct rb_node *node;

	if (!handle)
		return;

	node = rb_first(&pgt->pkvm_mappings);
	while (node) {
		mapping = rb_entry(node, struct pkvm_mapping, node);
		kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn);
		node = rb_next(node);
		rb_erase(&mapping->node, &pgt->pkvm_mappings);
		kfree(mapping);
	}
}

int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
			    u64 phys, enum kvm_pgtable_prot prot,
			    void *mc, enum kvm_pgtable_walk_flags flags)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	struct pkvm_mapping *mapping = NULL;
	struct kvm_hyp_memcache *cache = mc;
	u64 gfn = addr >> PAGE_SHIFT;
	u64 pfn = phys >> PAGE_SHIFT;
	int ret;

	if (size != PAGE_SIZE)
		return -EINVAL;

	lockdep_assert_held_write(&kvm->mmu_lock);
	ret = kvm_call_hyp_nvhe(__pkvm_host_share_guest, pfn, gfn, prot);
	if (ret) {
		/* Is the gfn already mapped due to a racing vCPU? */
		if (ret == -EPERM)
			return -EAGAIN;
		/* Don't track a mapping the hypervisor refused to create. */
		return ret;
	}

	swap(mapping, cache->mapping);
	mapping->gfn = gfn;
	mapping->pfn = pfn;
	WARN_ON(rb_find_add(&mapping->node, &pgt->pkvm_mappings, cmp_mappings));

	return ret;
}

int pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	pkvm_handle_t handle = kvm->arch.pkvm.handle;
	struct pkvm_mapping *mapping;
	int ret = 0;

	lockdep_assert_held_write(&kvm->mmu_lock);
	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
		ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn);
		if (WARN_ON(ret))
			break;
		rb_erase(&mapping->node, &pgt->pkvm_mappings);
		kfree(mapping);
	}

	return ret;
}

int pkvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	pkvm_handle_t handle = kvm->arch.pkvm.handle;
	struct pkvm_mapping *mapping;
	int ret = 0;

	lockdep_assert_held(&kvm->mmu_lock);
	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
		ret = kvm_call_hyp_nvhe(__pkvm_host_wrprotect_guest, handle, mapping->gfn);
		if (WARN_ON(ret))
			break;
	}

	return ret;
}

int pkvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	struct pkvm_mapping *mapping;

	lockdep_assert_held(&kvm->mmu_lock);
	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
		__clean_dcache_guest_page(pfn_to_kaddr(mapping->pfn), PAGE_SIZE);

	return 0;
}

bool pkvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, u64 size, bool mkold)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	pkvm_handle_t handle = kvm->arch.pkvm.handle;
	struct pkvm_mapping *mapping;
	bool young = false;

	lockdep_assert_held(&kvm->mmu_lock);
	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
		young |= kvm_call_hyp_nvhe(__pkvm_host_test_clear_young_guest, handle, mapping->gfn,
					   mkold);

	return young;
}

int pkvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, enum kvm_pgtable_prot prot,
				    enum kvm_pgtable_walk_flags flags)
{
	return kvm_call_hyp_nvhe(__pkvm_host_relax_perms_guest, addr >> PAGE_SHIFT, prot);
}

void pkvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr,
				 enum kvm_pgtable_walk_flags flags)
{
	WARN_ON(kvm_call_hyp_nvhe(__pkvm_host_mkyoung_guest, addr >> PAGE_SHIFT));
}

/* The following operations are not supported for protected guests. */
void pkvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level)
{
	WARN_ON_ONCE(1);
}

kvm_pte_t *pkvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt, u64 phys, s8 level,
					       enum kvm_pgtable_prot prot, void *mc, bool force_pte)
{
	WARN_ON_ONCE(1);
	return NULL;
}

int pkvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
			      struct kvm_mmu_memory_cache *mc)
{
	WARN_ON_ONCE(1);
	return -EINVAL;
}