// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2020 Google LLC
 * Author: Quentin Perret <qperret@google.com>
 */

#include <linux/kvm_host.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pgtable.h>
#include <asm/kvm_pkvm.h>
#include <asm/stage2_pgtable.h>

#include <hyp/fault.h>

#include <nvhe/gfp.h>
#include <nvhe/memory.h>
#include <nvhe/mem_protect.h>
#include <nvhe/mm.h>

#define KVM_HOST_S2_FLAGS (KVM_PGTABLE_S2_NOFWB | KVM_PGTABLE_S2_IDMAP)

struct host_mmu host_mmu;

static struct hyp_pool host_s2_pool;

static DEFINE_PER_CPU(struct pkvm_hyp_vm *, __current_vm);
#define current_vm (*this_cpu_ptr(&__current_vm))

static void guest_lock_component(struct pkvm_hyp_vm *vm)
{
	hyp_spin_lock(&vm->lock);
	current_vm = vm;
}

static void guest_unlock_component(struct pkvm_hyp_vm *vm)
{
	current_vm = NULL;
	hyp_spin_unlock(&vm->lock);
}

static void host_lock_component(void)
{
	hyp_spin_lock(&host_mmu.lock);
}

static void host_unlock_component(void)
{
	hyp_spin_unlock(&host_mmu.lock);
}

static void hyp_lock_component(void)
{
	hyp_spin_lock(&pkvm_pgd_lock);
}

static void hyp_unlock_component(void)
{
	hyp_spin_unlock(&pkvm_pgd_lock);
}

static void *host_s2_zalloc_pages_exact(size_t size)
{
	void *addr = hyp_alloc_pages(&host_s2_pool, get_order(size));

	hyp_split_page(hyp_virt_to_page(addr));

	/*
	 * The size of concatenated PGDs is always a power of two of PAGE_SIZE,
	 * so there should be no need to free any of the tail pages to make the
	 * allocation exact.
	 */
	WARN_ON(size != (PAGE_SIZE << get_order(size)));

	return addr;
}

static void *host_s2_zalloc_page(void *pool)
{
	return hyp_alloc_pages(pool, 0);
}

static void host_s2_get_page(void *addr)
{
	hyp_get_page(&host_s2_pool, addr);
}

static void host_s2_put_page(void *addr)
{
	hyp_put_page(&host_s2_pool, addr);
}

static void host_s2_free_unlinked_table(void *addr, s8 level)
{
	kvm_pgtable_stage2_free_unlinked(&host_mmu.mm_ops, addr, level);
}

static int prepare_s2_pool(void *pgt_pool_base)
{
	unsigned long nr_pages, pfn;
	int ret;

	pfn = hyp_virt_to_pfn(pgt_pool_base);
	nr_pages = host_s2_pgtable_pages();
	ret = hyp_pool_init(&host_s2_pool, pfn, nr_pages, 0);
	if (ret)
		return ret;

	host_mmu.mm_ops = (struct kvm_pgtable_mm_ops) {
		.zalloc_pages_exact = host_s2_zalloc_pages_exact,
		.zalloc_page = host_s2_zalloc_page,
		.free_unlinked_table = host_s2_free_unlinked_table,
		.phys_to_virt = hyp_phys_to_virt,
		.virt_to_phys = hyp_virt_to_phys,
		.page_count = hyp_page_count,
		.get_page = host_s2_get_page,
		.put_page = host_s2_put_page,
	};

	return 0;
}

static void prepare_host_vtcr(void)
{
	u32 parange, phys_shift;

	/* The host stage 2 is id-mapped, so use parange for T0SZ */
	parange = kvm_get_parange(id_aa64mmfr0_el1_sys_val);
	phys_shift = id_aa64mmfr0_parange_to_phys_shift(parange);

	host_mmu.arch.mmu.vtcr = kvm_get_vtcr(id_aa64mmfr0_el1_sys_val,
					      id_aa64mmfr1_el1_sys_val, phys_shift);
}

static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot);

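/*
 * Set up the host's stage-2 page-table: initialise the allocator pool from
 * @pgt_pool_base, install the host mm_ops and create the identity-mapped
 * page-table that will control host memory accesses once
 * __pkvm_prot_finalize() has enabled it.
 */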
int kvm_host_prepare_stage2(void *pgt_pool_base)
{
	struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu;
	int ret;

	prepare_host_vtcr();
	hyp_spin_lock_init(&host_mmu.lock);
	mmu->arch = &host_mmu.arch;

	ret = prepare_s2_pool(pgt_pool_base);
	if (ret)
		return ret;

	ret = __kvm_pgtable_stage2_init(&host_mmu.pgt, mmu,
					&host_mmu.mm_ops, KVM_HOST_S2_FLAGS,
					host_stage2_force_pte_cb);
	if (ret)
		return ret;

	mmu->pgd_phys = __hyp_pa(host_mmu.pgt.pgd);
	mmu->pgt = &host_mmu.pgt;
	atomic64_set(&mmu->vmid.id, 0);

	return 0;
}

static bool guest_stage2_force_pte_cb(u64 addr, u64 end,
				      enum kvm_pgtable_prot prot)
{
	return true;
}

static void *guest_s2_zalloc_pages_exact(size_t size)
{
	void *addr = hyp_alloc_pages(&current_vm->pool, get_order(size));

	WARN_ON(size != (PAGE_SIZE << get_order(size)));
	hyp_split_page(hyp_virt_to_page(addr));

	return addr;
}

static void guest_s2_free_pages_exact(void *addr, unsigned long size)
{
	u8 order = get_order(size);
	unsigned int i;

	for (i = 0; i < (1 << order); i++)
		hyp_put_page(&current_vm->pool, addr + (i * PAGE_SIZE));
}

static void *guest_s2_zalloc_page(void *mc)
{
	struct hyp_page *p;
	void *addr;

	addr = hyp_alloc_pages(&current_vm->pool, 0);
	if (addr)
		return addr;

	addr = pop_hyp_memcache(mc, hyp_phys_to_virt);
	if (!addr)
		return addr;

	memset(addr, 0, PAGE_SIZE);
	p = hyp_virt_to_page(addr);
	p->refcount = 1;
	p->order = 0;

	return addr;
}

static void guest_s2_get_page(void *addr)
{
	hyp_get_page(&current_vm->pool, addr);
}

static void guest_s2_put_page(void *addr)
{
	hyp_put_page(&current_vm->pool, addr);
}

static void clean_dcache_guest_page(void *va, size_t size)
{
	__clean_dcache_guest_page(hyp_fixmap_map(__hyp_pa(va)), size);
	hyp_fixmap_unmap();
}

static void invalidate_icache_guest_page(void *va, size_t size)
{
	__invalidate_icache_guest_page(hyp_fixmap_map(__hyp_pa(va)), size);
	hyp_fixmap_unmap();
}

int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd)
{
	struct kvm_s2_mmu *mmu = &vm->kvm.arch.mmu;
	unsigned long nr_pages;
	int ret;

	nr_pages = kvm_pgtable_stage2_pgd_size(mmu->vtcr) >> PAGE_SHIFT;
	ret = hyp_pool_init(&vm->pool, hyp_virt_to_pfn(pgd), nr_pages, 0);
	if (ret)
		return ret;

	hyp_spin_lock_init(&vm->lock);
	vm->mm_ops = (struct kvm_pgtable_mm_ops) {
		.zalloc_pages_exact = guest_s2_zalloc_pages_exact,
		.free_pages_exact = guest_s2_free_pages_exact,
		.zalloc_page = guest_s2_zalloc_page,
		.phys_to_virt = hyp_phys_to_virt,
		.virt_to_phys = hyp_virt_to_phys,
		.page_count = hyp_page_count,
		.get_page = guest_s2_get_page,
		.put_page = guest_s2_put_page,
		.dcache_clean_inval_poc = clean_dcache_guest_page,
		.icache_inval_pou = invalidate_icache_guest_page,
	};

	guest_lock_component(vm);
	ret = __kvm_pgtable_stage2_init(mmu->pgt, mmu, &vm->mm_ops, 0,
					guest_stage2_force_pte_cb);
	guest_unlock_component(vm);
	if (ret)
		return ret;

	vm->kvm.arch.mmu.pgd_phys = __hyp_pa(vm->pgt.pgd);

	return 0;
}

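/*
 * Tear down a guest's stage-2 page-table and return its pages to the host:
 * the page-table is destroyed, then every page left in the VM's hyp_pool is
 * drained into @mc and donated back to the host.
 */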
void reclaim_guest_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc)
{
	struct hyp_page *page;
	void *addr;

	/* Dump all pgtable pages in the hyp_pool */
	guest_lock_component(vm);
	kvm_pgtable_stage2_destroy(&vm->pgt);
	vm->kvm.arch.mmu.pgd_phys = 0ULL;
	guest_unlock_component(vm);

	/* Drain the hyp_pool into the memcache */
	addr = hyp_alloc_pages(&vm->pool, 0);
	while (addr) {
		page = hyp_virt_to_page(addr);
		page->refcount = 0;
		page->order = 0;
		push_hyp_memcache(mc, addr, hyp_virt_to_phys);
		WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(addr), 1));
		addr = hyp_alloc_pages(&vm->pool, 0);
	}
}

int __pkvm_prot_finalize(void)
{
	struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu;
	struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);

	if (params->hcr_el2 & HCR_VM)
		return -EPERM;

	params->vttbr = kvm_get_vttbr(mmu);
	params->vtcr = mmu->vtcr;
	params->hcr_el2 |= HCR_VM;

	/*
	 * The CMO below not only cleans the updated params to the
	 * PoC, but also provides the DSB that ensures ongoing
	 * page-table walks that have started before we trapped to EL2
	 * have completed.
	 */
	kvm_flush_dcache_to_poc(params, sizeof(*params));

	write_sysreg(params->hcr_el2, hcr_el2);
	__load_stage2(&host_mmu.arch.mmu, &host_mmu.arch);

	/*
	 * Make sure to have an ISB before the TLB maintenance below but only
	 * when __load_stage2() doesn't include one already.
	 */
	asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT));

	/* Invalidate stale HCR bits that may be cached in TLBs */
	__tlbi(vmalls12e1);
	dsb(nsh);
	isb();

	return 0;
}

static int host_stage2_unmap_dev_all(void)
{
	struct kvm_pgtable *pgt = &host_mmu.pgt;
	struct memblock_region *reg;
	u64 addr = 0;
	int i, ret;

	/* Unmap all non-memory regions to recycle the pages */
	for (i = 0; i < hyp_memblock_nr; i++, addr = reg->base + reg->size) {
		reg = &hyp_memory[i];
		ret = kvm_pgtable_stage2_unmap(pgt, addr, reg->base - addr);
		if (ret)
			return ret;
	}
	return kvm_pgtable_stage2_unmap(pgt, addr, BIT(pgt->ia_bits) - addr);
}

struct kvm_mem_range {
	u64 start;
	u64 end;
};

static struct memblock_region *find_mem_range(phys_addr_t addr, struct kvm_mem_range *range)
{
	int cur, left = 0, right = hyp_memblock_nr;
	struct memblock_region *reg;
	phys_addr_t end;

	range->start = 0;
	range->end = ULONG_MAX;

	/* The list of memblock regions is sorted, binary search it */
	while (left < right) {
		cur = (left + right) >> 1;
		reg = &hyp_memory[cur];
		end = reg->base + reg->size;
		if (addr < reg->base) {
			right = cur;
			range->end = reg->base;
		} else if (addr >= end) {
			left = cur + 1;
			range->start = end;
		} else {
			range->start = reg->base;
			range->end = end;
			return reg;
		}
	}

	return NULL;
}

bool addr_is_memory(phys_addr_t phys)
{
	struct kvm_mem_range range;

	return !!find_mem_range(phys, &range);
}

static bool is_in_mem_range(u64 addr, struct kvm_mem_range *range)
{
	return range->start <= addr && addr < range->end;
}

static int check_range_allowed_memory(u64 start, u64 end)
{
	struct memblock_region *reg;
	struct kvm_mem_range range;

	/*
	 * Callers can't check the state of a range that overlaps memory and
	 * MMIO regions, so ensure [start, end[ is in the same kvm_mem_range.
	 */
	reg = find_mem_range(start, &range);
	if (!is_in_mem_range(end - 1, &range))
		return -EINVAL;

	if (!reg || reg->flags & MEMBLOCK_NOMAP)
		return -EPERM;

	return 0;
}

static bool range_is_memory(u64 start, u64 end)
{
	struct kvm_mem_range r;

	if (!find_mem_range(start, &r))
		return false;

	return is_in_mem_range(end - 1, &r);
}

static inline int __host_stage2_idmap(u64 start, u64 end,
				      enum kvm_pgtable_prot prot)
{
	return kvm_pgtable_stage2_map(&host_mmu.pgt, start, end - start, start,
				      prot, &host_s2_pool, 0);
}

/*
 * The pool has been provided with enough pages to cover all of memory with
 * page granularity, but it is difficult to know how much of the MMIO range
 * we will need to cover upfront, so we may need to 'recycle' the pages if we
 * run out.
 */
#define host_stage2_try(fn, ...)					\
	({								\
		int __ret;						\
		hyp_assert_lock_held(&host_mmu.lock);			\
		__ret = fn(__VA_ARGS__);				\
		if (__ret == -ENOMEM) {					\
			__ret = host_stage2_unmap_dev_all();		\
			if (!__ret)					\
				__ret = fn(__VA_ARGS__);		\
		}							\
		__ret;							\
	})

static inline bool range_included(struct kvm_mem_range *child,
				  struct kvm_mem_range *parent)
{
	return parent->start <= child->start && child->end <= parent->end;
}

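/*
 * Refine @range (as computed by find_mem_range()) down to the largest
 * block-mapping-aligned range that contains @addr, can be installed as a
 * single block and still fits within @range. Returns -EAGAIN if a valid
 * mapping already exists at @addr (the fault has presumably been handled by
 * another CPU), or -EPERM if the entry is annotated as owned by another
 * entity.
 */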
static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
{
	struct kvm_mem_range cur;
	kvm_pte_t pte;
	s8 level;
	int ret;

	hyp_assert_lock_held(&host_mmu.lock);
	ret = kvm_pgtable_get_leaf(&host_mmu.pgt, addr, &pte, &level);
	if (ret)
		return ret;

	if (kvm_pte_valid(pte))
		return -EAGAIN;

	if (pte) {
		WARN_ON(addr_is_memory(addr) &&
			hyp_phys_to_page(addr)->host_state != PKVM_NOPAGE);
		return -EPERM;
	}

	do {
		u64 granule = kvm_granule_size(level);
		cur.start = ALIGN_DOWN(addr, granule);
		cur.end = cur.start + granule;
		level++;
	} while ((level <= KVM_PGTABLE_LAST_LEVEL) &&
			!(kvm_level_supports_block_mapping(level) &&
			  range_included(&cur, range)));

	*range = cur;

	return 0;
}

int host_stage2_idmap_locked(phys_addr_t addr, u64 size,
			     enum kvm_pgtable_prot prot)
{
	return host_stage2_try(__host_stage2_idmap, addr, addr + size, prot);
}

static void __host_update_page_state(phys_addr_t addr, u64 size, enum pkvm_page_state state)
{
	phys_addr_t end = addr + size;

	for (; addr < end; addr += PAGE_SIZE)
		hyp_phys_to_page(addr)->host_state = state;
}

int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id)
{
	int ret;

	if (!addr_is_memory(addr))
		return -EPERM;

	ret = host_stage2_try(kvm_pgtable_stage2_set_owner, &host_mmu.pgt,
			      addr, size, &host_s2_pool, owner_id);
	if (ret)
		return ret;

	/* Don't forget to update the vmemmap tracking for the host */
	if (owner_id == PKVM_ID_HOST)
		__host_update_page_state(addr, size, PKVM_PAGE_OWNED);
	else
		__host_update_page_state(addr, size, PKVM_NOPAGE);

	return 0;
}

static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot)
{
	/*
	 * Block mappings must be used with care in the host stage-2 as a
	 * kvm_pgtable_stage2_map() operation targeting a page in the range of
	 * an existing block will delete the block under the assumption that
	 * mappings in the rest of the block range can always be rebuilt
	 * lazily. That assumption is correct for the host stage-2 with RWX
	 * mappings targeting memory or RW mappings targeting MMIO ranges (see
	 * host_stage2_idmap() below which implements some of the host memory
	 * abort logic). However, this is not safe for any other mappings where
	 * the host stage-2 page-table is in fact the only place where this
	 * state is stored. In all those cases, it is safer to use page-level
	 * mappings, hence avoiding to lose the state because of side-effects
	 * in kvm_pgtable_stage2_map().
	 */
	if (range_is_memory(addr, end))
		return prot != PKVM_HOST_MEM_PROT;
	else
		return prot != PKVM_HOST_MMIO_PROT;
}

static int host_stage2_idmap(u64 addr)
{
	struct kvm_mem_range range;
	bool is_memory = !!find_mem_range(addr, &range);
	enum kvm_pgtable_prot prot;
	int ret;

	prot = is_memory ? PKVM_HOST_MEM_PROT : PKVM_HOST_MMIO_PROT;

	host_lock_component();
	ret = host_stage2_adjust_range(addr, &range);
	if (ret)
		goto unlock;

	ret = host_stage2_idmap_locked(range.start, range.end - range.start, prot);
unlock:
	host_unlock_component();

	return ret;
}

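/*
 * Handle a stage-2 fault taken by the host by identity-mapping the faulting
 * address with default memory or MMIO permissions. -EAGAIN indicates a benign
 * race with a concurrent mapping of the same range and is ignored; any other
 * failure (e.g. the host touching a page it no longer owns) is fatal.
 */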
void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
{
	struct kvm_vcpu_fault_info fault;
	u64 esr, addr;
	int ret = 0;

	esr = read_sysreg_el2(SYS_ESR);
	if (!__get_fault_info(esr, &fault)) {
		/*
		 * We've presumably raced with a page-table change which caused
		 * AT to fail, try again.
		 */
		return;
	}

	addr = (fault.hpfar_el2 & HPFAR_MASK) << 8;
	ret = host_stage2_idmap(addr);
	BUG_ON(ret && ret != -EAGAIN);
}

struct check_walk_data {
	enum pkvm_page_state desired;
	enum pkvm_page_state (*get_page_state)(kvm_pte_t pte, u64 addr);
};

static int __check_page_state_visitor(const struct kvm_pgtable_visit_ctx *ctx,
				      enum kvm_pgtable_walk_flags visit)
{
	struct check_walk_data *d = ctx->arg;

	return d->get_page_state(ctx->old, ctx->addr) == d->desired ? 0 : -EPERM;
}

static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size,
				  struct check_walk_data *data)
{
	struct kvm_pgtable_walker walker = {
		.cb = __check_page_state_visitor,
		.arg = data,
		.flags = KVM_PGTABLE_WALK_LEAF,
	};

	return kvm_pgtable_walk(pgt, addr, size, &walker);
}

static int __host_check_page_state_range(u64 addr, u64 size,
					 enum pkvm_page_state state)
{
	u64 end = addr + size;
	int ret;

	ret = check_range_allowed_memory(addr, end);
	if (ret)
		return ret;

	hyp_assert_lock_held(&host_mmu.lock);
	for (; addr < end; addr += PAGE_SIZE) {
		if (hyp_phys_to_page(addr)->host_state != state)
			return -EPERM;
	}

	return 0;
}

static int __host_set_page_state_range(u64 addr, u64 size,
				       enum pkvm_page_state state)
{
	if (hyp_phys_to_page(addr)->host_state == PKVM_NOPAGE) {
		int ret = host_stage2_idmap_locked(addr, size, PKVM_HOST_MEM_PROT);

		if (ret)
			return ret;
	}

	__host_update_page_state(addr, size, state);

	return 0;
}

static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte, u64 addr)
{
	if (!kvm_pte_valid(pte))
		return PKVM_NOPAGE;

	return pkvm_getstate(kvm_pgtable_hyp_pte_prot(pte));
}

static int __hyp_check_page_state_range(u64 addr, u64 size,
					enum pkvm_page_state state)
{
	struct check_walk_data d = {
		.desired = state,
		.get_page_state = hyp_get_page_state,
	};

	hyp_assert_lock_held(&pkvm_pgd_lock);
	return check_page_state_range(&pkvm_pgtable, addr, size, &d);
}

static enum pkvm_page_state guest_get_page_state(kvm_pte_t pte, u64 addr)
{
	if (!kvm_pte_valid(pte))
		return PKVM_NOPAGE;

	return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte));
}

static int __guest_check_page_state_range(struct pkvm_hyp_vcpu *vcpu, u64 addr,
					  u64 size, enum pkvm_page_state state)
{
	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
	struct check_walk_data d = {
		.desired = state,
		.get_page_state = guest_get_page_state,
	};

	hyp_assert_lock_held(&vm->lock);
	return check_page_state_range(&vm->pgt, addr, size, &d);
}

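/*
 * Share a page of host-owned memory with the hypervisor. The page must be
 * exclusively owned by the host; it is mapped into the hyp as
 * SHARED_BORROWED and the host's state for it becomes SHARED_OWNED.
 */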
int __pkvm_host_share_hyp(u64 pfn)
{
	u64 phys = hyp_pfn_to_phys(pfn);
	void *virt = __hyp_va(phys);
	enum kvm_pgtable_prot prot;
	u64 size = PAGE_SIZE;
	int ret;

	host_lock_component();
	hyp_lock_component();

	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
	if (ret)
		goto unlock;
	if (IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) {
		ret = __hyp_check_page_state_range((u64)virt, size, PKVM_NOPAGE);
		if (ret)
			goto unlock;
	}

	prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED);
	WARN_ON(pkvm_create_mappings_locked(virt, virt + size, prot));
	WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED));

unlock:
	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

int __pkvm_host_unshare_hyp(u64 pfn)
{
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 virt = (u64)__hyp_va(phys);
	u64 size = PAGE_SIZE;
	int ret;

	host_lock_component();
	hyp_lock_component();

	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
	if (ret)
		goto unlock;
	ret = __hyp_check_page_state_range(virt, size, PKVM_PAGE_SHARED_BORROWED);
	if (ret)
		goto unlock;
	if (hyp_page_count((void *)virt)) {
		ret = -EBUSY;
		goto unlock;
	}

	WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, virt, size) != size);
	WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_OWNED));

unlock:
	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages)
{
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 size = PAGE_SIZE * nr_pages;
	void *virt = __hyp_va(phys);
	enum kvm_pgtable_prot prot;
	int ret;

	host_lock_component();
	hyp_lock_component();

	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
	if (ret)
		goto unlock;
	if (IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) {
		ret = __hyp_check_page_state_range((u64)virt, size, PKVM_NOPAGE);
		if (ret)
			goto unlock;
	}

	prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_OWNED);
	WARN_ON(pkvm_create_mappings_locked(virt, virt + size, prot));
	WARN_ON(host_stage2_set_owner_locked(phys, size, PKVM_ID_HYP));

unlock:
	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages)
{
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 size = PAGE_SIZE * nr_pages;
	u64 virt = (u64)__hyp_va(phys);
	int ret;

	host_lock_component();
	hyp_lock_component();

	ret = __hyp_check_page_state_range(virt, size, PKVM_PAGE_OWNED);
	if (ret)
		goto unlock;
	if (IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) {
		ret = __host_check_page_state_range(phys, size, PKVM_NOPAGE);
		if (ret)
			goto unlock;
	}

	WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, virt, size) != size);
	WARN_ON(host_stage2_set_owner_locked(phys, size, PKVM_ID_HOST));

unlock:
	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

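/*
 * Pin a range of memory previously shared by the host with the hypervisor:
 * once the host/hyp page states have been checked, the refcount of each page
 * is raised so that the host cannot unshare the range (see the -EBUSY check
 * in __pkvm_host_unshare_hyp()) until hyp_unpin_shared_mem() drops the
 * references again.
 */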
int hyp_pin_shared_mem(void *from, void *to)
{
	u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE);
	u64 end = PAGE_ALIGN((u64)to);
	u64 size = end - start;
	int ret;

	host_lock_component();
	hyp_lock_component();

	ret = __host_check_page_state_range(__hyp_pa(start), size,
					    PKVM_PAGE_SHARED_OWNED);
	if (ret)
		goto unlock;

	ret = __hyp_check_page_state_range(start, size,
					   PKVM_PAGE_SHARED_BORROWED);
	if (ret)
		goto unlock;

	for (cur = start; cur < end; cur += PAGE_SIZE)
		hyp_page_ref_inc(hyp_virt_to_page(cur));

unlock:
	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

void hyp_unpin_shared_mem(void *from, void *to)
{
	u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE);
	u64 end = PAGE_ALIGN((u64)to);

	host_lock_component();
	hyp_lock_component();

	for (cur = start; cur < end; cur += PAGE_SIZE)
		hyp_page_ref_dec(hyp_virt_to_page(cur));

	hyp_unlock_component();
	host_unlock_component();
}

int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages)
{
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 size = PAGE_SIZE * nr_pages;
	int ret;

	host_lock_component();
	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
	if (!ret)
		ret = __host_set_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
	host_unlock_component();

	return ret;
}

int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages)
{
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 size = PAGE_SIZE * nr_pages;
	int ret;

	host_lock_component();
	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
	if (!ret)
		ret = __host_set_page_state_range(phys, size, PKVM_PAGE_OWNED);
	host_unlock_component();

	return ret;
}

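/*
 * Share a page of host-owned memory with a non-protected guest at the IPA
 * corresponding to @gfn. The page must be shareable memory and the guest IPA
 * must be unmapped. A page may be shared with several np-guests at once;
 * host_share_guest_count tracks the number of such mappings so that the page
 * only reverts to exclusive host ownership once the last one is torn down.
 */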
int __pkvm_host_share_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu,
			    enum kvm_pgtable_prot prot)
{
	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 ipa = hyp_pfn_to_phys(gfn);
	struct hyp_page *page;
	int ret;

	if (prot & ~KVM_PGTABLE_PROT_RWX)
		return -EINVAL;

	ret = check_range_allowed_memory(phys, phys + PAGE_SIZE);
	if (ret)
		return ret;

	host_lock_component();
	guest_lock_component(vm);

	ret = __guest_check_page_state_range(vcpu, ipa, PAGE_SIZE, PKVM_NOPAGE);
	if (ret)
		goto unlock;

	page = hyp_phys_to_page(phys);
	switch (page->host_state) {
	case PKVM_PAGE_OWNED:
		WARN_ON(__host_set_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_SHARED_OWNED));
		break;
	case PKVM_PAGE_SHARED_OWNED:
		if (page->host_share_guest_count)
			break;
		/* Only host to np-guest multi-sharing is tolerated */
		WARN_ON(1);
		fallthrough;
	default:
		ret = -EPERM;
		goto unlock;
	}

	WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, PAGE_SIZE, phys,
				       pkvm_mkstate(prot, PKVM_PAGE_SHARED_BORROWED),
				       &vcpu->vcpu.arch.pkvm_memcache, 0));
	page->host_share_guest_count++;

unlock:
	guest_unlock_component(vm);
	host_unlock_component();

	return ret;
}

static int __check_host_shared_guest(struct pkvm_hyp_vm *vm, u64 *__phys, u64 ipa)
{
	enum pkvm_page_state state;
	struct hyp_page *page;
	kvm_pte_t pte;
	u64 phys;
	s8 level;
	int ret;

	ret = kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, &level);
	if (ret)
		return ret;
	if (level != KVM_PGTABLE_LAST_LEVEL)
		return -E2BIG;
	if (!kvm_pte_valid(pte))
		return -ENOENT;

	state = guest_get_page_state(pte, ipa);
	if (state != PKVM_PAGE_SHARED_BORROWED)
		return -EPERM;

	phys = kvm_pte_to_phys(pte);
	ret = check_range_allowed_memory(phys, phys + PAGE_SIZE);
	if (WARN_ON(ret))
		return ret;

	page = hyp_phys_to_page(phys);
	if (page->host_state != PKVM_PAGE_SHARED_OWNED)
		return -EPERM;
	if (WARN_ON(!page->host_share_guest_count))
		return -EINVAL;

	*__phys = phys;

	return 0;
}

int __pkvm_host_unshare_guest(u64 gfn, struct pkvm_hyp_vm *vm)
{
	u64 ipa = hyp_pfn_to_phys(gfn);
	struct hyp_page *page;
	u64 phys;
	int ret;

	host_lock_component();
	guest_lock_component(vm);

	ret = __check_host_shared_guest(vm, &phys, ipa);
	if (ret)
		goto unlock;

	ret = kvm_pgtable_stage2_unmap(&vm->pgt, ipa, PAGE_SIZE);
	if (ret)
		goto unlock;

	page = hyp_phys_to_page(phys);
	page->host_share_guest_count--;
	if (!page->host_share_guest_count)
		WARN_ON(__host_set_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_OWNED));

unlock:
	guest_unlock_component(vm);
	host_unlock_component();

	return ret;
}

int __pkvm_host_relax_perms_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu, enum kvm_pgtable_prot prot)
{
	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
	u64 ipa = hyp_pfn_to_phys(gfn);
	u64 phys;
	int ret;

	if (prot & ~KVM_PGTABLE_PROT_RWX)
		return -EINVAL;

	host_lock_component();
	guest_lock_component(vm);

	ret = __check_host_shared_guest(vm, &phys, ipa);
	if (!ret)
		ret = kvm_pgtable_stage2_relax_perms(&vm->pgt, ipa, prot, 0);

	guest_unlock_component(vm);
	host_unlock_component();

	return ret;
}

int __pkvm_host_wrprotect_guest(u64 gfn, struct pkvm_hyp_vm *vm)
{
	u64 ipa = hyp_pfn_to_phys(gfn);
	u64 phys;
	int ret;

	host_lock_component();
	guest_lock_component(vm);

	ret = __check_host_shared_guest(vm, &phys, ipa);
	if (!ret)
		ret = kvm_pgtable_stage2_wrprotect(&vm->pgt, ipa, PAGE_SIZE);

	guest_unlock_component(vm);
	host_unlock_component();

	return ret;
}

int __pkvm_host_test_clear_young_guest(u64 gfn, bool mkold, struct pkvm_hyp_vm *vm)
{
	u64 ipa = hyp_pfn_to_phys(gfn);
	u64 phys;
	int ret;

	host_lock_component();
	guest_lock_component(vm);

	ret = __check_host_shared_guest(vm, &phys, ipa);
	if (!ret)
		ret = kvm_pgtable_stage2_test_clear_young(&vm->pgt, ipa, PAGE_SIZE, mkold);

	guest_unlock_component(vm);
	host_unlock_component();

	return ret;
}

int __pkvm_host_mkyoung_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu)
{
	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
	u64 ipa = hyp_pfn_to_phys(gfn);
	u64 phys;
	int ret;

	host_lock_component();
	guest_lock_component(vm);

	ret = __check_host_shared_guest(vm, &phys, ipa);
	if (!ret)
		kvm_pgtable_stage2_mkyoung(&vm->pgt, ipa, 0);

	guest_unlock_component(vm);
	host_unlock_component();

	return ret;
}