// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2020 Google LLC
 * Author: Quentin Perret <qperret@google.com>
 */

#include <linux/kvm_host.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pgtable.h>
#include <asm/kvm_pkvm.h>
#include <asm/stage2_pgtable.h>

#include <hyp/fault.h>

#include <nvhe/gfp.h>
#include <nvhe/memory.h>
#include <nvhe/mem_protect.h>
#include <nvhe/mm.h>

#define KVM_HOST_S2_FLAGS (KVM_PGTABLE_S2_NOFWB | KVM_PGTABLE_S2_IDMAP)

struct host_mmu host_mmu;

static struct hyp_pool host_s2_pool;

static DEFINE_PER_CPU(struct pkvm_hyp_vm *, __current_vm);
#define current_vm (*this_cpu_ptr(&__current_vm))

static void guest_lock_component(struct pkvm_hyp_vm *vm)
{
	hyp_spin_lock(&vm->lock);
	current_vm = vm;
}

static void guest_unlock_component(struct pkvm_hyp_vm *vm)
{
	current_vm = NULL;
	hyp_spin_unlock(&vm->lock);
}

static void host_lock_component(void)
{
	hyp_spin_lock(&host_mmu.lock);
}

static void host_unlock_component(void)
{
	hyp_spin_unlock(&host_mmu.lock);
}

static void hyp_lock_component(void)
{
	hyp_spin_lock(&pkvm_pgd_lock);
}

static void hyp_unlock_component(void)
{
	hyp_spin_unlock(&pkvm_pgd_lock);
}

static void *host_s2_zalloc_pages_exact(size_t size)
{
	void *addr = hyp_alloc_pages(&host_s2_pool, get_order(size));

	hyp_split_page(hyp_virt_to_page(addr));

	/*
	 * The size of concatenated PGDs is always a power of two of PAGE_SIZE,
	 * so there should be no need to free any of the tail pages to make the
	 * allocation exact.
	 */
	WARN_ON(size != (PAGE_SIZE << get_order(size)));

	return addr;
}

static void *host_s2_zalloc_page(void *pool)
{
	return hyp_alloc_pages(pool, 0);
}

static void host_s2_get_page(void *addr)
{
	hyp_get_page(&host_s2_pool, addr);
}

static void host_s2_put_page(void *addr)
{
	hyp_put_page(&host_s2_pool, addr);
}

static void host_s2_free_unlinked_table(void *addr, s8 level)
{
	kvm_pgtable_stage2_free_unlinked(&host_mmu.mm_ops, addr, level);
}

static int prepare_s2_pool(void *pgt_pool_base)
{
	unsigned long nr_pages, pfn;
	int ret;

	pfn = hyp_virt_to_pfn(pgt_pool_base);
	nr_pages = host_s2_pgtable_pages();
	ret = hyp_pool_init(&host_s2_pool, pfn, nr_pages, 0);
	if (ret)
		return ret;

	host_mmu.mm_ops = (struct kvm_pgtable_mm_ops) {
		.zalloc_pages_exact = host_s2_zalloc_pages_exact,
		.zalloc_page = host_s2_zalloc_page,
		.free_unlinked_table = host_s2_free_unlinked_table,
		.phys_to_virt = hyp_phys_to_virt,
		.virt_to_phys = hyp_virt_to_phys,
		.page_count = hyp_page_count,
		.get_page = host_s2_get_page,
		.put_page = host_s2_put_page,
	};

	return 0;
}

static void prepare_host_vtcr(void)
{
	u32 parange, phys_shift;

	/* The host stage 2 is id-mapped, so use parange for T0SZ */
	parange = kvm_get_parange(id_aa64mmfr0_el1_sys_val);
	phys_shift = id_aa64mmfr0_parange_to_phys_shift(parange);

	host_mmu.arch.mmu.vtcr = kvm_get_vtcr(id_aa64mmfr0_el1_sys_val,
					      id_aa64mmfr1_el1_sys_val, phys_shift);
}

static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot);

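/*
 * Prepare the host's stage-2: pick the VTCR, initialise the page-table
 * allocation pool from pgt_pool_base and create an (initially empty)
 * identity-mapped page-table for the host.
 */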
int kvm_host_prepare_stage2(void *pgt_pool_base)
{
	struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu;
	int ret;

	prepare_host_vtcr();
	hyp_spin_lock_init(&host_mmu.lock);
	mmu->arch = &host_mmu.arch;

	ret = prepare_s2_pool(pgt_pool_base);
	if (ret)
		return ret;

	ret = __kvm_pgtable_stage2_init(&host_mmu.pgt, mmu,
					&host_mmu.mm_ops, KVM_HOST_S2_FLAGS,
					host_stage2_force_pte_cb);
	if (ret)
		return ret;

	mmu->pgd_phys = __hyp_pa(host_mmu.pgt.pgd);
	mmu->pgt = &host_mmu.pgt;
	atomic64_set(&mmu->vmid.id, 0);

	return 0;
}

static bool guest_stage2_force_pte_cb(u64 addr, u64 end,
				      enum kvm_pgtable_prot prot)
{
	return true;
}

static void *guest_s2_zalloc_pages_exact(size_t size)
{
	void *addr = hyp_alloc_pages(&current_vm->pool, get_order(size));

	WARN_ON(size != (PAGE_SIZE << get_order(size)));
	hyp_split_page(hyp_virt_to_page(addr));

	return addr;
}

static void guest_s2_free_pages_exact(void *addr, unsigned long size)
{
	u8 order = get_order(size);
	unsigned int i;

	for (i = 0; i < (1 << order); i++)
		hyp_put_page(&current_vm->pool, addr + (i * PAGE_SIZE));
}

static void *guest_s2_zalloc_page(void *mc)
{
	struct hyp_page *p;
	void *addr;

	addr = hyp_alloc_pages(&current_vm->pool, 0);
	if (addr)
		return addr;

	addr = pop_hyp_memcache(mc, hyp_phys_to_virt);
	if (!addr)
		return addr;

	memset(addr, 0, PAGE_SIZE);
	p = hyp_virt_to_page(addr);
	p->refcount = 1;
	p->order = 0;

	return addr;
}

static void guest_s2_get_page(void *addr)
{
	hyp_get_page(&current_vm->pool, addr);
}

static void guest_s2_put_page(void *addr)
{
	hyp_put_page(&current_vm->pool, addr);
}

static void clean_dcache_guest_page(void *va, size_t size)
{
	__clean_dcache_guest_page(hyp_fixmap_map(__hyp_pa(va)), size);
	hyp_fixmap_unmap();
}

static void invalidate_icache_guest_page(void *va, size_t size)
{
	__invalidate_icache_guest_page(hyp_fixmap_map(__hyp_pa(va)), size);
	hyp_fixmap_unmap();
}

int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd)
{
	struct kvm_s2_mmu *mmu = &vm->kvm.arch.mmu;
	unsigned long nr_pages;
	int ret;

	nr_pages = kvm_pgtable_stage2_pgd_size(mmu->vtcr) >> PAGE_SHIFT;
	ret = hyp_pool_init(&vm->pool, hyp_virt_to_pfn(pgd), nr_pages, 0);
	if (ret)
		return ret;

	hyp_spin_lock_init(&vm->lock);
	vm->mm_ops = (struct kvm_pgtable_mm_ops) {
		.zalloc_pages_exact = guest_s2_zalloc_pages_exact,
		.free_pages_exact = guest_s2_free_pages_exact,
		.zalloc_page = guest_s2_zalloc_page,
		.phys_to_virt = hyp_phys_to_virt,
		.virt_to_phys = hyp_virt_to_phys,
		.page_count = hyp_page_count,
		.get_page = guest_s2_get_page,
		.put_page = guest_s2_put_page,
		.dcache_clean_inval_poc = clean_dcache_guest_page,
		.icache_inval_pou = invalidate_icache_guest_page,
	};

	guest_lock_component(vm);
	ret = __kvm_pgtable_stage2_init(mmu->pgt, mmu, &vm->mm_ops, 0,
					guest_stage2_force_pte_cb);
	guest_unlock_component(vm);
	if (ret)
		return ret;

	vm->kvm.arch.mmu.pgd_phys = __hyp_pa(vm->pgt.pgd);

	return 0;
}

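/*
 * Tear down a guest's stage-2 page-table and hand every page backing it back
 * to the host: each page is pushed into @mc and ownership is returned via
 * __pkvm_hyp_donate_host().
 */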
void reclaim_pgtable_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc)
{
	struct hyp_page *page;
	void *addr;

	/* Dump all pgtable pages in the hyp_pool */
	guest_lock_component(vm);
	kvm_pgtable_stage2_destroy(&vm->pgt);
	vm->kvm.arch.mmu.pgd_phys = 0ULL;
	guest_unlock_component(vm);

	/* Drain the hyp_pool into the memcache */
	addr = hyp_alloc_pages(&vm->pool, 0);
	while (addr) {
		page = hyp_virt_to_page(addr);
		page->refcount = 0;
		page->order = 0;
		push_hyp_memcache(mc, addr, hyp_virt_to_phys);
		WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(addr), 1));
		addr = hyp_alloc_pages(&vm->pool, 0);
	}
}

int __pkvm_prot_finalize(void)
{
	struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu;
	struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);

	if (params->hcr_el2 & HCR_VM)
		return -EPERM;

	params->vttbr = kvm_get_vttbr(mmu);
	params->vtcr = mmu->vtcr;
	params->hcr_el2 |= HCR_VM;

	/*
	 * The CMO below not only cleans the updated params to the
	 * PoC, but also provides the DSB that ensures ongoing
	 * page-table walks that have started before we trapped to EL2
	 * have completed.
	 */
	kvm_flush_dcache_to_poc(params, sizeof(*params));

	write_sysreg(params->hcr_el2, hcr_el2);
	__load_stage2(&host_mmu.arch.mmu, &host_mmu.arch);

	/*
	 * Make sure to have an ISB before the TLB maintenance below but only
	 * when __load_stage2() doesn't include one already.
	 */
	asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT));

	/* Invalidate stale HCR bits that may be cached in TLBs */
	__tlbi(vmalls12e1);
	dsb(nsh);
	isb();

	return 0;
}

static int host_stage2_unmap_dev_all(void)
{
	struct kvm_pgtable *pgt = &host_mmu.pgt;
	struct memblock_region *reg;
	u64 addr = 0;
	int i, ret;

	/* Unmap all non-memory regions to recycle the pages */
	for (i = 0; i < hyp_memblock_nr; i++, addr = reg->base + reg->size) {
		reg = &hyp_memory[i];
		ret = kvm_pgtable_stage2_unmap(pgt, addr, reg->base - addr);
		if (ret)
			return ret;
	}
	return kvm_pgtable_stage2_unmap(pgt, addr, BIT(pgt->ia_bits) - addr);
}

struct kvm_mem_range {
	u64 start;
	u64 end;
};

static struct memblock_region *find_mem_range(phys_addr_t addr, struct kvm_mem_range *range)
{
	int cur, left = 0, right = hyp_memblock_nr;
	struct memblock_region *reg;
	phys_addr_t end;

	range->start = 0;
	range->end = ULONG_MAX;

	/* The list of memblock regions is sorted, binary search it */
	while (left < right) {
		cur = (left + right) >> 1;
		reg = &hyp_memory[cur];
		end = reg->base + reg->size;
		if (addr < reg->base) {
			right = cur;
			range->end = reg->base;
		} else if (addr >= end) {
			left = cur + 1;
			range->start = end;
		} else {
			range->start = reg->base;
			range->end = end;
			return reg;
		}
	}

	return NULL;
}

bool addr_is_memory(phys_addr_t phys)
{
	struct kvm_mem_range range;

	return !!find_mem_range(phys, &range);
}

static bool is_in_mem_range(u64 addr, struct kvm_mem_range *range)
{
	return range->start <= addr && addr < range->end;
}

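/*
 * Return 0 only if [start, end[ is fully contained within a single memory
 * region that the host is allowed to access (i.e. not MEMBLOCK_NOMAP).
 */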
static int check_range_allowed_memory(u64 start, u64 end)
{
	struct memblock_region *reg;
	struct kvm_mem_range range;

	/*
	 * Callers can't check the state of a range that overlaps memory and
	 * MMIO regions, so ensure [start, end[ is in the same kvm_mem_range.
	 */
	reg = find_mem_range(start, &range);
	if (!is_in_mem_range(end - 1, &range))
		return -EINVAL;

	if (!reg || reg->flags & MEMBLOCK_NOMAP)
		return -EPERM;

	return 0;
}

static bool range_is_memory(u64 start, u64 end)
{
	struct kvm_mem_range r;

	if (!find_mem_range(start, &r))
		return false;

	return is_in_mem_range(end - 1, &r);
}

static inline int __host_stage2_idmap(u64 start, u64 end,
				      enum kvm_pgtable_prot prot)
{
	return kvm_pgtable_stage2_map(&host_mmu.pgt, start, end - start, start,
				      prot, &host_s2_pool, 0);
}

/*
 * The pool has been provided with enough pages to cover all of memory with
 * page granularity, but it is difficult to know how much of the MMIO range
 * we will need to cover upfront, so we may need to 'recycle' the pages if we
 * run out.
 */
#define host_stage2_try(fn, ...)					\
	({								\
		int __ret;						\
		hyp_assert_lock_held(&host_mmu.lock);			\
		__ret = fn(__VA_ARGS__);				\
		if (__ret == -ENOMEM) {					\
			__ret = host_stage2_unmap_dev_all();		\
			if (!__ret)					\
				__ret = fn(__VA_ARGS__);		\
		}							\
		__ret;							\
	 })

static inline bool range_included(struct kvm_mem_range *child,
				  struct kvm_mem_range *parent)
{
	return parent->start <= child->start && child->end <= parent->end;
}

static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
{
	struct kvm_mem_range cur;
	kvm_pte_t pte;
	s8 level;
	int ret;

	hyp_assert_lock_held(&host_mmu.lock);
	ret = kvm_pgtable_get_leaf(&host_mmu.pgt, addr, &pte, &level);
	if (ret)
		return ret;

	if (kvm_pte_valid(pte))
		return -EAGAIN;

	if (pte) {
		WARN_ON(addr_is_memory(addr) && hyp_phys_to_page(addr)->host_state != PKVM_NOPAGE);
		return -EPERM;
	}

	do {
		u64 granule = kvm_granule_size(level);
		cur.start = ALIGN_DOWN(addr, granule);
		cur.end = cur.start + granule;
		level++;
	} while ((level <= KVM_PGTABLE_LAST_LEVEL) &&
		 !(kvm_level_supports_block_mapping(level) &&
		   range_included(&cur, range)));

	*range = cur;

	return 0;
}

int host_stage2_idmap_locked(phys_addr_t addr, u64 size,
			     enum kvm_pgtable_prot prot)
{
	return host_stage2_try(__host_stage2_idmap, addr, addr + size, prot);
}

static void __host_update_page_state(phys_addr_t addr, u64 size, enum pkvm_page_state state)
{
	phys_addr_t end = addr + size;

	for (; addr < end; addr += PAGE_SIZE)
		hyp_phys_to_page(addr)->host_state = state;
}

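/*
 * Transfer ownership of [addr, addr + size[ in the host stage-2 to @owner_id
 * and keep the vmemmap-based host_state tracking in sync. The host lock must
 * be held by the caller.
 */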
int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id)
{
	int ret;

	if (!addr_is_memory(addr))
		return -EPERM;

	ret = host_stage2_try(kvm_pgtable_stage2_set_owner, &host_mmu.pgt,
			      addr, size, &host_s2_pool, owner_id);
	if (ret)
		return ret;

	/* Don't forget to update the vmemmap tracking for the host */
	if (owner_id == PKVM_ID_HOST)
		__host_update_page_state(addr, size, PKVM_PAGE_OWNED);
	else
		__host_update_page_state(addr, size, PKVM_NOPAGE);

	return 0;
}

static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot)
{
	/*
	 * Block mappings must be used with care in the host stage-2 as a
	 * kvm_pgtable_stage2_map() operation targeting a page in the range of
	 * an existing block will delete the block under the assumption that
	 * mappings in the rest of the block range can always be rebuilt
	 * lazily. That assumption is correct for the host stage-2 with RWX
	 * mappings targeting memory or RW mappings targeting MMIO ranges (see
	 * host_stage2_idmap() below which implements some of the host memory
	 * abort logic). However, this is not safe for any other mappings where
	 * the host stage-2 page-table is in fact the only place where this
	 * state is stored. In all those cases, it is safer to use page-level
	 * mappings, hence avoiding the loss of state caused by side-effects of
	 * kvm_pgtable_stage2_map().
	 */
	if (range_is_memory(addr, end))
		return prot != PKVM_HOST_MEM_PROT;
	else
		return prot != PKVM_HOST_MMIO_PROT;
}

static int host_stage2_idmap(u64 addr)
{
	struct kvm_mem_range range;
	bool is_memory = !!find_mem_range(addr, &range);
	enum kvm_pgtable_prot prot;
	int ret;

	prot = is_memory ? PKVM_HOST_MEM_PROT : PKVM_HOST_MMIO_PROT;

	host_lock_component();
	ret = host_stage2_adjust_range(addr, &range);
	if (ret)
		goto unlock;

	ret = host_stage2_idmap_locked(range.start, range.end - range.start, prot);
unlock:
	host_unlock_component();

	return ret;
}

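/*
 * Host stage-2 fault handler: lazily extend the host's identity mapping to
 * cover the faulting address, using a block mapping whenever the surrounding
 * range allows it.
 */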
void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
{
	struct kvm_vcpu_fault_info fault;
	u64 esr, addr;
	int ret = 0;

	esr = read_sysreg_el2(SYS_ESR);
	if (!__get_fault_info(esr, &fault)) {
		/*
		 * We've presumably raced with a page-table change which caused
		 * AT to fail, try again.
		 */
		return;
	}

	/*
	 * Yikes, we couldn't resolve the fault IPA. This should reinject an
	 * abort into the host when we figure out how to do that.
	 */
	BUG_ON(!(fault.hpfar_el2 & HPFAR_EL2_NS));
	addr = FIELD_GET(HPFAR_EL2_FIPA, fault.hpfar_el2) << 12;

	ret = host_stage2_idmap(addr);
	BUG_ON(ret && ret != -EAGAIN);
}

struct check_walk_data {
	enum pkvm_page_state desired;
	enum pkvm_page_state (*get_page_state)(kvm_pte_t pte, u64 addr);
};

static int __check_page_state_visitor(const struct kvm_pgtable_visit_ctx *ctx,
				      enum kvm_pgtable_walk_flags visit)
{
	struct check_walk_data *d = ctx->arg;

	return d->get_page_state(ctx->old, ctx->addr) == d->desired ? 0 : -EPERM;
}

static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size,
				  struct check_walk_data *data)
{
	struct kvm_pgtable_walker walker = {
		.cb = __check_page_state_visitor,
		.arg = data,
		.flags = KVM_PGTABLE_WALK_LEAF,
	};

	return kvm_pgtable_walk(pgt, addr, size, &walker);
}

static int __host_check_page_state_range(u64 addr, u64 size,
					 enum pkvm_page_state state)
{
	u64 end = addr + size;
	int ret;

	ret = check_range_allowed_memory(addr, end);
	if (ret)
		return ret;

	hyp_assert_lock_held(&host_mmu.lock);
	for (; addr < end; addr += PAGE_SIZE) {
		if (hyp_phys_to_page(addr)->host_state != state)
			return -EPERM;
	}

	return 0;
}

static int __host_set_page_state_range(u64 addr, u64 size,
				       enum pkvm_page_state state)
{
	if (hyp_phys_to_page(addr)->host_state == PKVM_NOPAGE) {
		int ret = host_stage2_idmap_locked(addr, size, PKVM_HOST_MEM_PROT);

		if (ret)
			return ret;
	}

	__host_update_page_state(addr, size, state);

	return 0;
}

static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte, u64 addr)
{
	if (!kvm_pte_valid(pte))
		return PKVM_NOPAGE;

	return pkvm_getstate(kvm_pgtable_hyp_pte_prot(pte));
}

static int __hyp_check_page_state_range(u64 addr, u64 size,
					enum pkvm_page_state state)
{
	struct check_walk_data d = {
		.desired = state,
		.get_page_state = hyp_get_page_state,
	};

	hyp_assert_lock_held(&pkvm_pgd_lock);
	return check_page_state_range(&pkvm_pgtable, addr, size, &d);
}

static enum pkvm_page_state guest_get_page_state(kvm_pte_t pte, u64 addr)
{
	if (!kvm_pte_valid(pte))
		return PKVM_NOPAGE;

	return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte));
}

static int __guest_check_page_state_range(struct pkvm_hyp_vcpu *vcpu, u64 addr,
					  u64 size, enum pkvm_page_state state)
{
	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
	struct check_walk_data d = {
		.desired = state,
		.get_page_state = guest_get_page_state,
	};

	hyp_assert_lock_held(&vm->lock);
	return check_page_state_range(&vm->pgt, addr, size, &d);
}

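/*
 * Share the page at @pfn from the host with the hypervisor: the host retains
 * ownership (PKVM_PAGE_SHARED_OWNED) and the page is mapped into the hyp
 * stage-1 as PKVM_PAGE_SHARED_BORROWED.
 */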
int __pkvm_host_share_hyp(u64 pfn)
{
	u64 phys = hyp_pfn_to_phys(pfn);
	void *virt = __hyp_va(phys);
	enum kvm_pgtable_prot prot;
	u64 size = PAGE_SIZE;
	int ret;

	host_lock_component();
	hyp_lock_component();

	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
	if (ret)
		goto unlock;
	if (IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) {
		ret = __hyp_check_page_state_range((u64)virt, size, PKVM_NOPAGE);
		if (ret)
			goto unlock;
	}

	prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED);
	WARN_ON(pkvm_create_mappings_locked(virt, virt + size, prot));
	WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED));

unlock:
	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

int __pkvm_host_unshare_hyp(u64 pfn)
{
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 virt = (u64)__hyp_va(phys);
	u64 size = PAGE_SIZE;
	int ret;

	host_lock_component();
	hyp_lock_component();

	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
	if (ret)
		goto unlock;
	ret = __hyp_check_page_state_range(virt, size, PKVM_PAGE_SHARED_BORROWED);
	if (ret)
		goto unlock;
	if (hyp_page_count((void *)virt)) {
		ret = -EBUSY;
		goto unlock;
	}

	WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, virt, size) != size);
	WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_OWNED));

unlock:
	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages)
{
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 size = PAGE_SIZE * nr_pages;
	void *virt = __hyp_va(phys);
	enum kvm_pgtable_prot prot;
	int ret;

	host_lock_component();
	hyp_lock_component();

	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
	if (ret)
		goto unlock;
	if (IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) {
		ret = __hyp_check_page_state_range((u64)virt, size, PKVM_NOPAGE);
		if (ret)
			goto unlock;
	}

	prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_OWNED);
	WARN_ON(pkvm_create_mappings_locked(virt, virt + size, prot));
	WARN_ON(host_stage2_set_owner_locked(phys, size, PKVM_ID_HYP));

unlock:
	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages)
{
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 size = PAGE_SIZE * nr_pages;
	u64 virt = (u64)__hyp_va(phys);
	int ret;

	host_lock_component();
	hyp_lock_component();

	ret = __hyp_check_page_state_range(virt, size, PKVM_PAGE_OWNED);
	if (ret)
		goto unlock;
	if (IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) {
		ret = __host_check_page_state_range(phys, size, PKVM_NOPAGE);
		if (ret)
			goto unlock;
	}

	WARN_ON(kvm_pgtable_hyp_unmap(&pkvm_pgtable, virt, size) != size);
	WARN_ON(host_stage2_set_owner_locked(phys, size, PKVM_ID_HOST));

unlock:
	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

int hyp_pin_shared_mem(void *from, void *to)
{
	u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE);
	u64 end = PAGE_ALIGN((u64)to);
	u64 size = end - start;
	int ret;

	host_lock_component();
	hyp_lock_component();

	ret = __host_check_page_state_range(__hyp_pa(start), size,
					    PKVM_PAGE_SHARED_OWNED);
	if (ret)
		goto unlock;

	ret = __hyp_check_page_state_range(start, size,
					   PKVM_PAGE_SHARED_BORROWED);
	if (ret)
		goto unlock;

	for (cur = start; cur < end; cur += PAGE_SIZE)
		hyp_page_ref_inc(hyp_virt_to_page(cur));

unlock:
	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

void hyp_unpin_shared_mem(void *from, void *to)
{
	u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE);
	u64 end = PAGE_ALIGN((u64)to);

	host_lock_component();
	hyp_lock_component();

	for (cur = start; cur < end; cur += PAGE_SIZE)
		hyp_page_ref_dec(hyp_virt_to_page(cur));

	hyp_unlock_component();
	host_unlock_component();
}

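/*
 * Mark @nr_pages pages starting at @pfn as shared by the host for an FF-A
 * memory share; only the host's page-state tracking is updated.
 */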
int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages)
{
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 size = PAGE_SIZE * nr_pages;
	int ret;

	host_lock_component();
	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
	if (!ret)
		ret = __host_set_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
	host_unlock_component();

	return ret;
}

int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages)
{
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 size = PAGE_SIZE * nr_pages;
	int ret;

	host_lock_component();
	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
	if (!ret)
		ret = __host_set_page_state_range(phys, size, PKVM_PAGE_OWNED);
	host_unlock_component();

	return ret;
}

int __pkvm_host_share_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu,
			    enum kvm_pgtable_prot prot)
{
	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
	u64 phys = hyp_pfn_to_phys(pfn);
	u64 ipa = hyp_pfn_to_phys(gfn);
	struct hyp_page *page;
	int ret;

	if (prot & ~KVM_PGTABLE_PROT_RWX)
		return -EINVAL;

	ret = check_range_allowed_memory(phys, phys + PAGE_SIZE);
	if (ret)
		return ret;

	host_lock_component();
	guest_lock_component(vm);

	ret = __guest_check_page_state_range(vcpu, ipa, PAGE_SIZE, PKVM_NOPAGE);
	if (ret)
		goto unlock;

	page = hyp_phys_to_page(phys);
	switch (page->host_state) {
	case PKVM_PAGE_OWNED:
		WARN_ON(__host_set_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_SHARED_OWNED));
		break;
	case PKVM_PAGE_SHARED_OWNED:
		if (page->host_share_guest_count)
			break;
		/* Only host to np-guest multi-sharing is tolerated */
		WARN_ON(1);
		fallthrough;
	default:
		ret = -EPERM;
		goto unlock;
	}

	WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, PAGE_SIZE, phys,
				       pkvm_mkstate(prot, PKVM_PAGE_SHARED_BORROWED),
				       &vcpu->vcpu.arch.pkvm_memcache, 0));
	page->host_share_guest_count++;

unlock:
	guest_unlock_component(vm);
	host_unlock_component();

	return ret;
}

static int __check_host_shared_guest(struct pkvm_hyp_vm *vm, u64 *__phys, u64 ipa)
{
	enum pkvm_page_state state;
	struct hyp_page *page;
	kvm_pte_t pte;
	u64 phys;
	s8 level;
	int ret;

	ret = kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, &level);
	if (ret)
		return ret;
	if (!kvm_pte_valid(pte))
		return -ENOENT;
	if (level != KVM_PGTABLE_LAST_LEVEL)
		return -E2BIG;

	state = guest_get_page_state(pte, ipa);
	if (state != PKVM_PAGE_SHARED_BORROWED)
		return -EPERM;

	phys = kvm_pte_to_phys(pte);
	ret = check_range_allowed_memory(phys, phys + PAGE_SIZE);
	if (WARN_ON(ret))
		return ret;

	page = hyp_phys_to_page(phys);
	if (page->host_state != PKVM_PAGE_SHARED_OWNED)
		return -EPERM;
	if (WARN_ON(!page->host_share_guest_count))
		return -EINVAL;

	*__phys = phys;

	return 0;
}

int __pkvm_host_unshare_guest(u64 gfn, struct pkvm_hyp_vm *vm)
{
	u64 ipa = hyp_pfn_to_phys(gfn);
	struct hyp_page *page;
	u64 phys;
	int ret;

	host_lock_component();
	guest_lock_component(vm);

	ret = __check_host_shared_guest(vm, &phys, ipa);
	if (ret)
		goto unlock;

	ret = kvm_pgtable_stage2_unmap(&vm->pgt, ipa, PAGE_SIZE);
	if (ret)
		goto unlock;

	page = hyp_phys_to_page(phys);
	page->host_share_guest_count--;
	if (!page->host_share_guest_count)
		WARN_ON(__host_set_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_OWNED));

unlock:
	guest_unlock_component(vm);
	host_unlock_component();

	return ret;
}

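/*
 * Sanity check, only active with CONFIG_NVHE_EL2_DEBUG: warn unless @ipa is
 * either unmapped in the guest or genuinely shared from the host.
 */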
static void assert_host_shared_guest(struct pkvm_hyp_vm *vm, u64 ipa)
{
	u64 phys;
	int ret;

	if (!IS_ENABLED(CONFIG_NVHE_EL2_DEBUG))
		return;

	host_lock_component();
	guest_lock_component(vm);

	ret = __check_host_shared_guest(vm, &phys, ipa);

	guest_unlock_component(vm);
	host_unlock_component();

	WARN_ON(ret && ret != -ENOENT);
}

int __pkvm_host_relax_perms_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu, enum kvm_pgtable_prot prot)
{
	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
	u64 ipa = hyp_pfn_to_phys(gfn);
	int ret;

	if (pkvm_hyp_vm_is_protected(vm))
		return -EPERM;

	if (prot & ~KVM_PGTABLE_PROT_RWX)
		return -EINVAL;

	assert_host_shared_guest(vm, ipa);
	guest_lock_component(vm);
	ret = kvm_pgtable_stage2_relax_perms(&vm->pgt, ipa, prot, 0);
	guest_unlock_component(vm);

	return ret;
}

int __pkvm_host_wrprotect_guest(u64 gfn, struct pkvm_hyp_vm *vm)
{
	u64 ipa = hyp_pfn_to_phys(gfn);
	int ret;

	if (pkvm_hyp_vm_is_protected(vm))
		return -EPERM;

	assert_host_shared_guest(vm, ipa);
	guest_lock_component(vm);
	ret = kvm_pgtable_stage2_wrprotect(&vm->pgt, ipa, PAGE_SIZE);
	guest_unlock_component(vm);

	return ret;
}

int __pkvm_host_test_clear_young_guest(u64 gfn, bool mkold, struct pkvm_hyp_vm *vm)
{
	u64 ipa = hyp_pfn_to_phys(gfn);
	int ret;

	if (pkvm_hyp_vm_is_protected(vm))
		return -EPERM;

	assert_host_shared_guest(vm, ipa);
	guest_lock_component(vm);
	ret = kvm_pgtable_stage2_test_clear_young(&vm->pgt, ipa, PAGE_SIZE, mkold);
	guest_unlock_component(vm);

	return ret;
}

int __pkvm_host_mkyoung_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu)
{
	struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
	u64 ipa = hyp_pfn_to_phys(gfn);

	if (pkvm_hyp_vm_is_protected(vm))
		return -EPERM;

	assert_host_shared_guest(vm, ipa);
	guest_lock_component(vm);
	kvm_pgtable_stage2_mkyoung(&vm->pgt, ipa, 0);
	guest_unlock_component(vm);

	return 0;
}