// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2020 Google LLC
 * Author: Quentin Perret <qperret@google.com>
 */

#include <linux/kvm_host.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pgtable.h>
#include <asm/kvm_pkvm.h>
#include <asm/stage2_pgtable.h>

#include <hyp/fault.h>

#include <nvhe/gfp.h>
#include <nvhe/memory.h>
#include <nvhe/mem_protect.h>
#include <nvhe/mm.h>

#define KVM_HOST_S2_FLAGS (KVM_PGTABLE_S2_NOFWB | KVM_PGTABLE_S2_IDMAP)

struct host_mmu host_mmu;

static struct hyp_pool host_s2_pool;

static DEFINE_PER_CPU(struct pkvm_hyp_vm *, __current_vm);
#define current_vm (*this_cpu_ptr(&__current_vm))

static void guest_lock_component(struct pkvm_hyp_vm *vm)
{
	hyp_spin_lock(&vm->lock);
	current_vm = vm;
}

static void guest_unlock_component(struct pkvm_hyp_vm *vm)
{
	current_vm = NULL;
	hyp_spin_unlock(&vm->lock);
}

static void host_lock_component(void)
{
	hyp_spin_lock(&host_mmu.lock);
}

static void host_unlock_component(void)
{
	hyp_spin_unlock(&host_mmu.lock);
}

static void hyp_lock_component(void)
{
	hyp_spin_lock(&pkvm_pgd_lock);
}

static void hyp_unlock_component(void)
{
	hyp_spin_unlock(&pkvm_pgd_lock);
}

static void *host_s2_zalloc_pages_exact(size_t size)
{
	void *addr = hyp_alloc_pages(&host_s2_pool, get_order(size));

	hyp_split_page(hyp_virt_to_page(addr));

	/*
	 * The size of concatenated PGDs is always a power-of-two multiple of
	 * PAGE_SIZE, so there should be no need to free any of the tail pages
	 * to make the allocation exact.
	 */
	WARN_ON(size != (PAGE_SIZE << get_order(size)));

	return addr;
}

static void *host_s2_zalloc_page(void *pool)
{
	return hyp_alloc_pages(pool, 0);
}

static void host_s2_get_page(void *addr)
{
	hyp_get_page(&host_s2_pool, addr);
}

static void host_s2_put_page(void *addr)
{
	hyp_put_page(&host_s2_pool, addr);
}

static void host_s2_free_unlinked_table(void *addr, s8 level)
{
	kvm_pgtable_stage2_free_unlinked(&host_mmu.mm_ops, addr, level);
}

static int prepare_s2_pool(void *pgt_pool_base)
{
	unsigned long nr_pages, pfn;
	int ret;

	pfn = hyp_virt_to_pfn(pgt_pool_base);
	nr_pages = host_s2_pgtable_pages();
	ret = hyp_pool_init(&host_s2_pool, pfn, nr_pages, 0);
	if (ret)
		return ret;

	host_mmu.mm_ops = (struct kvm_pgtable_mm_ops) {
		.zalloc_pages_exact = host_s2_zalloc_pages_exact,
		.zalloc_page = host_s2_zalloc_page,
		.free_unlinked_table = host_s2_free_unlinked_table,
		.phys_to_virt = hyp_phys_to_virt,
		.virt_to_phys = hyp_virt_to_phys,
		.page_count = hyp_page_count,
		.get_page = host_s2_get_page,
		.put_page = host_s2_put_page,
	};

	return 0;
}

static void prepare_host_vtcr(void)
{
	u32 parange, phys_shift;

	/* The host stage 2 is id-mapped, so use parange for T0SZ */
	parange = kvm_get_parange(id_aa64mmfr0_el1_sys_val);
	phys_shift = id_aa64mmfr0_parange_to_phys_shift(parange);

	host_mmu.arch.mmu.vtcr = kvm_get_vtcr(id_aa64mmfr0_el1_sys_val,
					      id_aa64mmfr1_el1_sys_val, phys_shift);
}

static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot);

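/*
 * Initialise the host's stage-2 page-table. The page-table pool is seeded
 * with the pages donated by the host at @pgt_pool_base, and the table is
 * identity-mapped (KVM_PGTABLE_S2_IDMAP). Nothing is made live here; the
 * table is only installed once __pkvm_prot_finalize() runs on each CPU.
 */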
int kvm_host_prepare_stage2(void *pgt_pool_base)
{
	struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu;
	int ret;

	prepare_host_vtcr();
	hyp_spin_lock_init(&host_mmu.lock);
	mmu->arch = &host_mmu.arch;

	ret = prepare_s2_pool(pgt_pool_base);
	if (ret)
		return ret;

	ret = __kvm_pgtable_stage2_init(&host_mmu.pgt, mmu,
					&host_mmu.mm_ops, KVM_HOST_S2_FLAGS,
					host_stage2_force_pte_cb);
	if (ret)
		return ret;

	mmu->pgd_phys = __hyp_pa(host_mmu.pgt.pgd);
	mmu->pgt = &host_mmu.pgt;
	atomic64_set(&mmu->vmid.id, 0);

	return 0;
}

static bool guest_stage2_force_pte_cb(u64 addr, u64 end,
				      enum kvm_pgtable_prot prot)
{
	return true;
}

static void *guest_s2_zalloc_pages_exact(size_t size)
{
	void *addr = hyp_alloc_pages(&current_vm->pool, get_order(size));

	WARN_ON(size != (PAGE_SIZE << get_order(size)));
	hyp_split_page(hyp_virt_to_page(addr));

	return addr;
}

static void guest_s2_free_pages_exact(void *addr, unsigned long size)
{
	u8 order = get_order(size);
	unsigned int i;

	for (i = 0; i < (1 << order); i++)
		hyp_put_page(&current_vm->pool, addr + (i * PAGE_SIZE));
}

static void *guest_s2_zalloc_page(void *mc)
{
	struct hyp_page *p;
	void *addr;

	addr = hyp_alloc_pages(&current_vm->pool, 0);
	if (addr)
		return addr;

	addr = pop_hyp_memcache(mc, hyp_phys_to_virt);
	if (!addr)
		return addr;

	memset(addr, 0, PAGE_SIZE);
	p = hyp_virt_to_page(addr);
	memset(p, 0, sizeof(*p));
	p->refcount = 1;

	return addr;
}

static void guest_s2_get_page(void *addr)
{
	hyp_get_page(&current_vm->pool, addr);
}

static void guest_s2_put_page(void *addr)
{
	hyp_put_page(&current_vm->pool, addr);
}

static void clean_dcache_guest_page(void *va, size_t size)
{
	__clean_dcache_guest_page(hyp_fixmap_map(__hyp_pa(va)), size);
	hyp_fixmap_unmap();
}

static void invalidate_icache_guest_page(void *va, size_t size)
{
	__invalidate_icache_guest_page(hyp_fixmap_map(__hyp_pa(va)), size);
	hyp_fixmap_unmap();
}

int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd)
{
	struct kvm_s2_mmu *mmu = &vm->kvm.arch.mmu;
	unsigned long nr_pages;
	int ret;

	nr_pages = kvm_pgtable_stage2_pgd_size(mmu->vtcr) >> PAGE_SHIFT;
	ret = hyp_pool_init(&vm->pool, hyp_virt_to_pfn(pgd), nr_pages, 0);
	if (ret)
		return ret;

	hyp_spin_lock_init(&vm->lock);
	vm->mm_ops = (struct kvm_pgtable_mm_ops) {
		.zalloc_pages_exact = guest_s2_zalloc_pages_exact,
		.free_pages_exact = guest_s2_free_pages_exact,
		.zalloc_page = guest_s2_zalloc_page,
		.phys_to_virt = hyp_phys_to_virt,
		.virt_to_phys = hyp_virt_to_phys,
		.page_count = hyp_page_count,
		.get_page = guest_s2_get_page,
		.put_page = guest_s2_put_page,
		.dcache_clean_inval_poc = clean_dcache_guest_page,
		.icache_inval_pou = invalidate_icache_guest_page,
	};

	guest_lock_component(vm);
	ret = __kvm_pgtable_stage2_init(mmu->pgt, mmu, &vm->mm_ops, 0,
					guest_stage2_force_pte_cb);
	guest_unlock_component(vm);
	if (ret)
		return ret;

	vm->kvm.arch.mmu.pgd_phys = __hyp_pa(vm->pgt.pgd);

	return 0;
}

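/*
 * Tear down a guest's stage-2 page-table and hand its pages back: the
 * table is destroyed, then the remaining pages in the per-VM hyp_pool are
 * drained into @mc and donated back to the host.
 */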
void reclaim_guest_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc)
{
	void *addr;

	/* Dump all pgtable pages in the hyp_pool */
	guest_lock_component(vm);
	kvm_pgtable_stage2_destroy(&vm->pgt);
	vm->kvm.arch.mmu.pgd_phys = 0ULL;
	guest_unlock_component(vm);

	/* Drain the hyp_pool into the memcache */
	addr = hyp_alloc_pages(&vm->pool, 0);
	while (addr) {
		memset(hyp_virt_to_page(addr), 0, sizeof(struct hyp_page));
		push_hyp_memcache(mc, addr, hyp_virt_to_phys);
		WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(addr), 1));
		addr = hyp_alloc_pages(&vm->pool, 0);
	}
}

int __pkvm_prot_finalize(void)
{
	struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu;
	struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);

	if (params->hcr_el2 & HCR_VM)
		return -EPERM;

	params->vttbr = kvm_get_vttbr(mmu);
	params->vtcr = mmu->vtcr;
	params->hcr_el2 |= HCR_VM;

	/*
	 * The CMO below not only cleans the updated params to the
	 * PoC, but also provides the DSB that ensures ongoing
	 * page-table walks that have started before we trapped to EL2
	 * have completed.
	 */
	kvm_flush_dcache_to_poc(params, sizeof(*params));

	write_sysreg(params->hcr_el2, hcr_el2);
	__load_stage2(&host_mmu.arch.mmu, &host_mmu.arch);

	/*
	 * Make sure to have an ISB before the TLB maintenance below but only
	 * when __load_stage2() doesn't include one already.
	 */
	asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT));

	/* Invalidate stale HCR bits that may be cached in TLBs */
	__tlbi(vmalls12e1);
	dsb(nsh);
	isb();

	return 0;
}

static int host_stage2_unmap_dev_all(void)
{
	struct kvm_pgtable *pgt = &host_mmu.pgt;
	struct memblock_region *reg;
	u64 addr = 0;
	int i, ret;

	/* Unmap all non-memory regions to recycle the pages */
	for (i = 0; i < hyp_memblock_nr; i++, addr = reg->base + reg->size) {
		reg = &hyp_memory[i];
		ret = kvm_pgtable_stage2_unmap(pgt, addr, reg->base - addr);
		if (ret)
			return ret;
	}
	return kvm_pgtable_stage2_unmap(pgt, addr, BIT(pgt->ia_bits) - addr);
}

struct kvm_mem_range {
	u64 start;
	u64 end;
};

static struct memblock_region *find_mem_range(phys_addr_t addr, struct kvm_mem_range *range)
{
	int cur, left = 0, right = hyp_memblock_nr;
	struct memblock_region *reg;
	phys_addr_t end;

	range->start = 0;
	range->end = ULONG_MAX;

	/* The list of memblock regions is sorted, binary search it */
	while (left < right) {
		cur = (left + right) >> 1;
		reg = &hyp_memory[cur];
		end = reg->base + reg->size;
		if (addr < reg->base) {
			right = cur;
			range->end = reg->base;
		} else if (addr >= end) {
			left = cur + 1;
			range->start = end;
		} else {
			range->start = reg->base;
			range->end = end;
			return reg;
		}
	}

	return NULL;
}

bool addr_is_memory(phys_addr_t phys)
{
	struct kvm_mem_range range;

	return !!find_mem_range(phys, &range);
}

static bool addr_is_allowed_memory(phys_addr_t phys)
{
	struct memblock_region *reg;
	struct kvm_mem_range range;

	reg = find_mem_range(phys, &range);

	return reg && !(reg->flags & MEMBLOCK_NOMAP);
}

static bool is_in_mem_range(u64 addr, struct kvm_mem_range *range)
{
	return range->start <= addr && addr < range->end;
}

static bool range_is_memory(u64 start, u64 end)
{
	struct kvm_mem_range r;

	if (!find_mem_range(start, &r))
		return false;

	return is_in_mem_range(end - 1, &r);
}

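/* Identity-map [start, end) in the host stage-2, allocating from host_s2_pool. */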
static inline int __host_stage2_idmap(u64 start, u64 end,
				      enum kvm_pgtable_prot prot)
{
	return kvm_pgtable_stage2_map(&host_mmu.pgt, start, end - start, start,
				      prot, &host_s2_pool, 0);
}

/*
 * The pool has been provided with enough pages to cover all of memory with
 * page granularity, but it is difficult to know how much of the MMIO range
 * we will need to cover upfront, so we may need to 'recycle' the pages if we
 * run out.
 */
#define host_stage2_try(fn, ...)				\
	({							\
		int __ret;					\
		hyp_assert_lock_held(&host_mmu.lock);		\
		__ret = fn(__VA_ARGS__);			\
		if (__ret == -ENOMEM) {				\
			__ret = host_stage2_unmap_dev_all();	\
			if (!__ret)				\
				__ret = fn(__VA_ARGS__);	\
		}						\
		__ret;						\
	})

static inline bool range_included(struct kvm_mem_range *child,
				  struct kvm_mem_range *parent)
{
	return parent->start <= child->start && child->end <= parent->end;
}

static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
{
	struct kvm_mem_range cur;
	kvm_pte_t pte;
	s8 level;
	int ret;

	hyp_assert_lock_held(&host_mmu.lock);
	ret = kvm_pgtable_get_leaf(&host_mmu.pgt, addr, &pte, &level);
	if (ret)
		return ret;

	if (kvm_pte_valid(pte))
		return -EAGAIN;

	if (pte)
		return -EPERM;

	do {
		u64 granule = kvm_granule_size(level);
		cur.start = ALIGN_DOWN(addr, granule);
		cur.end = cur.start + granule;
		level++;
	} while ((level <= KVM_PGTABLE_LAST_LEVEL) &&
		 !(kvm_level_supports_block_mapping(level) &&
		   range_included(&cur, range)));

	*range = cur;

	return 0;
}

int host_stage2_idmap_locked(phys_addr_t addr, u64 size,
			     enum kvm_pgtable_prot prot)
{
	return host_stage2_try(__host_stage2_idmap, addr, addr + size, prot);
}

int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id)
{
	return host_stage2_try(kvm_pgtable_stage2_set_owner, &host_mmu.pgt,
			       addr, size, &host_s2_pool, owner_id);
}

static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot)
{
	/*
	 * Block mappings must be used with care in the host stage-2 as a
	 * kvm_pgtable_stage2_map() operation targeting a page in the range of
	 * an existing block will delete the block under the assumption that
	 * mappings in the rest of the block range can always be rebuilt lazily.
	 * That assumption is correct for the host stage-2 with RWX mappings
	 * targeting memory or RW mappings targeting MMIO ranges (see
	 * host_stage2_idmap() below which implements some of the host memory
	 * abort logic). However, this is not safe for any other mappings where
	 * the host stage-2 page-table is in fact the only place where this
	 * state is stored. In all those cases, it is safer to use page-level
	 * mappings, hence avoiding losing the state because of side effects in
	 * kvm_pgtable_stage2_map().
	 */
	if (range_is_memory(addr, end))
		return prot != PKVM_HOST_MEM_PROT;
	else
		return prot != PKVM_HOST_MMIO_PROT;
}

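/*
 * Handle a host stage-2 fault at @addr by lazily creating an identity
 * mapping for it. host_stage2_adjust_range() picks the largest
 * block-mappable range around @addr that fits within the memory (or MMIO)
 * region returned by find_mem_range() and that is neither already mapped
 * nor owned by another component.
 */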
static int host_stage2_idmap(u64 addr)
{
	struct kvm_mem_range range;
	bool is_memory = !!find_mem_range(addr, &range);
	enum kvm_pgtable_prot prot;
	int ret;

	prot = is_memory ? PKVM_HOST_MEM_PROT : PKVM_HOST_MMIO_PROT;

	host_lock_component();
	ret = host_stage2_adjust_range(addr, &range);
	if (ret)
		goto unlock;

	ret = host_stage2_idmap_locked(range.start, range.end - range.start, prot);
unlock:
	host_unlock_component();

	return ret;
}

void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
{
	struct kvm_vcpu_fault_info fault;
	u64 esr, addr;
	int ret = 0;

	esr = read_sysreg_el2(SYS_ESR);
	BUG_ON(!__get_fault_info(esr, &fault));

	addr = (fault.hpfar_el2 & HPFAR_MASK) << 8;
	ret = host_stage2_idmap(addr);
	BUG_ON(ret && ret != -EAGAIN);
}

struct pkvm_mem_transition {
	u64 nr_pages;

	struct {
		enum pkvm_component_id id;
		/* Address in the initiator's address space */
		u64 addr;

		union {
			struct {
				/* Address in the completer's address space */
				u64 completer_addr;
			} host;
			struct {
				u64 completer_addr;
			} hyp;
		};
	} initiator;

	struct {
		enum pkvm_component_id id;
	} completer;
};

struct pkvm_mem_share {
	const struct pkvm_mem_transition tx;
	const enum kvm_pgtable_prot completer_prot;
};

struct pkvm_mem_donation {
	const struct pkvm_mem_transition tx;
};

struct check_walk_data {
	enum pkvm_page_state desired;
	enum pkvm_page_state (*get_page_state)(kvm_pte_t pte, u64 addr);
};

static int __check_page_state_visitor(const struct kvm_pgtable_visit_ctx *ctx,
				      enum kvm_pgtable_walk_flags visit)
{
	struct check_walk_data *d = ctx->arg;

	return d->get_page_state(ctx->old, ctx->addr) == d->desired ? 0 : -EPERM;
}

static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size,
				  struct check_walk_data *data)
{
	struct kvm_pgtable_walker walker = {
		.cb = __check_page_state_visitor,
		.arg = data,
		.flags = KVM_PGTABLE_WALK_LEAF,
	};

	return kvm_pgtable_walk(pgt, addr, size, &walker);
}

static enum pkvm_page_state host_get_page_state(kvm_pte_t pte, u64 addr)
{
	if (!addr_is_allowed_memory(addr))
		return PKVM_NOPAGE;

	if (!kvm_pte_valid(pte) && pte)
		return PKVM_NOPAGE;

	return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte));
}

static int __host_check_page_state_range(u64 addr, u64 size,
					 enum pkvm_page_state state)
{
	struct check_walk_data d = {
		.desired = state,
		.get_page_state = host_get_page_state,
	};

	hyp_assert_lock_held(&host_mmu.lock);
	return check_page_state_range(&host_mmu.pgt, addr, size, &d);
}

static int __host_set_page_state_range(u64 addr, u64 size,
				       enum pkvm_page_state state)
{
	enum kvm_pgtable_prot prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, state);

	return host_stage2_idmap_locked(addr, size, prot);
}

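/*
 * The helpers below implement the two-step protocol used by do_share(),
 * do_unshare() and do_donate(): the initiator's page-table state is first
 * checked ("request"), the completer acknowledges the transition ("ack"),
 * and only then are both sides actually updated ("initiate"/"complete").
 */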
static int host_request_owned_transition(u64 *completer_addr,
					 const struct pkvm_mem_transition *tx)
{
	u64 size = tx->nr_pages * PAGE_SIZE;
	u64 addr = tx->initiator.addr;

	*completer_addr = tx->initiator.host.completer_addr;
	return __host_check_page_state_range(addr, size, PKVM_PAGE_OWNED);
}

static int host_request_unshare(u64 *completer_addr,
				const struct pkvm_mem_transition *tx)
{
	u64 size = tx->nr_pages * PAGE_SIZE;
	u64 addr = tx->initiator.addr;

	*completer_addr = tx->initiator.host.completer_addr;
	return __host_check_page_state_range(addr, size, PKVM_PAGE_SHARED_OWNED);
}

static int host_initiate_share(u64 *completer_addr,
			       const struct pkvm_mem_transition *tx)
{
	u64 size = tx->nr_pages * PAGE_SIZE;
	u64 addr = tx->initiator.addr;

	*completer_addr = tx->initiator.host.completer_addr;
	return __host_set_page_state_range(addr, size, PKVM_PAGE_SHARED_OWNED);
}

static int host_initiate_unshare(u64 *completer_addr,
				 const struct pkvm_mem_transition *tx)
{
	u64 size = tx->nr_pages * PAGE_SIZE;
	u64 addr = tx->initiator.addr;

	*completer_addr = tx->initiator.host.completer_addr;
	return __host_set_page_state_range(addr, size, PKVM_PAGE_OWNED);
}

static int host_initiate_donation(u64 *completer_addr,
				  const struct pkvm_mem_transition *tx)
{
	u8 owner_id = tx->completer.id;
	u64 size = tx->nr_pages * PAGE_SIZE;

	*completer_addr = tx->initiator.host.completer_addr;
	return host_stage2_set_owner_locked(tx->initiator.addr, size, owner_id);
}

static bool __host_ack_skip_pgtable_check(const struct pkvm_mem_transition *tx)
{
	return !(IS_ENABLED(CONFIG_NVHE_EL2_DEBUG) ||
		 tx->initiator.id != PKVM_ID_HYP);
}

static int __host_ack_transition(u64 addr, const struct pkvm_mem_transition *tx,
				 enum pkvm_page_state state)
{
	u64 size = tx->nr_pages * PAGE_SIZE;

	if (__host_ack_skip_pgtable_check(tx))
		return 0;

	return __host_check_page_state_range(addr, size, state);
}

static int host_ack_donation(u64 addr, const struct pkvm_mem_transition *tx)
{
	return __host_ack_transition(addr, tx, PKVM_NOPAGE);
}

static int host_complete_donation(u64 addr, const struct pkvm_mem_transition *tx)
{
	u64 size = tx->nr_pages * PAGE_SIZE;
	u8 host_id = tx->completer.id;

	return host_stage2_set_owner_locked(addr, size, host_id);
}

static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte, u64 addr)
{
	if (!kvm_pte_valid(pte))
		return PKVM_NOPAGE;

	return pkvm_getstate(kvm_pgtable_hyp_pte_prot(pte));
}

static int __hyp_check_page_state_range(u64 addr, u64 size,
					enum pkvm_page_state state)
{
	struct check_walk_data d = {
		.desired = state,
		.get_page_state = hyp_get_page_state,
	};

	hyp_assert_lock_held(&pkvm_pgd_lock);
	return check_page_state_range(&pkvm_pgtable, addr, size, &d);
}

static int hyp_request_donation(u64 *completer_addr,
				const struct pkvm_mem_transition *tx)
{
	u64 size = tx->nr_pages * PAGE_SIZE;
	u64 addr = tx->initiator.addr;

	*completer_addr = tx->initiator.hyp.completer_addr;
	return __hyp_check_page_state_range(addr, size, PKVM_PAGE_OWNED);
}

static int hyp_initiate_donation(u64 *completer_addr,
				 const struct pkvm_mem_transition *tx)
{
	u64 size = tx->nr_pages * PAGE_SIZE;
	int ret;

	*completer_addr = tx->initiator.hyp.completer_addr;
	ret = kvm_pgtable_hyp_unmap(&pkvm_pgtable, tx->initiator.addr, size);
	return (ret != size) ? -EFAULT : 0;
}

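/*
 * Transitions initiated by the host skip the check of the hypervisor's own
 * page-table unless CONFIG_NVHE_EL2_DEBUG is enabled (the mirror image of
 * __host_ack_skip_pgtable_check() above for hyp-initiated transitions).
 */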
static bool __hyp_ack_skip_pgtable_check(const struct pkvm_mem_transition *tx)
{
	return !(IS_ENABLED(CONFIG_NVHE_EL2_DEBUG) ||
		 tx->initiator.id != PKVM_ID_HOST);
}

static int hyp_ack_share(u64 addr, const struct pkvm_mem_transition *tx,
			 enum kvm_pgtable_prot perms)
{
	u64 size = tx->nr_pages * PAGE_SIZE;

	if (perms != PAGE_HYP)
		return -EPERM;

	if (__hyp_ack_skip_pgtable_check(tx))
		return 0;

	return __hyp_check_page_state_range(addr, size, PKVM_NOPAGE);
}

static int hyp_ack_unshare(u64 addr, const struct pkvm_mem_transition *tx)
{
	u64 size = tx->nr_pages * PAGE_SIZE;

	if (tx->initiator.id == PKVM_ID_HOST && hyp_page_count((void *)addr))
		return -EBUSY;

	if (__hyp_ack_skip_pgtable_check(tx))
		return 0;

	return __hyp_check_page_state_range(addr, size,
					    PKVM_PAGE_SHARED_BORROWED);
}

static int hyp_ack_donation(u64 addr, const struct pkvm_mem_transition *tx)
{
	u64 size = tx->nr_pages * PAGE_SIZE;

	if (__hyp_ack_skip_pgtable_check(tx))
		return 0;

	return __hyp_check_page_state_range(addr, size, PKVM_NOPAGE);
}

static int hyp_complete_share(u64 addr, const struct pkvm_mem_transition *tx,
			      enum kvm_pgtable_prot perms)
{
	void *start = (void *)addr, *end = start + (tx->nr_pages * PAGE_SIZE);
	enum kvm_pgtable_prot prot;

	prot = pkvm_mkstate(perms, PKVM_PAGE_SHARED_BORROWED);
	return pkvm_create_mappings_locked(start, end, prot);
}

static int hyp_complete_unshare(u64 addr, const struct pkvm_mem_transition *tx)
{
	u64 size = tx->nr_pages * PAGE_SIZE;
	int ret = kvm_pgtable_hyp_unmap(&pkvm_pgtable, addr, size);

	return (ret != size) ? -EFAULT : 0;
}

static int hyp_complete_donation(u64 addr,
				 const struct pkvm_mem_transition *tx)
{
	void *start = (void *)addr, *end = start + (tx->nr_pages * PAGE_SIZE);
	enum kvm_pgtable_prot prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_OWNED);

	return pkvm_create_mappings_locked(start, end, prot);
}

static int check_share(struct pkvm_mem_share *share)
{
	const struct pkvm_mem_transition *tx = &share->tx;
	u64 completer_addr;
	int ret;

	switch (tx->initiator.id) {
	case PKVM_ID_HOST:
		ret = host_request_owned_transition(&completer_addr, tx);
		break;
	default:
		ret = -EINVAL;
	}

	if (ret)
		return ret;

	switch (tx->completer.id) {
	case PKVM_ID_HYP:
		ret = hyp_ack_share(completer_addr, tx, share->completer_prot);
		break;
	case PKVM_ID_FFA:
		/*
		 * We only check the host; the secure side will check the other
		 * end when we forward the FFA call.
		 */
		ret = 0;
		break;
	default:
		ret = -EINVAL;
	}

	return ret;
}

static int __do_share(struct pkvm_mem_share *share)
{
	const struct pkvm_mem_transition *tx = &share->tx;
	u64 completer_addr;
	int ret;

	switch (tx->initiator.id) {
	case PKVM_ID_HOST:
		ret = host_initiate_share(&completer_addr, tx);
		break;
	default:
		ret = -EINVAL;
	}

	if (ret)
		return ret;

	switch (tx->completer.id) {
	case PKVM_ID_HYP:
		ret = hyp_complete_share(completer_addr, tx, share->completer_prot);
		break;
	case PKVM_ID_FFA:
		/*
		 * We're not responsible for any secure page-tables, so there's
		 * nothing to do here.
		 */
		ret = 0;
		break;
	default:
		ret = -EINVAL;
	}

	return ret;
}

/*
 * do_share():
 *
 * The page owner grants access to another component with a given set
 * of permissions.
 *
 * Initiator: OWNED	=> SHARED_OWNED
 * Completer: NOPAGE	=> SHARED_BORROWED
 */
static int do_share(struct pkvm_mem_share *share)
{
	int ret;

	ret = check_share(share);
	if (ret)
		return ret;

	return WARN_ON(__do_share(share));
}

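/*
 * Typical usage, mirroring __pkvm_host_share_hyp() below (illustrative
 * sketch only):
 *
 *	struct pkvm_mem_share share = {
 *		.tx = {
 *			.nr_pages = 1,
 *			.initiator = {
 *				.id = PKVM_ID_HOST,
 *				.addr = host_addr,
 *				.host = { .completer_addr = hyp_addr },
 *			},
 *			.completer = { .id = PKVM_ID_HYP },
 *		},
 *		.completer_prot = PAGE_HYP,
 *	};
 *
 *	host_lock_component();
 *	hyp_lock_component();
 *	ret = do_share(&share);
 *	hyp_unlock_component();
 *	host_unlock_component();
 */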
static int check_unshare(struct pkvm_mem_share *share)
{
	const struct pkvm_mem_transition *tx = &share->tx;
	u64 completer_addr;
	int ret;

	switch (tx->initiator.id) {
	case PKVM_ID_HOST:
		ret = host_request_unshare(&completer_addr, tx);
		break;
	default:
		ret = -EINVAL;
	}

	if (ret)
		return ret;

	switch (tx->completer.id) {
	case PKVM_ID_HYP:
		ret = hyp_ack_unshare(completer_addr, tx);
		break;
	case PKVM_ID_FFA:
		/* See check_share() */
		ret = 0;
		break;
	default:
		ret = -EINVAL;
	}

	return ret;
}

static int __do_unshare(struct pkvm_mem_share *share)
{
	const struct pkvm_mem_transition *tx = &share->tx;
	u64 completer_addr;
	int ret;

	switch (tx->initiator.id) {
	case PKVM_ID_HOST:
		ret = host_initiate_unshare(&completer_addr, tx);
		break;
	default:
		ret = -EINVAL;
	}

	if (ret)
		return ret;

	switch (tx->completer.id) {
	case PKVM_ID_HYP:
		ret = hyp_complete_unshare(completer_addr, tx);
		break;
	case PKVM_ID_FFA:
		/* See __do_share() */
		ret = 0;
		break;
	default:
		ret = -EINVAL;
	}

	return ret;
}

/*
 * do_unshare():
 *
 * The page owner revokes access from another component for a range of
 * pages which were previously shared using do_share().
 *
 * Initiator: SHARED_OWNED	=> OWNED
 * Completer: SHARED_BORROWED	=> NOPAGE
 */
static int do_unshare(struct pkvm_mem_share *share)
{
	int ret;

	ret = check_unshare(share);
	if (ret)
		return ret;

	return WARN_ON(__do_unshare(share));
}

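/*
 * Donations follow the same check/commit split as shares, but both the
 * host and the hyp can act as initiator or completer (see
 * __pkvm_host_donate_hyp() and __pkvm_hyp_donate_host() below).
 */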
static int check_donation(struct pkvm_mem_donation *donation)
{
	const struct pkvm_mem_transition *tx = &donation->tx;
	u64 completer_addr;
	int ret;

	switch (tx->initiator.id) {
	case PKVM_ID_HOST:
		ret = host_request_owned_transition(&completer_addr, tx);
		break;
	case PKVM_ID_HYP:
		ret = hyp_request_donation(&completer_addr, tx);
		break;
	default:
		ret = -EINVAL;
	}

	if (ret)
		return ret;

	switch (tx->completer.id) {
	case PKVM_ID_HOST:
		ret = host_ack_donation(completer_addr, tx);
		break;
	case PKVM_ID_HYP:
		ret = hyp_ack_donation(completer_addr, tx);
		break;
	default:
		ret = -EINVAL;
	}

	return ret;
}

static int __do_donate(struct pkvm_mem_donation *donation)
{
	const struct pkvm_mem_transition *tx = &donation->tx;
	u64 completer_addr;
	int ret;

	switch (tx->initiator.id) {
	case PKVM_ID_HOST:
		ret = host_initiate_donation(&completer_addr, tx);
		break;
	case PKVM_ID_HYP:
		ret = hyp_initiate_donation(&completer_addr, tx);
		break;
	default:
		ret = -EINVAL;
	}

	if (ret)
		return ret;

	switch (tx->completer.id) {
	case PKVM_ID_HOST:
		ret = host_complete_donation(completer_addr, tx);
		break;
	case PKVM_ID_HYP:
		ret = hyp_complete_donation(completer_addr, tx);
		break;
	default:
		ret = -EINVAL;
	}

	return ret;
}

/*
 * do_donate():
 *
 * The page owner transfers ownership to another component, losing access
 * as a consequence.
 *
 * Initiator: OWNED	=> NOPAGE
 * Completer: NOPAGE	=> OWNED
 */
static int do_donate(struct pkvm_mem_donation *donation)
{
	int ret;

	ret = check_donation(donation);
	if (ret)
		return ret;

	return WARN_ON(__do_donate(donation));
}

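/*
 * Share a single host page with the hypervisor: the page stays owned by
 * the host (SHARED_OWNED) and is mapped into the hyp with PAGE_HYP
 * permissions in the SHARED_BORROWED state.
 */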
int __pkvm_host_share_hyp(u64 pfn)
{
	int ret;
	u64 host_addr = hyp_pfn_to_phys(pfn);
	u64 hyp_addr = (u64)__hyp_va(host_addr);
	struct pkvm_mem_share share = {
		.tx = {
			.nr_pages = 1,
			.initiator = {
				.id = PKVM_ID_HOST,
				.addr = host_addr,
				.host = {
					.completer_addr = hyp_addr,
				},
			},
			.completer = {
				.id = PKVM_ID_HYP,
			},
		},
		.completer_prot = PAGE_HYP,
	};

	host_lock_component();
	hyp_lock_component();

	ret = do_share(&share);

	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

int __pkvm_host_unshare_hyp(u64 pfn)
{
	int ret;
	u64 host_addr = hyp_pfn_to_phys(pfn);
	u64 hyp_addr = (u64)__hyp_va(host_addr);
	struct pkvm_mem_share share = {
		.tx = {
			.nr_pages = 1,
			.initiator = {
				.id = PKVM_ID_HOST,
				.addr = host_addr,
				.host = {
					.completer_addr = hyp_addr,
				},
			},
			.completer = {
				.id = PKVM_ID_HYP,
			},
		},
		.completer_prot = PAGE_HYP,
	};

	host_lock_component();
	hyp_lock_component();

	ret = do_unshare(&share);

	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages)
{
	int ret;
	u64 host_addr = hyp_pfn_to_phys(pfn);
	u64 hyp_addr = (u64)__hyp_va(host_addr);
	struct pkvm_mem_donation donation = {
		.tx = {
			.nr_pages = nr_pages,
			.initiator = {
				.id = PKVM_ID_HOST,
				.addr = host_addr,
				.host = {
					.completer_addr = hyp_addr,
				},
			},
			.completer = {
				.id = PKVM_ID_HYP,
			},
		},
	};

	host_lock_component();
	hyp_lock_component();

	ret = do_donate(&donation);

	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages)
{
	int ret;
	u64 host_addr = hyp_pfn_to_phys(pfn);
	u64 hyp_addr = (u64)__hyp_va(host_addr);
	struct pkvm_mem_donation donation = {
		.tx = {
			.nr_pages = nr_pages,
			.initiator = {
				.id = PKVM_ID_HYP,
				.addr = hyp_addr,
				.hyp = {
					.completer_addr = host_addr,
				},
			},
			.completer = {
				.id = PKVM_ID_HOST,
			},
		},
	};

	host_lock_component();
	hyp_lock_component();

	ret = do_donate(&donation);

	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

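/*
 * Pin a range of memory shared with the hypervisor: a reference is taken
 * on every page in [from, to) after checking that the whole range is in
 * the SHARED_OWNED/SHARED_BORROWED state. The references prevent the host
 * from unsharing the pages (see hyp_ack_unshare()) until
 * hyp_unpin_shared_mem() drops them.
 */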
int hyp_pin_shared_mem(void *from, void *to)
{
	u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE);
	u64 end = PAGE_ALIGN((u64)to);
	u64 size = end - start;
	int ret;

	host_lock_component();
	hyp_lock_component();

	ret = __host_check_page_state_range(__hyp_pa(start), size,
					    PKVM_PAGE_SHARED_OWNED);
	if (ret)
		goto unlock;

	ret = __hyp_check_page_state_range(start, size,
					   PKVM_PAGE_SHARED_BORROWED);
	if (ret)
		goto unlock;

	for (cur = start; cur < end; cur += PAGE_SIZE)
		hyp_page_ref_inc(hyp_virt_to_page(cur));

unlock:
	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

void hyp_unpin_shared_mem(void *from, void *to)
{
	u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE);
	u64 end = PAGE_ALIGN((u64)to);

	host_lock_component();
	hyp_lock_component();

	for (cur = start; cur < end; cur += PAGE_SIZE)
		hyp_page_ref_dec(hyp_virt_to_page(cur));

	hyp_unlock_component();
	host_unlock_component();
}

int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages)
{
	int ret;
	struct pkvm_mem_share share = {
		.tx = {
			.nr_pages = nr_pages,
			.initiator = {
				.id = PKVM_ID_HOST,
				.addr = hyp_pfn_to_phys(pfn),
			},
			.completer = {
				.id = PKVM_ID_FFA,
			},
		},
	};

	host_lock_component();
	ret = do_share(&share);
	host_unlock_component();

	return ret;
}

int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages)
{
	int ret;
	struct pkvm_mem_share share = {
		.tx = {
			.nr_pages = nr_pages,
			.initiator = {
				.id = PKVM_ID_HOST,
				.addr = hyp_pfn_to_phys(pfn),
			},
			.completer = {
				.id = PKVM_ID_FFA,
			},
		},
	};

	host_lock_component();
	ret = do_unshare(&share);
	host_unlock_component();

	return ret;
}