1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Stand-alone page-table allocator for hyp stage-1 and guest stage-2. 4 * No bombay mix was harmed in the writing of this file. 5 * 6 * Copyright (C) 2020 Google LLC 7 * Author: Will Deacon <will@kernel.org> 8 */ 9 10 #include <linux/bitfield.h> 11 #include <asm/kvm_pgtable.h> 12 #include <asm/stage2_pgtable.h> 13 14 15 #define KVM_PTE_TYPE BIT(1) 16 #define KVM_PTE_TYPE_BLOCK 0 17 #define KVM_PTE_TYPE_PAGE 1 18 #define KVM_PTE_TYPE_TABLE 1 19 20 #define KVM_PTE_LEAF_ATTR_LO GENMASK(11, 2) 21 22 #define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX GENMASK(4, 2) 23 #define KVM_PTE_LEAF_ATTR_LO_S1_AP GENMASK(7, 6) 24 #define KVM_PTE_LEAF_ATTR_LO_S1_AP_RO 3 25 #define KVM_PTE_LEAF_ATTR_LO_S1_AP_RW 1 26 #define KVM_PTE_LEAF_ATTR_LO_S1_SH GENMASK(9, 8) 27 #define KVM_PTE_LEAF_ATTR_LO_S1_SH_IS 3 28 #define KVM_PTE_LEAF_ATTR_LO_S1_AF BIT(10) 29 30 #define KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR GENMASK(5, 2) 31 #define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R BIT(6) 32 #define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W BIT(7) 33 #define KVM_PTE_LEAF_ATTR_LO_S2_SH GENMASK(9, 8) 34 #define KVM_PTE_LEAF_ATTR_LO_S2_SH_IS 3 35 #define KVM_PTE_LEAF_ATTR_LO_S2_AF BIT(10) 36 37 #define KVM_PTE_LEAF_ATTR_HI GENMASK(63, 51) 38 39 #define KVM_PTE_LEAF_ATTR_HI_SW GENMASK(58, 55) 40 41 #define KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54) 42 43 #define KVM_PTE_LEAF_ATTR_HI_S2_XN BIT(54) 44 45 #define KVM_PTE_LEAF_ATTR_S2_PERMS (KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | \ 46 KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \ 47 KVM_PTE_LEAF_ATTR_HI_S2_XN) 48 49 #define KVM_INVALID_PTE_OWNER_MASK GENMASK(9, 2) 50 #define KVM_MAX_OWNER_ID 1 51 52 /* 53 * Used to indicate a pte for which a 'break-before-make' sequence is in 54 * progress. 55 */ 56 #define KVM_INVALID_PTE_LOCKED BIT(10) 57 58 struct kvm_pgtable_walk_data { 59 struct kvm_pgtable_walker *walker; 60 61 u64 addr; 62 u64 end; 63 }; 64 65 static bool kvm_phys_is_valid(u64 phys) 66 { 67 return phys < BIT(id_aa64mmfr0_parange_to_phys_shift(ID_AA64MMFR0_EL1_PARANGE_MAX)); 68 } 69 70 static bool kvm_block_mapping_supported(const struct kvm_pgtable_visit_ctx *ctx, u64 phys) 71 { 72 u64 granule = kvm_granule_size(ctx->level); 73 74 if (!kvm_level_supports_block_mapping(ctx->level)) 75 return false; 76 77 if (granule > (ctx->end - ctx->addr)) 78 return false; 79 80 if (kvm_phys_is_valid(phys) && !IS_ALIGNED(phys, granule)) 81 return false; 82 83 return IS_ALIGNED(ctx->addr, granule); 84 } 85 86 static u32 kvm_pgtable_idx(struct kvm_pgtable_walk_data *data, u32 level) 87 { 88 u64 shift = kvm_granule_shift(level); 89 u64 mask = BIT(PAGE_SHIFT - 3) - 1; 90 91 return (data->addr >> shift) & mask; 92 } 93 94 static u32 kvm_pgd_page_idx(struct kvm_pgtable *pgt, u64 addr) 95 { 96 u64 shift = kvm_granule_shift(pgt->start_level - 1); /* May underflow */ 97 u64 mask = BIT(pgt->ia_bits) - 1; 98 99 return (addr & mask) >> shift; 100 } 101 102 static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level) 103 { 104 struct kvm_pgtable pgt = { 105 .ia_bits = ia_bits, 106 .start_level = start_level, 107 }; 108 109 return kvm_pgd_page_idx(&pgt, -1ULL) + 1; 110 } 111 112 static bool kvm_pte_table(kvm_pte_t pte, u32 level) 113 { 114 if (level == KVM_PGTABLE_MAX_LEVELS - 1) 115 return false; 116 117 if (!kvm_pte_valid(pte)) 118 return false; 119 120 return FIELD_GET(KVM_PTE_TYPE, pte) == KVM_PTE_TYPE_TABLE; 121 } 122 123 static kvm_pte_t *kvm_pte_follow(kvm_pte_t pte, struct kvm_pgtable_mm_ops *mm_ops) 124 { 125 return mm_ops->phys_to_virt(kvm_pte_to_phys(pte)); 126 } 127 128 static void kvm_clear_pte(kvm_pte_t *ptep) 129 { 130 WRITE_ONCE(*ptep, 0); 131 } 132 133 static kvm_pte_t kvm_init_table_pte(kvm_pte_t *childp, struct kvm_pgtable_mm_ops *mm_ops) 134 { 135 kvm_pte_t pte = kvm_phys_to_pte(mm_ops->virt_to_phys(childp)); 136 137 pte |= FIELD_PREP(KVM_PTE_TYPE, KVM_PTE_TYPE_TABLE); 138 pte |= KVM_PTE_VALID; 139 return pte; 140 } 141 142 static kvm_pte_t kvm_init_valid_leaf_pte(u64 pa, kvm_pte_t attr, u32 level) 143 { 144 kvm_pte_t pte = kvm_phys_to_pte(pa); 145 u64 type = (level == KVM_PGTABLE_MAX_LEVELS - 1) ? KVM_PTE_TYPE_PAGE : 146 KVM_PTE_TYPE_BLOCK; 147 148 pte |= attr & (KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI); 149 pte |= FIELD_PREP(KVM_PTE_TYPE, type); 150 pte |= KVM_PTE_VALID; 151 152 return pte; 153 } 154 155 static kvm_pte_t kvm_init_invalid_leaf_owner(u8 owner_id) 156 { 157 return FIELD_PREP(KVM_INVALID_PTE_OWNER_MASK, owner_id); 158 } 159 160 static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, 161 const struct kvm_pgtable_visit_ctx *ctx, 162 enum kvm_pgtable_walk_flags visit) 163 { 164 struct kvm_pgtable_walker *walker = data->walker; 165 166 /* Ensure the appropriate lock is held (e.g. RCU lock for stage-2 MMU) */ 167 WARN_ON_ONCE(kvm_pgtable_walk_shared(ctx) && !kvm_pgtable_walk_lock_held()); 168 return walker->cb(ctx, visit); 169 } 170 171 static bool kvm_pgtable_walk_continue(const struct kvm_pgtable_walker *walker, 172 int r) 173 { 174 /* 175 * Visitor callbacks return EAGAIN when the conditions that led to a 176 * fault are no longer reflected in the page tables due to a race to 177 * update a PTE. In the context of a fault handler this is interpreted 178 * as a signal to retry guest execution. 179 * 180 * Ignore the return code altogether for walkers outside a fault handler 181 * (e.g. write protecting a range of memory) and chug along with the 182 * page table walk. 183 */ 184 if (r == -EAGAIN) 185 return !(walker->flags & KVM_PGTABLE_WALK_HANDLE_FAULT); 186 187 return !r; 188 } 189 190 static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data, 191 struct kvm_pgtable_mm_ops *mm_ops, kvm_pteref_t pgtable, u32 level); 192 193 static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data, 194 struct kvm_pgtable_mm_ops *mm_ops, 195 kvm_pteref_t pteref, u32 level) 196 { 197 enum kvm_pgtable_walk_flags flags = data->walker->flags; 198 kvm_pte_t *ptep = kvm_dereference_pteref(data->walker, pteref); 199 struct kvm_pgtable_visit_ctx ctx = { 200 .ptep = ptep, 201 .old = READ_ONCE(*ptep), 202 .arg = data->walker->arg, 203 .mm_ops = mm_ops, 204 .addr = data->addr, 205 .end = data->end, 206 .level = level, 207 .flags = flags, 208 }; 209 int ret = 0; 210 kvm_pteref_t childp; 211 bool table = kvm_pte_table(ctx.old, level); 212 213 if (table && (ctx.flags & KVM_PGTABLE_WALK_TABLE_PRE)) 214 ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_TABLE_PRE); 215 216 if (!table && (ctx.flags & KVM_PGTABLE_WALK_LEAF)) { 217 ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_LEAF); 218 ctx.old = READ_ONCE(*ptep); 219 table = kvm_pte_table(ctx.old, level); 220 } 221 222 if (!kvm_pgtable_walk_continue(data->walker, ret)) 223 goto out; 224 225 if (!table) { 226 data->addr = ALIGN_DOWN(data->addr, kvm_granule_size(level)); 227 data->addr += kvm_granule_size(level); 228 goto out; 229 } 230 231 childp = (kvm_pteref_t)kvm_pte_follow(ctx.old, mm_ops); 232 ret = __kvm_pgtable_walk(data, mm_ops, childp, level + 1); 233 if (!kvm_pgtable_walk_continue(data->walker, ret)) 234 goto out; 235 236 if (ctx.flags & KVM_PGTABLE_WALK_TABLE_POST) 237 ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_TABLE_POST); 238 239 out: 240 if (kvm_pgtable_walk_continue(data->walker, ret)) 241 return 0; 242 243 return ret; 244 } 245 246 static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data, 247 struct kvm_pgtable_mm_ops *mm_ops, kvm_pteref_t pgtable, u32 level) 248 { 249 u32 idx; 250 int ret = 0; 251 252 if (WARN_ON_ONCE(level >= KVM_PGTABLE_MAX_LEVELS)) 253 return -EINVAL; 254 255 for (idx = kvm_pgtable_idx(data, level); idx < PTRS_PER_PTE; ++idx) { 256 kvm_pteref_t pteref = &pgtable[idx]; 257 258 if (data->addr >= data->end) 259 break; 260 261 ret = __kvm_pgtable_visit(data, mm_ops, pteref, level); 262 if (ret) 263 break; 264 } 265 266 return ret; 267 } 268 269 static int _kvm_pgtable_walk(struct kvm_pgtable *pgt, struct kvm_pgtable_walk_data *data) 270 { 271 u32 idx; 272 int ret = 0; 273 u64 limit = BIT(pgt->ia_bits); 274 275 if (data->addr > limit || data->end > limit) 276 return -ERANGE; 277 278 if (!pgt->pgd) 279 return -EINVAL; 280 281 for (idx = kvm_pgd_page_idx(pgt, data->addr); data->addr < data->end; ++idx) { 282 kvm_pteref_t pteref = &pgt->pgd[idx * PTRS_PER_PTE]; 283 284 ret = __kvm_pgtable_walk(data, pgt->mm_ops, pteref, pgt->start_level); 285 if (ret) 286 break; 287 } 288 289 return ret; 290 } 291 292 int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size, 293 struct kvm_pgtable_walker *walker) 294 { 295 struct kvm_pgtable_walk_data walk_data = { 296 .addr = ALIGN_DOWN(addr, PAGE_SIZE), 297 .end = PAGE_ALIGN(walk_data.addr + size), 298 .walker = walker, 299 }; 300 int r; 301 302 r = kvm_pgtable_walk_begin(walker); 303 if (r) 304 return r; 305 306 r = _kvm_pgtable_walk(pgt, &walk_data); 307 kvm_pgtable_walk_end(walker); 308 309 return r; 310 } 311 312 struct leaf_walk_data { 313 kvm_pte_t pte; 314 u32 level; 315 }; 316 317 static int leaf_walker(const struct kvm_pgtable_visit_ctx *ctx, 318 enum kvm_pgtable_walk_flags visit) 319 { 320 struct leaf_walk_data *data = ctx->arg; 321 322 data->pte = ctx->old; 323 data->level = ctx->level; 324 325 return 0; 326 } 327 328 int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr, 329 kvm_pte_t *ptep, u32 *level) 330 { 331 struct leaf_walk_data data; 332 struct kvm_pgtable_walker walker = { 333 .cb = leaf_walker, 334 .flags = KVM_PGTABLE_WALK_LEAF, 335 .arg = &data, 336 }; 337 int ret; 338 339 ret = kvm_pgtable_walk(pgt, ALIGN_DOWN(addr, PAGE_SIZE), 340 PAGE_SIZE, &walker); 341 if (!ret) { 342 if (ptep) 343 *ptep = data.pte; 344 if (level) 345 *level = data.level; 346 } 347 348 return ret; 349 } 350 351 struct hyp_map_data { 352 u64 phys; 353 kvm_pte_t attr; 354 }; 355 356 static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep) 357 { 358 bool device = prot & KVM_PGTABLE_PROT_DEVICE; 359 u32 mtype = device ? MT_DEVICE_nGnRE : MT_NORMAL; 360 kvm_pte_t attr = FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX, mtype); 361 u32 sh = KVM_PTE_LEAF_ATTR_LO_S1_SH_IS; 362 u32 ap = (prot & KVM_PGTABLE_PROT_W) ? KVM_PTE_LEAF_ATTR_LO_S1_AP_RW : 363 KVM_PTE_LEAF_ATTR_LO_S1_AP_RO; 364 365 if (!(prot & KVM_PGTABLE_PROT_R)) 366 return -EINVAL; 367 368 if (prot & KVM_PGTABLE_PROT_X) { 369 if (prot & KVM_PGTABLE_PROT_W) 370 return -EINVAL; 371 372 if (device) 373 return -EINVAL; 374 } else { 375 attr |= KVM_PTE_LEAF_ATTR_HI_S1_XN; 376 } 377 378 attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_AP, ap); 379 attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh); 380 attr |= KVM_PTE_LEAF_ATTR_LO_S1_AF; 381 attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW; 382 *ptep = attr; 383 384 return 0; 385 } 386 387 enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte) 388 { 389 enum kvm_pgtable_prot prot = pte & KVM_PTE_LEAF_ATTR_HI_SW; 390 u32 ap; 391 392 if (!kvm_pte_valid(pte)) 393 return prot; 394 395 if (!(pte & KVM_PTE_LEAF_ATTR_HI_S1_XN)) 396 prot |= KVM_PGTABLE_PROT_X; 397 398 ap = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S1_AP, pte); 399 if (ap == KVM_PTE_LEAF_ATTR_LO_S1_AP_RO) 400 prot |= KVM_PGTABLE_PROT_R; 401 else if (ap == KVM_PTE_LEAF_ATTR_LO_S1_AP_RW) 402 prot |= KVM_PGTABLE_PROT_RW; 403 404 return prot; 405 } 406 407 static bool hyp_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx, 408 struct hyp_map_data *data) 409 { 410 kvm_pte_t new; 411 u64 granule = kvm_granule_size(ctx->level), phys = data->phys; 412 413 if (!kvm_block_mapping_supported(ctx, phys)) 414 return false; 415 416 data->phys += granule; 417 new = kvm_init_valid_leaf_pte(phys, data->attr, ctx->level); 418 if (ctx->old == new) 419 return true; 420 if (!kvm_pte_valid(ctx->old)) 421 ctx->mm_ops->get_page(ctx->ptep); 422 else if (WARN_ON((ctx->old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW)) 423 return false; 424 425 smp_store_release(ctx->ptep, new); 426 return true; 427 } 428 429 static int hyp_map_walker(const struct kvm_pgtable_visit_ctx *ctx, 430 enum kvm_pgtable_walk_flags visit) 431 { 432 kvm_pte_t *childp, new; 433 struct hyp_map_data *data = ctx->arg; 434 struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; 435 436 if (hyp_map_walker_try_leaf(ctx, data)) 437 return 0; 438 439 if (WARN_ON(ctx->level == KVM_PGTABLE_MAX_LEVELS - 1)) 440 return -EINVAL; 441 442 childp = (kvm_pte_t *)mm_ops->zalloc_page(NULL); 443 if (!childp) 444 return -ENOMEM; 445 446 new = kvm_init_table_pte(childp, mm_ops); 447 mm_ops->get_page(ctx->ptep); 448 smp_store_release(ctx->ptep, new); 449 450 return 0; 451 } 452 453 int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, 454 enum kvm_pgtable_prot prot) 455 { 456 int ret; 457 struct hyp_map_data map_data = { 458 .phys = ALIGN_DOWN(phys, PAGE_SIZE), 459 }; 460 struct kvm_pgtable_walker walker = { 461 .cb = hyp_map_walker, 462 .flags = KVM_PGTABLE_WALK_LEAF, 463 .arg = &map_data, 464 }; 465 466 ret = hyp_set_prot_attr(prot, &map_data.attr); 467 if (ret) 468 return ret; 469 470 ret = kvm_pgtable_walk(pgt, addr, size, &walker); 471 dsb(ishst); 472 isb(); 473 return ret; 474 } 475 476 static int hyp_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx, 477 enum kvm_pgtable_walk_flags visit) 478 { 479 kvm_pte_t *childp = NULL; 480 u64 granule = kvm_granule_size(ctx->level); 481 u64 *unmapped = ctx->arg; 482 struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; 483 484 if (!kvm_pte_valid(ctx->old)) 485 return -EINVAL; 486 487 if (kvm_pte_table(ctx->old, ctx->level)) { 488 childp = kvm_pte_follow(ctx->old, mm_ops); 489 490 if (mm_ops->page_count(childp) != 1) 491 return 0; 492 493 kvm_clear_pte(ctx->ptep); 494 dsb(ishst); 495 __tlbi_level(vae2is, __TLBI_VADDR(ctx->addr, 0), ctx->level); 496 } else { 497 if (ctx->end - ctx->addr < granule) 498 return -EINVAL; 499 500 kvm_clear_pte(ctx->ptep); 501 dsb(ishst); 502 __tlbi_level(vale2is, __TLBI_VADDR(ctx->addr, 0), ctx->level); 503 *unmapped += granule; 504 } 505 506 dsb(ish); 507 isb(); 508 mm_ops->put_page(ctx->ptep); 509 510 if (childp) 511 mm_ops->put_page(childp); 512 513 return 0; 514 } 515 516 u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size) 517 { 518 u64 unmapped = 0; 519 struct kvm_pgtable_walker walker = { 520 .cb = hyp_unmap_walker, 521 .arg = &unmapped, 522 .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST, 523 }; 524 525 if (!pgt->mm_ops->page_count) 526 return 0; 527 528 kvm_pgtable_walk(pgt, addr, size, &walker); 529 return unmapped; 530 } 531 532 int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits, 533 struct kvm_pgtable_mm_ops *mm_ops) 534 { 535 u64 levels = ARM64_HW_PGTABLE_LEVELS(va_bits); 536 537 pgt->pgd = (kvm_pteref_t)mm_ops->zalloc_page(NULL); 538 if (!pgt->pgd) 539 return -ENOMEM; 540 541 pgt->ia_bits = va_bits; 542 pgt->start_level = KVM_PGTABLE_MAX_LEVELS - levels; 543 pgt->mm_ops = mm_ops; 544 pgt->mmu = NULL; 545 pgt->force_pte_cb = NULL; 546 547 return 0; 548 } 549 550 static int hyp_free_walker(const struct kvm_pgtable_visit_ctx *ctx, 551 enum kvm_pgtable_walk_flags visit) 552 { 553 struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; 554 555 if (!kvm_pte_valid(ctx->old)) 556 return 0; 557 558 mm_ops->put_page(ctx->ptep); 559 560 if (kvm_pte_table(ctx->old, ctx->level)) 561 mm_ops->put_page(kvm_pte_follow(ctx->old, mm_ops)); 562 563 return 0; 564 } 565 566 void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt) 567 { 568 struct kvm_pgtable_walker walker = { 569 .cb = hyp_free_walker, 570 .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST, 571 }; 572 573 WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker)); 574 pgt->mm_ops->put_page(kvm_dereference_pteref(&walker, pgt->pgd)); 575 pgt->pgd = NULL; 576 } 577 578 struct stage2_map_data { 579 u64 phys; 580 kvm_pte_t attr; 581 u8 owner_id; 582 583 kvm_pte_t *anchor; 584 kvm_pte_t *childp; 585 586 struct kvm_s2_mmu *mmu; 587 void *memcache; 588 589 /* Force mappings to page granularity */ 590 bool force_pte; 591 }; 592 593 u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift) 594 { 595 u64 vtcr = VTCR_EL2_FLAGS; 596 u8 lvls; 597 598 vtcr |= kvm_get_parange(mmfr0) << VTCR_EL2_PS_SHIFT; 599 vtcr |= VTCR_EL2_T0SZ(phys_shift); 600 /* 601 * Use a minimum 2 level page table to prevent splitting 602 * host PMD huge pages at stage2. 603 */ 604 lvls = stage2_pgtable_levels(phys_shift); 605 if (lvls < 2) 606 lvls = 2; 607 vtcr |= VTCR_EL2_LVLS_TO_SL0(lvls); 608 609 #ifdef CONFIG_ARM64_HW_AFDBM 610 /* 611 * Enable the Hardware Access Flag management, unconditionally 612 * on all CPUs. The features is RES0 on CPUs without the support 613 * and must be ignored by the CPUs. 614 */ 615 vtcr |= VTCR_EL2_HA; 616 #endif /* CONFIG_ARM64_HW_AFDBM */ 617 618 /* Set the vmid bits */ 619 vtcr |= (get_vmid_bits(mmfr1) == 16) ? 620 VTCR_EL2_VS_16BIT : 621 VTCR_EL2_VS_8BIT; 622 623 return vtcr; 624 } 625 626 static bool stage2_has_fwb(struct kvm_pgtable *pgt) 627 { 628 if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) 629 return false; 630 631 return !(pgt->flags & KVM_PGTABLE_S2_NOFWB); 632 } 633 634 #define KVM_S2_MEMATTR(pgt, attr) PAGE_S2_MEMATTR(attr, stage2_has_fwb(pgt)) 635 636 static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot prot, 637 kvm_pte_t *ptep) 638 { 639 bool device = prot & KVM_PGTABLE_PROT_DEVICE; 640 kvm_pte_t attr = device ? KVM_S2_MEMATTR(pgt, DEVICE_nGnRE) : 641 KVM_S2_MEMATTR(pgt, NORMAL); 642 u32 sh = KVM_PTE_LEAF_ATTR_LO_S2_SH_IS; 643 644 if (!(prot & KVM_PGTABLE_PROT_X)) 645 attr |= KVM_PTE_LEAF_ATTR_HI_S2_XN; 646 else if (device) 647 return -EINVAL; 648 649 if (prot & KVM_PGTABLE_PROT_R) 650 attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R; 651 652 if (prot & KVM_PGTABLE_PROT_W) 653 attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W; 654 655 attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh); 656 attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF; 657 attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW; 658 *ptep = attr; 659 660 return 0; 661 } 662 663 enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte) 664 { 665 enum kvm_pgtable_prot prot = pte & KVM_PTE_LEAF_ATTR_HI_SW; 666 667 if (!kvm_pte_valid(pte)) 668 return prot; 669 670 if (pte & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R) 671 prot |= KVM_PGTABLE_PROT_R; 672 if (pte & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W) 673 prot |= KVM_PGTABLE_PROT_W; 674 if (!(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN)) 675 prot |= KVM_PGTABLE_PROT_X; 676 677 return prot; 678 } 679 680 static bool stage2_pte_needs_update(kvm_pte_t old, kvm_pte_t new) 681 { 682 if (!kvm_pte_valid(old) || !kvm_pte_valid(new)) 683 return true; 684 685 return ((old ^ new) & (~KVM_PTE_LEAF_ATTR_S2_PERMS)); 686 } 687 688 static bool stage2_pte_is_counted(kvm_pte_t pte) 689 { 690 /* 691 * The refcount tracks valid entries as well as invalid entries if they 692 * encode ownership of a page to another entity than the page-table 693 * owner, whose id is 0. 694 */ 695 return !!pte; 696 } 697 698 static bool stage2_pte_is_locked(kvm_pte_t pte) 699 { 700 return !kvm_pte_valid(pte) && (pte & KVM_INVALID_PTE_LOCKED); 701 } 702 703 static bool stage2_try_set_pte(const struct kvm_pgtable_visit_ctx *ctx, kvm_pte_t new) 704 { 705 if (!kvm_pgtable_walk_shared(ctx)) { 706 WRITE_ONCE(*ctx->ptep, new); 707 return true; 708 } 709 710 return cmpxchg(ctx->ptep, ctx->old, new) == ctx->old; 711 } 712 713 /** 714 * stage2_try_break_pte() - Invalidates a pte according to the 715 * 'break-before-make' requirements of the 716 * architecture. 717 * 718 * @ctx: context of the visited pte. 719 * @mmu: stage-2 mmu 720 * 721 * Returns: true if the pte was successfully broken. 722 * 723 * If the removed pte was valid, performs the necessary serialization and TLB 724 * invalidation for the old value. For counted ptes, drops the reference count 725 * on the containing table page. 726 */ 727 static bool stage2_try_break_pte(const struct kvm_pgtable_visit_ctx *ctx, 728 struct kvm_s2_mmu *mmu) 729 { 730 struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; 731 732 if (stage2_pte_is_locked(ctx->old)) { 733 /* 734 * Should never occur if this walker has exclusive access to the 735 * page tables. 736 */ 737 WARN_ON(!kvm_pgtable_walk_shared(ctx)); 738 return false; 739 } 740 741 if (!stage2_try_set_pte(ctx, KVM_INVALID_PTE_LOCKED)) 742 return false; 743 744 /* 745 * Perform the appropriate TLB invalidation based on the evicted pte 746 * value (if any). 747 */ 748 if (kvm_pte_table(ctx->old, ctx->level)) 749 kvm_call_hyp(__kvm_tlb_flush_vmid, mmu); 750 else if (kvm_pte_valid(ctx->old)) 751 kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, ctx->level); 752 753 if (stage2_pte_is_counted(ctx->old)) 754 mm_ops->put_page(ctx->ptep); 755 756 return true; 757 } 758 759 static void stage2_make_pte(const struct kvm_pgtable_visit_ctx *ctx, kvm_pte_t new) 760 { 761 struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; 762 763 WARN_ON(!stage2_pte_is_locked(*ctx->ptep)); 764 765 if (stage2_pte_is_counted(new)) 766 mm_ops->get_page(ctx->ptep); 767 768 smp_store_release(ctx->ptep, new); 769 } 770 771 static void stage2_put_pte(const struct kvm_pgtable_visit_ctx *ctx, struct kvm_s2_mmu *mmu, 772 struct kvm_pgtable_mm_ops *mm_ops) 773 { 774 /* 775 * Clear the existing PTE, and perform break-before-make with 776 * TLB maintenance if it was valid. 777 */ 778 if (kvm_pte_valid(ctx->old)) { 779 kvm_clear_pte(ctx->ptep); 780 kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, ctx->level); 781 } 782 783 mm_ops->put_page(ctx->ptep); 784 } 785 786 static bool stage2_pte_cacheable(struct kvm_pgtable *pgt, kvm_pte_t pte) 787 { 788 u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR; 789 return memattr == KVM_S2_MEMATTR(pgt, NORMAL); 790 } 791 792 static bool stage2_pte_executable(kvm_pte_t pte) 793 { 794 return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN); 795 } 796 797 static bool stage2_leaf_mapping_allowed(const struct kvm_pgtable_visit_ctx *ctx, 798 struct stage2_map_data *data) 799 { 800 if (data->force_pte && (ctx->level < (KVM_PGTABLE_MAX_LEVELS - 1))) 801 return false; 802 803 return kvm_block_mapping_supported(ctx, data->phys); 804 } 805 806 static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx, 807 struct stage2_map_data *data) 808 { 809 kvm_pte_t new; 810 u64 granule = kvm_granule_size(ctx->level), phys = data->phys; 811 struct kvm_pgtable *pgt = data->mmu->pgt; 812 struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; 813 814 if (!stage2_leaf_mapping_allowed(ctx, data)) 815 return -E2BIG; 816 817 if (kvm_phys_is_valid(phys)) 818 new = kvm_init_valid_leaf_pte(phys, data->attr, ctx->level); 819 else 820 new = kvm_init_invalid_leaf_owner(data->owner_id); 821 822 /* 823 * Skip updating the PTE if we are trying to recreate the exact 824 * same mapping or only change the access permissions. Instead, 825 * the vCPU will exit one more time from guest if still needed 826 * and then go through the path of relaxing permissions. 827 */ 828 if (!stage2_pte_needs_update(ctx->old, new)) 829 return -EAGAIN; 830 831 if (!stage2_try_break_pte(ctx, data->mmu)) 832 return -EAGAIN; 833 834 /* Perform CMOs before installation of the guest stage-2 PTE */ 835 if (mm_ops->dcache_clean_inval_poc && stage2_pte_cacheable(pgt, new)) 836 mm_ops->dcache_clean_inval_poc(kvm_pte_follow(new, mm_ops), 837 granule); 838 839 if (mm_ops->icache_inval_pou && stage2_pte_executable(new)) 840 mm_ops->icache_inval_pou(kvm_pte_follow(new, mm_ops), granule); 841 842 stage2_make_pte(ctx, new); 843 844 if (kvm_phys_is_valid(phys)) 845 data->phys += granule; 846 return 0; 847 } 848 849 static int stage2_map_walk_table_pre(const struct kvm_pgtable_visit_ctx *ctx, 850 struct stage2_map_data *data) 851 { 852 struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; 853 kvm_pte_t *childp = kvm_pte_follow(ctx->old, mm_ops); 854 int ret; 855 856 if (!stage2_leaf_mapping_allowed(ctx, data)) 857 return 0; 858 859 ret = stage2_map_walker_try_leaf(ctx, data); 860 if (ret) 861 return ret; 862 863 mm_ops->free_removed_table(childp, ctx->level); 864 return 0; 865 } 866 867 static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx, 868 struct stage2_map_data *data) 869 { 870 struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; 871 kvm_pte_t *childp, new; 872 int ret; 873 874 ret = stage2_map_walker_try_leaf(ctx, data); 875 if (ret != -E2BIG) 876 return ret; 877 878 if (WARN_ON(ctx->level == KVM_PGTABLE_MAX_LEVELS - 1)) 879 return -EINVAL; 880 881 if (!data->memcache) 882 return -ENOMEM; 883 884 childp = mm_ops->zalloc_page(data->memcache); 885 if (!childp) 886 return -ENOMEM; 887 888 if (!stage2_try_break_pte(ctx, data->mmu)) { 889 mm_ops->put_page(childp); 890 return -EAGAIN; 891 } 892 893 /* 894 * If we've run into an existing block mapping then replace it with 895 * a table. Accesses beyond 'end' that fall within the new table 896 * will be mapped lazily. 897 */ 898 new = kvm_init_table_pte(childp, mm_ops); 899 stage2_make_pte(ctx, new); 900 901 return 0; 902 } 903 904 /* 905 * The TABLE_PRE callback runs for table entries on the way down, looking 906 * for table entries which we could conceivably replace with a block entry 907 * for this mapping. If it finds one it replaces the entry and calls 908 * kvm_pgtable_mm_ops::free_removed_table() to tear down the detached table. 909 * 910 * Otherwise, the LEAF callback performs the mapping at the existing leaves 911 * instead. 912 */ 913 static int stage2_map_walker(const struct kvm_pgtable_visit_ctx *ctx, 914 enum kvm_pgtable_walk_flags visit) 915 { 916 struct stage2_map_data *data = ctx->arg; 917 918 switch (visit) { 919 case KVM_PGTABLE_WALK_TABLE_PRE: 920 return stage2_map_walk_table_pre(ctx, data); 921 case KVM_PGTABLE_WALK_LEAF: 922 return stage2_map_walk_leaf(ctx, data); 923 default: 924 return -EINVAL; 925 } 926 } 927 928 int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, 929 u64 phys, enum kvm_pgtable_prot prot, 930 void *mc, enum kvm_pgtable_walk_flags flags) 931 { 932 int ret; 933 struct stage2_map_data map_data = { 934 .phys = ALIGN_DOWN(phys, PAGE_SIZE), 935 .mmu = pgt->mmu, 936 .memcache = mc, 937 .force_pte = pgt->force_pte_cb && pgt->force_pte_cb(addr, addr + size, prot), 938 }; 939 struct kvm_pgtable_walker walker = { 940 .cb = stage2_map_walker, 941 .flags = flags | 942 KVM_PGTABLE_WALK_TABLE_PRE | 943 KVM_PGTABLE_WALK_LEAF, 944 .arg = &map_data, 945 }; 946 947 if (WARN_ON((pgt->flags & KVM_PGTABLE_S2_IDMAP) && (addr != phys))) 948 return -EINVAL; 949 950 ret = stage2_set_prot_attr(pgt, prot, &map_data.attr); 951 if (ret) 952 return ret; 953 954 ret = kvm_pgtable_walk(pgt, addr, size, &walker); 955 dsb(ishst); 956 return ret; 957 } 958 959 int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size, 960 void *mc, u8 owner_id) 961 { 962 int ret; 963 struct stage2_map_data map_data = { 964 .phys = KVM_PHYS_INVALID, 965 .mmu = pgt->mmu, 966 .memcache = mc, 967 .owner_id = owner_id, 968 .force_pte = true, 969 }; 970 struct kvm_pgtable_walker walker = { 971 .cb = stage2_map_walker, 972 .flags = KVM_PGTABLE_WALK_TABLE_PRE | 973 KVM_PGTABLE_WALK_LEAF, 974 .arg = &map_data, 975 }; 976 977 if (owner_id > KVM_MAX_OWNER_ID) 978 return -EINVAL; 979 980 ret = kvm_pgtable_walk(pgt, addr, size, &walker); 981 return ret; 982 } 983 984 static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx, 985 enum kvm_pgtable_walk_flags visit) 986 { 987 struct kvm_pgtable *pgt = ctx->arg; 988 struct kvm_s2_mmu *mmu = pgt->mmu; 989 struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; 990 kvm_pte_t *childp = NULL; 991 bool need_flush = false; 992 993 if (!kvm_pte_valid(ctx->old)) { 994 if (stage2_pte_is_counted(ctx->old)) { 995 kvm_clear_pte(ctx->ptep); 996 mm_ops->put_page(ctx->ptep); 997 } 998 return 0; 999 } 1000 1001 if (kvm_pte_table(ctx->old, ctx->level)) { 1002 childp = kvm_pte_follow(ctx->old, mm_ops); 1003 1004 if (mm_ops->page_count(childp) != 1) 1005 return 0; 1006 } else if (stage2_pte_cacheable(pgt, ctx->old)) { 1007 need_flush = !stage2_has_fwb(pgt); 1008 } 1009 1010 /* 1011 * This is similar to the map() path in that we unmap the entire 1012 * block entry and rely on the remaining portions being faulted 1013 * back lazily. 1014 */ 1015 stage2_put_pte(ctx, mmu, mm_ops); 1016 1017 if (need_flush && mm_ops->dcache_clean_inval_poc) 1018 mm_ops->dcache_clean_inval_poc(kvm_pte_follow(ctx->old, mm_ops), 1019 kvm_granule_size(ctx->level)); 1020 1021 if (childp) 1022 mm_ops->put_page(childp); 1023 1024 return 0; 1025 } 1026 1027 int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size) 1028 { 1029 struct kvm_pgtable_walker walker = { 1030 .cb = stage2_unmap_walker, 1031 .arg = pgt, 1032 .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST, 1033 }; 1034 1035 return kvm_pgtable_walk(pgt, addr, size, &walker); 1036 } 1037 1038 struct stage2_attr_data { 1039 kvm_pte_t attr_set; 1040 kvm_pte_t attr_clr; 1041 kvm_pte_t pte; 1042 u32 level; 1043 }; 1044 1045 static int stage2_attr_walker(const struct kvm_pgtable_visit_ctx *ctx, 1046 enum kvm_pgtable_walk_flags visit) 1047 { 1048 kvm_pte_t pte = ctx->old; 1049 struct stage2_attr_data *data = ctx->arg; 1050 struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; 1051 1052 if (!kvm_pte_valid(ctx->old)) 1053 return -EAGAIN; 1054 1055 data->level = ctx->level; 1056 data->pte = pte; 1057 pte &= ~data->attr_clr; 1058 pte |= data->attr_set; 1059 1060 /* 1061 * We may race with the CPU trying to set the access flag here, 1062 * but worst-case the access flag update gets lost and will be 1063 * set on the next access instead. 1064 */ 1065 if (data->pte != pte) { 1066 /* 1067 * Invalidate instruction cache before updating the guest 1068 * stage-2 PTE if we are going to add executable permission. 1069 */ 1070 if (mm_ops->icache_inval_pou && 1071 stage2_pte_executable(pte) && !stage2_pte_executable(ctx->old)) 1072 mm_ops->icache_inval_pou(kvm_pte_follow(pte, mm_ops), 1073 kvm_granule_size(ctx->level)); 1074 1075 if (!stage2_try_set_pte(ctx, pte)) 1076 return -EAGAIN; 1077 } 1078 1079 return 0; 1080 } 1081 1082 static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr, 1083 u64 size, kvm_pte_t attr_set, 1084 kvm_pte_t attr_clr, kvm_pte_t *orig_pte, 1085 u32 *level, enum kvm_pgtable_walk_flags flags) 1086 { 1087 int ret; 1088 kvm_pte_t attr_mask = KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI; 1089 struct stage2_attr_data data = { 1090 .attr_set = attr_set & attr_mask, 1091 .attr_clr = attr_clr & attr_mask, 1092 }; 1093 struct kvm_pgtable_walker walker = { 1094 .cb = stage2_attr_walker, 1095 .arg = &data, 1096 .flags = flags | KVM_PGTABLE_WALK_LEAF, 1097 }; 1098 1099 ret = kvm_pgtable_walk(pgt, addr, size, &walker); 1100 if (ret) 1101 return ret; 1102 1103 if (orig_pte) 1104 *orig_pte = data.pte; 1105 1106 if (level) 1107 *level = data.level; 1108 return 0; 1109 } 1110 1111 int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size) 1112 { 1113 return stage2_update_leaf_attrs(pgt, addr, size, 0, 1114 KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W, 1115 NULL, NULL, 0); 1116 } 1117 1118 kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr) 1119 { 1120 kvm_pte_t pte = 0; 1121 int ret; 1122 1123 ret = stage2_update_leaf_attrs(pgt, addr, 1, KVM_PTE_LEAF_ATTR_LO_S2_AF, 0, 1124 &pte, NULL, 1125 KVM_PGTABLE_WALK_HANDLE_FAULT | 1126 KVM_PGTABLE_WALK_SHARED); 1127 if (!ret) 1128 dsb(ishst); 1129 1130 return pte; 1131 } 1132 1133 kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr) 1134 { 1135 kvm_pte_t pte = 0; 1136 stage2_update_leaf_attrs(pgt, addr, 1, 0, KVM_PTE_LEAF_ATTR_LO_S2_AF, 1137 &pte, NULL, 0); 1138 /* 1139 * "But where's the TLBI?!", you scream. 1140 * "Over in the core code", I sigh. 1141 * 1142 * See the '->clear_flush_young()' callback on the KVM mmu notifier. 1143 */ 1144 return pte; 1145 } 1146 1147 bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr) 1148 { 1149 kvm_pte_t pte = 0; 1150 stage2_update_leaf_attrs(pgt, addr, 1, 0, 0, &pte, NULL, 0); 1151 return pte & KVM_PTE_LEAF_ATTR_LO_S2_AF; 1152 } 1153 1154 int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, 1155 enum kvm_pgtable_prot prot) 1156 { 1157 int ret; 1158 u32 level; 1159 kvm_pte_t set = 0, clr = 0; 1160 1161 if (prot & KVM_PTE_LEAF_ATTR_HI_SW) 1162 return -EINVAL; 1163 1164 if (prot & KVM_PGTABLE_PROT_R) 1165 set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R; 1166 1167 if (prot & KVM_PGTABLE_PROT_W) 1168 set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W; 1169 1170 if (prot & KVM_PGTABLE_PROT_X) 1171 clr |= KVM_PTE_LEAF_ATTR_HI_S2_XN; 1172 1173 ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level, 1174 KVM_PGTABLE_WALK_HANDLE_FAULT | 1175 KVM_PGTABLE_WALK_SHARED); 1176 if (!ret) 1177 kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, pgt->mmu, addr, level); 1178 return ret; 1179 } 1180 1181 static int stage2_flush_walker(const struct kvm_pgtable_visit_ctx *ctx, 1182 enum kvm_pgtable_walk_flags visit) 1183 { 1184 struct kvm_pgtable *pgt = ctx->arg; 1185 struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops; 1186 1187 if (!kvm_pte_valid(ctx->old) || !stage2_pte_cacheable(pgt, ctx->old)) 1188 return 0; 1189 1190 if (mm_ops->dcache_clean_inval_poc) 1191 mm_ops->dcache_clean_inval_poc(kvm_pte_follow(ctx->old, mm_ops), 1192 kvm_granule_size(ctx->level)); 1193 return 0; 1194 } 1195 1196 int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size) 1197 { 1198 struct kvm_pgtable_walker walker = { 1199 .cb = stage2_flush_walker, 1200 .flags = KVM_PGTABLE_WALK_LEAF, 1201 .arg = pgt, 1202 }; 1203 1204 if (stage2_has_fwb(pgt)) 1205 return 0; 1206 1207 return kvm_pgtable_walk(pgt, addr, size, &walker); 1208 } 1209 1210 1211 int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, 1212 struct kvm_pgtable_mm_ops *mm_ops, 1213 enum kvm_pgtable_stage2_flags flags, 1214 kvm_pgtable_force_pte_cb_t force_pte_cb) 1215 { 1216 size_t pgd_sz; 1217 u64 vtcr = mmu->arch->vtcr; 1218 u32 ia_bits = VTCR_EL2_IPA(vtcr); 1219 u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr); 1220 u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0; 1221 1222 pgd_sz = kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE; 1223 pgt->pgd = (kvm_pteref_t)mm_ops->zalloc_pages_exact(pgd_sz); 1224 if (!pgt->pgd) 1225 return -ENOMEM; 1226 1227 pgt->ia_bits = ia_bits; 1228 pgt->start_level = start_level; 1229 pgt->mm_ops = mm_ops; 1230 pgt->mmu = mmu; 1231 pgt->flags = flags; 1232 pgt->force_pte_cb = force_pte_cb; 1233 1234 /* Ensure zeroed PGD pages are visible to the hardware walker */ 1235 dsb(ishst); 1236 return 0; 1237 } 1238 1239 size_t kvm_pgtable_stage2_pgd_size(u64 vtcr) 1240 { 1241 u32 ia_bits = VTCR_EL2_IPA(vtcr); 1242 u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr); 1243 u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0; 1244 1245 return kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE; 1246 } 1247 1248 static int stage2_free_walker(const struct kvm_pgtable_visit_ctx *ctx, 1249 enum kvm_pgtable_walk_flags visit) 1250 { 1251 struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; 1252 1253 if (!stage2_pte_is_counted(ctx->old)) 1254 return 0; 1255 1256 mm_ops->put_page(ctx->ptep); 1257 1258 if (kvm_pte_table(ctx->old, ctx->level)) 1259 mm_ops->put_page(kvm_pte_follow(ctx->old, mm_ops)); 1260 1261 return 0; 1262 } 1263 1264 void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt) 1265 { 1266 size_t pgd_sz; 1267 struct kvm_pgtable_walker walker = { 1268 .cb = stage2_free_walker, 1269 .flags = KVM_PGTABLE_WALK_LEAF | 1270 KVM_PGTABLE_WALK_TABLE_POST, 1271 }; 1272 1273 WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker)); 1274 pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level) * PAGE_SIZE; 1275 pgt->mm_ops->free_pages_exact(kvm_dereference_pteref(&walker, pgt->pgd), pgd_sz); 1276 pgt->pgd = NULL; 1277 } 1278 1279 void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level) 1280 { 1281 kvm_pteref_t ptep = (kvm_pteref_t)pgtable; 1282 struct kvm_pgtable_walker walker = { 1283 .cb = stage2_free_walker, 1284 .flags = KVM_PGTABLE_WALK_LEAF | 1285 KVM_PGTABLE_WALK_TABLE_POST, 1286 }; 1287 struct kvm_pgtable_walk_data data = { 1288 .walker = &walker, 1289 1290 /* 1291 * At this point the IPA really doesn't matter, as the page 1292 * table being traversed has already been removed from the stage 1293 * 2. Set an appropriate range to cover the entire page table. 1294 */ 1295 .addr = 0, 1296 .end = kvm_granule_size(level), 1297 }; 1298 1299 WARN_ON(__kvm_pgtable_walk(&data, mm_ops, ptep, level + 1)); 1300 } 1301