// SPDX-License-Identifier: GPL-2.0-only
/*
 * Stand-alone page-table allocator for hyp stage-1 and guest stage-2.
 * No bombay mix was harmed in the writing of this file.
 *
 * Copyright (C) 2020 Google LLC
 * Author: Will Deacon <will@kernel.org>
 */

#include <linux/bitfield.h>
#include <asm/kvm_pgtable.h>

#define KVM_PGTABLE_MAX_LEVELS		4U

#define KVM_PTE_VALID			BIT(0)

#define KVM_PTE_TYPE			BIT(1)
#define KVM_PTE_TYPE_BLOCK		0
#define KVM_PTE_TYPE_PAGE		1
#define KVM_PTE_TYPE_TABLE		1

#define KVM_PTE_ADDR_MASK		GENMASK(47, PAGE_SHIFT)
#define KVM_PTE_ADDR_51_48		GENMASK(15, 12)

#define KVM_PTE_LEAF_ATTR_LO		GENMASK(11, 2)

#define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX	GENMASK(4, 2)
#define KVM_PTE_LEAF_ATTR_LO_S1_AP	GENMASK(7, 6)
#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RO	3
#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RW	1
#define KVM_PTE_LEAF_ATTR_LO_S1_SH	GENMASK(9, 8)
#define KVM_PTE_LEAF_ATTR_LO_S1_SH_IS	3
#define KVM_PTE_LEAF_ATTR_LO_S1_AF	BIT(10)

#define KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR	GENMASK(5, 2)
#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R	BIT(6)
#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W	BIT(7)
#define KVM_PTE_LEAF_ATTR_LO_S2_SH	GENMASK(9, 8)
#define KVM_PTE_LEAF_ATTR_LO_S2_SH_IS	3
#define KVM_PTE_LEAF_ATTR_LO_S2_AF	BIT(10)

#define KVM_PTE_LEAF_ATTR_HI		GENMASK(63, 51)

#define KVM_PTE_LEAF_ATTR_HI_S1_XN	BIT(54)

#define KVM_PTE_LEAF_ATTR_HI_S2_XN	BIT(54)

#define KVM_PTE_LEAF_ATTR_S2_PERMS	(KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | \
					 KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \
					 KVM_PTE_LEAF_ATTR_HI_S2_XN)

struct kvm_pgtable_walk_data {
	struct kvm_pgtable		*pgt;
	struct kvm_pgtable_walker	*walker;

	u64				addr;
	u64				end;
};

static u64 kvm_granule_shift(u32 level)
{
	/* Assumes KVM_PGTABLE_MAX_LEVELS is 4 */
	return ARM64_HW_PGTABLE_LEVEL_SHIFT(level);
}

static u64 kvm_granule_size(u32 level)
{
	return BIT(kvm_granule_shift(level));
}

static bool kvm_block_mapping_supported(u64 addr, u64 end, u64 phys, u32 level)
{
	u64 granule = kvm_granule_size(level);

	/*
	 * Reject invalid block mappings and don't bother with 4TB mappings for
	 * 52-bit PAs.
	 */
	if (level == 0 || (PAGE_SIZE != SZ_4K && level == 1))
		return false;

	if (granule > (end - addr))
		return false;

	return IS_ALIGNED(addr, granule) && IS_ALIGNED(phys, granule);
}

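/*
 * Indexing helpers: kvm_pgtable_idx() returns the index of the entry for
 * data->addr within a single table at 'level', while __kvm_pgd_page_idx()
 * and kvm_pgd_pages() deal with a PGD that may be concatenated across
 * several pages at the start level, as stage-2 allows. For example, with
 * 4KiB pages, a 40-bit IPA space starting at level 1 needs 1024 level-1
 * entries, so kvm_pgd_pages(40, 1) returns 2 concatenated PGD pages.
 */
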
static u32 kvm_pgtable_idx(struct kvm_pgtable_walk_data *data, u32 level)
{
	u64 shift = kvm_granule_shift(level);
	u64 mask = BIT(PAGE_SHIFT - 3) - 1;

	return (data->addr >> shift) & mask;
}

static u32 __kvm_pgd_page_idx(struct kvm_pgtable *pgt, u64 addr)
{
	u64 shift = kvm_granule_shift(pgt->start_level - 1); /* May underflow */
	u64 mask = BIT(pgt->ia_bits) - 1;

	return (addr & mask) >> shift;
}

static u32 kvm_pgd_page_idx(struct kvm_pgtable_walk_data *data)
{
	return __kvm_pgd_page_idx(data->pgt, data->addr);
}

static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level)
{
	struct kvm_pgtable pgt = {
		.ia_bits	= ia_bits,
		.start_level	= start_level,
	};

	return __kvm_pgd_page_idx(&pgt, -1ULL) + 1;
}

static bool kvm_pte_valid(kvm_pte_t pte)
{
	return pte & KVM_PTE_VALID;
}

static bool kvm_pte_table(kvm_pte_t pte, u32 level)
{
	if (level == KVM_PGTABLE_MAX_LEVELS - 1)
		return false;

	if (!kvm_pte_valid(pte))
		return false;

	return FIELD_GET(KVM_PTE_TYPE, pte) == KVM_PTE_TYPE_TABLE;
}

static u64 kvm_pte_to_phys(kvm_pte_t pte)
{
	u64 pa = pte & KVM_PTE_ADDR_MASK;

	if (PAGE_SHIFT == 16)
		pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48;

	return pa;
}

static kvm_pte_t kvm_phys_to_pte(u64 pa)
{
	kvm_pte_t pte = pa & KVM_PTE_ADDR_MASK;

	if (PAGE_SHIFT == 16)
		pte |= FIELD_PREP(KVM_PTE_ADDR_51_48, pa >> 48);

	return pte;
}

static kvm_pte_t *kvm_pte_follow(kvm_pte_t pte)
{
	return __va(kvm_pte_to_phys(pte));
}

static void kvm_set_invalid_pte(kvm_pte_t *ptep)
{
	kvm_pte_t pte = *ptep;
	WRITE_ONCE(*ptep, pte & ~KVM_PTE_VALID);
}

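/*
 * A new table entry is published with smp_store_release() so that a
 * software walker on another CPU cannot observe the table pointer before
 * the stores that initialised the child table. Ordering against the
 * hardware walker is left to the callers, which issue dsb(ishst) once
 * their walk has completed (see kvm_pgtable_hyp_map() and
 * kvm_pgtable_stage2_map() below).
 */
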
static void kvm_set_table_pte(kvm_pte_t *ptep, kvm_pte_t *childp)
{
	kvm_pte_t old = *ptep, pte = kvm_phys_to_pte(__pa(childp));

	pte |= FIELD_PREP(KVM_PTE_TYPE, KVM_PTE_TYPE_TABLE);
	pte |= KVM_PTE_VALID;

	WARN_ON(kvm_pte_valid(old));
	smp_store_release(ptep, pte);
}

static kvm_pte_t kvm_init_valid_leaf_pte(u64 pa, kvm_pte_t attr, u32 level)
{
	kvm_pte_t pte = kvm_phys_to_pte(pa);
	u64 type = (level == KVM_PGTABLE_MAX_LEVELS - 1) ? KVM_PTE_TYPE_PAGE :
							   KVM_PTE_TYPE_BLOCK;

	pte |= attr & (KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI);
	pte |= FIELD_PREP(KVM_PTE_TYPE, type);
	pte |= KVM_PTE_VALID;

	return pte;
}

static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, u64 addr,
				  u32 level, kvm_pte_t *ptep,
				  enum kvm_pgtable_walk_flags flag)
{
	struct kvm_pgtable_walker *walker = data->walker;
	return walker->cb(addr, data->end, level, ptep, flag, walker->arg);
}

static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
			      kvm_pte_t *pgtable, u32 level);

static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
				      kvm_pte_t *ptep, u32 level)
{
	int ret = 0;
	u64 addr = data->addr;
	kvm_pte_t *childp, pte = *ptep;
	bool table = kvm_pte_table(pte, level);
	enum kvm_pgtable_walk_flags flags = data->walker->flags;

	if (table && (flags & KVM_PGTABLE_WALK_TABLE_PRE)) {
		ret = kvm_pgtable_visitor_cb(data, addr, level, ptep,
					     KVM_PGTABLE_WALK_TABLE_PRE);
	}

	if (!table && (flags & KVM_PGTABLE_WALK_LEAF)) {
		ret = kvm_pgtable_visitor_cb(data, addr, level, ptep,
					     KVM_PGTABLE_WALK_LEAF);
		pte = *ptep;
		table = kvm_pte_table(pte, level);
	}

	if (ret)
		goto out;

	if (!table) {
		data->addr += kvm_granule_size(level);
		goto out;
	}

	childp = kvm_pte_follow(pte);
	ret = __kvm_pgtable_walk(data, childp, level + 1);
	if (ret)
		goto out;

	if (flags & KVM_PGTABLE_WALK_TABLE_POST) {
		ret = kvm_pgtable_visitor_cb(data, addr, level, ptep,
					     KVM_PGTABLE_WALK_TABLE_POST);
	}

out:
	return ret;
}

static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
			      kvm_pte_t *pgtable, u32 level)
{
	u32 idx;
	int ret = 0;

	if (WARN_ON_ONCE(level >= KVM_PGTABLE_MAX_LEVELS))
		return -EINVAL;

	for (idx = kvm_pgtable_idx(data, level); idx < PTRS_PER_PTE; ++idx) {
		kvm_pte_t *ptep = &pgtable[idx];

		if (data->addr >= data->end)
			break;

		ret = __kvm_pgtable_visit(data, ptep, level);
		if (ret)
			break;
	}

	return ret;
}

static int _kvm_pgtable_walk(struct kvm_pgtable_walk_data *data)
{
	u32 idx;
	int ret = 0;
	struct kvm_pgtable *pgt = data->pgt;
	u64 limit = BIT(pgt->ia_bits);

	if (data->addr > limit || data->end > limit)
		return -ERANGE;

	if (!pgt->pgd)
		return -EINVAL;

	for (idx = kvm_pgd_page_idx(data); data->addr < data->end; ++idx) {
		kvm_pte_t *ptep = &pgt->pgd[idx * PTRS_PER_PTE];

		ret = __kvm_pgtable_walk(data, ptep, pgt->start_level);
		if (ret)
			break;
	}

	return ret;
}

int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
		     struct kvm_pgtable_walker *walker)
{
	struct kvm_pgtable_walk_data walk_data = {
		.pgt	= pgt,
		.addr	= ALIGN_DOWN(addr, PAGE_SIZE),
		.end	= PAGE_ALIGN(walk_data.addr + size),
		.walker	= walker,
	};

	return _kvm_pgtable_walk(&walk_data);
}

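/*
 * An illustrative (hypothetical) use of the walker API: counting the valid
 * leaf entries in a range. The callback prototype matches
 * kvm_pgtable_walker::cb as used throughout this file.
 *
 *	static int count_valid_leaves(u64 addr, u64 end, u32 level,
 *				      kvm_pte_t *ptep,
 *				      enum kvm_pgtable_walk_flags flag,
 *				      void * const arg)
 *	{
 *		u64 *count = arg;
 *
 *		if (kvm_pte_valid(*ptep))
 *			(*count)++;
 *
 *		return 0;
 *	}
 *
 *	u64 count = 0;
 *	struct kvm_pgtable_walker walker = {
 *		.cb	= count_valid_leaves,
 *		.flags	= KVM_PGTABLE_WALK_LEAF,
 *		.arg	= &count,
 *	};
 *
 *	kvm_pgtable_walk(pgt, addr, size, &walker);
 */
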
struct hyp_map_data {
	u64		phys;
	kvm_pte_t	attr;
};

static int hyp_map_set_prot_attr(enum kvm_pgtable_prot prot,
				 struct hyp_map_data *data)
{
	bool device = prot & KVM_PGTABLE_PROT_DEVICE;
	u32 mtype = device ? MT_DEVICE_nGnRE : MT_NORMAL;
	kvm_pte_t attr = FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX, mtype);
	u32 sh = KVM_PTE_LEAF_ATTR_LO_S1_SH_IS;
	u32 ap = (prot & KVM_PGTABLE_PROT_W) ? KVM_PTE_LEAF_ATTR_LO_S1_AP_RW :
					       KVM_PTE_LEAF_ATTR_LO_S1_AP_RO;

	if (!(prot & KVM_PGTABLE_PROT_R))
		return -EINVAL;

	if (prot & KVM_PGTABLE_PROT_X) {
		if (prot & KVM_PGTABLE_PROT_W)
			return -EINVAL;

		if (device)
			return -EINVAL;
	} else {
		attr |= KVM_PTE_LEAF_ATTR_HI_S1_XN;
	}

	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_AP, ap);
	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh);
	attr |= KVM_PTE_LEAF_ATTR_LO_S1_AF;
	data->attr = attr;
	return 0;
}

static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level,
				    kvm_pte_t *ptep, struct hyp_map_data *data)
{
	kvm_pte_t new, old = *ptep;
	u64 granule = kvm_granule_size(level), phys = data->phys;

	if (!kvm_block_mapping_supported(addr, end, phys, level))
		return false;

	/* Tolerate KVM recreating the exact same mapping */
	new = kvm_init_valid_leaf_pte(phys, data->attr, level);
	if (old != new && !WARN_ON(kvm_pte_valid(old)))
		smp_store_release(ptep, new);

	data->phys += granule;
	return true;
}

static int hyp_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			  enum kvm_pgtable_walk_flags flag, void * const arg)
{
	kvm_pte_t *childp;

	if (hyp_map_walker_try_leaf(addr, end, level, ptep, arg))
		return 0;

	if (WARN_ON(level == KVM_PGTABLE_MAX_LEVELS - 1))
		return -EINVAL;

	childp = (kvm_pte_t *)get_zeroed_page(GFP_KERNEL);
	if (!childp)
		return -ENOMEM;

	kvm_set_table_pte(ptep, childp);
	return 0;
}

int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
			enum kvm_pgtable_prot prot)
{
	int ret;
	struct hyp_map_data map_data = {
		.phys	= ALIGN_DOWN(phys, PAGE_SIZE),
	};
	struct kvm_pgtable_walker walker = {
		.cb	= hyp_map_walker,
		.flags	= KVM_PGTABLE_WALK_LEAF,
		.arg	= &map_data,
	};

	ret = hyp_map_set_prot_attr(prot, &map_data);
	if (ret)
		return ret;

	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
	dsb(ishst);
	isb();
	return ret;
}

int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits)
{
	u64 levels = ARM64_HW_PGTABLE_LEVELS(va_bits);

	pgt->pgd = (kvm_pte_t *)get_zeroed_page(GFP_KERNEL);
	if (!pgt->pgd)
		return -ENOMEM;

	pgt->ia_bits		= va_bits;
	pgt->start_level	= KVM_PGTABLE_MAX_LEVELS - levels;
	pgt->mmu		= NULL;
	return 0;
}

static int hyp_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			   enum kvm_pgtable_walk_flags flag, void * const arg)
{
	free_page((unsigned long)kvm_pte_follow(*ptep));
	return 0;
}

void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt)
{
	struct kvm_pgtable_walker walker = {
		.cb	= hyp_free_walker,
		.flags	= KVM_PGTABLE_WALK_TABLE_POST,
	};

	WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
	free_page((unsigned long)pgt->pgd);
	pgt->pgd = NULL;
}

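/*
 * State shared by the stage-2 map walker callbacks below: 'phys' is the
 * next physical address to map (advanced by one granule per leaf
 * installed), 'attr' holds the leaf attributes built by
 * stage2_map_set_prot_attr(), 'anchor' is used when replacing a table
 * with a block mapping (see the comment above stage2_map_walker()), and
 * 'mmu' and 'memcache' provide the TLB invalidation context and the
 * page-table page allocations respectively.
 */
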
struct stage2_map_data {
	u64				phys;
	kvm_pte_t			attr;

	kvm_pte_t			*anchor;

	struct kvm_s2_mmu		*mmu;
	struct kvm_mmu_memory_cache	*memcache;
};

static int stage2_map_set_prot_attr(enum kvm_pgtable_prot prot,
				    struct stage2_map_data *data)
{
	bool device = prot & KVM_PGTABLE_PROT_DEVICE;
	kvm_pte_t attr = device ? PAGE_S2_MEMATTR(DEVICE_nGnRE) :
			 PAGE_S2_MEMATTR(NORMAL);
	u32 sh = KVM_PTE_LEAF_ATTR_LO_S2_SH_IS;

	if (!(prot & KVM_PGTABLE_PROT_X))
		attr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;
	else if (device)
		return -EINVAL;

	if (prot & KVM_PGTABLE_PROT_R)
		attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;

	if (prot & KVM_PGTABLE_PROT_W)
		attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;

	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh);
	attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF;
	data->attr = attr;
	return 0;
}

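/*
 * Try to install a leaf (block or page) entry at this level. Returns 0 on
 * success, -EAGAIN if a valid entry is already present that is either
 * identical or differs only in the permission bits (the vCPU will fault
 * again and take the permission-relaxation path), and -E2BIG if the range
 * cannot be covered by a single leaf at this level, in which case
 * stage2_map_walk_leaf() installs a table and the walk descends.
 */
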
static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
				      kvm_pte_t *ptep,
				      struct stage2_map_data *data)
{
	kvm_pte_t new, old = *ptep;
	u64 granule = kvm_granule_size(level), phys = data->phys;
	struct page *page = virt_to_page(ptep);

	if (!kvm_block_mapping_supported(addr, end, phys, level))
		return -E2BIG;

	new = kvm_init_valid_leaf_pte(phys, data->attr, level);
	if (kvm_pte_valid(old)) {
		/*
		 * Skip updating the PTE if we are trying to recreate the exact
		 * same mapping or only change the access permissions. Instead,
		 * the vCPU will exit one more time from guest if still needed
		 * and then go through the path of relaxing permissions.
		 */
		if (!((old ^ new) & (~KVM_PTE_LEAF_ATTR_S2_PERMS)))
			return -EAGAIN;

		/*
		 * There's an existing different valid leaf entry, so perform
		 * break-before-make.
		 */
		kvm_set_invalid_pte(ptep);
		kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, level);
		put_page(page);
	}

	smp_store_release(ptep, new);
	get_page(page);
	data->phys += granule;
	return 0;
}

static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level,
				     kvm_pte_t *ptep,
				     struct stage2_map_data *data)
{
	if (data->anchor)
		return 0;

	if (!kvm_block_mapping_supported(addr, end, data->phys, level))
		return 0;

	kvm_set_invalid_pte(ptep);

	/*
	 * Invalidate the whole stage-2, as we may have numerous leaf
	 * entries below us which would otherwise need invalidating
	 * individually.
	 */
	kvm_call_hyp(__kvm_tlb_flush_vmid, data->mmu);
	data->anchor = ptep;
	return 0;
}

static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
				struct stage2_map_data *data)
{
	int ret;
	kvm_pte_t *childp, pte = *ptep;
	struct page *page = virt_to_page(ptep);

	if (data->anchor) {
		if (kvm_pte_valid(pte))
			put_page(page);

		return 0;
	}

	ret = stage2_map_walker_try_leaf(addr, end, level, ptep, data);
	if (ret != -E2BIG)
		return ret;

	if (WARN_ON(level == KVM_PGTABLE_MAX_LEVELS - 1))
		return -EINVAL;

	if (!data->memcache)
		return -ENOMEM;

	childp = kvm_mmu_memory_cache_alloc(data->memcache);
	if (!childp)
		return -ENOMEM;

	/*
	 * If we've run into an existing block mapping then replace it with
	 * a table. Accesses beyond 'end' that fall within the new table
	 * will be mapped lazily.
	 */
	if (kvm_pte_valid(pte)) {
		kvm_set_invalid_pte(ptep);
		kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, level);
		put_page(page);
	}

	kvm_set_table_pte(ptep, childp);
	get_page(page);

	return 0;
}

static int stage2_map_walk_table_post(u64 addr, u64 end, u32 level,
				      kvm_pte_t *ptep,
				      struct stage2_map_data *data)
{
	int ret = 0;

	if (!data->anchor)
		return 0;

	free_page((unsigned long)kvm_pte_follow(*ptep));
	put_page(virt_to_page(ptep));

	if (data->anchor == ptep) {
		data->anchor = NULL;
		ret = stage2_map_walk_leaf(addr, end, level, ptep, data);
	}

	return ret;
}

/*
 * This is a little fiddly, as we use all three of the walk flags. The idea
 * is that the TABLE_PRE callback runs for table entries on the way down,
 * looking for table entries which we could conceivably replace with a
 * block entry for this mapping. If it finds one, then it sets the 'anchor'
 * field in 'struct stage2_map_data' to point at the table entry, before
 * clearing the entry to zero and descending into the now detached table.
 *
 * The behaviour of the LEAF callback then depends on whether or not the
 * anchor has been set. If not, then we're not using a block mapping higher
 * up the table and we perform the mapping at the existing leaves instead.
 * If, on the other hand, the anchor _is_ set, then we drop references to
 * all valid leaves so that the pages beneath the anchor can be freed.
 *
 * Finally, the TABLE_POST callback does nothing if the anchor has not
 * been set, but otherwise frees the page-table pages while walking back up
 * the page-table, installing the block entry when it revisits the anchor
 * pointer and clearing the anchor to NULL.
 */

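/*
 * For example (purely illustrative), when mapping a 2MiB-aligned, 2MiB
 * range with 4KiB pages over a level-2 entry that currently points at a
 * table of page mappings: TABLE_PRE invalidates the level-2 entry and
 * records it as the anchor, LEAF then drops the per-entry references held
 * on the detached level-3 table, and TABLE_POST frees that table and, back
 * at the anchor, installs the 2MiB block entry in its place.
 */
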
static int stage2_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			     enum kvm_pgtable_walk_flags flag, void * const arg)
{
	struct stage2_map_data *data = arg;

	switch (flag) {
	case KVM_PGTABLE_WALK_TABLE_PRE:
		return stage2_map_walk_table_pre(addr, end, level, ptep, data);
	case KVM_PGTABLE_WALK_LEAF:
		return stage2_map_walk_leaf(addr, end, level, ptep, data);
	case KVM_PGTABLE_WALK_TABLE_POST:
		return stage2_map_walk_table_post(addr, end, level, ptep, data);
	}

	return -EINVAL;
}

int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
			   u64 phys, enum kvm_pgtable_prot prot,
			   struct kvm_mmu_memory_cache *mc)
{
	int ret;
	struct stage2_map_data map_data = {
		.phys		= ALIGN_DOWN(phys, PAGE_SIZE),
		.mmu		= pgt->mmu,
		.memcache	= mc,
	};
	struct kvm_pgtable_walker walker = {
		.cb		= stage2_map_walker,
		.flags		= KVM_PGTABLE_WALK_TABLE_PRE |
				  KVM_PGTABLE_WALK_LEAF |
				  KVM_PGTABLE_WALK_TABLE_POST,
		.arg		= &map_data,
	};

	ret = stage2_map_set_prot_attr(prot, &map_data);
	if (ret)
		return ret;

	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
	dsb(ishst);
	return ret;
}

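/*
 * When the CPUs implement FWB (ARM64_HAS_STAGE2_FWB), the stage-2 memory
 * attributes can force guest accesses to be write-back cacheable, so the
 * explicit D-cache maintenance below is unnecessary and is skipped.
 */
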
static void stage2_flush_dcache(void *addr, u64 size)
{
	if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
		return;

	__flush_dcache_area(addr, size);
}

static bool stage2_pte_cacheable(kvm_pte_t pte)
{
	u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR;
	return memattr == PAGE_S2_MEMATTR(NORMAL);
}

static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			       enum kvm_pgtable_walk_flags flag,
			       void * const arg)
{
	struct kvm_s2_mmu *mmu = arg;
	kvm_pte_t pte = *ptep, *childp = NULL;
	bool need_flush = false;

	if (!kvm_pte_valid(pte))
		return 0;

	if (kvm_pte_table(pte, level)) {
		childp = kvm_pte_follow(pte);

		if (page_count(virt_to_page(childp)) != 1)
			return 0;
	} else if (stage2_pte_cacheable(pte)) {
		need_flush = true;
	}

	/*
	 * This is similar to the map() path in that we unmap the entire
	 * block entry and rely on the remaining portions being faulted
	 * back lazily.
	 */
	kvm_set_invalid_pte(ptep);
	kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, addr, level);
	put_page(virt_to_page(ptep));

	if (need_flush) {
		stage2_flush_dcache(kvm_pte_follow(pte),
				    kvm_granule_size(level));
	}

	if (childp)
		free_page((unsigned long)childp);

	return 0;
}

int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct kvm_pgtable_walker walker = {
		.cb	= stage2_unmap_walker,
		.arg	= pgt->mmu,
		.flags	= KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
	};

	return kvm_pgtable_walk(pgt, addr, size, &walker);
}

struct stage2_attr_data {
	kvm_pte_t	attr_set;
	kvm_pte_t	attr_clr;
	kvm_pte_t	pte;
	u32		level;
};

static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			      enum kvm_pgtable_walk_flags flag,
			      void * const arg)
{
	kvm_pte_t pte = *ptep;
	struct stage2_attr_data *data = arg;

	if (!kvm_pte_valid(pte))
		return 0;

	data->level = level;
	data->pte = pte;
	pte &= ~data->attr_clr;
	pte |= data->attr_set;

	/*
	 * We may race with the CPU trying to set the access flag here,
	 * but worst-case the access flag update gets lost and will be
	 * set on the next access instead.
	 */
	if (data->pte != pte)
		WRITE_ONCE(*ptep, pte);

	return 0;
}

static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr,
				    u64 size, kvm_pte_t attr_set,
				    kvm_pte_t attr_clr, kvm_pte_t *orig_pte,
				    u32 *level)
{
	int ret;
	kvm_pte_t attr_mask = KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI;
	struct stage2_attr_data data = {
		.attr_set	= attr_set & attr_mask,
		.attr_clr	= attr_clr & attr_mask,
	};
	struct kvm_pgtable_walker walker = {
		.cb		= stage2_attr_walker,
		.arg		= &data,
		.flags		= KVM_PGTABLE_WALK_LEAF,
	};

	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
	if (ret)
		return ret;

	if (orig_pte)
		*orig_pte = data.pte;

	if (level)
		*level = data.level;
	return 0;
}

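/*
 * The accessors below are thin wrappers around stage2_update_leaf_attrs().
 * They differ in who performs the TLB invalidation:
 * kvm_pgtable_stage2_relax_perms() invalidates the IPA itself once the
 * update succeeds, kvm_pgtable_stage2_mkold() relies on the core MMU
 * notifier code (see the comment in that function), and
 * kvm_pgtable_stage2_wrprotect() leaves any invalidation to its caller.
 */
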
int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	return stage2_update_leaf_attrs(pgt, addr, size, 0,
					KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W,
					NULL, NULL);
}

kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr)
{
	kvm_pte_t pte = 0;
	stage2_update_leaf_attrs(pgt, addr, 1, KVM_PTE_LEAF_ATTR_LO_S2_AF, 0,
				 &pte, NULL);
	dsb(ishst);
	return pte;
}

kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr)
{
	kvm_pte_t pte = 0;
	stage2_update_leaf_attrs(pgt, addr, 1, 0, KVM_PTE_LEAF_ATTR_LO_S2_AF,
				 &pte, NULL);
	/*
	 * "But where's the TLBI?!", you scream.
	 * "Over in the core code", I sigh.
	 *
	 * See the '->clear_flush_young()' callback on the KVM mmu notifier.
	 */
	return pte;
}

bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr)
{
	kvm_pte_t pte = 0;
	stage2_update_leaf_attrs(pgt, addr, 1, 0, 0, &pte, NULL);
	return pte & KVM_PTE_LEAF_ATTR_LO_S2_AF;
}

int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
				   enum kvm_pgtable_prot prot)
{
	int ret;
	u32 level;
	kvm_pte_t set = 0, clr = 0;

	if (prot & KVM_PGTABLE_PROT_R)
		set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;

	if (prot & KVM_PGTABLE_PROT_W)
		set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;

	if (prot & KVM_PGTABLE_PROT_X)
		clr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;

	ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level);
	if (!ret)
		kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, pgt->mmu, addr, level);
	return ret;
}

static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			       enum kvm_pgtable_walk_flags flag,
			       void * const arg)
{
	kvm_pte_t pte = *ptep;

	if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pte))
		return 0;

	stage2_flush_dcache(kvm_pte_follow(pte), kvm_granule_size(level));
	return 0;
}

int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct kvm_pgtable_walker walker = {
		.cb	= stage2_flush_walker,
		.flags	= KVM_PGTABLE_WALK_LEAF,
	};

	if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
		return 0;

	return kvm_pgtable_walk(pgt, addr, size, &walker);
}

int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm *kvm)
{
	size_t pgd_sz;
	u64 vtcr = kvm->arch.vtcr;
	u32 ia_bits = VTCR_EL2_IPA(vtcr);
	u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
	u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;

	pgd_sz = kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE;
	pgt->pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
	if (!pgt->pgd)
		return -ENOMEM;

	pgt->ia_bits		= ia_bits;
	pgt->start_level	= start_level;
	pgt->mmu		= &kvm->arch.mmu;

	/* Ensure zeroed PGD pages are visible to the hardware walker */
	dsb(ishst);
	return 0;
}

static int stage2_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			      enum kvm_pgtable_walk_flags flag,
			      void * const arg)
{
	kvm_pte_t pte = *ptep;

	if (!kvm_pte_valid(pte))
		return 0;

	put_page(virt_to_page(ptep));

	if (kvm_pte_table(pte, level))
		free_page((unsigned long)kvm_pte_follow(pte));

	return 0;
}

void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
{
	size_t pgd_sz;
	struct kvm_pgtable_walker walker = {
		.cb	= stage2_free_walker,
		.flags	= KVM_PGTABLE_WALK_LEAF |
			  KVM_PGTABLE_WALK_TABLE_POST,
	};

	WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
	pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level) * PAGE_SIZE;
	free_pages_exact(pgt->pgd, pgd_sz);
	pgt->pgd = NULL;
}