1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * KVM guest address space mapping code 4 * 5 * Copyright IBM Corp. 2007, 2020 6 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> 7 * David Hildenbrand <david@redhat.com> 8 * Janosch Frank <frankja@linux.vnet.ibm.com> 9 */ 10 11 #include <linux/cpufeature.h> 12 #include <linux/export.h> 13 #include <linux/kernel.h> 14 #include <linux/pagewalk.h> 15 #include <linux/swap.h> 16 #include <linux/smp.h> 17 #include <linux/spinlock.h> 18 #include <linux/slab.h> 19 #include <linux/swapops.h> 20 #include <linux/ksm.h> 21 #include <linux/mman.h> 22 #include <linux/pgtable.h> 23 #include <asm/page-states.h> 24 #include <asm/pgalloc.h> 25 #include <asm/machine.h> 26 #include <asm/gmap_helpers.h> 27 #include <asm/gmap.h> 28 #include <asm/page.h> 29 30 /* 31 * The address is saved in a radix tree directly; NULL would be ambiguous, 32 * since 0 is a valid address, and NULL is returned when nothing was found. 33 * The lower bits are ignored by all users of the macro, so it can be used 34 * to distinguish a valid address 0 from a NULL. 35 */ 36 #define VALID_GADDR_FLAG 1 37 #define IS_GADDR_VALID(gaddr) ((gaddr) & VALID_GADDR_FLAG) 38 #define MAKE_VALID_GADDR(gaddr) (((gaddr) & HPAGE_MASK) | VALID_GADDR_FLAG) 39 40 #define GMAP_SHADOW_FAKE_TABLE 1ULL 41 42 static struct page *gmap_alloc_crst(void) 43 { 44 struct page *page; 45 46 page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER); 47 if (!page) 48 return NULL; 49 __arch_set_page_dat(page_to_virt(page), 1UL << CRST_ALLOC_ORDER); 50 return page; 51 } 52 53 /** 54 * gmap_alloc - allocate and initialize a guest address space 55 * @limit: maximum address of the gmap address space 56 * 57 * Returns a guest address space structure. 58 */ 59 struct gmap *gmap_alloc(unsigned long limit) 60 { 61 struct gmap *gmap; 62 struct page *page; 63 unsigned long *table; 64 unsigned long etype, atype; 65 66 if (limit < _REGION3_SIZE) { 67 limit = _REGION3_SIZE - 1; 68 atype = _ASCE_TYPE_SEGMENT; 69 etype = _SEGMENT_ENTRY_EMPTY; 70 } else if (limit < _REGION2_SIZE) { 71 limit = _REGION2_SIZE - 1; 72 atype = _ASCE_TYPE_REGION3; 73 etype = _REGION3_ENTRY_EMPTY; 74 } else if (limit < _REGION1_SIZE) { 75 limit = _REGION1_SIZE - 1; 76 atype = _ASCE_TYPE_REGION2; 77 etype = _REGION2_ENTRY_EMPTY; 78 } else { 79 limit = -1UL; 80 atype = _ASCE_TYPE_REGION1; 81 etype = _REGION1_ENTRY_EMPTY; 82 } 83 gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL_ACCOUNT); 84 if (!gmap) 85 goto out; 86 INIT_LIST_HEAD(&gmap->children); 87 INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL_ACCOUNT); 88 INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC | __GFP_ACCOUNT); 89 INIT_RADIX_TREE(&gmap->host_to_rmap, GFP_ATOMIC | __GFP_ACCOUNT); 90 spin_lock_init(&gmap->guest_table_lock); 91 spin_lock_init(&gmap->shadow_lock); 92 refcount_set(&gmap->ref_count, 1); 93 page = gmap_alloc_crst(); 94 if (!page) 95 goto out_free; 96 table = page_to_virt(page); 97 crst_table_init(table, etype); 98 gmap->table = table; 99 gmap->asce = atype | _ASCE_TABLE_LENGTH | 100 _ASCE_USER_BITS | __pa(table); 101 gmap->asce_end = limit; 102 return gmap; 103 104 out_free: 105 kfree(gmap); 106 out: 107 return NULL; 108 } 109 EXPORT_SYMBOL_GPL(gmap_alloc); 110 111 /** 112 * gmap_create - create a guest address space 113 * @mm: pointer to the parent mm_struct 114 * @limit: maximum size of the gmap address space 115 * 116 * Returns a guest address space structure. 
117 */ 118 struct gmap *gmap_create(struct mm_struct *mm, unsigned long limit) 119 { 120 struct gmap *gmap; 121 unsigned long gmap_asce; 122 123 gmap = gmap_alloc(limit); 124 if (!gmap) 125 return NULL; 126 gmap->mm = mm; 127 spin_lock(&mm->context.lock); 128 list_add_rcu(&gmap->list, &mm->context.gmap_list); 129 if (list_is_singular(&mm->context.gmap_list)) 130 gmap_asce = gmap->asce; 131 else 132 gmap_asce = -1UL; 133 WRITE_ONCE(mm->context.gmap_asce, gmap_asce); 134 spin_unlock(&mm->context.lock); 135 return gmap; 136 } 137 EXPORT_SYMBOL_GPL(gmap_create); 138 139 static void gmap_flush_tlb(struct gmap *gmap) 140 { 141 __tlb_flush_idte(gmap->asce); 142 } 143 144 static void gmap_radix_tree_free(struct radix_tree_root *root) 145 { 146 struct radix_tree_iter iter; 147 unsigned long indices[16]; 148 unsigned long index; 149 void __rcu **slot; 150 int i, nr; 151 152 /* A radix tree is freed by deleting all of its entries */ 153 index = 0; 154 do { 155 nr = 0; 156 radix_tree_for_each_slot(slot, root, &iter, index) { 157 indices[nr] = iter.index; 158 if (++nr == 16) 159 break; 160 } 161 for (i = 0; i < nr; i++) { 162 index = indices[i]; 163 radix_tree_delete(root, index); 164 } 165 } while (nr > 0); 166 } 167 168 static void gmap_rmap_radix_tree_free(struct radix_tree_root *root) 169 { 170 struct gmap_rmap *rmap, *rnext, *head; 171 struct radix_tree_iter iter; 172 unsigned long indices[16]; 173 unsigned long index; 174 void __rcu **slot; 175 int i, nr; 176 177 /* A radix tree is freed by deleting all of its entries */ 178 index = 0; 179 do { 180 nr = 0; 181 radix_tree_for_each_slot(slot, root, &iter, index) { 182 indices[nr] = iter.index; 183 if (++nr == 16) 184 break; 185 } 186 for (i = 0; i < nr; i++) { 187 index = indices[i]; 188 head = radix_tree_delete(root, index); 189 gmap_for_each_rmap_safe(rmap, rnext, head) 190 kfree(rmap); 191 } 192 } while (nr > 0); 193 } 194 195 static void gmap_free_crst(unsigned long *table, bool free_ptes) 196 { 197 bool is_segment = (table[0] & _SEGMENT_ENTRY_TYPE_MASK) == 0; 198 int i; 199 200 if (is_segment) { 201 if (!free_ptes) 202 goto out; 203 for (i = 0; i < _CRST_ENTRIES; i++) 204 if (!(table[i] & _SEGMENT_ENTRY_INVALID)) 205 page_table_free_pgste(page_ptdesc(phys_to_page(table[i]))); 206 } else { 207 for (i = 0; i < _CRST_ENTRIES; i++) 208 if (!(table[i] & _REGION_ENTRY_INVALID)) 209 gmap_free_crst(__va(table[i] & PAGE_MASK), free_ptes); 210 } 211 212 out: 213 free_pages((unsigned long)table, CRST_ALLOC_ORDER); 214 } 215 216 /** 217 * gmap_free - free a guest address space 218 * @gmap: pointer to the guest address space structure 219 * 220 * No locks required. There are no references to this gmap anymore. 221 */ 222 void gmap_free(struct gmap *gmap) 223 { 224 /* Flush tlb of all gmaps (if not already done for shadows) */ 225 if (!(gmap_is_shadow(gmap) && gmap->removed)) 226 gmap_flush_tlb(gmap); 227 /* Free all segment & region tables. 
*/ 228 gmap_free_crst(gmap->table, gmap_is_shadow(gmap)); 229 230 gmap_radix_tree_free(&gmap->guest_to_host); 231 gmap_radix_tree_free(&gmap->host_to_guest); 232 233 /* Free additional data for a shadow gmap */ 234 if (gmap_is_shadow(gmap)) { 235 gmap_rmap_radix_tree_free(&gmap->host_to_rmap); 236 /* Release reference to the parent */ 237 gmap_put(gmap->parent); 238 } 239 240 kfree(gmap); 241 } 242 EXPORT_SYMBOL_GPL(gmap_free); 243 244 /** 245 * gmap_get - increase reference counter for guest address space 246 * @gmap: pointer to the guest address space structure 247 * 248 * Returns the gmap pointer 249 */ 250 struct gmap *gmap_get(struct gmap *gmap) 251 { 252 refcount_inc(&gmap->ref_count); 253 return gmap; 254 } 255 EXPORT_SYMBOL_GPL(gmap_get); 256 257 /** 258 * gmap_put - decrease reference counter for guest address space 259 * @gmap: pointer to the guest address space structure 260 * 261 * If the reference counter reaches zero the guest address space is freed. 262 */ 263 void gmap_put(struct gmap *gmap) 264 { 265 if (refcount_dec_and_test(&gmap->ref_count)) 266 gmap_free(gmap); 267 } 268 EXPORT_SYMBOL_GPL(gmap_put); 269 270 /** 271 * gmap_remove - remove a guest address space but do not free it yet 272 * @gmap: pointer to the guest address space structure 273 */ 274 void gmap_remove(struct gmap *gmap) 275 { 276 struct gmap *sg, *next; 277 unsigned long gmap_asce; 278 279 /* Remove all shadow gmaps linked to this gmap */ 280 if (!list_empty(&gmap->children)) { 281 spin_lock(&gmap->shadow_lock); 282 list_for_each_entry_safe(sg, next, &gmap->children, list) { 283 list_del(&sg->list); 284 gmap_put(sg); 285 } 286 spin_unlock(&gmap->shadow_lock); 287 } 288 /* Remove gmap from the pre-mm list */ 289 spin_lock(&gmap->mm->context.lock); 290 list_del_rcu(&gmap->list); 291 if (list_empty(&gmap->mm->context.gmap_list)) 292 gmap_asce = 0; 293 else if (list_is_singular(&gmap->mm->context.gmap_list)) 294 gmap_asce = list_first_entry(&gmap->mm->context.gmap_list, 295 struct gmap, list)->asce; 296 else 297 gmap_asce = -1UL; 298 WRITE_ONCE(gmap->mm->context.gmap_asce, gmap_asce); 299 spin_unlock(&gmap->mm->context.lock); 300 synchronize_rcu(); 301 /* Put reference */ 302 gmap_put(gmap); 303 } 304 EXPORT_SYMBOL_GPL(gmap_remove); 305 306 /* 307 * gmap_alloc_table is assumed to be called with mmap_lock held 308 */ 309 static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, 310 unsigned long init, unsigned long gaddr) 311 { 312 struct page *page; 313 unsigned long *new; 314 315 /* since we dont free the gmap table until gmap_free we can unlock */ 316 page = gmap_alloc_crst(); 317 if (!page) 318 return -ENOMEM; 319 new = page_to_virt(page); 320 crst_table_init(new, init); 321 spin_lock(&gmap->guest_table_lock); 322 if (*table & _REGION_ENTRY_INVALID) { 323 *table = __pa(new) | _REGION_ENTRY_LENGTH | 324 (*table & _REGION_ENTRY_TYPE_MASK); 325 page = NULL; 326 } 327 spin_unlock(&gmap->guest_table_lock); 328 if (page) 329 __free_pages(page, CRST_ALLOC_ORDER); 330 return 0; 331 } 332 333 static unsigned long host_to_guest_lookup(struct gmap *gmap, unsigned long vmaddr) 334 { 335 return (unsigned long)radix_tree_lookup(&gmap->host_to_guest, vmaddr >> PMD_SHIFT); 336 } 337 338 static unsigned long host_to_guest_delete(struct gmap *gmap, unsigned long vmaddr) 339 { 340 return (unsigned long)radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT); 341 } 342 343 static pmd_t *host_to_guest_pmd_delete(struct gmap *gmap, unsigned long vmaddr, 344 unsigned long *gaddr) 345 { 346 *gaddr = 
host_to_guest_delete(gmap, vmaddr); 347 if (IS_GADDR_VALID(*gaddr)) 348 return (pmd_t *)gmap_table_walk(gmap, *gaddr, 1); 349 return NULL; 350 } 351 352 /** 353 * __gmap_unlink_by_vmaddr - unlink a single segment via a host address 354 * @gmap: pointer to the guest address space structure 355 * @vmaddr: address in the host process address space 356 * 357 * Returns 1 if a TLB flush is required 358 */ 359 static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr) 360 { 361 unsigned long gaddr; 362 int flush = 0; 363 pmd_t *pmdp; 364 365 BUG_ON(gmap_is_shadow(gmap)); 366 spin_lock(&gmap->guest_table_lock); 367 368 pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr); 369 if (pmdp) { 370 flush = (pmd_val(*pmdp) != _SEGMENT_ENTRY_EMPTY); 371 *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY); 372 } 373 374 spin_unlock(&gmap->guest_table_lock); 375 return flush; 376 } 377 378 /** 379 * __gmap_unmap_by_gaddr - unmap a single segment via a guest address 380 * @gmap: pointer to the guest address space structure 381 * @gaddr: address in the guest address space 382 * 383 * Returns 1 if a TLB flush is required 384 */ 385 static int __gmap_unmap_by_gaddr(struct gmap *gmap, unsigned long gaddr) 386 { 387 unsigned long vmaddr; 388 389 vmaddr = (unsigned long) radix_tree_delete(&gmap->guest_to_host, 390 gaddr >> PMD_SHIFT); 391 return vmaddr ? __gmap_unlink_by_vmaddr(gmap, vmaddr) : 0; 392 } 393 394 /** 395 * gmap_unmap_segment - unmap segment from the guest address space 396 * @gmap: pointer to the guest address space structure 397 * @to: address in the guest address space 398 * @len: length of the memory area to unmap 399 * 400 * Returns 0 if the unmap succeeded, -EINVAL if not. 401 */ 402 int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) 403 { 404 unsigned long off; 405 int flush; 406 407 BUG_ON(gmap_is_shadow(gmap)); 408 if ((to | len) & (PMD_SIZE - 1)) 409 return -EINVAL; 410 if (len == 0 || to + len < to) 411 return -EINVAL; 412 413 flush = 0; 414 mmap_write_lock(gmap->mm); 415 for (off = 0; off < len; off += PMD_SIZE) 416 flush |= __gmap_unmap_by_gaddr(gmap, to + off); 417 mmap_write_unlock(gmap->mm); 418 if (flush) 419 gmap_flush_tlb(gmap); 420 return 0; 421 } 422 EXPORT_SYMBOL_GPL(gmap_unmap_segment); 423 424 /** 425 * gmap_map_segment - map a segment to the guest address space 426 * @gmap: pointer to the guest address space structure 427 * @from: source address in the parent address space 428 * @to: target address in the guest address space 429 * @len: length of the memory area to map 430 * 431 * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not. 
432 */ 433 int gmap_map_segment(struct gmap *gmap, unsigned long from, 434 unsigned long to, unsigned long len) 435 { 436 unsigned long off; 437 int flush; 438 439 BUG_ON(gmap_is_shadow(gmap)); 440 if ((from | to | len) & (PMD_SIZE - 1)) 441 return -EINVAL; 442 if (len == 0 || from + len < from || to + len < to || 443 from + len - 1 > TASK_SIZE_MAX || to + len - 1 > gmap->asce_end) 444 return -EINVAL; 445 446 flush = 0; 447 mmap_write_lock(gmap->mm); 448 for (off = 0; off < len; off += PMD_SIZE) { 449 /* Remove old translation */ 450 flush |= __gmap_unmap_by_gaddr(gmap, to + off); 451 /* Store new translation */ 452 if (radix_tree_insert(&gmap->guest_to_host, 453 (to + off) >> PMD_SHIFT, 454 (void *) from + off)) 455 break; 456 } 457 mmap_write_unlock(gmap->mm); 458 if (flush) 459 gmap_flush_tlb(gmap); 460 if (off >= len) 461 return 0; 462 gmap_unmap_segment(gmap, to, len); 463 return -ENOMEM; 464 } 465 EXPORT_SYMBOL_GPL(gmap_map_segment); 466 467 /** 468 * __gmap_translate - translate a guest address to a user space address 469 * @gmap: pointer to guest mapping meta data structure 470 * @gaddr: guest address 471 * 472 * Returns user space address which corresponds to the guest address or 473 * -EFAULT if no such mapping exists. 474 * This function does not establish potentially missing page table entries. 475 * The mmap_lock of the mm that belongs to the address space must be held 476 * when this function gets called. 477 * 478 * Note: Can also be called for shadow gmaps. 479 */ 480 unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr) 481 { 482 unsigned long vmaddr; 483 484 vmaddr = (unsigned long) 485 radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT); 486 /* Note: guest_to_host is empty for a shadow gmap */ 487 return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT; 488 } 489 EXPORT_SYMBOL_GPL(__gmap_translate); 490 491 /** 492 * gmap_unlink - disconnect a page table from the gmap shadow tables 493 * @mm: pointer to the parent mm_struct 494 * @table: pointer to the host page table 495 * @vmaddr: vm address associated with the host page table 496 */ 497 void gmap_unlink(struct mm_struct *mm, unsigned long *table, 498 unsigned long vmaddr) 499 { 500 struct gmap *gmap; 501 int flush; 502 503 rcu_read_lock(); 504 list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) { 505 flush = __gmap_unlink_by_vmaddr(gmap, vmaddr); 506 if (flush) 507 gmap_flush_tlb(gmap); 508 } 509 rcu_read_unlock(); 510 } 511 512 static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *old, pmd_t new, 513 unsigned long gaddr); 514 515 /** 516 * __gmap_link - set up shadow page tables to connect a host to a guest address 517 * @gmap: pointer to guest mapping meta data structure 518 * @gaddr: guest address 519 * @vmaddr: vm address 520 * 521 * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT 522 * if the vm address is already mapped to a different guest segment. 523 * The mmap_lock of the mm that belongs to the address space must be held 524 * when this function gets called. 
525 */ 526 int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) 527 { 528 struct mm_struct *mm; 529 unsigned long *table; 530 spinlock_t *ptl; 531 pgd_t *pgd; 532 p4d_t *p4d; 533 pud_t *pud; 534 pmd_t *pmd; 535 u64 unprot; 536 int rc; 537 538 BUG_ON(gmap_is_shadow(gmap)); 539 /* Create higher level tables in the gmap page table */ 540 table = gmap->table; 541 if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) { 542 table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT; 543 if ((*table & _REGION_ENTRY_INVALID) && 544 gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY, 545 gaddr & _REGION1_MASK)) 546 return -ENOMEM; 547 table = __va(*table & _REGION_ENTRY_ORIGIN); 548 } 549 if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) { 550 table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT; 551 if ((*table & _REGION_ENTRY_INVALID) && 552 gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY, 553 gaddr & _REGION2_MASK)) 554 return -ENOMEM; 555 table = __va(*table & _REGION_ENTRY_ORIGIN); 556 } 557 if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) { 558 table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT; 559 if ((*table & _REGION_ENTRY_INVALID) && 560 gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY, 561 gaddr & _REGION3_MASK)) 562 return -ENOMEM; 563 table = __va(*table & _REGION_ENTRY_ORIGIN); 564 } 565 table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT; 566 /* Walk the parent mm page table */ 567 mm = gmap->mm; 568 pgd = pgd_offset(mm, vmaddr); 569 VM_BUG_ON(pgd_none(*pgd)); 570 p4d = p4d_offset(pgd, vmaddr); 571 VM_BUG_ON(p4d_none(*p4d)); 572 pud = pud_offset(p4d, vmaddr); 573 VM_BUG_ON(pud_none(*pud)); 574 /* large puds cannot yet be handled */ 575 if (pud_leaf(*pud)) 576 return -EFAULT; 577 pmd = pmd_offset(pud, vmaddr); 578 VM_BUG_ON(pmd_none(*pmd)); 579 /* Are we allowed to use huge pages? */ 580 if (pmd_leaf(*pmd) && !gmap->mm->context.allow_gmap_hpage_1m) 581 return -EFAULT; 582 /* Link gmap segment table entry location to page table. 
*/ 583 rc = radix_tree_preload(GFP_KERNEL_ACCOUNT); 584 if (rc) 585 return rc; 586 ptl = pmd_lock(mm, pmd); 587 spin_lock(&gmap->guest_table_lock); 588 if (*table == _SEGMENT_ENTRY_EMPTY) { 589 rc = radix_tree_insert(&gmap->host_to_guest, 590 vmaddr >> PMD_SHIFT, 591 (void *)MAKE_VALID_GADDR(gaddr)); 592 if (!rc) { 593 if (pmd_leaf(*pmd)) { 594 *table = (pmd_val(*pmd) & 595 _SEGMENT_ENTRY_HARDWARE_BITS_LARGE) 596 | _SEGMENT_ENTRY_GMAP_UC 597 | _SEGMENT_ENTRY; 598 } else 599 *table = pmd_val(*pmd) & 600 _SEGMENT_ENTRY_HARDWARE_BITS; 601 } 602 } else if (*table & _SEGMENT_ENTRY_PROTECT && 603 !(pmd_val(*pmd) & _SEGMENT_ENTRY_PROTECT)) { 604 unprot = (u64)*table; 605 unprot &= ~_SEGMENT_ENTRY_PROTECT; 606 unprot |= _SEGMENT_ENTRY_GMAP_UC; 607 gmap_pmdp_xchg(gmap, (pmd_t *)table, __pmd(unprot), gaddr); 608 } 609 spin_unlock(&gmap->guest_table_lock); 610 spin_unlock(ptl); 611 radix_tree_preload_end(); 612 return rc; 613 } 614 EXPORT_SYMBOL(__gmap_link); 615 616 /* 617 * this function is assumed to be called with mmap_lock held 618 */ 619 void __gmap_zap(struct gmap *gmap, unsigned long gaddr) 620 { 621 unsigned long vmaddr; 622 623 mmap_assert_locked(gmap->mm); 624 625 /* Find the vm address for the guest address */ 626 vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host, 627 gaddr >> PMD_SHIFT); 628 if (vmaddr) { 629 vmaddr |= gaddr & ~PMD_MASK; 630 gmap_helper_zap_one_page(gmap->mm, vmaddr); 631 } 632 } 633 EXPORT_SYMBOL_GPL(__gmap_zap); 634 635 static LIST_HEAD(gmap_notifier_list); 636 static DEFINE_SPINLOCK(gmap_notifier_lock); 637 638 /** 639 * gmap_register_pte_notifier - register a pte invalidation callback 640 * @nb: pointer to the gmap notifier block 641 */ 642 void gmap_register_pte_notifier(struct gmap_notifier *nb) 643 { 644 spin_lock(&gmap_notifier_lock); 645 list_add_rcu(&nb->list, &gmap_notifier_list); 646 spin_unlock(&gmap_notifier_lock); 647 } 648 EXPORT_SYMBOL_GPL(gmap_register_pte_notifier); 649 650 /** 651 * gmap_unregister_pte_notifier - remove a pte invalidation callback 652 * @nb: pointer to the gmap notifier block 653 */ 654 void gmap_unregister_pte_notifier(struct gmap_notifier *nb) 655 { 656 spin_lock(&gmap_notifier_lock); 657 list_del_rcu(&nb->list); 658 spin_unlock(&gmap_notifier_lock); 659 synchronize_rcu(); 660 } 661 EXPORT_SYMBOL_GPL(gmap_unregister_pte_notifier); 662 663 /** 664 * gmap_call_notifier - call all registered invalidation callbacks 665 * @gmap: pointer to guest mapping meta data structure 666 * @start: start virtual address in the guest address space 667 * @end: end virtual address in the guest address space 668 */ 669 static void gmap_call_notifier(struct gmap *gmap, unsigned long start, 670 unsigned long end) 671 { 672 struct gmap_notifier *nb; 673 674 list_for_each_entry(nb, &gmap_notifier_list, list) 675 nb->notifier_call(gmap, start, end); 676 } 677 678 /** 679 * gmap_table_walk - walk the gmap page tables 680 * @gmap: pointer to guest mapping meta data structure 681 * @gaddr: virtual address in the guest address space 682 * @level: page table level to stop at 683 * 684 * Returns a table entry pointer for the given guest address and @level 685 * @level=0 : returns a pointer to a page table table entry (or NULL) 686 * @level=1 : returns a pointer to a segment table entry (or NULL) 687 * @level=2 : returns a pointer to a region-3 table entry (or NULL) 688 * @level=3 : returns a pointer to a region-2 table entry (or NULL) 689 * @level=4 : returns a pointer to a region-1 table entry (or NULL) 690 * 691 * Returns NULL if the gmap page 
tables could not be walked to the 692 * requested level. 693 * 694 * Note: Can also be called for shadow gmaps. 695 */ 696 unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr, int level) 697 { 698 const int asce_type = gmap->asce & _ASCE_TYPE_MASK; 699 unsigned long *table = gmap->table; 700 701 if (gmap_is_shadow(gmap) && gmap->removed) 702 return NULL; 703 704 if (WARN_ON_ONCE(level > (asce_type >> 2) + 1)) 705 return NULL; 706 707 if (asce_type != _ASCE_TYPE_REGION1 && 708 gaddr & (-1UL << (31 + (asce_type >> 2) * 11))) 709 return NULL; 710 711 switch (asce_type) { 712 case _ASCE_TYPE_REGION1: 713 table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT; 714 if (level == 4) 715 break; 716 if (*table & _REGION_ENTRY_INVALID) 717 return NULL; 718 table = __va(*table & _REGION_ENTRY_ORIGIN); 719 fallthrough; 720 case _ASCE_TYPE_REGION2: 721 table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT; 722 if (level == 3) 723 break; 724 if (*table & _REGION_ENTRY_INVALID) 725 return NULL; 726 table = __va(*table & _REGION_ENTRY_ORIGIN); 727 fallthrough; 728 case _ASCE_TYPE_REGION3: 729 table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT; 730 if (level == 2) 731 break; 732 if (*table & _REGION_ENTRY_INVALID) 733 return NULL; 734 table = __va(*table & _REGION_ENTRY_ORIGIN); 735 fallthrough; 736 case _ASCE_TYPE_SEGMENT: 737 table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT; 738 if (level == 1) 739 break; 740 if (*table & _REGION_ENTRY_INVALID) 741 return NULL; 742 table = __va(*table & _SEGMENT_ENTRY_ORIGIN); 743 table += (gaddr & _PAGE_INDEX) >> PAGE_SHIFT; 744 } 745 return table; 746 } 747 EXPORT_SYMBOL(gmap_table_walk); 748 749 /** 750 * gmap_pte_op_walk - walk the gmap page table, get the page table lock 751 * and return the pte pointer 752 * @gmap: pointer to guest mapping meta data structure 753 * @gaddr: virtual address in the guest address space 754 * @ptl: pointer to the spinlock pointer 755 * 756 * Returns a pointer to the locked pte for a guest address, or NULL 757 */ 758 static pte_t *gmap_pte_op_walk(struct gmap *gmap, unsigned long gaddr, 759 spinlock_t **ptl) 760 { 761 unsigned long *table; 762 763 BUG_ON(gmap_is_shadow(gmap)); 764 /* Walk the gmap page table, lock and get pte pointer */ 765 table = gmap_table_walk(gmap, gaddr, 1); /* get segment pointer */ 766 if (!table || *table & _SEGMENT_ENTRY_INVALID) 767 return NULL; 768 return pte_alloc_map_lock(gmap->mm, (pmd_t *) table, gaddr, ptl); 769 } 770 771 /** 772 * gmap_pte_op_fixup - force a page in and connect the gmap page table 773 * @gmap: pointer to guest mapping meta data structure 774 * @gaddr: virtual address in the guest address space 775 * @vmaddr: address in the host process address space 776 * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE 777 * 778 * Returns 0 if the caller can retry __gmap_translate (might fail again), 779 * -ENOMEM if out of memory and -EFAULT if anything goes wrong while fixing 780 * up or connecting the gmap page table. 781 */ 782 static int gmap_pte_op_fixup(struct gmap *gmap, unsigned long gaddr, 783 unsigned long vmaddr, int prot) 784 { 785 struct mm_struct *mm = gmap->mm; 786 unsigned int fault_flags; 787 bool unlocked = false; 788 789 BUG_ON(gmap_is_shadow(gmap)); 790 fault_flags = (prot == PROT_WRITE) ? 
FAULT_FLAG_WRITE : 0; 791 if (fixup_user_fault(mm, vmaddr, fault_flags, &unlocked)) 792 return -EFAULT; 793 if (unlocked) 794 /* lost mmap_lock, caller has to retry __gmap_translate */ 795 return 0; 796 /* Connect the page tables */ 797 return __gmap_link(gmap, gaddr, vmaddr); 798 } 799 800 /** 801 * gmap_pte_op_end - release the page table lock 802 * @ptep: pointer to the locked pte 803 * @ptl: pointer to the page table spinlock 804 */ 805 static void gmap_pte_op_end(pte_t *ptep, spinlock_t *ptl) 806 { 807 pte_unmap_unlock(ptep, ptl); 808 } 809 810 /** 811 * gmap_pmd_op_walk - walk the gmap tables, get the guest table lock 812 * and return the pmd pointer 813 * @gmap: pointer to guest mapping meta data structure 814 * @gaddr: virtual address in the guest address space 815 * 816 * Returns a pointer to the pmd for a guest address, or NULL 817 */ 818 static inline pmd_t *gmap_pmd_op_walk(struct gmap *gmap, unsigned long gaddr) 819 { 820 pmd_t *pmdp; 821 822 BUG_ON(gmap_is_shadow(gmap)); 823 pmdp = (pmd_t *) gmap_table_walk(gmap, gaddr, 1); 824 if (!pmdp) 825 return NULL; 826 827 /* without huge pages, there is no need to take the table lock */ 828 if (!gmap->mm->context.allow_gmap_hpage_1m) 829 return pmd_none(*pmdp) ? NULL : pmdp; 830 831 spin_lock(&gmap->guest_table_lock); 832 if (pmd_none(*pmdp)) { 833 spin_unlock(&gmap->guest_table_lock); 834 return NULL; 835 } 836 837 /* 4k page table entries are locked via the pte (pte_alloc_map_lock). */ 838 if (!pmd_leaf(*pmdp)) 839 spin_unlock(&gmap->guest_table_lock); 840 return pmdp; 841 } 842 843 /** 844 * gmap_pmd_op_end - release the guest_table_lock if needed 845 * @gmap: pointer to the guest mapping meta data structure 846 * @pmdp: pointer to the pmd 847 */ 848 static inline void gmap_pmd_op_end(struct gmap *gmap, pmd_t *pmdp) 849 { 850 if (pmd_leaf(*pmdp)) 851 spin_unlock(&gmap->guest_table_lock); 852 } 853 854 /* 855 * gmap_protect_pmd - remove access rights to memory and set pmd notification bits 856 * @pmdp: pointer to the pmd to be protected 857 * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE 858 * @bits: notification bits to set 859 * 860 * Returns: 861 * 0 if successfully protected 862 * -EAGAIN if a fixup is needed 863 * -EINVAL if unsupported notifier bits have been specified 864 * 865 * Expected to be called with sg->mm->mmap_lock in read and 866 * guest_table_lock held. 
867 */ 868 static int gmap_protect_pmd(struct gmap *gmap, unsigned long gaddr, 869 pmd_t *pmdp, int prot, unsigned long bits) 870 { 871 int pmd_i = pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID; 872 int pmd_p = pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT; 873 pmd_t new = *pmdp; 874 875 /* Fixup needed */ 876 if ((pmd_i && (prot != PROT_NONE)) || (pmd_p && (prot == PROT_WRITE))) 877 return -EAGAIN; 878 879 if (prot == PROT_NONE && !pmd_i) { 880 new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_INVALID)); 881 gmap_pmdp_xchg(gmap, pmdp, new, gaddr); 882 } 883 884 if (prot == PROT_READ && !pmd_p) { 885 new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_INVALID)); 886 new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_PROTECT)); 887 gmap_pmdp_xchg(gmap, pmdp, new, gaddr); 888 } 889 890 if (bits & GMAP_NOTIFY_MPROT) 891 set_pmd(pmdp, set_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_GMAP_IN))); 892 893 /* Shadow GMAP protection needs split PMDs */ 894 if (bits & GMAP_NOTIFY_SHADOW) 895 return -EINVAL; 896 897 return 0; 898 } 899 900 /* 901 * gmap_protect_pte - remove access rights to memory and set pgste bits 902 * @gmap: pointer to guest mapping meta data structure 903 * @gaddr: virtual address in the guest address space 904 * @pmdp: pointer to the pmd associated with the pte 905 * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE 906 * @bits: notification bits to set 907 * 908 * Returns 0 if successfully protected, -ENOMEM if out of memory and 909 * -EAGAIN if a fixup is needed. 910 * 911 * Expected to be called with sg->mm->mmap_lock in read 912 */ 913 static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr, 914 pmd_t *pmdp, int prot, unsigned long bits) 915 { 916 int rc; 917 pte_t *ptep; 918 spinlock_t *ptl; 919 unsigned long pbits = 0; 920 921 if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID) 922 return -EAGAIN; 923 924 ptep = pte_alloc_map_lock(gmap->mm, pmdp, gaddr, &ptl); 925 if (!ptep) 926 return -ENOMEM; 927 928 pbits |= (bits & GMAP_NOTIFY_MPROT) ? PGSTE_IN_BIT : 0; 929 pbits |= (bits & GMAP_NOTIFY_SHADOW) ? PGSTE_VSIE_BIT : 0; 930 /* Protect and unlock. */ 931 rc = ptep_force_prot(gmap->mm, gaddr, ptep, prot, pbits); 932 gmap_pte_op_end(ptep, ptl); 933 return rc; 934 } 935 936 /* 937 * gmap_protect_range - remove access rights to memory and set pgste bits 938 * @gmap: pointer to guest mapping meta data structure 939 * @gaddr: virtual address in the guest address space 940 * @len: size of area 941 * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE 942 * @bits: pgste notification bits to set 943 * 944 * Returns: 945 * PAGE_SIZE if a small page was successfully protected; 946 * HPAGE_SIZE if a large page was successfully protected; 947 * -ENOMEM if out of memory; 948 * -EFAULT if gaddr is invalid (or mapping for shadows is missing); 949 * -EAGAIN if the guest mapping is missing and should be fixed by the caller. 950 * 951 * Context: Called with sg->mm->mmap_lock in read. 
952 */ 953 int gmap_protect_one(struct gmap *gmap, unsigned long gaddr, int prot, unsigned long bits) 954 { 955 pmd_t *pmdp; 956 int rc = 0; 957 958 BUG_ON(gmap_is_shadow(gmap)); 959 960 pmdp = gmap_pmd_op_walk(gmap, gaddr); 961 if (!pmdp) 962 return -EAGAIN; 963 964 if (!pmd_leaf(*pmdp)) { 965 rc = gmap_protect_pte(gmap, gaddr, pmdp, prot, bits); 966 if (!rc) 967 rc = PAGE_SIZE; 968 } else { 969 rc = gmap_protect_pmd(gmap, gaddr, pmdp, prot, bits); 970 if (!rc) 971 rc = HPAGE_SIZE; 972 } 973 gmap_pmd_op_end(gmap, pmdp); 974 975 return rc; 976 } 977 EXPORT_SYMBOL_GPL(gmap_protect_one); 978 979 /** 980 * gmap_read_table - get an unsigned long value from a guest page table using 981 * absolute addressing, without marking the page referenced. 982 * @gmap: pointer to guest mapping meta data structure 983 * @gaddr: virtual address in the guest address space 984 * @val: pointer to the unsigned long value to return 985 * 986 * Returns 0 if the value was read, -ENOMEM if out of memory and -EFAULT 987 * if reading using the virtual address failed. -EINVAL if called on a gmap 988 * shadow. 989 * 990 * Called with gmap->mm->mmap_lock in read. 991 */ 992 int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val) 993 { 994 unsigned long address, vmaddr; 995 spinlock_t *ptl; 996 pte_t *ptep, pte; 997 int rc; 998 999 if (gmap_is_shadow(gmap)) 1000 return -EINVAL; 1001 1002 while (1) { 1003 rc = -EAGAIN; 1004 ptep = gmap_pte_op_walk(gmap, gaddr, &ptl); 1005 if (ptep) { 1006 pte = *ptep; 1007 if (pte_present(pte) && (pte_val(pte) & _PAGE_READ)) { 1008 address = pte_val(pte) & PAGE_MASK; 1009 address += gaddr & ~PAGE_MASK; 1010 *val = *(unsigned long *)__va(address); 1011 set_pte(ptep, set_pte_bit(*ptep, __pgprot(_PAGE_YOUNG))); 1012 /* Do *NOT* clear the _PAGE_INVALID bit! 
*/ 1013 rc = 0; 1014 } 1015 gmap_pte_op_end(ptep, ptl); 1016 } 1017 if (!rc) 1018 break; 1019 vmaddr = __gmap_translate(gmap, gaddr); 1020 if (IS_ERR_VALUE(vmaddr)) { 1021 rc = vmaddr; 1022 break; 1023 } 1024 rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, PROT_READ); 1025 if (rc) 1026 break; 1027 } 1028 return rc; 1029 } 1030 EXPORT_SYMBOL_GPL(gmap_read_table); 1031 1032 /** 1033 * gmap_insert_rmap - add a rmap to the host_to_rmap radix tree 1034 * @sg: pointer to the shadow guest address space structure 1035 * @vmaddr: vm address associated with the rmap 1036 * @rmap: pointer to the rmap structure 1037 * 1038 * Called with the sg->guest_table_lock 1039 */ 1040 static inline void gmap_insert_rmap(struct gmap *sg, unsigned long vmaddr, 1041 struct gmap_rmap *rmap) 1042 { 1043 struct gmap_rmap *temp; 1044 void __rcu **slot; 1045 1046 BUG_ON(!gmap_is_shadow(sg)); 1047 slot = radix_tree_lookup_slot(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT); 1048 if (slot) { 1049 rmap->next = radix_tree_deref_slot_protected(slot, 1050 &sg->guest_table_lock); 1051 for (temp = rmap->next; temp; temp = temp->next) { 1052 if (temp->raddr == rmap->raddr) { 1053 kfree(rmap); 1054 return; 1055 } 1056 } 1057 radix_tree_replace_slot(&sg->host_to_rmap, slot, rmap); 1058 } else { 1059 rmap->next = NULL; 1060 radix_tree_insert(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT, 1061 rmap); 1062 } 1063 } 1064 1065 /** 1066 * gmap_protect_rmap - restrict access rights to memory (RO) and create an rmap 1067 * @sg: pointer to the shadow guest address space structure 1068 * @raddr: rmap address in the shadow gmap 1069 * @paddr: address in the parent guest address space 1070 * @len: length of the memory area to protect 1071 * 1072 * Returns 0 if successfully protected and the rmap was created, -ENOMEM 1073 * if out of memory and -EFAULT if paddr is invalid. 
1074 */ 1075 static int gmap_protect_rmap(struct gmap *sg, unsigned long raddr, 1076 unsigned long paddr, unsigned long len) 1077 { 1078 struct gmap *parent; 1079 struct gmap_rmap *rmap; 1080 unsigned long vmaddr; 1081 spinlock_t *ptl; 1082 pte_t *ptep; 1083 int rc; 1084 1085 BUG_ON(!gmap_is_shadow(sg)); 1086 parent = sg->parent; 1087 while (len) { 1088 vmaddr = __gmap_translate(parent, paddr); 1089 if (IS_ERR_VALUE(vmaddr)) 1090 return vmaddr; 1091 rmap = kzalloc(sizeof(*rmap), GFP_KERNEL_ACCOUNT); 1092 if (!rmap) 1093 return -ENOMEM; 1094 rmap->raddr = raddr; 1095 rc = radix_tree_preload(GFP_KERNEL_ACCOUNT); 1096 if (rc) { 1097 kfree(rmap); 1098 return rc; 1099 } 1100 rc = -EAGAIN; 1101 ptep = gmap_pte_op_walk(parent, paddr, &ptl); 1102 if (ptep) { 1103 spin_lock(&sg->guest_table_lock); 1104 rc = ptep_force_prot(parent->mm, paddr, ptep, PROT_READ, 1105 PGSTE_VSIE_BIT); 1106 if (!rc) 1107 gmap_insert_rmap(sg, vmaddr, rmap); 1108 spin_unlock(&sg->guest_table_lock); 1109 gmap_pte_op_end(ptep, ptl); 1110 } 1111 radix_tree_preload_end(); 1112 if (rc) { 1113 kfree(rmap); 1114 rc = gmap_pte_op_fixup(parent, paddr, vmaddr, PROT_READ); 1115 if (rc) 1116 return rc; 1117 continue; 1118 } 1119 paddr += PAGE_SIZE; 1120 len -= PAGE_SIZE; 1121 } 1122 return 0; 1123 } 1124 1125 #define _SHADOW_RMAP_MASK 0x7 1126 #define _SHADOW_RMAP_REGION1 0x5 1127 #define _SHADOW_RMAP_REGION2 0x4 1128 #define _SHADOW_RMAP_REGION3 0x3 1129 #define _SHADOW_RMAP_SEGMENT 0x2 1130 #define _SHADOW_RMAP_PGTABLE 0x1 1131 1132 /** 1133 * gmap_idte_one - invalidate a single region or segment table entry 1134 * @asce: region or segment table *origin* + table-type bits 1135 * @vaddr: virtual address to identify the table entry to flush 1136 * 1137 * The invalid bit of a single region or segment table entry is set 1138 * and the associated TLB entries depending on the entry are flushed. 1139 * The table-type of the @asce identifies the portion of the @vaddr 1140 * that is used as the invalidation index. 
1141 */ 1142 static inline void gmap_idte_one(unsigned long asce, unsigned long vaddr) 1143 { 1144 asm volatile( 1145 " idte %0,0,%1" 1146 : : "a" (asce), "a" (vaddr) : "cc", "memory"); 1147 } 1148 1149 /** 1150 * gmap_unshadow_page - remove a page from a shadow page table 1151 * @sg: pointer to the shadow guest address space structure 1152 * @raddr: rmap address in the shadow guest address space 1153 * 1154 * Called with the sg->guest_table_lock 1155 */ 1156 static void gmap_unshadow_page(struct gmap *sg, unsigned long raddr) 1157 { 1158 unsigned long *table; 1159 1160 BUG_ON(!gmap_is_shadow(sg)); 1161 table = gmap_table_walk(sg, raddr, 0); /* get page table pointer */ 1162 if (!table || *table & _PAGE_INVALID) 1163 return; 1164 gmap_call_notifier(sg, raddr, raddr + PAGE_SIZE - 1); 1165 ptep_unshadow_pte(sg->mm, raddr, (pte_t *) table); 1166 } 1167 1168 /** 1169 * __gmap_unshadow_pgt - remove all entries from a shadow page table 1170 * @sg: pointer to the shadow guest address space structure 1171 * @raddr: rmap address in the shadow guest address space 1172 * @pgt: pointer to the start of a shadow page table 1173 * 1174 * Called with the sg->guest_table_lock 1175 */ 1176 static void __gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr, 1177 unsigned long *pgt) 1178 { 1179 int i; 1180 1181 BUG_ON(!gmap_is_shadow(sg)); 1182 for (i = 0; i < _PAGE_ENTRIES; i++, raddr += PAGE_SIZE) 1183 pgt[i] = _PAGE_INVALID; 1184 } 1185 1186 /** 1187 * gmap_unshadow_pgt - remove a shadow page table from a segment entry 1188 * @sg: pointer to the shadow guest address space structure 1189 * @raddr: address in the shadow guest address space 1190 * 1191 * Called with the sg->guest_table_lock 1192 */ 1193 static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr) 1194 { 1195 unsigned long *ste; 1196 phys_addr_t sto, pgt; 1197 struct ptdesc *ptdesc; 1198 1199 BUG_ON(!gmap_is_shadow(sg)); 1200 ste = gmap_table_walk(sg, raddr, 1); /* get segment pointer */ 1201 if (!ste || !(*ste & _SEGMENT_ENTRY_ORIGIN)) 1202 return; 1203 gmap_call_notifier(sg, raddr, raddr + _SEGMENT_SIZE - 1); 1204 sto = __pa(ste - ((raddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT)); 1205 gmap_idte_one(sto | _ASCE_TYPE_SEGMENT, raddr); 1206 pgt = *ste & _SEGMENT_ENTRY_ORIGIN; 1207 *ste = _SEGMENT_ENTRY_EMPTY; 1208 __gmap_unshadow_pgt(sg, raddr, __va(pgt)); 1209 /* Free page table */ 1210 ptdesc = page_ptdesc(phys_to_page(pgt)); 1211 page_table_free_pgste(ptdesc); 1212 } 1213 1214 /** 1215 * __gmap_unshadow_sgt - remove all entries from a shadow segment table 1216 * @sg: pointer to the shadow guest address space structure 1217 * @raddr: rmap address in the shadow guest address space 1218 * @sgt: pointer to the start of a shadow segment table 1219 * 1220 * Called with the sg->guest_table_lock 1221 */ 1222 static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr, 1223 unsigned long *sgt) 1224 { 1225 struct ptdesc *ptdesc; 1226 phys_addr_t pgt; 1227 int i; 1228 1229 BUG_ON(!gmap_is_shadow(sg)); 1230 for (i = 0; i < _CRST_ENTRIES; i++, raddr += _SEGMENT_SIZE) { 1231 if (!(sgt[i] & _SEGMENT_ENTRY_ORIGIN)) 1232 continue; 1233 pgt = sgt[i] & _REGION_ENTRY_ORIGIN; 1234 sgt[i] = _SEGMENT_ENTRY_EMPTY; 1235 __gmap_unshadow_pgt(sg, raddr, __va(pgt)); 1236 /* Free page table */ 1237 ptdesc = page_ptdesc(phys_to_page(pgt)); 1238 page_table_free_pgste(ptdesc); 1239 } 1240 } 1241 1242 /** 1243 * gmap_unshadow_sgt - remove a shadow segment table from a region-3 entry 1244 * @sg: pointer to the shadow guest address space structure 1245 * @raddr: 
rmap address in the shadow guest address space 1246 * 1247 * Called with the shadow->guest_table_lock 1248 */ 1249 static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr) 1250 { 1251 unsigned long r3o, *r3e; 1252 phys_addr_t sgt; 1253 struct page *page; 1254 1255 BUG_ON(!gmap_is_shadow(sg)); 1256 r3e = gmap_table_walk(sg, raddr, 2); /* get region-3 pointer */ 1257 if (!r3e || !(*r3e & _REGION_ENTRY_ORIGIN)) 1258 return; 1259 gmap_call_notifier(sg, raddr, raddr + _REGION3_SIZE - 1); 1260 r3o = (unsigned long) (r3e - ((raddr & _REGION3_INDEX) >> _REGION3_SHIFT)); 1261 gmap_idte_one(__pa(r3o) | _ASCE_TYPE_REGION3, raddr); 1262 sgt = *r3e & _REGION_ENTRY_ORIGIN; 1263 *r3e = _REGION3_ENTRY_EMPTY; 1264 __gmap_unshadow_sgt(sg, raddr, __va(sgt)); 1265 /* Free segment table */ 1266 page = phys_to_page(sgt); 1267 __free_pages(page, CRST_ALLOC_ORDER); 1268 } 1269 1270 /** 1271 * __gmap_unshadow_r3t - remove all entries from a shadow region-3 table 1272 * @sg: pointer to the shadow guest address space structure 1273 * @raddr: address in the shadow guest address space 1274 * @r3t: pointer to the start of a shadow region-3 table 1275 * 1276 * Called with the sg->guest_table_lock 1277 */ 1278 static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr, 1279 unsigned long *r3t) 1280 { 1281 struct page *page; 1282 phys_addr_t sgt; 1283 int i; 1284 1285 BUG_ON(!gmap_is_shadow(sg)); 1286 for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION3_SIZE) { 1287 if (!(r3t[i] & _REGION_ENTRY_ORIGIN)) 1288 continue; 1289 sgt = r3t[i] & _REGION_ENTRY_ORIGIN; 1290 r3t[i] = _REGION3_ENTRY_EMPTY; 1291 __gmap_unshadow_sgt(sg, raddr, __va(sgt)); 1292 /* Free segment table */ 1293 page = phys_to_page(sgt); 1294 __free_pages(page, CRST_ALLOC_ORDER); 1295 } 1296 } 1297 1298 /** 1299 * gmap_unshadow_r3t - remove a shadow region-3 table from a region-2 entry 1300 * @sg: pointer to the shadow guest address space structure 1301 * @raddr: rmap address in the shadow guest address space 1302 * 1303 * Called with the sg->guest_table_lock 1304 */ 1305 static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr) 1306 { 1307 unsigned long r2o, *r2e; 1308 phys_addr_t r3t; 1309 struct page *page; 1310 1311 BUG_ON(!gmap_is_shadow(sg)); 1312 r2e = gmap_table_walk(sg, raddr, 3); /* get region-2 pointer */ 1313 if (!r2e || !(*r2e & _REGION_ENTRY_ORIGIN)) 1314 return; 1315 gmap_call_notifier(sg, raddr, raddr + _REGION2_SIZE - 1); 1316 r2o = (unsigned long) (r2e - ((raddr & _REGION2_INDEX) >> _REGION2_SHIFT)); 1317 gmap_idte_one(__pa(r2o) | _ASCE_TYPE_REGION2, raddr); 1318 r3t = *r2e & _REGION_ENTRY_ORIGIN; 1319 *r2e = _REGION2_ENTRY_EMPTY; 1320 __gmap_unshadow_r3t(sg, raddr, __va(r3t)); 1321 /* Free region 3 table */ 1322 page = phys_to_page(r3t); 1323 __free_pages(page, CRST_ALLOC_ORDER); 1324 } 1325 1326 /** 1327 * __gmap_unshadow_r2t - remove all entries from a shadow region-2 table 1328 * @sg: pointer to the shadow guest address space structure 1329 * @raddr: rmap address in the shadow guest address space 1330 * @r2t: pointer to the start of a shadow region-2 table 1331 * 1332 * Called with the sg->guest_table_lock 1333 */ 1334 static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr, 1335 unsigned long *r2t) 1336 { 1337 phys_addr_t r3t; 1338 struct page *page; 1339 int i; 1340 1341 BUG_ON(!gmap_is_shadow(sg)); 1342 for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION2_SIZE) { 1343 if (!(r2t[i] & _REGION_ENTRY_ORIGIN)) 1344 continue; 1345 r3t = r2t[i] & _REGION_ENTRY_ORIGIN; 1346 r2t[i] = 
_REGION2_ENTRY_EMPTY; 1347 __gmap_unshadow_r3t(sg, raddr, __va(r3t)); 1348 /* Free region 3 table */ 1349 page = phys_to_page(r3t); 1350 __free_pages(page, CRST_ALLOC_ORDER); 1351 } 1352 } 1353 1354 /** 1355 * gmap_unshadow_r2t - remove a shadow region-2 table from a region-1 entry 1356 * @sg: pointer to the shadow guest address space structure 1357 * @raddr: rmap address in the shadow guest address space 1358 * 1359 * Called with the sg->guest_table_lock 1360 */ 1361 static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr) 1362 { 1363 unsigned long r1o, *r1e; 1364 struct page *page; 1365 phys_addr_t r2t; 1366 1367 BUG_ON(!gmap_is_shadow(sg)); 1368 r1e = gmap_table_walk(sg, raddr, 4); /* get region-1 pointer */ 1369 if (!r1e || !(*r1e & _REGION_ENTRY_ORIGIN)) 1370 return; 1371 gmap_call_notifier(sg, raddr, raddr + _REGION1_SIZE - 1); 1372 r1o = (unsigned long) (r1e - ((raddr & _REGION1_INDEX) >> _REGION1_SHIFT)); 1373 gmap_idte_one(__pa(r1o) | _ASCE_TYPE_REGION1, raddr); 1374 r2t = *r1e & _REGION_ENTRY_ORIGIN; 1375 *r1e = _REGION1_ENTRY_EMPTY; 1376 __gmap_unshadow_r2t(sg, raddr, __va(r2t)); 1377 /* Free region 2 table */ 1378 page = phys_to_page(r2t); 1379 __free_pages(page, CRST_ALLOC_ORDER); 1380 } 1381 1382 /** 1383 * __gmap_unshadow_r1t - remove all entries from a shadow region-1 table 1384 * @sg: pointer to the shadow guest address space structure 1385 * @raddr: rmap address in the shadow guest address space 1386 * @r1t: pointer to the start of a shadow region-1 table 1387 * 1388 * Called with the shadow->guest_table_lock 1389 */ 1390 static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr, 1391 unsigned long *r1t) 1392 { 1393 unsigned long asce; 1394 struct page *page; 1395 phys_addr_t r2t; 1396 int i; 1397 1398 BUG_ON(!gmap_is_shadow(sg)); 1399 asce = __pa(r1t) | _ASCE_TYPE_REGION1; 1400 for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION1_SIZE) { 1401 if (!(r1t[i] & _REGION_ENTRY_ORIGIN)) 1402 continue; 1403 r2t = r1t[i] & _REGION_ENTRY_ORIGIN; 1404 __gmap_unshadow_r2t(sg, raddr, __va(r2t)); 1405 /* Clear entry and flush translation r1t -> r2t */ 1406 gmap_idte_one(asce, raddr); 1407 r1t[i] = _REGION1_ENTRY_EMPTY; 1408 /* Free region 2 table */ 1409 page = phys_to_page(r2t); 1410 __free_pages(page, CRST_ALLOC_ORDER); 1411 } 1412 } 1413 1414 /** 1415 * gmap_unshadow - remove a shadow page table completely 1416 * @sg: pointer to the shadow guest address space structure 1417 * 1418 * Called with sg->guest_table_lock 1419 */ 1420 void gmap_unshadow(struct gmap *sg) 1421 { 1422 unsigned long *table; 1423 1424 BUG_ON(!gmap_is_shadow(sg)); 1425 if (sg->removed) 1426 return; 1427 sg->removed = 1; 1428 gmap_call_notifier(sg, 0, -1UL); 1429 gmap_flush_tlb(sg); 1430 table = __va(sg->asce & _ASCE_ORIGIN); 1431 switch (sg->asce & _ASCE_TYPE_MASK) { 1432 case _ASCE_TYPE_REGION1: 1433 __gmap_unshadow_r1t(sg, 0, table); 1434 break; 1435 case _ASCE_TYPE_REGION2: 1436 __gmap_unshadow_r2t(sg, 0, table); 1437 break; 1438 case _ASCE_TYPE_REGION3: 1439 __gmap_unshadow_r3t(sg, 0, table); 1440 break; 1441 case _ASCE_TYPE_SEGMENT: 1442 __gmap_unshadow_sgt(sg, 0, table); 1443 break; 1444 } 1445 } 1446 EXPORT_SYMBOL(gmap_unshadow); 1447 1448 /** 1449 * gmap_shadow_r2t - create an empty shadow region 2 table 1450 * @sg: pointer to the shadow guest address space structure 1451 * @saddr: faulting address in the shadow gmap 1452 * @r2t: parent gmap address of the region 2 table to get shadowed 1453 * @fake: r2t references contiguous guest memory block, not a r2t 1454 * 1455 * The r2t 
parameter specifies the address of the source table. The 1456 * four pages of the source table are made read-only in the parent gmap 1457 * address space. A write to the source table area @r2t will automatically 1458 * remove the shadow r2 table and all of its descendants. 1459 * 1460 * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the 1461 * shadow table structure is incomplete, -ENOMEM if out of memory and 1462 * -EFAULT if an address in the parent gmap could not be resolved. 1463 * 1464 * Called with sg->mm->mmap_lock in read. 1465 */ 1466 int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t, 1467 int fake) 1468 { 1469 unsigned long raddr, origin, offset, len; 1470 unsigned long *table; 1471 phys_addr_t s_r2t; 1472 struct page *page; 1473 int rc; 1474 1475 BUG_ON(!gmap_is_shadow(sg)); 1476 /* Allocate a shadow region second table */ 1477 page = gmap_alloc_crst(); 1478 if (!page) 1479 return -ENOMEM; 1480 s_r2t = page_to_phys(page); 1481 /* Install shadow region second table */ 1482 spin_lock(&sg->guest_table_lock); 1483 table = gmap_table_walk(sg, saddr, 4); /* get region-1 pointer */ 1484 if (!table) { 1485 rc = -EAGAIN; /* Race with unshadow */ 1486 goto out_free; 1487 } 1488 if (!(*table & _REGION_ENTRY_INVALID)) { 1489 rc = 0; /* Already established */ 1490 goto out_free; 1491 } else if (*table & _REGION_ENTRY_ORIGIN) { 1492 rc = -EAGAIN; /* Race with shadow */ 1493 goto out_free; 1494 } 1495 crst_table_init(__va(s_r2t), _REGION2_ENTRY_EMPTY); 1496 /* mark as invalid as long as the parent table is not protected */ 1497 *table = s_r2t | _REGION_ENTRY_LENGTH | 1498 _REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID; 1499 if (sg->edat_level >= 1) 1500 *table |= (r2t & _REGION_ENTRY_PROTECT); 1501 if (fake) { 1502 /* nothing to protect for fake tables */ 1503 *table &= ~_REGION_ENTRY_INVALID; 1504 spin_unlock(&sg->guest_table_lock); 1505 return 0; 1506 } 1507 spin_unlock(&sg->guest_table_lock); 1508 /* Make r2t read-only in parent gmap page table */ 1509 raddr = (saddr & _REGION1_MASK) | _SHADOW_RMAP_REGION1; 1510 origin = r2t & _REGION_ENTRY_ORIGIN; 1511 offset = ((r2t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE; 1512 len = ((r2t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset; 1513 rc = gmap_protect_rmap(sg, raddr, origin + offset, len); 1514 spin_lock(&sg->guest_table_lock); 1515 if (!rc) { 1516 table = gmap_table_walk(sg, saddr, 4); 1517 if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_r2t) 1518 rc = -EAGAIN; /* Race with unshadow */ 1519 else 1520 *table &= ~_REGION_ENTRY_INVALID; 1521 } else { 1522 gmap_unshadow_r2t(sg, raddr); 1523 } 1524 spin_unlock(&sg->guest_table_lock); 1525 return rc; 1526 out_free: 1527 spin_unlock(&sg->guest_table_lock); 1528 __free_pages(page, CRST_ALLOC_ORDER); 1529 return rc; 1530 } 1531 EXPORT_SYMBOL_GPL(gmap_shadow_r2t); 1532 1533 /** 1534 * gmap_shadow_r3t - create a shadow region 3 table 1535 * @sg: pointer to the shadow guest address space structure 1536 * @saddr: faulting address in the shadow gmap 1537 * @r3t: parent gmap address of the region 3 table to get shadowed 1538 * @fake: r3t references contiguous guest memory block, not a r3t 1539 * 1540 * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the 1541 * shadow table structure is incomplete, -ENOMEM if out of memory and 1542 * -EFAULT if an address in the parent gmap could not be resolved. 1543 * 1544 * Called with sg->mm->mmap_lock in read. 
1545 */ 1546 int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t, 1547 int fake) 1548 { 1549 unsigned long raddr, origin, offset, len; 1550 unsigned long *table; 1551 phys_addr_t s_r3t; 1552 struct page *page; 1553 int rc; 1554 1555 BUG_ON(!gmap_is_shadow(sg)); 1556 /* Allocate a shadow region second table */ 1557 page = gmap_alloc_crst(); 1558 if (!page) 1559 return -ENOMEM; 1560 s_r3t = page_to_phys(page); 1561 /* Install shadow region second table */ 1562 spin_lock(&sg->guest_table_lock); 1563 table = gmap_table_walk(sg, saddr, 3); /* get region-2 pointer */ 1564 if (!table) { 1565 rc = -EAGAIN; /* Race with unshadow */ 1566 goto out_free; 1567 } 1568 if (!(*table & _REGION_ENTRY_INVALID)) { 1569 rc = 0; /* Already established */ 1570 goto out_free; 1571 } else if (*table & _REGION_ENTRY_ORIGIN) { 1572 rc = -EAGAIN; /* Race with shadow */ 1573 goto out_free; 1574 } 1575 crst_table_init(__va(s_r3t), _REGION3_ENTRY_EMPTY); 1576 /* mark as invalid as long as the parent table is not protected */ 1577 *table = s_r3t | _REGION_ENTRY_LENGTH | 1578 _REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID; 1579 if (sg->edat_level >= 1) 1580 *table |= (r3t & _REGION_ENTRY_PROTECT); 1581 if (fake) { 1582 /* nothing to protect for fake tables */ 1583 *table &= ~_REGION_ENTRY_INVALID; 1584 spin_unlock(&sg->guest_table_lock); 1585 return 0; 1586 } 1587 spin_unlock(&sg->guest_table_lock); 1588 /* Make r3t read-only in parent gmap page table */ 1589 raddr = (saddr & _REGION2_MASK) | _SHADOW_RMAP_REGION2; 1590 origin = r3t & _REGION_ENTRY_ORIGIN; 1591 offset = ((r3t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE; 1592 len = ((r3t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset; 1593 rc = gmap_protect_rmap(sg, raddr, origin + offset, len); 1594 spin_lock(&sg->guest_table_lock); 1595 if (!rc) { 1596 table = gmap_table_walk(sg, saddr, 3); 1597 if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_r3t) 1598 rc = -EAGAIN; /* Race with unshadow */ 1599 else 1600 *table &= ~_REGION_ENTRY_INVALID; 1601 } else { 1602 gmap_unshadow_r3t(sg, raddr); 1603 } 1604 spin_unlock(&sg->guest_table_lock); 1605 return rc; 1606 out_free: 1607 spin_unlock(&sg->guest_table_lock); 1608 __free_pages(page, CRST_ALLOC_ORDER); 1609 return rc; 1610 } 1611 EXPORT_SYMBOL_GPL(gmap_shadow_r3t); 1612 1613 /** 1614 * gmap_shadow_sgt - create a shadow segment table 1615 * @sg: pointer to the shadow guest address space structure 1616 * @saddr: faulting address in the shadow gmap 1617 * @sgt: parent gmap address of the segment table to get shadowed 1618 * @fake: sgt references contiguous guest memory block, not a sgt 1619 * 1620 * Returns: 0 if successfully shadowed or already shadowed, -EAGAIN if the 1621 * shadow table structure is incomplete, -ENOMEM if out of memory and 1622 * -EFAULT if an address in the parent gmap could not be resolved. 1623 * 1624 * Called with sg->mm->mmap_lock in read. 
1625 */ 1626 int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt, 1627 int fake) 1628 { 1629 unsigned long raddr, origin, offset, len; 1630 unsigned long *table; 1631 phys_addr_t s_sgt; 1632 struct page *page; 1633 int rc; 1634 1635 BUG_ON(!gmap_is_shadow(sg) || (sgt & _REGION3_ENTRY_LARGE)); 1636 /* Allocate a shadow segment table */ 1637 page = gmap_alloc_crst(); 1638 if (!page) 1639 return -ENOMEM; 1640 s_sgt = page_to_phys(page); 1641 /* Install shadow region second table */ 1642 spin_lock(&sg->guest_table_lock); 1643 table = gmap_table_walk(sg, saddr, 2); /* get region-3 pointer */ 1644 if (!table) { 1645 rc = -EAGAIN; /* Race with unshadow */ 1646 goto out_free; 1647 } 1648 if (!(*table & _REGION_ENTRY_INVALID)) { 1649 rc = 0; /* Already established */ 1650 goto out_free; 1651 } else if (*table & _REGION_ENTRY_ORIGIN) { 1652 rc = -EAGAIN; /* Race with shadow */ 1653 goto out_free; 1654 } 1655 crst_table_init(__va(s_sgt), _SEGMENT_ENTRY_EMPTY); 1656 /* mark as invalid as long as the parent table is not protected */ 1657 *table = s_sgt | _REGION_ENTRY_LENGTH | 1658 _REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID; 1659 if (sg->edat_level >= 1) 1660 *table |= sgt & _REGION_ENTRY_PROTECT; 1661 if (fake) { 1662 /* nothing to protect for fake tables */ 1663 *table &= ~_REGION_ENTRY_INVALID; 1664 spin_unlock(&sg->guest_table_lock); 1665 return 0; 1666 } 1667 spin_unlock(&sg->guest_table_lock); 1668 /* Make sgt read-only in parent gmap page table */ 1669 raddr = (saddr & _REGION3_MASK) | _SHADOW_RMAP_REGION3; 1670 origin = sgt & _REGION_ENTRY_ORIGIN; 1671 offset = ((sgt & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE; 1672 len = ((sgt & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset; 1673 rc = gmap_protect_rmap(sg, raddr, origin + offset, len); 1674 spin_lock(&sg->guest_table_lock); 1675 if (!rc) { 1676 table = gmap_table_walk(sg, saddr, 2); 1677 if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_sgt) 1678 rc = -EAGAIN; /* Race with unshadow */ 1679 else 1680 *table &= ~_REGION_ENTRY_INVALID; 1681 } else { 1682 gmap_unshadow_sgt(sg, raddr); 1683 } 1684 spin_unlock(&sg->guest_table_lock); 1685 return rc; 1686 out_free: 1687 spin_unlock(&sg->guest_table_lock); 1688 __free_pages(page, CRST_ALLOC_ORDER); 1689 return rc; 1690 } 1691 EXPORT_SYMBOL_GPL(gmap_shadow_sgt); 1692 1693 static void gmap_pgste_set_pgt_addr(struct ptdesc *ptdesc, unsigned long pgt_addr) 1694 { 1695 unsigned long *pgstes = page_to_virt(ptdesc_page(ptdesc)); 1696 1697 pgstes += _PAGE_ENTRIES; 1698 1699 pgstes[0] &= ~PGSTE_ST2_MASK; 1700 pgstes[1] &= ~PGSTE_ST2_MASK; 1701 pgstes[2] &= ~PGSTE_ST2_MASK; 1702 pgstes[3] &= ~PGSTE_ST2_MASK; 1703 1704 pgstes[0] |= (pgt_addr >> 16) & PGSTE_ST2_MASK; 1705 pgstes[1] |= pgt_addr & PGSTE_ST2_MASK; 1706 pgstes[2] |= (pgt_addr << 16) & PGSTE_ST2_MASK; 1707 pgstes[3] |= (pgt_addr << 32) & PGSTE_ST2_MASK; 1708 } 1709 1710 /** 1711 * gmap_shadow_pgt - instantiate a shadow page table 1712 * @sg: pointer to the shadow guest address space structure 1713 * @saddr: faulting address in the shadow gmap 1714 * @pgt: parent gmap address of the page table to get shadowed 1715 * @fake: pgt references contiguous guest memory block, not a pgtable 1716 * 1717 * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the 1718 * shadow table structure is incomplete, -ENOMEM if out of memory, 1719 * -EFAULT if an address in the parent gmap could not be resolved and 1720 * 1721 * Called with gmap->mm->mmap_lock in read 1722 */ 1723 int gmap_shadow_pgt(struct gmap *sg, unsigned long 
saddr, unsigned long pgt, 1724 int fake) 1725 { 1726 unsigned long raddr, origin; 1727 unsigned long *table; 1728 struct ptdesc *ptdesc; 1729 phys_addr_t s_pgt; 1730 int rc; 1731 1732 BUG_ON(!gmap_is_shadow(sg) || (pgt & _SEGMENT_ENTRY_LARGE)); 1733 /* Allocate a shadow page table */ 1734 ptdesc = page_table_alloc_pgste(sg->mm); 1735 if (!ptdesc) 1736 return -ENOMEM; 1737 origin = pgt & _SEGMENT_ENTRY_ORIGIN; 1738 if (fake) 1739 origin |= GMAP_SHADOW_FAKE_TABLE; 1740 gmap_pgste_set_pgt_addr(ptdesc, origin); 1741 s_pgt = page_to_phys(ptdesc_page(ptdesc)); 1742 /* Install shadow page table */ 1743 spin_lock(&sg->guest_table_lock); 1744 table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */ 1745 if (!table) { 1746 rc = -EAGAIN; /* Race with unshadow */ 1747 goto out_free; 1748 } 1749 if (!(*table & _SEGMENT_ENTRY_INVALID)) { 1750 rc = 0; /* Already established */ 1751 goto out_free; 1752 } else if (*table & _SEGMENT_ENTRY_ORIGIN) { 1753 rc = -EAGAIN; /* Race with shadow */ 1754 goto out_free; 1755 } 1756 /* mark as invalid as long as the parent table is not protected */ 1757 *table = (unsigned long) s_pgt | _SEGMENT_ENTRY | 1758 (pgt & _SEGMENT_ENTRY_PROTECT) | _SEGMENT_ENTRY_INVALID; 1759 if (fake) { 1760 /* nothing to protect for fake tables */ 1761 *table &= ~_SEGMENT_ENTRY_INVALID; 1762 spin_unlock(&sg->guest_table_lock); 1763 return 0; 1764 } 1765 spin_unlock(&sg->guest_table_lock); 1766 /* Make pgt read-only in parent gmap page table (not the pgste) */ 1767 raddr = (saddr & _SEGMENT_MASK) | _SHADOW_RMAP_SEGMENT; 1768 origin = pgt & _SEGMENT_ENTRY_ORIGIN & PAGE_MASK; 1769 rc = gmap_protect_rmap(sg, raddr, origin, PAGE_SIZE); 1770 spin_lock(&sg->guest_table_lock); 1771 if (!rc) { 1772 table = gmap_table_walk(sg, saddr, 1); 1773 if (!table || (*table & _SEGMENT_ENTRY_ORIGIN) != s_pgt) 1774 rc = -EAGAIN; /* Race with unshadow */ 1775 else 1776 *table &= ~_SEGMENT_ENTRY_INVALID; 1777 } else { 1778 gmap_unshadow_pgt(sg, raddr); 1779 } 1780 spin_unlock(&sg->guest_table_lock); 1781 return rc; 1782 out_free: 1783 spin_unlock(&sg->guest_table_lock); 1784 page_table_free_pgste(ptdesc); 1785 return rc; 1786 1787 } 1788 EXPORT_SYMBOL_GPL(gmap_shadow_pgt); 1789 1790 /** 1791 * gmap_shadow_page - create a shadow page mapping 1792 * @sg: pointer to the shadow guest address space structure 1793 * @saddr: faulting address in the shadow gmap 1794 * @pte: pte in parent gmap address space to get shadowed 1795 * 1796 * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the 1797 * shadow table structure is incomplete, -ENOMEM if out of memory and 1798 * -EFAULT if an address in the parent gmap could not be resolved. 1799 * 1800 * Called with sg->mm->mmap_lock in read. 1801 */ 1802 int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte) 1803 { 1804 struct gmap *parent; 1805 struct gmap_rmap *rmap; 1806 unsigned long vmaddr, paddr; 1807 spinlock_t *ptl; 1808 pte_t *sptep, *tptep; 1809 int prot; 1810 int rc; 1811 1812 BUG_ON(!gmap_is_shadow(sg)); 1813 parent = sg->parent; 1814 prot = (pte_val(pte) & _PAGE_PROTECT) ? 
/**
 * gmap_shadow_page - create a shadow page mapping
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @pte: pte in parent gmap address space to get shadowed
 *
 * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
 * shadow table structure is incomplete, -ENOMEM if out of memory and
 * -EFAULT if an address in the parent gmap could not be resolved.
 *
 * Called with sg->mm->mmap_lock in read.
 */
int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte)
{
	struct gmap *parent;
	struct gmap_rmap *rmap;
	unsigned long vmaddr, paddr;
	spinlock_t *ptl;
	pte_t *sptep, *tptep;
	int prot;
	int rc;

	BUG_ON(!gmap_is_shadow(sg));
	parent = sg->parent;
	prot = (pte_val(pte) & _PAGE_PROTECT) ? PROT_READ : PROT_WRITE;

	rmap = kzalloc(sizeof(*rmap), GFP_KERNEL_ACCOUNT);
	if (!rmap)
		return -ENOMEM;
	rmap->raddr = (saddr & PAGE_MASK) | _SHADOW_RMAP_PGTABLE;

	while (1) {
		paddr = pte_val(pte) & PAGE_MASK;
		vmaddr = __gmap_translate(parent, paddr);
		if (IS_ERR_VALUE(vmaddr)) {
			rc = vmaddr;
			break;
		}
		rc = radix_tree_preload(GFP_KERNEL_ACCOUNT);
		if (rc)
			break;
		rc = -EAGAIN;
		sptep = gmap_pte_op_walk(parent, paddr, &ptl);
		if (sptep) {
			spin_lock(&sg->guest_table_lock);
			/* Get page table pointer */
			tptep = (pte_t *) gmap_table_walk(sg, saddr, 0);
			if (!tptep) {
				spin_unlock(&sg->guest_table_lock);
				gmap_pte_op_end(sptep, ptl);
				radix_tree_preload_end();
				break;
			}
			rc = ptep_shadow_pte(sg->mm, saddr, sptep, tptep, pte);
			if (rc > 0) {
				/* Success and a new mapping */
				gmap_insert_rmap(sg, vmaddr, rmap);
				rmap = NULL;
				rc = 0;
			}
			gmap_pte_op_end(sptep, ptl);
			spin_unlock(&sg->guest_table_lock);
		}
		radix_tree_preload_end();
		if (!rc)
			break;
		rc = gmap_pte_op_fixup(parent, paddr, vmaddr, prot);
		if (rc)
			break;
	}
	kfree(rmap);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_page);

/*
 * gmap_shadow_notify - handle notifications for shadow gmap
 *
 * Called with sg->parent->shadow_lock.
 */
static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr,
			       unsigned long gaddr)
{
	struct gmap_rmap *rmap, *rnext, *head;
	unsigned long start, end, bits, raddr;

	BUG_ON(!gmap_is_shadow(sg));

	spin_lock(&sg->guest_table_lock);
	if (sg->removed) {
		spin_unlock(&sg->guest_table_lock);
		return;
	}
	/* Check for top level table */
	start = sg->orig_asce & _ASCE_ORIGIN;
	end = start + ((sg->orig_asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE;
	if (!(sg->orig_asce & _ASCE_REAL_SPACE) && gaddr >= start &&
	    gaddr < end) {
		/* The complete shadow table has to go */
		gmap_unshadow(sg);
		spin_unlock(&sg->guest_table_lock);
		list_del(&sg->list);
		gmap_put(sg);
		return;
	}
	/* Remove the page table tree from one specific entry */
	head = radix_tree_delete(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT);
	gmap_for_each_rmap_safe(rmap, rnext, head) {
		bits = rmap->raddr & _SHADOW_RMAP_MASK;
		raddr = rmap->raddr ^ bits;
		switch (bits) {
		case _SHADOW_RMAP_REGION1:
			gmap_unshadow_r2t(sg, raddr);
			break;
		case _SHADOW_RMAP_REGION2:
			gmap_unshadow_r3t(sg, raddr);
			break;
		case _SHADOW_RMAP_REGION3:
			gmap_unshadow_sgt(sg, raddr);
			break;
		case _SHADOW_RMAP_SEGMENT:
			gmap_unshadow_pgt(sg, raddr);
			break;
		case _SHADOW_RMAP_PGTABLE:
			gmap_unshadow_page(sg, raddr);
			break;
		}
		kfree(rmap);
	}
	spin_unlock(&sg->guest_table_lock);
}

/**
 * ptep_notify - call all invalidation callbacks for a specific pte.
 * @mm: pointer to the process mm_struct
 * @vmaddr: virtual address in the process address space
 * @pte: pointer to the page table entry
 * @bits: bits from the pgste that caused the notify call
 *
 * This function is assumed to be called with the page table lock held
 * for the pte to notify.
 */
void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
		 pte_t *pte, unsigned long bits)
{
	unsigned long offset, gaddr = 0;
	struct gmap *gmap, *sg, *next;

	/*
	 * Convert the pte's position within its 256-entry page table into
	 * the guest address offset of the page within the 1M segment.
	 */
	offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
	offset = offset * (PAGE_SIZE / sizeof(pte_t));
	rcu_read_lock();
	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
		spin_lock(&gmap->guest_table_lock);
		gaddr = host_to_guest_lookup(gmap, vmaddr) + offset;
		spin_unlock(&gmap->guest_table_lock);
		if (!IS_GADDR_VALID(gaddr))
			continue;

		if (!list_empty(&gmap->children) && (bits & PGSTE_VSIE_BIT)) {
			spin_lock(&gmap->shadow_lock);
			list_for_each_entry_safe(sg, next,
						 &gmap->children, list)
				gmap_shadow_notify(sg, vmaddr, gaddr);
			spin_unlock(&gmap->shadow_lock);
		}
		if (bits & PGSTE_IN_BIT)
			gmap_call_notifier(gmap, gaddr, gaddr + PAGE_SIZE - 1);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(ptep_notify);

static void pmdp_notify_gmap(struct gmap *gmap, pmd_t *pmdp,
			     unsigned long gaddr)
{
	set_pmd(pmdp, clear_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_GMAP_IN)));
	gmap_call_notifier(gmap, gaddr, gaddr + HPAGE_SIZE - 1);
}

/**
 * gmap_pmdp_xchg - exchange a gmap pmd with another
 * @gmap: pointer to the guest address space structure
 * @pmdp: pointer to the pmd entry
 * @new: replacement entry
 * @gaddr: the affected guest address
 *
 * This function is assumed to be called with the guest_table_lock
 * held.
 */
static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *pmdp, pmd_t new,
			   unsigned long gaddr)
{
	gaddr &= HPAGE_MASK;
	pmdp_notify_gmap(gmap, pmdp, gaddr);
	new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_GMAP_IN));
	if (machine_has_tlb_guest())
		__pmdp_idte(gaddr, (pmd_t *)pmdp, IDTE_GUEST_ASCE, gmap->asce,
			    IDTE_GLOBAL);
	else
		__pmdp_idte(gaddr, (pmd_t *)pmdp, 0, 0, IDTE_GLOBAL);
	set_pmd(pmdp, new);
}

static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr,
			    int purge)
{
	pmd_t *pmdp;
	struct gmap *gmap;
	unsigned long gaddr;

	rcu_read_lock();
	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
		spin_lock(&gmap->guest_table_lock);
		pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr);
		if (pmdp) {
			pmdp_notify_gmap(gmap, pmdp, gaddr);
			WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
						   _SEGMENT_ENTRY_GMAP_UC |
						   _SEGMENT_ENTRY));
			if (purge)
				__pmdp_csp(pmdp);
			set_pmd(pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
		}
		spin_unlock(&gmap->guest_table_lock);
	}
	rcu_read_unlock();
}

/**
 * gmap_pmdp_invalidate - invalidate all affected guest pmd entries without
 *                        flushing
 * @mm: pointer to the process mm_struct
 * @vmaddr: virtual address in the process address space
 */
void gmap_pmdp_invalidate(struct mm_struct *mm, unsigned long vmaddr)
{
	gmap_pmdp_clear(mm, vmaddr, 0);
}
EXPORT_SYMBOL_GPL(gmap_pmdp_invalidate);

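/*
 * Like gmap_pmdp_clear() above, the two helpers below remove the
 * host-to-guest mapping and notify, but in addition they flush the cleared
 * segment entry from the TLB with a CPU-local respectively global IDTE.
 */
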
/**
 * gmap_pmdp_idte_local - invalidate and clear a guest pmd entry
 * @mm: pointer to the process mm_struct
 * @vmaddr: virtual address in the process address space
 */
void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr)
{
	unsigned long gaddr;
	struct gmap *gmap;
	pmd_t *pmdp;

	rcu_read_lock();
	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
		spin_lock(&gmap->guest_table_lock);
		pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr);
		if (pmdp) {
			pmdp_notify_gmap(gmap, pmdp, gaddr);
			WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
						   _SEGMENT_ENTRY_GMAP_UC |
						   _SEGMENT_ENTRY));
			if (machine_has_tlb_guest())
				__pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
					    gmap->asce, IDTE_LOCAL);
			else
				__pmdp_idte(gaddr, pmdp, 0, 0, IDTE_LOCAL);
			*pmdp = __pmd(_SEGMENT_ENTRY_EMPTY);
		}
		spin_unlock(&gmap->guest_table_lock);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(gmap_pmdp_idte_local);

/**
 * gmap_pmdp_idte_global - invalidate and clear a guest pmd entry
 * @mm: pointer to the process mm_struct
 * @vmaddr: virtual address in the process address space
 */
void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr)
{
	unsigned long gaddr;
	struct gmap *gmap;
	pmd_t *pmdp;

	rcu_read_lock();
	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
		spin_lock(&gmap->guest_table_lock);
		pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr);
		if (pmdp) {
			pmdp_notify_gmap(gmap, pmdp, gaddr);
			WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
						   _SEGMENT_ENTRY_GMAP_UC |
						   _SEGMENT_ENTRY));
			if (machine_has_tlb_guest())
				__pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
					    gmap->asce, IDTE_GLOBAL);
			else
				__pmdp_idte(gaddr, pmdp, 0, 0, IDTE_GLOBAL);
			*pmdp = __pmd(_SEGMENT_ENTRY_EMPTY);
		}
		spin_unlock(&gmap->guest_table_lock);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(gmap_pmdp_idte_global);

/**
 * gmap_test_and_clear_dirty_pmd - test and reset segment dirty status
 * @gmap: pointer to guest address space
 * @pmdp: pointer to the pmd to be tested
 * @gaddr: virtual address in the guest address space
 *
 * This function is assumed to be called with the guest_table_lock
 * held.
 */
static bool gmap_test_and_clear_dirty_pmd(struct gmap *gmap, pmd_t *pmdp,
					  unsigned long gaddr)
{
	if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
		return false;

	/* Already protected memory, which did not change, is clean */
	if (pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT &&
	    !(pmd_val(*pmdp) & _SEGMENT_ENTRY_GMAP_UC))
		return false;

	/* Clear UC indication and reset protection */
	set_pmd(pmdp, clear_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_GMAP_UC)));
	gmap_protect_pmd(gmap, gaddr, pmdp, PROT_READ, 0);
	return true;
}

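/*
 * The dirty bitmap filled in by gmap_sync_dirty_log_pmd() below covers one
 * 1M segment, one bit per 4K page (_PAGE_ENTRIES bits in total): a dirty
 * large segment sets all bits at once, otherwise ptep_test_and_clear_uc()
 * is consulted for each individual pte.
 */
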
/**
 * gmap_sync_dirty_log_pmd - set bitmap based on dirty status of segment
 * @gmap: pointer to guest address space
 * @bitmap: dirty bitmap for this pmd
 * @gaddr: virtual address in the guest address space
 * @vmaddr: virtual address in the host address space
 *
 * This function is assumed to be called with the guest_table_lock
 * held.
 */
void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long bitmap[4],
			     unsigned long gaddr, unsigned long vmaddr)
{
	int i;
	pmd_t *pmdp;
	pte_t *ptep;
	spinlock_t *ptl;

	pmdp = gmap_pmd_op_walk(gmap, gaddr);
	if (!pmdp)
		return;

	if (pmd_leaf(*pmdp)) {
		if (gmap_test_and_clear_dirty_pmd(gmap, pmdp, gaddr))
			bitmap_fill(bitmap, _PAGE_ENTRIES);
	} else {
		for (i = 0; i < _PAGE_ENTRIES; i++, vmaddr += PAGE_SIZE) {
			ptep = pte_alloc_map_lock(gmap->mm, pmdp, vmaddr, &ptl);
			if (!ptep)
				continue;
			if (ptep_test_and_clear_uc(gmap->mm, vmaddr, ptep))
				set_bit(i, bitmap);
			pte_unmap_unlock(ptep, ptl);
		}
	}
	gmap_pmd_op_end(gmap, pmdp);
}
EXPORT_SYMBOL_GPL(gmap_sync_dirty_log_pmd);

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static int thp_split_walk_pmd_entry(pmd_t *pmd, unsigned long addr,
				    unsigned long end, struct mm_walk *walk)
{
	struct vm_area_struct *vma = walk->vma;

	split_huge_pmd(vma, pmd, addr);
	return 0;
}

static const struct mm_walk_ops thp_split_walk_ops = {
	.pmd_entry = thp_split_walk_pmd_entry,
	.walk_lock = PGWALK_WRLOCK_VERIFY,
};

static inline void thp_split_mm(struct mm_struct *mm)
{
	struct vm_area_struct *vma;
	VMA_ITERATOR(vmi, mm, 0);

	for_each_vma(vmi, vma) {
		vm_flags_mod(vma, VM_NOHUGEPAGE, VM_HUGEPAGE);
		walk_page_vma(vma, &thp_split_walk_ops, NULL);
	}
	mm->def_flags |= VM_NOHUGEPAGE;
}
#else
static inline void thp_split_mm(struct mm_struct *mm)
{
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

/*
 * switch on pgstes for the current userspace process (for kvm)
 */
int s390_enable_sie(void)
{
	struct mm_struct *mm = current->mm;

	/* Do we have pgstes? If yes, we are done */
	if (mm_has_pgste(mm))
		return 0;
	mmap_write_lock(mm);
	mm->context.has_pgste = 1;
	/* split thp mappings and disable thp for future mappings */
	thp_split_mm(mm);
	mmap_write_unlock(mm);
	return 0;
}
EXPORT_SYMBOL_GPL(s390_enable_sie);

/*
 * Enable storage key handling from now on and initialize the storage
 * keys with the default key.
 */
static int __s390_enable_skey_pte(pte_t *pte, unsigned long addr,
				  unsigned long next, struct mm_walk *walk)
{
	/* Clear storage key */
	ptep_zap_key(walk->mm, addr, pte);
	return 0;
}

/*
 * Give a chance to schedule after setting a key to 256 pages.
 * We only hold the mm lock, which is an rwsem, and the kvm srcu.
 * Both can sleep.
 */
static int __s390_enable_skey_pmd(pmd_t *pmd, unsigned long addr,
				  unsigned long next, struct mm_walk *walk)
{
	cond_resched();
	return 0;
}

static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr,
				      unsigned long hmask, unsigned long next,
				      struct mm_walk *walk)
{
	pmd_t *pmd = (pmd_t *)pte;
	unsigned long start, end;
	struct folio *folio = page_folio(pmd_page(*pmd));

	/*
	 * The write check makes sure we do not set a key on shared
	 * memory. This is needed as the walker does not differentiate
	 * between actual guest memory and the process executable or
	 * shared libraries.
	 */
	if (pmd_val(*pmd) & _SEGMENT_ENTRY_INVALID ||
	    !(pmd_val(*pmd) & _SEGMENT_ENTRY_WRITE))
		return 0;

	start = pmd_val(*pmd) & HPAGE_MASK;
	end = start + HPAGE_SIZE;
	__storage_key_init_range(start, end);
	set_bit(PG_arch_1, &folio->flags.f);
	cond_resched();
	return 0;
}

static const struct mm_walk_ops enable_skey_walk_ops = {
	.hugetlb_entry = __s390_enable_skey_hugetlb,
	.pte_entry = __s390_enable_skey_pte,
	.pmd_entry = __s390_enable_skey_pmd,
	.walk_lock = PGWALK_WRLOCK,
};

int s390_enable_skey(void)
{
	struct mm_struct *mm = current->mm;
	int rc = 0;

	mmap_write_lock(mm);
	if (mm_uses_skeys(mm))
		goto out_up;

	mm->context.uses_skeys = 1;
	rc = gmap_helper_disable_cow_sharing();
	if (rc) {
		mm->context.uses_skeys = 0;
		goto out_up;
	}
	walk_page_range(mm, 0, TASK_SIZE, &enable_skey_walk_ops, NULL);

out_up:
	mmap_write_unlock(mm);
	return rc;
}
EXPORT_SYMBOL_GPL(s390_enable_skey);

/*
 * Reset CMMA state, make all pages stable again.
 */
static int __s390_reset_cmma(pte_t *pte, unsigned long addr,
			     unsigned long next, struct mm_walk *walk)
{
	ptep_zap_unused(walk->mm, addr, pte, 1);
	return 0;
}

static const struct mm_walk_ops reset_cmma_walk_ops = {
	.pte_entry = __s390_reset_cmma,
	.walk_lock = PGWALK_WRLOCK,
};

void s390_reset_cmma(struct mm_struct *mm)
{
	mmap_write_lock(mm);
	walk_page_range(mm, 0, TASK_SIZE, &reset_cmma_walk_ops, NULL);
	mmap_write_unlock(mm);
}
EXPORT_SYMBOL_GPL(s390_reset_cmma);

#define GATHER_GET_PAGES 32

struct reset_walk_state {
	unsigned long next;
	unsigned long count;
	unsigned long pfns[GATHER_GET_PAGES];
};

static int s390_gather_pages(pte_t *ptep, unsigned long addr,
			     unsigned long next, struct mm_walk *walk)
{
	struct reset_walk_state *p = walk->private;
	pte_t pte = READ_ONCE(*ptep);

	if (pte_present(pte)) {
		/* we have a reference from the mapping, take an extra one */
		get_page(phys_to_page(pte_val(pte)));
		p->pfns[p->count] = phys_to_pfn(pte_val(pte));
		p->next = next;
		p->count++;
	}
	return p->count >= GATHER_GET_PAGES;
}

static const struct mm_walk_ops gather_pages_ops = {
	.pte_entry = s390_gather_pages,
	.walk_lock = PGWALK_RDLOCK,
};

/*
 * Call the Destroy secure page UVC on each page in the given array of PFNs.
 * Each page needs to have an extra reference, which will be released here.
 */
void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns)
{
	struct folio *folio;
	unsigned long i;

	for (i = 0; i < count; i++) {
		folio = pfn_folio(pfns[i]);
		/* we always have an extra reference */
		uv_destroy_folio(folio);
		/* get rid of the extra reference */
		folio_put(folio);
		cond_resched();
	}
}
EXPORT_SYMBOL_GPL(s390_uv_destroy_pfns);

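/*
 * __s390_uv_destroy_range() below works in batches: each walk_page_range()
 * call gathers and takes an extra reference on at most GATHER_GET_PAGES
 * pages (s390_gather_pages() returns non-zero to stop the walk), then the
 * mmap_lock is dropped before the gathered pages are destroyed, and the
 * next walk resumes at state.next.
 */
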
/**
 * __s390_uv_destroy_range - Call the destroy secure page UVC on each page
 * in the given range of the given address space.
 * @mm: the mm to operate on
 * @start: the start of the range
 * @end: the end of the range
 * @interruptible: if not 0, stop when a fatal signal is received
 *
 * Walk the given range of the given address space and call the destroy
 * secure page UVC on each page. Optionally exit early if a fatal signal is
 * pending.
 *
 * Return: 0 on success, -EINTR if the function stopped before completing
 */
int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
			    unsigned long end, bool interruptible)
{
	struct reset_walk_state state = { .next = start };
	int r = 1;

	while (r > 0) {
		state.count = 0;
		mmap_read_lock(mm);
		r = walk_page_range(mm, state.next, end, &gather_pages_ops, &state);
		mmap_read_unlock(mm);
		cond_resched();
		s390_uv_destroy_pfns(state.count, state.pfns);
		if (interruptible && fatal_signal_pending(current))
			return -EINTR;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(__s390_uv_destroy_range);

/**
 * s390_replace_asce - Try to replace the current ASCE of a gmap with a copy
 * @gmap: the gmap whose ASCE needs to be replaced
 *
 * If the ASCE is a SEGMENT type then this function will return -EINVAL,
 * otherwise the pointers in the host_to_guest radix tree will keep pointing
 * to the wrong pages, causing use-after-free and memory corruption.
 * If the allocation of the new top level page table fails, the ASCE is not
 * replaced.
 * In any case, the old ASCE is always removed from the gmap CRST list.
 * Therefore the caller has to make sure to save a pointer to it
 * beforehand, unless a leak is actually intended.
 */
int s390_replace_asce(struct gmap *gmap)
{
	unsigned long asce;
	struct page *page;
	void *table;

	/* Replacing segment type ASCEs would cause serious issues */
	if ((gmap->asce & _ASCE_TYPE_MASK) == _ASCE_TYPE_SEGMENT)
		return -EINVAL;

	page = gmap_alloc_crst();
	if (!page)
		return -ENOMEM;
	table = page_to_virt(page);
	memcpy(table, gmap->table, 1UL << (CRST_ALLOC_ORDER + PAGE_SHIFT));

	/* Set new table origin while preserving existing ASCE control bits */
	asce = (gmap->asce & ~_ASCE_ORIGIN) | __pa(table);
	WRITE_ONCE(gmap->asce, asce);
	WRITE_ONCE(gmap->mm->context.gmap_asce, asce);
	WRITE_ONCE(gmap->table, table);

	return 0;
}
EXPORT_SYMBOL_GPL(s390_replace_asce);