// SPDX-License-Identifier: GPL-2.0
/*
 * KVM guest address space mapping code
 *
 * Copyright IBM Corp. 2007, 2020
 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 *	      David Hildenbrand <david@redhat.com>
 *	      Janosch Frank <frankja@linux.vnet.ibm.com>
 */

#include <linux/cpufeature.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/pagewalk.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/swapops.h>
#include <linux/ksm.h>
#include <linux/mman.h>
#include <linux/pgtable.h>
#include <asm/page-states.h>
#include <asm/pgalloc.h>
#include <asm/machine.h>
#include <asm/gmap_helpers.h>
#include <asm/gmap.h>
#include <asm/page.h>

/*
 * The address is saved in a radix tree directly; NULL would be ambiguous,
 * since 0 is a valid address, and NULL is returned when nothing was found.
 * The lower bits are ignored by all users of the macro, so it can be used
 * to distinguish a valid address 0 from a NULL.
 */
#define VALID_GADDR_FLAG 1
#define IS_GADDR_VALID(gaddr) ((gaddr) & VALID_GADDR_FLAG)
#define MAKE_VALID_GADDR(gaddr) (((gaddr) & HPAGE_MASK) | VALID_GADDR_FLAG)

#define GMAP_SHADOW_FAKE_TABLE 1ULL

static struct page *gmap_alloc_crst(void)
{
	struct page *page;

	page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER);
	if (!page)
		return NULL;
	__arch_set_page_dat(page_to_virt(page), 1UL << CRST_ALLOC_ORDER);
	return page;
}

/**
 * gmap_alloc - allocate and initialize a guest address space
 * @limit: maximum address of the gmap address space
 *
 * Returns a guest address space structure.
 */
struct gmap *gmap_alloc(unsigned long limit)
{
	struct gmap *gmap;
	struct page *page;
	unsigned long *table;
	unsigned long etype, atype;

	if (limit < _REGION3_SIZE) {
		limit = _REGION3_SIZE - 1;
		atype = _ASCE_TYPE_SEGMENT;
		etype = _SEGMENT_ENTRY_EMPTY;
	} else if (limit < _REGION2_SIZE) {
		limit = _REGION2_SIZE - 1;
		atype = _ASCE_TYPE_REGION3;
		etype = _REGION3_ENTRY_EMPTY;
	} else if (limit < _REGION1_SIZE) {
		limit = _REGION1_SIZE - 1;
		atype = _ASCE_TYPE_REGION2;
		etype = _REGION2_ENTRY_EMPTY;
	} else {
		limit = -1UL;
		atype = _ASCE_TYPE_REGION1;
		etype = _REGION1_ENTRY_EMPTY;
	}
	gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL_ACCOUNT);
	if (!gmap)
		goto out;
	INIT_LIST_HEAD(&gmap->children);
	INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL_ACCOUNT);
	INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC | __GFP_ACCOUNT);
	INIT_RADIX_TREE(&gmap->host_to_rmap, GFP_ATOMIC | __GFP_ACCOUNT);
	spin_lock_init(&gmap->guest_table_lock);
	spin_lock_init(&gmap->shadow_lock);
	refcount_set(&gmap->ref_count, 1);
	page = gmap_alloc_crst();
	if (!page)
		goto out_free;
	table = page_to_virt(page);
	crst_table_init(table, etype);
	gmap->table = table;
	gmap->asce = atype | _ASCE_TABLE_LENGTH |
		_ASCE_USER_BITS | __pa(table);
	gmap->asce_end = limit;
	return gmap;

out_free:
	kfree(gmap);
out:
	return NULL;
}
EXPORT_SYMBOL_GPL(gmap_alloc);
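
/*
 * The top-level table type chosen above follows directly from @limit: below
 * _REGION3_SIZE a segment-table ASCE is used, below _REGION2_SIZE a region-3
 * ASCE, below _REGION1_SIZE a region-2 ASCE, and a full region-1 ASCE
 * otherwise. An illustrative lifecycle of the allocation helpers (not taken
 * from an actual caller) would be:
 *
 *	struct gmap *gmap = gmap_create(mm, -1UL);
 *
 *	if (!gmap)
 *		return -ENOMEM;
 *	...
 *	gmap_remove(gmap);	... drops the initial reference
 */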

/**
 * gmap_create - create a guest address space
 * @mm: pointer to the parent mm_struct
 * @limit: maximum size of the gmap address space
 *
 * Returns a guest address space structure.
 */
struct gmap *gmap_create(struct mm_struct *mm, unsigned long limit)
{
	struct gmap *gmap;
	unsigned long gmap_asce;

	gmap = gmap_alloc(limit);
	if (!gmap)
		return NULL;
	gmap->mm = mm;
	spin_lock(&mm->context.lock);
	list_add_rcu(&gmap->list, &mm->context.gmap_list);
	if (list_is_singular(&mm->context.gmap_list))
		gmap_asce = gmap->asce;
	else
		gmap_asce = -1UL;
	WRITE_ONCE(mm->context.gmap_asce, gmap_asce);
	spin_unlock(&mm->context.lock);
	return gmap;
}
EXPORT_SYMBOL_GPL(gmap_create);

static void gmap_flush_tlb(struct gmap *gmap)
{
	__tlb_flush_idte(gmap->asce);
}

static void gmap_radix_tree_free(struct radix_tree_root *root)
{
	struct radix_tree_iter iter;
	unsigned long indices[16];
	unsigned long index;
	void __rcu **slot;
	int i, nr;

	/* A radix tree is freed by deleting all of its entries */
	index = 0;
	do {
		nr = 0;
		radix_tree_for_each_slot(slot, root, &iter, index) {
			indices[nr] = iter.index;
			if (++nr == 16)
				break;
		}
		for (i = 0; i < nr; i++) {
			index = indices[i];
			radix_tree_delete(root, index);
		}
	} while (nr > 0);
}

static void gmap_rmap_radix_tree_free(struct radix_tree_root *root)
{
	struct gmap_rmap *rmap, *rnext, *head;
	struct radix_tree_iter iter;
	unsigned long indices[16];
	unsigned long index;
	void __rcu **slot;
	int i, nr;

	/* A radix tree is freed by deleting all of its entries */
	index = 0;
	do {
		nr = 0;
		radix_tree_for_each_slot(slot, root, &iter, index) {
			indices[nr] = iter.index;
			if (++nr == 16)
				break;
		}
		for (i = 0; i < nr; i++) {
			index = indices[i];
			head = radix_tree_delete(root, index);
			gmap_for_each_rmap_safe(rmap, rnext, head)
				kfree(rmap);
		}
	} while (nr > 0);
}

static void gmap_free_crst(unsigned long *table, bool free_ptes)
{
	bool is_segment = (table[0] & _SEGMENT_ENTRY_TYPE_MASK) == 0;
	int i;

	if (is_segment) {
		if (!free_ptes)
			goto out;
		for (i = 0; i < _CRST_ENTRIES; i++)
			if (!(table[i] & _SEGMENT_ENTRY_INVALID))
				page_table_free_pgste(page_ptdesc(phys_to_page(table[i])));
	} else {
		for (i = 0; i < _CRST_ENTRIES; i++)
			if (!(table[i] & _REGION_ENTRY_INVALID))
				gmap_free_crst(__va(table[i] & PAGE_MASK), free_ptes);
	}

out:
	free_pages((unsigned long)table, CRST_ALLOC_ORDER);
}

/**
 * gmap_free - free a guest address space
 * @gmap: pointer to the guest address space structure
 *
 * No locks required. There are no references to this gmap anymore.
 */
void gmap_free(struct gmap *gmap)
{
	/* Flush tlb of all gmaps (if not already done for shadows) */
	if (!(gmap_is_shadow(gmap) && gmap->removed))
		gmap_flush_tlb(gmap);
	/* Free all segment & region tables. */
	gmap_free_crst(gmap->table, gmap_is_shadow(gmap));

	gmap_radix_tree_free(&gmap->guest_to_host);
	gmap_radix_tree_free(&gmap->host_to_guest);

	/* Free additional data for a shadow gmap */
	if (gmap_is_shadow(gmap)) {
		gmap_rmap_radix_tree_free(&gmap->host_to_rmap);
		/* Release reference to the parent */
		gmap_put(gmap->parent);
	}

	kfree(gmap);
}
EXPORT_SYMBOL_GPL(gmap_free);

/**
 * gmap_get - increase reference counter for guest address space
 * @gmap: pointer to the guest address space structure
 *
 * Returns the gmap pointer
 */
struct gmap *gmap_get(struct gmap *gmap)
{
	refcount_inc(&gmap->ref_count);
	return gmap;
}
EXPORT_SYMBOL_GPL(gmap_get);

/**
 * gmap_put - decrease reference counter for guest address space
 * @gmap: pointer to the guest address space structure
 *
 * If the reference counter reaches zero the guest address space is freed.
 */
void gmap_put(struct gmap *gmap)
{
	if (refcount_dec_and_test(&gmap->ref_count))
		gmap_free(gmap);
}
EXPORT_SYMBOL_GPL(gmap_put);

/**
 * gmap_remove - remove a guest address space but do not free it yet
 * @gmap: pointer to the guest address space structure
 */
void gmap_remove(struct gmap *gmap)
{
	struct gmap *sg, *next;
	unsigned long gmap_asce;

	/* Remove all shadow gmaps linked to this gmap */
	if (!list_empty(&gmap->children)) {
		spin_lock(&gmap->shadow_lock);
		list_for_each_entry_safe(sg, next, &gmap->children, list) {
			list_del(&sg->list);
			gmap_put(sg);
		}
		spin_unlock(&gmap->shadow_lock);
	}
	/* Remove gmap from the per-mm list */
	spin_lock(&gmap->mm->context.lock);
	list_del_rcu(&gmap->list);
	if (list_empty(&gmap->mm->context.gmap_list))
		gmap_asce = 0;
	else if (list_is_singular(&gmap->mm->context.gmap_list))
		gmap_asce = list_first_entry(&gmap->mm->context.gmap_list,
					     struct gmap, list)->asce;
	else
		gmap_asce = -1UL;
	WRITE_ONCE(gmap->mm->context.gmap_asce, gmap_asce);
	spin_unlock(&gmap->mm->context.lock);
	synchronize_rcu();
	/* Put reference */
	gmap_put(gmap);
}
EXPORT_SYMBOL_GPL(gmap_remove);

/*
 * gmap_alloc_table is assumed to be called with mmap_lock held
 */
static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
			    unsigned long init, unsigned long gaddr)
{
	struct page *page;
	unsigned long *new;

	/* since we don't free the gmap table until gmap_free we can unlock */
	page = gmap_alloc_crst();
	if (!page)
		return -ENOMEM;
	new = page_to_virt(page);
	crst_table_init(new, init);
	spin_lock(&gmap->guest_table_lock);
	if (*table & _REGION_ENTRY_INVALID) {
		*table = __pa(new) | _REGION_ENTRY_LENGTH |
			(*table & _REGION_ENTRY_TYPE_MASK);
		page = NULL;
	}
	spin_unlock(&gmap->guest_table_lock);
	if (page)
		__free_pages(page, CRST_ALLOC_ORDER);
	return 0;
}

static unsigned long host_to_guest_lookup(struct gmap *gmap, unsigned long vmaddr)
{
	return (unsigned long)radix_tree_lookup(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
}

static unsigned long host_to_guest_delete(struct gmap *gmap, unsigned long vmaddr)
{
	return (unsigned long)radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
}
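
/*
 * The host_to_guest radix tree maps a host segment index (vmaddr >> PMD_SHIFT)
 * to the guest segment address that was linked to it in __gmap_link(). The
 * stored value is tagged with VALID_GADDR_FLAG (see MAKE_VALID_GADDR above),
 * so that a linked guest address of 0 can be told apart from an empty slot;
 * the helpers below therefore check IS_GADDR_VALID() instead of comparing
 * the lookup result against NULL.
 */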

static pmd_t *host_to_guest_pmd_delete(struct gmap *gmap, unsigned long vmaddr,
				       unsigned long *gaddr)
{
	*gaddr = host_to_guest_delete(gmap, vmaddr);
	if (IS_GADDR_VALID(*gaddr))
		return (pmd_t *)gmap_table_walk(gmap, *gaddr, 1);
	return NULL;
}

/**
 * __gmap_unlink_by_vmaddr - unlink a single segment via a host address
 * @gmap: pointer to the guest address space structure
 * @vmaddr: address in the host process address space
 *
 * Returns 1 if a TLB flush is required
 */
static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr)
{
	unsigned long gaddr;
	int flush = 0;
	pmd_t *pmdp;

	BUG_ON(gmap_is_shadow(gmap));
	spin_lock(&gmap->guest_table_lock);

	pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr);
	if (pmdp) {
		flush = (pmd_val(*pmdp) != _SEGMENT_ENTRY_EMPTY);
		*pmdp = __pmd(_SEGMENT_ENTRY_EMPTY);
	}

	spin_unlock(&gmap->guest_table_lock);
	return flush;
}

/**
 * __gmap_unmap_by_gaddr - unmap a single segment via a guest address
 * @gmap: pointer to the guest address space structure
 * @gaddr: address in the guest address space
 *
 * Returns 1 if a TLB flush is required
 */
static int __gmap_unmap_by_gaddr(struct gmap *gmap, unsigned long gaddr)
{
	unsigned long vmaddr;

	vmaddr = (unsigned long) radix_tree_delete(&gmap->guest_to_host,
						   gaddr >> PMD_SHIFT);
	return vmaddr ? __gmap_unlink_by_vmaddr(gmap, vmaddr) : 0;
}

/**
 * gmap_unmap_segment - unmap segment from the guest address space
 * @gmap: pointer to the guest address space structure
 * @to: address in the guest address space
 * @len: length of the memory area to unmap
 *
 * Returns 0 if the unmap succeeded, -EINVAL if not.
 */
int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
{
	unsigned long off;
	int flush;

	BUG_ON(gmap_is_shadow(gmap));
	if ((to | len) & (PMD_SIZE - 1))
		return -EINVAL;
	if (len == 0 || to + len < to)
		return -EINVAL;

	flush = 0;
	mmap_write_lock(gmap->mm);
	for (off = 0; off < len; off += PMD_SIZE)
		flush |= __gmap_unmap_by_gaddr(gmap, to + off);
	mmap_write_unlock(gmap->mm);
	if (flush)
		gmap_flush_tlb(gmap);
	return 0;
}
EXPORT_SYMBOL_GPL(gmap_unmap_segment);
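
/*
 * gmap_map_segment() below establishes the guest_to_host translation at
 * segment (PMD_SIZE) granularity; @from, @to and @len must all be PMD_SIZE
 * aligned. A purely illustrative call (addresses made up) could look like:
 *
 *	rc = gmap_map_segment(gmap, vmaddr, gaddr, PMD_SIZE);
 *	if (rc)
 *		return rc;
 */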

/**
 * gmap_map_segment - map a segment to the guest address space
 * @gmap: pointer to the guest address space structure
 * @from: source address in the parent address space
 * @to: target address in the guest address space
 * @len: length of the memory area to map
 *
 * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not.
 */
int gmap_map_segment(struct gmap *gmap, unsigned long from,
		     unsigned long to, unsigned long len)
{
	unsigned long off;
	int flush;

	BUG_ON(gmap_is_shadow(gmap));
	if ((from | to | len) & (PMD_SIZE - 1))
		return -EINVAL;
	if (len == 0 || from + len < from || to + len < to ||
	    from + len - 1 > TASK_SIZE_MAX || to + len - 1 > gmap->asce_end)
		return -EINVAL;

	flush = 0;
	mmap_write_lock(gmap->mm);
	for (off = 0; off < len; off += PMD_SIZE) {
		/* Remove old translation */
		flush |= __gmap_unmap_by_gaddr(gmap, to + off);
		/* Store new translation */
		if (radix_tree_insert(&gmap->guest_to_host,
				      (to + off) >> PMD_SHIFT,
				      (void *) from + off))
			break;
	}
	mmap_write_unlock(gmap->mm);
	if (flush)
		gmap_flush_tlb(gmap);
	if (off >= len)
		return 0;
	gmap_unmap_segment(gmap, to, len);
	return -ENOMEM;
}
EXPORT_SYMBOL_GPL(gmap_map_segment);

/**
 * __gmap_translate - translate a guest address to a user space address
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: guest address
 *
 * Returns user space address which corresponds to the guest address or
 * -EFAULT if no such mapping exists.
 * This function does not establish potentially missing page table entries.
 * The mmap_lock of the mm that belongs to the address space must be held
 * when this function gets called.
 *
 * Note: Can also be called for shadow gmaps.
 */
unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
{
	unsigned long vmaddr;

	vmaddr = (unsigned long)
		radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT);
	/* Note: guest_to_host is empty for a shadow gmap */
	return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT;
}
EXPORT_SYMBOL_GPL(__gmap_translate);

/**
 * gmap_unlink - disconnect a page table from the gmap shadow tables
 * @mm: pointer to the parent mm_struct
 * @table: pointer to the host page table
 * @vmaddr: vm address associated with the host page table
 */
void gmap_unlink(struct mm_struct *mm, unsigned long *table,
		 unsigned long vmaddr)
{
	struct gmap *gmap;
	int flush;

	rcu_read_lock();
	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
		flush = __gmap_unlink_by_vmaddr(gmap, vmaddr);
		if (flush)
			gmap_flush_tlb(gmap);
	}
	rcu_read_unlock();
}

static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *old, pmd_t new,
			   unsigned long gaddr);

/**
 * __gmap_link - set up shadow page tables to connect a host to a guest address
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: guest address
 * @vmaddr: vm address
 *
 * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
 * if the vm address is already mapped to a different guest segment.
 * The mmap_lock of the mm that belongs to the address space must be held
 * when this function gets called.
 */
int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
{
	struct mm_struct *mm;
	unsigned long *table;
	spinlock_t *ptl;
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	u64 unprot;
	int rc;

	BUG_ON(gmap_is_shadow(gmap));
	/* Create higher level tables in the gmap page table */
	table = gmap->table;
	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) {
		table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT;
		if ((*table & _REGION_ENTRY_INVALID) &&
		    gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY,
				     gaddr & _REGION1_MASK))
			return -ENOMEM;
		table = __va(*table & _REGION_ENTRY_ORIGIN);
	}
	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) {
		table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT;
		if ((*table & _REGION_ENTRY_INVALID) &&
		    gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY,
				     gaddr & _REGION2_MASK))
			return -ENOMEM;
		table = __va(*table & _REGION_ENTRY_ORIGIN);
	}
	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) {
		table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT;
		if ((*table & _REGION_ENTRY_INVALID) &&
		    gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY,
				     gaddr & _REGION3_MASK))
			return -ENOMEM;
		table = __va(*table & _REGION_ENTRY_ORIGIN);
	}
	table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
	/* Walk the parent mm page table */
	mm = gmap->mm;
	pgd = pgd_offset(mm, vmaddr);
	VM_BUG_ON(pgd_none(*pgd));
	p4d = p4d_offset(pgd, vmaddr);
	VM_BUG_ON(p4d_none(*p4d));
	pud = pud_offset(p4d, vmaddr);
	VM_BUG_ON(pud_none(*pud));
	/* large puds cannot yet be handled */
	if (pud_leaf(*pud))
		return -EFAULT;
	pmd = pmd_offset(pud, vmaddr);
	VM_BUG_ON(pmd_none(*pmd));
	/* Are we allowed to use huge pages? */
	if (pmd_leaf(*pmd) && !gmap->mm->context.allow_gmap_hpage_1m)
		return -EFAULT;
	/* Link gmap segment table entry location to page table. */
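	/*
	 * Locking sketch for the linking step below: the radix tree node is
	 * preloaded while it is still legal to sleep, then the host pmd lock
	 * and the guest_table_lock are taken before the host_to_guest entry
	 * is inserted and the segment table entry is filled in.
	 */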
	rc = radix_tree_preload(GFP_KERNEL_ACCOUNT);
	if (rc)
		return rc;
	ptl = pmd_lock(mm, pmd);
	spin_lock(&gmap->guest_table_lock);
	if (*table == _SEGMENT_ENTRY_EMPTY) {
		rc = radix_tree_insert(&gmap->host_to_guest,
				       vmaddr >> PMD_SHIFT,
				       (void *)MAKE_VALID_GADDR(gaddr));
		if (!rc) {
			if (pmd_leaf(*pmd)) {
				*table = (pmd_val(*pmd) &
					  _SEGMENT_ENTRY_HARDWARE_BITS_LARGE)
					 | _SEGMENT_ENTRY_GMAP_UC
					 | _SEGMENT_ENTRY;
			} else
				*table = (pmd_val(*pmd) &
					  _SEGMENT_ENTRY_HARDWARE_BITS)
					 | _SEGMENT_ENTRY;
		}
	} else if (*table & _SEGMENT_ENTRY_PROTECT &&
		   !(pmd_val(*pmd) & _SEGMENT_ENTRY_PROTECT)) {
		unprot = (u64)*table;
		unprot &= ~_SEGMENT_ENTRY_PROTECT;
		unprot |= _SEGMENT_ENTRY_GMAP_UC;
		gmap_pmdp_xchg(gmap, (pmd_t *)table, __pmd(unprot), gaddr);
	}
	spin_unlock(&gmap->guest_table_lock);
	spin_unlock(ptl);
	radix_tree_preload_end();
	return rc;
}
EXPORT_SYMBOL(__gmap_link);

/*
 * this function is assumed to be called with mmap_lock held
 */
void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
{
	unsigned long vmaddr;

	mmap_assert_locked(gmap->mm);

	/* Find the vm address for the guest address */
	vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host,
						   gaddr >> PMD_SHIFT);
	if (vmaddr) {
		vmaddr |= gaddr & ~PMD_MASK;
		gmap_helper_zap_one_page(gmap->mm, vmaddr);
	}
}
EXPORT_SYMBOL_GPL(__gmap_zap);

static LIST_HEAD(gmap_notifier_list);
static DEFINE_SPINLOCK(gmap_notifier_lock);

/**
 * gmap_register_pte_notifier - register a pte invalidation callback
 * @nb: pointer to the gmap notifier block
 */
void gmap_register_pte_notifier(struct gmap_notifier *nb)
{
	spin_lock(&gmap_notifier_lock);
	list_add_rcu(&nb->list, &gmap_notifier_list);
	spin_unlock(&gmap_notifier_lock);
}
EXPORT_SYMBOL_GPL(gmap_register_pte_notifier);

/**
 * gmap_unregister_pte_notifier - remove a pte invalidation callback
 * @nb: pointer to the gmap notifier block
 */
void gmap_unregister_pte_notifier(struct gmap_notifier *nb)
{
	spin_lock(&gmap_notifier_lock);
	list_del_rcu(&nb->list);
	spin_unlock(&gmap_notifier_lock);
	synchronize_rcu();
}
EXPORT_SYMBOL_GPL(gmap_unregister_pte_notifier);

/**
 * gmap_call_notifier - call all registered invalidation callbacks
 * @gmap: pointer to guest mapping meta data structure
 * @start: start virtual address in the guest address space
 * @end: end virtual address in the guest address space
 */
static void gmap_call_notifier(struct gmap *gmap, unsigned long start,
			       unsigned long end)
{
	struct gmap_notifier *nb;

	list_for_each_entry(nb, &gmap_notifier_list, list)
		nb->notifier_call(gmap, start, end);
}

/**
 * gmap_table_walk - walk the gmap page tables
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @level: page table level to stop at
 *
 * Returns a table entry pointer for the given guest address and @level
 * @level=0 : returns a pointer to a page table entry (or NULL)
 * @level=1 : returns a pointer to a segment table entry (or NULL)
 * @level=2 : returns a pointer to a region-3 table entry (or NULL)
 * @level=3 : returns a pointer to a region-2 table entry (or NULL)
 * @level=4 : returns a pointer to a region-1 table entry (or NULL)
 *
 * Returns NULL if the gmap page tables could not be walked to the
 * requested level.
 *
 * Note: Can also be called for shadow gmaps.
 */
unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr, int level)
{
	const int asce_type = gmap->asce & _ASCE_TYPE_MASK;
	unsigned long *table = gmap->table;

	if (gmap_is_shadow(gmap) && gmap->removed)
		return NULL;

	if (WARN_ON_ONCE(level > (asce_type >> 2) + 1))
		return NULL;

	if (asce_type != _ASCE_TYPE_REGION1 &&
	    gaddr & (-1UL << (31 + (asce_type >> 2) * 11)))
		return NULL;

	switch (asce_type) {
	case _ASCE_TYPE_REGION1:
		table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT;
		if (level == 4)
			break;
		if (*table & _REGION_ENTRY_INVALID)
			return NULL;
		table = __va(*table & _REGION_ENTRY_ORIGIN);
		fallthrough;
	case _ASCE_TYPE_REGION2:
		table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT;
		if (level == 3)
			break;
		if (*table & _REGION_ENTRY_INVALID)
			return NULL;
		table = __va(*table & _REGION_ENTRY_ORIGIN);
		fallthrough;
	case _ASCE_TYPE_REGION3:
		table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT;
		if (level == 2)
			break;
		if (*table & _REGION_ENTRY_INVALID)
			return NULL;
		table = __va(*table & _REGION_ENTRY_ORIGIN);
		fallthrough;
	case _ASCE_TYPE_SEGMENT:
		table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
		if (level == 1)
			break;
		if (*table & _REGION_ENTRY_INVALID)
			return NULL;
		table = __va(*table & _SEGMENT_ENTRY_ORIGIN);
		table += (gaddr & _PAGE_INDEX) >> PAGE_SHIFT;
	}
	return table;
}
EXPORT_SYMBOL(gmap_table_walk);

/**
 * gmap_pte_op_walk - walk the gmap page table, get the page table lock
 *		      and return the pte pointer
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @ptl: pointer to the spinlock pointer
 *
 * Returns a pointer to the locked pte for a guest address, or NULL
 */
static pte_t *gmap_pte_op_walk(struct gmap *gmap, unsigned long gaddr,
			       spinlock_t **ptl)
{
	unsigned long *table;

	BUG_ON(gmap_is_shadow(gmap));
	/* Walk the gmap page table, lock and get pte pointer */
	table = gmap_table_walk(gmap, gaddr, 1); /* get segment pointer */
	if (!table || *table & _SEGMENT_ENTRY_INVALID)
		return NULL;
	return pte_alloc_map_lock(gmap->mm, (pmd_t *) table, gaddr, ptl);
}

/**
 * gmap_pte_op_fixup - force a page in and connect the gmap page table
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @vmaddr: address in the host process address space
 * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
 *
 * Returns 0 if the caller can retry __gmap_translate (might fail again),
 * -ENOMEM if out of memory and -EFAULT if anything goes wrong while fixing
 * up or connecting the gmap page table.
 */
static int gmap_pte_op_fixup(struct gmap *gmap, unsigned long gaddr,
			     unsigned long vmaddr, int prot)
{
	struct mm_struct *mm = gmap->mm;
	unsigned int fault_flags;
	bool unlocked = false;

	BUG_ON(gmap_is_shadow(gmap));
	fault_flags = (prot == PROT_WRITE) ? FAULT_FLAG_WRITE : 0;
	if (fixup_user_fault(mm, vmaddr, fault_flags, &unlocked))
		return -EFAULT;
	if (unlocked)
		/* lost mmap_lock, caller has to retry __gmap_translate */
		return 0;
	/* Connect the page tables */
	return __gmap_link(gmap, gaddr, vmaddr);
}

/**
 * gmap_pte_op_end - release the page table lock
 * @ptep: pointer to the locked pte
 * @ptl: pointer to the page table spinlock
 */
static void gmap_pte_op_end(pte_t *ptep, spinlock_t *ptl)
{
	pte_unmap_unlock(ptep, ptl);
}

/**
 * gmap_pmd_op_walk - walk the gmap tables, get the guest table lock
 *		      and return the pmd pointer
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 *
 * Returns a pointer to the pmd for a guest address, or NULL
 */
static inline pmd_t *gmap_pmd_op_walk(struct gmap *gmap, unsigned long gaddr)
{
	pmd_t *pmdp;

	BUG_ON(gmap_is_shadow(gmap));
	pmdp = (pmd_t *) gmap_table_walk(gmap, gaddr, 1);
	if (!pmdp)
		return NULL;

	/* without huge pages, there is no need to take the table lock */
	if (!gmap->mm->context.allow_gmap_hpage_1m)
		return pmd_none(*pmdp) ? NULL : pmdp;

	spin_lock(&gmap->guest_table_lock);
	if (pmd_none(*pmdp)) {
		spin_unlock(&gmap->guest_table_lock);
		return NULL;
	}

	/* 4k page table entries are locked via the pte (pte_alloc_map_lock). */
	if (!pmd_leaf(*pmdp))
		spin_unlock(&gmap->guest_table_lock);
	return pmdp;
}

/**
 * gmap_pmd_op_end - release the guest_table_lock if needed
 * @gmap: pointer to the guest mapping meta data structure
 * @pmdp: pointer to the pmd
 */
static inline void gmap_pmd_op_end(struct gmap *gmap, pmd_t *pmdp)
{
	if (pmd_leaf(*pmdp))
		spin_unlock(&gmap->guest_table_lock);
}
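
/*
 * A minimal usage sketch for the walk/end helpers above (illustrative only,
 * error handling trimmed): gmap_pmd_op_walk() leaves the guest_table_lock
 * held for leaf (large) pmds, so every successful walk has to be paired
 * with gmap_pmd_op_end() once the entry has been dealt with.
 *
 *	pmdp = gmap_pmd_op_walk(gmap, gaddr);
 *	if (pmdp) {
 *		... inspect or update *pmdp ...
 *		gmap_pmd_op_end(gmap, pmdp);
 *	}
 */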

/*
 * gmap_protect_pmd - remove access rights to memory and set pmd notification bits
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @pmdp: pointer to the pmd to be protected
 * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
 * @bits: notification bits to set
 *
 * Returns:
 * 0 if successfully protected
 * -EAGAIN if a fixup is needed
 * -EINVAL if unsupported notifier bits have been specified
 *
 * Expected to be called with sg->mm->mmap_lock in read and
 * guest_table_lock held.
 */
static int gmap_protect_pmd(struct gmap *gmap, unsigned long gaddr,
			    pmd_t *pmdp, int prot, unsigned long bits)
{
	int pmd_i = pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID;
	int pmd_p = pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT;
	pmd_t new = *pmdp;

	/* Fixup needed */
	if ((pmd_i && (prot != PROT_NONE)) || (pmd_p && (prot == PROT_WRITE)))
		return -EAGAIN;

	if (prot == PROT_NONE && !pmd_i) {
		new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_INVALID));
		gmap_pmdp_xchg(gmap, pmdp, new, gaddr);
	}

	if (prot == PROT_READ && !pmd_p) {
		new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_INVALID));
		new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_PROTECT));
		gmap_pmdp_xchg(gmap, pmdp, new, gaddr);
	}

	if (bits & GMAP_NOTIFY_MPROT)
		set_pmd(pmdp, set_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_GMAP_IN)));

	/* Shadow GMAP protection needs split PMDs */
	if (bits & GMAP_NOTIFY_SHADOW)
		return -EINVAL;

	return 0;
}

/*
 * gmap_protect_pte - remove access rights to memory and set pgste bits
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @pmdp: pointer to the pmd associated with the pte
 * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
 * @bits: notification bits to set
 *
 * Returns 0 if successfully protected, -ENOMEM if out of memory and
 * -EAGAIN if a fixup is needed.
 *
 * Expected to be called with sg->mm->mmap_lock in read
 */
static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr,
			    pmd_t *pmdp, int prot, unsigned long bits)
{
	int rc;
	pte_t *ptep;
	spinlock_t *ptl;
	unsigned long pbits = 0;

	if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
		return -EAGAIN;

	ptep = pte_alloc_map_lock(gmap->mm, pmdp, gaddr, &ptl);
	if (!ptep)
		return -ENOMEM;

	pbits |= (bits & GMAP_NOTIFY_MPROT) ? PGSTE_IN_BIT : 0;
	pbits |= (bits & GMAP_NOTIFY_SHADOW) ? PGSTE_VSIE_BIT : 0;
	/* Protect and unlock. */
	rc = ptep_force_prot(gmap->mm, gaddr, ptep, prot, pbits);
	gmap_pte_op_end(ptep, ptl);
	return rc;
}
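
/*
 * gmap_protect_one() below protects exactly one guest mapping and reports
 * the size it covered. A caller that wants to protect a whole range would
 * presumably advance by the returned PAGE_SIZE or HPAGE_SIZE and restart
 * after fixing up the mapping on -EAGAIN (sketch only, not a caller taken
 * from this file):
 *
 *	while (gaddr < end) {
 *		rc = gmap_protect_one(gmap, gaddr, prot, bits);
 *		if (rc < 0)
 *			break;	... handle -EAGAIN, -EFAULT, -ENOMEM ...
 *		gaddr += rc;
 *	}
 */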

/*
 * gmap_protect_one - remove access rights to memory and set pgste bits
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
 * @bits: pgste notification bits to set
 *
 * Returns:
 *   PAGE_SIZE if a small page was successfully protected;
 *   HPAGE_SIZE if a large page was successfully protected;
 *   -ENOMEM if out of memory;
 *   -EFAULT if gaddr is invalid (or mapping for shadows is missing);
 *   -EAGAIN if the guest mapping is missing and should be fixed by the caller.
 *
 * Context: Called with sg->mm->mmap_lock in read.
 */
int gmap_protect_one(struct gmap *gmap, unsigned long gaddr, int prot, unsigned long bits)
{
	pmd_t *pmdp;
	int rc = 0;

	BUG_ON(gmap_is_shadow(gmap));

	pmdp = gmap_pmd_op_walk(gmap, gaddr);
	if (!pmdp)
		return -EAGAIN;

	if (!pmd_leaf(*pmdp)) {
		rc = gmap_protect_pte(gmap, gaddr, pmdp, prot, bits);
		if (!rc)
			rc = PAGE_SIZE;
	} else {
		rc = gmap_protect_pmd(gmap, gaddr, pmdp, prot, bits);
		if (!rc)
			rc = HPAGE_SIZE;
	}
	gmap_pmd_op_end(gmap, pmdp);

	return rc;
}
EXPORT_SYMBOL_GPL(gmap_protect_one);

/**
 * gmap_read_table - get an unsigned long value from a guest page table using
 *                   absolute addressing, without marking the page referenced.
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @val: pointer to the unsigned long value to return
 *
 * Returns 0 if the value was read, -ENOMEM if out of memory and -EFAULT
 * if reading using the virtual address failed. -EINVAL if called on a gmap
 * shadow.
 *
 * Called with gmap->mm->mmap_lock in read.
 */
int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val)
{
	unsigned long address, vmaddr;
	spinlock_t *ptl;
	pte_t *ptep, pte;
	int rc;

	if (gmap_is_shadow(gmap))
		return -EINVAL;

	while (1) {
		rc = -EAGAIN;
		ptep = gmap_pte_op_walk(gmap, gaddr, &ptl);
		if (ptep) {
			pte = *ptep;
			if (pte_present(pte) && (pte_val(pte) & _PAGE_READ)) {
				address = pte_val(pte) & PAGE_MASK;
				address += gaddr & ~PAGE_MASK;
				*val = *(unsigned long *)__va(address);
				set_pte(ptep, set_pte_bit(*ptep, __pgprot(_PAGE_YOUNG)));
				/* Do *NOT* clear the _PAGE_INVALID bit! */
				rc = 0;
			}
			gmap_pte_op_end(ptep, ptl);
		}
		if (!rc)
			break;
		vmaddr = __gmap_translate(gmap, gaddr);
		if (IS_ERR_VALUE(vmaddr)) {
			rc = vmaddr;
			break;
		}
		rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, PROT_READ);
		if (rc)
			break;
	}
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_read_table);

/**
 * gmap_insert_rmap - add a rmap to the host_to_rmap radix tree
 * @sg: pointer to the shadow guest address space structure
 * @vmaddr: vm address associated with the rmap
 * @rmap: pointer to the rmap structure
 *
 * Called with the sg->guest_table_lock
 */
static inline void gmap_insert_rmap(struct gmap *sg, unsigned long vmaddr,
				    struct gmap_rmap *rmap)
{
	struct gmap_rmap *temp;
	void __rcu **slot;

	BUG_ON(!gmap_is_shadow(sg));
	slot = radix_tree_lookup_slot(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT);
	if (slot) {
		rmap->next = radix_tree_deref_slot_protected(slot,
							     &sg->guest_table_lock);
		for (temp = rmap->next; temp; temp = temp->next) {
			if (temp->raddr == rmap->raddr) {
				kfree(rmap);
				return;
			}
		}
		radix_tree_replace_slot(&sg->host_to_rmap, slot, rmap);
	} else {
		rmap->next = NULL;
		radix_tree_insert(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT,
				  rmap);
	}
}
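
/*
 * For a shadow gmap, host_to_rmap maps a host page index to a singly linked
 * list of gmap_rmap entries; each entry records one shadow table address
 * (tagged with a _SHADOW_RMAP_* level in its low bits) that depends on that
 * host page. gmap_insert_rmap() above pushes a new entry to the front of
 * that list, and gmap_shadow_notify() later deletes and walks the list when
 * the host page is written to.
 */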

/**
 * gmap_protect_rmap - restrict access rights to memory (RO) and create an rmap
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow gmap
 * @paddr: address in the parent guest address space
 * @len: length of the memory area to protect
 *
 * Returns 0 if successfully protected and the rmap was created, -ENOMEM
 * if out of memory and -EFAULT if paddr is invalid.
 */
static int gmap_protect_rmap(struct gmap *sg, unsigned long raddr,
			     unsigned long paddr, unsigned long len)
{
	struct gmap *parent;
	struct gmap_rmap *rmap;
	unsigned long vmaddr;
	spinlock_t *ptl;
	pte_t *ptep;
	int rc;

	BUG_ON(!gmap_is_shadow(sg));
	parent = sg->parent;
	while (len) {
		vmaddr = __gmap_translate(parent, paddr);
		if (IS_ERR_VALUE(vmaddr))
			return vmaddr;
		rmap = kzalloc(sizeof(*rmap), GFP_KERNEL_ACCOUNT);
		if (!rmap)
			return -ENOMEM;
		rmap->raddr = raddr;
		rc = radix_tree_preload(GFP_KERNEL_ACCOUNT);
		if (rc) {
			kfree(rmap);
			return rc;
		}
		rc = -EAGAIN;
		ptep = gmap_pte_op_walk(parent, paddr, &ptl);
		if (ptep) {
			spin_lock(&sg->guest_table_lock);
			rc = ptep_force_prot(parent->mm, paddr, ptep, PROT_READ,
					     PGSTE_VSIE_BIT);
			if (!rc)
				gmap_insert_rmap(sg, vmaddr, rmap);
			spin_unlock(&sg->guest_table_lock);
			gmap_pte_op_end(ptep, ptl);
		}
		radix_tree_preload_end();
		if (rc) {
			kfree(rmap);
			rc = gmap_pte_op_fixup(parent, paddr, vmaddr, PROT_READ);
			if (rc)
				return rc;
			continue;
		}
		paddr += PAGE_SIZE;
		len -= PAGE_SIZE;
	}
	return 0;
}

#define _SHADOW_RMAP_MASK	0x7
#define _SHADOW_RMAP_REGION1	0x5
#define _SHADOW_RMAP_REGION2	0x4
#define _SHADOW_RMAP_REGION3	0x3
#define _SHADOW_RMAP_SEGMENT	0x2
#define _SHADOW_RMAP_PGTABLE	0x1
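
/*
 * The _SHADOW_RMAP_* values above are stored in the low bits of
 * gmap_rmap->raddr; the rmap addresses are stored with at least page
 * alignment, so those bits are otherwise unused. gmap_shadow_notify()
 * masks them out with _SHADOW_RMAP_MASK to decide which level of the
 * shadow table hierarchy has to be removed:
 *
 *	bits = rmap->raddr & _SHADOW_RMAP_MASK;
 *	raddr = rmap->raddr ^ bits;
 */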

/**
 * gmap_idte_one - invalidate a single region or segment table entry
 * @asce: region or segment table *origin* + table-type bits
 * @vaddr: virtual address to identify the table entry to flush
 *
 * The invalid bit of a single region or segment table entry is set
 * and the associated TLB entries depending on the entry are flushed.
 * The table-type of the @asce identifies the portion of the @vaddr
 * that is used as the invalidation index.
 */
static inline void gmap_idte_one(unsigned long asce, unsigned long vaddr)
{
	asm volatile(
		"	idte	%0,0,%1"
		: : "a" (asce), "a" (vaddr) : "cc", "memory");
}

/**
 * gmap_unshadow_page - remove a page from a shadow page table
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 *
 * Called with the sg->guest_table_lock
 */
static void gmap_unshadow_page(struct gmap *sg, unsigned long raddr)
{
	unsigned long *table;

	BUG_ON(!gmap_is_shadow(sg));
	table = gmap_table_walk(sg, raddr, 0); /* get page table pointer */
	if (!table || *table & _PAGE_INVALID)
		return;
	gmap_call_notifier(sg, raddr, raddr + PAGE_SIZE - 1);
	ptep_unshadow_pte(sg->mm, raddr, (pte_t *) table);
}

/**
 * __gmap_unshadow_pgt - remove all entries from a shadow page table
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 * @pgt: pointer to the start of a shadow page table
 *
 * Called with the sg->guest_table_lock
 */
static void __gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr,
				unsigned long *pgt)
{
	int i;

	BUG_ON(!gmap_is_shadow(sg));
	for (i = 0; i < _PAGE_ENTRIES; i++, raddr += PAGE_SIZE)
		pgt[i] = _PAGE_INVALID;
}

/**
 * gmap_unshadow_pgt - remove a shadow page table from a segment entry
 * @sg: pointer to the shadow guest address space structure
 * @raddr: address in the shadow guest address space
 *
 * Called with the sg->guest_table_lock
 */
static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr)
{
	unsigned long *ste;
	phys_addr_t sto, pgt;
	struct ptdesc *ptdesc;

	BUG_ON(!gmap_is_shadow(sg));
	ste = gmap_table_walk(sg, raddr, 1); /* get segment pointer */
	if (!ste || !(*ste & _SEGMENT_ENTRY_ORIGIN))
		return;
	gmap_call_notifier(sg, raddr, raddr + _SEGMENT_SIZE - 1);
	sto = __pa(ste - ((raddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT));
	gmap_idte_one(sto | _ASCE_TYPE_SEGMENT, raddr);
	pgt = *ste & _SEGMENT_ENTRY_ORIGIN;
	*ste = _SEGMENT_ENTRY_EMPTY;
	__gmap_unshadow_pgt(sg, raddr, __va(pgt));
	/* Free page table */
	ptdesc = page_ptdesc(phys_to_page(pgt));
	page_table_free_pgste(ptdesc);
}

/**
 * __gmap_unshadow_sgt - remove all entries from a shadow segment table
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 * @sgt: pointer to the start of a shadow segment table
 *
 * Called with the sg->guest_table_lock
 */
static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr,
				unsigned long *sgt)
{
	struct ptdesc *ptdesc;
	phys_addr_t pgt;
	int i;

	BUG_ON(!gmap_is_shadow(sg));
	for (i = 0; i < _CRST_ENTRIES; i++, raddr += _SEGMENT_SIZE) {
		if (!(sgt[i] & _SEGMENT_ENTRY_ORIGIN))
			continue;
		pgt = sgt[i] & _REGION_ENTRY_ORIGIN;
		sgt[i] = _SEGMENT_ENTRY_EMPTY;
		__gmap_unshadow_pgt(sg, raddr, __va(pgt));
		/* Free page table */
		ptdesc = page_ptdesc(phys_to_page(pgt));
		page_table_free_pgste(ptdesc);
	}
}

/**
 * gmap_unshadow_sgt - remove a shadow segment table from a region-3 entry
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 *
 * Called with the shadow->guest_table_lock
 */
static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr)
{
	unsigned long r3o, *r3e;
	phys_addr_t sgt;
	struct page *page;

	BUG_ON(!gmap_is_shadow(sg));
	r3e = gmap_table_walk(sg, raddr, 2); /* get region-3 pointer */
	if (!r3e || !(*r3e & _REGION_ENTRY_ORIGIN))
		return;
	gmap_call_notifier(sg, raddr, raddr + _REGION3_SIZE - 1);
	r3o = (unsigned long) (r3e - ((raddr & _REGION3_INDEX) >> _REGION3_SHIFT));
	gmap_idte_one(__pa(r3o) | _ASCE_TYPE_REGION3, raddr);
	sgt = *r3e & _REGION_ENTRY_ORIGIN;
	*r3e = _REGION3_ENTRY_EMPTY;
	__gmap_unshadow_sgt(sg, raddr, __va(sgt));
	/* Free segment table */
	page = phys_to_page(sgt);
	__free_pages(page, CRST_ALLOC_ORDER);
}

/**
 * __gmap_unshadow_r3t - remove all entries from a shadow region-3 table
 * @sg: pointer to the shadow guest address space structure
 * @raddr: address in the shadow guest address space
 * @r3t: pointer to the start of a shadow region-3 table
 *
 * Called with the sg->guest_table_lock
 */
static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr,
				unsigned long *r3t)
{
	struct page *page;
	phys_addr_t sgt;
	int i;

	BUG_ON(!gmap_is_shadow(sg));
	for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION3_SIZE) {
		if (!(r3t[i] & _REGION_ENTRY_ORIGIN))
			continue;
		sgt = r3t[i] & _REGION_ENTRY_ORIGIN;
		r3t[i] = _REGION3_ENTRY_EMPTY;
		__gmap_unshadow_sgt(sg, raddr, __va(sgt));
		/* Free segment table */
		page = phys_to_page(sgt);
		__free_pages(page, CRST_ALLOC_ORDER);
	}
}

/**
 * gmap_unshadow_r3t - remove a shadow region-3 table from a region-2 entry
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 *
 * Called with the sg->guest_table_lock
 */
static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr)
{
	unsigned long r2o, *r2e;
	phys_addr_t r3t;
	struct page *page;

	BUG_ON(!gmap_is_shadow(sg));
	r2e = gmap_table_walk(sg, raddr, 3); /* get region-2 pointer */
	if (!r2e || !(*r2e & _REGION_ENTRY_ORIGIN))
		return;
	gmap_call_notifier(sg, raddr, raddr + _REGION2_SIZE - 1);
	r2o = (unsigned long) (r2e - ((raddr & _REGION2_INDEX) >> _REGION2_SHIFT));
	gmap_idte_one(__pa(r2o) | _ASCE_TYPE_REGION2, raddr);
	r3t = *r2e & _REGION_ENTRY_ORIGIN;
	*r2e = _REGION2_ENTRY_EMPTY;
	__gmap_unshadow_r3t(sg, raddr, __va(r3t));
	/* Free region 3 table */
	page = phys_to_page(r3t);
	__free_pages(page, CRST_ALLOC_ORDER);
}

/**
 * __gmap_unshadow_r2t - remove all entries from a shadow region-2 table
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 * @r2t: pointer to the start of a shadow region-2 table
 *
 * Called with the sg->guest_table_lock
 */
static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr,
				unsigned long *r2t)
{
	phys_addr_t r3t;
	struct page *page;
	int i;

	BUG_ON(!gmap_is_shadow(sg));
	for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION2_SIZE) {
		if (!(r2t[i] & _REGION_ENTRY_ORIGIN))
			continue;
		r3t = r2t[i] & _REGION_ENTRY_ORIGIN;
		r2t[i] = _REGION2_ENTRY_EMPTY;
		__gmap_unshadow_r3t(sg, raddr, __va(r3t));
		/* Free region 3 table */
		page = phys_to_page(r3t);
		__free_pages(page, CRST_ALLOC_ORDER);
	}
}

/**
 * gmap_unshadow_r2t - remove a shadow region-2 table from a region-1 entry
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 *
 * Called with the sg->guest_table_lock
 */
static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr)
{
	unsigned long r1o, *r1e;
	struct page *page;
	phys_addr_t r2t;

	BUG_ON(!gmap_is_shadow(sg));
	r1e = gmap_table_walk(sg, raddr, 4); /* get region-1 pointer */
	if (!r1e || !(*r1e & _REGION_ENTRY_ORIGIN))
		return;
	gmap_call_notifier(sg, raddr, raddr + _REGION1_SIZE - 1);
	r1o = (unsigned long) (r1e - ((raddr & _REGION1_INDEX) >> _REGION1_SHIFT));
	gmap_idte_one(__pa(r1o) | _ASCE_TYPE_REGION1, raddr);
	r2t = *r1e & _REGION_ENTRY_ORIGIN;
	*r1e = _REGION1_ENTRY_EMPTY;
	__gmap_unshadow_r2t(sg, raddr, __va(r2t));
	/* Free region 2 table */
	page = phys_to_page(r2t);
	__free_pages(page, CRST_ALLOC_ORDER);
}

/**
 * __gmap_unshadow_r1t - remove all entries from a shadow region-1 table
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 * @r1t: pointer to the start of a shadow region-1 table
 *
 * Called with the shadow->guest_table_lock
 */
static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr,
				unsigned long *r1t)
{
	unsigned long asce;
	struct page *page;
	phys_addr_t r2t;
	int i;

	BUG_ON(!gmap_is_shadow(sg));
	asce = __pa(r1t) | _ASCE_TYPE_REGION1;
	for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION1_SIZE) {
		if (!(r1t[i] & _REGION_ENTRY_ORIGIN))
			continue;
		r2t = r1t[i] & _REGION_ENTRY_ORIGIN;
		__gmap_unshadow_r2t(sg, raddr, __va(r2t));
		/* Clear entry and flush translation r1t -> r2t */
		gmap_idte_one(asce, raddr);
		r1t[i] = _REGION1_ENTRY_EMPTY;
		/* Free region 2 table */
		page = phys_to_page(r2t);
		__free_pages(page, CRST_ALLOC_ORDER);
	}
}

/**
 * gmap_unshadow - remove a shadow page table completely
 * @sg: pointer to the shadow guest address space structure
 *
 * Called with sg->guest_table_lock
 */
void gmap_unshadow(struct gmap *sg)
{
	unsigned long *table;

	BUG_ON(!gmap_is_shadow(sg));
	if (sg->removed)
		return;
	sg->removed = 1;
	gmap_call_notifier(sg, 0, -1UL);
	gmap_flush_tlb(sg);
	table = __va(sg->asce & _ASCE_ORIGIN);
	switch (sg->asce & _ASCE_TYPE_MASK) {
	case _ASCE_TYPE_REGION1:
		__gmap_unshadow_r1t(sg, 0, table);
		break;
	case _ASCE_TYPE_REGION2:
		__gmap_unshadow_r2t(sg, 0, table);
		break;
	case _ASCE_TYPE_REGION3:
		__gmap_unshadow_r3t(sg, 0, table);
		break;
	case _ASCE_TYPE_SEGMENT:
		__gmap_unshadow_sgt(sg, 0, table);
		break;
	}
}
EXPORT_SYMBOL(gmap_unshadow);
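
/*
 * The gmap_shadow_r2t/r3t/sgt/pgt functions below all follow the same
 * pattern: allocate the shadow table, install it in the next higher shadow
 * table level marked invalid, drop the guest_table_lock, make the source
 * table in the parent gmap read-only via gmap_protect_rmap(), and only then
 * revalidate the new entry (or tear it down again on failure). The "fake"
 * case skips the protection step because the entry describes a contiguous
 * guest memory block rather than a real table.
 */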

/**
 * gmap_shadow_r2t - create an empty shadow region 2 table
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @r2t: parent gmap address of the region 2 table to get shadowed
 * @fake: r2t references contiguous guest memory block, not a r2t
 *
 * The r2t parameter specifies the address of the source table. The
 * four pages of the source table are made read-only in the parent gmap
 * address space. A write to the source table area @r2t will automatically
 * remove the shadow r2 table and all of its descendants.
 *
 * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
 * shadow table structure is incomplete, -ENOMEM if out of memory and
 * -EFAULT if an address in the parent gmap could not be resolved.
 *
 * Called with sg->mm->mmap_lock in read.
 */
int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
		    int fake)
{
	unsigned long raddr, origin, offset, len;
	unsigned long *table;
	phys_addr_t s_r2t;
	struct page *page;
	int rc;

	BUG_ON(!gmap_is_shadow(sg));
	/* Allocate a shadow region second table */
	page = gmap_alloc_crst();
	if (!page)
		return -ENOMEM;
	s_r2t = page_to_phys(page);
	/* Install shadow region second table */
	spin_lock(&sg->guest_table_lock);
	table = gmap_table_walk(sg, saddr, 4); /* get region-1 pointer */
	if (!table) {
		rc = -EAGAIN;		/* Race with unshadow */
		goto out_free;
	}
	if (!(*table & _REGION_ENTRY_INVALID)) {
		rc = 0;			/* Already established */
		goto out_free;
	} else if (*table & _REGION_ENTRY_ORIGIN) {
		rc = -EAGAIN;		/* Race with shadow */
		goto out_free;
	}
	crst_table_init(__va(s_r2t), _REGION2_ENTRY_EMPTY);
	/* mark as invalid as long as the parent table is not protected */
	*table = s_r2t | _REGION_ENTRY_LENGTH |
		 _REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID;
	if (sg->edat_level >= 1)
		*table |= (r2t & _REGION_ENTRY_PROTECT);
	if (fake) {
		/* nothing to protect for fake tables */
		*table &= ~_REGION_ENTRY_INVALID;
		spin_unlock(&sg->guest_table_lock);
		return 0;
	}
	spin_unlock(&sg->guest_table_lock);
	/* Make r2t read-only in parent gmap page table */
	raddr = (saddr & _REGION1_MASK) | _SHADOW_RMAP_REGION1;
	origin = r2t & _REGION_ENTRY_ORIGIN;
	offset = ((r2t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
	len = ((r2t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
	rc = gmap_protect_rmap(sg, raddr, origin + offset, len);
	spin_lock(&sg->guest_table_lock);
	if (!rc) {
		table = gmap_table_walk(sg, saddr, 4);
		if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_r2t)
			rc = -EAGAIN;	/* Race with unshadow */
		else
			*table &= ~_REGION_ENTRY_INVALID;
	} else {
		gmap_unshadow_r2t(sg, raddr);
	}
	spin_unlock(&sg->guest_table_lock);
	return rc;
out_free:
	spin_unlock(&sg->guest_table_lock);
	__free_pages(page, CRST_ALLOC_ORDER);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_r2t);

/**
 * gmap_shadow_r3t - create a shadow region 3 table
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @r3t: parent gmap address of the region 3 table to get shadowed
 * @fake: r3t references contiguous guest memory block, not a r3t
 *
 * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
 * shadow table structure is incomplete, -ENOMEM if out of memory and
 * -EFAULT if an address in the parent gmap could not be resolved.
 *
 * Called with sg->mm->mmap_lock in read.
 */
int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
		    int fake)
{
	unsigned long raddr, origin, offset, len;
	unsigned long *table;
	phys_addr_t s_r3t;
	struct page *page;
	int rc;

	BUG_ON(!gmap_is_shadow(sg));
	/* Allocate a shadow region third table */
	page = gmap_alloc_crst();
	if (!page)
		return -ENOMEM;
	s_r3t = page_to_phys(page);
	/* Install shadow region third table */
	spin_lock(&sg->guest_table_lock);
	table = gmap_table_walk(sg, saddr, 3); /* get region-2 pointer */
	if (!table) {
		rc = -EAGAIN;		/* Race with unshadow */
		goto out_free;
	}
	if (!(*table & _REGION_ENTRY_INVALID)) {
		rc = 0;			/* Already established */
		goto out_free;
	} else if (*table & _REGION_ENTRY_ORIGIN) {
		rc = -EAGAIN;		/* Race with shadow */
		goto out_free;
	}
	crst_table_init(__va(s_r3t), _REGION3_ENTRY_EMPTY);
	/* mark as invalid as long as the parent table is not protected */
	*table = s_r3t | _REGION_ENTRY_LENGTH |
		 _REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID;
	if (sg->edat_level >= 1)
		*table |= (r3t & _REGION_ENTRY_PROTECT);
	if (fake) {
		/* nothing to protect for fake tables */
		*table &= ~_REGION_ENTRY_INVALID;
		spin_unlock(&sg->guest_table_lock);
		return 0;
	}
	spin_unlock(&sg->guest_table_lock);
	/* Make r3t read-only in parent gmap page table */
	raddr = (saddr & _REGION2_MASK) | _SHADOW_RMAP_REGION2;
	origin = r3t & _REGION_ENTRY_ORIGIN;
	offset = ((r3t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
	len = ((r3t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
	rc = gmap_protect_rmap(sg, raddr, origin + offset, len);
	spin_lock(&sg->guest_table_lock);
	if (!rc) {
		table = gmap_table_walk(sg, saddr, 3);
		if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_r3t)
			rc = -EAGAIN;	/* Race with unshadow */
		else
			*table &= ~_REGION_ENTRY_INVALID;
	} else {
		gmap_unshadow_r3t(sg, raddr);
	}
	spin_unlock(&sg->guest_table_lock);
	return rc;
out_free:
	spin_unlock(&sg->guest_table_lock);
	__free_pages(page, CRST_ALLOC_ORDER);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_r3t);

/**
 * gmap_shadow_sgt - create a shadow segment table
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @sgt: parent gmap address of the segment table to get shadowed
 * @fake: sgt references contiguous guest memory block, not a sgt
 *
 * Returns: 0 if successfully shadowed or already shadowed, -EAGAIN if the
 * shadow table structure is incomplete, -ENOMEM if out of memory and
 * -EFAULT if an address in the parent gmap could not be resolved.
 *
 * Called with sg->mm->mmap_lock in read.
 */
int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
		    int fake)
{
	unsigned long raddr, origin, offset, len;
	unsigned long *table;
	phys_addr_t s_sgt;
	struct page *page;
	int rc;

	BUG_ON(!gmap_is_shadow(sg) || (sgt & _REGION3_ENTRY_LARGE));
	/* Allocate a shadow segment table */
	page = gmap_alloc_crst();
	if (!page)
		return -ENOMEM;
	s_sgt = page_to_phys(page);
	/* Install shadow segment table */
	spin_lock(&sg->guest_table_lock);
	table = gmap_table_walk(sg, saddr, 2); /* get region-3 pointer */
	if (!table) {
		rc = -EAGAIN;		/* Race with unshadow */
		goto out_free;
	}
	if (!(*table & _REGION_ENTRY_INVALID)) {
		rc = 0;			/* Already established */
		goto out_free;
	} else if (*table & _REGION_ENTRY_ORIGIN) {
		rc = -EAGAIN;		/* Race with shadow */
		goto out_free;
	}
	crst_table_init(__va(s_sgt), _SEGMENT_ENTRY_EMPTY);
	/* mark as invalid as long as the parent table is not protected */
	*table = s_sgt | _REGION_ENTRY_LENGTH |
		 _REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID;
	if (sg->edat_level >= 1)
		*table |= sgt & _REGION_ENTRY_PROTECT;
	if (fake) {
		/* nothing to protect for fake tables */
		*table &= ~_REGION_ENTRY_INVALID;
		spin_unlock(&sg->guest_table_lock);
		return 0;
	}
	spin_unlock(&sg->guest_table_lock);
	/* Make sgt read-only in parent gmap page table */
	raddr = (saddr & _REGION3_MASK) | _SHADOW_RMAP_REGION3;
	origin = sgt & _REGION_ENTRY_ORIGIN;
	offset = ((sgt & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
	len = ((sgt & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
	rc = gmap_protect_rmap(sg, raddr, origin + offset, len);
	spin_lock(&sg->guest_table_lock);
	if (!rc) {
		table = gmap_table_walk(sg, saddr, 2);
		if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_sgt)
			rc = -EAGAIN;	/* Race with unshadow */
		else
			*table &= ~_REGION_ENTRY_INVALID;
	} else {
		gmap_unshadow_sgt(sg, raddr);
	}
	spin_unlock(&sg->guest_table_lock);
	return rc;
out_free:
	spin_unlock(&sg->guest_table_lock);
	__free_pages(page, CRST_ALLOC_ORDER);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_sgt);

static void gmap_pgste_set_pgt_addr(struct ptdesc *ptdesc, unsigned long pgt_addr)
{
	unsigned long *pgstes = page_to_virt(ptdesc_page(ptdesc));

	pgstes += _PAGE_ENTRIES;

	pgstes[0] &= ~PGSTE_ST2_MASK;
	pgstes[1] &= ~PGSTE_ST2_MASK;
	pgstes[2] &= ~PGSTE_ST2_MASK;
	pgstes[3] &= ~PGSTE_ST2_MASK;

	pgstes[0] |= (pgt_addr >> 16) & PGSTE_ST2_MASK;
	pgstes[1] |= pgt_addr & PGSTE_ST2_MASK;
	pgstes[2] |= (pgt_addr << 16) & PGSTE_ST2_MASK;
	pgstes[3] |= (pgt_addr << 32) & PGSTE_ST2_MASK;
}
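
/*
 * gmap_pgste_set_pgt_addr() above stashes the parent page table address
 * (possibly tagged with GMAP_SHADOW_FAKE_TABLE) in the PGSTE_ST2_MASK fields
 * of the first four pgstes of a shadow page table, presumably so the parent
 * origin can later be recovered from the shadow page table itself. Within
 * this file it is only called from gmap_shadow_pgt() below.
 */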

/**
 * gmap_shadow_pgt - instantiate a shadow page table
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @pgt: parent gmap address of the page table to get shadowed
 * @fake: pgt references contiguous guest memory block, not a pgtable
 *
 * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
 * shadow table structure is incomplete, -ENOMEM if out of memory and
 * -EFAULT if an address in the parent gmap could not be resolved.
 *
 * Called with gmap->mm->mmap_lock in read
 */
int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
		    int fake)
{
	unsigned long raddr, origin;
	unsigned long *table;
	struct ptdesc *ptdesc;
	phys_addr_t s_pgt;
	int rc;

	BUG_ON(!gmap_is_shadow(sg) || (pgt & _SEGMENT_ENTRY_LARGE));
	/* Allocate a shadow page table */
	ptdesc = page_table_alloc_pgste(sg->mm);
	if (!ptdesc)
		return -ENOMEM;
	origin = pgt & _SEGMENT_ENTRY_ORIGIN;
	if (fake)
		origin |= GMAP_SHADOW_FAKE_TABLE;
	gmap_pgste_set_pgt_addr(ptdesc, origin);
	s_pgt = page_to_phys(ptdesc_page(ptdesc));
	/* Install shadow page table */
	spin_lock(&sg->guest_table_lock);
	table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */
	if (!table) {
		rc = -EAGAIN;		/* Race with unshadow */
		goto out_free;
	}
	if (!(*table & _SEGMENT_ENTRY_INVALID)) {
		rc = 0;			/* Already established */
		goto out_free;
	} else if (*table & _SEGMENT_ENTRY_ORIGIN) {
		rc = -EAGAIN;		/* Race with shadow */
		goto out_free;
	}
	/* mark as invalid as long as the parent table is not protected */
	*table = (unsigned long) s_pgt | _SEGMENT_ENTRY |
		 (pgt & _SEGMENT_ENTRY_PROTECT) | _SEGMENT_ENTRY_INVALID;
	if (fake) {
		/* nothing to protect for fake tables */
		*table &= ~_SEGMENT_ENTRY_INVALID;
		spin_unlock(&sg->guest_table_lock);
		return 0;
	}
	spin_unlock(&sg->guest_table_lock);
	/* Make pgt read-only in parent gmap page table (not the pgste) */
	raddr = (saddr & _SEGMENT_MASK) | _SHADOW_RMAP_SEGMENT;
	origin = pgt & _SEGMENT_ENTRY_ORIGIN & PAGE_MASK;
	rc = gmap_protect_rmap(sg, raddr, origin, PAGE_SIZE);
	spin_lock(&sg->guest_table_lock);
	if (!rc) {
		table = gmap_table_walk(sg, saddr, 1);
		if (!table || (*table & _SEGMENT_ENTRY_ORIGIN) != s_pgt)
			rc = -EAGAIN;	/* Race with unshadow */
		else
			*table &= ~_SEGMENT_ENTRY_INVALID;
	} else {
		gmap_unshadow_pgt(sg, raddr);
	}
	spin_unlock(&sg->guest_table_lock);
	return rc;
out_free:
	spin_unlock(&sg->guest_table_lock);
	page_table_free_pgste(ptdesc);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_pgt);

/**
 * gmap_shadow_page - create a shadow page mapping
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @pte: pte in parent gmap address space to get shadowed
 *
 * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
 * shadow table structure is incomplete, -ENOMEM if out of memory and
 * -EFAULT if an address in the parent gmap could not be resolved.
 *
 * Called with sg->mm->mmap_lock in read.
 */
int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte)
{
	struct gmap *parent;
	struct gmap_rmap *rmap;
	unsigned long vmaddr, paddr;
	spinlock_t *ptl;
	pte_t *sptep, *tptep;
	int prot;
	int rc;

	BUG_ON(!gmap_is_shadow(sg));
	parent = sg->parent;
	prot = (pte_val(pte) & _PAGE_PROTECT) ? PROT_READ : PROT_WRITE;
/**
 * gmap_shadow_page - create a shadow page mapping
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @pte: pte in parent gmap address space to get shadowed
 *
 * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
 * shadow table structure is incomplete, -ENOMEM if out of memory and
 * -EFAULT if an address in the parent gmap could not be resolved.
 *
 * Called with sg->mm->mmap_lock in read.
 */
int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte)
{
	struct gmap *parent;
	struct gmap_rmap *rmap;
	unsigned long vmaddr, paddr;
	spinlock_t *ptl;
	pte_t *sptep, *tptep;
	int prot;
	int rc;

	BUG_ON(!gmap_is_shadow(sg));
	parent = sg->parent;
	prot = (pte_val(pte) & _PAGE_PROTECT) ? PROT_READ : PROT_WRITE;

	rmap = kzalloc(sizeof(*rmap), GFP_KERNEL_ACCOUNT);
	if (!rmap)
		return -ENOMEM;
	rmap->raddr = (saddr & PAGE_MASK) | _SHADOW_RMAP_PGTABLE;

	while (1) {
		paddr = pte_val(pte) & PAGE_MASK;
		vmaddr = __gmap_translate(parent, paddr);
		if (IS_ERR_VALUE(vmaddr)) {
			rc = vmaddr;
			break;
		}
		rc = radix_tree_preload(GFP_KERNEL_ACCOUNT);
		if (rc)
			break;
		rc = -EAGAIN;
		sptep = gmap_pte_op_walk(parent, paddr, &ptl);
		if (sptep) {
			spin_lock(&sg->guest_table_lock);
			/* Get page table pointer */
			tptep = (pte_t *) gmap_table_walk(sg, saddr, 0);
			if (!tptep) {
				spin_unlock(&sg->guest_table_lock);
				gmap_pte_op_end(sptep, ptl);
				radix_tree_preload_end();
				break;
			}
			rc = ptep_shadow_pte(sg->mm, saddr, sptep, tptep, pte);
			if (rc > 0) {
				/* Success and a new mapping */
				gmap_insert_rmap(sg, vmaddr, rmap);
				rmap = NULL;
				rc = 0;
			}
			gmap_pte_op_end(sptep, ptl);
			spin_unlock(&sg->guest_table_lock);
		}
		radix_tree_preload_end();
		if (!rc)
			break;
		rc = gmap_pte_op_fixup(parent, paddr, vmaddr, prot);
		if (rc)
			break;
	}
	kfree(rmap);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_page);

/*
 * gmap_shadow_notify - handle notifications for shadow gmap
 *
 * Called with sg->parent->shadow_lock held.
 */
static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr,
			       unsigned long gaddr)
{
	struct gmap_rmap *rmap, *rnext, *head;
	unsigned long start, end, bits, raddr;

	BUG_ON(!gmap_is_shadow(sg));

	spin_lock(&sg->guest_table_lock);
	if (sg->removed) {
		spin_unlock(&sg->guest_table_lock);
		return;
	}
	/* Check for top level table */
	start = sg->orig_asce & _ASCE_ORIGIN;
	end = start + ((sg->orig_asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE;
	if (!(sg->orig_asce & _ASCE_REAL_SPACE) && gaddr >= start &&
	    gaddr < end) {
		/* The complete shadow table has to go */
		gmap_unshadow(sg);
		spin_unlock(&sg->guest_table_lock);
		list_del(&sg->list);
		gmap_put(sg);
		return;
	}
	/* Remove the page table tree for one specific entry */
	head = radix_tree_delete(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT);
	gmap_for_each_rmap_safe(rmap, rnext, head) {
		bits = rmap->raddr & _SHADOW_RMAP_MASK;
		raddr = rmap->raddr ^ bits;
		switch (bits) {
		case _SHADOW_RMAP_REGION1:
			gmap_unshadow_r2t(sg, raddr);
			break;
		case _SHADOW_RMAP_REGION2:
			gmap_unshadow_r3t(sg, raddr);
			break;
		case _SHADOW_RMAP_REGION3:
			gmap_unshadow_sgt(sg, raddr);
			break;
		case _SHADOW_RMAP_SEGMENT:
			gmap_unshadow_pgt(sg, raddr);
			break;
		case _SHADOW_RMAP_PGTABLE:
			gmap_unshadow_page(sg, raddr);
			break;
		}
		kfree(rmap);
	}
	spin_unlock(&sg->guest_table_lock);
}

/**
 * ptep_notify - call all invalidation callbacks for a specific pte.
 * @mm: pointer to the process mm_struct
 * @vmaddr: virtual address in the process address space
 * @pte: pointer to the page table entry
 * @bits: bits from the pgste that caused the notify call
 *
 * This function is assumed to be called with the page table lock held
 * for the pte to notify.
 */
void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
		 pte_t *pte, unsigned long bits)
{
	unsigned long offset, gaddr = 0;
	struct gmap *gmap, *sg, *next;

	offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
	offset = offset * (PAGE_SIZE / sizeof(pte_t));
	rcu_read_lock();
	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
		spin_lock(&gmap->guest_table_lock);
		gaddr = host_to_guest_lookup(gmap, vmaddr) + offset;
		spin_unlock(&gmap->guest_table_lock);
		if (!IS_GADDR_VALID(gaddr))
			continue;

		if (!list_empty(&gmap->children) && (bits & PGSTE_VSIE_BIT)) {
			spin_lock(&gmap->shadow_lock);
			list_for_each_entry_safe(sg, next,
						 &gmap->children, list)
				gmap_shadow_notify(sg, vmaddr, gaddr);
			spin_unlock(&gmap->shadow_lock);
		}
		if (bits & PGSTE_IN_BIT)
			gmap_call_notifier(gmap, gaddr, gaddr + PAGE_SIZE - 1);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(ptep_notify);
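
/*
 * Worked example for the offset computation in ptep_notify() above, under
 * the usual s390 layout of 256 pte entries of 8 bytes per 2 KB page table:
 * masking the pte pointer with 255 * sizeof(pte_t) yields the byte offset
 * of the entry within its page table, and multiplying by
 * PAGE_SIZE / sizeof(pte_t) (= 512) turns that into the guest address
 * offset of the page the entry maps. For a pte pointer at byte offset
 * 0x3f8 (entry index 127):
 *
 *	offset = 0x3f8 * 512 = 127 * PAGE_SIZE = 0x7f000
 *
 * which is then added to the guest address of the 1 MB segment found in
 * the host_to_guest radix tree.
 */
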
static void pmdp_notify_gmap(struct gmap *gmap, pmd_t *pmdp,
			     unsigned long gaddr)
{
	set_pmd(pmdp, clear_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_GMAP_IN)));
	gmap_call_notifier(gmap, gaddr, gaddr + HPAGE_SIZE - 1);
}

/**
 * gmap_pmdp_xchg - exchange a gmap pmd with another
 * @gmap: pointer to the guest address space structure
 * @pmdp: pointer to the pmd entry
 * @new: replacement entry
 * @gaddr: the affected guest address
 *
 * This function is assumed to be called with the guest_table_lock
 * held.
 */
static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *pmdp, pmd_t new,
			   unsigned long gaddr)
{
	gaddr &= HPAGE_MASK;
	pmdp_notify_gmap(gmap, pmdp, gaddr);
	new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_GMAP_IN));
	if (machine_has_tlb_guest())
		__pmdp_idte(gaddr, (pmd_t *)pmdp, IDTE_GUEST_ASCE, gmap->asce,
			    IDTE_GLOBAL);
	else
		__pmdp_idte(gaddr, (pmd_t *)pmdp, 0, 0, IDTE_GLOBAL);
	set_pmd(pmdp, new);
}

static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr,
			    int purge)
{
	pmd_t *pmdp;
	struct gmap *gmap;
	unsigned long gaddr;

	rcu_read_lock();
	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
		spin_lock(&gmap->guest_table_lock);
		pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr);
		if (pmdp) {
			pmdp_notify_gmap(gmap, pmdp, gaddr);
			WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
						   _SEGMENT_ENTRY_GMAP_UC |
						   _SEGMENT_ENTRY));
			if (purge)
				__pmdp_csp(pmdp);
			set_pmd(pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
		}
		spin_unlock(&gmap->guest_table_lock);
	}
	rcu_read_unlock();
}

/**
 * gmap_pmdp_invalidate - invalidate all affected guest pmd entries without
 *                        flushing
 * @mm: pointer to the process mm_struct
 * @vmaddr: virtual address in the process address space
 */
void gmap_pmdp_invalidate(struct mm_struct *mm, unsigned long vmaddr)
{
	gmap_pmdp_clear(mm, vmaddr, 0);
}
EXPORT_SYMBOL_GPL(gmap_pmdp_invalidate);

/**
 * gmap_pmdp_idte_local - invalidate and clear a guest pmd entry
 * @mm: pointer to the process mm_struct
 * @vmaddr: virtual address in the process address space
 */
void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr)
{
	unsigned long gaddr;
	struct gmap *gmap;
	pmd_t *pmdp;

	rcu_read_lock();
	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
		spin_lock(&gmap->guest_table_lock);
		pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr);
		if (pmdp) {
			pmdp_notify_gmap(gmap, pmdp, gaddr);
			WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
						   _SEGMENT_ENTRY_GMAP_UC |
						   _SEGMENT_ENTRY));
			if (machine_has_tlb_guest())
				__pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
					    gmap->asce, IDTE_LOCAL);
			else
				__pmdp_idte(gaddr, pmdp, 0, 0, IDTE_LOCAL);
			*pmdp = __pmd(_SEGMENT_ENTRY_EMPTY);
		}
		spin_unlock(&gmap->guest_table_lock);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(gmap_pmdp_idte_local);

/**
 * gmap_pmdp_idte_global - invalidate and clear a guest pmd entry
 * @mm: pointer to the process mm_struct
 * @vmaddr: virtual address in the process address space
 */
void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr)
{
	unsigned long gaddr;
	struct gmap *gmap;
	pmd_t *pmdp;

	rcu_read_lock();
	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
		spin_lock(&gmap->guest_table_lock);
		pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr);
		if (pmdp) {
			pmdp_notify_gmap(gmap, pmdp, gaddr);
			WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
						   _SEGMENT_ENTRY_GMAP_UC |
						   _SEGMENT_ENTRY));
			if (machine_has_tlb_guest())
				__pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
					    gmap->asce, IDTE_GLOBAL);
			else
				__pmdp_idte(gaddr, pmdp, 0, 0, IDTE_GLOBAL);
			*pmdp = __pmd(_SEGMENT_ENTRY_EMPTY);
		}
		spin_unlock(&gmap->guest_table_lock);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(gmap_pmdp_idte_global);

/**
 * gmap_test_and_clear_dirty_pmd - test and reset segment dirty status
 * @gmap: pointer to guest address space
 * @pmdp: pointer to the pmd to be tested
 * @gaddr: virtual address in the guest address space
 *
 * This function is assumed to be called with the guest_table_lock
 * held.
 */
static bool gmap_test_and_clear_dirty_pmd(struct gmap *gmap, pmd_t *pmdp,
					  unsigned long gaddr)
{
	if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
		return false;

	/* Memory that is already protected and did not change is clean */
	if (pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT &&
	    !(pmd_val(*pmdp) & _SEGMENT_ENTRY_GMAP_UC))
		return false;

	/* Clear UC indication and reset protection */
	set_pmd(pmdp, clear_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_GMAP_UC)));
	gmap_protect_pmd(gmap, gaddr, pmdp, PROT_READ, 0);
	return true;
}
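
/*
 * Illustrative note, not part of the build: gmap_sync_dirty_log_pmd() below
 * reports one dirty bit per 4 KB page of a 1 MB segment, i.e.
 * _PAGE_ENTRIES = 256 bits, which is exactly what the unsigned long
 * bitmap[4] argument provides on 64 bit:
 *
 *	HPAGE_SIZE / PAGE_SIZE = 1 MB / 4 KB = 256 = 4 * BITS_PER_LONG
 *
 * A dirty large segment sets all 256 bits at once via bitmap_fill(); a
 * normally mapped segment sets only the bits of the individual dirty pages.
 */
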
/**
 * gmap_sync_dirty_log_pmd - set bitmap based on dirty status of segment
 * @gmap: pointer to guest address space
 * @bitmap: dirty bitmap for this pmd
 * @gaddr: virtual address in the guest address space
 * @vmaddr: virtual address in the host address space
 *
 * This function is assumed to be called with the guest_table_lock
 * held.
 */
void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long bitmap[4],
			     unsigned long gaddr, unsigned long vmaddr)
{
	int i;
	pmd_t *pmdp;
	pte_t *ptep;
	spinlock_t *ptl;

	pmdp = gmap_pmd_op_walk(gmap, gaddr);
	if (!pmdp)
		return;

	if (pmd_leaf(*pmdp)) {
		if (gmap_test_and_clear_dirty_pmd(gmap, pmdp, gaddr))
			bitmap_fill(bitmap, _PAGE_ENTRIES);
	} else {
		for (i = 0; i < _PAGE_ENTRIES; i++, vmaddr += PAGE_SIZE) {
			ptep = pte_alloc_map_lock(gmap->mm, pmdp, vmaddr, &ptl);
			if (!ptep)
				continue;
			if (ptep_test_and_clear_uc(gmap->mm, vmaddr, ptep))
				set_bit(i, bitmap);
			pte_unmap_unlock(ptep, ptl);
		}
	}
	gmap_pmd_op_end(gmap, pmdp);
}
EXPORT_SYMBOL_GPL(gmap_sync_dirty_log_pmd);

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static int thp_split_walk_pmd_entry(pmd_t *pmd, unsigned long addr,
				    unsigned long end, struct mm_walk *walk)
{
	struct vm_area_struct *vma = walk->vma;

	split_huge_pmd(vma, pmd, addr);
	return 0;
}

static const struct mm_walk_ops thp_split_walk_ops = {
	.pmd_entry	= thp_split_walk_pmd_entry,
	.walk_lock	= PGWALK_WRLOCK_VERIFY,
};

static inline void thp_split_mm(struct mm_struct *mm)
{
	struct vm_area_struct *vma;
	VMA_ITERATOR(vmi, mm, 0);

	for_each_vma(vmi, vma) {
		vm_flags_mod(vma, VM_NOHUGEPAGE, VM_HUGEPAGE);
		walk_page_vma(vma, &thp_split_walk_ops, NULL);
	}
	mm->def_flags |= VM_NOHUGEPAGE;
}
#else
static inline void thp_split_mm(struct mm_struct *mm)
{
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

/*
 * Switch on pgstes for the userspace process (for KVM).
 */
int s390_enable_sie(void)
{
	struct mm_struct *mm = current->mm;

	/* Do we have pgstes? If yes, we are done */
	if (mm_has_pgste(mm))
		return 0;
	mmap_write_lock(mm);
	mm->context.has_pgste = 1;
	/* split thp mappings and disable thp for future mappings */
	thp_split_mm(mm);
	mmap_write_unlock(mm);
	return 0;
}
EXPORT_SYMBOL_GPL(s390_enable_sie);

/*
 * Enable storage key handling from now on and initialize the storage
 * keys with the default key.
 */
static int __s390_enable_skey_pte(pte_t *pte, unsigned long addr,
				  unsigned long next, struct mm_walk *walk)
{
	/* Clear storage key */
	ptep_zap_key(walk->mm, addr, pte);
	return 0;
}

/*
 * Give a chance to schedule after setting a key to 256 pages.
 * We only hold the mm lock, which is an rwsem, and the KVM SRCU.
 * Both can sleep.
 */
static int __s390_enable_skey_pmd(pmd_t *pmd, unsigned long addr,
				  unsigned long next, struct mm_walk *walk)
{
	cond_resched();
	return 0;
}

static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr,
				      unsigned long hmask, unsigned long next,
				      struct mm_walk *walk)
{
	pmd_t *pmd = (pmd_t *)pte;
	unsigned long start, end;
	struct folio *folio = page_folio(pmd_page(*pmd));

	/*
	 * The write check makes sure we do not set a key on shared
	 * memory. This is needed as the walker does not differentiate
	 * between actual guest memory and the process executable or
	 * shared libraries.
	 */
	if (pmd_val(*pmd) & _SEGMENT_ENTRY_INVALID ||
	    !(pmd_val(*pmd) & _SEGMENT_ENTRY_WRITE))
		return 0;

	start = pmd_val(*pmd) & HPAGE_MASK;
	end = start + HPAGE_SIZE;
	__storage_key_init_range(start, end);
	set_bit(PG_arch_1, &folio->flags.f);
	cond_resched();
	return 0;
}

static const struct mm_walk_ops enable_skey_walk_ops = {
	.hugetlb_entry	= __s390_enable_skey_hugetlb,
	.pte_entry	= __s390_enable_skey_pte,
	.pmd_entry	= __s390_enable_skey_pmd,
	.walk_lock	= PGWALK_WRLOCK,
};

int s390_enable_skey(void)
{
	struct mm_struct *mm = current->mm;
	int rc = 0;

	mmap_write_lock(mm);
	if (mm_uses_skeys(mm))
		goto out_up;

	mm->context.uses_skeys = 1;
	rc = gmap_helper_disable_cow_sharing();
	if (rc) {
		mm->context.uses_skeys = 0;
		goto out_up;
	}
	walk_page_range(mm, 0, TASK_SIZE, &enable_skey_walk_ops, NULL);

out_up:
	mmap_write_unlock(mm);
	return rc;
}
EXPORT_SYMBOL_GPL(s390_enable_skey);

/*
 * Reset CMMA state, make all pages stable again.
 */
static int __s390_reset_cmma(pte_t *pte, unsigned long addr,
			     unsigned long next, struct mm_walk *walk)
{
	ptep_zap_unused(walk->mm, addr, pte, 1);
	return 0;
}

static const struct mm_walk_ops reset_cmma_walk_ops = {
	.pte_entry	= __s390_reset_cmma,
	.walk_lock	= PGWALK_WRLOCK,
};

void s390_reset_cmma(struct mm_struct *mm)
{
	mmap_write_lock(mm);
	walk_page_range(mm, 0, TASK_SIZE, &reset_cmma_walk_ops, NULL);
	mmap_write_unlock(mm);
}
EXPORT_SYMBOL_GPL(s390_reset_cmma);

#define GATHER_GET_PAGES 32

struct reset_walk_state {
	unsigned long next;
	unsigned long count;
	unsigned long pfns[GATHER_GET_PAGES];
};

static int s390_gather_pages(pte_t *ptep, unsigned long addr,
			     unsigned long next, struct mm_walk *walk)
{
	struct reset_walk_state *p = walk->private;
	pte_t pte = READ_ONCE(*ptep);

	if (pte_present(pte)) {
		/* we have a reference from the mapping, take an extra one */
		get_page(phys_to_page(pte_val(pte)));
		p->pfns[p->count] = phys_to_pfn(pte_val(pte));
		p->next = next;
		p->count++;
	}
	return p->count >= GATHER_GET_PAGES;
}

static const struct mm_walk_ops gather_pages_ops = {
	.pte_entry	= s390_gather_pages,
	.walk_lock	= PGWALK_RDLOCK,
};

/*
 * Call the Destroy secure page UVC on each page in the given array of PFNs.
 * Each page needs to have an extra reference, which will be released here.
 */
void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns)
{
	struct folio *folio;
	unsigned long i;

	for (i = 0; i < count; i++) {
		folio = pfn_folio(pfns[i]);
		/* we always have an extra reference */
		uv_destroy_folio(folio);
		/* get rid of the extra reference */
		folio_put(folio);
		cond_resched();
	}
}
EXPORT_SYMBOL_GPL(s390_uv_destroy_pfns);
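
/*
 * Illustrative sketch, not part of the build: s390_gather_pages() and
 * s390_uv_destroy_pfns() are meant to be used as a pair, the way
 * __s390_uv_destroy_range() below does. The walk takes an extra reference
 * on every present page it records, so the pages stay around until
 * s390_uv_destroy_pfns() has issued the destroy UVC and dropped that
 * reference again:
 *
 *	struct reset_walk_state state = { .next = start };
 *
 *	state.count = 0;
 *	walk_page_range(mm, state.next, end, &gather_pages_ops, &state);
 *	s390_uv_destroy_pfns(state.count, state.pfns);
 */
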
/**
 * __s390_uv_destroy_range - Call the destroy secure page UVC on each page
 * in the given range of the given address space.
 * @mm: the mm to operate on
 * @start: the start of the range
 * @end: the end of the range
 * @interruptible: if not 0, stop when a fatal signal is received
 *
 * Walk the given range of the given address space and call the destroy
 * secure page UVC on each page. Optionally exit early if a fatal signal is
 * pending.
 *
 * Return: 0 on success, -EINTR if the function stopped before completing
 */
int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
			    unsigned long end, bool interruptible)
{
	struct reset_walk_state state = { .next = start };
	int r = 1;

	while (r > 0) {
		state.count = 0;
		mmap_read_lock(mm);
		r = walk_page_range(mm, state.next, end, &gather_pages_ops, &state);
		mmap_read_unlock(mm);
		cond_resched();
		s390_uv_destroy_pfns(state.count, state.pfns);
		if (interruptible && fatal_signal_pending(current))
			return -EINTR;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(__s390_uv_destroy_range);

/**
 * s390_replace_asce - Try to replace the current ASCE of a gmap with a copy
 * @gmap: the gmap whose ASCE needs to be replaced
 *
 * If the ASCE is a SEGMENT type then this function will return -EINVAL,
 * otherwise the pointers in the host_to_guest radix tree will keep pointing
 * to the wrong pages, causing use-after-free and memory corruption.
 * If the allocation of the new top level page table fails, the ASCE is not
 * replaced.
 * In any case, the old ASCE is always removed from the gmap CRST list.
 * Therefore the caller has to make sure to save a pointer to it
 * beforehand, unless a leak is actually intended.
 */
int s390_replace_asce(struct gmap *gmap)
{
	unsigned long asce;
	struct page *page;
	void *table;

	/* Replacing segment type ASCEs would cause serious issues */
	if ((gmap->asce & _ASCE_TYPE_MASK) == _ASCE_TYPE_SEGMENT)
		return -EINVAL;

	page = gmap_alloc_crst();
	if (!page)
		return -ENOMEM;
	table = page_to_virt(page);
	memcpy(table, gmap->table, 1UL << (CRST_ALLOC_ORDER + PAGE_SHIFT));

	/* Set new table origin while preserving existing ASCE control bits */
	asce = (gmap->asce & ~_ASCE_ORIGIN) | __pa(table);
	WRITE_ONCE(gmap->asce, asce);
	WRITE_ONCE(gmap->mm->context.gmap_asce, asce);
	WRITE_ONCE(gmap->table, table);

	return 0;
}
EXPORT_SYMBOL_GPL(s390_replace_asce);
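
/*
 * Illustrative sketch (hypothetical caller, not part of the build): as the
 * comment above s390_replace_asce() notes, the previous top level table is
 * no longer reachable through the gmap once the ASCE has been replaced, so
 * a caller that still needs it has to remember it before the call:
 *
 *	unsigned long *old_table = gmap->table;
 *	int rc;
 *
 *	rc = s390_replace_asce(gmap);
 *	if (rc)
 *		return rc;
 *	(old_table still points to the previous top level table)
 */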