// SPDX-License-Identifier: GPL-2.0
/*
 * KVM guest address space mapping code
 *
 * Copyright IBM Corp. 2007, 2020
 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 *	      David Hildenbrand <david@redhat.com>
 *	      Janosch Frank <frankja@linux.vnet.ibm.com>
 */

#include <linux/cpufeature.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/pagewalk.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/swapops.h>
#include <linux/ksm.h>
#include <linux/mman.h>
#include <linux/pgtable.h>
#include <asm/page-states.h>
#include <asm/pgalloc.h>
#include <asm/machine.h>
#include <asm/gmap_helpers.h>
#include <asm/gmap.h>
#include <asm/page.h>

/*
 * The address is saved in a radix tree directly; NULL would be ambiguous,
 * since 0 is a valid address, and NULL is returned when nothing was found.
 * The lower bits are ignored by all users of the macro, so it can be used
 * to distinguish a valid address 0 from a NULL.
 */
#define VALID_GADDR_FLAG 1
#define IS_GADDR_VALID(gaddr) ((gaddr) & VALID_GADDR_FLAG)
#define MAKE_VALID_GADDR(gaddr) (((gaddr) & HPAGE_MASK) | VALID_GADDR_FLAG)

#define GMAP_SHADOW_FAKE_TABLE 1ULL

static struct page *gmap_alloc_crst(void)
{
	struct page *page;

	page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER);
	if (!page)
		return NULL;
	__arch_set_page_dat(page_to_virt(page), 1UL << CRST_ALLOC_ORDER);
	return page;
}

/**
 * gmap_alloc - allocate and initialize a guest address space
 * @limit: maximum address of the gmap address space
 *
 * Returns a guest address space structure.
 */
struct gmap *gmap_alloc(unsigned long limit)
{
	struct gmap *gmap;
	struct page *page;
	unsigned long *table;
	unsigned long etype, atype;

	if (limit < _REGION3_SIZE) {
		limit = _REGION3_SIZE - 1;
		atype = _ASCE_TYPE_SEGMENT;
		etype = _SEGMENT_ENTRY_EMPTY;
	} else if (limit < _REGION2_SIZE) {
		limit = _REGION2_SIZE - 1;
		atype = _ASCE_TYPE_REGION3;
		etype = _REGION3_ENTRY_EMPTY;
	} else if (limit < _REGION1_SIZE) {
		limit = _REGION1_SIZE - 1;
		atype = _ASCE_TYPE_REGION2;
		etype = _REGION2_ENTRY_EMPTY;
	} else {
		limit = -1UL;
		atype = _ASCE_TYPE_REGION1;
		etype = _REGION1_ENTRY_EMPTY;
	}
	gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL_ACCOUNT);
	if (!gmap)
		goto out;
	INIT_LIST_HEAD(&gmap->children);
	INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL_ACCOUNT);
	INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC | __GFP_ACCOUNT);
	INIT_RADIX_TREE(&gmap->host_to_rmap, GFP_ATOMIC | __GFP_ACCOUNT);
	spin_lock_init(&gmap->guest_table_lock);
	spin_lock_init(&gmap->shadow_lock);
	refcount_set(&gmap->ref_count, 1);
	page = gmap_alloc_crst();
	if (!page)
		goto out_free;
	table = page_to_virt(page);
	crst_table_init(table, etype);
	gmap->table = table;
	gmap->asce = atype | _ASCE_TABLE_LENGTH |
		_ASCE_USER_BITS | __pa(table);
	gmap->asce_end = limit;
	return gmap;

out_free:
	kfree(gmap);
out:
	return NULL;
}
EXPORT_SYMBOL_GPL(gmap_alloc);
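/*
 * Editor's note (illustrative, not from the original source): the requested
 * @limit picks the smallest table hierarchy that can hold it, e.g.:
 *
 *	gmap_alloc((1UL << 31) - 1);	-> segment table,  2 GB address space
 *	gmap_alloc((1UL << 42) - 1);	-> region-3 table, 4 TB address space
 *	gmap_alloc((1UL << 53) - 1);	-> region-2 table, 8 PB address space
 *	gmap_alloc(-1UL);		-> region-1 table, full 64-bit space
 *
 * The rounded-up limit is what ends up in gmap->asce_end.
 */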
/**
 * gmap_create - create a guest address space
 * @mm: pointer to the parent mm_struct
 * @limit: maximum size of the gmap address space
 *
 * Returns a guest address space structure.
 */
struct gmap *gmap_create(struct mm_struct *mm, unsigned long limit)
{
	struct gmap *gmap;
	unsigned long gmap_asce;

	gmap = gmap_alloc(limit);
	if (!gmap)
		return NULL;
	gmap->mm = mm;
	spin_lock(&mm->context.lock);
	list_add_rcu(&gmap->list, &mm->context.gmap_list);
	if (list_is_singular(&mm->context.gmap_list))
		gmap_asce = gmap->asce;
	else
		gmap_asce = -1UL;
	WRITE_ONCE(mm->context.gmap_asce, gmap_asce);
	spin_unlock(&mm->context.lock);
	return gmap;
}
EXPORT_SYMBOL_GPL(gmap_create);

static void gmap_flush_tlb(struct gmap *gmap)
{
	if (cpu_has_idte())
		__tlb_flush_idte(gmap->asce);
	else
		__tlb_flush_global();
}

static void gmap_radix_tree_free(struct radix_tree_root *root)
{
	struct radix_tree_iter iter;
	unsigned long indices[16];
	unsigned long index;
	void __rcu **slot;
	int i, nr;

	/* A radix tree is freed by deleting all of its entries */
	index = 0;
	do {
		nr = 0;
		radix_tree_for_each_slot(slot, root, &iter, index) {
			indices[nr] = iter.index;
			if (++nr == 16)
				break;
		}
		for (i = 0; i < nr; i++) {
			index = indices[i];
			radix_tree_delete(root, index);
		}
	} while (nr > 0);
}

static void gmap_rmap_radix_tree_free(struct radix_tree_root *root)
{
	struct gmap_rmap *rmap, *rnext, *head;
	struct radix_tree_iter iter;
	unsigned long indices[16];
	unsigned long index;
	void __rcu **slot;
	int i, nr;

	/* A radix tree is freed by deleting all of its entries */
	index = 0;
	do {
		nr = 0;
		radix_tree_for_each_slot(slot, root, &iter, index) {
			indices[nr] = iter.index;
			if (++nr == 16)
				break;
		}
		for (i = 0; i < nr; i++) {
			index = indices[i];
			head = radix_tree_delete(root, index);
			gmap_for_each_rmap_safe(rmap, rnext, head)
				kfree(rmap);
		}
	} while (nr > 0);
}

static void gmap_free_crst(unsigned long *table, bool free_ptes)
{
	bool is_segment = (table[0] & _SEGMENT_ENTRY_TYPE_MASK) == 0;
	int i;

	if (is_segment) {
		if (!free_ptes)
			goto out;
		for (i = 0; i < _CRST_ENTRIES; i++)
			if (!(table[i] & _SEGMENT_ENTRY_INVALID))
				page_table_free_pgste(page_ptdesc(phys_to_page(table[i])));
	} else {
		for (i = 0; i < _CRST_ENTRIES; i++)
			if (!(table[i] & _REGION_ENTRY_INVALID))
				gmap_free_crst(__va(table[i] & PAGE_MASK), free_ptes);
	}

out:
	free_pages((unsigned long)table, CRST_ALLOC_ORDER);
}
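/*
 * Editor's note (illustrative sketch, not from the original source): a
 * typical lifecycle as seen from a hypothetical owner of @mm would be
 *
 *	struct gmap *g = gmap_create(mm, (1UL << 44) - 1);
 *
 *	if (g) {
 *		... fault in / protect guest memory ...
 *		gmap_remove(g);		(unlinks and drops the initial reference)
 *	}
 *
 * Additional users take temporary references with gmap_get()/gmap_put();
 * the structure is only freed once the refcount reaches zero.
 */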
/**
 * gmap_free - free a guest address space
 * @gmap: pointer to the guest address space structure
 *
 * No locks required. There are no references to this gmap anymore.
 */
void gmap_free(struct gmap *gmap)
{
	/* Flush tlb of all gmaps (if not already done for shadows) */
	if (!(gmap_is_shadow(gmap) && gmap->removed))
		gmap_flush_tlb(gmap);
	/* Free all segment & region tables. */
	gmap_free_crst(gmap->table, gmap_is_shadow(gmap));

	gmap_radix_tree_free(&gmap->guest_to_host);
	gmap_radix_tree_free(&gmap->host_to_guest);

	/* Free additional data for a shadow gmap */
	if (gmap_is_shadow(gmap)) {
		gmap_rmap_radix_tree_free(&gmap->host_to_rmap);
		/* Release reference to the parent */
		gmap_put(gmap->parent);
	}

	kfree(gmap);
}
EXPORT_SYMBOL_GPL(gmap_free);

/**
 * gmap_get - increase reference counter for guest address space
 * @gmap: pointer to the guest address space structure
 *
 * Returns the gmap pointer
 */
struct gmap *gmap_get(struct gmap *gmap)
{
	refcount_inc(&gmap->ref_count);
	return gmap;
}
EXPORT_SYMBOL_GPL(gmap_get);

/**
 * gmap_put - decrease reference counter for guest address space
 * @gmap: pointer to the guest address space structure
 *
 * If the reference counter reaches zero the guest address space is freed.
 */
void gmap_put(struct gmap *gmap)
{
	if (refcount_dec_and_test(&gmap->ref_count))
		gmap_free(gmap);
}
EXPORT_SYMBOL_GPL(gmap_put);

/**
 * gmap_remove - remove a guest address space but do not free it yet
 * @gmap: pointer to the guest address space structure
 */
void gmap_remove(struct gmap *gmap)
{
	struct gmap *sg, *next;
	unsigned long gmap_asce;

	/* Remove all shadow gmaps linked to this gmap */
	if (!list_empty(&gmap->children)) {
		spin_lock(&gmap->shadow_lock);
		list_for_each_entry_safe(sg, next, &gmap->children, list) {
			list_del(&sg->list);
			gmap_put(sg);
		}
		spin_unlock(&gmap->shadow_lock);
	}
	/* Remove gmap from the per-mm list */
	spin_lock(&gmap->mm->context.lock);
	list_del_rcu(&gmap->list);
	if (list_empty(&gmap->mm->context.gmap_list))
		gmap_asce = 0;
	else if (list_is_singular(&gmap->mm->context.gmap_list))
		gmap_asce = list_first_entry(&gmap->mm->context.gmap_list,
					     struct gmap, list)->asce;
	else
		gmap_asce = -1UL;
	WRITE_ONCE(gmap->mm->context.gmap_asce, gmap_asce);
	spin_unlock(&gmap->mm->context.lock);
	synchronize_rcu();
	/* Put reference */
	gmap_put(gmap);
}
EXPORT_SYMBOL_GPL(gmap_remove);

/*
 * gmap_alloc_table is assumed to be called with mmap_lock held
 */
static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
			    unsigned long init, unsigned long gaddr)
{
	struct page *page;
	unsigned long *new;

	/* since we don't free the gmap table until gmap_free we can unlock */
	page = gmap_alloc_crst();
	if (!page)
		return -ENOMEM;
	new = page_to_virt(page);
	crst_table_init(new, init);
	spin_lock(&gmap->guest_table_lock);
	if (*table & _REGION_ENTRY_INVALID) {
		*table = __pa(new) | _REGION_ENTRY_LENGTH |
			(*table & _REGION_ENTRY_TYPE_MASK);
		page = NULL;
	}
	spin_unlock(&gmap->guest_table_lock);
	if (page)
		__free_pages(page, CRST_ALLOC_ORDER);
	return 0;
}

static unsigned long host_to_guest_lookup(struct gmap *gmap, unsigned long vmaddr)
{
	return (unsigned long)radix_tree_lookup(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
}

static unsigned long host_to_guest_delete(struct gmap *gmap, unsigned long vmaddr)
{
	return (unsigned long)radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
}
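/*
 * Editor's note (illustrative, not from the original source): host_to_guest
 * stores segment-aligned guest addresses with VALID_GADDR_FLAG set, so that
 * guest address 0 can be told apart from an empty slot:
 *
 *	radix_tree_insert(&gmap->host_to_guest, vmaddr >> PMD_SHIFT,
 *			  (void *)MAKE_VALID_GADDR(gaddr));
 *	...
 *	gaddr = host_to_guest_lookup(gmap, vmaddr);
 *	if (IS_GADDR_VALID(gaddr))
 *		... use gaddr; the flag bit sits far below segment
 *		    granularity and is ignored by all consumers ...
 */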
static pmd_t *host_to_guest_pmd_delete(struct gmap *gmap, unsigned long vmaddr,
				       unsigned long *gaddr)
{
	*gaddr = host_to_guest_delete(gmap, vmaddr);
	if (IS_GADDR_VALID(*gaddr))
		return (pmd_t *)gmap_table_walk(gmap, *gaddr, 1);
	return NULL;
}

/**
 * __gmap_unlink_by_vmaddr - unlink a single segment via a host address
 * @gmap: pointer to the guest address space structure
 * @vmaddr: address in the host process address space
 *
 * Returns 1 if a TLB flush is required
 */
static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr)
{
	unsigned long gaddr;
	int flush = 0;
	pmd_t *pmdp;

	BUG_ON(gmap_is_shadow(gmap));
	spin_lock(&gmap->guest_table_lock);

	pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr);
	if (pmdp) {
		flush = (pmd_val(*pmdp) != _SEGMENT_ENTRY_EMPTY);
		*pmdp = __pmd(_SEGMENT_ENTRY_EMPTY);
	}

	spin_unlock(&gmap->guest_table_lock);
	return flush;
}

/**
 * __gmap_unmap_by_gaddr - unmap a single segment via a guest address
 * @gmap: pointer to the guest address space structure
 * @gaddr: address in the guest address space
 *
 * Returns 1 if a TLB flush is required
 */
static int __gmap_unmap_by_gaddr(struct gmap *gmap, unsigned long gaddr)
{
	unsigned long vmaddr;

	vmaddr = (unsigned long) radix_tree_delete(&gmap->guest_to_host,
						   gaddr >> PMD_SHIFT);
	return vmaddr ? __gmap_unlink_by_vmaddr(gmap, vmaddr) : 0;
}

/**
 * gmap_unmap_segment - unmap segment from the guest address space
 * @gmap: pointer to the guest address space structure
 * @to: address in the guest address space
 * @len: length of the memory area to unmap
 *
 * Returns 0 if the unmap succeeded, -EINVAL if not.
 */
int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
{
	unsigned long off;
	int flush;

	BUG_ON(gmap_is_shadow(gmap));
	if ((to | len) & (PMD_SIZE - 1))
		return -EINVAL;
	if (len == 0 || to + len < to)
		return -EINVAL;

	flush = 0;
	mmap_write_lock(gmap->mm);
	for (off = 0; off < len; off += PMD_SIZE)
		flush |= __gmap_unmap_by_gaddr(gmap, to + off);
	mmap_write_unlock(gmap->mm);
	if (flush)
		gmap_flush_tlb(gmap);
	return 0;
}
EXPORT_SYMBOL_GPL(gmap_unmap_segment);
/**
 * gmap_map_segment - map a segment to the guest address space
 * @gmap: pointer to the guest address space structure
 * @from: source address in the parent address space
 * @to: target address in the guest address space
 * @len: length of the memory area to map
 *
 * Returns 0 if the map succeeded, -EINVAL or -ENOMEM if not.
 */
int gmap_map_segment(struct gmap *gmap, unsigned long from,
		     unsigned long to, unsigned long len)
{
	unsigned long off;
	int flush;

	BUG_ON(gmap_is_shadow(gmap));
	if ((from | to | len) & (PMD_SIZE - 1))
		return -EINVAL;
	if (len == 0 || from + len < from || to + len < to ||
	    from + len - 1 > TASK_SIZE_MAX || to + len - 1 > gmap->asce_end)
		return -EINVAL;

	flush = 0;
	mmap_write_lock(gmap->mm);
	for (off = 0; off < len; off += PMD_SIZE) {
		/* Remove old translation */
		flush |= __gmap_unmap_by_gaddr(gmap, to + off);
		/* Store new translation */
		if (radix_tree_insert(&gmap->guest_to_host,
				      (to + off) >> PMD_SHIFT,
				      (void *) from + off))
			break;
	}
	mmap_write_unlock(gmap->mm);
	if (flush)
		gmap_flush_tlb(gmap);
	if (off >= len)
		return 0;
	gmap_unmap_segment(gmap, to, len);
	return -ENOMEM;
}
EXPORT_SYMBOL_GPL(gmap_map_segment);

/**
 * __gmap_translate - translate a guest address to a user space address
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: guest address
 *
 * Returns user space address which corresponds to the guest address or
 * -EFAULT if no such mapping exists.
 * This function does not establish potentially missing page table entries.
 * The mmap_lock of the mm that belongs to the address space must be held
 * when this function gets called.
 *
 * Note: Can also be called for shadow gmaps.
 */
unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
{
	unsigned long vmaddr;

	vmaddr = (unsigned long)
		radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT);
	/* Note: guest_to_host is empty for a shadow gmap */
	return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT;
}
EXPORT_SYMBOL_GPL(__gmap_translate);

/**
 * gmap_unlink - disconnect a page table from the gmap shadow tables
 * @mm: pointer to the parent mm_struct
 * @table: pointer to the host page table
 * @vmaddr: vm address associated with the host page table
 */
void gmap_unlink(struct mm_struct *mm, unsigned long *table,
		 unsigned long vmaddr)
{
	struct gmap *gmap;
	int flush;

	rcu_read_lock();
	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
		flush = __gmap_unlink_by_vmaddr(gmap, vmaddr);
		if (flush)
			gmap_flush_tlb(gmap);
	}
	rcu_read_unlock();
}

static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *old, pmd_t new,
			   unsigned long gaddr);
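/*
 * Editor's note (illustrative sketch, not from the original source): mapping
 * and translating are segment (PMD_SIZE) granular.  A hypothetical caller
 * could do
 *
 *	rc = gmap_map_segment(g, userspace_base, 0x100000UL, 0x100000UL);
 *	if (!rc) {
 *		mmap_read_lock(g->mm);
 *		vmaddr = __gmap_translate(g, 0x100000UL + 0x42);
 *		mmap_read_unlock(g->mm);
 *	}
 *
 * where userspace_base is a PMD_SIZE-aligned placeholder; __gmap_translate()
 * only consults guest_to_host and never faults anything in.
 */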
/**
 * __gmap_link - set up shadow page tables to connect a host to a guest address
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: guest address
 * @vmaddr: vm address
 *
 * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
 * if the vm address is already mapped to a different guest segment.
 * The mmap_lock of the mm that belongs to the address space must be held
 * when this function gets called.
 */
int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
{
	struct mm_struct *mm;
	unsigned long *table;
	spinlock_t *ptl;
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	u64 unprot;
	int rc;

	BUG_ON(gmap_is_shadow(gmap));
	/* Create higher level tables in the gmap page table */
	table = gmap->table;
	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) {
		table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT;
		if ((*table & _REGION_ENTRY_INVALID) &&
		    gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY,
				     gaddr & _REGION1_MASK))
			return -ENOMEM;
		table = __va(*table & _REGION_ENTRY_ORIGIN);
	}
	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) {
		table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT;
		if ((*table & _REGION_ENTRY_INVALID) &&
		    gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY,
				     gaddr & _REGION2_MASK))
			return -ENOMEM;
		table = __va(*table & _REGION_ENTRY_ORIGIN);
	}
	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) {
		table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT;
		if ((*table & _REGION_ENTRY_INVALID) &&
		    gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY,
				     gaddr & _REGION3_MASK))
			return -ENOMEM;
		table = __va(*table & _REGION_ENTRY_ORIGIN);
	}
	table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
	/* Walk the parent mm page table */
	mm = gmap->mm;
	pgd = pgd_offset(mm, vmaddr);
	VM_BUG_ON(pgd_none(*pgd));
	p4d = p4d_offset(pgd, vmaddr);
	VM_BUG_ON(p4d_none(*p4d));
	pud = pud_offset(p4d, vmaddr);
	VM_BUG_ON(pud_none(*pud));
	/* large puds cannot yet be handled */
	if (pud_leaf(*pud))
		return -EFAULT;
	pmd = pmd_offset(pud, vmaddr);
	VM_BUG_ON(pmd_none(*pmd));
	/* Are we allowed to use huge pages? */
	if (pmd_leaf(*pmd) && !gmap->mm->context.allow_gmap_hpage_1m)
		return -EFAULT;
	/* Link gmap segment table entry location to page table. */
	rc = radix_tree_preload(GFP_KERNEL_ACCOUNT);
	if (rc)
		return rc;
	ptl = pmd_lock(mm, pmd);
	spin_lock(&gmap->guest_table_lock);
	if (*table == _SEGMENT_ENTRY_EMPTY) {
		rc = radix_tree_insert(&gmap->host_to_guest,
				       vmaddr >> PMD_SHIFT,
				       (void *)MAKE_VALID_GADDR(gaddr));
		if (!rc) {
			if (pmd_leaf(*pmd)) {
				*table = (pmd_val(*pmd) &
					  _SEGMENT_ENTRY_HARDWARE_BITS_LARGE)
					 | _SEGMENT_ENTRY_GMAP_UC
					 | _SEGMENT_ENTRY;
			} else
				*table = pmd_val(*pmd) &
					 _SEGMENT_ENTRY_HARDWARE_BITS;
		}
	} else if (*table & _SEGMENT_ENTRY_PROTECT &&
		   !(pmd_val(*pmd) & _SEGMENT_ENTRY_PROTECT)) {
		unprot = (u64)*table;
		unprot &= ~_SEGMENT_ENTRY_PROTECT;
		unprot |= _SEGMENT_ENTRY_GMAP_UC;
		gmap_pmdp_xchg(gmap, (pmd_t *)table, __pmd(unprot), gaddr);
	}
	spin_unlock(&gmap->guest_table_lock);
	spin_unlock(ptl);
	radix_tree_preload_end();
	return rc;
}
EXPORT_SYMBOL(__gmap_link);
/*
 * this function is assumed to be called with mmap_lock held
 */
void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
{
	unsigned long vmaddr;

	mmap_assert_locked(gmap->mm);

	/* Find the vm address for the guest address */
	vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host,
						   gaddr >> PMD_SHIFT);
	if (vmaddr) {
		vmaddr |= gaddr & ~PMD_MASK;
		gmap_helper_zap_one_page(gmap->mm, vmaddr);
	}
}
EXPORT_SYMBOL_GPL(__gmap_zap);

static LIST_HEAD(gmap_notifier_list);
static DEFINE_SPINLOCK(gmap_notifier_lock);

/**
 * gmap_register_pte_notifier - register a pte invalidation callback
 * @nb: pointer to the gmap notifier block
 */
void gmap_register_pte_notifier(struct gmap_notifier *nb)
{
	spin_lock(&gmap_notifier_lock);
	list_add_rcu(&nb->list, &gmap_notifier_list);
	spin_unlock(&gmap_notifier_lock);
}
EXPORT_SYMBOL_GPL(gmap_register_pte_notifier);

/**
 * gmap_unregister_pte_notifier - remove a pte invalidation callback
 * @nb: pointer to the gmap notifier block
 */
void gmap_unregister_pte_notifier(struct gmap_notifier *nb)
{
	spin_lock(&gmap_notifier_lock);
	list_del_rcu(&nb->list);
	spin_unlock(&gmap_notifier_lock);
	synchronize_rcu();
}
EXPORT_SYMBOL_GPL(gmap_unregister_pte_notifier);

/**
 * gmap_call_notifier - call all registered invalidation callbacks
 * @gmap: pointer to guest mapping meta data structure
 * @start: start virtual address in the guest address space
 * @end: end virtual address in the guest address space
 */
static void gmap_call_notifier(struct gmap *gmap, unsigned long start,
			       unsigned long end)
{
	struct gmap_notifier *nb;

	list_for_each_entry(nb, &gmap_notifier_list, list)
		nb->notifier_call(gmap, start, end);
}
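/*
 * Editor's note (illustrative sketch, not from the original source): a
 * consumer such as a hypervisor module would hook invalidations roughly as
 *
 *	static void my_gmap_notifier(struct gmap *gmap, unsigned long start,
 *				     unsigned long end)
 *	{
 *		... react to [start, end] being invalidated ...
 *	}
 *
 *	static struct gmap_notifier my_nb = { .notifier_call = my_gmap_notifier };
 *
 *	gmap_register_pte_notifier(&my_nb);
 *	...
 *	gmap_unregister_pte_notifier(&my_nb);
 *
 * my_gmap_notifier and my_nb are placeholder names.
 */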
/**
 * gmap_table_walk - walk the gmap page tables
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @level: page table level to stop at
 *
 * Returns a table entry pointer for the given guest address and @level
 * @level=0 : returns a pointer to a page table entry (or NULL)
 * @level=1 : returns a pointer to a segment table entry (or NULL)
 * @level=2 : returns a pointer to a region-3 table entry (or NULL)
 * @level=3 : returns a pointer to a region-2 table entry (or NULL)
 * @level=4 : returns a pointer to a region-1 table entry (or NULL)
 *
 * Returns NULL if the gmap page tables could not be walked to the
 * requested level.
 *
 * Note: Can also be called for shadow gmaps.
 */
unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr, int level)
{
	const int asce_type = gmap->asce & _ASCE_TYPE_MASK;
	unsigned long *table = gmap->table;

	if (gmap_is_shadow(gmap) && gmap->removed)
		return NULL;

	if (WARN_ON_ONCE(level > (asce_type >> 2) + 1))
		return NULL;

	if (asce_type != _ASCE_TYPE_REGION1 &&
	    gaddr & (-1UL << (31 + (asce_type >> 2) * 11)))
		return NULL;

	switch (asce_type) {
	case _ASCE_TYPE_REGION1:
		table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT;
		if (level == 4)
			break;
		if (*table & _REGION_ENTRY_INVALID)
			return NULL;
		table = __va(*table & _REGION_ENTRY_ORIGIN);
		fallthrough;
	case _ASCE_TYPE_REGION2:
		table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT;
		if (level == 3)
			break;
		if (*table & _REGION_ENTRY_INVALID)
			return NULL;
		table = __va(*table & _REGION_ENTRY_ORIGIN);
		fallthrough;
	case _ASCE_TYPE_REGION3:
		table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT;
		if (level == 2)
			break;
		if (*table & _REGION_ENTRY_INVALID)
			return NULL;
		table = __va(*table & _REGION_ENTRY_ORIGIN);
		fallthrough;
	case _ASCE_TYPE_SEGMENT:
		table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
		if (level == 1)
			break;
		if (*table & _REGION_ENTRY_INVALID)
			return NULL;
		table = __va(*table & _SEGMENT_ENTRY_ORIGIN);
		table += (gaddr & _PAGE_INDEX) >> PAGE_SHIFT;
	}
	return table;
}
EXPORT_SYMBOL(gmap_table_walk);
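/*
 * Editor's note (illustrative, not from the original source): the @level
 * argument selects where the walk stops, counted from the bottom.  For
 * instance, with the guest_table_lock held a caller can peek at the segment
 * entry backing a guest address:
 *
 *	unsigned long *ste = gmap_table_walk(gmap, gaddr, 1);
 *
 *	if (ste && !(*ste & _SEGMENT_ENTRY_INVALID))
 *		... the segment for gaddr is currently linked ...
 *
 * while level 0 would descend one step further and return the pte slot.
 */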
/**
 * gmap_pte_op_walk - walk the gmap page table, get the page table lock
 *		      and return the pte pointer
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @ptl: pointer to the spinlock pointer
 *
 * Returns a pointer to the locked pte for a guest address, or NULL
 */
static pte_t *gmap_pte_op_walk(struct gmap *gmap, unsigned long gaddr,
			       spinlock_t **ptl)
{
	unsigned long *table;

	BUG_ON(gmap_is_shadow(gmap));
	/* Walk the gmap page table, lock and get pte pointer */
	table = gmap_table_walk(gmap, gaddr, 1); /* get segment pointer */
	if (!table || *table & _SEGMENT_ENTRY_INVALID)
		return NULL;
	return pte_alloc_map_lock(gmap->mm, (pmd_t *) table, gaddr, ptl);
}

/**
 * gmap_pte_op_fixup - force a page in and connect the gmap page table
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @vmaddr: address in the host process address space
 * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
 *
 * Returns 0 if the caller can retry __gmap_translate (might fail again),
 * -ENOMEM if out of memory and -EFAULT if anything goes wrong while fixing
 * up or connecting the gmap page table.
 */
static int gmap_pte_op_fixup(struct gmap *gmap, unsigned long gaddr,
			     unsigned long vmaddr, int prot)
{
	struct mm_struct *mm = gmap->mm;
	unsigned int fault_flags;
	bool unlocked = false;

	BUG_ON(gmap_is_shadow(gmap));
	fault_flags = (prot == PROT_WRITE) ? FAULT_FLAG_WRITE : 0;
	if (fixup_user_fault(mm, vmaddr, fault_flags, &unlocked))
		return -EFAULT;
	if (unlocked)
		/* lost mmap_lock, caller has to retry __gmap_translate */
		return 0;
	/* Connect the page tables */
	return __gmap_link(gmap, gaddr, vmaddr);
}

/**
 * gmap_pte_op_end - release the page table lock
 * @ptep: pointer to the locked pte
 * @ptl: pointer to the page table spinlock
 */
static void gmap_pte_op_end(pte_t *ptep, spinlock_t *ptl)
{
	pte_unmap_unlock(ptep, ptl);
}

/**
 * gmap_pmd_op_walk - walk the gmap tables, get the guest table lock
 *		      and return the pmd pointer
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 *
 * Returns a pointer to the pmd for a guest address, or NULL
 */
static inline pmd_t *gmap_pmd_op_walk(struct gmap *gmap, unsigned long gaddr)
{
	pmd_t *pmdp;

	BUG_ON(gmap_is_shadow(gmap));
	pmdp = (pmd_t *) gmap_table_walk(gmap, gaddr, 1);
	if (!pmdp)
		return NULL;

	/* without huge pages, there is no need to take the table lock */
	if (!gmap->mm->context.allow_gmap_hpage_1m)
		return pmd_none(*pmdp) ? NULL : pmdp;

	spin_lock(&gmap->guest_table_lock);
	if (pmd_none(*pmdp)) {
		spin_unlock(&gmap->guest_table_lock);
		return NULL;
	}

	/* 4k page table entries are locked via the pte (pte_alloc_map_lock). */
	if (!pmd_leaf(*pmdp))
		spin_unlock(&gmap->guest_table_lock);
	return pmdp;
}

/**
 * gmap_pmd_op_end - release the guest_table_lock if needed
 * @gmap: pointer to the guest mapping meta data structure
 * @pmdp: pointer to the pmd
 */
static inline void gmap_pmd_op_end(struct gmap *gmap, pmd_t *pmdp)
{
	if (pmd_leaf(*pmdp))
		spin_unlock(&gmap->guest_table_lock);
}
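/*
 * Editor's note (illustrative sketch, not from the original source): the
 * pte_op helpers are meant to be used in a retry loop, which is the pattern
 * gmap_read_table() below follows:
 *
 *	while (1) {
 *		ptep = gmap_pte_op_walk(gmap, gaddr, &ptl);
 *		if (ptep) {
 *			... operate on the locked pte ...
 *			gmap_pte_op_end(ptep, ptl);
 *			break;
 *		}
 *		vmaddr = __gmap_translate(gmap, gaddr);
 *		if (IS_ERR_VALUE(vmaddr))
 *			return vmaddr;
 *		rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, prot);
 *		if (rc)
 *			return rc;
 *	}
 */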
/*
 * gmap_protect_pmd - remove access rights to memory and set pmd notification bits
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @pmdp: pointer to the pmd to be protected
 * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
 * @bits: notification bits to set
 *
 * Returns:
 * 0 if successfully protected
 * -EAGAIN if a fixup is needed
 * -EINVAL if unsupported notifier bits have been specified
 *
 * Expected to be called with sg->mm->mmap_lock in read and
 * guest_table_lock held.
 */
static int gmap_protect_pmd(struct gmap *gmap, unsigned long gaddr,
			    pmd_t *pmdp, int prot, unsigned long bits)
{
	int pmd_i = pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID;
	int pmd_p = pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT;
	pmd_t new = *pmdp;

	/* Fixup needed */
	if ((pmd_i && (prot != PROT_NONE)) || (pmd_p && (prot == PROT_WRITE)))
		return -EAGAIN;

	if (prot == PROT_NONE && !pmd_i) {
		new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_INVALID));
		gmap_pmdp_xchg(gmap, pmdp, new, gaddr);
	}

	if (prot == PROT_READ && !pmd_p) {
		new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_INVALID));
		new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_PROTECT));
		gmap_pmdp_xchg(gmap, pmdp, new, gaddr);
	}

	if (bits & GMAP_NOTIFY_MPROT)
		set_pmd(pmdp, set_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_GMAP_IN)));

	/* Shadow GMAP protection needs split PMDs */
	if (bits & GMAP_NOTIFY_SHADOW)
		return -EINVAL;

	return 0;
}

/*
 * gmap_protect_pte - remove access rights to memory and set pgste bits
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @pmdp: pointer to the pmd associated with the pte
 * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
 * @bits: notification bits to set
 *
 * Returns 0 if successfully protected, -ENOMEM if out of memory and
 * -EAGAIN if a fixup is needed.
 *
 * Expected to be called with sg->mm->mmap_lock in read
 */
static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr,
			    pmd_t *pmdp, int prot, unsigned long bits)
{
	int rc;
	pte_t *ptep;
	spinlock_t *ptl;
	unsigned long pbits = 0;

	if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
		return -EAGAIN;

	ptep = pte_alloc_map_lock(gmap->mm, pmdp, gaddr, &ptl);
	if (!ptep)
		return -ENOMEM;

	pbits |= (bits & GMAP_NOTIFY_MPROT) ? PGSTE_IN_BIT : 0;
	pbits |= (bits & GMAP_NOTIFY_SHADOW) ? PGSTE_VSIE_BIT : 0;
	/* Protect and unlock. */
	rc = ptep_force_prot(gmap->mm, gaddr, ptep, prot, pbits);
	gmap_pte_op_end(ptep, ptl);
	return rc;
}
/*
 * gmap_protect_one - remove access rights to memory and set pgste bits
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
 * @bits: pgste notification bits to set
 *
 * Returns:
 *   PAGE_SIZE if a small page was successfully protected;
 *   HPAGE_SIZE if a large page was successfully protected;
 *   -ENOMEM if out of memory;
 *   -EFAULT if gaddr is invalid (or mapping for shadows is missing);
 *   -EAGAIN if the guest mapping is missing and should be fixed by the caller.
 *
 * Context: Called with sg->mm->mmap_lock in read.
 */
int gmap_protect_one(struct gmap *gmap, unsigned long gaddr, int prot, unsigned long bits)
{
	pmd_t *pmdp;
	int rc = 0;

	BUG_ON(gmap_is_shadow(gmap));

	pmdp = gmap_pmd_op_walk(gmap, gaddr);
	if (!pmdp)
		return -EAGAIN;

	if (!pmd_leaf(*pmdp)) {
		rc = gmap_protect_pte(gmap, gaddr, pmdp, prot, bits);
		if (!rc)
			rc = PAGE_SIZE;
	} else {
		rc = gmap_protect_pmd(gmap, gaddr, pmdp, prot, bits);
		if (!rc)
			rc = HPAGE_SIZE;
	}
	gmap_pmd_op_end(gmap, pmdp);

	return rc;
}
EXPORT_SYMBOL_GPL(gmap_protect_one);
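/*
 * Editor's note (illustrative sketch, not from the original source): callers
 * protect a range by looping over gmap_protect_one() and advancing by its
 * positive return value (PAGE_SIZE or HPAGE_SIZE), resolving -EAGAIN with a
 * translate/fixup step, roughly:
 *
 *	while (len) {
 *		rc = gmap_protect_one(gmap, gaddr, PROT_READ, GMAP_NOTIFY_MPROT);
 *		if (rc > 0) {
 *			gaddr += rc;
 *			len -= min(len, (unsigned long)rc);
 *		} else if (rc == -EAGAIN) {
 *			... __gmap_translate() + gmap_pte_op_fixup(), retry ...
 *		} else {
 *			break;
 *		}
 *	}
 */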
/**
 * gmap_read_table - get an unsigned long value from a guest page table using
 *		     absolute addressing, without marking the page referenced.
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @val: pointer to the unsigned long value to return
 *
 * Returns 0 if the value was read, -ENOMEM if out of memory, -EFAULT if
 * reading using the virtual address failed, and -EINVAL if called on a gmap
 * shadow.
 *
 * Called with gmap->mm->mmap_lock in read.
 */
int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val)
{
	unsigned long address, vmaddr;
	spinlock_t *ptl;
	pte_t *ptep, pte;
	int rc;

	if (gmap_is_shadow(gmap))
		return -EINVAL;

	while (1) {
		rc = -EAGAIN;
		ptep = gmap_pte_op_walk(gmap, gaddr, &ptl);
		if (ptep) {
			pte = *ptep;
			if (pte_present(pte) && (pte_val(pte) & _PAGE_READ)) {
				address = pte_val(pte) & PAGE_MASK;
				address += gaddr & ~PAGE_MASK;
				*val = *(unsigned long *)__va(address);
				set_pte(ptep, set_pte_bit(*ptep, __pgprot(_PAGE_YOUNG)));
				/* Do *NOT* clear the _PAGE_INVALID bit! */
				rc = 0;
			}
			gmap_pte_op_end(ptep, ptl);
		}
		if (!rc)
			break;
		vmaddr = __gmap_translate(gmap, gaddr);
		if (IS_ERR_VALUE(vmaddr)) {
			rc = vmaddr;
			break;
		}
		rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, PROT_READ);
		if (rc)
			break;
	}
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_read_table);

/**
 * gmap_insert_rmap - add a rmap to the host_to_rmap radix tree
 * @sg: pointer to the shadow guest address space structure
 * @vmaddr: vm address associated with the rmap
 * @rmap: pointer to the rmap structure
 *
 * Called with the sg->guest_table_lock
 */
static inline void gmap_insert_rmap(struct gmap *sg, unsigned long vmaddr,
				    struct gmap_rmap *rmap)
{
	struct gmap_rmap *temp;
	void __rcu **slot;

	BUG_ON(!gmap_is_shadow(sg));
	slot = radix_tree_lookup_slot(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT);
	if (slot) {
		rmap->next = radix_tree_deref_slot_protected(slot,
							     &sg->guest_table_lock);
		for (temp = rmap->next; temp; temp = temp->next) {
			if (temp->raddr == rmap->raddr) {
				kfree(rmap);
				return;
			}
		}
		radix_tree_replace_slot(&sg->host_to_rmap, slot, rmap);
	} else {
		rmap->next = NULL;
		radix_tree_insert(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT,
				  rmap);
	}
}
/**
 * gmap_protect_rmap - restrict access rights to memory (RO) and create an rmap
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow gmap
 * @paddr: address in the parent guest address space
 * @len: length of the memory area to protect
 *
 * Returns 0 if successfully protected and the rmap was created, -ENOMEM
 * if out of memory and -EFAULT if paddr is invalid.
 */
static int gmap_protect_rmap(struct gmap *sg, unsigned long raddr,
			     unsigned long paddr, unsigned long len)
{
	struct gmap *parent;
	struct gmap_rmap *rmap;
	unsigned long vmaddr;
	spinlock_t *ptl;
	pte_t *ptep;
	int rc;

	BUG_ON(!gmap_is_shadow(sg));
	parent = sg->parent;
	while (len) {
		vmaddr = __gmap_translate(parent, paddr);
		if (IS_ERR_VALUE(vmaddr))
			return vmaddr;
		rmap = kzalloc(sizeof(*rmap), GFP_KERNEL_ACCOUNT);
		if (!rmap)
			return -ENOMEM;
		rmap->raddr = raddr;
		rc = radix_tree_preload(GFP_KERNEL_ACCOUNT);
		if (rc) {
			kfree(rmap);
			return rc;
		}
		rc = -EAGAIN;
		ptep = gmap_pte_op_walk(parent, paddr, &ptl);
		if (ptep) {
			spin_lock(&sg->guest_table_lock);
			rc = ptep_force_prot(parent->mm, paddr, ptep, PROT_READ,
					     PGSTE_VSIE_BIT);
			if (!rc)
				gmap_insert_rmap(sg, vmaddr, rmap);
			spin_unlock(&sg->guest_table_lock);
			gmap_pte_op_end(ptep, ptl);
		}
		radix_tree_preload_end();
		if (rc) {
			kfree(rmap);
			rc = gmap_pte_op_fixup(parent, paddr, vmaddr, PROT_READ);
			if (rc)
				return rc;
			continue;
		}
		paddr += PAGE_SIZE;
		len -= PAGE_SIZE;
	}
	return 0;
}

#define _SHADOW_RMAP_MASK	0x7
#define _SHADOW_RMAP_REGION1	0x5
#define _SHADOW_RMAP_REGION2	0x4
#define _SHADOW_RMAP_REGION3	0x3
#define _SHADOW_RMAP_SEGMENT	0x2
#define _SHADOW_RMAP_PGTABLE	0x1
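/*
 * Editor's note (illustrative, not from the original source): rmap->raddr
 * carries both the shadow address and, in its low bits, the table level that
 * has to be unshadowed when the parent page changes.  Decoding mirrors
 * gmap_shadow_notify() further below:
 *
 *	bits  = rmap->raddr & _SHADOW_RMAP_MASK;
 *	raddr = rmap->raddr ^ bits;
 *
 *	e.g. bits == _SHADOW_RMAP_SEGMENT  -> drop the shadow page table
 *	     bits == _SHADOW_RMAP_PGTABLE  -> drop a single shadow pte
 */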
/**
 * gmap_idte_one - invalidate a single region or segment table entry
 * @asce: region or segment table *origin* + table-type bits
 * @vaddr: virtual address to identify the table entry to flush
 *
 * The invalid bit of a single region or segment table entry is set
 * and the associated TLB entries depending on the entry are flushed.
 * The table-type of the @asce identifies the portion of the @vaddr
 * that is used as the invalidation index.
 */
static inline void gmap_idte_one(unsigned long asce, unsigned long vaddr)
{
	asm volatile(
		"	idte	%0,0,%1"
		: : "a" (asce), "a" (vaddr) : "cc", "memory");
}

/**
 * gmap_unshadow_page - remove a page from a shadow page table
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 *
 * Called with the sg->guest_table_lock
 */
static void gmap_unshadow_page(struct gmap *sg, unsigned long raddr)
{
	unsigned long *table;

	BUG_ON(!gmap_is_shadow(sg));
	table = gmap_table_walk(sg, raddr, 0); /* get page table pointer */
	if (!table || *table & _PAGE_INVALID)
		return;
	gmap_call_notifier(sg, raddr, raddr + PAGE_SIZE - 1);
	ptep_unshadow_pte(sg->mm, raddr, (pte_t *) table);
}

/**
 * __gmap_unshadow_pgt - remove all entries from a shadow page table
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 * @pgt: pointer to the start of a shadow page table
 *
 * Called with the sg->guest_table_lock
 */
static void __gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr,
				unsigned long *pgt)
{
	int i;

	BUG_ON(!gmap_is_shadow(sg));
	for (i = 0; i < _PAGE_ENTRIES; i++, raddr += PAGE_SIZE)
		pgt[i] = _PAGE_INVALID;
}

/**
 * gmap_unshadow_pgt - remove a shadow page table from a segment entry
 * @sg: pointer to the shadow guest address space structure
 * @raddr: address in the shadow guest address space
 *
 * Called with the sg->guest_table_lock
 */
static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr)
{
	unsigned long *ste;
	phys_addr_t sto, pgt;
	struct ptdesc *ptdesc;

	BUG_ON(!gmap_is_shadow(sg));
	ste = gmap_table_walk(sg, raddr, 1); /* get segment pointer */
	if (!ste || !(*ste & _SEGMENT_ENTRY_ORIGIN))
		return;
	gmap_call_notifier(sg, raddr, raddr + _SEGMENT_SIZE - 1);
	sto = __pa(ste - ((raddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT));
	gmap_idte_one(sto | _ASCE_TYPE_SEGMENT, raddr);
	pgt = *ste & _SEGMENT_ENTRY_ORIGIN;
	*ste = _SEGMENT_ENTRY_EMPTY;
	__gmap_unshadow_pgt(sg, raddr, __va(pgt));
	/* Free page table */
	ptdesc = page_ptdesc(phys_to_page(pgt));
	page_table_free_pgste(ptdesc);
}

/**
 * __gmap_unshadow_sgt - remove all entries from a shadow segment table
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 * @sgt: pointer to the start of a shadow segment table
 *
 * Called with the sg->guest_table_lock
 */
static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr,
				unsigned long *sgt)
{
	struct ptdesc *ptdesc;
	phys_addr_t pgt;
	int i;

	BUG_ON(!gmap_is_shadow(sg));
	for (i = 0; i < _CRST_ENTRIES; i++, raddr += _SEGMENT_SIZE) {
		if (!(sgt[i] & _SEGMENT_ENTRY_ORIGIN))
			continue;
		pgt = sgt[i] & _REGION_ENTRY_ORIGIN;
		sgt[i] = _SEGMENT_ENTRY_EMPTY;
		__gmap_unshadow_pgt(sg, raddr, __va(pgt));
		/* Free page table */
		ptdesc = page_ptdesc(phys_to_page(pgt));
		page_table_free_pgste(ptdesc);
	}
}
/**
 * gmap_unshadow_sgt - remove a shadow segment table from a region-3 entry
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 *
 * Called with the shadow->guest_table_lock
 */
static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr)
{
	unsigned long r3o, *r3e;
	phys_addr_t sgt;
	struct page *page;

	BUG_ON(!gmap_is_shadow(sg));
	r3e = gmap_table_walk(sg, raddr, 2); /* get region-3 pointer */
	if (!r3e || !(*r3e & _REGION_ENTRY_ORIGIN))
		return;
	gmap_call_notifier(sg, raddr, raddr + _REGION3_SIZE - 1);
	r3o = (unsigned long) (r3e - ((raddr & _REGION3_INDEX) >> _REGION3_SHIFT));
	gmap_idte_one(__pa(r3o) | _ASCE_TYPE_REGION3, raddr);
	sgt = *r3e & _REGION_ENTRY_ORIGIN;
	*r3e = _REGION3_ENTRY_EMPTY;
	__gmap_unshadow_sgt(sg, raddr, __va(sgt));
	/* Free segment table */
	page = phys_to_page(sgt);
	__free_pages(page, CRST_ALLOC_ORDER);
}

/**
 * __gmap_unshadow_r3t - remove all entries from a shadow region-3 table
 * @sg: pointer to the shadow guest address space structure
 * @raddr: address in the shadow guest address space
 * @r3t: pointer to the start of a shadow region-3 table
 *
 * Called with the sg->guest_table_lock
 */
static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr,
				unsigned long *r3t)
{
	struct page *page;
	phys_addr_t sgt;
	int i;

	BUG_ON(!gmap_is_shadow(sg));
	for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION3_SIZE) {
		if (!(r3t[i] & _REGION_ENTRY_ORIGIN))
			continue;
		sgt = r3t[i] & _REGION_ENTRY_ORIGIN;
		r3t[i] = _REGION3_ENTRY_EMPTY;
		__gmap_unshadow_sgt(sg, raddr, __va(sgt));
		/* Free segment table */
		page = phys_to_page(sgt);
		__free_pages(page, CRST_ALLOC_ORDER);
	}
}

/**
 * gmap_unshadow_r3t - remove a shadow region-3 table from a region-2 entry
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 *
 * Called with the sg->guest_table_lock
 */
static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr)
{
	unsigned long r2o, *r2e;
	phys_addr_t r3t;
	struct page *page;

	BUG_ON(!gmap_is_shadow(sg));
	r2e = gmap_table_walk(sg, raddr, 3); /* get region-2 pointer */
	if (!r2e || !(*r2e & _REGION_ENTRY_ORIGIN))
		return;
	gmap_call_notifier(sg, raddr, raddr + _REGION2_SIZE - 1);
	r2o = (unsigned long) (r2e - ((raddr & _REGION2_INDEX) >> _REGION2_SHIFT));
	gmap_idte_one(__pa(r2o) | _ASCE_TYPE_REGION2, raddr);
	r3t = *r2e & _REGION_ENTRY_ORIGIN;
	*r2e = _REGION2_ENTRY_EMPTY;
	__gmap_unshadow_r3t(sg, raddr, __va(r3t));
	/* Free region 3 table */
	page = phys_to_page(r3t);
	__free_pages(page, CRST_ALLOC_ORDER);
}
/**
 * __gmap_unshadow_r2t - remove all entries from a shadow region-2 table
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 * @r2t: pointer to the start of a shadow region-2 table
 *
 * Called with the sg->guest_table_lock
 */
static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr,
				unsigned long *r2t)
{
	phys_addr_t r3t;
	struct page *page;
	int i;

	BUG_ON(!gmap_is_shadow(sg));
	for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION2_SIZE) {
		if (!(r2t[i] & _REGION_ENTRY_ORIGIN))
			continue;
		r3t = r2t[i] & _REGION_ENTRY_ORIGIN;
		r2t[i] = _REGION2_ENTRY_EMPTY;
		__gmap_unshadow_r3t(sg, raddr, __va(r3t));
		/* Free region 3 table */
		page = phys_to_page(r3t);
		__free_pages(page, CRST_ALLOC_ORDER);
	}
}

/**
 * gmap_unshadow_r2t - remove a shadow region-2 table from a region-1 entry
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 *
 * Called with the sg->guest_table_lock
 */
static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr)
{
	unsigned long r1o, *r1e;
	struct page *page;
	phys_addr_t r2t;

	BUG_ON(!gmap_is_shadow(sg));
	r1e = gmap_table_walk(sg, raddr, 4); /* get region-1 pointer */
	if (!r1e || !(*r1e & _REGION_ENTRY_ORIGIN))
		return;
	gmap_call_notifier(sg, raddr, raddr + _REGION1_SIZE - 1);
	r1o = (unsigned long) (r1e - ((raddr & _REGION1_INDEX) >> _REGION1_SHIFT));
	gmap_idte_one(__pa(r1o) | _ASCE_TYPE_REGION1, raddr);
	r2t = *r1e & _REGION_ENTRY_ORIGIN;
	*r1e = _REGION1_ENTRY_EMPTY;
	__gmap_unshadow_r2t(sg, raddr, __va(r2t));
	/* Free region 2 table */
	page = phys_to_page(r2t);
	__free_pages(page, CRST_ALLOC_ORDER);
}

/**
 * __gmap_unshadow_r1t - remove all entries from a shadow region-1 table
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 * @r1t: pointer to the start of a shadow region-1 table
 *
 * Called with the shadow->guest_table_lock
 */
static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr,
				unsigned long *r1t)
{
	unsigned long asce;
	struct page *page;
	phys_addr_t r2t;
	int i;

	BUG_ON(!gmap_is_shadow(sg));
	asce = __pa(r1t) | _ASCE_TYPE_REGION1;
	for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION1_SIZE) {
		if (!(r1t[i] & _REGION_ENTRY_ORIGIN))
			continue;
		r2t = r1t[i] & _REGION_ENTRY_ORIGIN;
		__gmap_unshadow_r2t(sg, raddr, __va(r2t));
		/* Clear entry and flush translation r1t -> r2t */
		gmap_idte_one(asce, raddr);
		r1t[i] = _REGION1_ENTRY_EMPTY;
		/* Free region 2 table */
		page = phys_to_page(r2t);
		__free_pages(page, CRST_ALLOC_ORDER);
	}
}

/**
 * gmap_unshadow - remove a shadow page table completely
 * @sg: pointer to the shadow guest address space structure
 *
 * Called with sg->guest_table_lock
 */
void gmap_unshadow(struct gmap *sg)
{
	unsigned long *table;

	BUG_ON(!gmap_is_shadow(sg));
	if (sg->removed)
		return;
	sg->removed = 1;
	gmap_call_notifier(sg, 0, -1UL);
	gmap_flush_tlb(sg);
	table = __va(sg->asce & _ASCE_ORIGIN);
	switch (sg->asce & _ASCE_TYPE_MASK) {
	case _ASCE_TYPE_REGION1:
		__gmap_unshadow_r1t(sg, 0, table);
		break;
	case _ASCE_TYPE_REGION2:
		__gmap_unshadow_r2t(sg, 0, table);
		break;
	case _ASCE_TYPE_REGION3:
		__gmap_unshadow_r3t(sg, 0, table);
		break;
	case _ASCE_TYPE_SEGMENT:
		__gmap_unshadow_sgt(sg, 0, table);
		break;
	}
}
EXPORT_SYMBOL(gmap_unshadow);
/**
 * gmap_shadow_r2t - create an empty shadow region 2 table
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @r2t: parent gmap address of the region 2 table to get shadowed
 * @fake: r2t references contiguous guest memory block, not a r2t
 *
 * The r2t parameter specifies the address of the source table. The
 * four pages of the source table are made read-only in the parent gmap
 * address space. A write to the source table area @r2t will automatically
 * remove the shadow r2 table and all of its descendants.
 *
 * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
 * shadow table structure is incomplete, -ENOMEM if out of memory and
 * -EFAULT if an address in the parent gmap could not be resolved.
 *
 * Called with sg->mm->mmap_lock in read.
 */
int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
		    int fake)
{
	unsigned long raddr, origin, offset, len;
	unsigned long *table;
	phys_addr_t s_r2t;
	struct page *page;
	int rc;

	BUG_ON(!gmap_is_shadow(sg));
	/* Allocate a shadow region second table */
	page = gmap_alloc_crst();
	if (!page)
		return -ENOMEM;
	s_r2t = page_to_phys(page);
	/* Install shadow region second table */
	spin_lock(&sg->guest_table_lock);
	table = gmap_table_walk(sg, saddr, 4); /* get region-1 pointer */
	if (!table) {
		rc = -EAGAIN;		/* Race with unshadow */
		goto out_free;
	}
	if (!(*table & _REGION_ENTRY_INVALID)) {
		rc = 0;			/* Already established */
		goto out_free;
	} else if (*table & _REGION_ENTRY_ORIGIN) {
		rc = -EAGAIN;		/* Race with shadow */
		goto out_free;
	}
	crst_table_init(__va(s_r2t), _REGION2_ENTRY_EMPTY);
	/* mark as invalid as long as the parent table is not protected */
	*table = s_r2t | _REGION_ENTRY_LENGTH |
		 _REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID;
	if (sg->edat_level >= 1)
		*table |= (r2t & _REGION_ENTRY_PROTECT);
	if (fake) {
		/* nothing to protect for fake tables */
		*table &= ~_REGION_ENTRY_INVALID;
		spin_unlock(&sg->guest_table_lock);
		return 0;
	}
	spin_unlock(&sg->guest_table_lock);
	/* Make r2t read-only in parent gmap page table */
	raddr = (saddr & _REGION1_MASK) | _SHADOW_RMAP_REGION1;
	origin = r2t & _REGION_ENTRY_ORIGIN;
	offset = ((r2t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
	len = ((r2t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
	rc = gmap_protect_rmap(sg, raddr, origin + offset, len);
	spin_lock(&sg->guest_table_lock);
	if (!rc) {
		table = gmap_table_walk(sg, saddr, 4);
		if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_r2t)
			rc = -EAGAIN;		/* Race with unshadow */
		else
			*table &= ~_REGION_ENTRY_INVALID;
	} else {
		gmap_unshadow_r2t(sg, raddr);
	}
	spin_unlock(&sg->guest_table_lock);
	return rc;
out_free:
	spin_unlock(&sg->guest_table_lock);
	__free_pages(page, CRST_ALLOC_ORDER);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_r2t);
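/*
 * Editor's note (illustrative, not from the original source): gmap_shadow_r2t,
 * gmap_shadow_r3t, gmap_shadow_sgt and gmap_shadow_pgt all follow the same
 * pattern:
 *
 *	1. allocate the shadow table;
 *	2. install it in the next higher shadow level, still marked invalid;
 *	3. drop the lock and make the parent's table read-only via
 *	   gmap_protect_rmap();
 *	4. re-walk under the lock and clear the invalid bit, or undo the
 *	   shadow on failure.
 *
 * For @fake tables (contiguous guest memory instead of a real table) step 3
 * is skipped and the entry is validated immediately.
 */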
/**
 * gmap_shadow_r3t - create a shadow region 3 table
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @r3t: parent gmap address of the region 3 table to get shadowed
 * @fake: r3t references contiguous guest memory block, not a r3t
 *
 * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
 * shadow table structure is incomplete, -ENOMEM if out of memory and
 * -EFAULT if an address in the parent gmap could not be resolved.
 *
 * Called with sg->mm->mmap_lock in read.
 */
int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
		    int fake)
{
	unsigned long raddr, origin, offset, len;
	unsigned long *table;
	phys_addr_t s_r3t;
	struct page *page;
	int rc;

	BUG_ON(!gmap_is_shadow(sg));
	/* Allocate a shadow region third table */
	page = gmap_alloc_crst();
	if (!page)
		return -ENOMEM;
	s_r3t = page_to_phys(page);
	/* Install shadow region third table */
	spin_lock(&sg->guest_table_lock);
	table = gmap_table_walk(sg, saddr, 3); /* get region-2 pointer */
	if (!table) {
		rc = -EAGAIN;		/* Race with unshadow */
		goto out_free;
	}
	if (!(*table & _REGION_ENTRY_INVALID)) {
		rc = 0;			/* Already established */
		goto out_free;
	} else if (*table & _REGION_ENTRY_ORIGIN) {
		rc = -EAGAIN;		/* Race with shadow */
		goto out_free;
	}
	crst_table_init(__va(s_r3t), _REGION3_ENTRY_EMPTY);
	/* mark as invalid as long as the parent table is not protected */
	*table = s_r3t | _REGION_ENTRY_LENGTH |
		 _REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID;
	if (sg->edat_level >= 1)
		*table |= (r3t & _REGION_ENTRY_PROTECT);
	if (fake) {
		/* nothing to protect for fake tables */
		*table &= ~_REGION_ENTRY_INVALID;
		spin_unlock(&sg->guest_table_lock);
		return 0;
	}
	spin_unlock(&sg->guest_table_lock);
	/* Make r3t read-only in parent gmap page table */
	raddr = (saddr & _REGION2_MASK) | _SHADOW_RMAP_REGION2;
	origin = r3t & _REGION_ENTRY_ORIGIN;
	offset = ((r3t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
	len = ((r3t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
	rc = gmap_protect_rmap(sg, raddr, origin + offset, len);
	spin_lock(&sg->guest_table_lock);
	if (!rc) {
		table = gmap_table_walk(sg, saddr, 3);
		if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_r3t)
			rc = -EAGAIN;		/* Race with unshadow */
		else
			*table &= ~_REGION_ENTRY_INVALID;
	} else {
		gmap_unshadow_r3t(sg, raddr);
	}
	spin_unlock(&sg->guest_table_lock);
	return rc;
out_free:
	spin_unlock(&sg->guest_table_lock);
	__free_pages(page, CRST_ALLOC_ORDER);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_r3t);
/**
 * gmap_shadow_sgt - create a shadow segment table
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @sgt: parent gmap address of the segment table to get shadowed
 * @fake: sgt references contiguous guest memory block, not a sgt
 *
 * Returns: 0 if successfully shadowed or already shadowed, -EAGAIN if the
 * shadow table structure is incomplete, -ENOMEM if out of memory and
 * -EFAULT if an address in the parent gmap could not be resolved.
 *
 * Called with sg->mm->mmap_lock in read.
 */
int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
		    int fake)
{
	unsigned long raddr, origin, offset, len;
	unsigned long *table;
	phys_addr_t s_sgt;
	struct page *page;
	int rc;

	BUG_ON(!gmap_is_shadow(sg) || (sgt & _REGION3_ENTRY_LARGE));
	/* Allocate a shadow segment table */
	page = gmap_alloc_crst();
	if (!page)
		return -ENOMEM;
	s_sgt = page_to_phys(page);
	/* Install shadow segment table */
	spin_lock(&sg->guest_table_lock);
	table = gmap_table_walk(sg, saddr, 2); /* get region-3 pointer */
	if (!table) {
		rc = -EAGAIN;		/* Race with unshadow */
		goto out_free;
	}
	if (!(*table & _REGION_ENTRY_INVALID)) {
		rc = 0;			/* Already established */
		goto out_free;
	} else if (*table & _REGION_ENTRY_ORIGIN) {
		rc = -EAGAIN;		/* Race with shadow */
		goto out_free;
	}
	crst_table_init(__va(s_sgt), _SEGMENT_ENTRY_EMPTY);
	/* mark as invalid as long as the parent table is not protected */
	*table = s_sgt | _REGION_ENTRY_LENGTH |
		 _REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID;
	if (sg->edat_level >= 1)
		*table |= sgt & _REGION_ENTRY_PROTECT;
	if (fake) {
		/* nothing to protect for fake tables */
		*table &= ~_REGION_ENTRY_INVALID;
		spin_unlock(&sg->guest_table_lock);
		return 0;
	}
	spin_unlock(&sg->guest_table_lock);
	/* Make sgt read-only in parent gmap page table */
	raddr = (saddr & _REGION3_MASK) | _SHADOW_RMAP_REGION3;
	origin = sgt & _REGION_ENTRY_ORIGIN;
	offset = ((sgt & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
	len = ((sgt & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
	rc = gmap_protect_rmap(sg, raddr, origin + offset, len);
	spin_lock(&sg->guest_table_lock);
	if (!rc) {
		table = gmap_table_walk(sg, saddr, 2);
		if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_sgt)
			rc = -EAGAIN;		/* Race with unshadow */
		else
			*table &= ~_REGION_ENTRY_INVALID;
	} else {
		gmap_unshadow_sgt(sg, raddr);
	}
	spin_unlock(&sg->guest_table_lock);
	return rc;
out_free:
	spin_unlock(&sg->guest_table_lock);
	__free_pages(page, CRST_ALLOC_ORDER);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_sgt);

static void gmap_pgste_set_pgt_addr(struct ptdesc *ptdesc, unsigned long pgt_addr)
{
	unsigned long *pgstes = page_to_virt(ptdesc_page(ptdesc));

	pgstes += _PAGE_ENTRIES;

	pgstes[0] &= ~PGSTE_ST2_MASK;
	pgstes[1] &= ~PGSTE_ST2_MASK;
	pgstes[2] &= ~PGSTE_ST2_MASK;
	pgstes[3] &= ~PGSTE_ST2_MASK;

	pgstes[0] |= (pgt_addr >> 16) & PGSTE_ST2_MASK;
	pgstes[1] |= pgt_addr & PGSTE_ST2_MASK;
	pgstes[2] |= (pgt_addr << 16) & PGSTE_ST2_MASK;
	pgstes[3] |= (pgt_addr << 32) & PGSTE_ST2_MASK;
}
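/*
 * Editor's note (illustrative, not from the original source): assuming
 * PGSTE_ST2_MASK covers bits 47..32 of a pgste, the parent's page-table
 * origin (plus GMAP_SHADOW_FAKE_TABLE, if set) is split into four 16-bit
 * pieces and stored in the ST2 field of the first four pgstes:
 *
 *	pgstes[0]  <- bits 63..48 of pgt_addr
 *	pgstes[1]  <- bits 47..32
 *	pgstes[2]  <- bits 31..16
 *	pgstes[3]  <- bits 15..0
 *
 * so the full value can later be reassembled from the shadow page table's
 * pgste area.
 */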
/**
 * gmap_shadow_pgt - instantiate a shadow page table
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @pgt: parent gmap address of the page table to get shadowed
 * @fake: pgt references contiguous guest memory block, not a pgtable
 *
 * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
 * shadow table structure is incomplete, -ENOMEM if out of memory and
 * -EFAULT if an address in the parent gmap could not be resolved.
 *
 * Called with gmap->mm->mmap_lock in read
 */
int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
		    int fake)
{
	unsigned long raddr, origin;
	unsigned long *table;
	struct ptdesc *ptdesc;
	phys_addr_t s_pgt;
	int rc;

	BUG_ON(!gmap_is_shadow(sg) || (pgt & _SEGMENT_ENTRY_LARGE));
	/* Allocate a shadow page table */
	ptdesc = page_table_alloc_pgste(sg->mm);
	if (!ptdesc)
		return -ENOMEM;
	origin = pgt & _SEGMENT_ENTRY_ORIGIN;
	if (fake)
		origin |= GMAP_SHADOW_FAKE_TABLE;
	gmap_pgste_set_pgt_addr(ptdesc, origin);
	s_pgt = page_to_phys(ptdesc_page(ptdesc));
	/* Install shadow page table */
	spin_lock(&sg->guest_table_lock);
	table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */
	if (!table) {
		rc = -EAGAIN;		/* Race with unshadow */
		goto out_free;
	}
	if (!(*table & _SEGMENT_ENTRY_INVALID)) {
		rc = 0;			/* Already established */
		goto out_free;
	} else if (*table & _SEGMENT_ENTRY_ORIGIN) {
		rc = -EAGAIN;		/* Race with shadow */
		goto out_free;
	}
	/* mark as invalid as long as the parent table is not protected */
	*table = (unsigned long) s_pgt | _SEGMENT_ENTRY |
		 (pgt & _SEGMENT_ENTRY_PROTECT) | _SEGMENT_ENTRY_INVALID;
	if (fake) {
		/* nothing to protect for fake tables */
		*table &= ~_SEGMENT_ENTRY_INVALID;
		spin_unlock(&sg->guest_table_lock);
		return 0;
	}
	spin_unlock(&sg->guest_table_lock);
	/* Make pgt read-only in parent gmap page table (not the pgste) */
	raddr = (saddr & _SEGMENT_MASK) | _SHADOW_RMAP_SEGMENT;
	origin = pgt & _SEGMENT_ENTRY_ORIGIN & PAGE_MASK;
	rc = gmap_protect_rmap(sg, raddr, origin, PAGE_SIZE);
	spin_lock(&sg->guest_table_lock);
	if (!rc) {
		table = gmap_table_walk(sg, saddr, 1);
		if (!table || (*table & _SEGMENT_ENTRY_ORIGIN) != s_pgt)
			rc = -EAGAIN;		/* Race with unshadow */
		else
			*table &= ~_SEGMENT_ENTRY_INVALID;
	} else {
		gmap_unshadow_pgt(sg, raddr);
	}
	spin_unlock(&sg->guest_table_lock);
	return rc;
out_free:
	spin_unlock(&sg->guest_table_lock);
	page_table_free_pgste(ptdesc);
	return rc;

}
EXPORT_SYMBOL_GPL(gmap_shadow_pgt);
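/*
 * Editor's note (illustrative sketch, not from the original source): a shadow
 * fault handler resolves missing levels top-down before shadowing the final
 * pte, roughly
 *
 *	gmap_shadow_r2t(sg, saddr, r2t, fake);	(region-1 entry missing)
 *	gmap_shadow_r3t(sg, saddr, r3t, fake);	(region-2 entry missing)
 *	gmap_shadow_sgt(sg, saddr, sgt, fake);	(region-3 entry missing)
 *	gmap_shadow_pgt(sg, saddr, pgt, fake);	(segment entry missing)
 *	gmap_shadow_page(sg, saddr, pte);	(finally map the page)
 *
 * where each call may return -EAGAIN and force the caller to restart the walk.
 */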
PROT_READ : PROT_WRITE;

	rmap = kzalloc(sizeof(*rmap), GFP_KERNEL_ACCOUNT);
	if (!rmap)
		return -ENOMEM;
	rmap->raddr = (saddr & PAGE_MASK) | _SHADOW_RMAP_PGTABLE;

	while (1) {
		paddr = pte_val(pte) & PAGE_MASK;
		vmaddr = __gmap_translate(parent, paddr);
		if (IS_ERR_VALUE(vmaddr)) {
			rc = vmaddr;
			break;
		}
		rc = radix_tree_preload(GFP_KERNEL_ACCOUNT);
		if (rc)
			break;
		rc = -EAGAIN;
		sptep = gmap_pte_op_walk(parent, paddr, &ptl);
		if (sptep) {
			spin_lock(&sg->guest_table_lock);
			/* Get page table pointer */
			tptep = (pte_t *) gmap_table_walk(sg, saddr, 0);
			if (!tptep) {
				spin_unlock(&sg->guest_table_lock);
				gmap_pte_op_end(sptep, ptl);
				radix_tree_preload_end();
				break;
			}
			rc = ptep_shadow_pte(sg->mm, saddr, sptep, tptep, pte);
			if (rc > 0) {
				/* Success and a new mapping */
				gmap_insert_rmap(sg, vmaddr, rmap);
				rmap = NULL;
				rc = 0;
			}
			gmap_pte_op_end(sptep, ptl);
			spin_unlock(&sg->guest_table_lock);
		}
		radix_tree_preload_end();
		if (!rc)
			break;
		rc = gmap_pte_op_fixup(parent, paddr, vmaddr, prot);
		if (rc)
			break;
	}
	kfree(rmap);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_page);

/*
 * gmap_shadow_notify - handle notifications for shadow gmap
 *
 * Called with sg->parent->shadow_lock held.
 */
static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr,
			       unsigned long gaddr)
{
	struct gmap_rmap *rmap, *rnext, *head;
	unsigned long start, end, bits, raddr;

	BUG_ON(!gmap_is_shadow(sg));

	spin_lock(&sg->guest_table_lock);
	if (sg->removed) {
		spin_unlock(&sg->guest_table_lock);
		return;
	}
	/* Check for top level table */
	start = sg->orig_asce & _ASCE_ORIGIN;
	end = start + ((sg->orig_asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE;
	if (!(sg->orig_asce & _ASCE_REAL_SPACE) && gaddr >= start &&
	    gaddr < end) {
		/* The complete shadow table has to go */
		gmap_unshadow(sg);
		spin_unlock(&sg->guest_table_lock);
		list_del(&sg->list);
		gmap_put(sg);
		return;
	}
	/* Remove the page table tree for one specific entry */
	head = radix_tree_delete(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT);
	gmap_for_each_rmap_safe(rmap, rnext, head) {
		bits = rmap->raddr & _SHADOW_RMAP_MASK;
		raddr = rmap->raddr ^ bits;
		switch (bits) {
		case _SHADOW_RMAP_REGION1:
			gmap_unshadow_r2t(sg, raddr);
			break;
		case _SHADOW_RMAP_REGION2:
			gmap_unshadow_r3t(sg, raddr);
			break;
		case _SHADOW_RMAP_REGION3:
			gmap_unshadow_sgt(sg, raddr);
			break;
		case _SHADOW_RMAP_SEGMENT:
			gmap_unshadow_pgt(sg, raddr);
			break;
		case _SHADOW_RMAP_PGTABLE:
			gmap_unshadow_page(sg, raddr);
			break;
		}
		kfree(rmap);
	}
	spin_unlock(&sg->guest_table_lock);
}

/**
 * ptep_notify - call all invalidation callbacks for a specific pte.
 * @mm: pointer to the process mm_struct
 * @vmaddr: virtual address in the process address space
 * @pte: pointer to the page table entry
 * @bits: bits from the pgste that caused the notify call
 *
 * This function is assumed to be called with the page table lock held
 * for the pte to notify.
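 *
 * The byte offset of @pte within its page table is scaled by
 * PAGE_SIZE / sizeof(pte_t) to yield the offset of the corresponding page
 * within the segment, so that the notifiers are called with the guest
 * address of the exact page that changed rather than the whole segment.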
 */
void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
		 pte_t *pte, unsigned long bits)
{
	unsigned long offset, gaddr = 0;
	struct gmap *gmap, *sg, *next;

	offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
	offset = offset * (PAGE_SIZE / sizeof(pte_t));
	rcu_read_lock();
	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
		spin_lock(&gmap->guest_table_lock);
		gaddr = host_to_guest_lookup(gmap, vmaddr) + offset;
		spin_unlock(&gmap->guest_table_lock);
		if (!IS_GADDR_VALID(gaddr))
			continue;

		if (!list_empty(&gmap->children) && (bits & PGSTE_VSIE_BIT)) {
			spin_lock(&gmap->shadow_lock);
			list_for_each_entry_safe(sg, next,
						 &gmap->children, list)
				gmap_shadow_notify(sg, vmaddr, gaddr);
			spin_unlock(&gmap->shadow_lock);
		}
		if (bits & PGSTE_IN_BIT)
			gmap_call_notifier(gmap, gaddr, gaddr + PAGE_SIZE - 1);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(ptep_notify);

static void pmdp_notify_gmap(struct gmap *gmap, pmd_t *pmdp,
			     unsigned long gaddr)
{
	set_pmd(pmdp, clear_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_GMAP_IN)));
	gmap_call_notifier(gmap, gaddr, gaddr + HPAGE_SIZE - 1);
}

/**
 * gmap_pmdp_xchg - exchange a gmap pmd with another
 * @gmap: pointer to the guest address space structure
 * @pmdp: pointer to the pmd entry
 * @new: replacement entry
 * @gaddr: the affected guest address
 *
 * This function is assumed to be called with the guest_table_lock
 * held.
 */
static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *pmdp, pmd_t new,
			   unsigned long gaddr)
{
	gaddr &= HPAGE_MASK;
	pmdp_notify_gmap(gmap, pmdp, gaddr);
	new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_GMAP_IN));
	if (machine_has_tlb_guest())
		__pmdp_idte(gaddr, (pmd_t *)pmdp, IDTE_GUEST_ASCE, gmap->asce,
			    IDTE_GLOBAL);
	else if (cpu_has_idte())
		__pmdp_idte(gaddr, (pmd_t *)pmdp, 0, 0, IDTE_GLOBAL);
	else
		__pmdp_csp(pmdp);
	set_pmd(pmdp, new);
}

static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr,
			    int purge)
{
	pmd_t *pmdp;
	struct gmap *gmap;
	unsigned long gaddr;

	rcu_read_lock();
	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
		spin_lock(&gmap->guest_table_lock);
		pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr);
		if (pmdp) {
			pmdp_notify_gmap(gmap, pmdp, gaddr);
			WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
						   _SEGMENT_ENTRY_GMAP_UC |
						   _SEGMENT_ENTRY));
			if (purge)
				__pmdp_csp(pmdp);
			set_pmd(pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
		}
		spin_unlock(&gmap->guest_table_lock);
	}
	rcu_read_unlock();
}

/**
 * gmap_pmdp_invalidate - invalidate all affected guest pmd entries without
 *                        flushing
 * @mm: pointer to the process mm_struct
 * @vmaddr: virtual address in the process address space
 */
void gmap_pmdp_invalidate(struct mm_struct *mm, unsigned long vmaddr)
{
	gmap_pmdp_clear(mm, vmaddr, 0);
}
EXPORT_SYMBOL_GPL(gmap_pmdp_invalidate);

/**
 * gmap_pmdp_csp - csp all affected guest pmd entries
 * @mm: pointer to the process mm_struct
 * @vmaddr: virtual address in the process address space
 */
void gmap_pmdp_csp(struct mm_struct *mm, unsigned long vmaddr)
{
	gmap_pmdp_clear(mm, vmaddr, 1);
}
EXPORT_SYMBOL_GPL(gmap_pmdp_csp);

/**
 * gmap_pmdp_idte_local - invalidate and clear a guest pmd entry
 * @mm: pointer to the process mm_struct
 * @vmaddr: virtual address in the process address space
 */
void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr)
{
	unsigned long gaddr;
	struct gmap *gmap;
	pmd_t *pmdp;

	rcu_read_lock();
	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
		spin_lock(&gmap->guest_table_lock);
		pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr);
		if (pmdp) {
			pmdp_notify_gmap(gmap, pmdp, gaddr);
			WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
						   _SEGMENT_ENTRY_GMAP_UC |
						   _SEGMENT_ENTRY));
			if (machine_has_tlb_guest())
				__pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
					    gmap->asce, IDTE_LOCAL);
			else if (cpu_has_idte())
				__pmdp_idte(gaddr, pmdp, 0, 0, IDTE_LOCAL);
			*pmdp = __pmd(_SEGMENT_ENTRY_EMPTY);
		}
		spin_unlock(&gmap->guest_table_lock);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(gmap_pmdp_idte_local);

/**
 * gmap_pmdp_idte_global - invalidate and clear a guest pmd entry
 * @mm: pointer to the process mm_struct
 * @vmaddr: virtual address in the process address space
 */
void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr)
{
	unsigned long gaddr;
	struct gmap *gmap;
	pmd_t *pmdp;

	rcu_read_lock();
	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
		spin_lock(&gmap->guest_table_lock);
		pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr);
		if (pmdp) {
			pmdp_notify_gmap(gmap, pmdp, gaddr);
			WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
						   _SEGMENT_ENTRY_GMAP_UC |
						   _SEGMENT_ENTRY));
			if (machine_has_tlb_guest())
				__pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
					    gmap->asce, IDTE_GLOBAL);
			else if (cpu_has_idte())
				__pmdp_idte(gaddr, pmdp, 0, 0, IDTE_GLOBAL);
			else
				__pmdp_csp(pmdp);
			*pmdp = __pmd(_SEGMENT_ENTRY_EMPTY);
		}
		spin_unlock(&gmap->guest_table_lock);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(gmap_pmdp_idte_global);

/**
 * gmap_test_and_clear_dirty_pmd - test and reset segment dirty status
 * @gmap: pointer to guest address space
 * @pmdp: pointer to the pmd to be tested
 * @gaddr: virtual address in the guest address space
 *
 * This function is assumed to be called with the guest_table_lock
 * held.
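 *
 * Return: true if the segment was dirty and its dirty indication has been
 *	   reset, false otherwise.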
 */
static bool gmap_test_and_clear_dirty_pmd(struct gmap *gmap, pmd_t *pmdp,
					  unsigned long gaddr)
{
	if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
		return false;

	/* Already protected memory which did not change is clean */
	if (pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT &&
	    !(pmd_val(*pmdp) & _SEGMENT_ENTRY_GMAP_UC))
		return false;

	/* Clear UC indication and reset protection */
	set_pmd(pmdp, clear_pmd_bit(*pmdp, __pgprot(_SEGMENT_ENTRY_GMAP_UC)));
	gmap_protect_pmd(gmap, gaddr, pmdp, PROT_READ, 0);
	return true;
}

/**
 * gmap_sync_dirty_log_pmd - set bitmap based on dirty status of segment
 * @gmap: pointer to guest address space
 * @bitmap: dirty bitmap for this pmd
 * @gaddr: virtual address in the guest address space
 * @vmaddr: virtual address in the host address space
 *
 * This function is assumed to be called with the guest_table_lock
 * held.
 */
void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long bitmap[4],
			     unsigned long gaddr, unsigned long vmaddr)
{
	int i;
	pmd_t *pmdp;
	pte_t *ptep;
	spinlock_t *ptl;

	pmdp = gmap_pmd_op_walk(gmap, gaddr);
	if (!pmdp)
		return;

	if (pmd_leaf(*pmdp)) {
		if (gmap_test_and_clear_dirty_pmd(gmap, pmdp, gaddr))
			bitmap_fill(bitmap, _PAGE_ENTRIES);
	} else {
		for (i = 0; i < _PAGE_ENTRIES; i++, vmaddr += PAGE_SIZE) {
			ptep = pte_alloc_map_lock(gmap->mm, pmdp, vmaddr, &ptl);
			if (!ptep)
				continue;
			if (ptep_test_and_clear_uc(gmap->mm, vmaddr, ptep))
				set_bit(i, bitmap);
			pte_unmap_unlock(ptep, ptl);
		}
	}
	gmap_pmd_op_end(gmap, pmdp);
}
EXPORT_SYMBOL_GPL(gmap_sync_dirty_log_pmd);

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static int thp_split_walk_pmd_entry(pmd_t *pmd, unsigned long addr,
				    unsigned long end, struct mm_walk *walk)
{
	struct vm_area_struct *vma = walk->vma;

	split_huge_pmd(vma, pmd, addr);
	return 0;
}

static const struct mm_walk_ops thp_split_walk_ops = {
	.pmd_entry	= thp_split_walk_pmd_entry,
	.walk_lock	= PGWALK_WRLOCK_VERIFY,
};

static inline void thp_split_mm(struct mm_struct *mm)
{
	struct vm_area_struct *vma;
	VMA_ITERATOR(vmi, mm, 0);

	for_each_vma(vmi, vma) {
		vm_flags_mod(vma, VM_NOHUGEPAGE, VM_HUGEPAGE);
		walk_page_vma(vma, &thp_split_walk_ops, NULL);
	}
	mm->def_flags |= VM_NOHUGEPAGE;
}
#else
static inline void thp_split_mm(struct mm_struct *mm)
{
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

/*
 * Switch on pgstes for the current userspace process (for kvm).
 */
int s390_enable_sie(void)
{
	struct mm_struct *mm = current->mm;

	/* Do we have pgstes? If yes, we are done */
	if (mm_has_pgste(mm))
		return 0;
	mmap_write_lock(mm);
	mm->context.has_pgste = 1;
	/* split thp mappings and disable thp for future mappings */
	thp_split_mm(mm);
	mmap_write_unlock(mm);
	return 0;
}
EXPORT_SYMBOL_GPL(s390_enable_sie);

/*
 * Enable storage key handling from now on and initialize the storage
 * keys with the default key.
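 *
 * The page table walkers below clear the storage key of each mapped page
 * via ptep_zap_key(); writable large mappings instead get their whole key
 * range initialized with __storage_key_init_range().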
 */
static int __s390_enable_skey_pte(pte_t *pte, unsigned long addr,
				  unsigned long next, struct mm_walk *walk)
{
	/* Clear storage key */
	ptep_zap_key(walk->mm, addr, pte);
	return 0;
}

/*
 * Give a chance to schedule after setting the storage key for 256 pages.
 * We only hold the mm lock, which is an rwsem, and the kvm srcu.
 * Both can sleep.
 */
static int __s390_enable_skey_pmd(pmd_t *pmd, unsigned long addr,
				  unsigned long next, struct mm_walk *walk)
{
	cond_resched();
	return 0;
}

static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr,
				      unsigned long hmask, unsigned long next,
				      struct mm_walk *walk)
{
	pmd_t *pmd = (pmd_t *)pte;
	unsigned long start, end;
	struct folio *folio = page_folio(pmd_page(*pmd));

	/*
	 * The write check makes sure we do not set a key on shared
	 * memory. This is needed as the walker does not differentiate
	 * between actual guest memory and the process executable or
	 * shared libraries.
	 */
	if (pmd_val(*pmd) & _SEGMENT_ENTRY_INVALID ||
	    !(pmd_val(*pmd) & _SEGMENT_ENTRY_WRITE))
		return 0;

	start = pmd_val(*pmd) & HPAGE_MASK;
	end = start + HPAGE_SIZE;
	__storage_key_init_range(start, end);
	set_bit(PG_arch_1, &folio->flags);
	cond_resched();
	return 0;
}

static const struct mm_walk_ops enable_skey_walk_ops = {
	.hugetlb_entry	= __s390_enable_skey_hugetlb,
	.pte_entry	= __s390_enable_skey_pte,
	.pmd_entry	= __s390_enable_skey_pmd,
	.walk_lock	= PGWALK_WRLOCK,
};

int s390_enable_skey(void)
{
	struct mm_struct *mm = current->mm;
	int rc = 0;

	mmap_write_lock(mm);
	if (mm_uses_skeys(mm))
		goto out_up;

	mm->context.uses_skeys = 1;
	rc = gmap_helper_disable_cow_sharing();
	if (rc) {
		mm->context.uses_skeys = 0;
		goto out_up;
	}
	walk_page_range(mm, 0, TASK_SIZE, &enable_skey_walk_ops, NULL);

out_up:
	mmap_write_unlock(mm);
	return rc;
}
EXPORT_SYMBOL_GPL(s390_enable_skey);

/*
 * Reset CMMA state, make all pages stable again.
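 *
 * The page table walker below calls ptep_zap_unused() with the reset flag
 * set for every pte in the address space.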
 */
static int __s390_reset_cmma(pte_t *pte, unsigned long addr,
			     unsigned long next, struct mm_walk *walk)
{
	ptep_zap_unused(walk->mm, addr, pte, 1);
	return 0;
}

static const struct mm_walk_ops reset_cmma_walk_ops = {
	.pte_entry	= __s390_reset_cmma,
	.walk_lock	= PGWALK_WRLOCK,
};

void s390_reset_cmma(struct mm_struct *mm)
{
	mmap_write_lock(mm);
	walk_page_range(mm, 0, TASK_SIZE, &reset_cmma_walk_ops, NULL);
	mmap_write_unlock(mm);
}
EXPORT_SYMBOL_GPL(s390_reset_cmma);

#define GATHER_GET_PAGES 32

struct reset_walk_state {
	unsigned long next;
	unsigned long count;
	unsigned long pfns[GATHER_GET_PAGES];
};

static int s390_gather_pages(pte_t *ptep, unsigned long addr,
			     unsigned long next, struct mm_walk *walk)
{
	struct reset_walk_state *p = walk->private;
	pte_t pte = READ_ONCE(*ptep);

	if (pte_present(pte)) {
		/* we have a reference from the mapping, take an extra one */
		get_page(phys_to_page(pte_val(pte)));
		p->pfns[p->count] = phys_to_pfn(pte_val(pte));
		p->next = next;
		p->count++;
	}
	return p->count >= GATHER_GET_PAGES;
}

static const struct mm_walk_ops gather_pages_ops = {
	.pte_entry	= s390_gather_pages,
	.walk_lock	= PGWALK_RDLOCK,
};

/*
 * Call the Destroy secure page UVC on each page in the given array of PFNs.
 * Each page needs to have an extra reference, which will be released here.
 */
void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns)
{
	struct folio *folio;
	unsigned long i;

	for (i = 0; i < count; i++) {
		folio = pfn_folio(pfns[i]);
		/* we always have an extra reference */
		uv_destroy_folio(folio);
		/* get rid of the extra reference */
		folio_put(folio);
		cond_resched();
	}
}
EXPORT_SYMBOL_GPL(s390_uv_destroy_pfns);

/**
 * __s390_uv_destroy_range - Call the destroy secure page UVC on each page
 * in the given range of the given address space.
 * @mm: the mm to operate on
 * @start: the start of the range
 * @end: the end of the range
 * @interruptible: if not 0, stop when a fatal signal is received
 *
 * Walk the given range of the given address space and call the destroy
 * secure page UVC on each page. Optionally exit early if a fatal signal is
 * pending.
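 * Pages are gathered in batches of GATHER_GET_PAGES while holding the mmap
 * read lock; the destroy secure page UVC is issued for each batch after the
 * lock has been dropped.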
 *
 * Return: 0 on success, -EINTR if the function stopped before completing
 */
int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
			    unsigned long end, bool interruptible)
{
	struct reset_walk_state state = { .next = start };
	int r = 1;

	while (r > 0) {
		state.count = 0;
		mmap_read_lock(mm);
		r = walk_page_range(mm, state.next, end, &gather_pages_ops, &state);
		mmap_read_unlock(mm);
		cond_resched();
		s390_uv_destroy_pfns(state.count, state.pfns);
		if (interruptible && fatal_signal_pending(current))
			return -EINTR;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(__s390_uv_destroy_range);

/**
 * s390_replace_asce - Try to replace the current ASCE of a gmap with a copy
 * @gmap: the gmap whose ASCE needs to be replaced
 *
 * If the ASCE is of SEGMENT type, this function returns -EINVAL; replacing
 * such an ASCE would leave the pointers in the host_to_guest radix tree
 * pointing to the wrong pages, causing use-after-free and memory corruption.
 * If the allocation of the new top level page table fails, the ASCE is not
 * replaced.
 * In any case, the old ASCE is always removed from the gmap CRST list.
 * Therefore the caller has to make sure to save a pointer to it
 * beforehand, unless a leak is actually intended.
 */
int s390_replace_asce(struct gmap *gmap)
{
	unsigned long asce;
	struct page *page;
	void *table;

	/* Replacing segment type ASCEs would cause serious issues */
	if ((gmap->asce & _ASCE_TYPE_MASK) == _ASCE_TYPE_SEGMENT)
		return -EINVAL;

	page = gmap_alloc_crst();
	if (!page)
		return -ENOMEM;
	table = page_to_virt(page);
	memcpy(table, gmap->table, 1UL << (CRST_ALLOC_ORDER + PAGE_SHIFT));

	/* Set new table origin while preserving existing ASCE control bits */
	asce = (gmap->asce & ~_ASCE_ORIGIN) | __pa(table);
	WRITE_ONCE(gmap->asce, asce);
	WRITE_ONCE(gmap->mm->context.gmap_asce, asce);
	WRITE_ONCE(gmap->table, table);

	return 0;
}
EXPORT_SYMBOL_GPL(s390_replace_asce);