pgtable.c (227be799c39a28bf5d68187a4ea1b43190d96515) | pgtable.c (1e133ab296f3ff8d9e58a5e758291ed39ba72ad7) |
---|---|
1/* 2 * Copyright IBM Corp. 2007, 2011 3 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> 4 */ 5 6#include <linux/sched.h> 7#include <linux/kernel.h> 8#include <linux/errno.h> --- 10 unchanged lines hidden (view full) --- 19#include <linux/mman.h> 20 21#include <asm/pgtable.h> 22#include <asm/pgalloc.h> 23#include <asm/tlb.h> 24#include <asm/tlbflush.h> 25#include <asm/mmu_context.h> 26 | 1/* 2 * Copyright IBM Corp. 2007, 2011 3 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> 4 */ 5 6#include <linux/sched.h> 7#include <linux/kernel.h> 8#include <linux/errno.h> --- 10 unchanged lines hidden (view full) --- 19#include <linux/mman.h> 20 21#include <asm/pgtable.h> 22#include <asm/pgalloc.h> 23#include <asm/tlb.h> 24#include <asm/tlbflush.h> 25#include <asm/mmu_context.h> 26 |
27unsigned long *crst_table_alloc(struct mm_struct *mm) 28{ 29 struct page *page = alloc_pages(GFP_KERNEL, 2); 30 31 if (!page) 32 return NULL; 33 return (unsigned long *) page_to_phys(page); 34} 35 36void crst_table_free(struct mm_struct *mm, unsigned long *table) 37{ 38 free_pages((unsigned long) table, 2); 39} 40 41static void __crst_table_upgrade(void *arg) 42{ 43 struct mm_struct *mm = arg; 44 45 if (current->active_mm == mm) { 46 clear_user_asce(); 47 set_user_asce(mm); 48 } 49 __tlb_flush_local(); 50} 51 52int crst_table_upgrade(struct mm_struct *mm, unsigned long limit) 53{ 54 unsigned long *table, *pgd; 55 unsigned long entry; 56 int flush; 57 58 BUG_ON(limit > TASK_MAX_SIZE); 59 flush = 0; 60repeat: 61 table = crst_table_alloc(mm); 62 if (!table) 63 return -ENOMEM; 64 spin_lock_bh(&mm->page_table_lock); 65 if (mm->context.asce_limit < limit) { 66 pgd = (unsigned long *) mm->pgd; 67 if (mm->context.asce_limit <= (1UL << 31)) { 68 entry = _REGION3_ENTRY_EMPTY; 69 mm->context.asce_limit = 1UL << 42; 70 mm->context.asce_bits = _ASCE_TABLE_LENGTH | 71 _ASCE_USER_BITS | 72 _ASCE_TYPE_REGION3; 73 } else { 74 entry = _REGION2_ENTRY_EMPTY; 75 mm->context.asce_limit = 1UL << 53; 76 mm->context.asce_bits = _ASCE_TABLE_LENGTH | 77 _ASCE_USER_BITS | 78 _ASCE_TYPE_REGION2; 79 } 80 crst_table_init(table, entry); 81 pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd); 82 mm->pgd = (pgd_t *) table; 83 mm->task_size = mm->context.asce_limit; 84 table = NULL; 85 flush = 1; 86 } 87 spin_unlock_bh(&mm->page_table_lock); 88 if (table) 89 crst_table_free(mm, table); 90 if (mm->context.asce_limit < limit) 91 goto repeat; 92 if (flush) 93 on_each_cpu(__crst_table_upgrade, mm, 0); 94 return 0; 95} 96 97void crst_table_downgrade(struct mm_struct *mm, unsigned long limit) 98{ 99 pgd_t *pgd; 100 101 if (current->active_mm == mm) { 102 clear_user_asce(); 103 __tlb_flush_mm(mm); 104 } 105 while (mm->context.asce_limit > limit) { 106 pgd = mm->pgd; 107 switch (pgd_val(*pgd) & 
_REGION_ENTRY_TYPE_MASK) { 108 case _REGION_ENTRY_TYPE_R2: 109 mm->context.asce_limit = 1UL << 42; 110 mm->context.asce_bits = _ASCE_TABLE_LENGTH | 111 _ASCE_USER_BITS | 112 _ASCE_TYPE_REGION3; 113 break; 114 case _REGION_ENTRY_TYPE_R3: 115 mm->context.asce_limit = 1UL << 31; 116 mm->context.asce_bits = _ASCE_TABLE_LENGTH | 117 _ASCE_USER_BITS | 118 _ASCE_TYPE_SEGMENT; 119 break; 120 default: 121 BUG(); 122 } 123 mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN); 124 mm->task_size = mm->context.asce_limit; 125 crst_table_free(mm, (unsigned long *) pgd); 126 } 127 if (current->active_mm == mm) 128 set_user_asce(mm); 129} 130 131#ifdef CONFIG_PGSTE 132 133/** 134 * gmap_alloc - allocate a guest address space 135 * @mm: pointer to the parent mm_struct 136 * @limit: maximum address of the gmap address space 137 * 138 * Returns a guest address space structure. 139 */ 140struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit) 141{ 142 struct gmap *gmap; 143 struct page *page; 144 unsigned long *table; 145 unsigned long etype, atype; 146 147 if (limit < (1UL << 31)) { 148 limit = (1UL << 31) - 1; 149 atype = _ASCE_TYPE_SEGMENT; 150 etype = _SEGMENT_ENTRY_EMPTY; 151 } else if (limit < (1UL << 42)) { 152 limit = (1UL << 42) - 1; 153 atype = _ASCE_TYPE_REGION3; 154 etype = _REGION3_ENTRY_EMPTY; 155 } else if (limit < (1UL << 53)) { 156 limit = (1UL << 53) - 1; 157 atype = _ASCE_TYPE_REGION2; 158 etype = _REGION2_ENTRY_EMPTY; 159 } else { 160 limit = -1UL; 161 atype = _ASCE_TYPE_REGION1; 162 etype = _REGION1_ENTRY_EMPTY; 163 } 164 gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL); 165 if (!gmap) 166 goto out; 167 INIT_LIST_HEAD(&gmap->crst_list); 168 INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL); 169 INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC); 170 spin_lock_init(&gmap->guest_table_lock); 171 gmap->mm = mm; 172 page = alloc_pages(GFP_KERNEL, 2); 173 if (!page) 174 goto out_free; 175 page->index = 0; 176 list_add(&page->lru, &gmap->crst_list); 177 
table = (unsigned long *) page_to_phys(page); 178 crst_table_init(table, etype); 179 gmap->table = table; 180 gmap->asce = atype | _ASCE_TABLE_LENGTH | 181 _ASCE_USER_BITS | __pa(table); 182 gmap->asce_end = limit; 183 down_write(&mm->mmap_sem); 184 list_add(&gmap->list, &mm->context.gmap_list); 185 up_write(&mm->mmap_sem); 186 return gmap; 187 188out_free: 189 kfree(gmap); 190out: 191 return NULL; 192} 193EXPORT_SYMBOL_GPL(gmap_alloc); 194 195static void gmap_flush_tlb(struct gmap *gmap) 196{ 197 if (MACHINE_HAS_IDTE) 198 __tlb_flush_asce(gmap->mm, gmap->asce); 199 else 200 __tlb_flush_global(); 201} 202 203static void gmap_radix_tree_free(struct radix_tree_root *root) 204{ 205 struct radix_tree_iter iter; 206 unsigned long indices[16]; 207 unsigned long index; 208 void **slot; 209 int i, nr; 210 211 /* A radix tree is freed by deleting all of its entries */ 212 index = 0; 213 do { 214 nr = 0; 215 radix_tree_for_each_slot(slot, root, &iter, index) { 216 indices[nr] = iter.index; 217 if (++nr == 16) 218 break; 219 } 220 for (i = 0; i < nr; i++) { 221 index = indices[i]; 222 radix_tree_delete(root, index); 223 } 224 } while (nr > 0); 225} 226 227/** 228 * gmap_free - free a guest address space 229 * @gmap: pointer to the guest address space structure 230 */ 231void gmap_free(struct gmap *gmap) 232{ 233 struct page *page, *next; 234 235 /* Flush tlb. */ 236 if (MACHINE_HAS_IDTE) 237 __tlb_flush_asce(gmap->mm, gmap->asce); 238 else 239 __tlb_flush_global(); 240 241 /* Free all segment & region tables. 
*/ 242 list_for_each_entry_safe(page, next, &gmap->crst_list, lru) 243 __free_pages(page, 2); 244 gmap_radix_tree_free(&gmap->guest_to_host); 245 gmap_radix_tree_free(&gmap->host_to_guest); 246 down_write(&gmap->mm->mmap_sem); 247 list_del(&gmap->list); 248 up_write(&gmap->mm->mmap_sem); 249 kfree(gmap); 250} 251EXPORT_SYMBOL_GPL(gmap_free); 252 253/** 254 * gmap_enable - switch primary space to the guest address space 255 * @gmap: pointer to the guest address space structure 256 */ 257void gmap_enable(struct gmap *gmap) 258{ 259 S390_lowcore.gmap = (unsigned long) gmap; 260} 261EXPORT_SYMBOL_GPL(gmap_enable); 262 263/** 264 * gmap_disable - switch back to the standard primary address space 265 * @gmap: pointer to the guest address space structure 266 */ 267void gmap_disable(struct gmap *gmap) 268{ 269 S390_lowcore.gmap = 0UL; 270} 271EXPORT_SYMBOL_GPL(gmap_disable); 272 273/* 274 * gmap_alloc_table is assumed to be called with mmap_sem held 275 */ 276static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, 277 unsigned long init, unsigned long gaddr) 278{ 279 struct page *page; 280 unsigned long *new; 281 282 /* since we dont free the gmap table until gmap_free we can unlock */ 283 page = alloc_pages(GFP_KERNEL, 2); 284 if (!page) 285 return -ENOMEM; 286 new = (unsigned long *) page_to_phys(page); 287 crst_table_init(new, init); 288 spin_lock(&gmap->mm->page_table_lock); 289 if (*table & _REGION_ENTRY_INVALID) { 290 list_add(&page->lru, &gmap->crst_list); 291 *table = (unsigned long) new | _REGION_ENTRY_LENGTH | 292 (*table & _REGION_ENTRY_TYPE_MASK); 293 page->index = gaddr; 294 page = NULL; 295 } 296 spin_unlock(&gmap->mm->page_table_lock); 297 if (page) 298 __free_pages(page, 2); 299 return 0; 300} 301 302/** 303 * __gmap_segment_gaddr - find virtual address from segment pointer 304 * @entry: pointer to a segment table entry in the guest address space 305 * 306 * Returns the virtual address in the guest address space for the segment 307 */ 308static 
unsigned long __gmap_segment_gaddr(unsigned long *entry) 309{ 310 struct page *page; 311 unsigned long offset, mask; 312 313 offset = (unsigned long) entry / sizeof(unsigned long); 314 offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE; 315 mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1); 316 page = virt_to_page((void *)((unsigned long) entry & mask)); 317 return page->index + offset; 318} 319 320/** 321 * __gmap_unlink_by_vmaddr - unlink a single segment via a host address 322 * @gmap: pointer to the guest address space structure 323 * @vmaddr: address in the host process address space 324 * 325 * Returns 1 if a TLB flush is required 326 */ 327static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr) 328{ 329 unsigned long *entry; 330 int flush = 0; 331 332 spin_lock(&gmap->guest_table_lock); 333 entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT); 334 if (entry) { 335 flush = (*entry != _SEGMENT_ENTRY_INVALID); 336 *entry = _SEGMENT_ENTRY_INVALID; 337 } 338 spin_unlock(&gmap->guest_table_lock); 339 return flush; 340} 341 342/** 343 * __gmap_unmap_by_gaddr - unmap a single segment via a guest address 344 * @gmap: pointer to the guest address space structure 345 * @gaddr: address in the guest address space 346 * 347 * Returns 1 if a TLB flush is required 348 */ 349static int __gmap_unmap_by_gaddr(struct gmap *gmap, unsigned long gaddr) 350{ 351 unsigned long vmaddr; 352 353 vmaddr = (unsigned long) radix_tree_delete(&gmap->guest_to_host, 354 gaddr >> PMD_SHIFT); 355 return vmaddr ? __gmap_unlink_by_vmaddr(gmap, vmaddr) : 0; 356} 357 358/** 359 * gmap_unmap_segment - unmap segment from the guest address space 360 * @gmap: pointer to the guest address space structure 361 * @to: address in the guest address space 362 * @len: length of the memory area to unmap 363 * 364 * Returns 0 if the unmap succeeded, -EINVAL if not. 
365 */ 366int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) 367{ 368 unsigned long off; 369 int flush; 370 371 if ((to | len) & (PMD_SIZE - 1)) 372 return -EINVAL; 373 if (len == 0 || to + len < to) 374 return -EINVAL; 375 376 flush = 0; 377 down_write(&gmap->mm->mmap_sem); 378 for (off = 0; off < len; off += PMD_SIZE) 379 flush |= __gmap_unmap_by_gaddr(gmap, to + off); 380 up_write(&gmap->mm->mmap_sem); 381 if (flush) 382 gmap_flush_tlb(gmap); 383 return 0; 384} 385EXPORT_SYMBOL_GPL(gmap_unmap_segment); 386 387/** 388 * gmap_mmap_segment - map a segment to the guest address space 389 * @gmap: pointer to the guest address space structure 390 * @from: source address in the parent address space 391 * @to: target address in the guest address space 392 * @len: length of the memory area to map 393 * 394 * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not. 395 */ 396int gmap_map_segment(struct gmap *gmap, unsigned long from, 397 unsigned long to, unsigned long len) 398{ 399 unsigned long off; 400 int flush; 401 402 if ((from | to | len) & (PMD_SIZE - 1)) 403 return -EINVAL; 404 if (len == 0 || from + len < from || to + len < to || 405 from + len - 1 > TASK_MAX_SIZE || to + len - 1 > gmap->asce_end) 406 return -EINVAL; 407 408 flush = 0; 409 down_write(&gmap->mm->mmap_sem); 410 for (off = 0; off < len; off += PMD_SIZE) { 411 /* Remove old translation */ 412 flush |= __gmap_unmap_by_gaddr(gmap, to + off); 413 /* Store new translation */ 414 if (radix_tree_insert(&gmap->guest_to_host, 415 (to + off) >> PMD_SHIFT, 416 (void *) from + off)) 417 break; 418 } 419 up_write(&gmap->mm->mmap_sem); 420 if (flush) 421 gmap_flush_tlb(gmap); 422 if (off >= len) 423 return 0; 424 gmap_unmap_segment(gmap, to, len); 425 return -ENOMEM; 426} 427EXPORT_SYMBOL_GPL(gmap_map_segment); 428 429/** 430 * __gmap_translate - translate a guest address to a user space address 431 * @gmap: pointer to guest mapping meta data structure 432 * @gaddr: guest address 
433 * 434 * Returns user space address which corresponds to the guest address or 435 * -EFAULT if no such mapping exists. 436 * This function does not establish potentially missing page table entries. 437 * The mmap_sem of the mm that belongs to the address space must be held 438 * when this function gets called. 439 */ 440unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr) 441{ 442 unsigned long vmaddr; 443 444 vmaddr = (unsigned long) 445 radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT); 446 return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT; 447} 448EXPORT_SYMBOL_GPL(__gmap_translate); 449 450/** 451 * gmap_translate - translate a guest address to a user space address 452 * @gmap: pointer to guest mapping meta data structure 453 * @gaddr: guest address 454 * 455 * Returns user space address which corresponds to the guest address or 456 * -EFAULT if no such mapping exists. 457 * This function does not establish potentially missing page table entries. 
458 */ 459unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr) 460{ 461 unsigned long rc; 462 463 down_read(&gmap->mm->mmap_sem); 464 rc = __gmap_translate(gmap, gaddr); 465 up_read(&gmap->mm->mmap_sem); 466 return rc; 467} 468EXPORT_SYMBOL_GPL(gmap_translate); 469 470/** 471 * gmap_unlink - disconnect a page table from the gmap shadow tables 472 * @gmap: pointer to guest mapping meta data structure 473 * @table: pointer to the host page table 474 * @vmaddr: vm address associated with the host page table 475 */ 476static void gmap_unlink(struct mm_struct *mm, unsigned long *table, 477 unsigned long vmaddr) 478{ 479 struct gmap *gmap; 480 int flush; 481 482 list_for_each_entry(gmap, &mm->context.gmap_list, list) { 483 flush = __gmap_unlink_by_vmaddr(gmap, vmaddr); 484 if (flush) 485 gmap_flush_tlb(gmap); 486 } 487} 488 489/** 490 * gmap_link - set up shadow page tables to connect a host to a guest address 491 * @gmap: pointer to guest mapping meta data structure 492 * @gaddr: guest address 493 * @vmaddr: vm address 494 * 495 * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT 496 * if the vm address is already mapped to a different guest segment. 497 * The mmap_sem of the mm that belongs to the address space must be held 498 * when this function gets called. 
499 */ 500int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) 501{ 502 struct mm_struct *mm; 503 unsigned long *table; 504 spinlock_t *ptl; 505 pgd_t *pgd; 506 pud_t *pud; 507 pmd_t *pmd; 508 int rc; 509 510 /* Create higher level tables in the gmap page table */ 511 table = gmap->table; 512 if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) { 513 table += (gaddr >> 53) & 0x7ff; 514 if ((*table & _REGION_ENTRY_INVALID) && 515 gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY, 516 gaddr & 0xffe0000000000000UL)) 517 return -ENOMEM; 518 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 519 } 520 if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) { 521 table += (gaddr >> 42) & 0x7ff; 522 if ((*table & _REGION_ENTRY_INVALID) && 523 gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY, 524 gaddr & 0xfffffc0000000000UL)) 525 return -ENOMEM; 526 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 527 } 528 if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) { 529 table += (gaddr >> 31) & 0x7ff; 530 if ((*table & _REGION_ENTRY_INVALID) && 531 gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY, 532 gaddr & 0xffffffff80000000UL)) 533 return -ENOMEM; 534 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 535 } 536 table += (gaddr >> 20) & 0x7ff; 537 /* Walk the parent mm page table */ 538 mm = gmap->mm; 539 pgd = pgd_offset(mm, vmaddr); 540 VM_BUG_ON(pgd_none(*pgd)); 541 pud = pud_offset(pgd, vmaddr); 542 VM_BUG_ON(pud_none(*pud)); 543 pmd = pmd_offset(pud, vmaddr); 544 VM_BUG_ON(pmd_none(*pmd)); 545 /* large pmds cannot yet be handled */ 546 if (pmd_large(*pmd)) 547 return -EFAULT; 548 /* Link gmap segment table entry location to page table. 
*/ 549 rc = radix_tree_preload(GFP_KERNEL); 550 if (rc) 551 return rc; 552 ptl = pmd_lock(mm, pmd); 553 spin_lock(&gmap->guest_table_lock); 554 if (*table == _SEGMENT_ENTRY_INVALID) { 555 rc = radix_tree_insert(&gmap->host_to_guest, 556 vmaddr >> PMD_SHIFT, table); 557 if (!rc) 558 *table = pmd_val(*pmd); 559 } else 560 rc = 0; 561 spin_unlock(&gmap->guest_table_lock); 562 spin_unlock(ptl); 563 radix_tree_preload_end(); 564 return rc; 565} 566 567/** 568 * gmap_fault - resolve a fault on a guest address 569 * @gmap: pointer to guest mapping meta data structure 570 * @gaddr: guest address 571 * @fault_flags: flags to pass down to handle_mm_fault() 572 * 573 * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT 574 * if the vm address is already mapped to a different guest segment. 575 */ 576int gmap_fault(struct gmap *gmap, unsigned long gaddr, 577 unsigned int fault_flags) 578{ 579 unsigned long vmaddr; 580 int rc; 581 bool unlocked; 582 583 down_read(&gmap->mm->mmap_sem); 584 585retry: 586 unlocked = false; 587 vmaddr = __gmap_translate(gmap, gaddr); 588 if (IS_ERR_VALUE(vmaddr)) { 589 rc = vmaddr; 590 goto out_up; 591 } 592 if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags, 593 &unlocked)) { 594 rc = -EFAULT; 595 goto out_up; 596 } 597 /* 598 * In the case that fixup_user_fault unlocked the mmap_sem during 599 * faultin redo __gmap_translate to not race with a map/unmap_segment. 
600 */ 601 if (unlocked) 602 goto retry; 603 604 rc = __gmap_link(gmap, gaddr, vmaddr); 605out_up: 606 up_read(&gmap->mm->mmap_sem); 607 return rc; 608} 609EXPORT_SYMBOL_GPL(gmap_fault); 610 611static void gmap_zap_swap_entry(swp_entry_t entry, struct mm_struct *mm) 612{ 613 if (!non_swap_entry(entry)) 614 dec_mm_counter(mm, MM_SWAPENTS); 615 else if (is_migration_entry(entry)) { 616 struct page *page = migration_entry_to_page(entry); 617 618 dec_mm_counter(mm, mm_counter(page)); 619 } 620 free_swap_and_cache(entry); 621} 622 623/* 624 * this function is assumed to be called with mmap_sem held 625 */ 626void __gmap_zap(struct gmap *gmap, unsigned long gaddr) 627{ 628 unsigned long vmaddr, ptev, pgstev; 629 pte_t *ptep, pte; 630 spinlock_t *ptl; 631 pgste_t pgste; 632 633 /* Find the vm address for the guest address */ 634 vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host, 635 gaddr >> PMD_SHIFT); 636 if (!vmaddr) 637 return; 638 vmaddr |= gaddr & ~PMD_MASK; 639 /* Get pointer to the page table entry */ 640 ptep = get_locked_pte(gmap->mm, vmaddr, &ptl); 641 if (unlikely(!ptep)) 642 return; 643 pte = *ptep; 644 if (!pte_swap(pte)) 645 goto out_pte; 646 /* Zap unused and logically-zero pages */ 647 pgste = pgste_get_lock(ptep); 648 pgstev = pgste_val(pgste); 649 ptev = pte_val(pte); 650 if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) || 651 ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID))) { 652 gmap_zap_swap_entry(pte_to_swp_entry(pte), gmap->mm); 653 pte_clear(gmap->mm, vmaddr, ptep); 654 } 655 pgste_set_unlock(ptep, pgste); 656out_pte: 657 pte_unmap_unlock(ptep, ptl); 658} 659EXPORT_SYMBOL_GPL(__gmap_zap); 660 661void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to) 662{ 663 unsigned long gaddr, vmaddr, size; 664 struct vm_area_struct *vma; 665 666 down_read(&gmap->mm->mmap_sem); 667 for (gaddr = from; gaddr < to; 668 gaddr = (gaddr + PMD_SIZE) & PMD_MASK) { 669 /* Find the vm address for the guest address 
*/ 670 vmaddr = (unsigned long) 671 radix_tree_lookup(&gmap->guest_to_host, 672 gaddr >> PMD_SHIFT); 673 if (!vmaddr) 674 continue; 675 vmaddr |= gaddr & ~PMD_MASK; 676 /* Find vma in the parent mm */ 677 vma = find_vma(gmap->mm, vmaddr); 678 size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK)); 679 zap_page_range(vma, vmaddr, size, NULL); 680 } 681 up_read(&gmap->mm->mmap_sem); 682} 683EXPORT_SYMBOL_GPL(gmap_discard); 684 685static LIST_HEAD(gmap_notifier_list); 686static DEFINE_SPINLOCK(gmap_notifier_lock); 687 688/** 689 * gmap_register_ipte_notifier - register a pte invalidation callback 690 * @nb: pointer to the gmap notifier block 691 */ 692void gmap_register_ipte_notifier(struct gmap_notifier *nb) 693{ 694 spin_lock(&gmap_notifier_lock); 695 list_add(&nb->list, &gmap_notifier_list); 696 spin_unlock(&gmap_notifier_lock); 697} 698EXPORT_SYMBOL_GPL(gmap_register_ipte_notifier); 699 700/** 701 * gmap_unregister_ipte_notifier - remove a pte invalidation callback 702 * @nb: pointer to the gmap notifier block 703 */ 704void gmap_unregister_ipte_notifier(struct gmap_notifier *nb) 705{ 706 spin_lock(&gmap_notifier_lock); 707 list_del_init(&nb->list); 708 spin_unlock(&gmap_notifier_lock); 709} 710EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier); 711 712/** 713 * gmap_ipte_notify - mark a range of ptes for invalidation notification 714 * @gmap: pointer to guest mapping meta data structure 715 * @gaddr: virtual address in the guest address space 716 * @len: size of area 717 * 718 * Returns 0 if for each page in the given range a gmap mapping exists and 719 * the invalidation notification could be set. If the gmap mapping is missing 720 * for one or more pages -EFAULT is returned. If no memory could be allocated 721 * -ENOMEM is returned. This function establishes missing page table entries. 
722 */ 723int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len) 724{ 725 unsigned long addr; 726 spinlock_t *ptl; 727 pte_t *ptep, entry; 728 pgste_t pgste; 729 bool unlocked; 730 int rc = 0; 731 732 if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK)) 733 return -EINVAL; 734 down_read(&gmap->mm->mmap_sem); 735 while (len) { 736 unlocked = false; 737 /* Convert gmap address and connect the page tables */ 738 addr = __gmap_translate(gmap, gaddr); 739 if (IS_ERR_VALUE(addr)) { 740 rc = addr; 741 break; 742 } 743 /* Get the page mapped */ 744 if (fixup_user_fault(current, gmap->mm, addr, FAULT_FLAG_WRITE, 745 &unlocked)) { 746 rc = -EFAULT; 747 break; 748 } 749 /* While trying to map mmap_sem got unlocked. Let us retry */ 750 if (unlocked) 751 continue; 752 rc = __gmap_link(gmap, gaddr, addr); 753 if (rc) 754 break; 755 /* Walk the process page table, lock and get pte pointer */ 756 ptep = get_locked_pte(gmap->mm, addr, &ptl); 757 VM_BUG_ON(!ptep); 758 /* Set notification bit in the pgste of the pte */ 759 entry = *ptep; 760 if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) { 761 pgste = pgste_get_lock(ptep); 762 pgste_val(pgste) |= PGSTE_IN_BIT; 763 pgste_set_unlock(ptep, pgste); 764 gaddr += PAGE_SIZE; 765 len -= PAGE_SIZE; 766 } 767 pte_unmap_unlock(ptep, ptl); 768 } 769 up_read(&gmap->mm->mmap_sem); 770 return rc; 771} 772EXPORT_SYMBOL_GPL(gmap_ipte_notify); 773 774/** 775 * ptep_ipte_notify - call all invalidation callbacks for a specific pte. 776 * @mm: pointer to the process mm_struct 777 * @addr: virtual address in the process address space 778 * @pte: pointer to the page table entry 779 * 780 * This function is assumed to be called with the page table lock held 781 * for the pte to notify. 
782 */ 783void ptep_ipte_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte) 784{ 785 unsigned long offset, gaddr; 786 unsigned long *table; 787 struct gmap_notifier *nb; 788 struct gmap *gmap; 789 790 offset = ((unsigned long) pte) & (255 * sizeof(pte_t)); 791 offset = offset * (4096 / sizeof(pte_t)); 792 spin_lock(&gmap_notifier_lock); 793 list_for_each_entry(gmap, &mm->context.gmap_list, list) { 794 table = radix_tree_lookup(&gmap->host_to_guest, 795 vmaddr >> PMD_SHIFT); 796 if (!table) 797 continue; 798 gaddr = __gmap_segment_gaddr(table) + offset; 799 list_for_each_entry(nb, &gmap_notifier_list, list) 800 nb->notifier_call(gmap, gaddr); 801 } 802 spin_unlock(&gmap_notifier_lock); 803} 804EXPORT_SYMBOL_GPL(ptep_ipte_notify); 805 806int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, 807 unsigned long key, bool nq) 808{ 809 spinlock_t *ptl; 810 pgste_t old, new; 811 pte_t *ptep; 812 813 down_read(&mm->mmap_sem); 814 ptep = get_locked_pte(mm, addr, &ptl); 815 if (unlikely(!ptep)) { 816 up_read(&mm->mmap_sem); 817 return -EFAULT; 818 } 819 820 new = old = pgste_get_lock(ptep); 821 pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT | 822 PGSTE_ACC_BITS | PGSTE_FP_BIT); 823 pgste_val(new) |= (key & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48; 824 pgste_val(new) |= (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; 825 if (!(pte_val(*ptep) & _PAGE_INVALID)) { 826 unsigned long address, bits, skey; 827 828 address = pte_val(*ptep) & PAGE_MASK; 829 skey = (unsigned long) page_get_storage_key(address); 830 bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); 831 skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT); 832 /* Set storage key ACC and FP */ 833 page_set_storage_key(address, skey, !nq); 834 /* Merge host changed & referenced into pgste */ 835 pgste_val(new) |= bits << 52; 836 } 837 /* changing the guest storage key is considered a change of the page */ 838 if ((pgste_val(new) ^ pgste_val(old)) & 839 (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | 
PGSTE_GC_BIT)) 840 pgste_val(new) |= PGSTE_UC_BIT; 841 842 pgste_set_unlock(ptep, new); 843 pte_unmap_unlock(ptep, ptl); 844 up_read(&mm->mmap_sem); 845 return 0; 846} 847EXPORT_SYMBOL(set_guest_storage_key); 848 849unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr) 850{ 851 spinlock_t *ptl; 852 pgste_t pgste; 853 pte_t *ptep; 854 uint64_t physaddr; 855 unsigned long key = 0; 856 857 down_read(&mm->mmap_sem); 858 ptep = get_locked_pte(mm, addr, &ptl); 859 if (unlikely(!ptep)) { 860 up_read(&mm->mmap_sem); 861 return -EFAULT; 862 } 863 pgste = pgste_get_lock(ptep); 864 865 if (pte_val(*ptep) & _PAGE_INVALID) { 866 key |= (pgste_val(pgste) & PGSTE_ACC_BITS) >> 56; 867 key |= (pgste_val(pgste) & PGSTE_FP_BIT) >> 56; 868 key |= (pgste_val(pgste) & PGSTE_GR_BIT) >> 48; 869 key |= (pgste_val(pgste) & PGSTE_GC_BIT) >> 48; 870 } else { 871 physaddr = pte_val(*ptep) & PAGE_MASK; 872 key = page_get_storage_key(physaddr); 873 874 /* Reflect guest's logical view, not physical */ 875 if (pgste_val(pgste) & PGSTE_GR_BIT) 876 key |= _PAGE_REFERENCED; 877 if (pgste_val(pgste) & PGSTE_GC_BIT) 878 key |= _PAGE_CHANGED; 879 } 880 881 pgste_set_unlock(ptep, pgste); 882 pte_unmap_unlock(ptep, ptl); 883 up_read(&mm->mmap_sem); 884 return key; 885} 886EXPORT_SYMBOL(get_guest_storage_key); 887 888static int page_table_allocate_pgste_min = 0; 889static int page_table_allocate_pgste_max = 1; 890int page_table_allocate_pgste = 0; 891EXPORT_SYMBOL(page_table_allocate_pgste); 892 893static struct ctl_table page_table_sysctl[] = { 894 { 895 .procname = "allocate_pgste", 896 .data = &page_table_allocate_pgste, 897 .maxlen = sizeof(int), 898 .mode = S_IRUGO | S_IWUSR, 899 .proc_handler = proc_dointvec, 900 .extra1 = &page_table_allocate_pgste_min, 901 .extra2 = &page_table_allocate_pgste_max, 902 }, 903 { } 904}; 905 906static struct ctl_table page_table_sysctl_dir[] = { 907 { 908 .procname = "vm", 909 .maxlen = 0, 910 .mode = 0555, 911 .child = page_table_sysctl, 912 
}, 913 { } 914}; 915 916static int __init page_table_register_sysctl(void) 917{ 918 return register_sysctl_table(page_table_sysctl_dir) ? 0 : -ENOMEM; 919} 920__initcall(page_table_register_sysctl); 921 922#else /* CONFIG_PGSTE */ 923 924static inline void gmap_unlink(struct mm_struct *mm, unsigned long *table, 925 unsigned long vmaddr) 926{ 927} 928 929#endif /* CONFIG_PGSTE */ 930 931static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits) 932{ 933 unsigned int old, new; 934 935 do { 936 old = atomic_read(v); 937 new = old ^ bits; 938 } while (atomic_cmpxchg(v, old, new) != old); 939 return new; 940} 941 942/* 943 * page table entry allocation/free routines. 944 */ 945unsigned long *page_table_alloc(struct mm_struct *mm) 946{ 947 unsigned long *table; 948 struct page *page; 949 unsigned int mask, bit; 950 951 /* Try to get a fragment of a 4K page as a 2K page table */ 952 if (!mm_alloc_pgste(mm)) { 953 table = NULL; 954 spin_lock_bh(&mm->context.list_lock); 955 if (!list_empty(&mm->context.pgtable_list)) { 956 page = list_first_entry(&mm->context.pgtable_list, 957 struct page, lru); 958 mask = atomic_read(&page->_mapcount); 959 mask = (mask | (mask >> 4)) & 3; 960 if (mask != 3) { 961 table = (unsigned long *) page_to_phys(page); 962 bit = mask & 1; /* =1 -> second 2K */ 963 if (bit) 964 table += PTRS_PER_PTE; 965 atomic_xor_bits(&page->_mapcount, 1U << bit); 966 list_del(&page->lru); 967 } 968 } 969 spin_unlock_bh(&mm->context.list_lock); 970 if (table) 971 return table; 972 } 973 /* Allocate a fresh page */ 974 page = alloc_page(GFP_KERNEL|__GFP_REPEAT); 975 if (!page) 976 return NULL; 977 if (!pgtable_page_ctor(page)) { 978 __free_page(page); 979 return NULL; 980 } 981 /* Initialize page table */ 982 table = (unsigned long *) page_to_phys(page); 983 if (mm_alloc_pgste(mm)) { 984 /* Return 4K page table with PGSTEs */ 985 atomic_set(&page->_mapcount, 3); 986 clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); 987 clear_table(table + PTRS_PER_PTE, 
0, PAGE_SIZE/2); 988 } else { 989 /* Return the first 2K fragment of the page */ 990 atomic_set(&page->_mapcount, 1); 991 clear_table(table, _PAGE_INVALID, PAGE_SIZE); 992 spin_lock_bh(&mm->context.list_lock); 993 list_add(&page->lru, &mm->context.pgtable_list); 994 spin_unlock_bh(&mm->context.list_lock); 995 } 996 return table; 997} 998 999void page_table_free(struct mm_struct *mm, unsigned long *table) 1000{ 1001 struct page *page; 1002 unsigned int bit, mask; 1003 1004 page = pfn_to_page(__pa(table) >> PAGE_SHIFT); 1005 if (!mm_alloc_pgste(mm)) { 1006 /* Free 2K page table fragment of a 4K page */ 1007 bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)); 1008 spin_lock_bh(&mm->context.list_lock); 1009 mask = atomic_xor_bits(&page->_mapcount, 1U << bit); 1010 if (mask & 3) 1011 list_add(&page->lru, &mm->context.pgtable_list); 1012 else 1013 list_del(&page->lru); 1014 spin_unlock_bh(&mm->context.list_lock); 1015 if (mask != 0) 1016 return; 1017 } 1018 1019 pgtable_page_dtor(page); 1020 atomic_set(&page->_mapcount, -1); 1021 __free_page(page); 1022} 1023 1024void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table, 1025 unsigned long vmaddr) 1026{ 1027 struct mm_struct *mm; 1028 struct page *page; 1029 unsigned int bit, mask; 1030 1031 mm = tlb->mm; 1032 page = pfn_to_page(__pa(table) >> PAGE_SHIFT); 1033 if (mm_alloc_pgste(mm)) { 1034 gmap_unlink(mm, table, vmaddr); 1035 table = (unsigned long *) (__pa(table) | 3); 1036 tlb_remove_table(tlb, table); 1037 return; 1038 } 1039 bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)); 1040 spin_lock_bh(&mm->context.list_lock); 1041 mask = atomic_xor_bits(&page->_mapcount, 0x11U << bit); 1042 if (mask & 3) 1043 list_add_tail(&page->lru, &mm->context.pgtable_list); 1044 else 1045 list_del(&page->lru); 1046 spin_unlock_bh(&mm->context.list_lock); 1047 table = (unsigned long *) (__pa(table) | (1U << bit)); 1048 tlb_remove_table(tlb, table); 1049} 1050 1051static void __tlb_remove_table(void 
*_table) 1052{ 1053 unsigned int mask = (unsigned long) _table & 3; 1054 void *table = (void *)((unsigned long) _table ^ mask); 1055 struct page *page = pfn_to_page(__pa(table) >> PAGE_SHIFT); 1056 1057 switch (mask) { 1058 case 0: /* pmd or pud */ 1059 free_pages((unsigned long) table, 2); 1060 break; 1061 case 1: /* lower 2K of a 4K page table */ 1062 case 2: /* higher 2K of a 4K page table */ 1063 if (atomic_xor_bits(&page->_mapcount, mask << 4) != 0) 1064 break; 1065 /* fallthrough */ 1066 case 3: /* 4K page table with pgstes */ 1067 pgtable_page_dtor(page); 1068 atomic_set(&page->_mapcount, -1); 1069 __free_page(page); 1070 break; 1071 } 1072} 1073 1074static void tlb_remove_table_smp_sync(void *arg) 1075{ 1076 /* Simply deliver the interrupt */ 1077} 1078 1079static void tlb_remove_table_one(void *table) 1080{ 1081 /* 1082 * This isn't an RCU grace period and hence the page-tables cannot be 1083 * assumed to be actually RCU-freed. 1084 * 1085 * It is however sufficient for software page-table walkers that rely 1086 * on IRQ disabling. See the comment near struct mmu_table_batch. 
1087 */ 1088 smp_call_function(tlb_remove_table_smp_sync, NULL, 1); 1089 __tlb_remove_table(table); 1090} 1091 1092static void tlb_remove_table_rcu(struct rcu_head *head) 1093{ 1094 struct mmu_table_batch *batch; 1095 int i; 1096 1097 batch = container_of(head, struct mmu_table_batch, rcu); 1098 1099 for (i = 0; i < batch->nr; i++) 1100 __tlb_remove_table(batch->tables[i]); 1101 1102 free_page((unsigned long)batch); 1103} 1104 1105void tlb_table_flush(struct mmu_gather *tlb) 1106{ 1107 struct mmu_table_batch **batch = &tlb->batch; 1108 1109 if (*batch) { 1110 call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu); 1111 *batch = NULL; 1112 } 1113} 1114 1115void tlb_remove_table(struct mmu_gather *tlb, void *table) 1116{ 1117 struct mmu_table_batch **batch = &tlb->batch; 1118 1119 tlb->mm->context.flush_mm = 1; 1120 if (*batch == NULL) { 1121 *batch = (struct mmu_table_batch *) 1122 __get_free_page(GFP_NOWAIT | __GFP_NOWARN); 1123 if (*batch == NULL) { 1124 __tlb_flush_mm_lazy(tlb->mm); 1125 tlb_remove_table_one(table); 1126 return; 1127 } 1128 (*batch)->nr = 0; 1129 } 1130 (*batch)->tables[(*batch)->nr++] = table; 1131 if ((*batch)->nr == MAX_TABLE_BATCH) 1132 tlb_flush_mmu(tlb); 1133} 1134 1135#ifdef CONFIG_TRANSPARENT_HUGEPAGE 1136static inline void thp_split_vma(struct vm_area_struct *vma) 1137{ 1138 unsigned long addr; 1139 1140 for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) 1141 follow_page(vma, addr, FOLL_SPLIT); 1142} 1143 1144static inline void thp_split_mm(struct mm_struct *mm) 1145{ 1146 struct vm_area_struct *vma; 1147 1148 for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) { 1149 thp_split_vma(vma); 1150 vma->vm_flags &= ~VM_HUGEPAGE; 1151 vma->vm_flags |= VM_NOHUGEPAGE; 1152 } 1153 mm->def_flags |= VM_NOHUGEPAGE; 1154} 1155#else 1156static inline void thp_split_mm(struct mm_struct *mm) 1157{ 1158} 1159#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 1160 | |
1161static inline pte_t ptep_flush_direct(struct mm_struct *mm, 1162 unsigned long addr, pte_t *ptep) 1163{ 1164 int active, count; 1165 pte_t old; 1166 1167 old = *ptep; 1168 if (unlikely(pte_val(old) & _PAGE_INVALID)) --- 24 unchanged lines hidden (view full) --- 1193 pte_val(*ptep) |= _PAGE_INVALID; 1194 mm->context.flush_mm = 1; 1195 } else 1196 __ptep_ipte(addr, ptep); 1197 atomic_sub(0x10000, &mm->context.attach_count); 1198 return old; 1199} 1200 | 27static inline pte_t ptep_flush_direct(struct mm_struct *mm, 28 unsigned long addr, pte_t *ptep) 29{ 30 int active, count; 31 pte_t old; 32 33 old = *ptep; 34 if (unlikely(pte_val(old) & _PAGE_INVALID)) --- 24 unchanged lines hidden (view full) --- 59 pte_val(*ptep) |= _PAGE_INVALID; 60 mm->context.flush_mm = 1; 61 } else 62 __ptep_ipte(addr, ptep); 63 atomic_sub(0x10000, &mm->context.attach_count); 64 return old; 65} 66 |
67static inline pgste_t pgste_get_lock(pte_t *ptep) 68{ 69 unsigned long new = 0; 70#ifdef CONFIG_PGSTE 71 unsigned long old; 72 73 preempt_disable(); 74 asm( 75 " lg %0,%2\n" 76 "0: lgr %1,%0\n" 77 " nihh %0,0xff7f\n" /* clear PCL bit in old */ 78 " oihh %1,0x0080\n" /* set PCL bit in new */ 79 " csg %0,%1,%2\n" 80 " jl 0b\n" 81 : "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE]) 82 : "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory"); 83#endif 84 return __pgste(new); 85} 86 87static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste) 88{ 89#ifdef CONFIG_PGSTE 90 asm( 91 " nihh %1,0xff7f\n" /* clear PCL bit */ 92 " stg %1,%0\n" 93 : "=Q" (ptep[PTRS_PER_PTE]) 94 : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE]) 95 : "cc", "memory"); 96 preempt_enable(); 97#endif 98} 99 100static inline pgste_t pgste_get(pte_t *ptep) 101{ 102 unsigned long pgste = 0; 103#ifdef CONFIG_PGSTE 104 pgste = *(unsigned long *)(ptep + PTRS_PER_PTE); 105#endif 106 return __pgste(pgste); 107} 108 109static inline void pgste_set(pte_t *ptep, pgste_t pgste) 110{ 111#ifdef CONFIG_PGSTE 112 *(pgste_t *)(ptep + PTRS_PER_PTE) = pgste; 113#endif 114} 115 |
|
1201static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste, 1202 struct mm_struct *mm) 1203{ 1204#ifdef CONFIG_PGSTE 1205 unsigned long address, bits, skey; 1206 1207 if (!mm_use_skey(mm) || pte_val(pte) & _PAGE_INVALID) 1208 return pgste; --- 57 unchanged lines hidden (view full) --- 1266 1267static inline pgste_t pgste_ipte_notify(struct mm_struct *mm, 1268 unsigned long addr, 1269 pte_t *ptep, pgste_t pgste) 1270{ 1271#ifdef CONFIG_PGSTE 1272 if (pgste_val(pgste) & PGSTE_IN_BIT) { 1273 pgste_val(pgste) &= ~PGSTE_IN_BIT; | 116static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste, 117 struct mm_struct *mm) 118{ 119#ifdef CONFIG_PGSTE 120 unsigned long address, bits, skey; 121 122 if (!mm_use_skey(mm) || pte_val(pte) & _PAGE_INVALID) 123 return pgste; --- 57 unchanged lines hidden (view full) --- 181 182static inline pgste_t pgste_ipte_notify(struct mm_struct *mm, 183 unsigned long addr, 184 pte_t *ptep, pgste_t pgste) 185{ 186#ifdef CONFIG_PGSTE 187 if (pgste_val(pgste) & PGSTE_IN_BIT) { 188 pgste_val(pgste) &= ~PGSTE_IN_BIT; |
1274 ptep_ipte_notify(mm, addr, ptep); | 189 ptep_notify(mm, addr, ptep); |
1275 } 1276#endif 1277 return pgste; 1278} 1279 | 190 } 191#endif 192 return pgste; 193} 194 |
#ifdef CONFIG_PGSTE
/*
 * Test and reset if a guest page is dirty
 *
 * Looks up the pte for @addr in @mm, reads the PGSTE_UC_BIT from its pgste
 * and clears it.  If the bit was set and the pte is present, the pte is
 * invalidated with __ptep_ipte() and rewritten either write-protected or
 * invalid so that the next write is noticed again.
 *
 * Returns true if the UC bit was set, false if it was clear or if no pte
 * could be obtained for @addr.
 */
bool pgste_test_and_clear_dirty(struct mm_struct *mm, unsigned long addr)
{
	spinlock_t *ptl;
	pgste_t pgste;
	pte_t *ptep;
	pte_t pte;
	bool dirty;

	ptep = get_locked_pte(mm, addr, &ptl);
	if (unlikely(!ptep))
		return false;

	pgste = pgste_get_lock(ptep);
	dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
	pgste_val(pgste) &= ~PGSTE_UC_BIT;
	pte = *ptep;
	if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
		pgste = pgste_ipte_notify(mm, addr, ptep, pgste);
		__ptep_ipte(addr, ptep);
		/* protect if possible, otherwise fall back to invalidating */
		if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
			pte_val(pte) |= _PAGE_PROTECT;
		else
			pte_val(pte) |= _PAGE_INVALID;
		*ptep = pte;
	}
	pgste_set_unlock(ptep, pgste);

	/*
	 * get_locked_pte() hands back a mapped and locked pte; release both
	 * the mapping and the lock, as the other get_locked_pte() users do.
	 */
	pte_unmap_unlock(ptep, ptl);
	return dirty;
}
EXPORT_SYMBOL_GPL(pgste_test_and_clear_dirty);

/*
 * Install @entry at @ptep for an mm with pgstes: under the pgste lock the
 * _PGSTE_GPS_ZERO flag is cleared, the storage key is set up via
 * pgste_set_key() and the pte is written via pgste_set_pte().
 */
void set_pte_pgste_at(struct mm_struct *mm, unsigned long addr,
		      pte_t *ptep, pte_t entry)
{
	pgste_t pgste;

	/* the mm_has_pgste() check is done in set_pte_at() */
	pgste = pgste_get_lock(ptep);
	pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
	pgste_set_key(ptep, pgste, entry, mm);
	pgste = pgste_set_pte(ptep, pgste, entry);
	pgste_set_unlock(ptep, pgste);
}
EXPORT_SYMBOL(set_pte_pgste_at);
#endif

1331static inline pgste_t ptep_xchg_start(struct mm_struct *mm, 1332 unsigned long addr, pte_t *ptep) 1333{ 1334 pgste_t pgste = __pgste(0); 1335 1336 if (mm_has_pgste(mm)) { 1337 pgste = pgste_get_lock(ptep); 1338 pgste = pgste_ipte_notify(mm, addr, ptep, pgste); --- 142 unchanged lines hidden (view full) --- 1481 pmd_t old; 1482 1483 old = pmdp_flush_lazy(mm, addr, pmdp); 1484 *pmdp = new; 1485 return old; 1486} 1487EXPORT_SYMBOL(pmdp_xchg_lazy); 1488 | 195static inline pgste_t ptep_xchg_start(struct mm_struct *mm, 196 unsigned long addr, pte_t *ptep) 197{ 198 pgste_t pgste = __pgste(0); 199 200 if (mm_has_pgste(mm)) { 201 pgste = pgste_get_lock(ptep); 202 pgste = pgste_ipte_notify(mm, addr, ptep, pgste); --- 142 unchanged lines hidden (view full) --- 345 pmd_t old; 346 347 old = pmdp_flush_lazy(mm, addr, pmdp); 348 *pmdp = new; 349 return old; 350} 351EXPORT_SYMBOL(pmdp_xchg_lazy); 352 |
1489/* 1490 * switch on pgstes for its userspace process (for kvm) 1491 */ 1492int s390_enable_sie(void) 1493{ 1494 struct mm_struct *mm = current->mm; 1495 1496 /* Do we have pgstes? if yes, we are done */ 1497 if (mm_has_pgste(mm)) 1498 return 0; 1499 /* Fail if the page tables are 2K */ 1500 if (!mm_alloc_pgste(mm)) 1501 return -EINVAL; 1502 down_write(&mm->mmap_sem); 1503 mm->context.has_pgste = 1; 1504 /* split thp mappings and disable thp for future mappings */ 1505 thp_split_mm(mm); 1506 up_write(&mm->mmap_sem); 1507 return 0; 1508} 1509EXPORT_SYMBOL_GPL(s390_enable_sie); 1510 1511/* 1512 * Enable storage key handling from now on and initialize the storage 1513 * keys with the default key. 1514 */ 1515static int __s390_enable_skey(pte_t *pte, unsigned long addr, 1516 unsigned long next, struct mm_walk *walk) 1517{ 1518 unsigned long ptev; 1519 pgste_t pgste; 1520 1521 /* 1522 * Remove all zero page mappings, 1523 * after establishing a policy to forbid zero page mappings 1524 * following faults for that page will get fresh anonymous pages 1525 */ 1526 if (is_zero_pfn(pte_pfn(*pte))) 1527 ptep_xchg_direct(walk->mm, addr, pte, __pte(_PAGE_INVALID)); 1528 /* Clear storage key */ 1529 pgste = pgste_get_lock(pte); 1530 pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT | 1531 PGSTE_GR_BIT | PGSTE_GC_BIT); 1532 ptev = pte_val(*pte); 1533 if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE)) 1534 page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1); 1535 pgste_set_unlock(pte, pgste); 1536 return 0; 1537} 1538 1539int s390_enable_skey(void) 1540{ 1541 struct mm_walk walk = { .pte_entry = __s390_enable_skey }; 1542 struct mm_struct *mm = current->mm; 1543 struct vm_area_struct *vma; 1544 int rc = 0; 1545 1546 down_write(&mm->mmap_sem); 1547 if (mm_use_skey(mm)) 1548 goto out_up; 1549 1550 mm->context.use_skey = 1; 1551 for (vma = mm->mmap; vma; vma = vma->vm_next) { 1552 if (ksm_madvise(vma, vma->vm_start, vma->vm_end, 1553 MADV_UNMERGEABLE, &vma->vm_flags)) { 
1554 mm->context.use_skey = 0; 1555 rc = -ENOMEM; 1556 goto out_up; 1557 } 1558 } 1559 mm->def_flags &= ~VM_MERGEABLE; 1560 1561 walk.mm = mm; 1562 walk_page_range(0, TASK_SIZE, &walk); 1563 1564out_up: 1565 up_write(&mm->mmap_sem); 1566 return rc; 1567} 1568EXPORT_SYMBOL_GPL(s390_enable_skey); 1569 1570/* 1571 * Reset CMMA state, make all pages stable again. 1572 */ 1573static int __s390_reset_cmma(pte_t *pte, unsigned long addr, 1574 unsigned long next, struct mm_walk *walk) 1575{ 1576 pgste_t pgste; 1577 1578 pgste = pgste_get_lock(pte); 1579 pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK; 1580 pgste_set_unlock(pte, pgste); 1581 return 0; 1582} 1583 1584void s390_reset_cmma(struct mm_struct *mm) 1585{ 1586 struct mm_walk walk = { .pte_entry = __s390_reset_cmma }; 1587 1588 down_write(&mm->mmap_sem); 1589 walk.mm = mm; 1590 walk_page_range(0, TASK_SIZE, &walk); 1591 up_write(&mm->mmap_sem); 1592} 1593EXPORT_SYMBOL_GPL(s390_reset_cmma); 1594 | |
1595#ifdef CONFIG_TRANSPARENT_HUGEPAGE 1596void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, 1597 pgtable_t pgtable) 1598{ 1599 struct list_head *lh = (struct list_head *) pgtable; 1600 1601 assert_spin_locked(pmd_lockptr(mm, pmdp)); 1602 --- 24 unchanged lines hidden (view full) --- 1627 } 1628 ptep = (pte_t *) pgtable; 1629 pte_val(*ptep) = _PAGE_INVALID; 1630 ptep++; 1631 pte_val(*ptep) = _PAGE_INVALID; 1632 return pgtable; 1633} 1634#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ | 353#ifdef CONFIG_TRANSPARENT_HUGEPAGE 354void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, 355 pgtable_t pgtable) 356{ 357 struct list_head *lh = (struct list_head *) pgtable; 358 359 assert_spin_locked(pmd_lockptr(mm, pmdp)); 360 --- 24 unchanged lines hidden (view full) --- 385 } 386 ptep = (pte_t *) pgtable; 387 pte_val(*ptep) = _PAGE_INVALID; 388 ptep++; 389 pte_val(*ptep) = _PAGE_INVALID; 390 return pgtable; 391} 392#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ |

#ifdef CONFIG_PGSTE
/*
 * Install @entry at @ptep for an mm with pgstes: under the pgste lock the
 * _PGSTE_GPS_ZERO flag is cleared, the storage key is set up via
 * pgste_set_key() and the pte is written via pgste_set_pte().
 */
void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t entry)
{
	pgste_t pgste;

	/* the mm_has_pgste() check is done in set_pte_at() */
	pgste = pgste_get_lock(ptep);
	pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
	pgste_set_key(ptep, pgste, entry, mm);
	pgste = pgste_set_pte(ptep, pgste, entry);
	pgste_set_unlock(ptep, pgste);
}

/*
 * Set PGSTE_IN_BIT in the pgste of @ptep (under the pgste lock).  The bit
 * is what pgste_ipte_notify() tests before calling ptep_notify().
 */
void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	pgste_t pgste;

	pgste = pgste_get_lock(ptep);
	pgste_val(pgste) |= PGSTE_IN_BIT;
	pgste_set_unlock(ptep, pgste);
}

/*
 * Drop the accounting for a swap or migration entry that is about to be
 * removed from a pte, then release the swap entry itself.
 */
static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
{
	if (!non_swap_entry(entry))
		dec_mm_counter(mm, MM_SWAPENTS);
	else if (is_migration_entry(entry)) {
		struct page *page = migration_entry_to_page(entry);

		dec_mm_counter(mm, mm_counter(page));
	}
	free_swap_and_cache(entry);
}

/*
 * Zap a swap pte whose pgste marks the page as unused or logically zero.
 * If @reset is non-zero the usage state bits of the pgste are cleared
 * afterwards, regardless of whether the pte was zapped.
 */
void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, int reset)
{
	unsigned long pgstev;
	pgste_t pgste;
	pte_t pte;

	/* Zap unused and logically-zero pages */
	pgste = pgste_get_lock(ptep);
	pgstev = pgste_val(pgste);
	pte = *ptep;
	if (pte_swap(pte) &&
	    ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED ||
	     (pgstev & _PGSTE_GPS_ZERO))) {
		ptep_zap_swap_entry(mm, pte_to_swp_entry(pte));
		pte_clear(mm, addr, ptep);
	}
	if (reset)
		pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK;
	pgste_set_unlock(ptep, pgste);
}

/*
 * Clear the storage key bits kept in the pgste of @ptep and, for a valid
 * writable pte, reset the real storage key of the page to
 * PAGE_DEFAULT_KEY.
 */
void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	unsigned long ptev;
	pgste_t pgste;

	/* Clear storage key */
	pgste = pgste_get_lock(ptep);
	pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT |
			      PGSTE_GR_BIT | PGSTE_GC_BIT);
	ptev = pte_val(*ptep);
	if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE))
		page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1);
	pgste_set_unlock(ptep, pgste);
}

/*
 * Test and reset if a guest page is dirty
 *
 * Reads and clears PGSTE_UC_BIT in the pgste for @addr.  If the bit was
 * set and the pte is present, the pte is invalidated with __ptep_ipte()
 * and rewritten either write-protected or invalid so that the next write
 * is noticed again.
 *
 * Returns true if the UC bit was set, false if it was clear or if no pte
 * could be obtained for @addr.
 */
bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
{
	spinlock_t *ptl;
	pgste_t pgste;
	pte_t *ptep;
	pte_t pte;
	bool dirty;

	ptep = get_locked_pte(mm, addr, &ptl);
	if (unlikely(!ptep))
		return false;

	pgste = pgste_get_lock(ptep);
	dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
	pgste_val(pgste) &= ~PGSTE_UC_BIT;
	pte = *ptep;
	if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
		pgste = pgste_ipte_notify(mm, addr, ptep, pgste);
		__ptep_ipte(addr, ptep);
		/* protect if possible, otherwise fall back to invalidating */
		if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
			pte_val(pte) |= _PAGE_PROTECT;
		else
			pte_val(pte) |= _PAGE_INVALID;
		*ptep = pte;
	}
	pgste_set_unlock(ptep, pgste);

	/*
	 * get_locked_pte() hands back a mapped and locked pte; release both
	 * the mapping and the lock, matching set_guest_storage_key() and
	 * get_guest_storage_key() below.
	 */
	pte_unmap_unlock(ptep, ptl);
	return dirty;
}
EXPORT_SYMBOL_GPL(test_and_clear_guest_dirty);

/*
 * Set the guest view of the storage key for the page mapped at @addr.
 *
 * @key: storage key value; its ACC/FP and changed/referenced bits are
 *       copied into the pgste.
 * @nq:  passed on, inverted, as the last argument of
 *       page_set_storage_key().
 *
 * For a valid pte the real storage key is updated with the ACC and FP
 * bits of @key and the host changed/referenced bits are merged into the
 * pgste.  A change of any guest key bit marks the page as changed
 * (PGSTE_UC_BIT).
 *
 * Returns 0 on success or -EFAULT if no pte could be obtained for @addr.
 */
int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
			  unsigned char key, bool nq)
{
	unsigned long keyul;
	spinlock_t *ptl;
	pgste_t old, new;
	pte_t *ptep;

	down_read(&mm->mmap_sem);
	ptep = get_locked_pte(mm, addr, &ptl);
	if (unlikely(!ptep)) {
		up_read(&mm->mmap_sem);
		return -EFAULT;
	}

	new = old = pgste_get_lock(ptep);
	pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
			    PGSTE_ACC_BITS | PGSTE_FP_BIT);
	keyul = (unsigned long) key;
	pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
	pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
		unsigned long address, bits, skey;

		address = pte_val(*ptep) & PAGE_MASK;
		skey = (unsigned long) page_get_storage_key(address);
		bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
		skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
		/* Set storage key ACC and FP */
		page_set_storage_key(address, skey, !nq);
		/* Merge host changed & referenced into pgste */
		pgste_val(new) |= bits << 52;
	}
	/* changing the guest storage key is considered a change of the page */
	if ((pgste_val(new) ^ pgste_val(old)) &
	    (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
		pgste_val(new) |= PGSTE_UC_BIT;

	pgste_set_unlock(ptep, new);
	pte_unmap_unlock(ptep, ptl);
	up_read(&mm->mmap_sem);
	return 0;
}
EXPORT_SYMBOL(set_guest_storage_key);

/*
 * Return the guest view of the storage key for the page mapped at @addr.
 *
 * For an invalid pte the key is assembled from the bits saved in the
 * pgste; for a valid pte the real storage key is read and the guest
 * referenced/changed bits from the pgste are OR-ed in.
 *
 * NOTE(review): the error path returns -EFAULT through an unsigned char
 * return type, so the caller receives the truncated value and cannot tell
 * it apart from a real key.  Fixing this needs a signature change (e.g.
 * int return plus an output parameter) together with all callers.
 */
unsigned char get_guest_storage_key(struct mm_struct *mm, unsigned long addr)
{
	unsigned char key;
	spinlock_t *ptl;
	pgste_t pgste;
	pte_t *ptep;

	down_read(&mm->mmap_sem);
	ptep = get_locked_pte(mm, addr, &ptl);
	if (unlikely(!ptep)) {
		up_read(&mm->mmap_sem);
		return -EFAULT;
	}
	pgste = pgste_get_lock(ptep);

	if (pte_val(*ptep) & _PAGE_INVALID) {
		key = (pgste_val(pgste) & PGSTE_ACC_BITS) >> 56;
		key |= (pgste_val(pgste) & PGSTE_FP_BIT) >> 56;
		key |= (pgste_val(pgste) & PGSTE_GR_BIT) >> 48;
		key |= (pgste_val(pgste) & PGSTE_GC_BIT) >> 48;
	} else {
		key = page_get_storage_key(pte_val(*ptep) & PAGE_MASK);

		/* Reflect guest's logical view, not physical */
		if (pgste_val(pgste) & PGSTE_GR_BIT)
			key |= _PAGE_REFERENCED;
		if (pgste_val(pgste) & PGSTE_GC_BIT)
			key |= _PAGE_CHANGED;
	}

	pgste_set_unlock(ptep, pgste);
	pte_unmap_unlock(ptep, ptl);
	up_read(&mm->mmap_sem);
	return key;
}
EXPORT_SYMBOL(get_guest_storage_key);
#endif

|