// SPDX-License-Identifier: GPL-2.0
/*
 * Page table allocation functions
 *
 * Copyright IBM Corp. 2016
 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */

#include <linux/sysctl.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <asm/mmu_context.h>
#include <asm/pgalloc.h>
#include <asm/gmap.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>

#ifdef CONFIG_PGSTE

static int page_table_allocate_pgste_min = 0;
static int page_table_allocate_pgste_max = 1;
int page_table_allocate_pgste = 0;
EXPORT_SYMBOL(page_table_allocate_pgste);

static struct ctl_table page_table_sysctl[] = {
	{
		.procname	= "allocate_pgste",
		.data		= &page_table_allocate_pgste,
		.maxlen		= sizeof(int),
		.mode		= S_IRUGO | S_IWUSR,
		.proc_handler	= proc_dointvec,
		.extra1		= &page_table_allocate_pgste_min,
		.extra2		= &page_table_allocate_pgste_max,
	},
	{ }
};

static struct ctl_table page_table_sysctl_dir[] = {
	{
		.procname	= "vm",
		.maxlen		= 0,
		.mode		= 0555,
		.child		= page_table_sysctl,
	},
	{ }
};

static int __init page_table_register_sysctl(void)
{
	return register_sysctl_table(page_table_sysctl_dir) ? 0 : -ENOMEM;
}
__initcall(page_table_register_sysctl);

#endif /* CONFIG_PGSTE */
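/*
 * Region and segment ("crst") tables have 2048 eight-byte entries and
 * therefore occupy four consecutive pages (16K), hence the fixed
 * order-2 allocations below.
 */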
unsigned long *crst_table_alloc(struct mm_struct *mm)
{
	struct page *page = alloc_pages(GFP_KERNEL, 2);

	if (!page)
		return NULL;
	arch_set_page_dat(page, 2);
	return (unsigned long *) page_to_phys(page);
}

void crst_table_free(struct mm_struct *mm, unsigned long *table)
{
	free_pages((unsigned long) table, 2);
}

static void __crst_table_upgrade(void *arg)
{
	struct mm_struct *mm = arg;

	if (current->active_mm == mm)
		set_user_asce(mm);
	__tlb_flush_local();
}

int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
{
	unsigned long *table, *pgd;
	int rc, notify;

	/* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */
	VM_BUG_ON(mm->context.asce_limit < _REGION2_SIZE);
	rc = 0;
	notify = 0;
	while (mm->context.asce_limit < end) {
		table = crst_table_alloc(mm);
		if (!table) {
			rc = -ENOMEM;
			break;
		}
		spin_lock_bh(&mm->page_table_lock);
		pgd = (unsigned long *) mm->pgd;
		if (mm->context.asce_limit == _REGION2_SIZE) {
			crst_table_init(table, _REGION2_ENTRY_EMPTY);
			p4d_populate(mm, (p4d_t *) table, (pud_t *) pgd);
			mm->pgd = (pgd_t *) table;
			mm->context.asce_limit = _REGION1_SIZE;
			mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
				_ASCE_USER_BITS | _ASCE_TYPE_REGION2;
		} else {
			crst_table_init(table, _REGION1_ENTRY_EMPTY);
			pgd_populate(mm, (pgd_t *) table, (p4d_t *) pgd);
			mm->pgd = (pgd_t *) table;
			mm->context.asce_limit = -PAGE_SIZE;
			mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
				_ASCE_USER_BITS | _ASCE_TYPE_REGION1;
		}
		notify = 1;
		spin_unlock_bh(&mm->page_table_lock);
	}
	if (notify)
		on_each_cpu(__crst_table_upgrade, mm, 0);
	return rc;
}
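/*
 * A downgrade to a two-level (segment table only) ASCE is done for
 * compat (31-bit) address spaces, which span at most 2G.
 */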
void crst_table_downgrade(struct mm_struct *mm)
{
	pgd_t *pgd;

	/* downgrade should only happen from 3 to 2 levels (compat only) */
	VM_BUG_ON(mm->context.asce_limit != _REGION2_SIZE);

	if (current->active_mm == mm) {
		clear_user_asce();
		__tlb_flush_mm(mm);
	}

	pgd = mm->pgd;
	mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
	mm->context.asce_limit = _REGION3_SIZE;
	mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
			   _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
	crst_table_free(mm, (unsigned long *) pgd);

	if (current->active_mm == mm)
		set_user_asce(mm);
}

static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
{
	unsigned int old, new;

	do {
		old = atomic_read(v);
		new = old ^ bits;
	} while (atomic_cmpxchg(v, old, new) != old);
	return new;
}

#ifdef CONFIG_PGSTE

struct page *page_table_alloc_pgste(struct mm_struct *mm)
{
	struct page *page;
	u64 *table;

	page = alloc_page(GFP_KERNEL);
	if (page) {
		table = (u64 *)page_to_phys(page);
		memset64(table, _PAGE_INVALID, PTRS_PER_PTE);
		memset64(table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
	}
	return page;
}

void page_table_free_pgste(struct page *page)
{
	__free_page(page);
}

#endif /* CONFIG_PGSTE */

/*
 * page table entry allocation/free routines.
 */
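/*
 * A 4K page can hold two 2K page table fragments. Their allocation
 * state is tracked in the upper byte of page->_refcount, updated
 * atomically via atomic_xor_bits():
 *
 *   bit 24/25: the lower/upper 2K half is allocated
 *   bit 28/29: the lower/upper 2K half is pending removal (RCU)
 *
 * A half may only be reused once both its "allocated" and "pending"
 * bits are clear; folding the two nibbles together with
 * mask = (mask | (mask >> 4)) & 3 in page_table_alloc() checks
 * exactly that.
 */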
unsigned long *page_table_alloc(struct mm_struct *mm)
{
	unsigned long *table;
	struct page *page;
	unsigned int mask, bit;

	/* Try to get a fragment of a 4K page as a 2K page table */
	if (!mm_alloc_pgste(mm)) {
		table = NULL;
		spin_lock_bh(&mm->context.lock);
		if (!list_empty(&mm->context.pgtable_list)) {
			page = list_first_entry(&mm->context.pgtable_list,
						struct page, lru);
			mask = atomic_read(&page->_refcount) >> 24;
			mask = (mask | (mask >> 4)) & 3;
			if (mask != 3) {
				table = (unsigned long *) page_to_phys(page);
				bit = mask & 1;		/* =1 -> second 2K */
				if (bit)
					table += PTRS_PER_PTE;
				atomic_xor_bits(&page->_refcount,
						1U << (bit + 24));
				list_del(&page->lru);
			}
		}
		spin_unlock_bh(&mm->context.lock);
		if (table)
			return table;
	}
	/* Allocate a fresh page */
	page = alloc_page(GFP_KERNEL);
	if (!page)
		return NULL;
	if (!pgtable_page_ctor(page)) {
		__free_page(page);
		return NULL;
	}
	arch_set_page_dat(page, 0);
	/* Initialize page table */
	table = (unsigned long *) page_to_phys(page);
	if (mm_alloc_pgste(mm)) {
		/* Return 4K page table with PGSTEs */
		atomic_xor_bits(&page->_refcount, 3 << 24);
		memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE);
		memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
	} else {
		/* Return the first 2K fragment of the page */
		atomic_xor_bits(&page->_refcount, 1 << 24);
		memset64((u64 *)table, _PAGE_INVALID, 2 * PTRS_PER_PTE);
		spin_lock_bh(&mm->context.lock);
		list_add(&page->lru, &mm->context.pgtable_list);
		spin_unlock_bh(&mm->context.lock);
	}
	return table;
}

void page_table_free(struct mm_struct *mm, unsigned long *table)
{
	struct page *page;
	unsigned int bit, mask;

	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	if (!mm_alloc_pgste(mm)) {
		/* Free 2K page table fragment of a 4K page */
		bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
		spin_lock_bh(&mm->context.lock);
		mask = atomic_xor_bits(&page->_refcount, 1U << (bit + 24));
		mask >>= 24;
		if (mask & 3)
			list_add(&page->lru, &mm->context.pgtable_list);
		else
			list_del(&page->lru);
		spin_unlock_bh(&mm->context.lock);
		if (mask != 0)
			return;
	}

	pgtable_page_dtor(page);
	__free_page(page);
}
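/*
 * When page tables are freed via the RCU path, the kind of table is
 * encoded in the (otherwise unused) low bits of the table pointer
 * handed to tlb_remove_table(); __tlb_remove_table() decodes it again:
 *
 *   0: a 16K crst table (pmd, pud, or p4d)
 *   1: the lower 2K half of a 4K page table
 *   2: the upper 2K half of a 4K page table
 *   3: a full 4K page table with pgstes
 */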
void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
			 unsigned long vmaddr)
{
	struct mm_struct *mm;
	struct page *page;
	unsigned int bit, mask;

	mm = tlb->mm;
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	if (mm_alloc_pgste(mm)) {
		gmap_unlink(mm, table, vmaddr);
		table = (unsigned long *) (__pa(table) | 3);
		tlb_remove_table(tlb, table);
		return;
	}
	bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t));
	spin_lock_bh(&mm->context.lock);
	mask = atomic_xor_bits(&page->_refcount, 0x11U << (bit + 24));
	mask >>= 24;
	if (mask & 3)
		list_add_tail(&page->lru, &mm->context.pgtable_list);
	else
		list_del(&page->lru);
	spin_unlock_bh(&mm->context.lock);
	table = (unsigned long *) (__pa(table) | (1U << bit));
	tlb_remove_table(tlb, table);
}

static void __tlb_remove_table(void *_table)
{
	unsigned int mask = (unsigned long) _table & 3;
	void *table = (void *)((unsigned long) _table ^ mask);
	struct page *page = pfn_to_page(__pa(table) >> PAGE_SHIFT);

	switch (mask) {
	case 0:		/* pmd, pud, or p4d */
		free_pages((unsigned long) table, 2);
		break;
	case 1:		/* lower 2K of a 4K page table */
	case 2:		/* higher 2K of a 4K page table */
		mask = atomic_xor_bits(&page->_refcount, mask << (4 + 24));
		mask >>= 24;
		if (mask != 0)
			break;
		/* fallthrough */
	case 3:		/* 4K page table with pgstes */
		pgtable_page_dtor(page);
		__free_page(page);
		break;
	}
}
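/*
 * Table freeing is batched: tlb_remove_table() below collects table
 * pointers in a single-page mmu_table_batch and tlb_table_flush()
 * hands the full batch to call_rcu_sched(), so the tables are only
 * freed after an RCU grace period. If no batch page is available,
 * tlb_remove_table_one() synchronizes with all other CPUs by IPI
 * before freeing the table directly.
 */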
static void tlb_remove_table_smp_sync(void *arg)
{
	/* Simply deliver the interrupt */
}

static void tlb_remove_table_one(void *table)
{
	/*
	 * This isn't an RCU grace period and hence the page-tables cannot be
	 * assumed to be actually RCU-freed.
	 *
	 * It is however sufficient for software page-table walkers that rely
	 * on IRQ disabling. See the comment near struct mmu_table_batch.
	 */
	smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
	__tlb_remove_table(table);
}

static void tlb_remove_table_rcu(struct rcu_head *head)
{
	struct mmu_table_batch *batch;
	int i;

	batch = container_of(head, struct mmu_table_batch, rcu);

	for (i = 0; i < batch->nr; i++)
		__tlb_remove_table(batch->tables[i]);

	free_page((unsigned long)batch);
}

void tlb_table_flush(struct mmu_gather *tlb)
{
	struct mmu_table_batch **batch = &tlb->batch;

	if (*batch) {
		call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
		*batch = NULL;
	}
}

void tlb_remove_table(struct mmu_gather *tlb, void *table)
{
	struct mmu_table_batch **batch = &tlb->batch;

	tlb->mm->context.flush_mm = 1;
	if (*batch == NULL) {
		*batch = (struct mmu_table_batch *)
			__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
		if (*batch == NULL) {
			__tlb_flush_mm_lazy(tlb->mm);
			tlb_remove_table_one(table);
			return;
		}
		(*batch)->nr = 0;
	}
	(*batch)->tables[(*batch)->nr++] = table;
	if ((*batch)->nr == MAX_TABLE_BATCH)
		tlb_flush_mmu(tlb);
}
/*
 * Base infrastructure required to generate basic asces, region, segment,
 * and page tables that do not make use of enhanced features like EDAT1.
 */

static struct kmem_cache *base_pgt_cache;

static unsigned long base_pgt_alloc(void)
{
	u64 *table;

	table = kmem_cache_alloc(base_pgt_cache, GFP_KERNEL);
	if (table)
		memset64(table, _PAGE_INVALID, PTRS_PER_PTE);
	return (unsigned long) table;
}

static void base_pgt_free(unsigned long table)
{
	kmem_cache_free(base_pgt_cache, (void *) table);
}

static unsigned long base_crst_alloc(unsigned long val)
{
	unsigned long table;

	table = __get_free_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
	if (table)
		crst_table_init((unsigned long *)table, val);
	return table;
}

static void base_crst_free(unsigned long table)
{
	free_pages(table, CRST_ALLOC_ORDER);
}

#define BASE_ADDR_END_FUNC(NAME, SIZE)					\
static inline unsigned long base_##NAME##_addr_end(unsigned long addr,	\
						   unsigned long end)	\
{									\
	unsigned long next = (addr + (SIZE)) & ~((SIZE) - 1);		\
									\
	return (next - 1) < (end - 1) ? next : end;			\
}

BASE_ADDR_END_FUNC(page,    _PAGE_SIZE)
BASE_ADDR_END_FUNC(segment, _SEGMENT_SIZE)
BASE_ADDR_END_FUNC(region3, _REGION3_SIZE)
BASE_ADDR_END_FUNC(region2, _REGION2_SIZE)
BASE_ADDR_END_FUNC(region1, _REGION1_SIZE)
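/*
 * Each base_<name>_addr_end() helper generated above returns the next
 * SIZE-aligned boundary after addr, clamped to end. For example, with
 * _PAGE_SIZE = 0x1000, addr = 0x1234 and end = 0x5000:
 *   next = (0x1234 + 0x1000) & ~0xfff = 0x2000.
 * Comparing (next - 1) < (end - 1) instead of next < end keeps the
 * clamping correct when end is 0, i.e. the walk extends to the very
 * top of the address space.
 */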
static inline unsigned long base_lra(unsigned long address)
{
	unsigned long real;

	asm volatile(
		"	lra	%0,0(%1)\n"
		: "=d" (real) : "a" (address) : "cc");
	return real;
}

static int base_page_walk(unsigned long origin, unsigned long addr,
			  unsigned long end, int alloc)
{
	unsigned long *pte, next;

	if (!alloc)
		return 0;
	pte = (unsigned long *) origin;
	pte += (addr & _PAGE_INDEX) >> _PAGE_SHIFT;
	do {
		next = base_page_addr_end(addr, end);
		*pte = base_lra(addr);
	} while (pte++, addr = next, addr < end);
	return 0;
}

static int base_segment_walk(unsigned long origin, unsigned long addr,
			     unsigned long end, int alloc)
{
	unsigned long *ste, next, table;
	int rc;

	ste = (unsigned long *) origin;
	ste += (addr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
	do {
		next = base_segment_addr_end(addr, end);
		if (*ste & _SEGMENT_ENTRY_INVALID) {
			if (!alloc)
				continue;
			table = base_pgt_alloc();
			if (!table)
				return -ENOMEM;
			*ste = table | _SEGMENT_ENTRY;
		}
		table = *ste & _SEGMENT_ENTRY_ORIGIN;
		rc = base_page_walk(table, addr, next, alloc);
		if (rc)
			return rc;
		if (!alloc)
			base_pgt_free(table);
		cond_resched();
	} while (ste++, addr = next, addr < end);
	return 0;
}
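/*
 * The three region table walkers below repeat the pattern of
 * base_segment_walk() one table level higher each time: descend into
 * (or, with alloc=1, first allocate) the next lower table, then free
 * it again on the alloc=0 pass.
 */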
static int base_region3_walk(unsigned long origin, unsigned long addr,
			     unsigned long end, int alloc)
{
	unsigned long *rtte, next, table;
	int rc;

	rtte = (unsigned long *) origin;
	rtte += (addr & _REGION3_INDEX) >> _REGION3_SHIFT;
	do {
		next = base_region3_addr_end(addr, end);
		if (*rtte & _REGION_ENTRY_INVALID) {
			if (!alloc)
				continue;
			table = base_crst_alloc(_SEGMENT_ENTRY_EMPTY);
			if (!table)
				return -ENOMEM;
			*rtte = table | _REGION3_ENTRY;
		}
		table = *rtte & _REGION_ENTRY_ORIGIN;
		rc = base_segment_walk(table, addr, next, alloc);
		if (rc)
			return rc;
		if (!alloc)
			base_crst_free(table);
	} while (rtte++, addr = next, addr < end);
	return 0;
}

static int base_region2_walk(unsigned long origin, unsigned long addr,
			     unsigned long end, int alloc)
{
	unsigned long *rste, next, table;
	int rc;

	rste = (unsigned long *) origin;
	rste += (addr & _REGION2_INDEX) >> _REGION2_SHIFT;
	do {
		next = base_region2_addr_end(addr, end);
		if (*rste & _REGION_ENTRY_INVALID) {
			if (!alloc)
				continue;
			table = base_crst_alloc(_REGION3_ENTRY_EMPTY);
			if (!table)
				return -ENOMEM;
			*rste = table | _REGION2_ENTRY;
		}
		table = *rste & _REGION_ENTRY_ORIGIN;
		rc = base_region3_walk(table, addr, next, alloc);
		if (rc)
			return rc;
		if (!alloc)
			base_crst_free(table);
	} while (rste++, addr = next, addr < end);
	return 0;
}

static int base_region1_walk(unsigned long origin, unsigned long addr,
			     unsigned long end, int alloc)
{
	unsigned long *rfte, next, table;
	int rc;

	rfte = (unsigned long *) origin;
	rfte += (addr & _REGION1_INDEX) >> _REGION1_SHIFT;
	do {
		next = base_region1_addr_end(addr, end);
		if (*rfte & _REGION_ENTRY_INVALID) {
			if (!alloc)
				continue;
			table = base_crst_alloc(_REGION2_ENTRY_EMPTY);
			if (!table)
				return -ENOMEM;
			*rfte = table | _REGION1_ENTRY;
		}
		table = *rfte & _REGION_ENTRY_ORIGIN;
		rc = base_region2_walk(table, addr, next, alloc);
		if (rc)
			return rc;
		if (!alloc)
			base_crst_free(table);
	} while (rfte++, addr = next, addr < end);
	return 0;
}
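/*
 * The base_*_walk() functions serve double duty: called with alloc=1
 * they build the table hierarchy and let base_page_walk() fill the
 * page table entries with real addresses obtained via LRA; called
 * with alloc=0 they walk an existing hierarchy and free all lower
 * level tables.
 */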
/**
 * base_asce_free - free asce and tables returned from base_asce_alloc()
 * @asce: asce to be freed
 *
 * Frees all region, segment, and page tables that were allocated with a
 * corresponding base_asce_alloc() call.
 */
void base_asce_free(unsigned long asce)
{
	unsigned long table = asce & _ASCE_ORIGIN;

	if (!asce)
		return;
	switch (asce & _ASCE_TYPE_MASK) {
	case _ASCE_TYPE_SEGMENT:
		base_segment_walk(table, 0, _REGION3_SIZE, 0);
		break;
	case _ASCE_TYPE_REGION3:
		base_region3_walk(table, 0, _REGION2_SIZE, 0);
		break;
	case _ASCE_TYPE_REGION2:
		base_region2_walk(table, 0, _REGION1_SIZE, 0);
		break;
	case _ASCE_TYPE_REGION1:
		base_region1_walk(table, 0, -_PAGE_SIZE, 0);
		break;
	}
	base_crst_free(table);
}

static int base_pgt_cache_init(void)
{
	static DEFINE_MUTEX(base_pgt_cache_mutex);
	unsigned long sz = _PAGE_TABLE_SIZE;

	if (base_pgt_cache)
		return 0;
	mutex_lock(&base_pgt_cache_mutex);
	if (!base_pgt_cache)
		base_pgt_cache = kmem_cache_create("base_pgt", sz, sz, 0, NULL);
	mutex_unlock(&base_pgt_cache_mutex);
	return base_pgt_cache ? 0 : -ENOMEM;
}
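/*
 * Minimal usage sketch for base_asce_alloc()/base_asce_free()
 * (hypothetical caller, error handling elided):
 *
 *	unsigned long asce;
 *
 *	asce = base_asce_alloc(addr, num_pages);
 *	if (!asce)
 *		return -ENOMEM;
 *	... pass asce to the I/O or service call request ...
 *	base_asce_free(asce);
 */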
/**
 * base_asce_alloc - create kernel mapping without enhanced DAT features
 * @addr: virtual start address of kernel mapping
 * @num_pages: number of consecutive pages
 *
 * Generate an asce, including all required region, segment and page tables,
 * that can be used to access the virtual kernel mapping. The difference is
 * that the returned asce does not make use of any enhanced DAT features like
 * e.g. large pages. This is required for some I/O functions that pass an
 * asce, like e.g. some service call requests.
 *
 * Note: the returned asce may NEVER be attached to any cpu. It may only be
 *	 used for I/O requests. tlb entries that might result because the
 *	 asce was attached to a cpu won't be cleared.
 */
unsigned long base_asce_alloc(unsigned long addr, unsigned long num_pages)
{
	unsigned long asce, table, end;
	int rc;

	if (base_pgt_cache_init())
		return 0;
	end = addr + num_pages * PAGE_SIZE;
	if (end <= _REGION3_SIZE) {
		table = base_crst_alloc(_SEGMENT_ENTRY_EMPTY);
		if (!table)
			return 0;
		rc = base_segment_walk(table, addr, end, 1);
		asce = table | _ASCE_TYPE_SEGMENT | _ASCE_TABLE_LENGTH;
	} else if (end <= _REGION2_SIZE) {
		table = base_crst_alloc(_REGION3_ENTRY_EMPTY);
		if (!table)
			return 0;
		rc = base_region3_walk(table, addr, end, 1);
		asce = table | _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
	} else if (end <= _REGION1_SIZE) {
		table = base_crst_alloc(_REGION2_ENTRY_EMPTY);
		if (!table)
			return 0;
		rc = base_region2_walk(table, addr, end, 1);
		asce = table | _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
	} else {
		table = base_crst_alloc(_REGION1_ENTRY_EMPTY);
		if (!table)
			return 0;
		rc = base_region1_walk(table, addr, end, 1);
		asce = table | _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH;
	}
	if (rc) {
		base_asce_free(asce);
		asce = 0;
	}
	return asce;
}