// SPDX-License-Identifier: GPL-2.0
/*
 * Page table allocation functions
 *
 * Copyright IBM Corp. 2016
 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */

#include <linux/sysctl.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <asm/mmu_context.h>
#include <asm/pgalloc.h>
#include <asm/gmap.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>

#ifdef CONFIG_PGSTE

/* Boolean tunable, exposed as /proc/sys/vm/allocate_pgste (see table below) */
int page_table_allocate_pgste = 0;
EXPORT_SYMBOL(page_table_allocate_pgste);

static struct ctl_table page_table_sysctl[] = {
	{
		.procname = "allocate_pgste",
		.data = &page_table_allocate_pgste,
		.maxlen = sizeof(int),
		.mode = S_IRUGO | S_IWUSR,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = SYSCTL_ZERO,	/* minimum value */
		.extra2 = SYSCTL_ONE,	/* maximum value */
	},
	{ }
};

/* Parent "vm" directory entry for the table above */
static struct ctl_table page_table_sysctl_dir[] = {
	{
		.procname = "vm",
		.maxlen = 0,
		.mode = 0555,
		.child = page_table_sysctl,
	},
	{ }
};

static int __init page_table_register_sysctl(void)
{
	return register_sysctl_table(page_table_sysctl_dir) ? 0 : -ENOMEM;
}
__initcall(page_table_register_sysctl);

#endif /* CONFIG_PGSTE */

/*
 * Allocate an order-2 (four page) block for a region/segment (crst) table.
 * Returns the table address obtained via page_to_phys(), or NULL if the
 * allocation fails.
 */
unsigned long *crst_table_alloc(struct mm_struct *mm)
{
	struct page *page = alloc_pages(GFP_KERNEL, 2);

	if (!page)
		return NULL;
	arch_set_page_dat(page, 2);
	return (unsigned long *) page_to_phys(page);
}

void crst_table_free(struct mm_struct *mm, unsigned long *table)
{
	free_pages((unsigned long) table, 2);
}

/*
 * on_each_cpu() callback for crst_table_upgrade(): CPUs that currently
 * run @arg reload the user ASCE; every CPU flushes its local TLB.
 */
static void __crst_table_upgrade(void *arg)
{
	struct mm_struct *mm = arg;

	if (current->active_mm == mm)
		set_user_asce(mm);
	__tlb_flush_local();
}

/*
 * Add page table levels to @mm until its address space reaches @end.
 * Each loop iteration installs one new top-level crst table under
 * mm->page_table_lock and updates context.asce_limit/asce to match.
 * If at least one level was added, all CPUs are notified so that active
 * users of @mm pick up the new asce.
 *
 * Returns 0 on success or -ENOMEM if a table allocation fails; levels
 * that were already added before the failure are kept in place.
 */
int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
{
	unsigned long *table, *pgd;
	int rc, notify;

	/* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */
	VM_BUG_ON(mm->context.asce_limit < _REGION2_SIZE);
	rc = 0;
	notify = 0;
	while (mm->context.asce_limit < end) {
		table = crst_table_alloc(mm);
		if (!table) {
			rc = -ENOMEM;
			break;
		}
		spin_lock_bh(&mm->page_table_lock);
		pgd = (unsigned long *) mm->pgd;
		if (mm->context.asce_limit == _REGION2_SIZE) {
			/* 4 -> 5 is handled by the else branch next time */
			crst_table_init(table, _REGION2_ENTRY_EMPTY);
			p4d_populate(mm, (p4d_t *) table, (pud_t *) pgd);
			mm->pgd = (pgd_t *) table;
			mm->context.asce_limit = _REGION1_SIZE;
			mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
				_ASCE_USER_BITS | _ASCE_TYPE_REGION2;
			mm_inc_nr_puds(mm);
		} else {
			crst_table_init(table, _REGION1_ENTRY_EMPTY);
			pgd_populate(mm, (pgd_t *) table, (p4d_t *) pgd);
			mm->pgd = (pgd_t *) table;
			/* -PAGE_SIZE: full address space minus the last page */
			mm->context.asce_limit = -PAGE_SIZE;
			mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
				_ASCE_USER_BITS | _ASCE_TYPE_REGION1;
		}
		notify = 1;
		spin_unlock_bh(&mm->page_table_lock);
	}
	if (notify)
		on_each_cpu(__crst_table_upgrade, mm, 0);
	return rc;
}

/*
 * Drop @mm from three page table levels back to two (compat tasks only).
 * While @mm is active on this CPU the asce is detached and the TLB
 * flushed before the top-level table is unlinked and freed; the asce is
 * re-attached afterwards.
 */
void crst_table_downgrade(struct mm_struct *mm)
{
	pgd_t *pgd;

	/* downgrade should only happen from 3 to 2 levels (compat only) */
	VM_BUG_ON(mm->context.asce_limit != _REGION2_SIZE);

	if (current->active_mm == mm) {
		clear_user_asce();
		__tlb_flush_mm(mm);
	}

	pgd = mm->pgd;
	mm_dec_nr_pmds(mm);
	/* the new top level is the single table the old pgd pointed to */
	mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
	mm->context.asce_limit = _REGION3_SIZE;
	mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
			   _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
	crst_table_free(mm, (unsigned long *) pgd);

	if (current->active_mm == mm)
		set_user_asce(mm);
}

/* Atomically XOR @bits into @v and return the resulting new value. */
static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
{
	unsigned int old, new;

	do {
		old = atomic_read(v);
		new = old ^ bits;
	} while (atomic_cmpxchg(v, old, new) != old);
	return new;
}
#ifdef CONFIG_PGSTE

/*
 * Allocate a full 4K page table with PGSTEs: the first 2K hold the pte
 * entries (initialized to _PAGE_INVALID), the second 2K hold the pgste
 * area (zeroed).  Returns the struct page, or NULL on failure.
 */
struct page *page_table_alloc_pgste(struct mm_struct *mm)
{
	struct page *page;
	u64 *table;

	page = alloc_page(GFP_KERNEL);
	if (page) {
		table = (u64 *)page_to_phys(page);
		memset64(table, _PAGE_INVALID, PTRS_PER_PTE);
		memset64(table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
	}
	return page;
}

void page_table_free_pgste(struct page *page)
{
	__free_page(page);
}

#endif /* CONFIG_PGSTE */

/*
 * page table entry allocation/free routines.
 *
 * Without PGSTEs a page table needs only 2K, so two of them are carved
 * out of a single 4K page.  The upper byte of page->_refcount encodes
 * the state: bits 24/25 mark which 2K half is allocated, bits 28/29
 * (set in page_table_free_rcu, cleared in __tlb_remove_table) mark a
 * half that still awaits an RCU grace period before it may be reused.
 * Pages with a free half are kept on mm->context.pgtable_list.
 */
unsigned long *page_table_alloc(struct mm_struct *mm)
{
	unsigned long *table;
	struct page *page;
	unsigned int mask, bit;

	/* Try to get a fragment of a 4K page as a 2K page table */
	if (!mm_alloc_pgste(mm)) {
		table = NULL;
		spin_lock_bh(&mm->context.lock);
		if (!list_empty(&mm->context.pgtable_list)) {
			page = list_first_entry(&mm->context.pgtable_list,
						struct page, lru);
			mask = atomic_read(&page->_refcount) >> 24;
			/* fold pending-free bits in; 3 == both halves busy */
			mask = (mask | (mask >> 4)) & 3;
			if (mask != 3) {
				table = (unsigned long *) page_to_phys(page);
				bit = mask & 1;	/* =1 -> second 2K */
				if (bit)
					table += PTRS_PER_PTE;
				atomic_xor_bits(&page->_refcount,
							1U << (bit + 24));
				/* page is now fully used -> off the list */
				list_del(&page->lru);
			}
		}
		spin_unlock_bh(&mm->context.lock);
		if (table)
			return table;
	}
	/* Allocate a fresh page */
	page = alloc_page(GFP_KERNEL);
	if (!page)
		return NULL;
	if (!pgtable_page_ctor(page)) {
		__free_page(page);
		return NULL;
	}
	arch_set_page_dat(page, 0);
	/* Initialize page table */
	table = (unsigned long *) page_to_phys(page);
	if (mm_alloc_pgste(mm)) {
		/* Return 4K page table with PGSTEs */
		atomic_xor_bits(&page->_refcount, 3 << 24);
		memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE);
		memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
	} else {
		/* Return the first 2K fragment of the page */
		atomic_xor_bits(&page->_refcount, 1 << 24);
		memset64((u64 *)table, _PAGE_INVALID, 2 * PTRS_PER_PTE);
		/* second half still free -> park page on the list */
		spin_lock_bh(&mm->context.lock);
		list_add(&page->lru, &mm->context.pgtable_list);
		spin_unlock_bh(&mm->context.lock);
	}
	return table;
}

/*
 * Immediately free a page table previously obtained from
 * page_table_alloc().  The backing page is released only once both 2K
 * halves are unused and no pending-free bit is set.
 */
void page_table_free(struct mm_struct *mm, unsigned long *table)
{
	struct page *page;
	unsigned int bit, mask;

	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	if (!mm_alloc_pgste(mm)) {
		/* Free 2K page table fragment of a 4K page */
		/* bit: 0 for the first, 1 for the second 2K half */
		bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
		spin_lock_bh(&mm->context.lock);
		mask = atomic_xor_bits(&page->_refcount, 1U << (bit + 24));
		mask >>= 24;
		if (mask & 3)
			list_add(&page->lru, &mm->context.pgtable_list);
		else
			list_del(&page->lru);
		spin_unlock_bh(&mm->context.lock);
		if (mask != 0)
			/* other half still allocated or pending free */
			return;
	} else {
		/* full 4K table with pgstes: clear both allocation bits */
		atomic_xor_bits(&page->_refcount, 3U << 24);
	}

	pgtable_page_dtor(page);
	__free_page(page);
}

/*
 * Queue a page table for freeing after the TLB has been flushed.  The
 * pending-free bit (bit+28) is set and the table pointer is tagged in
 * its low two bits (1/2 = which 2K half, 3 = full table with pgstes)
 * so that __tlb_remove_table() can decode it after the grace period.
 */
void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
			 unsigned long vmaddr)
{
	struct mm_struct *mm;
	struct page *page;
	unsigned int bit, mask;

	mm = tlb->mm;
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	if (mm_alloc_pgste(mm)) {
		gmap_unlink(mm, table, vmaddr);
		table = (unsigned long *) (__pa(table) | 3);
		tlb_remove_table(tlb, table);
		return;
	}
	bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t));
	spin_lock_bh(&mm->context.lock);
	/* 0x11 << (bit + 24): clear allocated bit, set pending bit */
	mask = atomic_xor_bits(&page->_refcount, 0x11U << (bit + 24));
	mask >>= 24;
	if (mask & 3)
		list_add_tail(&page->lru, &mm->context.pgtable_list);
	else
		list_del(&page->lru);
	spin_unlock_bh(&mm->context.lock);
	table = (unsigned long *) (__pa(table) | (1U << bit));
	tlb_remove_table(tlb, table);
}

/*
 * Deferred free, called after the TLB flush / grace period.  The low
 * two bits of @_table carry the tag set by page_table_free_rcu() (or 0
 * for an order-2 crst table).
 */
void __tlb_remove_table(void *_table)
{
	unsigned int mask = (unsigned long) _table & 3;
	void *table = (void *)((unsigned long) _table ^ mask);
	struct page *page = pfn_to_page(__pa(table) >> PAGE_SHIFT);

	switch (mask) {
	case 0:		/* pmd, pud, or p4d */
		free_pages((unsigned long) table, 2);
		break;
	case 1:		/* lower 2K of a 4K page table */
	case 2:		/* higher 2K of a 4K page table */
		/* clear the pending-free bit for this half */
		mask = atomic_xor_bits(&page->_refcount, mask << (4 + 24));
		mask >>= 24;
		if (mask != 0)
			/* other half still allocated or pending */
			break;
		/* fallthrough */
	case 3:		/* 4K page table with pgstes */
		/* mask & 3 only if we entered at case 3 directly */
		if (mask & 3)
			atomic_xor_bits(&page->_refcount, 3 << 24);
		pgtable_page_dtor(page);
		__free_page(page);
		break;
	}
}

/*
 * Base infrastructure required to generate basic asces, region, segment,
 * and page tables that do not make use of enhanced features like EDAT1.
 */

/* slab cache for 2K base page tables (no pgste area) */
static struct kmem_cache *base_pgt_cache;

/* Allocate a base page table with all entries set to _PAGE_INVALID */
static unsigned long base_pgt_alloc(void)
{
	u64 *table;

	table = kmem_cache_alloc(base_pgt_cache, GFP_KERNEL);
	if (table)
		memset64(table, _PAGE_INVALID, PTRS_PER_PTE);
	return (unsigned long) table;
}

static void base_pgt_free(unsigned long table)
{
	kmem_cache_free(base_pgt_cache, (void *) table);
}

/* Allocate a base crst (region/segment) table, entries initialized to @val */
static unsigned long base_crst_alloc(unsigned long val)
{
	unsigned long table;

	table = __get_free_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
	if (table)
		crst_table_init((unsigned long *)table, val);
	return table;
}

static void base_crst_free(unsigned long table)
{
	free_pages(table, CRST_ALLOC_ORDER);
}

/*
 * Generate base_<level>_addr_end() helpers: round @addr up to the next
 * SIZE boundary, clamped to @end.  The -1 biasing keeps the comparison
 * correct when "next" wraps around to 0 at the top of the address space.
 */
#define BASE_ADDR_END_FUNC(NAME, SIZE)					\
static inline unsigned long base_##NAME##_addr_end(unsigned long addr,	\
						   unsigned long end)	\
{									\
	unsigned long next = (addr + (SIZE)) & ~((SIZE) - 1);		\
									\
	return (next - 1) < (end - 1) ? next : end;			\
}

BASE_ADDR_END_FUNC(page, _PAGE_SIZE)
BASE_ADDR_END_FUNC(segment, _SEGMENT_SIZE)
BASE_ADDR_END_FUNC(region3, _REGION3_SIZE)
BASE_ADDR_END_FUNC(region2, _REGION2_SIZE)
BASE_ADDR_END_FUNC(region1, _REGION1_SIZE)

/* Obtain the real address for @address via the "lra" instruction */
static inline unsigned long base_lra(unsigned long address)
{
	unsigned long real;

	asm volatile(
		" lra %0,0(%1)\n"
		: "=d" (real) : "a" (address) : "cc");
	return real;
}

/*
 * Lowest level of the walk.  With @alloc set, fill each pte with the
 * real address of the corresponding page; with @alloc clear there is
 * nothing to release at this level (the table itself is freed by the
 * segment-level caller), so return immediately.
 */
static int base_page_walk(unsigned long origin, unsigned long addr,
			  unsigned long end, int alloc)
{
	unsigned long *pte, next;

	if (!alloc)
		return 0;
	pte = (unsigned long *) origin;
	pte += (addr & _PAGE_INDEX) >> _PAGE_SHIFT;
	do {
		next = base_page_addr_end(addr, end);
		*pte = base_lra(addr);
	} while (pte++, addr = next, addr < end);
	return 0;
}

/*
 * Walk a segment table for [addr, end).  @alloc selects between
 * building missing page tables (1) and releasing them after the
 * recursive walk (0).  Returns 0 or -ENOMEM.
 */
static int base_segment_walk(unsigned long origin, unsigned long addr,
			     unsigned long end, int alloc)
{
	unsigned long *ste, next, table;
	int rc;

	ste = (unsigned long *) origin;
	ste += (addr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
	do {
		next = base_segment_addr_end(addr, end);
		if (*ste & _SEGMENT_ENTRY_INVALID) {
			if (!alloc)
				continue;
			table = base_pgt_alloc();
			if (!table)
				return -ENOMEM;
			*ste = table | _SEGMENT_ENTRY;
		}
		table = *ste & _SEGMENT_ENTRY_ORIGIN;
		rc = base_page_walk(table, addr, next, alloc);
		if (rc)
			return rc;
		if (!alloc)
			base_pgt_free(table);
		cond_resched();
	} while (ste++, addr = next, addr < end);
	return 0;
}

/* Walk a region-third table; see base_segment_walk() for the contract */
static int base_region3_walk(unsigned long origin, unsigned long addr,
			     unsigned long end, int alloc)
{
	unsigned long *rtte, next, table;
	int rc;

	rtte = (unsigned long *) origin;
	rtte += (addr & _REGION3_INDEX) >> _REGION3_SHIFT;
	do {
		next = base_region3_addr_end(addr, end);
		if (*rtte & _REGION_ENTRY_INVALID) {
			if (!alloc)
				continue;
			table = base_crst_alloc(_SEGMENT_ENTRY_EMPTY);
			if (!table)
				return -ENOMEM;
			*rtte = table | _REGION3_ENTRY;
		}
		table = *rtte & _REGION_ENTRY_ORIGIN;
		rc = base_segment_walk(table, addr, next, alloc);
		if (rc)
			return rc;
		if (!alloc)
			base_crst_free(table);
	} while (rtte++, addr = next, addr < end);
	return 0;
}

/* Walk a region-second table; see base_segment_walk() for the contract */
static int base_region2_walk(unsigned long origin, unsigned long addr,
			     unsigned long end, int alloc)
{
	unsigned long *rste, next, table;
	int rc;

	rste = (unsigned long *) origin;
	rste += (addr & _REGION2_INDEX) >> _REGION2_SHIFT;
	do {
		next = base_region2_addr_end(addr, end);
		if (*rste & _REGION_ENTRY_INVALID) {
			if (!alloc)
				continue;
			table = base_crst_alloc(_REGION3_ENTRY_EMPTY);
			if (!table)
				return -ENOMEM;
			*rste = table | _REGION2_ENTRY;
		}
		table = *rste & _REGION_ENTRY_ORIGIN;
		rc = base_region3_walk(table, addr, next, alloc);
		if (rc)
			return rc;
		if (!alloc)
			base_crst_free(table);
	} while (rste++, addr = next, addr < end);
	return 0;
}

/* Walk a region-first table; see base_segment_walk() for the contract */
static int base_region1_walk(unsigned long origin, unsigned long addr,
			     unsigned long end, int alloc)
{
	unsigned long *rfte, next, table;
	int rc;

	rfte = (unsigned long *) origin;
	rfte += (addr & _REGION1_INDEX) >> _REGION1_SHIFT;
	do {
		next = base_region1_addr_end(addr, end);
		if (*rfte & _REGION_ENTRY_INVALID) {
			if (!alloc)
				continue;
			table = base_crst_alloc(_REGION2_ENTRY_EMPTY);
			if (!table)
				return -ENOMEM;
			*rfte = table | _REGION1_ENTRY;
		}
		table = *rfte & _REGION_ENTRY_ORIGIN;
		rc = base_region2_walk(table, addr, next, alloc);
		if (rc)
			return rc;
		if (!alloc)
			base_crst_free(table);
	} while (rfte++, addr = next, addr < end);
	return 0;
}

/**
 * base_asce_free - free asce and tables returned from base_asce_alloc()
 * @asce: asce to be freed
 *
 * Frees all region, segment, and page tables that were allocated with a
 * corresponding base_asce_alloc() call.
 */
void base_asce_free(unsigned long asce)
{
	unsigned long table = asce & _ASCE_ORIGIN;

	if (!asce)
		return;
	/* walk with alloc=0 frees the lower-level tables */
	switch (asce & _ASCE_TYPE_MASK) {
	case _ASCE_TYPE_SEGMENT:
		base_segment_walk(table, 0, _REGION3_SIZE, 0);
		break;
	case _ASCE_TYPE_REGION3:
		base_region3_walk(table, 0, _REGION2_SIZE, 0);
		break;
	case _ASCE_TYPE_REGION2:
		base_region2_walk(table, 0, _REGION1_SIZE, 0);
		break;
	case _ASCE_TYPE_REGION1:
		/* -_PAGE_SIZE: cover the whole address space */
		base_region1_walk(table, 0, -_PAGE_SIZE, 0);
		break;
	}
	base_crst_free(table);
}

/* Lazily create the base_pgt slab cache; safe against concurrent callers */
static int base_pgt_cache_init(void)
{
	static DEFINE_MUTEX(base_pgt_cache_mutex);
	unsigned long sz = _PAGE_TABLE_SIZE;

	if (base_pgt_cache)
		return 0;
	mutex_lock(&base_pgt_cache_mutex);
	if (!base_pgt_cache)
		base_pgt_cache = kmem_cache_create("base_pgt", sz, sz, 0, NULL);
	mutex_unlock(&base_pgt_cache_mutex);
	return base_pgt_cache ? 0 : -ENOMEM;
}

/**
 * base_asce_alloc - create kernel mapping without enhanced DAT features
 * @addr: virtual start address of kernel mapping
 * @num_pages: number of consecutive pages
 *
 * Generate an asce, including all required region, segment and page tables,
 * that can be used to access the virtual kernel mapping. The difference is
 * that the returned asce does not make use of any enhanced DAT features like
 * e.g. large pages. This is required for some I/O functions that pass an
 * asce, like e.g. some service call requests.
 *
 * Note: the returned asce may NEVER be attached to any cpu. It may only be
 *	 used for I/O requests. tlb entries that might result because the
 *	 asce was attached to a cpu won't be cleared.
 */
unsigned long base_asce_alloc(unsigned long addr, unsigned long num_pages)
{
	unsigned long asce, table, end;
	int rc;

	if (base_pgt_cache_init())
		return 0;
	end = addr + num_pages * PAGE_SIZE;
	/* pick the shallowest table hierarchy that can map [addr, end) */
	if (end <= _REGION3_SIZE) {
		table = base_crst_alloc(_SEGMENT_ENTRY_EMPTY);
		if (!table)
			return 0;
		rc = base_segment_walk(table, addr, end, 1);
		asce = table | _ASCE_TYPE_SEGMENT | _ASCE_TABLE_LENGTH;
	} else if (end <= _REGION2_SIZE) {
		table = base_crst_alloc(_REGION3_ENTRY_EMPTY);
		if (!table)
			return 0;
		rc = base_region3_walk(table, addr, end, 1);
		asce = table | _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
	} else if (end <= _REGION1_SIZE) {
		table = base_crst_alloc(_REGION2_ENTRY_EMPTY);
		if (!table)
			return 0;
		rc = base_region2_walk(table, addr, end, 1);
		asce = table | _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
	} else {
		table = base_crst_alloc(_REGION1_ENTRY_EMPTY);
		if (!table)
			return 0;
		rc = base_region1_walk(table, addr, end, 1);
		asce = table | _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH;
	}
	/* on partial failure release everything allocated so far */
	if (rc) {
		base_asce_free(asce);
		asce = 0;
	}
	return asce;
}