xref: /linux/arch/s390/mm/pgalloc.c (revision ac7a0fcea39d29125b83b73583463e5ab70fdb37)
// SPDX-License-Identifier: GPL-2.0
/*
 *  Page table allocation functions
 *
 *    Copyright IBM Corp. 2016
 *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */
81e133ab2SMartin Schwidefsky 
91e133ab2SMartin Schwidefsky #include <linux/sysctl.h>
101caf170dSHeiko Carstens #include <linux/slab.h>
111caf170dSHeiko Carstens #include <linux/mm.h>
121e133ab2SMartin Schwidefsky #include <asm/mmu_context.h>
131e133ab2SMartin Schwidefsky #include <asm/pgalloc.h>
141e133ab2SMartin Schwidefsky #include <asm/gmap.h>
151e133ab2SMartin Schwidefsky #include <asm/tlb.h>
161e133ab2SMartin Schwidefsky #include <asm/tlbflush.h>
171e133ab2SMartin Schwidefsky 
181e133ab2SMartin Schwidefsky #ifdef CONFIG_PGSTE
191e133ab2SMartin Schwidefsky 
/* Backing variable of the "allocate_pgste" sysctl entry; starts out as 0. */
int page_table_allocate_pgste;
EXPORT_SYMBOL(page_table_allocate_pgste);
221e133ab2SMartin Schwidefsky 
231e133ab2SMartin Schwidefsky static struct ctl_table page_table_sysctl[] = {
241e133ab2SMartin Schwidefsky 	{
251e133ab2SMartin Schwidefsky 		.procname	= "allocate_pgste",
261e133ab2SMartin Schwidefsky 		.data		= &page_table_allocate_pgste,
271e133ab2SMartin Schwidefsky 		.maxlen		= sizeof(int),
281e133ab2SMartin Schwidefsky 		.mode		= S_IRUGO | S_IWUSR,
295bedf8aaSVasily Gorbik 		.proc_handler	= proc_dointvec_minmax,
30*ac7a0fceSVasily Gorbik 		.extra1		= SYSCTL_ZERO,
31*ac7a0fceSVasily Gorbik 		.extra2		= SYSCTL_ONE,
321e133ab2SMartin Schwidefsky 	},
331e133ab2SMartin Schwidefsky 	{ }
341e133ab2SMartin Schwidefsky };
351e133ab2SMartin Schwidefsky 
361e133ab2SMartin Schwidefsky static struct ctl_table page_table_sysctl_dir[] = {
371e133ab2SMartin Schwidefsky 	{
381e133ab2SMartin Schwidefsky 		.procname	= "vm",
391e133ab2SMartin Schwidefsky 		.maxlen		= 0,
401e133ab2SMartin Schwidefsky 		.mode		= 0555,
411e133ab2SMartin Schwidefsky 		.child		= page_table_sysctl,
421e133ab2SMartin Schwidefsky 	},
431e133ab2SMartin Schwidefsky 	{ }
441e133ab2SMartin Schwidefsky };
451e133ab2SMartin Schwidefsky 
461e133ab2SMartin Schwidefsky static int __init page_table_register_sysctl(void)
471e133ab2SMartin Schwidefsky {
481e133ab2SMartin Schwidefsky 	return register_sysctl_table(page_table_sysctl_dir) ? 0 : -ENOMEM;
491e133ab2SMartin Schwidefsky }
501e133ab2SMartin Schwidefsky __initcall(page_table_register_sysctl);
511e133ab2SMartin Schwidefsky 
521e133ab2SMartin Schwidefsky #endif /* CONFIG_PGSTE */
531e133ab2SMartin Schwidefsky 
541e133ab2SMartin Schwidefsky unsigned long *crst_table_alloc(struct mm_struct *mm)
551e133ab2SMartin Schwidefsky {
561e133ab2SMartin Schwidefsky 	struct page *page = alloc_pages(GFP_KERNEL, 2);
571e133ab2SMartin Schwidefsky 
581e133ab2SMartin Schwidefsky 	if (!page)
591e133ab2SMartin Schwidefsky 		return NULL;
60c9b5ad54SMartin Schwidefsky 	arch_set_page_dat(page, 2);
611e133ab2SMartin Schwidefsky 	return (unsigned long *) page_to_phys(page);
621e133ab2SMartin Schwidefsky }
631e133ab2SMartin Schwidefsky 
641e133ab2SMartin Schwidefsky void crst_table_free(struct mm_struct *mm, unsigned long *table)
651e133ab2SMartin Schwidefsky {
661e133ab2SMartin Schwidefsky 	free_pages((unsigned long) table, 2);
671e133ab2SMartin Schwidefsky }
681e133ab2SMartin Schwidefsky 
691e133ab2SMartin Schwidefsky static void __crst_table_upgrade(void *arg)
701e133ab2SMartin Schwidefsky {
711e133ab2SMartin Schwidefsky 	struct mm_struct *mm = arg;
721e133ab2SMartin Schwidefsky 
730aaba41bSMartin Schwidefsky 	if (current->active_mm == mm)
741e133ab2SMartin Schwidefsky 		set_user_asce(mm);
751e133ab2SMartin Schwidefsky 	__tlb_flush_local();
761e133ab2SMartin Schwidefsky }
771e133ab2SMartin Schwidefsky 
781aea9b3fSMartin Schwidefsky int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
791e133ab2SMartin Schwidefsky {
801e133ab2SMartin Schwidefsky 	unsigned long *table, *pgd;
811aea9b3fSMartin Schwidefsky 	int rc, notify;
821e133ab2SMartin Schwidefsky 
831aea9b3fSMartin Schwidefsky 	/* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */
842fc4876eSMartin Schwidefsky 	VM_BUG_ON(mm->context.asce_limit < _REGION2_SIZE);
851aea9b3fSMartin Schwidefsky 	rc = 0;
861aea9b3fSMartin Schwidefsky 	notify = 0;
871aea9b3fSMartin Schwidefsky 	while (mm->context.asce_limit < end) {
881aea9b3fSMartin Schwidefsky 		table = crst_table_alloc(mm);
891aea9b3fSMartin Schwidefsky 		if (!table) {
901aea9b3fSMartin Schwidefsky 			rc = -ENOMEM;
911aea9b3fSMartin Schwidefsky 			break;
921aea9b3fSMartin Schwidefsky 		}
931e133ab2SMartin Schwidefsky 		spin_lock_bh(&mm->page_table_lock);
941e133ab2SMartin Schwidefsky 		pgd = (unsigned long *) mm->pgd;
95f1c1174fSHeiko Carstens 		if (mm->context.asce_limit == _REGION2_SIZE) {
96723cacbdSGerald Schaefer 			crst_table_init(table, _REGION2_ENTRY_EMPTY);
971aea9b3fSMartin Schwidefsky 			p4d_populate(mm, (p4d_t *) table, (pud_t *) pgd);
981e133ab2SMartin Schwidefsky 			mm->pgd = (pgd_t *) table;
99f1c1174fSHeiko Carstens 			mm->context.asce_limit = _REGION1_SIZE;
100723cacbdSGerald Schaefer 			mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
101723cacbdSGerald Schaefer 				_ASCE_USER_BITS | _ASCE_TYPE_REGION2;
102e12e4044SMartin Schwidefsky 			mm_inc_nr_puds(mm);
1031aea9b3fSMartin Schwidefsky 		} else {
1041aea9b3fSMartin Schwidefsky 			crst_table_init(table, _REGION1_ENTRY_EMPTY);
1051aea9b3fSMartin Schwidefsky 			pgd_populate(mm, (pgd_t *) table, (p4d_t *) pgd);
1061aea9b3fSMartin Schwidefsky 			mm->pgd = (pgd_t *) table;
1071aea9b3fSMartin Schwidefsky 			mm->context.asce_limit = -PAGE_SIZE;
1081aea9b3fSMartin Schwidefsky 			mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
1091aea9b3fSMartin Schwidefsky 				_ASCE_USER_BITS | _ASCE_TYPE_REGION1;
1101aea9b3fSMartin Schwidefsky 		}
1111aea9b3fSMartin Schwidefsky 		notify = 1;
1121e133ab2SMartin Schwidefsky 		spin_unlock_bh(&mm->page_table_lock);
1131aea9b3fSMartin Schwidefsky 	}
1141aea9b3fSMartin Schwidefsky 	if (notify)
1151e133ab2SMartin Schwidefsky 		on_each_cpu(__crst_table_upgrade, mm, 0);
1161aea9b3fSMartin Schwidefsky 	return rc;
1171e133ab2SMartin Schwidefsky }
1181e133ab2SMartin Schwidefsky 
119723cacbdSGerald Schaefer void crst_table_downgrade(struct mm_struct *mm)
1201e133ab2SMartin Schwidefsky {
1211e133ab2SMartin Schwidefsky 	pgd_t *pgd;
1221e133ab2SMartin Schwidefsky 
123723cacbdSGerald Schaefer 	/* downgrade should only happen from 3 to 2 levels (compat only) */
1242fc4876eSMartin Schwidefsky 	VM_BUG_ON(mm->context.asce_limit != _REGION2_SIZE);
125723cacbdSGerald Schaefer 
1261e133ab2SMartin Schwidefsky 	if (current->active_mm == mm) {
1271e133ab2SMartin Schwidefsky 		clear_user_asce();
1281e133ab2SMartin Schwidefsky 		__tlb_flush_mm(mm);
1291e133ab2SMartin Schwidefsky 	}
130723cacbdSGerald Schaefer 
1311e133ab2SMartin Schwidefsky 	pgd = mm->pgd;
132814cedbcSMartin Schwidefsky 	mm_dec_nr_pmds(mm);
1331e133ab2SMartin Schwidefsky 	mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
134f1c1174fSHeiko Carstens 	mm->context.asce_limit = _REGION3_SIZE;
135723cacbdSGerald Schaefer 	mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
136723cacbdSGerald Schaefer 			   _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
1371e133ab2SMartin Schwidefsky 	crst_table_free(mm, (unsigned long *) pgd);
138723cacbdSGerald Schaefer 
1391e133ab2SMartin Schwidefsky 	if (current->active_mm == mm)
1401e133ab2SMartin Schwidefsky 		set_user_asce(mm);
1411e133ab2SMartin Schwidefsky }
1421e133ab2SMartin Schwidefsky 
1431e133ab2SMartin Schwidefsky static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
1441e133ab2SMartin Schwidefsky {
1451e133ab2SMartin Schwidefsky 	unsigned int old, new;
1461e133ab2SMartin Schwidefsky 
1471e133ab2SMartin Schwidefsky 	do {
1481e133ab2SMartin Schwidefsky 		old = atomic_read(v);
1491e133ab2SMartin Schwidefsky 		new = old ^ bits;
1501e133ab2SMartin Schwidefsky 	} while (atomic_cmpxchg(v, old, new) != old);
1511e133ab2SMartin Schwidefsky 	return new;
1521e133ab2SMartin Schwidefsky }
1531e133ab2SMartin Schwidefsky 
1544be130a0SMartin Schwidefsky #ifdef CONFIG_PGSTE
1554be130a0SMartin Schwidefsky 
1564be130a0SMartin Schwidefsky struct page *page_table_alloc_pgste(struct mm_struct *mm)
1574be130a0SMartin Schwidefsky {
1584be130a0SMartin Schwidefsky 	struct page *page;
15941879ff6SHeiko Carstens 	u64 *table;
1604be130a0SMartin Schwidefsky 
161faee35a5SMichal Hocko 	page = alloc_page(GFP_KERNEL);
1624be130a0SMartin Schwidefsky 	if (page) {
16341879ff6SHeiko Carstens 		table = (u64 *)page_to_phys(page);
16441879ff6SHeiko Carstens 		memset64(table, _PAGE_INVALID, PTRS_PER_PTE);
16541879ff6SHeiko Carstens 		memset64(table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
1664be130a0SMartin Schwidefsky 	}
1674be130a0SMartin Schwidefsky 	return page;
1684be130a0SMartin Schwidefsky }
1694be130a0SMartin Schwidefsky 
/* Give back a page obtained from page_table_alloc_pgste(). */
void page_table_free_pgste(struct page *page)
{
	__free_page(page);
}
1744be130a0SMartin Schwidefsky 
1754be130a0SMartin Schwidefsky #endif /* CONFIG_PGSTE */
1764be130a0SMartin Schwidefsky 
1771e133ab2SMartin Schwidefsky /*
1781e133ab2SMartin Schwidefsky  * page table entry allocation/free routines.
1791e133ab2SMartin Schwidefsky  */
1801e133ab2SMartin Schwidefsky unsigned long *page_table_alloc(struct mm_struct *mm)
1811e133ab2SMartin Schwidefsky {
1821e133ab2SMartin Schwidefsky 	unsigned long *table;
1831e133ab2SMartin Schwidefsky 	struct page *page;
1841e133ab2SMartin Schwidefsky 	unsigned int mask, bit;
1851e133ab2SMartin Schwidefsky 
1861e133ab2SMartin Schwidefsky 	/* Try to get a fragment of a 4K page as a 2K page table */
1871e133ab2SMartin Schwidefsky 	if (!mm_alloc_pgste(mm)) {
1881e133ab2SMartin Schwidefsky 		table = NULL;
189f28a4b4dSMartin Schwidefsky 		spin_lock_bh(&mm->context.lock);
1901e133ab2SMartin Schwidefsky 		if (!list_empty(&mm->context.pgtable_list)) {
1911e133ab2SMartin Schwidefsky 			page = list_first_entry(&mm->context.pgtable_list,
1921e133ab2SMartin Schwidefsky 						struct page, lru);
193620b4e90SMatthew Wilcox 			mask = atomic_read(&page->_refcount) >> 24;
1941e133ab2SMartin Schwidefsky 			mask = (mask | (mask >> 4)) & 3;
1951e133ab2SMartin Schwidefsky 			if (mask != 3) {
1961e133ab2SMartin Schwidefsky 				table = (unsigned long *) page_to_phys(page);
1971e133ab2SMartin Schwidefsky 				bit = mask & 1;		/* =1 -> second 2K */
1981e133ab2SMartin Schwidefsky 				if (bit)
1991e133ab2SMartin Schwidefsky 					table += PTRS_PER_PTE;
200620b4e90SMatthew Wilcox 				atomic_xor_bits(&page->_refcount,
201620b4e90SMatthew Wilcox 							1U << (bit + 24));
2021e133ab2SMartin Schwidefsky 				list_del(&page->lru);
2031e133ab2SMartin Schwidefsky 			}
2041e133ab2SMartin Schwidefsky 		}
205f28a4b4dSMartin Schwidefsky 		spin_unlock_bh(&mm->context.lock);
2061e133ab2SMartin Schwidefsky 		if (table)
2071e133ab2SMartin Schwidefsky 			return table;
2081e133ab2SMartin Schwidefsky 	}
2091e133ab2SMartin Schwidefsky 	/* Allocate a fresh page */
21010d58bf2SMichal Hocko 	page = alloc_page(GFP_KERNEL);
2111e133ab2SMartin Schwidefsky 	if (!page)
2121e133ab2SMartin Schwidefsky 		return NULL;
2131e133ab2SMartin Schwidefsky 	if (!pgtable_page_ctor(page)) {
2141e133ab2SMartin Schwidefsky 		__free_page(page);
2151e133ab2SMartin Schwidefsky 		return NULL;
2161e133ab2SMartin Schwidefsky 	}
217c9b5ad54SMartin Schwidefsky 	arch_set_page_dat(page, 0);
2181e133ab2SMartin Schwidefsky 	/* Initialize page table */
2191e133ab2SMartin Schwidefsky 	table = (unsigned long *) page_to_phys(page);
2201e133ab2SMartin Schwidefsky 	if (mm_alloc_pgste(mm)) {
2211e133ab2SMartin Schwidefsky 		/* Return 4K page table with PGSTEs */
222620b4e90SMatthew Wilcox 		atomic_xor_bits(&page->_refcount, 3 << 24);
22341879ff6SHeiko Carstens 		memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE);
22441879ff6SHeiko Carstens 		memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
2251e133ab2SMartin Schwidefsky 	} else {
2261e133ab2SMartin Schwidefsky 		/* Return the first 2K fragment of the page */
227620b4e90SMatthew Wilcox 		atomic_xor_bits(&page->_refcount, 1 << 24);
22841879ff6SHeiko Carstens 		memset64((u64 *)table, _PAGE_INVALID, 2 * PTRS_PER_PTE);
229f28a4b4dSMartin Schwidefsky 		spin_lock_bh(&mm->context.lock);
2301e133ab2SMartin Schwidefsky 		list_add(&page->lru, &mm->context.pgtable_list);
231f28a4b4dSMartin Schwidefsky 		spin_unlock_bh(&mm->context.lock);
2321e133ab2SMartin Schwidefsky 	}
2331e133ab2SMartin Schwidefsky 	return table;
2341e133ab2SMartin Schwidefsky }
2351e133ab2SMartin Schwidefsky 
2361e133ab2SMartin Schwidefsky void page_table_free(struct mm_struct *mm, unsigned long *table)
2371e133ab2SMartin Schwidefsky {
2381e133ab2SMartin Schwidefsky 	struct page *page;
2391e133ab2SMartin Schwidefsky 	unsigned int bit, mask;
2401e133ab2SMartin Schwidefsky 
2411e133ab2SMartin Schwidefsky 	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
2421e133ab2SMartin Schwidefsky 	if (!mm_alloc_pgste(mm)) {
2431e133ab2SMartin Schwidefsky 		/* Free 2K page table fragment of a 4K page */
2441e133ab2SMartin Schwidefsky 		bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
245f28a4b4dSMartin Schwidefsky 		spin_lock_bh(&mm->context.lock);
246620b4e90SMatthew Wilcox 		mask = atomic_xor_bits(&page->_refcount, 1U << (bit + 24));
247620b4e90SMatthew Wilcox 		mask >>= 24;
2481e133ab2SMartin Schwidefsky 		if (mask & 3)
2491e133ab2SMartin Schwidefsky 			list_add(&page->lru, &mm->context.pgtable_list);
2501e133ab2SMartin Schwidefsky 		else
2511e133ab2SMartin Schwidefsky 			list_del(&page->lru);
252f28a4b4dSMartin Schwidefsky 		spin_unlock_bh(&mm->context.lock);
2531e133ab2SMartin Schwidefsky 		if (mask != 0)
2541e133ab2SMartin Schwidefsky 			return;
255dfa75863SEric Farman 	} else {
256dfa75863SEric Farman 		atomic_xor_bits(&page->_refcount, 3U << 24);
2571e133ab2SMartin Schwidefsky 	}
2581e133ab2SMartin Schwidefsky 
2591e133ab2SMartin Schwidefsky 	pgtable_page_dtor(page);
2601e133ab2SMartin Schwidefsky 	__free_page(page);
2611e133ab2SMartin Schwidefsky }
2621e133ab2SMartin Schwidefsky 
2631e133ab2SMartin Schwidefsky void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
2641e133ab2SMartin Schwidefsky 			 unsigned long vmaddr)
2651e133ab2SMartin Schwidefsky {
2661e133ab2SMartin Schwidefsky 	struct mm_struct *mm;
2671e133ab2SMartin Schwidefsky 	struct page *page;
2681e133ab2SMartin Schwidefsky 	unsigned int bit, mask;
2691e133ab2SMartin Schwidefsky 
2701e133ab2SMartin Schwidefsky 	mm = tlb->mm;
2711e133ab2SMartin Schwidefsky 	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
2721e133ab2SMartin Schwidefsky 	if (mm_alloc_pgste(mm)) {
2731e133ab2SMartin Schwidefsky 		gmap_unlink(mm, table, vmaddr);
2741e133ab2SMartin Schwidefsky 		table = (unsigned long *) (__pa(table) | 3);
2751e133ab2SMartin Schwidefsky 		tlb_remove_table(tlb, table);
2761e133ab2SMartin Schwidefsky 		return;
2771e133ab2SMartin Schwidefsky 	}
2781e133ab2SMartin Schwidefsky 	bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t));
279f28a4b4dSMartin Schwidefsky 	spin_lock_bh(&mm->context.lock);
280620b4e90SMatthew Wilcox 	mask = atomic_xor_bits(&page->_refcount, 0x11U << (bit + 24));
281620b4e90SMatthew Wilcox 	mask >>= 24;
2821e133ab2SMartin Schwidefsky 	if (mask & 3)
2831e133ab2SMartin Schwidefsky 		list_add_tail(&page->lru, &mm->context.pgtable_list);
2841e133ab2SMartin Schwidefsky 	else
2851e133ab2SMartin Schwidefsky 		list_del(&page->lru);
286f28a4b4dSMartin Schwidefsky 	spin_unlock_bh(&mm->context.lock);
2871e133ab2SMartin Schwidefsky 	table = (unsigned long *) (__pa(table) | (1U << bit));
2881e133ab2SMartin Schwidefsky 	tlb_remove_table(tlb, table);
2891e133ab2SMartin Schwidefsky }
2901e133ab2SMartin Schwidefsky 
2919de7d833SMartin Schwidefsky void __tlb_remove_table(void *_table)
2921e133ab2SMartin Schwidefsky {
2931e133ab2SMartin Schwidefsky 	unsigned int mask = (unsigned long) _table & 3;
2941e133ab2SMartin Schwidefsky 	void *table = (void *)((unsigned long) _table ^ mask);
2951e133ab2SMartin Schwidefsky 	struct page *page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
2961e133ab2SMartin Schwidefsky 
2971e133ab2SMartin Schwidefsky 	switch (mask) {
2981aea9b3fSMartin Schwidefsky 	case 0:		/* pmd, pud, or p4d */
2991e133ab2SMartin Schwidefsky 		free_pages((unsigned long) table, 2);
3001e133ab2SMartin Schwidefsky 		break;
3011e133ab2SMartin Schwidefsky 	case 1:		/* lower 2K of a 4K page table */
3021e133ab2SMartin Schwidefsky 	case 2:		/* higher 2K of a 4K page table */
303620b4e90SMatthew Wilcox 		mask = atomic_xor_bits(&page->_refcount, mask << (4 + 24));
304620b4e90SMatthew Wilcox 		mask >>= 24;
305620b4e90SMatthew Wilcox 		if (mask != 0)
3061e133ab2SMartin Schwidefsky 			break;
3071e133ab2SMartin Schwidefsky 		/* fallthrough */
3081e133ab2SMartin Schwidefsky 	case 3:		/* 4K page table with pgstes */
309dfa75863SEric Farman 		if (mask & 3)
310dfa75863SEric Farman 			atomic_xor_bits(&page->_refcount, 3 << 24);
3111e133ab2SMartin Schwidefsky 		pgtable_page_dtor(page);
3121e133ab2SMartin Schwidefsky 		__free_page(page);
3131e133ab2SMartin Schwidefsky 		break;
3141e133ab2SMartin Schwidefsky 	}
3151e133ab2SMartin Schwidefsky }
3161e133ab2SMartin Schwidefsky 
/*
 * Base infrastructure required to generate basic asces, region, segment,
 * and page tables that do not make use of enhanced features like EDAT1.
 */

/* Slab cache for base page tables; created by base_pgt_cache_init(). */
static struct kmem_cache *base_pgt_cache;
3231caf170dSHeiko Carstens 
3241caf170dSHeiko Carstens static unsigned long base_pgt_alloc(void)
3251caf170dSHeiko Carstens {
3261caf170dSHeiko Carstens 	u64 *table;
3271caf170dSHeiko Carstens 
3281caf170dSHeiko Carstens 	table = kmem_cache_alloc(base_pgt_cache, GFP_KERNEL);
3291caf170dSHeiko Carstens 	if (table)
3301caf170dSHeiko Carstens 		memset64(table, _PAGE_INVALID, PTRS_PER_PTE);
3311caf170dSHeiko Carstens 	return (unsigned long) table;
3321caf170dSHeiko Carstens }
3331caf170dSHeiko Carstens 
3341caf170dSHeiko Carstens static void base_pgt_free(unsigned long table)
3351caf170dSHeiko Carstens {
3361caf170dSHeiko Carstens 	kmem_cache_free(base_pgt_cache, (void *) table);
3371caf170dSHeiko Carstens }
3381caf170dSHeiko Carstens 
3391caf170dSHeiko Carstens static unsigned long base_crst_alloc(unsigned long val)
3401caf170dSHeiko Carstens {
3411caf170dSHeiko Carstens 	unsigned long table;
3421caf170dSHeiko Carstens 
3431caf170dSHeiko Carstens 	table =	 __get_free_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
3441caf170dSHeiko Carstens 	if (table)
3451caf170dSHeiko Carstens 		crst_table_init((unsigned long *)table, val);
3461caf170dSHeiko Carstens 	return table;
3471caf170dSHeiko Carstens }
3481caf170dSHeiko Carstens 
3491caf170dSHeiko Carstens static void base_crst_free(unsigned long table)
3501caf170dSHeiko Carstens {
3511caf170dSHeiko Carstens 	free_pages(table, CRST_ALLOC_ORDER);
3521caf170dSHeiko Carstens }
3531caf170dSHeiko Carstens 
3541caf170dSHeiko Carstens #define BASE_ADDR_END_FUNC(NAME, SIZE)					\
3551caf170dSHeiko Carstens static inline unsigned long base_##NAME##_addr_end(unsigned long addr,	\
3561caf170dSHeiko Carstens 						   unsigned long end)	\
3571caf170dSHeiko Carstens {									\
3581caf170dSHeiko Carstens 	unsigned long next = (addr + (SIZE)) & ~((SIZE) - 1);		\
3591caf170dSHeiko Carstens 									\
3601caf170dSHeiko Carstens 	return (next - 1) < (end - 1) ? next : end;			\
3611caf170dSHeiko Carstens }
3621caf170dSHeiko Carstens 
3631caf170dSHeiko Carstens BASE_ADDR_END_FUNC(page,    _PAGE_SIZE)
3641caf170dSHeiko Carstens BASE_ADDR_END_FUNC(segment, _SEGMENT_SIZE)
3651caf170dSHeiko Carstens BASE_ADDR_END_FUNC(region3, _REGION3_SIZE)
3661caf170dSHeiko Carstens BASE_ADDR_END_FUNC(region2, _REGION2_SIZE)
3671caf170dSHeiko Carstens BASE_ADDR_END_FUNC(region1, _REGION1_SIZE)
3681caf170dSHeiko Carstens 
3691caf170dSHeiko Carstens static inline unsigned long base_lra(unsigned long address)
3701caf170dSHeiko Carstens {
3711caf170dSHeiko Carstens 	unsigned long real;
3721caf170dSHeiko Carstens 
3731caf170dSHeiko Carstens 	asm volatile(
3741caf170dSHeiko Carstens 		"	lra	%0,0(%1)\n"
3751caf170dSHeiko Carstens 		: "=d" (real) : "a" (address) : "cc");
3761caf170dSHeiko Carstens 	return real;
3771caf170dSHeiko Carstens }
3781caf170dSHeiko Carstens 
3791caf170dSHeiko Carstens static int base_page_walk(unsigned long origin, unsigned long addr,
3801caf170dSHeiko Carstens 			  unsigned long end, int alloc)
3811caf170dSHeiko Carstens {
3821caf170dSHeiko Carstens 	unsigned long *pte, next;
3831caf170dSHeiko Carstens 
3841caf170dSHeiko Carstens 	if (!alloc)
3851caf170dSHeiko Carstens 		return 0;
3861caf170dSHeiko Carstens 	pte = (unsigned long *) origin;
3871caf170dSHeiko Carstens 	pte += (addr & _PAGE_INDEX) >> _PAGE_SHIFT;
3881caf170dSHeiko Carstens 	do {
3891caf170dSHeiko Carstens 		next = base_page_addr_end(addr, end);
3901caf170dSHeiko Carstens 		*pte = base_lra(addr);
3911caf170dSHeiko Carstens 	} while (pte++, addr = next, addr < end);
3921caf170dSHeiko Carstens 	return 0;
3931caf170dSHeiko Carstens }
3941caf170dSHeiko Carstens 
3951caf170dSHeiko Carstens static int base_segment_walk(unsigned long origin, unsigned long addr,
3961caf170dSHeiko Carstens 			     unsigned long end, int alloc)
3971caf170dSHeiko Carstens {
3981caf170dSHeiko Carstens 	unsigned long *ste, next, table;
3991caf170dSHeiko Carstens 	int rc;
4001caf170dSHeiko Carstens 
4011caf170dSHeiko Carstens 	ste = (unsigned long *) origin;
4021caf170dSHeiko Carstens 	ste += (addr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
4031caf170dSHeiko Carstens 	do {
4041caf170dSHeiko Carstens 		next = base_segment_addr_end(addr, end);
4051caf170dSHeiko Carstens 		if (*ste & _SEGMENT_ENTRY_INVALID) {
4061caf170dSHeiko Carstens 			if (!alloc)
4071caf170dSHeiko Carstens 				continue;
4081caf170dSHeiko Carstens 			table = base_pgt_alloc();
4091caf170dSHeiko Carstens 			if (!table)
4101caf170dSHeiko Carstens 				return -ENOMEM;
4111caf170dSHeiko Carstens 			*ste = table | _SEGMENT_ENTRY;
4121caf170dSHeiko Carstens 		}
4131caf170dSHeiko Carstens 		table = *ste & _SEGMENT_ENTRY_ORIGIN;
4141caf170dSHeiko Carstens 		rc = base_page_walk(table, addr, next, alloc);
4151caf170dSHeiko Carstens 		if (rc)
4161caf170dSHeiko Carstens 			return rc;
4171caf170dSHeiko Carstens 		if (!alloc)
4181caf170dSHeiko Carstens 			base_pgt_free(table);
4191caf170dSHeiko Carstens 		cond_resched();
4201caf170dSHeiko Carstens 	} while (ste++, addr = next, addr < end);
4211caf170dSHeiko Carstens 	return 0;
4221caf170dSHeiko Carstens }
4231caf170dSHeiko Carstens 
4241caf170dSHeiko Carstens static int base_region3_walk(unsigned long origin, unsigned long addr,
4251caf170dSHeiko Carstens 			     unsigned long end, int alloc)
4261caf170dSHeiko Carstens {
4271caf170dSHeiko Carstens 	unsigned long *rtte, next, table;
4281caf170dSHeiko Carstens 	int rc;
4291caf170dSHeiko Carstens 
4301caf170dSHeiko Carstens 	rtte = (unsigned long *) origin;
4311caf170dSHeiko Carstens 	rtte += (addr & _REGION3_INDEX) >> _REGION3_SHIFT;
4321caf170dSHeiko Carstens 	do {
4331caf170dSHeiko Carstens 		next = base_region3_addr_end(addr, end);
4341caf170dSHeiko Carstens 		if (*rtte & _REGION_ENTRY_INVALID) {
4351caf170dSHeiko Carstens 			if (!alloc)
4361caf170dSHeiko Carstens 				continue;
4371caf170dSHeiko Carstens 			table = base_crst_alloc(_SEGMENT_ENTRY_EMPTY);
4381caf170dSHeiko Carstens 			if (!table)
4391caf170dSHeiko Carstens 				return -ENOMEM;
4401caf170dSHeiko Carstens 			*rtte = table | _REGION3_ENTRY;
4411caf170dSHeiko Carstens 		}
4421caf170dSHeiko Carstens 		table = *rtte & _REGION_ENTRY_ORIGIN;
4431caf170dSHeiko Carstens 		rc = base_segment_walk(table, addr, next, alloc);
4441caf170dSHeiko Carstens 		if (rc)
4451caf170dSHeiko Carstens 			return rc;
4461caf170dSHeiko Carstens 		if (!alloc)
4471caf170dSHeiko Carstens 			base_crst_free(table);
4481caf170dSHeiko Carstens 	} while (rtte++, addr = next, addr < end);
4491caf170dSHeiko Carstens 	return 0;
4501caf170dSHeiko Carstens }
4511caf170dSHeiko Carstens 
4521caf170dSHeiko Carstens static int base_region2_walk(unsigned long origin, unsigned long addr,
4531caf170dSHeiko Carstens 			     unsigned long end, int alloc)
4541caf170dSHeiko Carstens {
4551caf170dSHeiko Carstens 	unsigned long *rste, next, table;
4561caf170dSHeiko Carstens 	int rc;
4571caf170dSHeiko Carstens 
4581caf170dSHeiko Carstens 	rste = (unsigned long *) origin;
4591caf170dSHeiko Carstens 	rste += (addr & _REGION2_INDEX) >> _REGION2_SHIFT;
4601caf170dSHeiko Carstens 	do {
4611caf170dSHeiko Carstens 		next = base_region2_addr_end(addr, end);
4621caf170dSHeiko Carstens 		if (*rste & _REGION_ENTRY_INVALID) {
4631caf170dSHeiko Carstens 			if (!alloc)
4641caf170dSHeiko Carstens 				continue;
4651caf170dSHeiko Carstens 			table = base_crst_alloc(_REGION3_ENTRY_EMPTY);
4661caf170dSHeiko Carstens 			if (!table)
4671caf170dSHeiko Carstens 				return -ENOMEM;
4681caf170dSHeiko Carstens 			*rste = table | _REGION2_ENTRY;
4691caf170dSHeiko Carstens 		}
4701caf170dSHeiko Carstens 		table = *rste & _REGION_ENTRY_ORIGIN;
4711caf170dSHeiko Carstens 		rc = base_region3_walk(table, addr, next, alloc);
4721caf170dSHeiko Carstens 		if (rc)
4731caf170dSHeiko Carstens 			return rc;
4741caf170dSHeiko Carstens 		if (!alloc)
4751caf170dSHeiko Carstens 			base_crst_free(table);
4761caf170dSHeiko Carstens 	} while (rste++, addr = next, addr < end);
4771caf170dSHeiko Carstens 	return 0;
4781caf170dSHeiko Carstens }
4791caf170dSHeiko Carstens 
4801caf170dSHeiko Carstens static int base_region1_walk(unsigned long origin, unsigned long addr,
4811caf170dSHeiko Carstens 			     unsigned long end, int alloc)
4821caf170dSHeiko Carstens {
4831caf170dSHeiko Carstens 	unsigned long *rfte, next, table;
4841caf170dSHeiko Carstens 	int rc;
4851caf170dSHeiko Carstens 
4861caf170dSHeiko Carstens 	rfte = (unsigned long *) origin;
4871caf170dSHeiko Carstens 	rfte += (addr & _REGION1_INDEX) >> _REGION1_SHIFT;
4881caf170dSHeiko Carstens 	do {
4891caf170dSHeiko Carstens 		next = base_region1_addr_end(addr, end);
4901caf170dSHeiko Carstens 		if (*rfte & _REGION_ENTRY_INVALID) {
4911caf170dSHeiko Carstens 			if (!alloc)
4921caf170dSHeiko Carstens 				continue;
4931caf170dSHeiko Carstens 			table = base_crst_alloc(_REGION2_ENTRY_EMPTY);
4941caf170dSHeiko Carstens 			if (!table)
4951caf170dSHeiko Carstens 				return -ENOMEM;
4961caf170dSHeiko Carstens 			*rfte = table | _REGION1_ENTRY;
4971caf170dSHeiko Carstens 		}
4981caf170dSHeiko Carstens 		table = *rfte & _REGION_ENTRY_ORIGIN;
4991caf170dSHeiko Carstens 		rc = base_region2_walk(table, addr, next, alloc);
5001caf170dSHeiko Carstens 		if (rc)
5011caf170dSHeiko Carstens 			return rc;
5021caf170dSHeiko Carstens 		if (!alloc)
5031caf170dSHeiko Carstens 			base_crst_free(table);
5041caf170dSHeiko Carstens 	} while (rfte++, addr = next, addr < end);
5051caf170dSHeiko Carstens 	return 0;
5061caf170dSHeiko Carstens }
5071caf170dSHeiko Carstens 
5081caf170dSHeiko Carstens /**
5091caf170dSHeiko Carstens  * base_asce_free - free asce and tables returned from base_asce_alloc()
5101caf170dSHeiko Carstens  * @asce: asce to be freed
5111caf170dSHeiko Carstens  *
5121caf170dSHeiko Carstens  * Frees all region, segment, and page tables that were allocated with a
5131caf170dSHeiko Carstens  * corresponding base_asce_alloc() call.
5141caf170dSHeiko Carstens  */
5151caf170dSHeiko Carstens void base_asce_free(unsigned long asce)
5161caf170dSHeiko Carstens {
5171caf170dSHeiko Carstens 	unsigned long table = asce & _ASCE_ORIGIN;
5181caf170dSHeiko Carstens 
5191caf170dSHeiko Carstens 	if (!asce)
5201caf170dSHeiko Carstens 		return;
5211caf170dSHeiko Carstens 	switch (asce & _ASCE_TYPE_MASK) {
5221caf170dSHeiko Carstens 	case _ASCE_TYPE_SEGMENT:
5231caf170dSHeiko Carstens 		base_segment_walk(table, 0, _REGION3_SIZE, 0);
5241caf170dSHeiko Carstens 		break;
5251caf170dSHeiko Carstens 	case _ASCE_TYPE_REGION3:
5261caf170dSHeiko Carstens 		base_region3_walk(table, 0, _REGION2_SIZE, 0);
5271caf170dSHeiko Carstens 		break;
5281caf170dSHeiko Carstens 	case _ASCE_TYPE_REGION2:
5291caf170dSHeiko Carstens 		base_region2_walk(table, 0, _REGION1_SIZE, 0);
5301caf170dSHeiko Carstens 		break;
5311caf170dSHeiko Carstens 	case _ASCE_TYPE_REGION1:
5321caf170dSHeiko Carstens 		base_region1_walk(table, 0, -_PAGE_SIZE, 0);
5331caf170dSHeiko Carstens 		break;
5341caf170dSHeiko Carstens 	}
5351caf170dSHeiko Carstens 	base_crst_free(table);
5361caf170dSHeiko Carstens }
5371caf170dSHeiko Carstens 
5381caf170dSHeiko Carstens static int base_pgt_cache_init(void)
5391caf170dSHeiko Carstens {
5401caf170dSHeiko Carstens 	static DEFINE_MUTEX(base_pgt_cache_mutex);
5411caf170dSHeiko Carstens 	unsigned long sz = _PAGE_TABLE_SIZE;
5421caf170dSHeiko Carstens 
5431caf170dSHeiko Carstens 	if (base_pgt_cache)
5441caf170dSHeiko Carstens 		return 0;
5451caf170dSHeiko Carstens 	mutex_lock(&base_pgt_cache_mutex);
5461caf170dSHeiko Carstens 	if (!base_pgt_cache)
5471caf170dSHeiko Carstens 		base_pgt_cache = kmem_cache_create("base_pgt", sz, sz, 0, NULL);
5481caf170dSHeiko Carstens 	mutex_unlock(&base_pgt_cache_mutex);
5491caf170dSHeiko Carstens 	return base_pgt_cache ? 0 : -ENOMEM;
5501caf170dSHeiko Carstens }
5511caf170dSHeiko Carstens 
5521caf170dSHeiko Carstens /**
5531caf170dSHeiko Carstens  * base_asce_alloc - create kernel mapping without enhanced DAT features
5541caf170dSHeiko Carstens  * @addr: virtual start address of kernel mapping
5551caf170dSHeiko Carstens  * @num_pages: number of consecutive pages
5561caf170dSHeiko Carstens  *
5571caf170dSHeiko Carstens  * Generate an asce, including all required region, segment and page tables,
5581caf170dSHeiko Carstens  * that can be used to access the virtual kernel mapping. The difference is
5591caf170dSHeiko Carstens  * that the returned asce does not make use of any enhanced DAT features like
5601caf170dSHeiko Carstens  * e.g. large pages. This is required for some I/O functions that pass an
5611caf170dSHeiko Carstens  * asce, like e.g. some service call requests.
5621caf170dSHeiko Carstens  *
5631caf170dSHeiko Carstens  * Note: the returned asce may NEVER be attached to any cpu. It may only be
5641caf170dSHeiko Carstens  *	 used for I/O requests. tlb entries that might result because the
5651caf170dSHeiko Carstens  *	 asce was attached to a cpu won't be cleared.
5661caf170dSHeiko Carstens  */
5671caf170dSHeiko Carstens unsigned long base_asce_alloc(unsigned long addr, unsigned long num_pages)
5681caf170dSHeiko Carstens {
5691caf170dSHeiko Carstens 	unsigned long asce, table, end;
5701caf170dSHeiko Carstens 	int rc;
5711caf170dSHeiko Carstens 
5721caf170dSHeiko Carstens 	if (base_pgt_cache_init())
5731caf170dSHeiko Carstens 		return 0;
5741caf170dSHeiko Carstens 	end = addr + num_pages * PAGE_SIZE;
5751caf170dSHeiko Carstens 	if (end <= _REGION3_SIZE) {
5761caf170dSHeiko Carstens 		table = base_crst_alloc(_SEGMENT_ENTRY_EMPTY);
5771caf170dSHeiko Carstens 		if (!table)
5781caf170dSHeiko Carstens 			return 0;
5791caf170dSHeiko Carstens 		rc = base_segment_walk(table, addr, end, 1);
5801caf170dSHeiko Carstens 		asce = table | _ASCE_TYPE_SEGMENT | _ASCE_TABLE_LENGTH;
5811caf170dSHeiko Carstens 	} else if (end <= _REGION2_SIZE) {
5821caf170dSHeiko Carstens 		table = base_crst_alloc(_REGION3_ENTRY_EMPTY);
5831caf170dSHeiko Carstens 		if (!table)
5841caf170dSHeiko Carstens 			return 0;
5851caf170dSHeiko Carstens 		rc = base_region3_walk(table, addr, end, 1);
5861caf170dSHeiko Carstens 		asce = table | _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
5871caf170dSHeiko Carstens 	} else if (end <= _REGION1_SIZE) {
5881caf170dSHeiko Carstens 		table = base_crst_alloc(_REGION2_ENTRY_EMPTY);
5891caf170dSHeiko Carstens 		if (!table)
5901caf170dSHeiko Carstens 			return 0;
5911caf170dSHeiko Carstens 		rc = base_region2_walk(table, addr, end, 1);
5921caf170dSHeiko Carstens 		asce = table | _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
5931caf170dSHeiko Carstens 	} else {
5941caf170dSHeiko Carstens 		table = base_crst_alloc(_REGION1_ENTRY_EMPTY);
5951caf170dSHeiko Carstens 		if (!table)
5961caf170dSHeiko Carstens 			return 0;
5971caf170dSHeiko Carstens 		rc = base_region1_walk(table, addr, end, 1);
5981caf170dSHeiko Carstens 		asce = table | _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH;
5991caf170dSHeiko Carstens 	}
6001caf170dSHeiko Carstens 	if (rc) {
6011caf170dSHeiko Carstens 		base_asce_free(asce);
6021caf170dSHeiko Carstens 		asce = 0;
6031caf170dSHeiko Carstens 	}
6041caf170dSHeiko Carstens 	return asce;
6051caf170dSHeiko Carstens }
606