xref: /linux/arch/s390/mm/pgalloc.c (revision a1ff5a7d78a036d6c2178ee5acd6ba4946243800)
// SPDX-License-Identifier: GPL-2.0
/*
 *  Page table allocation functions
 *
 *    Copyright IBM Corp. 2016
 *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */
81e133ab2SMartin Schwidefsky 
91e133ab2SMartin Schwidefsky #include <linux/sysctl.h>
101caf170dSHeiko Carstens #include <linux/slab.h>
111caf170dSHeiko Carstens #include <linux/mm.h>
121e133ab2SMartin Schwidefsky #include <asm/mmu_context.h>
1365d37f16SHeiko Carstens #include <asm/page-states.h>
141e133ab2SMartin Schwidefsky #include <asm/pgalloc.h>
151e133ab2SMartin Schwidefsky #include <asm/gmap.h>
161e133ab2SMartin Schwidefsky #include <asm/tlb.h>
171e133ab2SMartin Schwidefsky #include <asm/tlbflush.h>
181e133ab2SMartin Schwidefsky 
191e133ab2SMartin Schwidefsky #ifdef CONFIG_PGSTE
201e133ab2SMartin Schwidefsky 
/*
 * Backing variable of the "allocate_pgste" sysctl entry defined in this
 * file. Static storage, so it starts out as 0.
 */
int page_table_allocate_pgste;
EXPORT_SYMBOL(page_table_allocate_pgste);
231e133ab2SMartin Schwidefsky 
241e133ab2SMartin Schwidefsky static struct ctl_table page_table_sysctl[] = {
251e133ab2SMartin Schwidefsky 	{
261e133ab2SMartin Schwidefsky 		.procname	= "allocate_pgste",
271e133ab2SMartin Schwidefsky 		.data		= &page_table_allocate_pgste,
281e133ab2SMartin Schwidefsky 		.maxlen		= sizeof(int),
291e133ab2SMartin Schwidefsky 		.mode		= S_IRUGO | S_IWUSR,
305bedf8aaSVasily Gorbik 		.proc_handler	= proc_dointvec_minmax,
31ac7a0fceSVasily Gorbik 		.extra1		= SYSCTL_ZERO,
32ac7a0fceSVasily Gorbik 		.extra2		= SYSCTL_ONE,
331e133ab2SMartin Schwidefsky 	},
341e133ab2SMartin Schwidefsky };
351e133ab2SMartin Schwidefsky 
page_table_register_sysctl(void)361e133ab2SMartin Schwidefsky static int __init page_table_register_sysctl(void)
371e133ab2SMartin Schwidefsky {
387ddc873dSLuis Chamberlain 	return register_sysctl("vm", page_table_sysctl) ? 0 : -ENOMEM;
391e133ab2SMartin Schwidefsky }
401e133ab2SMartin Schwidefsky __initcall(page_table_register_sysctl);
411e133ab2SMartin Schwidefsky 
421e133ab2SMartin Schwidefsky #endif /* CONFIG_PGSTE */
431e133ab2SMartin Schwidefsky 
crst_table_alloc(struct mm_struct * mm)441e133ab2SMartin Schwidefsky unsigned long *crst_table_alloc(struct mm_struct *mm)
451e133ab2SMartin Schwidefsky {
466326c26cSVishal Moola (Oracle) 	struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL, CRST_ALLOC_ORDER);
4765d37f16SHeiko Carstens 	unsigned long *table;
481e133ab2SMartin Schwidefsky 
496326c26cSVishal Moola (Oracle) 	if (!ptdesc)
501e133ab2SMartin Schwidefsky 		return NULL;
5165d37f16SHeiko Carstens 	table = ptdesc_to_virt(ptdesc);
5265d37f16SHeiko Carstens 	__arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER);
5365d37f16SHeiko Carstens 	return table;
541e133ab2SMartin Schwidefsky }
551e133ab2SMartin Schwidefsky 
/*
 * Free a table obtained from crst_table_alloc().
 *
 * @mm is not used by this function. A NULL @table is silently ignored.
 */
void crst_table_free(struct mm_struct *mm, unsigned long *table)
{
	if (table)
		pagetable_free(virt_to_ptdesc(table));
}
621e133ab2SMartin Schwidefsky 
__crst_table_upgrade(void * arg)631e133ab2SMartin Schwidefsky static void __crst_table_upgrade(void *arg)
641e133ab2SMartin Schwidefsky {
651e133ab2SMartin Schwidefsky 	struct mm_struct *mm = arg;
661e133ab2SMartin Schwidefsky 
6787d59863SHeiko Carstens 	/* change all active ASCEs to avoid the creation of new TLBs */
68316ec154SChristian Borntraeger 	if (current->active_mm == mm) {
69*208da1d5SSven Schnelle 		get_lowcore()->user_asce.val = mm->context.asce;
70*208da1d5SSven Schnelle 		local_ctl_load(7, &get_lowcore()->user_asce);
71316ec154SChristian Borntraeger 	}
721e133ab2SMartin Schwidefsky 	__tlb_flush_local();
731e133ab2SMartin Schwidefsky }
741e133ab2SMartin Schwidefsky 
crst_table_upgrade(struct mm_struct * mm,unsigned long end)751aea9b3fSMartin Schwidefsky int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
761e133ab2SMartin Schwidefsky {
7731932757SAlexander Gordeev 	unsigned long *pgd = NULL, *p4d = NULL, *__pgd;
7831932757SAlexander Gordeev 	unsigned long asce_limit = mm->context.asce_limit;
791e133ab2SMartin Schwidefsky 
801aea9b3fSMartin Schwidefsky 	/* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */
8131932757SAlexander Gordeev 	VM_BUG_ON(asce_limit < _REGION2_SIZE);
8231932757SAlexander Gordeev 
8331932757SAlexander Gordeev 	if (end <= asce_limit)
8431932757SAlexander Gordeev 		return 0;
8531932757SAlexander Gordeev 
8631932757SAlexander Gordeev 	if (asce_limit == _REGION2_SIZE) {
8731932757SAlexander Gordeev 		p4d = crst_table_alloc(mm);
8831932757SAlexander Gordeev 		if (unlikely(!p4d))
8931932757SAlexander Gordeev 			goto err_p4d;
9031932757SAlexander Gordeev 		crst_table_init(p4d, _REGION2_ENTRY_EMPTY);
911aea9b3fSMartin Schwidefsky 	}
9231932757SAlexander Gordeev 	if (end > _REGION1_SIZE) {
9331932757SAlexander Gordeev 		pgd = crst_table_alloc(mm);
9431932757SAlexander Gordeev 		if (unlikely(!pgd))
9531932757SAlexander Gordeev 			goto err_pgd;
9631932757SAlexander Gordeev 		crst_table_init(pgd, _REGION1_ENTRY_EMPTY);
9731932757SAlexander Gordeev 	}
9831932757SAlexander Gordeev 
991e133ab2SMartin Schwidefsky 	spin_lock_bh(&mm->page_table_lock);
10031932757SAlexander Gordeev 
10131932757SAlexander Gordeev 	/*
102c1e8d7c6SMichel Lespinasse 	 * This routine gets called with mmap_lock lock held and there is
10331932757SAlexander Gordeev 	 * no reason to optimize for the case of otherwise. However, if
10431932757SAlexander Gordeev 	 * that would ever change, the below check will let us know.
10531932757SAlexander Gordeev 	 */
10631932757SAlexander Gordeev 	VM_BUG_ON(asce_limit != mm->context.asce_limit);
10731932757SAlexander Gordeev 
10831932757SAlexander Gordeev 	if (p4d) {
10931932757SAlexander Gordeev 		__pgd = (unsigned long *) mm->pgd;
11031932757SAlexander Gordeev 		p4d_populate(mm, (p4d_t *) p4d, (pud_t *) __pgd);
11131932757SAlexander Gordeev 		mm->pgd = (pgd_t *) p4d;
112f1c1174fSHeiko Carstens 		mm->context.asce_limit = _REGION1_SIZE;
113723cacbdSGerald Schaefer 		mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
114723cacbdSGerald Schaefer 			_ASCE_USER_BITS | _ASCE_TYPE_REGION2;
115e12e4044SMartin Schwidefsky 		mm_inc_nr_puds(mm);
11631932757SAlexander Gordeev 	}
11731932757SAlexander Gordeev 	if (pgd) {
11831932757SAlexander Gordeev 		__pgd = (unsigned long *) mm->pgd;
11931932757SAlexander Gordeev 		pgd_populate(mm, (pgd_t *) pgd, (p4d_t *) __pgd);
12031932757SAlexander Gordeev 		mm->pgd = (pgd_t *) pgd;
121f7555608SAlexander Gordeev 		mm->context.asce_limit = TASK_SIZE_MAX;
1221aea9b3fSMartin Schwidefsky 		mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
1231aea9b3fSMartin Schwidefsky 			_ASCE_USER_BITS | _ASCE_TYPE_REGION1;
1241aea9b3fSMartin Schwidefsky 	}
12531932757SAlexander Gordeev 
1261e133ab2SMartin Schwidefsky 	spin_unlock_bh(&mm->page_table_lock);
12731932757SAlexander Gordeev 
1281e133ab2SMartin Schwidefsky 	on_each_cpu(__crst_table_upgrade, mm, 0);
12931932757SAlexander Gordeev 
13031932757SAlexander Gordeev 	return 0;
13131932757SAlexander Gordeev 
13231932757SAlexander Gordeev err_pgd:
13331932757SAlexander Gordeev 	crst_table_free(mm, p4d);
13431932757SAlexander Gordeev err_p4d:
13531932757SAlexander Gordeev 	return -ENOMEM;
1361e133ab2SMartin Schwidefsky }
1371e133ab2SMartin Schwidefsky 
1384be130a0SMartin Schwidefsky #ifdef CONFIG_PGSTE
1394be130a0SMartin Schwidefsky 
page_table_alloc_pgste(struct mm_struct * mm)14057b77b75SQi Zheng struct ptdesc *page_table_alloc_pgste(struct mm_struct *mm)
1414be130a0SMartin Schwidefsky {
1426326c26cSVishal Moola (Oracle) 	struct ptdesc *ptdesc;
14341879ff6SHeiko Carstens 	u64 *table;
1444be130a0SMartin Schwidefsky 
1456326c26cSVishal Moola (Oracle) 	ptdesc = pagetable_alloc(GFP_KERNEL, 0);
1466326c26cSVishal Moola (Oracle) 	if (ptdesc) {
1476326c26cSVishal Moola (Oracle) 		table = (u64 *)ptdesc_to_virt(ptdesc);
14865d37f16SHeiko Carstens 		__arch_set_page_dat(table, 1);
14941879ff6SHeiko Carstens 		memset64(table, _PAGE_INVALID, PTRS_PER_PTE);
15041879ff6SHeiko Carstens 		memset64(table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
1514be130a0SMartin Schwidefsky 	}
15257b77b75SQi Zheng 	return ptdesc;
1534be130a0SMartin Schwidefsky }
1544be130a0SMartin Schwidefsky 
/* Release a page table obtained from page_table_alloc_pgste(). */
void page_table_free_pgste(struct ptdesc *ptdesc)
{
	pagetable_free(ptdesc);
}
1594be130a0SMartin Schwidefsky 
1604be130a0SMartin Schwidefsky #endif /* CONFIG_PGSTE */
1614be130a0SMartin Schwidefsky 
page_table_alloc(struct mm_struct * mm)1621e133ab2SMartin Schwidefsky unsigned long *page_table_alloc(struct mm_struct *mm)
1631e133ab2SMartin Schwidefsky {
1646326c26cSVishal Moola (Oracle) 	struct ptdesc *ptdesc;
165d08d4e7cSAlexander Gordeev 	unsigned long *table;
1661e133ab2SMartin Schwidefsky 
1676326c26cSVishal Moola (Oracle) 	ptdesc = pagetable_alloc(GFP_KERNEL, 0);
1686326c26cSVishal Moola (Oracle) 	if (!ptdesc)
1691e133ab2SMartin Schwidefsky 		return NULL;
1706326c26cSVishal Moola (Oracle) 	if (!pagetable_pte_ctor(ptdesc)) {
1716326c26cSVishal Moola (Oracle) 		pagetable_free(ptdesc);
1721e133ab2SMartin Schwidefsky 		return NULL;
1731e133ab2SMartin Schwidefsky 	}
17465d37f16SHeiko Carstens 	table = ptdesc_to_virt(ptdesc);
17565d37f16SHeiko Carstens 	__arch_set_page_dat(table, 1);
176d08d4e7cSAlexander Gordeev 	/* pt_list is used by gmap only */
1776326c26cSVishal Moola (Oracle) 	INIT_LIST_HEAD(&ptdesc->pt_list);
17841879ff6SHeiko Carstens 	memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE);
17941879ff6SHeiko Carstens 	memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
1801e133ab2SMartin Schwidefsky 	return table;
1811e133ab2SMartin Schwidefsky }
1821e133ab2SMartin Schwidefsky 
/* Run the pte destructor on @ptdesc, then free the page table. */
static void pagetable_pte_dtor_free(struct ptdesc *ptdesc)
{
	pagetable_pte_dtor(ptdesc);
	pagetable_free(ptdesc);
}
1888211dad6SHugh Dickins 
/*
 * Free a page table given by its virtual address @table: the pte
 * destructor is run before the page is released. @mm is not used here.
 */
void page_table_free(struct mm_struct *mm, unsigned long *table)
{
	pagetable_pte_dtor_free(virt_to_ptdesc(table));
}
1951e133ab2SMartin Schwidefsky 
__tlb_remove_table(void * table)1960031f1c7SAlexander Gordeev void __tlb_remove_table(void *table)
1971e133ab2SMartin Schwidefsky {
1980031f1c7SAlexander Gordeev 	struct ptdesc *ptdesc = virt_to_ptdesc(table);
1990031f1c7SAlexander Gordeev 	struct page *page = ptdesc_page(ptdesc);
2001e133ab2SMartin Schwidefsky 
2010031f1c7SAlexander Gordeev 	if (compound_order(page) == CRST_ALLOC_ORDER) {
202d08d4e7cSAlexander Gordeev 		/* pmd, pud, or p4d */
2036326c26cSVishal Moola (Oracle) 		pagetable_free(ptdesc);
2044c88bb96SAlexander Gordeev 		return;
2051e133ab2SMartin Schwidefsky 	}
206d08d4e7cSAlexander Gordeev 	pagetable_pte_dtor_free(ptdesc);
2071e133ab2SMartin Schwidefsky }
2081e133ab2SMartin Schwidefsky 
2098211dad6SHugh Dickins #ifdef CONFIG_TRANSPARENT_HUGEPAGE
pte_free_now(struct rcu_head * head)210d08d4e7cSAlexander Gordeev static void pte_free_now(struct rcu_head *head)
211d08d4e7cSAlexander Gordeev {
212d08d4e7cSAlexander Gordeev 	struct ptdesc *ptdesc = container_of(head, struct ptdesc, pt_rcu_head);
213d08d4e7cSAlexander Gordeev 
214d08d4e7cSAlexander Gordeev 	pagetable_pte_dtor_free(ptdesc);
215d08d4e7cSAlexander Gordeev }
216d08d4e7cSAlexander Gordeev 
/*
 * Queue the page table @pgtable for freeing through call_rcu(); the
 * actual destruction happens in pte_free_now().
 */
void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable)
{
	struct ptdesc *ptdesc;

	ptdesc = virt_to_ptdesc(pgtable);
	call_rcu(&ptdesc->pt_rcu_head, pte_free_now);
	/*
	 * KVM guests must not use THPs, so an mm with pgstes is not expected
	 * to get here - warn if it ever does. Switch to the generic
	 * pte_free_defer() version once gmap is removed.
	 */
	WARN_ON_ONCE(mm_has_pgste(mm));
}
2288211dad6SHugh Dickins #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
2298211dad6SHugh Dickins 
/*
 * Base infrastructure required to generate basic asces as well as region,
 * segment, and page tables that do not make use of enhanced features like
 * EDAT1.
 */

/* Slab cache for base page tables; set up by base_pgt_cache_init(). */
static struct kmem_cache *base_pgt_cache;
2361caf170dSHeiko Carstens 
base_pgt_alloc(void)237da001fceSHeiko Carstens static unsigned long *base_pgt_alloc(void)
2381caf170dSHeiko Carstens {
239da001fceSHeiko Carstens 	unsigned long *table;
2401caf170dSHeiko Carstens 
2411caf170dSHeiko Carstens 	table = kmem_cache_alloc(base_pgt_cache, GFP_KERNEL);
2421caf170dSHeiko Carstens 	if (table)
243da001fceSHeiko Carstens 		memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE);
2441caf170dSHeiko Carstens 	return table;
2451caf170dSHeiko Carstens }
2461caf170dSHeiko Carstens 
base_pgt_free(unsigned long * table)247da001fceSHeiko Carstens static void base_pgt_free(unsigned long *table)
2481caf170dSHeiko Carstens {
249da001fceSHeiko Carstens 	kmem_cache_free(base_pgt_cache, table);
250da001fceSHeiko Carstens }
251da001fceSHeiko Carstens 
base_crst_alloc(unsigned long val)252da001fceSHeiko Carstens static unsigned long *base_crst_alloc(unsigned long val)
253da001fceSHeiko Carstens {
254da001fceSHeiko Carstens 	unsigned long *table;
2556326c26cSVishal Moola (Oracle) 	struct ptdesc *ptdesc;
256da001fceSHeiko Carstens 
2577bc8b8ebSHeiko Carstens 	ptdesc = pagetable_alloc(GFP_KERNEL, CRST_ALLOC_ORDER);
2586326c26cSVishal Moola (Oracle) 	if (!ptdesc)
2596326c26cSVishal Moola (Oracle) 		return NULL;
2606326c26cSVishal Moola (Oracle) 	table = ptdesc_address(ptdesc);
261da001fceSHeiko Carstens 	crst_table_init(table, val);
262da001fceSHeiko Carstens 	return table;
263da001fceSHeiko Carstens }
264da001fceSHeiko Carstens 
/* Free a table from base_crst_alloc(); a NULL @table is ignored. */
static void base_crst_free(unsigned long *table)
{
	if (table)
		pagetable_free(virt_to_ptdesc(table));
}
2711caf170dSHeiko Carstens 
2721caf170dSHeiko Carstens #define BASE_ADDR_END_FUNC(NAME, SIZE)					\
2731caf170dSHeiko Carstens static inline unsigned long base_##NAME##_addr_end(unsigned long addr,	\
2741caf170dSHeiko Carstens 						   unsigned long end)	\
2751caf170dSHeiko Carstens {									\
2761caf170dSHeiko Carstens 	unsigned long next = (addr + (SIZE)) & ~((SIZE) - 1);		\
2771caf170dSHeiko Carstens 									\
2781caf170dSHeiko Carstens 	return (next - 1) < (end - 1) ? next : end;			\
2791caf170dSHeiko Carstens }
2801caf170dSHeiko Carstens 
BASE_ADDR_END_FUNC(page,_PAGE_SIZE)2811caf170dSHeiko Carstens BASE_ADDR_END_FUNC(page,    _PAGE_SIZE)
2821caf170dSHeiko Carstens BASE_ADDR_END_FUNC(segment, _SEGMENT_SIZE)
2831caf170dSHeiko Carstens BASE_ADDR_END_FUNC(region3, _REGION3_SIZE)
2841caf170dSHeiko Carstens BASE_ADDR_END_FUNC(region2, _REGION2_SIZE)
2851caf170dSHeiko Carstens BASE_ADDR_END_FUNC(region1, _REGION1_SIZE)
2861caf170dSHeiko Carstens 
2871caf170dSHeiko Carstens static inline unsigned long base_lra(unsigned long address)
2881caf170dSHeiko Carstens {
2891caf170dSHeiko Carstens 	unsigned long real;
2901caf170dSHeiko Carstens 
2911caf170dSHeiko Carstens 	asm volatile(
2921caf170dSHeiko Carstens 		"	lra	%0,0(%1)\n"
2931caf170dSHeiko Carstens 		: "=d" (real) : "a" (address) : "cc");
2941caf170dSHeiko Carstens 	return real;
2951caf170dSHeiko Carstens }
2961caf170dSHeiko Carstens 
base_page_walk(unsigned long * origin,unsigned long addr,unsigned long end,int alloc)297da001fceSHeiko Carstens static int base_page_walk(unsigned long *origin, unsigned long addr,
2981caf170dSHeiko Carstens 			  unsigned long end, int alloc)
2991caf170dSHeiko Carstens {
3001caf170dSHeiko Carstens 	unsigned long *pte, next;
3011caf170dSHeiko Carstens 
3021caf170dSHeiko Carstens 	if (!alloc)
3031caf170dSHeiko Carstens 		return 0;
304da001fceSHeiko Carstens 	pte = origin;
3051caf170dSHeiko Carstens 	pte += (addr & _PAGE_INDEX) >> _PAGE_SHIFT;
3061caf170dSHeiko Carstens 	do {
3071caf170dSHeiko Carstens 		next = base_page_addr_end(addr, end);
3081caf170dSHeiko Carstens 		*pte = base_lra(addr);
3091caf170dSHeiko Carstens 	} while (pte++, addr = next, addr < end);
3101caf170dSHeiko Carstens 	return 0;
3111caf170dSHeiko Carstens }
3121caf170dSHeiko Carstens 
base_segment_walk(unsigned long * origin,unsigned long addr,unsigned long end,int alloc)313da001fceSHeiko Carstens static int base_segment_walk(unsigned long *origin, unsigned long addr,
3141caf170dSHeiko Carstens 			     unsigned long end, int alloc)
3151caf170dSHeiko Carstens {
316da001fceSHeiko Carstens 	unsigned long *ste, next, *table;
3171caf170dSHeiko Carstens 	int rc;
3181caf170dSHeiko Carstens 
319da001fceSHeiko Carstens 	ste = origin;
3201caf170dSHeiko Carstens 	ste += (addr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
3211caf170dSHeiko Carstens 	do {
3221caf170dSHeiko Carstens 		next = base_segment_addr_end(addr, end);
3231caf170dSHeiko Carstens 		if (*ste & _SEGMENT_ENTRY_INVALID) {
3241caf170dSHeiko Carstens 			if (!alloc)
3251caf170dSHeiko Carstens 				continue;
3261caf170dSHeiko Carstens 			table = base_pgt_alloc();
3271caf170dSHeiko Carstens 			if (!table)
3281caf170dSHeiko Carstens 				return -ENOMEM;
3292f882800SHeiko Carstens 			*ste = __pa(table) | _SEGMENT_ENTRY;
3301caf170dSHeiko Carstens 		}
331da001fceSHeiko Carstens 		table = __va(*ste & _SEGMENT_ENTRY_ORIGIN);
3321caf170dSHeiko Carstens 		rc = base_page_walk(table, addr, next, alloc);
3331caf170dSHeiko Carstens 		if (rc)
3341caf170dSHeiko Carstens 			return rc;
3351caf170dSHeiko Carstens 		if (!alloc)
3361caf170dSHeiko Carstens 			base_pgt_free(table);
3371caf170dSHeiko Carstens 		cond_resched();
3381caf170dSHeiko Carstens 	} while (ste++, addr = next, addr < end);
3391caf170dSHeiko Carstens 	return 0;
3401caf170dSHeiko Carstens }
3411caf170dSHeiko Carstens 
base_region3_walk(unsigned long * origin,unsigned long addr,unsigned long end,int alloc)342da001fceSHeiko Carstens static int base_region3_walk(unsigned long *origin, unsigned long addr,
3431caf170dSHeiko Carstens 			     unsigned long end, int alloc)
3441caf170dSHeiko Carstens {
345da001fceSHeiko Carstens 	unsigned long *rtte, next, *table;
3461caf170dSHeiko Carstens 	int rc;
3471caf170dSHeiko Carstens 
348da001fceSHeiko Carstens 	rtte = origin;
3491caf170dSHeiko Carstens 	rtte += (addr & _REGION3_INDEX) >> _REGION3_SHIFT;
3501caf170dSHeiko Carstens 	do {
3511caf170dSHeiko Carstens 		next = base_region3_addr_end(addr, end);
3521caf170dSHeiko Carstens 		if (*rtte & _REGION_ENTRY_INVALID) {
3531caf170dSHeiko Carstens 			if (!alloc)
3541caf170dSHeiko Carstens 				continue;
3551caf170dSHeiko Carstens 			table = base_crst_alloc(_SEGMENT_ENTRY_EMPTY);
3561caf170dSHeiko Carstens 			if (!table)
3571caf170dSHeiko Carstens 				return -ENOMEM;
3582f882800SHeiko Carstens 			*rtte = __pa(table) | _REGION3_ENTRY;
3591caf170dSHeiko Carstens 		}
360da001fceSHeiko Carstens 		table = __va(*rtte & _REGION_ENTRY_ORIGIN);
3611caf170dSHeiko Carstens 		rc = base_segment_walk(table, addr, next, alloc);
3621caf170dSHeiko Carstens 		if (rc)
3631caf170dSHeiko Carstens 			return rc;
3641caf170dSHeiko Carstens 		if (!alloc)
3651caf170dSHeiko Carstens 			base_crst_free(table);
3661caf170dSHeiko Carstens 	} while (rtte++, addr = next, addr < end);
3671caf170dSHeiko Carstens 	return 0;
3681caf170dSHeiko Carstens }
3691caf170dSHeiko Carstens 
base_region2_walk(unsigned long * origin,unsigned long addr,unsigned long end,int alloc)370da001fceSHeiko Carstens static int base_region2_walk(unsigned long *origin, unsigned long addr,
3711caf170dSHeiko Carstens 			     unsigned long end, int alloc)
3721caf170dSHeiko Carstens {
373da001fceSHeiko Carstens 	unsigned long *rste, next, *table;
3741caf170dSHeiko Carstens 	int rc;
3751caf170dSHeiko Carstens 
376da001fceSHeiko Carstens 	rste = origin;
3771caf170dSHeiko Carstens 	rste += (addr & _REGION2_INDEX) >> _REGION2_SHIFT;
3781caf170dSHeiko Carstens 	do {
3791caf170dSHeiko Carstens 		next = base_region2_addr_end(addr, end);
3801caf170dSHeiko Carstens 		if (*rste & _REGION_ENTRY_INVALID) {
3811caf170dSHeiko Carstens 			if (!alloc)
3821caf170dSHeiko Carstens 				continue;
3831caf170dSHeiko Carstens 			table = base_crst_alloc(_REGION3_ENTRY_EMPTY);
3841caf170dSHeiko Carstens 			if (!table)
3851caf170dSHeiko Carstens 				return -ENOMEM;
3862f882800SHeiko Carstens 			*rste = __pa(table) | _REGION2_ENTRY;
3871caf170dSHeiko Carstens 		}
388da001fceSHeiko Carstens 		table = __va(*rste & _REGION_ENTRY_ORIGIN);
3891caf170dSHeiko Carstens 		rc = base_region3_walk(table, addr, next, alloc);
3901caf170dSHeiko Carstens 		if (rc)
3911caf170dSHeiko Carstens 			return rc;
3921caf170dSHeiko Carstens 		if (!alloc)
3931caf170dSHeiko Carstens 			base_crst_free(table);
3941caf170dSHeiko Carstens 	} while (rste++, addr = next, addr < end);
3951caf170dSHeiko Carstens 	return 0;
3961caf170dSHeiko Carstens }
3971caf170dSHeiko Carstens 
base_region1_walk(unsigned long * origin,unsigned long addr,unsigned long end,int alloc)398da001fceSHeiko Carstens static int base_region1_walk(unsigned long *origin, unsigned long addr,
3991caf170dSHeiko Carstens 			     unsigned long end, int alloc)
4001caf170dSHeiko Carstens {
401da001fceSHeiko Carstens 	unsigned long *rfte, next, *table;
4021caf170dSHeiko Carstens 	int rc;
4031caf170dSHeiko Carstens 
404da001fceSHeiko Carstens 	rfte = origin;
4051caf170dSHeiko Carstens 	rfte += (addr & _REGION1_INDEX) >> _REGION1_SHIFT;
4061caf170dSHeiko Carstens 	do {
4071caf170dSHeiko Carstens 		next = base_region1_addr_end(addr, end);
4081caf170dSHeiko Carstens 		if (*rfte & _REGION_ENTRY_INVALID) {
4091caf170dSHeiko Carstens 			if (!alloc)
4101caf170dSHeiko Carstens 				continue;
4111caf170dSHeiko Carstens 			table = base_crst_alloc(_REGION2_ENTRY_EMPTY);
4121caf170dSHeiko Carstens 			if (!table)
4131caf170dSHeiko Carstens 				return -ENOMEM;
4142f882800SHeiko Carstens 			*rfte = __pa(table) | _REGION1_ENTRY;
4151caf170dSHeiko Carstens 		}
416da001fceSHeiko Carstens 		table = __va(*rfte & _REGION_ENTRY_ORIGIN);
4171caf170dSHeiko Carstens 		rc = base_region2_walk(table, addr, next, alloc);
4181caf170dSHeiko Carstens 		if (rc)
4191caf170dSHeiko Carstens 			return rc;
4201caf170dSHeiko Carstens 		if (!alloc)
4211caf170dSHeiko Carstens 			base_crst_free(table);
4221caf170dSHeiko Carstens 	} while (rfte++, addr = next, addr < end);
4231caf170dSHeiko Carstens 	return 0;
4241caf170dSHeiko Carstens }
4251caf170dSHeiko Carstens 
4261caf170dSHeiko Carstens /**
4271caf170dSHeiko Carstens  * base_asce_free - free asce and tables returned from base_asce_alloc()
4281caf170dSHeiko Carstens  * @asce: asce to be freed
4291caf170dSHeiko Carstens  *
4301caf170dSHeiko Carstens  * Frees all region, segment, and page tables that were allocated with a
4311caf170dSHeiko Carstens  * corresponding base_asce_alloc() call.
4321caf170dSHeiko Carstens  */
base_asce_free(unsigned long asce)4331caf170dSHeiko Carstens void base_asce_free(unsigned long asce)
4341caf170dSHeiko Carstens {
435da001fceSHeiko Carstens 	unsigned long *table = __va(asce & _ASCE_ORIGIN);
4361caf170dSHeiko Carstens 
4371caf170dSHeiko Carstens 	if (!asce)
4381caf170dSHeiko Carstens 		return;
4391caf170dSHeiko Carstens 	switch (asce & _ASCE_TYPE_MASK) {
4401caf170dSHeiko Carstens 	case _ASCE_TYPE_SEGMENT:
4411caf170dSHeiko Carstens 		base_segment_walk(table, 0, _REGION3_SIZE, 0);
4421caf170dSHeiko Carstens 		break;
4431caf170dSHeiko Carstens 	case _ASCE_TYPE_REGION3:
4441caf170dSHeiko Carstens 		base_region3_walk(table, 0, _REGION2_SIZE, 0);
4451caf170dSHeiko Carstens 		break;
4461caf170dSHeiko Carstens 	case _ASCE_TYPE_REGION2:
4471caf170dSHeiko Carstens 		base_region2_walk(table, 0, _REGION1_SIZE, 0);
4481caf170dSHeiko Carstens 		break;
4491caf170dSHeiko Carstens 	case _ASCE_TYPE_REGION1:
450f7555608SAlexander Gordeev 		base_region1_walk(table, 0, TASK_SIZE_MAX, 0);
4511caf170dSHeiko Carstens 		break;
4521caf170dSHeiko Carstens 	}
4531caf170dSHeiko Carstens 	base_crst_free(table);
4541caf170dSHeiko Carstens }
4551caf170dSHeiko Carstens 
base_pgt_cache_init(void)4561caf170dSHeiko Carstens static int base_pgt_cache_init(void)
4571caf170dSHeiko Carstens {
4581caf170dSHeiko Carstens 	static DEFINE_MUTEX(base_pgt_cache_mutex);
4591caf170dSHeiko Carstens 	unsigned long sz = _PAGE_TABLE_SIZE;
4601caf170dSHeiko Carstens 
4611caf170dSHeiko Carstens 	if (base_pgt_cache)
4621caf170dSHeiko Carstens 		return 0;
4631caf170dSHeiko Carstens 	mutex_lock(&base_pgt_cache_mutex);
4641caf170dSHeiko Carstens 	if (!base_pgt_cache)
4651caf170dSHeiko Carstens 		base_pgt_cache = kmem_cache_create("base_pgt", sz, sz, 0, NULL);
4661caf170dSHeiko Carstens 	mutex_unlock(&base_pgt_cache_mutex);
4671caf170dSHeiko Carstens 	return base_pgt_cache ? 0 : -ENOMEM;
4681caf170dSHeiko Carstens }
4691caf170dSHeiko Carstens 
4701caf170dSHeiko Carstens /**
4711caf170dSHeiko Carstens  * base_asce_alloc - create kernel mapping without enhanced DAT features
4721caf170dSHeiko Carstens  * @addr: virtual start address of kernel mapping
4731caf170dSHeiko Carstens  * @num_pages: number of consecutive pages
4741caf170dSHeiko Carstens  *
4751caf170dSHeiko Carstens  * Generate an asce, including all required region, segment and page tables,
4761caf170dSHeiko Carstens  * that can be used to access the virtual kernel mapping. The difference is
4771caf170dSHeiko Carstens  * that the returned asce does not make use of any enhanced DAT features like
4781caf170dSHeiko Carstens  * e.g. large pages. This is required for some I/O functions that pass an
4791caf170dSHeiko Carstens  * asce, like e.g. some service call requests.
4801caf170dSHeiko Carstens  *
4811caf170dSHeiko Carstens  * Note: the returned asce may NEVER be attached to any cpu. It may only be
4821caf170dSHeiko Carstens  *	 used for I/O requests. tlb entries that might result because the
4831caf170dSHeiko Carstens  *	 asce was attached to a cpu won't be cleared.
4841caf170dSHeiko Carstens  */
base_asce_alloc(unsigned long addr,unsigned long num_pages)4851caf170dSHeiko Carstens unsigned long base_asce_alloc(unsigned long addr, unsigned long num_pages)
4861caf170dSHeiko Carstens {
487da001fceSHeiko Carstens 	unsigned long asce, *table, end;
4881caf170dSHeiko Carstens 	int rc;
4891caf170dSHeiko Carstens 
4901caf170dSHeiko Carstens 	if (base_pgt_cache_init())
4911caf170dSHeiko Carstens 		return 0;
4921caf170dSHeiko Carstens 	end = addr + num_pages * PAGE_SIZE;
4931caf170dSHeiko Carstens 	if (end <= _REGION3_SIZE) {
4941caf170dSHeiko Carstens 		table = base_crst_alloc(_SEGMENT_ENTRY_EMPTY);
4951caf170dSHeiko Carstens 		if (!table)
4961caf170dSHeiko Carstens 			return 0;
4971caf170dSHeiko Carstens 		rc = base_segment_walk(table, addr, end, 1);
4982f882800SHeiko Carstens 		asce = __pa(table) | _ASCE_TYPE_SEGMENT | _ASCE_TABLE_LENGTH;
4991caf170dSHeiko Carstens 	} else if (end <= _REGION2_SIZE) {
5001caf170dSHeiko Carstens 		table = base_crst_alloc(_REGION3_ENTRY_EMPTY);
5011caf170dSHeiko Carstens 		if (!table)
5021caf170dSHeiko Carstens 			return 0;
5031caf170dSHeiko Carstens 		rc = base_region3_walk(table, addr, end, 1);
5042f882800SHeiko Carstens 		asce = __pa(table) | _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
5051caf170dSHeiko Carstens 	} else if (end <= _REGION1_SIZE) {
5061caf170dSHeiko Carstens 		table = base_crst_alloc(_REGION2_ENTRY_EMPTY);
5071caf170dSHeiko Carstens 		if (!table)
5081caf170dSHeiko Carstens 			return 0;
5091caf170dSHeiko Carstens 		rc = base_region2_walk(table, addr, end, 1);
5102f882800SHeiko Carstens 		asce = __pa(table) | _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
5111caf170dSHeiko Carstens 	} else {
5121caf170dSHeiko Carstens 		table = base_crst_alloc(_REGION1_ENTRY_EMPTY);
5131caf170dSHeiko Carstens 		if (!table)
5141caf170dSHeiko Carstens 			return 0;
5151caf170dSHeiko Carstens 		rc = base_region1_walk(table, addr, end, 1);
5162f882800SHeiko Carstens 		asce = __pa(table) | _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH;
5171caf170dSHeiko Carstens 	}
5181caf170dSHeiko Carstens 	if (rc) {
5191caf170dSHeiko Carstens 		base_asce_free(asce);
5201caf170dSHeiko Carstens 		asce = 0;
5211caf170dSHeiko Carstens 	}
5221caf170dSHeiko Carstens 	return asce;
5231caf170dSHeiko Carstens }
524