xref: /linux/arch/s390/mm/pgalloc.c (revision 620b4e903179d58342503fa09d9c680d93bf7db8)
1b2441318SGreg Kroah-Hartman // SPDX-License-Identifier: GPL-2.0
21e133ab2SMartin Schwidefsky /*
31e133ab2SMartin Schwidefsky  *  Page table allocation functions
41e133ab2SMartin Schwidefsky  *
51e133ab2SMartin Schwidefsky  *    Copyright IBM Corp. 2016
61e133ab2SMartin Schwidefsky  *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
71e133ab2SMartin Schwidefsky  */
81e133ab2SMartin Schwidefsky 
91e133ab2SMartin Schwidefsky #include <linux/sysctl.h>
101caf170dSHeiko Carstens #include <linux/slab.h>
111caf170dSHeiko Carstens #include <linux/mm.h>
121e133ab2SMartin Schwidefsky #include <asm/mmu_context.h>
131e133ab2SMartin Schwidefsky #include <asm/pgalloc.h>
141e133ab2SMartin Schwidefsky #include <asm/gmap.h>
151e133ab2SMartin Schwidefsky #include <asm/tlb.h>
161e133ab2SMartin Schwidefsky #include <asm/tlbflush.h>
171e133ab2SMartin Schwidefsky 
181e133ab2SMartin Schwidefsky #ifdef CONFIG_PGSTE
191e133ab2SMartin Schwidefsky 
201e133ab2SMartin Schwidefsky static int page_table_allocate_pgste_min = 0;
211e133ab2SMartin Schwidefsky static int page_table_allocate_pgste_max = 1;
221e133ab2SMartin Schwidefsky int page_table_allocate_pgste = 0;
231e133ab2SMartin Schwidefsky EXPORT_SYMBOL(page_table_allocate_pgste);
241e133ab2SMartin Schwidefsky 
251e133ab2SMartin Schwidefsky static struct ctl_table page_table_sysctl[] = {
261e133ab2SMartin Schwidefsky 	{
271e133ab2SMartin Schwidefsky 		.procname	= "allocate_pgste",
281e133ab2SMartin Schwidefsky 		.data		= &page_table_allocate_pgste,
291e133ab2SMartin Schwidefsky 		.maxlen		= sizeof(int),
301e133ab2SMartin Schwidefsky 		.mode		= S_IRUGO | S_IWUSR,
311e133ab2SMartin Schwidefsky 		.proc_handler	= proc_dointvec,
321e133ab2SMartin Schwidefsky 		.extra1		= &page_table_allocate_pgste_min,
331e133ab2SMartin Schwidefsky 		.extra2		= &page_table_allocate_pgste_max,
341e133ab2SMartin Schwidefsky 	},
351e133ab2SMartin Schwidefsky 	{ }
361e133ab2SMartin Schwidefsky };
371e133ab2SMartin Schwidefsky 
381e133ab2SMartin Schwidefsky static struct ctl_table page_table_sysctl_dir[] = {
391e133ab2SMartin Schwidefsky 	{
401e133ab2SMartin Schwidefsky 		.procname	= "vm",
411e133ab2SMartin Schwidefsky 		.maxlen		= 0,
421e133ab2SMartin Schwidefsky 		.mode		= 0555,
431e133ab2SMartin Schwidefsky 		.child		= page_table_sysctl,
441e133ab2SMartin Schwidefsky 	},
451e133ab2SMartin Schwidefsky 	{ }
461e133ab2SMartin Schwidefsky };
471e133ab2SMartin Schwidefsky 
481e133ab2SMartin Schwidefsky static int __init page_table_register_sysctl(void)
491e133ab2SMartin Schwidefsky {
501e133ab2SMartin Schwidefsky 	return register_sysctl_table(page_table_sysctl_dir) ? 0 : -ENOMEM;
511e133ab2SMartin Schwidefsky }
521e133ab2SMartin Schwidefsky __initcall(page_table_register_sysctl);
531e133ab2SMartin Schwidefsky 
541e133ab2SMartin Schwidefsky #endif /* CONFIG_PGSTE */
551e133ab2SMartin Schwidefsky 
561e133ab2SMartin Schwidefsky unsigned long *crst_table_alloc(struct mm_struct *mm)
571e133ab2SMartin Schwidefsky {
581e133ab2SMartin Schwidefsky 	struct page *page = alloc_pages(GFP_KERNEL, 2);
591e133ab2SMartin Schwidefsky 
601e133ab2SMartin Schwidefsky 	if (!page)
611e133ab2SMartin Schwidefsky 		return NULL;
62c9b5ad54SMartin Schwidefsky 	arch_set_page_dat(page, 2);
631e133ab2SMartin Schwidefsky 	return (unsigned long *) page_to_phys(page);
641e133ab2SMartin Schwidefsky }
651e133ab2SMartin Schwidefsky 
/*
 * Release an order-2 crst table, the size crst_table_alloc() hands out.
 * @mm is not used by this function.
 */
void crst_table_free(struct mm_struct *mm, unsigned long *table)
{
	unsigned long addr = (unsigned long) table;

	free_pages(addr, 2);
}
701e133ab2SMartin Schwidefsky 
711e133ab2SMartin Schwidefsky static void __crst_table_upgrade(void *arg)
721e133ab2SMartin Schwidefsky {
731e133ab2SMartin Schwidefsky 	struct mm_struct *mm = arg;
741e133ab2SMartin Schwidefsky 
750aaba41bSMartin Schwidefsky 	if (current->active_mm == mm)
761e133ab2SMartin Schwidefsky 		set_user_asce(mm);
771e133ab2SMartin Schwidefsky 	__tlb_flush_local();
781e133ab2SMartin Schwidefsky }
791e133ab2SMartin Schwidefsky 
801aea9b3fSMartin Schwidefsky int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
811e133ab2SMartin Schwidefsky {
821e133ab2SMartin Schwidefsky 	unsigned long *table, *pgd;
831aea9b3fSMartin Schwidefsky 	int rc, notify;
841e133ab2SMartin Schwidefsky 
851aea9b3fSMartin Schwidefsky 	/* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */
862fc4876eSMartin Schwidefsky 	VM_BUG_ON(mm->context.asce_limit < _REGION2_SIZE);
871aea9b3fSMartin Schwidefsky 	rc = 0;
881aea9b3fSMartin Schwidefsky 	notify = 0;
891aea9b3fSMartin Schwidefsky 	while (mm->context.asce_limit < end) {
901aea9b3fSMartin Schwidefsky 		table = crst_table_alloc(mm);
911aea9b3fSMartin Schwidefsky 		if (!table) {
921aea9b3fSMartin Schwidefsky 			rc = -ENOMEM;
931aea9b3fSMartin Schwidefsky 			break;
941aea9b3fSMartin Schwidefsky 		}
951e133ab2SMartin Schwidefsky 		spin_lock_bh(&mm->page_table_lock);
961e133ab2SMartin Schwidefsky 		pgd = (unsigned long *) mm->pgd;
97f1c1174fSHeiko Carstens 		if (mm->context.asce_limit == _REGION2_SIZE) {
98723cacbdSGerald Schaefer 			crst_table_init(table, _REGION2_ENTRY_EMPTY);
991aea9b3fSMartin Schwidefsky 			p4d_populate(mm, (p4d_t *) table, (pud_t *) pgd);
1001e133ab2SMartin Schwidefsky 			mm->pgd = (pgd_t *) table;
101f1c1174fSHeiko Carstens 			mm->context.asce_limit = _REGION1_SIZE;
102723cacbdSGerald Schaefer 			mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
103723cacbdSGerald Schaefer 				_ASCE_USER_BITS | _ASCE_TYPE_REGION2;
1041aea9b3fSMartin Schwidefsky 		} else {
1051aea9b3fSMartin Schwidefsky 			crst_table_init(table, _REGION1_ENTRY_EMPTY);
1061aea9b3fSMartin Schwidefsky 			pgd_populate(mm, (pgd_t *) table, (p4d_t *) pgd);
1071aea9b3fSMartin Schwidefsky 			mm->pgd = (pgd_t *) table;
1081aea9b3fSMartin Schwidefsky 			mm->context.asce_limit = -PAGE_SIZE;
1091aea9b3fSMartin Schwidefsky 			mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
1101aea9b3fSMartin Schwidefsky 				_ASCE_USER_BITS | _ASCE_TYPE_REGION1;
1111aea9b3fSMartin Schwidefsky 		}
1121aea9b3fSMartin Schwidefsky 		notify = 1;
1131e133ab2SMartin Schwidefsky 		spin_unlock_bh(&mm->page_table_lock);
1141aea9b3fSMartin Schwidefsky 	}
1151aea9b3fSMartin Schwidefsky 	if (notify)
1161e133ab2SMartin Schwidefsky 		on_each_cpu(__crst_table_upgrade, mm, 0);
1171aea9b3fSMartin Schwidefsky 	return rc;
1181e133ab2SMartin Schwidefsky }
1191e133ab2SMartin Schwidefsky 
120723cacbdSGerald Schaefer void crst_table_downgrade(struct mm_struct *mm)
1211e133ab2SMartin Schwidefsky {
1221e133ab2SMartin Schwidefsky 	pgd_t *pgd;
1231e133ab2SMartin Schwidefsky 
124723cacbdSGerald Schaefer 	/* downgrade should only happen from 3 to 2 levels (compat only) */
1252fc4876eSMartin Schwidefsky 	VM_BUG_ON(mm->context.asce_limit != _REGION2_SIZE);
126723cacbdSGerald Schaefer 
1271e133ab2SMartin Schwidefsky 	if (current->active_mm == mm) {
1281e133ab2SMartin Schwidefsky 		clear_user_asce();
1291e133ab2SMartin Schwidefsky 		__tlb_flush_mm(mm);
1301e133ab2SMartin Schwidefsky 	}
131723cacbdSGerald Schaefer 
1321e133ab2SMartin Schwidefsky 	pgd = mm->pgd;
1331e133ab2SMartin Schwidefsky 	mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
134f1c1174fSHeiko Carstens 	mm->context.asce_limit = _REGION3_SIZE;
135723cacbdSGerald Schaefer 	mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
136723cacbdSGerald Schaefer 			   _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
1371e133ab2SMartin Schwidefsky 	crst_table_free(mm, (unsigned long *) pgd);
138723cacbdSGerald Schaefer 
1391e133ab2SMartin Schwidefsky 	if (current->active_mm == mm)
1401e133ab2SMartin Schwidefsky 		set_user_asce(mm);
1411e133ab2SMartin Schwidefsky }
1421e133ab2SMartin Schwidefsky 
1431e133ab2SMartin Schwidefsky static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
1441e133ab2SMartin Schwidefsky {
1451e133ab2SMartin Schwidefsky 	unsigned int old, new;
1461e133ab2SMartin Schwidefsky 
1471e133ab2SMartin Schwidefsky 	do {
1481e133ab2SMartin Schwidefsky 		old = atomic_read(v);
1491e133ab2SMartin Schwidefsky 		new = old ^ bits;
1501e133ab2SMartin Schwidefsky 	} while (atomic_cmpxchg(v, old, new) != old);
1511e133ab2SMartin Schwidefsky 	return new;
1521e133ab2SMartin Schwidefsky }
1531e133ab2SMartin Schwidefsky 
1544be130a0SMartin Schwidefsky #ifdef CONFIG_PGSTE
1554be130a0SMartin Schwidefsky 
1564be130a0SMartin Schwidefsky struct page *page_table_alloc_pgste(struct mm_struct *mm)
1574be130a0SMartin Schwidefsky {
1584be130a0SMartin Schwidefsky 	struct page *page;
15941879ff6SHeiko Carstens 	u64 *table;
1604be130a0SMartin Schwidefsky 
161faee35a5SMichal Hocko 	page = alloc_page(GFP_KERNEL);
1624be130a0SMartin Schwidefsky 	if (page) {
16341879ff6SHeiko Carstens 		table = (u64 *)page_to_phys(page);
16441879ff6SHeiko Carstens 		memset64(table, _PAGE_INVALID, PTRS_PER_PTE);
16541879ff6SHeiko Carstens 		memset64(table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
1664be130a0SMartin Schwidefsky 	}
1674be130a0SMartin Schwidefsky 	return page;
1684be130a0SMartin Schwidefsky }
1694be130a0SMartin Schwidefsky 
/* Give back a single page obtained from page_table_alloc_pgste(). */
void page_table_free_pgste(struct page *page)
{
	__free_page(page);
}
1744be130a0SMartin Schwidefsky 
1754be130a0SMartin Schwidefsky #endif /* CONFIG_PGSTE */
1764be130a0SMartin Schwidefsky 
1771e133ab2SMartin Schwidefsky /*
1781e133ab2SMartin Schwidefsky  * page table entry allocation/free routines.
1791e133ab2SMartin Schwidefsky  */
1801e133ab2SMartin Schwidefsky unsigned long *page_table_alloc(struct mm_struct *mm)
1811e133ab2SMartin Schwidefsky {
1821e133ab2SMartin Schwidefsky 	unsigned long *table;
1831e133ab2SMartin Schwidefsky 	struct page *page;
1841e133ab2SMartin Schwidefsky 	unsigned int mask, bit;
1851e133ab2SMartin Schwidefsky 
1861e133ab2SMartin Schwidefsky 	/* Try to get a fragment of a 4K page as a 2K page table */
1871e133ab2SMartin Schwidefsky 	if (!mm_alloc_pgste(mm)) {
1881e133ab2SMartin Schwidefsky 		table = NULL;
189f28a4b4dSMartin Schwidefsky 		spin_lock_bh(&mm->context.lock);
1901e133ab2SMartin Schwidefsky 		if (!list_empty(&mm->context.pgtable_list)) {
1911e133ab2SMartin Schwidefsky 			page = list_first_entry(&mm->context.pgtable_list,
1921e133ab2SMartin Schwidefsky 						struct page, lru);
193*620b4e90SMatthew Wilcox 			mask = atomic_read(&page->_refcount) >> 24;
1941e133ab2SMartin Schwidefsky 			mask = (mask | (mask >> 4)) & 3;
1951e133ab2SMartin Schwidefsky 			if (mask != 3) {
1961e133ab2SMartin Schwidefsky 				table = (unsigned long *) page_to_phys(page);
1971e133ab2SMartin Schwidefsky 				bit = mask & 1;		/* =1 -> second 2K */
1981e133ab2SMartin Schwidefsky 				if (bit)
1991e133ab2SMartin Schwidefsky 					table += PTRS_PER_PTE;
200*620b4e90SMatthew Wilcox 				atomic_xor_bits(&page->_refcount,
201*620b4e90SMatthew Wilcox 							1U << (bit + 24));
2021e133ab2SMartin Schwidefsky 				list_del(&page->lru);
2031e133ab2SMartin Schwidefsky 			}
2041e133ab2SMartin Schwidefsky 		}
205f28a4b4dSMartin Schwidefsky 		spin_unlock_bh(&mm->context.lock);
2061e133ab2SMartin Schwidefsky 		if (table)
2071e133ab2SMartin Schwidefsky 			return table;
2081e133ab2SMartin Schwidefsky 	}
2091e133ab2SMartin Schwidefsky 	/* Allocate a fresh page */
21010d58bf2SMichal Hocko 	page = alloc_page(GFP_KERNEL);
2111e133ab2SMartin Schwidefsky 	if (!page)
2121e133ab2SMartin Schwidefsky 		return NULL;
2131e133ab2SMartin Schwidefsky 	if (!pgtable_page_ctor(page)) {
2141e133ab2SMartin Schwidefsky 		__free_page(page);
2151e133ab2SMartin Schwidefsky 		return NULL;
2161e133ab2SMartin Schwidefsky 	}
217c9b5ad54SMartin Schwidefsky 	arch_set_page_dat(page, 0);
2181e133ab2SMartin Schwidefsky 	/* Initialize page table */
2191e133ab2SMartin Schwidefsky 	table = (unsigned long *) page_to_phys(page);
2201e133ab2SMartin Schwidefsky 	if (mm_alloc_pgste(mm)) {
2211e133ab2SMartin Schwidefsky 		/* Return 4K page table with PGSTEs */
222*620b4e90SMatthew Wilcox 		atomic_xor_bits(&page->_refcount, 3 << 24);
22341879ff6SHeiko Carstens 		memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE);
22441879ff6SHeiko Carstens 		memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
2251e133ab2SMartin Schwidefsky 	} else {
2261e133ab2SMartin Schwidefsky 		/* Return the first 2K fragment of the page */
227*620b4e90SMatthew Wilcox 		atomic_xor_bits(&page->_refcount, 1 << 24);
22841879ff6SHeiko Carstens 		memset64((u64 *)table, _PAGE_INVALID, 2 * PTRS_PER_PTE);
229f28a4b4dSMartin Schwidefsky 		spin_lock_bh(&mm->context.lock);
2301e133ab2SMartin Schwidefsky 		list_add(&page->lru, &mm->context.pgtable_list);
231f28a4b4dSMartin Schwidefsky 		spin_unlock_bh(&mm->context.lock);
2321e133ab2SMartin Schwidefsky 	}
2331e133ab2SMartin Schwidefsky 	return table;
2341e133ab2SMartin Schwidefsky }
2351e133ab2SMartin Schwidefsky 
2361e133ab2SMartin Schwidefsky void page_table_free(struct mm_struct *mm, unsigned long *table)
2371e133ab2SMartin Schwidefsky {
2381e133ab2SMartin Schwidefsky 	struct page *page;
2391e133ab2SMartin Schwidefsky 	unsigned int bit, mask;
2401e133ab2SMartin Schwidefsky 
2411e133ab2SMartin Schwidefsky 	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
2421e133ab2SMartin Schwidefsky 	if (!mm_alloc_pgste(mm)) {
2431e133ab2SMartin Schwidefsky 		/* Free 2K page table fragment of a 4K page */
2441e133ab2SMartin Schwidefsky 		bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
245f28a4b4dSMartin Schwidefsky 		spin_lock_bh(&mm->context.lock);
246*620b4e90SMatthew Wilcox 		mask = atomic_xor_bits(&page->_refcount, 1U << (bit + 24));
247*620b4e90SMatthew Wilcox 		mask >>= 24;
2481e133ab2SMartin Schwidefsky 		if (mask & 3)
2491e133ab2SMartin Schwidefsky 			list_add(&page->lru, &mm->context.pgtable_list);
2501e133ab2SMartin Schwidefsky 		else
2511e133ab2SMartin Schwidefsky 			list_del(&page->lru);
252f28a4b4dSMartin Schwidefsky 		spin_unlock_bh(&mm->context.lock);
2531e133ab2SMartin Schwidefsky 		if (mask != 0)
2541e133ab2SMartin Schwidefsky 			return;
2551e133ab2SMartin Schwidefsky 	}
2561e133ab2SMartin Schwidefsky 
2571e133ab2SMartin Schwidefsky 	pgtable_page_dtor(page);
2581e133ab2SMartin Schwidefsky 	__free_page(page);
2591e133ab2SMartin Schwidefsky }
2601e133ab2SMartin Schwidefsky 
2611e133ab2SMartin Schwidefsky void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
2621e133ab2SMartin Schwidefsky 			 unsigned long vmaddr)
2631e133ab2SMartin Schwidefsky {
2641e133ab2SMartin Schwidefsky 	struct mm_struct *mm;
2651e133ab2SMartin Schwidefsky 	struct page *page;
2661e133ab2SMartin Schwidefsky 	unsigned int bit, mask;
2671e133ab2SMartin Schwidefsky 
2681e133ab2SMartin Schwidefsky 	mm = tlb->mm;
2691e133ab2SMartin Schwidefsky 	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
2701e133ab2SMartin Schwidefsky 	if (mm_alloc_pgste(mm)) {
2711e133ab2SMartin Schwidefsky 		gmap_unlink(mm, table, vmaddr);
2721e133ab2SMartin Schwidefsky 		table = (unsigned long *) (__pa(table) | 3);
2731e133ab2SMartin Schwidefsky 		tlb_remove_table(tlb, table);
2741e133ab2SMartin Schwidefsky 		return;
2751e133ab2SMartin Schwidefsky 	}
2761e133ab2SMartin Schwidefsky 	bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t));
277f28a4b4dSMartin Schwidefsky 	spin_lock_bh(&mm->context.lock);
278*620b4e90SMatthew Wilcox 	mask = atomic_xor_bits(&page->_refcount, 0x11U << (bit + 24));
279*620b4e90SMatthew Wilcox 	mask >>= 24;
2801e133ab2SMartin Schwidefsky 	if (mask & 3)
2811e133ab2SMartin Schwidefsky 		list_add_tail(&page->lru, &mm->context.pgtable_list);
2821e133ab2SMartin Schwidefsky 	else
2831e133ab2SMartin Schwidefsky 		list_del(&page->lru);
284f28a4b4dSMartin Schwidefsky 	spin_unlock_bh(&mm->context.lock);
2851e133ab2SMartin Schwidefsky 	table = (unsigned long *) (__pa(table) | (1U << bit));
2861e133ab2SMartin Schwidefsky 	tlb_remove_table(tlb, table);
2871e133ab2SMartin Schwidefsky }
2881e133ab2SMartin Schwidefsky 
2891e133ab2SMartin Schwidefsky static void __tlb_remove_table(void *_table)
2901e133ab2SMartin Schwidefsky {
2911e133ab2SMartin Schwidefsky 	unsigned int mask = (unsigned long) _table & 3;
2921e133ab2SMartin Schwidefsky 	void *table = (void *)((unsigned long) _table ^ mask);
2931e133ab2SMartin Schwidefsky 	struct page *page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
2941e133ab2SMartin Schwidefsky 
2951e133ab2SMartin Schwidefsky 	switch (mask) {
2961aea9b3fSMartin Schwidefsky 	case 0:		/* pmd, pud, or p4d */
2971e133ab2SMartin Schwidefsky 		free_pages((unsigned long) table, 2);
2981e133ab2SMartin Schwidefsky 		break;
2991e133ab2SMartin Schwidefsky 	case 1:		/* lower 2K of a 4K page table */
3001e133ab2SMartin Schwidefsky 	case 2:		/* higher 2K of a 4K page table */
301*620b4e90SMatthew Wilcox 		mask = atomic_xor_bits(&page->_refcount, mask << (4 + 24));
302*620b4e90SMatthew Wilcox 		mask >>= 24;
303*620b4e90SMatthew Wilcox 		if (mask != 0)
3041e133ab2SMartin Schwidefsky 			break;
3051e133ab2SMartin Schwidefsky 		/* fallthrough */
3061e133ab2SMartin Schwidefsky 	case 3:		/* 4K page table with pgstes */
3071e133ab2SMartin Schwidefsky 		pgtable_page_dtor(page);
3081e133ab2SMartin Schwidefsky 		__free_page(page);
3091e133ab2SMartin Schwidefsky 		break;
3101e133ab2SMartin Schwidefsky 	}
3111e133ab2SMartin Schwidefsky }
3121e133ab2SMartin Schwidefsky 
/*
 * Intentionally empty callback for smp_call_function() in
 * tlb_remove_table_one(); @arg is ignored.
 */
static void tlb_remove_table_smp_sync(void *arg)
{
	/* Simply deliver the interrupt */
}
3171e133ab2SMartin Schwidefsky 
3181e133ab2SMartin Schwidefsky static void tlb_remove_table_one(void *table)
3191e133ab2SMartin Schwidefsky {
3201e133ab2SMartin Schwidefsky 	/*
3211e133ab2SMartin Schwidefsky 	 * This isn't an RCU grace period and hence the page-tables cannot be
3221e133ab2SMartin Schwidefsky 	 * assumed to be actually RCU-freed.
3231e133ab2SMartin Schwidefsky 	 *
3241e133ab2SMartin Schwidefsky 	 * It is however sufficient for software page-table walkers that rely
3251e133ab2SMartin Schwidefsky 	 * on IRQ disabling. See the comment near struct mmu_table_batch.
3261e133ab2SMartin Schwidefsky 	 */
3271e133ab2SMartin Schwidefsky 	smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
3281e133ab2SMartin Schwidefsky 	__tlb_remove_table(table);
3291e133ab2SMartin Schwidefsky }
3301e133ab2SMartin Schwidefsky 
3311e133ab2SMartin Schwidefsky static void tlb_remove_table_rcu(struct rcu_head *head)
3321e133ab2SMartin Schwidefsky {
3331e133ab2SMartin Schwidefsky 	struct mmu_table_batch *batch;
3341e133ab2SMartin Schwidefsky 	int i;
3351e133ab2SMartin Schwidefsky 
3361e133ab2SMartin Schwidefsky 	batch = container_of(head, struct mmu_table_batch, rcu);
3371e133ab2SMartin Schwidefsky 
3381e133ab2SMartin Schwidefsky 	for (i = 0; i < batch->nr; i++)
3391e133ab2SMartin Schwidefsky 		__tlb_remove_table(batch->tables[i]);
3401e133ab2SMartin Schwidefsky 
3411e133ab2SMartin Schwidefsky 	free_page((unsigned long)batch);
3421e133ab2SMartin Schwidefsky }
3431e133ab2SMartin Schwidefsky 
3441e133ab2SMartin Schwidefsky void tlb_table_flush(struct mmu_gather *tlb)
3451e133ab2SMartin Schwidefsky {
3461e133ab2SMartin Schwidefsky 	struct mmu_table_batch **batch = &tlb->batch;
3471e133ab2SMartin Schwidefsky 
3481e133ab2SMartin Schwidefsky 	if (*batch) {
3491e133ab2SMartin Schwidefsky 		call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
3501e133ab2SMartin Schwidefsky 		*batch = NULL;
3511e133ab2SMartin Schwidefsky 	}
3521e133ab2SMartin Schwidefsky }
3531e133ab2SMartin Schwidefsky 
3541e133ab2SMartin Schwidefsky void tlb_remove_table(struct mmu_gather *tlb, void *table)
3551e133ab2SMartin Schwidefsky {
3561e133ab2SMartin Schwidefsky 	struct mmu_table_batch **batch = &tlb->batch;
3571e133ab2SMartin Schwidefsky 
3581e133ab2SMartin Schwidefsky 	tlb->mm->context.flush_mm = 1;
3591e133ab2SMartin Schwidefsky 	if (*batch == NULL) {
3601e133ab2SMartin Schwidefsky 		*batch = (struct mmu_table_batch *)
3611e133ab2SMartin Schwidefsky 			__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
3621e133ab2SMartin Schwidefsky 		if (*batch == NULL) {
3631e133ab2SMartin Schwidefsky 			__tlb_flush_mm_lazy(tlb->mm);
3641e133ab2SMartin Schwidefsky 			tlb_remove_table_one(table);
3651e133ab2SMartin Schwidefsky 			return;
3661e133ab2SMartin Schwidefsky 		}
3671e133ab2SMartin Schwidefsky 		(*batch)->nr = 0;
3681e133ab2SMartin Schwidefsky 	}
3691e133ab2SMartin Schwidefsky 	(*batch)->tables[(*batch)->nr++] = table;
3701e133ab2SMartin Schwidefsky 	if ((*batch)->nr == MAX_TABLE_BATCH)
3711e133ab2SMartin Schwidefsky 		tlb_flush_mmu(tlb);
3721e133ab2SMartin Schwidefsky }
3731caf170dSHeiko Carstens 
3741caf170dSHeiko Carstens /*
3751caf170dSHeiko Carstens  * Base infrastructure required to generate basic asces, region, segment,
3761caf170dSHeiko Carstens  * and page tables that do not make use of enhanced features like EDAT1.
3771caf170dSHeiko Carstens  */
3781caf170dSHeiko Carstens 
3791caf170dSHeiko Carstens static struct kmem_cache *base_pgt_cache;
3801caf170dSHeiko Carstens 
3811caf170dSHeiko Carstens static unsigned long base_pgt_alloc(void)
3821caf170dSHeiko Carstens {
3831caf170dSHeiko Carstens 	u64 *table;
3841caf170dSHeiko Carstens 
3851caf170dSHeiko Carstens 	table = kmem_cache_alloc(base_pgt_cache, GFP_KERNEL);
3861caf170dSHeiko Carstens 	if (table)
3871caf170dSHeiko Carstens 		memset64(table, _PAGE_INVALID, PTRS_PER_PTE);
3881caf170dSHeiko Carstens 	return (unsigned long) table;
3891caf170dSHeiko Carstens }
3901caf170dSHeiko Carstens 
3911caf170dSHeiko Carstens static void base_pgt_free(unsigned long table)
3921caf170dSHeiko Carstens {
3931caf170dSHeiko Carstens 	kmem_cache_free(base_pgt_cache, (void *) table);
3941caf170dSHeiko Carstens }
3951caf170dSHeiko Carstens 
3961caf170dSHeiko Carstens static unsigned long base_crst_alloc(unsigned long val)
3971caf170dSHeiko Carstens {
3981caf170dSHeiko Carstens 	unsigned long table;
3991caf170dSHeiko Carstens 
4001caf170dSHeiko Carstens 	table =	 __get_free_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
4011caf170dSHeiko Carstens 	if (table)
4021caf170dSHeiko Carstens 		crst_table_init((unsigned long *)table, val);
4031caf170dSHeiko Carstens 	return table;
4041caf170dSHeiko Carstens }
4051caf170dSHeiko Carstens 
4061caf170dSHeiko Carstens static void base_crst_free(unsigned long table)
4071caf170dSHeiko Carstens {
4081caf170dSHeiko Carstens 	free_pages(table, CRST_ALLOC_ORDER);
4091caf170dSHeiko Carstens }
4101caf170dSHeiko Carstens 
4111caf170dSHeiko Carstens #define BASE_ADDR_END_FUNC(NAME, SIZE)					\
4121caf170dSHeiko Carstens static inline unsigned long base_##NAME##_addr_end(unsigned long addr,	\
4131caf170dSHeiko Carstens 						   unsigned long end)	\
4141caf170dSHeiko Carstens {									\
4151caf170dSHeiko Carstens 	unsigned long next = (addr + (SIZE)) & ~((SIZE) - 1);		\
4161caf170dSHeiko Carstens 									\
4171caf170dSHeiko Carstens 	return (next - 1) < (end - 1) ? next : end;			\
4181caf170dSHeiko Carstens }
4191caf170dSHeiko Carstens 
4201caf170dSHeiko Carstens BASE_ADDR_END_FUNC(page,    _PAGE_SIZE)
4211caf170dSHeiko Carstens BASE_ADDR_END_FUNC(segment, _SEGMENT_SIZE)
4221caf170dSHeiko Carstens BASE_ADDR_END_FUNC(region3, _REGION3_SIZE)
4231caf170dSHeiko Carstens BASE_ADDR_END_FUNC(region2, _REGION2_SIZE)
4241caf170dSHeiko Carstens BASE_ADDR_END_FUNC(region1, _REGION1_SIZE)
4251caf170dSHeiko Carstens 
4261caf170dSHeiko Carstens static inline unsigned long base_lra(unsigned long address)
4271caf170dSHeiko Carstens {
4281caf170dSHeiko Carstens 	unsigned long real;
4291caf170dSHeiko Carstens 
4301caf170dSHeiko Carstens 	asm volatile(
4311caf170dSHeiko Carstens 		"	lra	%0,0(%1)\n"
4321caf170dSHeiko Carstens 		: "=d" (real) : "a" (address) : "cc");
4331caf170dSHeiko Carstens 	return real;
4341caf170dSHeiko Carstens }
4351caf170dSHeiko Carstens 
4361caf170dSHeiko Carstens static int base_page_walk(unsigned long origin, unsigned long addr,
4371caf170dSHeiko Carstens 			  unsigned long end, int alloc)
4381caf170dSHeiko Carstens {
4391caf170dSHeiko Carstens 	unsigned long *pte, next;
4401caf170dSHeiko Carstens 
4411caf170dSHeiko Carstens 	if (!alloc)
4421caf170dSHeiko Carstens 		return 0;
4431caf170dSHeiko Carstens 	pte = (unsigned long *) origin;
4441caf170dSHeiko Carstens 	pte += (addr & _PAGE_INDEX) >> _PAGE_SHIFT;
4451caf170dSHeiko Carstens 	do {
4461caf170dSHeiko Carstens 		next = base_page_addr_end(addr, end);
4471caf170dSHeiko Carstens 		*pte = base_lra(addr);
4481caf170dSHeiko Carstens 	} while (pte++, addr = next, addr < end);
4491caf170dSHeiko Carstens 	return 0;
4501caf170dSHeiko Carstens }
4511caf170dSHeiko Carstens 
4521caf170dSHeiko Carstens static int base_segment_walk(unsigned long origin, unsigned long addr,
4531caf170dSHeiko Carstens 			     unsigned long end, int alloc)
4541caf170dSHeiko Carstens {
4551caf170dSHeiko Carstens 	unsigned long *ste, next, table;
4561caf170dSHeiko Carstens 	int rc;
4571caf170dSHeiko Carstens 
4581caf170dSHeiko Carstens 	ste = (unsigned long *) origin;
4591caf170dSHeiko Carstens 	ste += (addr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
4601caf170dSHeiko Carstens 	do {
4611caf170dSHeiko Carstens 		next = base_segment_addr_end(addr, end);
4621caf170dSHeiko Carstens 		if (*ste & _SEGMENT_ENTRY_INVALID) {
4631caf170dSHeiko Carstens 			if (!alloc)
4641caf170dSHeiko Carstens 				continue;
4651caf170dSHeiko Carstens 			table = base_pgt_alloc();
4661caf170dSHeiko Carstens 			if (!table)
4671caf170dSHeiko Carstens 				return -ENOMEM;
4681caf170dSHeiko Carstens 			*ste = table | _SEGMENT_ENTRY;
4691caf170dSHeiko Carstens 		}
4701caf170dSHeiko Carstens 		table = *ste & _SEGMENT_ENTRY_ORIGIN;
4711caf170dSHeiko Carstens 		rc = base_page_walk(table, addr, next, alloc);
4721caf170dSHeiko Carstens 		if (rc)
4731caf170dSHeiko Carstens 			return rc;
4741caf170dSHeiko Carstens 		if (!alloc)
4751caf170dSHeiko Carstens 			base_pgt_free(table);
4761caf170dSHeiko Carstens 		cond_resched();
4771caf170dSHeiko Carstens 	} while (ste++, addr = next, addr < end);
4781caf170dSHeiko Carstens 	return 0;
4791caf170dSHeiko Carstens }
4801caf170dSHeiko Carstens 
4811caf170dSHeiko Carstens static int base_region3_walk(unsigned long origin, unsigned long addr,
4821caf170dSHeiko Carstens 			     unsigned long end, int alloc)
4831caf170dSHeiko Carstens {
4841caf170dSHeiko Carstens 	unsigned long *rtte, next, table;
4851caf170dSHeiko Carstens 	int rc;
4861caf170dSHeiko Carstens 
4871caf170dSHeiko Carstens 	rtte = (unsigned long *) origin;
4881caf170dSHeiko Carstens 	rtte += (addr & _REGION3_INDEX) >> _REGION3_SHIFT;
4891caf170dSHeiko Carstens 	do {
4901caf170dSHeiko Carstens 		next = base_region3_addr_end(addr, end);
4911caf170dSHeiko Carstens 		if (*rtte & _REGION_ENTRY_INVALID) {
4921caf170dSHeiko Carstens 			if (!alloc)
4931caf170dSHeiko Carstens 				continue;
4941caf170dSHeiko Carstens 			table = base_crst_alloc(_SEGMENT_ENTRY_EMPTY);
4951caf170dSHeiko Carstens 			if (!table)
4961caf170dSHeiko Carstens 				return -ENOMEM;
4971caf170dSHeiko Carstens 			*rtte = table | _REGION3_ENTRY;
4981caf170dSHeiko Carstens 		}
4991caf170dSHeiko Carstens 		table = *rtte & _REGION_ENTRY_ORIGIN;
5001caf170dSHeiko Carstens 		rc = base_segment_walk(table, addr, next, alloc);
5011caf170dSHeiko Carstens 		if (rc)
5021caf170dSHeiko Carstens 			return rc;
5031caf170dSHeiko Carstens 		if (!alloc)
5041caf170dSHeiko Carstens 			base_crst_free(table);
5051caf170dSHeiko Carstens 	} while (rtte++, addr = next, addr < end);
5061caf170dSHeiko Carstens 	return 0;
5071caf170dSHeiko Carstens }
5081caf170dSHeiko Carstens 
5091caf170dSHeiko Carstens static int base_region2_walk(unsigned long origin, unsigned long addr,
5101caf170dSHeiko Carstens 			     unsigned long end, int alloc)
5111caf170dSHeiko Carstens {
5121caf170dSHeiko Carstens 	unsigned long *rste, next, table;
5131caf170dSHeiko Carstens 	int rc;
5141caf170dSHeiko Carstens 
5151caf170dSHeiko Carstens 	rste = (unsigned long *) origin;
5161caf170dSHeiko Carstens 	rste += (addr & _REGION2_INDEX) >> _REGION2_SHIFT;
5171caf170dSHeiko Carstens 	do {
5181caf170dSHeiko Carstens 		next = base_region2_addr_end(addr, end);
5191caf170dSHeiko Carstens 		if (*rste & _REGION_ENTRY_INVALID) {
5201caf170dSHeiko Carstens 			if (!alloc)
5211caf170dSHeiko Carstens 				continue;
5221caf170dSHeiko Carstens 			table = base_crst_alloc(_REGION3_ENTRY_EMPTY);
5231caf170dSHeiko Carstens 			if (!table)
5241caf170dSHeiko Carstens 				return -ENOMEM;
5251caf170dSHeiko Carstens 			*rste = table | _REGION2_ENTRY;
5261caf170dSHeiko Carstens 		}
5271caf170dSHeiko Carstens 		table = *rste & _REGION_ENTRY_ORIGIN;
5281caf170dSHeiko Carstens 		rc = base_region3_walk(table, addr, next, alloc);
5291caf170dSHeiko Carstens 		if (rc)
5301caf170dSHeiko Carstens 			return rc;
5311caf170dSHeiko Carstens 		if (!alloc)
5321caf170dSHeiko Carstens 			base_crst_free(table);
5331caf170dSHeiko Carstens 	} while (rste++, addr = next, addr < end);
5341caf170dSHeiko Carstens 	return 0;
5351caf170dSHeiko Carstens }
5361caf170dSHeiko Carstens 
5371caf170dSHeiko Carstens static int base_region1_walk(unsigned long origin, unsigned long addr,
5381caf170dSHeiko Carstens 			     unsigned long end, int alloc)
5391caf170dSHeiko Carstens {
5401caf170dSHeiko Carstens 	unsigned long *rfte, next, table;
5411caf170dSHeiko Carstens 	int rc;
5421caf170dSHeiko Carstens 
5431caf170dSHeiko Carstens 	rfte = (unsigned long *) origin;
5441caf170dSHeiko Carstens 	rfte += (addr & _REGION1_INDEX) >> _REGION1_SHIFT;
5451caf170dSHeiko Carstens 	do {
5461caf170dSHeiko Carstens 		next = base_region1_addr_end(addr, end);
5471caf170dSHeiko Carstens 		if (*rfte & _REGION_ENTRY_INVALID) {
5481caf170dSHeiko Carstens 			if (!alloc)
5491caf170dSHeiko Carstens 				continue;
5501caf170dSHeiko Carstens 			table = base_crst_alloc(_REGION2_ENTRY_EMPTY);
5511caf170dSHeiko Carstens 			if (!table)
5521caf170dSHeiko Carstens 				return -ENOMEM;
5531caf170dSHeiko Carstens 			*rfte = table | _REGION1_ENTRY;
5541caf170dSHeiko Carstens 		}
5551caf170dSHeiko Carstens 		table = *rfte & _REGION_ENTRY_ORIGIN;
5561caf170dSHeiko Carstens 		rc = base_region2_walk(table, addr, next, alloc);
5571caf170dSHeiko Carstens 		if (rc)
5581caf170dSHeiko Carstens 			return rc;
5591caf170dSHeiko Carstens 		if (!alloc)
5601caf170dSHeiko Carstens 			base_crst_free(table);
5611caf170dSHeiko Carstens 	} while (rfte++, addr = next, addr < end);
5621caf170dSHeiko Carstens 	return 0;
5631caf170dSHeiko Carstens }
5641caf170dSHeiko Carstens 
5651caf170dSHeiko Carstens /**
5661caf170dSHeiko Carstens  * base_asce_free - free asce and tables returned from base_asce_alloc()
5671caf170dSHeiko Carstens  * @asce: asce to be freed
5681caf170dSHeiko Carstens  *
5691caf170dSHeiko Carstens  * Frees all region, segment, and page tables that were allocated with a
5701caf170dSHeiko Carstens  * corresponding base_asce_alloc() call.
5711caf170dSHeiko Carstens  */
5721caf170dSHeiko Carstens void base_asce_free(unsigned long asce)
5731caf170dSHeiko Carstens {
5741caf170dSHeiko Carstens 	unsigned long table = asce & _ASCE_ORIGIN;
5751caf170dSHeiko Carstens 
5761caf170dSHeiko Carstens 	if (!asce)
5771caf170dSHeiko Carstens 		return;
5781caf170dSHeiko Carstens 	switch (asce & _ASCE_TYPE_MASK) {
5791caf170dSHeiko Carstens 	case _ASCE_TYPE_SEGMENT:
5801caf170dSHeiko Carstens 		base_segment_walk(table, 0, _REGION3_SIZE, 0);
5811caf170dSHeiko Carstens 		break;
5821caf170dSHeiko Carstens 	case _ASCE_TYPE_REGION3:
5831caf170dSHeiko Carstens 		base_region3_walk(table, 0, _REGION2_SIZE, 0);
5841caf170dSHeiko Carstens 		break;
5851caf170dSHeiko Carstens 	case _ASCE_TYPE_REGION2:
5861caf170dSHeiko Carstens 		base_region2_walk(table, 0, _REGION1_SIZE, 0);
5871caf170dSHeiko Carstens 		break;
5881caf170dSHeiko Carstens 	case _ASCE_TYPE_REGION1:
5891caf170dSHeiko Carstens 		base_region1_walk(table, 0, -_PAGE_SIZE, 0);
5901caf170dSHeiko Carstens 		break;
5911caf170dSHeiko Carstens 	}
5921caf170dSHeiko Carstens 	base_crst_free(table);
5931caf170dSHeiko Carstens }
5941caf170dSHeiko Carstens 
5951caf170dSHeiko Carstens static int base_pgt_cache_init(void)
5961caf170dSHeiko Carstens {
5971caf170dSHeiko Carstens 	static DEFINE_MUTEX(base_pgt_cache_mutex);
5981caf170dSHeiko Carstens 	unsigned long sz = _PAGE_TABLE_SIZE;
5991caf170dSHeiko Carstens 
6001caf170dSHeiko Carstens 	if (base_pgt_cache)
6011caf170dSHeiko Carstens 		return 0;
6021caf170dSHeiko Carstens 	mutex_lock(&base_pgt_cache_mutex);
6031caf170dSHeiko Carstens 	if (!base_pgt_cache)
6041caf170dSHeiko Carstens 		base_pgt_cache = kmem_cache_create("base_pgt", sz, sz, 0, NULL);
6051caf170dSHeiko Carstens 	mutex_unlock(&base_pgt_cache_mutex);
6061caf170dSHeiko Carstens 	return base_pgt_cache ? 0 : -ENOMEM;
6071caf170dSHeiko Carstens }
6081caf170dSHeiko Carstens 
6091caf170dSHeiko Carstens /**
6101caf170dSHeiko Carstens  * base_asce_alloc - create kernel mapping without enhanced DAT features
6111caf170dSHeiko Carstens  * @addr: virtual start address of kernel mapping
6121caf170dSHeiko Carstens  * @num_pages: number of consecutive pages
6131caf170dSHeiko Carstens  *
6141caf170dSHeiko Carstens  * Generate an asce, including all required region, segment and page tables,
6151caf170dSHeiko Carstens  * that can be used to access the virtual kernel mapping. The difference is
6161caf170dSHeiko Carstens  * that the returned asce does not make use of any enhanced DAT features like
6171caf170dSHeiko Carstens  * e.g. large pages. This is required for some I/O functions that pass an
6181caf170dSHeiko Carstens  * asce, like e.g. some service call requests.
6191caf170dSHeiko Carstens  *
6201caf170dSHeiko Carstens  * Note: the returned asce may NEVER be attached to any cpu. It may only be
6211caf170dSHeiko Carstens  *	 used for I/O requests. tlb entries that might result because the
6221caf170dSHeiko Carstens  *	 asce was attached to a cpu won't be cleared.
6231caf170dSHeiko Carstens  */
6241caf170dSHeiko Carstens unsigned long base_asce_alloc(unsigned long addr, unsigned long num_pages)
6251caf170dSHeiko Carstens {
6261caf170dSHeiko Carstens 	unsigned long asce, table, end;
6271caf170dSHeiko Carstens 	int rc;
6281caf170dSHeiko Carstens 
6291caf170dSHeiko Carstens 	if (base_pgt_cache_init())
6301caf170dSHeiko Carstens 		return 0;
6311caf170dSHeiko Carstens 	end = addr + num_pages * PAGE_SIZE;
6321caf170dSHeiko Carstens 	if (end <= _REGION3_SIZE) {
6331caf170dSHeiko Carstens 		table = base_crst_alloc(_SEGMENT_ENTRY_EMPTY);
6341caf170dSHeiko Carstens 		if (!table)
6351caf170dSHeiko Carstens 			return 0;
6361caf170dSHeiko Carstens 		rc = base_segment_walk(table, addr, end, 1);
6371caf170dSHeiko Carstens 		asce = table | _ASCE_TYPE_SEGMENT | _ASCE_TABLE_LENGTH;
6381caf170dSHeiko Carstens 	} else if (end <= _REGION2_SIZE) {
6391caf170dSHeiko Carstens 		table = base_crst_alloc(_REGION3_ENTRY_EMPTY);
6401caf170dSHeiko Carstens 		if (!table)
6411caf170dSHeiko Carstens 			return 0;
6421caf170dSHeiko Carstens 		rc = base_region3_walk(table, addr, end, 1);
6431caf170dSHeiko Carstens 		asce = table | _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
6441caf170dSHeiko Carstens 	} else if (end <= _REGION1_SIZE) {
6451caf170dSHeiko Carstens 		table = base_crst_alloc(_REGION2_ENTRY_EMPTY);
6461caf170dSHeiko Carstens 		if (!table)
6471caf170dSHeiko Carstens 			return 0;
6481caf170dSHeiko Carstens 		rc = base_region2_walk(table, addr, end, 1);
6491caf170dSHeiko Carstens 		asce = table | _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
6501caf170dSHeiko Carstens 	} else {
6511caf170dSHeiko Carstens 		table = base_crst_alloc(_REGION1_ENTRY_EMPTY);
6521caf170dSHeiko Carstens 		if (!table)
6531caf170dSHeiko Carstens 			return 0;
6541caf170dSHeiko Carstens 		rc = base_region1_walk(table, addr, end, 1);
6551caf170dSHeiko Carstens 		asce = table | _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH;
6561caf170dSHeiko Carstens 	}
6571caf170dSHeiko Carstens 	if (rc) {
6581caf170dSHeiko Carstens 		base_asce_free(asce);
6591caf170dSHeiko Carstens 		asce = 0;
6601caf170dSHeiko Carstens 	}
6611caf170dSHeiko Carstens 	return asce;
6621caf170dSHeiko Carstens }
663