/*
 *  Page table allocation functions
 *
 *    Copyright IBM Corp. 2016
 *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */

#include <linux/mm.h>
#include <linux/sysctl.h>
#include <asm/mmu_context.h>
#include <asm/pgalloc.h>
#include <asm/gmap.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>

#ifdef CONFIG_PGSTE

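/*
 * The vm.allocate_pgste sysctl makes every newly allocated page table
 * carry the extra 2K of page status table entries (PGSTEs) that KVM
 * needs to back guest memory. A KVM host enables it system-wide, e.g.:
 *
 *	echo 1 > /proc/sys/vm/allocate_pgste
 */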
static int page_table_allocate_pgste_min = 0;
static int page_table_allocate_pgste_max = 1;
int page_table_allocate_pgste = 0;
EXPORT_SYMBOL(page_table_allocate_pgste);

static struct ctl_table page_table_sysctl[] = {
	{
		.procname	= "allocate_pgste",
		.data		= &page_table_allocate_pgste,
		.maxlen		= sizeof(int),
		.mode		= S_IRUGO | S_IWUSR,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &page_table_allocate_pgste_min,
		.extra2		= &page_table_allocate_pgste_max,
	},
	{ }
};

static struct ctl_table page_table_sysctl_dir[] = {
	{
		.procname	= "vm",
		.maxlen		= 0,
		.mode		= 0555,
		.child		= page_table_sysctl,
	},
	{ }
};

static int __init page_table_register_sysctl(void)
{
	return register_sysctl_table(page_table_sysctl_dir) ? 0 : -ENOMEM;
}
__initcall(page_table_register_sysctl);

#endif /* CONFIG_PGSTE */

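/*
 * A CRST table (region or segment table) holds 2048 eight-byte entries,
 * i.e. 16K, hence the order-2 page allocations below.
 */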
unsigned long *crst_table_alloc(struct mm_struct *mm)
{
	struct page *page = alloc_pages(GFP_KERNEL, 2);

	if (!page)
		return NULL;
	return (unsigned long *) page_to_phys(page);
}

void crst_table_free(struct mm_struct *mm, unsigned long *table)
{
	free_pages((unsigned long) table, 2);
}

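/*
 * Executed on each CPU after an upgrade: a CPU currently running on the
 * upgraded mm reloads its user ASCE to point at the new top-level table,
 * and every CPU flushes its local TLB.
 */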
static void __crst_table_upgrade(void *arg)
{
	struct mm_struct *mm = arg;

	if (current->active_mm == mm) {
		clear_user_asce();
		set_user_asce(mm);
	}
	__tlb_flush_local();
}

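/*
 * Add page table levels until asce_limit covers the requested limit.
 * The new top-level table is allocated without holding page_table_lock,
 * so the loop frees the unused table and retries if another thread won
 * the race, and iterates once more when a single upgrade step (2G to 4T
 * to 8P) is not enough.
 */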
int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
{
	unsigned long *table, *pgd;
	unsigned long entry;
	int flush;

	BUG_ON(limit > TASK_MAX_SIZE);
	flush = 0;
repeat:
	table = crst_table_alloc(mm);
	if (!table)
		return -ENOMEM;
	spin_lock_bh(&mm->page_table_lock);
	if (mm->context.asce_limit < limit) {
		pgd = (unsigned long *) mm->pgd;
		if (mm->context.asce_limit <= (1UL << 31)) {
			entry = _REGION3_ENTRY_EMPTY;
			mm->context.asce_limit = 1UL << 42;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION3;
		} else {
			entry = _REGION2_ENTRY_EMPTY;
			mm->context.asce_limit = 1UL << 53;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION2;
		}
		crst_table_init(table, entry);
		pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
		mm->pgd = (pgd_t *) table;
		mm->task_size = mm->context.asce_limit;
		table = NULL;
		flush = 1;
	}
	spin_unlock_bh(&mm->page_table_lock);
	if (table)
		crst_table_free(mm, table);
	if (mm->context.asce_limit < limit)
		goto repeat;
	if (flush)
		on_each_cpu(__crst_table_upgrade, mm, 0);
	return 0;
}

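/*
 * Pop top-level tables until asce_limit is back down to the given limit,
 * e.g. when a 31-bit compat process only needs a segment table.
 */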
void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
{
	pgd_t *pgd;

	if (current->active_mm == mm) {
		clear_user_asce();
		__tlb_flush_mm(mm);
	}
	while (mm->context.asce_limit > limit) {
		pgd = mm->pgd;
		switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
		case _REGION_ENTRY_TYPE_R2:
			mm->context.asce_limit = 1UL << 42;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION3;
			break;
		case _REGION_ENTRY_TYPE_R3:
			mm->context.asce_limit = 1UL << 31;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_SEGMENT;
			break;
		default:
			BUG();
		}
		mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
		mm->task_size = mm->context.asce_limit;
		crst_table_free(mm, (unsigned long *) pgd);
	}
	if (current->active_mm == mm)
		set_user_asce(mm);
}

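/*
 * Lock-free helper: XOR @bits into the atomic counter @v and return the
 * new value, using an atomic_cmpxchg() retry loop.
 */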
static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
{
	unsigned int old, new;

	do {
		old = atomic_read(v);
		new = old ^ bits;
	} while (atomic_cmpxchg(v, old, new) != old);
	return new;
}

/*
 * page table entry allocation/free routines.
 */
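/*
 * An s390 page table is only 2K: 256 pte entries of eight bytes each,
 * so two page tables fit into one 4K page. page->_mapcount keeps track
 * of the two fragments with two bits per fragment:
 *
 *   bits 0-1: fragment is allocated as a page table
 *   bits 4-5: fragment free is pending in page_table_free_rcu()
 *
 * Pages that still have a free fragment are kept on the
 * mm->context.pgtable_list. Page tables with PGSTEs, as used by KVM,
 * occupy a full 4K page (2K of ptes plus 2K of status entries) and are
 * never split into fragments.
 */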
unsigned long *page_table_alloc(struct mm_struct *mm)
{
	unsigned long *table;
	struct page *page;
	unsigned int mask, bit;

	/* Try to get a fragment of a 4K page as a 2K page table */
	if (!mm_alloc_pgste(mm)) {
		table = NULL;
		spin_lock_bh(&mm->context.list_lock);
		if (!list_empty(&mm->context.pgtable_list)) {
			page = list_first_entry(&mm->context.pgtable_list,
						struct page, lru);
			mask = atomic_read(&page->_mapcount);
			mask = (mask | (mask >> 4)) & 3;
			if (mask != 3) {
				table = (unsigned long *) page_to_phys(page);
				bit = mask & 1;		/* =1 -> second 2K */
				if (bit)
					table += PTRS_PER_PTE;
				atomic_xor_bits(&page->_mapcount, 1U << bit);
				list_del(&page->lru);
			}
		}
		spin_unlock_bh(&mm->context.list_lock);
		if (table)
			return table;
	}
	/* Allocate a fresh page */
	page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
	if (!page)
		return NULL;
	if (!pgtable_page_ctor(page)) {
		__free_page(page);
		return NULL;
	}
	/* Initialize page table */
	table = (unsigned long *) page_to_phys(page);
	if (mm_alloc_pgste(mm)) {
		/* Return 4K page table with PGSTEs */
		atomic_set(&page->_mapcount, 3);
		clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
		clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
	} else {
		/* Return the first 2K fragment of the page */
		atomic_set(&page->_mapcount, 1);
		clear_table(table, _PAGE_INVALID, PAGE_SIZE);
		spin_lock_bh(&mm->context.list_lock);
		list_add(&page->lru, &mm->context.pgtable_list);
		spin_unlock_bh(&mm->context.list_lock);
	}
	return table;
}

void page_table_free(struct mm_struct *mm, unsigned long *table)
{
	struct page *page;
	unsigned int bit, mask;

	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	if (!mm_alloc_pgste(mm)) {
		/* Free 2K page table fragment of a 4K page */
		bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
		spin_lock_bh(&mm->context.list_lock);
		mask = atomic_xor_bits(&page->_mapcount, 1U << bit);
		if (mask & 3)
			list_add(&page->lru, &mm->context.pgtable_list);
		else
			list_del(&page->lru);
		spin_unlock_bh(&mm->context.list_lock);
		if (mask != 0)
			return;
	}

	pgtable_page_dtor(page);
	atomic_set(&page->_mapcount, -1);
	__free_page(page);
}

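/*
 * mmu_gather variant of page_table_free(): the fragment is marked as
 * pending (upper nibble of _mapcount) and handed to tlb_remove_table()
 * with the fragment number encoded in the low bits of the table pointer,
 * to be decoded by __tlb_remove_table() after the grace period.
 */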
void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
			 unsigned long vmaddr)
{
	struct mm_struct *mm;
	struct page *page;
	unsigned int bit, mask;

	mm = tlb->mm;
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	if (mm_alloc_pgste(mm)) {
		gmap_unlink(mm, table, vmaddr);
		table = (unsigned long *) (__pa(table) | 3);
		tlb_remove_table(tlb, table);
		return;
	}
	bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t));
	spin_lock_bh(&mm->context.list_lock);
	mask = atomic_xor_bits(&page->_mapcount, 0x11U << bit);
	if (mask & 3)
		list_add_tail(&page->lru, &mm->context.pgtable_list);
	else
		list_del(&page->lru);
	spin_unlock_bh(&mm->context.list_lock);
	table = (unsigned long *) (__pa(table) | (1U << bit));
	tlb_remove_table(tlb, table);
}

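/*
 * The low two bits of the table pointer encode what is to be freed:
 * 0 for a 16K CRST table, 1 or 2 for the lower or higher 2K fragment
 * of a page table, 3 for a full 4K page table with PGSTEs.
 */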
static void __tlb_remove_table(void *_table)
{
	unsigned int mask = (unsigned long) _table & 3;
	void *table = (void *)((unsigned long) _table ^ mask);
	struct page *page = pfn_to_page(__pa(table) >> PAGE_SHIFT);

	switch (mask) {
	case 0:		/* pmd or pud */
		free_pages((unsigned long) table, 2);
		break;
	case 1:		/* lower 2K of a 4K page table */
	case 2:		/* higher 2K of a 4K page table */
		if (atomic_xor_bits(&page->_mapcount, mask << 4) != 0)
			break;
		/* fallthrough */
	case 3:		/* 4K page table with pgstes */
		pgtable_page_dtor(page);
		atomic_set(&page->_mapcount, -1);
		__free_page(page);
		break;
	}
}

static void tlb_remove_table_smp_sync(void *arg)
{
	/* Simply deliver the interrupt */
}

static void tlb_remove_table_one(void *table)
{
	/*
	 * This isn't an RCU grace period and hence the page-tables cannot be
	 * assumed to be actually RCU-freed.
	 *
	 * It is however sufficient for software page-table walkers that rely
	 * on IRQ disabling. See the comment near struct mmu_table_batch.
	 */
	smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
	__tlb_remove_table(table);
}

static void tlb_remove_table_rcu(struct rcu_head *head)
{
	struct mmu_table_batch *batch;
	int i;

	batch = container_of(head, struct mmu_table_batch, rcu);

	for (i = 0; i < batch->nr; i++)
		__tlb_remove_table(batch->tables[i]);

	free_page((unsigned long)batch);
}

void tlb_table_flush(struct mmu_gather *tlb)
{
	struct mmu_table_batch **batch = &tlb->batch;

	if (*batch) {
		call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
		*batch = NULL;
	}
}

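/*
 * Queue a table for deferred freeing: tables are collected in a batch
 * page that tlb_table_flush() hands to call_rcu_sched(). If no batch
 * page is available the table is freed immediately instead, after an
 * IPI to all CPUs has pushed concurrent lockless table walkers out of
 * their interrupts-off critical sections.
 */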
void tlb_remove_table(struct mmu_gather *tlb, void *table)
{
	struct mmu_table_batch **batch = &tlb->batch;

	tlb->mm->context.flush_mm = 1;
	if (*batch == NULL) {
		*batch = (struct mmu_table_batch *)
			__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
		if (*batch == NULL) {
			__tlb_flush_mm_lazy(tlb->mm);
			tlb_remove_table_one(table);
			return;
		}
		(*batch)->nr = 0;
	}
	(*batch)->tables[(*batch)->nr++] = table;
	if ((*batch)->nr == MAX_TABLE_BATCH)
		tlb_flush_mmu(tlb);
}