xref: /linux/arch/sparc/mm/tsb.c (revision 861e10be08c69808065d755d3e3cab5d520a2d32)
1 /* arch/sparc64/mm/tsb.c
2  *
3  * Copyright (C) 2006, 2008 David S. Miller <davem@davemloft.net>
4  */
5 
6 #include <linux/kernel.h>
7 #include <linux/preempt.h>
8 #include <linux/slab.h>
9 #include <asm/page.h>
10 #include <asm/tlbflush.h>
11 #include <asm/tlb.h>
12 #include <asm/mmu_context.h>
13 #include <asm/pgtable.h>
14 #include <asm/tsb.h>
15 #include <asm/oplib.h>
16 
17 extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];
18 
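/* Hash a virtual address into a TSB index: shift out the low
 * hash_shift bits (the page offset for the page size this TSB
 * covers) and mask with the power-of-two number of entries.
 */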
19 static inline unsigned long tsb_hash(unsigned long vaddr, unsigned long hash_shift, unsigned long nentries)
20 {
21 	vaddr >>= hash_shift;
22 	return vaddr & (nentries - 1);
23 }
24 
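/* A TSB entry's tag holds the virtual address shifted down by 22
 * bits; an entry matches when its tag equals that of the address
 * being looked up.
 */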
25 static inline int tag_compare(unsigned long tag, unsigned long vaddr)
26 {
27 	return (tag == (vaddr >> 22));
28 }
29 
30 /* TSB flushes need only occur on the processor initiating the address
31  * space modification, not on each cpu the address space has run on.
32  * Only the TLB flush needs that treatment.
33  */
34 
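/* Invalidate any kernel TSB entries that currently map pages in
 * the range [start, end).
 */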
35 void flush_tsb_kernel_range(unsigned long start, unsigned long end)
36 {
37 	unsigned long v;
38 
39 	for (v = start; v < end; v += PAGE_SIZE) {
40 		unsigned long hash = tsb_hash(v, PAGE_SHIFT,
41 					      KERNEL_TSB_NENTRIES);
42 		struct tsb *ent = &swapper_tsb[hash];
43 
44 		if (tag_compare(ent->tag, v))
45 			ent->tag = (1UL << TSB_TAG_INVALID_BIT);
46 	}
47 }
48 
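/* Flush the TSB entries for every address in a TLB flush batch.
 * Bit 0 of each batched address carries a flag from the TLB
 * batching code (it is not part of the virtual address), so it is
 * cleared before hashing.  'tsb' is the TSB base, passed by the
 * caller as a physical address on cheetah_plus/hypervisor chips
 * and as a virtual address otherwise.
 */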
49 static void __flush_tsb_one(struct tlb_batch *tb, unsigned long hash_shift,
50 			    unsigned long tsb, unsigned long nentries)
51 {
52 	unsigned long i;
53 
54 	for (i = 0; i < tb->tlb_nr; i++) {
55 		unsigned long v = tb->vaddrs[i];
56 		unsigned long tag, ent, hash;
57 
58 		v &= ~0x1UL;
59 
60 		hash = tsb_hash(v, hash_shift, nentries);
61 		ent = tsb + (hash * sizeof(struct tsb));
62 		tag = (v >> 22UL);
63 
64 		tsb_flush(ent, tag);
65 	}
66 }
67 
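/* Flush the batched addresses from this address space's base page
 * size TSB, and from its huge page TSB if one has been allocated.
 */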
68 void flush_tsb_user(struct tlb_batch *tb)
69 {
70 	struct mm_struct *mm = tb->mm;
71 	unsigned long nentries, base, flags;
72 
73 	spin_lock_irqsave(&mm->context.lock, flags);
74 
75 	base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
76 	nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
77 	if (tlb_type == cheetah_plus || tlb_type == hypervisor)
78 		base = __pa(base);
79 	__flush_tsb_one(tb, PAGE_SHIFT, base, nentries);
80 
81 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
82 	if (mm->context.tsb_block[MM_TSB_HUGE].tsb) {
83 		base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb;
84 		nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
85 		if (tlb_type == cheetah_plus || tlb_type == hypervisor)
86 			base = __pa(base);
87 		__flush_tsb_one(tb, HPAGE_SHIFT, base, nentries);
88 	}
89 #endif
90 	spin_unlock_irqrestore(&mm->context.lock, flags);
91 }
92 
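/* Page size index and mask values used to fill in the hypervisor
 * TSB descriptor below: 8K pages for the base TSB, 4MB pages for
 * the huge page TSB.
 */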
93 #define HV_PGSZ_IDX_BASE	HV_PGSZ_IDX_8K
94 #define HV_PGSZ_MASK_BASE	HV_PGSZ_MASK_8K
95 
96 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
97 #define HV_PGSZ_IDX_HUGE	HV_PGSZ_IDX_4MB
98 #define HV_PGSZ_MASK_HUGE	HV_PGSZ_MASK_4MB
99 #endif
100 
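/* Record everything needed to use the already-allocated TSB at
 * tsb_idx: the number of entries, the TSB register value, and, on
 * chips without physical TSB addressing, the locked virtual
 * mapping (vaddr + pte) that will map the TSB.  On sun4v the
 * hypervisor TSB descriptor is also filled in.
 */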
101 static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsigned long tsb_bytes)
102 {
103 	unsigned long tsb_reg, base, tsb_paddr;
104 	unsigned long page_sz, tte;
105 
106 	mm->context.tsb_block[tsb_idx].tsb_nentries =
107 		tsb_bytes / sizeof(struct tsb);
108 
109 	base = TSBMAP_BASE;
110 	tte = pgprot_val(PAGE_KERNEL_LOCKED);
111 	tsb_paddr = __pa(mm->context.tsb_block[tsb_idx].tsb);
112 	BUG_ON(tsb_paddr & (tsb_bytes - 1UL));
113 
114 	/* Use the smallest page size that can map the whole TSB
115 	 * in one TLB entry.
116 	 */
117 	switch (tsb_bytes) {
118 	case 8192 << 0:
119 		tsb_reg = 0x0UL;
120 #ifdef DCACHE_ALIASING_POSSIBLE
121 		base += (tsb_paddr & 8192);
122 #endif
123 		page_sz = 8192;
124 		break;
125 
126 	case 8192 << 1:
127 		tsb_reg = 0x1UL;
128 		page_sz = 64 * 1024;
129 		break;
130 
131 	case 8192 << 2:
132 		tsb_reg = 0x2UL;
133 		page_sz = 64 * 1024;
134 		break;
135 
136 	case 8192 << 3:
137 		tsb_reg = 0x3UL;
138 		page_sz = 64 * 1024;
139 		break;
140 
141 	case 8192 << 4:
142 		tsb_reg = 0x4UL;
143 		page_sz = 512 * 1024;
144 		break;
145 
146 	case 8192 << 5:
147 		tsb_reg = 0x5UL;
148 		page_sz = 512 * 1024;
149 		break;
150 
151 	case 8192 << 6:
152 		tsb_reg = 0x6UL;
153 		page_sz = 512 * 1024;
154 		break;
155 
156 	case 8192 << 7:
157 		tsb_reg = 0x7UL;
158 		page_sz = 4 * 1024 * 1024;
159 		break;
160 
161 	default:
162 		printk(KERN_ERR "TSB[%s:%d]: Impossible TSB size %lu, killing process.\n",
163 		       current->comm, current->pid, tsb_bytes);
164 		do_exit(SIGSEGV);
165 	}
166 	tte |= pte_sz_bits(page_sz);
167 
168 	if (tlb_type == cheetah_plus || tlb_type == hypervisor) {
169 		/* Physical mapping, no locked TLB entry for TSB.  */
170 		tsb_reg |= tsb_paddr;
171 
172 		mm->context.tsb_block[tsb_idx].tsb_reg_val = tsb_reg;
173 		mm->context.tsb_block[tsb_idx].tsb_map_vaddr = 0;
174 		mm->context.tsb_block[tsb_idx].tsb_map_pte = 0;
175 	} else {
176 		tsb_reg |= base;
177 		tsb_reg |= (tsb_paddr & (page_sz - 1UL));
178 		tte |= (tsb_paddr & ~(page_sz - 1UL));
179 
180 		mm->context.tsb_block[tsb_idx].tsb_reg_val = tsb_reg;
181 		mm->context.tsb_block[tsb_idx].tsb_map_vaddr = base;
182 		mm->context.tsb_block[tsb_idx].tsb_map_pte = tte;
183 	}
184 
185 	/* Setup the Hypervisor TSB descriptor.  */
186 	if (tlb_type == hypervisor) {
187 		struct hv_tsb_descr *hp = &mm->context.tsb_descr[tsb_idx];
188 
189 		switch (tsb_idx) {
190 		case MM_TSB_BASE:
191 			hp->pgsz_idx = HV_PGSZ_IDX_BASE;
192 			break;
193 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
194 		case MM_TSB_HUGE:
195 			hp->pgsz_idx = HV_PGSZ_IDX_HUGE;
196 			break;
197 #endif
198 		default:
199 			BUG();
200 		}
201 		hp->assoc = 1;
202 		hp->num_ttes = tsb_bytes / 16;
203 		hp->ctx_idx = 0;
204 		switch (tsb_idx) {
205 		case MM_TSB_BASE:
206 			hp->pgsz_mask = HV_PGSZ_MASK_BASE;
207 			break;
208 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
209 		case MM_TSB_HUGE:
210 			hp->pgsz_mask = HV_PGSZ_MASK_HUGE;
211 			break;
212 #endif
213 		default:
214 			BUG();
215 		}
216 		hp->tsb_base = tsb_paddr;
217 		hp->resv = 0;
218 	}
219 }
220 
221 struct kmem_cache *pgtable_cache __read_mostly;
222 
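/* One kmem cache per supported TSB size (8KB << 0 through 8KB << 7);
 * creating each cache with align == size keeps every TSB naturally
 * aligned to its size and physically contiguous, as required.
 */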
223 static struct kmem_cache *tsb_caches[8] __read_mostly;
224 
225 static const char *tsb_cache_names[8] = {
226 	"tsb_8KB",
227 	"tsb_16KB",
228 	"tsb_32KB",
229 	"tsb_64KB",
230 	"tsb_128KB",
231 	"tsb_256KB",
232 	"tsb_512KB",
233 	"tsb_1MB",
234 };
235 
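/* Boot-time setup of the page table kmem cache and the eight TSB
 * caches.  Failure here is fatal: report through the PROM and halt.
 */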
236 void __init pgtable_cache_init(void)
237 {
238 	unsigned long i;
239 
240 	pgtable_cache = kmem_cache_create("pgtable_cache",
241 					  PAGE_SIZE, PAGE_SIZE,
242 					  0,
243 					  _clear_page);
244 	if (!pgtable_cache) {
245 		prom_printf("pgtable_cache_init(): Could not create!\n");
246 		prom_halt();
247 	}
248 
249 	for (i = 0; i < 8; i++) {
250 		unsigned long size = 8192 << i;
251 		const char *name = tsb_cache_names[i];
252 
253 		tsb_caches[i] = kmem_cache_create(name,
254 						  size, size,
255 						  0, NULL);
256 		if (!tsb_caches[i]) {
257 			prom_printf("Could not create %s cache\n", name);
258 			prom_halt();
259 		}
260 	}
261 }
262 
263 int sysctl_tsb_ratio = -2;
264 
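/* Convert a TSB size in bytes into the RSS value at which we try to
 * grow to the next size.  With the default sysctl_tsb_ratio of -2
 * this is num_ents - num_ents/4, i.e. 3/4 of the TSB's capacity.
 */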
265 static unsigned long tsb_size_to_rss_limit(unsigned long new_size)
266 {
267 	unsigned long num_ents = (new_size / sizeof(struct tsb));
268 
269 	if (sysctl_tsb_ratio < 0)
270 		return num_ents - (num_ents >> -sysctl_tsb_ratio);
271 	else
272 		return num_ents + (num_ents >> sysctl_tsb_ratio);
273 }
274 
275 /* When the RSS of an address space exceeds tsb_rss_limit for a TSB,
276  * do_sparc64_fault() invokes this routine to try to grow it.
277  *
278  * When we reach the maximum TSB size supported, we stick ~0UL into
279  * tsb_rss_limit for that TSB so the grow checks in do_sparc64_fault()
280  * will not trigger any longer.
281  *
282  * The TSB can be anywhere from 8K to 1MB in size, in increasing powers
283  * of two.  The TSB must be aligned to its size, so e.g. a 512K TSB
284  * must be 512K aligned.  It also must be physically contiguous, so we
285  * cannot use vmalloc().
286  *
287  * The idea here is to grow the TSB when the RSS of the process approaches
288  * the number of entries that the current TSB can hold at once.  Currently,
289  * we trigger when the RSS hits 3/4 of the TSB capacity.
290  */
291 void tsb_grow(struct mm_struct *mm, unsigned long tsb_index, unsigned long rss)
292 {
293 	unsigned long max_tsb_size = 1 * 1024 * 1024;
294 	unsigned long new_size, old_size, flags;
295 	struct tsb *old_tsb, *new_tsb;
296 	unsigned long new_cache_index, old_cache_index;
297 	unsigned long new_rss_limit;
298 	gfp_t gfp_flags;
299 
300 	if (max_tsb_size > (PAGE_SIZE << MAX_ORDER))
301 		max_tsb_size = (PAGE_SIZE << MAX_ORDER);
302 
303 	new_cache_index = 0;
304 	for (new_size = 8192; new_size < max_tsb_size; new_size <<= 1UL) {
305 		new_rss_limit = tsb_size_to_rss_limit(new_size);
306 		if (new_rss_limit > rss)
307 			break;
308 		new_cache_index++;
309 	}
310 
311 	if (new_size == max_tsb_size)
312 		new_rss_limit = ~0UL;
313 
314 retry_tsb_alloc:
315 	gfp_flags = GFP_KERNEL;
316 	if (new_size > (PAGE_SIZE * 2))
317 		gfp_flags = __GFP_NOWARN | __GFP_NORETRY;
318 
319 	new_tsb = kmem_cache_alloc_node(tsb_caches[new_cache_index],
320 					gfp_flags, numa_node_id());
321 	if (unlikely(!new_tsb)) {
322 		/* Not being able to fork due to a high-order TSB
323 		 * allocation failure is very bad behavior.  Just back
324 		 * down to a 0-order allocation and force no TSB
325 		 * growing for this address space.
326 		 */
327 		if (mm->context.tsb_block[tsb_index].tsb == NULL &&
328 		    new_cache_index > 0) {
329 			new_cache_index = 0;
330 			new_size = 8192;
331 			new_rss_limit = ~0UL;
332 			goto retry_tsb_alloc;
333 		}
334 
335 		/* If we failed on a TSB grow, we are under serious
336 		 * memory pressure so don't try to grow any more.
337 		 */
338 		if (mm->context.tsb_block[tsb_index].tsb != NULL)
339 			mm->context.tsb_block[tsb_index].tsb_rss_limit = ~0UL;
340 		return;
341 	}
342 
343 	/* Mark all tags as invalid.  */
344 	tsb_init(new_tsb, new_size);
345 
346 	/* Ok, we are about to commit the changes.  If we are
347 	 * growing an existing TSB the locking is very tricky,
348  * growing an existing TSB, the locking is very tricky,
349  * so WATCH OUT!
350  * We have to hold mm->context.lock while committing to the
351  * new TSB; this synchronizes us with processors in
352  * flush_tsb_user() and switch_mm() for this address space.
353  *
354  * But even with that lock held, processors run asynchronously,
355  * accessing the old TSB via TLB miss handling.  This is OK
356  * because those actions only propagate state from the Linux
357  * page tables into the TSB; the page table mappings themselves
358  * are not being changed.  If a real fault occurs, the processor
359  * will synchronize with us when it hits flush_tsb_user(); the
360  * same is true when vmscan is modifying the page tables.  The
361  * only thing we need to be careful with is to skip any locked
362  * TSB entries during copy_tsb().
363 	 *
364 	 * When we finish committing to the new TSB, we have to drop
365 	 * the lock and ask all other cpus running this address space
366 	 * to run tsb_context_switch() to see the new TSB table.
367 	 */
368 	spin_lock_irqsave(&mm->context.lock, flags);
369 
370 	old_tsb = mm->context.tsb_block[tsb_index].tsb;
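	/* The low three bits of tsb_reg_val encode the TSB size
	 * selection (0x0 - 0x7, set in setup_tsb_params()), which is
	 * also the index into tsb_caches[].
	 */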
371 	old_cache_index =
372 		(mm->context.tsb_block[tsb_index].tsb_reg_val & 0x7UL);
373 	old_size = (mm->context.tsb_block[tsb_index].tsb_nentries *
374 		    sizeof(struct tsb));
375 
377 	/* Handle multiple threads trying to grow the TSB at the same time.
378 	 * One will get in here first, and bump the size and the RSS limit.
379 	 * The others will get in here next and hit this check.
380 	 */
381 	if (unlikely(old_tsb &&
382 		     (rss < mm->context.tsb_block[tsb_index].tsb_rss_limit))) {
383 		spin_unlock_irqrestore(&mm->context.lock, flags);
384 
385 		kmem_cache_free(tsb_caches[new_cache_index], new_tsb);
386 		return;
387 	}
388 
389 	mm->context.tsb_block[tsb_index].tsb_rss_limit = new_rss_limit;
390 
391 	if (old_tsb) {
392 		extern void copy_tsb(unsigned long old_tsb_base,
393 				     unsigned long old_tsb_size,
394 				     unsigned long new_tsb_base,
395 				     unsigned long new_tsb_size);
396 		unsigned long old_tsb_base = (unsigned long) old_tsb;
397 		unsigned long new_tsb_base = (unsigned long) new_tsb;
398 
399 		if (tlb_type == cheetah_plus || tlb_type == hypervisor) {
400 			old_tsb_base = __pa(old_tsb_base);
401 			new_tsb_base = __pa(new_tsb_base);
402 		}
403 		copy_tsb(old_tsb_base, old_size, new_tsb_base, new_size);
404 	}
405 
406 	mm->context.tsb_block[tsb_index].tsb = new_tsb;
407 	setup_tsb_params(mm, tsb_index, new_size);
408 
409 	spin_unlock_irqrestore(&mm->context.lock, flags);
410 
411 	/* If old_tsb is NULL, we're being invoked for the first time
412 	 * from init_new_context().
413 	 */
414 	if (old_tsb) {
415 		/* Reload it on the local cpu.  */
416 		tsb_context_switch(mm);
417 
418 		/* Now force other processors to do the same.  */
419 		preempt_disable();
420 		smp_tsb_sync(mm);
421 		preempt_enable();
422 
423 		/* Now it is safe to free the old tsb.  */
424 		kmem_cache_free(tsb_caches[old_cache_index], old_tsb);
425 	}
426 }
427 
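/* Set up the MMU context of a new address space: no hardware
 * context number yet, no page table page, and a freshly grown base
 * TSB (plus a huge page TSB when the parent had huge page
 * mappings).
 */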
428 int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
429 {
430 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
431 	unsigned long huge_pte_count;
432 #endif
433 	unsigned int i;
434 
435 	spin_lock_init(&mm->context.lock);
436 
437 	mm->context.sparc64_ctx_val = 0UL;
438 
439 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
440 	/* We reset it to zero because the fork() page copying
441 	 * will re-increment the counter as the parent PTEs are
442 	 * copied into the child address space.
443 	 */
444 	huge_pte_count = mm->context.huge_pte_count;
445 	mm->context.huge_pte_count = 0;
446 #endif
447 
448 	mm->context.pgtable_page = NULL;
449 
450 	/* copy_mm() copies over the parent's mm_struct before calling
451 	 * us, so we need to zero out the TSB pointer or else tsb_grow()
452 	 * will be confused and think there is an older TSB to free up.
453 	 */
454 	for (i = 0; i < MM_NUM_TSBS; i++)
455 		mm->context.tsb_block[i].tsb = NULL;
456 
457 	/* If this is fork, inherit the parent's TSB size.  We would
458 	 * grow it to that size on the first page fault anyway.
459 	 */
460 	tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm));
461 
462 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
463 	if (unlikely(huge_pte_count))
464 		tsb_grow(mm, MM_TSB_HUGE, huge_pte_count);
465 #endif
466 
467 	if (unlikely(!mm->context.tsb_block[MM_TSB_BASE].tsb))
468 		return -ENOMEM;
469 
470 	return 0;
471 }
472 
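/* Free one TSB back to the kmem cache it was allocated from; the
 * cache index is recovered from the low bits of tsb_reg_val.
 */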
473 static void tsb_destroy_one(struct tsb_config *tp)
474 {
475 	unsigned long cache_index;
476 
477 	if (!tp->tsb)
478 		return;
479 	cache_index = tp->tsb_reg_val & 0x7UL;
480 	kmem_cache_free(tsb_caches[cache_index], tp->tsb);
481 	tp->tsb = NULL;
482 	tp->tsb_reg_val = 0UL;
483 }
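/* Tear down an address space's MMU state: free its TSBs, drop its
 * page table page, and release its context number back to the
 * allocation bitmap.
 */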
484 
485 void destroy_context(struct mm_struct *mm)
486 {
487 	unsigned long flags, i;
488 	struct page *page;
489 
490 	for (i = 0; i < MM_NUM_TSBS; i++)
491 		tsb_destroy_one(&mm->context.tsb_block[i]);
492 
493 	page = mm->context.pgtable_page;
494 	if (page && put_page_testzero(page)) {
495 		pgtable_page_dtor(page);
496 		free_hot_cold_page(page, 0);
497 	}
498 
499 	spin_lock_irqsave(&ctx_alloc_lock, flags);
500 
501 	if (CTX_VALID(mm->context)) {
502 		unsigned long nr = CTX_NRBITS(mm->context);
503 		mmu_context_bmap[nr>>6] &= ~(1UL << (nr & 63));
504 	}
505 
506 	spin_unlock_irqrestore(&ctx_alloc_lock, flags);
507 }
508