xref: /linux/arch/powerpc/mm/book3s64/slb.c (revision ad952db4a865e96ec98d4c5874a4699fe3286d56)
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * PowerPC64 SLB support.
 *
 * Copyright (C) 2004 David Gibson <dwg@au.ibm.com>, IBM
 * Based on earlier code written by:
 * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com
 *    Copyright (c) 2001 Dave Engebretsen
 * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
 */

#include <asm/interrupt.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/paca.h>
#include <asm/lppaca.h>
#include <asm/ppc-opcode.h>
#include <asm/cputable.h>
#include <asm/cacheflush.h>
#include <asm/smp.h>
#include <linux/compiler.h>
#include <linux/context_tracking.h>
#include <linux/mm_types.h>
#include <linux/pgtable.h>

#include <asm/udbg.h>
#include <asm/text-patching.h>

#include "internal.h"


static long slb_allocate_user(struct mm_struct *mm, unsigned long ea);

bool stress_slb_enabled __initdata;

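/*
 * "stress_slb" boot parameter: make the SLB code aggressively flush and
 * refill the SLB to stress the SLB miss handlers (see the stress_slb()
 * users below).
 */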
static int __init parse_stress_slb(char *p)
{
	stress_slb_enabled = true;
	return 0;
}
early_param("stress_slb", parse_stress_slb);

__ro_after_init DEFINE_STATIC_KEY_FALSE(stress_slb_key);

/* "no_slb_preload" boot parameter: disable the SLB preload cache. */
bool no_slb_preload __initdata;
static int __init parse_no_slb_preload(char *p)
{
	no_slb_preload = true;
	return 0;
}
early_param("no_slb_preload", parse_no_slb_preload);
__ro_after_init DEFINE_STATIC_KEY_FALSE(no_slb_preload_key);

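/*
 * Under CONFIG_DEBUG_VM, probe the SLB with slbfee. and warn if an entry
 * covering @ea is (or is not) present, contrary to what the caller
 * expects. Must run with MSR[EE]=0 so the SLB cannot change under us.
 */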
static void assert_slb_presence(bool present, unsigned long ea)
{
#ifdef CONFIG_DEBUG_VM
	unsigned long tmp;

	WARN_ON_ONCE(mfmsr() & MSR_EE);

	if (!cpu_has_feature(CPU_FTR_ARCH_206))
		return;

	/*
	 * slbfee. requires bit 24 (PPC bit 39) be clear in RB. Hardware
	 * ignores all other bits from 0-27, so just clear them all.
	 */
	ea &= ~((1UL << SID_SHIFT) - 1);
	asm volatile(__PPC_SLBFEE_DOT(%0, %1) : "=r"(tmp) : "r"(ea) : "cr0");

	WARN_ON(present == (tmp == 0));
#endif
}

static inline void slb_shadow_update(unsigned long ea, int ssize,
				     unsigned long flags,
				     enum slb_index index)
{
	struct slb_shadow *p = get_slb_shadow();

	/*
	 * Clear the ESID first so the entry is not valid while we are
	 * updating it.  No write barriers are needed here, provided
	 * we only update the current CPU's SLB shadow buffer.
	 */
	WRITE_ONCE(p->save_area[index].esid, 0);
	WRITE_ONCE(p->save_area[index].vsid, cpu_to_be64(mk_vsid_data(ea, ssize, flags)));
	WRITE_ONCE(p->save_area[index].esid, cpu_to_be64(mk_esid_data(ea, ssize, index)));
}

static inline void slb_shadow_clear(enum slb_index index)
{
	WRITE_ONCE(get_slb_shadow()->save_area[index].esid, cpu_to_be64(index));
}

static inline void create_shadowed_slbe(unsigned long ea, int ssize,
					unsigned long flags,
					enum slb_index index)
{
	/*
	 * Updating the shadow buffer before writing the SLB ensures
	 * we don't get a stale entry here if we get preempted by PHYP
	 * between these two statements.
	 */
	slb_shadow_update(ea, ssize, flags, index);

	assert_slb_presence(false, ea);
	asm volatile("slbmte  %0,%1" :
		     : "r" (mk_vsid_data(ea, ssize, flags)),
		       "r" (mk_esid_data(ea, ssize, index))
		     : "memory" );
}

/*
 * Insert bolted entries into SLB (which may not be empty, so don't clear
 * slb_cache_ptr).
 */
void __slb_restore_bolted_realmode(void)
{
	struct slb_shadow *p = get_slb_shadow();
	enum slb_index index;

	 /* No isync needed because realmode. */
	for (index = 0; index < SLB_NUM_BOLTED; index++) {
		asm volatile("slbmte  %0,%1" :
		     : "r" (be64_to_cpu(p->save_area[index].vsid)),
		       "r" (be64_to_cpu(p->save_area[index].esid)));
	}

	assert_slb_presence(true, local_paca->kstack);
}

/*
 * Insert the bolted entries into an empty SLB.
 */
void slb_restore_bolted_realmode(void)
{
	__slb_restore_bolted_realmode();
	get_paca()->slb_cache_ptr = 0;

	get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
	get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
}

/*
 * This flushes all SLB entries including 0, so it must be realmode.
 */
void slb_flush_all_realmode(void)
{
	asm volatile("slbmte %0,%0; slbia" : : "r" (0));
}

static __always_inline void __slb_flush_and_restore_bolted(bool preserve_kernel_lookaside)
{
	struct slb_shadow *p = get_slb_shadow();
	unsigned long ksp_esid_data, ksp_vsid_data;
	u32 ih;

	/*
	 * SLBIA IH=1 on ISA v2.05 and newer processors may preserve lookaside
	 * information created with Class=0 entries, which we use for kernel
	 * SLB entries (the SLB entries themselves are still invalidated).
	 *
	 * Older processors will ignore this optimisation. Over-invalidation
	 * is fine because we never rely on lookaside information existing.
	 */
	if (preserve_kernel_lookaside)
		ih = 1;
	else
		ih = 0;

	ksp_esid_data = be64_to_cpu(p->save_area[KSTACK_INDEX].esid);
	ksp_vsid_data = be64_to_cpu(p->save_area[KSTACK_INDEX].vsid);

	asm volatile(PPC_SLBIA(%0)"	\n"
		     "slbmte	%1, %2	\n"
		     :: "i" (ih),
			"r" (ksp_vsid_data),
			"r" (ksp_esid_data)
		     : "memory");
}

/*
 * This flushes non-bolted entries, so it can be run in virtual mode. Must
 * be called with interrupts disabled.
 */
void slb_flush_and_restore_bolted(void)
{
	BUILD_BUG_ON(SLB_NUM_BOLTED != 2);

	WARN_ON(!irqs_disabled());

	/*
	 * We can't take a PMU exception in the following code, so hard
	 * disable interrupts.
	 */
	hard_irq_disable();

	isync();
	__slb_flush_and_restore_bolted(false);
	isync();

	assert_slb_presence(true, get_paca()->kstack);

	get_paca()->slb_cache_ptr = 0;

	get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
	get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
}

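/*
 * Read the current SLB entries out with slbmfee/slbmfev into @slb_ptr,
 * and save slb_cache_ptr, so the state can later be reported by
 * slb_dump_contents().
 */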
void slb_save_contents(struct slb_entry *slb_ptr)
{
	int i;
	unsigned long e, v;

	/* Save slb_cache_ptr value. */
	get_paca()->slb_save_cache_ptr = get_paca()->slb_cache_ptr;

	if (!slb_ptr)
		return;

	for (i = 0; i < mmu_slb_size; i++) {
		asm volatile("slbmfee  %0,%1" : "=r" (e) : "r" (i));
		asm volatile("slbmfev  %0,%1" : "=r" (v) : "r" (i));
		slb_ptr->esid = e;
		slb_ptr->vsid = v;
		slb_ptr++;
	}
}

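/*
 * Pretty-print the SLB entries previously captured by slb_save_contents(),
 * decoding segment size, ESID, VSID and LLP for each valid entry.
 */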
void slb_dump_contents(struct slb_entry *slb_ptr)
{
	int i, n;
	unsigned long e, v;
	unsigned long llp;

	if (!slb_ptr)
		return;

	pr_err("SLB contents of cpu 0x%x\n", smp_processor_id());

	for (i = 0; i < mmu_slb_size; i++) {
		e = slb_ptr->esid;
		v = slb_ptr->vsid;
		slb_ptr++;

		if (!e && !v)
			continue;

		pr_err("%02d %016lx %016lx %s\n", i, e, v,
				(e & SLB_ESID_V) ? "VALID" : "NOT VALID");

		if (!(e & SLB_ESID_V))
			continue;

		llp = v & SLB_VSID_LLP;
		if (v & SLB_VSID_B_1T) {
			pr_err("     1T ESID=%9lx VSID=%13lx LLP:%3lx\n",
			       GET_ESID_1T(e),
			       (v & ~SLB_VSID_B) >> SLB_VSID_SHIFT_1T, llp);
		} else {
			pr_err("   256M ESID=%9lx VSID=%13lx LLP:%3lx\n",
			       GET_ESID(e),
			       (v & ~SLB_VSID_B) >> SLB_VSID_SHIFT, llp);
		}
	}

	if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) {
		/* RR is not so useful as it's often not used for allocation */
		pr_err("SLB RR allocator index %d\n", get_paca()->stab_rr);

		/* Dump slb cache entries as well. */
		pr_err("SLB cache ptr value = %d\n", get_paca()->slb_save_cache_ptr);
		pr_err("Valid SLB cache entries:\n");
		n = min_t(int, get_paca()->slb_save_cache_ptr, SLB_CACHE_ENTRIES);
		for (i = 0; i < n; i++)
			pr_err("%02d EA[0-35]=%9x\n", i, get_paca()->slb_cache[i]);
		pr_err("Rest of SLB cache entries:\n");
		for (i = n; i < SLB_CACHE_ENTRIES; i++)
			pr_err("%02d EA[0-35]=%9x\n", i, get_paca()->slb_cache[i]);
	}
}

void slb_vmalloc_update(void)
{
	/*
	 * vmalloc is not bolted, so just have to flush non-bolted.
	 */
	slb_flush_and_restore_bolted();
}

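/*
 * The SLB preload cache is a small per-thread ring buffer
 * (thread_info::slb_preload_esid[]) of recently used user ESIDs, which
 * switch_slb() replays to pre-fault likely segments on context switch.
 */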
static bool preload_hit(struct thread_info *ti, unsigned long esid)
{
	unsigned char i;

	for (i = 0; i < ti->slb_preload_nr; i++) {
		unsigned char idx;

		idx = (ti->slb_preload_tail + i) % SLB_PRELOAD_NR;
		if (esid == ti->slb_preload_esid[idx])
			return true;
	}
	return false;
}

static void preload_add(struct thread_info *ti, unsigned long ea)
{
	unsigned char idx;
	unsigned long esid;

	if (slb_preload_disabled())
		return;

	if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) {
		/* EAs are stored >> 28 so 256MB segments don't need clearing */
		if (ea & ESID_MASK_1T)
			ea &= ESID_MASK_1T;
	}

	esid = ea >> SID_SHIFT;

	if (preload_hit(ti, esid))
		return;

	idx = (ti->slb_preload_tail + ti->slb_preload_nr) % SLB_PRELOAD_NR;
	ti->slb_preload_esid[idx] = esid;
	if (ti->slb_preload_nr == SLB_PRELOAD_NR)
		ti->slb_preload_tail = (ti->slb_preload_tail + 1) % SLB_PRELOAD_NR;
	else
		ti->slb_preload_nr++;
}

static void preload_age(struct thread_info *ti)
{
	if (!ti->slb_preload_nr)
		return;
	ti->slb_preload_nr--;
	ti->slb_preload_tail = (ti->slb_preload_tail + 1) % SLB_PRELOAD_NR;
}

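/*
 * slbie the kernel entry recorded at @index in the slb_cache, taking care
 * never to invalidate the segment that maps the kernel stack.
 */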
static void slb_cache_slbie_kernel(unsigned int index)
{
	unsigned long slbie_data = get_paca()->slb_cache[index];
	unsigned long ksp = get_paca()->kstack;

	slbie_data <<= SID_SHIFT;
	slbie_data |= 0xc000000000000000ULL;
	if ((ksp & slb_esid_mask(mmu_kernel_ssize)) == slbie_data)
		return;
	slbie_data |= mmu_kernel_ssize << SLBIE_SSIZE_SHIFT;

	asm volatile("slbie %0" : : "r" (slbie_data));
}

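/* slbie the user (Class=1) entry recorded at @index in the slb_cache. */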
static void slb_cache_slbie_user(unsigned int index)
{
	unsigned long slbie_data = get_paca()->slb_cache[index];

	slbie_data <<= SID_SHIFT;
	slbie_data |= user_segment_size(slbie_data) << SLBIE_SSIZE_SHIFT;
	slbie_data |= SLBIE_C; /* user slbs have C=1 */

	asm volatile("slbie %0" : : "r" (slbie_data));
}

/* Flush all user entries from the segment table of the current processor. */
void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
{
	struct thread_info *ti = task_thread_info(tsk);
	unsigned char i;

	/*
	 * We need interrupts hard-disabled here, not just soft-disabled,
	 * so that a PMU interrupt can't occur, which might try to access
	 * user memory (to get a stack trace) and possibly cause an SLB miss
	 * which would update the slb_cache/slb_cache_ptr fields in the PACA.
	 */
	hard_irq_disable();
	isync();
	if (stress_slb()) {
		__slb_flush_and_restore_bolted(false);
		isync();
		get_paca()->slb_cache_ptr = 0;
		get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;

	} else if (cpu_has_feature(CPU_FTR_ARCH_300)) {
		/*
		 * SLBIA IH=3 invalidates all Class=1 SLBEs and their
		 * associated lookaside structures, which matches what
		 * switch_slb wants. So ARCH_300 does not use the slb
		 * cache.
		 */
		asm volatile(PPC_SLBIA(3));

	} else {
		unsigned long offset = get_paca()->slb_cache_ptr;

		if (!mmu_has_feature(MMU_FTR_NO_SLBIE_B) &&
		    offset <= SLB_CACHE_ENTRIES) {
			/*
			 * Could assert_slb_presence(true) here, but
			 * hypervisor or machine check could have come
			 * in and removed the entry at this point.
			 */

			for (i = 0; i < offset; i++)
				slb_cache_slbie_user(i);

			/* Workaround POWER5 < DD2.1 issue */
			if (!cpu_has_feature(CPU_FTR_ARCH_207S) && offset == 1)
				slb_cache_slbie_user(0);

		} else {
			/* Flush but retain kernel lookaside information */
			__slb_flush_and_restore_bolted(true);
			isync();

			get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
		}

		get_paca()->slb_cache_ptr = 0;
	}
	get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;

	copy_mm_to_paca(mm);

	if (slb_preload_disabled())
		return;

	/*
	 * We gradually age out SLBs after a number of context switches to
	 * reduce reload overhead of unused entries (like we do with FP/VEC
	 * reload). Each time we wrap 256 switches, take an entry out of the
	 * SLB preload cache.
	 */
	tsk->thread.load_slb++;
	if (!tsk->thread.load_slb) {
		unsigned long pc = KSTK_EIP(tsk);

		preload_age(ti);
		preload_add(ti, pc);
	}

	for (i = 0; i < ti->slb_preload_nr; i++) {
		unsigned char idx;
		unsigned long ea;

		idx = (ti->slb_preload_tail + i) % SLB_PRELOAD_NR;
		ea = (unsigned long)ti->slb_preload_esid[idx] << SID_SHIFT;

		slb_allocate_user(mm, ea);
	}

	/*
	 * Synchronize slbmte preloads with possible subsequent user memory
	 * address accesses by the kernel (user mode won't happen until
	 * rfid, which is safe).
	 */
	isync();
}

void slb_set_size(u16 size)
{
	mmu_slb_size = size;
}

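/*
 * Boot-time SLB setup: record the segment encodings for the kernel
 * regions, wipe the SLB, and create the bolted linear-mapping entry (and,
 * on secondary cpus, the bolted kernel stack entry).
 */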
void slb_initialize(void)
{
	unsigned long linear_llp, vmalloc_llp, io_llp;
	unsigned long lflags;
	static int slb_encoding_inited;
#ifdef CONFIG_SPARSEMEM_VMEMMAP
	unsigned long vmemmap_llp;
#endif

	/* Prepare our SLB miss handler based on our page size */
	linear_llp = mmu_psize_defs[mmu_linear_psize].sllp;
	io_llp = mmu_psize_defs[mmu_io_psize].sllp;
	vmalloc_llp = mmu_psize_defs[mmu_vmalloc_psize].sllp;
	get_paca()->vmalloc_sllp = SLB_VSID_KERNEL | vmalloc_llp;
#ifdef CONFIG_SPARSEMEM_VMEMMAP
	vmemmap_llp = mmu_psize_defs[mmu_vmemmap_psize].sllp;
#endif
	if (!slb_encoding_inited) {
		slb_encoding_inited = 1;
		pr_devel("SLB: linear  LLP = %04lx\n", linear_llp);
		pr_devel("SLB: io      LLP = %04lx\n", io_llp);
#ifdef CONFIG_SPARSEMEM_VMEMMAP
		pr_devel("SLB: vmemmap LLP = %04lx\n", vmemmap_llp);
#endif
	}

	get_paca()->stab_rr = SLB_NUM_BOLTED - 1;
	get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
	get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;

	lflags = SLB_VSID_KERNEL | linear_llp;

	/* Invalidate the entire SLB (even entry 0) & all the ERATS */
	asm volatile("isync":::"memory");
	asm volatile("slbmte  %0,%0"::"r" (0) : "memory");
	asm volatile("isync; slbia; isync":::"memory");
	create_shadowed_slbe(PAGE_OFFSET, mmu_kernel_ssize, lflags, LINEAR_INDEX);

	/*
	 * For the boot cpu, we're running on the stack in init_thread_union,
	 * which is in the first segment of the linear mapping, and also
	 * get_paca()->kstack hasn't been initialized yet.
	 * For secondary cpus, we need to bolt the kernel stack entry now.
	 */
	slb_shadow_clear(KSTACK_INDEX);
	if (raw_smp_processor_id() != boot_cpuid &&
	    (get_paca()->kstack & slb_esid_mask(mmu_kernel_ssize)) > PAGE_OFFSET)
		create_shadowed_slbe(get_paca()->kstack,
				     mmu_kernel_ssize, lflags, KSTACK_INDEX);

	asm volatile("isync":::"memory");
}

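/*
 * Record a newly inserted user ESID in the slb_cache so that switch_slb()
 * can slbie user entries individually instead of flushing the whole SLB.
 * If the cache overflows, mark it invalid so switch_slb() falls back to a
 * full flush.
 */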
static void slb_cache_update(unsigned long esid_data)
{
	int slb_cache_index;

	if (cpu_has_feature(CPU_FTR_ARCH_300))
		return; /* ISAv3.0B and later does not use slb_cache */

	if (stress_slb())
		return;

	/*
	 * Now update slb cache entries
	 */
	slb_cache_index = local_paca->slb_cache_ptr;
	if (slb_cache_index < SLB_CACHE_ENTRIES) {
		/*
		 * We have space in slb cache for optimized switch_slb().
		 * Top 36 bits from esid_data as per ISA
		 */
		local_paca->slb_cache[slb_cache_index++] = esid_data >> SID_SHIFT;
		local_paca->slb_cache_ptr++;
	} else {
		/*
		 * Our cache is full and the current cache content strictly
		 * doesn't indicate the active SLB contents. Bump the ptr
		 * so that switch_slb() will ignore the cache.
		 */
		local_paca->slb_cache_ptr = SLB_CACHE_ENTRIES + 1;
	}
}

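/*
 * Pick an SLB slot for a new entry: take a free slot from slb_used_bitmap
 * while one exists, otherwise fall back to round-robin replacement via
 * stab_rr. Bolted slots are never handed out.
 */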
static enum slb_index alloc_slb_index(bool kernel)
{
	enum slb_index index;

	/*
	 * The allocation bitmaps can become out of sync with the SLB
	 * when the _switch code does slbie when bolting a new stack
	 * segment (which must not be anywhere else in the SLB). This leaves
	 * a kernel allocated entry that is unused in the SLB. With very
	 * large systems or small segment sizes, the bitmaps could slowly
	 * fill with these entries. They will eventually be cleared out
	 * by the round robin allocator in that case, so it's probably not
	 * worth accounting for.
	 */

	/*
	 * SLBs beyond 32 entries are allocated with stab_rr only.
	 * POWER7/8/9 have 32 SLB entries, this could be expanded if a
	 * future CPU has more.
	 */
	if (local_paca->slb_used_bitmap != U32_MAX) {
		index = ffz(local_paca->slb_used_bitmap);
		local_paca->slb_used_bitmap |= 1U << index;
		if (kernel)
			local_paca->slb_kern_bitmap |= 1U << index;
	} else {
		/* round-robin replacement of slb starting at SLB_NUM_BOLTED. */
		index = local_paca->stab_rr;
		if (index < (mmu_slb_size - 1))
			index++;
		else
			index = SLB_NUM_BOLTED;
		local_paca->stab_rr = index;
		if (index < 32) {
			if (kernel)
				local_paca->slb_kern_bitmap |= 1U << index;
			else
				local_paca->slb_kern_bitmap &= ~(1U << index);
		}
	}
	BUG_ON(index < SLB_NUM_BOLTED);

	return index;
}

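/*
 * Build and slbmte a new SLB entry for @ea, for both kernel and user
 * faults. Must not itself take a kernel SLB fault between
 * alloc_slb_index() and the slbmte, or the allocation bitmaps would go
 * stale (see the comment below).
 */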
static long slb_insert_entry(unsigned long ea, unsigned long context,
				unsigned long flags, int ssize, bool kernel)
{
	unsigned long vsid;
	unsigned long vsid_data, esid_data;
	enum slb_index index;

	vsid = get_vsid(context, ea, ssize);
	if (!vsid)
		return -EFAULT;

	/*
	 * There must not be a kernel SLB fault in alloc_slb_index or before
	 * slbmte here or the allocation bitmaps could get out of whack with
	 * the SLB.
	 *
	 * User SLB faults or preloads take this path which might get inlined
	 * into the caller, so add compiler barriers here to ensure unsafe
	 * memory accesses do not come between.
	 */
	barrier();

	index = alloc_slb_index(kernel);

	vsid_data = __mk_vsid_data(vsid, ssize, flags);
	esid_data = mk_esid_data(ea, ssize, index);

	/*
	 * No need for an isync before or after this slbmte. The exception
	 * we enter with and the rfid we exit with are context synchronizing.
	 * User preloads should add isync afterwards in case the kernel
	 * accesses user memory before it returns to userspace with rfid.
	 */
	assert_slb_presence(false, ea);
	if (stress_slb()) {
		int slb_cache_index = local_paca->slb_cache_ptr;

		/*
		 * stress_slb() does not use slb cache, repurpose as a
		 * cache of inserted (non-bolted) kernel SLB entries. All
		 * non-bolted kernel entries are flushed on any user fault,
		 * or if there are already 3 non-bolted kernel entries.
		 */
		BUILD_BUG_ON(SLB_CACHE_ENTRIES < 3);
		if (!kernel || slb_cache_index == 3) {
			int i;

			for (i = 0; i < slb_cache_index; i++)
				slb_cache_slbie_kernel(i);
			slb_cache_index = 0;
		}

		if (kernel)
			local_paca->slb_cache[slb_cache_index++] = esid_data >> SID_SHIFT;
		local_paca->slb_cache_ptr = slb_cache_index;
	}
	asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data));

	barrier();

	if (!kernel)
		slb_cache_update(esid_data);

	return 0;
}

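/*
 * Handle an SLB miss on a kernel address: map the region id to the
 * appropriate segment flags and insert a kernel (Class=0) entry.
 */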
static long slb_allocate_kernel(unsigned long ea, unsigned long id)
{
	unsigned long context;
	unsigned long flags;
	int ssize;

	if (id == LINEAR_MAP_REGION_ID) {

		/* We only support up to H_MAX_PHYSMEM_BITS */
		if ((ea & EA_MASK) > (1UL << H_MAX_PHYSMEM_BITS))
			return -EFAULT;

		flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_linear_psize].sllp;

#ifdef CONFIG_SPARSEMEM_VMEMMAP
	} else if (id == VMEMMAP_REGION_ID) {

		if (ea >= H_VMEMMAP_END)
			return -EFAULT;

		flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_vmemmap_psize].sllp;
#endif
	} else if (id == VMALLOC_REGION_ID) {

		if (ea >= H_VMALLOC_END)
			return -EFAULT;

		flags = local_paca->vmalloc_sllp;

	} else if (id == IO_REGION_ID) {

		if (ea >= H_KERN_IO_END)
			return -EFAULT;

		flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_io_psize].sllp;

	} else {
		return -EFAULT;
	}

	ssize = MMU_SEGSIZE_1T;
	if (!mmu_has_feature(MMU_FTR_1T_SEGMENT))
		ssize = MMU_SEGSIZE_256M;

	context = get_kernel_context(ea);

	return slb_insert_entry(ea, context, flags, ssize, true);
}

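/*
 * Handle an SLB miss (or a preload) on a user address: look up the
 * context and slice page size for @ea and insert a user (Class=1) entry.
 */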
static long slb_allocate_user(struct mm_struct *mm, unsigned long ea)
{
	unsigned long context;
	unsigned long flags;
	int bpsize;
	int ssize;

	/*
	 * Consider this a bad access if we take an SLB miss
	 * on an address above the addr limit.
	 */
	if (ea >= mm_ctx_slb_addr_limit(&mm->context))
		return -EFAULT;

	context = get_user_context(&mm->context, ea);
	if (!context)
		return -EFAULT;

	if (unlikely(ea >= H_PGTABLE_RANGE)) {
		WARN_ON(1);
		return -EFAULT;
	}

	ssize = user_segment_size(ea);

	bpsize = get_slice_psize(mm, ea);
	flags = SLB_VSID_USER | mmu_psize_defs[bpsize].sllp;

	return slb_insert_entry(ea, context, flags, ssize, false);
}

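/*
 * Raw SLB miss interrupt handler: dispatch to the kernel or user
 * allocation path based on the region of the faulting address. Successful
 * user allocations are also added to the thread's preload cache. Returns
 * 0 on success or a negative error.
 */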
DEFINE_INTERRUPT_HANDLER_RAW(do_slb_fault)
{
	unsigned long ea = regs->dar;
	unsigned long id = get_region_id(ea);

	/* IRQs are not reconciled here, so can't check irqs_disabled */
	VM_WARN_ON(mfmsr() & MSR_EE);

	if (regs_is_unrecoverable(regs))
		return -EINVAL;

	/*
	 * SLB kernel faults must be very careful not to touch anything that is
	 * not bolted. E.g., PACA and global variables are okay, mm->context
	 * stuff is not. SLB user faults may access all of memory (and induce
	 * one recursive SLB kernel fault), so the kernel fault must not
	 * trample on the user fault state at those points.
	 */

	/*
	 * This is a raw interrupt handler, for performance, so that
	 * fast_interrupt_return can be used. The handler must not touch local
	 * irq state, or schedule. We could test for usermode and upgrade to a
	 * normal process context (synchronous) interrupt for those, which
	 * would make them first-class kernel code and able to be traced and
	 * instrumented; although performance would suffer a bit, it would
	 * probably be a good tradeoff.
	 */
	if (id >= LINEAR_MAP_REGION_ID) {
		long err;
#ifdef CONFIG_DEBUG_VM
		/* Catch recursive kernel SLB faults. */
		BUG_ON(local_paca->in_kernel_slb_handler);
		local_paca->in_kernel_slb_handler = 1;
#endif
		err = slb_allocate_kernel(ea, id);
#ifdef CONFIG_DEBUG_VM
		local_paca->in_kernel_slb_handler = 0;
#endif
		return err;
	} else {
		struct mm_struct *mm = current->mm;
		long err;

		if (unlikely(!mm))
			return -EFAULT;

		err = slb_allocate_user(mm, ea);
		if (!err)
			preload_add(current_thread_info(), ea);

		return err;
	}
}
796