xref: /linux/arch/powerpc/mm/book3s64/slb.c (revision 23c996fc2bc1978a02c64eddb90b4ab5d309c8df)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * PowerPC64 SLB support.
4  *
5  * Copyright (C) 2004 David Gibson <dwg@au.ibm.com>, IBM
6  * Based on earlier code written by:
7  * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com
8  *    Copyright (c) 2001 Dave Engebretsen
9  * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
10  */
11 
12 #include <asm/interrupt.h>
13 #include <asm/mmu.h>
14 #include <asm/mmu_context.h>
15 #include <asm/paca.h>
16 #include <asm/lppaca.h>
17 #include <asm/ppc-opcode.h>
18 #include <asm/cputable.h>
19 #include <asm/cacheflush.h>
20 #include <asm/smp.h>
21 #include <linux/compiler.h>
22 #include <linux/context_tracking.h>
23 #include <linux/mm_types.h>
24 #include <linux/pgtable.h>
25 
26 #include <asm/udbg.h>
27 #include <asm/code-patching.h>
28 
29 #include "internal.h"
30 
31 
32 static long slb_allocate_user(struct mm_struct *mm, unsigned long ea);
33 
34 bool stress_slb_enabled __initdata;
35 
36 static int __init parse_stress_slb(char *p)
37 {
38 	stress_slb_enabled = true;
39 	return 0;
40 }
41 early_param("stress_slb", parse_stress_slb);
42 
43 __ro_after_init DEFINE_STATIC_KEY_FALSE(stress_slb_key);
44 
45 static void assert_slb_presence(bool present, unsigned long ea)
46 {
47 #ifdef CONFIG_DEBUG_VM
48 	unsigned long tmp;
49 
50 	WARN_ON_ONCE(mfmsr() & MSR_EE);
51 
52 	if (!cpu_has_feature(CPU_FTR_ARCH_206))
53 		return;
54 
55 	/*
56 	 * slbfee. requires bit 24 (PPC bit 39) be clear in RB. Hardware
57 	 * ignores all other bits from 0-27, so just clear them all.
58 	 */
59 	ea &= ~((1UL << SID_SHIFT) - 1);
60 	asm volatile(__PPC_SLBFEE_DOT(%0, %1) : "=r"(tmp) : "r"(ea) : "cr0");
61 
62 	WARN_ON(present == (tmp == 0));
63 #endif
64 }
65 
66 static inline void slb_shadow_update(unsigned long ea, int ssize,
67 				     unsigned long flags,
68 				     enum slb_index index)
69 {
70 	struct slb_shadow *p = get_slb_shadow();
71 
72 	/*
73 	 * Clear the ESID first so the entry is not valid while we are
74 	 * updating it.  No write barriers are needed here, provided
75 	 * we only update the current CPU's SLB shadow buffer.
76 	 */
77 	WRITE_ONCE(p->save_area[index].esid, 0);
78 	WRITE_ONCE(p->save_area[index].vsid, cpu_to_be64(mk_vsid_data(ea, ssize, flags)));
79 	WRITE_ONCE(p->save_area[index].esid, cpu_to_be64(mk_esid_data(ea, ssize, index)));
80 }
81 
82 static inline void slb_shadow_clear(enum slb_index index)
83 {
84 	WRITE_ONCE(get_slb_shadow()->save_area[index].esid, cpu_to_be64(index));
85 }
86 
87 static inline void create_shadowed_slbe(unsigned long ea, int ssize,
88 					unsigned long flags,
89 					enum slb_index index)
90 {
91 	/*
92 	 * Updating the shadow buffer before writing the SLB ensures
93 	 * we don't get a stale entry here if we get preempted by PHYP
94 	 * between these two statements.
95 	 */
96 	slb_shadow_update(ea, ssize, flags, index);
97 
98 	assert_slb_presence(false, ea);
99 	asm volatile("slbmte  %0,%1" :
100 		     : "r" (mk_vsid_data(ea, ssize, flags)),
101 		       "r" (mk_esid_data(ea, ssize, index))
102 		     : "memory" );
103 }
104 
105 /*
106  * Insert bolted entries into SLB (which may not be empty, so don't clear
107  * slb_cache_ptr).
108  */
109 void __slb_restore_bolted_realmode(void)
110 {
111 	struct slb_shadow *p = get_slb_shadow();
112 	enum slb_index index;
113 
114 	 /* No isync needed because realmode. */
115 	for (index = 0; index < SLB_NUM_BOLTED; index++) {
116 		asm volatile("slbmte  %0,%1" :
117 		     : "r" (be64_to_cpu(p->save_area[index].vsid)),
118 		       "r" (be64_to_cpu(p->save_area[index].esid)));
119 	}
120 
121 	assert_slb_presence(true, local_paca->kstack);
122 }
123 
124 /*
125  * Insert the bolted entries into an empty SLB.
126  */
127 void slb_restore_bolted_realmode(void)
128 {
129 	__slb_restore_bolted_realmode();
130 	get_paca()->slb_cache_ptr = 0;
131 
132 	get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
133 	get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
134 }
135 
136 /*
137  * This flushes all SLB entries including 0, so it must be realmode.
138  */
139 void slb_flush_all_realmode(void)
140 {
141 	asm volatile("slbmte %0,%0; slbia" : : "r" (0));
142 }
143 
144 static __always_inline void __slb_flush_and_restore_bolted(bool preserve_kernel_lookaside)
145 {
146 	struct slb_shadow *p = get_slb_shadow();
147 	unsigned long ksp_esid_data, ksp_vsid_data;
148 	u32 ih;
149 
150 	/*
151 	 * SLBIA IH=1 on ISA v2.05 and newer processors may preserve lookaside
152 	 * information created with Class=0 entries, which we use for kernel
153 	 * SLB entries (the SLB entries themselves are still invalidated).
154 	 *
155 	 * Older processors will ignore this optimisation. Over-invalidation
156 	 * is fine because we never rely on lookaside information existing.
157 	 */
158 	if (preserve_kernel_lookaside)
159 		ih = 1;
160 	else
161 		ih = 0;
162 
163 	ksp_esid_data = be64_to_cpu(p->save_area[KSTACK_INDEX].esid);
164 	ksp_vsid_data = be64_to_cpu(p->save_area[KSTACK_INDEX].vsid);
165 
166 	asm volatile(PPC_SLBIA(%0)"	\n"
167 		     "slbmte	%1, %2	\n"
168 		     :: "i" (ih),
169 			"r" (ksp_vsid_data),
170 			"r" (ksp_esid_data)
171 		     : "memory");
172 }
173 
174 /*
175  * This flushes non-bolted entries, it can be run in virtual mode. Must
176  * be called with interrupts disabled.
177  */
178 void slb_flush_and_restore_bolted(void)
179 {
180 	BUILD_BUG_ON(SLB_NUM_BOLTED != 2);
181 
182 	WARN_ON(!irqs_disabled());
183 
184 	/*
185 	 * We can't take a PMU exception in the following code, so hard
186 	 * disable interrupts.
187 	 */
188 	hard_irq_disable();
189 
190 	isync();
191 	__slb_flush_and_restore_bolted(false);
192 	isync();
193 
194 	assert_slb_presence(true, get_paca()->kstack);
195 
196 	get_paca()->slb_cache_ptr = 0;
197 
198 	get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
199 	get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
200 }
201 
202 void slb_save_contents(struct slb_entry *slb_ptr)
203 {
204 	int i;
205 	unsigned long e, v;
206 
207 	/* Save slb_cache_ptr value. */
208 	get_paca()->slb_save_cache_ptr = get_paca()->slb_cache_ptr;
209 
210 	if (!slb_ptr)
211 		return;
212 
213 	for (i = 0; i < mmu_slb_size; i++) {
214 		asm volatile("slbmfee  %0,%1" : "=r" (e) : "r" (i));
215 		asm volatile("slbmfev  %0,%1" : "=r" (v) : "r" (i));
216 		slb_ptr->esid = e;
217 		slb_ptr->vsid = v;
218 		slb_ptr++;
219 	}
220 }
221 
222 void slb_dump_contents(struct slb_entry *slb_ptr)
223 {
224 	int i, n;
225 	unsigned long e, v;
226 	unsigned long llp;
227 
228 	if (!slb_ptr)
229 		return;
230 
231 	pr_err("SLB contents of cpu 0x%x\n", smp_processor_id());
232 
233 	for (i = 0; i < mmu_slb_size; i++) {
234 		e = slb_ptr->esid;
235 		v = slb_ptr->vsid;
236 		slb_ptr++;
237 
238 		if (!e && !v)
239 			continue;
240 
241 		pr_err("%02d %016lx %016lx %s\n", i, e, v,
242 				(e & SLB_ESID_V) ? "VALID" : "NOT VALID");
243 
244 		if (!(e & SLB_ESID_V))
245 			continue;
246 
247 		llp = v & SLB_VSID_LLP;
248 		if (v & SLB_VSID_B_1T) {
249 			pr_err("     1T ESID=%9lx VSID=%13lx LLP:%3lx\n",
250 			       GET_ESID_1T(e),
251 			       (v & ~SLB_VSID_B) >> SLB_VSID_SHIFT_1T, llp);
252 		} else {
253 			pr_err("   256M ESID=%9lx VSID=%13lx LLP:%3lx\n",
254 			       GET_ESID(e),
255 			       (v & ~SLB_VSID_B) >> SLB_VSID_SHIFT, llp);
256 		}
257 	}
258 
259 	if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) {
260 		/* RR is not so useful as it's often not used for allocation */
261 		pr_err("SLB RR allocator index %d\n", get_paca()->stab_rr);
262 
263 		/* Dump slb cache entires as well. */
264 		pr_err("SLB cache ptr value = %d\n", get_paca()->slb_save_cache_ptr);
265 		pr_err("Valid SLB cache entries:\n");
266 		n = min_t(int, get_paca()->slb_save_cache_ptr, SLB_CACHE_ENTRIES);
267 		for (i = 0; i < n; i++)
268 			pr_err("%02d EA[0-35]=%9x\n", i, get_paca()->slb_cache[i]);
269 		pr_err("Rest of SLB cache entries:\n");
270 		for (i = n; i < SLB_CACHE_ENTRIES; i++)
271 			pr_err("%02d EA[0-35]=%9x\n", i, get_paca()->slb_cache[i]);
272 	}
273 }
274 
/*
 * Drop any SLB entries covering vmalloc space.
 *
 * vmalloc segments are never bolted, so flushing the non-bolted entries is
 * all that is required. As with slb_flush_and_restore_bolted(), interrupts
 * must be disabled by the caller.
 */
void slb_vmalloc_update(void)
{
	slb_flush_and_restore_bolted();
}
282 
283 static bool preload_hit(struct thread_info *ti, unsigned long esid)
284 {
285 	unsigned char i;
286 
287 	for (i = 0; i < ti->slb_preload_nr; i++) {
288 		unsigned char idx;
289 
290 		idx = (ti->slb_preload_tail + i) % SLB_PRELOAD_NR;
291 		if (esid == ti->slb_preload_esid[idx])
292 			return true;
293 	}
294 	return false;
295 }
296 
297 static bool preload_add(struct thread_info *ti, unsigned long ea)
298 {
299 	unsigned char idx;
300 	unsigned long esid;
301 
302 	if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) {
303 		/* EAs are stored >> 28 so 256MB segments don't need clearing */
304 		if (ea & ESID_MASK_1T)
305 			ea &= ESID_MASK_1T;
306 	}
307 
308 	esid = ea >> SID_SHIFT;
309 
310 	if (preload_hit(ti, esid))
311 		return false;
312 
313 	idx = (ti->slb_preload_tail + ti->slb_preload_nr) % SLB_PRELOAD_NR;
314 	ti->slb_preload_esid[idx] = esid;
315 	if (ti->slb_preload_nr == SLB_PRELOAD_NR)
316 		ti->slb_preload_tail = (ti->slb_preload_tail + 1) % SLB_PRELOAD_NR;
317 	else
318 		ti->slb_preload_nr++;
319 
320 	return true;
321 }
322 
323 static void preload_age(struct thread_info *ti)
324 {
325 	if (!ti->slb_preload_nr)
326 		return;
327 	ti->slb_preload_nr--;
328 	ti->slb_preload_tail = (ti->slb_preload_tail + 1) % SLB_PRELOAD_NR;
329 }
330 
331 void slb_setup_new_exec(void)
332 {
333 	struct thread_info *ti = current_thread_info();
334 	struct mm_struct *mm = current->mm;
335 	unsigned long exec = 0x10000000;
336 
337 	WARN_ON(irqs_disabled());
338 
339 	/*
340 	 * preload cache can only be used to determine whether a SLB
341 	 * entry exists if it does not start to overflow.
342 	 */
343 	if (ti->slb_preload_nr + 2 > SLB_PRELOAD_NR)
344 		return;
345 
346 	hard_irq_disable();
347 
348 	/*
349 	 * We have no good place to clear the slb preload cache on exec,
350 	 * flush_thread is about the earliest arch hook but that happens
351 	 * after we switch to the mm and have already preloaded the SLBEs.
352 	 *
353 	 * For the most part that's probably okay to use entries from the
354 	 * previous exec, they will age out if unused. It may turn out to
355 	 * be an advantage to clear the cache before switching to it,
356 	 * however.
357 	 */
358 
359 	/*
360 	 * preload some userspace segments into the SLB.
361 	 * Almost all 32 and 64bit PowerPC executables are linked at
362 	 * 0x10000000 so it makes sense to preload this segment.
363 	 */
364 	if (!is_kernel_addr(exec)) {
365 		if (preload_add(ti, exec))
366 			slb_allocate_user(mm, exec);
367 	}
368 
369 	/* Libraries and mmaps. */
370 	if (!is_kernel_addr(mm->mmap_base)) {
371 		if (preload_add(ti, mm->mmap_base))
372 			slb_allocate_user(mm, mm->mmap_base);
373 	}
374 
375 	/* see switch_slb */
376 	asm volatile("isync" : : : "memory");
377 
378 	local_irq_enable();
379 }
380 
381 void preload_new_slb_context(unsigned long start, unsigned long sp)
382 {
383 	struct thread_info *ti = current_thread_info();
384 	struct mm_struct *mm = current->mm;
385 	unsigned long heap = mm->start_brk;
386 
387 	WARN_ON(irqs_disabled());
388 
389 	/* see above */
390 	if (ti->slb_preload_nr + 3 > SLB_PRELOAD_NR)
391 		return;
392 
393 	hard_irq_disable();
394 
395 	/* Userspace entry address. */
396 	if (!is_kernel_addr(start)) {
397 		if (preload_add(ti, start))
398 			slb_allocate_user(mm, start);
399 	}
400 
401 	/* Top of stack, grows down. */
402 	if (!is_kernel_addr(sp)) {
403 		if (preload_add(ti, sp))
404 			slb_allocate_user(mm, sp);
405 	}
406 
407 	/* Bottom of heap, grows up. */
408 	if (heap && !is_kernel_addr(heap)) {
409 		if (preload_add(ti, heap))
410 			slb_allocate_user(mm, heap);
411 	}
412 
413 	/* see switch_slb */
414 	asm volatile("isync" : : : "memory");
415 
416 	local_irq_enable();
417 }
418 
419 static void slb_cache_slbie_kernel(unsigned int index)
420 {
421 	unsigned long slbie_data = get_paca()->slb_cache[index];
422 	unsigned long ksp = get_paca()->kstack;
423 
424 	slbie_data <<= SID_SHIFT;
425 	slbie_data |= 0xc000000000000000ULL;
426 	if ((ksp & slb_esid_mask(mmu_kernel_ssize)) == slbie_data)
427 		return;
428 	slbie_data |= mmu_kernel_ssize << SLBIE_SSIZE_SHIFT;
429 
430 	asm volatile("slbie %0" : : "r" (slbie_data));
431 }
432 
433 static void slb_cache_slbie_user(unsigned int index)
434 {
435 	unsigned long slbie_data = get_paca()->slb_cache[index];
436 
437 	slbie_data <<= SID_SHIFT;
438 	slbie_data |= user_segment_size(slbie_data) << SLBIE_SSIZE_SHIFT;
439 	slbie_data |= SLBIE_C; /* user slbs have C=1 */
440 
441 	asm volatile("slbie %0" : : "r" (slbie_data));
442 }
443 
444 /* Flush all user entries from the segment table of the current processor. */
445 void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
446 {
447 	struct thread_info *ti = task_thread_info(tsk);
448 	unsigned char i;
449 
450 	/*
451 	 * We need interrupts hard-disabled here, not just soft-disabled,
452 	 * so that a PMU interrupt can't occur, which might try to access
453 	 * user memory (to get a stack trace) and possible cause an SLB miss
454 	 * which would update the slb_cache/slb_cache_ptr fields in the PACA.
455 	 */
456 	hard_irq_disable();
457 	isync();
458 	if (stress_slb()) {
459 		__slb_flush_and_restore_bolted(false);
460 		isync();
461 		get_paca()->slb_cache_ptr = 0;
462 		get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
463 
464 	} else if (cpu_has_feature(CPU_FTR_ARCH_300)) {
465 		/*
466 		 * SLBIA IH=3 invalidates all Class=1 SLBEs and their
467 		 * associated lookaside structures, which matches what
468 		 * switch_slb wants. So ARCH_300 does not use the slb
469 		 * cache.
470 		 */
471 		asm volatile(PPC_SLBIA(3));
472 
473 	} else {
474 		unsigned long offset = get_paca()->slb_cache_ptr;
475 
476 		if (!mmu_has_feature(MMU_FTR_NO_SLBIE_B) &&
477 		    offset <= SLB_CACHE_ENTRIES) {
478 			/*
479 			 * Could assert_slb_presence(true) here, but
480 			 * hypervisor or machine check could have come
481 			 * in and removed the entry at this point.
482 			 */
483 
484 			for (i = 0; i < offset; i++)
485 				slb_cache_slbie_user(i);
486 
487 			/* Workaround POWER5 < DD2.1 issue */
488 			if (!cpu_has_feature(CPU_FTR_ARCH_207S) && offset == 1)
489 				slb_cache_slbie_user(0);
490 
491 		} else {
492 			/* Flush but retain kernel lookaside information */
493 			__slb_flush_and_restore_bolted(true);
494 			isync();
495 
496 			get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
497 		}
498 
499 		get_paca()->slb_cache_ptr = 0;
500 	}
501 	get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
502 
503 	copy_mm_to_paca(mm);
504 
505 	/*
506 	 * We gradually age out SLBs after a number of context switches to
507 	 * reduce reload overhead of unused entries (like we do with FP/VEC
508 	 * reload). Each time we wrap 256 switches, take an entry out of the
509 	 * SLB preload cache.
510 	 */
511 	tsk->thread.load_slb++;
512 	if (!tsk->thread.load_slb) {
513 		unsigned long pc = KSTK_EIP(tsk);
514 
515 		preload_age(ti);
516 		preload_add(ti, pc);
517 	}
518 
519 	for (i = 0; i < ti->slb_preload_nr; i++) {
520 		unsigned char idx;
521 		unsigned long ea;
522 
523 		idx = (ti->slb_preload_tail + i) % SLB_PRELOAD_NR;
524 		ea = (unsigned long)ti->slb_preload_esid[idx] << SID_SHIFT;
525 
526 		slb_allocate_user(mm, ea);
527 	}
528 
529 	/*
530 	 * Synchronize slbmte preloads with possible subsequent user memory
531 	 * address accesses by the kernel (user mode won't happen until
532 	 * rfid, which is safe).
533 	 */
534 	isync();
535 }
536 
537 void slb_set_size(u16 size)
538 {
539 	mmu_slb_size = size;
540 }
541 
542 void slb_initialize(void)
543 {
544 	unsigned long linear_llp, vmalloc_llp, io_llp;
545 	unsigned long lflags;
546 	static int slb_encoding_inited;
547 #ifdef CONFIG_SPARSEMEM_VMEMMAP
548 	unsigned long vmemmap_llp;
549 #endif
550 
551 	/* Prepare our SLB miss handler based on our page size */
552 	linear_llp = mmu_psize_defs[mmu_linear_psize].sllp;
553 	io_llp = mmu_psize_defs[mmu_io_psize].sllp;
554 	vmalloc_llp = mmu_psize_defs[mmu_vmalloc_psize].sllp;
555 	get_paca()->vmalloc_sllp = SLB_VSID_KERNEL | vmalloc_llp;
556 #ifdef CONFIG_SPARSEMEM_VMEMMAP
557 	vmemmap_llp = mmu_psize_defs[mmu_vmemmap_psize].sllp;
558 #endif
559 	if (!slb_encoding_inited) {
560 		slb_encoding_inited = 1;
561 		pr_devel("SLB: linear  LLP = %04lx\n", linear_llp);
562 		pr_devel("SLB: io      LLP = %04lx\n", io_llp);
563 #ifdef CONFIG_SPARSEMEM_VMEMMAP
564 		pr_devel("SLB: vmemmap LLP = %04lx\n", vmemmap_llp);
565 #endif
566 	}
567 
568 	get_paca()->stab_rr = SLB_NUM_BOLTED - 1;
569 	get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
570 	get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
571 
572 	lflags = SLB_VSID_KERNEL | linear_llp;
573 
574 	/* Invalidate the entire SLB (even entry 0) & all the ERATS */
575 	asm volatile("isync":::"memory");
576 	asm volatile("slbmte  %0,%0"::"r" (0) : "memory");
577 	asm volatile("isync; slbia; isync":::"memory");
578 	create_shadowed_slbe(PAGE_OFFSET, mmu_kernel_ssize, lflags, LINEAR_INDEX);
579 
580 	/*
581 	 * For the boot cpu, we're running on the stack in init_thread_union,
582 	 * which is in the first segment of the linear mapping, and also
583 	 * get_paca()->kstack hasn't been initialized yet.
584 	 * For secondary cpus, we need to bolt the kernel stack entry now.
585 	 */
586 	slb_shadow_clear(KSTACK_INDEX);
587 	if (raw_smp_processor_id() != boot_cpuid &&
588 	    (get_paca()->kstack & slb_esid_mask(mmu_kernel_ssize)) > PAGE_OFFSET)
589 		create_shadowed_slbe(get_paca()->kstack,
590 				     mmu_kernel_ssize, lflags, KSTACK_INDEX);
591 
592 	asm volatile("isync":::"memory");
593 }
594 
595 static void slb_cache_update(unsigned long esid_data)
596 {
597 	int slb_cache_index;
598 
599 	if (cpu_has_feature(CPU_FTR_ARCH_300))
600 		return; /* ISAv3.0B and later does not use slb_cache */
601 
602 	if (stress_slb())
603 		return;
604 
605 	/*
606 	 * Now update slb cache entries
607 	 */
608 	slb_cache_index = local_paca->slb_cache_ptr;
609 	if (slb_cache_index < SLB_CACHE_ENTRIES) {
610 		/*
611 		 * We have space in slb cache for optimized switch_slb().
612 		 * Top 36 bits from esid_data as per ISA
613 		 */
614 		local_paca->slb_cache[slb_cache_index++] = esid_data >> SID_SHIFT;
615 		local_paca->slb_cache_ptr++;
616 	} else {
617 		/*
618 		 * Our cache is full and the current cache content strictly
619 		 * doesn't indicate the active SLB contents. Bump the ptr
620 		 * so that switch_slb() will ignore the cache.
621 		 */
622 		local_paca->slb_cache_ptr = SLB_CACHE_ENTRIES + 1;
623 	}
624 }
625 
626 static enum slb_index alloc_slb_index(bool kernel)
627 {
628 	enum slb_index index;
629 
630 	/*
631 	 * The allocation bitmaps can become out of synch with the SLB
632 	 * when the _switch code does slbie when bolting a new stack
633 	 * segment and it must not be anywhere else in the SLB. This leaves
634 	 * a kernel allocated entry that is unused in the SLB. With very
635 	 * large systems or small segment sizes, the bitmaps could slowly
636 	 * fill with these entries. They will eventually be cleared out
637 	 * by the round robin allocator in that case, so it's probably not
638 	 * worth accounting for.
639 	 */
640 
641 	/*
642 	 * SLBs beyond 32 entries are allocated with stab_rr only
643 	 * POWER7/8/9 have 32 SLB entries, this could be expanded if a
644 	 * future CPU has more.
645 	 */
646 	if (local_paca->slb_used_bitmap != U32_MAX) {
647 		index = ffz(local_paca->slb_used_bitmap);
648 		local_paca->slb_used_bitmap |= 1U << index;
649 		if (kernel)
650 			local_paca->slb_kern_bitmap |= 1U << index;
651 	} else {
652 		/* round-robin replacement of slb starting at SLB_NUM_BOLTED. */
653 		index = local_paca->stab_rr;
654 		if (index < (mmu_slb_size - 1))
655 			index++;
656 		else
657 			index = SLB_NUM_BOLTED;
658 		local_paca->stab_rr = index;
659 		if (index < 32) {
660 			if (kernel)
661 				local_paca->slb_kern_bitmap |= 1U << index;
662 			else
663 				local_paca->slb_kern_bitmap &= ~(1U << index);
664 		}
665 	}
666 	BUG_ON(index < SLB_NUM_BOLTED);
667 
668 	return index;
669 }
670 
671 static long slb_insert_entry(unsigned long ea, unsigned long context,
672 				unsigned long flags, int ssize, bool kernel)
673 {
674 	unsigned long vsid;
675 	unsigned long vsid_data, esid_data;
676 	enum slb_index index;
677 
678 	vsid = get_vsid(context, ea, ssize);
679 	if (!vsid)
680 		return -EFAULT;
681 
682 	/*
683 	 * There must not be a kernel SLB fault in alloc_slb_index or before
684 	 * slbmte here or the allocation bitmaps could get out of whack with
685 	 * the SLB.
686 	 *
687 	 * User SLB faults or preloads take this path which might get inlined
688 	 * into the caller, so add compiler barriers here to ensure unsafe
689 	 * memory accesses do not come between.
690 	 */
691 	barrier();
692 
693 	index = alloc_slb_index(kernel);
694 
695 	vsid_data = __mk_vsid_data(vsid, ssize, flags);
696 	esid_data = mk_esid_data(ea, ssize, index);
697 
698 	/*
699 	 * No need for an isync before or after this slbmte. The exception
700 	 * we enter with and the rfid we exit with are context synchronizing.
701 	 * User preloads should add isync afterwards in case the kernel
702 	 * accesses user memory before it returns to userspace with rfid.
703 	 */
704 	assert_slb_presence(false, ea);
705 	if (stress_slb()) {
706 		int slb_cache_index = local_paca->slb_cache_ptr;
707 
708 		/*
709 		 * stress_slb() does not use slb cache, repurpose as a
710 		 * cache of inserted (non-bolted) kernel SLB entries. All
711 		 * non-bolted kernel entries are flushed on any user fault,
712 		 * or if there are already 3 non-boled kernel entries.
713 		 */
714 		BUILD_BUG_ON(SLB_CACHE_ENTRIES < 3);
715 		if (!kernel || slb_cache_index == 3) {
716 			int i;
717 
718 			for (i = 0; i < slb_cache_index; i++)
719 				slb_cache_slbie_kernel(i);
720 			slb_cache_index = 0;
721 		}
722 
723 		if (kernel)
724 			local_paca->slb_cache[slb_cache_index++] = esid_data >> SID_SHIFT;
725 		local_paca->slb_cache_ptr = slb_cache_index;
726 	}
727 	asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data));
728 
729 	barrier();
730 
731 	if (!kernel)
732 		slb_cache_update(esid_data);
733 
734 	return 0;
735 }
736 
737 static long slb_allocate_kernel(unsigned long ea, unsigned long id)
738 {
739 	unsigned long context;
740 	unsigned long flags;
741 	int ssize;
742 
743 	if (id == LINEAR_MAP_REGION_ID) {
744 
745 		/* We only support upto H_MAX_PHYSMEM_BITS */
746 		if ((ea & EA_MASK) > (1UL << H_MAX_PHYSMEM_BITS))
747 			return -EFAULT;
748 
749 		flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_linear_psize].sllp;
750 
751 #ifdef CONFIG_SPARSEMEM_VMEMMAP
752 	} else if (id == VMEMMAP_REGION_ID) {
753 
754 		if (ea >= H_VMEMMAP_END)
755 			return -EFAULT;
756 
757 		flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_vmemmap_psize].sllp;
758 #endif
759 	} else if (id == VMALLOC_REGION_ID) {
760 
761 		if (ea >= H_VMALLOC_END)
762 			return -EFAULT;
763 
764 		flags = local_paca->vmalloc_sllp;
765 
766 	} else if (id == IO_REGION_ID) {
767 
768 		if (ea >= H_KERN_IO_END)
769 			return -EFAULT;
770 
771 		flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_io_psize].sllp;
772 
773 	} else {
774 		return -EFAULT;
775 	}
776 
777 	ssize = MMU_SEGSIZE_1T;
778 	if (!mmu_has_feature(MMU_FTR_1T_SEGMENT))
779 		ssize = MMU_SEGSIZE_256M;
780 
781 	context = get_kernel_context(ea);
782 
783 	return slb_insert_entry(ea, context, flags, ssize, true);
784 }
785 
786 static long slb_allocate_user(struct mm_struct *mm, unsigned long ea)
787 {
788 	unsigned long context;
789 	unsigned long flags;
790 	int bpsize;
791 	int ssize;
792 
793 	/*
794 	 * consider this as bad access if we take a SLB miss
795 	 * on an address above addr limit.
796 	 */
797 	if (ea >= mm_ctx_slb_addr_limit(&mm->context))
798 		return -EFAULT;
799 
800 	context = get_user_context(&mm->context, ea);
801 	if (!context)
802 		return -EFAULT;
803 
804 	if (unlikely(ea >= H_PGTABLE_RANGE)) {
805 		WARN_ON(1);
806 		return -EFAULT;
807 	}
808 
809 	ssize = user_segment_size(ea);
810 
811 	bpsize = get_slice_psize(mm, ea);
812 	flags = SLB_VSID_USER | mmu_psize_defs[bpsize].sllp;
813 
814 	return slb_insert_entry(ea, context, flags, ssize, false);
815 }
816 
817 DEFINE_INTERRUPT_HANDLER_RAW(do_slb_fault)
818 {
819 	unsigned long ea = regs->dar;
820 	unsigned long id = get_region_id(ea);
821 
822 	/* IRQs are not reconciled here, so can't check irqs_disabled */
823 	VM_WARN_ON(mfmsr() & MSR_EE);
824 
825 	if (regs_is_unrecoverable(regs))
826 		return -EINVAL;
827 
828 	/*
829 	 * SLB kernel faults must be very careful not to touch anything that is
830 	 * not bolted. E.g., PACA and global variables are okay, mm->context
831 	 * stuff is not. SLB user faults may access all of memory (and induce
832 	 * one recursive SLB kernel fault), so the kernel fault must not
833 	 * trample on the user fault state at those points.
834 	 */
835 
836 	/*
837 	 * This is a raw interrupt handler, for performance, so that
838 	 * fast_interrupt_return can be used. The handler must not touch local
839 	 * irq state, or schedule. We could test for usermode and upgrade to a
840 	 * normal process context (synchronous) interrupt for those, which
841 	 * would make them first-class kernel code and able to be traced and
842 	 * instrumented, although performance would suffer a bit, it would
843 	 * probably be a good tradeoff.
844 	 */
845 	if (id >= LINEAR_MAP_REGION_ID) {
846 		long err;
847 #ifdef CONFIG_DEBUG_VM
848 		/* Catch recursive kernel SLB faults. */
849 		BUG_ON(local_paca->in_kernel_slb_handler);
850 		local_paca->in_kernel_slb_handler = 1;
851 #endif
852 		err = slb_allocate_kernel(ea, id);
853 #ifdef CONFIG_DEBUG_VM
854 		local_paca->in_kernel_slb_handler = 0;
855 #endif
856 		return err;
857 	} else {
858 		struct mm_struct *mm = current->mm;
859 		long err;
860 
861 		if (unlikely(!mm))
862 			return -EFAULT;
863 
864 		err = slb_allocate_user(mm, ea);
865 		if (!err)
866 			preload_add(current_thread_info(), ea);
867 
868 		return err;
869 	}
870 }
871