xref: /linux/arch/powerpc/mm/book3s64/radix_tlb.c (revision 8b83369ddcb3fb9cab5c1088987ce477565bb630)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * TLB flush routines for radix kernels.
4  *
5  * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
6  */
7 
8 #include <linux/mm.h>
9 #include <linux/hugetlb.h>
10 #include <linux/memblock.h>
11 #include <linux/mmu_context.h>
12 #include <linux/sched/mm.h>
13 
14 #include <asm/ppc-opcode.h>
15 #include <asm/tlb.h>
16 #include <asm/tlbflush.h>
17 #include <asm/trace.h>
18 #include <asm/cputhreads.h>
19 #include <asm/plpar_wrappers.h>
20 
21 #include "internal.h"
22 
/*
 * Values for the "ric" operand handed to the tlbie/tlbiel helpers in this
 * file. They select which cached translation state an invalidation drops.
 */
#define RIC_FLUSH_TLB 0	/* TLB entries only */
#define RIC_FLUSH_PWC 1	/* Page Walk Cache only */
#define RIC_FLUSH_ALL 2	/* TLB entries and the Page Walk Cache */
26 
27 /*
28  * tlbiel instruction for radix, set invalidation
29  * i.e., r=1 and is=01 or is=10 or is=11
30  */
31 static __always_inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
32 					unsigned int pid,
33 					unsigned int ric, unsigned int prs)
34 {
35 	unsigned long rb;
36 	unsigned long rs;
37 
38 	rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
39 	rs = ((unsigned long)pid << PPC_BITLSHIFT(31));
40 
41 	asm volatile(PPC_TLBIEL(%0, %1, %2, %3, 1)
42 		     : : "r"(rb), "r"(rs), "i"(ric), "i"(prs)
43 		     : "memory");
44 }
45 
46 static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
47 {
48 	unsigned int set;
49 
50 	asm volatile("ptesync": : :"memory");
51 
52 	/*
53 	 * Flush the first set of the TLB, and the entire Page Walk Cache
54 	 * and partition table entries. Then flush the remaining sets of the
55 	 * TLB.
56 	 */
57 
58 	if (early_cpu_has_feature(CPU_FTR_HVMODE)) {
59 		/* MSR[HV] should flush partition scope translations first. */
60 		tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0);
61 
62 		if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) {
63 			for (set = 1; set < num_sets; set++)
64 				tlbiel_radix_set_isa300(set, is, 0,
65 							RIC_FLUSH_TLB, 0);
66 		}
67 	}
68 
69 	/* Flush process scoped entries. */
70 	tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1);
71 
72 	if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) {
73 		for (set = 1; set < num_sets; set++)
74 			tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1);
75 	}
76 
77 	ppc_after_tlbiel_barrier();
78 }
79 
80 void radix__tlbiel_all(unsigned int action)
81 {
82 	unsigned int is;
83 
84 	switch (action) {
85 	case TLB_INVAL_SCOPE_GLOBAL:
86 		is = 3;
87 		break;
88 	case TLB_INVAL_SCOPE_LPID:
89 		is = 2;
90 		break;
91 	default:
92 		BUG();
93 	}
94 
95 	if (early_cpu_has_feature(CPU_FTR_ARCH_300))
96 		tlbiel_all_isa300(POWER9_TLB_SETS_RADIX, is);
97 	else
98 		WARN(1, "%s called on pre-POWER9 CPU\n", __func__);
99 
100 	asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory");
101 }
102 
103 static __always_inline void __tlbiel_pid(unsigned long pid, int set,
104 				unsigned long ric)
105 {
106 	unsigned long rb,rs,prs,r;
107 
108 	rb = PPC_BIT(53); /* IS = 1 */
109 	rb |= set << PPC_BITLSHIFT(51);
110 	rs = ((unsigned long)pid) << PPC_BITLSHIFT(31);
111 	prs = 1; /* process scoped */
112 	r = 1;   /* radix format */
113 
114 	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
115 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
116 	trace_tlbie(0, 1, rb, rs, ric, prs, r);
117 }
118 
119 static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric)
120 {
121 	unsigned long rb,rs,prs,r;
122 
123 	rb = PPC_BIT(53); /* IS = 1 */
124 	rs = pid << PPC_BITLSHIFT(31);
125 	prs = 1; /* process scoped */
126 	r = 1;   /* radix format */
127 
128 	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
129 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
130 	trace_tlbie(0, 0, rb, rs, ric, prs, r);
131 }
132 
133 static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
134 {
135 	unsigned long rb,rs,prs,r;
136 
137 	rb = PPC_BIT(52); /* IS = 2 */
138 	rs = lpid;
139 	prs = 0; /* partition scoped */
140 	r = 1;   /* radix format */
141 
142 	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
143 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
144 	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
145 }
146 
147 static __always_inline void __tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
148 {
149 	unsigned long rb,rs,prs,r;
150 
151 	rb = PPC_BIT(52); /* IS = 2 */
152 	rs = lpid;
153 	prs = 1; /* process scoped */
154 	r = 1;   /* radix format */
155 
156 	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
157 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
158 	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
159 }
160 
161 static __always_inline void __tlbiel_va(unsigned long va, unsigned long pid,
162 					unsigned long ap, unsigned long ric)
163 {
164 	unsigned long rb,rs,prs,r;
165 
166 	rb = va & ~(PPC_BITMASK(52, 63));
167 	rb |= ap << PPC_BITLSHIFT(58);
168 	rs = pid << PPC_BITLSHIFT(31);
169 	prs = 1; /* process scoped */
170 	r = 1;   /* radix format */
171 
172 	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
173 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
174 	trace_tlbie(0, 1, rb, rs, ric, prs, r);
175 }
176 
177 static __always_inline void __tlbie_va(unsigned long va, unsigned long pid,
178 				       unsigned long ap, unsigned long ric)
179 {
180 	unsigned long rb,rs,prs,r;
181 
182 	rb = va & ~(PPC_BITMASK(52, 63));
183 	rb |= ap << PPC_BITLSHIFT(58);
184 	rs = pid << PPC_BITLSHIFT(31);
185 	prs = 1; /* process scoped */
186 	r = 1;   /* radix format */
187 
188 	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
189 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
190 	trace_tlbie(0, 0, rb, rs, ric, prs, r);
191 }
192 
193 static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
194 					    unsigned long ap, unsigned long ric)
195 {
196 	unsigned long rb,rs,prs,r;
197 
198 	rb = va & ~(PPC_BITMASK(52, 63));
199 	rb |= ap << PPC_BITLSHIFT(58);
200 	rs = lpid;
201 	prs = 0; /* partition scoped */
202 	r = 1;   /* radix format */
203 
204 	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
205 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
206 	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
207 }
208 
209 
210 static inline void fixup_tlbie_va(unsigned long va, unsigned long pid,
211 				  unsigned long ap)
212 {
213 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
214 		asm volatile("ptesync": : :"memory");
215 		__tlbie_va(va, 0, ap, RIC_FLUSH_TLB);
216 	}
217 
218 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
219 		asm volatile("ptesync": : :"memory");
220 		__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
221 	}
222 }
223 
224 static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
225 					unsigned long ap)
226 {
227 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
228 		asm volatile("ptesync": : :"memory");
229 		__tlbie_pid(0, RIC_FLUSH_TLB);
230 	}
231 
232 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
233 		asm volatile("ptesync": : :"memory");
234 		__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
235 	}
236 }
237 
238 static inline void fixup_tlbie_pid(unsigned long pid)
239 {
240 	/*
241 	 * We can use any address for the invalidation, pick one which is
242 	 * probably unused as an optimisation.
243 	 */
244 	unsigned long va = ((1UL << 52) - 1);
245 
246 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
247 		asm volatile("ptesync": : :"memory");
248 		__tlbie_pid(0, RIC_FLUSH_TLB);
249 	}
250 
251 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
252 		asm volatile("ptesync": : :"memory");
253 		__tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
254 	}
255 }
256 
257 
258 static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
259 				       unsigned long ap)
260 {
261 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
262 		asm volatile("ptesync": : :"memory");
263 		__tlbie_lpid_va(va, 0, ap, RIC_FLUSH_TLB);
264 	}
265 
266 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
267 		asm volatile("ptesync": : :"memory");
268 		__tlbie_lpid_va(va, lpid, ap, RIC_FLUSH_TLB);
269 	}
270 }
271 
272 static inline void fixup_tlbie_lpid(unsigned long lpid)
273 {
274 	/*
275 	 * We can use any address for the invalidation, pick one which is
276 	 * probably unused as an optimisation.
277 	 */
278 	unsigned long va = ((1UL << 52) - 1);
279 
280 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
281 		asm volatile("ptesync": : :"memory");
282 		__tlbie_lpid(0, RIC_FLUSH_TLB);
283 	}
284 
285 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
286 		asm volatile("ptesync": : :"memory");
287 		__tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
288 	}
289 }
290 
291 /*
292  * We use 128 set in radix mode and 256 set in hpt mode.
293  */
294 static __always_inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
295 {
296 	int set;
297 
298 	asm volatile("ptesync": : :"memory");
299 
300 	/*
301 	 * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
302 	 * also flush the entire Page Walk Cache.
303 	 */
304 	__tlbiel_pid(pid, 0, ric);
305 
306 	/* For PWC, only one flush is needed */
307 	if (ric == RIC_FLUSH_PWC) {
308 		ppc_after_tlbiel_barrier();
309 		return;
310 	}
311 
312 	if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
313 		/* For the remaining sets, just flush the TLB */
314 		for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++)
315 			__tlbiel_pid(pid, set, RIC_FLUSH_TLB);
316 	}
317 
318 	ppc_after_tlbiel_barrier();
319 	asm volatile(PPC_RADIX_INVALIDATE_ERAT_USER "; isync" : : :"memory");
320 }
321 
322 static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
323 {
324 	asm volatile("ptesync": : :"memory");
325 
326 	/*
327 	 * Workaround the fact that the "ric" argument to __tlbie_pid
328 	 * must be a compile-time contraint to match the "i" constraint
329 	 * in the asm statement.
330 	 */
331 	switch (ric) {
332 	case RIC_FLUSH_TLB:
333 		__tlbie_pid(pid, RIC_FLUSH_TLB);
334 		fixup_tlbie_pid(pid);
335 		break;
336 	case RIC_FLUSH_PWC:
337 		__tlbie_pid(pid, RIC_FLUSH_PWC);
338 		break;
339 	case RIC_FLUSH_ALL:
340 	default:
341 		__tlbie_pid(pid, RIC_FLUSH_ALL);
342 		fixup_tlbie_pid(pid);
343 	}
344 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
345 }
346 
/* Argument bundle handed to do_tlbiel_pid() through the IPI info pointer. */
struct tlbiel_pid {
	unsigned long pid;	/* PID whose translations are flushed */
	unsigned long ric;	/* RIC_FLUSH_* selector */
};
351 
352 static void do_tlbiel_pid(void *info)
353 {
354 	struct tlbiel_pid *t = info;
355 
356 	if (t->ric == RIC_FLUSH_TLB)
357 		_tlbiel_pid(t->pid, RIC_FLUSH_TLB);
358 	else if (t->ric == RIC_FLUSH_PWC)
359 		_tlbiel_pid(t->pid, RIC_FLUSH_PWC);
360 	else
361 		_tlbiel_pid(t->pid, RIC_FLUSH_ALL);
362 }
363 
364 static inline void _tlbiel_pid_multicast(struct mm_struct *mm,
365 				unsigned long pid, unsigned long ric)
366 {
367 	struct cpumask *cpus = mm_cpumask(mm);
368 	struct tlbiel_pid t = { .pid = pid, .ric = ric };
369 
370 	on_each_cpu_mask(cpus, do_tlbiel_pid, &t, 1);
371 	/*
372 	 * Always want the CPU translations to be invalidated with tlbiel in
373 	 * these paths, so while coprocessors must use tlbie, we can not
374 	 * optimise away the tlbiel component.
375 	 */
376 	if (atomic_read(&mm->context.copros) > 0)
377 		_tlbie_pid(pid, RIC_FLUSH_ALL);
378 }
379 
380 static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
381 {
382 	asm volatile("ptesync": : :"memory");
383 
384 	/*
385 	 * Workaround the fact that the "ric" argument to __tlbie_pid
386 	 * must be a compile-time contraint to match the "i" constraint
387 	 * in the asm statement.
388 	 */
389 	switch (ric) {
390 	case RIC_FLUSH_TLB:
391 		__tlbie_lpid(lpid, RIC_FLUSH_TLB);
392 		fixup_tlbie_lpid(lpid);
393 		break;
394 	case RIC_FLUSH_PWC:
395 		__tlbie_lpid(lpid, RIC_FLUSH_PWC);
396 		break;
397 	case RIC_FLUSH_ALL:
398 	default:
399 		__tlbie_lpid(lpid, RIC_FLUSH_ALL);
400 		fixup_tlbie_lpid(lpid);
401 	}
402 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
403 }
404 
405 static __always_inline void _tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
406 {
407 	/*
408 	 * Workaround the fact that the "ric" argument to __tlbie_pid
409 	 * must be a compile-time contraint to match the "i" constraint
410 	 * in the asm statement.
411 	 */
412 	switch (ric) {
413 	case RIC_FLUSH_TLB:
414 		__tlbie_lpid_guest(lpid, RIC_FLUSH_TLB);
415 		break;
416 	case RIC_FLUSH_PWC:
417 		__tlbie_lpid_guest(lpid, RIC_FLUSH_PWC);
418 		break;
419 	case RIC_FLUSH_ALL:
420 	default:
421 		__tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
422 	}
423 	fixup_tlbie_lpid(lpid);
424 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
425 }
426 
427 static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
428 				    unsigned long pid, unsigned long page_size,
429 				    unsigned long psize)
430 {
431 	unsigned long addr;
432 	unsigned long ap = mmu_get_ap(psize);
433 
434 	for (addr = start; addr < end; addr += page_size)
435 		__tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
436 }
437 
438 static __always_inline void _tlbiel_va(unsigned long va, unsigned long pid,
439 				       unsigned long psize, unsigned long ric)
440 {
441 	unsigned long ap = mmu_get_ap(psize);
442 
443 	asm volatile("ptesync": : :"memory");
444 	__tlbiel_va(va, pid, ap, ric);
445 	ppc_after_tlbiel_barrier();
446 }
447 
448 static inline void _tlbiel_va_range(unsigned long start, unsigned long end,
449 				    unsigned long pid, unsigned long page_size,
450 				    unsigned long psize, bool also_pwc)
451 {
452 	asm volatile("ptesync": : :"memory");
453 	if (also_pwc)
454 		__tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
455 	__tlbiel_va_range(start, end, pid, page_size, psize);
456 	ppc_after_tlbiel_barrier();
457 }
458 
459 static inline void __tlbie_va_range(unsigned long start, unsigned long end,
460 				    unsigned long pid, unsigned long page_size,
461 				    unsigned long psize)
462 {
463 	unsigned long addr;
464 	unsigned long ap = mmu_get_ap(psize);
465 
466 	for (addr = start; addr < end; addr += page_size)
467 		__tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
468 
469 	fixup_tlbie_va_range(addr - page_size, pid, ap);
470 }
471 
472 static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
473 				      unsigned long psize, unsigned long ric)
474 {
475 	unsigned long ap = mmu_get_ap(psize);
476 
477 	asm volatile("ptesync": : :"memory");
478 	__tlbie_va(va, pid, ap, ric);
479 	fixup_tlbie_va(va, pid, ap);
480 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
481 }
482 
/* Argument bundle handed to do_tlbiel_va() through the IPI info pointer. */
struct tlbiel_va {
	unsigned long pid;	/* PID owning the translation */
	unsigned long va;	/* address to invalidate */
	unsigned long psize;	/* MMU page size index */
	unsigned long ric;	/* RIC_FLUSH_* selector */
};
489 
490 static void do_tlbiel_va(void *info)
491 {
492 	struct tlbiel_va *t = info;
493 
494 	if (t->ric == RIC_FLUSH_TLB)
495 		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_TLB);
496 	else if (t->ric == RIC_FLUSH_PWC)
497 		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_PWC);
498 	else
499 		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_ALL);
500 }
501 
502 static inline void _tlbiel_va_multicast(struct mm_struct *mm,
503 				unsigned long va, unsigned long pid,
504 				unsigned long psize, unsigned long ric)
505 {
506 	struct cpumask *cpus = mm_cpumask(mm);
507 	struct tlbiel_va t = { .va = va, .pid = pid, .psize = psize, .ric = ric };
508 	on_each_cpu_mask(cpus, do_tlbiel_va, &t, 1);
509 	if (atomic_read(&mm->context.copros) > 0)
510 		_tlbie_va(va, pid, psize, RIC_FLUSH_TLB);
511 }
512 
/*
 * Argument bundle handed to do_tlbiel_va_range() through the IPI info
 * pointer; the fields mirror the parameters of _tlbiel_va_range().
 */
struct tlbiel_va_range {
	unsigned long pid;		/* PID owning the translations */
	unsigned long start;		/* first address of the range */
	unsigned long end;		/* end of the range (exclusive) */
	unsigned long page_size;	/* step between invalidations */
	unsigned long psize;		/* MMU page size index */
	bool also_pwc;			/* flush the Page Walk Cache too */
};
521 
522 static void do_tlbiel_va_range(void *info)
523 {
524 	struct tlbiel_va_range *t = info;
525 
526 	_tlbiel_va_range(t->start, t->end, t->pid, t->page_size,
527 				    t->psize, t->also_pwc);
528 }
529 
530 static __always_inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
531 			      unsigned long psize, unsigned long ric)
532 {
533 	unsigned long ap = mmu_get_ap(psize);
534 
535 	asm volatile("ptesync": : :"memory");
536 	__tlbie_lpid_va(va, lpid, ap, ric);
537 	fixup_tlbie_lpid_va(va, lpid, ap);
538 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
539 }
540 
541 static inline void _tlbie_va_range(unsigned long start, unsigned long end,
542 				    unsigned long pid, unsigned long page_size,
543 				    unsigned long psize, bool also_pwc)
544 {
545 	asm volatile("ptesync": : :"memory");
546 	if (also_pwc)
547 		__tlbie_pid(pid, RIC_FLUSH_PWC);
548 	__tlbie_va_range(start, end, pid, page_size, psize);
549 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
550 }
551 
552 static inline void _tlbiel_va_range_multicast(struct mm_struct *mm,
553 				unsigned long start, unsigned long end,
554 				unsigned long pid, unsigned long page_size,
555 				unsigned long psize, bool also_pwc)
556 {
557 	struct cpumask *cpus = mm_cpumask(mm);
558 	struct tlbiel_va_range t = { .start = start, .end = end,
559 				.pid = pid, .page_size = page_size,
560 				.psize = psize, .also_pwc = also_pwc };
561 
562 	on_each_cpu_mask(cpus, do_tlbiel_va_range, &t, 1);
563 	if (atomic_read(&mm->context.copros) > 0)
564 		_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
565 }
566 
567 /*
568  * Base TLB flushing operations:
569  *
570  *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
571  *  - flush_tlb_page(vma, vmaddr) flushes one page
572  *  - flush_tlb_range(vma, start, end) flushes a range of pages
573  *  - flush_tlb_kernel_range(start, end) flushes kernel pages
574  *
575  *  - local_* variants of page and mm only apply to the current
576  *    processor
577  */
578 void radix__local_flush_tlb_mm(struct mm_struct *mm)
579 {
580 	unsigned long pid;
581 
582 	preempt_disable();
583 	pid = mm->context.id;
584 	if (pid != MMU_NO_CONTEXT)
585 		_tlbiel_pid(pid, RIC_FLUSH_TLB);
586 	preempt_enable();
587 }
588 EXPORT_SYMBOL(radix__local_flush_tlb_mm);
589 
590 #ifndef CONFIG_SMP
591 void radix__local_flush_all_mm(struct mm_struct *mm)
592 {
593 	unsigned long pid;
594 
595 	preempt_disable();
596 	pid = mm->context.id;
597 	if (pid != MMU_NO_CONTEXT)
598 		_tlbiel_pid(pid, RIC_FLUSH_ALL);
599 	preempt_enable();
600 }
601 EXPORT_SYMBOL(radix__local_flush_all_mm);
602 
603 static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
604 {
605 	radix__local_flush_all_mm(mm);
606 }
607 #endif /* CONFIG_SMP */
608 
609 void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
610 				       int psize)
611 {
612 	unsigned long pid;
613 
614 	preempt_disable();
615 	pid = mm->context.id;
616 	if (pid != MMU_NO_CONTEXT)
617 		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
618 	preempt_enable();
619 }
620 
621 void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
622 {
623 #ifdef CONFIG_HUGETLB_PAGE
624 	/* need the return fix for nohash.c */
625 	if (is_vm_hugetlb_page(vma))
626 		return radix__local_flush_hugetlb_page(vma, vmaddr);
627 #endif
628 	radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
629 }
630 EXPORT_SYMBOL(radix__local_flush_tlb_page);
631 
632 static bool mm_needs_flush_escalation(struct mm_struct *mm)
633 {
634 	/*
635 	 * P9 nest MMU has issues with the page walk cache
636 	 * caching PTEs and not flushing them properly when
637 	 * RIC = 0 for a PID/LPID invalidate
638 	 */
639 	if (atomic_read(&mm->context.copros) > 0)
640 		return true;
641 	return false;
642 }
643 
644 /*
645  * If always_flush is true, then flush even if this CPU can't be removed
646  * from mm_cpumask.
647  */
648 void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush)
649 {
650 	unsigned long pid = mm->context.id;
651 	int cpu = smp_processor_id();
652 
653 	/*
654 	 * A kthread could have done a mmget_not_zero() after the flushing CPU
655 	 * checked mm_cpumask, and be in the process of kthread_use_mm when
656 	 * interrupted here. In that case, current->mm will be set to mm,
657 	 * because kthread_use_mm() setting ->mm and switching to the mm is
658 	 * done with interrupts off.
659 	 */
660 	if (current->mm == mm)
661 		goto out;
662 
663 	if (current->active_mm == mm) {
664 		WARN_ON_ONCE(current->mm != NULL);
665 		/* Is a kernel thread and is using mm as the lazy tlb */
666 		mmgrab(&init_mm);
667 		current->active_mm = &init_mm;
668 		switch_mm_irqs_off(mm, &init_mm, current);
669 		mmdrop(mm);
670 	}
671 
672 	/*
673 	 * This IPI may be initiated from any source including those not
674 	 * running the mm, so there may be a racing IPI that comes after
675 	 * this one which finds the cpumask already clear. Check and avoid
676 	 * underflowing the active_cpus count in that case. The race should
677 	 * not otherwise be a problem, but the TLB must be flushed because
678 	 * that's what the caller expects.
679 	 */
680 	if (cpumask_test_cpu(cpu, mm_cpumask(mm))) {
681 		atomic_dec(&mm->context.active_cpus);
682 		cpumask_clear_cpu(cpu, mm_cpumask(mm));
683 		always_flush = true;
684 	}
685 
686 out:
687 	if (always_flush)
688 		_tlbiel_pid(pid, RIC_FLUSH_ALL);
689 }
690 
691 #ifdef CONFIG_SMP
692 static void do_exit_flush_lazy_tlb(void *arg)
693 {
694 	struct mm_struct *mm = arg;
695 	exit_lazy_flush_tlb(mm, true);
696 }
697 
698 static void exit_flush_lazy_tlbs(struct mm_struct *mm)
699 {
700 	/*
701 	 * Would be nice if this was async so it could be run in
702 	 * parallel with our local flush, but generic code does not
703 	 * give a good API for it. Could extend the generic code or
704 	 * make a special powerpc IPI for flushing TLBs.
705 	 * For now it's not too performance critical.
706 	 */
707 	smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb,
708 				(void *)mm, 1);
709 }
710 
711 #else /* CONFIG_SMP */
/* !CONFIG_SMP flavour: nothing to do. */
static inline void exit_flush_lazy_tlbs(struct mm_struct *mm)
{
}
713 #endif /* CONFIG_SMP */
714 
715 static DEFINE_PER_CPU(unsigned int, mm_cpumask_trim_clock);
716 
717 /*
718  * Interval between flushes at which we send out IPIs to check whether the
719  * mm_cpumask can be trimmed for the case where it's not a single-threaded
720  * process flushing its own mm. The intent is to reduce the cost of later
721  * flushes. Don't want this to be so low that it adds noticable cost to TLB
722  * flushing, or so high that it doesn't help reduce global TLBIEs.
723  */
724 static unsigned long tlb_mm_cpumask_trim_timer = 1073;
725 
726 static bool tick_and_test_trim_clock(void)
727 {
728 	if (__this_cpu_inc_return(mm_cpumask_trim_clock) ==
729 			tlb_mm_cpumask_trim_timer) {
730 		__this_cpu_write(mm_cpumask_trim_clock, 0);
731 		return true;
732 	}
733 	return false;
734 }
735 
/* Verdict of flush_type_needed(): how far an invalidation must reach. */
enum tlb_flush_type {
	FLUSH_TYPE_NONE,	/* no flush required */
	FLUSH_TYPE_LOCAL,	/* flushing the current CPU is enough */
	FLUSH_TYPE_GLOBAL,	/* other CPUs/agents must be flushed too */
};
741 
742 static enum tlb_flush_type flush_type_needed(struct mm_struct *mm, bool fullmm)
743 {
744 	int active_cpus = atomic_read(&mm->context.active_cpus);
745 	int cpu = smp_processor_id();
746 
747 	if (active_cpus == 0)
748 		return FLUSH_TYPE_NONE;
749 	if (active_cpus == 1 && cpumask_test_cpu(cpu, mm_cpumask(mm))) {
750 		if (current->mm != mm) {
751 			/*
752 			 * Asynchronous flush sources may trim down to nothing
753 			 * if the process is not running, so occasionally try
754 			 * to trim.
755 			 */
756 			if (tick_and_test_trim_clock()) {
757 				exit_lazy_flush_tlb(mm, true);
758 				return FLUSH_TYPE_NONE;
759 			}
760 		}
761 		return FLUSH_TYPE_LOCAL;
762 	}
763 
764 	/* Coprocessors require TLBIE to invalidate nMMU. */
765 	if (atomic_read(&mm->context.copros) > 0)
766 		return FLUSH_TYPE_GLOBAL;
767 
768 	/*
769 	 * In the fullmm case there's no point doing the exit_flush_lazy_tlbs
770 	 * because the mm is being taken down anyway, and a TLBIE tends to
771 	 * be faster than an IPI+TLBIEL.
772 	 */
773 	if (fullmm)
774 		return FLUSH_TYPE_GLOBAL;
775 
776 	/*
777 	 * If we are running the only thread of a single-threaded process,
778 	 * then we should almost always be able to trim off the rest of the
779 	 * CPU mask (except in the case of use_mm() races), so always try
780 	 * trimming the mask.
781 	 */
782 	if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm) {
783 		exit_flush_lazy_tlbs(mm);
784 		/*
785 		 * use_mm() race could prevent IPIs from being able to clear
786 		 * the cpumask here, however those users are established
787 		 * after our first check (and so after the PTEs are removed),
788 		 * and the TLB still gets flushed by the IPI, so this CPU
789 		 * will only require a local flush.
790 		 */
791 		return FLUSH_TYPE_LOCAL;
792 	}
793 
794 	/*
795 	 * Occasionally try to trim down the cpumask. It's possible this can
796 	 * bring the mask to zero, which results in no flush.
797 	 */
798 	if (tick_and_test_trim_clock()) {
799 		exit_flush_lazy_tlbs(mm);
800 		if (current->mm == mm)
801 			return FLUSH_TYPE_LOCAL;
802 		if (cpumask_test_cpu(cpu, mm_cpumask(mm)))
803 			exit_lazy_flush_tlb(mm, true);
804 		return FLUSH_TYPE_NONE;
805 	}
806 
807 	return FLUSH_TYPE_GLOBAL;
808 }
809 
810 #ifdef CONFIG_SMP
811 void radix__flush_tlb_mm(struct mm_struct *mm)
812 {
813 	unsigned long pid;
814 	enum tlb_flush_type type;
815 
816 	pid = mm->context.id;
817 	if (unlikely(pid == MMU_NO_CONTEXT))
818 		return;
819 
820 	preempt_disable();
821 	/*
822 	 * Order loads of mm_cpumask (in flush_type_needed) vs previous
823 	 * stores to clear ptes before the invalidate. See barrier in
824 	 * switch_mm_irqs_off
825 	 */
826 	smp_mb();
827 	type = flush_type_needed(mm, false);
828 	if (type == FLUSH_TYPE_LOCAL) {
829 		_tlbiel_pid(pid, RIC_FLUSH_TLB);
830 	} else if (type == FLUSH_TYPE_GLOBAL) {
831 		if (!mmu_has_feature(MMU_FTR_GTSE)) {
832 			unsigned long tgt = H_RPTI_TARGET_CMMU;
833 
834 			if (atomic_read(&mm->context.copros) > 0)
835 				tgt |= H_RPTI_TARGET_NMMU;
836 			pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB,
837 					       H_RPTI_PAGE_ALL, 0, -1UL);
838 		} else if (cputlb_use_tlbie()) {
839 			if (mm_needs_flush_escalation(mm))
840 				_tlbie_pid(pid, RIC_FLUSH_ALL);
841 			else
842 				_tlbie_pid(pid, RIC_FLUSH_TLB);
843 		} else {
844 			_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB);
845 		}
846 	}
847 	preempt_enable();
848 }
849 EXPORT_SYMBOL(radix__flush_tlb_mm);
850 
851 static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
852 {
853 	unsigned long pid;
854 	enum tlb_flush_type type;
855 
856 	pid = mm->context.id;
857 	if (unlikely(pid == MMU_NO_CONTEXT))
858 		return;
859 
860 	preempt_disable();
861 	smp_mb(); /* see radix__flush_tlb_mm */
862 	type = flush_type_needed(mm, fullmm);
863 	if (type == FLUSH_TYPE_LOCAL) {
864 		_tlbiel_pid(pid, RIC_FLUSH_ALL);
865 	} else if (type == FLUSH_TYPE_GLOBAL) {
866 		if (!mmu_has_feature(MMU_FTR_GTSE)) {
867 			unsigned long tgt = H_RPTI_TARGET_CMMU;
868 			unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
869 					     H_RPTI_TYPE_PRT;
870 
871 			if (atomic_read(&mm->context.copros) > 0)
872 				tgt |= H_RPTI_TARGET_NMMU;
873 			pseries_rpt_invalidate(pid, tgt, type,
874 					       H_RPTI_PAGE_ALL, 0, -1UL);
875 		} else if (cputlb_use_tlbie())
876 			_tlbie_pid(pid, RIC_FLUSH_ALL);
877 		else
878 			_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL);
879 	}
880 	preempt_enable();
881 }
882 
883 void radix__flush_all_mm(struct mm_struct *mm)
884 {
885 	__flush_all_mm(mm, false);
886 }
887 EXPORT_SYMBOL(radix__flush_all_mm);
888 
889 void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
890 				 int psize)
891 {
892 	unsigned long pid;
893 	enum tlb_flush_type type;
894 
895 	pid = mm->context.id;
896 	if (unlikely(pid == MMU_NO_CONTEXT))
897 		return;
898 
899 	preempt_disable();
900 	smp_mb(); /* see radix__flush_tlb_mm */
901 	type = flush_type_needed(mm, false);
902 	if (type == FLUSH_TYPE_LOCAL) {
903 		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
904 	} else if (type == FLUSH_TYPE_GLOBAL) {
905 		if (!mmu_has_feature(MMU_FTR_GTSE)) {
906 			unsigned long tgt, pg_sizes, size;
907 
908 			tgt = H_RPTI_TARGET_CMMU;
909 			pg_sizes = psize_to_rpti_pgsize(psize);
910 			size = 1UL << mmu_psize_to_shift(psize);
911 
912 			if (atomic_read(&mm->context.copros) > 0)
913 				tgt |= H_RPTI_TARGET_NMMU;
914 			pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB,
915 					       pg_sizes, vmaddr,
916 					       vmaddr + size);
917 		} else if (cputlb_use_tlbie())
918 			_tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
919 		else
920 			_tlbiel_va_multicast(mm, vmaddr, pid, psize, RIC_FLUSH_TLB);
921 	}
922 	preempt_enable();
923 }
924 
925 void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
926 {
927 #ifdef CONFIG_HUGETLB_PAGE
928 	if (is_vm_hugetlb_page(vma))
929 		return radix__flush_hugetlb_page(vma, vmaddr);
930 #endif
931 	radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
932 }
933 EXPORT_SYMBOL(radix__flush_tlb_page);
934 
935 #endif /* CONFIG_SMP */
936 
937 static void do_tlbiel_kernel(void *info)
938 {
939 	_tlbiel_pid(0, RIC_FLUSH_ALL);
940 }
941 
942 static inline void _tlbiel_kernel_broadcast(void)
943 {
944 	on_each_cpu(do_tlbiel_kernel, NULL, 1);
945 	if (tlbie_capable) {
946 		/*
947 		 * Coherent accelerators don't refcount kernel memory mappings,
948 		 * so have to always issue a tlbie for them. This is quite a
949 		 * slow path anyway.
950 		 */
951 		_tlbie_pid(0, RIC_FLUSH_ALL);
952 	}
953 }
954 
955 /*
956  * If kernel TLBIs ever become local rather than global, then
957  * drivers/misc/ocxl/link.c:ocxl_link_add_pe will need some work, as it
958  * assumes kernel TLBIs are global.
959  */
960 void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
961 {
962 	if (!mmu_has_feature(MMU_FTR_GTSE)) {
963 		unsigned long tgt = H_RPTI_TARGET_CMMU | H_RPTI_TARGET_NMMU;
964 		unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
965 				     H_RPTI_TYPE_PRT;
966 
967 		pseries_rpt_invalidate(0, tgt, type, H_RPTI_PAGE_ALL,
968 				       start, end);
969 	} else if (cputlb_use_tlbie())
970 		_tlbie_pid(0, RIC_FLUSH_ALL);
971 	else
972 		_tlbiel_kernel_broadcast();
973 }
974 EXPORT_SYMBOL(radix__flush_tlb_kernel_range);
975 
/*
 * "end" value that marks a range flush as covering the whole mm.
 * Parenthesized so the expansion stays a single operand in any expression.
 */
#define TLB_FLUSH_ALL (-1UL)
977 
978 /*
979  * Number of pages above which we invalidate the entire PID rather than
980  * flush individual pages, for local and global flushes respectively.
981  *
982  * tlbie goes out to the interconnect and individual ops are more costly.
983  * It also does not iterate over sets like the local tlbiel variant when
984  * invalidating a full PID, so it has a far lower threshold to change from
985  * individual page flushes to full-pid flushes.
986  */
987 static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
988 static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;
989 
990 static inline void __radix__flush_tlb_range(struct mm_struct *mm,
991 					    unsigned long start, unsigned long end)
992 
993 {
994 	unsigned long pid;
995 	unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
996 	unsigned long page_size = 1UL << page_shift;
997 	unsigned long nr_pages = (end - start) >> page_shift;
998 	bool fullmm = (end == TLB_FLUSH_ALL);
999 	bool flush_pid;
1000 	enum tlb_flush_type type;
1001 
1002 	pid = mm->context.id;
1003 	if (unlikely(pid == MMU_NO_CONTEXT))
1004 		return;
1005 
1006 	preempt_disable();
1007 	smp_mb(); /* see radix__flush_tlb_mm */
1008 	type = flush_type_needed(mm, fullmm);
1009 	if (type == FLUSH_TYPE_NONE)
1010 		goto out;
1011 
1012 	if (fullmm)
1013 		flush_pid = true;
1014 	else if (type == FLUSH_TYPE_GLOBAL)
1015 		flush_pid = nr_pages > tlb_single_page_flush_ceiling;
1016 	else
1017 		flush_pid = nr_pages > tlb_local_single_page_flush_ceiling;
1018 
1019 	if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) {
1020 		unsigned long tgt = H_RPTI_TARGET_CMMU;
1021 		unsigned long pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize);
1022 
1023 		if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
1024 			pg_sizes |= psize_to_rpti_pgsize(MMU_PAGE_2M);
1025 		if (atomic_read(&mm->context.copros) > 0)
1026 			tgt |= H_RPTI_TARGET_NMMU;
1027 		pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB, pg_sizes,
1028 				       start, end);
1029 	} else if (flush_pid) {
1030 		if (type == FLUSH_TYPE_LOCAL) {
1031 			_tlbiel_pid(pid, RIC_FLUSH_TLB);
1032 		} else {
1033 			if (cputlb_use_tlbie()) {
1034 				if (mm_needs_flush_escalation(mm))
1035 					_tlbie_pid(pid, RIC_FLUSH_ALL);
1036 				else
1037 					_tlbie_pid(pid, RIC_FLUSH_TLB);
1038 			} else {
1039 				_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB);
1040 			}
1041 		}
1042 	} else {
1043 		bool hflush = false;
1044 		unsigned long hstart, hend;
1045 
1046 		if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
1047 			hstart = (start + PMD_SIZE - 1) & PMD_MASK;
1048 			hend = end & PMD_MASK;
1049 			if (hstart < hend)
1050 				hflush = true;
1051 		}
1052 
1053 		if (type == FLUSH_TYPE_LOCAL) {
1054 			asm volatile("ptesync": : :"memory");
1055 			__tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
1056 			if (hflush)
1057 				__tlbiel_va_range(hstart, hend, pid,
1058 						PMD_SIZE, MMU_PAGE_2M);
1059 			ppc_after_tlbiel_barrier();
1060 		} else if (cputlb_use_tlbie()) {
1061 			asm volatile("ptesync": : :"memory");
1062 			__tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
1063 			if (hflush)
1064 				__tlbie_va_range(hstart, hend, pid,
1065 						PMD_SIZE, MMU_PAGE_2M);
1066 			asm volatile("eieio; tlbsync; ptesync": : :"memory");
1067 		} else {
1068 			_tlbiel_va_range_multicast(mm,
1069 					start, end, pid, page_size, mmu_virtual_psize, false);
1070 			if (hflush)
1071 				_tlbiel_va_range_multicast(mm,
1072 					hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, false);
1073 		}
1074 	}
1075 out:
1076 	preempt_enable();
1077 }
1078 
1079 void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
1080 		     unsigned long end)
1081 
1082 {
1083 #ifdef CONFIG_HUGETLB_PAGE
1084 	if (is_vm_hugetlb_page(vma))
1085 		return radix__flush_hugetlb_tlb_range(vma, start, end);
1086 #endif
1087 
1088 	__radix__flush_tlb_range(vma->vm_mm, start, end);
1089 }
1090 EXPORT_SYMBOL(radix__flush_tlb_range);
1091 
1092 static int radix_get_mmu_psize(int page_size)
1093 {
1094 	int psize;
1095 
1096 	if (page_size == (1UL << mmu_psize_defs[mmu_virtual_psize].shift))
1097 		psize = mmu_virtual_psize;
1098 	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift))
1099 		psize = MMU_PAGE_2M;
1100 	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift))
1101 		psize = MMU_PAGE_1G;
1102 	else
1103 		return -1;
1104 	return psize;
1105 }
1106 
1107 /*
1108  * Flush partition scoped LPID address translation for all CPUs.
1109  */
1110 void radix__flush_tlb_lpid_page(unsigned int lpid,
1111 					unsigned long addr,
1112 					unsigned long page_size)
1113 {
1114 	int psize = radix_get_mmu_psize(page_size);
1115 
1116 	_tlbie_lpid_va(addr, lpid, psize, RIC_FLUSH_TLB);
1117 }
1118 EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid_page);
1119 
1120 /*
1121  * Flush partition scoped PWC from LPID for all CPUs.
1122  */
1123 void radix__flush_pwc_lpid(unsigned int lpid)
1124 {
1125 	_tlbie_lpid(lpid, RIC_FLUSH_PWC);
1126 }
1127 EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid);
1128 
1129 /*
1130  * Flush partition scoped translations from LPID (=LPIDR)
1131  */
1132 void radix__flush_all_lpid(unsigned int lpid)
1133 {
1134 	_tlbie_lpid(lpid, RIC_FLUSH_ALL);
1135 }
1136 EXPORT_SYMBOL_GPL(radix__flush_all_lpid);
1137 
1138 /*
1139  * Flush process scoped translations from LPID (=LPIDR)
1140  */
1141 void radix__flush_all_lpid_guest(unsigned int lpid)
1142 {
1143 	_tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
1144 }
1145 
1146 static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
1147 				  unsigned long end, int psize);
1148 
1149 void radix__tlb_flush(struct mmu_gather *tlb)
1150 {
1151 	int psize = 0;
1152 	struct mm_struct *mm = tlb->mm;
1153 	int page_size = tlb->page_size;
1154 	unsigned long start = tlb->start;
1155 	unsigned long end = tlb->end;
1156 
1157 	/*
1158 	 * if page size is not something we understand, do a full mm flush
1159 	 *
1160 	 * A "fullmm" flush must always do a flush_all_mm (RIC=2) flush
1161 	 * that flushes the process table entry cache upon process teardown.
1162 	 * See the comment for radix in arch_exit_mmap().
1163 	 */
1164 	if (tlb->fullmm || tlb->need_flush_all) {
1165 		__flush_all_mm(mm, true);
1166 	} else if ( (psize = radix_get_mmu_psize(page_size)) == -1) {
1167 		if (!tlb->freed_tables)
1168 			radix__flush_tlb_mm(mm);
1169 		else
1170 			radix__flush_all_mm(mm);
1171 	} else {
1172 		if (!tlb->freed_tables)
1173 			radix__flush_tlb_range_psize(mm, start, end, psize);
1174 		else
1175 			radix__flush_tlb_pwc_range_psize(mm, start, end, psize);
1176 	}
1177 }
1178 
1179 static __always_inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
1180 				unsigned long start, unsigned long end,
1181 				int psize, bool also_pwc)
1182 {
1183 	unsigned long pid;
1184 	unsigned int page_shift = mmu_psize_defs[psize].shift;
1185 	unsigned long page_size = 1UL << page_shift;
1186 	unsigned long nr_pages = (end - start) >> page_shift;
1187 	bool fullmm = (end == TLB_FLUSH_ALL);
1188 	bool flush_pid;
1189 	enum tlb_flush_type type;
1190 
1191 	pid = mm->context.id;
1192 	if (unlikely(pid == MMU_NO_CONTEXT))
1193 		return;
1194 
1195 	fullmm = (end == TLB_FLUSH_ALL);
1196 
1197 	preempt_disable();
1198 	smp_mb(); /* see radix__flush_tlb_mm */
1199 	type = flush_type_needed(mm, fullmm);
1200 	if (type == FLUSH_TYPE_NONE)
1201 		goto out;
1202 
1203 	if (fullmm)
1204 		flush_pid = true;
1205 	else if (type == FLUSH_TYPE_GLOBAL)
1206 		flush_pid = nr_pages > tlb_single_page_flush_ceiling;
1207 	else
1208 		flush_pid = nr_pages > tlb_local_single_page_flush_ceiling;
1209 
1210 	if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) {
1211 		unsigned long tgt = H_RPTI_TARGET_CMMU;
1212 		unsigned long type = H_RPTI_TYPE_TLB;
1213 		unsigned long pg_sizes = psize_to_rpti_pgsize(psize);
1214 
1215 		if (also_pwc)
1216 			type |= H_RPTI_TYPE_PWC;
1217 		if (atomic_read(&mm->context.copros) > 0)
1218 			tgt |= H_RPTI_TARGET_NMMU;
1219 		pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end);
1220 	} else if (flush_pid) {
1221 		if (type == FLUSH_TYPE_LOCAL) {
1222 			_tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
1223 		} else {
1224 			if (cputlb_use_tlbie()) {
1225 				if (mm_needs_flush_escalation(mm))
1226 					also_pwc = true;
1227 
1228 				_tlbie_pid(pid,
1229 					also_pwc ?  RIC_FLUSH_ALL : RIC_FLUSH_TLB);
1230 			} else {
1231 				_tlbiel_pid_multicast(mm, pid,
1232 					also_pwc ?  RIC_FLUSH_ALL : RIC_FLUSH_TLB);
1233 			}
1234 
1235 		}
1236 	} else {
1237 		if (type == FLUSH_TYPE_LOCAL)
1238 			_tlbiel_va_range(start, end, pid, page_size, psize, also_pwc);
1239 		else if (cputlb_use_tlbie())
1240 			_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
1241 		else
1242 			_tlbiel_va_range_multicast(mm,
1243 					start, end, pid, page_size, psize, also_pwc);
1244 	}
1245 out:
1246 	preempt_enable();
1247 }
1248 
1249 void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
1250 				  unsigned long end, int psize)
1251 {
1252 	return __radix__flush_tlb_range_psize(mm, start, end, psize, false);
1253 }
1254 
1255 static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
1256 				  unsigned long end, int psize)
1257 {
1258 	__radix__flush_tlb_range_psize(mm, start, end, psize, true);
1259 }
1260 
1261 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
1262 void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
1263 {
1264 	unsigned long pid, end;
1265 	enum tlb_flush_type type;
1266 
1267 	pid = mm->context.id;
1268 	if (unlikely(pid == MMU_NO_CONTEXT))
1269 		return;
1270 
1271 	/* 4k page size, just blow the world */
1272 	if (PAGE_SIZE == 0x1000) {
1273 		radix__flush_all_mm(mm);
1274 		return;
1275 	}
1276 
1277 	end = addr + HPAGE_PMD_SIZE;
1278 
1279 	/* Otherwise first do the PWC, then iterate the pages. */
1280 	preempt_disable();
1281 	smp_mb(); /* see radix__flush_tlb_mm */
1282 	type = flush_type_needed(mm, false);
1283 	if (type == FLUSH_TYPE_LOCAL) {
1284 		_tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
1285 	} else if (type == FLUSH_TYPE_GLOBAL) {
1286 		if (!mmu_has_feature(MMU_FTR_GTSE)) {
1287 			unsigned long tgt, type, pg_sizes;
1288 
1289 			tgt = H_RPTI_TARGET_CMMU;
1290 			type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
1291 			       H_RPTI_TYPE_PRT;
1292 			pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize);
1293 
1294 			if (atomic_read(&mm->context.copros) > 0)
1295 				tgt |= H_RPTI_TARGET_NMMU;
1296 			pseries_rpt_invalidate(pid, tgt, type, pg_sizes,
1297 					       addr, end);
1298 		} else if (cputlb_use_tlbie())
1299 			_tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
1300 		else
1301 			_tlbiel_va_range_multicast(mm,
1302 					addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
1303 	}
1304 
1305 	preempt_enable();
1306 }
1307 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
1308 
1309 void radix__flush_pmd_tlb_range(struct vm_area_struct *vma,
1310 				unsigned long start, unsigned long end)
1311 {
1312 	radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_2M);
1313 }
1314 EXPORT_SYMBOL(radix__flush_pmd_tlb_range);
1315 
1316 void radix__flush_tlb_all(void)
1317 {
1318 	unsigned long rb,prs,r,rs;
1319 	unsigned long ric = RIC_FLUSH_ALL;
1320 
1321 	rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */
1322 	prs = 0; /* partition scoped */
1323 	r = 1;   /* radix format */
1324 	rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */
1325 
1326 	asm volatile("ptesync": : :"memory");
1327 	/*
1328 	 * now flush guest entries by passing PRS = 1 and LPID != 0
1329 	 */
1330 	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
1331 		     : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory");
1332 	/*
1333 	 * now flush host entires by passing PRS = 0 and LPID == 0
1334 	 */
1335 	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
1336 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
1337 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
1338 }
1339 
1340 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
1341 extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
1342 {
1343 	unsigned long pid = mm->context.id;
1344 
1345 	if (unlikely(pid == MMU_NO_CONTEXT))
1346 		return;
1347 
1348 	if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
1349 		return;
1350 
1351 	/*
1352 	 * If this context hasn't run on that CPU before and KVM is
1353 	 * around, there's a slim chance that the guest on another
1354 	 * CPU just brought in obsolete translation into the TLB of
1355 	 * this CPU due to a bad prefetch using the guest PID on
1356 	 * the way into the hypervisor.
1357 	 *
1358 	 * We work around this here. If KVM is possible, we check if
1359 	 * any sibling thread is in KVM. If it is, the window may exist
1360 	 * and thus we flush that PID from the core.
1361 	 *
1362 	 * A potential future improvement would be to mark which PIDs
1363 	 * have never been used on the system and avoid it if the PID
1364 	 * is new and the process has no other cpumask bit set.
1365 	 */
1366 	if (cpu_has_feature(CPU_FTR_HVMODE) && radix_enabled()) {
1367 		int cpu = smp_processor_id();
1368 		int sib = cpu_first_thread_sibling(cpu);
1369 		bool flush = false;
1370 
1371 		for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) {
1372 			if (sib == cpu)
1373 				continue;
1374 			if (!cpu_possible(sib))
1375 				continue;
1376 			if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu)
1377 				flush = true;
1378 		}
1379 		if (flush)
1380 			_tlbiel_pid(pid, RIC_FLUSH_ALL);
1381 	}
1382 }
1383 EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround);
1384 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
1385