// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * TLB flush routines for radix kernels.
 *
 * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
 */

#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/memblock.h>
#include <linux/mmu_context.h>
#include <linux/sched/mm.h>
#include <linux/debugfs.h>

#include <asm/ppc-opcode.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/trace.h>
#include <asm/cputhreads.h>
#include <asm/plpar_wrappers.h>

#include "internal.h"

/*
 * tlbiel instruction for radix, set invalidation
 * i.e., r=1 and is=01 or is=10 or is=11
 *
 * Builds RB from @set and @is, RS from @pid, and issues a single tlbiel.
 * @ric and @prs are passed with the "i" asm constraint, so callers must
 * supply compile-time constants (hence __always_inline).
 */
static __always_inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
					unsigned int pid,
					unsigned int ric, unsigned int prs)
{
	unsigned long rb;
	unsigned long rs;

	rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
	rs = ((unsigned long)pid << PPC_BITLSHIFT(31));

	/* Macro arguments are (rb, rs, ric, prs, r); r is hard-coded to 1. */
	asm volatile(PPC_TLBIEL(%0, %1, %2, %3, 1)
		     : : "r"(rb), "r"(rs), "i"(ric), "i"(prs)
		     : "memory");
}

/*
 * Invalidate the local TLB with tlbiel, using invalidation selector @is.
 *
 * The sequence is bracketed by a leading ptesync and a trailing
 * ppc_after_tlbiel_barrier().  When CPU_FTR_HVMODE is set a prs=0 pass is
 * issued before the prs=1 pass.  In each pass set 0 is flushed with
 * RIC_FLUSH_ALL; sets 1..@num_sets-1 are flushed with RIC_FLUSH_TLB only
 * when CPU_FTR_ARCH_31 is absent.
 */
static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
{
	unsigned int set;

	asm volatile("ptesync": : :"memory");

	/*
	 * Flush the first set of the TLB, and the entire Page Walk Cache
	 * and partition table entries. Then flush the remaining sets of the
	 * TLB.
	 */

	if (early_cpu_has_feature(CPU_FTR_HVMODE)) {
		/* MSR[HV] should flush partition scope translations first. */
		tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0);

		if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) {
			for (set = 1; set < num_sets; set++)
				tlbiel_radix_set_isa300(set, is, 0,
							RIC_FLUSH_TLB, 0);
		}
	}

	/* Flush process scoped entries. */
	tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1);

	if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) {
		for (set = 1; set < num_sets; set++)
			tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1);
	}

	ppc_after_tlbiel_barrier();
}

/*
 * Flush the local TLB for the scope named by @action.
 *
 * TLB_INVAL_SCOPE_GLOBAL maps to IS=3 and TLB_INVAL_SCOPE_LPID to IS=2; any
 * other value hits BUG().  On CPUs without CPU_FTR_ARCH_300 only a warning
 * is emitted.  The ERAT invalidate + isync is executed in either case.
 */
void radix__tlbiel_all(unsigned int action)
{
	unsigned int is;

	switch (action) {
	case TLB_INVAL_SCOPE_GLOBAL:
		is = 3;
		break;
	case TLB_INVAL_SCOPE_LPID:
		is = 2;
		break;
	default:
		BUG();
	}

	if (early_cpu_has_feature(CPU_FTR_ARCH_300))
		tlbiel_all_isa300(POWER9_TLB_SETS_RADIX, is);
	else
		WARN(1, "%s called on pre-POWER9 CPU\n", __func__);

	asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory");
}

/*
 * Raw invalidation primitives.
 *
 * Each helper below builds RB/RS, issues exactly one tlbiel (local) or
 * tlbie instruction and emits a trace_tlbie() event.  None of them issue
 * barriers; the callers provide the surrounding ptesync / tlbsync.
 *
 * In every asm statement the operands are listed as
 * (%0=rb, %1=r, %2=prs, %3=ric, %4=rs) and handed to the instruction macro
 * as (%0, %4, %3, %2, %1), i.e. in (rb, rs, ric, prs, r) order.  r, prs and
 * ric use the "i" constraint and must therefore be compile-time constants.
 *
 * The second trace_tlbie() argument is 1 for the tlbiel helpers and 0 for
 * the tlbie helpers.
 */

/* Local (tlbiel) process-scoped invalidation of @pid for one TLB @set. */
static __always_inline void __tlbiel_pid(unsigned long pid, int set,
				unsigned long ric)
{
	unsigned long rb,rs,prs,r;

	rb = PPC_BIT(53); /* IS = 1 */
	rb |= set << PPC_BITLSHIFT(51);
	rs = ((unsigned long)pid) << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 1, rb, rs, ric, prs, r);
}

/* tlbie process-scoped invalidation of @pid. */
static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric)
{
	unsigned long rb,rs,prs,r;

	rb = PPC_BIT(53); /* IS = 1 */
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

/*
 * tlbie process-scoped invalidation of @pid with an explicit @lpid: RS
 * carries the PID in its upper half and the low 32 bits of @lpid below it.
 */
static __always_inline void __tlbie_pid_lpid(unsigned long pid,
					     unsigned long lpid,
					     unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(53); /* IS = 1 */
	rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

/* tlbie partition-scoped invalidation of @lpid. */
static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
{
	unsigned long rb,rs,prs,r;

	rb = PPC_BIT(52); /* IS = 2 */
	rs = lpid;
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

/* Same as __tlbie_lpid() except that prs = 1 (process scoped). */
static __always_inline void __tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
{
	unsigned long rb,rs,prs,r;

	rb = PPC_BIT(52); /* IS = 2 */
	rs = lpid;
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

/*
 * Local (tlbiel) invalidation of one address.  RB is @va with its low 12
 * bits cleared, combined with the actual-page-size encoding @ap.
 */
static __always_inline void __tlbiel_va(unsigned long va, unsigned long pid,
					unsigned long ap, unsigned long ric)
{
	unsigned long rb,rs,prs,r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 1, rb, rs, ric, prs, r);
}

/* tlbie counterpart of __tlbiel_va(). */
static __always_inline void __tlbie_va(unsigned long va, unsigned long pid,
				       unsigned long ap, unsigned long ric)
{
	unsigned long rb,rs,prs,r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

/* __tlbie_va() with an explicit @lpid placed in the low 32 bits of RS. */
static __always_inline void __tlbie_va_lpid(unsigned long va, unsigned long pid,
					    unsigned long lpid,
					    unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

/* tlbie partition-scoped invalidation of one address for @lpid. */
static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
					    unsigned long ap, unsigned long ric)
{
	unsigned long rb,rs,prs,r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = lpid;
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}


/*
 * tlbie fixups.
 *
 * Each fixup_tlbie_*() helper issues additional "ptesync; tlbie" pairs after
 * the caller's real invalidation, conditional on the
 * CPU_FTR_P9_TLBIE_ERAT_BUG and CPU_FTR_P9_TLBIE_STQ_BUG feature bits.
 * For the ERAT bug the extra tlbie targets PID/LPID 0; for the STQ bug it
 * repeats a single-address tlbie for the real PID/LPID.
 */

/* Fixup after a single-address process-scoped tlbie. */
static inline void fixup_tlbie_va(unsigned long va, unsigned long pid,
				  unsigned long ap)
{
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, 0, ap, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
	}
}

/* Fixup after a range of process-scoped tlbies; @va is one address from it. */
static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
					unsigned long ap)
{
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_pid(0, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
	}
}

/* fixup_tlbie_va_range() for the explicit-LPID variants. */
static inline void fixup_tlbie_va_range_lpid(unsigned long va,
					     unsigned long pid,
					     unsigned long lpid,
					     unsigned long ap)
{
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync" : : : "memory");
		__tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync" : : : "memory");
		__tlbie_va_lpid(va, pid, lpid, ap, RIC_FLUSH_TLB);
	}
}

/* Fixup after a whole-PID tlbie. */
static inline void fixup_tlbie_pid(unsigned long pid)
{
	/*
	 * We can use any address for the invalidation, pick one which is
	 * probably unused as an optimisation.
	 */
	unsigned long va = ((1UL << 52) - 1);

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_pid(0, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
	}
}

/* fixup_tlbie_pid() for the explicit-LPID variants. */
static inline void fixup_tlbie_pid_lpid(unsigned long pid, unsigned long lpid)
{
	/*
	 * We can use any address for the invalidation, pick one which is
	 * probably unused as an optimisation.
	 */
	unsigned long va = ((1UL << 52) - 1);

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync" : : : "memory");
		__tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync" : : : "memory");
		__tlbie_va_lpid(va, pid, lpid, mmu_get_ap(MMU_PAGE_64K),
				RIC_FLUSH_TLB);
	}
}

/* Fixup after a single-address partition-scoped tlbie. */
static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
				       unsigned long ap)
{
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid_va(va, 0, ap, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid_va(va, lpid, ap, RIC_FLUSH_TLB);
	}
}

/* Fixup after a whole-LPID tlbie. */
static inline void fixup_tlbie_lpid(unsigned long lpid)
{
	/*
	 * We can use any address for the invalidation, pick one which is
	 * probably unused as an optimisation.
	 */
	unsigned long va = ((1UL << 52) - 1);

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid(0, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
	}
}

/*
 * We use 128 sets in radix mode and 256 sets in hpt mode.
*/
/*
 * Local (tlbiel) flush of everything belonging to @pid.
 *
 * RIC_FLUSH_PWC needs only a single tlbiel and returns early, before the
 * ERAT invalidate at the bottom.  For the other RIC values set 0 is flushed
 * with the requested RIC and, unless the CPU has CPU_FTR_ARCH_31, the
 * remaining POWER9_TLB_SETS_RADIX - 1 sets are flushed with RIC_FLUSH_TLB.
 */
static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
{
	int set;

	asm volatile("ptesync": : :"memory");

	switch (ric) {
	case RIC_FLUSH_PWC:

		/* For PWC, only one flush is needed */
		__tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
		ppc_after_tlbiel_barrier();
		return;
	case RIC_FLUSH_TLB:
		__tlbiel_pid(pid, 0, RIC_FLUSH_TLB);
		break;
	case RIC_FLUSH_ALL:
	default:
		/*
		 * Flush the first set of the TLB, and if
		 * we're doing a RIC_FLUSH_ALL, also flush
		 * the entire Page Walk Cache.
		 */
		__tlbiel_pid(pid, 0, RIC_FLUSH_ALL);
	}

	if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
		/* For the remaining sets, just flush the TLB */
		for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++)
			__tlbiel_pid(pid, set, RIC_FLUSH_TLB);
	}

	ppc_after_tlbiel_barrier();
	asm volatile(PPC_RADIX_INVALIDATE_ERAT_USER "; isync" : : :"memory");
}

/*
 * tlbie flush of @pid, bracketed by "ptesync" and "eieio; tlbsync; ptesync".
 * The P9 fixup is applied for RIC_FLUSH_TLB and RIC_FLUSH_ALL but not for
 * RIC_FLUSH_PWC.
 */
static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
{
	asm volatile("ptesync": : :"memory");

	/*
	 * Workaround the fact that the "ric" argument to __tlbie_pid
	 * must be a compile-time constraint to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_pid(pid, RIC_FLUSH_TLB);
		fixup_tlbie_pid(pid);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_pid(pid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_pid(pid, RIC_FLUSH_ALL);
		fixup_tlbie_pid(pid);
	}
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

/* _tlbie_pid() for an explicit @lpid. */
static inline void _tlbie_pid_lpid(unsigned long pid, unsigned long lpid,
				   unsigned long ric)
{
	asm volatile("ptesync" : : : "memory");

	/*
	 * Workaround the fact that the "ric" argument to __tlbie_pid
	 * must be a compile-time constraint to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
		fixup_tlbie_pid_lpid(pid, lpid);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
		fixup_tlbie_pid_lpid(pid, lpid);
	}
	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
}

/* Argument block handed to do_tlbiel_pid() through the IPI call. */
struct tlbiel_pid {
	unsigned long pid;
	unsigned long ric;
};

/*
 * IPI callback: local flush of t->pid.  The if/else ladder turns the
 * runtime ric value into a constant argument at each call site.
 */
static void do_tlbiel_pid(void *info)
{
	struct tlbiel_pid *t = info;

	if (t->ric == RIC_FLUSH_TLB)
		_tlbiel_pid(t->pid, RIC_FLUSH_TLB);
	else if (t->ric == RIC_FLUSH_PWC)
		_tlbiel_pid(t->pid, RIC_FLUSH_PWC);
	else
		_tlbiel_pid(t->pid, RIC_FLUSH_ALL);
}

/*
 * Run a local PID flush on every CPU in mm_cpumask(@mm) and wait for
 * completion (wait = 1, so the on-stack argument block stays valid).  If the
 * mm has coprocessor users, follow up with a tlbie, which always uses
 * RIC_FLUSH_ALL regardless of @ric.
 */
static inline void _tlbiel_pid_multicast(struct mm_struct *mm,
				unsigned long pid, unsigned long ric)
{
	struct cpumask *cpus = mm_cpumask(mm);
	struct tlbiel_pid t = { .pid = pid, .ric = ric };

	on_each_cpu_mask(cpus, do_tlbiel_pid, &t, 1);
	/*
	 * Always want the CPU translations to be invalidated with tlbiel in
	 * these paths, so while coprocessors must use tlbie, we can not
	 * optimise away the tlbiel component.
	 */
	if (atomic_read(&mm->context.copros) > 0)
		_tlbie_pid(pid, RIC_FLUSH_ALL);
}

/*
 * tlbie partition-scoped flush of @lpid with full barriers; the P9 fixup is
 * skipped for RIC_FLUSH_PWC.
 */
static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
{
	asm volatile("ptesync": : :"memory");

	/*
	 * Workaround the fact that the "ric" argument to __tlbie_pid
	 * must be a compile-time constraint to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_lpid(lpid, RIC_FLUSH_TLB);
		fixup_tlbie_lpid(lpid);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_lpid(lpid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_lpid(lpid, RIC_FLUSH_ALL);
	fixup_tlbie_lpid(lpid);
	}
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

/*
 * tlbie process-scoped flush of a guest @lpid.
 *
 * NOTE(review): unlike _tlbie_lpid(), there is no leading ptesync here and
 * fixup_tlbie_lpid() runs for every RIC value, including RIC_FLUSH_PWC.
 * Presumably intentional - confirm against the callers.
 */
static __always_inline void _tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
{
	/*
	 * Workaround the fact that the "ric" argument to __tlbie_pid
	 * must be a compile-time constraint to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_lpid_guest(lpid, RIC_FLUSH_TLB);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_lpid_guest(lpid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
	}
	fixup_tlbie_lpid(lpid);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

/*
 * Issue one tlbiel per page for [@start, @end), stepping by @page_size.
 * No barriers; see _tlbiel_va_range().
 */
static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize)
{
	unsigned long addr;
	unsigned long ap = mmu_get_ap(psize);

	for (addr = start; addr < end; addr += page_size)
		__tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
}

/* Local flush of a single address, with barriers. */
static __always_inline void _tlbiel_va(unsigned long va, unsigned long pid,
				       unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbiel_va(va, pid, ap, ric);
	ppc_after_tlbiel_barrier();
}

/*
 * Local flush of an address range, with barriers.  When @also_pwc is set a
 * single RIC_FLUSH_PWC tlbiel for @pid is issued first.
 */
static inline void _tlbiel_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize, bool also_pwc)
{
	asm volatile("ptesync": : :"memory");
	if (also_pwc)
		__tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
	__tlbiel_va_range(start, end, pid, page_size, psize);
	ppc_after_tlbiel_barrier();
}

/*
 * Issue one tlbie per page for [@start, @end), then the P9 range fixup using
 * the last address that was stepped over (addr - page_size).  No barriers.
 */
static inline void __tlbie_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize)
{
	unsigned long addr;
	unsigned long ap = mmu_get_ap(psize);

	for (addr = start; addr < end; addr += page_size)
		__tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);

	fixup_tlbie_va_range(addr - page_size, pid, ap);
}

/* __tlbie_va_range() for an explicit @lpid. */
static inline void __tlbie_va_range_lpid(unsigned long start, unsigned long end,
					 unsigned long pid, unsigned long lpid,
					 unsigned long page_size,
					 unsigned long psize)
{
	unsigned long addr;
	unsigned long ap = mmu_get_ap(psize);

	for (addr = start; addr < end; addr += page_size)
		__tlbie_va_lpid(addr, pid, lpid, ap, RIC_FLUSH_TLB);

	fixup_tlbie_va_range_lpid(addr - page_size, pid, lpid, ap);
}

/* tlbie flush of a single address, with barriers and the P9 fixup. */
static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
				      unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbie_va(va, pid, ap, ric);
	fixup_tlbie_va(va, pid, ap);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

/* Argument block handed to do_tlbiel_va() through the IPI call. */
struct tlbiel_va {
	unsigned long pid;
	unsigned long va;
	unsigned long psize;
	unsigned long ric;
};

/*
 * IPI callback: local flush of one address.  The ladder turns the runtime
 * ric value into a constant argument at each call site.
 */
static void do_tlbiel_va(void *info)
{
	struct tlbiel_va *t = info;

	if (t->ric == RIC_FLUSH_TLB)
		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_TLB);
	else if (t->ric == RIC_FLUSH_PWC)
		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_PWC);
	else
		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_ALL);
}

/*
 * Run a local single-address flush on every CPU in mm_cpumask(@mm) and wait
 * for completion.  If the mm has coprocessor users, follow up with a tlbie,
 * which always uses RIC_FLUSH_TLB regardless of @ric.
 */
static inline void _tlbiel_va_multicast(struct mm_struct *mm,
				unsigned long va, unsigned long pid,
				unsigned long psize, unsigned long ric)
{
	struct cpumask *cpus = mm_cpumask(mm);
	struct tlbiel_va t = { .va = va, .pid = pid, .psize = psize, .ric = ric };
	on_each_cpu_mask(cpus, do_tlbiel_va, &t, 1);
	if (atomic_read(&mm->context.copros) > 0)
		_tlbie_va(va, pid, psize, RIC_FLUSH_TLB);
}

/* Argument block handed to do_tlbiel_va_range() through the IPI call. */
struct tlbiel_va_range {
	unsigned long pid;
	unsigned long start;
	unsigned long end;
	unsigned long page_size;
	unsigned long psize;
	bool also_pwc;
};

/* IPI callback: local flush of an address range. */
static void do_tlbiel_va_range(void *info)
{
	struct tlbiel_va_range *t = info;

	_tlbiel_va_range(t->start, t->end, t->pid, t->page_size,
				    t->psize, t->also_pwc);
}

/*
 * tlbie partition-scoped flush of a single address for @lpid, with barriers
 * and the P9 fixup.
 */
static __always_inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
			      unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbie_lpid_va(va, lpid, ap, ric);
	fixup_tlbie_lpid_va(va, lpid, ap);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

/*
 * tlbie flush of an address range, with barriers.  When @also_pwc is set a
 * RIC_FLUSH_PWC tlbie for @pid is issued first.
 */
static inline void _tlbie_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize, bool also_pwc)
{
	asm volatile("ptesync": : :"memory");
	if (also_pwc)
		__tlbie_pid(pid, RIC_FLUSH_PWC);
	__tlbie_va_range(start, end, pid, page_size, psize);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

/* _tlbie_va_range() for an explicit @lpid. */
static inline void _tlbie_va_range_lpid(unsigned long start, unsigned long end,
					unsigned long pid, unsigned long lpid,
					unsigned long page_size,
					unsigned long psize, bool also_pwc)
{
	asm volatile("ptesync" : : : "memory");
	if (also_pwc)
		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
	__tlbie_va_range_lpid(start, end, pid, lpid, page_size, psize);
	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
}

/*
 * Run a local range flush on every CPU in mm_cpumask(@mm) and wait for
 * completion; if the mm has coprocessor users, repeat the range flush with
 * tlbie.
 */
static inline void _tlbiel_va_range_multicast(struct mm_struct *mm,
				unsigned long start, unsigned long end,
				unsigned long pid, unsigned long page_size,
				unsigned long psize, bool also_pwc)
{
	struct cpumask *cpus = mm_cpumask(mm);
	struct tlbiel_va_range t = { .start = start, .end = end,
				.pid = pid, .page_size = page_size,
				.psize = psize, .also_pwc = also_pwc };

	on_each_cpu_mask(cpus, do_tlbiel_va_range, &t, 1);
	if (atomic_read(&mm->context.copros) > 0)
		_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
}

/*
 * Base TLB flushing operations:
 *
 *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
 *  - flush_tlb_page(vma, vmaddr) flushes one page
 *  - flush_tlb_range(vma, start, end) flushes a range of pages
 *  - flush_tlb_kernel_range(start, end) flushes kernel pages
 *
 *  - local_* variants of page and mm only apply to the current
 *    processor
 */

/*
 * Flush this CPU's TLB entries (RIC_FLUSH_TLB) for @mm.  Warns once and does
 * nothing if the mm has no context id.
 */
void radix__local_flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long pid = mm->context.id;

	if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	_tlbiel_pid(pid, RIC_FLUSH_TLB);
	preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_tlb_mm);

#ifndef CONFIG_SMP
/* As radix__local_flush_tlb_mm(), but with RIC_FLUSH_ALL. */
void radix__local_flush_all_mm(struct mm_struct *mm)
{
	unsigned long pid = mm->context.id;

	if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	_tlbiel_pid(pid, RIC_FLUSH_ALL);
	preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_all_mm);

/* !SMP: a full flush is just the local one; @fullmm is unused. */
static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
{
	radix__local_flush_all_mm(mm);
}
#endif /* CONFIG_SMP */

/* Flush one page of size class @psize from this CPU's TLB for @mm. */
void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
				       int psize)
{
	unsigned long pid = mm->context.id;

	if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	preempt_enable();
}

/*
 * Flush one page of @vma from this CPU's TLB, delegating to the hugetlb
 * variant for hugetlb VMAs and using mmu_virtual_psize otherwise.
 */
void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
	/* need the return fix for nohash.c */
	if (is_vm_hugetlb_page(vma))
		return radix__local_flush_hugetlb_page(vma, vmaddr);
#endif
	radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__local_flush_tlb_page);

/*
 * Returns true when a RIC_FLUSH_TLB PID flush for @mm must be escalated to
 * RIC_FLUSH_ALL: the CPU lacks CPU_FTR_ARCH_31 and the mm has coprocessor
 * users.
 */
static bool mm_needs_flush_escalation(struct mm_struct *mm)
{
	/*
	 * The P9 nest MMU has issues with the page walk cache caching PTEs
	 * and not flushing them when RIC = 0 for a PID/LPID invalidate.
	 *
	 * This may have been fixed in shipping firmware (by disabling PWC
	 * or preventing it from caching PTEs), but until that is confirmed,
	 * this workaround is required - escalate all RIC=0 IS=1/2/3 flushes
	 * to RIC=2.
	 *
	 * POWER10 (and P9P) does not have this problem.
	 */
	if (cpu_has_feature(CPU_FTR_ARCH_31))
		return false;
	if (atomic_read(&mm->context.copros) > 0)
		return true;
	return false;
}

/*
 * If always_flush is true, then flush even if this CPU can't be removed
 * from mm_cpumask.
 *
 * Otherwise the local RIC_FLUSH_ALL flush happens only when this CPU was
 * actually removed from mm_cpumask(@mm) by this call.
 */
void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush)
{
	unsigned long pid = mm->context.id;
	int cpu = smp_processor_id();

	/*
	 * A kthread could have done a mmget_not_zero() after the flushing CPU
	 * checked mm_cpumask, and be in the process of kthread_use_mm when
	 * interrupted here. In that case, current->mm will be set to mm,
	 * because kthread_use_mm() setting ->mm and switching to the mm is
	 * done with interrupts off.
	 */
	if (current->mm == mm)
		goto out;

	if (current->active_mm == mm) {
		WARN_ON_ONCE(current->mm != NULL);
		/* Is a kernel thread and is using mm as the lazy tlb */
		mmgrab_lazy_tlb(&init_mm);
		current->active_mm = &init_mm;
		switch_mm_irqs_off(mm, &init_mm, current);
		mmdrop_lazy_tlb(mm);
	}

	/*
	 * This IPI may be initiated from any source including those not
	 * running the mm, so there may be a racing IPI that comes after
	 * this one which finds the cpumask already clear. Check and avoid
	 * underflowing the active_cpus count in that case. The race should
	 * not otherwise be a problem, but the TLB must be flushed because
	 * that's what the caller expects.
	 */
	if (cpumask_test_cpu(cpu, mm_cpumask(mm))) {
		atomic_dec(&mm->context.active_cpus);
		cpumask_clear_cpu(cpu, mm_cpumask(mm));
		always_flush = true;
	}

out:
	if (always_flush)
		_tlbiel_pid(pid, RIC_FLUSH_ALL);
}

#ifdef CONFIG_SMP
/* IPI callback: drop the lazy mm reference and always flush. */
static void do_exit_flush_lazy_tlb(void *arg)
{
	struct mm_struct *mm = arg;
	exit_lazy_flush_tlb(mm, true);
}

/*
 * Ask the CPUs in mm_cpumask(@mm) to run do_exit_flush_lazy_tlb(), waiting
 * for them to finish (wait = 1).
 */
static void exit_flush_lazy_tlbs(struct mm_struct *mm)
{
	/*
	 * Would be nice if this was async so it could be run in
	 * parallel with our local flush, but generic code does not
	 * give a good API for it. Could extend the generic code or
	 * make a special powerpc IPI for flushing TLBs.
	 * For now it's not too performance critical.
	 */
	smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb,
				(void *)mm, 1);
}

#else /* CONFIG_SMP */
static inline void exit_flush_lazy_tlbs(struct mm_struct *mm) { }
#endif /* CONFIG_SMP */

/* Per-CPU flush counter, ticked and reset by tick_and_test_trim_clock(). */
static DEFINE_PER_CPU(unsigned int, mm_cpumask_trim_clock);

/*
 * Interval between flushes at which we send out IPIs to check whether the
 * mm_cpumask can be trimmed for the case where it's not a single-threaded
 * process flushing its own mm.
The intent is to reduce the cost of later 855 * flushes. Don't want this to be so low that it adds noticable cost to TLB 856 * flushing, or so high that it doesn't help reduce global TLBIEs. 857 */ 858 static unsigned long tlb_mm_cpumask_trim_timer = 1073; 859 860 static bool tick_and_test_trim_clock(void) 861 { 862 if (__this_cpu_inc_return(mm_cpumask_trim_clock) == 863 tlb_mm_cpumask_trim_timer) { 864 __this_cpu_write(mm_cpumask_trim_clock, 0); 865 return true; 866 } 867 return false; 868 } 869 870 enum tlb_flush_type { 871 FLUSH_TYPE_NONE, 872 FLUSH_TYPE_LOCAL, 873 FLUSH_TYPE_GLOBAL, 874 }; 875 876 static enum tlb_flush_type flush_type_needed(struct mm_struct *mm, bool fullmm) 877 { 878 int active_cpus = atomic_read(&mm->context.active_cpus); 879 int cpu = smp_processor_id(); 880 881 if (active_cpus == 0) 882 return FLUSH_TYPE_NONE; 883 if (active_cpus == 1 && cpumask_test_cpu(cpu, mm_cpumask(mm))) { 884 if (current->mm != mm) { 885 /* 886 * Asynchronous flush sources may trim down to nothing 887 * if the process is not running, so occasionally try 888 * to trim. 889 */ 890 if (tick_and_test_trim_clock()) { 891 exit_lazy_flush_tlb(mm, true); 892 return FLUSH_TYPE_NONE; 893 } 894 } 895 return FLUSH_TYPE_LOCAL; 896 } 897 898 /* Coprocessors require TLBIE to invalidate nMMU. */ 899 if (atomic_read(&mm->context.copros) > 0) 900 return FLUSH_TYPE_GLOBAL; 901 902 /* 903 * In the fullmm case there's no point doing the exit_flush_lazy_tlbs 904 * because the mm is being taken down anyway, and a TLBIE tends to 905 * be faster than an IPI+TLBIEL. 906 */ 907 if (fullmm) 908 return FLUSH_TYPE_GLOBAL; 909 910 /* 911 * If we are running the only thread of a single-threaded process, 912 * then we should almost always be able to trim off the rest of the 913 * CPU mask (except in the case of use_mm() races), so always try 914 * trimming the mask. 
915 */ 916 if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm) { 917 exit_flush_lazy_tlbs(mm); 918 /* 919 * use_mm() race could prevent IPIs from being able to clear 920 * the cpumask here, however those users are established 921 * after our first check (and so after the PTEs are removed), 922 * and the TLB still gets flushed by the IPI, so this CPU 923 * will only require a local flush. 924 */ 925 return FLUSH_TYPE_LOCAL; 926 } 927 928 /* 929 * Occasionally try to trim down the cpumask. It's possible this can 930 * bring the mask to zero, which results in no flush. 931 */ 932 if (tick_and_test_trim_clock()) { 933 exit_flush_lazy_tlbs(mm); 934 if (current->mm == mm) 935 return FLUSH_TYPE_LOCAL; 936 if (cpumask_test_cpu(cpu, mm_cpumask(mm))) 937 exit_lazy_flush_tlb(mm, true); 938 return FLUSH_TYPE_NONE; 939 } 940 941 return FLUSH_TYPE_GLOBAL; 942 } 943 944 #ifdef CONFIG_SMP 945 void radix__flush_tlb_mm(struct mm_struct *mm) 946 { 947 unsigned long pid; 948 enum tlb_flush_type type; 949 950 pid = mm->context.id; 951 if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT)) 952 return; 953 954 preempt_disable(); 955 /* 956 * Order loads of mm_cpumask (in flush_type_needed) vs previous 957 * stores to clear ptes before the invalidate. 
See barrier in 958 * switch_mm_irqs_off 959 */ 960 smp_mb(); 961 type = flush_type_needed(mm, false); 962 if (type == FLUSH_TYPE_LOCAL) { 963 _tlbiel_pid(pid, RIC_FLUSH_TLB); 964 } else if (type == FLUSH_TYPE_GLOBAL) { 965 if (!mmu_has_feature(MMU_FTR_GTSE)) { 966 unsigned long tgt = H_RPTI_TARGET_CMMU; 967 968 if (atomic_read(&mm->context.copros) > 0) 969 tgt |= H_RPTI_TARGET_NMMU; 970 pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB, 971 H_RPTI_PAGE_ALL, 0, -1UL); 972 } else if (cputlb_use_tlbie()) { 973 if (mm_needs_flush_escalation(mm)) 974 _tlbie_pid(pid, RIC_FLUSH_ALL); 975 else 976 _tlbie_pid(pid, RIC_FLUSH_TLB); 977 } else { 978 _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB); 979 } 980 } 981 preempt_enable(); 982 } 983 EXPORT_SYMBOL(radix__flush_tlb_mm); 984 985 static void __flush_all_mm(struct mm_struct *mm, bool fullmm) 986 { 987 unsigned long pid; 988 enum tlb_flush_type type; 989 990 pid = mm->context.id; 991 if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT)) 992 return; 993 994 preempt_disable(); 995 smp_mb(); /* see radix__flush_tlb_mm */ 996 type = flush_type_needed(mm, fullmm); 997 if (type == FLUSH_TYPE_LOCAL) { 998 _tlbiel_pid(pid, RIC_FLUSH_ALL); 999 } else if (type == FLUSH_TYPE_GLOBAL) { 1000 if (!mmu_has_feature(MMU_FTR_GTSE)) { 1001 unsigned long tgt = H_RPTI_TARGET_CMMU; 1002 unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | 1003 H_RPTI_TYPE_PRT; 1004 1005 if (atomic_read(&mm->context.copros) > 0) 1006 tgt |= H_RPTI_TARGET_NMMU; 1007 pseries_rpt_invalidate(pid, tgt, type, 1008 H_RPTI_PAGE_ALL, 0, -1UL); 1009 } else if (cputlb_use_tlbie()) 1010 _tlbie_pid(pid, RIC_FLUSH_ALL); 1011 else 1012 _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL); 1013 } 1014 preempt_enable(); 1015 } 1016 1017 void radix__flush_all_mm(struct mm_struct *mm) 1018 { 1019 __flush_all_mm(mm, false); 1020 } 1021 EXPORT_SYMBOL(radix__flush_all_mm); 1022 1023 void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, 1024 int psize) 1025 { 1026 unsigned long pid; 
1027 enum tlb_flush_type type; 1028 1029 pid = mm->context.id; 1030 if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT)) 1031 return; 1032 1033 preempt_disable(); 1034 smp_mb(); /* see radix__flush_tlb_mm */ 1035 type = flush_type_needed(mm, false); 1036 if (type == FLUSH_TYPE_LOCAL) { 1037 _tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB); 1038 } else if (type == FLUSH_TYPE_GLOBAL) { 1039 if (!mmu_has_feature(MMU_FTR_GTSE)) { 1040 unsigned long tgt, pg_sizes, size; 1041 1042 tgt = H_RPTI_TARGET_CMMU; 1043 pg_sizes = psize_to_rpti_pgsize(psize); 1044 size = 1UL << mmu_psize_to_shift(psize); 1045 1046 if (atomic_read(&mm->context.copros) > 0) 1047 tgt |= H_RPTI_TARGET_NMMU; 1048 pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB, 1049 pg_sizes, vmaddr, 1050 vmaddr + size); 1051 } else if (cputlb_use_tlbie()) 1052 _tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB); 1053 else 1054 _tlbiel_va_multicast(mm, vmaddr, pid, psize, RIC_FLUSH_TLB); 1055 } 1056 preempt_enable(); 1057 } 1058 1059 void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) 1060 { 1061 #ifdef CONFIG_HUGETLB_PAGE 1062 if (is_vm_hugetlb_page(vma)) 1063 return radix__flush_hugetlb_page(vma, vmaddr); 1064 #endif 1065 radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize); 1066 } 1067 EXPORT_SYMBOL(radix__flush_tlb_page); 1068 1069 #endif /* CONFIG_SMP */ 1070 1071 static void do_tlbiel_kernel(void *info) 1072 { 1073 _tlbiel_pid(0, RIC_FLUSH_ALL); 1074 } 1075 1076 static inline void _tlbiel_kernel_broadcast(void) 1077 { 1078 on_each_cpu(do_tlbiel_kernel, NULL, 1); 1079 if (tlbie_capable) { 1080 /* 1081 * Coherent accelerators don't refcount kernel memory mappings, 1082 * so have to always issue a tlbie for them. This is quite a 1083 * slow path anyway. 
1084 */ 1085 _tlbie_pid(0, RIC_FLUSH_ALL); 1086 } 1087 } 1088 1089 /* 1090 * If kernel TLBIs ever become local rather than global, then 1091 * drivers/misc/ocxl/link.c:ocxl_link_add_pe will need some work, as it 1092 * assumes kernel TLBIs are global. 1093 */ 1094 void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end) 1095 { 1096 if (!mmu_has_feature(MMU_FTR_GTSE)) { 1097 unsigned long tgt = H_RPTI_TARGET_CMMU | H_RPTI_TARGET_NMMU; 1098 unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | 1099 H_RPTI_TYPE_PRT; 1100 1101 pseries_rpt_invalidate(0, tgt, type, H_RPTI_PAGE_ALL, 1102 start, end); 1103 } else if (cputlb_use_tlbie()) 1104 _tlbie_pid(0, RIC_FLUSH_ALL); 1105 else 1106 _tlbiel_kernel_broadcast(); 1107 } 1108 EXPORT_SYMBOL(radix__flush_tlb_kernel_range); 1109 1110 /* 1111 * Doesn't appear to be used anywhere. Remove. 1112 */ 1113 #define TLB_FLUSH_ALL -1UL 1114 1115 /* 1116 * Number of pages above which we invalidate the entire PID rather than 1117 * flush individual pages, for local and global flushes respectively. 1118 * 1119 * tlbie goes out to the interconnect and individual ops are more costly. 1120 * It also does not iterate over sets like the local tlbiel variant when 1121 * invalidating a full PID, so it has a far lower threshold to change from 1122 * individual page flushes to full-pid flushes. 
1123 */ 1124 static u32 tlb_single_page_flush_ceiling __read_mostly = 33; 1125 static u32 tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2; 1126 1127 static inline void __radix__flush_tlb_range(struct mm_struct *mm, 1128 unsigned long start, unsigned long end) 1129 { 1130 unsigned long pid; 1131 unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift; 1132 unsigned long page_size = 1UL << page_shift; 1133 unsigned long nr_pages = (end - start) >> page_shift; 1134 bool flush_pid, flush_pwc = false; 1135 enum tlb_flush_type type; 1136 1137 pid = mm->context.id; 1138 if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT)) 1139 return; 1140 1141 WARN_ON_ONCE(end == TLB_FLUSH_ALL); 1142 1143 preempt_disable(); 1144 smp_mb(); /* see radix__flush_tlb_mm */ 1145 type = flush_type_needed(mm, false); 1146 if (type == FLUSH_TYPE_NONE) 1147 goto out; 1148 1149 if (type == FLUSH_TYPE_GLOBAL) 1150 flush_pid = nr_pages > tlb_single_page_flush_ceiling; 1151 else 1152 flush_pid = nr_pages > tlb_local_single_page_flush_ceiling; 1153 /* 1154 * full pid flush already does the PWC flush. if it is not full pid 1155 * flush check the range is more than PMD and force a pwc flush 1156 * mremap() depends on this behaviour. 
1157 */ 1158 if (!flush_pid && (end - start) >= PMD_SIZE) 1159 flush_pwc = true; 1160 1161 if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) { 1162 unsigned long type = H_RPTI_TYPE_TLB; 1163 unsigned long tgt = H_RPTI_TARGET_CMMU; 1164 unsigned long pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize); 1165 1166 if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) 1167 pg_sizes |= psize_to_rpti_pgsize(MMU_PAGE_2M); 1168 if (atomic_read(&mm->context.copros) > 0) 1169 tgt |= H_RPTI_TARGET_NMMU; 1170 if (flush_pwc) 1171 type |= H_RPTI_TYPE_PWC; 1172 pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end); 1173 } else if (flush_pid) { 1174 /* 1175 * We are now flushing a range larger than PMD size force a RIC_FLUSH_ALL 1176 */ 1177 if (type == FLUSH_TYPE_LOCAL) { 1178 _tlbiel_pid(pid, RIC_FLUSH_ALL); 1179 } else { 1180 if (cputlb_use_tlbie()) { 1181 _tlbie_pid(pid, RIC_FLUSH_ALL); 1182 } else { 1183 _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL); 1184 } 1185 } 1186 } else { 1187 bool hflush; 1188 unsigned long hstart, hend; 1189 1190 hstart = (start + PMD_SIZE - 1) & PMD_MASK; 1191 hend = end & PMD_MASK; 1192 hflush = IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hstart < hend; 1193 1194 if (type == FLUSH_TYPE_LOCAL) { 1195 asm volatile("ptesync": : :"memory"); 1196 if (flush_pwc) 1197 /* For PWC, only one flush is needed */ 1198 __tlbiel_pid(pid, 0, RIC_FLUSH_PWC); 1199 __tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize); 1200 if (hflush) 1201 __tlbiel_va_range(hstart, hend, pid, 1202 PMD_SIZE, MMU_PAGE_2M); 1203 ppc_after_tlbiel_barrier(); 1204 } else if (cputlb_use_tlbie()) { 1205 asm volatile("ptesync": : :"memory"); 1206 if (flush_pwc) 1207 __tlbie_pid(pid, RIC_FLUSH_PWC); 1208 __tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize); 1209 if (hflush) 1210 __tlbie_va_range(hstart, hend, pid, 1211 PMD_SIZE, MMU_PAGE_2M); 1212 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 1213 } else { 1214 _tlbiel_va_range_multicast(mm, 1215 start, 
end, pid, page_size, mmu_virtual_psize, flush_pwc); 1216 if (hflush) 1217 _tlbiel_va_range_multicast(mm, 1218 hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, flush_pwc); 1219 } 1220 } 1221 out: 1222 preempt_enable(); 1223 } 1224 1225 void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start, 1226 unsigned long end) 1227 1228 { 1229 #ifdef CONFIG_HUGETLB_PAGE 1230 if (is_vm_hugetlb_page(vma)) 1231 return radix__flush_hugetlb_tlb_range(vma, start, end); 1232 #endif 1233 1234 __radix__flush_tlb_range(vma->vm_mm, start, end); 1235 } 1236 EXPORT_SYMBOL(radix__flush_tlb_range); 1237 1238 static int radix_get_mmu_psize(int page_size) 1239 { 1240 int psize; 1241 1242 if (page_size == (1UL << mmu_psize_defs[mmu_virtual_psize].shift)) 1243 psize = mmu_virtual_psize; 1244 else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift)) 1245 psize = MMU_PAGE_2M; 1246 else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift)) 1247 psize = MMU_PAGE_1G; 1248 else 1249 return -1; 1250 return psize; 1251 } 1252 1253 /* 1254 * Flush partition scoped LPID address translation for all CPUs. 1255 */ 1256 void radix__flush_tlb_lpid_page(unsigned int lpid, 1257 unsigned long addr, 1258 unsigned long page_size) 1259 { 1260 int psize = radix_get_mmu_psize(page_size); 1261 1262 _tlbie_lpid_va(addr, lpid, psize, RIC_FLUSH_TLB); 1263 } 1264 EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid_page); 1265 1266 /* 1267 * Flush partition scoped PWC from LPID for all CPUs. 
1268 */ 1269 void radix__flush_pwc_lpid(unsigned int lpid) 1270 { 1271 _tlbie_lpid(lpid, RIC_FLUSH_PWC); 1272 } 1273 EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid); 1274 1275 /* 1276 * Flush partition scoped translations from LPID (=LPIDR) 1277 */ 1278 void radix__flush_all_lpid(unsigned int lpid) 1279 { 1280 _tlbie_lpid(lpid, RIC_FLUSH_ALL); 1281 } 1282 EXPORT_SYMBOL_GPL(radix__flush_all_lpid); 1283 1284 /* 1285 * Flush process scoped translations from LPID (=LPIDR) 1286 */ 1287 void radix__flush_all_lpid_guest(unsigned int lpid) 1288 { 1289 _tlbie_lpid_guest(lpid, RIC_FLUSH_ALL); 1290 } 1291 1292 void radix__tlb_flush(struct mmu_gather *tlb) 1293 { 1294 int psize = 0; 1295 struct mm_struct *mm = tlb->mm; 1296 int page_size = tlb->page_size; 1297 unsigned long start = tlb->start; 1298 unsigned long end = tlb->end; 1299 1300 /* 1301 * if page size is not something we understand, do a full mm flush 1302 * 1303 * A "fullmm" flush must always do a flush_all_mm (RIC=2) flush 1304 * that flushes the process table entry cache upon process teardown. 1305 * See the comment for radix in arch_exit_mmap(). 
1306 */ 1307 if (tlb->fullmm) { 1308 __flush_all_mm(mm, true); 1309 } else if ( (psize = radix_get_mmu_psize(page_size)) == -1) { 1310 if (!tlb->freed_tables) 1311 radix__flush_tlb_mm(mm); 1312 else 1313 radix__flush_all_mm(mm); 1314 } else { 1315 if (!tlb->freed_tables) 1316 radix__flush_tlb_range_psize(mm, start, end, psize); 1317 else 1318 radix__flush_tlb_pwc_range_psize(mm, start, end, psize); 1319 } 1320 } 1321 1322 static void __radix__flush_tlb_range_psize(struct mm_struct *mm, 1323 unsigned long start, unsigned long end, 1324 int psize, bool also_pwc) 1325 { 1326 unsigned long pid; 1327 unsigned int page_shift = mmu_psize_defs[psize].shift; 1328 unsigned long page_size = 1UL << page_shift; 1329 unsigned long nr_pages = (end - start) >> page_shift; 1330 bool flush_pid; 1331 enum tlb_flush_type type; 1332 1333 pid = mm->context.id; 1334 if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT)) 1335 return; 1336 1337 WARN_ON_ONCE(end == TLB_FLUSH_ALL); 1338 1339 preempt_disable(); 1340 smp_mb(); /* see radix__flush_tlb_mm */ 1341 type = flush_type_needed(mm, false); 1342 if (type == FLUSH_TYPE_NONE) 1343 goto out; 1344 1345 if (type == FLUSH_TYPE_GLOBAL) 1346 flush_pid = nr_pages > tlb_single_page_flush_ceiling; 1347 else 1348 flush_pid = nr_pages > tlb_local_single_page_flush_ceiling; 1349 1350 if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) { 1351 unsigned long tgt = H_RPTI_TARGET_CMMU; 1352 unsigned long type = H_RPTI_TYPE_TLB; 1353 unsigned long pg_sizes = psize_to_rpti_pgsize(psize); 1354 1355 if (also_pwc) 1356 type |= H_RPTI_TYPE_PWC; 1357 if (atomic_read(&mm->context.copros) > 0) 1358 tgt |= H_RPTI_TARGET_NMMU; 1359 pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end); 1360 } else if (flush_pid) { 1361 if (type == FLUSH_TYPE_LOCAL) { 1362 _tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB); 1363 } else { 1364 if (cputlb_use_tlbie()) { 1365 if (mm_needs_flush_escalation(mm)) 1366 also_pwc = true; 1367 1368 _tlbie_pid(pid, 1369 also_pwc ? 
RIC_FLUSH_ALL : RIC_FLUSH_TLB); 1370 } else { 1371 _tlbiel_pid_multicast(mm, pid, 1372 also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB); 1373 } 1374 1375 } 1376 } else { 1377 if (type == FLUSH_TYPE_LOCAL) 1378 _tlbiel_va_range(start, end, pid, page_size, psize, also_pwc); 1379 else if (cputlb_use_tlbie()) 1380 _tlbie_va_range(start, end, pid, page_size, psize, also_pwc); 1381 else 1382 _tlbiel_va_range_multicast(mm, 1383 start, end, pid, page_size, psize, also_pwc); 1384 } 1385 out: 1386 preempt_enable(); 1387 } 1388 1389 void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start, 1390 unsigned long end, int psize) 1391 { 1392 return __radix__flush_tlb_range_psize(mm, start, end, psize, false); 1393 } 1394 1395 void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start, 1396 unsigned long end, int psize) 1397 { 1398 __radix__flush_tlb_range_psize(mm, start, end, psize, true); 1399 } 1400 1401 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 1402 void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr) 1403 { 1404 unsigned long pid, end; 1405 enum tlb_flush_type type; 1406 1407 pid = mm->context.id; 1408 if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT)) 1409 return; 1410 1411 /* 4k page size, just blow the world */ 1412 if (PAGE_SIZE == 0x1000) { 1413 radix__flush_all_mm(mm); 1414 return; 1415 } 1416 1417 end = addr + HPAGE_PMD_SIZE; 1418 1419 /* Otherwise first do the PWC, then iterate the pages. 
*/ 1420 preempt_disable(); 1421 smp_mb(); /* see radix__flush_tlb_mm */ 1422 type = flush_type_needed(mm, false); 1423 if (type == FLUSH_TYPE_LOCAL) { 1424 _tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true); 1425 } else if (type == FLUSH_TYPE_GLOBAL) { 1426 if (!mmu_has_feature(MMU_FTR_GTSE)) { 1427 unsigned long tgt, type, pg_sizes; 1428 1429 tgt = H_RPTI_TARGET_CMMU; 1430 type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | 1431 H_RPTI_TYPE_PRT; 1432 pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize); 1433 1434 if (atomic_read(&mm->context.copros) > 0) 1435 tgt |= H_RPTI_TARGET_NMMU; 1436 pseries_rpt_invalidate(pid, tgt, type, pg_sizes, 1437 addr, end); 1438 } else if (cputlb_use_tlbie()) 1439 _tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true); 1440 else 1441 _tlbiel_va_range_multicast(mm, 1442 addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true); 1443 } 1444 1445 preempt_enable(); 1446 } 1447 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 1448 1449 void radix__flush_pmd_tlb_range(struct vm_area_struct *vma, 1450 unsigned long start, unsigned long end) 1451 { 1452 radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_2M); 1453 } 1454 EXPORT_SYMBOL(radix__flush_pmd_tlb_range); 1455 1456 void radix__flush_tlb_all(void) 1457 { 1458 unsigned long rb,prs,r,rs; 1459 unsigned long ric = RIC_FLUSH_ALL; 1460 1461 rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */ 1462 prs = 0; /* partition scoped */ 1463 r = 1; /* radix format */ 1464 rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */ 1465 1466 asm volatile("ptesync": : :"memory"); 1467 /* 1468 * now flush guest entries by passing PRS = 1 and LPID != 0 1469 */ 1470 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 1471 : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory"); 1472 /* 1473 * now flush host entires by passing PRS = 0 and LPID == 0 1474 */ 1475 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 1476 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory"); 1477 asm 
volatile("eieio; tlbsync; ptesync": : :"memory"); 1478 } 1479 1480 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 1481 /* 1482 * Performs process-scoped invalidations for a given LPID 1483 * as part of H_RPT_INVALIDATE hcall. 1484 */ 1485 void do_h_rpt_invalidate_prt(unsigned long pid, unsigned long lpid, 1486 unsigned long type, unsigned long pg_sizes, 1487 unsigned long start, unsigned long end) 1488 { 1489 unsigned long psize, nr_pages; 1490 struct mmu_psize_def *def; 1491 bool flush_pid; 1492 1493 /* 1494 * A H_RPTI_TYPE_ALL request implies RIC=3, hence 1495 * do a single IS=1 based flush. 1496 */ 1497 if ((type & H_RPTI_TYPE_ALL) == H_RPTI_TYPE_ALL) { 1498 _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL); 1499 return; 1500 } 1501 1502 if (type & H_RPTI_TYPE_PWC) 1503 _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC); 1504 1505 /* Full PID flush */ 1506 if (start == 0 && end == -1) 1507 return _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB); 1508 1509 /* Do range invalidation for all the valid page sizes */ 1510 for (psize = 0; psize < MMU_PAGE_COUNT; psize++) { 1511 def = &mmu_psize_defs[psize]; 1512 if (!(pg_sizes & def->h_rpt_pgsize)) 1513 continue; 1514 1515 nr_pages = (end - start) >> def->shift; 1516 flush_pid = nr_pages > tlb_single_page_flush_ceiling; 1517 1518 /* 1519 * If the number of pages spanning the range is above 1520 * the ceiling, convert the request into a full PID flush. 1521 * And since PID flush takes out all the page sizes, there 1522 * is no need to consider remaining page sizes. 
1523 */ 1524 if (flush_pid) { 1525 _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB); 1526 return; 1527 } 1528 _tlbie_va_range_lpid(start, end, pid, lpid, 1529 (1UL << def->shift), psize, false); 1530 } 1531 } 1532 EXPORT_SYMBOL_GPL(do_h_rpt_invalidate_prt); 1533 1534 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ 1535 1536 static int __init create_tlb_single_page_flush_ceiling(void) 1537 { 1538 debugfs_create_u32("tlb_single_page_flush_ceiling", 0600, 1539 arch_debugfs_dir, &tlb_single_page_flush_ceiling); 1540 debugfs_create_u32("tlb_local_single_page_flush_ceiling", 0600, 1541 arch_debugfs_dir, &tlb_local_single_page_flush_ceiling); 1542 return 0; 1543 } 1544 late_initcall(create_tlb_single_page_flush_ceiling); 1545 1546