// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * TLB flush routines for radix kernels.
 *
 * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
 */

#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/memblock.h>
#include <linux/mmu_context.h>
#include <linux/sched/mm.h>

#include <asm/ppc-opcode.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/trace.h>
#include <asm/cputhreads.h>
#include <asm/plpar_wrappers.h>

#include "internal.h"

/*
 * tlbiel instruction for radix, set invalidation
 * i.e., r=1 and is=01 or is=10 or is=11
 */
static __always_inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
					unsigned int pid,
					unsigned int ric, unsigned int prs)
{
	unsigned long rb;
	unsigned long rs;

	rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
	rs = ((unsigned long)pid << PPC_BITLSHIFT(31));

	asm volatile(PPC_TLBIEL(%0, %1, %2, %3, 1)
		     : : "r"(rb), "r"(rs), "i"(ric), "i"(prs)
		     : "memory");
}

static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
{
	unsigned int set;

	asm volatile("ptesync": : :"memory");

	/*
	 * Flush the first set of the TLB, and the entire Page Walk Cache
	 * and partition table entries. Then flush the remaining sets of the
	 * TLB.
	 */

	if (early_cpu_has_feature(CPU_FTR_HVMODE)) {
		/* MSR[HV] should flush partition scope translations first. */
		tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0);

		if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) {
			for (set = 1; set < num_sets; set++)
				tlbiel_radix_set_isa300(set, is, 0,
							RIC_FLUSH_TLB, 0);
		}
	}

	/* Flush process scoped entries. */
	tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1);

	if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) {
		for (set = 1; set < num_sets; set++)
			tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1);
	}

	ppc_after_tlbiel_barrier();
}

void radix__tlbiel_all(unsigned int action)
{
	unsigned int is;

	switch (action) {
	case TLB_INVAL_SCOPE_GLOBAL:
		is = 3;
		break;
	case TLB_INVAL_SCOPE_LPID:
		is = 2;
		break;
	default:
		BUG();
	}

	if (early_cpu_has_feature(CPU_FTR_ARCH_300))
		tlbiel_all_isa300(POWER9_TLB_SETS_RADIX, is);
	else
		WARN(1, "%s called on pre-POWER9 CPU\n", __func__);

	asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory");
}

static __always_inline void __tlbiel_pid(unsigned long pid, int set,
				unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(53); /* IS = 1 */
	rb |= set << PPC_BITLSHIFT(51);
	rs = ((unsigned long)pid) << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 1, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(53); /* IS = 1 */
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}
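
/*
 * The __tlbie*()/__tlbiel*() primitives below all build RB/RS the same
 * way (IBM bit numbering, via PPC_BIT/PPC_BITLSHIFT):
 *  - RB[52:53] holds the IS field (1 = all entries for the PID/LPID,
 *    2 = all entries for the LPID); the tlbiel set index is shifted so
 *    it ends at RB bit 51, and the per-VA forms instead OR in the EPN
 *    and the AP (actual page size) field ending at RB bit 58.
 *  - RS carries the PID in its upper word and, for the *_lpid variants,
 *    the LPID in its lower word; the partition scoped forms pass the
 *    LPID alone.
 *  - "prs" selects process (1) vs partition (0) scope and "r" = 1
 *    selects the radix page table format. See the ISA for the exact
 *    field layouts.
 */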

static __always_inline void __tlbie_pid_lpid(unsigned long pid,
					     unsigned long lpid,
					     unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(53); /* IS = 1 */
	rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(52); /* IS = 2 */
	rs = lpid;
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(52); /* IS = 2 */
	rs = lpid;
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbiel_va(unsigned long va, unsigned long pid,
					unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 1, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_va(unsigned long va, unsigned long pid,
				       unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_va_lpid(unsigned long va, unsigned long pid,
					    unsigned long lpid,
					    unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
					    unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = lpid;
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}


static inline void fixup_tlbie_va(unsigned long va, unsigned long pid,
				  unsigned long ap)
{
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, 0, ap, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
					unsigned long ap)
{
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_pid(0, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_va_range_lpid(unsigned long va,
					     unsigned long pid,
					     unsigned long lpid,
					     unsigned long ap)
{
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync" : : : "memory");
		__tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync" : : : "memory");
		__tlbie_va_lpid(va, pid, lpid, ap, RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_pid(unsigned long pid)
{
	/*
	 * We can use any address for the invalidation, pick one which is
	 * probably unused as an optimisation.
	 */
	unsigned long va = ((1UL << 52) - 1);

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_pid(0, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_pid_lpid(unsigned long pid, unsigned long lpid)
{
	/*
	 * We can use any address for the invalidation, pick one which is
	 * probably unused as an optimisation.
	 */
	unsigned long va = ((1UL << 52) - 1);

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync" : : : "memory");
		__tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync" : : : "memory");
		__tlbie_va_lpid(va, pid, lpid, mmu_get_ap(MMU_PAGE_64K),
				RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
				       unsigned long ap)
{
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid_va(va, 0, ap, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid_va(va, lpid, ap, RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_lpid(unsigned long lpid)
{
	/*
	 * We can use any address for the invalidation, pick one which is
	 * probably unused as an optimisation.
	 */
	unsigned long va = ((1UL << 52) - 1);

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid(0, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
	}
}
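
/*
 * All of the fixup_tlbie_*() helpers above implement the same two
 * errata workarounds: with CPU_FTR_P9_TLBIE_ERAT_BUG an extra dummy
 * invalidation (PID 0 or LPID 0) is issued, and with
 * CPU_FTR_P9_TLBIE_STQ_BUG the last tlbie of the sequence is repeated,
 * each preceded by an additional ptesync. Callers run them after the
 * final tlbie of a flush, before the closing eieio; tlbsync; ptesync.
 */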

/*
 * We use 128 sets in radix mode and 256 sets in hpt mode.
 */
static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
{
	int set;

	asm volatile("ptesync": : :"memory");

	switch (ric) {
	case RIC_FLUSH_PWC:

		/* For PWC, only one flush is needed */
		__tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
		ppc_after_tlbiel_barrier();
		return;
	case RIC_FLUSH_TLB:
		__tlbiel_pid(pid, 0, RIC_FLUSH_TLB);
		break;
	case RIC_FLUSH_ALL:
	default:
		/*
		 * Flush the first set of the TLB, and if
		 * we're doing a RIC_FLUSH_ALL, also flush
		 * the entire Page Walk Cache.
		 */
		__tlbiel_pid(pid, 0, RIC_FLUSH_ALL);
	}

	if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
		/* For the remaining sets, just flush the TLB */
		for (set = 1; set < POWER9_TLB_SETS_RADIX; set++)
			__tlbiel_pid(pid, set, RIC_FLUSH_TLB);
	}

	ppc_after_tlbiel_barrier();
	asm volatile(PPC_RADIX_INVALIDATE_ERAT_USER "; isync" : : :"memory");
}
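
/*
 * With POWER9_TLB_SETS_RADIX = 128, a local RIC_FLUSH_ALL therefore
 * costs one tlbiel for set 0 (which also takes out the PWC) plus 127
 * more RIC_FLUSH_TLB tlbiels for the remaining sets. ISA v3.1
 * (CPU_FTR_ARCH_31) processors invalidate all sets with the single
 * instruction, which is why the loop is skipped there.
 */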

static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
{
	asm volatile("ptesync": : :"memory");

	/*
	 * Workaround the fact that the "ric" argument to __tlbie_pid
	 * must be a compile-time constraint to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_pid(pid, RIC_FLUSH_TLB);
		fixup_tlbie_pid(pid);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_pid(pid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_pid(pid, RIC_FLUSH_ALL);
		fixup_tlbie_pid(pid);
	}
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbie_pid_lpid(unsigned long pid, unsigned long lpid,
				   unsigned long ric)
{
	asm volatile("ptesync" : : : "memory");

	/*
	 * Workaround the fact that the "ric" argument to __tlbie_pid_lpid
	 * must be a compile-time constraint to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
		fixup_tlbie_pid_lpid(pid, lpid);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
		fixup_tlbie_pid_lpid(pid, lpid);
	}
	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
}

struct tlbiel_pid {
	unsigned long pid;
	unsigned long ric;
};

static void do_tlbiel_pid(void *info)
{
	struct tlbiel_pid *t = info;

	if (t->ric == RIC_FLUSH_TLB)
		_tlbiel_pid(t->pid, RIC_FLUSH_TLB);
	else if (t->ric == RIC_FLUSH_PWC)
		_tlbiel_pid(t->pid, RIC_FLUSH_PWC);
	else
		_tlbiel_pid(t->pid, RIC_FLUSH_ALL);
}

static inline void _tlbiel_pid_multicast(struct mm_struct *mm,
				unsigned long pid, unsigned long ric)
{
	struct cpumask *cpus = mm_cpumask(mm);
	struct tlbiel_pid t = { .pid = pid, .ric = ric };

	on_each_cpu_mask(cpus, do_tlbiel_pid, &t, 1);
	/*
	 * Always want the CPU translations to be invalidated with tlbiel in
	 * these paths, so while coprocessors must use tlbie, we cannot
	 * optimise away the tlbiel component.
	 */
	if (atomic_read(&mm->context.copros) > 0)
		_tlbie_pid(pid, RIC_FLUSH_ALL);
}

static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
{
	asm volatile("ptesync": : :"memory");

	/*
	 * Workaround the fact that the "ric" argument to __tlbie_lpid
	 * must be a compile-time constraint to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_lpid(lpid, RIC_FLUSH_TLB);
		fixup_tlbie_lpid(lpid);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_lpid(lpid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_lpid(lpid, RIC_FLUSH_ALL);
		fixup_tlbie_lpid(lpid);
	}
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static __always_inline void _tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
{
	/*
	 * Workaround the fact that the "ric" argument to __tlbie_lpid_guest
	 * must be a compile-time constraint to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_lpid_guest(lpid, RIC_FLUSH_TLB);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_lpid_guest(lpid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
	}
	fixup_tlbie_lpid(lpid);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
				     unsigned long pid, unsigned long page_size,
				     unsigned long psize)
{
	unsigned long addr;
	unsigned long ap = mmu_get_ap(psize);

	for (addr = start; addr < end; addr += page_size)
		__tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
}

static __always_inline void _tlbiel_va(unsigned long va, unsigned long pid,
				       unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbiel_va(va, pid, ap, ric);
	ppc_after_tlbiel_barrier();
}

static inline void _tlbiel_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize, bool also_pwc)
{
	asm volatile("ptesync": : :"memory");
	if (also_pwc)
		__tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
	__tlbiel_va_range(start, end, pid, page_size, psize);
	ppc_after_tlbiel_barrier();
}

static inline void __tlbie_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize)
{
	unsigned long addr;
	unsigned long ap = mmu_get_ap(psize);

	for (addr = start; addr < end; addr += page_size)
		__tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);

	fixup_tlbie_va_range(addr - page_size, pid, ap);
}

static inline void __tlbie_va_range_lpid(unsigned long start, unsigned long end,
					 unsigned long pid, unsigned long lpid,
					 unsigned long page_size,
					 unsigned long psize)
{
	unsigned long addr;
	unsigned long ap = mmu_get_ap(psize);

	for (addr = start; addr < end; addr += page_size)
		__tlbie_va_lpid(addr, pid, lpid, ap, RIC_FLUSH_TLB);

	fixup_tlbie_va_range_lpid(addr - page_size, pid, lpid, ap);
}

static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
				      unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbie_va(va, pid, ap, ric);
	fixup_tlbie_va(va, pid, ap);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

struct tlbiel_va {
	unsigned long pid;
	unsigned long va;
	unsigned long psize;
	unsigned long ric;
};

static void do_tlbiel_va(void *info)
{
	struct tlbiel_va *t = info;

	if (t->ric == RIC_FLUSH_TLB)
		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_TLB);
	else if (t->ric == RIC_FLUSH_PWC)
		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_PWC);
	else
		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_ALL);
}

static inline void _tlbiel_va_multicast(struct mm_struct *mm,
					unsigned long va, unsigned long pid,
					unsigned long psize, unsigned long ric)
{
	struct cpumask *cpus = mm_cpumask(mm);
	struct tlbiel_va t = { .va = va, .pid = pid, .psize = psize, .ric = ric };

	on_each_cpu_mask(cpus, do_tlbiel_va, &t, 1);
	if (atomic_read(&mm->context.copros) > 0)
		_tlbie_va(va, pid, psize, RIC_FLUSH_TLB);
}

struct tlbiel_va_range {
	unsigned long pid;
	unsigned long start;
	unsigned long end;
	unsigned long page_size;
	unsigned long psize;
	bool also_pwc;
};

static void do_tlbiel_va_range(void *info)
{
	struct tlbiel_va_range *t = info;

	_tlbiel_va_range(t->start, t->end, t->pid, t->page_size,
			 t->psize, t->also_pwc);
}

static __always_inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
					   unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbie_lpid_va(va, lpid, ap, ric);
	fixup_tlbie_lpid_va(va, lpid, ap);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbie_va_range(unsigned long start, unsigned long end,
				   unsigned long pid, unsigned long page_size,
				   unsigned long psize, bool also_pwc)
{
	asm volatile("ptesync": : :"memory");
	if (also_pwc)
		__tlbie_pid(pid, RIC_FLUSH_PWC);
	__tlbie_va_range(start, end, pid, page_size, psize);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbie_va_range_lpid(unsigned long start, unsigned long end,
					unsigned long pid, unsigned long lpid,
					unsigned long page_size,
					unsigned long psize, bool also_pwc)
{
	asm volatile("ptesync" : : : "memory");
	if (also_pwc)
		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
	__tlbie_va_range_lpid(start, end, pid, lpid, page_size, psize);
	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
}

static inline void _tlbiel_va_range_multicast(struct mm_struct *mm,
				unsigned long start, unsigned long end,
				unsigned long pid, unsigned long page_size,
				unsigned long psize, bool also_pwc)
{
	struct cpumask *cpus = mm_cpumask(mm);
	struct tlbiel_va_range t = { .start = start, .end = end,
			.pid = pid, .page_size = page_size,
			.psize = psize, .also_pwc = also_pwc };

	on_each_cpu_mask(cpus, do_tlbiel_va_range, &t, 1);
	if (atomic_read(&mm->context.copros) > 0)
		_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
}

/*
 * Base TLB flushing operations:
 *
 *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
 *  - flush_tlb_page(vma, vmaddr) flushes one page
 *  - flush_tlb_range(vma, start, end) flushes a range of pages
 *  - flush_tlb_kernel_range(start, end) flushes kernel pages
 *
 *  - local_* variants of page and mm only apply to the current
 *    processor
 */
void radix__local_flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_pid(pid, RIC_FLUSH_TLB);
	preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_tlb_mm);

#ifndef CONFIG_SMP
void radix__local_flush_all_mm(struct mm_struct *mm)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_pid(pid, RIC_FLUSH_ALL);
	preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_all_mm);

static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
{
	radix__local_flush_all_mm(mm);
}
#endif /* CONFIG_SMP */

void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
				       int psize)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	preempt_enable();
}

void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
	/* need the return fix for nohash.c */
	if (is_vm_hugetlb_page(vma))
		return radix__local_flush_hugetlb_page(vma, vmaddr);
#endif
	radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__local_flush_tlb_page);

static bool mm_needs_flush_escalation(struct mm_struct *mm)
{
	/*
	 * P9 nest MMU has issues with the page walk cache
	 * caching PTEs and not flushing them properly when
	 * RIC = 0 for a PID/LPID invalidate
	 */
	if (atomic_read(&mm->context.copros) > 0)
		return true;
	return false;
}

/*
 * If always_flush is true, then flush even if this CPU can't be removed
 * from mm_cpumask.
 */
void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush)
{
	unsigned long pid = mm->context.id;
	int cpu = smp_processor_id();

	/*
	 * A kthread could have done a mmget_not_zero() after the flushing CPU
	 * checked mm_cpumask, and be in the process of kthread_use_mm when
	 * interrupted here. In that case, current->mm will be set to mm,
	 * because kthread_use_mm() setting ->mm and switching to the mm is
	 * done with interrupts off.
	 */
	if (current->mm == mm)
		goto out;

	if (current->active_mm == mm) {
		WARN_ON_ONCE(current->mm != NULL);
		/* Is a kernel thread and is using mm as the lazy tlb */
		mmgrab(&init_mm);
		current->active_mm = &init_mm;
		switch_mm_irqs_off(mm, &init_mm, current);
		mmdrop(mm);
	}

	/*
	 * This IPI may be initiated from any source including those not
	 * running the mm, so there may be a racing IPI that comes after
	 * this one which finds the cpumask already clear. Check and avoid
	 * underflowing the active_cpus count in that case. The race should
	 * not otherwise be a problem, but the TLB must be flushed because
	 * that's what the caller expects.
	 */
	if (cpumask_test_cpu(cpu, mm_cpumask(mm))) {
		atomic_dec(&mm->context.active_cpus);
		cpumask_clear_cpu(cpu, mm_cpumask(mm));
		always_flush = true;
	}

out:
	if (always_flush)
		_tlbiel_pid(pid, RIC_FLUSH_ALL);
}

#ifdef CONFIG_SMP
static void do_exit_flush_lazy_tlb(void *arg)
{
	struct mm_struct *mm = arg;

	exit_lazy_flush_tlb(mm, true);
}

static void exit_flush_lazy_tlbs(struct mm_struct *mm)
{
	/*
	 * Would be nice if this was async so it could be run in
	 * parallel with our local flush, but generic code does not
	 * give a good API for it. Could extend the generic code or
	 * make a special powerpc IPI for flushing TLBs.
	 * For now it's not too performance critical.
	 */
	smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb,
			       (void *)mm, 1);
}

#else /* CONFIG_SMP */
static inline void exit_flush_lazy_tlbs(struct mm_struct *mm) { }
#endif /* CONFIG_SMP */

static DEFINE_PER_CPU(unsigned int, mm_cpumask_trim_clock);

/*
 * Interval between flushes at which we send out IPIs to check whether the
 * mm_cpumask can be trimmed for the case where it's not a single-threaded
 * process flushing its own mm. The intent is to reduce the cost of later
 * flushes. Don't want this to be so low that it adds noticeable cost to TLB
 * flushing, or so high that it doesn't help reduce global TLBIEs.
 */
static unsigned long tlb_mm_cpumask_trim_timer = 1073;

static bool tick_and_test_trim_clock(void)
{
	if (__this_cpu_inc_return(mm_cpumask_trim_clock) ==
			tlb_mm_cpumask_trim_timer) {
		__this_cpu_write(mm_cpumask_trim_clock, 0);
		return true;
	}
	return false;
}

enum tlb_flush_type {
	FLUSH_TYPE_NONE,
	FLUSH_TYPE_LOCAL,
	FLUSH_TYPE_GLOBAL,
};

static enum tlb_flush_type flush_type_needed(struct mm_struct *mm, bool fullmm)
{
	int active_cpus = atomic_read(&mm->context.active_cpus);
	int cpu = smp_processor_id();

	if (active_cpus == 0)
		return FLUSH_TYPE_NONE;
	if (active_cpus == 1 && cpumask_test_cpu(cpu, mm_cpumask(mm))) {
		if (current->mm != mm) {
			/*
			 * Asynchronous flush sources may trim down to nothing
			 * if the process is not running, so occasionally try
			 * to trim.
			 */
			if (tick_and_test_trim_clock()) {
				exit_lazy_flush_tlb(mm, true);
				return FLUSH_TYPE_NONE;
			}
		}
		return FLUSH_TYPE_LOCAL;
	}

	/* Coprocessors require TLBIE to invalidate nMMU. */
	if (atomic_read(&mm->context.copros) > 0)
		return FLUSH_TYPE_GLOBAL;

	/*
	 * In the fullmm case there's no point doing the exit_flush_lazy_tlbs
	 * because the mm is being taken down anyway, and a TLBIE tends to
	 * be faster than an IPI+TLBIEL.
	 */
	if (fullmm)
		return FLUSH_TYPE_GLOBAL;

	/*
	 * If we are running the only thread of a single-threaded process,
	 * then we should almost always be able to trim off the rest of the
	 * CPU mask (except in the case of use_mm() races), so always try
	 * trimming the mask.
	 */
	if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm) {
		exit_flush_lazy_tlbs(mm);
		/*
		 * use_mm() race could prevent IPIs from being able to clear
		 * the cpumask here, however those users are established
		 * after our first check (and so after the PTEs are removed),
		 * and the TLB still gets flushed by the IPI, so this CPU
		 * will only require a local flush.
		 */
		return FLUSH_TYPE_LOCAL;
	}

	/*
	 * Occasionally try to trim down the cpumask. It's possible this can
	 * bring the mask to zero, which results in no flush.
	 */
	if (tick_and_test_trim_clock()) {
		exit_flush_lazy_tlbs(mm);
		if (current->mm == mm)
			return FLUSH_TYPE_LOCAL;
		if (cpumask_test_cpu(cpu, mm_cpumask(mm)))
			exit_lazy_flush_tlb(mm, true);
		return FLUSH_TYPE_NONE;
	}

	return FLUSH_TYPE_GLOBAL;
}
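
/*
 * For example, a single-threaded process flushing its own mm after it
 * has migrated across CPUs hits the mm_users <= 1 case above: the stale
 * CPUs are trimmed out of mm_cpumask() once via IPI, and subsequent
 * flushes from this thread can then stay entirely local.
 */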

#ifdef CONFIG_SMP
void radix__flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long pid;
	enum tlb_flush_type type;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	/*
	 * Order loads of mm_cpumask (in flush_type_needed) vs previous
	 * stores to clear ptes before the invalidate. See barrier in
	 * switch_mm_irqs_off
	 */
	smp_mb();
	type = flush_type_needed(mm, false);
	if (type == FLUSH_TYPE_LOCAL) {
		_tlbiel_pid(pid, RIC_FLUSH_TLB);
	} else if (type == FLUSH_TYPE_GLOBAL) {
		if (!mmu_has_feature(MMU_FTR_GTSE)) {
			unsigned long tgt = H_RPTI_TARGET_CMMU;

			if (atomic_read(&mm->context.copros) > 0)
				tgt |= H_RPTI_TARGET_NMMU;
			pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB,
					       H_RPTI_PAGE_ALL, 0, -1UL);
		} else if (cputlb_use_tlbie()) {
			if (mm_needs_flush_escalation(mm))
				_tlbie_pid(pid, RIC_FLUSH_ALL);
			else
				_tlbie_pid(pid, RIC_FLUSH_TLB);
		} else {
			_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB);
		}
	}
	preempt_enable();
}
EXPORT_SYMBOL(radix__flush_tlb_mm);

static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
{
	unsigned long pid;
	enum tlb_flush_type type;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	type = flush_type_needed(mm, fullmm);
	if (type == FLUSH_TYPE_LOCAL) {
		_tlbiel_pid(pid, RIC_FLUSH_ALL);
	} else if (type == FLUSH_TYPE_GLOBAL) {
		if (!mmu_has_feature(MMU_FTR_GTSE)) {
			unsigned long tgt = H_RPTI_TARGET_CMMU;
			unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
					     H_RPTI_TYPE_PRT;

			if (atomic_read(&mm->context.copros) > 0)
				tgt |= H_RPTI_TARGET_NMMU;
			pseries_rpt_invalidate(pid, tgt, type,
					       H_RPTI_PAGE_ALL, 0, -1UL);
		} else if (cputlb_use_tlbie())
			_tlbie_pid(pid, RIC_FLUSH_ALL);
		else
			_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL);
	}
	preempt_enable();
}

void radix__flush_all_mm(struct mm_struct *mm)
{
	__flush_all_mm(mm, false);
}
EXPORT_SYMBOL(radix__flush_all_mm);

void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
				 int psize)
{
	unsigned long pid;
	enum tlb_flush_type type;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	type = flush_type_needed(mm, false);
	if (type == FLUSH_TYPE_LOCAL) {
		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	} else if (type == FLUSH_TYPE_GLOBAL) {
		if (!mmu_has_feature(MMU_FTR_GTSE)) {
			unsigned long tgt, pg_sizes, size;

			tgt = H_RPTI_TARGET_CMMU;
			pg_sizes = psize_to_rpti_pgsize(psize);
			size = 1UL << mmu_psize_to_shift(psize);

			if (atomic_read(&mm->context.copros) > 0)
				tgt |= H_RPTI_TARGET_NMMU;
			pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB,
					       pg_sizes, vmaddr,
					       vmaddr + size);
		} else if (cputlb_use_tlbie())
			_tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
		else
			_tlbiel_va_multicast(mm, vmaddr, pid, psize, RIC_FLUSH_TLB);
	}
	preempt_enable();
}

void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
	if (is_vm_hugetlb_page(vma))
		return radix__flush_hugetlb_page(vma, vmaddr);
#endif
	radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__flush_tlb_page);

#endif /* CONFIG_SMP */

static void do_tlbiel_kernel(void *info)
{
	_tlbiel_pid(0, RIC_FLUSH_ALL);
}

static inline void _tlbiel_kernel_broadcast(void)
{
	on_each_cpu(do_tlbiel_kernel, NULL, 1);
	if (tlbie_capable) {
		/*
		 * Coherent accelerators don't refcount kernel memory mappings,
		 * so have to always issue a tlbie for them. This is quite a
		 * slow path anyway.
		 */
		_tlbie_pid(0, RIC_FLUSH_ALL);
	}
}

/*
 * If kernel TLBIs ever become local rather than global, then
 * drivers/misc/ocxl/link.c:ocxl_link_add_pe will need some work, as it
 * assumes kernel TLBIs are global.
 */
void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	if (!mmu_has_feature(MMU_FTR_GTSE)) {
		unsigned long tgt = H_RPTI_TARGET_CMMU | H_RPTI_TARGET_NMMU;
		unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
				     H_RPTI_TYPE_PRT;

		pseries_rpt_invalidate(0, tgt, type, H_RPTI_PAGE_ALL,
				       start, end);
	} else if (cputlb_use_tlbie())
		_tlbie_pid(0, RIC_FLUSH_ALL);
	else
		_tlbiel_kernel_broadcast();
}
EXPORT_SYMBOL(radix__flush_tlb_kernel_range);

#define TLB_FLUSH_ALL -1UL

/*
 * Number of pages above which we invalidate the entire PID rather than
 * flush individual pages, for local and global flushes respectively.
 *
 * tlbie goes out to the interconnect and individual ops are more costly.
 * It also does not iterate over sets like the local tlbiel variant when
 * invalidating a full PID, so it has a far lower threshold to change from
 * individual page flushes to full-pid flushes.
 */
static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;
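
/*
 * As a rough worked example with the defaults above: a global flush of
 * up to 33 small pages is done page by page with tlbie, while anything
 * larger is converted into a full-PID flush. For purely local flushes
 * the break-even point is 2 * POWER9_TLB_SETS_RADIX = 256 pages, since
 * a local full-PID flush has to iterate over every TLB set anyway.
 */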

static inline void __radix__flush_tlb_range(struct mm_struct *mm,
					    unsigned long start, unsigned long end)
{
	unsigned long pid;
	unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
	unsigned long page_size = 1UL << page_shift;
	unsigned long nr_pages = (end - start) >> page_shift;
	bool fullmm = (end == TLB_FLUSH_ALL);
	bool flush_pid;
	enum tlb_flush_type type;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	type = flush_type_needed(mm, fullmm);
	if (type == FLUSH_TYPE_NONE)
		goto out;

	if (fullmm)
		flush_pid = true;
	else if (type == FLUSH_TYPE_GLOBAL)
		flush_pid = nr_pages > tlb_single_page_flush_ceiling;
	else
		flush_pid = nr_pages > tlb_local_single_page_flush_ceiling;

	if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) {
		unsigned long tgt = H_RPTI_TARGET_CMMU;
		unsigned long pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize);

		if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
			pg_sizes |= psize_to_rpti_pgsize(MMU_PAGE_2M);
		if (atomic_read(&mm->context.copros) > 0)
			tgt |= H_RPTI_TARGET_NMMU;
		pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB, pg_sizes,
				       start, end);
	} else if (flush_pid) {
		if (type == FLUSH_TYPE_LOCAL) {
			_tlbiel_pid(pid, RIC_FLUSH_TLB);
		} else {
			if (cputlb_use_tlbie()) {
				if (mm_needs_flush_escalation(mm))
					_tlbie_pid(pid, RIC_FLUSH_ALL);
				else
					_tlbie_pid(pid, RIC_FLUSH_TLB);
			} else {
				_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB);
			}
		}
	} else {
		bool hflush = false;
		unsigned long hstart, hend;

		if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
			hstart = (start + PMD_SIZE - 1) & PMD_MASK;
			hend = end & PMD_MASK;
			if (hstart < hend)
				hflush = true;
		}

		if (type == FLUSH_TYPE_LOCAL) {
			asm volatile("ptesync": : :"memory");
			__tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
			if (hflush)
				__tlbiel_va_range(hstart, hend, pid,
						  PMD_SIZE, MMU_PAGE_2M);
			ppc_after_tlbiel_barrier();
		} else if (cputlb_use_tlbie()) {
			asm volatile("ptesync": : :"memory");
			__tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
			if (hflush)
				__tlbie_va_range(hstart, hend, pid,
						 PMD_SIZE, MMU_PAGE_2M);
			asm volatile("eieio; tlbsync; ptesync": : :"memory");
		} else {
			_tlbiel_va_range_multicast(mm,
					start, end, pid, page_size, mmu_virtual_psize, false);
			if (hflush)
				_tlbiel_va_range_multicast(mm,
					hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, false);
		}
	}
out:
	preempt_enable();
}

void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
			    unsigned long end)
{
#ifdef CONFIG_HUGETLB_PAGE
	if (is_vm_hugetlb_page(vma))
		return radix__flush_hugetlb_tlb_range(vma, start, end);
#endif

	__radix__flush_tlb_range(vma->vm_mm, start, end);
}
EXPORT_SYMBOL(radix__flush_tlb_range);

static int radix_get_mmu_psize(int page_size)
{
	int psize;

	if (page_size == (1UL << mmu_psize_defs[mmu_virtual_psize].shift))
		psize = mmu_virtual_psize;
	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift))
		psize = MMU_PAGE_2M;
	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift))
		psize = MMU_PAGE_1G;
	else
		return -1;
	return psize;
}

/*
 * Flush partition scoped LPID address translation for all CPUs.
 */
void radix__flush_tlb_lpid_page(unsigned int lpid,
				unsigned long addr,
				unsigned long page_size)
{
	int psize = radix_get_mmu_psize(page_size);

	_tlbie_lpid_va(addr, lpid, psize, RIC_FLUSH_TLB);
}
EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid_page);

/*
 * Flush partition scoped PWC from LPID for all CPUs.
 */
void radix__flush_pwc_lpid(unsigned int lpid)
{
	_tlbie_lpid(lpid, RIC_FLUSH_PWC);
}
EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid);

/*
 * Flush partition scoped translations from LPID (=LPIDR)
 */
void radix__flush_all_lpid(unsigned int lpid)
{
	_tlbie_lpid(lpid, RIC_FLUSH_ALL);
}
EXPORT_SYMBOL_GPL(radix__flush_all_lpid);

/*
 * Flush process scoped translations from LPID (=LPIDR)
 */
void radix__flush_all_lpid_guest(unsigned int lpid)
{
	_tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
}

static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
					     unsigned long end, int psize);

void radix__tlb_flush(struct mmu_gather *tlb)
{
	int psize = 0;
	struct mm_struct *mm = tlb->mm;
	int page_size = tlb->page_size;
	unsigned long start = tlb->start;
	unsigned long end = tlb->end;

	/*
	 * if page size is not something we understand, do a full mm flush
	 *
	 * A "fullmm" flush must always do a flush_all_mm (RIC=2) flush
	 * that flushes the process table entry cache upon process teardown.
	 * See the comment for radix in arch_exit_mmap().
	 */
	if (tlb->fullmm || tlb->need_flush_all) {
		__flush_all_mm(mm, true);
	} else if ((psize = radix_get_mmu_psize(page_size)) == -1) {
		if (!tlb->freed_tables)
			radix__flush_tlb_mm(mm);
		else
			radix__flush_all_mm(mm);
	} else {
		if (!tlb->freed_tables)
			radix__flush_tlb_range_psize(mm, start, end, psize);
		else
			radix__flush_tlb_pwc_range_psize(mm, start, end, psize);
	}
}
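
/*
 * Note the freed_tables distinction above: when page tables themselves
 * were freed, the page walk cache may still hold entries derived from
 * them, so the flush is escalated to the PWC-flushing variants
 * (RIC_FLUSH_ALL / the *_pwc_range_psize helper) rather than a plain
 * TLB flush.
 */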

static void __radix__flush_tlb_range_psize(struct mm_struct *mm,
					   unsigned long start, unsigned long end,
					   int psize, bool also_pwc)
{
	unsigned long pid;
	unsigned int page_shift = mmu_psize_defs[psize].shift;
	unsigned long page_size = 1UL << page_shift;
	unsigned long nr_pages = (end - start) >> page_shift;
	bool fullmm = (end == TLB_FLUSH_ALL);
	bool flush_pid;
	enum tlb_flush_type type;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	fullmm = (end == TLB_FLUSH_ALL);

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	type = flush_type_needed(mm, fullmm);
	if (type == FLUSH_TYPE_NONE)
		goto out;

	if (fullmm)
		flush_pid = true;
	else if (type == FLUSH_TYPE_GLOBAL)
		flush_pid = nr_pages > tlb_single_page_flush_ceiling;
	else
		flush_pid = nr_pages > tlb_local_single_page_flush_ceiling;

	if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) {
		unsigned long tgt = H_RPTI_TARGET_CMMU;
		unsigned long type = H_RPTI_TYPE_TLB;
		unsigned long pg_sizes = psize_to_rpti_pgsize(psize);

		if (also_pwc)
			type |= H_RPTI_TYPE_PWC;
		if (atomic_read(&mm->context.copros) > 0)
			tgt |= H_RPTI_TARGET_NMMU;
		pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end);
	} else if (flush_pid) {
		if (type == FLUSH_TYPE_LOCAL) {
			_tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
		} else {
			if (cputlb_use_tlbie()) {
				if (mm_needs_flush_escalation(mm))
					also_pwc = true;

				_tlbie_pid(pid,
					also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
			} else {
				_tlbiel_pid_multicast(mm, pid,
					also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
			}
		}
	} else {
		if (type == FLUSH_TYPE_LOCAL)
			_tlbiel_va_range(start, end, pid, page_size, psize, also_pwc);
		else if (cputlb_use_tlbie())
			_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
		else
			_tlbiel_va_range_multicast(mm,
					start, end, pid, page_size, psize, also_pwc);
	}
out:
	preempt_enable();
}

void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
				  unsigned long end, int psize)
{
	return __radix__flush_tlb_range_psize(mm, start, end, psize, false);
}

static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
					     unsigned long end, int psize)
{
	__radix__flush_tlb_range_psize(mm, start, end, psize, true);
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
{
	unsigned long pid, end;
	enum tlb_flush_type type;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	/* 4k page size, just blow the world */
	if (PAGE_SIZE == 0x1000) {
		radix__flush_all_mm(mm);
		return;
	}

	end = addr + HPAGE_PMD_SIZE;

	/* Otherwise first do the PWC, then iterate the pages. */
	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	type = flush_type_needed(mm, false);
	if (type == FLUSH_TYPE_LOCAL) {
		_tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
	} else if (type == FLUSH_TYPE_GLOBAL) {
		if (!mmu_has_feature(MMU_FTR_GTSE)) {
			unsigned long tgt, type, pg_sizes;

			tgt = H_RPTI_TARGET_CMMU;
			type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
			       H_RPTI_TYPE_PRT;
			pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize);

			if (atomic_read(&mm->context.copros) > 0)
				tgt |= H_RPTI_TARGET_NMMU;
			pseries_rpt_invalidate(pid, tgt, type, pg_sizes,
					       addr, end);
		} else if (cputlb_use_tlbie())
			_tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
		else
			_tlbiel_va_range_multicast(mm,
					addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
	}

	preempt_enable();
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

void radix__flush_pmd_tlb_range(struct vm_area_struct *vma,
				unsigned long start, unsigned long end)
{
	radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_2M);
}
EXPORT_SYMBOL(radix__flush_pmd_tlb_range);

void radix__flush_tlb_all(void)
{
	unsigned long rb, prs, r, rs;
	unsigned long ric = RIC_FLUSH_ALL;

	rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */
	rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */

	asm volatile("ptesync": : :"memory");
	/*
	 * now flush guest entries by passing PRS = 1 and LPID != 0
	 */
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory");
	/*
	 * now flush host entries by passing PRS = 0 and LPID == 0
	 */
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/*
 * Performs process-scoped invalidations for a given LPID
 * as part of H_RPT_INVALIDATE hcall.
 */
void do_h_rpt_invalidate_prt(unsigned long pid, unsigned long lpid,
			     unsigned long type, unsigned long pg_sizes,
			     unsigned long start, unsigned long end)
{
	unsigned long psize, nr_pages;
	struct mmu_psize_def *def;
	bool flush_pid;

	/*
	 * An H_RPTI_TYPE_ALL request implies RIC=3, hence
	 * do a single IS=1 based flush.
	 */
	if ((type & H_RPTI_TYPE_ALL) == H_RPTI_TYPE_ALL) {
		_tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
		return;
	}

	if (type & H_RPTI_TYPE_PWC)
		_tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);

	/* Full PID flush */
	if (start == 0 && end == -1)
		return _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);

	/* Do range invalidation for all the valid page sizes */
	for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
		def = &mmu_psize_defs[psize];
		if (!(pg_sizes & def->h_rpt_pgsize))
			continue;

		nr_pages = (end - start) >> def->shift;
		flush_pid = nr_pages > tlb_single_page_flush_ceiling;

		/*
		 * If the number of pages spanning the range is above
		 * the ceiling, convert the request into a full PID flush.
		 * And since PID flush takes out all the page sizes, there
		 * is no need to consider remaining page sizes.
		 */
		if (flush_pid) {
			_tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
			return;
		}
		_tlbie_va_range_lpid(start, end, pid, lpid,
				     (1UL << def->shift), psize, false);
	}
}
EXPORT_SYMBOL_GPL(do_h_rpt_invalidate_prt);

#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */