// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * TLB flush routines for radix kernels.
 *
 * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
 */

#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/memblock.h>
#include <linux/mmu_context.h>
#include <linux/sched/mm.h>

#include <asm/ppc-opcode.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/trace.h>
#include <asm/cputhreads.h>
#include <asm/plpar_wrappers.h>

#include "internal.h"

/*
 * tlbiel instruction for radix, set invalidation
 * i.e., r=1 and is=01 or is=10 or is=11
 *
 * rb encodes the set number and the IS field, rs carries the PID;
 * ric (invalidation class) and prs (process scope) must be
 * compile-time constants to satisfy the "i" asm constraints.
 */
static __always_inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
					unsigned int pid,
					unsigned int ric, unsigned int prs)
{
	unsigned long rb;
	unsigned long rs;

	rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
	rs = ((unsigned long)pid << PPC_BITLSHIFT(31));

	asm volatile(PPC_TLBIEL(%0, %1, %2, %3, 1)
		     : : "r"(rb), "r"(rs), "i"(ric), "i"(prs)
		     : "memory");
}

/*
 * Flush the entire local TLB (all sets) plus the Page Walk Cache with
 * tlbiel, for ISA v3.0 sets-based invalidation. @is selects the
 * invalidation scope encoded in the IS field.
 */
static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
{
	unsigned int set;

	asm volatile("ptesync": : :"memory");

	/*
	 * Flush the first set of the TLB, and the entire Page Walk Cache
	 * and partition table entries. Then flush the remaining sets of the
	 * TLB.
	 */

	if (early_cpu_has_feature(CPU_FTR_HVMODE)) {
		/* MSR[HV] should flush partition scope translations first. */
		tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0);

		/* ISA v3.1 (CPU_FTR_ARCH_31) needs only one op, not per-set. */
		if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) {
			for (set = 1; set < num_sets; set++)
				tlbiel_radix_set_isa300(set, is, 0,
							RIC_FLUSH_TLB, 0);
		}
	}

	/* Flush process scoped entries.
 */
	tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1);

	if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) {
		for (set = 1; set < num_sets; set++)
			tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1);
	}

	ppc_after_tlbiel_barrier();
}

/*
 * Flush the whole local TLB. @action selects the IS scope: global
 * (is=3) or LPID-wide (is=2). Only valid on ISA v3.0+ CPUs.
 */
void radix__tlbiel_all(unsigned int action)
{
	unsigned int is;

	switch (action) {
	case TLB_INVAL_SCOPE_GLOBAL:
		is = 3;
		break;
	case TLB_INVAL_SCOPE_LPID:
		is = 2;
		break;
	default:
		BUG();
	}

	if (early_cpu_has_feature(CPU_FTR_ARCH_300))
		tlbiel_all_isa300(POWER9_TLB_SETS_RADIX, is);
	else
		WARN(1, "%s called on pre-POWER9 CPU\n", __func__);

	asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory");
}

/* tlbiel (local) invalidate of one TLB @set for process-scoped @pid. */
static __always_inline void __tlbiel_pid(unsigned long pid, int set,
				unsigned long ric)
{
	unsigned long rb,rs,prs,r;

	rb = PPC_BIT(53); /* IS = 1 */
	rb |= set << PPC_BITLSHIFT(51);
	rs = ((unsigned long)pid) << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 1, rb, rs, ric, prs, r);
}

/* tlbie (global broadcast) invalidate for process-scoped @pid. */
static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric)
{
	unsigned long rb,rs,prs,r;

	rb = PPC_BIT(53); /* IS = 1 */
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

/* tlbie invalidate for @pid under @lpid; RS carries both PID and LPID. */
static __always_inline void __tlbie_pid_lpid(unsigned long pid,
					     unsigned long lpid,
					     unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = PPC_BIT(53); /* IS = 1 */
	rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm
volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}
/* tlbie partition-scoped invalidate for @lpid (IS = 2). */
static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
{
	unsigned long rb,rs,prs,r;

	rb = PPC_BIT(52); /* IS = 2 */
	rs = lpid;
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

/* tlbie invalidate of guest (process-scoped) translations for @lpid. */
static __always_inline void __tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
{
	unsigned long rb,rs,prs,r;

	rb = PPC_BIT(52); /* IS = 2 */
	rs = lpid;
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

/*
 * tlbiel (local) invalidate of a single @va for @pid. @ap is the
 * "actual page size" encoding placed in RB.
 */
static __always_inline void __tlbiel_va(unsigned long va, unsigned long pid,
					unsigned long ap, unsigned long ric)
{
	unsigned long rb,rs,prs,r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 1, rb, rs, ric, prs, r);
}

/* tlbie (global) invalidate of a single @va for @pid; @ap as above. */
static __always_inline void __tlbie_va(unsigned long va, unsigned long pid,
				       unsigned long ap, unsigned long ric)
{
	unsigned long rb,rs,prs,r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

/* tlbie invalidate of a single @va for @pid under @lpid. */
static __always_inline void __tlbie_va_lpid(unsigned long va, unsigned long pid,
					    unsigned long lpid,
					    unsigned long ap, unsigned long ric)
{
	unsigned long rb, rs, prs, r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

/* tlbie partition-scoped invalidate of a single @va for @lpid. */
static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
					    unsigned long ap, unsigned long ric)
{
	unsigned long rb,rs,prs,r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = lpid;
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}


/*
 * POWER9 tlbie erratum workarounds (CPU_FTR_P9_TLBIE_ERAT_BUG /
 * CPU_FTR_P9_TLBIE_STQ_BUG): issue an extra tlbie after the real one
 * so the preceding invalidation takes effect reliably.
 */
static inline void fixup_tlbie_va(unsigned long va, unsigned long pid,
				  unsigned long ap)
{
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, 0, ap, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
	}
}

/* As fixup_tlbie_va(), for the end of a range flush (@va is the last VA). */
static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
					unsigned long ap)
{
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_pid(0, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
	}
}

/* As fixup_tlbie_va_range(), for the pid+lpid tlbie variants. */
static inline void fixup_tlbie_va_range_lpid(unsigned long va,
					     unsigned long pid,
					     unsigned
long lpid,
					     unsigned long ap)
{
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync" : : : "memory");
		__tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync" : : : "memory");
		__tlbie_va_lpid(va, pid, lpid, ap, RIC_FLUSH_TLB);
	}
}

/* P9 tlbie erratum workaround for full-PID flushes. */
static inline void fixup_tlbie_pid(unsigned long pid)
{
	/*
	 * We can use any address for the invalidation, pick one which is
	 * probably unused as an optimisation.
	 */
	unsigned long va = ((1UL << 52) - 1);

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_pid(0, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
	}
}

/* As fixup_tlbie_pid(), for the pid+lpid tlbie variants. */
static inline void fixup_tlbie_pid_lpid(unsigned long pid, unsigned long lpid)
{
	/*
	 * We can use any address for the invalidation, pick one which is
	 * probably unused as an optimisation.
	 */
	unsigned long va = ((1UL << 52) - 1);

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync" : : : "memory");
		__tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync" : : : "memory");
		__tlbie_va_lpid(va, pid, lpid, mmu_get_ap(MMU_PAGE_64K),
				RIC_FLUSH_TLB);
	}
}

/* P9 tlbie erratum workaround for partition-scoped VA flushes. */
static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
				       unsigned long ap)
{
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid_va(va, 0, ap, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid_va(va, lpid, ap, RIC_FLUSH_TLB);
	}
}

/* P9 tlbie erratum workaround for full-LPID flushes. */
static inline void fixup_tlbie_lpid(unsigned long lpid)
{
	/*
	 * We can use any address for the invalidation, pick one which is
	 * probably unused as an optimisation.
	 */
	unsigned long va = ((1UL << 52) - 1);

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid(0, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
	}
}

/*
 * We use 128 set in radix mode and 256 set in hpt mode.
 */
/* Local (tlbiel) flush of all sets for @pid; full barrier sequence included. */
static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
{
	int set;

	asm volatile("ptesync": : :"memory");

	switch (ric) {
	case RIC_FLUSH_PWC:

		/* For PWC, only one flush is needed */
		__tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
		ppc_after_tlbiel_barrier();
		return;
	case RIC_FLUSH_TLB:
		__tlbiel_pid(pid, 0, RIC_FLUSH_TLB);
		break;
	case RIC_FLUSH_ALL:
	default:
		/*
		 * Flush the first set of the TLB, and if
		 * we're doing a RIC_FLUSH_ALL, also flush
		 * the entire Page Walk Cache.
		 */
		__tlbiel_pid(pid, 0, RIC_FLUSH_ALL);
	}

	if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
		/* For the remaining sets, just flush the TLB */
		for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++)
			__tlbiel_pid(pid, set, RIC_FLUSH_TLB);
	}

	ppc_after_tlbiel_barrier();
	asm volatile(PPC_RADIX_INVALIDATE_ERAT_USER "; isync" : : :"memory");
}

/* Global (tlbie) flush for @pid, with errata fixups and tlbsync ordering. */
static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
{
	asm volatile("ptesync": : :"memory");

	/*
	 * Workaround the fact that the "ric" argument to __tlbie_pid
	 * must be a compile-time constraint to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_pid(pid, RIC_FLUSH_TLB);
		fixup_tlbie_pid(pid);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_pid(pid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_pid(pid, RIC_FLUSH_ALL);
		fixup_tlbie_pid(pid);
	}
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

/* As _tlbie_pid(), but targeting @pid under @lpid. */
static inline void _tlbie_pid_lpid(unsigned long pid, unsigned long lpid,
				   unsigned long ric)
{
	asm volatile("ptesync" : : : "memory");

	/*
	 * Workaround the fact that the "ric" argument to __tlbie_pid_lpid
	 * must be a compile-time constraint to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
		fixup_tlbie_pid_lpid(pid, lpid);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
		fixup_tlbie_pid_lpid(pid, lpid);
	}
	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
}
/* Argument bundle for the do_tlbiel_pid() IPI callback. */
struct tlbiel_pid {
	unsigned long pid;
	unsigned long ric;
};

/*
 * IPI callback: run a local PID flush on each CPU. The if/else chain
 * keeps "ric" a compile-time constant for the asm "i" constraint.
 */
static void do_tlbiel_pid(void *info)
{
	struct tlbiel_pid *t = info;

	if (t->ric == RIC_FLUSH_TLB)
		_tlbiel_pid(t->pid, RIC_FLUSH_TLB);
	else if (t->ric == RIC_FLUSH_PWC)
		_tlbiel_pid(t->pid, RIC_FLUSH_PWC);
	else
		_tlbiel_pid(t->pid, RIC_FLUSH_ALL);
}

/* Flush @pid with tlbiel on every CPU in the mm's cpumask, via IPI. */
static inline void _tlbiel_pid_multicast(struct mm_struct *mm,
				unsigned long pid, unsigned long ric)
{
	struct cpumask *cpus = mm_cpumask(mm);
	struct tlbiel_pid t = { .pid = pid, .ric = ric };

	on_each_cpu_mask(cpus, do_tlbiel_pid, &t, 1);
	/*
	 * Always want the CPU translations to be invalidated with tlbiel in
	 * these paths, so while coprocessors must use tlbie, we can not
	 * optimise away the tlbiel component.
	 */
	if (atomic_read(&mm->context.copros) > 0)
		_tlbie_pid(pid, RIC_FLUSH_ALL);
}

/* Global (tlbie) partition-scoped flush for @lpid. */
static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
{
	asm volatile("ptesync": : :"memory");

	/*
	 * Workaround the fact that the "ric" argument to __tlbie_lpid
	 * must be a compile-time constraint to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_lpid(lpid, RIC_FLUSH_TLB);
		fixup_tlbie_lpid(lpid);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_lpid(lpid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_lpid(lpid, RIC_FLUSH_ALL);
		fixup_tlbie_lpid(lpid);
	}
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

/* Global (tlbie) flush of guest translations for @lpid. */
static __always_inline void _tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
{
	/*
	 * Workaround the fact that the "ric" argument to __tlbie_lpid_guest
	 * must be a compile-time constraint to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_lpid_guest(lpid, RIC_FLUSH_TLB);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_lpid_guest(lpid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
	}
	fixup_tlbie_lpid(lpid);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

/* One local tlbiel per @page_size step over [start, end). No barriers. */
static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize)
{
	unsigned long addr;
	unsigned long ap = mmu_get_ap(psize);

	for (addr = start; addr < end; addr += page_size)
		__tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
}

/* Single-VA local flush with the surrounding barrier sequence. */
static __always_inline void _tlbiel_va(unsigned long va, unsigned long pid,
				       unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbiel_va(va, pid, ap, ric);
	ppc_after_tlbiel_barrier();
}

/* Range local flush; optionally flushes the PWC first (@also_pwc). */
static inline void _tlbiel_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize, bool also_pwc)
{
	asm volatile("ptesync": : :"memory");
	if (also_pwc)
		__tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
	__tlbiel_va_range(start, end, pid, page_size, psize);
ppc_after_tlbiel_barrier();
}

/* One global tlbie per @page_size step over [start, end), then errata fixup. */
static inline void __tlbie_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize)
{
	unsigned long addr;
	unsigned long ap = mmu_get_ap(psize);

	for (addr = start; addr < end; addr += page_size)
		__tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);

	/* NOTE: assumes start < end, so the loop ran at least once. */
	fixup_tlbie_va_range(addr - page_size, pid, ap);
}

/* As __tlbie_va_range(), for the pid+lpid variants. */
static inline void __tlbie_va_range_lpid(unsigned long start, unsigned long end,
					 unsigned long pid, unsigned long lpid,
					 unsigned long page_size,
					 unsigned long psize)
{
	unsigned long addr;
	unsigned long ap = mmu_get_ap(psize);

	for (addr = start; addr < end; addr += page_size)
		__tlbie_va_lpid(addr, pid, lpid, ap, RIC_FLUSH_TLB);

	fixup_tlbie_va_range_lpid(addr - page_size, pid, lpid, ap);
}

/* Single-VA global flush with errata fixup and tlbsync ordering. */
static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
				      unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbie_va(va, pid, ap, ric);
	fixup_tlbie_va(va, pid, ap);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

/* Argument bundle for the do_tlbiel_va() IPI callback. */
struct tlbiel_va {
	unsigned long pid;
	unsigned long va;
	unsigned long psize;
	unsigned long ric;
};

/* IPI callback: single-VA local flush; keeps "ric" compile-time constant. */
static void do_tlbiel_va(void *info)
{
	struct tlbiel_va *t = info;

	if (t->ric == RIC_FLUSH_TLB)
		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_TLB);
	else if (t->ric == RIC_FLUSH_PWC)
		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_PWC);
	else
		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_ALL);
}

/* Flush @va on every CPU in the mm's cpumask via IPI; tlbie for copros. */
static inline void _tlbiel_va_multicast(struct mm_struct *mm,
				unsigned long va, unsigned long pid,
				unsigned long psize, unsigned long ric)
{
	struct cpumask *cpus = mm_cpumask(mm);
	struct tlbiel_va t = { .va = va, .pid = pid, .psize = psize, .ric = ric };
on_each_cpu_mask(cpus, do_tlbiel_va, &t, 1);
	if (atomic_read(&mm->context.copros) > 0)
		_tlbie_va(va, pid, psize, RIC_FLUSH_TLB);
}

/* Argument bundle for the do_tlbiel_va_range() IPI callback. */
struct tlbiel_va_range {
	unsigned long pid;
	unsigned long start;
	unsigned long end;
	unsigned long page_size;
	unsigned long psize;
	bool also_pwc;
};

/* IPI callback: local range flush. */
static void do_tlbiel_va_range(void *info)
{
	struct tlbiel_va_range *t = info;

	_tlbiel_va_range(t->start, t->end, t->pid, t->page_size,
			 t->psize, t->also_pwc);
}

/* Single-VA partition-scoped global flush for @lpid. */
static __always_inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
					   unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbie_lpid_va(va, lpid, ap, ric);
	fixup_tlbie_lpid_va(va, lpid, ap);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

/* Range global flush; optionally flushes the PWC first (@also_pwc). */
static inline void _tlbie_va_range(unsigned long start, unsigned long end,
				   unsigned long pid, unsigned long page_size,
				   unsigned long psize, bool also_pwc)
{
	asm volatile("ptesync": : :"memory");
	if (also_pwc)
		__tlbie_pid(pid, RIC_FLUSH_PWC);
	__tlbie_va_range(start, end, pid, page_size, psize);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

/* As _tlbie_va_range(), for the pid+lpid variants. */
static inline void _tlbie_va_range_lpid(unsigned long start, unsigned long end,
					unsigned long pid, unsigned long lpid,
					unsigned long page_size,
					unsigned long psize, bool also_pwc)
{
	asm volatile("ptesync" : : : "memory");
	if (also_pwc)
		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
	__tlbie_va_range_lpid(start, end, pid, lpid, page_size, psize);
	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
}

/* Range flush on every CPU in the mm's cpumask via IPI; tlbie for copros. */
static inline void _tlbiel_va_range_multicast(struct mm_struct *mm,
				unsigned long start, unsigned long end,
				unsigned long pid, unsigned long page_size,
				unsigned long psize, bool also_pwc)
{
	struct cpumask *cpus = mm_cpumask(mm);
struct tlbiel_va_range t = { .start = start, .end = end,
				.pid = pid, .page_size = page_size,
				.psize = psize, .also_pwc = also_pwc };

	on_each_cpu_mask(cpus, do_tlbiel_va_range, &t, 1);
	if (atomic_read(&mm->context.copros) > 0)
		_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
}

/*
 * Base TLB flushing operations:
 *
 *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
 *  - flush_tlb_page(vma, vmaddr) flushes one page
 *  - flush_tlb_range(vma, start, end) flushes a range of pages
 *  - flush_tlb_kernel_range(start, end) flushes kernel pages
 *
 *  - local_* variants of page and mm only apply to the current
 *    processor
 */
void radix__local_flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_pid(pid, RIC_FLUSH_TLB);
	preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_tlb_mm);

#ifndef CONFIG_SMP
void radix__local_flush_all_mm(struct mm_struct *mm)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_pid(pid, RIC_FLUSH_ALL);
	preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_all_mm);

/* UP build: a "global" flush is just the local flush. */
static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
{
	radix__local_flush_all_mm(mm);
}
#endif /* CONFIG_SMP */

/* Local single-page flush for the given page-size class @psize. */
void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
				       int psize)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	preempt_enable();
}

void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
	/* need the return fix for nohash.c */
	if (is_vm_hugetlb_page(vma))
		return radix__local_flush_hugetlb_page(vma,
vmaddr);
#endif
	radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__local_flush_tlb_page);

/* True when this mm's flushes must be escalated to full-scope tlbie. */
static bool mm_needs_flush_escalation(struct mm_struct *mm)
{
	/*
	 * P9 nest MMU has issues with the page walk cache
	 * caching PTEs and not flushing them properly when
	 * RIC = 0 for a PID/LPID invalidate
	 */
	if (atomic_read(&mm->context.copros) > 0)
		return true;
	return false;
}

/*
 * If always_flush is true, then flush even if this CPU can't be removed
 * from mm_cpumask.
 *
 * Drops this CPU out of the mm's lazy-tlb use (switching to init_mm if
 * needed), clears it from mm_cpumask, and locally flushes the PID.
 */
void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush)
{
	unsigned long pid = mm->context.id;
	int cpu = smp_processor_id();

	/*
	 * A kthread could have done a mmget_not_zero() after the flushing CPU
	 * checked mm_cpumask, and be in the process of kthread_use_mm when
	 * interrupted here. In that case, current->mm will be set to mm,
	 * because kthread_use_mm() setting ->mm and switching to the mm is
	 * done with interrupts off.
	 */
	if (current->mm == mm)
		goto out;

	if (current->active_mm == mm) {
		WARN_ON_ONCE(current->mm != NULL);
		/* Is a kernel thread and is using mm as the lazy tlb */
		mmgrab(&init_mm);
		current->active_mm = &init_mm;
		switch_mm_irqs_off(mm, &init_mm, current);
		mmdrop(mm);
	}

	/*
	 * This IPI may be initiated from any source including those not
	 * running the mm, so there may be a racing IPI that comes after
	 * this one which finds the cpumask already clear. Check and avoid
	 * underflowing the active_cpus count in that case. The race should
	 * not otherwise be a problem, but the TLB must be flushed because
	 * that's what the caller expects.
	 */
	if (cpumask_test_cpu(cpu, mm_cpumask(mm))) {
		atomic_dec(&mm->context.active_cpus);
		cpumask_clear_cpu(cpu, mm_cpumask(mm));
		always_flush = true;
	}

out:
	if (always_flush)
		_tlbiel_pid(pid, RIC_FLUSH_ALL);
}

#ifdef CONFIG_SMP
/* IPI callback wrapper around exit_lazy_flush_tlb(). */
static void do_exit_flush_lazy_tlb(void *arg)
{
	struct mm_struct *mm = arg;
	exit_lazy_flush_tlb(mm, true);
}

/* IPI every CPU in mm_cpumask to drop lazy-tlb use of @mm and flush. */
static void exit_flush_lazy_tlbs(struct mm_struct *mm)
{
	/*
	 * Would be nice if this was async so it could be run in
	 * parallel with our local flush, but generic code does not
	 * give a good API for it. Could extend the generic code or
	 * make a special powerpc IPI for flushing TLBs.
	 * For now it's not too performance critical.
	 */
	smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb,
				(void *)mm, 1);
}

#else /* CONFIG_SMP */
static inline void exit_flush_lazy_tlbs(struct mm_struct *mm) { }
#endif /* CONFIG_SMP */

static DEFINE_PER_CPU(unsigned int, mm_cpumask_trim_clock);

/*
 * Interval between flushes at which we send out IPIs to check whether the
 * mm_cpumask can be trimmed for the case where it's not a single-threaded
 * process flushing its own mm. The intent is to reduce the cost of later
 * flushes. Don't want this to be so low that it adds noticeable cost to TLB
 * flushing, or so high that it doesn't help reduce global TLBIEs.
 */
static unsigned long tlb_mm_cpumask_trim_timer = 1073;

/* Per-CPU counter; returns true once every tlb_mm_cpumask_trim_timer calls. */
static bool tick_and_test_trim_clock(void)
{
	if (__this_cpu_inc_return(mm_cpumask_trim_clock) ==
			tlb_mm_cpumask_trim_timer) {
		__this_cpu_write(mm_cpumask_trim_clock, 0);
		return true;
	}
	return false;
}

enum tlb_flush_type {
	FLUSH_TYPE_NONE,
	FLUSH_TYPE_LOCAL,
	FLUSH_TYPE_GLOBAL,
};

/*
 * Decide whether a flush of @mm needs no work, a local tlbiel, or a
 * global invalidation, based on which CPUs are using the mm. May also
 * opportunistically trim mm_cpumask via IPIs.
 */
static enum tlb_flush_type flush_type_needed(struct mm_struct *mm, bool fullmm)
{
	int active_cpus = atomic_read(&mm->context.active_cpus);
	int cpu = smp_processor_id();

	if (active_cpus == 0)
		return FLUSH_TYPE_NONE;
	if (active_cpus == 1 && cpumask_test_cpu(cpu, mm_cpumask(mm))) {
		if (current->mm != mm) {
			/*
			 * Asynchronous flush sources may trim down to nothing
			 * if the process is not running, so occasionally try
			 * to trim.
			 */
			if (tick_and_test_trim_clock()) {
				exit_lazy_flush_tlb(mm, true);
				return FLUSH_TYPE_NONE;
			}
		}
		return FLUSH_TYPE_LOCAL;
	}

	/* Coprocessors require TLBIE to invalidate nMMU. */
	if (atomic_read(&mm->context.copros) > 0)
		return FLUSH_TYPE_GLOBAL;

	/*
	 * In the fullmm case there's no point doing the exit_flush_lazy_tlbs
	 * because the mm is being taken down anyway, and a TLBIE tends to
	 * be faster than an IPI+TLBIEL.
	 */
	if (fullmm)
		return FLUSH_TYPE_GLOBAL;

	/*
	 * If we are running the only thread of a single-threaded process,
	 * then we should almost always be able to trim off the rest of the
	 * CPU mask (except in the case of use_mm() races), so always try
	 * trimming the mask.
	 */
	if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm) {
		exit_flush_lazy_tlbs(mm);
		/*
		 * use_mm() race could prevent IPIs from being able to clear
		 * the cpumask here, however those users are established
		 * after our first check (and so after the PTEs are removed),
		 * and the TLB still gets flushed by the IPI, so this CPU
		 * will only require a local flush.
		 */
		return FLUSH_TYPE_LOCAL;
	}

	/*
	 * Occasionally try to trim down the cpumask. It's possible this can
	 * bring the mask to zero, which results in no flush.
	 */
	if (tick_and_test_trim_clock()) {
		exit_flush_lazy_tlbs(mm);
		if (current->mm == mm)
			return FLUSH_TYPE_LOCAL;
		if (cpumask_test_cpu(cpu, mm_cpumask(mm)))
			exit_lazy_flush_tlb(mm, true);
		return FLUSH_TYPE_NONE;
	}

	return FLUSH_TYPE_GLOBAL;
}

#ifdef CONFIG_SMP
/* Flush all TLB entries for @mm, choosing the cheapest adequate method. */
void radix__flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long pid;
	enum tlb_flush_type type;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	/*
	 * Order loads of mm_cpumask (in flush_type_needed) vs previous
	 * stores to clear ptes before the invalidate.
See barrier in 946 * switch_mm_irqs_off 947 */ 948 smp_mb(); 949 type = flush_type_needed(mm, false); 950 if (type == FLUSH_TYPE_LOCAL) { 951 _tlbiel_pid(pid, RIC_FLUSH_TLB); 952 } else if (type == FLUSH_TYPE_GLOBAL) { 953 if (!mmu_has_feature(MMU_FTR_GTSE)) { 954 unsigned long tgt = H_RPTI_TARGET_CMMU; 955 956 if (atomic_read(&mm->context.copros) > 0) 957 tgt |= H_RPTI_TARGET_NMMU; 958 pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB, 959 H_RPTI_PAGE_ALL, 0, -1UL); 960 } else if (cputlb_use_tlbie()) { 961 if (mm_needs_flush_escalation(mm)) 962 _tlbie_pid(pid, RIC_FLUSH_ALL); 963 else 964 _tlbie_pid(pid, RIC_FLUSH_TLB); 965 } else { 966 _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB); 967 } 968 } 969 preempt_enable(); 970 } 971 EXPORT_SYMBOL(radix__flush_tlb_mm); 972 973 static void __flush_all_mm(struct mm_struct *mm, bool fullmm) 974 { 975 unsigned long pid; 976 enum tlb_flush_type type; 977 978 pid = mm->context.id; 979 if (unlikely(pid == MMU_NO_CONTEXT)) 980 return; 981 982 preempt_disable(); 983 smp_mb(); /* see radix__flush_tlb_mm */ 984 type = flush_type_needed(mm, fullmm); 985 if (type == FLUSH_TYPE_LOCAL) { 986 _tlbiel_pid(pid, RIC_FLUSH_ALL); 987 } else if (type == FLUSH_TYPE_GLOBAL) { 988 if (!mmu_has_feature(MMU_FTR_GTSE)) { 989 unsigned long tgt = H_RPTI_TARGET_CMMU; 990 unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | 991 H_RPTI_TYPE_PRT; 992 993 if (atomic_read(&mm->context.copros) > 0) 994 tgt |= H_RPTI_TARGET_NMMU; 995 pseries_rpt_invalidate(pid, tgt, type, 996 H_RPTI_PAGE_ALL, 0, -1UL); 997 } else if (cputlb_use_tlbie()) 998 _tlbie_pid(pid, RIC_FLUSH_ALL); 999 else 1000 _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL); 1001 } 1002 preempt_enable(); 1003 } 1004 1005 void radix__flush_all_mm(struct mm_struct *mm) 1006 { 1007 __flush_all_mm(mm, false); 1008 } 1009 EXPORT_SYMBOL(radix__flush_all_mm); 1010 1011 void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, 1012 int psize) 1013 { 1014 unsigned long pid; 1015 enum 
tlb_flush_type type;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	type = flush_type_needed(mm, false);
	if (type == FLUSH_TYPE_LOCAL) {
		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	} else if (type == FLUSH_TYPE_GLOBAL) {
		if (!mmu_has_feature(MMU_FTR_GTSE)) {
			/* No guest tlbie support: ask the hypervisor. */
			unsigned long tgt, pg_sizes, size;

			tgt = H_RPTI_TARGET_CMMU;
			pg_sizes = psize_to_rpti_pgsize(psize);
			size = 1UL << mmu_psize_to_shift(psize);

			if (atomic_read(&mm->context.copros) > 0)
				tgt |= H_RPTI_TARGET_NMMU;
			pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB,
					       pg_sizes, vmaddr,
					       vmaddr + size);
		} else if (cputlb_use_tlbie())
			_tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
		else
			_tlbiel_va_multicast(mm, vmaddr, pid, psize, RIC_FLUSH_TLB);
	}
	preempt_enable();
}

void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
	if (is_vm_hugetlb_page(vma))
		return radix__flush_hugetlb_page(vma, vmaddr);
#endif
	radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__flush_tlb_page);

#endif /* CONFIG_SMP */

/* IPI callback: local flush of all kernel (pid 0) translations. */
static void do_tlbiel_kernel(void *info)
{
	_tlbiel_pid(0, RIC_FLUSH_ALL);
}

/* Flush kernel translations on every CPU with tlbiel, plus tlbie for copros. */
static inline void _tlbiel_kernel_broadcast(void)
{
	on_each_cpu(do_tlbiel_kernel, NULL, 1);
	if (tlbie_capable) {
		/*
		 * Coherent accelerators don't refcount kernel memory mappings,
		 * so have to always issue a tlbie for them. This is quite a
		 * slow path anyway.
		 */
		_tlbie_pid(0, RIC_FLUSH_ALL);
	}
}

/*
 * If kernel TLBIs ever become local rather than global, then
 * drivers/misc/ocxl/link.c:ocxl_link_add_pe will need some work, as it
 * assumes kernel TLBIs are global.
1081 */ 1082 void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end) 1083 { 1084 if (!mmu_has_feature(MMU_FTR_GTSE)) { 1085 unsigned long tgt = H_RPTI_TARGET_CMMU | H_RPTI_TARGET_NMMU; 1086 unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | 1087 H_RPTI_TYPE_PRT; 1088 1089 pseries_rpt_invalidate(0, tgt, type, H_RPTI_PAGE_ALL, 1090 start, end); 1091 } else if (cputlb_use_tlbie()) 1092 _tlbie_pid(0, RIC_FLUSH_ALL); 1093 else 1094 _tlbiel_kernel_broadcast(); 1095 } 1096 EXPORT_SYMBOL(radix__flush_tlb_kernel_range); 1097 1098 #define TLB_FLUSH_ALL -1UL 1099 1100 /* 1101 * Number of pages above which we invalidate the entire PID rather than 1102 * flush individual pages, for local and global flushes respectively. 1103 * 1104 * tlbie goes out to the interconnect and individual ops are more costly. 1105 * It also does not iterate over sets like the local tlbiel variant when 1106 * invalidating a full PID, so it has a far lower threshold to change from 1107 * individual page flushes to full-pid flushes. 
1108 */ 1109 static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33; 1110 static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2; 1111 1112 static inline void __radix__flush_tlb_range(struct mm_struct *mm, 1113 unsigned long start, unsigned long end) 1114 { 1115 unsigned long pid; 1116 unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift; 1117 unsigned long page_size = 1UL << page_shift; 1118 unsigned long nr_pages = (end - start) >> page_shift; 1119 bool fullmm = (end == TLB_FLUSH_ALL); 1120 bool flush_pid, flush_pwc = false; 1121 enum tlb_flush_type type; 1122 1123 pid = mm->context.id; 1124 if (unlikely(pid == MMU_NO_CONTEXT)) 1125 return; 1126 1127 preempt_disable(); 1128 smp_mb(); /* see radix__flush_tlb_mm */ 1129 type = flush_type_needed(mm, fullmm); 1130 if (type == FLUSH_TYPE_NONE) 1131 goto out; 1132 1133 if (fullmm) 1134 flush_pid = true; 1135 else if (type == FLUSH_TYPE_GLOBAL) 1136 flush_pid = nr_pages > tlb_single_page_flush_ceiling; 1137 else 1138 flush_pid = nr_pages > tlb_local_single_page_flush_ceiling; 1139 /* 1140 * full pid flush already does the PWC flush. if it is not full pid 1141 * flush check the range is more than PMD and force a pwc flush 1142 * mremap() depends on this behaviour. 
1143 */ 1144 if (!flush_pid && (end - start) >= PMD_SIZE) 1145 flush_pwc = true; 1146 1147 if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) { 1148 unsigned long type = H_RPTI_TYPE_TLB; 1149 unsigned long tgt = H_RPTI_TARGET_CMMU; 1150 unsigned long pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize); 1151 1152 if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) 1153 pg_sizes |= psize_to_rpti_pgsize(MMU_PAGE_2M); 1154 if (atomic_read(&mm->context.copros) > 0) 1155 tgt |= H_RPTI_TARGET_NMMU; 1156 if (flush_pwc) 1157 type |= H_RPTI_TYPE_PWC; 1158 pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end); 1159 } else if (flush_pid) { 1160 /* 1161 * We are now flushing a range larger than PMD size force a RIC_FLUSH_ALL 1162 */ 1163 if (type == FLUSH_TYPE_LOCAL) { 1164 _tlbiel_pid(pid, RIC_FLUSH_ALL); 1165 } else { 1166 if (cputlb_use_tlbie()) { 1167 _tlbie_pid(pid, RIC_FLUSH_ALL); 1168 } else { 1169 _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL); 1170 } 1171 } 1172 } else { 1173 bool hflush = false; 1174 unsigned long hstart, hend; 1175 1176 if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { 1177 hstart = (start + PMD_SIZE - 1) & PMD_MASK; 1178 hend = end & PMD_MASK; 1179 if (hstart < hend) 1180 hflush = true; 1181 } 1182 1183 if (type == FLUSH_TYPE_LOCAL) { 1184 asm volatile("ptesync": : :"memory"); 1185 if (flush_pwc) 1186 /* For PWC, only one flush is needed */ 1187 __tlbiel_pid(pid, 0, RIC_FLUSH_PWC); 1188 __tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize); 1189 if (hflush) 1190 __tlbiel_va_range(hstart, hend, pid, 1191 PMD_SIZE, MMU_PAGE_2M); 1192 ppc_after_tlbiel_barrier(); 1193 } else if (cputlb_use_tlbie()) { 1194 asm volatile("ptesync": : :"memory"); 1195 if (flush_pwc) 1196 __tlbie_pid(pid, RIC_FLUSH_PWC); 1197 __tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize); 1198 if (hflush) 1199 __tlbie_va_range(hstart, hend, pid, 1200 PMD_SIZE, MMU_PAGE_2M); 1201 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 1202 } else { 1203 
_tlbiel_va_range_multicast(mm, 1204 start, end, pid, page_size, mmu_virtual_psize, flush_pwc); 1205 if (hflush) 1206 _tlbiel_va_range_multicast(mm, 1207 hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, flush_pwc); 1208 } 1209 } 1210 out: 1211 preempt_enable(); 1212 } 1213 1214 void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start, 1215 unsigned long end) 1216 1217 { 1218 #ifdef CONFIG_HUGETLB_PAGE 1219 if (is_vm_hugetlb_page(vma)) 1220 return radix__flush_hugetlb_tlb_range(vma, start, end); 1221 #endif 1222 1223 __radix__flush_tlb_range(vma->vm_mm, start, end); 1224 } 1225 EXPORT_SYMBOL(radix__flush_tlb_range); 1226 1227 static int radix_get_mmu_psize(int page_size) 1228 { 1229 int psize; 1230 1231 if (page_size == (1UL << mmu_psize_defs[mmu_virtual_psize].shift)) 1232 psize = mmu_virtual_psize; 1233 else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift)) 1234 psize = MMU_PAGE_2M; 1235 else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift)) 1236 psize = MMU_PAGE_1G; 1237 else 1238 return -1; 1239 return psize; 1240 } 1241 1242 /* 1243 * Flush partition scoped LPID address translation for all CPUs. 1244 */ 1245 void radix__flush_tlb_lpid_page(unsigned int lpid, 1246 unsigned long addr, 1247 unsigned long page_size) 1248 { 1249 int psize = radix_get_mmu_psize(page_size); 1250 1251 _tlbie_lpid_va(addr, lpid, psize, RIC_FLUSH_TLB); 1252 } 1253 EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid_page); 1254 1255 /* 1256 * Flush partition scoped PWC from LPID for all CPUs. 
1257 */ 1258 void radix__flush_pwc_lpid(unsigned int lpid) 1259 { 1260 _tlbie_lpid(lpid, RIC_FLUSH_PWC); 1261 } 1262 EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid); 1263 1264 /* 1265 * Flush partition scoped translations from LPID (=LPIDR) 1266 */ 1267 void radix__flush_all_lpid(unsigned int lpid) 1268 { 1269 _tlbie_lpid(lpid, RIC_FLUSH_ALL); 1270 } 1271 EXPORT_SYMBOL_GPL(radix__flush_all_lpid); 1272 1273 /* 1274 * Flush process scoped translations from LPID (=LPIDR) 1275 */ 1276 void radix__flush_all_lpid_guest(unsigned int lpid) 1277 { 1278 _tlbie_lpid_guest(lpid, RIC_FLUSH_ALL); 1279 } 1280 1281 void radix__tlb_flush(struct mmu_gather *tlb) 1282 { 1283 int psize = 0; 1284 struct mm_struct *mm = tlb->mm; 1285 int page_size = tlb->page_size; 1286 unsigned long start = tlb->start; 1287 unsigned long end = tlb->end; 1288 1289 /* 1290 * if page size is not something we understand, do a full mm flush 1291 * 1292 * A "fullmm" flush must always do a flush_all_mm (RIC=2) flush 1293 * that flushes the process table entry cache upon process teardown. 1294 * See the comment for radix in arch_exit_mmap(). 
1295 */ 1296 if (tlb->fullmm || tlb->need_flush_all) { 1297 __flush_all_mm(mm, true); 1298 } else if ( (psize = radix_get_mmu_psize(page_size)) == -1) { 1299 if (!tlb->freed_tables) 1300 radix__flush_tlb_mm(mm); 1301 else 1302 radix__flush_all_mm(mm); 1303 } else { 1304 if (!tlb->freed_tables) 1305 radix__flush_tlb_range_psize(mm, start, end, psize); 1306 else 1307 radix__flush_tlb_pwc_range_psize(mm, start, end, psize); 1308 } 1309 } 1310 1311 static void __radix__flush_tlb_range_psize(struct mm_struct *mm, 1312 unsigned long start, unsigned long end, 1313 int psize, bool also_pwc) 1314 { 1315 unsigned long pid; 1316 unsigned int page_shift = mmu_psize_defs[psize].shift; 1317 unsigned long page_size = 1UL << page_shift; 1318 unsigned long nr_pages = (end - start) >> page_shift; 1319 bool fullmm = (end == TLB_FLUSH_ALL); 1320 bool flush_pid; 1321 enum tlb_flush_type type; 1322 1323 pid = mm->context.id; 1324 if (unlikely(pid == MMU_NO_CONTEXT)) 1325 return; 1326 1327 fullmm = (end == TLB_FLUSH_ALL); 1328 1329 preempt_disable(); 1330 smp_mb(); /* see radix__flush_tlb_mm */ 1331 type = flush_type_needed(mm, fullmm); 1332 if (type == FLUSH_TYPE_NONE) 1333 goto out; 1334 1335 if (fullmm) 1336 flush_pid = true; 1337 else if (type == FLUSH_TYPE_GLOBAL) 1338 flush_pid = nr_pages > tlb_single_page_flush_ceiling; 1339 else 1340 flush_pid = nr_pages > tlb_local_single_page_flush_ceiling; 1341 1342 if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) { 1343 unsigned long tgt = H_RPTI_TARGET_CMMU; 1344 unsigned long type = H_RPTI_TYPE_TLB; 1345 unsigned long pg_sizes = psize_to_rpti_pgsize(psize); 1346 1347 if (also_pwc) 1348 type |= H_RPTI_TYPE_PWC; 1349 if (atomic_read(&mm->context.copros) > 0) 1350 tgt |= H_RPTI_TARGET_NMMU; 1351 pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end); 1352 } else if (flush_pid) { 1353 if (type == FLUSH_TYPE_LOCAL) { 1354 _tlbiel_pid(pid, also_pwc ? 
RIC_FLUSH_ALL : RIC_FLUSH_TLB); 1355 } else { 1356 if (cputlb_use_tlbie()) { 1357 if (mm_needs_flush_escalation(mm)) 1358 also_pwc = true; 1359 1360 _tlbie_pid(pid, 1361 also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB); 1362 } else { 1363 _tlbiel_pid_multicast(mm, pid, 1364 also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB); 1365 } 1366 1367 } 1368 } else { 1369 if (type == FLUSH_TYPE_LOCAL) 1370 _tlbiel_va_range(start, end, pid, page_size, psize, also_pwc); 1371 else if (cputlb_use_tlbie()) 1372 _tlbie_va_range(start, end, pid, page_size, psize, also_pwc); 1373 else 1374 _tlbiel_va_range_multicast(mm, 1375 start, end, pid, page_size, psize, also_pwc); 1376 } 1377 out: 1378 preempt_enable(); 1379 } 1380 1381 void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start, 1382 unsigned long end, int psize) 1383 { 1384 return __radix__flush_tlb_range_psize(mm, start, end, psize, false); 1385 } 1386 1387 void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start, 1388 unsigned long end, int psize) 1389 { 1390 __radix__flush_tlb_range_psize(mm, start, end, psize, true); 1391 } 1392 1393 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 1394 void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr) 1395 { 1396 unsigned long pid, end; 1397 enum tlb_flush_type type; 1398 1399 pid = mm->context.id; 1400 if (unlikely(pid == MMU_NO_CONTEXT)) 1401 return; 1402 1403 /* 4k page size, just blow the world */ 1404 if (PAGE_SIZE == 0x1000) { 1405 radix__flush_all_mm(mm); 1406 return; 1407 } 1408 1409 end = addr + HPAGE_PMD_SIZE; 1410 1411 /* Otherwise first do the PWC, then iterate the pages. 
*/ 1412 preempt_disable(); 1413 smp_mb(); /* see radix__flush_tlb_mm */ 1414 type = flush_type_needed(mm, false); 1415 if (type == FLUSH_TYPE_LOCAL) { 1416 _tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true); 1417 } else if (type == FLUSH_TYPE_GLOBAL) { 1418 if (!mmu_has_feature(MMU_FTR_GTSE)) { 1419 unsigned long tgt, type, pg_sizes; 1420 1421 tgt = H_RPTI_TARGET_CMMU; 1422 type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | 1423 H_RPTI_TYPE_PRT; 1424 pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize); 1425 1426 if (atomic_read(&mm->context.copros) > 0) 1427 tgt |= H_RPTI_TARGET_NMMU; 1428 pseries_rpt_invalidate(pid, tgt, type, pg_sizes, 1429 addr, end); 1430 } else if (cputlb_use_tlbie()) 1431 _tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true); 1432 else 1433 _tlbiel_va_range_multicast(mm, 1434 addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true); 1435 } 1436 1437 preempt_enable(); 1438 } 1439 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 1440 1441 void radix__flush_pmd_tlb_range(struct vm_area_struct *vma, 1442 unsigned long start, unsigned long end) 1443 { 1444 radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_2M); 1445 } 1446 EXPORT_SYMBOL(radix__flush_pmd_tlb_range); 1447 1448 void radix__flush_tlb_all(void) 1449 { 1450 unsigned long rb,prs,r,rs; 1451 unsigned long ric = RIC_FLUSH_ALL; 1452 1453 rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */ 1454 prs = 0; /* partition scoped */ 1455 r = 1; /* radix format */ 1456 rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */ 1457 1458 asm volatile("ptesync": : :"memory"); 1459 /* 1460 * now flush guest entries by passing PRS = 1 and LPID != 0 1461 */ 1462 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 1463 : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory"); 1464 /* 1465 * now flush host entires by passing PRS = 0 and LPID == 0 1466 */ 1467 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 1468 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory"); 1469 asm 
volatile("eieio; tlbsync; ptesync": : :"memory"); 1470 } 1471 1472 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 1473 /* 1474 * Performs process-scoped invalidations for a given LPID 1475 * as part of H_RPT_INVALIDATE hcall. 1476 */ 1477 void do_h_rpt_invalidate_prt(unsigned long pid, unsigned long lpid, 1478 unsigned long type, unsigned long pg_sizes, 1479 unsigned long start, unsigned long end) 1480 { 1481 unsigned long psize, nr_pages; 1482 struct mmu_psize_def *def; 1483 bool flush_pid; 1484 1485 /* 1486 * A H_RPTI_TYPE_ALL request implies RIC=3, hence 1487 * do a single IS=1 based flush. 1488 */ 1489 if ((type & H_RPTI_TYPE_ALL) == H_RPTI_TYPE_ALL) { 1490 _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL); 1491 return; 1492 } 1493 1494 if (type & H_RPTI_TYPE_PWC) 1495 _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC); 1496 1497 /* Full PID flush */ 1498 if (start == 0 && end == -1) 1499 return _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB); 1500 1501 /* Do range invalidation for all the valid page sizes */ 1502 for (psize = 0; psize < MMU_PAGE_COUNT; psize++) { 1503 def = &mmu_psize_defs[psize]; 1504 if (!(pg_sizes & def->h_rpt_pgsize)) 1505 continue; 1506 1507 nr_pages = (end - start) >> def->shift; 1508 flush_pid = nr_pages > tlb_single_page_flush_ceiling; 1509 1510 /* 1511 * If the number of pages spanning the range is above 1512 * the ceiling, convert the request into a full PID flush. 1513 * And since PID flush takes out all the page sizes, there 1514 * is no need to consider remaining page sizes. 1515 */ 1516 if (flush_pid) { 1517 _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB); 1518 return; 1519 } 1520 _tlbie_va_range_lpid(start, end, pid, lpid, 1521 (1UL << def->shift), psize, false); 1522 } 1523 } 1524 EXPORT_SYMBOL_GPL(do_h_rpt_invalidate_prt); 1525 1526 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ 1527