1 /* SPDX-License-Identifier: GPL-2.0-only */ 2 /* 3 * Based on arch/arm/include/asm/tlbflush.h 4 * 5 * Copyright (C) 1999-2003 Russell King 6 * Copyright (C) 2012 ARM Ltd. 7 */ 8 #ifndef __ASM_TLBFLUSH_H 9 #define __ASM_TLBFLUSH_H 10 11 #ifndef __ASSEMBLY__ 12 13 #include <linux/bitfield.h> 14 #include <linux/mm_types.h> 15 #include <linux/sched.h> 16 #include <linux/mmu_notifier.h> 17 #include <asm/cputype.h> 18 #include <asm/mmu.h> 19 20 /* 21 * Raw TLBI operations. 22 * 23 * Where necessary, use the __tlbi() macro to avoid asm() 24 * boilerplate. Drivers and most kernel code should use the TLB 25 * management routines in preference to the macro below. 26 * 27 * The macro can be used as __tlbi(op) or __tlbi(op, arg), depending 28 * on whether a particular TLBI operation takes an argument or 29 * not. The macros handles invoking the asm with or without the 30 * register argument as appropriate. 31 */ 32 #define __TLBI_0(op, arg) asm (ARM64_ASM_PREAMBLE \ 33 "tlbi " #op "\n" \ 34 ALTERNATIVE("nop\n nop", \ 35 "dsb ish\n tlbi " #op, \ 36 ARM64_WORKAROUND_REPEAT_TLBI, \ 37 CONFIG_ARM64_WORKAROUND_REPEAT_TLBI) \ 38 : : ) 39 40 #define __TLBI_1(op, arg) asm (ARM64_ASM_PREAMBLE \ 41 "tlbi " #op ", %0\n" \ 42 ALTERNATIVE("nop\n nop", \ 43 "dsb ish\n tlbi " #op ", %0", \ 44 ARM64_WORKAROUND_REPEAT_TLBI, \ 45 CONFIG_ARM64_WORKAROUND_REPEAT_TLBI) \ 46 : : "r" (arg)) 47 48 #define __TLBI_N(op, arg, n, ...) __TLBI_##n(op, arg) 49 50 #define __tlbi(op, ...) __TLBI_N(op, ##__VA_ARGS__, 1, 0) 51 52 #define __tlbi_user(op, arg) do { \ 53 if (arm64_kernel_unmapped_at_el0()) \ 54 __tlbi(op, (arg) | USER_ASID_FLAG); \ 55 } while (0) 56 57 /* This macro creates a properly formatted VA operand for the TLBI */ 58 #define __TLBI_VADDR(addr, asid) \ 59 ({ \ 60 unsigned long __ta = (addr) >> 12; \ 61 __ta &= GENMASK_ULL(43, 0); \ 62 __ta |= (unsigned long)(asid) << 48; \ 63 __ta; \ 64 }) 65 66 /* 67 * Get translation granule of the system, which is decided by 68 * PAGE_SIZE. Used by TTL. 69 * - 4KB : 1 70 * - 16KB : 2 71 * - 64KB : 3 72 */ 73 #define TLBI_TTL_TG_4K 1 74 #define TLBI_TTL_TG_16K 2 75 #define TLBI_TTL_TG_64K 3 76 77 static inline unsigned long get_trans_granule(void) 78 { 79 switch (PAGE_SIZE) { 80 case SZ_4K: 81 return TLBI_TTL_TG_4K; 82 case SZ_16K: 83 return TLBI_TTL_TG_16K; 84 case SZ_64K: 85 return TLBI_TTL_TG_64K; 86 default: 87 return 0; 88 } 89 } 90 91 /* 92 * Level-based TLBI operations. 93 * 94 * When ARMv8.4-TTL exists, TLBI operations take an additional hint for 95 * the level at which the invalidation must take place. If the level is 96 * wrong, no invalidation may take place. In the case where the level 97 * cannot be easily determined, a 0 value for the level parameter will 98 * perform a non-hinted invalidation. 99 * 100 * For Stage-2 invalidation, use the level values provided to that effect 101 * in asm/stage2_pgtable.h. 102 */ 103 #define TLBI_TTL_MASK GENMASK_ULL(47, 44) 104 105 #define __tlbi_level(op, addr, level) do { \ 106 u64 arg = addr; \ 107 \ 108 if (alternative_has_cap_unlikely(ARM64_HAS_ARMv8_4_TTL) && \ 109 level) { \ 110 u64 ttl = level & 3; \ 111 ttl |= get_trans_granule() << 2; \ 112 arg &= ~TLBI_TTL_MASK; \ 113 arg |= FIELD_PREP(TLBI_TTL_MASK, ttl); \ 114 } \ 115 \ 116 __tlbi(op, arg); \ 117 } while(0) 118 119 #define __tlbi_user_level(op, arg, level) do { \ 120 if (arm64_kernel_unmapped_at_el0()) \ 121 __tlbi_level(op, (arg | USER_ASID_FLAG), level); \ 122 } while (0) 123 124 /* 125 * This macro creates a properly formatted VA operand for the TLB RANGE. 126 * The value bit assignments are: 127 * 128 * +----------+------+-------+-------+-------+----------------------+ 129 * | ASID | TG | SCALE | NUM | TTL | BADDR | 130 * +-----------------+-------+-------+-------+----------------------+ 131 * |63 48|47 46|45 44|43 39|38 37|36 0| 132 * 133 * The address range is determined by below formula: 134 * [BADDR, BADDR + (NUM + 1) * 2^(5*SCALE + 1) * PAGESIZE) 135 * 136 */ 137 #define __TLBI_VADDR_RANGE(addr, asid, scale, num, ttl) \ 138 ({ \ 139 unsigned long __ta = (addr) >> PAGE_SHIFT; \ 140 __ta &= GENMASK_ULL(36, 0); \ 141 __ta |= (unsigned long)(ttl) << 37; \ 142 __ta |= (unsigned long)(num) << 39; \ 143 __ta |= (unsigned long)(scale) << 44; \ 144 __ta |= get_trans_granule() << 46; \ 145 __ta |= (unsigned long)(asid) << 48; \ 146 __ta; \ 147 }) 148 149 /* These macros are used by the TLBI RANGE feature. */ 150 #define __TLBI_RANGE_PAGES(num, scale) \ 151 ((unsigned long)((num) + 1) << (5 * (scale) + 1)) 152 #define MAX_TLBI_RANGE_PAGES __TLBI_RANGE_PAGES(31, 3) 153 154 /* 155 * Generate 'num' values from -1 to 30 with -1 rejected by the 156 * __flush_tlb_range() loop below. 157 */ 158 #define TLBI_RANGE_MASK GENMASK_ULL(4, 0) 159 #define __TLBI_RANGE_NUM(pages, scale) \ 160 ((((pages) >> (5 * (scale) + 1)) & TLBI_RANGE_MASK) - 1) 161 162 /* 163 * TLB Invalidation 164 * ================ 165 * 166 * This header file implements the low-level TLB invalidation routines 167 * (sometimes referred to as "flushing" in the kernel) for arm64. 168 * 169 * Every invalidation operation uses the following template: 170 * 171 * DSB ISHST // Ensure prior page-table updates have completed 172 * TLBI ... // Invalidate the TLB 173 * DSB ISH // Ensure the TLB invalidation has completed 174 * if (invalidated kernel mappings) 175 * ISB // Discard any instructions fetched from the old mapping 176 * 177 * 178 * The following functions form part of the "core" TLB invalidation API, 179 * as documented in Documentation/core-api/cachetlb.rst: 180 * 181 * flush_tlb_all() 182 * Invalidate the entire TLB (kernel + user) on all CPUs 183 * 184 * flush_tlb_mm(mm) 185 * Invalidate an entire user address space on all CPUs. 186 * The 'mm' argument identifies the ASID to invalidate. 187 * 188 * flush_tlb_range(vma, start, end) 189 * Invalidate the virtual-address range '[start, end)' on all 190 * CPUs for the user address space corresponding to 'vma->mm'. 191 * Note that this operation also invalidates any walk-cache 192 * entries associated with translations for the specified address 193 * range. 194 * 195 * flush_tlb_kernel_range(start, end) 196 * Same as flush_tlb_range(..., start, end), but applies to 197 * kernel mappings rather than a particular user address space. 198 * Whilst not explicitly documented, this function is used when 199 * unmapping pages from vmalloc/io space. 200 * 201 * flush_tlb_page(vma, addr) 202 * Invalidate a single user mapping for address 'addr' in the 203 * address space corresponding to 'vma->mm'. Note that this 204 * operation only invalidates a single, last-level page-table 205 * entry and therefore does not affect any walk-caches. 206 * 207 * 208 * Next, we have some undocumented invalidation routines that you probably 209 * don't want to call unless you know what you're doing: 210 * 211 * local_flush_tlb_all() 212 * Same as flush_tlb_all(), but only applies to the calling CPU. 213 * 214 * __flush_tlb_kernel_pgtable(addr) 215 * Invalidate a single kernel mapping for address 'addr' on all 216 * CPUs, ensuring that any walk-cache entries associated with the 217 * translation are also invalidated. 218 * 219 * __flush_tlb_range(vma, start, end, stride, last_level) 220 * Invalidate the virtual-address range '[start, end)' on all 221 * CPUs for the user address space corresponding to 'vma->mm'. 222 * The invalidation operations are issued at a granularity 223 * determined by 'stride' and only affect any walk-cache entries 224 * if 'last_level' is equal to false. 225 * 226 * 227 * Finally, take a look at asm/tlb.h to see how tlb_flush() is implemented 228 * on top of these routines, since that is our interface to the mmu_gather 229 * API as used by munmap() and friends. 230 */ 231 static inline void local_flush_tlb_all(void) 232 { 233 dsb(nshst); 234 __tlbi(vmalle1); 235 dsb(nsh); 236 isb(); 237 } 238 239 static inline void flush_tlb_all(void) 240 { 241 dsb(ishst); 242 __tlbi(vmalle1is); 243 dsb(ish); 244 isb(); 245 } 246 247 static inline void flush_tlb_mm(struct mm_struct *mm) 248 { 249 unsigned long asid; 250 251 dsb(ishst); 252 asid = __TLBI_VADDR(0, ASID(mm)); 253 __tlbi(aside1is, asid); 254 __tlbi_user(aside1is, asid); 255 dsb(ish); 256 mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL); 257 } 258 259 static inline void __flush_tlb_page_nosync(struct mm_struct *mm, 260 unsigned long uaddr) 261 { 262 unsigned long addr; 263 264 dsb(ishst); 265 addr = __TLBI_VADDR(uaddr, ASID(mm)); 266 __tlbi(vale1is, addr); 267 __tlbi_user(vale1is, addr); 268 mmu_notifier_arch_invalidate_secondary_tlbs(mm, uaddr & PAGE_MASK, 269 (uaddr & PAGE_MASK) + PAGE_SIZE); 270 } 271 272 static inline void flush_tlb_page_nosync(struct vm_area_struct *vma, 273 unsigned long uaddr) 274 { 275 return __flush_tlb_page_nosync(vma->vm_mm, uaddr); 276 } 277 278 static inline void flush_tlb_page(struct vm_area_struct *vma, 279 unsigned long uaddr) 280 { 281 flush_tlb_page_nosync(vma, uaddr); 282 dsb(ish); 283 } 284 285 static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm) 286 { 287 /* 288 * TLB flush deferral is not required on systems which are affected by 289 * ARM64_WORKAROUND_REPEAT_TLBI, as __tlbi()/__tlbi_user() implementation 290 * will have two consecutive TLBI instructions with a dsb(ish) in between 291 * defeating the purpose (i.e save overall 'dsb ish' cost). 292 */ 293 if (alternative_has_cap_unlikely(ARM64_WORKAROUND_REPEAT_TLBI)) 294 return false; 295 296 return true; 297 } 298 299 static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch, 300 struct mm_struct *mm, 301 unsigned long uaddr) 302 { 303 __flush_tlb_page_nosync(mm, uaddr); 304 } 305 306 /* 307 * If mprotect/munmap/etc occurs during TLB batched flushing, we need to 308 * synchronise all the TLBI issued with a DSB to avoid the race mentioned in 309 * flush_tlb_batched_pending(). 310 */ 311 static inline void arch_flush_tlb_batched_pending(struct mm_struct *mm) 312 { 313 dsb(ish); 314 } 315 316 /* 317 * To support TLB batched flush for multiple pages unmapping, we only send 318 * the TLBI for each page in arch_tlbbatch_add_pending() and wait for the 319 * completion at the end in arch_tlbbatch_flush(). Since we've already issued 320 * TLBI for each page so only a DSB is needed to synchronise its effect on the 321 * other CPUs. 322 * 323 * This will save the time waiting on DSB comparing issuing a TLBI;DSB sequence 324 * for each page. 325 */ 326 static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) 327 { 328 dsb(ish); 329 } 330 331 /* 332 * This is meant to avoid soft lock-ups on large TLB flushing ranges and not 333 * necessarily a performance improvement. 334 */ 335 #define MAX_DVM_OPS PTRS_PER_PTE 336 337 /* 338 * __flush_tlb_range_op - Perform TLBI operation upon a range 339 * 340 * @op: TLBI instruction that operates on a range (has 'r' prefix) 341 * @start: The start address of the range 342 * @pages: Range as the number of pages from 'start' 343 * @stride: Flush granularity 344 * @asid: The ASID of the task (0 for IPA instructions) 345 * @tlb_level: Translation Table level hint, if known 346 * @tlbi_user: If 'true', call an additional __tlbi_user() 347 * (typically for user ASIDs). 'flase' for IPA instructions 348 * 349 * When the CPU does not support TLB range operations, flush the TLB 350 * entries one by one at the granularity of 'stride'. If the TLB 351 * range ops are supported, then: 352 * 353 * 1. If 'pages' is odd, flush the first page through non-range 354 * operations; 355 * 356 * 2. For remaining pages: the minimum range granularity is decided 357 * by 'scale', so multiple range TLBI operations may be required. 358 * Start from scale = 0, flush the corresponding number of pages 359 * ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it 360 * until no pages left. 361 * 362 * Note that certain ranges can be represented by either num = 31 and 363 * scale or num = 0 and scale + 1. The loop below favours the latter 364 * since num is limited to 30 by the __TLBI_RANGE_NUM() macro. 365 */ 366 #define __flush_tlb_range_op(op, start, pages, stride, \ 367 asid, tlb_level, tlbi_user) \ 368 do { \ 369 int num = 0; \ 370 int scale = 0; \ 371 unsigned long addr; \ 372 \ 373 while (pages > 0) { \ 374 if (!system_supports_tlb_range() || \ 375 pages % 2 == 1) { \ 376 addr = __TLBI_VADDR(start, asid); \ 377 __tlbi_level(op, addr, tlb_level); \ 378 if (tlbi_user) \ 379 __tlbi_user_level(op, addr, tlb_level); \ 380 start += stride; \ 381 pages -= stride >> PAGE_SHIFT; \ 382 continue; \ 383 } \ 384 \ 385 num = __TLBI_RANGE_NUM(pages, scale); \ 386 if (num >= 0) { \ 387 addr = __TLBI_VADDR_RANGE(start, asid, scale, \ 388 num, tlb_level); \ 389 __tlbi(r##op, addr); \ 390 if (tlbi_user) \ 391 __tlbi_user(r##op, addr); \ 392 start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \ 393 pages -= __TLBI_RANGE_PAGES(num, scale); \ 394 } \ 395 scale++; \ 396 } \ 397 } while (0) 398 399 #define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \ 400 __flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false) 401 402 static inline void __flush_tlb_range(struct vm_area_struct *vma, 403 unsigned long start, unsigned long end, 404 unsigned long stride, bool last_level, 405 int tlb_level) 406 { 407 unsigned long asid, pages; 408 409 start = round_down(start, stride); 410 end = round_up(end, stride); 411 pages = (end - start) >> PAGE_SHIFT; 412 413 /* 414 * When not uses TLB range ops, we can handle up to 415 * (MAX_DVM_OPS - 1) pages; 416 * When uses TLB range ops, we can handle up to 417 * (MAX_TLBI_RANGE_PAGES - 1) pages. 418 */ 419 if ((!system_supports_tlb_range() && 420 (end - start) >= (MAX_DVM_OPS * stride)) || 421 pages >= MAX_TLBI_RANGE_PAGES) { 422 flush_tlb_mm(vma->vm_mm); 423 return; 424 } 425 426 dsb(ishst); 427 asid = ASID(vma->vm_mm); 428 429 if (last_level) 430 __flush_tlb_range_op(vale1is, start, pages, stride, asid, tlb_level, true); 431 else 432 __flush_tlb_range_op(vae1is, start, pages, stride, asid, tlb_level, true); 433 434 dsb(ish); 435 mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end); 436 } 437 438 static inline void flush_tlb_range(struct vm_area_struct *vma, 439 unsigned long start, unsigned long end) 440 { 441 /* 442 * We cannot use leaf-only invalidation here, since we may be invalidating 443 * table entries as part of collapsing hugepages or moving page tables. 444 * Set the tlb_level to 0 because we can not get enough information here. 445 */ 446 __flush_tlb_range(vma, start, end, PAGE_SIZE, false, 0); 447 } 448 449 static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) 450 { 451 unsigned long addr; 452 453 if ((end - start) > (MAX_DVM_OPS * PAGE_SIZE)) { 454 flush_tlb_all(); 455 return; 456 } 457 458 start = __TLBI_VADDR(start, 0); 459 end = __TLBI_VADDR(end, 0); 460 461 dsb(ishst); 462 for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) 463 __tlbi(vaale1is, addr); 464 dsb(ish); 465 isb(); 466 } 467 468 /* 469 * Used to invalidate the TLB (walk caches) corresponding to intermediate page 470 * table levels (pgd/pud/pmd). 471 */ 472 static inline void __flush_tlb_kernel_pgtable(unsigned long kaddr) 473 { 474 unsigned long addr = __TLBI_VADDR(kaddr, 0); 475 476 dsb(ishst); 477 __tlbi(vaae1is, addr); 478 dsb(ish); 479 isb(); 480 } 481 #endif 482 483 #endif 484