/* SPDX-License-Identifier: GPL-2.0-only */
/*
 *  linux/arch/arm/mm/cache-v7.S
 *
 *  Copyright (C) 2001 Deep Blue Solutions Ltd.
 *  Copyright (C) 2005 ARM Ltd.
 *
 *  This is the "shell" of the ARMv7 processor support.
 */
#include <linux/linkage.h>
#include <linux/init.h>
#include <asm/assembler.h>
#include <asm/errno.h>
#include <asm/unwind.h>
#include <asm/hardware/cache-b15-rac.h>

#include "proc-macros.S"

.arch armv7-a

#ifdef CONFIG_CPU_ICACHE_MISMATCH_WORKAROUND
.globl icache_size
	.data
	.align	2
icache_size:
	.long	64
	.text
#endif
/*
 * The secondary kernel init calls v7_flush_dcache_all before it enables
 * the L1; however, the L1 comes out of reset in an undefined state, so
 * the clean + invalidate performed by v7_flush_dcache_all causes a bunch
 * of cache lines with uninitialized data and uninitialized tags to get
 * written out to memory, which does really unpleasant things to the main
 * processor.  We fix this by performing an invalidate, rather than a
 * clean + invalidate, before jumping into the kernel.
 *
 * This function needs to be called for both secondary cores startup and
 * primary core resume procedures.
 */
ENTRY(v7_invalidate_l1)
	mov	r0, #0
	mcr	p15, 2, r0, c0, c0, 0	@ select L1 data cache in CSSELR
	isb
	mrc	p15, 1, r0, c0, c0, 0	@ read cache geometry from CCSIDR

	movw	r3, #0x3ff
	and	r3, r3, r0, lsr #3	@ 'Associativity' in CCSIDR[12:3]
	clz	r1, r3			@ WayShift
	mov	r2, #1
	mov	r3, r3, lsl r1		@ NumWays-1 shifted into bits [31:...]
	movs	r1, r2, lsl r1		@ #1 shifted left by same amount
	moveq	r1, #1			@ r1 needs value > 0 even if only 1 way

	and	r2, r0, #0x7
	add	r2, r2, #4		@ SetShift

1:	movw	ip, #0x7fff
	and	r0, ip, r0, lsr #13	@ 'NumSets' in CCSIDR[27:13]

2:	mov	ip, r0, lsl r2		@ NumSet << SetShift
	orr	ip, ip, r3		@ Reg = (Temp<<WayShift)|(NumSets<<SetShift)
	mcr	p15, 0, ip, c7, c6, 2	@ DCISW: invalidate by set/way
	subs	r0, r0, #1		@ Set--
	bpl	2b
	subs	r3, r3, r1		@ Way--
	bcc	3f
	mrc	p15, 1, r0, c0, c0, 0	@ re-read cache geometry from CCSIDR
	b	1b
3:	dsb	st
	isb
	ret	lr
ENDPROC(v7_invalidate_l1)

/*
 *	v7_flush_icache_all()
 *
 *	Flush the whole I-cache.
 *
 *	Registers:
 *	r0 - set to 0
 */
ENTRY(v7_flush_icache_all)
	mov	r0, #0
	ALT_SMP(mcr	p15, 0, r0, c7, c1, 0)	@ invalidate I-cache inner shareable
	ALT_UP(mcr	p15, 0, r0, c7, c5, 0)	@ I+BTB cache invalidate
	ret	lr
ENDPROC(v7_flush_icache_all)

/*
 *	v7_flush_dcache_louis()
 *
 *	Flush the D-cache up to the Level of Unification Inner Shareable
 *
 *	Corrupted registers: r0-r6, r9-r10
 */
ENTRY(v7_flush_dcache_louis)
	dmb					@ ensure ordering with previous memory accesses
	mrc	p15, 1, r0, c0, c0, 1		@ read clidr, r0 = clidr
ALT_SMP(mov	r3, r0, lsr #20)		@ move LoUIS into position
ALT_UP(	mov	r3, r0, lsr #26)		@ move LoUU into position
	ands	r3, r3, #7 << 1			@ extract LoU*2 field from clidr
	bne	start_flush_levels		@ LoU != 0, start flushing
#ifdef CONFIG_ARM_ERRATA_643719
ALT_SMP(mrc	p15, 0, r2, c0, c0, 0)		@ read main ID register
ALT_UP(	ret	lr)				@ LoUU is zero, so nothing to do
	movw	r1, #:lower16:(0x410fc090 >> 4)	@ ID of ARM Cortex A9 r0p?
	movt	r1, #:upper16:(0x410fc090 >> 4)
	teq	r1, r2, lsr #4			@ test for errata affected core and if so...
	moveq	r3, #1 << 1			@ fix LoUIS value
	beq	start_flush_levels		@ start flushing cache levels
#endif
	ret	lr
ENDPROC(v7_flush_dcache_louis)
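
/*
 * For reference, per the ARMv7-A Architecture Reference Manual, the operand
 * written by the set/way maintenance operations used here (DCISW above,
 * DCCISW in v7_flush_dcache_all below) is laid out roughly as:
 *
 *	bits [31 : 32-A]  way index          A = log2(associativity), rounded up
 *	bits [B-1 : L]    set index          L = log2(line length in bytes)
 *	bits [3 : 1]      cache level - 1    B = L + log2(number of sets), rounded up
 *
 * clz of (associativity - 1) yields 32-A directly, which is why the way
 * counter is pre-shifted by that amount, and the set index is shifted by the
 * CCSIDR line-length field + 4, before the two are OR-ed together with the
 * level.
 */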

/*
 *	v7_flush_dcache_all()
 *
 *	Flush the whole D-cache.
 *
 *	Corrupted registers: r0-r6, r9-r10
 */
ENTRY(v7_flush_dcache_all)
	dmb					@ ensure ordering with previous memory accesses
	mrc	p15, 1, r0, c0, c0, 1		@ read clidr
	mov	r3, r0, lsr #23			@ move LoC into position
	ands	r3, r3, #7 << 1			@ extract LoC*2 from clidr
	beq	finished			@ if loc is 0, then no need to clean
start_flush_levels:
	mov	r10, #0				@ start clean at cache level 0
flush_levels:
	add	r2, r10, r10, lsr #1		@ work out 3x current cache level
	mov	r1, r0, lsr r2			@ extract cache type bits from clidr
	and	r1, r1, #7			@ mask off the bits for current cache only
	cmp	r1, #2				@ see what cache we have at this level
	blt	skip				@ skip if no cache, or just i-cache
#ifdef CONFIG_PREEMPTION
	save_and_disable_irqs_notrace r9	@ make cssr&csidr read atomic
#endif
	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level in cssr
	isb					@ isb to sync the new cssr&csidr
	mrc	p15, 1, r1, c0, c0, 0		@ read the new csidr
#ifdef CONFIG_PREEMPTION
	restore_irqs_notrace r9
#endif
	and	r2, r1, #7			@ extract the length of the cache lines
	add	r2, r2, #4			@ add 4 (line length offset)
	movw	r4, #0x3ff
	ands	r4, r4, r1, lsr #3		@ find maximum way number (associativity - 1)
	clz	r5, r4				@ find bit position of way size increment
	movw	r6, #0x7fff
	and	r1, r6, r1, lsr #13		@ extract maximum set index (NumSets - 1)
	mov	r6, #1
	movne	r4, r4, lsl r5			@ # of ways shifted into bits [31:...]
	movne	r6, r6, lsl r5			@ 1 shifted left by same amount
loop1:
	mov	r9, r1				@ create working copy of max index
loop2:
	mov	r5, r9, lsl r2			@ factor set number into r5
	orr	r5, r5, r4			@ factor way number into r5
	orr	r5, r5, r10			@ factor cache level into r5
	mcr	p15, 0, r5, c7, c14, 2		@ clean & invalidate by set/way
	subs	r9, r9, #1			@ decrement the index
	bge	loop2
	subs	r4, r4, r6			@ decrement the way
	bcs	loop1
skip:
	add	r10, r10, #2			@ increment cache number
	cmp	r3, r10
#ifdef CONFIG_ARM_ERRATA_814220
	dsb
#endif
	bgt	flush_levels
finished:
	mov	r10, #0				@ switch back to cache level 0
	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level in cssr
	dsb	st
	isb
	ret	lr
ENDPROC(v7_flush_dcache_all)

/*
 *	v7_flush_kern_cache_all()
 *
 *	Flush the entire cache system.
 *	The data cache flush is now achieved using atomic clean / invalidates
 *	working outwards from L1 cache.  This is done using Set/Way based cache
 *	maintenance instructions.
 *	The instruction cache can still be invalidated back to the point of
 *	unification in a single instruction.
 */
ENTRY(v7_flush_kern_cache_all)
	stmfd	sp!, {r4-r6, r9-r10, lr}
	bl	v7_flush_dcache_all
	mov	r0, #0
	ALT_SMP(mcr	p15, 0, r0, c7, c1, 0)	@ invalidate I-cache inner shareable
	ALT_UP(mcr	p15, 0, r0, c7, c5, 0)	@ I+BTB cache invalidate
	ldmfd	sp!, {r4-r6, r9-r10, lr}
	ret	lr
ENDPROC(v7_flush_kern_cache_all)
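
/*
 * Note on the I-cache invalidation used above and below: on SMP the
 * CP15 c7, c1, 0 operation (ICIALLUIS) broadcasts the invalidate to the
 * Inner Shareable domain, while c7, c5, 0 (ICIALLU) only affects the local
 * core.  ALT_SMP()/ALT_UP() let the kernel patch in the appropriate one at
 * boot depending on whether it is actually running on an SMP system.
 */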

/*
 *	v7_flush_kern_cache_louis(void)
 *
 *	Flush the data cache up to Level of Unification Inner Shareable.
 *	Invalidate the I-cache to the point of unification.
 */
ENTRY(v7_flush_kern_cache_louis)
	stmfd	sp!, {r4-r6, r9-r10, lr}
	bl	v7_flush_dcache_louis
	mov	r0, #0
	ALT_SMP(mcr	p15, 0, r0, c7, c1, 0)	@ invalidate I-cache inner shareable
	ALT_UP(mcr	p15, 0, r0, c7, c5, 0)	@ I+BTB cache invalidate
	ldmfd	sp!, {r4-r6, r9-r10, lr}
	ret	lr
ENDPROC(v7_flush_kern_cache_louis)

/*
 *	v7_flush_user_cache_all()
 *
 *	Flush all cache entries in a particular address space
 *
 *	- mm	- mm_struct describing address space
 */
ENTRY(v7_flush_user_cache_all)
	/*FALLTHROUGH*/

/*
 *	v7_flush_user_cache_range(start, end, flags)
 *
 *	Flush a range of cache entries in the specified address space.
 *
 *	- start	- start address (may not be aligned)
 *	- end	- end address (exclusive, may not be aligned)
 *	- flags	- vm_area_struct flags describing address space
 *
 *	It is assumed that:
 *	- we have a VIPT cache.
 */
ENTRY(v7_flush_user_cache_range)
	ret	lr
ENDPROC(v7_flush_user_cache_all)
ENDPROC(v7_flush_user_cache_range)

/*
 *	v7_coherent_kern_range(start,end)
 *
 *	Ensure that the I and D caches are coherent within specified
 *	region.  This is typically used when code has been written to
 *	a memory region, and will be executed.
 *
 *	- start	- virtual start address of region
 *	- end	- virtual end address of region
 *
 *	It is assumed that:
 *	- the Icache does not read data from the write buffer
 */
ENTRY(v7_coherent_kern_range)
	/* FALLTHROUGH */

/*
 *	v7_coherent_user_range(start,end)
 *
 *	Ensure that the I and D caches are coherent within specified
 *	region.  This is typically used when code has been written to
 *	a memory region, and will be executed.
 *
 *	- start	- virtual start address of region
 *	- end	- virtual end address of region
 *
 *	It is assumed that:
 *	- the Icache does not read data from the write buffer
 */
ENTRY(v7_coherent_user_range)
 UNWIND(.fnstart	)
	dcache_line_size r2, r3
	sub	r3, r2, #1
	bic	r12, r0, r3
#ifdef CONFIG_ARM_ERRATA_764369
	ALT_SMP(W(dsb))
	ALT_UP(W(nop))
#endif
1:
 USER(	mcr	p15, 0, r12, c7, c11, 1	)	@ clean D line to the point of unification
	add	r12, r12, r2
	cmp	r12, r1
	blo	1b
	dsb	ishst
#ifdef CONFIG_CPU_ICACHE_MISMATCH_WORKAROUND
	ldr	r3, =icache_size
	ldr	r2, [r3, #0]
#else
	icache_line_size r2, r3
#endif
	sub	r3, r2, #1
	bic	r12, r0, r3
2:
 USER(	mcr	p15, 0, r12, c7, c5, 1	)	@ invalidate I line
	add	r12, r12, r2
	cmp	r12, r1
	blo	2b
	mov	r0, #0
	ALT_SMP(mcr	p15, 0, r0, c7, c1, 6)	@ invalidate BTB Inner Shareable
	ALT_UP(mcr	p15, 0, r0, c7, c5, 6)	@ invalidate BTB
	dsb	ishst
	isb
	ret	lr

/*
 * Fault handling for the cache operation above.  If the virtual address in r0
 * isn't mapped, fail with -EFAULT.
 */
9001:
#ifdef CONFIG_ARM_ERRATA_775420
	dsb
#endif
	mov	r0, #-EFAULT
	ret	lr
 UNWIND(.fnend		)
ENDPROC(v7_coherent_kern_range)
ENDPROC(v7_coherent_user_range)
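
/*
 * The D-cache maintenance routines below all follow the same pattern:
 * round the start address down to a cache line, then operate line by line
 * until the end address is reached.  The line size comes from the
 * dcache_line_size macro (proc-macros.S), which derives it from CTR.DminLine.
 * A rough, illustrative C equivalent of v7_flush_kern_dcache_area (the
 * function names here are only for illustration, not kernel API):
 *
 *	static inline unsigned long min_dcache_line(void)
 *	{
 *		unsigned long ctr;
 *		// CTR.DminLine, bits [19:16], is log2(words per line)
 *		asm("mrc p15, 0, %0, c0, c0, 1" : "=r" (ctr));
 *		return 4UL << ((ctr >> 16) & 0xf);
 *	}
 *
 *	static void sketch_flush_kern_dcache_area(void *addr, unsigned long size)
 *	{
 *		unsigned long line = min_dcache_line();
 *		unsigned long p = (unsigned long)addr & ~(line - 1);
 *		unsigned long end = (unsigned long)addr + size;
 *
 *		for (; p < end; p += line)	// DCCIMVAC: clean+invalidate to PoC
 *			asm volatile("mcr p15, 0, %0, c7, c14, 1" : : "r" (p));
 *		asm volatile("dsb st" : : : "memory");
 *	}
 */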

/*
 *	v7_flush_kern_dcache_area(void *addr, size_t size)
 *
 *	Ensure that the data held in the region described by addr and size
 *	is written back to that region.
 *
 *	- addr	- kernel address
 *	- size	- region size
 */
ENTRY(v7_flush_kern_dcache_area)
	dcache_line_size r2, r3
	add	r1, r0, r1
	sub	r3, r2, #1
	bic	r0, r0, r3
#ifdef CONFIG_ARM_ERRATA_764369
	ALT_SMP(W(dsb))
	ALT_UP(W(nop))
#endif
1:
	mcr	p15, 0, r0, c7, c14, 1		@ clean & invalidate D line / unified line
	add	r0, r0, r2
	cmp	r0, r1
	blo	1b
	dsb	st
	ret	lr
ENDPROC(v7_flush_kern_dcache_area)

/*
 *	v7_dma_inv_range(start,end)
 *
 *	Invalidate the data cache within the specified region; we will
 *	be performing a DMA operation in this region and we want to
 *	purge old data in the cache.
 *
 *	- start	- virtual start address of region
 *	- end	- virtual end address of region
 */
v7_dma_inv_range:
	dcache_line_size r2, r3
	sub	r3, r2, #1
	tst	r0, r3
	bic	r0, r0, r3
#ifdef CONFIG_ARM_ERRATA_764369
	ALT_SMP(W(dsb))
	ALT_UP(W(nop))
#endif
	mcrne	p15, 0, r0, c7, c14, 1		@ clean & invalidate D / U line
	addne	r0, r0, r2

	tst	r1, r3
	bic	r1, r1, r3
	mcrne	p15, 0, r1, c7, c14, 1		@ clean & invalidate D / U line
	cmp	r0, r1
1:
	mcrlo	p15, 0, r0, c7, c6, 1		@ invalidate D / U line
	addlo	r0, r0, r2
	cmplo	r0, r1
	blo	1b
	dsb	st
	ret	lr
ENDPROC(v7_dma_inv_range)

/*
 *	v7_dma_clean_range(start,end)
 *	- start	- virtual start address of region
 *	- end	- virtual end address of region
 */
v7_dma_clean_range:
	dcache_line_size r2, r3
	sub	r3, r2, #1
	bic	r0, r0, r3
#ifdef CONFIG_ARM_ERRATA_764369
	ALT_SMP(W(dsb))
	ALT_UP(W(nop))
#endif
1:
	mcr	p15, 0, r0, c7, c10, 1		@ clean D / U line
	add	r0, r0, r2
	cmp	r0, r1
	blo	1b
	dsb	st
	ret	lr
ENDPROC(v7_dma_clean_range)

/*
 *	v7_dma_flush_range(start,end)
 *	- start	- virtual start address of region
 *	- end	- virtual end address of region
 */
ENTRY(v7_dma_flush_range)
	dcache_line_size r2, r3
	sub	r3, r2, #1
	bic	r0, r0, r3
#ifdef CONFIG_ARM_ERRATA_764369
	ALT_SMP(W(dsb))
	ALT_UP(W(nop))
#endif
1:
	mcr	p15, 0, r0, c7, c14, 1		@ clean & invalidate D / U line
	add	r0, r0, r2
	cmp	r0, r1
	blo	1b
	dsb	st
	ret	lr
ENDPROC(v7_dma_flush_range)

/*
 *	dma_map_area(start, size, dir)
 *	- start	- kernel virtual start address
 *	- size	- size of region
 *	- dir	- DMA direction
 */
ENTRY(v7_dma_map_area)
	add	r1, r1, r0
	teq	r2, #DMA_FROM_DEVICE
	beq	v7_dma_inv_range
	b	v7_dma_clean_range
ENDPROC(v7_dma_map_area)

/*
 *	dma_unmap_area(start, size, dir)
 *	- start	- kernel virtual start address
 *	- size	- size of region
 *	- dir	- DMA direction
 */
ENTRY(v7_dma_unmap_area)
	add	r1, r1, r0
	teq	r2, #DMA_TO_DEVICE
	bne	v7_dma_inv_range
	ret	lr
ENDPROC(v7_dma_unmap_area)

	__INITDATA

	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
	define_cache_functions v7
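
	/*
	 * For reference, define_cache_functions (proc-macros.S) emits a
	 * struct cpu_cache_fns instance whose members point at the routines
	 * in this file.  A rough C picture of what the v7 table ends up
	 * looking like (the member names are from <asm/cacheflush.h>; this
	 * initializer is only an illustration, the real table is generated
	 * by the macro):
	 *
	 *	struct cpu_cache_fns v7_cache_fns = {
	 *		.flush_icache_all	= v7_flush_icache_all,
	 *		.flush_kern_all		= v7_flush_kern_cache_all,
	 *		.flush_kern_louis	= v7_flush_kern_cache_louis,
	 *		.flush_user_all		= v7_flush_user_cache_all,
	 *		.flush_user_range	= v7_flush_user_cache_range,
	 *		.coherent_kern_range	= v7_coherent_kern_range,
	 *		.coherent_user_range	= v7_coherent_user_range,
	 *		.flush_kern_dcache_area	= v7_flush_kern_dcache_area,
	 *		.dma_map_area		= v7_dma_map_area,
	 *		.dma_unmap_area		= v7_dma_unmap_area,
	 *		.dma_flush_range	= v7_dma_flush_range,
	 *	};
	 */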

	/* The Broadcom Brahma-B15 read-ahead cache requires some modifications
	 * to the v7_cache_fns; we only override the ones we need.
	 */
#ifndef CONFIG_CACHE_B15_RAC
	globl_equ	b15_flush_kern_cache_all,	v7_flush_kern_cache_all
#endif
	globl_equ	b15_flush_icache_all,		v7_flush_icache_all
	globl_equ	b15_flush_kern_cache_louis,	v7_flush_kern_cache_louis
	globl_equ	b15_flush_user_cache_all,	v7_flush_user_cache_all
	globl_equ	b15_flush_user_cache_range,	v7_flush_user_cache_range
	globl_equ	b15_coherent_kern_range,	v7_coherent_kern_range
	globl_equ	b15_coherent_user_range,	v7_coherent_user_range
	globl_equ	b15_flush_kern_dcache_area,	v7_flush_kern_dcache_area

	globl_equ	b15_dma_map_area,		v7_dma_map_area
	globl_equ	b15_dma_unmap_area,		v7_dma_unmap_area
	globl_equ	b15_dma_flush_range,		v7_dma_flush_range

	define_cache_functions b15