/*
 * ARC Cache Management
 *
 * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/module.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/cache.h>
#include <linux/mmu_context.h>
#include <linux/syscalls.h>
#include <linux/uaccess.h>
#include <linux/pagemap.h>
#include <asm/cacheflush.h>
#include <asm/cachectl.h>
#include <asm/setup.h>

static int l2_line_sz;
static int ioc_exists;
int slc_enable = 1, ioc_enable = 1;
unsigned long perip_base = ARC_UNCACHED_ADDR_SPACE; /* legacy value for boot */
unsigned long perip_end = 0xFFFFFFFF; /* legacy value */

void (*_cache_line_loop_ic_fn)(phys_addr_t paddr, unsigned long vaddr,
			       unsigned long sz, const int cacheop);

void (*__dma_cache_wback_inv)(phys_addr_t start, unsigned long sz);
void (*__dma_cache_inv)(phys_addr_t start, unsigned long sz);
void (*__dma_cache_wback)(phys_addr_t start, unsigned long sz);

char *arc_cache_mumbojumbo(int c, char *buf, int len)
{
	int n = 0;
	struct cpuinfo_arc_cache *p;

#define PR_CACHE(p, cfg, str)						\
	if (!(p)->ver)							\
		n += scnprintf(buf + n, len - n, str"\t\t: N/A\n");	\
	else								\
		n += scnprintf(buf + n, len - n,			\
			str"\t\t: %uK, %dway/set, %uB Line, %s%s%s\n",	\
			(p)->sz_k, (p)->assoc, (p)->line_len,		\
			(p)->vipt ? "VIPT" : "PIPT",			\
			(p)->alias ? " aliasing" : "",			\
			IS_USED_CFG(cfg));

	PR_CACHE(&cpuinfo_arc700[c].icache, CONFIG_ARC_HAS_ICACHE, "I-Cache");
	PR_CACHE(&cpuinfo_arc700[c].dcache, CONFIG_ARC_HAS_DCACHE, "D-Cache");

	p = &cpuinfo_arc700[c].slc;
	if (p->ver)
		n += scnprintf(buf + n, len - n,
			       "SLC\t\t: %uK, %uB Line%s\n",
			       p->sz_k, p->line_len, IS_USED_RUN(slc_enable));

	n += scnprintf(buf + n, len - n, "Peripherals\t: %#lx%s%s\n",
		       perip_base,
		       IS_AVAIL3(ioc_exists, ioc_enable, ", IO-Coherency "));

	return buf;
}

/*
 * Read the Cache Build Configuration Registers, Decode them and save into
 * the cpuinfo structure for later use.
 * No Validation done here, simply read/convert the BCRs
 */
static void read_decode_cache_bcr_arcv2(int cpu)
{
	struct cpuinfo_arc_cache *p_slc = &cpuinfo_arc700[cpu].slc;
	struct bcr_generic sbcr;

	struct bcr_slc_cfg {
#ifdef CONFIG_CPU_BIG_ENDIAN
		unsigned int pad:24, way:2, lsz:2, sz:4;
#else
		unsigned int sz:4, lsz:2, way:2, pad:24;
#endif
	} slc_cfg;

	struct bcr_clust_cfg {
#ifdef CONFIG_CPU_BIG_ENDIAN
		unsigned int pad:7, c:1, num_entries:8, num_cores:8, ver:8;
#else
		unsigned int ver:8, num_cores:8, num_entries:8, c:1, pad:7;
#endif
	} cbcr;

	struct bcr_volatile {
#ifdef CONFIG_CPU_BIG_ENDIAN
		unsigned int start:4, limit:4, pad:22, order:1, disable:1;
#else
		unsigned int disable:1, order:1, pad:22, limit:4, start:4;
#endif
	} vol;


	READ_BCR(ARC_REG_SLC_BCR, sbcr);
	if (sbcr.ver) {
		READ_BCR(ARC_REG_SLC_CFG, slc_cfg);
		p_slc->ver = sbcr.ver;
		p_slc->sz_k = 128 << slc_cfg.sz;
		l2_line_sz = p_slc->line_len = (slc_cfg.lsz == 0) ? 128 : 64;
	}
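
	/*
	 * Illustrative decode of the fields read above (not part of the
	 * original source): e.g. slc_cfg.sz == 1 yields a 256K SLC
	 * (128K << 1), and slc_cfg.lsz == 0 selects 128B lines while any
	 * other value selects 64B lines.
	 */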

	READ_BCR(ARC_REG_CLUSTER_BCR, cbcr);
	if (cbcr.c)
		ioc_exists = 1;
	else
		ioc_enable = 0;

	/* HS 2.0 didn't have AUX_VOL */
	if (cpuinfo_arc700[cpu].core.family > 0x51) {
		READ_BCR(AUX_VOL, vol);
		perip_base = vol.start << 28;
		/* HS 3.0 has limit and strict-ordering fields */
		if (cpuinfo_arc700[cpu].core.family > 0x52)
			perip_end = (vol.limit << 28) - 1;
	}
}

void read_decode_cache_bcr(void)
{
	struct cpuinfo_arc_cache *p_ic, *p_dc;
	unsigned int cpu = smp_processor_id();
	struct bcr_cache {
#ifdef CONFIG_CPU_BIG_ENDIAN
		unsigned int pad:12, line_len:4, sz:4, config:4, ver:8;
#else
		unsigned int ver:8, config:4, sz:4, line_len:4, pad:12;
#endif
	} ibcr, dbcr;

	p_ic = &cpuinfo_arc700[cpu].icache;
	READ_BCR(ARC_REG_IC_BCR, ibcr);

	if (!ibcr.ver)
		goto dc_chk;

	if (ibcr.ver <= 3) {
		BUG_ON(ibcr.config != 3);
		p_ic->assoc = 2;		/* Fixed to 2w set assoc */
	} else if (ibcr.ver >= 4) {
		p_ic->assoc = 1 << ibcr.config;	/* 1,2,4,8 */
	}

	p_ic->line_len = 8 << ibcr.line_len;
	p_ic->sz_k = 1 << (ibcr.sz - 1);
	p_ic->ver = ibcr.ver;
	p_ic->vipt = 1;
	p_ic->alias = p_ic->sz_k/p_ic->assoc/TO_KB(PAGE_SIZE) > 1;

dc_chk:
	p_dc = &cpuinfo_arc700[cpu].dcache;
	READ_BCR(ARC_REG_DC_BCR, dbcr);

	if (!dbcr.ver)
		goto slc_chk;

	if (dbcr.ver <= 3) {
		BUG_ON(dbcr.config != 2);
		p_dc->assoc = 4;		/* Fixed to 4w set assoc */
		p_dc->vipt = 1;
		p_dc->alias = p_dc->sz_k/p_dc->assoc/TO_KB(PAGE_SIZE) > 1;
	} else if (dbcr.ver >= 4) {
		p_dc->assoc = 1 << dbcr.config;	/* 1,2,4,8 */
		p_dc->vipt = 0;
		p_dc->alias = 0;		/* PIPT so can't VIPT alias */
	}

	p_dc->line_len = 16 << dbcr.line_len;
	p_dc->sz_k = 1 << (dbcr.sz - 1);
	p_dc->ver = dbcr.ver;

slc_chk:
	if (is_isa_arcv2())
		read_decode_cache_bcr_arcv2(cpu);
}

/*
 * Line Operation on {I,D}-Cache
 */

#define OP_INV		0x1
#define OP_FLUSH	0x2
#define OP_FLUSH_N_INV	0x3
#define OP_INV_IC	0x4

/*
 * I-Cache Aliasing in ARC700 VIPT caches (MMU v1-v3)
 *
 * ARC VIPT I-cache uses vaddr to index into cache and paddr to match the tag.
 * The orig Cache Management Module "CDU" only required paddr to invalidate a
 * certain line since it sufficed as index in Non-Aliasing VIPT cache-geometry.
 * In fact for distinct V1, V2, P: all of {V1-P}, {V2-P}, {P-P} would end up
 * fetching the exact same line.
 *
 * However for larger Caches (way-size > page-size) - i.e. in Aliasing config,
 * paddr alone could not be used to correctly index the cache.
 *
 * ------------------
 * MMU v1/v2 (Fixed Page Size 8k)
 * ------------------
 * The solution was to provide CDU with these additional vaddr bits. These
 * would be bits [x:13], x would depend on cache-geometry, 13 comes from
 * standard page size of 8k.
 * H/w folks chose [17:13] to be a future safe range, and more so these 5 bits
 * of vaddr could easily be "stuffed" in the paddr as bits [4:0] since the
 * orig 5 bits of paddr were anyways ignored by CDU line ops, as they
 * represent the offset within cache-line. The adv of using this "clumsy"
 * interface for additional info was no new reg was needed in CDU programming
 * model.
 *
 * 17:13 represented the max num of bits passable, actual bits needed were
 * fewer, based on the num-of-aliases possible:
 *  -for 2 alias possibility, only bit 13 needed (32K cache)
 *  -for 4 alias possibility, bits 14:13 needed (64K cache)
 *
 * ------------------
 * MMU v3
 * ------------------
 * This ver of MMU supports variable page sizes (1k-16k): although Linux will
 * only support 8k (default), 16k and 4k.
 * However from hardware perspective, smaller page sizes aggravate aliasing
 * meaning more vaddr bits needed to disambiguate the cache-line-op;
 * the existing scheme of piggybacking won't work for certain configurations.
 * Two new registers IC_PTAG and DC_PTAG introduced.
 * "tag" bits are provided in PTAG, index bits in existing IVIL/IVDL/FLDL regs
 */
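
/*
 * Worked example (illustrative, not part of the original source): with 8K
 * pages, a 64K 2-way VIPT I-cache has a 32K way, i.e. 4 pages per way, so 4
 * possible aliases and vaddr bits [14:13] are needed. __cache_line_loop_v2()
 * below passes them by OR-ing (vaddr >> PAGE_SHIFT) & 0x1F into paddr bits
 * [4:0], which the CDU would otherwise ignore as the intra-line offset.
 */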

static inline
void __cache_line_loop_v2(phys_addr_t paddr, unsigned long vaddr,
			  unsigned long sz, const int op)
{
	unsigned int aux_cmd;
	int num_lines;
	const int full_page = __builtin_constant_p(sz) && sz == PAGE_SIZE;

	if (op == OP_INV_IC) {
		aux_cmd = ARC_REG_IC_IVIL;
	} else {
		/* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */
		aux_cmd = op & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL;
	}

	/* Ensure we properly floor/ceil the non-line aligned/sized requests
	 * and have @paddr - aligned to cache line and integral @num_lines.
	 * This however can be avoided for page sized requests since:
	 *  -@paddr will be cache-line aligned already (being page aligned)
	 *  -@sz will be integral multiple of line size (being page sized).
	 */
	if (!full_page) {
		sz += paddr & ~CACHE_LINE_MASK;
		paddr &= CACHE_LINE_MASK;
		vaddr &= CACHE_LINE_MASK;
	}

	num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);

	/* MMUv2 and before: paddr contains stuffed vaddr bits */
	paddr |= (vaddr >> PAGE_SHIFT) & 0x1F;

	while (num_lines-- > 0) {
		write_aux_reg(aux_cmd, paddr);
		paddr += L1_CACHE_BYTES;
	}
}
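
/*
 * Worked example of the floor/ceil fixup above (illustrative, assuming
 * 64-byte L1 lines): for paddr = 0x8000_0070 and sz = 0x20, the line offset
 * 0x30 is added to sz (-> 0x50) and paddr is rounded down to 0x8000_0040,
 * so num_lines = DIV_ROUND_UP(0x50, 0x40) = 2, covering the two cache lines
 * that the original byte range 0x70..0x8F actually touches.
 */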

/*
 * For ARC700 MMUv3 I-cache and D-cache flushes
 * Also reused for HS38 aliasing I-cache configuration
 */
static inline
void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr,
			  unsigned long sz, const int op)
{
	unsigned int aux_cmd, aux_tag;
	int num_lines;
	const int full_page = __builtin_constant_p(sz) && sz == PAGE_SIZE;

	if (op == OP_INV_IC) {
		aux_cmd = ARC_REG_IC_IVIL;
		aux_tag = ARC_REG_IC_PTAG;
	} else {
		aux_cmd = op & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL;
		aux_tag = ARC_REG_DC_PTAG;
	}

	/* Ensure we properly floor/ceil the non-line aligned/sized requests
	 * and have @paddr - aligned to cache line and integral @num_lines.
	 * This however can be avoided for page sized requests since:
	 *  -@paddr will be cache-line aligned already (being page aligned)
	 *  -@sz will be integral multiple of line size (being page sized).
	 */
	if (!full_page) {
		sz += paddr & ~CACHE_LINE_MASK;
		paddr &= CACHE_LINE_MASK;
		vaddr &= CACHE_LINE_MASK;
	}
	num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);

	/*
	 * MMUv3, cache ops require paddr in PTAG reg
	 * if V-P const for loop, PTAG can be written once outside loop
	 */
	if (full_page)
		write_aux_reg(aux_tag, paddr);

	/*
	 * This is technically for MMU v4, using the MMU v3 programming model
	 * Special work for HS38 aliasing I-cache configuration with PAE40
	 *   - upper 8 bits of paddr need to be written into PTAG_HI
	 *   - (and needs to be written before the lower 32 bits)
	 * Note that PTAG_HI is hoisted outside the line loop
	 */
	if (is_pae40_enabled() && op == OP_INV_IC)
		write_aux_reg(ARC_REG_IC_PTAG_HI, (u64)paddr >> 32);

	while (num_lines-- > 0) {
		if (!full_page) {
			write_aux_reg(aux_tag, paddr);
			paddr += L1_CACHE_BYTES;
		}

		write_aux_reg(aux_cmd, vaddr);
		vaddr += L1_CACHE_BYTES;
	}
}

/*
 * In HS38x (MMU v4), I-cache is VIPT (can alias), D-cache is PIPT
 * Here's how cache ops are implemented
 *
 *  - D-cache: only paddr needed (in DC_IVDL/DC_FLDL)
 *  - I-cache Non Aliasing: Despite VIPT, only paddr needed (in IC_IVIL)
 *  - I-cache Aliasing: Both vaddr and paddr needed (in IC_IVIL, IC_PTAG
 *    respectively, similar to MMU v3 programming model, hence
 *    __cache_line_loop_v3() is used)
 *
 * If PAE40 is enabled, independent of aliasing considerations, the higher
 * bits need to be written into PTAG_HI
 */
static inline
void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
			  unsigned long sz, const int cacheop)
{
	unsigned int aux_cmd;
	int num_lines;
	const int full_page_op = __builtin_constant_p(sz) && sz == PAGE_SIZE;

	if (cacheop == OP_INV_IC) {
		aux_cmd = ARC_REG_IC_IVIL;
	} else {
		/* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */
		aux_cmd = cacheop & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL;
	}

	/* Ensure we properly floor/ceil the non-line aligned/sized requests
	 * and have @paddr - aligned to cache line and integral @num_lines.
	 * This however can be avoided for page sized requests since:
	 *  -@paddr will be cache-line aligned already (being page aligned)
	 *  -@sz will be integral multiple of line size (being page sized).
	 */
	if (!full_page_op) {
		sz += paddr & ~CACHE_LINE_MASK;
		paddr &= CACHE_LINE_MASK;
	}

	num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);

	/*
	 * For HS38 PAE40 configuration
	 *   - upper 8 bits of paddr need to be written into PTAG_HI
	 *   - (and needs to be written before the lower 32 bits)
	 */
	if (is_pae40_enabled()) {
		if (cacheop == OP_INV_IC)
			/*
			 * Non aliasing I-cache in HS38,
			 * aliasing I-cache handled in __cache_line_loop_v3()
			 */
			write_aux_reg(ARC_REG_IC_PTAG_HI, (u64)paddr >> 32);
		else
			write_aux_reg(ARC_REG_DC_PTAG_HI, (u64)paddr >> 32);
	}

	while (num_lines-- > 0) {
		write_aux_reg(aux_cmd, paddr);
		paddr += L1_CACHE_BYTES;
	}
}

#if (CONFIG_ARC_MMU_VER < 3)
#define __cache_line_loop	__cache_line_loop_v2
#elif (CONFIG_ARC_MMU_VER == 3)
#define __cache_line_loop	__cache_line_loop_v3
#elif (CONFIG_ARC_MMU_VER > 3)
#define __cache_line_loop	__cache_line_loop_v4
#endif
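
/*
 * Illustrative note (not part of the original source): the selection above
 * is purely compile time, e.g. a kernel built with CONFIG_ARC_MMU_VER == 4
 * (HS38) resolves __cache_line_loop to __cache_line_loop_v4, while the
 * runtime _cache_line_loop_ic_fn pointer set up in arc_cache_init() can
 * still route aliasing I-cache ops to __cache_line_loop_v3.
 */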

#ifdef CONFIG_ARC_HAS_DCACHE

/***************************************************************
 * Machine specific helpers for Entire D-Cache or Per Line ops
 */

static inline void __before_dc_op(const int op)
{
	if (op == OP_FLUSH_N_INV) {
		/* Dcache provides 2 cmd: FLUSH or INV
		 * INV in turn has sub-modes: DISCARD or FLUSH-BEFORE
		 * flush-n-inv is achieved by INV cmd but with IM=1
		 * So toggle INV sub-mode depending on op request and default
		 */
		const unsigned int ctl = ARC_REG_DC_CTRL;
		write_aux_reg(ctl, read_aux_reg(ctl) | DC_CTRL_INV_MODE_FLUSH);
	}
}

static inline void __after_dc_op(const int op)
{
	if (op & OP_FLUSH) {
		const unsigned int ctl = ARC_REG_DC_CTRL;
		unsigned int reg;

		/* flush / flush-n-inv both wait */
		while ((reg = read_aux_reg(ctl)) & DC_CTRL_FLUSH_STATUS)
			;

		/* Switch back to default Invalidate mode */
		if (op == OP_FLUSH_N_INV)
			write_aux_reg(ctl, reg & ~DC_CTRL_INV_MODE_FLUSH);
	}
}

/*
 * Operation on Entire D-Cache
 * @op = {OP_INV, OP_FLUSH, OP_FLUSH_N_INV}
 * Note that constant propagation ensures all the checks are gone
 * in generated code
 */
static inline void __dc_entire_op(const int op)
{
	int aux;

	__before_dc_op(op);

	if (op & OP_INV)	/* Inv or flush-n-inv use same cmd reg */
		aux = ARC_REG_DC_IVDC;
	else
		aux = ARC_REG_DC_FLSH;

	write_aux_reg(aux, 0x1);

	__after_dc_op(op);
}

/* For kernel mappings cache operation: index is same as paddr */
#define __dc_line_op_k(p, sz, op)	__dc_line_op(p, p, sz, op)

/*
 * D-Cache Line ops: Per Line INV (discard or wback+discard) or FLUSH (wback)
 */
static inline void __dc_line_op(phys_addr_t paddr, unsigned long vaddr,
				unsigned long sz, const int op)
{
	unsigned long flags;

	local_irq_save(flags);

	__before_dc_op(op);

	__cache_line_loop(paddr, vaddr, sz, op);

	__after_dc_op(op);

	local_irq_restore(flags);
}

#else

#define __dc_entire_op(op)
#define __dc_line_op(paddr, vaddr, sz, op)
#define __dc_line_op_k(paddr, sz, op)

#endif /* CONFIG_ARC_HAS_DCACHE */

#ifdef CONFIG_ARC_HAS_ICACHE

static inline void __ic_entire_inv(void)
{
	write_aux_reg(ARC_REG_IC_IVIC, 1);
	read_aux_reg(ARC_REG_IC_CTRL);	/* blocks */
}

static inline void
__ic_line_inv_vaddr_local(phys_addr_t paddr, unsigned long vaddr,
			  unsigned long sz)
{
	unsigned long flags;

	local_irq_save(flags);
	(*_cache_line_loop_ic_fn)(paddr, vaddr, sz, OP_INV_IC);
	local_irq_restore(flags);
}

#ifndef CONFIG_SMP

#define __ic_line_inv_vaddr(p, v, s)	__ic_line_inv_vaddr_local(p, v, s)

#else

struct ic_inv_args {
	phys_addr_t paddr, vaddr;
	int sz;
};

static void __ic_line_inv_vaddr_helper(void *info)
{
	struct ic_inv_args *ic_inv = info;

	__ic_line_inv_vaddr_local(ic_inv->paddr, ic_inv->vaddr, ic_inv->sz);
}

static void __ic_line_inv_vaddr(phys_addr_t paddr, unsigned long vaddr,
				unsigned long sz)
{
	struct ic_inv_args ic_inv = {
		.paddr = paddr,
		.vaddr = vaddr,
		.sz = sz
	};

	on_each_cpu(__ic_line_inv_vaddr_helper, &ic_inv, 1);
}

#endif	/* CONFIG_SMP */

#else	/* !CONFIG_ARC_HAS_ICACHE */

#define __ic_entire_inv()
#define __ic_line_inv_vaddr(pstart, vstart, sz)

#endif /* CONFIG_ARC_HAS_ICACHE */

noinline void slc_op(phys_addr_t paddr, unsigned long sz, const int op)
{
#ifdef CONFIG_ISA_ARCV2
	/*
	 * SLC is shared between all cores and concurrent aux operations from
	 * multiple cores need to be serialized using a spinlock
	 * A concurrent operation can be silently ignored and/or the old/new
	 * operation can remain incomplete forever (lockup in SLC_CTRL_BUSY
	 * loop below)
	 */
	static DEFINE_SPINLOCK(lock);
	unsigned long flags;
	unsigned int ctrl;

	spin_lock_irqsave(&lock, flags);

	/*
	 * The Region Flush operation is specified by CTRL.RGN_OP[11..9]
	 *  - b'000 (default) is Flush,
	 *  - b'001 is Invalidate if CTRL.IM == 0
	 *  - b'001 is Flush-n-Invalidate if CTRL.IM == 1
	 */
	ctrl = read_aux_reg(ARC_REG_SLC_CTRL);

	/* Don't rely on default value of IM bit */
	if (!(op & OP_FLUSH))		/* i.e. OP_INV */
		ctrl &= ~SLC_CTRL_IM;	/* clear IM: Disable flush before Inv */
	else
		ctrl |= SLC_CTRL_IM;

	if (op & OP_INV)
		ctrl |= SLC_CTRL_RGN_OP_INV;	/* Inv or flush-n-inv */
	else
		ctrl &= ~SLC_CTRL_RGN_OP_INV;

	write_aux_reg(ARC_REG_SLC_CTRL, ctrl);

	/*
	 * Lower bits are ignored, no need to clip
	 * END needs to be setup before START (latter triggers the operation)
	 * END can't be same as START, so add (l2_line_sz - 1) to sz
	 */
	write_aux_reg(ARC_REG_SLC_RGN_END, (paddr + sz + l2_line_sz - 1));
	write_aux_reg(ARC_REG_SLC_RGN_START, paddr);

	while (read_aux_reg(ARC_REG_SLC_CTRL) & SLC_CTRL_BUSY);

	spin_unlock_irqrestore(&lock, flags);
#endif
}
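
/*
 * Worked example of the END/START programming above (illustrative, assuming
 * a 64B SLC line): for paddr = 0x8000_0040 and sz = 0x20, SLC_RGN_END is
 * written with 0x8000_0040 + 0x20 + 0x3F = 0x8000_009F and SLC_RGN_START
 * with 0x8000_0040; since the low bits are ignored, this guarantees that
 * END differs from START even for a sub-line sized request.
 */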

/***********************************************************
 * Exported APIs
 */

/*
 * Handle cache congruency of kernel and userspace mappings of page when kernel
 * writes-to/reads-from
 *
 * The idea is to defer flushing of kernel mapping after a WRITE, possible if:
 *  -dcache is NOT aliasing, hence any U/K-mappings of page are congruent
 *  -U-mapping doesn't exist yet for page (finalised in update_mmu_cache)
 *  -In SMP, if hardware caches are coherent
 *
 * There's a corollary case, where kernel READs from a userspace mapped page.
 * If the U-mapping is not congruent to K-mapping, former needs flushing.
 */
void flush_dcache_page(struct page *page)
{
	struct address_space *mapping;

	if (!cache_is_vipt_aliasing()) {
		clear_bit(PG_dc_clean, &page->flags);
		return;
	}

	/* don't handle anon pages here */
	mapping = page_mapping(page);
	if (!mapping)
		return;

	/*
	 * pagecache page, file not yet mapped to userspace
	 * Make a note that K-mapping is dirty
	 */
	if (!mapping_mapped(mapping)) {
		clear_bit(PG_dc_clean, &page->flags);
	} else if (page_mapcount(page)) {

		/* kernel reading from page with U-mapping */
		phys_addr_t paddr = (unsigned long)page_address(page);
		unsigned long vaddr = page->index << PAGE_SHIFT;

		if (addr_not_cache_congruent(paddr, vaddr))
			__flush_dcache_page(paddr, vaddr);
	}
}
EXPORT_SYMBOL(flush_dcache_page);

/*
 * DMA ops for systems with L1 cache only
 * Make memory coherent with L1 cache by flushing/invalidating L1 lines
 */
static void __dma_cache_wback_inv_l1(phys_addr_t start, unsigned long sz)
{
	__dc_line_op_k(start, sz, OP_FLUSH_N_INV);
}

static void __dma_cache_inv_l1(phys_addr_t start, unsigned long sz)
{
	__dc_line_op_k(start, sz, OP_INV);
}

static void __dma_cache_wback_l1(phys_addr_t start, unsigned long sz)
{
	__dc_line_op_k(start, sz, OP_FLUSH);
}

/*
 * DMA ops for systems with both L1 and L2 caches, but without IOC
 * Both L1 and L2 lines need to be explicitly flushed/invalidated
 */
static void __dma_cache_wback_inv_slc(phys_addr_t start, unsigned long sz)
{
	__dc_line_op_k(start, sz, OP_FLUSH_N_INV);
	slc_op(start, sz, OP_FLUSH_N_INV);
}

static void __dma_cache_inv_slc(phys_addr_t start, unsigned long sz)
{
	__dc_line_op_k(start, sz, OP_INV);
	slc_op(start, sz, OP_INV);
}

static void __dma_cache_wback_slc(phys_addr_t start, unsigned long sz)
{
	__dc_line_op_k(start, sz, OP_FLUSH);
	slc_op(start, sz, OP_FLUSH);
}

/*
 * DMA ops for systems with IOC
 * IOC hardware snoops all DMA traffic keeping the caches consistent with
 * memory - eliding need for any explicit cache maintenance of DMA buffers
 */
static void __dma_cache_wback_inv_ioc(phys_addr_t start, unsigned long sz) {}
static void __dma_cache_inv_ioc(phys_addr_t start, unsigned long sz) {}
static void __dma_cache_wback_ioc(phys_addr_t start, unsigned long sz) {}

/*
 * Exported DMA API
 */
void dma_cache_wback_inv(phys_addr_t start, unsigned long sz)
{
	__dma_cache_wback_inv(start, sz);
}
EXPORT_SYMBOL(dma_cache_wback_inv);

void dma_cache_inv(phys_addr_t start, unsigned long sz)
{
	__dma_cache_inv(start, sz);
}
EXPORT_SYMBOL(dma_cache_inv);

void dma_cache_wback(phys_addr_t start, unsigned long sz)
{
	__dma_cache_wback(start, sz);
}
EXPORT_SYMBOL(dma_cache_wback);
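
/*
 * Usage note (illustrative assumption, not stated in this file): the arch
 * DMA mapping code is expected to route streaming DMA syncs here, e.g. a
 * DMA_TO_DEVICE map ends up in dma_cache_wback(), a DMA_FROM_DEVICE map in
 * dma_cache_inv(), and DMA_BIDIRECTIONAL in dma_cache_wback_inv(), all of
 * which collapse to no-ops when IOC is handling coherency.
 */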

/*
 * This is API for making I/D Caches consistent when modifying
 * kernel code (loadable modules, kprobes, kgdb...)
 * This is called on insmod, with kernel virtual address for CODE of
 * the module. ARC cache maintenance ops require PHY address thus we
 * need to convert vmalloc addr to PHY addr
 */
void flush_icache_range(unsigned long kstart, unsigned long kend)
{
	unsigned int tot_sz;

	WARN(kstart < TASK_SIZE, "%s() can't handle user vaddr", __func__);

	/* Shortcut for bigger flush ranges.
	 * Here we don't care if this was kernel virtual or phy addr
	 */
	tot_sz = kend - kstart;
	if (tot_sz > PAGE_SIZE) {
		flush_cache_all();
		return;
	}

	/* Case: Kernel Phy addr (0x8000_0000 onwards) */
	if (likely(kstart > PAGE_OFFSET)) {
		/*
		 * The 2nd arg despite being paddr will be used to index icache
		 * This is OK since no alternate virtual mappings will exist
		 * given the callers for this case: kprobe/kgdb in built-in
		 * kernel code only.
		 */
		__sync_icache_dcache(kstart, kstart, kend - kstart);
		return;
	}

	/*
	 * Case: Kernel Vaddr (0x7000_0000 to 0x7fff_ffff)
	 * (1) ARC Cache Maintenance ops only take Phy addr, hence special
	 *     handling of kernel vaddr.
	 *
	 * (2) Despite @tot_sz being < PAGE_SIZE (bigger cases handled already),
	 *     it still needs to handle a 2 page scenario, where the range
	 *     straddles across 2 virtual pages, hence the need for a loop.
	 */
	while (tot_sz > 0) {
		unsigned int off, sz;
		unsigned long phy, pfn;

		off = kstart % PAGE_SIZE;
		pfn = vmalloc_to_pfn((void *)kstart);
		phy = (pfn << PAGE_SHIFT) + off;
		sz = min_t(unsigned int, tot_sz, PAGE_SIZE - off);
		__sync_icache_dcache(phy, kstart, sz);
		kstart += sz;
		tot_sz -= sz;
	}
}
EXPORT_SYMBOL(flush_icache_range);
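
/*
 * Worked example of the straddle case above (illustrative, assuming the
 * default 8K pages): kstart = 0x7000_1ff0, tot_sz = 0x40. First iteration:
 * off = 0x1ff0, sz = min(0x40, 0x2000 - 0x1ff0) = 0x10, i.e. up to the end
 * of the first page. Second iteration: kstart = 0x7000_2000, off = 0,
 * sz = 0x30 covers the remainder in the next page, then tot_sz hits 0.
 */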

/*
 * General purpose helper to make I and D cache lines consistent.
 * @paddr is phy addr of region
 * @vaddr is typically user vaddr (breakpoint) or kernel vaddr (vmalloc)
 *    However in one instance, when called by kprobe (for a breakpt in
 *    builtin kernel code) @vaddr will be paddr only, meaning CDU operation
 *    will use a paddr to index the cache (despite VIPT). This is fine since
 *    a builtin kernel page will not have any virtual mappings.
 *    kprobe on loadable module will be kernel vaddr.
 */
void __sync_icache_dcache(phys_addr_t paddr, unsigned long vaddr, int len)
{
	__dc_line_op(paddr, vaddr, len, OP_FLUSH_N_INV);
	__ic_line_inv_vaddr(paddr, vaddr, len);
}

/* wrapper to compile time eliminate alignment checks in flush loop */
void __inv_icache_page(phys_addr_t paddr, unsigned long vaddr)
{
	__ic_line_inv_vaddr(paddr, vaddr, PAGE_SIZE);
}

/*
 * wrapper to clearout kernel or userspace mappings of a page
 * For kernel mappings @vaddr == @paddr
 */
void __flush_dcache_page(phys_addr_t paddr, unsigned long vaddr)
{
	__dc_line_op(paddr, vaddr & PAGE_MASK, PAGE_SIZE, OP_FLUSH_N_INV);
}

noinline void flush_cache_all(void)
{
	unsigned long flags;

	local_irq_save(flags);

	__ic_entire_inv();
	__dc_entire_op(OP_FLUSH_N_INV);

	local_irq_restore(flags);

}

#ifdef CONFIG_ARC_CACHE_VIPT_ALIASING

void flush_cache_mm(struct mm_struct *mm)
{
	flush_cache_all();
}

void flush_cache_page(struct vm_area_struct *vma, unsigned long u_vaddr,
		      unsigned long pfn)
{
	unsigned int paddr = pfn << PAGE_SHIFT;

	u_vaddr &= PAGE_MASK;

	__flush_dcache_page(paddr, u_vaddr);

	if (vma->vm_flags & VM_EXEC)
		__inv_icache_page(paddr, u_vaddr);
}

void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
		       unsigned long end)
{
	flush_cache_all();
}

void flush_anon_page(struct vm_area_struct *vma, struct page *page,
		     unsigned long u_vaddr)
{
	/* TBD: do we really need to clear the kernel mapping */
	__flush_dcache_page(page_address(page), u_vaddr);
	__flush_dcache_page(page_address(page), page_address(page));

}

#endif

void copy_user_highpage(struct page *to, struct page *from,
			unsigned long u_vaddr, struct vm_area_struct *vma)
{
	void *kfrom = kmap_atomic(from);
	void *kto = kmap_atomic(to);
	int clean_src_k_mappings = 0;

	/*
	 * If SRC page was already mapped in userspace AND its U-mapping is
	 * not congruent with K-mapping, sync former to physical page so that
	 * the K-mapping in the memcpy below sees the right data
	 *
	 * Note that while @u_vaddr refers to DST page's userspace vaddr, it is
	 * equally valid for SRC page as well
	 *
	 * For !VIPT cache, all of this gets compiled out as
	 * addr_not_cache_congruent() is 0
	 */
	if (page_mapcount(from) && addr_not_cache_congruent(kfrom, u_vaddr)) {
		__flush_dcache_page((unsigned long)kfrom, u_vaddr);
		clean_src_k_mappings = 1;
	}

	copy_page(kto, kfrom);

	/*
	 * Mark DST page K-mapping as dirty for a later finalization by
	 * update_mmu_cache(). Although the finalization could have been done
	 * here as well (given that both vaddr/paddr are available).
	 * But update_mmu_cache() already has code to do that for other
	 * non copied user pages (e.g. read faults which wire in pagecache page
	 * directly).
	 */
	clear_bit(PG_dc_clean, &to->flags);

	/*
	 * if SRC was already usermapped and non-congruent to kernel mapping
	 * sync the kernel mapping back to physical page
	 */
	if (clean_src_k_mappings) {
		__flush_dcache_page((unsigned long)kfrom, (unsigned long)kfrom);
		set_bit(PG_dc_clean, &from->flags);
	} else {
		clear_bit(PG_dc_clean, &from->flags);
	}

	kunmap_atomic(kto);
	kunmap_atomic(kfrom);
}

void clear_user_page(void *to, unsigned long u_vaddr, struct page *page)
{
	clear_page(to);
	clear_bit(PG_dc_clean, &page->flags);
}


/**********************************************************************
 * Explicit Cache flush request from user space via syscall
 * Needed for JITs which generate code on the fly
 */
SYSCALL_DEFINE3(cacheflush, uint32_t, start, uint32_t, sz, uint32_t, flags)
{
	/* TBD: optimize this */
	flush_cache_all();
	return 0;
}

void arc_cache_init(void)
{
	unsigned int __maybe_unused cpu = smp_processor_id();
	char str[256];

	printk(arc_cache_mumbojumbo(0, str, sizeof(str)));

	/*
	 * Only master CPU needs to execute rest of function:
	 * - Assume SMP so all cores will have same cache config so
	 *   any geometry checks will be same for all
	 * - IOC setup / dma callbacks only need to be setup once
	 */
	if (cpu)
		return;

	if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE)) {
		struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache;

		if (!ic->ver)
			panic("cache support enabled but non-existent cache\n");

		if (ic->line_len != L1_CACHE_BYTES)
			panic("ICache line [%d] != kernel Config [%d]",
			      ic->line_len, L1_CACHE_BYTES);

		if (ic->ver != CONFIG_ARC_MMU_VER)
			panic("Cache ver [%d] doesn't match MMU ver [%d]\n",
			      ic->ver, CONFIG_ARC_MMU_VER);

		/*
		 * In MMU v4 (HS38x) the aliasing icache config uses IVIL/PTAG
		 * pair to provide vaddr/paddr respectively, just as in MMU v3
		 */
		if (is_isa_arcv2() && ic->alias)
			_cache_line_loop_ic_fn = __cache_line_loop_v3;
		else
			_cache_line_loop_ic_fn = __cache_line_loop;
	}

	if (IS_ENABLED(CONFIG_ARC_HAS_DCACHE)) {
		struct cpuinfo_arc_cache *dc = &cpuinfo_arc700[cpu].dcache;

		if (!dc->ver)
			panic("cache support enabled but non-existent cache\n");

		if (dc->line_len != L1_CACHE_BYTES)
			panic("DCache line [%d] != kernel Config [%d]",
			      dc->line_len, L1_CACHE_BYTES);

		/* check for D-Cache aliasing on ARCompact: ARCv2 has PIPT */
		if (is_isa_arcompact()) {
			int handled = IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING);

			if (dc->alias && !handled)
				panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n");
			else if (!dc->alias && handled)
				panic("Disable CONFIG_ARC_CACHE_VIPT_ALIASING\n");
		}
	}

	if (is_isa_arcv2() && l2_line_sz && !slc_enable) {

		/* IM set : flush before invalidate */
		write_aux_reg(ARC_REG_SLC_CTRL,
			read_aux_reg(ARC_REG_SLC_CTRL) | SLC_CTRL_IM);

		write_aux_reg(ARC_REG_SLC_INVALIDATE, 1);

		/* Important to wait for flush to complete */
		while (read_aux_reg(ARC_REG_SLC_CTRL) & SLC_CTRL_BUSY);
		write_aux_reg(ARC_REG_SLC_CTRL,
			read_aux_reg(ARC_REG_SLC_CTRL) | SLC_CTRL_DISABLE);
	}

	if (is_isa_arcv2() && ioc_enable) {
		/* IO coherency base - 0x8z */
		write_aux_reg(ARC_REG_IO_COH_AP0_BASE, 0x80000);
		/* IO coherency aperture size - 512Mb: 0x8z-0xAz */
		write_aux_reg(ARC_REG_IO_COH_AP0_SIZE, 0x11);
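		/*
		 * Illustrative decode of the two writes above (not part of
		 * the original source): both values appear to be expressed
		 * in 4KB units - 0x80000 corresponds to a 0x8000_0000 base
		 * (0x8000_0000 >> 12) and 0x11 (17) to log2(512MB / 4KB),
		 * matching the 0x8z-0xAz aperture noted in the comments.
		 */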
		/* Enable partial writes */
		write_aux_reg(ARC_REG_IO_COH_PARTIAL, 1);
		/* Enable IO coherency */
		write_aux_reg(ARC_REG_IO_COH_ENABLE, 1);

		__dma_cache_wback_inv = __dma_cache_wback_inv_ioc;
		__dma_cache_inv = __dma_cache_inv_ioc;
		__dma_cache_wback = __dma_cache_wback_ioc;
	} else if (is_isa_arcv2() && l2_line_sz && slc_enable) {
		__dma_cache_wback_inv = __dma_cache_wback_inv_slc;
		__dma_cache_inv = __dma_cache_inv_slc;
		__dma_cache_wback = __dma_cache_wback_slc;
	} else {
		__dma_cache_wback_inv = __dma_cache_wback_inv_l1;
		__dma_cache_inv = __dma_cache_inv_l1;
		__dma_cache_wback = __dma_cache_wback_l1;
	}
}