1/* 2 * PARISC TLB and cache flushing support 3 * Copyright (C) 2000-2001 Hewlett-Packard (John Marvin) 4 * Copyright (C) 2001 Matthew Wilcox (willy at parisc-linux.org) 5 * Copyright (C) 2002 Richard Hirst (rhirst with parisc-linux.org) 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation; either version 2, or (at your option) 10 * any later version. 11 * 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program; if not, write to the Free Software 19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 20 */ 21 22/* 23 * NOTE: fdc,fic, and pdc instructions that use base register modification 24 * should only use index and base registers that are not shadowed, 25 * so that the fast path emulation in the non access miss handler 26 * can be used. 27 */ 28 29#ifdef CONFIG_64BIT 30 .level 2.0w 31#else 32 .level 2.0 33#endif 34 35#include <asm/psw.h> 36#include <asm/assembly.h> 37#include <asm/pgtable.h> 38#include <asm/cache.h> 39#include <asm/ldcw.h> 40#include <linux/linkage.h> 41#include <linux/init.h> 42 43 .section .text.hot 44 .align 16 45 46ENTRY_CFI(flush_tlb_all_local) 47 /* 48 * The pitlbe and pdtlbe instructions should only be used to 49 * flush the entire tlb. Also, there needs to be no intervening 50 * tlb operations, e.g. tlb misses, so the operation needs 51 * to happen in real mode with all interruptions disabled. 52 */ 53 54 /* pcxt_ssm_bug - relied upon translation! PA 2.0 Arch. F-4 and F-5 */ 55 rsm PSW_SM_I, %r19 /* save I-bit state */ 56 load32 PA(1f), %r1 57 nop 58 nop 59 nop 60 nop 61 nop 62 63 rsm PSW_SM_Q, %r0 /* prep to load iia queue */ 64 mtctl %r0, %cr17 /* Clear IIASQ tail */ 65 mtctl %r0, %cr17 /* Clear IIASQ head */ 66 mtctl %r1, %cr18 /* IIAOQ head */ 67 ldo 4(%r1), %r1 68 mtctl %r1, %cr18 /* IIAOQ tail */ 69 load32 REAL_MODE_PSW, %r1 70 mtctl %r1, %ipsw 71 rfi 72 nop 73 741: load32 PA(cache_info), %r1 75 76 /* Flush Instruction Tlb */ 77 78 LDREG ITLB_SID_BASE(%r1), %r20 79 LDREG ITLB_SID_STRIDE(%r1), %r21 80 LDREG ITLB_SID_COUNT(%r1), %r22 81 LDREG ITLB_OFF_BASE(%r1), %arg0 82 LDREG ITLB_OFF_STRIDE(%r1), %arg1 83 LDREG ITLB_OFF_COUNT(%r1), %arg2 84 LDREG ITLB_LOOP(%r1), %arg3 85 86 addib,COND(=) -1, %arg3, fitoneloop /* Preadjust and test */ 87 movb,<,n %arg3, %r31, fitdone /* If loop < 0, skip */ 88 copy %arg0, %r28 /* Init base addr */ 89 90fitmanyloop: /* Loop if LOOP >= 2 */ 91 mtsp %r20, %sr1 92 add %r21, %r20, %r20 /* increment space */ 93 copy %arg2, %r29 /* Init middle loop count */ 94 95fitmanymiddle: /* Loop if LOOP >= 2 */ 96 addib,COND(>) -1, %r31, fitmanymiddle /* Adjusted inner loop decr */ 97 pitlbe %r0(%sr1, %r28) 98 pitlbe,m %arg1(%sr1, %r28) /* Last pitlbe and addr adjust */ 99 addib,COND(>) -1, %r29, fitmanymiddle /* Middle loop decr */ 100 copy %arg3, %r31 /* Re-init inner loop count */ 101 102 movb,tr %arg0, %r28, fitmanyloop /* Re-init base addr */ 103 addib,COND(<=),n -1, %r22, fitdone /* Outer loop count decr */ 104 105fitoneloop: /* Loop if LOOP = 1 */ 106 mtsp %r20, %sr1 107 copy %arg0, %r28 /* init base addr */ 108 copy %arg2, %r29 /* init middle loop count */ 109 110fitonemiddle: /* Loop if LOOP = 1 */ 111 addib,COND(>) -1, %r29, fitonemiddle /* Middle loop count decr */ 112 pitlbe,m %arg1(%sr1, %r28) /* pitlbe for one loop */ 113 114 addib,COND(>) -1, %r22, fitoneloop /* Outer loop count decr */ 115 add %r21, %r20, %r20 /* increment space */ 116 117fitdone: 118 119 /* Flush Data Tlb */ 120 121 LDREG DTLB_SID_BASE(%r1), %r20 122 LDREG DTLB_SID_STRIDE(%r1), %r21 123 LDREG DTLB_SID_COUNT(%r1), %r22 124 LDREG DTLB_OFF_BASE(%r1), %arg0 125 LDREG DTLB_OFF_STRIDE(%r1), %arg1 126 LDREG DTLB_OFF_COUNT(%r1), %arg2 127 LDREG DTLB_LOOP(%r1), %arg3 128 129 addib,COND(=) -1, %arg3, fdtoneloop /* Preadjust and test */ 130 movb,<,n %arg3, %r31, fdtdone /* If loop < 0, skip */ 131 copy %arg0, %r28 /* Init base addr */ 132 133fdtmanyloop: /* Loop if LOOP >= 2 */ 134 mtsp %r20, %sr1 135 add %r21, %r20, %r20 /* increment space */ 136 copy %arg2, %r29 /* Init middle loop count */ 137 138fdtmanymiddle: /* Loop if LOOP >= 2 */ 139 addib,COND(>) -1, %r31, fdtmanymiddle /* Adjusted inner loop decr */ 140 pdtlbe %r0(%sr1, %r28) 141 pdtlbe,m %arg1(%sr1, %r28) /* Last pdtlbe and addr adjust */ 142 addib,COND(>) -1, %r29, fdtmanymiddle /* Middle loop decr */ 143 copy %arg3, %r31 /* Re-init inner loop count */ 144 145 movb,tr %arg0, %r28, fdtmanyloop /* Re-init base addr */ 146 addib,COND(<=),n -1, %r22,fdtdone /* Outer loop count decr */ 147 148fdtoneloop: /* Loop if LOOP = 1 */ 149 mtsp %r20, %sr1 150 copy %arg0, %r28 /* init base addr */ 151 copy %arg2, %r29 /* init middle loop count */ 152 153fdtonemiddle: /* Loop if LOOP = 1 */ 154 addib,COND(>) -1, %r29, fdtonemiddle /* Middle loop count decr */ 155 pdtlbe,m %arg1(%sr1, %r28) /* pdtlbe for one loop */ 156 157 addib,COND(>) -1, %r22, fdtoneloop /* Outer loop count decr */ 158 add %r21, %r20, %r20 /* increment space */ 159 160 161fdtdone: 162 /* 163 * Switch back to virtual mode 164 */ 165 /* pcxt_ssm_bug */ 166 rsm PSW_SM_I, %r0 167 load32 2f, %r1 168 nop 169 nop 170 nop 171 nop 172 nop 173 174 rsm PSW_SM_Q, %r0 /* prep to load iia queue */ 175 mtctl %r0, %cr17 /* Clear IIASQ tail */ 176 mtctl %r0, %cr17 /* Clear IIASQ head */ 177 mtctl %r1, %cr18 /* IIAOQ head */ 178 ldo 4(%r1), %r1 179 mtctl %r1, %cr18 /* IIAOQ tail */ 180 load32 KERNEL_PSW, %r1 181 or %r1, %r19, %r1 /* I-bit to state on entry */ 182 mtctl %r1, %ipsw /* restore I-bit (entire PSW) */ 183 rfi 184 nop 185 1862: bv %r0(%r2) 187 nop 188ENDPROC_CFI(flush_tlb_all_local) 189 190 .import cache_info,data 191 192ENTRY_CFI(flush_instruction_cache_local) 193 load32 cache_info, %r1 194 195 /* Flush Instruction Cache */ 196 197 LDREG ICACHE_BASE(%r1), %arg0 198 LDREG ICACHE_STRIDE(%r1), %arg1 199 LDREG ICACHE_COUNT(%r1), %arg2 200 LDREG ICACHE_LOOP(%r1), %arg3 201 rsm PSW_SM_I, %r22 /* No mmgt ops during loop*/ 202 mtsp %r0, %sr1 203 addib,COND(=) -1, %arg3, fioneloop /* Preadjust and test */ 204 movb,<,n %arg3, %r31, fisync /* If loop < 0, do sync */ 205 206fimanyloop: /* Loop if LOOP >= 2 */ 207 addib,COND(>) -1, %r31, fimanyloop /* Adjusted inner loop decr */ 208 fice %r0(%sr1, %arg0) 209 fice,m %arg1(%sr1, %arg0) /* Last fice and addr adjust */ 210 movb,tr %arg3, %r31, fimanyloop /* Re-init inner loop count */ 211 addib,COND(<=),n -1, %arg2, fisync /* Outer loop decr */ 212 213fioneloop: /* Loop if LOOP = 1 */ 214 /* Some implementations may flush with a single fice instruction */ 215 cmpib,COND(>>=),n 15, %arg2, fioneloop2 216 217fioneloop1: 218 fice,m %arg1(%sr1, %arg0) 219 fice,m %arg1(%sr1, %arg0) 220 fice,m %arg1(%sr1, %arg0) 221 fice,m %arg1(%sr1, %arg0) 222 fice,m %arg1(%sr1, %arg0) 223 fice,m %arg1(%sr1, %arg0) 224 fice,m %arg1(%sr1, %arg0) 225 fice,m %arg1(%sr1, %arg0) 226 fice,m %arg1(%sr1, %arg0) 227 fice,m %arg1(%sr1, %arg0) 228 fice,m %arg1(%sr1, %arg0) 229 fice,m %arg1(%sr1, %arg0) 230 fice,m %arg1(%sr1, %arg0) 231 fice,m %arg1(%sr1, %arg0) 232 fice,m %arg1(%sr1, %arg0) 233 addib,COND(>) -16, %arg2, fioneloop1 234 fice,m %arg1(%sr1, %arg0) 235 236 /* Check if done */ 237 cmpb,COND(=),n %arg2, %r0, fisync /* Predict branch taken */ 238 239fioneloop2: 240 addib,COND(>) -1, %arg2, fioneloop2 /* Outer loop count decr */ 241 fice,m %arg1(%sr1, %arg0) /* Fice for one loop */ 242 243fisync: 244 sync 245 mtsm %r22 /* restore I-bit */ 246 bv %r0(%r2) 247 nop 248ENDPROC_CFI(flush_instruction_cache_local) 249 250 251 .import cache_info, data 252ENTRY_CFI(flush_data_cache_local) 253 load32 cache_info, %r1 254 255 /* Flush Data Cache */ 256 257 LDREG DCACHE_BASE(%r1), %arg0 258 LDREG DCACHE_STRIDE(%r1), %arg1 259 LDREG DCACHE_COUNT(%r1), %arg2 260 LDREG DCACHE_LOOP(%r1), %arg3 261 rsm PSW_SM_I, %r22 /* No mmgt ops during loop*/ 262 mtsp %r0, %sr1 263 addib,COND(=) -1, %arg3, fdoneloop /* Preadjust and test */ 264 movb,<,n %arg3, %r31, fdsync /* If loop < 0, do sync */ 265 266fdmanyloop: /* Loop if LOOP >= 2 */ 267 addib,COND(>) -1, %r31, fdmanyloop /* Adjusted inner loop decr */ 268 fdce %r0(%sr1, %arg0) 269 fdce,m %arg1(%sr1, %arg0) /* Last fdce and addr adjust */ 270 movb,tr %arg3, %r31, fdmanyloop /* Re-init inner loop count */ 271 addib,COND(<=),n -1, %arg2, fdsync /* Outer loop decr */ 272 273fdoneloop: /* Loop if LOOP = 1 */ 274 /* Some implementations may flush with a single fdce instruction */ 275 cmpib,COND(>>=),n 15, %arg2, fdoneloop2 276 277fdoneloop1: 278 fdce,m %arg1(%sr1, %arg0) 279 fdce,m %arg1(%sr1, %arg0) 280 fdce,m %arg1(%sr1, %arg0) 281 fdce,m %arg1(%sr1, %arg0) 282 fdce,m %arg1(%sr1, %arg0) 283 fdce,m %arg1(%sr1, %arg0) 284 fdce,m %arg1(%sr1, %arg0) 285 fdce,m %arg1(%sr1, %arg0) 286 fdce,m %arg1(%sr1, %arg0) 287 fdce,m %arg1(%sr1, %arg0) 288 fdce,m %arg1(%sr1, %arg0) 289 fdce,m %arg1(%sr1, %arg0) 290 fdce,m %arg1(%sr1, %arg0) 291 fdce,m %arg1(%sr1, %arg0) 292 fdce,m %arg1(%sr1, %arg0) 293 addib,COND(>) -16, %arg2, fdoneloop1 294 fdce,m %arg1(%sr1, %arg0) 295 296 /* Check if done */ 297 cmpb,COND(=),n %arg2, %r0, fdsync /* Predict branch taken */ 298 299fdoneloop2: 300 addib,COND(>) -1, %arg2, fdoneloop2 /* Outer loop count decr */ 301 fdce,m %arg1(%sr1, %arg0) /* Fdce for one loop */ 302 303fdsync: 304 syncdma 305 sync 306 mtsm %r22 /* restore I-bit */ 307 bv %r0(%r2) 308 nop 309ENDPROC_CFI(flush_data_cache_local) 310 311/* Macros to serialize TLB purge operations on SMP. */ 312 313 .macro tlb_lock la,flags,tmp 314#ifdef CONFIG_SMP 315#if __PA_LDCW_ALIGNMENT > 4 316 load32 pa_tlb_lock + __PA_LDCW_ALIGNMENT-1, \la 317 depi 0,31,__PA_LDCW_ALIGN_ORDER, \la 318#else 319 load32 pa_tlb_lock, \la 320#endif 321 rsm PSW_SM_I,\flags 3221: LDCW 0(\la),\tmp 323 cmpib,<>,n 0,\tmp,3f 3242: ldw 0(\la),\tmp 325 cmpb,<> %r0,\tmp,1b 326 nop 327 b,n 2b 3283: 329#endif 330 .endm 331 332 .macro tlb_unlock la,flags,tmp 333#ifdef CONFIG_SMP 334 ldi 1,\tmp 335 sync 336 stw \tmp,0(\la) 337 mtsm \flags 338#endif 339 .endm 340 341/* Clear page using kernel mapping. */ 342 343ENTRY_CFI(clear_page_asm) 344#ifdef CONFIG_64BIT 345 346 /* Unroll the loop. */ 347 ldi (PAGE_SIZE / 128), %r1 348 3491: 350 std %r0, 0(%r26) 351 std %r0, 8(%r26) 352 std %r0, 16(%r26) 353 std %r0, 24(%r26) 354 std %r0, 32(%r26) 355 std %r0, 40(%r26) 356 std %r0, 48(%r26) 357 std %r0, 56(%r26) 358 std %r0, 64(%r26) 359 std %r0, 72(%r26) 360 std %r0, 80(%r26) 361 std %r0, 88(%r26) 362 std %r0, 96(%r26) 363 std %r0, 104(%r26) 364 std %r0, 112(%r26) 365 std %r0, 120(%r26) 366 367 /* Note reverse branch hint for addib is taken. */ 368 addib,COND(>),n -1, %r1, 1b 369 ldo 128(%r26), %r26 370 371#else 372 373 /* 374 * Note that until (if) we start saving the full 64-bit register 375 * values on interrupt, we can't use std on a 32 bit kernel. 376 */ 377 ldi (PAGE_SIZE / 64), %r1 378 3791: 380 stw %r0, 0(%r26) 381 stw %r0, 4(%r26) 382 stw %r0, 8(%r26) 383 stw %r0, 12(%r26) 384 stw %r0, 16(%r26) 385 stw %r0, 20(%r26) 386 stw %r0, 24(%r26) 387 stw %r0, 28(%r26) 388 stw %r0, 32(%r26) 389 stw %r0, 36(%r26) 390 stw %r0, 40(%r26) 391 stw %r0, 44(%r26) 392 stw %r0, 48(%r26) 393 stw %r0, 52(%r26) 394 stw %r0, 56(%r26) 395 stw %r0, 60(%r26) 396 397 addib,COND(>),n -1, %r1, 1b 398 ldo 64(%r26), %r26 399#endif 400 bv %r0(%r2) 401 nop 402ENDPROC_CFI(clear_page_asm) 403 404/* Copy page using kernel mapping. */ 405 406ENTRY_CFI(copy_page_asm) 407#ifdef CONFIG_64BIT 408 /* PA8x00 CPUs can consume 2 loads or 1 store per cycle. 409 * Unroll the loop by hand and arrange insn appropriately. 410 * Prefetch doesn't improve performance on rp3440. 411 * GCC probably can do this just as well... 412 */ 413 414 ldi (PAGE_SIZE / 128), %r1 415 4161: ldd 0(%r25), %r19 417 ldd 8(%r25), %r20 418 419 ldd 16(%r25), %r21 420 ldd 24(%r25), %r22 421 std %r19, 0(%r26) 422 std %r20, 8(%r26) 423 424 ldd 32(%r25), %r19 425 ldd 40(%r25), %r20 426 std %r21, 16(%r26) 427 std %r22, 24(%r26) 428 429 ldd 48(%r25), %r21 430 ldd 56(%r25), %r22 431 std %r19, 32(%r26) 432 std %r20, 40(%r26) 433 434 ldd 64(%r25), %r19 435 ldd 72(%r25), %r20 436 std %r21, 48(%r26) 437 std %r22, 56(%r26) 438 439 ldd 80(%r25), %r21 440 ldd 88(%r25), %r22 441 std %r19, 64(%r26) 442 std %r20, 72(%r26) 443 444 ldd 96(%r25), %r19 445 ldd 104(%r25), %r20 446 std %r21, 80(%r26) 447 std %r22, 88(%r26) 448 449 ldd 112(%r25), %r21 450 ldd 120(%r25), %r22 451 ldo 128(%r25), %r25 452 std %r19, 96(%r26) 453 std %r20, 104(%r26) 454 455 std %r21, 112(%r26) 456 std %r22, 120(%r26) 457 458 /* Note reverse branch hint for addib is taken. */ 459 addib,COND(>),n -1, %r1, 1b 460 ldo 128(%r26), %r26 461 462#else 463 464 /* 465 * This loop is optimized for PCXL/PCXL2 ldw/ldw and stw/stw 466 * bundles (very restricted rules for bundling). 467 * Note that until (if) we start saving 468 * the full 64 bit register values on interrupt, we can't 469 * use ldd/std on a 32 bit kernel. 470 */ 471 ldw 0(%r25), %r19 472 ldi (PAGE_SIZE / 64), %r1 473 4741: 475 ldw 4(%r25), %r20 476 ldw 8(%r25), %r21 477 ldw 12(%r25), %r22 478 stw %r19, 0(%r26) 479 stw %r20, 4(%r26) 480 stw %r21, 8(%r26) 481 stw %r22, 12(%r26) 482 ldw 16(%r25), %r19 483 ldw 20(%r25), %r20 484 ldw 24(%r25), %r21 485 ldw 28(%r25), %r22 486 stw %r19, 16(%r26) 487 stw %r20, 20(%r26) 488 stw %r21, 24(%r26) 489 stw %r22, 28(%r26) 490 ldw 32(%r25), %r19 491 ldw 36(%r25), %r20 492 ldw 40(%r25), %r21 493 ldw 44(%r25), %r22 494 stw %r19, 32(%r26) 495 stw %r20, 36(%r26) 496 stw %r21, 40(%r26) 497 stw %r22, 44(%r26) 498 ldw 48(%r25), %r19 499 ldw 52(%r25), %r20 500 ldw 56(%r25), %r21 501 ldw 60(%r25), %r22 502 stw %r19, 48(%r26) 503 stw %r20, 52(%r26) 504 ldo 64(%r25), %r25 505 stw %r21, 56(%r26) 506 stw %r22, 60(%r26) 507 ldo 64(%r26), %r26 508 addib,COND(>),n -1, %r1, 1b 509 ldw 0(%r25), %r19 510#endif 511 bv %r0(%r2) 512 nop 513ENDPROC_CFI(copy_page_asm) 514 515/* 516 * NOTE: Code in clear_user_page has a hard coded dependency on the 517 * maximum alias boundary being 4 Mb. We've been assured by the 518 * parisc chip designers that there will not ever be a parisc 519 * chip with a larger alias boundary (Never say never :-) ). 520 * 521 * Subtle: the dtlb miss handlers support the temp alias region by 522 * "knowing" that if a dtlb miss happens within the temp alias 523 * region it must have occurred while in clear_user_page. Since 524 * this routine makes use of processor local translations, we 525 * don't want to insert them into the kernel page table. Instead, 526 * we load up some general registers (they need to be registers 527 * which aren't shadowed) with the physical page numbers (preshifted 528 * for tlb insertion) needed to insert the translations. When we 529 * miss on the translation, the dtlb miss handler inserts the 530 * translation into the tlb using these values: 531 * 532 * %r26 physical page (shifted for tlb insert) of "to" translation 533 * %r23 physical page (shifted for tlb insert) of "from" translation 534 */ 535 536 /* Drop prot bits and convert to page addr for iitlbt and idtlbt */ 537 #define PAGE_ADD_SHIFT (PAGE_SHIFT-12) 538 .macro convert_phys_for_tlb_insert20 phys 539 extrd,u \phys, 56-PAGE_ADD_SHIFT, 32-PAGE_ADD_SHIFT, \phys 540#if _PAGE_SIZE_ENCODING_DEFAULT 541 depdi _PAGE_SIZE_ENCODING_DEFAULT, 63, (63-58), \phys 542#endif 543 .endm 544 545 /* 546 * copy_user_page_asm() performs a page copy using mappings 547 * equivalent to the user page mappings. It can be used to 548 * implement copy_user_page() but unfortunately both the `from' 549 * and `to' pages need to be flushed through mappings equivalent 550 * to the user mappings after the copy because the kernel accesses 551 * the `from' page through the kmap kernel mapping and the `to' 552 * page needs to be flushed since code can be copied. As a 553 * result, this implementation is less efficient than the simpler 554 * copy using the kernel mapping. It only needs the `from' page 555 * to flushed via the user mapping. The kunmap routines handle 556 * the flushes needed for the kernel mapping. 557 * 558 * I'm still keeping this around because it may be possible to 559 * use it if more information is passed into copy_user_page(). 560 * Have to do some measurements to see if it is worthwhile to 561 * lobby for such a change. 562 * 563 */ 564 565ENTRY_CFI(copy_user_page_asm) 566 /* Convert virtual `to' and `from' addresses to physical addresses. 567 Move `from' physical address to non shadowed register. */ 568 ldil L%(__PAGE_OFFSET), %r1 569 sub %r26, %r1, %r26 570 sub %r25, %r1, %r23 571 572 ldil L%(TMPALIAS_MAP_START), %r28 573#ifdef CONFIG_64BIT 574#if (TMPALIAS_MAP_START >= 0x80000000) 575 depdi 0, 31,32, %r28 /* clear any sign extension */ 576#endif 577 convert_phys_for_tlb_insert20 %r26 /* convert phys addr to tlb insert format */ 578 convert_phys_for_tlb_insert20 %r23 /* convert phys addr to tlb insert format */ 579 depd %r24,63,22, %r28 /* Form aliased virtual address 'to' */ 580 depdi 0, 63,PAGE_SHIFT, %r28 /* Clear any offset bits */ 581 copy %r28, %r29 582 depdi 1, 41,1, %r29 /* Form aliased virtual address 'from' */ 583#else 584 extrw,u %r26, 24,25, %r26 /* convert phys addr to tlb insert format */ 585 extrw,u %r23, 24,25, %r23 /* convert phys addr to tlb insert format */ 586 depw %r24, 31,22, %r28 /* Form aliased virtual address 'to' */ 587 depwi 0, 31,PAGE_SHIFT, %r28 /* Clear any offset bits */ 588 copy %r28, %r29 589 depwi 1, 9,1, %r29 /* Form aliased virtual address 'from' */ 590#endif 591 592 /* Purge any old translations */ 593 594#ifdef CONFIG_PA20 595 pdtlb,l %r0(%r28) 596 pdtlb,l %r0(%r29) 597#else 598 tlb_lock %r20,%r21,%r22 599 pdtlb %r0(%r28) 600 pdtlb %r0(%r29) 601 tlb_unlock %r20,%r21,%r22 602#endif 603 604#ifdef CONFIG_64BIT 605 /* PA8x00 CPUs can consume 2 loads or 1 store per cycle. 606 * Unroll the loop by hand and arrange insn appropriately. 607 * GCC probably can do this just as well. 608 */ 609 610 ldd 0(%r29), %r19 611 ldi (PAGE_SIZE / 128), %r1 612 6131: ldd 8(%r29), %r20 614 615 ldd 16(%r29), %r21 616 ldd 24(%r29), %r22 617 std %r19, 0(%r28) 618 std %r20, 8(%r28) 619 620 ldd 32(%r29), %r19 621 ldd 40(%r29), %r20 622 std %r21, 16(%r28) 623 std %r22, 24(%r28) 624 625 ldd 48(%r29), %r21 626 ldd 56(%r29), %r22 627 std %r19, 32(%r28) 628 std %r20, 40(%r28) 629 630 ldd 64(%r29), %r19 631 ldd 72(%r29), %r20 632 std %r21, 48(%r28) 633 std %r22, 56(%r28) 634 635 ldd 80(%r29), %r21 636 ldd 88(%r29), %r22 637 std %r19, 64(%r28) 638 std %r20, 72(%r28) 639 640 ldd 96(%r29), %r19 641 ldd 104(%r29), %r20 642 std %r21, 80(%r28) 643 std %r22, 88(%r28) 644 645 ldd 112(%r29), %r21 646 ldd 120(%r29), %r22 647 std %r19, 96(%r28) 648 std %r20, 104(%r28) 649 650 ldo 128(%r29), %r29 651 std %r21, 112(%r28) 652 std %r22, 120(%r28) 653 ldo 128(%r28), %r28 654 655 /* conditional branches nullify on forward taken branch, and on 656 * non-taken backward branch. Note that .+4 is a backwards branch. 657 * The ldd should only get executed if the branch is taken. 658 */ 659 addib,COND(>),n -1, %r1, 1b /* bundle 10 */ 660 ldd 0(%r29), %r19 /* start next loads */ 661 662#else 663 ldi (PAGE_SIZE / 64), %r1 664 665 /* 666 * This loop is optimized for PCXL/PCXL2 ldw/ldw and stw/stw 667 * bundles (very restricted rules for bundling). It probably 668 * does OK on PCXU and better, but we could do better with 669 * ldd/std instructions. Note that until (if) we start saving 670 * the full 64 bit register values on interrupt, we can't 671 * use ldd/std on a 32 bit kernel. 672 */ 673 6741: ldw 0(%r29), %r19 675 ldw 4(%r29), %r20 676 ldw 8(%r29), %r21 677 ldw 12(%r29), %r22 678 stw %r19, 0(%r28) 679 stw %r20, 4(%r28) 680 stw %r21, 8(%r28) 681 stw %r22, 12(%r28) 682 ldw 16(%r29), %r19 683 ldw 20(%r29), %r20 684 ldw 24(%r29), %r21 685 ldw 28(%r29), %r22 686 stw %r19, 16(%r28) 687 stw %r20, 20(%r28) 688 stw %r21, 24(%r28) 689 stw %r22, 28(%r28) 690 ldw 32(%r29), %r19 691 ldw 36(%r29), %r20 692 ldw 40(%r29), %r21 693 ldw 44(%r29), %r22 694 stw %r19, 32(%r28) 695 stw %r20, 36(%r28) 696 stw %r21, 40(%r28) 697 stw %r22, 44(%r28) 698 ldw 48(%r29), %r19 699 ldw 52(%r29), %r20 700 ldw 56(%r29), %r21 701 ldw 60(%r29), %r22 702 stw %r19, 48(%r28) 703 stw %r20, 52(%r28) 704 stw %r21, 56(%r28) 705 stw %r22, 60(%r28) 706 ldo 64(%r28), %r28 707 708 addib,COND(>) -1, %r1,1b 709 ldo 64(%r29), %r29 710#endif 711 712 bv %r0(%r2) 713 nop 714ENDPROC_CFI(copy_user_page_asm) 715 716ENTRY_CFI(clear_user_page_asm) 717 tophys_r1 %r26 718 719 ldil L%(TMPALIAS_MAP_START), %r28 720#ifdef CONFIG_64BIT 721#if (TMPALIAS_MAP_START >= 0x80000000) 722 depdi 0, 31,32, %r28 /* clear any sign extension */ 723#endif 724 convert_phys_for_tlb_insert20 %r26 /* convert phys addr to tlb insert format */ 725 depd %r25, 63,22, %r28 /* Form aliased virtual address 'to' */ 726 depdi 0, 63,PAGE_SHIFT, %r28 /* Clear any offset bits */ 727#else 728 extrw,u %r26, 24,25, %r26 /* convert phys addr to tlb insert format */ 729 depw %r25, 31,22, %r28 /* Form aliased virtual address 'to' */ 730 depwi 0, 31,PAGE_SHIFT, %r28 /* Clear any offset bits */ 731#endif 732 733 /* Purge any old translation */ 734 735#ifdef CONFIG_PA20 736 pdtlb,l %r0(%r28) 737#else 738 tlb_lock %r20,%r21,%r22 739 pdtlb %r0(%r28) 740 tlb_unlock %r20,%r21,%r22 741#endif 742 743#ifdef CONFIG_64BIT 744 ldi (PAGE_SIZE / 128), %r1 745 746 /* PREFETCH (Write) has not (yet) been proven to help here */ 747 /* #define PREFETCHW_OP ldd 256(%0), %r0 */ 748 7491: std %r0, 0(%r28) 750 std %r0, 8(%r28) 751 std %r0, 16(%r28) 752 std %r0, 24(%r28) 753 std %r0, 32(%r28) 754 std %r0, 40(%r28) 755 std %r0, 48(%r28) 756 std %r0, 56(%r28) 757 std %r0, 64(%r28) 758 std %r0, 72(%r28) 759 std %r0, 80(%r28) 760 std %r0, 88(%r28) 761 std %r0, 96(%r28) 762 std %r0, 104(%r28) 763 std %r0, 112(%r28) 764 std %r0, 120(%r28) 765 addib,COND(>) -1, %r1, 1b 766 ldo 128(%r28), %r28 767 768#else /* ! CONFIG_64BIT */ 769 ldi (PAGE_SIZE / 64), %r1 770 7711: stw %r0, 0(%r28) 772 stw %r0, 4(%r28) 773 stw %r0, 8(%r28) 774 stw %r0, 12(%r28) 775 stw %r0, 16(%r28) 776 stw %r0, 20(%r28) 777 stw %r0, 24(%r28) 778 stw %r0, 28(%r28) 779 stw %r0, 32(%r28) 780 stw %r0, 36(%r28) 781 stw %r0, 40(%r28) 782 stw %r0, 44(%r28) 783 stw %r0, 48(%r28) 784 stw %r0, 52(%r28) 785 stw %r0, 56(%r28) 786 stw %r0, 60(%r28) 787 addib,COND(>) -1, %r1, 1b 788 ldo 64(%r28), %r28 789#endif /* CONFIG_64BIT */ 790 791 bv %r0(%r2) 792 nop 793ENDPROC_CFI(clear_user_page_asm) 794 795ENTRY_CFI(flush_dcache_page_asm) 796 ldil L%(TMPALIAS_MAP_START), %r28 797#ifdef CONFIG_64BIT 798#if (TMPALIAS_MAP_START >= 0x80000000) 799 depdi 0, 31,32, %r28 /* clear any sign extension */ 800#endif 801 convert_phys_for_tlb_insert20 %r26 /* convert phys addr to tlb insert format */ 802 depd %r25, 63,22, %r28 /* Form aliased virtual address 'to' */ 803 depdi 0, 63,PAGE_SHIFT, %r28 /* Clear any offset bits */ 804#else 805 extrw,u %r26, 24,25, %r26 /* convert phys addr to tlb insert format */ 806 depw %r25, 31,22, %r28 /* Form aliased virtual address 'to' */ 807 depwi 0, 31,PAGE_SHIFT, %r28 /* Clear any offset bits */ 808#endif 809 810 /* Purge any old translation */ 811 812#ifdef CONFIG_PA20 813 pdtlb,l %r0(%r28) 814#else 815 tlb_lock %r20,%r21,%r22 816 pdtlb %r0(%r28) 817 tlb_unlock %r20,%r21,%r22 818#endif 819 820 ldil L%dcache_stride, %r1 821 ldw R%dcache_stride(%r1), r31 822 823#ifdef CONFIG_64BIT 824 depdi,z 1, 63-PAGE_SHIFT,1, %r25 825#else 826 depwi,z 1, 31-PAGE_SHIFT,1, %r25 827#endif 828 add %r28, %r25, %r25 829 sub %r25, r31, %r25 830 831 8321: fdc,m r31(%r28) 833 fdc,m r31(%r28) 834 fdc,m r31(%r28) 835 fdc,m r31(%r28) 836 fdc,m r31(%r28) 837 fdc,m r31(%r28) 838 fdc,m r31(%r28) 839 fdc,m r31(%r28) 840 fdc,m r31(%r28) 841 fdc,m r31(%r28) 842 fdc,m r31(%r28) 843 fdc,m r31(%r28) 844 fdc,m r31(%r28) 845 fdc,m r31(%r28) 846 fdc,m r31(%r28) 847 cmpb,COND(<<) %r28, %r25,1b 848 fdc,m r31(%r28) 849 850 sync 851 bv %r0(%r2) 852 nop 853ENDPROC_CFI(flush_dcache_page_asm) 854 855ENTRY_CFI(flush_icache_page_asm) 856 ldil L%(TMPALIAS_MAP_START), %r28 857#ifdef CONFIG_64BIT 858#if (TMPALIAS_MAP_START >= 0x80000000) 859 depdi 0, 31,32, %r28 /* clear any sign extension */ 860#endif 861 convert_phys_for_tlb_insert20 %r26 /* convert phys addr to tlb insert format */ 862 depd %r25, 63,22, %r28 /* Form aliased virtual address 'to' */ 863 depdi 0, 63,PAGE_SHIFT, %r28 /* Clear any offset bits */ 864#else 865 extrw,u %r26, 24,25, %r26 /* convert phys addr to tlb insert format */ 866 depw %r25, 31,22, %r28 /* Form aliased virtual address 'to' */ 867 depwi 0, 31,PAGE_SHIFT, %r28 /* Clear any offset bits */ 868#endif 869 870 /* Purge any old translation. Note that the FIC instruction 871 * may use either the instruction or data TLB. Given that we 872 * have a flat address space, it's not clear which TLB will be 873 * used. So, we purge both entries. */ 874 875#ifdef CONFIG_PA20 876 pdtlb,l %r0(%r28) 877 pitlb,l %r0(%sr4,%r28) 878#else 879 tlb_lock %r20,%r21,%r22 880 pdtlb %r0(%r28) 881 pitlb %r0(%sr4,%r28) 882 tlb_unlock %r20,%r21,%r22 883#endif 884 885 ldil L%icache_stride, %r1 886 ldw R%icache_stride(%r1), %r31 887 888#ifdef CONFIG_64BIT 889 depdi,z 1, 63-PAGE_SHIFT,1, %r25 890#else 891 depwi,z 1, 31-PAGE_SHIFT,1, %r25 892#endif 893 add %r28, %r25, %r25 894 sub %r25, %r31, %r25 895 896 897 /* fic only has the type 26 form on PA1.1, requiring an 898 * explicit space specification, so use %sr4 */ 8991: fic,m %r31(%sr4,%r28) 900 fic,m %r31(%sr4,%r28) 901 fic,m %r31(%sr4,%r28) 902 fic,m %r31(%sr4,%r28) 903 fic,m %r31(%sr4,%r28) 904 fic,m %r31(%sr4,%r28) 905 fic,m %r31(%sr4,%r28) 906 fic,m %r31(%sr4,%r28) 907 fic,m %r31(%sr4,%r28) 908 fic,m %r31(%sr4,%r28) 909 fic,m %r31(%sr4,%r28) 910 fic,m %r31(%sr4,%r28) 911 fic,m %r31(%sr4,%r28) 912 fic,m %r31(%sr4,%r28) 913 fic,m %r31(%sr4,%r28) 914 cmpb,COND(<<) %r28, %r25,1b 915 fic,m %r31(%sr4,%r28) 916 917 sync 918 bv %r0(%r2) 919 nop 920ENDPROC_CFI(flush_icache_page_asm) 921 922ENTRY_CFI(flush_kernel_dcache_page_asm) 923 ldil L%dcache_stride, %r1 924 ldw R%dcache_stride(%r1), %r23 925 926#ifdef CONFIG_64BIT 927 depdi,z 1, 63-PAGE_SHIFT,1, %r25 928#else 929 depwi,z 1, 31-PAGE_SHIFT,1, %r25 930#endif 931 add %r26, %r25, %r25 932 sub %r25, %r23, %r25 933 934 9351: fdc,m %r23(%r26) 936 fdc,m %r23(%r26) 937 fdc,m %r23(%r26) 938 fdc,m %r23(%r26) 939 fdc,m %r23(%r26) 940 fdc,m %r23(%r26) 941 fdc,m %r23(%r26) 942 fdc,m %r23(%r26) 943 fdc,m %r23(%r26) 944 fdc,m %r23(%r26) 945 fdc,m %r23(%r26) 946 fdc,m %r23(%r26) 947 fdc,m %r23(%r26) 948 fdc,m %r23(%r26) 949 fdc,m %r23(%r26) 950 cmpb,COND(<<) %r26, %r25,1b 951 fdc,m %r23(%r26) 952 953 sync 954 bv %r0(%r2) 955 nop 956ENDPROC_CFI(flush_kernel_dcache_page_asm) 957 958ENTRY_CFI(purge_kernel_dcache_page_asm) 959 ldil L%dcache_stride, %r1 960 ldw R%dcache_stride(%r1), %r23 961 962#ifdef CONFIG_64BIT 963 depdi,z 1, 63-PAGE_SHIFT,1, %r25 964#else 965 depwi,z 1, 31-PAGE_SHIFT,1, %r25 966#endif 967 add %r26, %r25, %r25 968 sub %r25, %r23, %r25 969 9701: pdc,m %r23(%r26) 971 pdc,m %r23(%r26) 972 pdc,m %r23(%r26) 973 pdc,m %r23(%r26) 974 pdc,m %r23(%r26) 975 pdc,m %r23(%r26) 976 pdc,m %r23(%r26) 977 pdc,m %r23(%r26) 978 pdc,m %r23(%r26) 979 pdc,m %r23(%r26) 980 pdc,m %r23(%r26) 981 pdc,m %r23(%r26) 982 pdc,m %r23(%r26) 983 pdc,m %r23(%r26) 984 pdc,m %r23(%r26) 985 cmpb,COND(<<) %r26, %r25, 1b 986 pdc,m %r23(%r26) 987 988 sync 989 bv %r0(%r2) 990 nop 991ENDPROC_CFI(purge_kernel_dcache_page_asm) 992 993ENTRY_CFI(flush_user_dcache_range_asm) 994 ldil L%dcache_stride, %r1 995 ldw R%dcache_stride(%r1), %r23 996 ldo -1(%r23), %r21 997 ANDCM %r26, %r21, %r26 998 9991: cmpb,COND(<<),n %r26, %r25, 1b 1000 fdc,m %r23(%sr3, %r26) 1001 1002 sync 1003 bv %r0(%r2) 1004 nop 1005ENDPROC_CFI(flush_user_dcache_range_asm) 1006 1007ENTRY_CFI(flush_kernel_dcache_range_asm) 1008 ldil L%dcache_stride, %r1 1009 ldw R%dcache_stride(%r1), %r23 1010 ldo -1(%r23), %r21 1011 ANDCM %r26, %r21, %r26 1012 10131: cmpb,COND(<<),n %r26, %r25,1b 1014 fdc,m %r23(%r26) 1015 1016 sync 1017 syncdma 1018 bv %r0(%r2) 1019 nop 1020ENDPROC_CFI(flush_kernel_dcache_range_asm) 1021 1022ENTRY_CFI(purge_kernel_dcache_range_asm) 1023 ldil L%dcache_stride, %r1 1024 ldw R%dcache_stride(%r1), %r23 1025 ldo -1(%r23), %r21 1026 ANDCM %r26, %r21, %r26 1027 10281: cmpb,COND(<<),n %r26, %r25,1b 1029 pdc,m %r23(%r26) 1030 1031 sync 1032 syncdma 1033 bv %r0(%r2) 1034 nop 1035ENDPROC_CFI(purge_kernel_dcache_range_asm) 1036 1037ENTRY_CFI(flush_user_icache_range_asm) 1038 ldil L%icache_stride, %r1 1039 ldw R%icache_stride(%r1), %r23 1040 ldo -1(%r23), %r21 1041 ANDCM %r26, %r21, %r26 1042 10431: cmpb,COND(<<),n %r26, %r25,1b 1044 fic,m %r23(%sr3, %r26) 1045 1046 sync 1047 bv %r0(%r2) 1048 nop 1049ENDPROC_CFI(flush_user_icache_range_asm) 1050 1051ENTRY_CFI(flush_kernel_icache_page) 1052 ldil L%icache_stride, %r1 1053 ldw R%icache_stride(%r1), %r23 1054 1055#ifdef CONFIG_64BIT 1056 depdi,z 1, 63-PAGE_SHIFT,1, %r25 1057#else 1058 depwi,z 1, 31-PAGE_SHIFT,1, %r25 1059#endif 1060 add %r26, %r25, %r25 1061 sub %r25, %r23, %r25 1062 1063 10641: fic,m %r23(%sr4, %r26) 1065 fic,m %r23(%sr4, %r26) 1066 fic,m %r23(%sr4, %r26) 1067 fic,m %r23(%sr4, %r26) 1068 fic,m %r23(%sr4, %r26) 1069 fic,m %r23(%sr4, %r26) 1070 fic,m %r23(%sr4, %r26) 1071 fic,m %r23(%sr4, %r26) 1072 fic,m %r23(%sr4, %r26) 1073 fic,m %r23(%sr4, %r26) 1074 fic,m %r23(%sr4, %r26) 1075 fic,m %r23(%sr4, %r26) 1076 fic,m %r23(%sr4, %r26) 1077 fic,m %r23(%sr4, %r26) 1078 fic,m %r23(%sr4, %r26) 1079 cmpb,COND(<<) %r26, %r25, 1b 1080 fic,m %r23(%sr4, %r26) 1081 1082 sync 1083 bv %r0(%r2) 1084 nop 1085ENDPROC_CFI(flush_kernel_icache_page) 1086 1087ENTRY_CFI(flush_kernel_icache_range_asm) 1088 ldil L%icache_stride, %r1 1089 ldw R%icache_stride(%r1), %r23 1090 ldo -1(%r23), %r21 1091 ANDCM %r26, %r21, %r26 1092 10931: cmpb,COND(<<),n %r26, %r25, 1b 1094 fic,m %r23(%sr4, %r26) 1095 1096 sync 1097 bv %r0(%r2) 1098 nop 1099ENDPROC_CFI(flush_kernel_icache_range_asm) 1100 1101 __INIT 1102 1103 /* align should cover use of rfi in disable_sr_hashing_asm and 1104 * srdis_done. 1105 */ 1106 .align 256 1107ENTRY_CFI(disable_sr_hashing_asm) 1108 /* 1109 * Switch to real mode 1110 */ 1111 /* pcxt_ssm_bug */ 1112 rsm PSW_SM_I, %r0 1113 load32 PA(1f), %r1 1114 nop 1115 nop 1116 nop 1117 nop 1118 nop 1119 1120 rsm PSW_SM_Q, %r0 /* prep to load iia queue */ 1121 mtctl %r0, %cr17 /* Clear IIASQ tail */ 1122 mtctl %r0, %cr17 /* Clear IIASQ head */ 1123 mtctl %r1, %cr18 /* IIAOQ head */ 1124 ldo 4(%r1), %r1 1125 mtctl %r1, %cr18 /* IIAOQ tail */ 1126 load32 REAL_MODE_PSW, %r1 1127 mtctl %r1, %ipsw 1128 rfi 1129 nop 1130 11311: cmpib,=,n SRHASH_PCXST, %r26,srdis_pcxs 1132 cmpib,=,n SRHASH_PCXL, %r26,srdis_pcxl 1133 cmpib,=,n SRHASH_PA20, %r26,srdis_pa20 1134 b,n srdis_done 1135 1136srdis_pcxs: 1137 1138 /* Disable Space Register Hashing for PCXS,PCXT,PCXT' */ 1139 1140 .word 0x141c1a00 /* mfdiag %dr0, %r28 */ 1141 .word 0x141c1a00 /* must issue twice */ 1142 depwi 0,18,1, %r28 /* Clear DHE (dcache hash enable) */ 1143 depwi 0,20,1, %r28 /* Clear IHE (icache hash enable) */ 1144 .word 0x141c1600 /* mtdiag %r28, %dr0 */ 1145 .word 0x141c1600 /* must issue twice */ 1146 b,n srdis_done 1147 1148srdis_pcxl: 1149 1150 /* Disable Space Register Hashing for PCXL */ 1151 1152 .word 0x141c0600 /* mfdiag %dr0, %r28 */ 1153 depwi 0,28,2, %r28 /* Clear DHASH_EN & IHASH_EN */ 1154 .word 0x141c0240 /* mtdiag %r28, %dr0 */ 1155 b,n srdis_done 1156 1157srdis_pa20: 1158 1159 /* Disable Space Register Hashing for PCXU,PCXU+,PCXW,PCXW+,PCXW2 */ 1160 1161 .word 0x144008bc /* mfdiag %dr2, %r28 */ 1162 depdi 0, 54,1, %r28 /* clear DIAG_SPHASH_ENAB (bit 54) */ 1163 .word 0x145c1840 /* mtdiag %r28, %dr2 */ 1164 1165 1166srdis_done: 1167 /* Switch back to virtual mode */ 1168 rsm PSW_SM_I, %r0 /* prep to load iia queue */ 1169 load32 2f, %r1 1170 nop 1171 nop 1172 nop 1173 nop 1174 nop 1175 1176 rsm PSW_SM_Q, %r0 /* prep to load iia queue */ 1177 mtctl %r0, %cr17 /* Clear IIASQ tail */ 1178 mtctl %r0, %cr17 /* Clear IIASQ head */ 1179 mtctl %r1, %cr18 /* IIAOQ head */ 1180 ldo 4(%r1), %r1 1181 mtctl %r1, %cr18 /* IIAOQ tail */ 1182 load32 KERNEL_PSW, %r1 1183 mtctl %r1, %ipsw 1184 rfi 1185 nop 1186 11872: bv %r0(%r2) 1188 nop 1189ENDPROC_CFI(disable_sr_hashing_asm) 1190 1191 .end 1192