1 /* 2 * This file is subject to the terms and conditions of the GNU General Public 3 * License. See the file "COPYING" in the main directory of this archive 4 * for more details. 5 * 6 * Synthesize TLB refill handlers at runtime. 7 * 8 * Copyright (C) 2004, 2005, 2006, 2008 Thiemo Seufer 9 * Copyright (C) 2005, 2007, 2008, 2009 Maciej W. Rozycki 10 * Copyright (C) 2006 Ralf Baechle (ralf@linux-mips.org) 11 * Copyright (C) 2008, 2009 Cavium Networks, Inc. 12 * 13 * ... and the days got worse and worse and now you see 14 * I've gone completly out of my mind. 15 * 16 * They're coming to take me a away haha 17 * they're coming to take me a away hoho hihi haha 18 * to the funny farm where code is beautiful all the time ... 19 * 20 * (Condolences to Napoleon XIV) 21 */ 22 23 #include <linux/bug.h> 24 #include <linux/kernel.h> 25 #include <linux/types.h> 26 #include <linux/smp.h> 27 #include <linux/string.h> 28 #include <linux/init.h> 29 #include <linux/cache.h> 30 31 #include <asm/cacheflush.h> 32 #include <asm/pgtable.h> 33 #include <asm/war.h> 34 #include <asm/uasm.h> 35 36 /* 37 * TLB load/store/modify handlers. 38 * 39 * Only the fastpath gets synthesized at runtime, the slowpath for 40 * do_page_fault remains normal asm. 41 */ 42 extern void tlb_do_page_fault_0(void); 43 extern void tlb_do_page_fault_1(void); 44 45 struct work_registers { 46 int r1; 47 int r2; 48 int r3; 49 }; 50 51 struct tlb_reg_save { 52 unsigned long a; 53 unsigned long b; 54 } ____cacheline_aligned_in_smp; 55 56 static struct tlb_reg_save handler_reg_save[NR_CPUS]; 57 58 static inline int r45k_bvahwbug(void) 59 { 60 /* XXX: We should probe for the presence of this bug, but we don't. */ 61 return 0; 62 } 63 64 static inline int r4k_250MHZhwbug(void) 65 { 66 /* XXX: We should probe for the presence of this bug, but we don't. */ 67 return 0; 68 } 69 70 static inline int __maybe_unused bcm1250_m3_war(void) 71 { 72 return BCM1250_M3_WAR; 73 } 74 75 static inline int __maybe_unused r10000_llsc_war(void) 76 { 77 return R10000_LLSC_WAR; 78 } 79 80 static int use_bbit_insns(void) 81 { 82 switch (current_cpu_type()) { 83 case CPU_CAVIUM_OCTEON: 84 case CPU_CAVIUM_OCTEON_PLUS: 85 case CPU_CAVIUM_OCTEON2: 86 return 1; 87 default: 88 return 0; 89 } 90 } 91 92 static int use_lwx_insns(void) 93 { 94 switch (current_cpu_type()) { 95 case CPU_CAVIUM_OCTEON2: 96 return 1; 97 default: 98 return 0; 99 } 100 } 101 #if defined(CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE) && \ 102 CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE > 0 103 static bool scratchpad_available(void) 104 { 105 return true; 106 } 107 static int scratchpad_offset(int i) 108 { 109 /* 110 * CVMSEG starts at address -32768 and extends for 111 * CAVIUM_OCTEON_CVMSEG_SIZE 128 byte cache lines. 112 */ 113 i += 1; /* Kernel use starts at the top and works down. */ 114 return CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE * 128 - (8 * i) - 32768; 115 } 116 #else 117 static bool scratchpad_available(void) 118 { 119 return false; 120 } 121 static int scratchpad_offset(int i) 122 { 123 BUG(); 124 /* Really unreachable, but evidently some GCC want this. */ 125 return 0; 126 } 127 #endif 128 /* 129 * Found by experiment: At least some revisions of the 4kc throw under 130 * some circumstances a machine check exception, triggered by invalid 131 * values in the index register. Delaying the tlbp instruction until 132 * after the next branch, plus adding an additional nop in front of 133 * tlbwi/tlbwr avoids the invalid index register values. Nobody knows 134 * why; it's not an issue caused by the core RTL. 
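 *
 * The workaround is wired up below: when m4kc_tlbp_war() matches, the
 * tlbl/tlbs/tlbm fastpaths issue tlbp only after the PTE-check branch
 * (see build_r4000_tlbchange_handler_head and the callers of
 * build_tlb_probe_entry), and build_tlb_write_entry emits an extra nop
 * in front of tlbwi/tlbwr for the affected 4K cores.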
135 * 136 */ 137 static int __cpuinit m4kc_tlbp_war(void) 138 { 139 return (current_cpu_data.processor_id & 0xffff00) == 140 (PRID_COMP_MIPS | PRID_IMP_4KC); 141 } 142 143 /* Handle labels (which must be positive integers). */ 144 enum label_id { 145 label_second_part = 1, 146 label_leave, 147 label_vmalloc, 148 label_vmalloc_done, 149 label_tlbw_hazard, 150 label_split, 151 label_tlbl_goaround1, 152 label_tlbl_goaround2, 153 label_nopage_tlbl, 154 label_nopage_tlbs, 155 label_nopage_tlbm, 156 label_smp_pgtable_change, 157 label_r3000_write_probe_fail, 158 label_large_segbits_fault, 159 #ifdef CONFIG_HUGETLB_PAGE 160 label_tlb_huge_update, 161 #endif 162 }; 163 164 UASM_L_LA(_second_part) 165 UASM_L_LA(_leave) 166 UASM_L_LA(_vmalloc) 167 UASM_L_LA(_vmalloc_done) 168 UASM_L_LA(_tlbw_hazard) 169 UASM_L_LA(_split) 170 UASM_L_LA(_tlbl_goaround1) 171 UASM_L_LA(_tlbl_goaround2) 172 UASM_L_LA(_nopage_tlbl) 173 UASM_L_LA(_nopage_tlbs) 174 UASM_L_LA(_nopage_tlbm) 175 UASM_L_LA(_smp_pgtable_change) 176 UASM_L_LA(_r3000_write_probe_fail) 177 UASM_L_LA(_large_segbits_fault) 178 #ifdef CONFIG_HUGETLB_PAGE 179 UASM_L_LA(_tlb_huge_update) 180 #endif 181 182 /* 183 * For debug purposes. 184 */ 185 static inline void dump_handler(const u32 *handler, int count) 186 { 187 int i; 188 189 pr_debug("\t.set push\n"); 190 pr_debug("\t.set noreorder\n"); 191 192 for (i = 0; i < count; i++) 193 pr_debug("\t%p\t.word 0x%08x\n", &handler[i], handler[i]); 194 195 pr_debug("\t.set pop\n"); 196 } 197 198 /* The only general purpose registers allowed in TLB handlers. */ 199 #define K0 26 200 #define K1 27 201 202 /* Some CP0 registers */ 203 #define C0_INDEX 0, 0 204 #define C0_ENTRYLO0 2, 0 205 #define C0_TCBIND 2, 2 206 #define C0_ENTRYLO1 3, 0 207 #define C0_CONTEXT 4, 0 208 #define C0_PAGEMASK 5, 0 209 #define C0_BADVADDR 8, 0 210 #define C0_ENTRYHI 10, 0 211 #define C0_EPC 14, 0 212 #define C0_XCONTEXT 20, 0 213 214 #ifdef CONFIG_64BIT 215 # define GET_CONTEXT(buf, reg) UASM_i_MFC0(buf, reg, C0_XCONTEXT) 216 #else 217 # define GET_CONTEXT(buf, reg) UASM_i_MFC0(buf, reg, C0_CONTEXT) 218 #endif 219 220 /* The worst case length of the handler is around 18 instructions for 221 * R3000-style TLBs and up to 63 instructions for R4000-style TLBs. 222 * Maximum space available is 32 instructions for R3000 and 64 223 * instructions for R4000. 224 * 225 * We deliberately chose a buffer size of 128, so we won't scribble 226 * over anything important on overflow before we panic. 
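 *
 * Overflow is still caught explicitly once a handler has been built
 * (see the "space exceeded" panics below); the oversized buffer only
 * guarantees that the overrun stays inside tlb_handler[] until then.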
 */
static u32 tlb_handler[128] __cpuinitdata;

/* simply assume worst case size for labels and relocs */
static struct uasm_label labels[128] __cpuinitdata;
static struct uasm_reloc relocs[128] __cpuinitdata;

#ifdef CONFIG_64BIT
static int check_for_high_segbits __cpuinitdata;
#endif

static unsigned int kscratch_used_mask __cpuinitdata;

static int __cpuinit allocate_kscratch(void)
{
	int r;
	unsigned int a = cpu_data[0].kscratch_mask & ~kscratch_used_mask;

	r = ffs(a);

	if (r == 0)
		return -1;

	r--; /* make it zero based */

	kscratch_used_mask |= (1 << r);

	return r;
}

static int scratch_reg __cpuinitdata;
static int pgd_reg __cpuinitdata;
enum vmalloc64_mode {not_refill, refill_scratch, refill_noscratch};

static struct work_registers __cpuinit build_get_work_registers(u32 **p)
{
	struct work_registers r;

	int smp_processor_id_reg;
	int smp_processor_id_sel;
	int smp_processor_id_shift;

	if (scratch_reg > 0) {
		/* Save in CPU local C0_KScratch? */
		UASM_i_MTC0(p, 1, 31, scratch_reg);
		r.r1 = K0;
		r.r2 = K1;
		r.r3 = 1;
		return r;
	}

	if (num_possible_cpus() > 1) {
#ifdef CONFIG_MIPS_PGD_C0_CONTEXT
		smp_processor_id_shift = 51;
		smp_processor_id_reg = 20; /* XContext */
		smp_processor_id_sel = 0;
#else
# ifdef CONFIG_32BIT
		smp_processor_id_shift = 25;
		smp_processor_id_reg = 4; /* Context */
		smp_processor_id_sel = 0;
# endif
# ifdef CONFIG_64BIT
		smp_processor_id_shift = 26;
		smp_processor_id_reg = 4; /* Context */
		smp_processor_id_sel = 0;
# endif
#endif
		/* Get smp_processor_id */
		UASM_i_MFC0(p, K0, smp_processor_id_reg, smp_processor_id_sel);
		UASM_i_SRL_SAFE(p, K0, K0, smp_processor_id_shift);

		/* handler_reg_save index in K0 */
		UASM_i_SLL(p, K0, K0, ilog2(sizeof(struct tlb_reg_save)));

		UASM_i_LA(p, K1, (long)&handler_reg_save);
		UASM_i_ADDU(p, K0, K0, K1);
	} else {
		UASM_i_LA(p, K0, (long)&handler_reg_save);
	}
	/* K0 now points to save area, save $1 and $2 */
	UASM_i_SW(p, 1, offsetof(struct tlb_reg_save, a), K0);
	UASM_i_SW(p, 2, offsetof(struct tlb_reg_save, b), K0);

	r.r1 = K1;
	r.r2 = 1;
	r.r3 = 2;
	return r;
}

static void __cpuinit build_restore_work_registers(u32 **p)
{
	if (scratch_reg > 0) {
		UASM_i_MFC0(p, 1, 31, scratch_reg);
		return;
	}
	/* K0 already points to save area, restore $1 and $2 */
	UASM_i_LW(p, 1, offsetof(struct tlb_reg_save, a), K0);
	UASM_i_LW(p, 2, offsetof(struct tlb_reg_save, b), K0);
}

#ifndef CONFIG_MIPS_PGD_C0_CONTEXT

/*
 * CONFIG_MIPS_PGD_C0_CONTEXT implies 64 bit and lack of pgd_current,
 * we cannot do r3000 under these circumstances.
 *
 * Declare pgd_current here instead of including mmu_context.h to avoid type
 * conflicts for tlbmiss_handler_setup_pgd
 */
extern unsigned long pgd_current[];

/*
 * The R3000 TLB handler is simple.
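 * It hand-walks pgd_current[]: the PGD slot is indexed with
 * BadVAddr >> 22, the PTE with the low bits of c0_context, the PTE is
 * dropped into EntryLo0 and written with tlbwr, and the handler
 * returns with jr/rfe.  The cp0 and load delay slots are filled
 * explicitly, as the per-line comments below note.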
343 */ 344 static void __cpuinit build_r3000_tlb_refill_handler(void) 345 { 346 long pgdc = (long)pgd_current; 347 u32 *p; 348 349 memset(tlb_handler, 0, sizeof(tlb_handler)); 350 p = tlb_handler; 351 352 uasm_i_mfc0(&p, K0, C0_BADVADDR); 353 uasm_i_lui(&p, K1, uasm_rel_hi(pgdc)); /* cp0 delay */ 354 uasm_i_lw(&p, K1, uasm_rel_lo(pgdc), K1); 355 uasm_i_srl(&p, K0, K0, 22); /* load delay */ 356 uasm_i_sll(&p, K0, K0, 2); 357 uasm_i_addu(&p, K1, K1, K0); 358 uasm_i_mfc0(&p, K0, C0_CONTEXT); 359 uasm_i_lw(&p, K1, 0, K1); /* cp0 delay */ 360 uasm_i_andi(&p, K0, K0, 0xffc); /* load delay */ 361 uasm_i_addu(&p, K1, K1, K0); 362 uasm_i_lw(&p, K0, 0, K1); 363 uasm_i_nop(&p); /* load delay */ 364 uasm_i_mtc0(&p, K0, C0_ENTRYLO0); 365 uasm_i_mfc0(&p, K1, C0_EPC); /* cp0 delay */ 366 uasm_i_tlbwr(&p); /* cp0 delay */ 367 uasm_i_jr(&p, K1); 368 uasm_i_rfe(&p); /* branch delay */ 369 370 if (p > tlb_handler + 32) 371 panic("TLB refill handler space exceeded"); 372 373 pr_debug("Wrote TLB refill handler (%u instructions).\n", 374 (unsigned int)(p - tlb_handler)); 375 376 memcpy((void *)ebase, tlb_handler, 0x80); 377 378 dump_handler((u32 *)ebase, 32); 379 } 380 #endif /* CONFIG_MIPS_PGD_C0_CONTEXT */ 381 382 /* 383 * The R4000 TLB handler is much more complicated. We have two 384 * consecutive handler areas with 32 instructions space each. 385 * Since they aren't used at the same time, we can overflow in the 386 * other one.To keep things simple, we first assume linear space, 387 * then we relocate it to the final handler layout as needed. 388 */ 389 static u32 final_handler[64] __cpuinitdata; 390 391 /* 392 * Hazards 393 * 394 * From the IDT errata for the QED RM5230 (Nevada), processor revision 1.0: 395 * 2. A timing hazard exists for the TLBP instruction. 396 * 397 * stalling_instruction 398 * TLBP 399 * 400 * The JTLB is being read for the TLBP throughout the stall generated by the 401 * previous instruction. This is not really correct as the stalling instruction 402 * can modify the address used to access the JTLB. The failure symptom is that 403 * the TLBP instruction will use an address created for the stalling instruction 404 * and not the address held in C0_ENHI and thus report the wrong results. 405 * 406 * The software work-around is to not allow the instruction preceding the TLBP 407 * to stall - make it an NOP or some other instruction guaranteed not to stall. 408 * 409 * Errata 2 will not be fixed. This errata is also on the R5000. 410 * 411 * As if we MIPS hackers wouldn't know how to nop pipelines happy ... 412 */ 413 static void __cpuinit __maybe_unused build_tlb_probe_entry(u32 **p) 414 { 415 switch (current_cpu_type()) { 416 /* Found by experiment: R4600 v2.0/R4700 needs this, too. */ 417 case CPU_R4600: 418 case CPU_R4700: 419 case CPU_R5000: 420 case CPU_R5000A: 421 case CPU_NEVADA: 422 uasm_i_nop(p); 423 uasm_i_tlbp(p); 424 break; 425 426 default: 427 uasm_i_tlbp(p); 428 break; 429 } 430 } 431 432 /* 433 * Write random or indexed TLB entry, and care about the hazards from 434 * the preceding mtc0 and for the following eret. 
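 *
 * tlb_random is what the refill handlers use (tlbwr); tlb_indexed is
 * used by the load/store/modify fastpaths after a tlbp has set
 * c0_index (tlbwi).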
435 */ 436 enum tlb_write_entry { tlb_random, tlb_indexed }; 437 438 static void __cpuinit build_tlb_write_entry(u32 **p, struct uasm_label **l, 439 struct uasm_reloc **r, 440 enum tlb_write_entry wmode) 441 { 442 void(*tlbw)(u32 **) = NULL; 443 444 switch (wmode) { 445 case tlb_random: tlbw = uasm_i_tlbwr; break; 446 case tlb_indexed: tlbw = uasm_i_tlbwi; break; 447 } 448 449 if (cpu_has_mips_r2) { 450 if (cpu_has_mips_r2_exec_hazard) 451 uasm_i_ehb(p); 452 tlbw(p); 453 return; 454 } 455 456 switch (current_cpu_type()) { 457 case CPU_R4000PC: 458 case CPU_R4000SC: 459 case CPU_R4000MC: 460 case CPU_R4400PC: 461 case CPU_R4400SC: 462 case CPU_R4400MC: 463 /* 464 * This branch uses up a mtc0 hazard nop slot and saves 465 * two nops after the tlbw instruction. 466 */ 467 uasm_il_bgezl(p, r, 0, label_tlbw_hazard); 468 tlbw(p); 469 uasm_l_tlbw_hazard(l, *p); 470 uasm_i_nop(p); 471 break; 472 473 case CPU_R4600: 474 case CPU_R4700: 475 case CPU_R5000: 476 case CPU_R5000A: 477 uasm_i_nop(p); 478 tlbw(p); 479 uasm_i_nop(p); 480 break; 481 482 case CPU_R4300: 483 case CPU_5KC: 484 case CPU_TX49XX: 485 case CPU_PR4450: 486 case CPU_XLR: 487 uasm_i_nop(p); 488 tlbw(p); 489 break; 490 491 case CPU_R10000: 492 case CPU_R12000: 493 case CPU_R14000: 494 case CPU_4KC: 495 case CPU_4KEC: 496 case CPU_SB1: 497 case CPU_SB1A: 498 case CPU_4KSC: 499 case CPU_20KC: 500 case CPU_25KF: 501 case CPU_BMIPS32: 502 case CPU_BMIPS3300: 503 case CPU_BMIPS4350: 504 case CPU_BMIPS4380: 505 case CPU_BMIPS5000: 506 case CPU_LOONGSON2: 507 case CPU_R5500: 508 if (m4kc_tlbp_war()) 509 uasm_i_nop(p); 510 case CPU_ALCHEMY: 511 tlbw(p); 512 break; 513 514 case CPU_NEVADA: 515 uasm_i_nop(p); /* QED specifies 2 nops hazard */ 516 /* 517 * This branch uses up a mtc0 hazard nop slot and saves 518 * a nop after the tlbw instruction. 519 */ 520 uasm_il_bgezl(p, r, 0, label_tlbw_hazard); 521 tlbw(p); 522 uasm_l_tlbw_hazard(l, *p); 523 break; 524 525 case CPU_RM7000: 526 uasm_i_nop(p); 527 uasm_i_nop(p); 528 uasm_i_nop(p); 529 uasm_i_nop(p); 530 tlbw(p); 531 break; 532 533 case CPU_RM9000: 534 /* 535 * When the JTLB is updated by tlbwi or tlbwr, a subsequent 536 * use of the JTLB for instructions should not occur for 4 537 * cpu cycles and use for data translations should not occur 538 * for 3 cpu cycles. 
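		 *
		 * The four ssnops emitted on either side of the write
		 * below keep both instruction fetch and data
		 * translations away from the JTLB for the required
		 * number of cycles.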
539 */ 540 uasm_i_ssnop(p); 541 uasm_i_ssnop(p); 542 uasm_i_ssnop(p); 543 uasm_i_ssnop(p); 544 tlbw(p); 545 uasm_i_ssnop(p); 546 uasm_i_ssnop(p); 547 uasm_i_ssnop(p); 548 uasm_i_ssnop(p); 549 break; 550 551 case CPU_VR4111: 552 case CPU_VR4121: 553 case CPU_VR4122: 554 case CPU_VR4181: 555 case CPU_VR4181A: 556 uasm_i_nop(p); 557 uasm_i_nop(p); 558 tlbw(p); 559 uasm_i_nop(p); 560 uasm_i_nop(p); 561 break; 562 563 case CPU_VR4131: 564 case CPU_VR4133: 565 case CPU_R5432: 566 uasm_i_nop(p); 567 uasm_i_nop(p); 568 tlbw(p); 569 break; 570 571 case CPU_JZRISC: 572 tlbw(p); 573 uasm_i_nop(p); 574 break; 575 576 default: 577 panic("No TLB refill handler yet (CPU type: %d)", 578 current_cpu_data.cputype); 579 break; 580 } 581 } 582 583 static __cpuinit __maybe_unused void build_convert_pte_to_entrylo(u32 **p, 584 unsigned int reg) 585 { 586 if (kernel_uses_smartmips_rixi) { 587 UASM_i_SRL(p, reg, reg, ilog2(_PAGE_NO_EXEC)); 588 UASM_i_ROTR(p, reg, reg, ilog2(_PAGE_GLOBAL) - ilog2(_PAGE_NO_EXEC)); 589 } else { 590 #ifdef CONFIG_64BIT_PHYS_ADDR 591 uasm_i_dsrl_safe(p, reg, reg, ilog2(_PAGE_GLOBAL)); 592 #else 593 UASM_i_SRL(p, reg, reg, ilog2(_PAGE_GLOBAL)); 594 #endif 595 } 596 } 597 598 #ifdef CONFIG_HUGETLB_PAGE 599 600 static __cpuinit void build_restore_pagemask(u32 **p, 601 struct uasm_reloc **r, 602 unsigned int tmp, 603 enum label_id lid, 604 int restore_scratch) 605 { 606 if (restore_scratch) { 607 /* Reset default page size */ 608 if (PM_DEFAULT_MASK >> 16) { 609 uasm_i_lui(p, tmp, PM_DEFAULT_MASK >> 16); 610 uasm_i_ori(p, tmp, tmp, PM_DEFAULT_MASK & 0xffff); 611 uasm_i_mtc0(p, tmp, C0_PAGEMASK); 612 uasm_il_b(p, r, lid); 613 } else if (PM_DEFAULT_MASK) { 614 uasm_i_ori(p, tmp, 0, PM_DEFAULT_MASK); 615 uasm_i_mtc0(p, tmp, C0_PAGEMASK); 616 uasm_il_b(p, r, lid); 617 } else { 618 uasm_i_mtc0(p, 0, C0_PAGEMASK); 619 uasm_il_b(p, r, lid); 620 } 621 if (scratch_reg > 0) 622 UASM_i_MFC0(p, 1, 31, scratch_reg); 623 else 624 UASM_i_LW(p, 1, scratchpad_offset(0), 0); 625 } else { 626 /* Reset default page size */ 627 if (PM_DEFAULT_MASK >> 16) { 628 uasm_i_lui(p, tmp, PM_DEFAULT_MASK >> 16); 629 uasm_i_ori(p, tmp, tmp, PM_DEFAULT_MASK & 0xffff); 630 uasm_il_b(p, r, lid); 631 uasm_i_mtc0(p, tmp, C0_PAGEMASK); 632 } else if (PM_DEFAULT_MASK) { 633 uasm_i_ori(p, tmp, 0, PM_DEFAULT_MASK); 634 uasm_il_b(p, r, lid); 635 uasm_i_mtc0(p, tmp, C0_PAGEMASK); 636 } else { 637 uasm_il_b(p, r, lid); 638 uasm_i_mtc0(p, 0, C0_PAGEMASK); 639 } 640 } 641 } 642 643 static __cpuinit void build_huge_tlb_write_entry(u32 **p, 644 struct uasm_label **l, 645 struct uasm_reloc **r, 646 unsigned int tmp, 647 enum tlb_write_entry wmode, 648 int restore_scratch) 649 { 650 /* Set huge page tlb entry size */ 651 uasm_i_lui(p, tmp, PM_HUGE_MASK >> 16); 652 uasm_i_ori(p, tmp, tmp, PM_HUGE_MASK & 0xffff); 653 uasm_i_mtc0(p, tmp, C0_PAGEMASK); 654 655 build_tlb_write_entry(p, l, r, wmode); 656 657 build_restore_pagemask(p, r, tmp, label_leave, restore_scratch); 658 } 659 660 /* 661 * Check if Huge PTE is present, if so then jump to LABEL. 
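 * The PMD entry is loaded and its _PAGE_HUGE bit tested, with a single
 * bbit1 on Octeon or an andi/bnez pair elsewhere.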
 */
static void __cpuinit
build_is_huge_pte(u32 **p, struct uasm_reloc **r, unsigned int tmp,
		  unsigned int pmd, int lid)
{
	UASM_i_LW(p, tmp, 0, pmd);
	if (use_bbit_insns()) {
		uasm_il_bbit1(p, r, tmp, ilog2(_PAGE_HUGE), lid);
	} else {
		uasm_i_andi(p, tmp, tmp, _PAGE_HUGE);
		uasm_il_bnez(p, r, tmp, lid);
	}
}

static __cpuinit void build_huge_update_entries(u32 **p,
						unsigned int pte,
						unsigned int tmp)
{
	int small_sequence;

	/*
	 * A huge PTE describes an area the size of the
	 * configured huge page size. This is twice the
	 * size of the large TLB entry we intend to use.
	 * A TLB entry half the size of the configured
	 * huge page size is configured into entrylo0
	 * and entrylo1 to cover the contiguous huge PTE
	 * address space.
	 */
	small_sequence = (HPAGE_SIZE >> 7) < 0x10000;

	/* We can clobber tmp. It isn't used after this.*/
	if (!small_sequence)
		uasm_i_lui(p, tmp, HPAGE_SIZE >> (7 + 16));

	build_convert_pte_to_entrylo(p, pte);
	UASM_i_MTC0(p, pte, C0_ENTRYLO0); /* load it */
	/* convert to entrylo1 */
	if (small_sequence)
		UASM_i_ADDIU(p, pte, pte, HPAGE_SIZE >> 7);
	else
		UASM_i_ADDU(p, pte, pte, tmp);

	UASM_i_MTC0(p, pte, C0_ENTRYLO1); /* load it */
}

static __cpuinit void build_huge_handler_tail(u32 **p,
					      struct uasm_reloc **r,
					      struct uasm_label **l,
					      unsigned int pte,
					      unsigned int ptr)
{
#ifdef CONFIG_SMP
	UASM_i_SC(p, pte, 0, ptr);
	uasm_il_beqz(p, r, pte, label_tlb_huge_update);
	UASM_i_LW(p, pte, 0, ptr); /* Needed because SC killed our PTE */
#else
	UASM_i_SW(p, pte, 0, ptr);
#endif
	build_huge_update_entries(p, pte, ptr);
	build_huge_tlb_write_entry(p, l, r, pte, tlb_indexed, 0);
}
#endif /* CONFIG_HUGETLB_PAGE */

#ifdef CONFIG_64BIT
/*
 * TMP and PTR are scratch.
 * TMP will be clobbered, PTR will hold the pmd entry.
 */
static void __cpuinit
build_get_pmde64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,
		 unsigned int tmp, unsigned int ptr)
{
#ifndef CONFIG_MIPS_PGD_C0_CONTEXT
	long pgdc = (long)pgd_current;
#endif
	/*
	 * The vmalloc handling is not in the hotpath.
	 */
	uasm_i_dmfc0(p, tmp, C0_BADVADDR);

	if (check_for_high_segbits) {
		/*
		 * The kernel currently implicitly assumes that the
		 * MIPS SEGBITS parameter for the processor is
		 * (PGDIR_SHIFT+PGDIR_BITS) or less, and will never
		 * allocate virtual addresses outside the maximum
		 * range for SEGBITS = (PGDIR_SHIFT+PGDIR_BITS). But
		 * that doesn't prevent user code from accessing the
		 * higher xuseg addresses. Here, we make sure that
		 * everything but the lower xuseg addresses goes down
		 * the module_alloc/vmalloc path.
		 */
		uasm_i_dsrl_safe(p, ptr, tmp, PGDIR_SHIFT + PGD_ORDER + PAGE_SHIFT - 3);
		uasm_il_bnez(p, r, ptr, label_vmalloc);
	} else {
		uasm_il_bltz(p, r, tmp, label_vmalloc);
	}
	/* No uasm_i_nop needed here, since the next insn doesn't touch TMP. */

#ifdef CONFIG_MIPS_PGD_C0_CONTEXT
	if (pgd_reg != -1) {
		/* pgd is in pgd_reg */
		UASM_i_MFC0(p, ptr, 31, pgd_reg);
	} else {
		/*
		 * &pgd << 11 stored in CONTEXT [23..63].
		 */
		UASM_i_MFC0(p, ptr, C0_CONTEXT);

		/* Clear lower 23 bits of context.
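		 * c0_context holds (pgd << 11) in bits 23..63 (set up by
		 * build_r4000_setup_pgd below).  The dins clears the low
		 * 23 bits, then the ori/drotr pair shifts the pointer
		 * back down by 11 while rotating the "1 0 1 0 1" bits up
		 * to 63..59, turning it into an xkphys cached address.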
*/ 773 uasm_i_dins(p, ptr, 0, 0, 23); 774 775 /* 1 0 1 0 1 << 6 xkphys cached */ 776 uasm_i_ori(p, ptr, ptr, 0x540); 777 uasm_i_drotr(p, ptr, ptr, 11); 778 } 779 #elif defined(CONFIG_SMP) 780 # ifdef CONFIG_MIPS_MT_SMTC 781 /* 782 * SMTC uses TCBind value as "CPU" index 783 */ 784 uasm_i_mfc0(p, ptr, C0_TCBIND); 785 uasm_i_dsrl_safe(p, ptr, ptr, 19); 786 # else 787 /* 788 * 64 bit SMP running in XKPHYS has smp_processor_id() << 3 789 * stored in CONTEXT. 790 */ 791 uasm_i_dmfc0(p, ptr, C0_CONTEXT); 792 uasm_i_dsrl_safe(p, ptr, ptr, 23); 793 # endif 794 UASM_i_LA_mostly(p, tmp, pgdc); 795 uasm_i_daddu(p, ptr, ptr, tmp); 796 uasm_i_dmfc0(p, tmp, C0_BADVADDR); 797 uasm_i_ld(p, ptr, uasm_rel_lo(pgdc), ptr); 798 #else 799 UASM_i_LA_mostly(p, ptr, pgdc); 800 uasm_i_ld(p, ptr, uasm_rel_lo(pgdc), ptr); 801 #endif 802 803 uasm_l_vmalloc_done(l, *p); 804 805 /* get pgd offset in bytes */ 806 uasm_i_dsrl_safe(p, tmp, tmp, PGDIR_SHIFT - 3); 807 808 uasm_i_andi(p, tmp, tmp, (PTRS_PER_PGD - 1)<<3); 809 uasm_i_daddu(p, ptr, ptr, tmp); /* add in pgd offset */ 810 #ifndef __PAGETABLE_PMD_FOLDED 811 uasm_i_dmfc0(p, tmp, C0_BADVADDR); /* get faulting address */ 812 uasm_i_ld(p, ptr, 0, ptr); /* get pmd pointer */ 813 uasm_i_dsrl_safe(p, tmp, tmp, PMD_SHIFT-3); /* get pmd offset in bytes */ 814 uasm_i_andi(p, tmp, tmp, (PTRS_PER_PMD - 1)<<3); 815 uasm_i_daddu(p, ptr, ptr, tmp); /* add in pmd offset */ 816 #endif 817 } 818 819 /* 820 * BVADDR is the faulting address, PTR is scratch. 821 * PTR will hold the pgd for vmalloc. 822 */ 823 static void __cpuinit 824 build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r, 825 unsigned int bvaddr, unsigned int ptr, 826 enum vmalloc64_mode mode) 827 { 828 long swpd = (long)swapper_pg_dir; 829 int single_insn_swpd; 830 int did_vmalloc_branch = 0; 831 832 single_insn_swpd = uasm_in_compat_space_p(swpd) && !uasm_rel_lo(swpd); 833 834 uasm_l_vmalloc(l, *p); 835 836 if (mode != not_refill && check_for_high_segbits) { 837 if (single_insn_swpd) { 838 uasm_il_bltz(p, r, bvaddr, label_vmalloc_done); 839 uasm_i_lui(p, ptr, uasm_rel_hi(swpd)); 840 did_vmalloc_branch = 1; 841 /* fall through */ 842 } else { 843 uasm_il_bgez(p, r, bvaddr, label_large_segbits_fault); 844 } 845 } 846 if (!did_vmalloc_branch) { 847 if (uasm_in_compat_space_p(swpd) && !uasm_rel_lo(swpd)) { 848 uasm_il_b(p, r, label_vmalloc_done); 849 uasm_i_lui(p, ptr, uasm_rel_hi(swpd)); 850 } else { 851 UASM_i_LA_mostly(p, ptr, swpd); 852 uasm_il_b(p, r, label_vmalloc_done); 853 if (uasm_in_compat_space_p(swpd)) 854 uasm_i_addiu(p, ptr, ptr, uasm_rel_lo(swpd)); 855 else 856 uasm_i_daddiu(p, ptr, ptr, uasm_rel_lo(swpd)); 857 } 858 } 859 if (mode != not_refill && check_for_high_segbits) { 860 uasm_l_large_segbits_fault(l, *p); 861 /* 862 * We get here if we are an xsseg address, or if we are 863 * an xuseg address above (PGDIR_SHIFT+PGDIR_BITS) boundary. 864 * 865 * Ignoring xsseg (assume disabled so would generate 866 * (address errors?), the only remaining possibility 867 * is the upper xuseg addresses. On processors with 868 * TLB_SEGBITS <= PGDIR_SHIFT+PGDIR_BITS, these 869 * addresses would have taken an address error. We try 870 * to mimic that here by taking a load/istream page 871 * fault. 
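		 *
		 * So the code below just loads the address of
		 * tlb_do_page_fault_0 and jumps to it; in refill_scratch
		 * mode the jump's delay slot restores the register that
		 * was parked in C0_KScratch or the CVMSEG scratchpad.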
872 */ 873 UASM_i_LA(p, ptr, (unsigned long)tlb_do_page_fault_0); 874 uasm_i_jr(p, ptr); 875 876 if (mode == refill_scratch) { 877 if (scratch_reg > 0) 878 UASM_i_MFC0(p, 1, 31, scratch_reg); 879 else 880 UASM_i_LW(p, 1, scratchpad_offset(0), 0); 881 } else { 882 uasm_i_nop(p); 883 } 884 } 885 } 886 887 #else /* !CONFIG_64BIT */ 888 889 /* 890 * TMP and PTR are scratch. 891 * TMP will be clobbered, PTR will hold the pgd entry. 892 */ 893 static void __cpuinit __maybe_unused 894 build_get_pgde32(u32 **p, unsigned int tmp, unsigned int ptr) 895 { 896 long pgdc = (long)pgd_current; 897 898 /* 32 bit SMP has smp_processor_id() stored in CONTEXT. */ 899 #ifdef CONFIG_SMP 900 #ifdef CONFIG_MIPS_MT_SMTC 901 /* 902 * SMTC uses TCBind value as "CPU" index 903 */ 904 uasm_i_mfc0(p, ptr, C0_TCBIND); 905 UASM_i_LA_mostly(p, tmp, pgdc); 906 uasm_i_srl(p, ptr, ptr, 19); 907 #else 908 /* 909 * smp_processor_id() << 3 is stored in CONTEXT. 910 */ 911 uasm_i_mfc0(p, ptr, C0_CONTEXT); 912 UASM_i_LA_mostly(p, tmp, pgdc); 913 uasm_i_srl(p, ptr, ptr, 23); 914 #endif 915 uasm_i_addu(p, ptr, tmp, ptr); 916 #else 917 UASM_i_LA_mostly(p, ptr, pgdc); 918 #endif 919 uasm_i_mfc0(p, tmp, C0_BADVADDR); /* get faulting address */ 920 uasm_i_lw(p, ptr, uasm_rel_lo(pgdc), ptr); 921 uasm_i_srl(p, tmp, tmp, PGDIR_SHIFT); /* get pgd only bits */ 922 uasm_i_sll(p, tmp, tmp, PGD_T_LOG2); 923 uasm_i_addu(p, ptr, ptr, tmp); /* add in pgd offset */ 924 } 925 926 #endif /* !CONFIG_64BIT */ 927 928 static void __cpuinit build_adjust_context(u32 **p, unsigned int ctx) 929 { 930 unsigned int shift = 4 - (PTE_T_LOG2 + 1) + PAGE_SHIFT - 12; 931 unsigned int mask = (PTRS_PER_PTE / 2 - 1) << (PTE_T_LOG2 + 1); 932 933 switch (current_cpu_type()) { 934 case CPU_VR41XX: 935 case CPU_VR4111: 936 case CPU_VR4121: 937 case CPU_VR4122: 938 case CPU_VR4131: 939 case CPU_VR4181: 940 case CPU_VR4181A: 941 case CPU_VR4133: 942 shift += 2; 943 break; 944 945 default: 946 break; 947 } 948 949 if (shift) 950 UASM_i_SRL(p, ctx, ctx, shift); 951 uasm_i_andi(p, ctx, ctx, mask); 952 } 953 954 static void __cpuinit build_get_ptep(u32 **p, unsigned int tmp, unsigned int ptr) 955 { 956 /* 957 * Bug workaround for the Nevada. It seems as if under certain 958 * circumstances the move from cp0_context might produce a 959 * bogus result when the mfc0 instruction and its consumer are 960 * in a different cacheline or a load instruction, probably any 961 * memory reference, is between them. 962 */ 963 switch (current_cpu_type()) { 964 case CPU_NEVADA: 965 UASM_i_LW(p, ptr, 0, ptr); 966 GET_CONTEXT(p, tmp); /* get context reg */ 967 break; 968 969 default: 970 GET_CONTEXT(p, tmp); /* get context reg */ 971 UASM_i_LW(p, ptr, 0, ptr); 972 break; 973 } 974 975 build_adjust_context(p, tmp); 976 UASM_i_ADDU(p, ptr, ptr, tmp); /* add in offset */ 977 } 978 979 static void __cpuinit build_update_entries(u32 **p, unsigned int tmp, 980 unsigned int ptep) 981 { 982 /* 983 * 64bit address support (36bit on a 32bit CPU) in a 32bit 984 * Kernel is a special case. Only a few CPUs use it. 
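	 * This is the CONFIG_64BIT_PHYS_ADDR path below: with 64-bit CP0
	 * registers the even and odd PTEs are fetched with ld and
	 * converted as usual; otherwise the word at offset
	 * sizeof(pte_t)/2 within each PTE is loaded with lw and is
	 * already pre-shifted for EntryLo.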
985 */ 986 #ifdef CONFIG_64BIT_PHYS_ADDR 987 if (cpu_has_64bits) { 988 uasm_i_ld(p, tmp, 0, ptep); /* get even pte */ 989 uasm_i_ld(p, ptep, sizeof(pte_t), ptep); /* get odd pte */ 990 if (kernel_uses_smartmips_rixi) { 991 UASM_i_SRL(p, tmp, tmp, ilog2(_PAGE_NO_EXEC)); 992 UASM_i_SRL(p, ptep, ptep, ilog2(_PAGE_NO_EXEC)); 993 UASM_i_ROTR(p, tmp, tmp, ilog2(_PAGE_GLOBAL) - ilog2(_PAGE_NO_EXEC)); 994 UASM_i_MTC0(p, tmp, C0_ENTRYLO0); /* load it */ 995 UASM_i_ROTR(p, ptep, ptep, ilog2(_PAGE_GLOBAL) - ilog2(_PAGE_NO_EXEC)); 996 } else { 997 uasm_i_dsrl_safe(p, tmp, tmp, ilog2(_PAGE_GLOBAL)); /* convert to entrylo0 */ 998 UASM_i_MTC0(p, tmp, C0_ENTRYLO0); /* load it */ 999 uasm_i_dsrl_safe(p, ptep, ptep, ilog2(_PAGE_GLOBAL)); /* convert to entrylo1 */ 1000 } 1001 UASM_i_MTC0(p, ptep, C0_ENTRYLO1); /* load it */ 1002 } else { 1003 int pte_off_even = sizeof(pte_t) / 2; 1004 int pte_off_odd = pte_off_even + sizeof(pte_t); 1005 1006 /* The pte entries are pre-shifted */ 1007 uasm_i_lw(p, tmp, pte_off_even, ptep); /* get even pte */ 1008 UASM_i_MTC0(p, tmp, C0_ENTRYLO0); /* load it */ 1009 uasm_i_lw(p, ptep, pte_off_odd, ptep); /* get odd pte */ 1010 UASM_i_MTC0(p, ptep, C0_ENTRYLO1); /* load it */ 1011 } 1012 #else 1013 UASM_i_LW(p, tmp, 0, ptep); /* get even pte */ 1014 UASM_i_LW(p, ptep, sizeof(pte_t), ptep); /* get odd pte */ 1015 if (r45k_bvahwbug()) 1016 build_tlb_probe_entry(p); 1017 if (kernel_uses_smartmips_rixi) { 1018 UASM_i_SRL(p, tmp, tmp, ilog2(_PAGE_NO_EXEC)); 1019 UASM_i_SRL(p, ptep, ptep, ilog2(_PAGE_NO_EXEC)); 1020 UASM_i_ROTR(p, tmp, tmp, ilog2(_PAGE_GLOBAL) - ilog2(_PAGE_NO_EXEC)); 1021 if (r4k_250MHZhwbug()) 1022 UASM_i_MTC0(p, 0, C0_ENTRYLO0); 1023 UASM_i_MTC0(p, tmp, C0_ENTRYLO0); /* load it */ 1024 UASM_i_ROTR(p, ptep, ptep, ilog2(_PAGE_GLOBAL) - ilog2(_PAGE_NO_EXEC)); 1025 } else { 1026 UASM_i_SRL(p, tmp, tmp, ilog2(_PAGE_GLOBAL)); /* convert to entrylo0 */ 1027 if (r4k_250MHZhwbug()) 1028 UASM_i_MTC0(p, 0, C0_ENTRYLO0); 1029 UASM_i_MTC0(p, tmp, C0_ENTRYLO0); /* load it */ 1030 UASM_i_SRL(p, ptep, ptep, ilog2(_PAGE_GLOBAL)); /* convert to entrylo1 */ 1031 if (r45k_bvahwbug()) 1032 uasm_i_mfc0(p, tmp, C0_INDEX); 1033 } 1034 if (r4k_250MHZhwbug()) 1035 UASM_i_MTC0(p, 0, C0_ENTRYLO1); 1036 UASM_i_MTC0(p, ptep, C0_ENTRYLO1); /* load it */ 1037 #endif 1038 } 1039 1040 struct mips_huge_tlb_info { 1041 int huge_pte; 1042 int restore_scratch; 1043 }; 1044 1045 static struct mips_huge_tlb_info __cpuinit 1046 build_fast_tlb_refill_handler (u32 **p, struct uasm_label **l, 1047 struct uasm_reloc **r, unsigned int tmp, 1048 unsigned int ptr, int c0_scratch) 1049 { 1050 struct mips_huge_tlb_info rv; 1051 unsigned int even, odd; 1052 int vmalloc_branch_delay_filled = 0; 1053 const int scratch = 1; /* Our extra working register */ 1054 1055 rv.huge_pte = scratch; 1056 rv.restore_scratch = 0; 1057 1058 if (check_for_high_segbits) { 1059 UASM_i_MFC0(p, tmp, C0_BADVADDR); 1060 1061 if (pgd_reg != -1) 1062 UASM_i_MFC0(p, ptr, 31, pgd_reg); 1063 else 1064 UASM_i_MFC0(p, ptr, C0_CONTEXT); 1065 1066 if (c0_scratch >= 0) 1067 UASM_i_MTC0(p, scratch, 31, c0_scratch); 1068 else 1069 UASM_i_SW(p, scratch, scratchpad_offset(0), 0); 1070 1071 uasm_i_dsrl_safe(p, scratch, tmp, 1072 PGDIR_SHIFT + PGD_ORDER + PAGE_SHIFT - 3); 1073 uasm_il_bnez(p, r, scratch, label_vmalloc); 1074 1075 if (pgd_reg == -1) { 1076 vmalloc_branch_delay_filled = 1; 1077 /* Clear lower 23 bits of context. 
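			 * This dins also sits in the bnez delay slot, which
			 * is what vmalloc_branch_delay_filled records: when
			 * it is set, the "get pgd offset in bytes" shift is
			 * emitted after uasm_l_vmalloc_done rather than in
			 * front of it.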
*/ 1078 uasm_i_dins(p, ptr, 0, 0, 23); 1079 } 1080 } else { 1081 if (pgd_reg != -1) 1082 UASM_i_MFC0(p, ptr, 31, pgd_reg); 1083 else 1084 UASM_i_MFC0(p, ptr, C0_CONTEXT); 1085 1086 UASM_i_MFC0(p, tmp, C0_BADVADDR); 1087 1088 if (c0_scratch >= 0) 1089 UASM_i_MTC0(p, scratch, 31, c0_scratch); 1090 else 1091 UASM_i_SW(p, scratch, scratchpad_offset(0), 0); 1092 1093 if (pgd_reg == -1) 1094 /* Clear lower 23 bits of context. */ 1095 uasm_i_dins(p, ptr, 0, 0, 23); 1096 1097 uasm_il_bltz(p, r, tmp, label_vmalloc); 1098 } 1099 1100 if (pgd_reg == -1) { 1101 vmalloc_branch_delay_filled = 1; 1102 /* 1 0 1 0 1 << 6 xkphys cached */ 1103 uasm_i_ori(p, ptr, ptr, 0x540); 1104 uasm_i_drotr(p, ptr, ptr, 11); 1105 } 1106 1107 #ifdef __PAGETABLE_PMD_FOLDED 1108 #define LOC_PTEP scratch 1109 #else 1110 #define LOC_PTEP ptr 1111 #endif 1112 1113 if (!vmalloc_branch_delay_filled) 1114 /* get pgd offset in bytes */ 1115 uasm_i_dsrl_safe(p, scratch, tmp, PGDIR_SHIFT - 3); 1116 1117 uasm_l_vmalloc_done(l, *p); 1118 1119 /* 1120 * tmp ptr 1121 * fall-through case = badvaddr *pgd_current 1122 * vmalloc case = badvaddr swapper_pg_dir 1123 */ 1124 1125 if (vmalloc_branch_delay_filled) 1126 /* get pgd offset in bytes */ 1127 uasm_i_dsrl_safe(p, scratch, tmp, PGDIR_SHIFT - 3); 1128 1129 #ifdef __PAGETABLE_PMD_FOLDED 1130 GET_CONTEXT(p, tmp); /* get context reg */ 1131 #endif 1132 uasm_i_andi(p, scratch, scratch, (PTRS_PER_PGD - 1) << 3); 1133 1134 if (use_lwx_insns()) { 1135 UASM_i_LWX(p, LOC_PTEP, scratch, ptr); 1136 } else { 1137 uasm_i_daddu(p, ptr, ptr, scratch); /* add in pgd offset */ 1138 uasm_i_ld(p, LOC_PTEP, 0, ptr); /* get pmd pointer */ 1139 } 1140 1141 #ifndef __PAGETABLE_PMD_FOLDED 1142 /* get pmd offset in bytes */ 1143 uasm_i_dsrl_safe(p, scratch, tmp, PMD_SHIFT - 3); 1144 uasm_i_andi(p, scratch, scratch, (PTRS_PER_PMD - 1) << 3); 1145 GET_CONTEXT(p, tmp); /* get context reg */ 1146 1147 if (use_lwx_insns()) { 1148 UASM_i_LWX(p, scratch, scratch, ptr); 1149 } else { 1150 uasm_i_daddu(p, ptr, ptr, scratch); /* add in pmd offset */ 1151 UASM_i_LW(p, scratch, 0, ptr); 1152 } 1153 #endif 1154 /* Adjust the context during the load latency. */ 1155 build_adjust_context(p, tmp); 1156 1157 #ifdef CONFIG_HUGETLB_PAGE 1158 uasm_il_bbit1(p, r, scratch, ilog2(_PAGE_HUGE), label_tlb_huge_update); 1159 /* 1160 * The in the LWX case we don't want to do the load in the 1161 * delay slot. It cannot issue in the same cycle and may be 1162 * speculative and unneeded. 
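	 * So a nop is placed in the bbit1 delay slot when LWX is in use;
	 * in the non-LWX path the following ADDU is harmless there.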
1163 */ 1164 if (use_lwx_insns()) 1165 uasm_i_nop(p); 1166 #endif /* CONFIG_HUGETLB_PAGE */ 1167 1168 1169 /* build_update_entries */ 1170 if (use_lwx_insns()) { 1171 even = ptr; 1172 odd = tmp; 1173 UASM_i_LWX(p, even, scratch, tmp); 1174 UASM_i_ADDIU(p, tmp, tmp, sizeof(pte_t)); 1175 UASM_i_LWX(p, odd, scratch, tmp); 1176 } else { 1177 UASM_i_ADDU(p, ptr, scratch, tmp); /* add in offset */ 1178 even = tmp; 1179 odd = ptr; 1180 UASM_i_LW(p, even, 0, ptr); /* get even pte */ 1181 UASM_i_LW(p, odd, sizeof(pte_t), ptr); /* get odd pte */ 1182 } 1183 if (kernel_uses_smartmips_rixi) { 1184 uasm_i_dsrl_safe(p, even, even, ilog2(_PAGE_NO_EXEC)); 1185 uasm_i_dsrl_safe(p, odd, odd, ilog2(_PAGE_NO_EXEC)); 1186 uasm_i_drotr(p, even, even, 1187 ilog2(_PAGE_GLOBAL) - ilog2(_PAGE_NO_EXEC)); 1188 UASM_i_MTC0(p, even, C0_ENTRYLO0); /* load it */ 1189 uasm_i_drotr(p, odd, odd, 1190 ilog2(_PAGE_GLOBAL) - ilog2(_PAGE_NO_EXEC)); 1191 } else { 1192 uasm_i_dsrl_safe(p, even, even, ilog2(_PAGE_GLOBAL)); 1193 UASM_i_MTC0(p, even, C0_ENTRYLO0); /* load it */ 1194 uasm_i_dsrl_safe(p, odd, odd, ilog2(_PAGE_GLOBAL)); 1195 } 1196 UASM_i_MTC0(p, odd, C0_ENTRYLO1); /* load it */ 1197 1198 if (c0_scratch >= 0) { 1199 UASM_i_MFC0(p, scratch, 31, c0_scratch); 1200 build_tlb_write_entry(p, l, r, tlb_random); 1201 uasm_l_leave(l, *p); 1202 rv.restore_scratch = 1; 1203 } else if (PAGE_SHIFT == 14 || PAGE_SHIFT == 13) { 1204 build_tlb_write_entry(p, l, r, tlb_random); 1205 uasm_l_leave(l, *p); 1206 UASM_i_LW(p, scratch, scratchpad_offset(0), 0); 1207 } else { 1208 UASM_i_LW(p, scratch, scratchpad_offset(0), 0); 1209 build_tlb_write_entry(p, l, r, tlb_random); 1210 uasm_l_leave(l, *p); 1211 rv.restore_scratch = 1; 1212 } 1213 1214 uasm_i_eret(p); /* return from trap */ 1215 1216 return rv; 1217 } 1218 1219 /* 1220 * For a 64-bit kernel, we are using the 64-bit XTLB refill exception 1221 * because EXL == 0. If we wrap, we can also use the 32 instruction 1222 * slots before the XTLB refill exception handler which belong to the 1223 * unused TLB refill exception. 
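 *
 * MIPS64_REFILL_INSNS and the folding code in
 * build_r4000_tlb_refill_handler below implement exactly that: up to
 * 2 * MIPS64_REFILL_INSNS instructions are accepted and, if needed,
 * the handler is split across the two 32-instruction vectors with a
 * branch from one into the other.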
1224 */ 1225 #define MIPS64_REFILL_INSNS 32 1226 1227 static void __cpuinit build_r4000_tlb_refill_handler(void) 1228 { 1229 u32 *p = tlb_handler; 1230 struct uasm_label *l = labels; 1231 struct uasm_reloc *r = relocs; 1232 u32 *f; 1233 unsigned int final_len; 1234 struct mips_huge_tlb_info htlb_info __maybe_unused; 1235 enum vmalloc64_mode vmalloc_mode __maybe_unused; 1236 1237 memset(tlb_handler, 0, sizeof(tlb_handler)); 1238 memset(labels, 0, sizeof(labels)); 1239 memset(relocs, 0, sizeof(relocs)); 1240 memset(final_handler, 0, sizeof(final_handler)); 1241 1242 if ((scratch_reg > 0 || scratchpad_available()) && use_bbit_insns()) { 1243 htlb_info = build_fast_tlb_refill_handler(&p, &l, &r, K0, K1, 1244 scratch_reg); 1245 vmalloc_mode = refill_scratch; 1246 } else { 1247 htlb_info.huge_pte = K0; 1248 htlb_info.restore_scratch = 0; 1249 vmalloc_mode = refill_noscratch; 1250 /* 1251 * create the plain linear handler 1252 */ 1253 if (bcm1250_m3_war()) { 1254 unsigned int segbits = 44; 1255 1256 uasm_i_dmfc0(&p, K0, C0_BADVADDR); 1257 uasm_i_dmfc0(&p, K1, C0_ENTRYHI); 1258 uasm_i_xor(&p, K0, K0, K1); 1259 uasm_i_dsrl_safe(&p, K1, K0, 62); 1260 uasm_i_dsrl_safe(&p, K0, K0, 12 + 1); 1261 uasm_i_dsll_safe(&p, K0, K0, 64 + 12 + 1 - segbits); 1262 uasm_i_or(&p, K0, K0, K1); 1263 uasm_il_bnez(&p, &r, K0, label_leave); 1264 /* No need for uasm_i_nop */ 1265 } 1266 1267 #ifdef CONFIG_64BIT 1268 build_get_pmde64(&p, &l, &r, K0, K1); /* get pmd in K1 */ 1269 #else 1270 build_get_pgde32(&p, K0, K1); /* get pgd in K1 */ 1271 #endif 1272 1273 #ifdef CONFIG_HUGETLB_PAGE 1274 build_is_huge_pte(&p, &r, K0, K1, label_tlb_huge_update); 1275 #endif 1276 1277 build_get_ptep(&p, K0, K1); 1278 build_update_entries(&p, K0, K1); 1279 build_tlb_write_entry(&p, &l, &r, tlb_random); 1280 uasm_l_leave(&l, p); 1281 uasm_i_eret(&p); /* return from trap */ 1282 } 1283 #ifdef CONFIG_HUGETLB_PAGE 1284 uasm_l_tlb_huge_update(&l, p); 1285 build_huge_update_entries(&p, htlb_info.huge_pte, K1); 1286 build_huge_tlb_write_entry(&p, &l, &r, K0, tlb_random, 1287 htlb_info.restore_scratch); 1288 #endif 1289 1290 #ifdef CONFIG_64BIT 1291 build_get_pgd_vmalloc64(&p, &l, &r, K0, K1, vmalloc_mode); 1292 #endif 1293 1294 /* 1295 * Overflow check: For the 64bit handler, we need at least one 1296 * free instruction slot for the wrap-around branch. In worst 1297 * case, if the intended insertion point is a delay slot, we 1298 * need three, with the second nop'ed and the third being 1299 * unused. 1300 */ 1301 /* Loongson2 ebase is different than r4k, we have more space */ 1302 #if defined(CONFIG_32BIT) || defined(CONFIG_CPU_LOONGSON2) 1303 if ((p - tlb_handler) > 64) 1304 panic("TLB refill handler space exceeded"); 1305 #else 1306 if (((p - tlb_handler) > (MIPS64_REFILL_INSNS * 2) - 1) 1307 || (((p - tlb_handler) > (MIPS64_REFILL_INSNS * 2) - 3) 1308 && uasm_insn_has_bdelay(relocs, 1309 tlb_handler + MIPS64_REFILL_INSNS - 3))) 1310 panic("TLB refill handler space exceeded"); 1311 #endif 1312 1313 /* 1314 * Now fold the handler in the TLB refill handler space. 1315 */ 1316 #if defined(CONFIG_32BIT) || defined(CONFIG_CPU_LOONGSON2) 1317 f = final_handler; 1318 /* Simplest case, just copy the handler. */ 1319 uasm_copy_handler(relocs, labels, tlb_handler, p, f); 1320 final_len = p - tlb_handler; 1321 #else /* CONFIG_64BIT */ 1322 f = final_handler + MIPS64_REFILL_INSNS; 1323 if ((p - tlb_handler) <= MIPS64_REFILL_INSNS) { 1324 /* Just copy the handler. 
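		 * The handler fits in the 32-instruction XTLB refill
		 * slot (ebase + 0x80), so no splitting is needed.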
*/ 1325 uasm_copy_handler(relocs, labels, tlb_handler, p, f); 1326 final_len = p - tlb_handler; 1327 } else { 1328 #if defined(CONFIG_HUGETLB_PAGE) 1329 const enum label_id ls = label_tlb_huge_update; 1330 #else 1331 const enum label_id ls = label_vmalloc; 1332 #endif 1333 u32 *split; 1334 int ov = 0; 1335 int i; 1336 1337 for (i = 0; i < ARRAY_SIZE(labels) && labels[i].lab != ls; i++) 1338 ; 1339 BUG_ON(i == ARRAY_SIZE(labels)); 1340 split = labels[i].addr; 1341 1342 /* 1343 * See if we have overflown one way or the other. 1344 */ 1345 if (split > tlb_handler + MIPS64_REFILL_INSNS || 1346 split < p - MIPS64_REFILL_INSNS) 1347 ov = 1; 1348 1349 if (ov) { 1350 /* 1351 * Split two instructions before the end. One 1352 * for the branch and one for the instruction 1353 * in the delay slot. 1354 */ 1355 split = tlb_handler + MIPS64_REFILL_INSNS - 2; 1356 1357 /* 1358 * If the branch would fall in a delay slot, 1359 * we must back up an additional instruction 1360 * so that it is no longer in a delay slot. 1361 */ 1362 if (uasm_insn_has_bdelay(relocs, split - 1)) 1363 split--; 1364 } 1365 /* Copy first part of the handler. */ 1366 uasm_copy_handler(relocs, labels, tlb_handler, split, f); 1367 f += split - tlb_handler; 1368 1369 if (ov) { 1370 /* Insert branch. */ 1371 uasm_l_split(&l, final_handler); 1372 uasm_il_b(&f, &r, label_split); 1373 if (uasm_insn_has_bdelay(relocs, split)) 1374 uasm_i_nop(&f); 1375 else { 1376 uasm_copy_handler(relocs, labels, 1377 split, split + 1, f); 1378 uasm_move_labels(labels, f, f + 1, -1); 1379 f++; 1380 split++; 1381 } 1382 } 1383 1384 /* Copy the rest of the handler. */ 1385 uasm_copy_handler(relocs, labels, split, p, final_handler); 1386 final_len = (f - (final_handler + MIPS64_REFILL_INSNS)) + 1387 (p - split); 1388 } 1389 #endif /* CONFIG_64BIT */ 1390 1391 uasm_resolve_relocs(relocs, labels); 1392 pr_debug("Wrote TLB refill handler (%u instructions).\n", 1393 final_len); 1394 1395 memcpy((void *)ebase, final_handler, 0x100); 1396 1397 dump_handler((u32 *)ebase, 64); 1398 } 1399 1400 /* 1401 * 128 instructions for the fastpath handler is generous and should 1402 * never be exceeded. 1403 */ 1404 #define FASTPATH_SIZE 128 1405 1406 u32 handle_tlbl[FASTPATH_SIZE] __cacheline_aligned; 1407 u32 handle_tlbs[FASTPATH_SIZE] __cacheline_aligned; 1408 u32 handle_tlbm[FASTPATH_SIZE] __cacheline_aligned; 1409 #ifdef CONFIG_MIPS_PGD_C0_CONTEXT 1410 u32 tlbmiss_handler_setup_pgd[16] __cacheline_aligned; 1411 1412 static void __cpuinit build_r4000_setup_pgd(void) 1413 { 1414 const int a0 = 4; 1415 const int a1 = 5; 1416 u32 *p = tlbmiss_handler_setup_pgd; 1417 struct uasm_label *l = labels; 1418 struct uasm_reloc *r = relocs; 1419 1420 memset(tlbmiss_handler_setup_pgd, 0, sizeof(tlbmiss_handler_setup_pgd)); 1421 memset(labels, 0, sizeof(labels)); 1422 memset(relocs, 0, sizeof(relocs)); 1423 1424 pgd_reg = allocate_kscratch(); 1425 1426 if (pgd_reg == -1) { 1427 /* PGD << 11 in c0_Context */ 1428 /* 1429 * If it is a ckseg0 address, convert to a physical 1430 * address. Shifting right by 29 and adding 4 will 1431 * result in zero for these addresses. 
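		 * (ckseg0 runs from 0xffffffff80000000 to
		 * 0xffffffff9fffffff, so the arithmetic shift right by 29
		 * gives -4 and the addiu of 4 gives 0; any other address
		 * leaves a non-zero value and branches around the dinsm
		 * below.)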
1432 * 1433 */ 1434 UASM_i_SRA(&p, a1, a0, 29); 1435 UASM_i_ADDIU(&p, a1, a1, 4); 1436 uasm_il_bnez(&p, &r, a1, label_tlbl_goaround1); 1437 uasm_i_nop(&p); 1438 uasm_i_dinsm(&p, a0, 0, 29, 64 - 29); 1439 uasm_l_tlbl_goaround1(&l, p); 1440 UASM_i_SLL(&p, a0, a0, 11); 1441 uasm_i_jr(&p, 31); 1442 UASM_i_MTC0(&p, a0, C0_CONTEXT); 1443 } else { 1444 /* PGD in c0_KScratch */ 1445 uasm_i_jr(&p, 31); 1446 UASM_i_MTC0(&p, a0, 31, pgd_reg); 1447 } 1448 if (p - tlbmiss_handler_setup_pgd > ARRAY_SIZE(tlbmiss_handler_setup_pgd)) 1449 panic("tlbmiss_handler_setup_pgd space exceeded"); 1450 uasm_resolve_relocs(relocs, labels); 1451 pr_debug("Wrote tlbmiss_handler_setup_pgd (%u instructions).\n", 1452 (unsigned int)(p - tlbmiss_handler_setup_pgd)); 1453 1454 dump_handler(tlbmiss_handler_setup_pgd, 1455 ARRAY_SIZE(tlbmiss_handler_setup_pgd)); 1456 } 1457 #endif 1458 1459 static void __cpuinit 1460 iPTE_LW(u32 **p, unsigned int pte, unsigned int ptr) 1461 { 1462 #ifdef CONFIG_SMP 1463 # ifdef CONFIG_64BIT_PHYS_ADDR 1464 if (cpu_has_64bits) 1465 uasm_i_lld(p, pte, 0, ptr); 1466 else 1467 # endif 1468 UASM_i_LL(p, pte, 0, ptr); 1469 #else 1470 # ifdef CONFIG_64BIT_PHYS_ADDR 1471 if (cpu_has_64bits) 1472 uasm_i_ld(p, pte, 0, ptr); 1473 else 1474 # endif 1475 UASM_i_LW(p, pte, 0, ptr); 1476 #endif 1477 } 1478 1479 static void __cpuinit 1480 iPTE_SW(u32 **p, struct uasm_reloc **r, unsigned int pte, unsigned int ptr, 1481 unsigned int mode) 1482 { 1483 #ifdef CONFIG_64BIT_PHYS_ADDR 1484 unsigned int hwmode = mode & (_PAGE_VALID | _PAGE_DIRTY); 1485 #endif 1486 1487 uasm_i_ori(p, pte, pte, mode); 1488 #ifdef CONFIG_SMP 1489 # ifdef CONFIG_64BIT_PHYS_ADDR 1490 if (cpu_has_64bits) 1491 uasm_i_scd(p, pte, 0, ptr); 1492 else 1493 # endif 1494 UASM_i_SC(p, pte, 0, ptr); 1495 1496 if (r10000_llsc_war()) 1497 uasm_il_beqzl(p, r, pte, label_smp_pgtable_change); 1498 else 1499 uasm_il_beqz(p, r, pte, label_smp_pgtable_change); 1500 1501 # ifdef CONFIG_64BIT_PHYS_ADDR 1502 if (!cpu_has_64bits) { 1503 /* no uasm_i_nop needed */ 1504 uasm_i_ll(p, pte, sizeof(pte_t) / 2, ptr); 1505 uasm_i_ori(p, pte, pte, hwmode); 1506 uasm_i_sc(p, pte, sizeof(pte_t) / 2, ptr); 1507 uasm_il_beqz(p, r, pte, label_smp_pgtable_change); 1508 /* no uasm_i_nop needed */ 1509 uasm_i_lw(p, pte, 0, ptr); 1510 } else 1511 uasm_i_nop(p); 1512 # else 1513 uasm_i_nop(p); 1514 # endif 1515 #else 1516 # ifdef CONFIG_64BIT_PHYS_ADDR 1517 if (cpu_has_64bits) 1518 uasm_i_sd(p, pte, 0, ptr); 1519 else 1520 # endif 1521 UASM_i_SW(p, pte, 0, ptr); 1522 1523 # ifdef CONFIG_64BIT_PHYS_ADDR 1524 if (!cpu_has_64bits) { 1525 uasm_i_lw(p, pte, sizeof(pte_t) / 2, ptr); 1526 uasm_i_ori(p, pte, pte, hwmode); 1527 uasm_i_sw(p, pte, sizeof(pte_t) / 2, ptr); 1528 uasm_i_lw(p, pte, 0, ptr); 1529 } 1530 # endif 1531 #endif 1532 } 1533 1534 /* 1535 * Check if PTE is present, if not then jump to LABEL. PTR points to 1536 * the page table where this PTE is located, PTE will be re-loaded 1537 * with it's original value. 1538 */ 1539 static void __cpuinit 1540 build_pte_present(u32 **p, struct uasm_reloc **r, 1541 int pte, int ptr, int scratch, enum label_id lid) 1542 { 1543 int t = scratch >= 0 ? 
scratch : pte; 1544 1545 if (kernel_uses_smartmips_rixi) { 1546 if (use_bbit_insns()) { 1547 uasm_il_bbit0(p, r, pte, ilog2(_PAGE_PRESENT), lid); 1548 uasm_i_nop(p); 1549 } else { 1550 uasm_i_andi(p, t, pte, _PAGE_PRESENT); 1551 uasm_il_beqz(p, r, t, lid); 1552 if (pte == t) 1553 /* You lose the SMP race :-(*/ 1554 iPTE_LW(p, pte, ptr); 1555 } 1556 } else { 1557 uasm_i_andi(p, t, pte, _PAGE_PRESENT | _PAGE_READ); 1558 uasm_i_xori(p, t, t, _PAGE_PRESENT | _PAGE_READ); 1559 uasm_il_bnez(p, r, t, lid); 1560 if (pte == t) 1561 /* You lose the SMP race :-(*/ 1562 iPTE_LW(p, pte, ptr); 1563 } 1564 } 1565 1566 /* Make PTE valid, store result in PTR. */ 1567 static void __cpuinit 1568 build_make_valid(u32 **p, struct uasm_reloc **r, unsigned int pte, 1569 unsigned int ptr) 1570 { 1571 unsigned int mode = _PAGE_VALID | _PAGE_ACCESSED; 1572 1573 iPTE_SW(p, r, pte, ptr, mode); 1574 } 1575 1576 /* 1577 * Check if PTE can be written to, if not branch to LABEL. Regardless 1578 * restore PTE with value from PTR when done. 1579 */ 1580 static void __cpuinit 1581 build_pte_writable(u32 **p, struct uasm_reloc **r, 1582 unsigned int pte, unsigned int ptr, int scratch, 1583 enum label_id lid) 1584 { 1585 int t = scratch >= 0 ? scratch : pte; 1586 1587 uasm_i_andi(p, t, pte, _PAGE_PRESENT | _PAGE_WRITE); 1588 uasm_i_xori(p, t, t, _PAGE_PRESENT | _PAGE_WRITE); 1589 uasm_il_bnez(p, r, t, lid); 1590 if (pte == t) 1591 /* You lose the SMP race :-(*/ 1592 iPTE_LW(p, pte, ptr); 1593 else 1594 uasm_i_nop(p); 1595 } 1596 1597 /* Make PTE writable, update software status bits as well, then store 1598 * at PTR. 1599 */ 1600 static void __cpuinit 1601 build_make_write(u32 **p, struct uasm_reloc **r, unsigned int pte, 1602 unsigned int ptr) 1603 { 1604 unsigned int mode = (_PAGE_ACCESSED | _PAGE_MODIFIED | _PAGE_VALID 1605 | _PAGE_DIRTY); 1606 1607 iPTE_SW(p, r, pte, ptr, mode); 1608 } 1609 1610 /* 1611 * Check if PTE can be modified, if not branch to LABEL. Regardless 1612 * restore PTE with value from PTR when done. 1613 */ 1614 static void __cpuinit 1615 build_pte_modifiable(u32 **p, struct uasm_reloc **r, 1616 unsigned int pte, unsigned int ptr, int scratch, 1617 enum label_id lid) 1618 { 1619 if (use_bbit_insns()) { 1620 uasm_il_bbit0(p, r, pte, ilog2(_PAGE_WRITE), lid); 1621 uasm_i_nop(p); 1622 } else { 1623 int t = scratch >= 0 ? scratch : pte; 1624 uasm_i_andi(p, t, pte, _PAGE_WRITE); 1625 uasm_il_beqz(p, r, t, lid); 1626 if (pte == t) 1627 /* You lose the SMP race :-(*/ 1628 iPTE_LW(p, pte, ptr); 1629 } 1630 } 1631 1632 #ifndef CONFIG_MIPS_PGD_C0_CONTEXT 1633 1634 1635 /* 1636 * R3000 style TLB load/store/modify handlers. 1637 */ 1638 1639 /* 1640 * This places the pte into ENTRYLO0 and writes it with tlbwi. 1641 * Then it returns. 1642 */ 1643 static void __cpuinit 1644 build_r3000_pte_reload_tlbwi(u32 **p, unsigned int pte, unsigned int tmp) 1645 { 1646 uasm_i_mtc0(p, pte, C0_ENTRYLO0); /* cp0 delay */ 1647 uasm_i_mfc0(p, tmp, C0_EPC); /* cp0 delay */ 1648 uasm_i_tlbwi(p); 1649 uasm_i_jr(p, tmp); 1650 uasm_i_rfe(p); /* branch delay */ 1651 } 1652 1653 /* 1654 * This places the pte into ENTRYLO0 and writes it with tlbwi 1655 * or tlbwr as appropriate. This is because the index register 1656 * may have the probe fail bit set as a result of a trap on a 1657 * kseg2 access, i.e. without refill. Then it returns. 
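 *
 * The probe-fail bit is the sign bit of c0_index, so the bltz on the
 * value read back from C0_INDEX below selects tlbwr; otherwise the
 * probed slot is rewritten in place with tlbwi.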
1658 */ 1659 static void __cpuinit 1660 build_r3000_tlb_reload_write(u32 **p, struct uasm_label **l, 1661 struct uasm_reloc **r, unsigned int pte, 1662 unsigned int tmp) 1663 { 1664 uasm_i_mfc0(p, tmp, C0_INDEX); 1665 uasm_i_mtc0(p, pte, C0_ENTRYLO0); /* cp0 delay */ 1666 uasm_il_bltz(p, r, tmp, label_r3000_write_probe_fail); /* cp0 delay */ 1667 uasm_i_mfc0(p, tmp, C0_EPC); /* branch delay */ 1668 uasm_i_tlbwi(p); /* cp0 delay */ 1669 uasm_i_jr(p, tmp); 1670 uasm_i_rfe(p); /* branch delay */ 1671 uasm_l_r3000_write_probe_fail(l, *p); 1672 uasm_i_tlbwr(p); /* cp0 delay */ 1673 uasm_i_jr(p, tmp); 1674 uasm_i_rfe(p); /* branch delay */ 1675 } 1676 1677 static void __cpuinit 1678 build_r3000_tlbchange_handler_head(u32 **p, unsigned int pte, 1679 unsigned int ptr) 1680 { 1681 long pgdc = (long)pgd_current; 1682 1683 uasm_i_mfc0(p, pte, C0_BADVADDR); 1684 uasm_i_lui(p, ptr, uasm_rel_hi(pgdc)); /* cp0 delay */ 1685 uasm_i_lw(p, ptr, uasm_rel_lo(pgdc), ptr); 1686 uasm_i_srl(p, pte, pte, 22); /* load delay */ 1687 uasm_i_sll(p, pte, pte, 2); 1688 uasm_i_addu(p, ptr, ptr, pte); 1689 uasm_i_mfc0(p, pte, C0_CONTEXT); 1690 uasm_i_lw(p, ptr, 0, ptr); /* cp0 delay */ 1691 uasm_i_andi(p, pte, pte, 0xffc); /* load delay */ 1692 uasm_i_addu(p, ptr, ptr, pte); 1693 uasm_i_lw(p, pte, 0, ptr); 1694 uasm_i_tlbp(p); /* load delay */ 1695 } 1696 1697 static void __cpuinit build_r3000_tlb_load_handler(void) 1698 { 1699 u32 *p = handle_tlbl; 1700 struct uasm_label *l = labels; 1701 struct uasm_reloc *r = relocs; 1702 1703 memset(handle_tlbl, 0, sizeof(handle_tlbl)); 1704 memset(labels, 0, sizeof(labels)); 1705 memset(relocs, 0, sizeof(relocs)); 1706 1707 build_r3000_tlbchange_handler_head(&p, K0, K1); 1708 build_pte_present(&p, &r, K0, K1, -1, label_nopage_tlbl); 1709 uasm_i_nop(&p); /* load delay */ 1710 build_make_valid(&p, &r, K0, K1); 1711 build_r3000_tlb_reload_write(&p, &l, &r, K0, K1); 1712 1713 uasm_l_nopage_tlbl(&l, p); 1714 uasm_i_j(&p, (unsigned long)tlb_do_page_fault_0 & 0x0fffffff); 1715 uasm_i_nop(&p); 1716 1717 if ((p - handle_tlbl) > FASTPATH_SIZE) 1718 panic("TLB load handler fastpath space exceeded"); 1719 1720 uasm_resolve_relocs(relocs, labels); 1721 pr_debug("Wrote TLB load handler fastpath (%u instructions).\n", 1722 (unsigned int)(p - handle_tlbl)); 1723 1724 dump_handler(handle_tlbl, ARRAY_SIZE(handle_tlbl)); 1725 } 1726 1727 static void __cpuinit build_r3000_tlb_store_handler(void) 1728 { 1729 u32 *p = handle_tlbs; 1730 struct uasm_label *l = labels; 1731 struct uasm_reloc *r = relocs; 1732 1733 memset(handle_tlbs, 0, sizeof(handle_tlbs)); 1734 memset(labels, 0, sizeof(labels)); 1735 memset(relocs, 0, sizeof(relocs)); 1736 1737 build_r3000_tlbchange_handler_head(&p, K0, K1); 1738 build_pte_writable(&p, &r, K0, K1, -1, label_nopage_tlbs); 1739 uasm_i_nop(&p); /* load delay */ 1740 build_make_write(&p, &r, K0, K1); 1741 build_r3000_tlb_reload_write(&p, &l, &r, K0, K1); 1742 1743 uasm_l_nopage_tlbs(&l, p); 1744 uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff); 1745 uasm_i_nop(&p); 1746 1747 if ((p - handle_tlbs) > FASTPATH_SIZE) 1748 panic("TLB store handler fastpath space exceeded"); 1749 1750 uasm_resolve_relocs(relocs, labels); 1751 pr_debug("Wrote TLB store handler fastpath (%u instructions).\n", 1752 (unsigned int)(p - handle_tlbs)); 1753 1754 dump_handler(handle_tlbs, ARRAY_SIZE(handle_tlbs)); 1755 } 1756 1757 static void __cpuinit build_r3000_tlb_modify_handler(void) 1758 { 1759 u32 *p = handle_tlbm; 1760 struct uasm_label *l = labels; 1761 struct uasm_reloc *r = relocs; 
1762 1763 memset(handle_tlbm, 0, sizeof(handle_tlbm)); 1764 memset(labels, 0, sizeof(labels)); 1765 memset(relocs, 0, sizeof(relocs)); 1766 1767 build_r3000_tlbchange_handler_head(&p, K0, K1); 1768 build_pte_modifiable(&p, &r, K0, K1, -1, label_nopage_tlbm); 1769 uasm_i_nop(&p); /* load delay */ 1770 build_make_write(&p, &r, K0, K1); 1771 build_r3000_pte_reload_tlbwi(&p, K0, K1); 1772 1773 uasm_l_nopage_tlbm(&l, p); 1774 uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff); 1775 uasm_i_nop(&p); 1776 1777 if ((p - handle_tlbm) > FASTPATH_SIZE) 1778 panic("TLB modify handler fastpath space exceeded"); 1779 1780 uasm_resolve_relocs(relocs, labels); 1781 pr_debug("Wrote TLB modify handler fastpath (%u instructions).\n", 1782 (unsigned int)(p - handle_tlbm)); 1783 1784 dump_handler(handle_tlbm, ARRAY_SIZE(handle_tlbm)); 1785 } 1786 #endif /* CONFIG_MIPS_PGD_C0_CONTEXT */ 1787 1788 /* 1789 * R4000 style TLB load/store/modify handlers. 1790 */ 1791 static struct work_registers __cpuinit 1792 build_r4000_tlbchange_handler_head(u32 **p, struct uasm_label **l, 1793 struct uasm_reloc **r) 1794 { 1795 struct work_registers wr = build_get_work_registers(p); 1796 1797 #ifdef CONFIG_64BIT 1798 build_get_pmde64(p, l, r, wr.r1, wr.r2); /* get pmd in ptr */ 1799 #else 1800 build_get_pgde32(p, wr.r1, wr.r2); /* get pgd in ptr */ 1801 #endif 1802 1803 #ifdef CONFIG_HUGETLB_PAGE 1804 /* 1805 * For huge tlb entries, pmd doesn't contain an address but 1806 * instead contains the tlb pte. Check the PAGE_HUGE bit and 1807 * see if we need to jump to huge tlb processing. 1808 */ 1809 build_is_huge_pte(p, r, wr.r1, wr.r2, label_tlb_huge_update); 1810 #endif 1811 1812 UASM_i_MFC0(p, wr.r1, C0_BADVADDR); 1813 UASM_i_LW(p, wr.r2, 0, wr.r2); 1814 UASM_i_SRL(p, wr.r1, wr.r1, PAGE_SHIFT + PTE_ORDER - PTE_T_LOG2); 1815 uasm_i_andi(p, wr.r1, wr.r1, (PTRS_PER_PTE - 1) << PTE_T_LOG2); 1816 UASM_i_ADDU(p, wr.r2, wr.r2, wr.r1); 1817 1818 #ifdef CONFIG_SMP 1819 uasm_l_smp_pgtable_change(l, *p); 1820 #endif 1821 iPTE_LW(p, wr.r1, wr.r2); /* get even pte */ 1822 if (!m4kc_tlbp_war()) 1823 build_tlb_probe_entry(p); 1824 return wr; 1825 } 1826 1827 static void __cpuinit 1828 build_r4000_tlbchange_handler_tail(u32 **p, struct uasm_label **l, 1829 struct uasm_reloc **r, unsigned int tmp, 1830 unsigned int ptr) 1831 { 1832 uasm_i_ori(p, ptr, ptr, sizeof(pte_t)); 1833 uasm_i_xori(p, ptr, ptr, sizeof(pte_t)); 1834 build_update_entries(p, tmp, ptr); 1835 build_tlb_write_entry(p, l, r, tlb_indexed); 1836 uasm_l_leave(l, *p); 1837 build_restore_work_registers(p); 1838 uasm_i_eret(p); /* return from trap */ 1839 1840 #ifdef CONFIG_64BIT 1841 build_get_pgd_vmalloc64(p, l, r, tmp, ptr, not_refill); 1842 #endif 1843 } 1844 1845 static void __cpuinit build_r4000_tlb_load_handler(void) 1846 { 1847 u32 *p = handle_tlbl; 1848 struct uasm_label *l = labels; 1849 struct uasm_reloc *r = relocs; 1850 struct work_registers wr; 1851 1852 memset(handle_tlbl, 0, sizeof(handle_tlbl)); 1853 memset(labels, 0, sizeof(labels)); 1854 memset(relocs, 0, sizeof(relocs)); 1855 1856 if (bcm1250_m3_war()) { 1857 unsigned int segbits = 44; 1858 1859 uasm_i_dmfc0(&p, K0, C0_BADVADDR); 1860 uasm_i_dmfc0(&p, K1, C0_ENTRYHI); 1861 uasm_i_xor(&p, K0, K0, K1); 1862 uasm_i_dsrl_safe(&p, K1, K0, 62); 1863 uasm_i_dsrl_safe(&p, K0, K0, 12 + 1); 1864 uasm_i_dsll_safe(&p, K0, K0, 64 + 12 + 1 - segbits); 1865 uasm_i_or(&p, K0, K0, K1); 1866 uasm_il_bnez(&p, &r, K0, label_leave); 1867 /* No need for uasm_i_nop */ 1868 } 1869 1870 wr = build_r4000_tlbchange_handler_head(&p, 
&l, &r); 1871 build_pte_present(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbl); 1872 if (m4kc_tlbp_war()) 1873 build_tlb_probe_entry(&p); 1874 1875 if (kernel_uses_smartmips_rixi) { 1876 /* 1877 * If the page is not _PAGE_VALID, RI or XI could not 1878 * have triggered it. Skip the expensive test.. 1879 */ 1880 if (use_bbit_insns()) { 1881 uasm_il_bbit0(&p, &r, wr.r1, ilog2(_PAGE_VALID), 1882 label_tlbl_goaround1); 1883 } else { 1884 uasm_i_andi(&p, wr.r3, wr.r1, _PAGE_VALID); 1885 uasm_il_beqz(&p, &r, wr.r3, label_tlbl_goaround1); 1886 } 1887 uasm_i_nop(&p); 1888 1889 uasm_i_tlbr(&p); 1890 /* Examine entrylo 0 or 1 based on ptr. */ 1891 if (use_bbit_insns()) { 1892 uasm_i_bbit0(&p, wr.r2, ilog2(sizeof(pte_t)), 8); 1893 } else { 1894 uasm_i_andi(&p, wr.r3, wr.r2, sizeof(pte_t)); 1895 uasm_i_beqz(&p, wr.r3, 8); 1896 } 1897 /* load it in the delay slot*/ 1898 UASM_i_MFC0(&p, wr.r3, C0_ENTRYLO0); 1899 /* load it if ptr is odd */ 1900 UASM_i_MFC0(&p, wr.r3, C0_ENTRYLO1); 1901 /* 1902 * If the entryLo (now in wr.r3) is valid (bit 1), RI or 1903 * XI must have triggered it. 1904 */ 1905 if (use_bbit_insns()) { 1906 uasm_il_bbit1(&p, &r, wr.r3, 1, label_nopage_tlbl); 1907 uasm_i_nop(&p); 1908 uasm_l_tlbl_goaround1(&l, p); 1909 } else { 1910 uasm_i_andi(&p, wr.r3, wr.r3, 2); 1911 uasm_il_bnez(&p, &r, wr.r3, label_nopage_tlbl); 1912 uasm_i_nop(&p); 1913 } 1914 uasm_l_tlbl_goaround1(&l, p); 1915 } 1916 build_make_valid(&p, &r, wr.r1, wr.r2); 1917 build_r4000_tlbchange_handler_tail(&p, &l, &r, wr.r1, wr.r2); 1918 1919 #ifdef CONFIG_HUGETLB_PAGE 1920 /* 1921 * This is the entry point when build_r4000_tlbchange_handler_head 1922 * spots a huge page. 1923 */ 1924 uasm_l_tlb_huge_update(&l, p); 1925 iPTE_LW(&p, wr.r1, wr.r2); 1926 build_pte_present(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbl); 1927 build_tlb_probe_entry(&p); 1928 1929 if (kernel_uses_smartmips_rixi) { 1930 /* 1931 * If the page is not _PAGE_VALID, RI or XI could not 1932 * have triggered it. Skip the expensive test.. 1933 */ 1934 if (use_bbit_insns()) { 1935 uasm_il_bbit0(&p, &r, wr.r1, ilog2(_PAGE_VALID), 1936 label_tlbl_goaround2); 1937 } else { 1938 uasm_i_andi(&p, wr.r3, wr.r1, _PAGE_VALID); 1939 uasm_il_beqz(&p, &r, wr.r3, label_tlbl_goaround2); 1940 } 1941 uasm_i_nop(&p); 1942 1943 uasm_i_tlbr(&p); 1944 /* Examine entrylo 0 or 1 based on ptr. */ 1945 if (use_bbit_insns()) { 1946 uasm_i_bbit0(&p, wr.r2, ilog2(sizeof(pte_t)), 8); 1947 } else { 1948 uasm_i_andi(&p, wr.r3, wr.r2, sizeof(pte_t)); 1949 uasm_i_beqz(&p, wr.r3, 8); 1950 } 1951 /* load it in the delay slot*/ 1952 UASM_i_MFC0(&p, wr.r3, C0_ENTRYLO0); 1953 /* load it if ptr is odd */ 1954 UASM_i_MFC0(&p, wr.r3, C0_ENTRYLO1); 1955 /* 1956 * If the entryLo (now in wr.r3) is valid (bit 1), RI or 1957 * XI must have triggered it. 1958 */ 1959 if (use_bbit_insns()) { 1960 uasm_il_bbit0(&p, &r, wr.r3, 1, label_tlbl_goaround2); 1961 } else { 1962 uasm_i_andi(&p, wr.r3, wr.r3, 2); 1963 uasm_il_beqz(&p, &r, wr.r3, label_tlbl_goaround2); 1964 } 1965 if (PM_DEFAULT_MASK == 0) 1966 uasm_i_nop(&p); 1967 /* 1968 * We clobbered C0_PAGEMASK, restore it. On the other branch 1969 * it is restored in build_huge_tlb_write_entry. 
		if (PM_DEFAULT_MASK == 0)
			uasm_i_nop(&p);
		/*
		 * We clobbered C0_PAGEMASK, restore it.  On the other branch
		 * it is restored in build_huge_tlb_write_entry.
		 */
		build_restore_pagemask(&p, &r, wr.r3, label_nopage_tlbl, 0);

		uasm_l_tlbl_goaround2(&l, p);
	}
	uasm_i_ori(&p, wr.r1, wr.r1, (_PAGE_ACCESSED | _PAGE_VALID));
	build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2);
#endif

	uasm_l_nopage_tlbl(&l, p);
	build_restore_work_registers(&p);
	uasm_i_j(&p, (unsigned long)tlb_do_page_fault_0 & 0x0fffffff);
	uasm_i_nop(&p);

	if ((p - handle_tlbl) > FASTPATH_SIZE)
		panic("TLB load handler fastpath space exceeded");

	uasm_resolve_relocs(relocs, labels);
	pr_debug("Wrote TLB load handler fastpath (%u instructions).\n",
		 (unsigned int)(p - handle_tlbl));

	dump_handler(handle_tlbl, ARRAY_SIZE(handle_tlbl));
}

static void __cpuinit build_r4000_tlb_store_handler(void)
{
	u32 *p = handle_tlbs;
	struct uasm_label *l = labels;
	struct uasm_reloc *r = relocs;
	struct work_registers wr;

	memset(handle_tlbs, 0, sizeof(handle_tlbs));
	memset(labels, 0, sizeof(labels));
	memset(relocs, 0, sizeof(relocs));

	wr = build_r4000_tlbchange_handler_head(&p, &l, &r);
	build_pte_writable(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbs);
	if (m4kc_tlbp_war())
		build_tlb_probe_entry(&p);
	build_make_write(&p, &r, wr.r1, wr.r2);
	build_r4000_tlbchange_handler_tail(&p, &l, &r, wr.r1, wr.r2);

#ifdef CONFIG_HUGETLB_PAGE
	/*
	 * This is the entry point when
	 * build_r4000_tlbchange_handler_head spots a huge page.
	 */
	uasm_l_tlb_huge_update(&l, p);
	iPTE_LW(&p, wr.r1, wr.r2);
	build_pte_writable(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbs);
	build_tlb_probe_entry(&p);
	uasm_i_ori(&p, wr.r1, wr.r1,
		   _PAGE_ACCESSED | _PAGE_MODIFIED | _PAGE_VALID | _PAGE_DIRTY);
	build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2);
#endif

	uasm_l_nopage_tlbs(&l, p);
	build_restore_work_registers(&p);
	uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff);
	uasm_i_nop(&p);

	if ((p - handle_tlbs) > FASTPATH_SIZE)
		panic("TLB store handler fastpath space exceeded");

	uasm_resolve_relocs(relocs, labels);
	pr_debug("Wrote TLB store handler fastpath (%u instructions).\n",
		 (unsigned int)(p - handle_tlbs));

	dump_handler(handle_tlbs, ARRAY_SIZE(handle_tlbs));
}

static void __cpuinit build_r4000_tlb_modify_handler(void)
{
	u32 *p = handle_tlbm;
	struct uasm_label *l = labels;
	struct uasm_reloc *r = relocs;
	struct work_registers wr;

	memset(handle_tlbm, 0, sizeof(handle_tlbm));
	memset(labels, 0, sizeof(labels));
	memset(relocs, 0, sizeof(relocs));

	wr = build_r4000_tlbchange_handler_head(&p, &l, &r);
	build_pte_modifiable(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbm);
	if (m4kc_tlbp_war())
		build_tlb_probe_entry(&p);
	/* Present and writable bits set, set accessed and dirty bits. */
	build_make_write(&p, &r, wr.r1, wr.r2);
	build_r4000_tlbchange_handler_tail(&p, &l, &r, wr.r1, wr.r2);
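	/*
	 * The tail emitted above updates the EntryLo registers, writes the
	 * indexed TLB entry and returns with eret; only the huge page and
	 * nopage slow paths remain below.
	 */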
#ifdef CONFIG_HUGETLB_PAGE
	/*
	 * This is the entry point when
	 * build_r4000_tlbchange_handler_head spots a huge page.
	 */
	uasm_l_tlb_huge_update(&l, p);
	iPTE_LW(&p, wr.r1, wr.r2);
	build_pte_modifiable(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbm);
	build_tlb_probe_entry(&p);
	uasm_i_ori(&p, wr.r1, wr.r1,
		   _PAGE_ACCESSED | _PAGE_MODIFIED | _PAGE_VALID | _PAGE_DIRTY);
	build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2);
#endif

	uasm_l_nopage_tlbm(&l, p);
	build_restore_work_registers(&p);
	uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff);
	uasm_i_nop(&p);

	if ((p - handle_tlbm) > FASTPATH_SIZE)
		panic("TLB modify handler fastpath space exceeded");

	uasm_resolve_relocs(relocs, labels);
	pr_debug("Wrote TLB modify handler fastpath (%u instructions).\n",
		 (unsigned int)(p - handle_tlbm));

	dump_handler(handle_tlbm, ARRAY_SIZE(handle_tlbm));
}

void __cpuinit build_tlb_refill_handler(void)
{
	/*
	 * The refill handler is generated per-CPU; multi-node systems
	 * may have local storage for it. The other handlers are only
	 * needed once.
	 */
	static int run_once = 0;

#ifdef CONFIG_64BIT
	check_for_high_segbits = current_cpu_data.vmbits > (PGDIR_SHIFT + PGD_ORDER + PAGE_SHIFT - 3);
#endif

	switch (current_cpu_type()) {
	case CPU_R2000:
	case CPU_R3000:
	case CPU_R3000A:
	case CPU_R3081E:
	case CPU_TX3912:
	case CPU_TX3922:
	case CPU_TX3927:
#ifndef CONFIG_MIPS_PGD_C0_CONTEXT
		build_r3000_tlb_refill_handler();
		if (!run_once) {
			build_r3000_tlb_load_handler();
			build_r3000_tlb_store_handler();
			build_r3000_tlb_modify_handler();
			run_once++;
		}
#else
		panic("No R3000 TLB refill handler");
#endif
		break;

	case CPU_R6000:
	case CPU_R6000A:
		panic("No R6000 TLB refill handler yet");
		break;

	case CPU_R8000:
		panic("No R8000 TLB refill handler yet");
		break;

	default:
		if (!run_once) {
			scratch_reg = allocate_kscratch();
#ifdef CONFIG_MIPS_PGD_C0_CONTEXT
			build_r4000_setup_pgd();
#endif
			build_r4000_tlb_load_handler();
			build_r4000_tlb_store_handler();
			build_r4000_tlb_modify_handler();
			run_once++;
		}
		build_r4000_tlb_refill_handler();
	}
}

void __cpuinit flush_tlb_handlers(void)
{
	local_flush_icache_range((unsigned long)handle_tlbl,
				 (unsigned long)handle_tlbl + sizeof(handle_tlbl));
	local_flush_icache_range((unsigned long)handle_tlbs,
				 (unsigned long)handle_tlbs + sizeof(handle_tlbs));
	local_flush_icache_range((unsigned long)handle_tlbm,
				 (unsigned long)handle_tlbm + sizeof(handle_tlbm));
#ifdef CONFIG_MIPS_PGD_C0_CONTEXT
	local_flush_icache_range((unsigned long)tlbmiss_handler_setup_pgd,
				 (unsigned long)tlbmiss_handler_setup_pgd + sizeof(handle_tlbm));
#endif
}