/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright 2020 Joyent, Inc.
 *
 * Assembly code support for Cheetah/Cheetah+ modules
 */

#include "assym.h"

#include <sys/asm_linkage.h>
#include <sys/mmu.h>
#include <vm/hat_sfmmu.h>
#include <sys/machparam.h>
#include <sys/machcpuvar.h>
#include <sys/machthread.h>
#include <sys/machtrap.h>
#include <sys/privregs.h>
#include <sys/trap.h>
#include <sys/cheetahregs.h>
#include <sys/us3_module.h>
#include <sys/xc_impl.h>
#include <sys/intreg.h>
#include <sys/async.h>
#include <sys/clock.h>
#include <sys/cheetahasm.h>
#include <sys/cmpregs.h>

#ifdef TRAPTRACE
#include <sys/traptrace.h>
#endif /* TRAPTRACE */

/* BEGIN CSTYLED */

#define	DCACHE_FLUSHPAGE(arg1, arg2, tmp1, tmp2, tmp3)			\
	ldxa	[%g0]ASI_DCU, tmp1					;\
	btst	DCU_DC, tmp1		/* is dcache enabled? */	;\
	bz,pn	%icc, 1f						;\
	ASM_LD(tmp1, dcache_linesize)					;\
	ASM_LD(tmp2, dflush_type)					;\
	cmp	tmp2, FLUSHPAGE_TYPE					;\
	be,pt	%icc, 2f						;\
	nop								;\
	sllx	arg1, CHEETAH_DC_VBIT_SHIFT, arg1 /* tag to compare */	;\
	ASM_LD(tmp3, dcache_size)					;\
	cmp	tmp2, FLUSHMATCH_TYPE					;\
	be,pt	%icc, 3f						;\
	nop								;\
	/*								\
	 * flushtype = FLUSHALL_TYPE, flush the whole thing		\
	 * tmp3 = cache size						\
	 * tmp1 = cache line size					\
	 */								\
	sub	tmp3, tmp1, tmp2					;\
4:									\
	stxa	%g0, [tmp2]ASI_DC_TAG					;\
	membar	#Sync							;\
	cmp	%g0, tmp2						;\
	bne,pt	%icc, 4b						;\
	sub	tmp2, tmp1, tmp2					;\
	ba,pt	%icc, 1f						;\
	nop								;\
	/*								\
	 * flushtype = FLUSHPAGE_TYPE					\
	 * arg1 = pfn							\
	 * arg2 = virtual color						\
	 * tmp1 = cache line size					\
	 * tmp2 = tag from cache					\
	 * tmp3 = counter						\
	 */								\
2:									\
	set	MMU_PAGESIZE, tmp3					;\
	sllx	arg1, MMU_PAGESHIFT, arg1 /* pfn to 43 bit PA */	;\
	sub	tmp3, tmp1, tmp3					;\
4:									\
	stxa	%g0, [arg1 + tmp3]ASI_DC_INVAL				;\
	membar	#Sync							;\
5:									\
	cmp	%g0, tmp3						;\
	bnz,pt	%icc, 4b		/* branch if not done */	;\
	sub	tmp3, tmp1, tmp3					;\
	ba,pt	%icc, 1f						;\
	nop								;\
	/*								\
	 * flushtype = FLUSHMATCH_TYPE					\
	 * arg1 = tag to compare against				\
	 * tmp1 = cache line size					\
	 * tmp3 = cache size						\
	 * arg2 = counter						\
	 * tmp2 = cache tag						\
	 */								\
3:									\
	sub	tmp3, tmp1, arg2					;\
4:									\
	ldxa	[arg2]ASI_DC_TAG, tmp2		/* read tag */		;\
	btst	CHEETAH_DC_VBIT_MASK, tmp2				;\
	bz,pn	%icc, 5f		/* br if no valid sub-blocks */	;\
	andn	tmp2, CHEETAH_DC_VBIT_MASK, tmp2 /* clear out v bits */ ;\
	cmp	tmp2, arg1						;\
	bne,pn	%icc, 5f		/* branch if tag miss */	;\
	nop								;\
	stxa	%g0, [arg2]ASI_DC_TAG					;\
	membar	#Sync							;\
5:									\
	cmp	%g0, arg2						;\
	bne,pt	%icc, 4b		/* branch if not done */	;\
	sub	arg2, tmp1, arg2					;\
1:
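
/*
 * Illustrative only (not assembled): a C sketch of the three flush
 * strategies the macro above selects via dflush_type.  Constant names
 * mirror the assembly; dc_tag_read(), dc_tag_clear() and dc_inval() are
 * hypothetical helpers standing in for the ASI_DC_TAG and ASI_DC_INVAL
 * diagnostic accesses.
 *
 *	static void dcache_flushpage_sketch(uint64_t pfn, int flush_type,
 *	    size_t dc_size, size_t dc_linesize)
 *	{
 *		if (flush_type == FLUSHALL_TYPE) {
 *			// walk every line index, clearing tags
 *			for (ssize_t off = dc_size - dc_linesize; off >= 0;
 *			    off -= dc_linesize)
 *				dc_tag_clear(off);
 *		} else if (flush_type == FLUSHPAGE_TYPE) {
 *			// invalidate by physical address, one page's worth
 *			uint64_t pa = pfn << MMU_PAGESHIFT;
 *			for (ssize_t off = MMU_PAGESIZE - dc_linesize;
 *			    off >= 0; off -= dc_linesize)
 *				dc_inval(pa + off);
 *		} else {	// FLUSHMATCH_TYPE
 *			// clear only lines whose tag matches this pfn
 *			uint64_t tag = pfn << CHEETAH_DC_VBIT_SHIFT;
 *			for (ssize_t off = dc_size - dc_linesize; off >= 0;
 *			    off -= dc_linesize) {
 *				uint64_t t = dc_tag_read(off);
 *				if ((t & CHEETAH_DC_VBIT_MASK) != 0 &&
 *				    (t & ~CHEETAH_DC_VBIT_MASK) == tag)
 *					dc_tag_clear(off);
 *			}
 *		}
 *	}
 */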

/*
 * macro that flushes the entire dcache color
 * dcache size = 64K, one way 16K
 *
 * In:
 *    arg = virtual color register (not clobbered)
 *    way = way#, can either be a constant or a register (not clobbered)
 *    tmp1, tmp2, tmp3 = scratch registers
 */
#define	DCACHE_FLUSHCOLOR(arg, way, tmp1, tmp2, tmp3)			\
	ldxa	[%g0]ASI_DCU, tmp1;					\
	btst	DCU_DC, tmp1;		/* is dcache enabled? */	\
	bz,pn	%icc, 1f;						\
	ASM_LD(tmp1, dcache_linesize)					\
	/*								\
	 * arg = virtual color						\
	 * tmp1 = cache line size					\
	 */								\
	sllx	arg, MMU_PAGESHIFT, tmp2; /* color to dcache page */	\
	mov	way, tmp3;						\
	sllx	tmp3, 14, tmp3;		/* One way 16K */		\
	or	tmp2, tmp3, tmp3;					\
	set	MMU_PAGESIZE, tmp2;					\
	/*								\
	 * tmp2 = page size						\
	 * tmp3 = cached page in dcache					\
	 */								\
	sub	tmp2, tmp1, tmp2;					\
2:									\
	stxa	%g0, [tmp3 + tmp2]ASI_DC_TAG;				\
	membar	#Sync;							\
	cmp	%g0, tmp2;						\
	bne,pt	%icc, 2b;						\
	sub	tmp2, tmp1, tmp2;					\
1:

/* END CSTYLED */

/*
 * Cheetah MMU and Cache operations.
 */

	ENTRY_NP(vtag_flushpage)
	/*
	 * flush page from the tlb
	 *
	 * %o0 = vaddr
	 * %o1 = sfmmup
	 */
	rdpr	%pstate, %o5
#ifdef DEBUG
	PANIC_IF_INTR_DISABLED_PSTR(%o5, u3_di_label0, %g1)
#endif /* DEBUG */
	/*
	 * disable ints
	 */
	andn	%o5, PSTATE_IE, %o4
	wrpr	%o4, 0, %pstate

	/*
	 * Then, blow out the tlb
	 * Interrupts are disabled to prevent the primary ctx register
	 * from changing underneath us.
	 */
	sethi	%hi(ksfmmup), %o3
	ldx	[%o3 + %lo(ksfmmup)], %o3
	cmp	%o3, %o1
	bne,pt	%xcc, 1f			! if not kernel as, go to 1
	sethi	%hi(FLUSH_ADDR), %o3
	/*
	 * For Kernel demaps use primary. type = page implicitly
	 */
	stxa	%g0, [%o0]ASI_DTLB_DEMAP	/* dmmu flush for KCONTEXT */
	stxa	%g0, [%o0]ASI_ITLB_DEMAP	/* immu flush for KCONTEXT */
	flush	%o3
	retl
	wrpr	%g0, %o5, %pstate		/* enable interrupts */
1:
	/*
	 * User demap.  We need to set the primary context properly.
	 * Secondary context cannot be used for Cheetah IMMU.
	 * %o0 = vaddr
	 * %o1 = sfmmup
	 * %o3 = FLUSH_ADDR
	 */
	SFMMU_CPU_CNUM(%o1, %g1, %g2)		! %g1 = sfmmu cnum on this CPU

	ldub	[%o1 + SFMMU_CEXT], %o4		! %o4 = sfmmup->sfmmu_cext
	sll	%o4, CTXREG_EXT_SHIFT, %o4
	or	%g1, %o4, %g1			! %g1 = primary pgsz | cnum

	wrpr	%g0, 1, %tl
	set	MMU_PCONTEXT, %o4
	or	DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %o0, %o0
	ldxa	[%o4]ASI_DMMU, %o2		! %o2 = save old ctxnum
	srlx	%o2, CTXREG_NEXT_SHIFT, %o1	! need to preserve nucleus pgsz
	sllx	%o1, CTXREG_NEXT_SHIFT, %o1	! %o1 = nucleus pgsz
	or	%g1, %o1, %g1		! %g1 = nucleus pgsz | primary pgsz | cnum
	stxa	%g1, [%o4]ASI_DMMU		! wr new ctxnum

	stxa	%g0, [%o0]ASI_DTLB_DEMAP
	stxa	%g0, [%o0]ASI_ITLB_DEMAP
	stxa	%o2, [%o4]ASI_DMMU		/* restore old ctxnum */
	flush	%o3
	wrpr	%g0, 0, %tl

	retl
	wrpr	%g0, %o5, %pstate		/* enable interrupts */
	SET_SIZE(vtag_flushpage)
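
/*
 * Illustrative only (not assembled): how vtag_flushpage above composes
 * the new primary context register value.  The field positions come
 * from CTXREG_EXT_SHIFT/CTXREG_NEXT_SHIFT as used in the code; the
 * helper name is hypothetical.
 *
 *	static uint64_t compose_pcontext(uint64_t old_ctxreg, uint64_t cnum,
 *	    uint64_t cext)
 *	{
 *		// keep only the nucleus page-size field of the old value
 *		uint64_t nuc = (old_ctxreg >> CTXREG_NEXT_SHIFT) <<
 *		    CTXREG_NEXT_SHIFT;
 *		// primary page size (from sfmmu_cext) | context number
 *		uint64_t pri = (cext << CTXREG_EXT_SHIFT) | cnum;
 *		return (nuc | pri);
 *	}
 */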

	ENTRY_NP2(vtag_flushall, demap_all)
	/*
	 * flush the tlb
	 */
	sethi	%hi(FLUSH_ADDR), %o3
	set	DEMAP_ALL_TYPE, %g1
	stxa	%g0, [%g1]ASI_DTLB_DEMAP
	stxa	%g0, [%g1]ASI_ITLB_DEMAP
	flush	%o3
	retl
	nop
	SET_SIZE(demap_all)
	SET_SIZE(vtag_flushall)


	ENTRY_NP(vtag_flushpage_tl1)
	/*
	 * x-trap to flush page from tlb and tsb
	 *
	 * %g1 = vaddr, zero-extended on 32-bit kernel
	 * %g2 = sfmmup
	 *
	 * assumes TSBE_TAG = 0
	 */
	srln	%g1, MMU_PAGESHIFT, %g1

	sethi	%hi(ksfmmup), %g3
	ldx	[%g3 + %lo(ksfmmup)], %g3
	cmp	%g3, %g2
	bne,pt	%xcc, 1f			! if not kernel as, go to 1
	slln	%g1, MMU_PAGESHIFT, %g1		/* g1 = vaddr */

	/* We need to demap in the kernel context */
	or	DEMAP_NUCLEUS | DEMAP_PAGE_TYPE, %g1, %g1
	stxa	%g0, [%g1]ASI_DTLB_DEMAP
	stxa	%g0, [%g1]ASI_ITLB_DEMAP
	retry
1:
	/* We need to demap in a user context */
	or	DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %g1, %g1

	SFMMU_CPU_CNUM(%g2, %g6, %g3)		! %g6 = sfmmu cnum on this CPU

	ldub	[%g2 + SFMMU_CEXT], %g4		! %g4 = sfmmup->cext
	sll	%g4, CTXREG_EXT_SHIFT, %g4
	or	%g6, %g4, %g6			! %g6 = pgsz | cnum

	set	MMU_PCONTEXT, %g4
	ldxa	[%g4]ASI_DMMU, %g5		/* rd old ctxnum */
	srlx	%g5, CTXREG_NEXT_SHIFT, %g2	/* %g2 = nucleus pgsz */
	sllx	%g2, CTXREG_NEXT_SHIFT, %g2	/* preserve nucleus pgsz */
	or	%g6, %g2, %g6		/* %g6 = nucleus pgsz | primary pgsz | cnum */
	stxa	%g6, [%g4]ASI_DMMU		/* wr new ctxnum */
	stxa	%g0, [%g1]ASI_DTLB_DEMAP
	stxa	%g0, [%g1]ASI_ITLB_DEMAP
	stxa	%g5, [%g4]ASI_DMMU		/* restore old ctxnum */
	retry
	SET_SIZE(vtag_flushpage_tl1)
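
/*
 * Illustrative only (not assembled): the <sfmmup58|pgcnt6> argument
 * packing that vtag_flush_pgcnt_tl1 below unpacks with
 * SFMMU_PGCNT_MASK.  The encode side is a hypothetical sketch of what
 * a caller does; sfmmu pointers are assumed to have their low six bits
 * clear.
 *
 *	// encode: pack (pgcnt - 1) into the pointer's low bits
 *	uint64_t arg = (uint64_t)sfmmup |
 *	    ((pgcnt - 1) & SFMMU_PGCNT_MASK);
 *
 *	// decode: what the handler below does
 *	uint64_t pgcnt = (arg & SFMMU_PGCNT_MASK) + 1;
 *	sfmmu_t *sfmmup = (sfmmu_t *)(arg & ~SFMMU_PGCNT_MASK);
 */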

	ENTRY_NP(vtag_flush_pgcnt_tl1)
	/*
	 * x-trap to flush pgcnt MMU_PAGESIZE pages from tlb
	 *
	 * %g1 = vaddr, zero-extended on 32-bit kernel
	 * %g2 = <sfmmup58|pgcnt6>, (pgcnt - 1) is passed in via the pgcnt6 bits.
	 *
	 * NOTE: this handler relies on the fact that no
	 *	interrupts or traps can occur during the loop
	 *	issuing the TLB_DEMAP operations.  It is assumed
	 *	that interrupts are disabled and this code is
	 *	fetching from the kernel locked text address.
	 *
	 * assumes TSBE_TAG = 0
	 */
	set	SFMMU_PGCNT_MASK, %g4
	and	%g4, %g2, %g3			/* g3 = pgcnt - 1 */
	add	%g3, 1, %g3			/* g3 = pgcnt */

	andn	%g2, SFMMU_PGCNT_MASK, %g2	/* g2 = sfmmup */
	srln	%g1, MMU_PAGESHIFT, %g1

	sethi	%hi(ksfmmup), %g4
	ldx	[%g4 + %lo(ksfmmup)], %g4
	cmp	%g4, %g2
	bne,pn	%xcc, 1f			/* if not kernel as, go to 1 */
	slln	%g1, MMU_PAGESHIFT, %g1		/* g1 = vaddr */

	/* We need to demap in the kernel context */
	or	DEMAP_NUCLEUS | DEMAP_PAGE_TYPE, %g1, %g1
	set	MMU_PAGESIZE, %g2		/* g2 = pgsize */
	sethi	%hi(FLUSH_ADDR), %g5
4:
	stxa	%g0, [%g1]ASI_DTLB_DEMAP
	stxa	%g0, [%g1]ASI_ITLB_DEMAP
	flush	%g5				! flush required by immu

	deccc	%g3				/* decr pgcnt */
	bnz,pt	%icc, 4b
	add	%g1, %g2, %g1			/* next page */
	retry
1:
	/*
	 * We need to demap in a user context
	 *
	 * g2 = sfmmup
	 * g3 = pgcnt
	 */
	SFMMU_CPU_CNUM(%g2, %g5, %g6)		! %g5 = sfmmu cnum on this CPU

	or	DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %g1, %g1

	ldub	[%g2 + SFMMU_CEXT], %g4		! %g4 = sfmmup->cext
	sll	%g4, CTXREG_EXT_SHIFT, %g4
	or	%g5, %g4, %g5

	set	MMU_PCONTEXT, %g4
	ldxa	[%g4]ASI_DMMU, %g6		/* rd old ctxnum */
	srlx	%g6, CTXREG_NEXT_SHIFT, %g2	/* %g2 = nucleus pgsz */
	sllx	%g2, CTXREG_NEXT_SHIFT, %g2	/* preserve nucleus pgsz */
	or	%g5, %g2, %g5		/* %g5 = nucleus pgsz | primary pgsz | cnum */
	stxa	%g5, [%g4]ASI_DMMU		/* wr new ctxnum */

	set	MMU_PAGESIZE, %g2		/* g2 = pgsize */
	sethi	%hi(FLUSH_ADDR), %g5
3:
	stxa	%g0, [%g1]ASI_DTLB_DEMAP
	stxa	%g0, [%g1]ASI_ITLB_DEMAP
	flush	%g5				! flush required by immu

	deccc	%g3				/* decr pgcnt */
	bnz,pt	%icc, 3b
	add	%g1, %g2, %g1			/* next page */

	stxa	%g6, [%g4]ASI_DMMU		/* restore old ctxnum */
	retry
	SET_SIZE(vtag_flush_pgcnt_tl1)

	ENTRY_NP(vtag_flushall_tl1)
	/*
	 * x-trap to flush tlb
	 */
	set	DEMAP_ALL_TYPE, %g4
	stxa	%g0, [%g4]ASI_DTLB_DEMAP
	stxa	%g0, [%g4]ASI_ITLB_DEMAP
	retry
	SET_SIZE(vtag_flushall_tl1)


/*
 * vac_flushpage(pfnum, color)
 *	Flush 1 8k page of the D-$ with physical page = pfnum
 * Algorithm:
 *	The Cheetah dcache is a 64K pseudo 4-way associative cache.
 *	It is a virtually indexed, physically tagged cache.
 */
	.seg	".data"
	.align	8
	.global	dflush_type
dflush_type:
	.word	FLUSHPAGE_TYPE

	ENTRY(vac_flushpage)
	/*
	 * flush page from the d$
	 *
	 * %o0 = pfnum, %o1 = color
	 */
	DCACHE_FLUSHPAGE(%o0, %o1, %o2, %o3, %o4)
	retl
	nop
	SET_SIZE(vac_flushpage)
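
/*
 * Illustrative only (not assembled): how a displacement-flush address
 * is formed for one color/way pair, as DCACHE_FLUSHCOLOR does for the
 * vac_flushcolor routines below (64K dcache, four 16K ways, one page
 * per color within a way).  dc_tag_clear() is a hypothetical stand-in
 * for the ASI_DC_TAG store.
 *
 *	uint64_t base = ((uint64_t)color << MMU_PAGESHIFT) |
 *	    ((uint64_t)way << 14);		// way stride is 16K
 *	for (ssize_t off = MMU_PAGESIZE - dc_linesize; off >= 0;
 *	    off -= dc_linesize)
 *		dc_tag_clear(base + off);
 */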

	ENTRY_NP(vac_flushpage_tl1)
	/*
	 * x-trap to flush page from the d$
	 *
	 * %g1 = pfnum, %g2 = color
	 */
	DCACHE_FLUSHPAGE(%g1, %g2, %g3, %g4, %g5)
	retry
	SET_SIZE(vac_flushpage_tl1)


	ENTRY(vac_flushcolor)
	/*
	 * %o0 = vcolor
	 */
	DCACHE_FLUSHCOLOR(%o0, 0, %o1, %o2, %o3)
	DCACHE_FLUSHCOLOR(%o0, 1, %o1, %o2, %o3)
	DCACHE_FLUSHCOLOR(%o0, 2, %o1, %o2, %o3)
	DCACHE_FLUSHCOLOR(%o0, 3, %o1, %o2, %o3)
	retl
	nop
	SET_SIZE(vac_flushcolor)


	ENTRY(vac_flushcolor_tl1)
	/*
	 * %g1 = vcolor
	 */
	DCACHE_FLUSHCOLOR(%g1, 0, %g2, %g3, %g4)
	DCACHE_FLUSHCOLOR(%g1, 1, %g2, %g3, %g4)
	DCACHE_FLUSHCOLOR(%g1, 2, %g2, %g3, %g4)
	DCACHE_FLUSHCOLOR(%g1, 3, %g2, %g3, %g4)
	retry
	SET_SIZE(vac_flushcolor_tl1)

/*
 * Determine whether or not the IDSR is busy.
 * Entry: no arguments
 * Returns: 1 if busy, 0 otherwise
 */
	ENTRY(idsr_busy)
	ldxa	[%g0]ASI_INTR_DISPATCH_STATUS, %g1
	clr	%o0
	btst	IDSR_BUSY, %g1
	bz,a,pt	%xcc, 1f
	mov	1, %o0
1:
	retl
	nop
	SET_SIZE(idsr_busy)

	.global _dispatch_status_busy
_dispatch_status_busy:
	.asciz	"ASI_INTR_DISPATCH_STATUS error: busy"
	.align	4

/*
 * Setup interrupt dispatch data registers
 * Entry:
 *	%o0 - function or inumber to call
 *	%o1, %o2 - arguments (2 uint64_t's)
 */
	.seg "text"

	ENTRY(init_mondo)
#ifdef DEBUG
	!
	! IDSR should not be busy at the moment
	!
	ldxa	[%g0]ASI_INTR_DISPATCH_STATUS, %g1
	btst	IDSR_BUSY, %g1
	bz,pt	%xcc, 1f
	nop
	sethi	%hi(_dispatch_status_busy), %o0
	call	panic
	or	%o0, %lo(_dispatch_status_busy), %o0
#endif /* DEBUG */

	ALTENTRY(init_mondo_nocheck)
	!
	! interrupt vector dispatch data reg 0
	!
1:
	mov	IDDR_0, %g1
	mov	IDDR_1, %g2
	mov	IDDR_2, %g3
	stxa	%o0, [%g1]ASI_INTR_DISPATCH

	!
	! interrupt vector dispatch data reg 1
	!
	stxa	%o1, [%g2]ASI_INTR_DISPATCH

	!
	! interrupt vector dispatch data reg 2
	!
	stxa	%o2, [%g3]ASI_INTR_DISPATCH

	membar	#Sync
	retl
	nop
	SET_SIZE(init_mondo_nocheck)
	SET_SIZE(init_mondo)


#if !(defined(JALAPENO) || defined(SERRANO))

/*
 * Ship mondo to aid using busy/nack pair bn
 */
	ENTRY_NP(shipit)
	sll	%o0, IDCR_PID_SHIFT, %g1	! IDCR<18:14> = agent id
	sll	%o1, IDCR_BN_SHIFT, %g2		! IDCR<28:24> = b/n pair
	or	%g1, IDCR_OFFSET, %g1		! IDCR<13:0> = 0x70
	or	%g1, %g2, %g1
	stxa	%g0, [%g1]ASI_INTR_DISPATCH	! interrupt vector dispatch
	membar	#Sync
	retl
	nop
	SET_SIZE(shipit)

#endif	/* !(JALAPENO || SERRANO) */
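
/*
 * Illustrative only (not assembled): the interrupt dispatch address
 * shipit above composes before the ASI_INTR_DISPATCH store.  The
 * shift/offset names are the ones used in the code.
 *
 *	uint64_t idcr = ((uint64_t)upaid << IDCR_PID_SHIFT) |	// <18:14>
 *	    ((uint64_t)bn << IDCR_BN_SHIFT) |			// <28:24>
 *	    IDCR_OFFSET;					// <13:0> = 0x70
 */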

/*
 * flush_instr_mem:
 *	Flush 1 page of the I-$ starting at vaddr
 *	%o0 vaddr
 *	%o1 bytes to be flushed
 *
 * UltraSPARC-III maintains consistency of the on-chip Instruction Cache with
 * the stores from all processors, so a FLUSH instruction is only needed to
 * ensure the pipeline is consistent.  This means a single flush is
 * sufficient at the end of a sequence of stores that updates the
 * instruction stream to ensure correct operation.
 */

	ENTRY(flush_instr_mem)
	flush	%o0			! address irrelevant
	retl
	nop
	SET_SIZE(flush_instr_mem)


#if defined(CPU_IMP_ECACHE_ASSOC)

	ENTRY(get_ecache_ctrl)
	GET_CPU_IMPL(%o0)
	cmp	%o0, JAGUAR_IMPL
	!
	! Putting an ASI access in the delay slot may
	! cause it to be accessed, even when annulled.
	!
	bne	1f
	nop
	ldxa	[%g0]ASI_EC_CFG_TIMING, %o0	! read Jaguar shared E$ ctrl reg
	b	2f
	nop
1:
	ldxa	[%g0]ASI_EC_CTRL, %o0		! read Ch/Ch+ E$ control reg
2:
	retl
	nop
	SET_SIZE(get_ecache_ctrl)

#endif	/* CPU_IMP_ECACHE_ASSOC */


#if !(defined(JALAPENO) || defined(SERRANO))

/*
 * flush_ecache:
 *	%o0 - 64 bit physical address
 *	%o1 - ecache size
 *	%o2 - ecache linesize
 */
	ENTRY(flush_ecache)

	/*
	 * For certain CPU implementations, we have to flush the L2 cache
	 * before flushing the ecache.
	 */
	PN_L2_FLUSHALL(%g3, %g4, %g5)

	/*
	 * Flush the entire Ecache using displacement flush.
	 */
	ECACHE_FLUSHALL(%o1, %o2, %o0, %o4)

	retl
	nop
	SET_SIZE(flush_ecache)

#endif	/* !(JALAPENO || SERRANO) */


	ENTRY(flush_dcache)
	ASM_LD(%o0, dcache_size)
	ASM_LD(%o1, dcache_linesize)
	CH_DCACHE_FLUSHALL(%o0, %o1, %o2)
	retl
	nop
	SET_SIZE(flush_dcache)


	ENTRY(flush_icache)
	GET_CPU_PRIVATE_PTR(%g0, %o0, %o2, flush_icache_1);
	ld	[%o0 + CHPR_ICACHE_LINESIZE], %o1
	ba,pt	%icc, 2f
	ld	[%o0 + CHPR_ICACHE_SIZE], %o0
flush_icache_1:
	ASM_LD(%o0, icache_size)
	ASM_LD(%o1, icache_linesize)
2:
	CH_ICACHE_FLUSHALL(%o0, %o1, %o2, %o4)
	retl
	nop
	SET_SIZE(flush_icache)
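
/*
 * Illustrative only (not assembled): a C sketch of displacement
 * flushing as the ECACHE_FLUSHALL macro used by flush_ecache above
 * performs it -- every line of the ecache is displaced by touching a
 * dedicated physical range.  The phys_load() helper and the exact
 * access pattern are assumptions for illustration.
 *
 *	for (size_t off = 0; off < ec_size; off += ec_linesize)
 *		phys_load(flushaddr + off);	// displace one E$ line
 */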

	ENTRY(kdi_flush_idcache)
	CH_DCACHE_FLUSHALL(%o0, %o1, %g1)
	CH_ICACHE_FLUSHALL(%o2, %o3, %g1, %g2)
	membar	#Sync
	retl
	nop
	SET_SIZE(kdi_flush_idcache)

	ENTRY(flush_pcache)
	PCACHE_FLUSHALL(%o0, %o1, %o2)
	retl
	nop
	SET_SIZE(flush_pcache)


#if defined(CPU_IMP_L1_CACHE_PARITY)

/*
 * Get dcache data and tag.  The Dcache data is a pointer to a ch_dc_data_t
 * structure (see cheetahregs.h):
 * The Dcache *should* be turned off when this code is executed.
 */
	.align	128
	ENTRY(get_dcache_dtag)
	rdpr	%pstate, %o5
	andn	%o5, PSTATE_IE | PSTATE_AM, %o3
	wrpr	%g0, %o3, %pstate
	b	1f
	stx	%o0, [%o1 + CH_DC_IDX]

	.align	128
1:
	ldxa	[%o0]ASI_DC_TAG, %o2
	stx	%o2, [%o1 + CH_DC_TAG]
	membar	#Sync
	ldxa	[%o0]ASI_DC_UTAG, %o2
	membar	#Sync
	stx	%o2, [%o1 + CH_DC_UTAG]
	ldxa	[%o0]ASI_DC_SNP_TAG, %o2
	stx	%o2, [%o1 + CH_DC_SNTAG]
	add	%o1, CH_DC_DATA, %o1
	clr	%o3
2:
	membar	#Sync				! required before ASI_DC_DATA
	ldxa	[%o0 + %o3]ASI_DC_DATA, %o2
	membar	#Sync				! required after ASI_DC_DATA
	stx	%o2, [%o1 + %o3]
	cmp	%o3, CH_DC_DATA_REG_SIZE - 8
	blt	2b
	add	%o3, 8, %o3

	/*
	 * Unlike other CPUs in the family, D$ data parity bits for Panther
	 * do not reside in the microtag.  Instead, we have to read them
	 * using the DC_data_parity bit of ASI_DCACHE_DATA.  Also, instead
	 * of just having 8 parity bits to protect all 32 bytes of data
	 * per line, we now have 32 bits of parity.
	 */
	GET_CPU_IMPL(%o3)
	cmp	%o3, PANTHER_IMPL
	bne	4f
	clr	%o3

	/*
	 * move our pointer to the next field where we store parity bits
	 * and add the offset of the last parity byte since we will be
	 * storing all 4 parity bytes within one 64 bit field like this:
	 *
	 * +------+------------+------------+------------+------------+
	 * |  -   | DC_parity  | DC_parity  | DC_parity  | DC_parity  |
	 * |  -   | for word 3 | for word 2 | for word 1 | for word 0 |
	 * +------+------------+------------+------------+------------+
	 *  63:32     31:24        23:16         15:8         7:0
	 */
	add	%o1, CH_DC_PN_DATA_PARITY - CH_DC_DATA + 7, %o1

	/* add the DC_data_parity bit into our working index */
	mov	1, %o2
	sll	%o2, PN_DC_DATA_PARITY_BIT_SHIFT, %o2
	or	%o0, %o2, %o0
3:
	membar	#Sync				! required before ASI_DC_DATA
	ldxa	[%o0 + %o3]ASI_DC_DATA, %o2
	membar	#Sync				! required after ASI_DC_DATA
	stb	%o2, [%o1]
	dec	%o1
	cmp	%o3, CH_DC_DATA_REG_SIZE - 8
	blt	3b
	add	%o3, 8, %o3
4:
	retl
	wrpr	%g0, %o5, %pstate
	SET_SIZE(get_dcache_dtag)
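
/*
 * Illustrative only (not assembled): unpacking the Panther parity
 * field that get_dcache_dtag above stores at CH_DC_PN_DATA_PARITY --
 * one parity byte per 8-byte word of the line, packed into the low 32
 * bits of a 64-bit field as shown in the diagram.
 *
 *	static uint8_t dc_parity_for_word(uint64_t parity_field, int word)
 *	{
 *		// word 0 in bits <7:0>, word 1 in <15:8>, and so on
 *		return ((parity_field >> (word * 8)) & 0xff);
 *	}
 */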

/*
 * Get icache data and tag.  The data argument is a pointer to a ch_ic_data_t
 * structure (see cheetahregs.h):
 * The Icache *Must* be turned off when this function is called.
 * This is because diagnostic accesses to the Icache interfere with cache
 * consistency.
 */
	.align	128
	ENTRY(get_icache_dtag)
	rdpr	%pstate, %o5
	andn	%o5, PSTATE_IE | PSTATE_AM, %o3
	wrpr	%g0, %o3, %pstate

	stx	%o0, [%o1 + CH_IC_IDX]
	ldxa	[%o0]ASI_IC_TAG, %o2
	stx	%o2, [%o1 + CH_IC_PATAG]
	add	%o0, CH_ICTAG_UTAG, %o0
	ldxa	[%o0]ASI_IC_TAG, %o2
	add	%o0, (CH_ICTAG_UPPER - CH_ICTAG_UTAG), %o0
	stx	%o2, [%o1 + CH_IC_UTAG]
	ldxa	[%o0]ASI_IC_TAG, %o2
	add	%o0, (CH_ICTAG_LOWER - CH_ICTAG_UPPER), %o0
	stx	%o2, [%o1 + CH_IC_UPPER]
	ldxa	[%o0]ASI_IC_TAG, %o2
	andn	%o0, CH_ICTAG_TMASK, %o0
	stx	%o2, [%o1 + CH_IC_LOWER]
	ldxa	[%o0]ASI_IC_SNP_TAG, %o2
	stx	%o2, [%o1 + CH_IC_SNTAG]
	add	%o1, CH_IC_DATA, %o1
	clr	%o3
2:
	ldxa	[%o0 + %o3]ASI_IC_DATA, %o2
	stx	%o2, [%o1 + %o3]
	cmp	%o3, PN_IC_DATA_REG_SIZE - 8
	blt	2b
	add	%o3, 8, %o3

	retl
	wrpr	%g0, %o5, %pstate
	SET_SIZE(get_icache_dtag)

/*
 * Get pcache data and tags.
 * inputs:
 *   pcache_idx	- fully constructed VA for accessing P$ diagnostic
 *		  registers. Contains PC_way and PC_addr shifted into
 *		  the correct bit positions. See the PRM for more details.
 *   data	- pointer to a ch_pc_data_t
 *		  structure (see cheetahregs.h):
 */
	.align	128
	ENTRY(get_pcache_dtag)
	rdpr	%pstate, %o5
	andn	%o5, PSTATE_IE | PSTATE_AM, %o3
	wrpr	%g0, %o3, %pstate

	stx	%o0, [%o1 + CH_PC_IDX]
	ldxa	[%o0]ASI_PC_STATUS_DATA, %o2
	stx	%o2, [%o1 + CH_PC_STATUS]
	ldxa	[%o0]ASI_PC_TAG, %o2
	stx	%o2, [%o1 + CH_PC_TAG]
	ldxa	[%o0]ASI_PC_SNP_TAG, %o2
	stx	%o2, [%o1 + CH_PC_SNTAG]
	add	%o1, CH_PC_DATA, %o1
	clr	%o3
2:
	ldxa	[%o0 + %o3]ASI_PC_DATA, %o2
	stx	%o2, [%o1 + %o3]
	cmp	%o3, CH_PC_DATA_REG_SIZE - 8
	blt	2b
	add	%o3, 8, %o3

	retl
	wrpr	%g0, %o5, %pstate
	SET_SIZE(get_pcache_dtag)

#endif	/* CPU_IMP_L1_CACHE_PARITY */

/*
 * re-enable the i$, d$, w$, and p$ according to bootup cache state.
 * Turn on WE, HPE, SPE, PE, IC, and DC bits defined as DCU_CACHE.
 *   %o0 - 64 bit constant
 */
	ENTRY(set_dcu)
	stxa	%o0, [%g0]ASI_DCU	! Store to DCU
	flush	%g0	/* flush required after changing the IC bit */
	retl
	nop
	SET_SIZE(set_dcu)


/*
 * Return DCU register.
 */
	ENTRY(get_dcu)
	ldxa	[%g0]ASI_DCU, %o0	/* DCU control register */
	retl
	nop
	SET_SIZE(get_dcu)
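
/*
 * Illustrative only (not assembled): how a caller might use the
 * get_dcu/set_dcu pair above to re-enable the caches with the
 * DCU_CACHE mask mentioned in the set_dcu comment.
 *
 *	uint64_t dcu = get_dcu();
 *	set_dcu(dcu | DCU_CACHE);	// WE|HPE|SPE|PE|IC|DC
 */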

/*
 * Cheetah/Cheetah+ level 15 interrupt handler trap table entry.
 *
 * This handler is used to check for softints generated by error trap
 * handlers to report errors.  On Cheetah, this mechanism is used by the
 * Fast ECC at TL>0 error trap handler and, on Cheetah+, by both the Fast
 * ECC at TL>0 error and the I$/D$ parity error at TL>0 trap handlers.
 * NB: Must be 8 instructions or less to fit in trap table and code must
 * be relocatable.
 */

	ENTRY_NP(ch_pil15_interrupt_instr)
	ASM_JMP(%g1, ch_pil15_interrupt)
	SET_SIZE(ch_pil15_interrupt_instr)


	ENTRY_NP(ch_pil15_interrupt)

	/*
	 * Since pil_interrupt is hacked to assume that every level 15
	 * interrupt is generated by the CPU to indicate a performance
	 * counter overflow this gets ugly.  Before calling pil_interrupt
	 * the Error at TL>0 pending status is inspected.  If it is
	 * non-zero, then an error has occurred and it is handled.
	 * Otherwise control is transferred to pil_interrupt.  Note that if
	 * an error is detected pil_interrupt will not be called and
	 * overflow interrupts may be lost causing erroneous performance
	 * measurements.  However, error-recovery will have a detrimental
	 * effect on performance anyway.
	 */
	CPU_INDEX(%g1, %g4)
	set	ch_err_tl1_pending, %g4
	ldub	[%g1 + %g4], %g2
	brz	%g2, 1f
	nop

	/*
	 * We have a pending TL>0 error, clear the TL>0 pending status.
	 */
	stb	%g0, [%g1 + %g4]

	/*
	 * Clear the softint.
	 */
	mov	1, %g5
	sll	%g5, PIL_15, %g5
	wr	%g5, CLEAR_SOFTINT

	/*
	 * For Cheetah*, call cpu_tl1_error via systrap at PIL 15
	 * to process the Fast ECC/Cache Parity at TL>0 error.  Clear
	 * panic flag (%g2).
	 */
	set	cpu_tl1_error, %g1
	clr	%g2
	ba	sys_trap
	mov	PIL_15, %g4

1:
	/*
	 * The logout is invalid.
	 *
	 * Call the default interrupt handler.
	 */
	sethi	%hi(pil_interrupt), %g1
	jmp	%g1 + %lo(pil_interrupt)
	mov	PIL_15, %g4

	SET_SIZE(ch_pil15_interrupt)


/*
 * Error Handling
 *
 * Cheetah provides error checking for all memory access paths between
 * the CPU, External Cache, Cheetah Data Switch and system bus. Error
 * information is logged in the AFSR (also AFSR_EXT for Panther) and
 * AFAR and one of the following traps is generated (provided that it
 * is enabled in External Cache Error Enable Register) to handle that
 * error:
 * 1. trap 0x70: Precise trap
 *    tt0_fecc for errors at trap level(TL)>=0
 * 2. trap 0x0A and 0x32: Deferred trap
 *    async_err for errors at TL>=0
 * 3. trap 0x63: Disrupting trap
 *    ce_err for errors at TL=0
 *    (Note that trap 0x63 cannot happen at trap level > 0)
 *
 * Trap level one handlers panic the system except for the fast ecc
 * error handler which tries to recover from certain errors.
 */

/*
 * FAST ECC TRAP STRATEGY:
 *
 * Software must handle single and multi bit errors which occur due to data
 * or instruction cache reads from the external cache. A single or multi bit
 * error occurring in one of these situations results in a precise trap.
 *
 * The basic flow of this trap handler is as follows:
 *
 * 1) Record the state and then turn off the Dcache and Icache.  The Dcache
 *    is disabled because bad data could have been installed.  The Icache is
 *    turned off because we want to capture the Icache line related to the
 *    AFAR.
 * 2) Disable trapping on CEEN/NCCEN errors during TL=0 processing.
 * 3) Park sibling core if caches are shared (to avoid race condition while
 *    accessing shared resources such as L3 data staging register during
 *    CPU logout).
 * 4) Read the AFAR and AFSR.
 * 5) If CPU logout structure is not being used, then:
 *    6) Clear all errors from the AFSR.
 *    7) Capture Ecache, Dcache and Icache lines in "CPU log out" structure.
 *    8) Flush Ecache then Flush Dcache and Icache and restore to previous
 *       state.
 *    9) Unpark sibling core if we parked it earlier.
 *    10) call cpu_fast_ecc_error via systrap at PIL 14 unless we're already
 *        running at PIL 15.
 * 6) Otherwise, if CPU logout structure is being used:
 *    7) Increment the "logout busy count" (see the sketch following this
 *       block comment).
 *    8) Flush Ecache then Flush Dcache and Icache and restore to previous
 *       state.
 *    9) Unpark sibling core if we parked it earlier.
 *    10) Issue a retry since the other CPU error logging code will end up
 *        finding this error bit and logging information about it later.
 * 7) Alternatively (to 5 and 6 above), if the cpu_private struct is not
 *    yet initialized such that we can't even check the logout struct, then
 *    we place the clo_flags data into %g2 (sys_trap->have_win arg #1) and
 *    call cpu_fast_ecc_error via systrap.  The clo_flags parameter is used
 *    to determine information such as TL, TT, CEEN and NCEEN settings, etc
 *    in the high level trap handler since we don't have access to detailed
 *    logout information in cases where the cpu_private struct is not yet
 *    initialized.
 *
 * We flush the E$ and D$ here on TL=1 code to prevent getting nested
 * Fast ECC traps in the TL=0 code.  If we get a Fast ECC event here in
 * the TL=1 code, we will go to the Fast ECC at TL>0 handler which,
 * since it uses different code/data from this handler, has a better
 * chance of fixing things up than simply recursing through this code
 * again (this would probably cause an eventual kernel stack overflow).
 * If the Fast ECC at TL>0 handler encounters a Fast ECC error before it
 * can flush the E$ (or the error is a stuck-at bit), we will recurse in
 * the Fast ECC at TL>0 handler and eventually Red Mode.
 *
 * Note that for Cheetah (and only Cheetah), we use alias addresses for
 * flushing rather than ASI accesses (which don't exist on Cheetah).
 * Should we encounter a Fast ECC error within this handler on Cheetah,
 * there's a good chance it's within the ecache_flushaddr buffer (since
 * it's the largest piece of memory we touch in the handler and it is
 * usually kernel text/data).  For that reason the Fast ECC at TL>0
 * handler for Cheetah uses an alternate buffer: ecache_tl1_flushaddr.
 */
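
/*
 * Illustrative only (not assembled): the logout-busy policy from the
 * strategy above (also used by ce_err and async_err below), in C form.
 * clo_nesting stands for the busy count the handler gets back from
 * DO_CPU_LOGOUT.
 *
 *	if (clo_nesting == 0) {
 *		// logout captured; hand off via systrap
 *	} else if (clo_nesting < CLO_NESTING_MAX) {
 *		// busy, but not hopeless: retry and let the TL=0
 *		// handler log from the AFSR later
 *	} else {
 *		// recursed too many times; give up
 *		ptl1_panic(PTL1_BAD_ECC);
 *	}
 */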

/*
 * Cheetah ecc-protected E$ trap (Trap 70) at TL=0
 * tt0_fecc is replaced by fecc_err_instr in cpu_init_trap of the various
 * architecture-specific files.
 * NB: Must be 8 instructions or less to fit in trap table and code must
 * be relocatable.
 */

	ENTRY_NP(fecc_err_instr)
	membar	#Sync			! Cheetah requires membar #Sync

	/*
	 * Save current DCU state.  Turn off the Dcache and Icache.
	 */
	ldxa	[%g0]ASI_DCU, %g1	! save DCU in %g1
	andn	%g1, DCU_DC + DCU_IC, %g4
	stxa	%g4, [%g0]ASI_DCU
	flush	%g0	/* flush required after changing the IC bit */

	ASM_JMP(%g4, fast_ecc_err)
	SET_SIZE(fecc_err_instr)


#if !(defined(JALAPENO) || defined(SERRANO))

	.section ".text"
	.align	64
	ENTRY_NP(fast_ecc_err)

	/*
	 * Turn off CEEN and NCEEN.
	 */
	ldxa	[%g0]ASI_ESTATE_ERR, %g3
	andn	%g3, EN_REG_NCEEN + EN_REG_CEEN, %g4
	stxa	%g4, [%g0]ASI_ESTATE_ERR
	membar	#Sync			! membar sync required

	/*
	 * Check to see whether we need to park our sibling core
	 * before recording diagnostic information from caches
	 * which may be shared by both cores.
	 * We use %g1 to store information about whether or not
	 * we had to park the core (%g1 holds our DCUCR value and
	 * we only use bits from that register which are "reserved"
	 * to keep track of core parking) so that we know whether
	 * or not to unpark later. %g5 and %g4 are scratch registers.
	 */
	PARK_SIBLING_CORE(%g1, %g5, %g4)

	/*
	 * Do the CPU log out capture.
	 *   %g3 = "failed?" return value.
	 *   %g2 = Input = AFAR. Output the clo_flags info which is passed
	 *         into this macro via %g4. Output only valid if cpu_private
	 *         struct has not been initialized.
	 *   CHPR_FECCTL0_LOGOUT = cpu logout structure offset input
	 *   %g4 = Trap information stored in the cpu logout flags field
	 *   %g5 = scr1
	 *   %g6 = scr2
	 *   %g3 = scr3
	 *   %g4 = scr4
	 */
	/* store the CEEN and NCEEN values, TL=0 */
	and	%g3, EN_REG_CEEN + EN_REG_NCEEN, %g4
	set	CHPR_FECCTL0_LOGOUT, %g6
	DO_CPU_LOGOUT(%g3, %g2, %g6, %g4, %g5, %g6, %g3, %g4)

	/*
	 * Flush the Ecache (and L2 cache for Panther) to get the error out
	 * of the Ecache.  If the UCC or UCU is on a dirty line, then the
	 * following flush will turn that into a WDC or WDU, respectively.
	 */
	PN_L2_FLUSHALL(%g4, %g5, %g6)

	CPU_INDEX(%g4, %g5)
	mulx	%g4, CPU_NODE_SIZE, %g4
	set	cpunodes, %g5
	add	%g4, %g5, %g4
	ld	[%g4 + ECACHE_LINESIZE], %g5
	ld	[%g4 + ECACHE_SIZE], %g4

	ASM_LDX(%g6, ecache_flushaddr)
	ECACHE_FLUSHALL(%g4, %g5, %g6, %g7)

	/*
	 * Flush the Dcache.  Since bad data could have been installed in
	 * the Dcache we must flush it before re-enabling it.
	 */
	ASM_LD(%g5, dcache_size)
	ASM_LD(%g6, dcache_linesize)
	CH_DCACHE_FLUSHALL(%g5, %g6, %g7)

	/*
	 * Flush the Icache.  Since we turned off the Icache to capture the
	 * Icache line it is now stale or corrupted and we must flush it
	 * before re-enabling it.
	 */
	GET_CPU_PRIVATE_PTR(%g0, %g5, %g7, fast_ecc_err_5);
	ld	[%g5 + CHPR_ICACHE_LINESIZE], %g6
	ba,pt	%icc, 6f
	ld	[%g5 + CHPR_ICACHE_SIZE], %g5
fast_ecc_err_5:
	ASM_LD(%g5, icache_size)
	ASM_LD(%g6, icache_linesize)
6:
	CH_ICACHE_FLUSHALL(%g5, %g6, %g7, %g4)

	/*
	 * check to see whether we parked our sibling core at the start
	 * of this handler. If so, we need to unpark it here.
	 * We use DCUCR reserved bits (stored in %g1) to keep track of
	 * whether or not we need to unpark. %g5 and %g4 are scratch registers.
	 */
	UNPARK_SIBLING_CORE(%g1, %g5, %g4)

	/*
	 * Restore the Dcache and Icache to the previous state.
	 */
	stxa	%g1, [%g0]ASI_DCU
	flush	%g0	/* flush required after changing the IC bit */

	/*
	 * Make sure our CPU logout operation was successful.
	 */
	cmp	%g3, %g0
	be	8f
	nop

	/*
	 * If the logout structure had been busy, how many times have
	 * we tried to use it and failed (nesting count)? If we have
	 * already recursed a substantial number of times, then we can
	 * assume things are not going to get better by themselves and
	 * so it would be best to panic.
	 */
	cmp	%g3, CLO_NESTING_MAX
	blt	7f
	nop

	call	ptl1_panic
	mov	PTL1_BAD_ECC, %g1

7:
	/*
	 * Otherwise, if the logout structure was busy but we have not
	 * nested more times than our maximum value, then we simply
	 * issue a retry.  Our TL=0 trap handler code will check and
	 * clear the AFSR after it is done logging what is currently
	 * in the logout struct and handle this event at that time.
	 */
	retry
8:
	/*
	 * Call cpu_fast_ecc_error via systrap at PIL 14 unless we're
	 * already at PIL 15.
	 */
	set	cpu_fast_ecc_error, %g1
	rdpr	%pil, %g4
	cmp	%g4, PIL_14
	ba	sys_trap
	movl	%icc, PIL_14, %g4

	SET_SIZE(fast_ecc_err)

#endif	/* !(JALAPENO || SERRANO) */
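
/*
 * Illustrative only (not assembled): the "PIL 14 unless already at 15"
 * dance done with movl above -- the PIL handed to sys_trap is the
 * current PIL, raised to PIL_14 if it is lower.  The call form below
 * is hypothetical shorthand for the sys_trap hand-off.
 *
 *	int spl = current_pil;
 *	if (spl < PIL_14)
 *		spl = PIL_14;		// movl %icc, PIL_14, %g4
 *	sys_trap(cpu_fast_ecc_error, spl);
 */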

/*
 * Cheetah/Cheetah+ Fast ECC at TL>0 trap strategy:
 *
 * The basic flow of this trap handler is as follows:
 *
 * 1) In the "trap 70" trap table code (fecc_err_tl1_instr), generate a
 *    software trap 0 ("ta 0") to buy an extra set of %tpc, etc. which we
 *    will use to save %g1 and %g2.
 * 2) At the software trap 0 at TL>0 trap table code (fecc_err_tl1_cont_instr),
 *    we save %g1+%g2 using %tpc, %tnpc + %tstate and jump to the fast ecc
 *    handler (using the just saved %g1).
 * 3) Turn off the Dcache if it was on and save the state of the Dcache
 *    (whether on or off) in Bit2 (CH_ERR_TSTATE_DC_ON) of %tstate.
 *    NB: we don't turn off the Icache because bad data is not installed nor
 *    will we be doing any diagnostic accesses.
 * 4) compute physical address of the per-cpu/per-tl save area using %g1+%g2
 * 5) Save %g1-%g7 into the per-cpu/per-tl save area (%g1 + %g2 from the
 *    %tpc, %tnpc, %tstate values previously saved).
 * 6) set %tl to %tl - 1.
 * 7) Save the appropriate flags and TPC in the ch_err_tl1_data structure.
 * 8) Save the value of CH_ERR_TSTATE_DC_ON in the ch_err_tl1_tmp field.
 * 9) For Cheetah and Jalapeno, read the AFAR and AFSR and clear.  For
 *    Cheetah+ (and later), read the shadow AFAR and AFSR but don't clear.
 *    Save the values in ch_err_tl1_data.  For Panther, read the shadow
 *    AFSR_EXT and save the value in ch_err_tl1_data.
 * 10) Disable CEEN/NCEEN to prevent any disrupting/deferred errors from
 *    being queued.  We'll report them via the AFSR/AFAR capture in step 13.
 * 11) Flush the Ecache.
 *    NB: the Ecache is flushed assuming the largest possible size with
 *    the smallest possible line size since access to the cpu_nodes may
 *    cause an unrecoverable DTLB miss.
 * 12) Reenable CEEN/NCEEN with the value saved from step 10.
 * 13) For Cheetah and Jalapeno, read the AFAR and AFSR and clear again.
 *    For Cheetah+ (and later), read the primary AFAR and AFSR and now clear.
 *    Save the read AFSR/AFAR values in ch_err_tl1_data.  For Panther,
 *    read and clear the primary AFSR_EXT and save it in ch_err_tl1_data.
 * 14) Flush and re-enable the Dcache if it was on at step 3.
 * 15) Do TRAPTRACE if enabled.
 * 16) Check if a UCU->WDU (or L3_UCU->WDU for Panther) happened, panic if so.
 * 17) Set the event pending flag in ch_err_tl1_pending[CPU]
 * 18) Cause a softint 15.  The pil15_interrupt handler will inspect the
 *    event pending flag and call cpu_tl1_error via systrap if set.
 * 19) Restore the registers from step 5 and issue retry.
 */

/*
 * Cheetah ecc-protected E$ trap (Trap 70) at TL>0
 * tt1_fecc is replaced by fecc_err_tl1_instr in cpu_init_trap of the various
 * architecture-specific files.  This generates a "Software Trap 0" at TL>0,
 * which goes to fecc_err_tl1_cont_instr, and we continue the handling there.
 * NB: Must be 8 instructions or less to fit in trap table and code must
 * be relocatable.
 */

	ENTRY_NP(fecc_err_tl1_instr)
	CH_ERR_TL1_TRAPENTRY(SWTRAP_0);
	SET_SIZE(fecc_err_tl1_instr)

/*
 * Software trap 0 at TL>0.
 * tt1_swtrap0 is replaced by fecc_err_tl1_cont_instr in cpu_init_trap of
 * the various architecture-specific files.  This is used as a continuation
 * of the fast ecc handling where we've bought an extra TL level, so we can
 * use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
 * and %g2.  Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
 * there's a reserved hole from 3-7.  We only use bits 0-1 and 8-9 (the low
 * order two bits from %g1 and %g2 respectively).
 * NB: Must be 8 instructions or less to fit in trap table and code must
 * be relocatable.
 */

	ENTRY_NP(fecc_err_tl1_cont_instr)
	CH_ERR_TL1_SWTRAPENTRY(fast_ecc_tl1_err)
	SET_SIZE(fecc_err_tl1_cont_instr)
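
/*
 * Illustrative only (not assembled): a sketch of the %tstate bit-stash
 * idea described in the comment above.  %tpc/%tnpc cannot hold the low
 * two bits of an arbitrary value, so those bits are parked in the
 * writable %tstate bits 0-1 and 8-9 and recombined on the way out.
 * The exact mechanics live in CH_ERR_TL1_SWTRAPENTRY; this is only the
 * shape of the trick.
 *
 *	// save: %tpc/%tnpc get %g1/%g2 minus their low two bits
 *	tpc = g1 & ~0x3ULL;
 *	tnpc = g2 & ~0x3ULL;
 *	tstate = (g1 & 0x3) | ((g2 & 0x3) << 8);
 *
 *	// restore:
 *	g1 = tpc | (tstate & 0x3);
 *	g2 = tnpc | ((tstate >> 8) & 0x3);
 */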

/*
 * The ce_err function handles disrupting trap type 0x63 at TL=0.
 *
 * AFSR errors bits which cause this trap are:
 *	CE, EMC, EDU:ST, EDC, WDU, WDC, CPU, CPC, IVU, IVC
 *
 * NCEEN Bit of Cheetah External Cache Error Enable Register enables
 * the following AFSR disrupting traps: EDU:ST, WDU, CPU, IVU
 *
 * CEEN Bit of Cheetah External Cache Error Enable Register enables
 * the following AFSR disrupting traps: CE, EMC, EDC, WDC, CPC, IVC
 *
 * Cheetah+ also handles (No additional processing required):
 *    DUE, DTO, DBERR	(NCEEN controlled)
 *    THCE		(CEEN and ET_ECC_en controlled)
 *    TUE		(ET_ECC_en controlled)
 *
 * Panther further adds:
 *    IMU, L3_EDU, L3_WDU, L3_CPU		(NCEEN controlled)
 *    IMC, L3_EDC, L3_WDC, L3_CPC, L3_THCE	(CEEN controlled)
 *    TUE_SH, TUE				(NCEEN and L2_tag_ECC_en controlled)
 *    L3_TUE, L3_TUE_SH				(NCEEN and ET_ECC_en controlled)
 *    THCE					(CEEN and L2_tag_ECC_en controlled)
 *    L3_THCE					(CEEN and ET_ECC_en controlled)
 *
 * Steps:
 *	1. Disable hardware corrected disrupting errors only (CEEN)
 *	2. Park sibling core if caches are shared (to avoid race
 *	   condition while accessing shared resources such as L3
 *	   data staging register during CPU logout).
 *	3. If the CPU logout structure is not currently being used:
 *	   4. Clear AFSR error bits
 *	   5. Capture Ecache, Dcache and Icache lines associated
 *	      with AFAR.
 *	   6. Unpark sibling core if we parked it earlier.
 *	   7. call cpu_disrupting_error via sys_trap at PIL 14
 *	      unless we're already running at PIL 15.
 *	4. Otherwise, if the CPU logout structure is busy:
 *	   5. Increment "logout busy count" and place into %g3
 *	   6. Unpark sibling core if we parked it earlier.
 *	   7. Issue a retry since the other CPU error logging
 *	      code will end up finding this error bit and logging
 *	      information about it later.
 *	5. Alternatively (to 3 and 4 above), if the cpu_private struct is
 *	   not yet initialized such that we can't even check the logout
 *	   struct, then we place the clo_flags data into %g2
 *	   (sys_trap->have_win arg #1) and call cpu_disrupting_error via
 *	   systrap.  The clo_flags parameter is used to determine information
 *	   such as TL, TT, CEEN settings, etc in the high level trap
 *	   handler since we don't have access to detailed logout information
 *	   in cases where the cpu_private struct is not yet initialized.
 *
 * %g3: [ logout busy count ] - arg #2
 * %g2: [ clo_flags if cpu_private unavailable ] - sys_trap->have_win: arg #1
 */

	.align	128
	ENTRY_NP(ce_err)
	membar	#Sync			! Cheetah requires membar #Sync

	/*
	 * Disable trap on hardware corrected errors (CEEN) while at TL=0
	 * to prevent recursion.
	 */
	ldxa	[%g0]ASI_ESTATE_ERR, %g1
	bclr	EN_REG_CEEN, %g1
	stxa	%g1, [%g0]ASI_ESTATE_ERR
	membar	#Sync			! membar sync required

	/*
	 * Save current DCU state.  Turn off Icache to allow capture of
	 * Icache data by DO_CPU_LOGOUT.
	 */
	ldxa	[%g0]ASI_DCU, %g1	! save DCU in %g1
	andn	%g1, DCU_IC, %g4
	stxa	%g4, [%g0]ASI_DCU
	flush	%g0	/* flush required after changing the IC bit */

	/*
	 * Check to see whether we need to park our sibling core
	 * before recording diagnostic information from caches
	 * which may be shared by both cores.
	 * We use %g1 to store information about whether or not
	 * we had to park the core (%g1 holds our DCUCR value and
	 * we only use bits from that register which are "reserved"
	 * to keep track of core parking) so that we know whether
	 * or not to unpark later. %g5 and %g4 are scratch registers.
	 */
	PARK_SIBLING_CORE(%g1, %g5, %g4)

	/*
	 * Do the CPU log out capture.
	 *   %g3 = "failed?" return value.
	 *   %g2 = Input = AFAR. Output the clo_flags info which is passed
	 *         into this macro via %g4. Output only valid if cpu_private
	 *         struct has not been initialized.
	 *   CHPR_CECC_LOGOUT = cpu logout structure offset input
	 *   %g4 = Trap information stored in the cpu logout flags field
	 *   %g5 = scr1
	 *   %g6 = scr2
	 *   %g3 = scr3
	 *   %g4 = scr4
	 */
	clr	%g4			! TL=0 bit in afsr
	set	CHPR_CECC_LOGOUT, %g6
	DO_CPU_LOGOUT(%g3, %g2, %g6, %g4, %g5, %g6, %g3, %g4)

	/*
	 * Flush the Icache.  Since we turned off the Icache to capture the
	 * Icache line it is now stale or corrupted and we must flush it
	 * before re-enabling it.
	 */
	GET_CPU_PRIVATE_PTR(%g0, %g5, %g7, ce_err_1);
	ld	[%g5 + CHPR_ICACHE_LINESIZE], %g6
	ba,pt	%icc, 2f
	ld	[%g5 + CHPR_ICACHE_SIZE], %g5
ce_err_1:
	ASM_LD(%g5, icache_size)
	ASM_LD(%g6, icache_linesize)
2:
	CH_ICACHE_FLUSHALL(%g5, %g6, %g7, %g4)

	/*
	 * check to see whether we parked our sibling core at the start
	 * of this handler. If so, we need to unpark it here.
	 * We use DCUCR reserved bits (stored in %g1) to keep track of
	 * whether or not we need to unpark. %g5 and %g4 are scratch registers.
	 */
	UNPARK_SIBLING_CORE(%g1, %g5, %g4)

	/*
	 * Restore Icache to previous state.
	 */
	stxa	%g1, [%g0]ASI_DCU
	flush	%g0	/* flush required after changing the IC bit */

	/*
	 * Make sure our CPU logout operation was successful.
	 */
	cmp	%g3, %g0
	be	4f
	nop

	/*
	 * If the logout structure had been busy, how many times have
	 * we tried to use it and failed (nesting count)? If we have
	 * already recursed a substantial number of times, then we can
	 * assume things are not going to get better by themselves and
	 * so it would be best to panic.
	 */
	cmp	%g3, CLO_NESTING_MAX
	blt	3f
	nop

	call	ptl1_panic
	mov	PTL1_BAD_ECC, %g1

3:
	/*
	 * Otherwise, if the logout structure was busy but we have not
	 * nested more times than our maximum value, then we simply
	 * issue a retry.  Our TL=0 trap handler code will check and
	 * clear the AFSR after it is done logging what is currently
	 * in the logout struct and handle this event at that time.
	 */
	retry
4:
	/*
	 * Call cpu_disrupting_error via systrap at PIL 14 unless we're
	 * already at PIL 15.
	 */
	set	cpu_disrupting_error, %g1
	rdpr	%pil, %g4
	cmp	%g4, PIL_14
	ba	sys_trap
	movl	%icc, PIL_14, %g4
	SET_SIZE(ce_err)

/*
 * This trap cannot happen at TL>0 which means this routine will never
 * actually be called and so we treat this like a BAD TRAP panic.
 */
	.align	64
	ENTRY_NP(ce_err_tl1)

	call	ptl1_panic
	mov	PTL1_BAD_TRAP, %g1

	SET_SIZE(ce_err_tl1)


/*
 * The async_err function handles deferred trap types 0xA
 * (instruction_access_error) and 0x32 (data_access_error) at TL>=0.
 *
 * AFSR errors bits which cause this trap are:
 *	UE, EMU, EDU:BLD, L3_EDU:BLD, TO, BERR
 * On some platforms, EMU may cause cheetah to pull the error pin,
 * never giving Solaris a chance to take a trap.
 *
 * NCEEN Bit of Cheetah External Cache Error Enable Register enables
 * the following AFSR deferred traps: UE, EMU, EDU:BLD, TO, BERR
 *
 * Steps:
 *	1. Disable CEEN and NCEEN errors to prevent recursive errors.
 *	2. Turn D$ off per Cheetah PRM P.5 Note 6, turn I$ off to capture
 *	   I$ line in DO_CPU_LOGOUT.
 *	3. Park sibling core if caches are shared (to avoid race
 *	   condition while accessing shared resources such as L3
 *	   data staging register during CPU logout).
 *	4. If the CPU logout structure is not currently being used:
 *	   5. Clear AFSR error bits
 *	   6. Capture Ecache, Dcache and Icache lines associated
 *	      with AFAR.
 *	   7. Unpark sibling core if we parked it earlier.
 *	   8. call cpu_deferred_error via sys_trap.
 *	5. Otherwise, if the CPU logout structure is busy:
 *	   6. Increment "logout busy count"
 *	   7. Unpark sibling core if we parked it earlier.
 *	   8. Issue a retry since the other CPU error logging
 *	      code will end up finding this error bit and logging
 *	      information about it later.
 *	6. Alternatively (to 4 and 5 above), if the cpu_private struct is
 *	   not yet initialized such that we can't even check the logout
 *	   struct, then we place the clo_flags data into %g2
 *	   (sys_trap->have_win arg #1) and call cpu_deferred_error via
 *	   systrap.  The clo_flags parameter is used to determine information
 *	   such as TL, TT, CEEN settings, etc in the high level trap handler
 *	   since we don't have access to detailed logout information in cases
 *	   where the cpu_private struct is not yet initialized.
 *
 * %g2: [ clo_flags if cpu_private unavailable ] - sys_trap->have_win: arg #1
 * %g3: [ logout busy count ] - arg #2
 */
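
/*
 * Illustrative only (not assembled): the clo_flags word async_err
 * below builds for DO_CPU_LOGOUT -- trap level, trap type and the
 * saved CEEN bit packed into one register.  Variable names are
 * hypothetical; the shift/mask names are the ones used in the code.
 *
 *	uint64_t clo_flags =
 *	    ((was_tl1 ? 1ULL : 0) << CLO_FLAGS_TL_SHIFT) |
 *	    (((uint64_t)tt << CLO_FLAGS_TT_SHIFT) & CLO_FLAGS_TT_MASK) |
 *	    (estate_err & EN_REG_CEEN);
 */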

	ENTRY_NP(async_err)
	membar	#Sync			! Cheetah requires membar #Sync

	/*
	 * Disable CEEN and NCEEN.
	 */
	ldxa	[%g0]ASI_ESTATE_ERR, %g3
	andn	%g3, EN_REG_NCEEN + EN_REG_CEEN, %g4
	stxa	%g4, [%g0]ASI_ESTATE_ERR
	membar	#Sync			! membar sync required

	/*
	 * Save current DCU state.
	 * Disable Icache to allow capture of Icache data by DO_CPU_LOGOUT.
	 * Do this regardless of whether this is a Data Access Error or
	 * Instruction Access Error Trap.
	 * Disable Dcache for both Data Access Error and Instruction Access
	 * Error per Cheetah PRM P.5 Note 6.
	 */
	ldxa	[%g0]ASI_DCU, %g1	! save DCU in %g1
	andn	%g1, DCU_IC + DCU_DC, %g4
	stxa	%g4, [%g0]ASI_DCU
	flush	%g0	/* flush required after changing the IC bit */

	/*
	 * Check to see whether we need to park our sibling core
	 * before recording diagnostic information from caches
	 * which may be shared by both cores.
	 * We use %g1 to store information about whether or not
	 * we had to park the core (%g1 holds our DCUCR value and
	 * we only use bits from that register which are "reserved"
	 * to keep track of core parking) so that we know whether
	 * or not to unpark later. %g6 and %g4 are scratch registers.
	 */
	PARK_SIBLING_CORE(%g1, %g6, %g4)

	/*
	 * Do the CPU logout capture.
	 *
	 *   %g3 = "failed?" return value.
	 *   %g2 = Input = AFAR. Output the clo_flags info which is passed
	 *         into this macro via %g4. Output only valid if cpu_private
	 *         struct has not been initialized.
	 *   CHPR_ASYNC_LOGOUT = cpu logout structure offset input
	 *   %g4 = Trap information stored in the cpu logout flags field
	 *   %g5 = scr1
	 *   %g6 = scr2
	 *   %g3 = scr3
	 *   %g4 = scr4
	 */
	andcc	%g5, T_TL1, %g0
	clr	%g6
	movnz	%xcc, 1, %g6		! set %g6 if T_TL1 set
	sllx	%g6, CLO_FLAGS_TL_SHIFT, %g6
	sllx	%g5, CLO_FLAGS_TT_SHIFT, %g4
	set	CLO_FLAGS_TT_MASK, %g2
	and	%g4, %g2, %g4		! ttype
	or	%g6, %g4, %g4		! TT and TL
	and	%g3, EN_REG_CEEN, %g3	! CEEN value
	or	%g3, %g4, %g4		! TT and TL and CEEN
	set	CHPR_ASYNC_LOGOUT, %g6
	DO_CPU_LOGOUT(%g3, %g2, %g6, %g4, %g5, %g6, %g3, %g4)

	/*
	 * If the logout struct was busy, we may need to pass the
	 * TT, TL, and CEEN information to the TL=0 handler via
	 * systrap parameter so save it off here.
	 */
	cmp	%g3, %g0
	be	1f
	nop
	sllx	%g4, 32, %g4
	or	%g4, %g3, %g3
1:
	/*
	 * Flush the Icache.  Since we turned off the Icache to capture the
	 * Icache line it is now stale or corrupted and we must flush it
	 * before re-enabling it.
	 */
	GET_CPU_PRIVATE_PTR(%g0, %g5, %g7, async_err_1);
	ld	[%g5 + CHPR_ICACHE_LINESIZE], %g6
	ba,pt	%icc, 2f
	ld	[%g5 + CHPR_ICACHE_SIZE], %g5
async_err_1:
	ASM_LD(%g5, icache_size)
	ASM_LD(%g6, icache_linesize)
2:
	CH_ICACHE_FLUSHALL(%g5, %g6, %g7, %g4)

	/*
	 * Flush the Dcache before turning it back on since it may now
	 * contain stale or corrupt data.
	 */
	ASM_LD(%g5, dcache_size)
	ASM_LD(%g6, dcache_linesize)
	CH_DCACHE_FLUSHALL(%g5, %g6, %g7)

	/*
	 * check to see whether we parked our sibling core at the start
	 * of this handler. If so, we need to unpark it here.
	 * We use DCUCR reserved bits (stored in %g1) to keep track of
	 * whether or not we need to unpark. %g5 and %g7 are scratch registers.
	 */
	UNPARK_SIBLING_CORE(%g1, %g5, %g7)

	/*
	 * Restore Icache and Dcache to previous state.
	 */
	stxa	%g1, [%g0]ASI_DCU
	flush	%g0	/* flush required after changing the IC bit */

	/*
	 * Make sure our CPU logout operation was successful.
	 */
	cmp	%g3, %g0
	be	4f
	nop

	/*
	 * If the logout structure had been busy, how many times have
	 * we tried to use it and failed (nesting count)? If we have
	 * already recursed a substantial number of times, then we can
	 * assume things are not going to get better by themselves and
	 * so it would be best to panic.
	 */
	cmp	%g3, CLO_NESTING_MAX
	blt	3f
	nop

	call	ptl1_panic
	mov	PTL1_BAD_ECC, %g1

3:
	/*
	 * Otherwise, if the logout structure was busy but we have not
	 * nested more times than our maximum value, then we simply
	 * issue a retry.  Our TL=0 trap handler code will check and
	 * clear the AFSR after it is done logging what is currently
	 * in the logout struct and handle this event at that time.
	 */
	retry
4:
	RESET_USER_RTT_REGS(%g4, %g5, async_err_resetskip)
async_err_resetskip:
	set	cpu_deferred_error, %g1
	ba	sys_trap
	mov	PIL_15, %g4		! run at pil 15
	SET_SIZE(async_err)

#if defined(CPU_IMP_L1_CACHE_PARITY)

/*
 * D$ parity error trap (trap 71) at TL=0.
 * tt0_dperr is replaced by dcache_parity_instr in cpu_init_trap of
 * the various architecture-specific files.  This merely sets up the
 * arguments for cpu_parity_error and calls it via sys_trap.
 * NB: Must be 8 instructions or less to fit in trap table and code must
 * be relocatable.
 */
	ENTRY_NP(dcache_parity_instr)
	membar	#Sync			! Cheetah+ requires membar #Sync
	set	cpu_parity_error, %g1
	or	%g0, CH_ERR_DPE, %g2
	rdpr	%tpc, %g3
	sethi	%hi(sys_trap), %g7
	jmp	%g7 + %lo(sys_trap)
	mov	PIL_15, %g4		! run at pil 15
	SET_SIZE(dcache_parity_instr)


/*
 * D$ parity error trap (trap 71) at TL>0.
 * tt1_dperr is replaced by dcache_parity_tl1_instr in cpu_init_trap of
 * the various architecture-specific files.  This generates a "Software
 * Trap 1" at TL>0, which goes to dcache_parity_tl1_cont_instr, and we
 * continue the handling there.
 * NB: Must be 8 instructions or less to fit in trap table and code must
 * be relocatable.
 */
	ENTRY_NP(dcache_parity_tl1_instr)
	CH_ERR_TL1_TRAPENTRY(SWTRAP_1);
	SET_SIZE(dcache_parity_tl1_instr)


/*
 * Software trap 1 at TL>0.
 * tt1_swtrap1 is replaced by dcache_parity_tl1_cont_instr in cpu_init_trap
 * of the various architecture-specific files.  This is used as a continuation
 * of the dcache parity handling where we've bought an extra TL level, so we
 * can use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
 * and %g2.  Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
 * there's a reserved hole from 3-7.  We only use bits 0-1 and 8-9 (the low
 * order two bits from %g1 and %g2 respectively).
 * NB: Must be 8 instructions or less to fit in trap table and code must
 * be relocatable.
 */
	ENTRY_NP(dcache_parity_tl1_cont_instr)
	CH_ERR_TL1_SWTRAPENTRY(dcache_parity_tl1_err);
	SET_SIZE(dcache_parity_tl1_cont_instr)

/*
 * D$ parity error at TL>0 handler
 * We get here via trap 71 at TL>0->Software trap 1 at TL>0.  We enter
 * this routine with %g1 and %g2 already saved in %tpc, %tnpc and %tstate.
 */

	ENTRY_NP(dcache_parity_tl1_err)

	/*
	 * This macro saves all the %g registers in the ch_err_tl1_data
	 * structure, updates the ch_err_tl1_flags and saves the %tpc in
	 * ch_err_tl1_tpc.  At the end of this macro, %g1 will point to
	 * the ch_err_tl1_data structure and %g2 will have the original
	 * flags in the ch_err_tl1_data structure.  All %g registers
	 * except for %g1 and %g2 will be available.
	 */
	CH_ERR_TL1_ENTER(CH_ERR_DPE);

#ifdef TRAPTRACE
	/*
	 * Get current trap trace entry physical pointer.
	 */
	CPU_INDEX(%g6, %g5)
	sll	%g6, TRAPTR_SIZE_SHIFT, %g6
	set	trap_trace_ctl, %g5
	add	%g6, %g5, %g6
	ld	[%g6 + TRAPTR_LIMIT], %g5
	tst	%g5
	be	%icc, dpe_tl1_skip_tt
	nop
	ldx	[%g6 + TRAPTR_PBASE], %g5
	ld	[%g6 + TRAPTR_OFFSET], %g4
	add	%g5, %g4, %g5

	/*
	 * Create trap trace entry.
	 */
	rd	%asi, %g7
	wr	%g0, TRAPTR_ASI, %asi
	rd	STICK, %g4
	stxa	%g4, [%g5 + TRAP_ENT_TICK]%asi
	rdpr	%tl, %g4
	stha	%g4, [%g5 + TRAP_ENT_TL]%asi
	rdpr	%tt, %g4
	stha	%g4, [%g5 + TRAP_ENT_TT]%asi
	rdpr	%tpc, %g4
	stna	%g4, [%g5 + TRAP_ENT_TPC]%asi
	rdpr	%tstate, %g4
	stxa	%g4, [%g5 + TRAP_ENT_TSTATE]%asi
	stna	%sp, [%g5 + TRAP_ENT_SP]%asi
	stna	%g0, [%g5 + TRAP_ENT_TR]%asi
	stna	%g0, [%g5 + TRAP_ENT_F1]%asi
	stna	%g0, [%g5 + TRAP_ENT_F2]%asi
	stna	%g0, [%g5 + TRAP_ENT_F3]%asi
	stna	%g0, [%g5 + TRAP_ENT_F4]%asi
	wr	%g0, %g7, %asi

	/*
	 * Advance trap trace pointer.
	 */
	ld	[%g6 + TRAPTR_OFFSET], %g5
	ld	[%g6 + TRAPTR_LIMIT], %g4
	st	%g5, [%g6 + TRAPTR_LAST_OFFSET]
	add	%g5, TRAP_ENT_SIZE, %g5
	sub	%g4, TRAP_ENT_SIZE, %g4
	cmp	%g5, %g4
	movge	%icc, 0, %g5
	st	%g5, [%g6 + TRAPTR_OFFSET]
dpe_tl1_skip_tt:
#endif	/* TRAPTRACE */

	/*
	 * I$ and D$ are automatically turned off by HW when the CPU hits
	 * a dcache or icache parity error so we will just leave those two
	 * off for now to avoid repeating this trap.
	 * For Panther, however, since we trap on P$ data parity errors
	 * and HW does not automatically disable P$, we need to disable it
	 * here so that we don't encounter any recursive traps when we
	 * issue the retry.
	 */
	ldxa	[%g0]ASI_DCU, %g3
	mov	1, %g4
	sllx	%g4, DCU_PE_SHIFT, %g4
	andn	%g3, %g4, %g3
	stxa	%g3, [%g0]ASI_DCU
	membar	#Sync

	/*
	 * We fall into this macro if we've successfully logged the error in
	 * the ch_err_tl1_data structure and want the PIL15 softint to pick
	 * it up and log it.  %g1 must point to the ch_err_tl1_data structure.
	 * Restores the %g registers and issues retry.
	 */
	CH_ERR_TL1_EXIT;
	SET_SIZE(dcache_parity_tl1_err)
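
/*
 * Illustrative only (not assembled): the trap-trace pointer advance
 * from the TRAPTRACE block above (its twin appears again in the icache
 * handler below) -- a ring buffer where the offset wraps to zero one
 * entry short of the limit.
 *
 *	last_offset = offset;
 *	offset += TRAP_ENT_SIZE;
 *	if (offset >= limit - TRAP_ENT_SIZE)	// movge %icc, 0, %g5
 *		offset = 0;
 */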

/*
 * I$ parity error trap (trap 72) at TL=0.
 * tt0_iperr is replaced by icache_parity_instr in cpu_init_trap of
 * the various architecture-specific files.  This merely sets up the
 * arguments for cpu_parity_error and calls it via sys_trap.
 * NB: Must be 8 instructions or less to fit in trap table and code must
 * be relocatable.
 */

	ENTRY_NP(icache_parity_instr)
	membar	#Sync			! Cheetah+ requires membar #Sync
	set	cpu_parity_error, %g1
	or	%g0, CH_ERR_IPE, %g2
	rdpr	%tpc, %g3
	sethi	%hi(sys_trap), %g7
	jmp	%g7 + %lo(sys_trap)
	mov	PIL_15, %g4		! run at pil 15
	SET_SIZE(icache_parity_instr)

/*
 * I$ parity error trap (trap 72) at TL>0.
 * tt1_iperr is replaced by icache_parity_tl1_instr in cpu_init_trap of
 * the various architecture-specific files.  This generates a "Software
 * Trap 2" at TL>0, which goes to icache_parity_tl1_cont_instr, and we
 * continue the handling there.
 * NB: Must be 8 instructions or less to fit in trap table and code must
 * be relocatable.
 */
	ENTRY_NP(icache_parity_tl1_instr)
	CH_ERR_TL1_TRAPENTRY(SWTRAP_2);
	SET_SIZE(icache_parity_tl1_instr)

/*
 * Software trap 2 at TL>0.
 * tt1_swtrap2 is replaced by icache_parity_tl1_cont_instr in cpu_init_trap
 * of the various architecture-specific files.  This is used as a continuation
 * of the icache parity handling where we've bought an extra TL level, so we
 * can use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
 * and %g2.  Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
 * there's a reserved hole from 3-7.  We only use bits 0-1 and 8-9 (the low
 * order two bits from %g1 and %g2 respectively).
 * NB: Must be 8 instructions or less to fit in trap table and code must
 * be relocatable.
 */
	ENTRY_NP(icache_parity_tl1_cont_instr)
	CH_ERR_TL1_SWTRAPENTRY(icache_parity_tl1_err);
	SET_SIZE(icache_parity_tl1_cont_instr)


/*
 * I$ parity error at TL>0 handler
 * We get here via trap 72 at TL>0->Software trap 2 at TL>0.  We enter
 * this routine with %g1 and %g2 already saved in %tpc, %tnpc and %tstate.
 */

	ENTRY_NP(icache_parity_tl1_err)

	/*
	 * This macro saves all the %g registers in the ch_err_tl1_data
	 * structure, updates the ch_err_tl1_flags and saves the %tpc in
	 * ch_err_tl1_tpc.  At the end of this macro, %g1 will point to
	 * the ch_err_tl1_data structure and %g2 will have the original
	 * flags in the ch_err_tl1_data structure.  All %g registers
	 * except for %g1 and %g2 will be available.
	 */
	CH_ERR_TL1_ENTER(CH_ERR_IPE);

#ifdef TRAPTRACE
	/*
	 * Get current trap trace entry physical pointer.
	 */
	CPU_INDEX(%g6, %g5)
	sll	%g6, TRAPTR_SIZE_SHIFT, %g6
	set	trap_trace_ctl, %g5
	add	%g6, %g5, %g6
	ld	[%g6 + TRAPTR_LIMIT], %g5
	tst	%g5
	be	%icc, ipe_tl1_skip_tt
	nop
	ldx	[%g6 + TRAPTR_PBASE], %g5
	ld	[%g6 + TRAPTR_OFFSET], %g4
	add	%g5, %g4, %g5

/*
 * I$ parity error at TL>0 handler
 * We get here via trap 72 at TL>0->Software trap 2 at TL>0.  We enter
 * this routine with %g1 and %g2 already saved in %tpc, %tnpc and %tstate.
 */

	ENTRY_NP(icache_parity_tl1_err)

	/*
	 * This macro saves all the %g registers in the ch_err_tl1_data
	 * structure, updates the ch_err_tl1_flags and saves the %tpc in
	 * ch_err_tl1_tpc.  At the end of this macro, %g1 will point to
	 * the ch_err_tl1_data structure and %g2 will have the original
	 * flags in the ch_err_tl1_data structure.  All %g registers
	 * except for %g1 and %g2 will be available.
	 */
	CH_ERR_TL1_ENTER(CH_ERR_IPE);

#ifdef TRAPTRACE
	/*
	 * Get current trap trace entry physical pointer.
	 */
	CPU_INDEX(%g6, %g5)
	sll	%g6, TRAPTR_SIZE_SHIFT, %g6
	set	trap_trace_ctl, %g5
	add	%g6, %g5, %g6
	ld	[%g6 + TRAPTR_LIMIT], %g5
	tst	%g5
	be	%icc, ipe_tl1_skip_tt
	  nop
	ldx	[%g6 + TRAPTR_PBASE], %g5
	ld	[%g6 + TRAPTR_OFFSET], %g4
	add	%g5, %g4, %g5

	/*
	 * Create trap trace entry.
	 */
	rd	%asi, %g7
	wr	%g0, TRAPTR_ASI, %asi
	rd	STICK, %g4
	stxa	%g4, [%g5 + TRAP_ENT_TICK]%asi
	rdpr	%tl, %g4
	stha	%g4, [%g5 + TRAP_ENT_TL]%asi
	rdpr	%tt, %g4
	stha	%g4, [%g5 + TRAP_ENT_TT]%asi
	rdpr	%tpc, %g4
	stna	%g4, [%g5 + TRAP_ENT_TPC]%asi
	rdpr	%tstate, %g4
	stxa	%g4, [%g5 + TRAP_ENT_TSTATE]%asi
	stna	%sp, [%g5 + TRAP_ENT_SP]%asi
	stna	%g0, [%g5 + TRAP_ENT_TR]%asi
	stna	%g0, [%g5 + TRAP_ENT_F1]%asi
	stna	%g0, [%g5 + TRAP_ENT_F2]%asi
	stna	%g0, [%g5 + TRAP_ENT_F3]%asi
	stna	%g0, [%g5 + TRAP_ENT_F4]%asi
	wr	%g0, %g7, %asi

	/*
	 * Advance trap trace pointer.
	 */
	ld	[%g6 + TRAPTR_OFFSET], %g5
	ld	[%g6 + TRAPTR_LIMIT], %g4
	st	%g5, [%g6 + TRAPTR_LAST_OFFSET]
	add	%g5, TRAP_ENT_SIZE, %g5
	sub	%g4, TRAP_ENT_SIZE, %g4
	cmp	%g5, %g4
	movge	%icc, 0, %g5
	st	%g5, [%g6 + TRAPTR_OFFSET]
ipe_tl1_skip_tt:
#endif	/* TRAPTRACE */

	/*
	 * We fall into this macro if we've successfully logged the error in
	 * the ch_err_tl1_data structure and want the PIL15 softint to pick
	 * it up and log it.  %g1 must point to the ch_err_tl1_data structure.
	 * Restores the %g registers and issues retry.
	 */
	CH_ERR_TL1_EXIT;

	SET_SIZE(icache_parity_tl1_err)

#endif	/* CPU_IMP_L1_CACHE_PARITY */


/*
 * The itlb_rd_entry and dtlb_rd_entry functions return the tag portion of the
 * tte, the virtual address, and the ctxnum of the specified tlb entry.  They
 * should only be used in places where you have no choice but to look at the
 * tlb itself.
 *
 * Note: These two routines are required by the Estar "cpr" loadable module.
 */

	ENTRY_NP(itlb_rd_entry)
	sllx	%o0, 3, %o0
	ldxa	[%o0]ASI_ITLB_ACCESS, %g1
	stx	%g1, [%o1]
	ldxa	[%o0]ASI_ITLB_TAGREAD, %g2
	set	TAGREAD_CTX_MASK, %o4
	andn	%g2, %o4, %o5
	retl
	  stx	%o5, [%o2]
	SET_SIZE(itlb_rd_entry)


	ENTRY_NP(dtlb_rd_entry)
	sllx	%o0, 3, %o0
	ldxa	[%o0]ASI_DTLB_ACCESS, %g1
	stx	%g1, [%o1]
	ldxa	[%o0]ASI_DTLB_TAGREAD, %g2
	set	TAGREAD_CTX_MASK, %o4
	andn	%g2, %o4, %o5
	retl
	  stx	%o5, [%o2]
	SET_SIZE(dtlb_rd_entry)
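
/*
 * Illustration only, not part of the build: the C-callable contract of
 * the two routines above, with types simplified.  The entry index is
 * scaled by 8 (the sllx by 3) to form the ASI virtual address; the raw
 * TTE data lands in *data, and the tag-read value, with the context
 * bits masked off, lands in *tag.
 *
 *	#include <sys/types.h>
 *
 *	extern void itlb_rd_entry(uint_t entry, uint64_t *data,
 *	    uint64_t *tag);
 *	extern void dtlb_rd_entry(uint_t entry, uint64_t *data,
 *	    uint64_t *tag);
 *
 *	// e.g., sample the first 16 DTLB entries; the caller must
 *	// tolerate entries changing underneath it.
 *	//	uint64_t data, tag;
 *	//	for (uint_t i = 0; i < 16; i++)
 *	//		dtlb_rd_entry(i, &data, &tag);
 */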

#if !(defined(JALAPENO) || defined(SERRANO))

	ENTRY(get_safari_config)
	ldxa	[%g0]ASI_SAFARI_CONFIG, %o0
	retl
	  nop
	SET_SIZE(get_safari_config)


	ENTRY(set_safari_config)
	stxa	%o0, [%g0]ASI_SAFARI_CONFIG
	membar	#Sync
	retl
	  nop
	SET_SIZE(set_safari_config)

#endif	/* !(JALAPENO || SERRANO) */


	/*
	 * Clear the NPT (non-privileged trap) bit in the %tick/%stick
	 * registers.  In an effort to make the change in the
	 * tick/stick counter as consistent as possible, we disable
	 * all interrupts while we're changing the registers.  We also
	 * ensure that the read and write instructions are in the same
	 * line in the instruction cache.
	 */
	ENTRY_NP(cpu_clearticknpt)
	rdpr	%pstate, %g1		/* save processor state */
	andn	%g1, PSTATE_IE, %g3	/* turn off */
	wrpr	%g0, %g3, %pstate	/*   interrupts */
	rdpr	%tick, %g2		/* get tick register */
	brgez,pn %g2, 1f		/* if NPT bit off, we're done */
	  mov	1, %g3			/* create mask */
	sllx	%g3, 63, %g3		/*   for NPT bit */
	ba,a,pt	%xcc, 2f
	.align	8			/* Ensure rd/wr in same i$ line */
2:
	rdpr	%tick, %g2		/* get tick register */
	wrpr	%g3, %g2, %tick		/* write tick register, */
					/*   clearing NPT bit   */
1:
	rd	STICK, %g2		/* get stick register */
	brgez,pn %g2, 3f		/* if NPT bit off, we're done */
	  mov	1, %g3			/* create mask */
	sllx	%g3, 63, %g3		/*   for NPT bit */
	ba,a,pt	%xcc, 4f
	.align	8			/* Ensure rd/wr in same i$ line */
4:
	rd	STICK, %g2		/* get stick register */
	wr	%g3, %g2, STICK		/* write stick register, */
					/*   clearing NPT bit   */
3:
	jmp	%g4 + 4
	  wrpr	%g0, %g1, %pstate	/* restore processor state */

	SET_SIZE(cpu_clearticknpt)


#if defined(CPU_IMP_L1_CACHE_PARITY)

/*
 * correct_dcache_parity(size_t size, size_t linesize)
 *
 * Correct D$ data parity by zeroing the data and initializing microtag
 * for all indexes and all ways of the D$.
 */
	ENTRY(correct_dcache_parity)
	/*
	 * Register Usage:
	 *
	 * %o0 = input D$ size
	 * %o1 = input D$ line size
	 * %o2 = scratch
	 * %o3 = scratch
	 * %o4 = scratch
	 */

	sub	%o0, %o1, %o0			! init cache line address

	/*
	 * For Panther CPUs, we also need to clear the data parity bits
	 * using DC_data_parity bit of the ASI_DCACHE_DATA register.
	 */
	GET_CPU_IMPL(%o3)
	cmp	%o3, PANTHER_IMPL
	bne	1f
	  clr	%o3				! zero for non-Panther
	mov	1, %o3
	sll	%o3, PN_DC_DATA_PARITY_BIT_SHIFT, %o3

1:
	/*
	 * Set utag = way since it must be unique within an index.
	 */
	srl	%o0, 14, %o2			! get cache way (DC_way)
	membar	#Sync				! required before ASI_DC_UTAG
	stxa	%o2, [%o0]ASI_DC_UTAG		! set D$ utag = cache way
	membar	#Sync				! required after ASI_DC_UTAG

	/*
	 * Zero line of D$ data (and data parity bits for Panther)
	 */
	sub	%o1, 8, %o2
	or	%o0, %o3, %o4		! same address + DC_data_parity
2:
	membar	#Sync				! required before ASI_DC_DATA
	stxa	%g0, [%o0 + %o2]ASI_DC_DATA	! zero 8 bytes of D$ data
	membar	#Sync				! required after ASI_DC_DATA
	/*
	 * We also clear the parity bits if this is a panther.  For non-Panther
	 * CPUs, we simply end up clearing the $data register twice.
	 */
	stxa	%g0, [%o4 + %o2]ASI_DC_DATA
	membar	#Sync

	subcc	%o2, 8, %o2
	bge	2b
	  nop

	subcc	%o0, %o1, %o0
	bge	1b
	  nop

	retl
	  nop
	SET_SIZE(correct_dcache_parity)

#endif	/* CPU_IMP_L1_CACHE_PARITY */


	ENTRY_NP(stick_timestamp)
	rd	STICK, %g1	! read stick reg
	sllx	%g1, 1, %g1
	srlx	%g1, 1, %g1	! clear npt bit

	retl
	  stx	%g1, [%o0]	! store the timestamp
	SET_SIZE(stick_timestamp)
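
/*
 * Illustration only, not part of the build: stick_timestamp strips the
 * NPT bit (bit 63) with a shift-left/shift-right pair instead of a
 * mask, which avoids materializing a 64-bit constant in a register:
 *
 *	#include <sys/types.h>
 *
 *	static uint64_t
 *	clear_npt(uint64_t stick)
 *	{
 *		return ((stick << 1) >> 1);	// drop bit 63
 *	}
 */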

	ENTRY_NP(stick_adj)
	rdpr	%pstate, %g1		! save processor state
	andn	%g1, PSTATE_IE, %g3
	ba	1f			! cache align stick adj
	  wrpr	%g0, %g3, %pstate	! turn off interrupts

	.align	16
1:	nop

	rd	STICK, %g4		! read stick reg
	add	%g4, %o0, %o1		! adjust stick with skew
	wr	%o1, %g0, STICK		! write stick reg

	retl
	  wrpr	%g1, %pstate		! restore processor state
	SET_SIZE(stick_adj)

	ENTRY_NP(kdi_get_stick)
	rd	STICK, %g1
	stx	%g1, [%o0]
	retl
	  mov	%g0, %o0
	SET_SIZE(kdi_get_stick)

/*
 * Invalidate the specified line from the D$.
 *
 * Register usage:
 *	%o0 - index for the invalidation, specifies DC_way and DC_addr
 *
 * ASI_DC_TAG, 0x47, is used in the following manner.  A 64-bit value is
 * stored to a particular DC_way and DC_addr in ASI_DC_TAG.
 *
 * The format of the stored 64-bit value is:
 *
 *	+----------+--------+----------+
 *	| Reserved | DC_tag | DC_valid |
 *	+----------+--------+----------+
 *	 63      31 30     1          0
 *
 * DC_tag is the 30-bit physical tag of the associated line.
 * DC_valid is the 1-bit valid field for both the physical and snoop tags.
 *
 * The format of the 64-bit DC_way and DC_addr into ASI_DC_TAG is:
 *
 *	+----------+--------+----------+----------+
 *	| Reserved | DC_way | DC_addr  | Reserved |
 *	+----------+--------+----------+----------+
 *	 63      16 15    14 13       5 4        0
 *
 * DC_way is a 2-bit index that selects one of the 4 ways.
 * DC_addr is a 9-bit index that selects one of 512 tag/valid fields.
 *
 * Setting the DC_valid bit to zero for the specified DC_way and
 * DC_addr index into the D$ results in an invalidation of a D$ line.
 */
	ENTRY(dcache_inval_line)
	sll	%o0, 5, %o0		! shift index into DC_way and DC_addr
	stxa	%g0, [%o0]ASI_DC_TAG	! zero the DC_valid and DC_tag bits
	membar	#Sync
	retl
	  nop
	SET_SIZE(dcache_inval_line)
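
/*
 * Illustration only, not part of the build: how dcache_inval_line's
 * index argument maps onto the ASI_DC_TAG virtual address.  The shift
 * by 5 places the caller's concatenated way/index value into the
 * DC_way (bits 15:14) and DC_addr (bits 13:5) fields shown above.
 *
 *	#include <sys/types.h>
 *
 *	static uint64_t
 *	dc_tag_va(uint_t index)
 *	{
 *		// index<10:9> = DC_way, index<8:0> = DC_addr
 *		return ((uint64_t)index << 5);
 *	}
 */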

/*
 * Invalidate the entire I$
 *
 * Register usage:
 *	%o0 - specifies IC_way, IC_addr, IC_tag
 *	%o1 - scratch
 *	%o2 - used to save and restore DCU value
 *	%o3 - scratch
 *	%o5 - used to save and restore PSTATE
 *
 * Due to the behavior of the I$ control logic when accessing ASI_IC_TAG,
 * the I$ should be turned off.  Accesses to ASI_IC_TAG may collide and
 * block out snoops and invalidates to the I$, causing I$ consistency
 * to be broken.  Before turning on the I$, all I$ lines must be invalidated.
 *
 * ASI_IC_TAG, 0x67, is used in the following manner.  A 64-bit value is
 * stored to a particular IC_way, IC_addr, IC_tag in ASI_IC_TAG.  The
 * info below describes store (write) use of ASI_IC_TAG.  Note that read
 * use of ASI_IC_TAG behaves differently.
 *
 * The format of the stored 64-bit value is:
 *
 *	+----------+--------+---------------+-----------+
 *	| Reserved | Valid  | IC_vpred<7:0> | Undefined |
 *	+----------+--------+---------------+-----------+
 *	 63      55 54       53           46 45        0
 *
 * Valid is the 1-bit valid field for both the physical and snoop tags.
 * IC_vpred is the 8-bit LPB bits for 8 instructions starting at
 *	the 32-byte boundary aligned address specified by IC_addr.
 *
 * The format of the 64-bit IC_way, IC_addr, IC_tag into ASI_IC_TAG is:
 *
 *	+----------+--------+---------+--------+---------+
 *	| Reserved | IC_way | IC_addr | IC_tag |Reserved |
 *	+----------+--------+---------+--------+---------+
 *	 63      16 15    14 13      5 4      3 2       0
 *
 * IC_way is a 2-bit index that selects one of the 4 ways.
 * IC_addr[13:6] is an 8-bit index that selects one of 256 valid fields.
 * IC_addr[5] is a "don't care" for a store.
 * IC_tag set to 2 specifies that the stored value is to be interpreted
 * as containing Valid and IC_vpred as described above.
 *
 * Setting the Valid bit to zero for the specified IC_way and
 * IC_addr index into the I$ results in an invalidation of an I$ line.
 */
	ENTRY(icache_inval_all)
	rdpr	%pstate, %o5
	andn	%o5, PSTATE_IE, %o3
	wrpr	%g0, %o3, %pstate	! clear IE bit

	GET_CPU_PRIVATE_PTR(%g0, %o0, %o2, icache_inval_all_1);
	ld	[%o0 + CHPR_ICACHE_LINESIZE], %o1
	ba,pt	%icc, 2f
	  ld	[%o0 + CHPR_ICACHE_SIZE], %o0
icache_inval_all_1:
	ASM_LD(%o0, icache_size)
	ASM_LD(%o1, icache_linesize)
2:
	CH_ICACHE_FLUSHALL(%o0, %o1, %o2, %o4)

	retl
	  wrpr	%g0, %o5, %pstate	! restore earlier pstate
	SET_SIZE(icache_inval_all)


/*
 * cache_scrubreq_tl1 is the crosstrap handler called on offlined cpus via a
 * crosstrap.  It atomically increments the outstanding request counter and,
 * if there was not already an outstanding request, branches to setsoftint_tl1
 * to enqueue an intr_vec for the given inum.
 */

	! Register usage:
	!
	! Arguments:
	! %g1 - inum
	! %g2 - index into chsm_outstanding array
	!
	! Internal:
	! %g2, %g3, %g5 - scratch
	! %g4 - ptr. to scrub_misc chsm_outstanding[index].
	! %g6 - setsoftint_tl1 address

	ENTRY_NP(cache_scrubreq_tl1)
	mulx	%g2, CHSM_OUTSTANDING_INCR, %g2
	set	CHPR_SCRUB_MISC + CHSM_OUTSTANDING, %g3
	add	%g2, %g3, %g2
	GET_CPU_PRIVATE_PTR(%g2, %g4, %g5, 1f);
	ld	[%g4], %g2		! cpu's chsm_outstanding[index]
	!
	! no need to use atomic instructions for the following
	! increment - we're at tl1
	!
	add	%g2, 0x1, %g3
	brnz,pn	%g2, 1f			! no need to enqueue more intr_vec
	  st	%g3, [%g4]		! delay - store incremented counter
	ASM_JMP(%g6, setsoftint_tl1)
	! not reached
1:
	retry
	SET_SIZE(cache_scrubreq_tl1)
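
/*
 * Illustration only, not part of the build: the counter protocol of
 * cache_scrubreq_tl1 in C.  No atomic is needed because the handler
 * runs at TL1, so nothing else can touch this CPU's counter meanwhile;
 * setsoftint_tl1() is shown with a hypothetical C signature.
 *
 *	#include <sys/types.h>
 *
 *	extern void setsoftint_tl1(uint64_t inum);
 *
 *	static void
 *	scrubreq(uint32_t *outstanding, uint64_t inum)
 *	{
 *		uint32_t old = *outstanding;
 *
 *		*outstanding = old + 1;
 *		if (old == 0)
 *			setsoftint_tl1(inum);	// enqueue the intr_vec once
 *	}
 */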

/*
 * Get the error state for the processor.
 * Note that this must not be used at TL>0
 */
	ENTRY(get_cpu_error_state)
#if defined(CHEETAH_PLUS)
	set	ASI_SHADOW_REG_VA, %o2
	ldxa	[%o2]ASI_AFSR, %o1		! shadow afsr reg
	stx	%o1, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR]
	ldxa	[%o2]ASI_AFAR, %o1		! shadow afar reg
	stx	%o1, [%o0 + CH_CPU_ERRORS_SHADOW_AFAR]
	GET_CPU_IMPL(%o3)	! Only panther has AFSR_EXT registers
	cmp	%o3, PANTHER_IMPL
	bne,a	1f
	  stx	%g0, [%o0 + CH_CPU_ERRORS_AFSR_EXT]	! zero for non-PN
	set	ASI_AFSR_EXT_VA, %o2
	ldxa	[%o2]ASI_AFSR, %o1		! afsr_ext reg
	stx	%o1, [%o0 + CH_CPU_ERRORS_AFSR_EXT]
	set	ASI_SHADOW_AFSR_EXT_VA, %o2
	ldxa	[%o2]ASI_AFSR, %o1		! shadow afsr_ext reg
	stx	%o1, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR_EXT]
	b	2f
	  nop
1:
	stx	%g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR_EXT]	! zero for non-PN
2:
#else	/* CHEETAH_PLUS */
	stx	%g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR]
	stx	%g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFAR]
	stx	%g0, [%o0 + CH_CPU_ERRORS_AFSR_EXT]
	stx	%g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR_EXT]
#endif	/* CHEETAH_PLUS */
#if defined(SERRANO)
	/*
	 * Serrano has an afar2 which captures the address on FRC/FRU errors.
	 * We save this in the afar2 of the register save area.
	 */
	set	ASI_MCU_AFAR2_VA, %o2
	ldxa	[%o2]ASI_MCU_CTRL, %o1
	stx	%o1, [%o0 + CH_CPU_ERRORS_AFAR2]
#endif	/* SERRANO */
	ldxa	[%g0]ASI_AFSR, %o1	! primary afsr reg
	stx	%o1, [%o0 + CH_CPU_ERRORS_AFSR]
	ldxa	[%g0]ASI_AFAR, %o1	! primary afar reg
	retl
	  stx	%o1, [%o0 + CH_CPU_ERRORS_AFAR]
	SET_SIZE(get_cpu_error_state)

/*
 * Check a page of memory for errors.
 *
 * Load each 64 byte block from physical memory.
 * Check AFSR after each load to see if an error
 * was caused.  If so, log/scrub that error.
 *
 * Used to determine if a page contains
 * CEs when CEEN is disabled.
 */
	ENTRY(cpu_check_block)
	!
	! get a new window with room for the error regs
	!
	save	%sp, -SA(MINFRAME + CH_CPU_ERROR_SIZE), %sp
	srl	%i1, 6, %l4		! clear top bits of psz
					! and divide by 64
	rd	%fprs, %l2		! store FP
	wr	%g0, FPRS_FEF, %fprs	! enable FP
1:
	ldda	[%i0]ASI_BLK_P, %d0	! load a block
	membar	#Sync
	ldxa	[%g0]ASI_AFSR, %l3	! read afsr reg
	brz,a,pt %l3, 2f		! check for error
	  nop

	!
	! if error, read the error regs and log it
	!
	call	get_cpu_error_state
	  add	%fp, STACK_BIAS - CH_CPU_ERROR_SIZE, %o0

	!
	! cpu_ce_detected(ch_cpu_errors_t *, flag)
	!
	call	cpu_ce_detected		! log the error
	  mov	CE_CEEN_TIMEOUT, %o1
2:
	dec	%l4			! next 64-byte block
	brnz,a,pt %l4, 1b
	  add	%i0, 64, %i0		! increment block addr

	wr	%l2, %g0, %fprs		! restore FP
	ret
	  restore

	SET_SIZE(cpu_check_block)
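
/*
 * Illustration only, not part of the build: the scan loop of
 * cpu_check_block in C, types simplified.  block_load() is a
 * hypothetical stand-in for the 64-byte ldda from ASI_BLK_P and
 * read_afsr() for the ldxa from ASI_AFSR.
 *
 *	#include <sys/types.h>
 *
 *	extern void block_load(uint64_t pa);	// ldda [pa]ASI_BLK_P
 *	extern uint64_t read_afsr(void);	// ldxa [%g0]ASI_AFSR
 *	extern void get_cpu_error_state(void *);
 *	extern void cpu_ce_detected(void *, int);
 *
 *	static void
 *	check_block(uint64_t pa, size_t psz)
 *	{
 *		char errs[CH_CPU_ERROR_SIZE];
 *
 *		for (size_t n = psz / 64; n != 0; n--, pa += 64) {
 *			block_load(pa);
 *			if (read_afsr() != 0) {		// error latched?
 *				get_cpu_error_state(errs);
 *				cpu_ce_detected(errs, CE_CEEN_TIMEOUT);
 *			}
 *		}
 *	}
 */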

/*
 * Perform a cpu logout called from C.  This is used where we did not trap
 * for the error but still want to gather "what we can".  Caller must make
 * sure cpu private area exists and that the indicated logout area is free
 * for use, and that we are unable to migrate cpus.
 */
	ENTRY(cpu_delayed_logout)
	rdpr	%pstate, %o2
	andn	%o2, PSTATE_IE, %o2
	wrpr	%g0, %o2, %pstate		! disable interrupts
	PARK_SIBLING_CORE(%o2, %o3, %o4)	! %o2 has DCU value
	add	%o1, CH_CLO_DATA + CH_CHD_EC_DATA, %o1
	rd	%asi, %g1
	wr	%g0, ASI_P, %asi
	GET_ECACHE_DTAGS(%o0, %o1, %o3, %o4, %o5)
	wr	%g1, %asi
	UNPARK_SIBLING_CORE(%o2, %o3, %o4)	! can use %o2 again
	rdpr	%pstate, %o2
	or	%o2, PSTATE_IE, %o2
	wrpr	%g0, %o2, %pstate
	retl
	  nop
	SET_SIZE(cpu_delayed_logout)

	ENTRY(dtrace_blksuword32)
	save	%sp, -SA(MINFRAME + 4), %sp

	rdpr	%pstate, %l1
	andn	%l1, PSTATE_IE, %l2		! disable interrupts to
	wrpr	%g0, %l2, %pstate		! protect our FPU diddling

	rd	%fprs, %l0
	andcc	%l0, FPRS_FEF, %g0
	bz,a,pt	%xcc, 1f			! if the fpu is disabled
	  wr	%g0, FPRS_FEF, %fprs		! ... enable the fpu

	st	%f0, [%fp + STACK_BIAS - 4]	! save %f0 to the stack
1:
	set	0f, %l5
	/*
	 * We're about to write a block full of either total garbage
	 * (not kernel data, don't worry) or user floating-point data
	 * (so it only _looks_ like garbage).
	 */
	ld	[%i1], %f0			! modify the block
	membar	#Sync
	stn	%l5, [THREAD_REG + T_LOFAULT]	! set up the lofault handler
	stda	%d0, [%i0]ASI_BLK_COMMIT_S	! store the modified block
	membar	#Sync
	stn	%g0, [THREAD_REG + T_LOFAULT]	! remove the lofault handler

	bz,a,pt	%xcc, 1f
	  wr	%g0, %l0, %fprs			! restore %fprs

	ld	[%fp + STACK_BIAS - 4], %f0	! restore %f0
1:

	wrpr	%g0, %l1, %pstate		! restore interrupts

	ret
	  restore %g0, %g0, %o0

0:
	membar	#Sync
	stn	%g0, [THREAD_REG + T_LOFAULT]	! remove the lofault handler

	bz,a,pt	%xcc, 1f
	  wr	%g0, %l0, %fprs			! restore %fprs

	ld	[%fp + STACK_BIAS - 4], %f0	! restore %f0
1:

	wrpr	%g0, %l1, %pstate		! restore interrupts

	/*
	 * If tryagain is set (%i2) we tail-call dtrace_blksuword32_err()
	 * which deals with watchpoints.  Otherwise, just return -1.
	 */
	brnz,pt	%i2, 1f
	  nop
	ret
	  restore %g0, -1, %o0
1:
	call	dtrace_blksuword32_err
	  restore

	SET_SIZE(dtrace_blksuword32)

#ifdef	CHEETAHPLUS_ERRATUM_25

	/* Claim a chunk of physical address space. */
	ENTRY(claimlines)
1:
	subcc	%o1, %o2, %o1
	add	%o0, %o1, %o3
	bgeu,a,pt %xcc, 1b
	  casxa	[%o3]ASI_MEM, %g0, %g0
	membar	#Sync
	retl
	  nop
	SET_SIZE(claimlines)

	/*
	 * CPU feature initialization,
	 * turn BPE off,
	 * get device id.
	 */
	ENTRY(cpu_feature_init)
	save	%sp, -SA(MINFRAME), %sp
	sethi	%hi(cheetah_bpe_off), %o0
	ld	[%o0 + %lo(cheetah_bpe_off)], %o0
	brz	%o0, 1f
	  nop
	rd	ASR_DISPATCH_CONTROL, %o0
	andn	%o0, ASR_DISPATCH_CONTROL_BPE, %o0
	wr	%o0, 0, ASR_DISPATCH_CONTROL
1:
	!
	! get the device_id and store the device_id
	! in the appropriate cpunodes structure
	! given the cpus index
	!
	CPU_INDEX(%o0, %o1)
	mulx	%o0, CPU_NODE_SIZE, %o0
	set	cpunodes + DEVICE_ID, %o1
	ldxa	[%g0] ASI_DEVICE_SERIAL_ID, %o2
	stx	%o2, [%o0 + %o1]
#ifdef	CHEETAHPLUS_ERRATUM_34
	!
	! apply Cheetah+ erratum 34 workaround
	!
	call	itlb_erratum34_fixup
	  nop
	call	dtlb_erratum34_fixup
	  nop
#endif	/* CHEETAHPLUS_ERRATUM_34 */
	ret
	  restore
	SET_SIZE(cpu_feature_init)

/*
 * Copy a tsb entry atomically, from src to dest.
 * src must be 128 bit aligned.
 */
	ENTRY(copy_tsb_entry)
	ldda	[%o0]ASI_NQUAD_LD, %o2	! %o2 = tag, %o3 = data
	stx	%o2, [%o1]
	stx	%o3, [%o1 + 8]
	retl
	  nop
	SET_SIZE(copy_tsb_entry)

#endif	/* CHEETAHPLUS_ERRATUM_25 */
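
/*
 * Illustration only, not part of the build: why copy_tsb_entry uses a
 * quad load.  The single 128-bit ldda reads tag and data indivisibly,
 * so no torn tag/data pair can ever be observed; the two ordinary
 * stores that follow need no such guarantee because the destination is
 * private to the caller.  atomic_quad_load() is a hypothetical
 * stand-in for the ldda.
 *
 *	#include <sys/types.h>
 *
 *	struct tsbe { uint64_t tag; uint64_t data; };
 *
 *	extern struct tsbe atomic_quad_load(const struct tsbe *src);
 *
 *	static void
 *	copy_entry(const struct tsbe *src, struct tsbe *dst)
 *	{
 *		struct tsbe e = atomic_quad_load(src);	// indivisible read
 *
 *		dst->tag = e.tag;
 *		dst->data = e.data;
 *	}
 */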

#ifdef	CHEETAHPLUS_ERRATUM_34

	!
	! In Cheetah+ erratum 34, under certain conditions an ITLB locked
	! index 0 TTE will erroneously be displaced when a new TTE is
	! loaded via ASI_ITLB_IN.  In order to avoid Cheetah+ erratum 34,
	! locked index 0 TTEs must be relocated.
	!
	! NOTE: Care must be taken to avoid an ITLB miss in this routine.
	!
	ENTRY_NP(itlb_erratum34_fixup)
	rdpr	%pstate, %o3
#ifdef DEBUG
	PANIC_IF_INTR_DISABLED_PSTR(%o3, u3_di_label1, %g1)
#endif /* DEBUG */
	wrpr	%o3, PSTATE_IE, %pstate		! Disable interrupts
	ldxa	[%g0]ASI_ITLB_ACCESS, %o1	! %o1 = entry 0 data
	ldxa	[%g0]ASI_ITLB_TAGREAD, %o2	! %o2 = entry 0 tag

	cmp	%o1, %g0			! Is this entry valid?
	bge	%xcc, 1f
	  andcc	%o1, TTE_LCK_INT, %g0		! Is this entry locked?
	bnz	%icc, 2f
	  nop
1:
	retl					! Nope, outta here...
	  wrpr	%g0, %o3, %pstate		! Enable interrupts
2:
	sethi	%hi(FLUSH_ADDR), %o4
	stxa	%g0, [%o2]ASI_ITLB_DEMAP	! Flush this mapping
	flush	%o4				! Flush required for I-MMU
	!
	! Start search from index 1 up.  This is because the kernel force
	! loads its text page at index 15 in sfmmu_kernel_remap() and we
	! don't want our relocated entry evicted later.
	!
	! NOTE: We assume that we'll be successful in finding an unlocked
	! or invalid entry.  If that isn't the case there are bound to be
	! bigger problems.
	!
	set	(1 << 3), %g3
3:
	ldxa	[%g3]ASI_ITLB_ACCESS, %o4	! Load TTE from t16
	!
	! If this entry isn't valid, we'll choose to displace it (regardless
	! of the lock bit).
	!
	cmp	%o4, %g0			! TTE is > 0 iff not valid
	bge	%xcc, 4f			! If invalid, go displace
	  andcc	%o4, TTE_LCK_INT, %g0		! Check for lock bit
	bnz,a	%icc, 3b			! If locked, look at next
	  add	%g3, (1 << 3), %g3		!   entry
4:
	!
	! We found an unlocked or invalid entry; we'll explicitly load
	! the former index 0 entry here.
	!
	sethi	%hi(FLUSH_ADDR), %o4
	set	MMU_TAG_ACCESS, %g4
	stxa	%o2, [%g4]ASI_IMMU
	stxa	%o1, [%g3]ASI_ITLB_ACCESS
	flush	%o4				! Flush required for I-MMU
	retl
	  wrpr	%g0, %o3, %pstate		! Enable interrupts
	SET_SIZE(itlb_erratum34_fixup)

	!
	! In Cheetah+ erratum 34, under certain conditions a DTLB locked
	! index 0 TTE will erroneously be displaced when a new TTE is
	! loaded.  In order to avoid Cheetah+ erratum 34, locked index 0
	! TTEs must be relocated.
	!
	ENTRY_NP(dtlb_erratum34_fixup)
	rdpr	%pstate, %o3
#ifdef DEBUG
	PANIC_IF_INTR_DISABLED_PSTR(%o3, u3_di_label2, %g1)
#endif /* DEBUG */
	wrpr	%o3, PSTATE_IE, %pstate		! Disable interrupts
	ldxa	[%g0]ASI_DTLB_ACCESS, %o1	! %o1 = entry 0 data
	ldxa	[%g0]ASI_DTLB_TAGREAD, %o2	! %o2 = entry 0 tag

	cmp	%o1, %g0			! Is this entry valid?
	bge	%xcc, 1f
	  andcc	%o1, TTE_LCK_INT, %g0		! Is this entry locked?
	bnz	%icc, 2f
	  nop
1:
	retl					! Nope, outta here...
	  wrpr	%g0, %o3, %pstate		! Enable interrupts
2:
	stxa	%g0, [%o2]ASI_DTLB_DEMAP	! Flush this mapping
	membar	#Sync
	!
	! Start search from index 1 up.
	!
	! NOTE: We assume that we'll be successful in finding an unlocked
	! or invalid entry.  If that isn't the case there are bound to be
	! bigger problems.
	!
	set	(1 << 3), %g3
3:
	ldxa	[%g3]ASI_DTLB_ACCESS, %o4	! Load TTE from t16
	!
	! If this entry isn't valid, we'll choose to displace it (regardless
	! of the lock bit).
	!
	cmp	%o4, %g0			! TTE is > 0 iff not valid
	bge	%xcc, 4f			! If invalid, go displace
	  andcc	%o4, TTE_LCK_INT, %g0		! Check for lock bit
	bnz,a	%icc, 3b			! If locked, look at next
	  add	%g3, (1 << 3), %g3		!   entry
4:
	!
	! We found an unlocked or invalid entry; we'll explicitly load
	! the former index 0 entry here.
	!
	set	MMU_TAG_ACCESS, %g4
	stxa	%o2, [%g4]ASI_DMMU
	stxa	%o1, [%g3]ASI_DTLB_ACCESS
	membar	#Sync
	retl
	  wrpr	%g0, %o3, %pstate		! Enable interrupts
	SET_SIZE(dtlb_erratum34_fixup)

#endif	/* CHEETAHPLUS_ERRATUM_34 */
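
/*
 * Illustration only, not part of the build: the victim search both
 * erratum 34 fixups above perform, in C.  tlb_read() is a hypothetical
 * stand-in for the ldxa from the ITLB/DTLB access ASI; t16 entries are
 * 8 bytes apart, hence the index << 3.  A TTE is valid iff bit 63 is
 * set, so the loop walks from index 1 until it finds an entry that is
 * invalid or unlocked.
 *
 *	#include <sys/types.h>
 *
 *	extern uint64_t tlb_read(uint_t va);	// ldxa [va]ASI_*TLB_ACCESS
 *
 *	static uint_t
 *	find_victim(void)
 *	{
 *		uint_t i = 1;			// never displace index 0
 *		uint64_t tte;
 *
 *		for (;;) {
 *			tte = tlb_read(i << 3);
 *			if ((int64_t)tte >= 0 ||	// invalid, or ...
 *			    (tte & TTE_LCK_INT) == 0)	// ... unlocked
 *				return (i);
 *			i++;
 *		}
 *	}
 */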