/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 *
 * Assembly code support for Cheetah/Cheetah+ modules
 */

#pragma ident "%Z%%M% %I% %E% SMI"

#if !defined(lint)
#include "assym.h"
#endif	/* !lint */

#include <sys/asm_linkage.h>
#include <sys/mmu.h>
#include <vm/hat_sfmmu.h>
#include <sys/machparam.h>
#include <sys/machcpuvar.h>
#include <sys/machthread.h>
#include <sys/machtrap.h>
#include <sys/privregs.h>
#include <sys/trap.h>
#include <sys/cheetahregs.h>
#include <sys/us3_module.h>
#include <sys/xc_impl.h>
#include <sys/intreg.h>
#include <sys/async.h>
#include <sys/clock.h>
#include <sys/cheetahasm.h>
#include <sys/cmpregs.h>

#ifdef TRAPTRACE
#include <sys/traptrace.h>
#endif /* TRAPTRACE */

#if !defined(lint)

/* BEGIN CSTYLED */

/*
 * DCACHE_FLUSHPAGE(arg1, arg2, tmp1, tmp2, tmp3)
 *
 * Invalidate D$ lines for one page using the strategy selected by the
 * global dflush_type.  Nothing is flushed when the DCU_DC bit read from
 * ASI_DCU is clear.
 *
 *	arg1		pfn on entry; overwritten on every flush path
 *	arg2		virtual color on entry; reused as the loop counter
 *			(and therefore overwritten) in the FLUSHMATCH_TYPE
 *			path only
 *	tmp1 - tmp3	scratch registers; overwritten
 *
 * dflush_type selects one of three loops:
 *	FLUSHPAGE_TYPE	store to ASI_DC_INVAL at (pfn << MMU_PAGESHIFT) +
 *			offset for every line-sized offset within
 *			MMU_PAGESIZE
 *	FLUSHMATCH_TYPE	read every D$ tag via ASI_DC_TAG and zero the ones
 *			that have a valid bit set and whose tag (valid bits
 *			masked off) equals pfn << CHEETAH_DC_VBIT_SHIFT
 *	anything else	(FLUSHALL_TYPE) zero every D$ tag
 *
 * Every loop counts its offset down from (size - linesize) to 0 in steps
 * of dcache_linesize.  The macro uses the local numeric labels 1 through 5
 * and ends on label 1, which is the common "done" target.
 *
 * NOTE(review): ASM_LD(tmp1, dcache_linesize) follows the initial bz
 * directly, so its first instruction presumably occupies that branch's
 * delay slot; this looks harmless because tmp1 is scratch -- confirm
 * against the ASM_LD expansion.
 */
#define	DCACHE_FLUSHPAGE(arg1, arg2, tmp1, tmp2, tmp3)			\
	ldxa	[%g0]ASI_DCU, tmp1					;\
	btst	DCU_DC, tmp1		/* is dcache enabled? */	;\
	bz,pn	%icc, 1f						;\
	ASM_LD(tmp1, dcache_linesize)					;\
	ASM_LD(tmp2, dflush_type)					;\
	cmp	tmp2, FLUSHPAGE_TYPE					;\
	be,pt	%icc, 2f						;\
	nop								;\
	sllx	arg1, CHEETAH_DC_VBIT_SHIFT, arg1/* tag to compare */	;\
	ASM_LD(tmp3, dcache_size)					;\
	cmp	tmp2, FLUSHMATCH_TYPE					;\
	be,pt	%icc, 3f						;\
	nop								;\
	/*								\
	 * flushtype = FLUSHALL_TYPE, flush the whole thing		\
	 * tmp3 = cache size						\
	 * tmp1 = cache line size					\
	 */								\
	sub	tmp3, tmp1, tmp2					;\
4:									\
	stxa	%g0, [tmp2]ASI_DC_TAG					;\
	membar	#Sync							;\
	cmp	%g0, tmp2						;\
	bne,pt	%icc, 4b						;\
	sub	tmp2, tmp1, tmp2					;\
	ba,pt	%icc, 1f						;\
	nop								;\
	/*								\
	 * flushtype = FLUSHPAGE_TYPE					\
	 * arg1 = pfn							\
	 * arg2 = virtual color						\
	 * tmp1 = cache line size					\
	 * tmp2 = tag from cache					\
	 * tmp3 = counter						\
	 */								\
2:									\
	set	MMU_PAGESIZE, tmp3					;\
	sllx	arg1, MMU_PAGESHIFT, arg1  /* pfn to 43 bit PA */	;\
	sub	tmp3, tmp1, tmp3					;\
4:									\
	stxa	%g0, [arg1 + tmp3]ASI_DC_INVAL				;\
	membar	#Sync							;\
5:									\
	cmp	%g0, tmp3						;\
	bnz,pt	%icc, 4b		/* branch if not done */	;\
	sub	tmp3, tmp1, tmp3					;\
	ba,pt	%icc, 1f						;\
	nop								;\
	/*								\
	 * flushtype = FLUSHMATCH_TYPE					\
	 * arg1 = tag to compare against				\
	 * tmp1 = cache line size					\
	 * tmp3 = cache size						\
	 * arg2 = counter						\
	 * tmp2 = cache tag						\
	 */								\
3:									\
	sub	tmp3, tmp1, arg2					;\
4:									\
	ldxa	[arg2]ASI_DC_TAG, tmp2		/* read tag */		;\
	btst	CHEETAH_DC_VBIT_MASK, tmp2				;\
	bz,pn	%icc, 5f		/* br if no valid sub-blocks */	;\
	andn	tmp2, CHEETAH_DC_VBIT_MASK, tmp2 /* clear out v bits */	;\
	cmp	tmp2, arg1						;\
	bne,pn	%icc, 5f		/* branch if tag miss */	;\
	nop								;\
	stxa	%g0, [arg2]ASI_DC_TAG					;\
	membar	#Sync							;\
5:									\
	cmp	%g0, arg2						;\
	bne,pt	%icc, 4b		/* branch if not done */	;\
	sub	arg2, tmp1, arg2					;\
1:


/* END CSTYLED */

#endif	/* !lint */

/*
 * Cheetah MMU and Cache operations.
 */

#if defined(lint)

/* ARGSUSED */
void
vtag_flushpage(caddr_t vaddr, uint64_t sfmmup)
{}

#else	/* lint */

/*
 * vtag_flushpage(vaddr, sfmmup)
 *
 * Demap one page from both the D-TLB and the I-TLB.  PSTATE_IE is cleared
 * for the duration and the caller's %pstate is written back in the delay
 * slot of each return.  When sfmmup equals ksfmmup the demap is issued
 * directly on vaddr; otherwise the primary context register is loaded with
 * the context of sfmmup around the demap and then restored.
 */
	ENTRY_NP(vtag_flushpage)
	/*
	 * flush page from the tlb
	 *
	 * %o0 = vaddr
	 * %o1 = sfmmup
	 */
	rdpr	%pstate, %o5			! %o5 = caller's pstate
#ifdef DEBUG
	PANIC_IF_INTR_DISABLED_PSTR(%o5, u3_di_label0, %g1)
#endif /* DEBUG */
	/*
	 * disable ints
	 */
	andn	%o5, PSTATE_IE, %o4
	wrpr	%o4, 0, %pstate

	/*
	 * Then, blow out the tlb
	 * Interrupts are disabled to prevent the primary ctx register
	 * from changing underneath us.
	 */
	sethi	%hi(ksfmmup), %o3
	ldx	[%o3 + %lo(ksfmmup)], %o3
	cmp	%o3, %o1
	bne,pt	%xcc, 1f			! if not kernel as, go to 1
	  sethi	%hi(FLUSH_ADDR), %o3		! delay slot: runs on both paths
	/*
	 * For Kernel demaps use primary. type = page implicitly
	 */
	stxa	%g0, [%o0]ASI_DTLB_DEMAP	/* dmmu flush for KCONTEXT */
	stxa	%g0, [%o0]ASI_ITLB_DEMAP	/* immu flush for KCONTEXT */
	flush	%o3
	retl
	  wrpr	%g0, %o5, %pstate		/* enable interrupts */
1:
	/*
	 * User demap.  We need to set the primary context properly.
	 * Secondary context cannot be used for Cheetah IMMU.
	 * %o0 = vaddr
	 * %o1 = sfmmup
	 * %o3 = FLUSH_ADDR
	 */
	SFMMU_CPU_CNUM(%o1, %g1, %g2)		! %g1 = sfmmu cnum on this CPU

	ldub	[%o1 + SFMMU_CEXT], %o4		! %o4 = sfmmup->sfmmu_cext
	sll	%o4, CTXREG_EXT_SHIFT, %o4
	or	%g1, %o4, %g1			! %g1 = primary pgsz | cnum

	wrpr	%g0, 1, %tl			! TL is 1 until reset to 0 below
	set	MMU_PCONTEXT, %o4
	or	DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %o0, %o0
	ldxa	[%o4]ASI_DMMU, %o2		! %o2 = save old ctxnum
	srlx	%o2, CTXREG_NEXT_SHIFT, %o1	! need to preserve nucleus pgsz
	sllx	%o1, CTXREG_NEXT_SHIFT, %o1	! %o1 = nucleus pgsz
	or	%g1, %o1, %g1			! %g1 = nucleus pgsz | primary pgsz | cnum
	stxa	%g1, [%o4]ASI_DMMU		! wr new ctxnum

	stxa	%g0, [%o0]ASI_DTLB_DEMAP
	stxa	%g0, [%o0]ASI_ITLB_DEMAP
	stxa	%o2, [%o4]ASI_DMMU		/* restore old ctxnum */
	flush	%o3
	wrpr	%g0, 0, %tl

	retl
	  wrpr	%g0, %o5, %pstate		/* enable interrupts */
	SET_SIZE(vtag_flushpage)

#endif	/* lint */

#if defined(lint)

void
vtag_flushall(void)
{}

#else	/* lint */

/*
 * vtag_flushall()
 *
 * Issue a DEMAP_ALL_TYPE demap to both the D-TLB and the I-TLB.
 * ENTRY_NP2 also publishes this code under the second name demap_all.
 */
	ENTRY_NP2(vtag_flushall, demap_all)
	/*
	 * flush the tlb
	 */
	sethi	%hi(FLUSH_ADDR), %o3
	set	DEMAP_ALL_TYPE, %g1
	stxa	%g0, [%g1]ASI_DTLB_DEMAP
	stxa	%g0, [%g1]ASI_ITLB_DEMAP
	flush	%o3
	retl
	  nop
	SET_SIZE(demap_all)
	SET_SIZE(vtag_flushall)

#endif	/* lint */


#if defined(lint)

/* ARGSUSED */
void
vtag_flushpage_tl1(uint64_t vaddr, uint64_t sfmmup)
{}

#else	/* lint */

	ENTRY_NP(vtag_flushpage_tl1)
	/*
	 * x-trap to flush page from tlb and tsb
	 *
	 * %g1 = vaddr, zero-extended on 32-bit kernel
	 * %g2 = sfmmup
	 *
	 * assumes TSBE_TAG = 0
	 *
	 * Exits with retry.  The srln/slln pair clears the low
	 * MMU_PAGESHIFT bits of %g1 before the demap type bits are or'ed in.
	 *
	 * NOTE(review): only TLB demap stores are visible in this handler;
	 * the "and tsb" wording above may be stale -- confirm.
	 */
	srln	%g1, MMU_PAGESHIFT, %g1

	sethi	%hi(ksfmmup), %g3
	ldx	[%g3 + %lo(ksfmmup)], %g3
	cmp	%g3, %g2
	bne,pt	%xcc, 1f			! if not kernel as, go to 1
	  slln	%g1, MMU_PAGESHIFT, %g1		/* g1 = vaddr */

	/* We need to demap in the kernel context */
	or	DEMAP_NUCLEUS | DEMAP_PAGE_TYPE, %g1, %g1
	stxa	%g0, [%g1]ASI_DTLB_DEMAP
	stxa	%g0, [%g1]ASI_ITLB_DEMAP
	retry
1:
	/* We need to demap in a user context */
	or	DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %g1, %g1

	SFMMU_CPU_CNUM(%g2, %g6, %g3)		! %g6 = sfmmu cnum on this CPU

	ldub	[%g2 + SFMMU_CEXT], %g4		! %g4 = sfmmup->cext
	sll	%g4, CTXREG_EXT_SHIFT, %g4
	or	%g6, %g4, %g6			! %g6 = pgsz | cnum

	set	MMU_PCONTEXT, %g4
	ldxa	[%g4]ASI_DMMU, %g5		/* rd old ctxnum */
	srlx	%g5, CTXREG_NEXT_SHIFT, %g2	/* %g2 = nucleus pgsz */
	sllx	%g2, CTXREG_NEXT_SHIFT, %g2	/* preserve nucleus pgsz */
	or	%g6, %g2, %g6			/* %g6 = nucleus pgsz | primary pgsz | cnum */
	stxa	%g6, [%g4]ASI_DMMU		/* wr new ctxnum */
	stxa	%g0, [%g1]ASI_DTLB_DEMAP
	stxa	%g0, [%g1]ASI_ITLB_DEMAP
	stxa	%g5, [%g4]ASI_DMMU		/* restore old ctxnum */
	retry
	SET_SIZE(vtag_flushpage_tl1)

#endif	/* lint */


#if defined(lint)

/* ARGSUSED */
void
vtag_flush_pgcnt_tl1(uint64_t vaddr, uint64_t sfmmup_pgcnt)
{}

#else	/* lint */

	ENTRY_NP(vtag_flush_pgcnt_tl1)
	/*
	 * x-trap to flush pgcnt MMU_PAGESIZE pages from tlb
	 *
	 * %g1 = vaddr, zero-extended on 32-bit kernel
	 * %g2 = <sfmmup58|pgcnt6>, (pgcnt - 1) is passed in via pgcnt6 bits.
	 *
	 * NOTE: this handler relies on the fact that no
	 *	interrupts or traps can occur during the loop
	 *	issuing the TLB_DEMAP operations. It is assumed
	 *	that interrupts are disabled and this code is
	 *	fetching from the kernel locked text address.
	 *
	 * assumes TSBE_TAG = 0
	 */
	set	SFMMU_PGCNT_MASK, %g4
	and	%g4, %g2, %g3			/* g3 = pgcnt - 1 */
	add	%g3, 1, %g3			/* g3 = pgcnt */

	andn	%g2, SFMMU_PGCNT_MASK, %g2	/* g2 = sfmmup */
	srln	%g1, MMU_PAGESHIFT, %g1

	sethi	%hi(ksfmmup), %g4
	ldx	[%g4 + %lo(ksfmmup)], %g4
	cmp	%g4, %g2
	bne,pn	%xcc, 1f			/* if not kernel as, go to 1 */
	  slln	%g1, MMU_PAGESHIFT, %g1		/* g1 = vaddr */

	/* We need to demap in the kernel context */
	or	DEMAP_NUCLEUS | DEMAP_PAGE_TYPE, %g1, %g1
	set	MMU_PAGESIZE, %g2		/* g2 = pgsize */
	sethi	%hi(FLUSH_ADDR), %g5
4:
	stxa	%g0, [%g1]ASI_DTLB_DEMAP
	stxa	%g0, [%g1]ASI_ITLB_DEMAP
	flush	%g5				! flush required by immu

	deccc	%g3				/* decr pgcnt */
	bnz,pt	%icc,4b
	  add	%g1, %g2, %g1			/* next page */
	retry
1:
	/*
	 * We need to demap in a user context
	 *
	 * g2 = sfmmup
	 * g3 = pgcnt
	 */
	SFMMU_CPU_CNUM(%g2, %g5, %g6)		! %g5 = sfmmu cnum on this CPU

	or	DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %g1, %g1

	ldub	[%g2 + SFMMU_CEXT], %g4		! %g4 = sfmmup->cext
	sll	%g4, CTXREG_EXT_SHIFT, %g4
	or	%g5, %g4, %g5

	set	MMU_PCONTEXT, %g4
	ldxa	[%g4]ASI_DMMU, %g6		/* rd old ctxnum */
	srlx	%g6, CTXREG_NEXT_SHIFT, %g2	/* %g2 = nucleus pgsz */
	sllx	%g2, CTXREG_NEXT_SHIFT, %g2	/* preserve nucleus pgsz */
	or	%g5, %g2, %g5			/* %g5 = nucleus pgsz | primary pgsz | cnum */
	stxa	%g5, [%g4]ASI_DMMU		/* wr new ctxnum */

	set	MMU_PAGESIZE, %g2		/* g2 = pgsize */
	sethi	%hi(FLUSH_ADDR), %g5
3:
	stxa	%g0, [%g1]ASI_DTLB_DEMAP
	stxa	%g0, [%g1]ASI_ITLB_DEMAP
	flush	%g5				! flush required by immu

	deccc	%g3				/* decr pgcnt */
	bnz,pt	%icc,3b
	  add	%g1, %g2, %g1			/* next page */

	stxa	%g6, [%g4]ASI_DMMU		/* restore old ctxnum */
	retry
	SET_SIZE(vtag_flush_pgcnt_tl1)

#endif	/* lint */

#if defined(lint)

/*ARGSUSED*/
void
vtag_flushall_tl1(uint64_t dummy1, uint64_t dummy2)
{}

#else	/* lint */

	ENTRY_NP(vtag_flushall_tl1)
	/*
	 * x-trap to flush tlb
	 *
	 * Issues a DEMAP_ALL_TYPE demap to the D-TLB and the I-TLB and
	 * exits with retry.  Only %g4 is written.
	 */
	set	DEMAP_ALL_TYPE, %g4
	stxa	%g0, [%g4]ASI_DTLB_DEMAP
	stxa	%g0, [%g4]ASI_ITLB_DEMAP
	retry
	SET_SIZE(vtag_flushall_tl1)

#endif	/* lint */


#if defined(lint)

/* ARGSUSED */
void
vac_flushpage(pfn_t pfnum, int vcolor)
{}

#else	/* lint */

/*
 * vac_flushpage(pfnum, color)
 *	Flush 1 8k page of the D-$ with physical page = pfnum
 *	Algorithm:
 *		The cheetah dcache is a 64k pseudo 4-way associative cache.
 *		It is a virtually indexed, physically tagged cache.
 */
	/*
	 * dflush_type is the word that DCACHE_FLUSHPAGE loads to choose its
	 * flush strategy; it is initialized to FLUSHPAGE_TYPE.
	 */
	.seg	".data"
	.align	8
	.global	dflush_type
dflush_type:
	.word	FLUSHPAGE_TYPE

	ENTRY(vac_flushpage)
	/*
	 * flush page from the d$
	 *
	 * %o0 = pfnum, %o1 = color
	 *
	 * %o2, %o3 and %o4 are handed to DCACHE_FLUSHPAGE as scratch.
	 */
	DCACHE_FLUSHPAGE(%o0, %o1, %o2, %o3, %o4)
	retl
	  nop
	SET_SIZE(vac_flushpage)

#endif	/* lint */


#if defined(lint)

/* ARGSUSED */
void
vac_flushpage_tl1(uint64_t pfnum, uint64_t vcolor)
{}

#else	/* lint */

	ENTRY_NP(vac_flushpage_tl1)
	/*
	 * x-trap to flush page from the d$
	 *
	 * %g1 = pfnum, %g2 = color
	 *
	 * %g3, %g4 and %g5 are handed to DCACHE_FLUSHPAGE as scratch;
	 * the handler exits with retry.
	 */
	DCACHE_FLUSHPAGE(%g1, %g2, %g3, %g4, %g5)
	retry
	SET_SIZE(vac_flushpage_tl1)

#endif	/* lint */


#if defined(lint)

/* ARGSUSED */
void
vac_flushcolor(int vcolor, pfn_t pfnum)
{}

#else	/* lint */
	/*
	 * In UltraSPARC III flushcolor is the same as flushpage.
	 * This is because we have an ASI to flush dcache using physical
	 * address.
	 * Flushing dcache using physical address is faster because we
	 * don't have to deal with associativity of dcache.
	 * The arguments to vac_flushpage() and vac_flushcolor() are the same
	 * but the order is reversed. This is because we maintain compatibility
	 * with spitfire, in which vac_flushcolor has only one argument, namely
	 * vcolor.
491 */ 492 493 ENTRY(vac_flushcolor) 494 /* 495 * %o0 = vcolor, %o1 = pfnum 496 */ 497 DCACHE_FLUSHPAGE(%o1, %o0, %o2, %o3, %o4) 498 retl 499 nop 500 SET_SIZE(vac_flushcolor) 501 502#endif /* lint */ 503 504 505#if defined(lint) 506 507/* ARGSUSED */ 508void 509vac_flushcolor_tl1(uint64_t vcolor, uint64_t pfnum) 510{} 511 512#else /* lint */ 513 514 ENTRY(vac_flushcolor_tl1) 515 /* 516 * %g1 = vcolor 517 * %g2 = pfnum 518 */ 519 DCACHE_FLUSHPAGE(%g2, %g1, %g3, %g4, %g5) 520 retry 521 SET_SIZE(vac_flushcolor_tl1) 522 523#endif /* lint */ 524 525#if defined(lint) 526 527int 528idsr_busy(void) 529{ 530 return (0); 531} 532 533#else /* lint */ 534 535/* 536 * Determine whether or not the IDSR is busy. 537 * Entry: no arguments 538 * Returns: 1 if busy, 0 otherwise 539 */ 540 ENTRY(idsr_busy) 541 ldxa [%g0]ASI_INTR_DISPATCH_STATUS, %g1 542 clr %o0 543 btst IDSR_BUSY, %g1 544 bz,a,pt %xcc, 1f 545 mov 1, %o0 5461: 547 retl 548 nop 549 SET_SIZE(idsr_busy) 550 551#endif /* lint */ 552 553#if defined(lint) 554 555/* ARGSUSED */ 556void 557init_mondo(xcfunc_t *func, uint64_t arg1, uint64_t arg2) 558{} 559 560/* ARGSUSED */ 561void 562init_mondo_nocheck(xcfunc_t *func, uint64_t arg1, uint64_t arg2) 563{} 564 565#else /* lint */ 566 567 .global _dispatch_status_busy 568_dispatch_status_busy: 569 .asciz "ASI_INTR_DISPATCH_STATUS error: busy" 570 .align 4 571 572/* 573 * Setup interrupt dispatch data registers 574 * Entry: 575 * %o0 - function or inumber to call 576 * %o1, %o2 - arguments (2 uint64_t's) 577 */ 578 .seg "text" 579 580 ENTRY(init_mondo) 581#ifdef DEBUG 582 ! 583 ! IDSR should not be busy at the moment 584 ! 585 ldxa [%g0]ASI_INTR_DISPATCH_STATUS, %g1 586 btst IDSR_BUSY, %g1 587 bz,pt %xcc, 1f 588 nop 589 sethi %hi(_dispatch_status_busy), %o0 590 call panic 591 or %o0, %lo(_dispatch_status_busy), %o0 592#endif /* DEBUG */ 593 594 ALTENTRY(init_mondo_nocheck) 595 ! 596 ! interrupt vector dispatch data reg 0 597 ! 
1:
	/* %g1-%g3 = addresses of the three dispatch data registers */
	mov	IDDR_0, %g1
	mov	IDDR_1, %g2
	mov	IDDR_2, %g3
	stxa	%o0, [%g1]ASI_INTR_DISPATCH

	!
	! interrupt vector dispatch data reg 1
	!
	stxa	%o1, [%g2]ASI_INTR_DISPATCH

	!
	! interrupt vector dispatch data reg 2
	!
	stxa	%o2, [%g3]ASI_INTR_DISPATCH

	membar	#Sync
	retl
	  nop
	SET_SIZE(init_mondo_nocheck)
	SET_SIZE(init_mondo)

#endif	/* lint */


#if !(defined(JALAPENO) || defined(SERRANO))

#if defined(lint)

/* ARGSUSED */
void
shipit(int upaid, int bn)
{ return; }

#else	/* lint */

/*
 * Ship mondo to aid using busy/nack pair bn
 *
 * Entry:
 *	%o0 - agent id (upaid)
 *	%o1 - busy/nack pair number (bn)
 *
 * Builds the dispatch address from both arguments plus IDCR_OFFSET in %g1
 * and stores %g0 to it through ASI_INTR_DISPATCH.  %g1 and %g2 are
 * overwritten.
 */
	ENTRY_NP(shipit)
	sll	%o0, IDCR_PID_SHIFT, %g1	! IDCR<18:14> = agent id
	sll	%o1, IDCR_BN_SHIFT, %g2		! IDCR<28:24> = b/n pair
	or	%g1, IDCR_OFFSET, %g1		! IDCR<13:0> = 0x70
	or	%g1, %g2, %g1
	stxa	%g0, [%g1]ASI_INTR_DISPATCH	! interrupt vector dispatch
	membar	#Sync
	retl
	  nop
	SET_SIZE(shipit)

#endif	/* lint */

#endif	/* !(JALAPENO || SERRANO) */


#if defined(lint)

/* ARGSUSED */
void
flush_instr_mem(caddr_t vaddr, size_t len)
{}

#else	/* lint */

/*
 * flush_instr_mem:
 *	Flush 1 page of the I-$ starting at vaddr
 * 	%o0 vaddr
 *	%o1 bytes to be flushed
 * UltraSPARC-III maintains consistency of the on-chip Instruction Cache with
 * the stores from all processors so that a FLUSH instruction is only needed
 * to ensure pipeline is consistent. This means a single flush is sufficient at
 * the end of a sequence of stores that updates the instruction stream to
 * ensure correct operation.
 *
 * Consequently the implementation issues exactly one flush and never
 * reads the length in %o1.
 */

	ENTRY(flush_instr_mem)
	flush	%o0			! address irrelevant
	retl
	  nop
	SET_SIZE(flush_instr_mem)

#endif	/* lint */


#if defined(CPU_IMP_ECACHE_ASSOC)

#if defined(lint)

/* ARGSUSED */
uint64_t
get_ecache_ctrl(void)
{ return (0); }

#else	/* lint */

/*
 * get_ecache_ctrl()
 *
 * Return the E$ control register in %o0.  When GET_CPU_IMPL reports
 * JAGUAR_IMPL the value is read from ASI_EC_CFG_TIMING; for every other
 * implementation it is read from ASI_EC_CTRL.
 */
	ENTRY(get_ecache_ctrl)
	GET_CPU_IMPL(%o0)
	cmp	%o0, JAGUAR_IMPL
	!
	! Putting an ASI access in the delay slot may
	! cause it to be accessed, even when annulled.
	!
	bne	1f
	  nop
	ldxa	[%g0]ASI_EC_CFG_TIMING, %o0	! read Jaguar shared E$ ctrl reg
	b	2f
	  nop
1:
	ldxa	[%g0]ASI_EC_CTRL, %o0		! read Ch/Ch+ E$ control reg
2:
	retl
	  nop
	SET_SIZE(get_ecache_ctrl)

#endif	/* lint */

#endif	/* CPU_IMP_ECACHE_ASSOC */


#if !(defined(JALAPENO) || defined(SERRANO))

/*
 * flush_ecache:
 *	%o0 - 64 bit physical address
 *	%o1 - ecache size
 *	%o2 - ecache linesize
 */
#if defined(lint)

/*ARGSUSED*/
void
flush_ecache(uint64_t physaddr, size_t ecache_size, size_t ecache_linesize)
{}

#else /* !lint */

	ENTRY(flush_ecache)

	/*
	 * For certain CPU implementations, we have to flush the L2 cache
	 * before flushing the ecache.
	 */
	PN_L2_FLUSHALL(%g3, %g4, %g5)

	/*
	 * Flush the entire Ecache using displacement flush.
	 */
	/* args: size (%o1), linesize (%o2), physaddr (%o0), %o4 = scratch */
	ECACHE_FLUSHALL(%o1, %o2, %o0, %o4)

	retl
	  nop
	SET_SIZE(flush_ecache)

#endif /* lint */

#endif	/* !(JALAPENO || SERRANO) */


#if defined(lint)

void
flush_dcache(void)
{}

#else	/* lint */

/*
 * flush_dcache()
 *
 * Flush the whole D$ with CH_DCACHE_FLUSHALL, sized by the globals
 * dcache_size and dcache_linesize.  %o0-%o2 are overwritten.
 */
	ENTRY(flush_dcache)
	ASM_LD(%o0, dcache_size)
	ASM_LD(%o1, dcache_linesize)
	CH_DCACHE_FLUSHALL(%o0, %o1, %o2)
	retl
	  nop
	SET_SIZE(flush_dcache)

#endif	/* lint */


#if defined(lint)

void
flush_icache(void)
{}

#else	/* lint */

/*
 * flush_icache()
 *
 * Flush the whole I$ with CH_ICACHE_FLUSHALL.  The size and line size come
 * from the CHPR_ICACHE_SIZE / CHPR_ICACHE_LINESIZE fields of the pointer
 * produced by GET_CPU_PRIVATE_PTR, or from the globals icache_size and
 * icache_linesize on the flush_icache_1 path.
 *
 * NOTE(review): GET_CPU_PRIVATE_PTR presumably branches to flush_icache_1
 * when no cpu-private pointer is available -- confirm against the macro.
 */
	ENTRY(flush_icache)
	GET_CPU_PRIVATE_PTR(%g0, %o0, %o2, flush_icache_1);
	ld	[%o0 + CHPR_ICACHE_LINESIZE], %o1
	ba,pt	%icc, 2f
	  ld	[%o0 + CHPR_ICACHE_SIZE], %o0
flush_icache_1:
	ASM_LD(%o0, icache_size)
	ASM_LD(%o1, icache_linesize)
2:
	CH_ICACHE_FLUSHALL(%o0, %o1, %o2, %o4)
	retl
	  nop
	SET_SIZE(flush_icache)

#endif	/* lint */

#if defined(lint)

/*ARGSUSED*/
void
kdi_flush_idcache(int dcache_size, int dcache_lsize, int icache_size,
    int icache_lsize)
{
}

#else	/* lint */

/*
 * kdi_flush_idcache(dcache_size, dcache_lsize, icache_size, icache_lsize)
 *
 * Flush the D$ and then the I$ using the sizes supplied by the caller in
 * %o0-%o3 instead of the kernel globals.  %g1 and %g2 are used as scratch.
 */
	ENTRY(kdi_flush_idcache)
	CH_DCACHE_FLUSHALL(%o0, %o1, %g1)
	CH_ICACHE_FLUSHALL(%o2, %o3, %g1, %g2)
	membar	#Sync
	retl
	  nop
	SET_SIZE(kdi_flush_idcache)

#endif	/* lint */

#if defined(lint)

void
flush_pcache(void)
{}

#else	/* lint */

/*
 * flush_pcache()
 *
 * Flush the P$ with PCACHE_FLUSHALL; %o0-%o2 are passed as its operands.
 */
	ENTRY(flush_pcache)
	PCACHE_FLUSHALL(%o0, %o1, %o2)
	retl
	  nop
	SET_SIZE(flush_pcache)

#endif	/* lint */


#if defined(CPU_IMP_L1_CACHE_PARITY)

#if defined(lint)

/* ARGSUSED */
void
get_dcache_dtag(uint32_t dcache_idx, uint64_t *data)
{}

#else	/* lint */

/*
 * Get dcache data and tag.  The Dcache data is a pointer to a ch_dc_data_t
 * structure (see cheetahregs.h):
 * The Dcache *should* be turned off when this code is executed.
 */
	/*
	 * %o0 = dcache_idx, %o1 = data
	 *
	 * PSTATE_IE and PSTATE_AM are cleared while the diagnostic ASIs are
	 * read; the caller's %pstate (kept in %o5) is restored in the delay
	 * slot of the return.  %o2 and %o3 are scratch.
	 */
	.align	128
	ENTRY(get_dcache_dtag)
	rdpr	%pstate, %o5
	andn	%o5, PSTATE_IE | PSTATE_AM, %o3
	wrpr	%g0, %o3, %pstate
	b	1f
	  stx	%o0, [%o1 + CH_DC_IDX]

	.align	128
1:
	ldxa	[%o0]ASI_DC_TAG, %o2
	stx	%o2, [%o1 + CH_DC_TAG]
	membar	#Sync
	ldxa	[%o0]ASI_DC_UTAG, %o2
	membar	#Sync
	stx	%o2, [%o1 + CH_DC_UTAG]
	ldxa	[%o0]ASI_DC_SNP_TAG, %o2
	stx	%o2, [%o1 + CH_DC_SNTAG]
	add	%o1, CH_DC_DATA, %o1		! %o1 = &data->(data area)
	clr	%o3				! %o3 = byte offset, step 8
2:
	membar	#Sync				! required before ASI_DC_DATA
	ldxa	[%o0 + %o3]ASI_DC_DATA, %o2
	membar	#Sync				! required after ASI_DC_DATA
	stx	%o2, [%o1 + %o3]
	cmp	%o3, CH_DC_DATA_REG_SIZE - 8
	blt	2b
	  add	%o3, 8, %o3

	/*
	 * Unlike other CPUs in the family, D$ data parity bits for Panther
	 * do not reside in the microtag. Instead, we have to read them
	 * using the DC_data_parity bit of ASI_DCACHE_DATA. Also, instead
	 * of just having 8 parity bits to protect all 32 bytes of data
	 * per line, we now have 32 bits of parity.
	 */
	GET_CPU_IMPL(%o3)
	cmp	%o3, PANTHER_IMPL
	bne	4f
	  clr	%o3				! delay slot: reset offset

	/*
	 * move our pointer to the next field where we store parity bits
	 * and add the offset of the last parity byte since we will be
	 * storing all 4 parity bytes within one 64 bit field like this:
	 *
	 * +------+------------+------------+------------+------------+
	 * |  -   | DC_parity  | DC_parity  | DC_parity  | DC_parity  |
	 * |  -   | for word 3 | for word 2 | for word 1 | for word 0 |
	 * +------+------------+------------+------------+------------+
	 *  63:32     31:24        23:16         15:8          7:0
	 */
	add	%o1, CH_DC_PN_DATA_PARITY - CH_DC_DATA + 7, %o1

	/* add the DC_data_parity bit into our working index */
	mov	1, %o2
	sll	%o2, PN_DC_DATA_PARITY_BIT_SHIFT, %o2
	or	%o0, %o2, %o0
3:
	membar	#Sync				! required before ASI_DC_DATA
	ldxa	[%o0 + %o3]ASI_DC_DATA, %o2
	membar	#Sync				! required after ASI_DC_DATA
	stb	%o2, [%o1]			! one parity byte per read
	dec	%o1				! next byte goes one lower
	cmp	%o3, CH_DC_DATA_REG_SIZE - 8
	blt	3b
	  add	%o3, 8, %o3
4:
	retl
	  wrpr	%g0, %o5, %pstate
	SET_SIZE(get_dcache_dtag)

#endif	/* lint */


#if defined(lint)

/* ARGSUSED */
void
get_icache_dtag(uint32_t ecache_idx, uint64_t *data)
{}

#else	/* lint */

/*
 * Get icache data and tag.  The data argument is a pointer to a ch_ic_data_t
 * structure (see cheetahregs.h):
 * The Icache *Must* be turned off when this function is called.
 * This is because diagnostic accesses to the Icache interfere with cache
 * consistency.
 *
 * %o0 = index, %o1 = data.  PSTATE_IE and PSTATE_AM are cleared while the
 * diagnostic ASIs are read and the caller's %pstate (kept in %o5) is
 * restored in the delay slot of the return.  The data loop copies
 * PN_IC_DATA_REG_SIZE bytes, 8 at a time, for every CPU implementation.
 */
	.align	128
	ENTRY(get_icache_dtag)
	rdpr	%pstate, %o5
	andn	%o5, PSTATE_IE | PSTATE_AM, %o3
	wrpr	%g0, %o3, %pstate

	stx	%o0, [%o1 + CH_IC_IDX]
	ldxa	[%o0]ASI_IC_TAG, %o2
	stx	%o2, [%o1 + CH_IC_PATAG]
	add	%o0, CH_ICTAG_UTAG, %o0
	ldxa	[%o0]ASI_IC_TAG, %o2
	add	%o0, (CH_ICTAG_UPPER - CH_ICTAG_UTAG), %o0
	stx	%o2, [%o1 + CH_IC_UTAG]
	ldxa	[%o0]ASI_IC_TAG, %o2
	add	%o0, (CH_ICTAG_LOWER - CH_ICTAG_UPPER), %o0
	stx	%o2, [%o1 + CH_IC_UPPER]
	ldxa	[%o0]ASI_IC_TAG, %o2
	andn	%o0, CH_ICTAG_TMASK, %o0	! strip the tag-select bits
	stx	%o2, [%o1 + CH_IC_LOWER]
	ldxa	[%o0]ASI_IC_SNP_TAG, %o2
	stx	%o2, [%o1 + CH_IC_SNTAG]
	add	%o1, CH_IC_DATA, %o1
	clr	%o3
2:
	ldxa	[%o0 + %o3]ASI_IC_DATA, %o2
	stx	%o2, [%o1 + %o3]
	cmp	%o3, PN_IC_DATA_REG_SIZE - 8
	blt	2b
	  add	%o3, 8, %o3

	retl
	  wrpr	%g0, %o5, %pstate
	SET_SIZE(get_icache_dtag)

#endif	/* lint */

#if defined(lint)

/* ARGSUSED */
void
get_pcache_dtag(uint32_t pcache_idx, uint64_t *data)
{}

#else	/* lint */

/*
 * Get pcache data and tags.
 * inputs:
 *   pcache_idx	- fully constructed VA for accessing P$ diagnostic
 *		  registers. Contains PC_way and PC_addr shifted into
 *		  the correct bit positions. See the PRM for more details.
 *   data	- pointer to a ch_pc_data_t
 * structure (see cheetahregs.h):
 */
	/*
	 * %o0 = pcache_idx, %o1 = data
	 *
	 * PSTATE_IE and PSTATE_AM are cleared while the diagnostic ASIs are
	 * read; the caller's %pstate (kept in %o5) is restored in the delay
	 * slot of the return.
	 */
	.align	128
	ENTRY(get_pcache_dtag)
	rdpr	%pstate, %o5
	andn	%o5, PSTATE_IE | PSTATE_AM, %o3
	wrpr	%g0, %o3, %pstate

	stx	%o0, [%o1 + CH_PC_IDX]
	ldxa	[%o0]ASI_PC_STATUS_DATA, %o2
	stx	%o2, [%o1 + CH_PC_STATUS]
	ldxa	[%o0]ASI_PC_TAG, %o2
	stx	%o2, [%o1 + CH_PC_TAG]
	ldxa	[%o0]ASI_PC_SNP_TAG, %o2
	stx	%o2, [%o1 + CH_PC_SNTAG]
	add	%o1, CH_PC_DATA, %o1
	clr	%o3				! %o3 = byte offset, step 8
2:
	ldxa	[%o0 + %o3]ASI_PC_DATA, %o2
	stx	%o2, [%o1 + %o3]
	cmp	%o3, CH_PC_DATA_REG_SIZE - 8
	blt	2b
	  add	%o3, 8, %o3

	retl
	  wrpr	%g0, %o5, %pstate
	SET_SIZE(get_pcache_dtag)

#endif	/* lint */

#endif	/* CPU_IMP_L1_CACHE_PARITY */

#if defined(lint)

/* ARGSUSED */
void
set_dcu(uint64_t dcu)
{}

#else	/* lint */

/*
 * re-enable the i$, d$, w$, and p$ according to bootup cache state.
 * Turn on WE, HPE, SPE, PE, IC, and DC bits defined as DCU_CACHE.
 *   %o0 - 64 bit constant
 *
 * The value in %o0 is written to the DCU register as-is; no bits are
 * merged with the current register contents here.
 */
	ENTRY(set_dcu)
	stxa	%o0, [%g0]ASI_DCU	! Store to DCU
	flush	%g0	/* flush required after changing the IC bit */
	retl
	  nop
	SET_SIZE(set_dcu)

#endif	/* lint */


#if defined(lint)

uint64_t
get_dcu(void)
{
	return ((uint64_t)0);
}

#else	/* lint */

/*
 * Return DCU register.
 */
	ENTRY(get_dcu)
	ldxa	[%g0]ASI_DCU, %o0		/* DCU control register */
	retl
	  nop
	SET_SIZE(get_dcu)

#endif	/* lint */

/*
 * Cheetah/Cheetah+ level 15 interrupt handler trap table entry.
 *
 * This handler is used to check for softints generated by error trap
 * handlers to report errors.  On Cheetah, this mechanism is used by the
 * Fast ECC at TL>0 error trap handler and, on Cheetah+, by both the Fast
 * ECC at TL>0 error and the I$/D$ parity error at TL>0 trap handlers.
 * NB: Must be 8 instructions or less to fit in trap table and code must
 *     be relocatable.
 */
#if defined(lint)

void
ch_pil15_interrupt_instr(void)
{}

#else	/* lint */

	ENTRY_NP(ch_pil15_interrupt_instr)
	ASM_JMP(%g1, ch_pil15_interrupt)
	SET_SIZE(ch_pil15_interrupt_instr)

#endif


#if defined(lint)

void
ch_pil15_interrupt(void)
{}

#else	/* lint */

	ENTRY_NP(ch_pil15_interrupt)

	/*
	 * Since pil_interrupt is hacked to assume that every level 15
	 * interrupt is generated by the CPU to indicate a performance
	 * counter overflow this gets ugly.  Before calling pil_interrupt
	 * the Error at TL>0 pending status is inspected.  If it is
	 * non-zero, then an error has occurred and it is handled.
	 * Otherwise control is transferred to pil_interrupt.  Note that if
	 * an error is detected pil_interrupt will not be called and
	 * overflow interrupts may be lost causing erroneous performance
	 * measurements.  However, error-recovery will have a detrimental
	 * effect on performance anyway.
	 */
	CPU_INDEX(%g1, %g4)			! %g1 = this CPU's index
	set	ch_err_tl1_pending, %g4
	ldub	[%g1 + %g4], %g2		! %g2 = ch_err_tl1_pending[cpu]
	brz	%g2, 1f
	  nop

	/*
	 * We have a pending TL>0 error, clear the TL>0 pending status.
	 */
	stb	%g0, [%g1 + %g4]

	/*
	 * Clear the softint.
	 */
	mov	1, %g5
	sll	%g5, PIL_15, %g5
	wr	%g5, CLEAR_SOFTINT

	/*
	 * For Cheetah*, call cpu_tl1_error via systrap at PIL 15
	 * to process the Fast ECC/Cache Parity at TL>0 error.  Clear
	 * panic flag (%g2).
	 */
	set	cpu_tl1_error, %g1
	clr	%g2
	ba	sys_trap
	  mov	PIL_15, %g4

1:
	/*
	 * No TL>0 error is pending on this CPU (the pending byte read
	 * above was zero).
	 *
	 * Call the default interrupt handler.
	 */
	sethi	%hi(pil_interrupt), %g1
	jmp	%g1 + %lo(pil_interrupt)
	  mov	PIL_15, %g4

	SET_SIZE(ch_pil15_interrupt)
#endif


/*
 * Error Handling
 *
 * Cheetah provides error checking for all memory access paths between
 * the CPU, External Cache, Cheetah Data Switch and system bus. Error
 * information is logged in the AFSR, (also AFSR_EXT for Panther) and
 * AFAR and one of the following traps is generated (provided that it
 * is enabled in External Cache Error Enable Register) to handle that
 * error:
 * 1. trap 0x70: Precise trap
 *    tt0_fecc for errors at trap level(TL)>=0
 * 2. trap 0x0A and 0x32: Deferred trap
 *    async_err for errors at TL>=0
 * 3. trap 0x63: Disrupting trap
 *    ce_err for errors at TL=0
 *    (Note that trap 0x63 cannot happen at trap level > 0)
 *
 * Trap level one handlers panic the system except for the fast ecc
 * error handler which tries to recover from certain errors.
 */

/*
 * FAST ECC TRAP STRATEGY:
 *
 * Software must handle single and multi bit errors which occur due to data
 * or instruction cache reads from the external cache. A single or multi bit
 * error occurring in one of these situations results in a precise trap.
 *
 * The basic flow of this trap handler is as follows:
 *
 * 1) Record the state and then turn off the Dcache and Icache.  The Dcache
 *    is disabled because bad data could have been installed.  The Icache is
 *    turned off because we want to capture the Icache line related to the
 *    AFAR.
 * 2) Disable trapping on CEEN/NCEEN errors during TL=0 processing.
 * 3) Park sibling core if caches are shared (to avoid race condition while
 *    accessing shared resources such as L3 data staging register during
 *    CPU logout).
 * 4) Read the AFAR and AFSR.
 * 5) If CPU logout structure is not being used, then:
 *    6) Clear all errors from the AFSR.
 *    7) Capture Ecache, Dcache and Icache lines in "CPU log out" structure.
 *    8) Flush Ecache then Flush Dcache and Icache and restore to previous
 *       state.
 *    9) Unpark sibling core if we parked it earlier.
 *    10) call cpu_fast_ecc_error via systrap at PIL 14 unless we're already
 *        running at PIL 15.
 * 6) Otherwise, if CPU logout structure is being used:
 *    7) Increment the "logout busy count".
 *    8) Flush Ecache then Flush Dcache and Icache and restore to previous
 *       state.
 *    9) Unpark sibling core if we parked it earlier.
 *    10) Issue a retry since the other CPU error logging code will end up
 *        finding this error bit and logging information about it later.
 * 7) Alternatively (to 5 and 6 above), if the cpu_private struct is not
 *    yet initialized such that we can't even check the logout struct, then
 *    we place the clo_flags data into %g2 (sys_trap->have_win arg #1) and
 *    call cpu_fast_ecc_error via systrap.  The clo_flags parameter is used
 *    to determine information such as TL, TT, CEEN and NCEEN settings, etc.
 *    in the high level trap handler since we don't have access to detailed
 *    logout information in cases where the cpu_private struct is not yet
 *    initialized.
 *
 * We flush the E$ and D$ here on TL=1 code to prevent getting nested
 * Fast ECC traps in the TL=0 code.  If we get a Fast ECC event here in
 * the TL=1 code, we will go to the Fast ECC at TL>0 handler which,
 * since it uses different code/data from this handler, has a better
 * chance of fixing things up than simply recursing through this code
 * again (this would probably cause an eventual kernel stack overflow).
 * If the Fast ECC at TL>0 handler encounters a Fast ECC error before it
 * can flush the E$ (or the error is a stuck-at bit), we will recurse in
 * the Fast ECC at TL>0 handler and eventually Red Mode.
 *
 * Note that for Cheetah (and only Cheetah), we use alias addresses for
 * flushing rather than ASI accesses (which don't exist on Cheetah).
 * Should we encounter a Fast ECC error within this handler on Cheetah,
 * there's a good chance it's within the ecache_flushaddr buffer (since
 * it's the largest piece of memory we touch in the handler and it is
 * usually kernel text/data).  For that reason the Fast ECC at TL>0
 * handler for Cheetah uses an alternate buffer: ecache_tl1_flushaddr.
 */

/*
 * Cheetah ecc-protected E$ trap (Trap 70) at TL=0
 * tt0_fecc is replaced by fecc_err_instr in cpu_init_trap of the various
 * architecture-specific files.
 * NB: Must be 8 instructions or less to fit in trap table and code must
 *     be relocatable.
 *
 * On exit to fast_ecc_err, %g1 holds the DCU value read on entry (before
 * DCU_DC and DCU_IC were cleared) and %g4 has been used as scratch.
 */

#if defined(lint)

void
fecc_err_instr(void)
{}

#else	/* lint */

	ENTRY_NP(fecc_err_instr)
	membar	#Sync			! Cheetah requires membar #Sync

	/*
	 * Save current DCU state.  Turn off the Dcache and Icache.
	 */
	ldxa	[%g0]ASI_DCU, %g1	! save DCU in %g1
	andn	%g1, DCU_DC + DCU_IC, %g4
	stxa	%g4, [%g0]ASI_DCU
	flush	%g0	/* flush required after changing the IC bit */

	ASM_JMP(%g4, fast_ecc_err)
	SET_SIZE(fecc_err_instr)

#endif	/* lint */


#if !(defined(JALAPENO) || defined(SERRANO))

#if defined(lint)

void
fast_ecc_err(void)
{}

#else	/* lint */

	.section ".text"
	.align	64
	ENTRY_NP(fast_ecc_err)

	/*
	 * Turn off CEEN and NCEEN.
	 */
	ldxa	[%g0]ASI_ESTATE_ERR, %g3
	andn	%g3, EN_REG_NCEEN + EN_REG_CEEN, %g4
	stxa	%g4, [%g0]ASI_ESTATE_ERR
	membar	#Sync			!
membar sync required 1299 1300 /* 1301 * Check to see whether we need to park our sibling core 1302 * before recording diagnostic information from caches 1303 * which may be shared by both cores. 1304 * We use %g1 to store information about whether or not 1305 * we had to park the core (%g1 holds our DCUCR value and 1306 * we only use bits from that register which are "reserved" 1307 * to keep track of core parking) so that we know whether 1308 * or not to unpark later. %g5 and %g4 are scratch registers. 1309 */ 1310 PARK_SIBLING_CORE(%g1, %g5, %g4) 1311 1312 /* 1313 * Do the CPU log out capture. 1314 * %g3 = "failed?" return value. 1315 * %g2 = Input = AFAR. Output the clo_flags info which is passed 1316 * into this macro via %g4. Output only valid if cpu_private 1317 * struct has not been initialized. 1318 * CHPR_FECCTL0_LOGOUT = cpu logout structure offset input 1319 * %g4 = Trap information stored in the cpu logout flags field 1320 * %g5 = scr1 1321 * %g6 = scr2 1322 * %g3 = scr3 1323 * %g4 = scr4 1324 */ 1325 /* store the CEEN and NCEEN values, TL=0 */ 1326 and %g3, EN_REG_CEEN + EN_REG_NCEEN, %g4 1327 set CHPR_FECCTL0_LOGOUT, %g6 1328 DO_CPU_LOGOUT(%g3, %g2, %g6, %g4, %g5, %g6, %g3, %g4) 1329 1330 /* 1331 * Flush the Ecache (and L2 cache for Panther) to get the error out 1332 * of the Ecache. If the UCC or UCU is on a dirty line, then the 1333 * following flush will turn that into a WDC or WDU, respectively. 1334 */ 1335 PN_L2_FLUSHALL(%g4, %g5, %g6) 1336 1337 CPU_INDEX(%g4, %g5) 1338 mulx %g4, CPU_NODE_SIZE, %g4 1339 set cpunodes, %g5 1340 add %g4, %g5, %g4 1341 ld [%g4 + ECACHE_LINESIZE], %g5 1342 ld [%g4 + ECACHE_SIZE], %g4 1343 1344 ASM_LDX(%g6, ecache_flushaddr) 1345 ECACHE_FLUSHALL(%g4, %g5, %g6, %g7) 1346 1347 /* 1348 * Flush the Dcache. Since bad data could have been installed in 1349 * the Dcache we must flush it before re-enabling it. 
1350 */ 1351 ASM_LD(%g5, dcache_size) 1352 ASM_LD(%g6, dcache_linesize) 1353 CH_DCACHE_FLUSHALL(%g5, %g6, %g7) 1354 1355 /* 1356 * Flush the Icache. Since we turned off the Icache to capture the 1357 * Icache line it is now stale or corrupted and we must flush it 1358 * before re-enabling it. 1359 */ 1360 GET_CPU_PRIVATE_PTR(%g0, %g5, %g7, fast_ecc_err_5); 1361 ld [%g5 + CHPR_ICACHE_LINESIZE], %g6 1362 ba,pt %icc, 6f 1363 ld [%g5 + CHPR_ICACHE_SIZE], %g5 1364fast_ecc_err_5: 1365 ASM_LD(%g5, icache_size) 1366 ASM_LD(%g6, icache_linesize) 13676: 1368 CH_ICACHE_FLUSHALL(%g5, %g6, %g7, %g4) 1369 1370 /* 1371 * check to see whether we parked our sibling core at the start 1372 * of this handler. If so, we need to unpark it here. 1373 * We use DCUCR reserved bits (stored in %g1) to keep track of 1374 * whether or not we need to unpark. %g5 and %g4 are scratch registers. 1375 */ 1376 UNPARK_SIBLING_CORE(%g1, %g5, %g4) 1377 1378 /* 1379 * Restore the Dcache and Icache to the previous state. 1380 */ 1381 stxa %g1, [%g0]ASI_DCU 1382 flush %g0 /* flush required after changing the IC bit */ 1383 1384 /* 1385 * Make sure our CPU logout operation was successful. 1386 */ 1387 cmp %g3, %g0 1388 be 8f 1389 nop 1390 1391 /* 1392 * If the logout structure had been busy, how many times have 1393 * we tried to use it and failed (nesting count)? If we have 1394 * already recursed a substantial number of times, then we can 1395 * assume things are not going to get better by themselves and 1396 * so it would be best to panic. 1397 */ 1398 cmp %g3, CLO_NESTING_MAX 1399 blt 7f 1400 nop 1401 1402 call ptl1_panic 1403 mov PTL1_BAD_ECC, %g1 1404 14057: 1406 /* 1407 * Otherwise, if the logout structure was busy but we have not 1408 * nested more times than our maximum value, then we simply 1409 * issue a retry. Our TL=0 trap handler code will check and 1410 * clear the AFSR after it is done logging what is currently 1411 * in the logout struct and handle this event at that time. 
1412 */ 1413 retry 14148: 1415 /* 1416 * Call cpu_fast_ecc_error via systrap at PIL 14 unless we're 1417 * already at PIL 15. 1418 */ 1419 set cpu_fast_ecc_error, %g1 1420 rdpr %pil, %g4 1421 cmp %g4, PIL_14 1422 ba sys_trap 1423 movl %icc, PIL_14, %g4 1424 1425 SET_SIZE(fast_ecc_err) 1426 1427#endif /* lint */ 1428 1429#endif /* !(JALAPENO || SERRANO) */ 1430 1431 1432/* 1433 * Cheetah/Cheetah+ Fast ECC at TL>0 trap strategy: 1434 * 1435 * The basic flow of this trap handler is as follows: 1436 * 1437 * 1) In the "trap 70" trap table code (fecc_err_tl1_instr), generate a 1438 * software trap 0 ("ta 0") to buy an extra set of %tpc, etc. which we 1439 * will use to save %g1 and %g2. 1440 * 2) At the software trap 0 at TL>0 trap table code (fecc_err_tl1_cont_instr), 1441 * we save %g1+%g2 using %tpc, %tnpc + %tstate and jump to the fast ecc 1442 * handler (using the just saved %g1). 1443 * 3) Turn off the Dcache if it was on and save the state of the Dcache 1444 * (whether on or off) in Bit2 (CH_ERR_TSTATE_DC_ON) of %tstate. 1445 * NB: we don't turn off the Icache because bad data is not installed nor 1446 * will we be doing any diagnostic accesses. 1447 * 4) compute physical address of the per-cpu/per-tl save area using %g1+%g2 1448 * 5) Save %g1-%g7 into the per-cpu/per-tl save area (%g1 + %g2 from the 1449 * %tpc, %tnpc, %tstate values previously saved). 1450 * 6) set %tl to %tl - 1. 1451 * 7) Save the appropriate flags and TPC in the ch_err_tl1_data structure. 1452 * 8) Save the value of CH_ERR_TSTATE_DC_ON in the ch_err_tl1_tmp field. 1453 * 9) For Cheetah and Jalapeno, read the AFAR and AFSR and clear. For 1454 * Cheetah+ (and later), read the shadow AFAR and AFSR but don't clear. 1455 * Save the values in ch_err_tl1_data. For Panther, read the shadow 1456 * AFSR_EXT and save the value in ch_err_tl1_data. 1457 * 10) Disable CEEN/NCEEN to prevent any disrupting/deferred errors from 1458 * being queued. We'll report them via the AFSR/AFAR capture in step 13. 
 * 11) Flush the Ecache.
 *    NB: the Ecache is flushed assuming the largest possible size with
 *    the smallest possible line size since access to the cpu_nodes may
 *    cause an unrecoverable DTLB miss.
 * 12) Reenable CEEN/NCEEN with the value saved from step 10.
 * 13) For Cheetah and Jalapeno, read the AFAR and AFSR and clear again.
 *    For Cheetah+ (and later), read the primary AFAR and AFSR and now clear.
 *    Save the read AFSR/AFAR values in ch_err_tl1_data.  For Panther,
 *    read and clear the primary AFSR_EXT and save it in ch_err_tl1_data.
 * 14) Flush and re-enable the Dcache if it was on at step 3.
 * 15) Do TRAPTRACE if enabled.
 * 16) Check if a UCU->WDU (or L3_UCU->WDU for Panther) happened, panic if so.
 * 17) Set the event pending flag in ch_err_tl1_pending[CPU]
 * 18) Cause a softint 15.  The pil15_interrupt handler will inspect the
 *    event pending flag and call cpu_tl1_error via systrap if set.
 * 19) Restore the registers from step 5 and issue retry.
 */

/*
 * Cheetah ecc-protected E$ trap (Trap 70) at TL>0
 * tt1_fecc is replaced by fecc_err_tl1_instr in cpu_init_trap of the various
 * architecture-specific files.  This generates a "Software Trap 0" at TL>0,
 * which goes to fecc_err_tl1_cont_instr, and we continue the handling there.
 * NB: Must be 8 instructions or less to fit in trap table and code must
 *     be relocatable.
 */

#if defined(lint)

void
fecc_err_tl1_instr(void)
{}

#else	/* lint */

	ENTRY_NP(fecc_err_tl1_instr)
	CH_ERR_TL1_TRAPENTRY(SWTRAP_0);
	SET_SIZE(fecc_err_tl1_instr)

#endif	/* lint */

/*
 * Software trap 0 at TL>0.
 * tt1_swtrap0 is replaced by fecc_err_tl1_cont_instr in cpu_init_trap of
 * the various architecture-specific files.  This is used as a continuation
 * of the fast ecc handling where we've bought an extra TL level, so we can
 * use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
 * and %g2.  Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
 * there's a reserved hole from 3-7.  We only use bits 0-1 and 8-9 (the low
 * order two bits from %g1 and %g2 respectively).
 * NB: Must be 8 instructions or less to fit in trap table and code must
 *     be relocatable.
 */
#if defined(lint)

void
fecc_err_tl1_cont_instr(void)
{}

#else	/* lint */

	ENTRY_NP(fecc_err_tl1_cont_instr)
	CH_ERR_TL1_SWTRAPENTRY(fast_ecc_tl1_err)
	SET_SIZE(fecc_err_tl1_cont_instr)

#endif	/* lint */


#if defined(lint)

void
ce_err(void)
{}

#else	/* lint */

/*
 * The ce_err function handles disrupting trap type 0x63 at TL=0.
 *
 * AFSR errors bits which cause this trap are:
 *	CE, EMC, EDU:ST, EDC, WDU, WDC, CPU, CPC, IVU, IVC
 *
 * NCEEN Bit of Cheetah External Cache Error Enable Register enables
 * the following AFSR disrupting traps: EDU:ST, WDU, CPU, IVU
 *
 * CEEN Bit of Cheetah External Cache Error Enable Register enables
 * the following AFSR disrupting traps: CE, EMC, EDC, WDC, CPC, IVC
 *
 * Cheetah+ also handles (No additional processing required):
 *    DUE, DTO, DBERR	(NCEEN controlled)
 *    THCE		(CEEN and ET_ECC_en controlled)
 *    TUE		(ET_ECC_en controlled)
 *
 * Panther further adds:
 *    IMU, L3_EDU, L3_WDU, L3_CPU		(NCEEN controlled)
 *    IMC, L3_EDC, L3_WDC, L3_CPC, L3_THCE	(CEEN controlled)
 *    TUE_SH, TUE		(NCEEN and L2_tag_ECC_en controlled)
 *    L3_TUE, L3_TUE_SH		(NCEEN and ET_ECC_en controlled)
 *    THCE			(CEEN and L2_tag_ECC_en controlled)
 *    L3_THCE			(CEEN and ET_ECC_en controlled)
 *
 * Steps:
 *	1. Disable hardware corrected disrupting errors only (CEEN)
 *	2. Park sibling core if caches are shared (to avoid race
 *	   condition while accessing shared resources such as L3
 *	   data staging register during CPU logout).
 *	3. If the CPU logout structure is not currently being used:
 *		4. Clear AFSR error bits
 *		5. Capture Ecache, Dcache and Icache lines associated
 *		   with AFAR.
 *		6. Unpark sibling core if we parked it earlier.
 *		7. call cpu_disrupting_error via sys_trap at PIL 14
 *		   unless we're already running at PIL 15.
 *	4. Otherwise, if the CPU logout structure is busy:
 *		5. Increment "logout busy count" and place into %g3
 *		6. Unpark sibling core if we parked it earlier.
 *		7. Issue a retry since the other CPU error logging
 *		   code will end up finding this error bit and logging
 *		   information about it later.
 *	5. Alternatively (to 3 and 4 above), if the cpu_private struct is
 *         not yet initialized such that we can't even check the logout
 *         struct, then we place the clo_flags data into %g2
 *         (sys_trap->have_win arg #1) and call cpu_disrupting_error via
 *         systrap.  The clo_flags parameter is used to determine information
 *         such as TL, TT, CEEN settings, etc in the high level trap
 *         handler since we don't have access to detailed logout information
 *         in cases where the cpu_private struct is not yet initialized.
 *
 * %g3: [ logout busy count ] - arg #2
 * %g2: [ clo_flags if cpu_private unavailable ] - sys_trap->have_win: arg #1
 */

	.align	128
	ENTRY_NP(ce_err)
	membar	#Sync			! Cheetah requires membar #Sync

	/*
	 * Disable trap on hardware corrected errors (CEEN) while at TL=0
	 * to prevent recursion.
	 */
	ldxa	[%g0]ASI_ESTATE_ERR, %g1
	bclr	EN_REG_CEEN, %g1
	stxa	%g1, [%g0]ASI_ESTATE_ERR
	membar	#Sync			! membar sync required

	/*
	 * Save current DCU state.  Turn off Icache to allow capture of
	 * Icache data by DO_CPU_LOGOUT.
	 *
	 * Note that %g1 is reused here: the ESTATE_ERR value above is no
	 * longer needed, and from this point %g1 holds the saved DCU value.
	 */
	ldxa	[%g0]ASI_DCU, %g1	! save DCU in %g1
	andn	%g1, DCU_IC, %g4
	stxa	%g4, [%g0]ASI_DCU
	flush	%g0	/* flush required after changing the IC bit */

	/*
	 * Check to see whether we need to park our sibling core
	 * before recording diagnostic information from caches
	 * which may be shared by both cores.
	 * We use %g1 to store information about whether or not
	 * we had to park the core (%g1 holds our DCUCR value and
	 * we only use bits from that register which are "reserved"
	 * to keep track of core parking) so that we know whether
	 * or not to unpark later.  %g5 and %g4 are scratch registers.
	 */
	PARK_SIBLING_CORE(%g1, %g5, %g4)

	/*
	 * Do the CPU log out capture.
	 *   %g3 = "failed?" return value.
	 *   %g2 = Input = AFAR. Output the clo_flags info which is passed
	 *         into this macro via %g4. Output only valid if cpu_private
	 *         struct has not been initialized.
	 *   CHPR_CECC_LOGOUT = cpu logout structure offset input
	 *   %g4 = Trap information stored in the cpu logout flags field
	 *   %g5 = scr1
	 *   %g6 = scr2
	 *   %g3 = scr3
	 *   %g4 = scr4
	 */
	clr	%g4			! TL=0 bit in afsr
	set	CHPR_CECC_LOGOUT, %g6
	DO_CPU_LOGOUT(%g3, %g2, %g6, %g4, %g5, %g6, %g3, %g4)

	/*
	 * Flush the Icache.  Since we turned off the Icache to capture the
	 * Icache line it is now stale or corrupted and we must flush it
	 * before re-enabling it.
	 *
	 * The I$ size/linesize come from the cpu_private struct when
	 * GET_CPU_PRIVATE_PTR succeeds; otherwise control goes to ce_err_1,
	 * which loads the global icache_size and icache_linesize instead.
	 */
	GET_CPU_PRIVATE_PTR(%g0, %g5, %g7, ce_err_1);
	ld	[%g5 + CHPR_ICACHE_LINESIZE], %g6
	ba,pt	%icc, 2f
	  ld	[%g5 + CHPR_ICACHE_SIZE], %g5	/* delay slot */
ce_err_1:
	ASM_LD(%g5, icache_size)
	ASM_LD(%g6, icache_linesize)
2:
	CH_ICACHE_FLUSHALL(%g5, %g6, %g7, %g4)

	/*
	 * check to see whether we parked our sibling core at the start
	 * of this handler.  If so, we need to unpark it here.
	 * We use DCUCR reserved bits (stored in %g1) to keep track of
	 * whether or not we need to unpark.  %g5 and %g4 are scratch registers.
	 */
	UNPARK_SIBLING_CORE(%g1, %g5, %g4)

	/*
	 * Restore Icache to previous state.
	 */
	stxa	%g1, [%g0]ASI_DCU
	flush	%g0	/* flush required after changing the IC bit */

	/*
	 * Make sure our CPU logout operation was successful.
	 */
	cmp	%g3, %g0
	be	4f
	  nop

	/*
	 * If the logout structure had been busy, how many times have
	 * we tried to use it and failed (nesting count)?  If we have
	 * already recursed a substantial number of times, then we can
	 * assume things are not going to get better by themselves and
	 * so it would be best to panic.
	 */
	cmp	%g3, CLO_NESTING_MAX
	blt	3f
	  nop

	call	ptl1_panic
	  mov	PTL1_BAD_ECC, %g1	/* delay slot: %g1 set for ptl1_panic */

3:
	/*
	 * Otherwise, if the logout structure was busy but we have not
	 * nested more times than our maximum value, then we simply
	 * issue a retry.  Our TL=0 trap handler code will check and
	 * clear the AFSR after it is done logging what is currently
	 * in the logout struct and handle this event at that time.
	 */
	retry
4:
	/*
	 * Call cpu_disrupting_error via systrap at PIL 14 unless we're
	 * already at PIL 15.
	 *
	 * %g4 starts as the current %pil; the movl in the delay slot
	 * replaces it with PIL_14 only when the current %pil is lower.
	 */
	set	cpu_disrupting_error, %g1
	rdpr	%pil, %g4
	cmp	%g4, PIL_14
	ba	sys_trap
	  movl	%icc, PIL_14, %g4
	SET_SIZE(ce_err)

#endif	/* lint */


#if defined(lint)

/*
 * This trap cannot happen at TL>0 which means this routine will never
 * actually be called and so we treat this like a BAD TRAP panic.
 */
void
ce_err_tl1(void)
{}

#else	/* lint */

	.align	64
	ENTRY_NP(ce_err_tl1)

	call	ptl1_panic
	  mov	PTL1_BAD_TRAP, %g1	/* delay slot: %g1 set for ptl1_panic */

	SET_SIZE(ce_err_tl1)

#endif	/* lint */


#if defined(lint)

void
async_err(void)
{}

#else	/* lint */

/*
 * The async_err function handles deferred trap types 0xA
 * (instruction_access_error) and 0x32 (data_access_error) at TL>=0.
 *
 * AFSR errors bits which cause this trap are:
 *	UE, EMU, EDU:BLD, L3_EDU:BLD, TO, BERR
 * On some platforms, EMU may cause cheetah to pull the error pin
 * never giving Solaris a chance to take a trap.
 *
 * NCEEN Bit of Cheetah External Cache Error Enable Register enables
 * the following AFSR deferred traps: UE, EMU, EDU:BLD, TO, BERR
 *
 * Steps:
 *	1. Disable CEEN and NCEEN errors to prevent recursive errors.
 *	2. Turn D$ off per Cheetah PRM P.5 Note 6, turn I$ off to capture
 *         I$ line in DO_CPU_LOGOUT.
 *	3. Park sibling core if caches are shared (to avoid race
 *	   condition while accessing shared resources such as L3
 *	   data staging register during CPU logout).
 *	4. If the CPU logout structure is not currently being used:
 *		5. Clear AFSR error bits
 *		6. Capture Ecache, Dcache and Icache lines associated
 *		   with AFAR.
 *		7. Unpark sibling core if we parked it earlier.
 *		8. call cpu_deferred_error via sys_trap.
 *	5. Otherwise, if the CPU logout structure is busy:
 *		6. Increment "logout busy count"
 *		7. Unpark sibling core if we parked it earlier.
 *		8. Issue a retry since the other CPU error logging
 *		   code will end up finding this error bit and logging
 *		   information about it later.
 *      6. Alternatively (to 4 and 5 above), if the cpu_private struct is
 *         not yet initialized such that we can't even check the logout
 *         struct, then we place the clo_flags data into %g2
 *         (sys_trap->have_win arg #1) and call cpu_deferred_error via
 *         systrap.  The clo_flags parameter is used to determine information
 *         such as TL, TT, CEEN settings, etc in the high level trap handler
 *         since we don't have access to detailed logout information in cases
 *         where the cpu_private struct is not yet initialized.
 *
 * %g2: [ clo_flags if cpu_private unavailable ] - sys_trap->have_win: arg #1
 * %g3: [ logout busy count ] - arg #2
 *
 * NOTE(review): this routine reads %g5 before writing it, using it as the
 * trap type (shifted into the clo_flags TT field) together with the T_TL1
 * flag.  Presumably the trap table entry loads %g5 before branching here;
 * confirm against the trap table.  This is also why the sibling-park macro
 * below is given %g6/%g4 as scratch rather than %g5.
 */

	ENTRY_NP(async_err)
	membar	#Sync			! Cheetah requires membar #Sync

	/*
	 * Disable CEEN and NCEEN.
	 */
	ldxa	[%g0]ASI_ESTATE_ERR, %g3
	andn	%g3, EN_REG_NCEEN + EN_REG_CEEN, %g4
	stxa	%g4, [%g0]ASI_ESTATE_ERR
	membar	#Sync			! membar sync required

	/*
	 * Save current DCU state.
	 * Disable Icache to allow capture of Icache data by DO_CPU_LOGOUT.
	 * Do this regardless of whether this is a Data Access Error or
	 * Instruction Access Error Trap.
	 * Disable Dcache for both Data Access Error and Instruction Access
	 * Error per Cheetah PRM P.5 Note 6.
	 */
	ldxa	[%g0]ASI_DCU, %g1	! save DCU in %g1
	andn	%g1, DCU_IC + DCU_DC, %g4
	stxa	%g4, [%g0]ASI_DCU
	flush	%g0	/* flush required after changing the IC bit */

	/*
	 * Check to see whether we need to park our sibling core
	 * before recording diagnostic information from caches
	 * which may be shared by both cores.
	 * We use %g1 to store information about whether or not
	 * we had to park the core (%g1 holds our DCUCR value and
	 * we only use bits from that register which are "reserved"
	 * to keep track of core parking) so that we know whether
	 * or not to unpark later.  %g6 and %g4 are scratch registers.
	 */
	PARK_SIBLING_CORE(%g1, %g6, %g4)

	/*
	 * Do the CPU logout capture.
	 *
	 *   %g3 = "failed?" return value.
	 *   %g2 = Input = AFAR. Output the clo_flags info which is passed
	 *         into this macro via %g4. Output only valid if cpu_private
	 *         struct has not been initialized.
	 *   CHPR_ASYNC_LOGOUT = cpu logout structure offset input
	 *   %g4 = Trap information stored in the cpu logout flags field
	 *   %g5 = scr1
	 *   %g6 = scr2
	 *   %g3 = scr3
	 *   %g4 = scr4
	 *
	 * The instructions below assemble the flags word in %g4 from the
	 * T_TL1 bit of %g5 (TL), the trap type in %g5 (TT) and the CEEN
	 * bit of the ESTATE_ERR value read into %g3 above.
	 */
	andcc	%g5, T_TL1, %g0
	clr	%g6
	movnz	%xcc, 1, %g6		! set %g6 if T_TL1 set
	sllx	%g6, CLO_FLAGS_TL_SHIFT, %g6
	sllx	%g5, CLO_FLAGS_TT_SHIFT, %g4
	set	CLO_FLAGS_TT_MASK, %g2
	and	%g4, %g2, %g4		! ttype
	or	%g6, %g4, %g4		! TT and TL
	and	%g3, EN_REG_CEEN, %g3	! CEEN value
	or	%g3, %g4, %g4		! TT and TL and CEEN
	set	CHPR_ASYNC_LOGOUT, %g6
	DO_CPU_LOGOUT(%g3, %g2, %g6, %g4, %g5, %g6, %g3, %g4)

	/*
	 * If the logout struct was busy, we may need to pass the
	 * TT, TL, and CEEN information to the TL=0 handler via
	 * systrap parameter so save it off here.
	 *
	 * After this, a non-zero %g3 carries the flags in its upper 32
	 * bits and the busy count in its lower 32 bits.
	 */
	cmp	%g3, %g0
	be	1f
	  nop
	sllx	%g4, 32, %g4
	or	%g4, %g3, %g3
1:
	/*
	 * Flush the Icache.  Since we turned off the Icache to capture the
	 * Icache line it is now stale or corrupted and we must flush it
	 * before re-enabling it.
	 *
	 * The I$ size/linesize come from the cpu_private struct when
	 * GET_CPU_PRIVATE_PTR succeeds; otherwise control goes to
	 * async_err_1, which loads the global icache_size and
	 * icache_linesize instead.
	 */
	GET_CPU_PRIVATE_PTR(%g0, %g5, %g7, async_err_1);
	ld	[%g5 + CHPR_ICACHE_LINESIZE], %g6
	ba,pt	%icc, 2f
	  ld	[%g5 + CHPR_ICACHE_SIZE], %g5	/* delay slot */
async_err_1:
	ASM_LD(%g5, icache_size)
	ASM_LD(%g6, icache_linesize)
2:
	CH_ICACHE_FLUSHALL(%g5, %g6, %g7, %g4)

	/*
	 * XXX - Don't we need to flush the Dcache before turning it back
	 *       on to avoid stale or corrupt data? Was this broken?
	 *
	 * NOTE(review): the question above looks stale, since the Dcache
	 * flush it asks about is performed immediately below.
	 */
	/*
	 * Flush the Dcache before turning it back on since it may now
	 * contain stale or corrupt data.
	 */
	ASM_LD(%g5, dcache_size)
	ASM_LD(%g6, dcache_linesize)
	CH_DCACHE_FLUSHALL(%g5, %g6, %g7)

	/*
	 * check to see whether we parked our sibling core at the start
	 * of this handler.  If so, we need to unpark it here.
	 * We use DCUCR reserved bits (stored in %g1) to keep track of
	 * whether or not we need to unpark.  %g5 and %g7 are scratch registers.
	 */
	UNPARK_SIBLING_CORE(%g1, %g5, %g7)

	/*
	 * Restore Icache and Dcache to previous state.
	 */
	stxa	%g1, [%g0]ASI_DCU
	flush	%g0	/* flush required after changing the IC bit */

	/*
	 * Make sure our CPU logout operation was successful.
	 *
	 * The be/blt branches below have no condition-code operand, i.e.
	 * they test the 32-bit condition codes, so only the low 32 bits of
	 * %g3 (the busy count) take part in the comparisons and the flags
	 * merged into the upper half above are ignored.
	 */
	cmp	%g3, %g0
	be	4f
	  nop

	/*
	 * If the logout structure had been busy, how many times have
	 * we tried to use it and failed (nesting count)?  If we have
	 * already recursed a substantial number of times, then we can
	 * assume things are not going to get better by themselves and
	 * so it would be best to panic.
	 */
	cmp	%g3, CLO_NESTING_MAX
	blt	3f
	  nop

	call	ptl1_panic
	  mov	PTL1_BAD_ECC, %g1	/* delay slot: %g1 set for ptl1_panic */

3:
	/*
	 * Otherwise, if the logout structure was busy but we have not
	 * nested more times than our maximum value, then we simply
	 * issue a retry.  Our TL=0 trap handler code will check and
	 * clear the AFSR after it is done logging what is currently
	 * in the logout struct and handle this event at that time.
	 */
	retry
4:
	RESET_USER_RTT_REGS(%g4, %g5, 5f)
5:
	set	cpu_deferred_error, %g1
	ba	sys_trap
	  mov	PIL_15, %g4		! run at pil 15
	SET_SIZE(async_err)

#endif	/* lint */

#if defined(CPU_IMP_L1_CACHE_PARITY)

/*
 * D$ parity error trap (trap 71) at TL=0.
 * tt0_dperr is replaced by dcache_parity_instr in cpu_init_trap of
 * the various architecture-specific files.  This merely sets up the
 * arguments for cpu_parity_error and calls it via sys_trap.
 * NB: Must be 8 instructions or less to fit in trap table and code must
 *     be relocatable.
 *
 * Registers set before entering sys_trap:
 *   %g1 = cpu_parity_error, %g2 = CH_ERR_DPE, %g3 = %tpc, %g4 = PIL_15.
 * sys_trap is reached through sethi/jmp on %g7 rather than a
 * pc-relative branch.
 */
#if defined(lint)

void
dcache_parity_instr(void)
{}

#else	/* lint */
	ENTRY_NP(dcache_parity_instr)
	membar	#Sync			! Cheetah+ requires membar #Sync
	set	cpu_parity_error, %g1
	or	%g0, CH_ERR_DPE, %g2
	rdpr	%tpc, %g3
	sethi	%hi(sys_trap), %g7
	jmp	%g7 + %lo(sys_trap)
	  mov	PIL_15, %g4		! run at pil 15
	SET_SIZE(dcache_parity_instr)

#endif	/* lint */


/*
 * D$ parity error trap (trap 71) at TL>0.
 * tt1_dperr is replaced by dcache_parity_tl1_instr in cpu_init_trap of
 * the various architecture-specific files.  This generates a "Software
 * Trap 1" at TL>0, which goes to dcache_parity_tl1_cont_instr, and we
 * continue the handling there.
 * NB: Must be 8 instructions or less to fit in trap table and code must
 *     be relocatable.
 */
#if defined(lint)

void
dcache_parity_tl1_instr(void)
{}

#else	/* lint */
	ENTRY_NP(dcache_parity_tl1_instr)
	CH_ERR_TL1_TRAPENTRY(SWTRAP_1);
	SET_SIZE(dcache_parity_tl1_instr)

#endif	/* lint */


/*
 * Software trap 1 at TL>0.
 * tt1_swtrap1 is replaced by dcache_parity_tl1_cont_instr in cpu_init_trap
 * of the various architecture-specific files.  This is used as a continuation
 * of the dcache parity handling where we've bought an extra TL level, so we
 * can use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
 * and %g2.  Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
 * there's a reserved hole from 3-7.  We only use bits 0-1 and 8-9 (the low
 * order two bits from %g1 and %g2 respectively).
 * NB: Must be 8 instructions or less to fit in trap table and code must
 *     be relocatable.
 */
#if defined(lint)

void
dcache_parity_tl1_cont_instr(void)
{}

#else	/* lint */
	ENTRY_NP(dcache_parity_tl1_cont_instr)
	CH_ERR_TL1_SWTRAPENTRY(dcache_parity_tl1_err);
	SET_SIZE(dcache_parity_tl1_cont_instr)

#endif	/* lint */

/*
 * D$ parity error at TL>0 handler
 * We get here via trap 71 at TL>0->Software trap 1 at TL>0.  We enter
 * this routine with %g1 and %g2 already saved in %tpc, %tnpc and %tstate.
 */
#if defined(lint)

void
dcache_parity_tl1_err(void)
{}

#else	/* lint */

	ENTRY_NP(dcache_parity_tl1_err)

	/*
	 * This macro saves all the %g registers in the ch_err_tl1_data
	 * structure, updates the ch_err_tl1_flags and saves the %tpc in
	 * ch_err_tl1_tpc.  At the end of this macro, %g1 will point to
	 * the ch_err_tl1_data structure and %g2 will have the original
	 * flags in the ch_err_tl1_data structure.  All %g registers
	 * except for %g1 and %g2 will be available.
	 */
	CH_ERR_TL1_ENTER(CH_ERR_DPE);

#ifdef TRAPTRACE
	/*
	 * Get current trap trace entry physical pointer.
	 *
	 * %g6 = this CPU's trap_trace_ctl entry.  Tracing is skipped
	 * entirely when its TRAPTR_LIMIT field is zero.
	 */
	CPU_INDEX(%g6, %g5)
	sll	%g6, TRAPTR_SIZE_SHIFT, %g6
	set	trap_trace_ctl, %g5
	add	%g6, %g5, %g6
	ld	[%g6 + TRAPTR_LIMIT], %g5
	tst	%g5
	be	%icc, dpe_tl1_skip_tt
	  nop
	ldx	[%g6 + TRAPTR_PBASE], %g5
	ld	[%g6 + TRAPTR_OFFSET], %g4
	add	%g5, %g4, %g5

	/*
	 * Create trap trace entry.
	 *
	 * %asi is saved in %g7 and restored after the entry is written.
	 */
	rd	%asi, %g7
	wr	%g0, TRAPTR_ASI, %asi
	rd	STICK, %g4
	stxa	%g4, [%g5 + TRAP_ENT_TICK]%asi
	rdpr	%tl, %g4
	stha	%g4, [%g5 + TRAP_ENT_TL]%asi
	rdpr	%tt, %g4
	stha	%g4, [%g5 + TRAP_ENT_TT]%asi
	rdpr	%tpc, %g4
	stna	%g4, [%g5 + TRAP_ENT_TPC]%asi
	rdpr	%tstate, %g4
	stxa	%g4, [%g5 + TRAP_ENT_TSTATE]%asi
	stna	%sp, [%g5 + TRAP_ENT_SP]%asi
	stna	%g0, [%g5 + TRAP_ENT_TR]%asi
	stna	%g0, [%g5 + TRAP_ENT_F1]%asi
	stna	%g0, [%g5 + TRAP_ENT_F2]%asi
	stna	%g0, [%g5 + TRAP_ENT_F3]%asi
	stna	%g0, [%g5 + TRAP_ENT_F4]%asi
	wr	%g0, %g7, %asi

	/*
	 * Advance trap trace pointer.
	 *
	 * The new offset wraps to 0 once it reaches
	 * (TRAPTR_LIMIT - TRAP_ENT_SIZE).
	 */
	ld	[%g6 + TRAPTR_OFFSET], %g5
	ld	[%g6 + TRAPTR_LIMIT], %g4
	st	%g5, [%g6 + TRAPTR_LAST_OFFSET]
	add	%g5, TRAP_ENT_SIZE, %g5
	sub	%g4, TRAP_ENT_SIZE, %g4
	cmp	%g5, %g4
	movge	%icc, 0, %g5
	st	%g5, [%g6 + TRAPTR_OFFSET]
dpe_tl1_skip_tt:
#endif	/* TRAPTRACE */

	/*
	 * I$ and D$ are automatically turned off by HW when the CPU hits
	 * a dcache or icache parity error so we will just leave those two
	 * off for now to avoid repeating this trap.
	 * For Panther, however, since we trap on P$ data parity errors
	 * and HW does not automatically disable P$, we need to disable it
	 * here so that we don't encounter any recursive traps when we
	 * issue the retry.
	 *
	 * NOTE(review): the code below clears the DCU_PE_SHIFT bit
	 * unconditionally; there is no Panther-only check in this routine.
	 */
	ldxa	[%g0]ASI_DCU, %g3
	mov	1, %g4
	sllx	%g4, DCU_PE_SHIFT, %g4
	andn	%g3, %g4, %g3
	stxa	%g3, [%g0]ASI_DCU
	membar	#Sync

	/*
	 * We fall into this macro if we've successfully logged the error in
	 * the ch_err_tl1_data structure and want the PIL15 softint to pick
	 * it up and log it.  %g1 must point to the ch_err_tl1_data structure.
	 * Restores the %g registers and issues retry.
	 */
	CH_ERR_TL1_EXIT;
	SET_SIZE(dcache_parity_tl1_err)

#endif	/* lint */

/*
 * I$ parity error trap (trap 72) at TL=0.
 * tt0_iperr is replaced by icache_parity_instr in cpu_init_trap of
 * the various architecture-specific files.  This merely sets up the
 * arguments for cpu_parity_error and calls it via sys_trap.
 * NB: Must be 8 instructions or less to fit in trap table and code must
 *     be relocatable.
 *
 * Registers set before entering sys_trap:
 *   %g1 = cpu_parity_error, %g2 = CH_ERR_IPE, %g3 = %tpc, %g4 = PIL_15.
 * sys_trap is reached through sethi/jmp on %g7 rather than a
 * pc-relative branch.
 */
#if defined(lint)

void
icache_parity_instr(void)
{}

#else	/* lint */

	ENTRY_NP(icache_parity_instr)
	membar	#Sync			! Cheetah+ requires membar #Sync
	set	cpu_parity_error, %g1
	or	%g0, CH_ERR_IPE, %g2
	rdpr	%tpc, %g3
	sethi	%hi(sys_trap), %g7
	jmp	%g7 + %lo(sys_trap)
	  mov	PIL_15, %g4		! run at pil 15
	SET_SIZE(icache_parity_instr)

#endif	/* lint */

/*
 * I$ parity error trap (trap 72) at TL>0.
 * tt1_iperr is replaced by icache_parity_tl1_instr in cpu_init_trap of
 * the various architecture-specific files.  This generates a "Software
 * Trap 2" at TL>0, which goes to icache_parity_tl1_cont_instr, and we
 * continue the handling there.
 * NB: Must be 8 instructions or less to fit in trap table and code must
 *     be relocatable.
 */
#if defined(lint)

void
icache_parity_tl1_instr(void)
{}

#else	/* lint */
	ENTRY_NP(icache_parity_tl1_instr)
	CH_ERR_TL1_TRAPENTRY(SWTRAP_2);
	SET_SIZE(icache_parity_tl1_instr)

#endif	/* lint */

/*
 * Software trap 2 at TL>0.
 * tt1_swtrap2 is replaced by icache_parity_tl1_cont_instr in cpu_init_trap
 * of the various architecture-specific files.  This is used as a continuation
 * of the icache parity handling where we've bought an extra TL level, so we
 * can use %tpc, %tnpc, %tstate to temporarily save the value of registers %g1
 * and %g2.  Note that %tstate has bits 0-2 and then bits 8-19 as r/w,
 * there's a reserved hole from 3-7.  We only use bits 0-1 and 8-9 (the low
 * order two bits from %g1 and %g2 respectively).
 * NB: Must be 8 instructions or less to fit in trap table and code must
 *     be relocatable.
 */
#if defined(lint)

void
icache_parity_tl1_cont_instr(void)
{}

#else	/* lint */
	ENTRY_NP(icache_parity_tl1_cont_instr)
	CH_ERR_TL1_SWTRAPENTRY(icache_parity_tl1_err);
	SET_SIZE(icache_parity_tl1_cont_instr)

#endif	/* lint */


/*
 * I$ parity error at TL>0 handler
 * We get here via trap 72 at TL>0->Software trap 2 at TL>0.  We enter
 * this routine with %g1 and %g2 already saved in %tpc, %tnpc and %tstate.
 */
#if defined(lint)

void
icache_parity_tl1_err(void)
{}

#else	/* lint */

	ENTRY_NP(icache_parity_tl1_err)

	/*
	 * This macro saves all the %g registers in the ch_err_tl1_data
	 * structure, updates the ch_err_tl1_flags and saves the %tpc in
	 * ch_err_tl1_tpc.  At the end of this macro, %g1 will point to
	 * the ch_err_tl1_data structure and %g2 will have the original
	 * flags in the ch_err_tl1_data structure.  All %g registers
	 * except for %g1 and %g2 will be available.
	 */
	CH_ERR_TL1_ENTER(CH_ERR_IPE);

#ifdef TRAPTRACE
	/*
	 * Get current trap trace entry physical pointer.
	 *
	 * %g6 = this CPU's trap_trace_ctl entry.  Tracing is skipped
	 * entirely when its TRAPTR_LIMIT field is zero.
	 */
	CPU_INDEX(%g6, %g5)
	sll	%g6, TRAPTR_SIZE_SHIFT, %g6
	set	trap_trace_ctl, %g5
	add	%g6, %g5, %g6
	ld	[%g6 + TRAPTR_LIMIT], %g5
	tst	%g5
	be	%icc, ipe_tl1_skip_tt
	  nop
	ldx	[%g6 + TRAPTR_PBASE], %g5
	ld	[%g6 + TRAPTR_OFFSET], %g4
	add	%g5, %g4, %g5

	/*
	 * Create trap trace entry.
	 *
	 * %asi is saved in %g7 and restored after the entry is written.
	 */
	rd	%asi, %g7
	wr	%g0, TRAPTR_ASI, %asi
	rd	STICK, %g4
	stxa	%g4, [%g5 + TRAP_ENT_TICK]%asi
	rdpr	%tl, %g4
	stha	%g4, [%g5 + TRAP_ENT_TL]%asi
	rdpr	%tt, %g4
	stha	%g4, [%g5 + TRAP_ENT_TT]%asi
	rdpr	%tpc, %g4
	stna	%g4, [%g5 + TRAP_ENT_TPC]%asi
	rdpr	%tstate, %g4
	stxa	%g4, [%g5 + TRAP_ENT_TSTATE]%asi
	stna	%sp, [%g5 + TRAP_ENT_SP]%asi
	stna	%g0, [%g5 + TRAP_ENT_TR]%asi
	stna	%g0, [%g5 + TRAP_ENT_F1]%asi
	stna	%g0, [%g5 + TRAP_ENT_F2]%asi
	stna	%g0, [%g5 + TRAP_ENT_F3]%asi
	stna	%g0, [%g5 + TRAP_ENT_F4]%asi
	wr	%g0, %g7, %asi

	/*
	 * Advance trap trace pointer.
	 *
	 * The new offset wraps to 0 once it reaches
	 * (TRAPTR_LIMIT - TRAP_ENT_SIZE).
	 */
	ld	[%g6 + TRAPTR_OFFSET], %g5
	ld	[%g6 + TRAPTR_LIMIT], %g4
	st	%g5, [%g6 + TRAPTR_LAST_OFFSET]
	add	%g5, TRAP_ENT_SIZE, %g5
	sub	%g4, TRAP_ENT_SIZE, %g4
	cmp	%g5, %g4
	movge	%icc, 0, %g5
	st	%g5, [%g6 + TRAPTR_OFFSET]
ipe_tl1_skip_tt:
#endif	/* TRAPTRACE */

	/*
	 * We fall into this macro if we've successfully logged the error in
	 * the ch_err_tl1_data structure and want the PIL15 softint to pick
	 * it up and log it.  %g1 must point to the ch_err_tl1_data structure.
	 * Restores the %g registers and issues retry.
	 */
	CH_ERR_TL1_EXIT;

	SET_SIZE(icache_parity_tl1_err)

#endif	/* lint */

#endif	/* CPU_IMP_L1_CACHE_PARITY */


/*
 * The itlb_rd_entry and dtlb_rd_entry functions return the tag portion of the
 * tte, the virtual address, and the ctxnum of the specified tlb entry.  They
 * should only be used in places where you have no choice but to look at the
 * tlb itself.
 *
 * Note: These two routines are required by the Estar "cpr" loadable module.
2305 */ 2306 2307#if defined(lint) 2308 2309/* ARGSUSED */ 2310void 2311itlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag) 2312{} 2313 2314#else /* lint */ 2315 2316 ENTRY_NP(itlb_rd_entry) 2317 sllx %o0, 3, %o0 2318 ldxa [%o0]ASI_ITLB_ACCESS, %g1 2319 stx %g1, [%o1] 2320 ldxa [%o0]ASI_ITLB_TAGREAD, %g2 2321 set TAGREAD_CTX_MASK, %o4 2322 andn %g2, %o4, %o5 2323 retl 2324 stx %o5, [%o2] 2325 SET_SIZE(itlb_rd_entry) 2326 2327#endif /* lint */ 2328 2329 2330#if defined(lint) 2331 2332/* ARGSUSED */ 2333void 2334dtlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag) 2335{} 2336 2337#else /* lint */ 2338 2339 ENTRY_NP(dtlb_rd_entry) 2340 sllx %o0, 3, %o0 2341 ldxa [%o0]ASI_DTLB_ACCESS, %g1 2342 stx %g1, [%o1] 2343 ldxa [%o0]ASI_DTLB_TAGREAD, %g2 2344 set TAGREAD_CTX_MASK, %o4 2345 andn %g2, %o4, %o5 2346 retl 2347 stx %o5, [%o2] 2348 SET_SIZE(dtlb_rd_entry) 2349#endif /* lint */ 2350 2351 2352#if !(defined(JALAPENO) || defined(SERRANO)) 2353 2354#if defined(lint) 2355 2356uint64_t 2357get_safari_config(void) 2358{ return (0); } 2359 2360#else /* lint */ 2361 2362 ENTRY(get_safari_config) 2363 ldxa [%g0]ASI_SAFARI_CONFIG, %o0 2364 retl 2365 nop 2366 SET_SIZE(get_safari_config) 2367 2368#endif /* lint */ 2369 2370 2371#if defined(lint) 2372 2373/* ARGSUSED */ 2374void 2375set_safari_config(uint64_t safari_config) 2376{} 2377 2378#else /* lint */ 2379 2380 ENTRY(set_safari_config) 2381 stxa %o0, [%g0]ASI_SAFARI_CONFIG 2382 membar #Sync 2383 retl 2384 nop 2385 SET_SIZE(set_safari_config) 2386 2387#endif /* lint */ 2388 2389#endif /* !(JALAPENO || SERRANO) */ 2390 2391 2392#if defined(lint) 2393 2394void 2395cpu_cleartickpnt(void) 2396{} 2397 2398#else /* lint */ 2399 /* 2400 * Clear the NPT (non-privileged trap) bit in the %tick/%stick 2401 * registers. In an effort to make the change in the 2402 * tick/stick counter as consistent as possible, we disable 2403 * all interrupts while we're changing the registers. 
We also 2404 * ensure that the read and write instructions are in the same 2405 * line in the instruction cache. 2406 */ 2407 ENTRY_NP(cpu_clearticknpt) 2408 rdpr %pstate, %g1 /* save processor state */ 2409 andn %g1, PSTATE_IE, %g3 /* turn off */ 2410 wrpr %g0, %g3, %pstate /* interrupts */ 2411 rdpr %tick, %g2 /* get tick register */ 2412 brgez,pn %g2, 1f /* if NPT bit off, we're done */ 2413 mov 1, %g3 /* create mask */ 2414 sllx %g3, 63, %g3 /* for NPT bit */ 2415 ba,a,pt %xcc, 2f 2416 .align 8 /* Ensure rd/wr in same i$ line */ 24172: 2418 rdpr %tick, %g2 /* get tick register */ 2419 wrpr %g3, %g2, %tick /* write tick register, */ 2420 /* clearing NPT bit */ 24211: 2422 rd STICK, %g2 /* get stick register */ 2423 brgez,pn %g2, 3f /* if NPT bit off, we're done */ 2424 mov 1, %g3 /* create mask */ 2425 sllx %g3, 63, %g3 /* for NPT bit */ 2426 ba,a,pt %xcc, 4f 2427 .align 8 /* Ensure rd/wr in same i$ line */ 24284: 2429 rd STICK, %g2 /* get stick register */ 2430 wr %g3, %g2, STICK /* write stick register, */ 2431 /* clearing NPT bit */ 24323: 2433 jmp %g4 + 4 2434 wrpr %g0, %g1, %pstate /* restore processor state */ 2435 2436 SET_SIZE(cpu_clearticknpt) 2437 2438#endif /* lint */ 2439 2440 2441#if defined(CPU_IMP_L1_CACHE_PARITY) 2442 2443#if defined(lint) 2444/* 2445 * correct_dcache_parity(size_t size, size_t linesize) 2446 * 2447 * Correct D$ data parity by zeroing the data and initializing microtag 2448 * for all indexes and all ways of the D$. 2449 * 2450 */ 2451/* ARGSUSED */ 2452void 2453correct_dcache_parity(size_t size, size_t linesize) 2454{} 2455 2456#else /* lint */ 2457 2458 ENTRY(correct_dcache_parity) 2459 /* 2460 * Register Usage: 2461 * 2462 * %o0 = input D$ size 2463 * %o1 = input D$ line size 2464 * %o2 = scratch 2465 * %o3 = scratch 2466 * %o4 = scratch 2467 */ 2468 2469 sub %o0, %o1, %o0 ! 
init cache line address 2470 2471 /* 2472 * For Panther CPUs, we also need to clear the data parity bits 2473 * using DC_data_parity bit of the ASI_DCACHE_DATA register. 2474 */ 2475 GET_CPU_IMPL(%o3) 2476 cmp %o3, PANTHER_IMPL 2477 bne 1f 2478 clr %o3 ! zero for non-Panther 2479 mov 1, %o3 2480 sll %o3, PN_DC_DATA_PARITY_BIT_SHIFT, %o3 2481 24821: 2483 /* 2484 * Set utag = way since it must be unique within an index. 2485 */ 2486 srl %o0, 14, %o2 ! get cache way (DC_way) 2487 membar #Sync ! required before ASI_DC_UTAG 2488 stxa %o2, [%o0]ASI_DC_UTAG ! set D$ utag = cache way 2489 membar #Sync ! required after ASI_DC_UTAG 2490 2491 /* 2492 * Zero line of D$ data (and data parity bits for Panther) 2493 */ 2494 sub %o1, 8, %o2 2495 or %o0, %o3, %o4 ! same address + DC_data_parity 24962: 2497 membar #Sync ! required before ASI_DC_DATA 2498 stxa %g0, [%o0 + %o2]ASI_DC_DATA ! zero 8 bytes of D$ data 2499 membar #Sync ! required after ASI_DC_DATA 2500 /* 2501 * We also clear the parity bits if this is a panther. For non-Panther 2502 * CPUs, we simply end up clearing the $data register twice. 2503 */ 2504 stxa %g0, [%o4 + %o2]ASI_DC_DATA 2505 membar #Sync 2506 2507 subcc %o2, 8, %o2 2508 bge 2b 2509 nop 2510 2511 subcc %o0, %o1, %o0 2512 bge 1b 2513 nop 2514 2515 retl 2516 nop 2517 SET_SIZE(correct_dcache_parity) 2518 2519#endif /* lint */ 2520 2521#endif /* CPU_IMP_L1_CACHE_PARITY */ 2522 2523 2524#if defined(lint) 2525/* 2526 * Get timestamp (stick). 2527 */ 2528/* ARGSUSED */ 2529void 2530stick_timestamp(int64_t *ts) 2531{ 2532} 2533 2534#else /* lint */ 2535 2536 ENTRY_NP(stick_timestamp) 2537 rd STICK, %g1 ! read stick reg 2538 sllx %g1, 1, %g1 2539 srlx %g1, 1, %g1 ! clear npt bit 2540 2541 retl 2542 stx %g1, [%o0] ! store the timestamp 2543 SET_SIZE(stick_timestamp) 2544 2545#endif /* lint */ 2546 2547 2548#if defined(lint) 2549/* 2550 * Set STICK adjusted by skew. 
 */
/* ARGSUSED */
void
stick_adj(int64_t skew)
{
}

#else	/* lint */

	ENTRY_NP(stick_adj)
	rdpr	%pstate, %g1		! save processor state
	andn	%g1, PSTATE_IE, %g3
	ba	1f			! cache align stick adj
	wrpr	%g0, %g3, %pstate	! turn off interrupts

	.align	16
1:	nop

	rd	STICK, %g4		! read stick reg
	add	%g4, %o0, %o1		! adjust stick with skew
	wr	%o1, %g0, STICK		! write stick reg

	retl
	wrpr	%g1, %pstate		! restore processor state
	SET_SIZE(stick_adj)

#endif	/* lint */

#if defined(lint)
/*
 * Debugger-specific stick retrieval
 */
/*ARGSUSED*/
int
kdi_get_stick(uint64_t *stickp)
{
	return (0);
}

#else	/* lint */

	/*
	 * Stores the raw STICK value through %o0 (stickp) and returns 0.
	 */
	ENTRY_NP(kdi_get_stick)
	rd	STICK, %g1
	stx	%g1, [%o0]
	retl
	mov	%g0, %o0
	SET_SIZE(kdi_get_stick)

#endif	/* lint */

#if defined(lint)
/*
 * Invalidate the specified line from the D$.
 *
 * Register usage:
 *	%o0 - index for the invalidation, specifies DC_way and DC_addr
 *
 * ASI_DC_TAG, 0x47, is used in the following manner. A 64-bit value is
 * stored to a particular DC_way and DC_addr in ASI_DC_TAG.
 *
 * The format of the stored 64-bit value is:
 *
 *	+----------+--------+----------+
 *	| Reserved | DC_tag | DC_valid |
 *	+----------+--------+----------+
 *	 63      31 30     1          0
 *
 * DC_tag is the 30-bit physical tag of the associated line.
 * DC_valid is the 1-bit valid field for both the physical and snoop tags.
 *
 * The format of the 64-bit DC_way and DC_addr into ASI_DC_TAG is:
 *
 *	+----------+--------+----------+----------+
 *	| Reserved | DC_way | DC_addr  | Reserved |
 *	+----------+--------+----------+----------+
 *	 63      16 15    14 13       5 4        0
 *
 * DC_way is a 2-bit index that selects one of the 4 ways.
 * DC_addr is a 9-bit index that selects one of 512 tag/valid fields.
 *
 * Setting the DC_valid bit to zero for the specified DC_way and
 * DC_addr index into the D$ results in an invalidation of a D$ line.
 */
/*ARGSUSED*/
void
dcache_inval_line(int index)
{
}
#else	/* lint */
	ENTRY(dcache_inval_line)
	sll	%o0, 5, %o0		! shift index into DC_way and DC_addr
	stxa	%g0, [%o0]ASI_DC_TAG	! zero the DC_valid and DC_tag bits
	membar	#Sync
	retl
	nop
	SET_SIZE(dcache_inval_line)
#endif	/* lint */

#if defined(lint)
/*
 * Invalidate the entire I$
 *
 * Register usage:
 *	%o0 - specifies IC_way, IC_addr, IC_tag
 *	%o1 - scratch
 *	%o2 - used to save and restore DCU value
 *	%o3 - scratch
 *	%o5 - used to save and restore PSTATE
 *
 * Due to the behavior of the I$ control logic when accessing ASI_IC_TAG,
 * the I$ should be turned off. Accesses to ASI_IC_TAG may collide and
 * block out snoops and invalidates to the I$, causing I$ consistency
 * to be broken. Before turning on the I$, all I$ lines must be invalidated.
 *
 * ASI_IC_TAG, 0x67, is used in the following manner. A 64-bit value is
 * stored to a particular IC_way, IC_addr, IC_tag in ASI_IC_TAG. The
 * info below describes store (write) use of ASI_IC_TAG. Note that read
 * use of ASI_IC_TAG behaves differently.
 *
 * The format of the stored 64-bit value is:
 *
 *	+----------+--------+---------------+-----------+
 *	| Reserved | Valid  | IC_vpred<7:0> | Undefined |
 *	+----------+--------+---------------+-----------+
 *	 63      55    54    53           46 45        0
 *
 * Valid is the 1-bit valid field for both the physical and snoop tags.
 * IC_vpred is the 8-bit LPB bits for 8 instructions starting at
 *	the 32-byte boundary aligned address specified by IC_addr.
 *
 * The format of the 64-bit IC_way, IC_addr, IC_tag into ASI_IC_TAG is:
 *
 *	+----------+--------+---------+--------+---------+
 *	| Reserved | IC_way | IC_addr | IC_tag |Reserved |
 *	+----------+--------+---------+--------+---------+
 *	 63      16 15    14 13      5 4      3 2       0
 *
 * IC_way is a 2-bit index that selects one of the 4 ways.
 * IC_addr[13:6] is an 8-bit index that selects one of 256 valid fields.
 * IC_addr[5] is a "don't care" for a store.
 * IC_tag set to 2 specifies that the stored value is to be interpreted
 *	as containing Valid and IC_vpred as described above.
 *
 * Setting the Valid bit to zero for the specified IC_way and
 * IC_addr index into the I$ results in an invalidation of an I$ line.
 */
/*ARGSUSED*/
void
icache_inval_all(void)
{
}
#else	/* lint */
	ENTRY(icache_inval_all)
	rdpr	%pstate, %o5
	andn	%o5, PSTATE_IE, %o3
	wrpr	%g0, %o3, %pstate	! clear IE bit

	!
	! Use the I$ size/linesize from the cpu private area; the macro
	! is given icache_inval_all_1 as its alternate label, where the
	! global icache_size/icache_linesize are loaded instead
	! (presumably taken when the private area is unavailable).
	!
	GET_CPU_PRIVATE_PTR(%g0, %o0, %o2, icache_inval_all_1);
	ld	[%o0 + CHPR_ICACHE_LINESIZE], %o1
	ba,pt	%icc, 2f
	ld	[%o0 + CHPR_ICACHE_SIZE], %o0
icache_inval_all_1:
	ASM_LD(%o0, icache_size)
	ASM_LD(%o1, icache_linesize)
2:
	CH_ICACHE_FLUSHALL(%o0, %o1, %o2, %o4)

	retl
	wrpr	%g0, %o5, %pstate	! restore earlier pstate
	SET_SIZE(icache_inval_all)
#endif	/* lint */


#if defined(lint)
/* ARGSUSED */
void
cache_scrubreq_tl1(uint64_t inum, uint64_t index)
{
}

#else	/* lint */
/*
 * cache_scrubreq_tl1 is the crosstrap handler called on offlined cpus via a
 * crosstrap.  It atomically increments the outstanding request counter and,
 * if there was not already an outstanding request, branches to setsoftint_tl1
 * to enqueue an intr_vec for the given inum.
 */

	! Register usage:
	!
	! Arguments:
	! %g1 - inum
	! %g2 - index into chsm_outstanding array
	!
	! Internal:
	! %g2, %g3, %g5 - scratch
	! %g4 - ptr. to scrub_misc chsm_outstanding[index].
	! %g6 - setsoftint_tl1 address

	ENTRY_NP(cache_scrubreq_tl1)
	mulx	%g2, CHSM_OUTSTANDING_INCR, %g2
	set	CHPR_SCRUB_MISC + CHSM_OUTSTANDING, %g3
	add	%g2, %g3, %g2
	GET_CPU_PRIVATE_PTR(%g2, %g4, %g5, 1f);
	ld	[%g4], %g2		! cpu's chsm_outstanding[index]
	!
	! no need to use atomic instructions for the following
	! increment - we're at tl1
	!
	add	%g2, 0x1, %g3
	brnz,pn	%g2, 1f			! no need to enqueue more intr_vec
	st	%g3, [%g4]		! delay - store incremented counter
	ASM_JMP(%g6, setsoftint_tl1)
	! not reached
1:
	retry
	SET_SIZE(cache_scrubreq_tl1)

#endif	/* lint */


#if defined(lint)

/* ARGSUSED */
void
get_cpu_error_state(ch_cpu_errors_t *cpu_error_regs)
{}

#else	/* lint */

/*
 * Get the error state for the processor.
 * Note that this must not be used at TL>0
 *
 * %o0 = cpu_error_regs; every CH_CPU_ERRORS_* field stored below is
 * either read from the hardware or zeroed when the register does not
 * exist on this implementation.
 */
	ENTRY(get_cpu_error_state)
#if defined(CHEETAH_PLUS)
	set	ASI_SHADOW_REG_VA, %o2
	ldxa	[%o2]ASI_AFSR, %o1		! shadow afsr reg
	stx	%o1, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR]
	ldxa	[%o2]ASI_AFAR, %o1		! shadow afar reg
	stx	%o1, [%o0 + CH_CPU_ERRORS_SHADOW_AFAR]
	GET_CPU_IMPL(%o3)	! Only panther has AFSR_EXT registers
	cmp	%o3, PANTHER_IMPL
	bne,a	1f
	stx	%g0, [%o0 + CH_CPU_ERRORS_AFSR_EXT]	! zero for non-PN
	set	ASI_AFSR_EXT_VA, %o2
	ldxa	[%o2]ASI_AFSR, %o1		! afsr_ext reg
	stx	%o1, [%o0 + CH_CPU_ERRORS_AFSR_EXT]
	set	ASI_SHADOW_AFSR_EXT_VA, %o2
	ldxa	[%o2]ASI_AFSR, %o1		! shadow afsr_ext reg
	stx	%o1, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR_EXT]
	b	2f
	nop
1:
	stx	%g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR_EXT] ! zero for non-PN
2:
#else	/* CHEETAH_PLUS */
	stx	%g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR]
	stx	%g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFAR]
	stx	%g0, [%o0 + CH_CPU_ERRORS_AFSR_EXT]
	stx	%g0, [%o0 + CH_CPU_ERRORS_SHADOW_AFSR_EXT]
#endif	/* CHEETAH_PLUS */
#if defined(SERRANO)
	/*
	 * Serrano has an afar2 which captures the address on FRC/FRU errors.
	 * We save this in the afar2 of the register save area.
	 */
	set	ASI_MCU_AFAR2_VA, %o2
	ldxa	[%o2]ASI_MCU_CTRL, %o1
	stx	%o1, [%o0 + CH_CPU_ERRORS_AFAR2]
#endif	/* SERRANO */
	ldxa	[%g0]ASI_AFSR, %o1		! primary afsr reg
	stx	%o1, [%o0 + CH_CPU_ERRORS_AFSR]
	ldxa	[%g0]ASI_AFAR, %o1		! primary afar reg
	retl
	stx	%o1, [%o0 + CH_CPU_ERRORS_AFAR]
	SET_SIZE(get_cpu_error_state)
#endif	/* lint */

#if defined(lint)

/*
 * Check a page of memory for errors.
 *
 * Load each 64 byte block from physical memory.
 * Check AFSR after each load to see if an error
 * was caused. If so, log/scrub that error.
 *
 * Used to determine if a page contains
 * CEs when CEEN is disabled.
 */
/*ARGSUSED*/
void
cpu_check_block(caddr_t va, uint_t psz)
{}

#else	/* lint */

	ENTRY(cpu_check_block)
	!
	! get a new window with room for the error regs
	!
	save	%sp, -SA(MINFRAME + CH_CPU_ERROR_SIZE), %sp
	srl	%i1, 6, %l4		! clear top bits of psz
					! and divide by 64
	rd	%fprs, %l2		! store FP
	wr	%g0, FPRS_FEF, %fprs	! enable FP
1:
	ldda	[%i0]ASI_BLK_P, %d0	! load a block
	membar	#Sync
	ldxa	[%g0]ASI_AFSR, %l3	! read afsr reg
	brz,a,pt %l3, 2f		! check for error
	nop

	!
	! if error, read the error regs and log it
	!
	call	get_cpu_error_state
	add	%fp, STACK_BIAS - CH_CPU_ERROR_SIZE, %o0

	!
	! cpu_ce_detected(ch_cpu_errors_t *, flag)
	!
	call	cpu_ce_detected		! log the error
	mov	CE_CEEN_TIMEOUT, %o1
2:
	dec	%l4			! next 64-byte block
	brnz,a,pt  %l4, 1b
	add	%i0, 64, %i0		! increment block addr

	wr	%l2, %g0, %fprs		! restore FP
	ret
	restore

	SET_SIZE(cpu_check_block)

#endif	/* lint */

#if defined(lint)

/*
 * Perform a cpu logout called from C.  This is used where we did not trap
 * for the error but still want to gather "what we can".  Caller must make
 * sure cpu private area exists and that the indicated logout area is free
 * for use, and that we are unable to migrate cpus.
 */
/*ARGSUSED*/
void
cpu_delayed_logout(uint64_t afar, ch_cpu_logout_t *clop)
{ }

#else
	ENTRY(cpu_delayed_logout)
	rdpr	%pstate, %o2
	andn	%o2, PSTATE_IE, %o2
	wrpr	%g0, %o2, %pstate		! disable interrupts
	PARK_SIBLING_CORE(%o2, %o3, %o4)	! %o2 has DCU value
	add	%o1, CH_CLO_DATA + CH_CHD_EC_DATA, %o1
	rd	%asi, %g1			! save caller's %asi
	wr	%g0, ASI_P, %asi
	GET_ECACHE_DTAGS(%o0, %o1, %o3, %o4, %o5)
	wr	%g1, %asi			! restore caller's %asi
	UNPARK_SIBLING_CORE(%o2, %o3, %o4)	! can use %o2 again
	rdpr	%pstate, %o2
	or	%o2, PSTATE_IE, %o2
	wrpr	%g0, %o2, %pstate		! interrupts back on
	retl
	nop
	SET_SIZE(cpu_delayed_logout)

#endif	/* lint */

#if defined(lint)

/*ARGSUSED*/
int
dtrace_blksuword32(uintptr_t addr, uint32_t *data, int tryagain)
{ return (0); }

#else

	ENTRY(dtrace_blksuword32)
	save	%sp, -SA(MINFRAME + 4), %sp

	rdpr	%pstate, %l1
	andn	%l1, PSTATE_IE, %l2		! disable interrupts to
	wrpr	%g0, %l2, %pstate		! protect our FPU diddling

	rd	%fprs, %l0
	andcc	%l0, FPRS_FEF, %g0
	bz,a,pt	%xcc, 1f			! if the fpu is disabled
	wr	%g0, FPRS_FEF, %fprs		! ... enable the fpu

	st	%f0, [%fp + STACK_BIAS - 4]	! save %f0 to the stack
1:
	set	0f, %l5
	/*
	 * We're about to write a block full of either total garbage
	 * (not kernel data, don't worry) or user floating-point data
	 * (so it only _looks_ like garbage).
	 */
	ld	[%i1], %f0			! modify the block
	membar	#Sync
	stn	%l5, [THREAD_REG + T_LOFAULT]	! set up the lofault handler
	stda	%d0, [%i0]ASI_BLK_COMMIT_S	! store the modified block
	membar	#Sync
	stn	%g0, [THREAD_REG + T_LOFAULT]	! remove the lofault handler

	!
	! No instruction since the andcc above modifies the condition
	! codes, so this branch re-tests whether the fpu was disabled
	! on entry.
	!
	bz,a,pt	%xcc, 1f
	wr	%g0, %l0, %fprs			! restore %fprs

	ld	[%fp + STACK_BIAS - 4], %f0	! restore %f0
1:

	wrpr	%g0, %l1, %pstate		! restore interrupts

	ret
	restore	%g0, %g0, %o0			! return 0

	!
	! Label 0 is the address installed in T_LOFAULT above.
	!
0:
	membar	#Sync
	stn	%g0, [THREAD_REG + T_LOFAULT]	! remove the lofault handler

	bz,a,pt	%xcc, 1f
	wr	%g0, %l0, %fprs			! restore %fprs

	ld	[%fp + STACK_BIAS - 4], %f0	! restore %f0
1:

	wrpr	%g0, %l1, %pstate		! restore interrupts

	/*
	 * If tryagain is set (%i2) we tail-call dtrace_blksuword32_err()
	 * which deals with watchpoints. Otherwise, just return -1.
	 */
	brnz,pt	%i2, 1f
	nop
	ret
	restore	%g0, -1, %o0
1:
	call	dtrace_blksuword32_err
	restore

	SET_SIZE(dtrace_blksuword32)

#endif /* lint */

#ifdef	CHEETAHPLUS_ERRATUM_25

#if	defined(lint)
/*
 * Claim a chunk of physical address space.
 */
/*ARGSUSED*/
void
claimlines(uint64_t pa, size_t sz, int stride)
{}
#else	/* lint */
	!
	! %o0 = pa, %o1 = sz, %o2 = stride.  Walks downward from
	! (pa + sz - stride) in steps of stride; the casxa sits in an
	! annulled delay slot, so it is only executed while the
	! subtraction has not borrowed.
	!
	ENTRY(claimlines)
1:
	subcc	%o1, %o2, %o1
	add	%o0, %o1, %o3
	bgeu,a,pt	%xcc, 1b
	casxa	[%o3]ASI_MEM, %g0, %g0
	membar	#Sync
	retl
	nop
	SET_SIZE(claimlines)
#endif	/* lint */

#if	defined(lint)
/*
 * CPU feature initialization,
 * turn BPE off,
 * get device id.
 */
/*ARGSUSED*/
void
cpu_feature_init(void)
{}
#else	/* lint */
	ENTRY(cpu_feature_init)
	save	%sp, -SA(MINFRAME), %sp
	sethi	%hi(cheetah_bpe_off), %o0
	ld	[%o0 + %lo(cheetah_bpe_off)], %o0
	brz	%o0, 1f				! leave BPE alone if the
	nop					! cheetah_bpe_off tunable is 0
	rd	ASR_DISPATCH_CONTROL, %o0
	andn	%o0, ASR_DISPATCH_CONTROL_BPE, %o0	! clear the BPE bit
	wr	%o0, 0, ASR_DISPATCH_CONTROL
1:
	!
	! get the device_id and store the device_id
	! in the appropriate cpunodes structure
	! given the cpus index
	!
	CPU_INDEX(%o0, %o1)
	mulx	%o0, CPU_NODE_SIZE, %o0
	set	cpunodes + DEVICE_ID, %o1
	ldxa	[%g0] ASI_DEVICE_SERIAL_ID, %o2
	stx	%o2, [%o0 + %o1]
#ifdef	CHEETAHPLUS_ERRATUM_34
	!
	! apply Cheetah+ erratum 34 workaround
	!
	call	itlb_erratum34_fixup
	nop
#endif	/* CHEETAHPLUS_ERRATUM_34 */
	ret
	restore
	SET_SIZE(cpu_feature_init)
#endif	/* lint */

#if	defined(lint)
/*
 * Copy a tsb entry atomically, from src to dest.
 * src must be 128 bit aligned.
 */
/*ARGSUSED*/
void
copy_tsb_entry(uintptr_t src, uintptr_t dest)
{}
#else	/* lint */
	ENTRY(copy_tsb_entry)
	ldda	[%o0]ASI_NQUAD_LD, %o2		! %o2 = tag, %o3 = data
	stx	%o2, [%o1]
	stx	%o3, [%o1 + 8 ]
	retl
	nop
	SET_SIZE(copy_tsb_entry)
#endif	/* lint */

#endif	/* CHEETAHPLUS_ERRATUM_25 */

#ifdef	CHEETAHPLUS_ERRATUM_34

#if	defined(lint)

/*ARGSUSED*/
void
itlb_erratum34_fixup(void)
{}

#else	/* lint */

	!
	! In Cheetah+ erratum 34, under certain conditions an ITLB locked
	! index 0 TTE will erroneously be displaced when a new TTE is
	! loaded via ASI_ITLB_IN.  In order to avoid cheetah+ erratum 34,
	! locked index 0 TTEs must be relocated.
	!
	! NOTE: Care must be taken to avoid an ITLB miss in this routine.
	!
	ENTRY_NP(itlb_erratum34_fixup)
	rdpr	%pstate, %o3
#ifdef DEBUG
	PANIC_IF_INTR_DISABLED_PSTR(%o3, u3_di_label1, %g1)
#endif /* DEBUG */
	!
	! wrpr writes (rs1 xor rs2), so with IE set in %o3 this clears IE.
	!
	wrpr	%o3, PSTATE_IE, %pstate		! Disable interrupts
	ldxa	[%g0]ASI_ITLB_ACCESS, %o1	! %o1 = entry 0 data
	ldxa	[%g0]ASI_ITLB_TAGREAD, %o2	! %o2 = entry 0 tag

	cmp	%o1, %g0			! Is this entry valid?
	bge	%xcc, 1f
	andcc	%o1, TTE_LCK_INT, %g0		! Is this entry locked?
	bnz	%icc, 2f
	nop
1:
	retl					! Nope, outta here...
	wrpr	%g0, %o3, %pstate		! Enable interrupts
2:
	sethi	%hi(FLUSH_ADDR), %o4
	stxa	%g0, [%o2]ASI_ITLB_DEMAP	! Flush this mapping
	flush	%o4				! Flush required for I-MMU
	!
	! Start search from index 1 up.  This is because the kernel force
	! loads its text page at index 15 in sfmmu_kernel_remap() and we
	! don't want our relocated entry evicted later.
	!
	! NOTE: We assume that we'll be successful in finding an unlocked
	! or invalid entry.  If that isn't the case there are bound to
	! be bigger problems.
	!
	set	(1 << 3), %g3			! offset of index 1 (index * 8)
3:
	ldxa	[%g3]ASI_ITLB_ACCESS, %o4	! Load TTE from t16
	!
	! If this entry isn't valid, we'll choose to displace it (regardless
	! of the lock bit).
	!
	cmp	%o4, %g0			! TTE is > 0 iff not valid
	bge	%xcc, 4f			! If invalid, go displace
	andcc	%o4, TTE_LCK_INT, %g0		! Check for lock bit
	bnz,a	%icc, 3b			! If locked, look at next
	add	%g3, (1 << 3), %g3		!  entry
4:
	!
	! We found an unlocked or invalid entry; we'll explicitly load
	! the former index 0 entry here.
	!
	sethi	%hi(FLUSH_ADDR), %o4
	set	MMU_TAG_ACCESS, %g4
	stxa	%o2, [%g4]ASI_IMMU		! tag access = old entry 0 tag
	stxa	%o1, [%g3]ASI_ITLB_ACCESS	! data into the chosen index
	flush	%o4				! Flush required for I-MMU
	retl
	wrpr	%g0, %o3, %pstate		! Enable interrupts
	SET_SIZE(itlb_erratum34_fixup)

#endif	/* lint */

#endif	/* CHEETAHPLUS_ERRATUM_34 */
