/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#ifndef	_CHEETAHASM_H
#define	_CHEETAHASM_H

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#ifdef	__cplusplus
extern "C" {
#endif

#ifdef	_ASM
/* BEGIN CSTYLED */

#define	ASM_LD(reg, symbol)						\
	sethi	%hi(symbol), reg;					\
	ld	[reg + %lo(symbol)], reg

#define	ASM_LDX(reg, symbol)						\
	sethi	%hi(symbol), reg;					\
	ldx	[reg + %lo(symbol)], reg

#define	ASM_JMP(reg, symbol)						\
	sethi	%hi(symbol), reg;					\
	jmp	reg + %lo(symbol);					\
	nop

/*
 * Macro for getting to offset from 'cpu_private' ptr.  The 'cpu_private'
 * ptr is in the machcpu structure.
 *	off_reg: Register offset from 'cpu_private' ptr.
 *	scr1:    Scratch, ptr is returned in this register.
 *	scr2:    Scratch
 *	label:   Label to branch to if cpu_private ptr is null/zero.
 */
#define	GET_CPU_PRIVATE_PTR(off_reg, scr1, scr2, label)			\
	CPU_ADDR(scr1, scr2);						\
	ldn	[scr1 + CPU_PRIVATE], scr1;				\
	cmp	scr1, 0;						\
	be	label;							\
	nop;								\
	add	scr1, off_reg, scr1

/*
 * Macro version of get_dcache_dtag.  We use this macro in the
 * CPU logout code.  Since the Dcache is virtually indexed, only
 * bits [12:5] of the AFAR can be used, so we need to search through
 * 8 indexes (4 ways + bit 13) in order to find the tag we want.
 *	afar:  input AFAR, not modified.
 *	datap: input ptr to ch_dc_data_t, at end pts to end of ch_dc_data_t.
 *	scr1:  scratch.
 *	scr2:  scratch, will hold tag to look for.
 *	scr3:  used for Dcache index, loops through 4 ways.
 */
#define	GET_DCACHE_DTAG(afar, datap, scr1, scr2, scr3)			\
	set	CH_DCACHE_IDX_MASK, scr3;				\
	and	afar, scr3, scr3;					\
	srlx	afar, CH_DCTAG_PA_SHIFT, scr2;				\
	b	1f;							\
	or	scr2, CH_DCTAG_VALID_BIT, scr2;	/* tag we want */	\
	.align	128;							\
1:									\
	ldxa	[scr3]ASI_DC_TAG, scr1;		/* read tag */		\
	cmp	scr1, scr2;						\
	bne	4f;				/* not found? */	\
	nop;								\
	stxa	scr3, [datap + CH_DC_IDX]%asi;	/* store index */	\
	stxa	scr1, [datap + CH_DC_TAG]%asi;	/* store tag */		\
	membar	#Sync;				/* Cheetah PRM 10.6.3 */ \
	ldxa	[scr3]ASI_DC_UTAG, scr1;	/* read utag */		\
	membar	#Sync;				/* Cheetah PRM 10.6.3 */ \
	stxa	scr1, [datap + CH_DC_UTAG]%asi;				\
	ldxa	[scr3]ASI_DC_SNP_TAG, scr1;	/* read snoop tag */	\
	stxa	scr1, [datap + CH_DC_SNTAG]%asi;			\
	add	datap, CH_DC_DATA, datap;				\
	clr	scr2;							\
2:									\
	membar	#Sync;				/* Cheetah PRM 10.6.1 */ \
	ldxa	[scr3 + scr2]ASI_DC_DATA, scr1;	/* read data */		\
	membar	#Sync;				/* Cheetah PRM 10.6.1 */ \
	stxa	scr1, [datap]%asi;					\
	add	datap, 8, datap;					\
	cmp	scr2, CH_DC_DATA_REG_SIZE - 8;				\
	blt	2b;							\
	add	scr2, 8, scr2;						\
									\
	GET_CPU_IMPL(scr2);	/* Parity bits are elsewhere for */	\
	cmp	scr2, PANTHER_IMPL;	/* panther processors. */	\
	bne,a	5f;			/* Done if not panther. */	\
	add	datap, 8, datap;  /* Skip to the end of the struct. */	\
	clr	scr2;							\
	add	datap, 7, datap;  /* offset of the last parity byte */	\
	mov	1, scr1;						\
	sll	scr1, PN_DC_DATA_PARITY_BIT_SHIFT, scr1;		\
	or	scr3, scr1, scr3;  /* add DC_data_parity bit to index */ \
3:									\
	membar	#Sync;				/* Cheetah PRM 10.6.1 */ \
	ldxa	[scr3 + scr2]ASI_DC_DATA, scr1;	/* read parity bits */	\
	membar	#Sync;				/* Cheetah PRM 10.6.1 */ \
	stba	scr1, [datap]%asi;					\
	dec	datap;							\
	cmp	scr2, CH_DC_DATA_REG_SIZE - 8;				\
	blt	3b;							\
	add	scr2, 8, scr2;						\
	b	5f;							\
	add	datap, 5, datap;  /* set pointer to end of our struct */ \
4:									\
	set	CH_DCACHE_IDX_INCR, scr1;	/* incr. idx (scr3) */	\
	add	scr3, scr1, scr3;					\
	set	CH_DCACHE_IDX_LIMIT, scr1;	/* done? */		\
	cmp	scr3, scr1;						\
	blt	1b;							\
	nop;								\
	add	datap, CH_DC_DATA_SIZE, datap;				\
5:
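
/*
 * For reference, a minimal C sketch of the index search performed by
 * GET_DCACHE_DTAG above.  dc_tag_read() is a hypothetical helper standing
 * in for the ldxa from ASI_DC_TAG; this is illustrative only, not part of
 * the kernel interface:
 *
 *	uint64_t idx = afar & CH_DCACHE_IDX_MASK;	// AFAR<12:5>
 *	uint64_t tag = (afar >> CH_DCTAG_PA_SHIFT) | CH_DCTAG_VALID_BIT;
 *	for (; idx < CH_DCACHE_IDX_LIMIT; idx += CH_DCACHE_IDX_INCR) {
 *		// 8 candidate indexes: 4 ways x 2 settings of bit 13
 *		if (dc_tag_read(idx) == tag)
 *			break;			// found the line to capture
 *	}
 */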

/*
 * Macro version of get_icache_dtag.  We use this macro in the CPU
 * logout code.  If the Icache is on, we don't want to capture the data.
 *	afar:  input AFAR, not modified.
 *	datap: input ptr to ch_ic_data_t, at end pts to end of ch_ic_data_t.
 *	scr1:  scratch.
 *	scr2:  scratch, will hold tag to look for.
 *	scr3:  used for Icache index, loops through 4 ways.
 * Note: For Panther, the Icache is virtually indexed and increases in
 * size to 64KB (instead of 32KB) with a line size of 64 bytes (instead
 * of 32).  This means the IC_addr index bits[14:7] for Panther now
 * correspond to VA bits[13:6].  But since it is virtually indexed, we
 * still mask out only bits[12:5] from the AFAR (we have to manually
 * check bit 13).  In order to make this code work for all processors,
 * we end up checking twice as many indexes (8 instead of 4) as required
 * for non-Panther CPUs and saving off twice as much data (16 instructions
 * instead of just 8).
 */
#define	GET_ICACHE_DTAG(afar, datap, scr1, scr2, scr3)			\
	ldxa	[%g0]ASI_DCU, scr1;					\
	btst	DCU_IC, scr1;		/* is Icache enabled? */	\
	bne,a	6f;			/* yes, don't capture */	\
	add	datap, CH_IC_DATA_SIZE, datap;	/* annul if no branch */ \
	GET_CPU_IMPL(scr2);	/* Panther only uses VA[13:6] */	\
	cmp	scr2, PANTHER_IMPL;	/* and we also want to mask */	\
	be	1f;			/* out bit 13 since the */	\
	nop;				/* Panther I$ is VIPT. */	\
	set	CH_ICACHE_IDX_MASK, scr3;				\
	b	2f;							\
	nop;								\
1:									\
	set	PN_ICACHE_VA_IDX_MASK, scr3;				\
2:									\
	and	afar, scr3, scr3;					\
	sllx	scr3, CH_ICACHE_IDX_SHIFT, scr3;			\
	srlx	afar, CH_ICPATAG_SHIFT, scr2;	/* pa tag we want */	\
	andn	scr2, CH_ICPATAG_LBITS, scr2;	/* mask off lower */	\
	b	3f;							\
	nop;								\
	.align	128;							\
3:									\
	ldxa	[scr3]ASI_IC_TAG, scr1;		/* read pa tag */	\
	andn	scr1, CH_ICPATAG_LBITS, scr1;	/* mask off lower */	\
	cmp	scr1, scr2;						\
	bne	5f;				/* not found? */	\
	nop;								\
	stxa	scr3, [datap + CH_IC_IDX]%asi;	/* store index */	\
	stxa	scr1, [datap + CH_IC_PATAG]%asi; /* store pa tag */	\
	add	scr3, CH_ICTAG_UTAG, scr3;	/* read utag */		\
	ldxa	[scr3]ASI_IC_TAG, scr1;					\
	add	scr3, (CH_ICTAG_UPPER - CH_ICTAG_UTAG), scr3;		\
	stxa	scr1, [datap + CH_IC_UTAG]%asi;				\
	ldxa	[scr3]ASI_IC_TAG, scr1;		/* read upper tag */	\
	add	scr3, (CH_ICTAG_LOWER - CH_ICTAG_UPPER), scr3;		\
	stxa	scr1, [datap + CH_IC_UPPER]%asi;			\
	ldxa	[scr3]ASI_IC_TAG, scr1;		/* read lower tag */	\
	andn	scr3, CH_ICTAG_TMASK, scr3;				\
	stxa	scr1, [datap + CH_IC_LOWER]%asi;			\
	ldxa	[scr3]ASI_IC_SNP_TAG, scr1;	/* read snoop tag */	\
	stxa	scr1, [datap + CH_IC_SNTAG]%asi;			\
	add	datap, CH_IC_DATA, datap;				\
	clr	scr2;							\
4:									\
	ldxa	[scr3 + scr2]ASI_IC_DATA, scr1;	/* read ins. data */	\
	stxa	scr1, [datap]%asi;					\
	add	datap, 8, datap;					\
	cmp	scr2, PN_IC_DATA_REG_SIZE - 8;				\
	blt	4b;							\
	add	scr2, 8, scr2;						\
	b	6f;							\
	nop;								\
5:									\
	set	CH_ICACHE_IDX_INCR, scr1;	/* incr. idx (scr3) */	\
	add	scr3, scr1, scr3;					\
	set	PN_ICACHE_IDX_LIMIT, scr1;	/* done? */		\
	cmp	scr3, scr1;						\
	blt	3b;							\
	nop;								\
	add	datap, CH_IC_DATA_SIZE, datap;				\
6:

#if defined(JALAPENO) || defined(SERRANO)
/*
 * Macro version of get_ecache_dtag.  We use this macro in the
 * CPU logout code.
 *	afar:   input AFAR, not modified
 *	datap:  Ptr to ch_ec_data_t, at end pts just past ch_ec_data_t.
 *	ec_way: Constant value (way number)
 *	scr1:   Scratch
 *	scr2:   Scratch.
 *	scr3:   Scratch.
 */
#define	GET_ECACHE_DTAG(afar, datap, ec_way, scr1, scr2, scr3)		\
	mov	ec_way, scr1;						\
	and	scr1, JP_ECACHE_NWAY - 1, scr1;	/* mask E$ way bits */	\
	sllx	scr1, JP_EC_TAG_DATA_WAY_SHIFT, scr1;			\
	set	((JP_ECACHE_MAX_SIZE / JP_ECACHE_NWAY) - 1), scr2;	\
	and	afar, scr2, scr3;		/* get set offset */	\
	andn	scr3, (JP_ECACHE_MAX_LSIZE - 1), scr3;	/* VA<5:0>=0 */	\
	or	scr3, scr1, scr3;		/* or WAY bits */	\
	b	1f;							\
	stxa	scr3, [datap + CH_EC_IDX]%asi;	/* store E$ index */	\
	.align	64;							\
1:									\
	JP_EC_DIAG_ACCESS_MEMBAR;					\
	ldxa	[scr3]ASI_EC_DIAG, scr1;	/* get E$ tag */	\
	JP_EC_DIAG_ACCESS_MEMBAR;					\
	stxa	scr1, [datap + CH_EC_TAG]%asi;				\
	add	datap, CH_EC_DATA, datap;				\
2:									\
	ldxa	[scr3]ASI_EC_R, %g0;	/* ld E$ stging regs */		\
	clr	scr1;							\
3:					/* loop thru 5 regs */		\
	ldxa	[scr1]ASI_EC_DATA, scr2;				\
	stxa	scr2, [datap]%asi;					\
	add	datap, 8, datap;					\
	cmp	scr1, CH_ECACHE_STGREG_TOTALSIZE - 8;			\
	bne	3b;							\
	add	scr1, 8, scr1;						\
	btst	CH_ECACHE_STGREG_SIZE, scr3;	/* done? */		\
	beq	2b;							\
	add	scr3, CH_ECACHE_STGREG_SIZE, scr3

#define	GET_ECACHE_DTAGS(afar, datap, scr1, scr2, scr3)			\
	GET_ECACHE_DTAG(afar, datap, 0, scr1, scr2, scr3);		\
	GET_ECACHE_DTAG(afar, datap, 1, scr1, scr2, scr3);		\
	GET_ECACHE_DTAG(afar, datap, 2, scr1, scr2, scr3);		\
	GET_ECACHE_DTAG(afar, datap, 3, scr1, scr2, scr3);		\
	add	datap, (CHD_EC_DATA_SETS-4)*CH_EC_DATA_SIZE, datap;	\
	add	datap, CH_EC_DATA_SIZE * PN_L2_NWAYS, datap

/*
 * Jalapeno does not have cores so these macros are null.
 */
#define	PARK_SIBLING_CORE(dcucr_reg, scr1, scr2)
#define	UNPARK_SIBLING_CORE(dcucr_reg, scr1, scr2)

#if defined(JALAPENO)
/*
 * Jalapeno gets primary AFSR and AFAR.  All bits in the AFSR except
 * the fatal error bits are cleared.
 *	datap: pointer to cpu logout structure.
 *	afar:  returned primary AFAR value.
 *	scr1:  scratch
 *	scr2:  scratch
 */
#define	GET_AFSR_AFAR(datap, afar, scr1, scr2)				\
	ldxa	[%g0]ASI_AFAR, afar;					\
	stxa	afar, [datap + (CH_CLO_DATA + CH_CHD_AFAR)]%asi;	\
	ldxa	[%g0]ASI_AFSR, scr2;					\
	stxa	scr2, [datap + (CH_CLO_DATA + CH_CHD_AFSR)]%asi;	\
	sethi	%hh(C_AFSR_FATAL_ERRS), scr1;				\
	sllx	scr1, 32, scr1;						\
	bclr	scr1, scr2;	/* Clear fatal error bits here, so */	\
	stxa	scr2, [%g0]ASI_AFSR;	/* they're left as is in AFSR */ \
	membar	#Sync
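
/*
 * Note on the AFSR write-back above: the AFSR error-status bits are
 * write-one-to-clear, so storing the captured value with the fatal
 * bits removed clears every non-fatal error that was logged while
 * leaving the fatal bits intact for the trap handler.  In C-like
 * pseudo-code (illustrative only; read_afsr()/write_afsr() are
 * hypothetical stand-ins for the ldxa/stxa pair):
 *
 *	afsr = read_afsr();			// capture
 *	write_afsr(afsr & ~C_AFSR_FATAL_ERRS);	// W1C: each 1 written
 *						// clears that status bit
 */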

/*
 * Jalapeno has no shadow AFAR, null operation.
 */
#define	GET_SHADOW_DATA(afar, datap, scr1, scr2, scr3)

#elif defined(SERRANO)
/*
 * Serrano gets primary AFSR and AFAR.  All bits in the AFSR except
 * the fatal error bits are cleared.  For Serrano, we also save the
 * AFAR2 register.
 *	datap: pointer to cpu logout structure.
 *	afar:  returned primary AFAR value.
 *	scr1:  scratch
 *	scr2:  scratch
 */
#define	GET_AFSR_AFAR(datap, afar, scr1, scr2)				\
	set	ASI_MCU_AFAR2_VA, scr1;					\
	ldxa	[scr1]ASI_MCU_CTRL, afar;				\
	stxa	afar, [datap + (CH_CLO_DATA + CH_CHD_AFAR2)]%asi;	\
	ldxa	[%g0]ASI_AFAR, afar;					\
	stxa	afar, [datap + (CH_CLO_DATA + CH_CHD_AFAR)]%asi;	\
	ldxa	[%g0]ASI_AFSR, scr2;					\
	stxa	scr2, [datap + (CH_CLO_DATA + CH_CHD_AFSR)]%asi;	\
	sethi	%hh(C_AFSR_FATAL_ERRS), scr1;				\
	sllx	scr1, 32, scr1;						\
	bclr	scr1, scr2;	/* Clear fatal error bits here, so */	\
	stxa	scr2, [%g0]ASI_AFSR;	/* they're left as is in AFSR */ \
	membar	#Sync

/*
 * Serrano needs to capture E$, D$ and I$ lines associated with afar2.
 *	afar:  scratch, holds afar2.
 *	datap: pointer to cpu logout structure
 *	scr1:  scratch
 *	scr2:  scratch
 *	scr3:  scratch
 */
#define	GET_SHADOW_DATA(afar, datap, scr1, scr2, scr3)			\
	ldxa	[datap + (CH_CLO_DATA + CH_CHD_AFAR2)]%asi, afar;	\
	add	datap, CH_CLO_SDW_DATA + CH_CHD_EC_DATA, datap;		\
	GET_ECACHE_DTAGS(afar, datap, scr1, scr2, scr3);		\
	GET_DCACHE_DTAG(afar, datap, scr1, scr2, scr3);			\
	GET_ICACHE_DTAG(afar, datap, scr1, scr2, scr3);			\
	sub	datap, CH_CPU_LOGOUT_SIZE, datap
#endif	/* SERRANO */

#elif defined(CHEETAH_PLUS)
/*
 * Macro version of get_ecache_dtag.  We use this macro in the
 * CPU logout code.
 *	afar:   input AFAR, not modified.
 *	datap:  Ptr to ch_ec_data_t, at end pts just past ch_ec_data_t.
 *	pn_way: ecache way for panther (value = 0-3).  For non-panther
 *		cpus, this macro will be called with pn_way = 0.
 *	scr1:   Scratch.
 *	scr2:   Scratch.
 *	scr3:   Scratch.
 */
#define	GET_ECACHE_DTAG(afar, datap, pn_way, scr1, scr2, scr3)		\
	mov	afar, scr3;						\
	andn	scr3, (CH_ECACHE_SUBBLK_SIZE - 1), scr3; /* VA<5:0>=0 */ \
	set	(CH_ECACHE_8M_SIZE - 1), scr2;				\
	and	scr3, scr2, scr3;		/* VA<63:23>=0 */	\
	mov	pn_way, scr1;	/* panther L3$ is 4-way so we ... */	\
	sllx	scr1, PN_L3_WAY_SHIFT, scr1;	/* need to mask... */	\
	or	scr3, scr1, scr3;	/* in the way bits <24:23>. */	\
	b	1f;							\
	stxa	scr3, [datap + CH_EC_IDX]%asi;	/* store E$ index */	\
	.align	64;							\
1:									\
	ldxa	[scr3]ASI_EC_DIAG, scr1;	/* get E$ tag */	\
	stxa	scr1, [datap + CH_EC_TAG]%asi;				\
	set	CHP_ECACHE_IDX_TAG_ECC, scr1;				\
	or	scr3, scr1, scr1;					\
	ldxa	[scr1]ASI_EC_DIAG, scr1;	/* get E$ tag ECC */	\
	stxa	scr1, [datap + CH_EC_TAG_ECC]%asi;			\
	add	datap, CH_EC_DATA, datap;				\
2:									\
	ldxa	[scr3]ASI_EC_R, %g0;	/* ld E$ stging regs */		\
	clr	scr1;							\
3:					/* loop thru 5 regs */		\
	ldxa	[scr1]ASI_EC_DATA, scr2;				\
	stxa	scr2, [datap]%asi;					\
	add	datap, 8, datap;					\
	cmp	scr1, CH_ECACHE_STGREG_TOTALSIZE - 8;			\
	bne	3b;							\
	add	scr1, 8, scr1;						\
	btst	CH_ECACHE_STGREG_SIZE, scr3;	/* done? */		\
	beq	2b;							\
	add	scr3, CH_ECACHE_STGREG_SIZE, scr3

/*
 * If this is a panther, we need to make sure the sibling core is
 * parked so that we avoid any race conditions during diagnostic
 * accesses to the shared L2 and L3 caches.
 *	dcucr_reg: This register will be used to keep track of whether
 *		or not we need to unpark the core later.
 *		It just so happens that we also use this same register
 *		to keep track of our saved DCUCR value so we only touch
 *		bit 4 of the register (which is a "reserved" bit in the
 *		DCUCR) for keeping track of core parking.
 *	scr1: Scratch register.
 *	scr2: Scratch register.
 */
#define	PARK_SIBLING_CORE(dcucr_reg, scr1, scr2)			\
	GET_CPU_IMPL(scr1);						\
	cmp	scr1, PANTHER_IMPL;	/* only park for panthers */	\
	bne,a	%xcc, 2f;						\
	andn	dcucr_reg, PN_PARKED_OTHER_CORE, dcucr_reg;		\
	set	ASI_CORE_RUNNING_STATUS, scr1;	/* check other core */	\
	ldxa	[scr1]ASI_CMP_SHARED, scr2;	/* is it running? */	\
	cmp	scr2, PN_BOTH_CORES_RUNNING;				\
	bne,a	%xcc, 2f;	/* if not running, we are done */	\
	andn	dcucr_reg, PN_PARKED_OTHER_CORE, dcucr_reg;		\
	or	dcucr_reg, PN_PARKED_OTHER_CORE, dcucr_reg;		\
	set	ASI_CORE_ID, scr1;					\
	ldxa	[scr1]ASI_CMP_PER_CORE, scr2;				\
	and	scr2, COREID_MASK, scr2;				\
	or	%g0, 1, scr1;		/* find out which core... */	\
	sll	scr1, scr2, scr2;	/* ... we need to park... */	\
1:									\
	set	ASI_CORE_RUNNING_RW, scr1;				\
	stxa	scr2, [scr1]ASI_CMP_SHARED;	/* ... and park it. */	\
	membar	#Sync;			/* spin until the... */		\
	ldxa	[scr1]ASI_CMP_SHARED, scr1;	/* ...other core... */	\
	cmp	scr1, scr2;	/* ...is parked according to... */	\
	bne,a	%xcc, 1b;	/* ...the core running status reg. */	\
	nop;								\
2:

/*
 * The core running this code will unpark its sibling core if the
 * sibling core had been parked by the current core earlier in this
 * trap handler.
 *	dcucr_reg: This register is used to keep track of whether or not
 *		we need to unpark our sibling core.
 *		It just so happens that we also use this same register
 *		to keep track of our saved DCUCR value so we only touch
 *		bit 4 of the register (which is a "reserved" bit in the
 *		DCUCR) for keeping track of core parking.
 *	scr1: Scratch register.
 *	scr2: Scratch register.
 */
#define	UNPARK_SIBLING_CORE(dcucr_reg, scr1, scr2)			\
	btst	PN_PARKED_OTHER_CORE, dcucr_reg;			\
	bz,pt	%xcc, 1f;	/* if nothing to unpark, we are done */	\
	andn	dcucr_reg, PN_PARKED_OTHER_CORE, dcucr_reg;		\
	set	ASI_CORE_RUNNING_RW, scr1;				\
	set	PN_BOTH_CORES_RUNNING, scr2;	/* we want both... */	\
	stxa	scr2, [scr1]ASI_CMP_SHARED;	/* ...cores running. */	\
	membar	#Sync;							\
1:

/*
 * Cheetah+ and Jaguar get both primary and secondary AFSR/AFAR.  All bits
 * in the primary AFSR are cleared except the fatal error bits.  For Panther,
 * we also have to read and clear the AFSR_EXT, again leaving the fatal
 * error bits alone.
 *	datap: pointer to cpu logout structure.
 *	afar:  returned primary AFAR value.
 *	scr1:  scratch
 *	scr2:  scratch
 */
#define	GET_AFSR_AFAR(datap, afar, scr1, scr2)				\
	set	ASI_SHADOW_REG_VA, scr1;				\
	ldxa	[scr1]ASI_AFAR, scr2;					\
	stxa	scr2, [datap + (CH_CLO_SDW_DATA + CH_CHD_AFAR)]%asi;	\
	ldxa	[scr1]ASI_AFSR, scr2;					\
	stxa	scr2, [datap + (CH_CLO_SDW_DATA + CH_CHD_AFSR)]%asi;	\
	ldxa	[%g0]ASI_AFAR, afar;					\
	stxa	afar, [datap + (CH_CLO_DATA + CH_CHD_AFAR)]%asi;	\
	ldxa	[%g0]ASI_AFSR, scr2;					\
	stxa	scr2, [datap + (CH_CLO_DATA + CH_CHD_AFSR)]%asi;	\
	sethi	%hh(C_AFSR_FATAL_ERRS), scr1;				\
	sllx	scr1, 32, scr1;						\
	bclr	scr1, scr2;	/* Clear fatal error bits here, so */	\
	stxa	scr2, [%g0]ASI_AFSR;	/* they're left as is in AFSR */ \
	membar	#Sync;							\
	GET_CPU_IMPL(scr1);						\
	cmp	scr1, PANTHER_IMPL;					\
	bne	%xcc, 1f;						\
	nop;								\
	set	ASI_SHADOW_AFSR_EXT_VA, scr1;	/* shadow AFSR_EXT */	\
	ldxa	[scr1]ASI_AFSR, scr2;					\
	stxa	scr2, [datap + (CH_CLO_SDW_DATA + CH_CHD_AFSR_EXT)]%asi; \
	set	ASI_AFSR_EXT_VA, scr1;		/* primary AFSR_EXT */	\
	ldxa	[scr1]ASI_AFSR, scr2;					\
	stxa	scr2, [datap + (CH_CLO_DATA + CH_CHD_AFSR_EXT)]%asi;	\
	set	C_AFSR_EXT_FATAL_ERRS, scr1;				\
	bclr	scr1, scr2;	/* Clear fatal error bits here, */	\
	set	ASI_AFSR_EXT_VA, scr1;		/* so they're left */	\
	stxa	scr2, [scr1]ASI_AFSR;		/* as is in AFSR_EXT */	\
	membar	#Sync;							\
1:

/*
 * This macro is used in the CPU logout code to capture diagnostic
 * information from the L2 cache on panther processors.
 *	afar:  input AFAR, not modified.
 *	datap: Ptr to pn_l2_data_t, at end pts just past pn_l2_data_t.
 *	scr1:  Scratch.
 *	scr2:  Scratch.
 *	scr3:  Scratch.
 */
#define	GET_PN_L2_CACHE_DTAGS(afar, datap, scr1, scr2, scr3)		\
	mov	afar, scr3;						\
	set	PN_L2_INDEX_MASK, scr1;					\
	and	scr3, scr1, scr3;					\
	b	1f;	/* code to read tags and data should be ... */	\
	nop;		/* ...on the same cache line if possible. */	\
	.align	128;	/* update this line if you add lines below. */	\
1:									\
	stxa	scr3, [datap + CH_EC_IDX]%asi;	/* store L2$ index */	\
	ldxa	[scr3]ASI_L2_TAG, scr1;		/* read the L2$ tag */	\
	stxa	scr1, [datap + CH_EC_TAG]%asi;				\
	add	datap, CH_EC_DATA, datap;				\
	clr	scr1;							\
2:									\
	ldxa	[scr3 + scr1]ASI_L2_DATA, scr2;	/* loop through */	\
	stxa	scr2, [datap]%asi;		/* <511:256> of L2 */	\
	add	datap, 8, datap;		/* data and record */	\
	cmp	scr1, (PN_L2_LINESIZE / 2) - 8;	/* it in the cpu */	\
	bne	2b;				/* logout struct. */	\
	add	scr1, 8, scr1;						\
	set	PN_L2_DATA_ECC_SEL, scr2;	/* ECC_sel bit. */	\
	ldxa	[scr3 + scr2]ASI_L2_DATA, scr2;	/* Read and record */	\
	stxa	scr2, [datap]%asi;		/* ecc of <511:256> */	\
	add	datap, 8, datap;					\
3:									\
	ldxa	[scr3 + scr1]ASI_L2_DATA, scr2;	/* loop through */	\
	stxa	scr2, [datap]%asi;		/* <255:0> of L2 */	\
	add	datap, 8, datap;		/* data and record */	\
	cmp	scr1, PN_L2_LINESIZE - 8;	/* it in the cpu */	\
	bne	3b;				/* logout struct. */	\
	add	scr1, 8, scr1;						\
	set	PN_L2_DATA_ECC_SEL, scr2;	/* ECC_sel bit. */	\
	add	scr2, PN_L2_ECC_LO_REG, scr2;				\
	ldxa	[scr3 + scr2]ASI_L2_DATA, scr2;	/* Read and record */	\
	stxa	scr2, [datap]%asi;		/* ecc of <255:0>. */	\
	add	datap, 8, datap;		/* Advance pointer */	\
	set	PN_L2_SET_SIZE, scr2;					\
	set	PN_L2_MAX_SET, scr1;					\
	cmp	scr1, scr3;	/* more ways to try for this line? */	\
	bg,a	%xcc, 1b;	/* if so, start over with next way */	\
	add	scr3, scr2, scr3

/*
 * Cheetah+ assumes E$ is 2-way and grabs both E$ lines associated with afar.
 *	afar:  AFAR from access.
 *	datap: pointer to cpu logout structure.
 *	scr1:  scratch
 *	scr2:  scratch
 *	scr3:  scratch
 */
#define	GET_ECACHE_DTAGS(afar, datap, scr1, scr2, scr3)			\
	GET_CPU_IMPL(scr1);						\
	cmp	scr1, PANTHER_IMPL;					\
	bne	%xcc, 4f;						\
	nop;								\
	GET_ECACHE_DTAG(afar, datap, 0, scr1, scr2, scr3);		\
	GET_ECACHE_DTAG(afar, datap, 1, scr1, scr2, scr3);		\
	GET_ECACHE_DTAG(afar, datap, 2, scr1, scr2, scr3);		\
	GET_ECACHE_DTAG(afar, datap, 3, scr1, scr2, scr3);		\
	add	datap, (CHD_EC_DATA_SETS-4)*CH_EC_DATA_SIZE, datap;	\
	GET_PN_L2_CACHE_DTAGS(afar, datap, scr1, scr2, scr3);		\
	b	5f;							\
	nop;								\
4:									\
	GET_ECACHE_DTAG(afar, datap, 0, scr1, scr2, scr3);		\
	GET_ECACHE_WAY_BIT(scr1, scr2);					\
	xor	afar, scr1, afar;					\
	GET_ECACHE_DTAG(afar, datap, 0, scr1, scr2, scr3);		\
	GET_ECACHE_WAY_BIT(scr1, scr2);		/* restore AFAR */	\
	xor	afar, scr1, afar;					\
	add	datap, (CHD_EC_DATA_SETS-2)*CH_EC_DATA_SIZE, datap;	\
	add	datap, CH_EC_DATA_SIZE * PN_L2_NWAYS, datap;		\
5:

/*
 * Cheetah+ needs to capture E$, D$ and I$ lines associated with
 * shadow afar.
 *	afar:  scratch, holds shadow afar.
 *	datap: pointer to cpu logout structure
 *	scr1:  scratch
 *	scr2:  scratch
 *	scr3:  scratch
 */
#define	GET_SHADOW_DATA(afar, datap, scr1, scr2, scr3)			\
	ldxa	[datap + (CH_CLO_SDW_DATA + CH_CHD_AFAR)]%asi, afar;	\
	add	datap, CH_CLO_SDW_DATA + CH_CHD_EC_DATA, datap;		\
	GET_ECACHE_DTAGS(afar, datap, scr1, scr2, scr3);		\
	GET_DCACHE_DTAG(afar, datap, scr1, scr2, scr3);			\
	GET_ICACHE_DTAG(afar, datap, scr1, scr2, scr3);			\
	sub	datap, CH_CPU_LOGOUT_SIZE, datap
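
/*
 * For reference, a minimal C sketch of the non-Panther (2-way E$) path
 * taken by GET_ECACHE_DTAGS above; ec_capture() is a hypothetical
 * stand-in for GET_ECACHE_DTAG.  Flipping the way bit (half the E$
 * size, per GET_ECACHE_WAY_BIT below) moves the index to the same set
 * in the other way, and the second xor restores the caller's AFAR:
 *
 *	uint64_t way_bit = cpunodes[cpuid].ecache_size >> 1;
 *	ec_capture(afar);		// line indexed by AFAR
 *	afar ^= way_bit;
 *	ec_capture(afar);		// same set, other way
 *	afar ^= way_bit;		// AFAR restored
 */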

/*
 * Compute the "Way" bit for 2-way Ecache for Cheetah+.
 */
#define	GET_ECACHE_WAY_BIT(scr1, scr2)					\
	CPU_INDEX(scr1, scr2);						\
	mulx	scr1, CPU_NODE_SIZE, scr1;				\
	add	scr1, ECACHE_SIZE, scr1;				\
	set	cpunodes, scr2;						\
	ld	[scr1 + scr2], scr1;					\
	srlx	scr1, 1, scr1

#else	/* CHEETAH_PLUS */
/*
 * Macro version of get_ecache_dtag.  We use this macro in the
 * CPU logout code.
 *	afar:  input AFAR, not modified.
 *	datap: Ptr to ch_ec_data_t, at end pts just past ch_ec_data_t.
 *	scr1:  Scratch.
 *	scr2:  Scratch.
 *	scr3:  Scratch.
 */
#define	GET_ECACHE_DTAG(afar, datap, scr1, scr2, scr3)			\
	mov	afar, scr3;						\
	andn	scr3, (CH_ECACHE_SUBBLK_SIZE - 1), scr3; /* VA<5:0>=0 */ \
	set	(CH_ECACHE_8M_SIZE - 1), scr2;				\
	and	scr3, scr2, scr3;		/* VA<63:23>=0 */	\
	b	1f;							\
	stxa	scr3, [datap + CH_EC_IDX]%asi;	/* store E$ index */	\
	.align	64;							\
1:									\
	ldxa	[scr3]ASI_EC_DIAG, scr1;	/* get E$ tag */	\
	stxa	scr1, [datap + CH_EC_TAG]%asi;				\
	add	datap, CH_EC_DATA, datap;				\
2:									\
	ldxa	[scr3]ASI_EC_R, %g0;	/* ld E$ stging regs */		\
	clr	scr1;							\
3:					/* loop thru 5 regs */		\
	ldxa	[scr1]ASI_EC_DATA, scr2;				\
	stxa	scr2, [datap]%asi;					\
	add	datap, 8, datap;					\
	cmp	scr1, CH_ECACHE_STGREG_TOTALSIZE - 8;			\
	bne	3b;							\
	add	scr1, 8, scr1;						\
	btst	CH_ECACHE_STGREG_SIZE, scr3;	/* done? */		\
	beq	2b;							\
	add	scr3, CH_ECACHE_STGREG_SIZE, scr3

/*
 * Cheetah does not have cores so these macros are null.
 */
#define	PARK_SIBLING_CORE(dcucr_reg, scr1, scr2)
#define	UNPARK_SIBLING_CORE(dcucr_reg, scr1, scr2)

/*
 * Cheetah gets primary AFSR and AFAR and clears the AFSR, except for the
 * fatal error bits.
 *	datap: pointer to cpu logout structure.
 *	afar:  returned primary AFAR value.
 *	scr1:  scratch
 *	scr2:  scratch
 */
#define	GET_AFSR_AFAR(datap, afar, scr1, scr2)				\
	ldxa	[%g0]ASI_AFAR, afar;					\
	stxa	afar, [datap + (CH_CLO_DATA + CH_CHD_AFAR)]%asi;	\
	ldxa	[%g0]ASI_AFSR, scr2;					\
	stxa	scr2, [datap + (CH_CLO_DATA + CH_CHD_AFSR)]%asi;	\
	sethi	%hh(C_AFSR_FATAL_ERRS), scr1;				\
	sllx	scr1, 32, scr1;						\
	bclr	scr1, scr2;	/* Clear fatal error bits here, so */	\
	stxa	scr2, [%g0]ASI_AFSR;	/* they're left as is in AFSR */ \
	membar	#Sync

/*
 * Cheetah E$ is direct-mapped, so we grab the line data and skip the
 * second line.
 *	afar:  AFAR from access.
 *	datap: pointer to cpu logout structure.
 *	scr1:  scratch
 *	scr2:  scratch
 *	scr3:  scratch
 */
#define	GET_ECACHE_DTAGS(afar, datap, scr1, scr2, scr3)			\
	GET_ECACHE_DTAG(afar, datap, scr1, scr2, scr3);			\
	add	datap, (CHD_EC_DATA_SETS-1)*CH_EC_DATA_SIZE, datap;	\
	add	datap, CH_EC_DATA_SIZE * PN_L2_NWAYS, datap

/*
 * Cheetah has no shadow AFAR, null operation.
 */
#define	GET_SHADOW_DATA(afar, datap, scr1, scr2, scr3)

#endif	/* CHEETAH_PLUS */

/*
 * Cheetah/(Cheetah+ Jaguar Panther)/Jalapeno Macro for capturing CPU
 * logout data at TL>0.  r_val is a register that returns the "failure count"
 * to the caller, and may be used as a scratch register until the end of
 * the macro.  afar is used to return the primary AFAR value to the caller
 * and it too can be used as a scratch register until the end.  r_or_s is
 * a reg or symbol that has the offset within the "cpu_private" data area
 * to deposit the logout data.  t_flags is a register that has the
 * trap-type/trap-level/CEEN info.  This t_flags register may be used after
 * the GET_AFSR_AFAR macro.
 *
 * The CPU logout operation will fail (r_val > 0) if the logout
 * structure in question is already being used.  Otherwise, the CPU
 * logout operation will succeed (r_val = 0).  For failures, r_val
 * returns the busy count (# of times we tried using this CPU logout
 * structure when it was busy.)
 *
 * Register usage:
 *	%asi:   Must be set to either ASI_MEM if the address in datap
 *		is a physical address or to ASI_N if the address in
 *		datap is a virtual address.
 *	r_val:  This register is the return value which tells the
 *		caller whether or not the LOGOUT operation was successful.
 *		For failures, r_val returns the fail count (i.e. number of
 *		times we have tried to use this logout structure when it
 *		was already being used).
 *	afar:   output: contains AFAR on exit
 *	t_flags: input trap type info, may be used as scratch after stored
 *		to cpu log out structure.
 *	datap:  Points to log out data area.
 *	scr1:   Scratch
 *	scr2:   Scratch (may be r_val)
 *	scr3:   Scratch (may be t_flags)
 */
#define	DO_TL1_CPU_LOGOUT(r_val, afar, t_flags, datap, scr1, scr2, scr3) \
	setx	LOGOUT_INVALID, scr2, scr1;				\
	ldxa	[datap + (CH_CLO_DATA + CH_CHD_AFAR)]%asi, scr2;	\
	cmp	scr2, scr1;						\
	bne	8f;							\
	nop;								\
	stxa	t_flags, [datap + CH_CLO_FLAGS]%asi;			\
	GET_AFSR_AFAR(datap, afar, scr1, scr2);				\
	add	datap, CH_CLO_DATA + CH_CHD_EC_DATA, datap;		\
	GET_ECACHE_DTAGS(afar, datap, scr1, scr2, scr3);		\
	GET_DCACHE_DTAG(afar, datap, scr1, scr2, scr3);			\
	GET_ICACHE_DTAG(afar, datap, scr1, scr2, scr3);			\
	sub	datap, CH_CLO_DATA + CH_DIAG_DATA_SIZE, datap;		\
	GET_SHADOW_DATA(afar, datap, scr1, scr2, scr3);			\
	ldxa	[datap + (CH_CLO_DATA + CH_CHD_AFAR)]%asi, afar;	\
	set	0, r_val;	/* return value for success */		\
	ba	9f;							\
	nop;								\
8:									\
	ldxa	[%g0]ASI_AFAR, afar;					\
	ldxa	[datap + CH_CLO_NEST_CNT]%asi, r_val;			\
	inc	r_val;		/* return value for failure */		\
	stxa	r_val, [datap + CH_CLO_NEST_CNT]%asi;			\
	membar	#Sync;							\
9:

/*
 * Cheetah/(Cheetah+ Jaguar Panther)/Jalapeno Macro for capturing CPU
 * logout data.  Uses the DO_TL1_CPU_LOGOUT macro defined above, sets
 * up the expected data pointer in the scr1 register, and sets the %asi
 * register to ASI_N for kernel virtual addresses instead of ASI_MEM as
 * is used at TL>0.
 *
 * The CPU logout operation will fail (r_val > 0) if the logout
 * structure in question is already being used.  Otherwise, the CPU
 * logout operation will succeed (r_val = 0).  For failures, r_val
 * returns the busy count (# of times we tried using this CPU logout
 * structure when it was busy.)
 *
 * Register usage:
 *	r_val:  This register is the return value which tells the
 *		caller whether or not the LOGOUT operation was successful.
 *		For failures, r_val returns the fail count (i.e. number of
 *		times we have tried to use this logout structure when it
 *		was already being used).
 *	afar:   returns AFAR, used internally as afar value.
 *		output: if the cpu_private struct has not been initialized,
 *		then we return the t_flags value listed below.
 *	r_or_s: input offset, either register or constant (symbol).  It's
 *		OK for r_or_s to be a register as long as it's not scr1 or
 *		scr3.
 *	t_flags: input trap type info, may be used as scratch after stored
 *		to cpu log out structure.
 *	scr1:   Scratch, points to log out data area.
 *	scr2:   Scratch (may be r_or_s)
 *	scr3:   Scratch (may be r_val)
 *	scr4:   Scratch (may be t_flags)
 */
#define	DO_CPU_LOGOUT(r_val, afar, r_or_s, t_flags, scr1, scr2, scr3, scr4) \
	GET_CPU_PRIVATE_PTR(r_or_s, scr1, scr3, 7f); /* can't use scr2/4 */ \
	wr	%g0, ASI_N, %asi;					\
	DO_TL1_CPU_LOGOUT(r_val, afar, t_flags, scr1, scr2, scr3, scr4)	\
	ba	6f;							\
	nop;								\
7:									\
	mov	t_flags, afar;		/* depends on afar = %g2 */	\
	set	0, r_val;		/* success in this case. */	\
6:
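
/*
 * For reference, the overall flow of DO_CPU_LOGOUT in C-like pseudo-code
 * (illustrative only; clop and its fields are hypothetical names that
 * mirror the CH_CLO_ and CH_CHD_ offsets used above):
 *
 *	if ((clop = cpu_private_logout_area()) == NULL) {
 *		afar = t_flags;			// no private area yet
 *		return (0);
 *	}
 *	if (clop->clo_data.chd_afar != LOGOUT_INVALID)
 *		return (++clop->clo_nest_cnt);	// busy: fail count
 *	clop->clo_flags = t_flags;
 *	save AFSR and AFAR (plus shadow copies), then E$, D$, I$ lines;
 *	return (0);				// success, afar = AFAR
 */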

/*
 * The P$ is flushed as a side effect of writing to the Primary
 * or Secondary Context Register.  After writing to a context
 * register, every line of the P$ in the Valid state is invalidated,
 * regardless of which context it belongs to.
 * This routine simply touches the Primary context register by
 * reading the current value and writing it back.  The Primary
 * context is not changed.
 */
#define	PCACHE_FLUSHALL(tmp1, tmp2, tmp3)				\
	sethi	%hi(FLUSH_ADDR), tmp1;					\
	set	MMU_PCONTEXT, tmp2;					\
	ldxa	[tmp2]ASI_DMMU, tmp3;					\
	stxa	tmp3, [tmp2]ASI_DMMU;					\
	flush	tmp1	/* See Cheetah PRM 8.10.2 */

/*
 * Macro that flushes the entire Dcache.
 *
 *	arg1 = dcache size
 *	arg2 = dcache linesize
 */
#define	CH_DCACHE_FLUSHALL(arg1, arg2, tmp1)				\
	sub	arg1, arg2, tmp1;					\
1:									\
	stxa	%g0, [tmp1]ASI_DC_TAG;					\
	membar	#Sync;							\
	cmp	%g0, tmp1;						\
	bne,pt	%icc, 1b;						\
	sub	tmp1, arg2, tmp1

/*
 * Macro that flushes the entire Icache.
 *
 * Note that we cannot access ASI 0x67 (ASI_IC_TAG) with the Icache on,
 * because accesses to ASI 0x67 interfere with Icache coherency.  We
 * must make sure the Icache is off, then turn it back on after the entire
 * cache has been invalidated.  If the Icache is originally off, we'll just
 * clear the tags but not turn the Icache on.
 *
 *	arg1 = icache size
 *	arg2 = icache linesize
 */
#define	CH_ICACHE_FLUSHALL(arg1, arg2, tmp1, tmp2)			\
	ldxa	[%g0]ASI_DCU, tmp2;					\
	andn	tmp2, DCU_IC, tmp1;					\
	stxa	tmp1, [%g0]ASI_DCU;					\
	flush	%g0;	/* flush required after changing the IC bit */	\
	sllx	arg2, 1, arg2;		/* arg2 = linesize * 2 */	\
	sllx	arg1, 1, arg1;		/* arg1 = size * 2 */		\
	sub	arg1, arg2, arg1;					\
	or	arg1, CH_ICTAG_LOWER, arg1;	/* "write" tag */	\
1:									\
	stxa	%g0, [arg1]ASI_IC_TAG;					\
	membar	#Sync;				/* Cheetah PRM 8.9.3 */	\
	cmp	arg1, CH_ICTAG_LOWER;					\
	bne,pt	%icc, 1b;						\
	sub	arg1, arg2, arg1;					\
	stxa	tmp2, [%g0]ASI_DCU;					\
	flush	%g0;	/* flush required after changing the IC bit */

#if defined(JALAPENO) || defined(SERRANO)
/*
 * ASI access to the L2 tag or L2 flush can hang the cpu when interacting
 * with combinations of L2 snoops, victims and stores.
 *
 * A possible workaround is to surround each L2 ASI access with membars
 * and make sure that the code is hitting in the Icache.  This requires
 * aligning the code sequence at an E$ boundary and forcing I$ fetches by
 * jumping to selected offsets so that we don't take any I$ misses
 * during ASI access to the L2 tag or L2 flush.  This also requires
 * making sure that we don't take any interrupts or traps (such as
 * fast ECC trap, I$/D$ tag parity error) which can result in eviction
 * of this code sequence from I$, thus causing a miss.
 *
 * Because of the complexity/risk, we have decided to do a partial fix
 * of adding a membar around each ASI access to the L2 tag or L2 flush.
 */
#define	JP_EC_DIAG_ACCESS_MEMBAR					\
	membar	#Sync

/*
 * Jalapeno version of macro that flushes the entire Ecache.
 *
 * Uses the Jalapeno displacement-flush feature of ASI_EC_DIAG.
 *
 *	arg1 = ecache size
 *	arg2 = ecache linesize - not modified; can be an immediate constant.
 */
#define	ECACHE_FLUSHALL(arg1, arg2, tmp1, tmp2)				\
	CPU_INDEX(tmp1, tmp2);						\
	set	JP_ECACHE_IDX_DISP_FLUSH, tmp2;				\
	sllx	tmp1, JP_ECFLUSH_PORTID_SHIFT, tmp1;			\
	or	tmp1, tmp2, tmp1;					\
	srlx	arg1, JP_EC_TO_SET_SIZE_SHIFT, tmp2;			\
1:									\
	subcc	tmp2, arg2, tmp2;					\
	JP_EC_DIAG_ACCESS_MEMBAR;					\
	ldxa	[tmp1 + tmp2]ASI_EC_DIAG, %g0;				\
	JP_EC_DIAG_ACCESS_MEMBAR;					\
	bg,pt	%xcc, 1b;						\
	nop;								\
	mov	1, tmp2;						\
	sllx	tmp2, JP_ECFLUSH_EC_WAY_SHIFT, tmp2;			\
	add	tmp1, tmp2, tmp1;					\
	mov	(JP_ECACHE_NWAY-1), tmp2;				\
	sllx	tmp2, JP_ECFLUSH_EC_WAY_SHIFT, tmp2;			\
	andcc	tmp1, tmp2, tmp2;					\
	bnz,pt	%xcc, 1b;						\
	srlx	arg1, JP_EC_TO_SET_SIZE_SHIFT, tmp2

#else	/* JALAPENO || SERRANO */

/*
 * Cheetah version of macro that flushes the entire Ecache.
 *
 * Need to displacement flush 2x ecache size from Ecache flush area.
 *
 *	arg1 = ecache size
 *	arg2 = ecache linesize
 *	arg3 = ecache flush address - for cheetah only
 */
#define	CH_ECACHE_FLUSHALL(arg1, arg2, arg3)				\
	sllx	arg1, 1, arg1;						\
1:									\
	subcc	arg1, arg2, arg1;					\
	bg,pt	%xcc, 1b;						\
	ldxa	[arg1 + arg3]ASI_MEM, %g0

/*
 * Cheetah+ version of macro that flushes the entire Ecache.
 *
 * Uses the displacement flush feature.
 *
 *	arg1 = ecache size
 *	arg2 = ecache linesize
 *	impl = CPU implementation as returned from GET_CPU_IMPL()
 *		The value in this register is destroyed during execution
 *		of the macro.
 */
#if defined(CHEETAH_PLUS)
#define	CHP_ECACHE_FLUSHALL(arg1, arg2, impl)				\
	cmp	impl, PANTHER_IMPL;					\
	bne	%xcc, 1f;						\
	nop;								\
	set	PN_L3_IDX_DISP_FLUSH, impl;				\
	b	2f;							\
	nop;								\
1:									\
	set	CHP_ECACHE_IDX_DISP_FLUSH, impl;			\
2:									\
	subcc	arg1, arg2, arg1;					\
	bg,pt	%xcc, 2b;						\
	ldxa	[arg1 + impl]ASI_EC_DIAG, %g0
#else	/* CHEETAH_PLUS */
#define	CHP_ECACHE_FLUSHALL(arg1, arg2, impl)
#endif	/* CHEETAH_PLUS */

/*
 * Macro that flushes the entire Ecache.
 *
 *	arg1 = ecache size
 *	arg2 = ecache linesize
 *	arg3 = ecache flush address - for cheetah only
 */
#define	ECACHE_FLUSHALL(arg1, arg2, arg3, tmp1)				\
	GET_CPU_IMPL(tmp1);						\
	cmp	tmp1, CHEETAH_IMPL;					\
	bne	%xcc, 2f;						\
	nop;								\
	CH_ECACHE_FLUSHALL(arg1, arg2, arg3);				\
	ba	3f;							\
	nop;								\
2:									\
	CHP_ECACHE_FLUSHALL(arg1, arg2, tmp1);				\
3:

#endif	/* JALAPENO || SERRANO */

/*
 * Macro that flushes the Panther L2 cache.
 */
#if defined(CHEETAH_PLUS)
#define	PN_L2_FLUSHALL(scr1, scr2, scr3)				\
	GET_CPU_IMPL(scr3);						\
	cmp	scr3, PANTHER_IMPL;					\
	bne	%xcc, 2f;						\
	nop;								\
	set	PN_L2_SIZE, scr1;					\
	set	PN_L2_LINESIZE, scr2;					\
	set	PN_L2_IDX_DISP_FLUSH, scr3;				\
1:									\
	subcc	scr1, scr2, scr1;					\
	bg,pt	%xcc, 1b;						\
	ldxa	[scr1 + scr3]ASI_L2_TAG, %g0;				\
2:
#else	/* CHEETAH_PLUS */
#define	PN_L2_FLUSHALL(scr1, scr2, scr3)
#endif	/* CHEETAH_PLUS */

/*
 * Given a VA and page size (page size as encoded in ASI_MMU_TAG_ACCESS_EXT),
 * this macro returns the TLB index for that mapping based on a 512 entry
 * (2-way set associative) TLB.  Aside from the 16 entry fully associative
 * TLBs, all TLBs in Panther are 512 entry, 2-way set associative.
 *
 * To find the index, we shift the VA right by 13 + (3 * pg_sz) and then
 * mask out all but the lower 8 bits because:
 *
 *	ASI_[D|I]MMU_TAG_ACCESS_EXT.PgSz = 0 for   8K
 *	ASI_[D|I]MMU_TAG_ACCESS_EXT.PgSz = 1 for  64K
 *	ASI_[D|I]MMU_TAG_ACCESS_EXT.PgSz = 2 for 512K
 *	ASI_[D|I]MMU_TAG_ACCESS_EXT.PgSz = 3 for   4M
 *	ASI_[D|I]MMU_TAG_ACCESS_EXT.PgSz = 4 for  32M
 *	ASI_[D|I]MMU_TAG_ACCESS_EXT.PgSz = 5 for 256M
 *
 * and
 *
 *	array index for   8K pages = VA[20:13]
 *	array index for  64K pages = VA[23:16]
 *	array index for 512K pages = VA[26:19]
 *	array index for   4M pages = VA[29:22]
 *	array index for  32M pages = VA[32:25]
 *	array index for 256M pages = VA[35:28]
 *
 * Inputs:
 *
 *	va    - Register.
 *		Input:  Virtual address in which we are interested.
 *		Output: TLB index value.
 *	pg_sz - Register.  Page Size of the TLB in question as encoded
 *		in the ASI_[D|I]MMU_TAG_ACCESS_EXT register.
 */
#if defined(CHEETAH_PLUS)
#define	PN_GET_TLB_INDEX(va, pg_sz)					\
	srlx	va, 13, va;	/* first shift by 13 bits, then */	\
	srlx	va, pg_sz, va;	/* shift by pg_sz three times. */	\
	srlx	va, pg_sz, va;						\
	srlx	va, pg_sz, va;						\
	and	va, 0xff, va;	/* mask out all but the lower 8 bits */
#endif	/* CHEETAH_PLUS */
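
/*
 * For reference, the same computation in C (illustrative; pn_tlb_index()
 * is a hypothetical helper, not part of this header):
 *
 *	static inline uint_t
 *	pn_tlb_index(uint64_t va, uint_t pg_sz)
 *	{
 *		return ((va >> (13 + 3 * pg_sz)) & 0xff);
 *	}
 *
 * e.g. for a 4M page (pg_sz = 3) this extracts VA[29:22], matching the
 * table above.
 */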

/*
 * The following macros are for error traps at TL>0.
 * The issue with error traps at TL>0 is that there are no safely
 * available global registers.  So we use the trick of generating a
 * software trap, then using the %tpc, %tnpc and %tstate registers to
 * temporarily save the values of %g1 and %g2.
 */

/*
 * Macro to generate 8-instruction trap table entry for TL>0 trap handlers.
 * Does the following steps:
 *	1. membar #Sync - required for USIII family errors.
 *	2. Specified software trap.
 * NB: Must be 8 instructions or less to fit in trap table and code must
 *     be relocatable.
 */
#define	CH_ERR_TL1_TRAPENTRY(trapno)					\
	membar	#Sync;							\
	ta	trapno;							\
	nop; nop; nop; nop; nop; nop

/*
 * Macro to generate 8-instruction trap table entry for TL>0 software trap.
 * We save the values of %g1 and %g2 in %tpc, %tnpc and %tstate (since
 * the low-order two bits of %tpc/%tnpc are reserved and read as zero,
 * we need to put the low-order two bits of %g1 and %g2 in %tstate).
 * Note that %tstate has a reserved hole from bits 3-7, so we put the
 * low-order two bits of %g1 in bits 0-1 and the low-order two bits of
 * %g2 in bits 10-11 (ensuring bits 8-9 are zero for use by the D$/I$
 * state bits).  Note that we must do a jmp instruction, since this
 * is moved into the trap table entry.
 * NB: Must be 8 instructions or less to fit in trap table and code must
 *     be relocatable.
 */
#define	CH_ERR_TL1_SWTRAPENTRY(label)					\
	wrpr	%g1, %tpc;						\
	and	%g1, 3, %g1;						\
	wrpr	%g2, %tnpc;						\
	sllx	%g2, CH_ERR_G2_TO_TSTATE_SHFT, %g2;			\
	or	%g1, %g2, %g2;						\
	sethi	%hi(label), %g1;					\
	jmp	%g1 + %lo(label);					\
	wrpr	%g2, %tstate
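
/*
 * For reference, the %g1/%g2 save/restore in C-like pseudo-code
 * (illustrative only; this shows the bits that matter, not the raw
 * register writes).  CH_ERR_TL1_SWTRAPENTRY effectively packs:
 *
 *	tpc    = g1;				// low 2 bits read as 0
 *	tnpc   = g2;				// low 2 bits read as 0
 *	tstate = (g1 & 3) | ((g2 & 3) << CH_ERR_G2_TO_TSTATE_SHFT);
 *
 * and CH_ERR_TL1_ENTER below reconstitutes:
 *
 *	g1 = (tpc & ~3)  | (tstate & 3);
 *	g2 = (tnpc & ~3) | ((tstate >> CH_ERR_G2_TO_TSTATE_SHFT) & 3);
 */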

/*
 * Macro to get ptr to ch_err_tl1_data.
 * reg1 will either point to a physaddr with ASI_MEM in %asi OR it
 * will point to a kernel nucleus virtual address with ASI_N in %asi.
 * This allows us to:
 *	1. Avoid getting MMU misses.  We may have gotten the original
 *	   Fast ECC error in an MMU handler and if we get an MMU trap
 *	   in the TL>0 handlers, we'll scribble on the MMU regs.
 *	2. Allows us to use the same code in the TL>0 handlers whether
 *	   we're accessing kernel nucleus virtual addresses or physical
 *	   addresses.
 * pseudo-code:
 *	reg1 <- ch_err_tl1_paddrs[CPUID];
 *	if (reg1 == NULL) {
 *		reg1 <- &ch_err_tl1_data
 *		%asi <- ASI_N
 *	} else {
 *		reg1 <- reg1 + offset +
 *		    sizeof (ch_err_tl1_data) * (%tl - 3)
 *		%asi <- ASI_MEM
 *	}
 */
#define	GET_CH_ERR_TL1_PTR(reg1, reg2, offset)				\
	CPU_INDEX(reg1, reg2);						\
	sllx	reg1, 3, reg1;						\
	set	ch_err_tl1_paddrs, reg2;				\
	ldx	[reg1+reg2], reg1;					\
	brnz	reg1, 1f;						\
	add	reg1, offset, reg1;					\
	set	ch_err_tl1_data, reg1;					\
	ba	2f;							\
	wr	%g0, ASI_N, %asi;					\
1:	rdpr	%tl, reg2;						\
	sub	reg2, 3, reg2;						\
	mulx	reg2, CH_ERR_TL1_DATA_SIZE, reg2;			\
	add	reg1, reg2, reg1;					\
	wr	%g0, ASI_MEM, %asi;					\
2:

/*
 * Macro to generate entry code for TL>0 error handlers.
 * At the end of this macro, %g1 will point to the ch_err_tl1_data
 * structure and %g2 will have the original flags in the ch_err_tl1_data
 * structure and %g5 will have the value of %tstate where the Fast ECC
 * routines will save the state of the D$ in Bit2 CH_ERR_TSTATE_DC_ON.
 * All %g registers except for %g1, %g2 and %g5 will be available after
 * this macro.
 * Does the following steps:
 *	1. Compute physical address of per-cpu/per-tl save area using
 *	   only %g1+%g2 (which we've saved in %tpc, %tnpc, %tstate)
 *	   leaving address in %g1 and updating the %asi register.
 *	   If there is no data area available, we branch to label.
 *	2. Save %g3-%g7 in save area.
 *	3. Save %tpc->%g3, %tnpc->%g4, %tstate->%g5, which contain
 *	   original %g1+%g2 values (because we're going to change %tl).
 *	4. set %tl <- %tl - 1.  We do this ASAP to make window of
 *	   running at %tl+1 as small as possible.
 *	5. Reconstitute %g1+%g2 from %tpc (%g3), %tnpc (%g4),
 *	   %tstate (%g5) and save in save area, carefully preserving %g5
 *	   because it has the CH_ERR_TSTATE_DC_ON value.
 *	6. Load existing ch_err_tl1_data flags in %g2
 *	7. Compute the new flags
 *	8. If %g2 is non-zero (the structure was busy), shift the new
 *	   flags by CH_ERR_ME_SHIFT and or them with the old flags.
 *	9. Store the updated flags into ch_err_tl1_data flags.
 *	10. If %g2 is non-zero, read the %tpc and store it in
 *	    ch_err_tl1_data.
 */
#define	CH_ERR_TL1_ENTER(flags)						\
	GET_CH_ERR_TL1_PTR(%g1, %g2, CHPR_TL1_ERR_DATA);		\
	stxa	%g3, [%g1 + CH_ERR_TL1_G3]%asi;				\
	stxa	%g4, [%g1 + CH_ERR_TL1_G4]%asi;				\
	stxa	%g5, [%g1 + CH_ERR_TL1_G5]%asi;				\
	stxa	%g6, [%g1 + CH_ERR_TL1_G6]%asi;				\
	stxa	%g7, [%g1 + CH_ERR_TL1_G7]%asi;				\
	rdpr	%tpc, %g3;						\
	rdpr	%tnpc, %g4;						\
	rdpr	%tstate, %g5;						\
	rdpr	%tl, %g6;						\
	sub	%g6, 1, %g6;						\
	wrpr	%g6, %tl;						\
	and	%g5, 3, %g6;						\
	andn	%g3, 3, %g3;						\
	or	%g3, %g6, %g3;						\
	stxa	%g3, [%g1 + CH_ERR_TL1_G1]%asi;				\
	srlx	%g5, CH_ERR_G2_TO_TSTATE_SHFT, %g6;			\
	and	%g6, 3, %g6;						\
	andn	%g4, 3, %g4;						\
	or	%g6, %g4, %g4;						\
	stxa	%g4, [%g1 + CH_ERR_TL1_G2]%asi;				\
	ldxa	[%g1 + CH_ERR_TL1_FLAGS]%asi, %g2;			\
	set	flags | CH_ERR_TL, %g3;					\
	brz	%g2, 9f;						\
	sllx	%g3, CH_ERR_ME_SHIFT, %g4;				\
	or	%g2, %g4, %g3;						\
9:	stxa	%g3, [%g1 + CH_ERR_TL1_FLAGS]%asi;			\
	brnz	%g2, 8f;						\
	rdpr	%tpc, %g4;						\
	stxa	%g4, [%g1 + CH_ERR_TL1_TPC]%asi;			\
8:

/*
 * Turns off D$/I$ and saves the state of DCU_DC+DCU_IC in %tstate Bits 8+9
 * (CH_ERR_TSTATE_DC_ON/CH_ERR_TSTATE_IC_ON).  This is invoked on Fast ECC
 * at TL>0 handlers because the D$ may have corrupted data and we need to
 * turn off the I$ to allow for diagnostic accesses.  We then invoke
 * the normal entry macro and after it is done we save the values of
 * the original D$/I$ state, which is in %g5 bits CH_ERR_TSTATE_DC_ON/
 * CH_ERR_TSTATE_IC_ON, in ch_err_tl1_tmp.
 */
#define	CH_ERR_TL1_FECC_ENTER						\
	ldxa	[%g0]ASI_DCU, %g1;					\
	andn	%g1, DCU_DC + DCU_IC, %g2;				\
	stxa	%g2, [%g0]ASI_DCU;					\
	flush	%g0;	/* DCU_IC needs flush */			\
	rdpr	%tstate, %g2;						\
	and	%g1, DCU_DC + DCU_IC, %g1;				\
	sllx	%g1, CH_ERR_DCU_TO_TSTATE_SHFT, %g1;			\
	or	%g1, %g2, %g2;						\
	wrpr	%g2, %tstate;						\
	CH_ERR_TL1_ENTER(CH_ERR_FECC);					\
	and	%g5, CH_ERR_TSTATE_DC_ON + CH_ERR_TSTATE_IC_ON, %g5;	\
	stxa	%g5, [%g1 + CH_ERR_TL1_TMP]%asi

/*
 * Macro to generate exit code for TL>0 error handlers.
 * We fall into this macro if we've successfully logged the error in
 * the ch_err_tl1_data structure and want the PIL15 softint to pick
 * it up and log it.
 * Does the following steps:
 *	1. Set pending flag for this cpu in ch_err_tl1_pending.
 *	2. Write %set_softint with (1<<PIL_15) to schedule a PIL 15
 *	   softint, which will pick up and log the saved error data.
 */

/*
 * Macro to generate panic code for TL>0 handlers.
 * At label (Unrecoverable error routine)
 *	1. Sets flags in ch_err_tl1_data and leaves in %g2 (first
 *	   argument to cpu_tl1_err_panic).
 *	2. Call cpu_tl1_err_panic via systrap at PIL 15
 */
#define	CH_ERR_TL1_PANIC_EXIT(label)					\
label:	ldxa	[%g1 + CH_ERR_TL1_FLAGS]%asi, %g2;			\
	or	%g2, CH_ERR_TL | CH_ERR_PANIC, %g2;			\
	stxa	%g2, [%g1 + CH_ERR_TL1_FLAGS]%asi;			\
	set	cpu_tl1_err_panic, %g1;					\
	ba	sys_trap;						\
	mov	PIL_15, %g4

/* END CSTYLED */
#endif	/* _ASM */

#ifdef	__cplusplus
}
#endif

#endif	/* _CHEETAHASM_H */