xref: /titanic_50/usr/src/uts/sun4u/cpu/us3_common.c (revision 088e9d477eee66081e407fbc5a33c4da25f66f6a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/systm.h>
31 #include <sys/ddi.h>
32 #include <sys/sysmacros.h>
33 #include <sys/archsystm.h>
34 #include <sys/vmsystm.h>
35 #include <sys/machparam.h>
36 #include <sys/machsystm.h>
37 #include <sys/machthread.h>
38 #include <sys/cpu.h>
39 #include <sys/cmp.h>
40 #include <sys/elf_SPARC.h>
41 #include <vm/vm_dep.h>
42 #include <vm/hat_sfmmu.h>
43 #include <vm/seg_kpm.h>
44 #include <sys/cpuvar.h>
45 #include <sys/cheetahregs.h>
46 #include <sys/us3_module.h>
47 #include <sys/async.h>
48 #include <sys/cmn_err.h>
49 #include <sys/debug.h>
50 #include <sys/dditypes.h>
51 #include <sys/prom_debug.h>
52 #include <sys/prom_plat.h>
53 #include <sys/cpu_module.h>
54 #include <sys/sysmacros.h>
55 #include <sys/intreg.h>
56 #include <sys/clock.h>
57 #include <sys/platform_module.h>
58 #include <sys/machtrap.h>
59 #include <sys/ontrap.h>
60 #include <sys/panic.h>
61 #include <sys/memlist.h>
62 #include <sys/bootconf.h>
63 #include <sys/ivintr.h>
64 #include <sys/atomic.h>
65 #include <sys/taskq.h>
66 #include <sys/note.h>
67 #include <sys/ndifm.h>
68 #include <sys/ddifm.h>
69 #include <sys/fm/protocol.h>
70 #include <sys/fm/util.h>
71 #include <sys/fm/cpu/UltraSPARC-III.h>
72 #include <sys/fpras_impl.h>
73 #include <sys/dtrace.h>
74 #include <sys/watchpoint.h>
75 #include <sys/plat_ecc_unum.h>
76 #include <sys/cyclic.h>
77 #include <sys/errorq.h>
78 #include <sys/errclassify.h>
79 
80 #ifdef	CHEETAHPLUS_ERRATUM_25
81 #include <sys/xc_impl.h>
82 #endif	/* CHEETAHPLUS_ERRATUM_25 */
83 
84 /*
85  * Note that 'Cheetah PRM' refers to:
86  *   SPARC V9 JPS1 Implementation Supplement: Sun UltraSPARC-III
87  */
88 
89 /*
90  * Per CPU pointers to physical address of TL>0 logout data areas.
91  * These pointers have to be in the kernel nucleus to avoid MMU
92  * misses.
93  */
94 uint64_t ch_err_tl1_paddrs[NCPU];
95 
96 /*
97  * One statically allocated structure to use during startup/DR
98  * to prevent unnecessary panics.
99  */
100 ch_err_tl1_data_t ch_err_tl1_data;
101 
102 /*
103  * Per CPU pending error at TL>0, used by level15 softint handler
104  */
105 uchar_t ch_err_tl1_pending[NCPU];
106 
107 /*
108  * For deferred CE re-enable after trap.
109  */
110 taskq_t		*ch_check_ce_tq;
111 
112 /*
113  * Internal functions.
114  */
115 static int cpu_async_log_err(void *flt, errorq_elem_t *eqep);
116 static void cpu_log_diag_info(ch_async_flt_t *ch_flt);
117 static void cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason,
118     ecc_type_to_info_t *eccp, ch_diag_data_t *cdp);
119 static int clear_ecc(struct async_flt *ecc);
120 #if defined(CPU_IMP_ECACHE_ASSOC)
121 static int cpu_ecache_line_valid(ch_async_flt_t *ch_flt);
122 #endif
123 static int cpu_ecache_set_size(struct cpu *cp);
124 static int cpu_ectag_line_invalid(int cachesize, uint64_t tag);
125 static int cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr);
126 static uint64_t cpu_ectag_to_pa(int setsize, uint64_t tag);
127 static int cpu_ectag_pa_to_subblk_state(int cachesize,
128 				uint64_t subaddr, uint64_t tag);
129 static void cpu_flush_ecache_line(ch_async_flt_t *ch_flt);
130 static int afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit);
131 static int afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit);
132 static int afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit);
133 static int afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit);
134 static int synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit);
135 static void cpu_uninit_ecache_scrub_dr(struct cpu *cp);
136 static void cpu_scrubphys(struct async_flt *aflt);
137 static void cpu_payload_add_aflt(struct async_flt *, nvlist_t *, nvlist_t *,
138     int *, int *);
139 static void cpu_payload_add_ecache(struct async_flt *, nvlist_t *);
140 static void cpu_ereport_init(struct async_flt *aflt);
141 static int cpu_check_secondary_errors(ch_async_flt_t *, uint64_t, uint64_t);
142 static uint8_t cpu_flt_bit_to_plat_error(struct async_flt *aflt);
143 static void cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
144     uint64_t nceen, ch_cpu_logout_t *clop);
145 static int cpu_ce_delayed_ec_logout(uint64_t);
146 static int cpu_matching_ecache_line(uint64_t, void *, int, int *);
147 
148 #ifdef	CHEETAHPLUS_ERRATUM_25
149 static int mondo_recover_proc(uint16_t, int);
150 static void cheetah_nudge_init(void);
151 static void cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr,
152     cyc_time_t *when);
153 static void cheetah_nudge_buddy(void);
154 #endif	/* CHEETAHPLUS_ERRATUM_25 */
155 
156 #if defined(CPU_IMP_L1_CACHE_PARITY)
157 static void cpu_dcache_parity_info(ch_async_flt_t *ch_flt);
158 static void cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index);
159 static void cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
160     ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word);
161 static void cpu_icache_parity_info(ch_async_flt_t *ch_flt);
162 static void cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index);
163 static void cpu_pcache_parity_info(ch_async_flt_t *ch_flt);
164 static void cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index);
165 static void cpu_payload_add_dcache(struct async_flt *, nvlist_t *);
166 static void cpu_payload_add_icache(struct async_flt *, nvlist_t *);
167 #endif	/* CPU_IMP_L1_CACHE_PARITY */
168 
169 int (*p2get_mem_info)(int synd_code, uint64_t paddr,
170     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
171     int *segsp, int *banksp, int *mcidp);
172 
173 /*
174  * This table is used to determine which bit(s) is(are) bad when an ECC
175  * error occurs.  The array is indexed by a 9-bit syndrome.  The entries
176  * of this array have the following semantics:
177  *
178  *      00-127  The number of the bad bit, when only one bit is bad.
179  *      128     ECC bit C0 is bad.
180  *      129     ECC bit C1 is bad.
181  *      130     ECC bit C2 is bad.
182  *      131     ECC bit C3 is bad.
183  *      132     ECC bit C4 is bad.
184  *      133     ECC bit C5 is bad.
185  *      134     ECC bit C6 is bad.
186  *      135     ECC bit C7 is bad.
187  *      136     ECC bit C8 is bad.
188  *	137-143 reserved for Mtag Data and ECC.
189  *      144(M2) Two bits are bad within a nibble.
190  *      145(M3) Three bits are bad within a nibble.
191  *      146(M4) Four bits are bad within a nibble.
192  *      147(M)  Multiple bits (5 or more) are bad.
193  *      148     NO bits are bad.
194  * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Tables 11-4,11-5.
195  */
196 
197 #define	C0	128
198 #define	C1	129
199 #define	C2	130
200 #define	C3	131
201 #define	C4	132
202 #define	C5	133
203 #define	C6	134
204 #define	C7	135
205 #define	C8	136
206 #define	MT0	137	/* Mtag Data bit 0 */
207 #define	MT1	138
208 #define	MT2	139
209 #define	MTC0	140	/* Mtag Check bit 0 */
210 #define	MTC1	141
211 #define	MTC2	142
212 #define	MTC3	143
213 #define	M2	144
214 #define	M3	145
215 #define	M4	146
216 #define	M	147
217 #define	NA	148
218 #if defined(JALAPENO) || defined(SERRANO)
219 #define	S003	149	/* Syndrome 0x003 => likely from CPU/EDU:ST/FRU/BP */
220 #define	S003MEM	150	/* Syndrome 0x003 => likely from WDU/WBP */
221 #define	SLAST	S003MEM	/* last special syndrome */
222 #else /* JALAPENO || SERRANO */
223 #define	S003	149	/* Syndrome 0x003 => likely from EDU:ST */
224 #define	S071	150	/* Syndrome 0x071 => likely from WDU/CPU */
225 #define	S11C	151	/* Syndrome 0x11c => likely from BERR/DBERR */
226 #define	SLAST	S11C	/* last special syndrome */
227 #endif /* JALAPENO || SERRANO */
228 #if defined(JALAPENO) || defined(SERRANO)
229 #define	BPAR0	152	/* syndromes 152 through 167 for bus parity */
230 #define	BPAR15	167
231 #endif	/* JALAPENO || SERRANO */
232 
233 static uint8_t ecc_syndrome_tab[] =
234 {
235 NA,  C0,  C1, S003, C2,  M2,  M3,  47,  C3,  M2,  M2,  53,  M2,  41,  29,   M,
236 C4,   M,   M,  50,  M2,  38,  25,  M2,  M2,  33,  24,  M2,  11,   M,  M2,  16,
237 C5,   M,   M,  46,  M2,  37,  19,  M2,   M,  31,  32,   M,   7,  M2,  M2,  10,
238 M2,  40,  13,  M2,  59,   M,  M2,  66,   M,  M2,  M2,   0,  M2,  67,  71,   M,
239 C6,   M,   M,  43,   M,  36,  18,   M,  M2,  49,  15,   M,  63,  M2,  M2,   6,
240 M2,  44,  28,  M2,   M,  M2,  M2,  52,  68,  M2,  M2,  62,  M2,  M3,  M3,  M4,
241 M2,  26, 106,  M2,  64,   M,  M2,   2, 120,   M,  M2,  M3,   M,  M3,  M3,  M4,
242 #if defined(JALAPENO) || defined(SERRANO)
243 116, M2,  M2,  M3,  M2,  M3,   M,  M4,  M2,  58,  54,  M2,   M,  M4,  M4,  M3,
244 #else	/* JALAPENO || SERRANO */
245 116, S071, M2,  M3,  M2,  M3,   M,  M4,  M2,  58,  54,  M2,   M,  M4,  M4,  M3,
246 #endif	/* JALAPENO || SERRANO */
247 C7,  M2,   M,  42,   M,  35,  17,  M2,   M,  45,  14,  M2,  21,  M2,  M2,   5,
248 M,   27,   M,   M,  99,   M,   M,   3, 114,  M2,  M2,  20,  M2,  M3,  M3,   M,
249 M2,  23, 113,  M2, 112,  M2,   M,  51,  95,   M,  M2,  M3,  M2,  M3,  M3,  M2,
250 103,  M,  M2,  M3,  M2,  M3,  M3,  M4,  M2,  48,   M,   M,  73,  M2,   M,  M3,
251 M2,  22, 110,  M2, 109,  M2,   M,   9, 108,  M2,   M,  M3,  M2,  M3,  M3,   M,
252 102, M2,   M,   M,  M2,  M3,  M3,   M,  M2,  M3,  M3,  M2,   M,  M4,   M,  M3,
253 98,   M,  M2,  M3,  M2,   M,  M3,  M4,  M2,  M3,  M3,  M4,  M3,   M,   M,   M,
254 M2,  M3,  M3,   M,  M3,   M,   M,   M,  56,  M4,   M,  M3,  M4,   M,   M,   M,
255 C8,   M,  M2,  39,   M,  34, 105,  M2,   M,  30, 104,   M, 101,   M,   M,   4,
256 #if defined(JALAPENO) || defined(SERRANO)
257 M,    M, 100,   M,  83,   M,  M2,  12,  87,   M,   M,  57,  M2,   M,  M3,   M,
258 #else	/* JALAPENO || SERRANO */
259 M,    M, 100,   M,  83,   M,  M2,  12,  87,   M,   M,  57, S11C,  M,  M3,   M,
260 #endif	/* JALAPENO || SERRANO */
261 M2,  97,  82,  M2,  78,  M2,  M2,   1,  96,   M,   M,   M,   M,   M,  M3,  M2,
262 94,   M,  M2,  M3,  M2,   M,  M3,   M,  M2,   M,  79,   M,  69,   M,  M4,   M,
263 M2,  93,  92,   M,  91,   M,  M2,   8,  90,  M2,  M2,   M,   M,   M,   M,  M4,
264 89,   M,   M,  M3,  M2,  M3,  M3,   M,   M,   M,  M3,  M2,  M3,  M2,   M,  M3,
265 86,   M,  M2,  M3,  M2,   M,  M3,   M,  M2,   M,  M3,   M,  M3,   M,   M,  M3,
266 M,    M,  M3,  M2,  M3,  M2,  M4,   M,  60,   M,  M2,  M3,  M4,   M,   M,  M2,
267 M2,  88,  85,  M2,  84,   M,  M2,  55,  81,  M2,  M2,  M3,  M2,  M3,  M3,  M4,
268 77,   M,   M,   M,  M2,  M3,   M,   M,  M2,  M3,  M3,  M4,  M3,  M2,   M,   M,
269 74,   M,  M2,  M3,   M,   M,  M3,   M,   M,   M,  M3,   M,  M3,   M,  M4,  M3,
270 M2,  70, 107,  M4,  65,  M2,  M2,   M, 127,   M,   M,   M,  M2,  M3,  M3,   M,
271 80,  M2,  M2,  72,   M, 119, 118,   M,  M2, 126,  76,   M, 125,   M,  M4,  M3,
272 M2, 115, 124,   M,  75,   M,   M,  M3,  61,   M,  M4,   M,  M4,   M,   M,   M,
273 M,  123, 122,  M4, 121,  M4,   M,  M3, 117,  M2,  M2,  M3,  M4,  M3,   M,   M,
274 111,  M,   M,   M,  M4,  M3,  M3,   M,   M,   M,  M3,   M,  M3,  M2,   M,   M
275 };
276 
277 #define	ESYND_TBL_SIZE	(sizeof (ecc_syndrome_tab) / sizeof (uint8_t))
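
/*
 * Illustrative sketch of how the table above is consumed: the 9-bit E_SYND
 * value taken from the AFSR indexes ecc_syndrome_tab directly.  The guard
 * and function below are hypothetical examples only and are not part of
 * this file's error handling path.
 */
#ifdef	ECC_SYNDROME_TAB_EXAMPLE
static int
ecc_syndrome_lookup_example(ushort_t synd)
{
	if (synd >= ESYND_TBL_SIZE)
		return (-1);		/* not a valid 9-bit syndrome */

	/*
	 * 0-127 name a single bad data bit, C0-C8 a bad check bit,
	 * M2/M3/M4/M multiple bad bits, and NA no bad bits at all.
	 */
	return (ecc_syndrome_tab[synd]);
}
#endif	/* ECC_SYNDROME_TAB_EXAMPLE */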
278 
279 #if !(defined(JALAPENO) || defined(SERRANO))
280 /*
281  * This table is used to determine which bit(s) is(are) bad when a Mtag
282  * error occurs.  The array is indexed by a 4-bit ECC syndrome.  The entries
283  * of this array have the following semantics:
284  *
285  *      -1	Invalid mtag syndrome.
286  *      137     Mtag Data 0 is bad.
287  *      138     Mtag Data 1 is bad.
288  *      139     Mtag Data 2 is bad.
289  *      140     Mtag ECC 0 is bad.
290  *      141     Mtag ECC 1 is bad.
291  *      142     Mtag ECC 2 is bad.
292  *      143     Mtag ECC 3 is bad.
293  * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Table 11-6.
294  */
295 short mtag_syndrome_tab[] =
296 {
297 NA, MTC0, MTC1, M2, MTC2, M2, M2, MT0, MTC3, M2, M2,  MT1, M2, MT2, M2, M2
298 };
299 
300 #define	MSYND_TBL_SIZE	(sizeof (mtag_syndrome_tab) / sizeof (short))
301 
302 #else /* !(JALAPENO || SERRANO) */
303 
304 #define	BSYND_TBL_SIZE	16
305 
306 #endif /* !(JALAPENO || SERRANO) */
307 
308 /*
309  * CE initial classification and subsequent action lookup table
310  */
311 static ce_dispact_t ce_disp_table[CE_INITDISPTBL_SIZE];
312 static int ce_disp_inited;
313 
314 /*
315  * Set to disable leaky and partner check for memory correctables
316  */
317 int ce_xdiag_off;
318 
319 /*
320  * The following are not incremented atomically so are indicative only
321  */
322 static int ce_xdiag_drops;
323 static int ce_xdiag_lkydrops;
324 static int ce_xdiag_ptnrdrops;
325 static int ce_xdiag_bad;
326 
327 /*
328  * CE leaky check callback structure
329  */
330 typedef struct {
331 	struct async_flt *lkycb_aflt;
332 	errorq_t *lkycb_eqp;
333 	errorq_elem_t *lkycb_eqep;
334 } ce_lkychk_cb_t;
335 
336 /*
337  * defines for various ecache_flush_flag's
338  */
339 #define	ECACHE_FLUSH_LINE	1
340 #define	ECACHE_FLUSH_ALL	2
341 
342 /*
343  * STICK sync
344  */
345 #define	STICK_ITERATION 10
346 #define	MAX_TSKEW	1
347 #define	EV_A_START	0
348 #define	EV_A_END	1
349 #define	EV_B_START	2
350 #define	EV_B_END	3
351 #define	EVENTS		4
352 
353 static int64_t stick_iter = STICK_ITERATION;
354 static int64_t stick_tsk = MAX_TSKEW;
355 
356 typedef enum {
357 	EVENT_NULL = 0,
358 	SLAVE_START,
359 	SLAVE_CONT,
360 	MASTER_START
361 } event_cmd_t;
362 
363 static volatile event_cmd_t stick_sync_cmd = EVENT_NULL;
364 static int64_t timestamp[EVENTS];
365 static volatile int slave_done;
366 
367 #ifdef DEBUG
368 #define	DSYNC_ATTEMPTS 64
369 typedef struct {
370 	int64_t	skew_val[DSYNC_ATTEMPTS];
371 } ss_t;
372 
373 ss_t stick_sync_stats[NCPU];
374 #endif /* DEBUG */
375 
376 /*
377  * Maximum number of contexts for Cheetah.
378  */
379 #define	MAX_NCTXS	(1 << 13)
380 
381 /* Will be set !NULL for Cheetah+ and derivatives. */
382 uchar_t *ctx_pgsz_array = NULL;
383 #if defined(CPU_IMP_DUAL_PAGESIZE)
384 static uchar_t ctx_pgsz_arr[MAX_NCTXS];
385 uint_t disable_dual_pgsz = 0;
386 #endif	/* CPU_IMP_DUAL_PAGESIZE */
387 
388 /*
389  * Save the cache bootup state for use when internal
390  * caches are to be re-enabled after an error occurs.
391  */
392 uint64_t cache_boot_state;
393 
394 /*
395  * PA[22:0] represents the displacement in Safari configuration space.
396  */
397 uint_t	root_phys_addr_lo_mask = 0x7fffffu;
398 
399 bus_config_eclk_t bus_config_eclk[] = {
400 #if defined(JALAPENO) || defined(SERRANO)
401 	{JBUS_CONFIG_ECLK_1_DIV, JBUS_CONFIG_ECLK_1},
402 	{JBUS_CONFIG_ECLK_2_DIV, JBUS_CONFIG_ECLK_2},
403 	{JBUS_CONFIG_ECLK_32_DIV, JBUS_CONFIG_ECLK_32},
404 #else /* JALAPENO || SERRANO */
405 	{SAFARI_CONFIG_ECLK_1_DIV, SAFARI_CONFIG_ECLK_1},
406 	{SAFARI_CONFIG_ECLK_2_DIV, SAFARI_CONFIG_ECLK_2},
407 	{SAFARI_CONFIG_ECLK_32_DIV, SAFARI_CONFIG_ECLK_32},
408 #endif /* JALAPENO || SERRANO */
409 	{0, 0}
410 };
411 
412 /*
413  * Interval for deferred CEEN reenable
414  */
415 int cpu_ceen_delay_secs = CPU_CEEN_DELAY_SECS;
416 
417 /*
418  * set in /etc/system to control logging of user BERR/TO's
419  */
420 int cpu_berr_to_verbose = 0;
421 
422 /*
423  * set to 0 in /etc/system to defer CEEN reenable for all CEs
424  */
425 uint64_t cpu_ce_not_deferred = CPU_CE_NOT_DEFERRED;
426 uint64_t cpu_ce_not_deferred_ext = CPU_CE_NOT_DEFERRED_EXT;
427 
428 /*
429  * Set of all offline cpus
430  */
431 cpuset_t cpu_offline_set;
432 
433 static void cpu_delayed_check_ce_errors(void *);
434 static void cpu_check_ce_errors(void *);
435 void cpu_error_ecache_flush(ch_async_flt_t *);
436 static int cpu_error_ecache_flush_required(ch_async_flt_t *);
437 static void cpu_log_and_clear_ce(ch_async_flt_t *);
438 void cpu_ce_detected(ch_cpu_errors_t *, int);
439 
440 /*
441  * CE Leaky check timeout in microseconds.  This is chosen to be twice the
442  * memory refresh interval of current DIMMs (64ms).  After the initial fix,
443  * this gives at least one full refresh cycle in which the cell can leak
444  * (after which further refreshes simply reinforce any incorrect bit value).
445  */
446 clock_t cpu_ce_lkychk_timeout_usec = 128000;
447 
448 /*
449  * CE partner check partner caching period in seconds
450  */
451 int cpu_ce_ptnr_cachetime_sec = 60;
452 
453 /*
454  * Sets trap table entry ttentry by overwriting eight instructions from ttlabel
455  */
456 #define	CH_SET_TRAP(ttentry, ttlabel)			\
457 		bcopy((const void *)&ttlabel, &ttentry, 32);		\
458 		flush_instr_mem((caddr_t)&ttentry, 32);
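
/*
 * The 32 bytes copied above are eight 4-byte SPARC instructions, i.e. one
 * full trap table entry; flush_instr_mem() then flushes that range so the
 * newly written handler is what gets fetched on the next trap.  A
 * hypothetical use (real entry and label names vary by chip) looks like:
 *
 *	CH_SET_TRAP(trap_table_entry, chip_specific_handler_instr);
 */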
459 
460 static int min_ecache_size;
461 static uint_t priv_hcl_1;
462 static uint_t priv_hcl_2;
463 static uint_t priv_hcl_4;
464 static uint_t priv_hcl_8;
465 
466 void
467 cpu_setup(void)
468 {
469 	extern int at_flags;
470 	extern int disable_delay_tlb_flush, delay_tlb_flush;
471 	extern int cpc_has_overflow_intr;
472 	extern int disable_text_largepages;
473 	extern int use_text_pgsz4m;
474 
475 	/*
476 	 * Setup chip-specific trap handlers.
477 	 */
478 	cpu_init_trap();
479 
480 	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);
481 
482 	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | EF_SPARC_SUN_US3;
483 
484 	/*
485 	 * save the cache bootup state.
486 	 */
487 	cache_boot_state = get_dcu() & DCU_CACHE;
488 
489 	/*
490 	 * Use the maximum number of contexts available for Cheetah
491 	 * unless it has been tuned for debugging.
492 	 * We are checking against 0 here since this value can be patched
493 	 * while booting.  It cannot be patched via /etc/system since it
494 	 * will be patched too late and thus cause the system to panic.
495 	 */
496 	if (nctxs == 0)
497 		nctxs = MAX_NCTXS;
498 
499 	/*
500 	 * Due to the number of entries in the fully-associative tlb
501 	 * this may have to be tuned lower than in spitfire.
502 	 */
503 	pp_slots = MIN(8, MAXPP_SLOTS);
504 
505 	/*
506 	 * Block stores do not invalidate all pages of the d$; pagecopy
507 	 * et al. need virtual translations with virtual coloring taken
508 	 * into consideration.  prefetch/ldd will pollute the d$ on the
509 	 * load side.
510 	 */
511 	pp_consistent_coloring = PPAGE_STORE_VCOLORING | PPAGE_LOADS_POLLUTE;
512 
513 	if (use_page_coloring) {
514 		do_pg_coloring = 1;
515 		if (use_virtual_coloring)
516 			do_virtual_coloring = 1;
517 	}
518 
519 	isa_list =
520 	    "sparcv9+vis2 sparcv9+vis sparcv9 "
521 	    "sparcv8plus+vis2 sparcv8plus+vis sparcv8plus "
522 	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";
523 
524 	/*
525 	 * On Panther-based machines, this should
526 	 * also include AV_SPARC_POPC.
527 	 */
528 	cpu_hwcap_flags = AV_SPARC_VIS | AV_SPARC_VIS2;
529 
530 	/*
531 	 * On Cheetah, there's no hole in the virtual address space
532 	 */
533 	hole_start = hole_end = 0;
534 
535 	/*
536 	 * The kpm mapping window.
537 	 * kpm_size:
538 	 *	The size of a single kpm range.
539 	 *	The overall size will be: kpm_size * vac_colors.
540 	 * kpm_vbase:
541 	 *	The virtual start address of the kpm range within the kernel
542 	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
543 	 */
544 	kpm_size = (size_t)(8ull * 1024 * 1024 * 1024 * 1024); /* 8TB */
545 	kpm_size_shift = 43;
546 	kpm_vbase = (caddr_t)0x8000000000000000ull; /* 8EB */
547 	kpm_smallpages = 1;
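	/*
	 * For reference: 2^43 bytes == 8TB, matching kpm_size above, and
	 * kpm_vbase at 2^63 (the "8EB" point) places the kpm window at the
	 * start of the upper half of the 64-bit virtual address space.
	 */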
548 
549 	/*
550 	 * The traptrace code uses either %tick or %stick for
551 	 * timestamping.  We have %stick so we can use it.
552 	 */
553 	traptrace_use_stick = 1;
554 
555 	/*
556 	 * Cheetah has a performance counter overflow interrupt
557 	 */
558 	cpc_has_overflow_intr = 1;
559 
560 	/*
561 	 * Use cheetah flush-all support
562 	 * Use Cheetah flush-all support
563 	if (!disable_delay_tlb_flush)
564 		delay_tlb_flush = 1;
565 
566 #if defined(CPU_IMP_DUAL_PAGESIZE)
567 	/*
568 	 * Use Cheetah+ and later dual page size support.
569 	 */
570 	if (!disable_dual_pgsz) {
571 		ctx_pgsz_array = ctx_pgsz_arr;
572 	}
573 #endif	/* CPU_IMP_DUAL_PAGESIZE */
574 
575 	/*
576 	 * Declare that this architecture/cpu combination does fpRAS.
577 	 */
578 	fpras_implemented = 1;
579 
580 	/*
581 	 * Enable 4M pages to be used for mapping user text by default.  Don't
582 	 * use large pages for initialized data segments since we may not know
583 	 * at exec() time what should be the preferred large page size for DTLB
584 	 * programming.
585 	 */
586 	use_text_pgsz4m = 1;
587 	disable_text_largepages = (1 << TTE64K) | (1 << TTE512K) |
588 	    (1 << TTE32M) | (1 << TTE256M);
589 
590 	/*
591 	 * Setup CE lookup table
592 	 */
593 	CE_INITDISPTBL_POPULATE(ce_disp_table);
594 	ce_disp_inited = 1;
595 }
596 
597 /*
598  * Called by setcpudelay
599  */
600 void
601 cpu_init_tick_freq(void)
602 {
603 	/*
604 	 * For UltraSPARC III and beyond we want to use the
605 	 * system clock rate as the basis for low-level timing,
606 	 * due to support for mixed-speed CPUs and power management.
607 	 */
608 	if (system_clock_freq == 0)
609 		cmn_err(CE_PANIC, "setcpudelay: invalid system_clock_freq");
610 
611 	sys_tick_freq = system_clock_freq;
612 }
613 
614 #ifdef CHEETAHPLUS_ERRATUM_25
615 /*
616  * Tunables
617  */
618 int cheetah_bpe_off = 0;
619 int cheetah_sendmondo_recover = 1;
620 int cheetah_sendmondo_fullscan = 0;
621 int cheetah_sendmondo_recover_delay = 5;
622 
623 #define	CHEETAH_LIVELOCK_MIN_DELAY	1
624 
625 /*
626  * Recovery Statistics
627  */
628 typedef struct cheetah_livelock_entry	{
629 	int cpuid;		/* fallen cpu */
630 	int buddy;		/* cpu that ran recovery */
631 	clock_t lbolt;		/* when recovery started */
632 	hrtime_t recovery_time;	/* time spent in recovery */
633 } cheetah_livelock_entry_t;
634 
635 #define	CHEETAH_LIVELOCK_NENTRY	32
636 
637 cheetah_livelock_entry_t cheetah_livelock_hist[CHEETAH_LIVELOCK_NENTRY];
638 int cheetah_livelock_entry_nxt;
639 
640 #define	CHEETAH_LIVELOCK_ENTRY_NEXT(statp)	{			\
641 	statp = cheetah_livelock_hist + cheetah_livelock_entry_nxt;	\
642 	if (++cheetah_livelock_entry_nxt >= CHEETAH_LIVELOCK_NENTRY) {	\
643 		cheetah_livelock_entry_nxt = 0;				\
644 	}								\
645 }
646 
647 #define	CHEETAH_LIVELOCK_ENTRY_SET(statp, item, val)	statp->item = val
648 
649 struct {
650 	hrtime_t hrt;		/* maximum recovery time */
651 	int recovery;		/* recovered */
652 	int full_claimed;	/* maximum pages claimed in full recovery */
653 	int proc_entry;		/* attempted to claim TSB */
654 	int proc_tsb_scan;	/* tsb scanned */
655 	int proc_tsb_partscan;	/* tsb partially scanned */
656 	int proc_tsb_fullscan;	/* whole tsb scanned */
657 	int proc_claimed;	/* maximum pages claimed in tsb scan */
658 	int proc_user;		/* user thread */
659 	int proc_kernel;	/* kernel thread */
660 	int proc_onflt;		/* bad stack */
661 	int proc_cpu;		/* null cpu */
662 	int proc_thread;	/* null thread */
663 	int proc_proc;		/* null proc */
664 	int proc_as;		/* null as */
665 	int proc_hat;		/* null hat */
666 	int proc_hat_inval;	/* hat contents don't make sense */
667 	int proc_hat_busy;	/* hat is changing TSBs */
668 	int proc_tsb_reloc;	/* TSB skipped because being relocated */
669 	int proc_cnum_bad;	/* cnum out of range */
670 	int proc_cnum;		/* last cnum processed */
671 	tte_t proc_tte;		/* last tte processed */
672 } cheetah_livelock_stat;
673 
674 #define	CHEETAH_LIVELOCK_STAT(item)	cheetah_livelock_stat.item++
675 
676 #define	CHEETAH_LIVELOCK_STATSET(item, value)		\
677 	cheetah_livelock_stat.item = value
678 
679 #define	CHEETAH_LIVELOCK_MAXSTAT(item, value)	{	\
680 	if (value > cheetah_livelock_stat.item)		\
681 		cheetah_livelock_stat.item = value;	\
682 }
683 
684 /*
685  * Attempt to recover a cpu by claiming every cache line recorded in
686  * the TSB that the non-responsive cpu is using.  Since we can't
687  * grab any adaptive lock, this is at best an attempt to do so. Because
688  * we don't grab any locks, we must operate under the protection of
689  * on_fault().
690  *
691  * Return 1 if cpuid could be recovered, 0 if failed.
692  */
693 int
694 mondo_recover_proc(uint16_t cpuid, int bn)
695 {
696 	label_t ljb;
697 	cpu_t *cp;
698 	kthread_t *t;
699 	proc_t *p;
700 	struct as *as;
701 	struct hat *hat;
702 	short  cnum;
703 	struct tsb_info *tsbinfop;
704 	struct tsbe *tsbep;
705 	caddr_t tsbp;
706 	caddr_t end_tsbp;
707 	uint64_t paddr;
708 	uint64_t idsr;
709 	u_longlong_t pahi, palo;
710 	int pages_claimed = 0;
711 	tte_t tsbe_tte;
712 	int tried_kernel_tsb = 0;
713 
714 	CHEETAH_LIVELOCK_STAT(proc_entry);
715 
716 	if (on_fault(&ljb)) {
717 		CHEETAH_LIVELOCK_STAT(proc_onflt);
718 		goto badstruct;
719 	}
720 
721 	if ((cp = cpu[cpuid]) == NULL) {
722 		CHEETAH_LIVELOCK_STAT(proc_cpu);
723 		goto badstruct;
724 	}
725 
726 	if ((t = cp->cpu_thread) == NULL) {
727 		CHEETAH_LIVELOCK_STAT(proc_thread);
728 		goto badstruct;
729 	}
730 
731 	if ((p = ttoproc(t)) == NULL) {
732 		CHEETAH_LIVELOCK_STAT(proc_proc);
733 		goto badstruct;
734 	}
735 
736 	if ((as = p->p_as) == NULL) {
737 		CHEETAH_LIVELOCK_STAT(proc_as);
738 		goto badstruct;
739 	}
740 
741 	if ((hat = as->a_hat) == NULL) {
742 		CHEETAH_LIVELOCK_STAT(proc_hat);
743 		goto badstruct;
744 	}
745 
746 	if (hat != ksfmmup) {
747 		CHEETAH_LIVELOCK_STAT(proc_user);
748 		if (hat->sfmmu_flags & (HAT_BUSY | HAT_SWAPPED | HAT_SWAPIN)) {
749 			CHEETAH_LIVELOCK_STAT(proc_hat_busy);
750 			goto badstruct;
751 		}
752 		tsbinfop = hat->sfmmu_tsb;
753 		if (tsbinfop == NULL) {
754 			CHEETAH_LIVELOCK_STAT(proc_hat_inval);
755 			goto badstruct;
756 		}
757 		tsbp = tsbinfop->tsb_va;
758 		end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
759 	} else {
760 		CHEETAH_LIVELOCK_STAT(proc_kernel);
761 		tsbinfop = NULL;
762 		tsbp = ktsb_base;
763 		end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
764 	}
765 
766 	/* Verify as */
767 	if (hat->sfmmu_as != as) {
768 		CHEETAH_LIVELOCK_STAT(proc_hat_inval);
769 		goto badstruct;
770 	}
771 
772 	cnum = hat->sfmmu_cnum;
773 	CHEETAH_LIVELOCK_STATSET(proc_cnum, cnum);
774 
775 	if ((cnum < 0) || (cnum == INVALID_CONTEXT) || (cnum >= nctxs)) {
776 		CHEETAH_LIVELOCK_STAT(proc_cnum_bad);
777 		goto badstruct;
778 	}
779 
780 	do {
781 		CHEETAH_LIVELOCK_STAT(proc_tsb_scan);
782 
783 		/*
784 		 * Skip TSBs being relocated.  This is important because
785 		 * we want to avoid the following deadlock scenario:
786 		 *
787 		 * 1) when we came in we set ourselves to "in recover" state.
788 		 * 2) when we try to touch TSB being relocated the mapping
789 		 *    will be in the suspended state so we'll spin waiting
790 		 *    for it to be unlocked.
791 		 * 3) when the CPU that holds the TSB mapping locked tries to
792 		 *    unlock it, it will send an xtrap which will fail to xcall
793 		 *    us or the CPU we're trying to recover, and will in turn
794 		 *    enter the mondo code.
795 		 * 4) since we are still spinning on the locked mapping
796 		 *    no further progress will be made and the system will
797 		 *    inevitably hard hang.
798 		 *
799 		 * A TSB not being relocated can't begin being relocated
800 		 * while we're accessing it because we check
801 		 * sendmondo_in_recover before relocating TSBs.
802 		 */
803 		if (hat != ksfmmup &&
804 		    (tsbinfop->tsb_flags & TSB_RELOC_FLAG) != 0) {
805 			CHEETAH_LIVELOCK_STAT(proc_tsb_reloc);
806 			goto next_tsbinfo;
807 		}
808 
809 		for (tsbep = (struct tsbe *)tsbp;
810 		    tsbep < (struct tsbe *)end_tsbp; tsbep++) {
811 			tsbe_tte = tsbep->tte_data;
812 
813 			if (tsbe_tte.tte_val == 0) {
814 				/*
815 				 * Invalid tte
816 				 */
817 				continue;
818 			}
819 			if (tsbe_tte.tte_se) {
820 				/*
821 				 * Don't want device registers
822 				 */
823 				continue;
824 			}
825 			if (tsbe_tte.tte_cp == 0) {
826 				/*
827 				 * Must be cached in E$
828 				 */
829 				continue;
830 			}
831 			CHEETAH_LIVELOCK_STATSET(proc_tte, tsbe_tte);
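			/*
			 * If neither the NACK nor the BUSY bit of this
			 * busy/nack pair is still set, the earlier mondo has
			 * been accepted and we can stop scanning early.
			 * Otherwise claim the lines backing this mapping and,
			 * once the BUSY bit clears, re-send the mondo.
			 */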
832 			idsr = getidsr();
833 			if ((idsr & (IDSR_NACK_BIT(bn) |
834 			    IDSR_BUSY_BIT(bn))) == 0) {
835 				CHEETAH_LIVELOCK_STAT(proc_tsb_partscan);
836 				goto done;
837 			}
838 			pahi = tsbe_tte.tte_pahi;
839 			palo = tsbe_tte.tte_palo;
840 			paddr = (uint64_t)((pahi << 32) |
841 			    (palo << MMU_PAGESHIFT));
842 			claimlines(paddr, TTEBYTES(TTE_CSZ(&tsbe_tte)),
843 			    CH_ECACHE_SUBBLK_SIZE);
844 			if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
845 				shipit(cpuid, bn);
846 			}
847 			pages_claimed++;
848 		}
849 next_tsbinfo:
850 		if (tsbinfop != NULL)
851 			tsbinfop = tsbinfop->tsb_next;
852 		if (tsbinfop != NULL) {
853 			tsbp = tsbinfop->tsb_va;
854 			end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
855 		} else if (tsbp == ktsb_base) {
856 			tried_kernel_tsb = 1;
857 		} else if (!tried_kernel_tsb) {
858 			tsbp = ktsb_base;
859 			end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
860 			hat = ksfmmup;
861 			tsbinfop = NULL;
862 		}
863 	} while (tsbinfop != NULL ||
864 			((tsbp == ktsb_base) && !tried_kernel_tsb));
865 
866 	CHEETAH_LIVELOCK_STAT(proc_tsb_fullscan);
867 	CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
868 	no_fault();
869 	idsr = getidsr();
870 	if ((idsr & (IDSR_NACK_BIT(bn) |
871 	    IDSR_BUSY_BIT(bn))) == 0) {
872 		return (1);
873 	} else {
874 		return (0);
875 	}
876 
877 done:
878 	no_fault();
879 	CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
880 	return (1);
881 
882 badstruct:
883 	no_fault();
884 	return (0);
885 }
886 
887 /*
888  * Attempt to claim ownership, temporarily, of every cache line that a
889  * non-responsive cpu might be using.  This might kick that cpu out of
890  * this state.
891  *
892  * The return value indicates to the caller if we have exhausted all recovery
893  * techniques. If 1 is returned, it is useless to call this function again
894  * even for a different target CPU.
895  */
896 int
897 mondo_recover(uint16_t cpuid, int bn)
898 {
899 	struct memseg *seg;
900 	uint64_t begin_pa, end_pa, cur_pa;
901 	hrtime_t begin_hrt, end_hrt;
902 	int retval = 0;
903 	int pages_claimed = 0;
904 	cheetah_livelock_entry_t *histp;
905 	uint64_t idsr;
906 
907 	if (cas32(&sendmondo_in_recover, 0, 1) != 0) {
908 		/*
909 		 * Wait while recovery takes place
910 		 */
911 		while (sendmondo_in_recover) {
912 			drv_usecwait(1);
913 		}
914 		/*
915 		 * Assume we didn't claim the whole memory. If
916 		 * the target of this caller is not recovered,
917 		 * it will come back.
918 		 */
919 		return (retval);
920 	}
921 
922 	CHEETAH_LIVELOCK_ENTRY_NEXT(histp)
923 	CHEETAH_LIVELOCK_ENTRY_SET(histp, lbolt, lbolt);
924 	CHEETAH_LIVELOCK_ENTRY_SET(histp, cpuid, cpuid);
925 	CHEETAH_LIVELOCK_ENTRY_SET(histp, buddy, CPU->cpu_id);
926 
927 	begin_hrt = gethrtime_waitfree();
928 	/*
929 	 * First try to claim the lines in the TSB the target
930 	 * may have been using.
931 	 */
932 	if (mondo_recover_proc(cpuid, bn) == 1) {
933 		/*
934 		 * Didn't claim the whole memory
935 		 */
936 		goto done;
937 	}
938 
939 	/*
940 	 * We tried using the TSB. The target is still
941 	 * not recovered. Check if complete memory scan is
942 	 * enabled.
943 	 */
944 	if (cheetah_sendmondo_fullscan == 0) {
945 		/*
946 		 * Full memory scan is disabled.
947 		 */
948 		retval = 1;
949 		goto done;
950 	}
951 
952 	/*
953 	 * Try claiming the whole memory.
954 	 */
955 	for (seg = memsegs; seg; seg = seg->next) {
956 		begin_pa = (uint64_t)(seg->pages_base) << MMU_PAGESHIFT;
957 		end_pa = (uint64_t)(seg->pages_end) << MMU_PAGESHIFT;
958 		for (cur_pa = begin_pa; cur_pa < end_pa;
959 		    cur_pa += MMU_PAGESIZE) {
960 			idsr = getidsr();
961 			if ((idsr & (IDSR_NACK_BIT(bn) |
962 			    IDSR_BUSY_BIT(bn))) == 0) {
963 				/*
964 				 * Didn't claim all memory
965 				 */
966 				goto done;
967 			}
968 			claimlines(cur_pa, MMU_PAGESIZE,
969 			    CH_ECACHE_SUBBLK_SIZE);
970 			if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
971 				shipit(cpuid, bn);
972 			}
973 			pages_claimed++;
974 		}
975 	}
976 
977 	/*
978 	 * We did all we could.
979 	 */
980 	retval = 1;
981 
982 done:
983 	/*
984 	 * Update statistics
985 	 */
986 	end_hrt = gethrtime_waitfree();
987 	CHEETAH_LIVELOCK_STAT(recovery);
988 	CHEETAH_LIVELOCK_MAXSTAT(hrt, (end_hrt - begin_hrt));
989 	CHEETAH_LIVELOCK_MAXSTAT(full_claimed, pages_claimed);
990 	CHEETAH_LIVELOCK_ENTRY_SET(histp, recovery_time, \
991 	    (end_hrt -  begin_hrt));
992 
993 	while (cas32(&sendmondo_in_recover, 1, 0) != 1);
994 
995 	return (retval);
996 }
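
/*
 * mondo_recover() is invoked from the mondo dispatch path (see
 * send_one_mondo() below) when a dispatch times out and
 * cheetah_sendmondo_recover is set.
 */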
997 
998 /*
999  * This is called by the cyclic framework when this CPU becomes online
1000  */
1001 /*ARGSUSED*/
1002 static void
1003 cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when)
1004 {
1005 
1006 	hdlr->cyh_func = (cyc_func_t)cheetah_nudge_buddy;
1007 	hdlr->cyh_level = CY_LOW_LEVEL;
1008 	hdlr->cyh_arg = NULL;
1009 
1010 	/*
1011 	 * Stagger the start time
1012 	 */
1013 	when->cyt_when = cpu->cpu_id * (NANOSEC / NCPU);
1014 	if (cheetah_sendmondo_recover_delay < CHEETAH_LIVELOCK_MIN_DELAY) {
1015 		cheetah_sendmondo_recover_delay = CHEETAH_LIVELOCK_MIN_DELAY;
1016 	}
1017 	when->cyt_interval = cheetah_sendmondo_recover_delay * NANOSEC;
1018 }
1019 
1020 /*
1021  * Create a low-level cyclic to send an xtrap to the next cpu online.
1022  * However, there's no need to have this running on a uniprocessor system.
1023  */
1024 static void
1025 cheetah_nudge_init(void)
1026 {
1027 	cyc_omni_handler_t hdlr;
1028 
1029 	if (max_ncpus == 1) {
1030 		return;
1031 	}
1032 
1033 	hdlr.cyo_online = cheetah_nudge_onln;
1034 	hdlr.cyo_offline = NULL;
1035 	hdlr.cyo_arg = NULL;
1036 
1037 	mutex_enter(&cpu_lock);
1038 	(void) cyclic_add_omni(&hdlr);
1039 	mutex_exit(&cpu_lock);
1040 }
1041 
1042 /*
1043  * Cyclic handler to wake up buddy
1044  */
1045 void
1046 cheetah_nudge_buddy(void)
1047 {
1048 	/*
1049 	 * Disable kernel preemption to protect the cpu list
1050 	 */
1051 	kpreempt_disable();
1052 	if ((CPU->cpu_next_onln != CPU) && (sendmondo_in_recover == 0)) {
1053 		xt_one(CPU->cpu_next_onln->cpu_id, (xcfunc_t *)xt_sync_tl1,
1054 		    0, 0);
1055 	}
1056 	kpreempt_enable();
1057 }
1058 
1059 #endif	/* CHEETAHPLUS_ERRATUM_25 */
1060 
1061 #ifdef SEND_MONDO_STATS
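/*
 * Histograms of mondo dispatch latency measured in %tick counts:
 * send_one_mondo() below buckets waits shorter than 8192 ticks into
 * x_one_stimes[] (128-tick bins) and longer waits into x_one_ltimes[]
 * (8192-tick bins).  The x_set_* and x_nack_* arrays are presumably
 * filled by the mondo-set dispatch path, which is not shown here.
 */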
1062 uint32_t x_one_stimes[64];
1063 uint32_t x_one_ltimes[16];
1064 uint32_t x_set_stimes[64];
1065 uint32_t x_set_ltimes[16];
1066 uint32_t x_set_cpus[NCPU];
1067 uint32_t x_nack_stimes[64];
1068 #endif
1069 
1070 /*
1071  * Note: A version of this function is used by the debugger via the KDI,
1072  * and must be kept in sync with this version.  Any changes made to this
1073  * function to support new chips or to accommodate errata must also be included
1074  * in the KDI-specific version.  See us3_kdi.c.
1075  */
1076 void
1077 send_one_mondo(int cpuid)
1078 {
1079 	int busy, nack;
1080 	uint64_t idsr, starttick, endtick, tick, lasttick;
1081 	uint64_t busymask;
1082 #ifdef	CHEETAHPLUS_ERRATUM_25
1083 	int recovered = 0;
1084 #endif
1085 
1086 	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
1087 	starttick = lasttick = gettick();
1088 	shipit(cpuid, 0);
1089 	endtick = starttick + xc_tick_limit;
1090 	busy = nack = 0;
1091 #if defined(JALAPENO) || defined(SERRANO)
1092 	/*
1093 	 * Lower 2 bits of the agent ID determine which BUSY/NACK pair
1094 	 * will be used for dispatching the interrupt.  For now, assume
1095 	 * there are no more than IDSR_BN_SETS CPUs, hence no aliasing
1096 	 * issues with respect to BUSY/NACK pair usage.
1097 	 */
1098 	busymask  = IDSR_BUSY_BIT(cpuid);
1099 #else /* JALAPENO || SERRANO */
1100 	busymask = IDSR_BUSY;
1101 #endif /* JALAPENO || SERRANO */
1102 	for (;;) {
1103 		idsr = getidsr();
1104 		if (idsr == 0)
1105 			break;
1106 
1107 		tick = gettick();
1108 		/*
1109 		 * If there is a big jump between the current tick
1110 		 * count and lasttick, we have probably hit a
1111 		 * breakpoint.  Adjust endtick accordingly to avoid a panic.
1112 		 */
1113 		if (tick > (lasttick + xc_tick_jump_limit))
1114 			endtick += (tick - lasttick);
1115 		lasttick = tick;
1116 		if (tick > endtick) {
1117 			if (panic_quiesce)
1118 				return;
1119 #ifdef	CHEETAHPLUS_ERRATUM_25
1120 			if (cheetah_sendmondo_recover && recovered == 0) {
1121 				if (mondo_recover(cpuid, 0)) {
1122 					/*
1123 					 * We claimed the whole memory or
1124 					 * full scan is disabled.
1125 					 */
1126 					recovered++;
1127 				}
1128 				tick = gettick();
1129 				endtick = tick + xc_tick_limit;
1130 				lasttick = tick;
1131 				/*
1132 				 * Recheck idsr
1133 				 */
1134 				continue;
1135 			} else
1136 #endif	/* CHEETAHPLUS_ERRATUM_25 */
1137 			{
1138 				cmn_err(CE_PANIC, "send mondo timeout "
1139 				    "(target 0x%x) [%d NACK %d BUSY]",
1140 				    cpuid, nack, busy);
1141 			}
1142 		}
1143 
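		/*
		 * Still BUSY: the target has not yet taken the mondo, so keep
		 * polling.  Otherwise the dispatch was NACKed: pause briefly
		 * and re-send it.
		 */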
1144 		if (idsr & busymask) {
1145 			busy++;
1146 			continue;
1147 		}
1148 		drv_usecwait(1);
1149 		shipit(cpuid, 0);
1150 		nack++;
1151 		busy = 0;
1152 	}
1153 #ifdef SEND_MONDO_STATS
1154 	{
1155 		int n = gettick() - starttick;
1156 		if (n < 8192)
1157 			x_one_stimes[n >> 7]++;
1158 		else
1159 			x_one_ltimes[(n >> 13) & 0xf]++;
1160 	}
1161 #endif
1162 }
1163 
1164 void
1165 syncfpu(void)
1166 {
1167 }
1168 
1169 /*
1170  * Return processor specific async error structure
1171  * size used.
1172  */
1173 int
1174 cpu_aflt_size(void)
1175 {
1176 	return (sizeof (ch_async_flt_t));
1177 }
1178 
1179 /*
1180  * The fast_ecc_err handler transfers control here for UCU, UCC events.
1181  * Note that we flush Ecache twice, once in the fast_ecc_err handler to
1182  * flush the error that caused the UCU/UCC, then again here at the end to
1183  * flush the TL=1 trap handler code out of the Ecache, so we can minimize
1184  * the probability of getting a TL>1 Fast ECC trap when we're fielding
1185  * another Fast ECC trap.
1186  *
1187  * Cheetah+ also handles: TSCE: No additional processing required.
1188  * Panther adds L3_UCU and L3_UCC which are reported in AFSR_EXT.
1189  *
1190  * Note that the p_clo_flags input is only valid in cases where the
1191  * cpu_private struct is not yet initialized (since that is the only
1192  * time that information cannot be obtained from the logout struct.)
1193  */
1194 /*ARGSUSED*/
1195 void
1196 cpu_fast_ecc_error(struct regs *rp, ulong_t p_clo_flags)
1197 {
1198 	ch_cpu_logout_t *clop;
1199 	uint64_t ceen, nceen;
1200 
1201 	/*
1202 	 * Get the CPU logout info.  If we can't find our CPU private
1203 	 * pointer, then we will have to make do without any detailed
1204 	 * logout information.
1205 	 */
1206 	if (CPU_PRIVATE(CPU) == NULL) {
1207 		clop = NULL;
1208 		ceen = p_clo_flags & EN_REG_CEEN;
1209 		nceen = p_clo_flags & EN_REG_NCEEN;
1210 	} else {
1211 		clop = CPU_PRIVATE_PTR(CPU, chpr_fecctl0_logout);
1212 		ceen = clop->clo_flags & EN_REG_CEEN;
1213 		nceen = clop->clo_flags & EN_REG_NCEEN;
1214 	}
1215 
1216 	cpu_log_fast_ecc_error((caddr_t)rp->r_pc,
1217 	    (rp->r_tstate & TSTATE_PRIV) ? 1 : 0, 0, ceen, nceen, clop);
1218 }
1219 
1220 /*
1221  * Log fast ecc error, called from either Fast ECC at TL=0 or Fast
1222  * ECC at TL>0.  Need to supply either an error register pointer or a
1223  * cpu logout structure pointer.
1224  */
1225 static void
1226 cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
1227     uint64_t nceen, ch_cpu_logout_t *clop)
1228 {
1229 	struct async_flt *aflt;
1230 	ch_async_flt_t ch_flt;
1231 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1232 	char pr_reason[MAX_REASON_STRING];
1233 	ch_cpu_errors_t cpu_error_regs;
1234 
1235 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1236 	/*
1237 	 * If no cpu logout data, then we will have to make do without
1238 	 * any detailed logout information.
1239 	 */
1240 	if (clop == NULL) {
1241 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1242 		get_cpu_error_state(&cpu_error_regs);
1243 		set_cpu_error_state(&cpu_error_regs);
1244 		t_afar = cpu_error_regs.afar;
1245 		t_afsr = cpu_error_regs.afsr;
1246 		t_afsr_ext = cpu_error_regs.afsr_ext;
1247 #if defined(SERRANO)
1248 		ch_flt.afar2 = cpu_error_regs.afar2;
1249 #endif	/* SERRANO */
1250 	} else {
1251 		t_afar = clop->clo_data.chd_afar;
1252 		t_afsr = clop->clo_data.chd_afsr;
1253 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1254 #if defined(SERRANO)
1255 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1256 #endif	/* SERRANO */
1257 	}
1258 
1259 	/*
1260 	 * In order to simplify code, we maintain this afsr_errs
1261 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1262 	 * sticky bits.
1263 	 */
1264 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1265 	    (t_afsr & C_AFSR_ALL_ERRS);
1266 	pr_reason[0] = '\0';
1267 
1268 	/* Setup the async fault structure */
1269 	aflt = (struct async_flt *)&ch_flt;
1270 	aflt->flt_id = gethrtime_waitfree();
1271 	ch_flt.afsr_ext = t_afsr_ext;
1272 	ch_flt.afsr_errs = t_afsr_errs;
1273 	aflt->flt_stat = t_afsr;
1274 	aflt->flt_addr = t_afar;
1275 	aflt->flt_bus_id = getprocessorid();
1276 	aflt->flt_inst = CPU->cpu_id;
1277 	aflt->flt_pc = tpc;
1278 	aflt->flt_prot = AFLT_PROT_NONE;
1279 	aflt->flt_class = CPU_FAULT;
1280 	aflt->flt_priv = priv;
1281 	aflt->flt_tl = tl;
1282 	aflt->flt_status = ECC_F_TRAP;
1283 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1284 
1285 	/*
1286 	 * XXXX - Phenomenal hack to get around Solaris not getting all the
1287 	 * cmn_err messages out to the console.  The situation is a UCU (in
1288 	 * priv mode) which causes a WDU which causes a UE (on the retry).
1289 	 * The messages for the UCU and WDU are enqueued and then pulled off
1290 	 * the async queue via softint and syslogd starts to process them
1291 	 * but doesn't get them to the console.  The UE causes a panic, but
1292 	 * since the UCU/WDU messages are already in transit, those aren't
1293 	 * on the async queue.  The hack is to check if we have a matching
1294 	 * WDU event for the UCU, and if it matches, we're more than likely
1295 	 * going to panic with a UE, unless we're under protection.  So, we
1296 	 * check to see if we got a matching WDU event and if we're under
1297 	 * protection.
1298 	 *
1299 	 * For Cheetah/Cheetah+/Jaguar/Jalapeno, the sequence we care about
1300 	 * looks like this:
1301 	 *    UCU->WDU->UE
1302 	 * For Panther, it could look like either of these:
1303 	 *    UCU---->WDU->L3_WDU->UE
1304 	 *    L3_UCU->WDU->L3_WDU->UE
1305 	 */
1306 	if ((t_afsr_errs & (C_AFSR_UCU | C_AFSR_L3_UCU)) &&
1307 	    aflt->flt_panic == 0 && aflt->flt_priv != 0 &&
1308 	    curthread->t_ontrap == NULL && curthread->t_lofault == NULL) {
1309 		get_cpu_error_state(&cpu_error_regs);
1310 		aflt->flt_panic |= ((cpu_error_regs.afsr & C_AFSR_WDU) &&
1311 		    (cpu_error_regs.afar == t_afar));
1312 		aflt->flt_panic |= ((clop == NULL) &&
1313 		    (t_afsr_errs & C_AFSR_WDU));
1314 	}
1315 
1316 	/*
1317 	 * Queue events on the async event queue, one event per error bit.
1318 	 * If no events are queued or no Fast ECC events are on in the AFSR,
1319 	 * queue an event to complain.
1320 	 */
1321 	if (cpu_queue_events(&ch_flt, pr_reason, t_afsr_errs, clop) == 0 ||
1322 	    ((t_afsr_errs & (C_AFSR_FECC_ERRS | C_AFSR_EXT_FECC_ERRS)) == 0)) {
1323 		ch_flt.flt_type = CPU_INV_AFSR;
1324 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1325 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1326 		    aflt->flt_panic);
1327 	}
1328 
1329 	/*
1330 	 * Zero out + invalidate CPU logout.
1331 	 */
1332 	if (clop) {
1333 		bzero(clop, sizeof (ch_cpu_logout_t));
1334 		clop->clo_data.chd_afar = LOGOUT_INVALID;
1335 	}
1336 
1337 	/*
1338 	 * We carefully re-enable NCEEN and CEEN and then check if any deferred
1339 	 * or disrupting errors have happened.  We do this because if a
1340 	 * deferred or disrupting error had occurred with NCEEN/CEEN off, the
1341 	 * trap will not be taken when NCEEN/CEEN is re-enabled.  Note that
1342 	 * CEEN works differently on Cheetah than on Spitfire.  Also, we enable
1343 	 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
1344 	 * deferred or disrupting error happening between checking the AFSR and
1345 	 * enabling NCEEN/CEEN.
1346 	 *
1347 	 * Note: CEEN and NCEEN are only reenabled if they were on when trap
1348 	 * taken.
1349 	 */
1350 	set_error_enable(get_error_enable() | (nceen | ceen));
1351 	if (clear_errors(&ch_flt)) {
1352 		aflt->flt_panic |= ((ch_flt.afsr_errs &
1353 		    (C_AFSR_EXT_ASYNC_ERRS | C_AFSR_ASYNC_ERRS)) != 0);
1354 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
1355 		    NULL);
1356 	}
1357 
1358 	/*
1359 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
1360 	 * be logged as part of the panic flow.
1361 	 */
1362 	if (aflt->flt_panic)
1363 		fm_panic("%sError(s)", pr_reason);
1364 
1365 	/*
1366 	 * Flushing the Ecache here gets the part of the trap handler that
1367 	 * is run at TL=1 out of the Ecache.
1368 	 */
1369 	cpu_flush_ecache();
1370 }
1371 
1372 /*
1373  * This is called via sys_trap from pil15_interrupt code if the
1374  * corresponding entry in ch_err_tl1_pending is set.  Checks the
1375  * various ch_err_tl1_data structures for valid entries based on the bit
1376  * settings in the ch_err_tl1_flags entry of the structure.
1377  */
1378 /*ARGSUSED*/
1379 void
1380 cpu_tl1_error(struct regs *rp, int panic)
1381 {
1382 	ch_err_tl1_data_t *cl1p, cl1;
1383 	int i, ncl1ps;
1384 	uint64_t me_flags;
1385 	uint64_t ceen, nceen;
1386 
1387 	if (ch_err_tl1_paddrs[CPU->cpu_id] == 0) {
1388 		cl1p = &ch_err_tl1_data;
1389 		ncl1ps = 1;
1390 	} else if (CPU_PRIVATE(CPU) != NULL) {
1391 		cl1p = CPU_PRIVATE_PTR(CPU, chpr_tl1_err_data[0]);
1392 		ncl1ps = CH_ERR_TL1_TLMAX;
1393 	} else {
1394 		ncl1ps = 0;
1395 	}
1396 
1397 	for (i = 0; i < ncl1ps; i++, cl1p++) {
1398 		if (cl1p->ch_err_tl1_flags == 0)
1399 			continue;
1400 
1401 		/*
1402 		 * Grab a copy of the logout data and invalidate
1403 		 * the logout area.
1404 		 */
1405 		cl1 = *cl1p;
1406 		bzero(cl1p, sizeof (ch_err_tl1_data_t));
1407 		cl1p->ch_err_tl1_logout.clo_data.chd_afar = LOGOUT_INVALID;
1408 		me_flags = CH_ERR_ME_FLAGS(cl1.ch_err_tl1_flags);
1409 
1410 		/*
1411 		 * Log "first error" in ch_err_tl1_data.
1412 		 */
1413 		if (cl1.ch_err_tl1_flags & CH_ERR_FECC) {
1414 			ceen = get_error_enable() & EN_REG_CEEN;
1415 			nceen = get_error_enable() & EN_REG_NCEEN;
1416 			cpu_log_fast_ecc_error((caddr_t)cl1.ch_err_tl1_tpc, 1,
1417 			    1, ceen, nceen, &cl1.ch_err_tl1_logout);
1418 		}
1419 #if defined(CPU_IMP_L1_CACHE_PARITY)
1420 		if (cl1.ch_err_tl1_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
1421 			cpu_parity_error(rp, cl1.ch_err_tl1_flags,
1422 			    (caddr_t)cl1.ch_err_tl1_tpc);
1423 		}
1424 #endif	/* CPU_IMP_L1_CACHE_PARITY */
1425 
1426 		/*
1427 		 * Log "multiple events" in ch_err_tl1_data.  Note that
1428 		 * we don't read and clear the AFSR/AFAR in the TL>0 code
1429 		 * if the structure is busy, we just do the cache flushing
1430 		 * if the structure is busy; we just do the cache flushing
1431 		 * at this point *should* have some relevant info.  If there
1432 		 * are no valid errors in the AFSR, we'll assume they've
1433 		 * already been picked up and logged.  For I$/D$ parity,
1434 		 * we just log an event with an "Unknown" (NULL) TPC.
1435 		 */
1436 		if (me_flags & CH_ERR_FECC) {
1437 			ch_cpu_errors_t cpu_error_regs;
1438 			uint64_t t_afsr_errs;
1439 
1440 			/*
1441 			 * Get the error registers and see if there's
1442 			 * a pending error.  If not, don't bother
1443 			 * generating an "Invalid AFSR" error event.
1444 			 */
1445 			get_cpu_error_state(&cpu_error_regs);
1446 			t_afsr_errs = (cpu_error_regs.afsr_ext &
1447 			    C_AFSR_EXT_ALL_ERRS) |
1448 			    (cpu_error_regs.afsr & C_AFSR_ALL_ERRS);
1449 			if (t_afsr_errs != 0) {
1450 				ceen = get_error_enable() & EN_REG_CEEN;
1451 				nceen = get_error_enable() & EN_REG_NCEEN;
1452 				cpu_log_fast_ecc_error((caddr_t)NULL, 1,
1453 				    1, ceen, nceen, NULL);
1454 			}
1455 		}
1456 #if defined(CPU_IMP_L1_CACHE_PARITY)
1457 		if (me_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
1458 			cpu_parity_error(rp, me_flags, (caddr_t)NULL);
1459 		}
1460 #endif	/* CPU_IMP_L1_CACHE_PARITY */
1461 	}
1462 }
1463 
1464 /*
1465  * Called from Fast ECC TL>0 handler in case of fatal error.
1466  * cpu_tl1_error should always find an associated ch_err_tl1_data structure,
1467  * but if we don't, we'll panic with something reasonable.
1468  */
1469 /*ARGSUSED*/
1470 void
1471 cpu_tl1_err_panic(struct regs *rp, ulong_t flags)
1472 {
1473 	cpu_tl1_error(rp, 1);
1474 	/*
1475 	 * Should never return, but just in case.
1476 	 */
1477 	fm_panic("Unsurvivable ECC Error at TL>0");
1478 }
1479 
1480 /*
1481  * The ce_err/ce_err_tl1 handlers transfer control here for CE, EMC, EDU:ST,
1482  * EDC, WDU, WDC, CPU, CPC, IVU, IVC events.
1483  * Disrupting errors controlled by NCEEN: EDU:ST, WDU, CPU, IVU
1484  * Disrupting errors controlled by CEEN: CE, EMC, EDC, WDC, CPC, IVC
1485  *
1486  * Cheetah+ also handles (No additional processing required):
1487  *    DUE, DTO, DBERR	(NCEEN controlled)
1488  *    THCE		(CEEN and ET_ECC_en controlled)
1489  *    TUE		(ET_ECC_en controlled)
1490  *
1491  * Panther further adds:
1492  *    IMU, L3_EDU, L3_WDU, L3_CPU		(NCEEN controlled)
1493  *    IMC, L3_EDC, L3_WDC, L3_CPC, L3_THCE	(CEEN controlled)
1494  *    TUE_SH, TUE		(NCEEN and L2_tag_ECC_en controlled)
1495  *    L3_TUE, L3_TUE_SH		(NCEEN and ET_ECC_en controlled)
1496  *    THCE			(CEEN and L2_tag_ECC_en controlled)
1497  *    L3_THCE			(CEEN and ET_ECC_en controlled)
1498  *
1499  * Note that the p_clo_flags input is only valid in cases where the
1500  * cpu_private struct is not yet initialized (since that is the only
1501  * time that information cannot be obtained from the logout struct.)
1502  */
1503 /*ARGSUSED*/
1504 void
1505 cpu_disrupting_error(struct regs *rp, ulong_t p_clo_flags)
1506 {
1507 	struct async_flt *aflt;
1508 	ch_async_flt_t ch_flt;
1509 	char pr_reason[MAX_REASON_STRING];
1510 	ch_cpu_logout_t *clop;
1511 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1512 	ch_cpu_errors_t cpu_error_regs;
1513 
1514 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1515 	/*
1516 	 * Get the CPU logout info.  If we can't find our CPU private
1517 	 * pointer, then we will have to make do without any detailed
1518 	 * logout information.
1519 	 */
1520 	if (CPU_PRIVATE(CPU) == NULL) {
1521 		clop = NULL;
1522 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1523 		get_cpu_error_state(&cpu_error_regs);
1524 		set_cpu_error_state(&cpu_error_regs);
1525 		t_afar = cpu_error_regs.afar;
1526 		t_afsr = cpu_error_regs.afsr;
1527 		t_afsr_ext = cpu_error_regs.afsr_ext;
1528 #if defined(SERRANO)
1529 		ch_flt.afar2 = cpu_error_regs.afar2;
1530 #endif	/* SERRANO */
1531 	} else {
1532 		clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
1533 		t_afar = clop->clo_data.chd_afar;
1534 		t_afsr = clop->clo_data.chd_afsr;
1535 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1536 #if defined(SERRANO)
1537 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1538 #endif	/* SERRANO */
1539 	}
1540 
1541 	/*
1542 	 * In order to simplify code, we maintain this afsr_errs
1543 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1544 	 * sticky bits.
1545 	 */
1546 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1547 	    (t_afsr & C_AFSR_ALL_ERRS);
1548 
1549 	pr_reason[0] = '\0';
1550 	/* Setup the async fault structure */
1551 	aflt = (struct async_flt *)&ch_flt;
1552 	ch_flt.afsr_ext = t_afsr_ext;
1553 	ch_flt.afsr_errs = t_afsr_errs;
1554 	aflt->flt_stat = t_afsr;
1555 	aflt->flt_addr = t_afar;
1556 	aflt->flt_pc = (caddr_t)rp->r_pc;
1557 	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ?  1 : 0;
1558 	aflt->flt_tl = 0;
1559 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1560 
1561 	/*
1562 	 * If this trap is a result of one of the errors not masked
1563 	 * by cpu_ce_not_deferred, we don't reenable CEEN.  Instead, we
1564 	 * indicate that a timeout is to be set later.
1565 	 */
1566 	if (!(t_afsr_errs & (cpu_ce_not_deferred | cpu_ce_not_deferred_ext)) &&
1567 	    !aflt->flt_panic)
1568 		ch_flt.flt_trapped_ce = CE_CEEN_DEFER | CE_CEEN_TRAPPED;
1569 	else
1570 		ch_flt.flt_trapped_ce = CE_CEEN_NODEFER | CE_CEEN_TRAPPED;
1571 
1572 	/*
1573 	 * log the CE and clean up
1574 	 */
1575 	cpu_log_and_clear_ce(&ch_flt);
1576 
1577 	/*
1578 	 * We re-enable CEEN (if required) and check if any disrupting errors
1579 	 * have happened.  We do this because if a disrupting error had occurred
1580 	 * with CEEN off, the trap will not be taken when CEEN is re-enabled.
1581 	 * Note that CEEN works differently on Cheetah than on Spitfire.  Also,
1582 	 * we enable CEEN *before* checking the AFSR to avoid the small window
1583 	 * of an error happening between checking the AFSR and enabling CEEN.
1584 	 */
1585 	if (ch_flt.flt_trapped_ce & CE_CEEN_NODEFER)
1586 		set_error_enable(get_error_enable() | EN_REG_CEEN);
1587 	if (clear_errors(&ch_flt)) {
1588 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
1589 		    NULL);
1590 	}
1591 
1592 	/*
1593 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
1594 	 * be logged as part of the panic flow.
1595 	 */
1596 	if (aflt->flt_panic)
1597 		fm_panic("%sError(s)", pr_reason);
1598 }
1599 
1600 /*
1601  * The async_err handler transfers control here for UE, EMU, EDU:BLD,
1602  * L3_EDU:BLD, TO, and BERR events.
1603  * Deferred errors controlled by NCEEN: UE, EMU, EDU:BLD, L3_EDU:BLD, TO, BERR
1604  *
1605  * Cheetah+: No additional errors handled.
1606  *
1607  * Note that the p_clo_flags input is only valid in cases where the
1608  * cpu_private struct is not yet initialized (since that is the only
1609  * time that information cannot be obtained from the logout struct.)
1610  */
1611 /*ARGSUSED*/
1612 void
1613 cpu_deferred_error(struct regs *rp, ulong_t p_clo_flags)
1614 {
1615 	ushort_t ttype, tl;
1616 	ch_async_flt_t ch_flt;
1617 	struct async_flt *aflt;
1618 	int trampolined = 0;
1619 	char pr_reason[MAX_REASON_STRING];
1620 	ch_cpu_logout_t *clop;
1621 	uint64_t ceen, clo_flags;
1622 	uint64_t log_afsr;
1623 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1624 	ch_cpu_errors_t cpu_error_regs;
1625 	int expected = DDI_FM_ERR_UNEXPECTED;
1626 	ddi_acc_hdl_t *hp;
1627 
1628 	/*
1629 	 * We need to look at p_flag to determine if the thread detected an
1630 	 * error while dumping core.  We can't grab p_lock here, but it's ok
1631 	 * because we just need a consistent snapshot and we know that everyone
1632 	 * else will store a consistent set of bits while holding p_lock.  We
1633 	 * don't have to worry about a race because SDOCORE is set once prior
1634 	 * to doing i/o from the process's address space and is never cleared.
1635 	 */
1636 	uint_t pflag = ttoproc(curthread)->p_flag;
1637 
1638 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1639 	/*
1640 	 * Get the CPU logout info.  If we can't find our CPU private
1641 	 * pointer, then we will have to make do without any detailed
1642 	 * logout information.
1643 	 */
1644 	if (CPU_PRIVATE(CPU) == NULL) {
1645 		clop = NULL;
1646 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1647 		get_cpu_error_state(&cpu_error_regs);
1648 		set_cpu_error_state(&cpu_error_regs);
1649 		t_afar = cpu_error_regs.afar;
1650 		t_afsr = cpu_error_regs.afsr;
1651 		t_afsr_ext = cpu_error_regs.afsr_ext;
1652 #if defined(SERRANO)
1653 		ch_flt.afar2 = cpu_error_regs.afar2;
1654 #endif	/* SERRANO */
1655 		clo_flags = p_clo_flags;
1656 	} else {
1657 		clop = CPU_PRIVATE_PTR(CPU, chpr_async_logout);
1658 		t_afar = clop->clo_data.chd_afar;
1659 		t_afsr = clop->clo_data.chd_afsr;
1660 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1661 #if defined(SERRANO)
1662 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1663 #endif	/* SERRANO */
1664 		clo_flags = clop->clo_flags;
1665 	}
1666 
1667 	/*
1668 	 * In order to simplify code, we maintain this afsr_errs
1669 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1670 	 * sticky bits.
1671 	 */
1672 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1673 	    (t_afsr & C_AFSR_ALL_ERRS);
1674 	pr_reason[0] = '\0';
1675 
1676 	/*
1677 	 * Grab information encoded into our clo_flags field.
1678 	 */
1679 	ceen = clo_flags & EN_REG_CEEN;
1680 	tl = (clo_flags & CLO_FLAGS_TL_MASK) >> CLO_FLAGS_TL_SHIFT;
1681 	ttype = (clo_flags & CLO_FLAGS_TT_MASK) >> CLO_FLAGS_TT_SHIFT;
1682 
1683 	/*
1684 	 * handle the specific error
1685 	 */
1686 	aflt = (struct async_flt *)&ch_flt;
1687 	aflt->flt_id = gethrtime_waitfree();
1688 	aflt->flt_bus_id = getprocessorid();
1689 	aflt->flt_inst = CPU->cpu_id;
1690 	ch_flt.afsr_ext = t_afsr_ext;
1691 	ch_flt.afsr_errs = t_afsr_errs;
1692 	aflt->flt_stat = t_afsr;
1693 	aflt->flt_addr = t_afar;
1694 	aflt->flt_pc = (caddr_t)rp->r_pc;
1695 	aflt->flt_prot = AFLT_PROT_NONE;
1696 	aflt->flt_class = CPU_FAULT;
1697 	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ?  1 : 0;
1698 	aflt->flt_tl = (uchar_t)tl;
1699 	aflt->flt_panic = ((tl != 0) || (aft_testfatal != 0) ||
1700 	    C_AFSR_PANIC(t_afsr_errs));
1701 	aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;
1702 	aflt->flt_status = ((ttype == T_DATA_ERROR) ? ECC_D_TRAP : ECC_I_TRAP);
1703 
1704 	/*
1705 	 * If the trap occurred in privileged mode at TL=0, we need to check to
1706 	 * see if we were executing in the kernel under on_trap() or t_lofault
1707 	 * protection.  If so, modify the saved registers so that we return
1708 	 * from the trap to the appropriate trampoline routine.
1709 	 */
1710 	if (aflt->flt_priv && tl == 0) {
1711 		if (curthread->t_ontrap != NULL) {
1712 			on_trap_data_t *otp = curthread->t_ontrap;
1713 
1714 			if (otp->ot_prot & OT_DATA_EC) {
1715 				aflt->flt_prot = AFLT_PROT_EC;
1716 				otp->ot_trap |= OT_DATA_EC;
1717 				rp->r_pc = otp->ot_trampoline;
1718 				rp->r_npc = rp->r_pc + 4;
1719 				trampolined = 1;
1720 			}
1721 
1722 			if ((t_afsr & (C_AFSR_TO | C_AFSR_BERR)) &&
1723 			    (otp->ot_prot & OT_DATA_ACCESS)) {
1724 				aflt->flt_prot = AFLT_PROT_ACCESS;
1725 				otp->ot_trap |= OT_DATA_ACCESS;
1726 				rp->r_pc = otp->ot_trampoline;
1727 				rp->r_npc = rp->r_pc + 4;
1728 				trampolined = 1;
1729 				/*
1730 				 * for peeks and caut_gets, errors are expected
1731 				 */
1732 				hp = (ddi_acc_hdl_t *)otp->ot_handle;
1733 				if (!hp)
1734 					expected = DDI_FM_ERR_PEEK;
1735 				else if (hp->ah_acc.devacc_attr_access ==
1736 				    DDI_CAUTIOUS_ACC)
1737 					expected = DDI_FM_ERR_EXPECTED;
1738 			}
1739 
1740 		} else if (curthread->t_lofault) {
1741 			aflt->flt_prot = AFLT_PROT_COPY;
1742 			rp->r_g1 = EFAULT;
1743 			rp->r_pc = curthread->t_lofault;
1744 			rp->r_npc = rp->r_pc + 4;
1745 			trampolined = 1;
1746 		}
1747 	}
1748 
1749 	/*
1750 	 * If we're in user mode or we're doing a protected copy, we either
1751 	 * want the ASTON code below to send a signal to the user process
1752 	 * or we want to panic if aft_panic is set.
1753 	 *
1754 	 * If we're in privileged mode and we're not doing a copy, then we
1755 	 * need to check if we've trampolined.  If we haven't trampolined,
1756 	 * we should panic.
1757 	 */
1758 	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
1759 		if (t_afsr_errs &
1760 		    ((C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS) &
1761 		    ~(C_AFSR_BERR | C_AFSR_TO)))
1762 			aflt->flt_panic |= aft_panic;
1763 	} else if (!trampolined) {
1764 		aflt->flt_panic = 1;
1765 	}
1766 
1767 	/*
1768 	 * If we've trampolined due to a privileged TO or BERR, or if an
1769 	 * unprivileged TO or BERR occurred, we don't want to enqueue an
1770 	 * event for that TO or BERR.  Queue all other events (if any) besides
1771 	 * the TO/BERR.  Since we may not be enqueueing any events, we need to
1772 	 * ignore the number of events queued.  If we haven't trampolined due
1773 	 * to a TO or BERR, just enqueue events normally.
1774 	 */
1775 	log_afsr = t_afsr_errs;
1776 	if (trampolined) {
1777 		log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
1778 	} else if (!aflt->flt_priv) {
1779 		/*
1780 		 * User mode, suppress messages if
1781 		 * cpu_berr_to_verbose is not set.
1782 		 */
1783 		if (!cpu_berr_to_verbose)
1784 			log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
1785 	}
1786 
1787 	/*
1788 	 * Log any errors that occurred
1789 	 */
1790 	if (((log_afsr &
1791 		((C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS) & ~C_AFSR_ME)) &&
1792 		cpu_queue_events(&ch_flt, pr_reason, log_afsr, clop) == 0) ||
1793 		(t_afsr_errs &
1794 		(C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) == 0) {
1795 		ch_flt.flt_type = CPU_INV_AFSR;
1796 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1797 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1798 		    aflt->flt_panic);
1799 	}
1800 
1801 	/*
1802 	 * Zero out + invalidate CPU logout.
1803 	 */
1804 	if (clop) {
1805 		bzero(clop, sizeof (ch_cpu_logout_t));
1806 		clop->clo_data.chd_afar = LOGOUT_INVALID;
1807 	}
1808 
1809 #if defined(JALAPENO) || defined(SERRANO)
1810 	/*
1811 	 * UE/RUE/BERR/TO: Call our bus nexus friends to check for
1812 	 * IO errors that may have resulted in this trap.
1813 	 */
1814 	if (t_afsr & (C_AFSR_UE|C_AFSR_RUE|C_AFSR_TO|C_AFSR_BERR)) {
1815 		cpu_run_bus_error_handlers(aflt, expected);
1816 	}
1817 
1818 	/*
1819 	 * UE/RUE: If UE or RUE is in memory, we need to flush the bad
1820 	 * line from the Ecache.  We also need to query the bus nexus for
1821 	 * fatal errors.  Attempts to do diagnostic read on caches may
1822 	 * introduce more errors (especially when the module is bad).
1823 	 */
1824 	if (t_afsr & (C_AFSR_UE|C_AFSR_RUE)) {
1825 		/*
1826 		 * Ask our bus nexus friends if they have any fatal errors.  If
1827 		 * so, they will log appropriate error messages.
1828 		 */
1829 		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
1830 			aflt->flt_panic = 1;
1831 
1832 		/*
1833 		 * We got a UE or RUE and are panicking, save the fault PA in
1834 		 * a known location so that the platform specific panic code
1835 		 * can check for copyback errors.
1836 		 */
1837 		if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
1838 			panic_aflt = *aflt;
1839 		}
1840 	}
1841 
1842 	/*
1843 	 * Flush Ecache line or entire Ecache
1844 	 */
1845 	if (t_afsr & (C_AFSR_UE | C_AFSR_RUE | C_AFSR_EDU | C_AFSR_BERR))
1846 		cpu_error_ecache_flush(&ch_flt);
1847 #else /* JALAPENO || SERRANO */
1848 	/*
1849 	 * UE/BERR/TO: Call our bus nexus friends to check for
1850 	 * IO errors that may have resulted in this trap.
1851 	 */
1852 	if (t_afsr & (C_AFSR_UE|C_AFSR_TO|C_AFSR_BERR)) {
1853 		cpu_run_bus_error_handlers(aflt, expected);
1854 	}
1855 
1856 	/*
1857 	 * UE: If the UE is in memory, we need to flush the bad
1858 	 * line from the Ecache.  We also need to query the bus nexus for
1859 	 * fatal errors.  Attempts to do diagnostic read on caches may
1860 	 * introduce more errors (especially when the module is bad).
1861 	 */
1862 	if (t_afsr & C_AFSR_UE) {
1863 		/*
1864 		 * Ask our legacy bus nexus friends if they have any fatal
1865 		 * errors.  If so, they will log appropriate error messages.
1866 		 */
1867 		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
1868 			aflt->flt_panic = 1;
1869 
1870 		/*
1871 		 * We got a UE and are panicking, save the fault PA in a known
1872 		 * location so that the platform specific panic code can check
1873 		 * for copyback errors.
1874 		 */
1875 		if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
1876 			panic_aflt = *aflt;
1877 		}
1878 	}
1879 
1880 	/*
1881 	 * Flush Ecache line or entire Ecache
1882 	 */
1883 	if (t_afsr_errs &
1884 	    (C_AFSR_UE | C_AFSR_EDU | C_AFSR_BERR | C_AFSR_L3_EDU))
1885 		cpu_error_ecache_flush(&ch_flt);
1886 #endif /* JALAPENO || SERRANO */
1887 
1888 	/*
1889 	 * We carefully re-enable NCEEN and CEEN and then check if any deferred
1890 	 * or disrupting errors have happened.  We do this because if a
1891 	 * deferred or disrupting error had occurred with NCEEN/CEEN off, the
1892 	 * trap will not be taken when NCEEN/CEEN is re-enabled.  Note that
1893 	 * CEEN works differently on Cheetah than on Spitfire.  Also, we enable
1894 	 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
1895 	 * deferred or disrupting error happening between checking the AFSR and
1896 	 * enabling NCEEN/CEEN.
1897 	 *
1898 	 * Note: CEEN is only re-enabled if it was on when the trap was taken.
1899 	 */
1900 	set_error_enable(get_error_enable() | (EN_REG_NCEEN | ceen));
1901 	if (clear_errors(&ch_flt)) {
1902 		/*
1903 		 * Check for secondary errors, and avoid panicking if we
1904 		 * have them
1905 		 */
1906 		if (cpu_check_secondary_errors(&ch_flt, t_afsr_errs,
1907 		    t_afar) == 0) {
1908 			aflt->flt_panic |= ((ch_flt.afsr_errs &
1909 			    (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) != 0);
1910 		}
1911 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
1912 		    NULL);
1913 	}
1914 
1915 	/*
1916 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
1917 	 * be logged as part of the panic flow.
1918 	 */
1919 	if (aflt->flt_panic)
1920 		fm_panic("%sError(s)", pr_reason);
1921 
1922 	/*
1923 	 * If we queued an error and we are going to return from the trap and
1924 	 * the error was in user mode or inside of a copy routine, set AST flag
1925 	 * so the queue will be drained before returning to user mode.  The
1926 	 * AST processing will also act on our failure policy.
1927 	 */
1928 	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
1929 		int pcb_flag = 0;
1930 
1931 		if (t_afsr_errs &
1932 		    ((C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS) &
1933 		    ~(C_AFSR_BERR | C_AFSR_TO)))
1934 			pcb_flag |= ASYNC_HWERR;
1935 
1936 		if (t_afsr & C_AFSR_BERR)
1937 			pcb_flag |= ASYNC_BERR;
1938 
1939 		if (t_afsr & C_AFSR_TO)
1940 			pcb_flag |= ASYNC_BTO;
1941 
1942 		ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
1943 		aston(curthread);
1944 	}
1945 }
1946 
1947 #if defined(CPU_IMP_L1_CACHE_PARITY)
1948 /*
1949  * Handling of data and instruction parity errors (traps 0x71, 0x72).
1950  *
1951  * For Panther, P$ data parity errors during floating point load hits
1952  * are also detected (reported as TT 0x71) and handled by this trap
1953  * handler.
1954  *
1955  * AFSR/AFAR are not set for parity errors, only TPC (a virtual address)
1956  * is available.
1957  */
1958 /*ARGSUSED*/
1959 void
1960 cpu_parity_error(struct regs *rp, uint_t flags, caddr_t tpc)
1961 {
1962 	ch_async_flt_t ch_flt;
1963 	struct async_flt *aflt;
1964 	uchar_t tl = ((flags & CH_ERR_TL) != 0);
1965 	uchar_t iparity = ((flags & CH_ERR_IPE) != 0);
1966 	uchar_t panic = ((flags & CH_ERR_PANIC) != 0);
1967 	char *error_class;
1968 
1969 	/*
1970 	 * Log the error.
1971 	 * For icache parity errors the fault address is the trap PC.
1972 	 * For dcache/pcache parity errors the instruction would have to
1973 	 * be decoded to determine the address and that isn't possible
1974 	 * at high PIL.
1975 	 */
1976 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1977 	aflt = (struct async_flt *)&ch_flt;
1978 	aflt->flt_id = gethrtime_waitfree();
1979 	aflt->flt_bus_id = getprocessorid();
1980 	aflt->flt_inst = CPU->cpu_id;
1981 	aflt->flt_pc = tpc;
1982 	aflt->flt_addr = iparity ? (uint64_t)tpc : AFLT_INV_ADDR;
1983 	aflt->flt_prot = AFLT_PROT_NONE;
1984 	aflt->flt_class = CPU_FAULT;
1985 	aflt->flt_priv = (tl || (rp->r_tstate & TSTATE_PRIV)) ?  1 : 0;
1986 	aflt->flt_tl = tl;
1987 	aflt->flt_panic = panic;
1988 	aflt->flt_status = iparity ? ECC_IP_TRAP : ECC_DP_TRAP;
1989 	ch_flt.flt_type = iparity ? CPU_IC_PARITY : CPU_DC_PARITY;
1990 
1991 	if (iparity) {
1992 		cpu_icache_parity_info(&ch_flt);
1993 		if (ch_flt.parity_data.ipe.cpl_off != -1)
1994 			error_class = FM_EREPORT_CPU_USIII_IDSPE;
1995 		else if (ch_flt.parity_data.ipe.cpl_way != -1)
1996 			error_class = FM_EREPORT_CPU_USIII_ITSPE;
1997 		else
1998 			error_class = FM_EREPORT_CPU_USIII_IPE;
1999 		aflt->flt_payload = FM_EREPORT_PAYLOAD_ICACHE_PE;
2000 	} else {
2001 		cpu_dcache_parity_info(&ch_flt);
2002 		if (ch_flt.parity_data.dpe.cpl_off != -1)
2003 			error_class = FM_EREPORT_CPU_USIII_DDSPE;
2004 		else if (ch_flt.parity_data.dpe.cpl_way != -1)
2005 			error_class = FM_EREPORT_CPU_USIII_DTSPE;
2006 		else
2007 			error_class = FM_EREPORT_CPU_USIII_DPE;
2008 		aflt->flt_payload = FM_EREPORT_PAYLOAD_DCACHE_PE;
2009 		/*
2010 		 * For panther we also need to check the P$ for parity errors.
2011 		 */
2012 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2013 			cpu_pcache_parity_info(&ch_flt);
2014 			if (ch_flt.parity_data.dpe.cpl_cache == CPU_PC_PARITY) {
2015 				error_class = FM_EREPORT_CPU_USIII_PDSPE;
2016 				aflt->flt_payload =
2017 				    FM_EREPORT_PAYLOAD_PCACHE_PE;
2018 			}
2019 		}
2020 	}
2021 
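	/*
	 * Dispatch an ereport for the error class determined above; if
	 * flt_panic is set, the enqueued error will be logged as part
	 * of the panic flow.
	 */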
2022 	cpu_errorq_dispatch(error_class, (void *)&ch_flt,
2023 	    sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);
2024 
2025 	if (iparity) {
2026 		/*
2027 		 * Invalidate entire I$.
2028 		 * This is required due to the use of diagnostic ASI
2029 		 * accesses that may result in a loss of I$ coherency.
2030 		 */
2031 		if (cache_boot_state & DCU_IC) {
2032 			flush_icache();
2033 		}
2034 		/*
2035 		 * According to section P.3.1 of the Panther PRM, we
2036 		 * need to do a little more for recovery on those
2037 		 * CPUs after encountering an I$ parity error.
2038 		 */
2039 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2040 			flush_ipb();
2041 			correct_dcache_parity(dcache_size,
2042 			    dcache_linesize);
2043 			flush_pcache();
2044 		}
2045 	} else {
2046 		/*
2047 		 * Since the valid bit is ignored when checking parity, the
2048 		 * D$ data and tag must also be corrected.  Set D$ data bits
2049 		 * to zero and set utag to 0, 1, 2, 3.
2050 		 */
2051 		correct_dcache_parity(dcache_size, dcache_linesize);
2052 
2053 		/*
2054 		 * According to section P.3.3 of the Panther PRM, we
2055 		 * need to do a little more for recovery on those
2056 		 * CPUs after encountering a D$ or P$ parity error.
2057 		 *
2058 		 * As far as clearing P$ parity errors, it is enough to
2059 		 * simply invalidate all entries in the P$ since P$ parity
2060 		 * error traps are only generated for floating point load
2061 		 * hits.
2062 		 */
2063 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2064 			flush_icache();
2065 			flush_ipb();
2066 			flush_pcache();
2067 		}
2068 	}
2069 
2070 	/*
2071 	 * Invalidate entire D$ if it was enabled.
2072 	 * This is done to avoid stale data in the D$ which might
2073 	 * occur with the D$ disabled and the trap handler doing
2074 	 * stores affecting lines already in the D$.
2075 	 */
2076 	if (cache_boot_state & DCU_DC) {
2077 		flush_dcache();
2078 	}
2079 
2080 	/*
2081 	 * Restore caches to their bootup state.
2082 	 */
2083 	set_dcu(get_dcu() | cache_boot_state);
2084 
2085 	/*
2086 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
2087 	 * be logged as part of the panic flow.
2088 	 */
2089 	if (aflt->flt_panic)
2090 		fm_panic("%sError(s)", iparity ? "IPE " : "DPE ");
2091 
2092 	/*
2093 	 * If this error occurred at TL>0 then flush the E$ here to reduce
2094 	 * the chance of getting an unrecoverable Fast ECC error.  This
2095 	 * flush will evict the part of the parity trap handler that is run
2096 	 * at TL>1.
2097 	 */
2098 	if (tl) {
2099 		cpu_flush_ecache();
2100 	}
2101 }
2102 
2103 /*
2104  * On an I$ parity error, mark the appropriate entries in the ch_async_flt_t
2105  * to indicate which portions of the captured data should be in the ereport.
2106  */
2107 void
2108 cpu_async_log_ic_parity_err(ch_async_flt_t *ch_flt)
2109 {
2110 	int way = ch_flt->parity_data.ipe.cpl_way;
2111 	int offset = ch_flt->parity_data.ipe.cpl_off;
2112 	int tag_index;
2113 	struct async_flt *aflt = (struct async_flt *)ch_flt;
2114 
2115 
2116 	if ((offset != -1) || (way != -1)) {
2117 		/*
2118 		 * Parity error in I$ tag or data
2119 		 */
2120 		tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
2121 		if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
2122 			ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2123 			    PN_ICIDX_TO_WAY(tag_index);
2124 		else
2125 			ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2126 			    CH_ICIDX_TO_WAY(tag_index);
2127 		ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
2128 		    IC_LOGFLAG_MAGIC;
2129 	} else {
2130 		/*
2131 		 * Parity error was not identified.
2132 		 * Log tags and data for all ways.
2133 		 */
2134 		for (way = 0; way < CH_ICACHE_NWAY; way++) {
2135 			tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
2136 			if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
2137 				ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2138 				    PN_ICIDX_TO_WAY(tag_index);
2139 			else
2140 				ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2141 				    CH_ICIDX_TO_WAY(tag_index);
2142 			ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
2143 			    IC_LOGFLAG_MAGIC;
2144 		}
2145 	}
2146 }
2147 
2148 /*
2149  * On a D$ parity error, mark the appropriate entries in the ch_async_flt_t
2150  * to indicate which portions of the captured data should be in the ereport.
2151  */
2152 void
2153 cpu_async_log_dc_parity_err(ch_async_flt_t *ch_flt)
2154 {
2155 	int way = ch_flt->parity_data.dpe.cpl_way;
2156 	int offset = ch_flt->parity_data.dpe.cpl_off;
2157 	int tag_index;
2158 
2159 	if (offset != -1) {
2160 		/*
2161 		 * Parity error in D$ or P$ data array.
2162 		 *
2163 		 * First check to see whether the parity error is in D$ or P$
2164 		 * since P$ data parity errors are reported in Panther using
2165 		 * the same trap.
2166 		 */
2167 		if (ch_flt->parity_data.dpe.cpl_cache == CPU_PC_PARITY) {
2168 			tag_index = ch_flt->parity_data.dpe.cpl_pc[way].pc_idx;
2169 			ch_flt->parity_data.dpe.cpl_pc[way].pc_way =
2170 			    CH_PCIDX_TO_WAY(tag_index);
2171 			ch_flt->parity_data.dpe.cpl_pc[way].pc_logflag =
2172 			    PC_LOGFLAG_MAGIC;
2173 		} else {
2174 			tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx;
2175 			ch_flt->parity_data.dpe.cpl_dc[way].dc_way =
2176 			    CH_DCIDX_TO_WAY(tag_index);
2177 			ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag =
2178 			    DC_LOGFLAG_MAGIC;
2179 		}
2180 	} else if (way != -1) {
2181 		/*
2182 		 * Parity error in D$ tag.
2183 		 */
2184 		tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx;
2185 		ch_flt->parity_data.dpe.cpl_dc[way].dc_way =
2186 		    CH_DCIDX_TO_WAY(tag_index);
2187 		ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag =
2188 		    DC_LOGFLAG_MAGIC;
2189 	}
2190 }
2191 #endif	/* CPU_IMP_L1_CACHE_PARITY */
2192 
2193 /*
2194  * The cpu_async_log_err() function is called via the [uc]e_drain() function to
2195  * post-process CPU events that are dequeued.  As such, it can be invoked
2196  * from softint context, from AST processing in the trap() flow, or from the
2197  * panic flow.  We decode the CPU-specific data, and take appropriate actions.
2198  * Historically this entry point was used to log the actual cmn_err(9F) text;
2199  * now with FMA it is used to prepare 'flt' to be converted into an ereport.
2200  * With FMA this function now also returns a flag which indicates to the
2201  * caller whether the ereport should be posted (1) or suppressed (0).
2202  */
2203 static int
2204 cpu_async_log_err(void *flt, errorq_elem_t *eqep)
2205 {
2206 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)flt;
2207 	struct async_flt *aflt = (struct async_flt *)flt;
2208 	uint64_t errors;
2209 
2210 	switch (ch_flt->flt_type) {
2211 	case CPU_INV_AFSR:
2212 		/*
2213 		 * If it is a disrupting trap and the AFSR is zero, then
2214 		 * the event has probably already been noted. Do not post
2215 		 * an ereport.
2216 		 */
2217 		if ((aflt->flt_status & ECC_C_TRAP) &&
2218 		    (!(aflt->flt_stat & C_AFSR_MASK)))
2219 			return (0);
2220 		else
2221 			return (1);
2222 	case CPU_TO:
2223 	case CPU_BERR:
2224 	case CPU_FATAL:
2225 	case CPU_FPUERR:
2226 		return (1);
2227 
2228 	case CPU_UE_ECACHE_RETIRE:
2229 		cpu_log_err(aflt);
2230 		cpu_page_retire(ch_flt);
2231 		return (1);
2232 
2233 	/*
2234 	 * Cases where we may want to suppress logging or perform
2235 	 * extended diagnostics.
2236 	 */
2237 	case CPU_CE:
2238 	case CPU_EMC:
2239 		/*
2240 		 * We want to skip logging and further classification
2241 		 * only if ALL the following conditions are true:
2242 		 *
2243 		 *	1. There is only one error
2244 		 *	2. That error is a correctable memory error
2245 		 *	3. The error is caused by the memory scrubber (in
2246 		 *	   which case the error will have occurred under
2247 		 *	   on_trap protection)
2248 		 *	4. The error is on a retired page
2249 		 *
2250 		 * Note: AFLT_PROT_EC is used in places other than the memory
2251 		 * scrubber.  However, none of those errors should occur
2252 		 * on a retired page.
2253 		 */
2254 		if ((ch_flt->afsr_errs &
2255 		    (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_CE &&
2256 		    aflt->flt_prot == AFLT_PROT_EC) {
2257 
2258 			if (page_retire_check(aflt->flt_addr, NULL) == 0) {
2259 				if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {
2260 					/*
2261 					 * We're skipping logging, so schedule
2262 					 * the re-enabling of CEEN.
2263 					 */
2264 					(void) timeout(
2265 					    cpu_delayed_check_ce_errors,
2266 					    (void *)(uintptr_t)aflt->flt_inst,
2267 					    drv_usectohz((clock_t)
2268 					    cpu_ceen_delay_secs * MICROSEC));
2269 				}
2270 				return (0);
2271 			}
2272 		}
2273 
2274 		/*
2275 		 * Perform/schedule further classification actions, but
2276 		 * only if the page is healthy (we don't want bad
2277 		 * pages inducing too much diagnostic activity).  If we could
2278 		 * not find a page pointer then we also skip this.  If
2279 		 * ce_scrub_xdiag_recirc returns nonzero then it has chosen
2280 		 * to copy and recirculate the event (for further diagnostics)
2281 		 * and we should not proceed to log it here.
2282 		 *
2283 		 * This must be the last step here before the cpu_log_err()
2284 		 * below - if an event recirculates cpu_ce_log_err() will
2285 		 * not call the current function but just proceed directly
2286 		 * to cpu_ereport_post after the cpu_log_err() avoided below.
2287 		 *
2288 		 * Note: Check cpu_impl_async_log_err if changing this
2289 		 */
2290 		if (page_retire_check(aflt->flt_addr, &errors) == EINVAL) {
2291 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
2292 			    CE_XDIAG_SKIP_NOPP);
2293 		} else {
2294 			if (errors != PR_OK) {
2295 				CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
2296 				    CE_XDIAG_SKIP_PAGEDET);
2297 			} else if (ce_scrub_xdiag_recirc(aflt, ce_queue, eqep,
2298 			    offsetof(ch_async_flt_t, cmn_asyncflt))) {
2299 				return (0);
2300 			}
2301 		}
2302 		/*FALLTHRU*/
2303 
2304 	/*
2305 	 * Cases where we just want to report the error and continue.
2306 	 */
2307 	case CPU_CE_ECACHE:
2308 	case CPU_UE_ECACHE:
2309 	case CPU_IV:
2310 	case CPU_ORPH:
2311 		cpu_log_err(aflt);
2312 		return (1);
2313 
2314 	/*
2315 	 * Cases where we want to fall through to handle panicking.
2316 	 */
2317 	case CPU_UE:
2318 		/*
2319 		 * We want to skip logging in the same conditions as the
2320 		 * CE case.  In addition, we want to make sure we're not
2321 		 * panicking.
2322 		 */
2323 		if (!panicstr && (ch_flt->afsr_errs &
2324 		    (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_UE &&
2325 		    aflt->flt_prot == AFLT_PROT_EC) {
2326 			if (page_retire_check(aflt->flt_addr, NULL) == 0) {
2327 				/* Zero the address to clear the error */
2328 				softcall(ecc_page_zero, (void *)aflt->flt_addr);
2329 				return (0);
2330 			}
2331 		}
2332 		cpu_log_err(aflt);
2333 		break;
2334 
2335 	default:
2336 		/*
2337 		 * If the us3_common.c code doesn't know the flt_type, it may
2338 		 * be an implementation-specific code.  Call into the impldep
2339 		 * backend to find out what to do: if it tells us to continue,
2340 		 * break and handle as if falling through from a UE; if not,
2341 		 * the impldep backend has handled the error and we're done.
2342 		 */
2343 		switch (cpu_impl_async_log_err(flt, eqep)) {
2344 		case CH_ASYNC_LOG_DONE:
2345 			return (1);
2346 		case CH_ASYNC_LOG_RECIRC:
2347 			return (0);
2348 		case CH_ASYNC_LOG_CONTINUE:
2349 			break; /* continue on to handle UE-like error */
2350 		default:
2351 			cmn_err(CE_WARN, "discarding error 0x%p with "
2352 			    "invalid fault type (0x%x)",
2353 			    (void *)aflt, ch_flt->flt_type);
2354 			return (0);
2355 		}
2356 	}
2357 
2358 	/* ... fall through from the UE case */
2359 
2360 	if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) {
2361 		if (!panicstr) {
2362 			cpu_page_retire(ch_flt);
2363 		} else {
2364 			/*
2365 			 * Clear UEs on panic so that we don't
2366 			 * get haunted by them during panic or
2367 			 * after reboot
2368 			 */
2369 			cpu_clearphys(aflt);
2370 			(void) clear_errors(NULL);
2371 		}
2372 	}
2373 
2374 	return (1);
2375 }
2376 
2377 /*
2378  * Retire the bad page that may contain the flushed error.
2379  */
2380 void
2381 cpu_page_retire(ch_async_flt_t *ch_flt)
2382 {
2383 	struct async_flt *aflt = (struct async_flt *)ch_flt;
2384 	(void) page_retire(aflt->flt_addr, PR_UE);
2385 }
2386 
2387 /*
2388  * The cpu_log_err() function is called by cpu_async_log_err() to perform the
2389  * generic event post-processing for correctable and uncorrectable memory,
2390  * E$, and MTag errors.  Historically this entry point was used to log bits of
2391  * common cmn_err(9F) text; now with FMA it is used to prepare 'flt' to be
2392  * converted into an ereport.  In addition, it transmits the error to any
2393  * platform-specific service-processor FRU logging routines, if available.
2394  */
2395 void
2396 cpu_log_err(struct async_flt *aflt)
2397 {
2398 	char unum[UNUM_NAMLEN];
2399 	int len = 0;
2400 	int synd_status, synd_code, afar_status;
2401 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
2402 
2403 	/*
2404 	 * Need to turn on ECC_ECACHE for plat_get_mem_unum().
2405 	 * For Panther, L2$ is not external, so we don't want to
2406 	 * generate an E$ unum for those errors.
2407 	 */
2408 	if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
2409 		if (ch_flt->flt_bit & C_AFSR_EXT_L3_ERRS)
2410 			aflt->flt_status |= ECC_ECACHE;
2411 	} else {
2412 		if (ch_flt->flt_bit & C_AFSR_ECACHE)
2413 			aflt->flt_status |= ECC_ECACHE;
2414 	}
2415 
2416 	/*
2417 	 * Determine syndrome status.
2418 	 */
2419 	synd_status = afsr_to_synd_status(aflt->flt_inst,
2420 	    ch_flt->afsr_errs, ch_flt->flt_bit);
2421 
2422 	/*
2423 	 * Determine afar status.
2424 	 */
2425 	if (pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT))
2426 		afar_status = afsr_to_afar_status(ch_flt->afsr_errs,
2427 		    ch_flt->flt_bit);
2428 	else
2429 		afar_status = AFLT_STAT_INVALID;
2430 
2431 	/*
2432 	 * If afar status is not invalid do a unum lookup.
2433 	 */
2434 	if (afar_status != AFLT_STAT_INVALID) {
2435 		(void) cpu_get_mem_unum_aflt(synd_status, aflt, unum,
2436 		    UNUM_NAMLEN, &len);
2437 	} else {
2438 		unum[0] = '\0';
2439 	}
2440 
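	/*
	 * Convert the syndrome into a syndrome code for use by the
	 * service-processor FRU logging call below.
	 */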
2441 	synd_code = synd_to_synd_code(synd_status,
2442 	    aflt->flt_synd, ch_flt->flt_bit);
2443 
2444 	/*
2445 	 * Do not send the fruid message (plat_ecc_error_data_t)
2446 	 * to the SC if it can handle the enhanced error information
2447 	 * (plat_ecc_error2_data_t) or when the tunable
2448 	 * ecc_log_fruid_enable is set to 0.
2449 	 */
2450 
2451 	if (&plat_ecc_capability_sc_get &&
2452 	    plat_ecc_capability_sc_get(PLAT_ECC_ERROR_MESSAGE)) {
2453 		if (&plat_log_fruid_error)
2454 			plat_log_fruid_error(synd_code, aflt, unum,
2455 			    ch_flt->flt_bit);
2456 	}
2457 
2458 	if (aflt->flt_func != NULL)
2459 		aflt->flt_func(aflt, unum);
2460 
2461 	if (afar_status != AFLT_STAT_INVALID)
2462 		cpu_log_diag_info(ch_flt);
2463 
2464 	/*
2465 	 * If we have a CEEN error, we do not reenable CEEN until after
2466 	 * we exit the trap handler. Otherwise, another error may
2467 	 * occur causing the handler to be entered recursively.
2468 	 * We set a timeout to trigger in cpu_ceen_delay_secs seconds,
2469 	 * to try and ensure that the CPU makes progress in the face
2470 	 * of a CE storm.
2471 	 */
2472 	if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {
2473 		(void) timeout(cpu_delayed_check_ce_errors,
2474 		    (void *)(uintptr_t)aflt->flt_inst,
2475 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
2476 	}
2477 }
2478 
2479 /*
2480  * Invoked by error_init() early in startup and therefore before
2481  * startup_errorq() is called to drain any error Q -
2482  *
2483  * startup()
2484  *   startup_end()
2485  *     error_init()
2486  *       cpu_error_init()
2487  * errorq_init()
2488  *   errorq_drain()
2489  * start_other_cpus()
2490  *
2491  * The purpose of this routine is to create error-related taskqs.  Taskqs
2492  * are used for this purpose because cpu_lock can't be grabbed from interrupt
2493  * context.
2494  */
2495 void
2496 cpu_error_init(int items)
2497 {
2498 	/*
2499 	 * Create taskq(s) to reenable CE
2500 	 */
2501 	ch_check_ce_tq = taskq_create("cheetah_check_ce", 1, minclsyspri,
2502 	    items, items, TASKQ_PREPOPULATE);
2503 }
2504 
2505 void
2506 cpu_ce_log_err(struct async_flt *aflt, errorq_elem_t *eqep)
2507 {
2508 	char unum[UNUM_NAMLEN];
2509 	int len;
2510 
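	/*
	 * Dispatch on the fault class: CPU faults are post-processed and
	 * possibly posted as ereports, bus faults are handed to the
	 * registered flt_func, and recirculated CPU faults are logged
	 * directly since they have already been through classification.
	 */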
2511 	switch (aflt->flt_class) {
2512 	case CPU_FAULT:
2513 		cpu_ereport_init(aflt);
2514 		if (cpu_async_log_err(aflt, eqep))
2515 			cpu_ereport_post(aflt);
2516 		break;
2517 
2518 	case BUS_FAULT:
2519 		if (aflt->flt_func != NULL) {
2520 			(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt,
2521 			    unum, UNUM_NAMLEN, &len);
2522 			aflt->flt_func(aflt, unum);
2523 		}
2524 		break;
2525 
2526 	case RECIRC_CPU_FAULT:
2527 		aflt->flt_class = CPU_FAULT;
2528 		cpu_log_err(aflt);
2529 		cpu_ereport_post(aflt);
2530 		break;
2531 
2532 	case RECIRC_BUS_FAULT:
2533 		ASSERT(aflt->flt_class != RECIRC_BUS_FAULT);
2534 		/*FALLTHRU*/
2535 	default:
2536 		cmn_err(CE_WARN, "discarding CE error 0x%p with invalid "
2537 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
2538 		return;
2539 	}
2540 }
2541 
2542 /*
2543  * Scrub and classify a CE.  This function must not modify the
2544  * fault structure passed to it but instead should return the classification
2545  * information.
2546  */
2547 
2548 static uchar_t
2549 cpu_ce_scrub_mem_err_common(struct async_flt *ecc, boolean_t logout_tried)
2550 {
2551 	uchar_t disp = CE_XDIAG_EXTALG;
2552 	on_trap_data_t otd;
2553 	uint64_t orig_err;
2554 	ch_cpu_logout_t *clop;
2555 
2556 	/*
2557 	 * Clear CEEN.  CPU CE TL > 0 trap handling will already have done
2558 	 * this, but our other callers have not.  Disable preemption to
2559 	 * avoid CPU migration so that we restore CEEN on the correct
2560 	 * cpu later.
2561 	 *
2562 	 * CEEN is cleared so that further CEs that our instruction and
2563 	 * data footprint induce do not cause us either to creep down the
2564 	 * kernel stack to the point of overflow, or to generate so much CE
2565 	 * notification as to make little real forward progress.
2566 	 *
2567 	 * NCEEN must not be cleared.  However it is possible that
2568 	 * our accesses to the flt_addr may provoke a bus error or timeout
2569 	 * if the offending address has just been unconfigured as part of
2570 	 * a DR action.  So we must operate under on_trap protection.
2571 	 */
2572 	kpreempt_disable();
2573 	orig_err = get_error_enable();
2574 	if (orig_err & EN_REG_CEEN)
2575 		set_error_enable(orig_err & ~EN_REG_CEEN);
2576 
2577 	/*
2578 	 * Our classification algorithm includes the line state before
2579 	 * the scrub; we'd like this captured after the detection and
2580 	 * before the algorithm below - the earlier the better.
2581 	 *
2582 	 * If we've come from a cpu CE trap then this info already exists
2583 	 * in the cpu logout area.
2584 	 *
2585 	 * For a CE detected by memscrub for which there was no trap
2586 	 * (running with CEEN off) cpu_log_and_clear_ce has called
2587 	 * cpu_ce_delayed_ec_logout to capture some cache data, and
2588 	 * marked the fault structure as incomplete as a flag to later
2589 	 * logging code.
2590 	 *
2591 	 * If called directly from an IO detected CE there has been
2592 	 * no line data capture.  In this case we logout to the cpu logout
2593 	 * area - that's appropriate since it's the cpu cache data we need
2594 	 * for classification.  We thus borrow the cpu logout area for a
2595 	 * short time, and cpu_ce_delayed_ec_logout will mark it as busy
2596 	 * during this time (we will invalidate it again below).
2597 	 *
2598 	 * If called from the partner check xcall handler then this cpu
2599 	 * (the partner) has not necessarily experienced a CE at this
2600 	 * address.  But we want to capture line state before its scrub
2601 	 * attempt since we use that in our classification.
2602 	 */
2603 	if (logout_tried == B_FALSE) {
2604 		if (!cpu_ce_delayed_ec_logout(ecc->flt_addr))
2605 			disp |= CE_XDIAG_NOLOGOUT;
2606 	}
2607 
2608 	/*
2609 	 * Scrub memory, then check AFSR for errors.  The AFAR we scrub may
2610 	 * no longer be valid (if DR'd since the initial event) so we
2611 	 * perform this scrub under on_trap protection.  If this access is
2612 	 * ok then further accesses below will also be ok - DR cannot
2613 	 * proceed while this thread is active (preemption is disabled);
2614 	 * to be safe we'll nonetheless use on_trap again below.
2615 	 */
2616 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2617 		cpu_scrubphys(ecc);
2618 	} else {
2619 		no_trap();
2620 		if (orig_err & EN_REG_CEEN)
2621 			set_error_enable(orig_err);
2622 		kpreempt_enable();
2623 		return (disp);
2624 	}
2625 	no_trap();
2626 
2627 	/*
2628 	 * Did the casx read of the scrub log a CE that matches the AFAR?
2629 	 * Note that it's quite possible that the read sourced the data from
2630 	 * another cpu.
2631 	 */
2632 	if (clear_ecc(ecc))
2633 		disp |= CE_XDIAG_CE1;
2634 
2635 	/*
2636 	 * Read the data again.  This time the read is very likely to
2637 	 * come from memory since the scrub induced a writeback to memory.
2638 	 */
2639 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2640 		(void) lddphys(P2ALIGN(ecc->flt_addr, 8));
2641 	} else {
2642 		no_trap();
2643 		if (orig_err & EN_REG_CEEN)
2644 			set_error_enable(orig_err);
2645 		kpreempt_enable();
2646 		return (disp);
2647 	}
2648 	no_trap();
2649 
2650 	/* Did that read induce a CE that matches the AFAR? */
2651 	if (clear_ecc(ecc))
2652 		disp |= CE_XDIAG_CE2;
2653 
2654 	/*
2655 	 * Look at the logout information and record whether we found the
2656 	 * line in l2/l3 cache.  For Panther we are interested in whether
2657 	 * we found it in either cache (it won't reside in both but
2658 	 * it is possible to read it that way given the moving target).
2659 	 */
2660 	clop = CPU_PRIVATE(CPU) ? CPU_PRIVATE_PTR(CPU, chpr_cecc_logout) : NULL;
2661 	if (!(disp & CE_XDIAG_NOLOGOUT) && clop &&
2662 	    clop->clo_data.chd_afar != LOGOUT_INVALID) {
2663 		int hit, level;
2664 		int state;
2665 		int totalsize;
2666 		ch_ec_data_t *ecp;
2667 
2668 		/*
2669 		 * If hit is nonzero then a match was found and hit will
2670 		 * be one greater than the index which hit.  For Panther we
2671 		 * also need to pay attention to level to see which of l2$ or
2672 		 * l3$ it hit in.
2673 		 */
2674 		hit = cpu_matching_ecache_line(ecc->flt_addr, &clop->clo_data,
2675 		    0, &level);
2676 
2677 		if (hit) {
2678 			--hit;
2679 			disp |= CE_XDIAG_AFARMATCH;
2680 
2681 			if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2682 				if (level == 2)
2683 					ecp = &clop->clo_data.chd_l2_data[hit];
2684 				else
2685 					ecp = &clop->clo_data.chd_ec_data[hit];
2686 			} else {
2687 				ASSERT(level == 2);
2688 				ecp = &clop->clo_data.chd_ec_data[hit];
2689 			}
2690 			totalsize = cpunodes[CPU->cpu_id].ecache_size;
2691 			state = cpu_ectag_pa_to_subblk_state(totalsize,
2692 			    ecc->flt_addr, ecp->ec_tag);
2693 
2694 			/*
2695 			 * Cheetah variants use different state encodings -
2696 			 * the CH_ECSTATE_* defines vary depending on the
2697 			 * module we're compiled for.  Translate into our
2698 			 * one true version.  Conflate Owner-Shared state
2699 			 * of SSM mode with Owner as victimisation of such
2700 			 * lines may cause a writeback.
2701 			 */
2702 			switch (state) {
2703 			case CH_ECSTATE_MOD:
2704 				disp |= EC_STATE_M;
2705 				break;
2706 
2707 			case CH_ECSTATE_OWN:
2708 			case CH_ECSTATE_OWS:
2709 				disp |= EC_STATE_O;
2710 				break;
2711 
2712 			case CH_ECSTATE_EXL:
2713 				disp |= EC_STATE_E;
2714 				break;
2715 
2716 			case CH_ECSTATE_SHR:
2717 				disp |= EC_STATE_S;
2718 				break;
2719 
2720 			default:
2721 				disp |= EC_STATE_I;
2722 				break;
2723 			}
2724 		}
2725 
2726 		/*
2727 		 * If we initiated the delayed logout then we are responsible
2728 		 * for invalidating the logout area.
2729 		 */
2730 		if (logout_tried == B_FALSE) {
2731 			bzero(clop, sizeof (ch_cpu_logout_t));
2732 			clop->clo_data.chd_afar = LOGOUT_INVALID;
2733 		}
2734 	}
2735 
2736 	/*
2737 	 * Re-enable CEEN if we turned it off.
2738 	 */
2739 	if (orig_err & EN_REG_CEEN)
2740 		set_error_enable(orig_err);
2741 	kpreempt_enable();
2742 
2743 	return (disp);
2744 }
2745 
2746 /*
2747  * Scrub a correctable memory error and collect data for classification
2748  * of CE type.  This function is called in the detection path, i.e., TL0
2749  * handling of a correctable error trap (cpus) or interrupt (IO) at high PIL.
2750  */
2751 void
2752 cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t logout_tried)
2753 {
2754 	/*
2755 	 * Cheetah CE classification does not set any bits in flt_status.
2756 	 * Instead we will record classification datapoints in flt_disp.
2757 	 */
2758 	ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY);
2759 
2760 	/*
2761 	 * To check if the error detected by IO is persistent, sticky or
2762 	 * intermittent.  This is noticed by clear_ecc().
2763 	 */
2764 	if (ecc->flt_status & ECC_IOBUS)
2765 		ecc->flt_stat = C_AFSR_MEMORY;
2766 
2767 	/*
2768 	 * Record information from this first part of the algorithm in
2769 	 * flt_disp.
2770 	 */
2771 	ecc->flt_disp = cpu_ce_scrub_mem_err_common(ecc, logout_tried);
2772 }
2773 
2774 /*
2775  * Select a partner to perform a further CE classification check from.
2776  * Must be called with kernel preemption disabled (to stop the cpu list
2777  * from changing).  The detecting cpu we are partnering has cpuid
2778  * aflt->flt_inst; we might not be running on the detecting cpu.
2779  *
2780  * Restrict choice to active cpus in the same cpu partition as ourselves in
2781  * an effort to stop bad cpus in one partition causing other partitions to
2782  * perform excessive diagnostic activity.  Actually, since the errorq drain
2783  * is run from a softint most of the time and that is a global mechanism,
2784  * this isolation is only partial.  Return NULL if we fail to find a
2785  * suitable partner.
2786  *
2787  * We prefer a partner that is in a different latency group to ourselves as
2788  * we will share fewer datapaths.  If such a partner is unavailable then
2789  * choose one in the same lgroup but prefer a different chip and only allow
2790  * a sibling core if flags includes PTNR_SIBLINGOK.  If all else fails and
2791  * flags includes PTNR_SELFOK then permit selection of the original detector.
2792  *
2793  * We keep a cache of the last partner selected for a cpu, and we'll try to
2794  * use that previous partner if no more than cpu_ce_ptnr_cachetime_sec seconds
2795  * have passed since that selection was made.  This provides the benefit
2796  * of the point-of-view of different partners over time but without
2797  * requiring frequent cpu list traversals.
2798  */
2799 
2800 #define	PTNR_SIBLINGOK	0x1	/* Allow selection of sibling core */
2801 #define	PTNR_SELFOK	0x2	/* Allow selection of cpu to "partner" itself */
2802 
2803 static cpu_t *
2804 ce_ptnr_select(struct async_flt *aflt, int flags, int *typep)
2805 {
2806 	cpu_t *sp, *dtcr, *ptnr, *locptnr, *sibptnr;
2807 	hrtime_t lasttime, thistime;
2808 
2809 	ASSERT(curthread->t_preempt > 0 || getpil() >= DISP_LEVEL);
2810 
2811 	dtcr = cpu[aflt->flt_inst];
2812 
2813 	/*
2814 	 * Short-circuit for the following cases:
2815 	 *	. the dtcr is not flagged active
2816 	 *	. there is just one cpu present
2817 	 *	. the detector has disappeared
2818 	 *	. we were given a bad flt_inst cpuid; this should not happen
2819 	 *	  (eg PCI code now fills flt_inst) but if it does it is no
2820 	 *	  reason to panic.
2821 	 *	. there is just one cpu left online in the cpu partition
2822 	 *
2823 	 * If we return NULL after this point then we do not update the
2824 	 * chpr_ceptnr_seltime which will cause us to perform a full lookup
2825 	 * again next time; this is the case where the only other cpu online
2826 	 * in the detector's partition is on the same chip as the detector
2827 	 * and since CEEN re-enable is throttled even that case should not
2828 	 * hurt performance.
2829 	 */
2830 	if (dtcr == NULL || !cpu_flagged_active(dtcr->cpu_flags)) {
2831 		return (NULL);
2832 	}
2833 	if (ncpus == 1 || dtcr->cpu_part->cp_ncpus == 1) {
2834 		if (flags & PTNR_SELFOK) {
2835 			*typep = CE_XDIAG_PTNR_SELF;
2836 			return (dtcr);
2837 		} else {
2838 			return (NULL);
2839 		}
2840 	}
2841 
2842 	thistime = gethrtime();
2843 	lasttime = CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime);
2844 
2845 	/*
2846 	 * Select a starting point.
2847 	 */
2848 	if (!lasttime) {
2849 		/*
2850 		 * We've never selected a partner for this detector before.
2851 		 * Start the scan at the next online cpu in the same cpu
2852 		 * partition.
2853 		 */
2854 		sp = dtcr->cpu_next_part;
2855 	} else if (thistime - lasttime < cpu_ce_ptnr_cachetime_sec * NANOSEC) {
2856 		/*
2857 		 * Our last selection has not aged yet.  If this partner:
2858 		 *	. is still a valid cpu,
2859 		 *	. is still in the same partition as the detector
2860 		 *	. is still marked active
2861 		 *	. satisfies the 'flags' argument criteria
2862 		 * then select it again without updating the timestamp.
2863 		 */
2864 		sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)];
2865 		if (sp == NULL || sp->cpu_part != dtcr->cpu_part ||
2866 		    !cpu_flagged_active(sp->cpu_flags) ||
2867 		    (sp == dtcr && !(flags & PTNR_SELFOK)) ||
2868 		    (sp->cpu_chip->chip_id == dtcr->cpu_chip->chip_id &&
2869 		    !(flags & PTNR_SIBLINGOK))) {
2870 			sp = dtcr->cpu_next_part;
2871 		} else {
2872 			if (sp->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
2873 				*typep = CE_XDIAG_PTNR_REMOTE;
2874 			} else if (sp == dtcr) {
2875 				*typep = CE_XDIAG_PTNR_SELF;
2876 			} else if (sp->cpu_chip->chip_id ==
2877 			    dtcr->cpu_chip->chip_id) {
2878 				*typep = CE_XDIAG_PTNR_SIBLING;
2879 			} else {
2880 				*typep = CE_XDIAG_PTNR_LOCAL;
2881 			}
2882 			return (sp);
2883 		}
2884 	} else {
2885 		/*
2886 		 * Our last selection has aged.  If it is nonetheless still a
2887 		 * valid cpu then start the scan at the next cpu in the
2888 		 * partition after our last partner.  If the last selection
2889 		 * is no longer a valid cpu then go with our default.  In
2890 		 * this way we slowly cycle through possible partners to
2891 		 * obtain multiple viewpoints over time.
2892 		 */
2893 		sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)];
2894 		if (sp == NULL) {
2895 			sp = dtcr->cpu_next_part;
2896 		} else {
2897 			sp = sp->cpu_next_part;		/* may be dtcr */
2898 			if (sp->cpu_part != dtcr->cpu_part)
2899 				sp = dtcr;
2900 		}
2901 	}
2902 
2903 	/*
2904 	 * We have a proposed starting point for our search, but if this
2905 	 * cpu is offline then its cpu_next_part will point to itself
2906 	 * so we can't use that to iterate over cpus in this partition in
2907 	 * the loop below.  We still want to avoid iterating over cpus not
2908 	 * in our partition, so in the case that our starting point is offline
2909 	 * we will repoint it to be the detector itself;  and if the detector
2910 	 * happens to be offline we'll return NULL from the following loop.
2911 	 */
2912 	if (!cpu_flagged_active(sp->cpu_flags)) {
2913 		sp = dtcr;
2914 	}
2915 
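	/*
	 * Walk the cpus in this partition starting at sp.  A partner in
	 * a different lgroup is returned as soon as one is found; remember
	 * the first local-lgroup and sibling-core candidates seen in case
	 * no remote partner exists.
	 */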
2916 	ptnr = sp;
2917 	locptnr = NULL;
2918 	sibptnr = NULL;
2919 	do {
2920 		if (ptnr == dtcr || !cpu_flagged_active(ptnr->cpu_flags))
2921 			continue;
2922 		if (ptnr->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
2923 			CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = ptnr->cpu_id;
2924 			CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
2925 			*typep = CE_XDIAG_PTNR_REMOTE;
2926 			return (ptnr);
2927 		}
2928 		if (ptnr->cpu_chip->chip_id == dtcr->cpu_chip->chip_id) {
2929 			if (sibptnr == NULL)
2930 				sibptnr = ptnr;
2931 			continue;
2932 		}
2933 		if (locptnr == NULL)
2934 			locptnr = ptnr;
2935 	} while ((ptnr = ptnr->cpu_next_part) != sp);
2936 
2937 	/*
2938 	 * A foreign partner has already been returned if one was available.
2939 	 *
2940 	 * If locptnr is not NULL it is a cpu in the same lgroup as the
2941 	 * detector, is active, and is not a sibling of the detector.
2942 	 *
2943 	 * If sibptnr is not NULL it is a sibling of the detector, and is
2944 	 * active.
2945 	 *
2946 	 * If we have to resort to using the detector itself we have already
2947 	 * checked that it is active.
2948 	 */
2949 	if (locptnr) {
2950 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = locptnr->cpu_id;
2951 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
2952 		*typep = CE_XDIAG_PTNR_LOCAL;
2953 		return (locptnr);
2954 	} else if (sibptnr && flags & PTNR_SIBLINGOK) {
2955 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = sibptnr->cpu_id;
2956 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
2957 		*typep = CE_XDIAG_PTNR_SIBLING;
2958 		return (sibptnr);
2959 	} else if (flags & PTNR_SELFOK) {
2960 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = dtcr->cpu_id;
2961 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
2962 		*typep = CE_XDIAG_PTNR_SELF;
2963 		return (dtcr);
2964 	}
2965 
2966 	return (NULL);
2967 }
2968 
2969 /*
2970  * Cross call handler that is requested to run on the designated partner of
2971  * a cpu that experienced a possibly sticky or possibly persistent CE.
2972  */
2973 static void
2974 ce_ptnrchk_xc(struct async_flt *aflt, uchar_t *dispp)
2975 {
2976 	*dispp = cpu_ce_scrub_mem_err_common(aflt, B_FALSE);
2977 }
2978 
2979 /*
2980  * The associated errorqs are never destroyed so we do not need to deal with
2981  * them disappearing before this timeout fires.  If the affected memory
2982  * has been DR'd out since the original event the scrub algorithm will catch
2983  * any errors and return null disposition info.  If the original detecting
2984  * cpu has been DR'd out then ereport detector info will not be able to
2985  * look up CPU type; with a small timeout this is unlikely.
2986  */
2987 static void
2988 ce_lkychk_cb(ce_lkychk_cb_t *cbarg)
2989 {
2990 	struct async_flt *aflt = cbarg->lkycb_aflt;
2991 	uchar_t disp;
2992 	cpu_t *cp;
2993 	int ptnrtype;
2994 
2995 	kpreempt_disable();
2996 	if ((cp = ce_ptnr_select(aflt, PTNR_SIBLINGOK | PTNR_SELFOK,
2997 	    &ptnrtype)) != NULL) {
2998 		xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc, (uint64_t)aflt,
2999 		    (uint64_t)&disp);
3000 		CE_XDIAG_SETLKYINFO(aflt->flt_disp, disp);
3001 		CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id);
3002 		CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype);
3003 	} else {
3004 		ce_xdiag_lkydrops++;
3005 		if (ncpus > 1)
3006 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3007 			    CE_XDIAG_SKIP_NOPTNR);
3008 	}
3009 	kpreempt_enable();
3010 
3011 	errorq_commit(cbarg->lkycb_eqp, cbarg->lkycb_eqep, ERRORQ_ASYNC);
3012 	kmem_free(cbarg, sizeof (ce_lkychk_cb_t));
3013 }
3014 
3015 /*
3016  * Called from errorq drain code when processing a CE error, both from
3017  * CPU and PCI drain functions.  Decide what further classification actions,
3018  * if any, we will perform.  Perform immediate actions now, and schedule
3019  * delayed actions as required.  Note that we are no longer necessarily running
3020  * on the detecting cpu, and that the async_flt structure will not persist on
3021  * return from this function.
3022  *
3023  * Calls to this function should aim to be self-throttling in some way.  With
3024  * the delayed re-enable of CEEN the absolute rate of calls should not
3025  * be excessive.  Callers should also avoid performing in-depth classification
3026  * for events in pages that are already known to be suspect.
3027  *
3028  * We return nonzero to indicate that the event has been copied and
3029  * recirculated for further testing.  The caller should not log the event
3030  * in this case - it will be logged when further test results are available.
3031  *
3032  * Our possible contexts are that of errorq_drain: below lock level or from
3033  * panic context.  We can assume that the cpu we are running on is online.
3034  */
3035 
3036 
3037 #ifdef DEBUG
3038 static int ce_xdiag_forceaction;
3039 #endif
3040 
3041 int
3042 ce_scrub_xdiag_recirc(struct async_flt *aflt, errorq_t *eqp,
3043     errorq_elem_t *eqep, size_t afltoffset)
3044 {
3045 	ce_dispact_t dispact, action;
3046 	cpu_t *cp;
3047 	uchar_t dtcrinfo, disp;
3048 	int ptnrtype;
3049 
3050 	if (!ce_disp_inited || panicstr || ce_xdiag_off) {
3051 		ce_xdiag_drops++;
3052 		return (0);
3053 	} else if (!aflt->flt_in_memory) {
3054 		ce_xdiag_drops++;
3055 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOTMEM);
3056 		return (0);
3057 	}
3058 
3059 	dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp);
3060 
3061 	/*
3062 	 * Some correctable events are not scrubbed/classified, such as those
3063 	 * noticed at the tail of cpu_deferred_error.  So if there is no
3064 	 * initial detector classification, go no further.
3065 	 */
3066 	if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo)) {
3067 		ce_xdiag_drops++;
3068 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOSCRUB);
3069 		return (0);
3070 	}
3071 
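	/*
	 * Look up the disposition and action in the CE classification
	 * table using the datapoints recorded by the detector's scrub.
	 */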
3072 	dispact = CE_DISPACT(ce_disp_table,
3073 	    CE_XDIAG_AFARMATCHED(dtcrinfo),
3074 	    CE_XDIAG_STATE(dtcrinfo),
3075 	    CE_XDIAG_CE1SEEN(dtcrinfo),
3076 	    CE_XDIAG_CE2SEEN(dtcrinfo));
3077 
3078 
3079 	action = CE_ACT(dispact);	/* bad lookup caught below */
3080 #ifdef DEBUG
3081 	if (ce_xdiag_forceaction != 0)
3082 		action = ce_xdiag_forceaction;
3083 #endif
3084 
3085 	switch (action) {
3086 	case CE_ACT_LKYCHK: {
3087 		caddr_t ndata;
3088 		errorq_elem_t *neqep;
3089 		struct async_flt *ecc;
3090 		ce_lkychk_cb_t *cbargp;
3091 
3092 		if ((ndata = errorq_elem_dup(eqp, eqep, &neqep)) == NULL) {
3093 			ce_xdiag_lkydrops++;
3094 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3095 			    CE_XDIAG_SKIP_DUPFAIL);
3096 			break;
3097 		}
3098 		ecc = (struct async_flt *)(ndata + afltoffset);
3099 
3100 		ASSERT(ecc->flt_class == CPU_FAULT ||
3101 		    ecc->flt_class == BUS_FAULT);
3102 		ecc->flt_class = (ecc->flt_class == CPU_FAULT) ?
3103 		    RECIRC_CPU_FAULT : RECIRC_BUS_FAULT;
3104 
3105 		cbargp = kmem_alloc(sizeof (ce_lkychk_cb_t), KM_SLEEP);
3106 		cbargp->lkycb_aflt = ecc;
3107 		cbargp->lkycb_eqp = eqp;
3108 		cbargp->lkycb_eqep = neqep;
3109 
3110 		(void) timeout((void (*)(void *))ce_lkychk_cb,
3111 		    (void *)cbargp, drv_usectohz(cpu_ce_lkychk_timeout_usec));
3112 		return (1);
3113 	}
3114 
3115 	case CE_ACT_PTNRCHK:
3116 		kpreempt_disable();	/* stop cpu list changing */
3117 		if ((cp = ce_ptnr_select(aflt, 0, &ptnrtype)) != NULL) {
3118 			xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc,
3119 			    (uint64_t)aflt, (uint64_t)&disp);
3120 			CE_XDIAG_SETPTNRINFO(aflt->flt_disp, disp);
3121 			CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id);
3122 			CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype);
3123 		} else if (ncpus > 1) {
3124 			ce_xdiag_ptnrdrops++;
3125 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3126 			    CE_XDIAG_SKIP_NOPTNR);
3127 		} else {
3128 			ce_xdiag_ptnrdrops++;
3129 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3130 			    CE_XDIAG_SKIP_UNIPROC);
3131 		}
3132 		kpreempt_enable();
3133 		break;
3134 
3135 	case CE_ACT_DONE:
3136 		break;
3137 
3138 	case CE_ACT(CE_DISP_BAD):
3139 	default:
3140 #ifdef DEBUG
3141 		cmn_err(CE_PANIC, "ce_scrub_post: Bad action '%d'", action);
3142 #endif
3143 		ce_xdiag_bad++;
3144 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_ACTBAD);
3145 		break;
3146 	}
3147 
3148 	return (0);
3149 }
3150 
3151 /*
3152  * We route all errors through a single switch statement.
3153  */
3154 void
3155 cpu_ue_log_err(struct async_flt *aflt)
3156 {
3157 	switch (aflt->flt_class) {
3158 	case CPU_FAULT:
3159 		cpu_ereport_init(aflt);
3160 		if (cpu_async_log_err(aflt, NULL))
3161 			cpu_ereport_post(aflt);
3162 		break;
3163 
3164 	case BUS_FAULT:
3165 		bus_async_log_err(aflt);
3166 		break;
3167 
3168 	default:
3169 		cmn_err(CE_WARN, "discarding async error %p with invalid "
3170 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
3171 		return;
3172 	}
3173 }
3174 
3175 /*
3176  * Routine for panic hook callback from panic_idle().
3177  */
3178 void
3179 cpu_async_panic_callb(void)
3180 {
3181 	ch_async_flt_t ch_flt;
3182 	struct async_flt *aflt;
3183 	ch_cpu_errors_t cpu_error_regs;
3184 	uint64_t afsr_errs;
3185 
3186 	get_cpu_error_state(&cpu_error_regs);
3187 
3188 	afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3189 	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_L3_ERRS);
3190 
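	/*
	 * If any AFSR/AFSR_EXT error bits are set, construct a fault
	 * structure and queue the events so they are logged as part of
	 * the panic flow.
	 */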
3191 	if (afsr_errs) {
3192 
3193 		bzero(&ch_flt, sizeof (ch_async_flt_t));
3194 		aflt = (struct async_flt *)&ch_flt;
3195 		aflt->flt_id = gethrtime_waitfree();
3196 		aflt->flt_bus_id = getprocessorid();
3197 		aflt->flt_inst = CPU->cpu_id;
3198 		aflt->flt_stat = cpu_error_regs.afsr;
3199 		aflt->flt_addr = cpu_error_regs.afar;
3200 		aflt->flt_prot = AFLT_PROT_NONE;
3201 		aflt->flt_class = CPU_FAULT;
3202 		aflt->flt_priv = ((cpu_error_regs.afsr & C_AFSR_PRIV) != 0);
3203 		aflt->flt_panic = 1;
3204 		ch_flt.afsr_ext = cpu_error_regs.afsr_ext;
3205 		ch_flt.afsr_errs = afsr_errs;
3206 #if defined(SERRANO)
3207 		ch_flt.afar2 = cpu_error_regs.afar2;
3208 #endif	/* SERRANO */
3209 		(void) cpu_queue_events(&ch_flt, NULL, afsr_errs, NULL);
3210 	}
3211 }
3212 
3213 /*
3214  * Routine to convert a syndrome into a syndrome code.
3215  */
3216 static int
3217 synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit)
3218 {
3219 	if (synd_status == AFLT_STAT_INVALID)
3220 		return (-1);
3221 
3222 	/*
3223 	 * Use the syndrome to index the appropriate syndrome table,
3224 	 * to get the code indicating which bit(s) is(are) bad.
3225 	 */
3226 	if (afsr_bit &
3227 	    (C_AFSR_MSYND_ERRS | C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
3228 		if (afsr_bit & C_AFSR_MSYND_ERRS) {
3229 #if defined(JALAPENO) || defined(SERRANO)
3230 			if ((synd == 0) || (synd >= BSYND_TBL_SIZE))
3231 				return (-1);
3232 			else
3233 				return (BPAR0 + synd);
3234 #else /* JALAPENO || SERRANO */
3235 			if ((synd == 0) || (synd >= MSYND_TBL_SIZE))
3236 				return (-1);
3237 			else
3238 				return (mtag_syndrome_tab[synd]);
3239 #endif /* JALAPENO || SERRANO */
3240 		} else {
3241 			if ((synd == 0) || (synd >= ESYND_TBL_SIZE))
3242 				return (-1);
3243 			else
3244 				return (ecc_syndrome_tab[synd]);
3245 		}
3246 	} else {
3247 		return (-1);
3248 	}
3249 }
3250 
3251 /*
3252  * Routine to return a string identifying the physical name
3253  * associated with a memory/cache error.
3254  */
3255 int
3256 cpu_get_mem_unum(int synd_status, ushort_t flt_synd, uint64_t flt_stat,
3257     uint64_t flt_addr, int flt_bus_id, int flt_in_memory,
3258     ushort_t flt_status, char *buf, int buflen, int *lenp)
3259 {
3260 	int synd_code;
3261 	int ret;
3262 
3263 	/*
3264 	 * An AFSR of -1 defaults to a memory syndrome.
3265 	 */
3266 	if (flt_stat == (uint64_t)-1)
3267 		flt_stat = C_AFSR_CE;
3268 
3269 	synd_code = synd_to_synd_code(synd_status, flt_synd, flt_stat);
3270 
3271 	/*
3272 	 * Syndrome code must be either a single-bit error code
3273 	 * (0...143) or -1 for unum lookup.
3274 	 */
3275 	if (synd_code < 0 || synd_code >= M2)
3276 		synd_code = -1;
3277 	if (&plat_get_mem_unum) {
3278 		if ((ret = plat_get_mem_unum(synd_code, flt_addr, flt_bus_id,
3279 		    flt_in_memory, flt_status, buf, buflen, lenp)) != 0) {
3280 			buf[0] = '\0';
3281 			*lenp = 0;
3282 		}
3283 
3284 		return (ret);
3285 	}
3286 
3287 	return (ENOTSUP);
3288 }
3289 
3290 /*
3291  * Wrapper for cpu_get_mem_unum() routine that takes an
3292  * async_flt struct rather than explicit arguments.
3293  */
3294 int
3295 cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt,
3296     char *buf, int buflen, int *lenp)
3297 {
3298 	/*
3299 	 * If we come through here for an IO bus error, aflt->flt_stat will
3300 	 * not be the CPU AFSR, and we pass in a -1 to cpu_get_mem_unum()
3301 	 * so it will interpret this as a memory error.
3302 	 */
3303 	return (cpu_get_mem_unum(synd_status, aflt->flt_synd,
3304 	    (aflt->flt_class == BUS_FAULT) ?
3305 	    (uint64_t)-1 : ((ch_async_flt_t *)(aflt))->afsr_errs,
3306 	    aflt->flt_addr, aflt->flt_bus_id, aflt->flt_in_memory,
3307 	    aflt->flt_status, buf, buflen, lenp));
3308 }
3309 
3310 /*
3311  * This routine is a more generic interface to cpu_get_mem_unum()
3312  * that may be used by other modules (e.g. mm).
3313  */
3314 int
3315 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar,
3316     char *buf, int buflen, int *lenp)
3317 {
3318 	int synd_status, flt_in_memory, ret;
3319 	ushort_t flt_status = 0;
3320 	char unum[UNUM_NAMLEN];
3321 
3322 	/*
3323 	 * Check for an invalid address.
3324 	 */
3325 	if (afar == (uint64_t)-1)
3326 		return (ENXIO);
3327 
3328 	if (synd == (uint64_t)-1)
3329 		synd_status = AFLT_STAT_INVALID;
3330 	else
3331 		synd_status = AFLT_STAT_VALID;
3332 
3333 	flt_in_memory = (*afsr & C_AFSR_MEMORY) &&
3334 	    pf_is_memory(afar >> MMU_PAGESHIFT);
3335 
3336 	/*
3337 	 * Need to turn on ECC_ECACHE for plat_get_mem_unum().
3338 	 * For Panther, L2$ is not external, so we don't want to
3339 	 * generate an E$ unum for those errors.
3340 	 */
3341 	if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
3342 		if (*(afsr + 1) & C_AFSR_EXT_L3_ERRS)
3343 			flt_status |= ECC_ECACHE;
3344 	} else {
3345 		if (*afsr & C_AFSR_ECACHE)
3346 			flt_status |= ECC_ECACHE;
3347 	}
3348 
3349 	ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar,
3350 	    CPU->cpu_id, flt_in_memory, flt_status, unum, UNUM_NAMLEN, lenp);
3351 	if (ret != 0)
3352 		return (ret);
3353 
3354 	if (*lenp >= buflen)
3355 		return (ENAMETOOLONG);
3356 
3357 	(void) strncpy(buf, unum, buflen);
3358 
3359 	return (0);
3360 }
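
/*
 * Usage sketch (example only, never compiled): how a client module might
 * call cpu_get_mem_name().  The synd_value, afsr_value, afsr_ext_value and
 * afar_value names are hypothetical placeholders.  Note that on Panther the
 * routine also reads *(afsr + 1), so the AFSR and extended AFSR are supplied
 * back to back.
 */
#if 0
	uint64_t afsr_regs[2] = { afsr_value, afsr_ext_value };
	char unum[UNUM_NAMLEN];
	int len;

	if (cpu_get_mem_name(synd_value, afsr_regs, afar_value,
	    unum, sizeof (unum), &len) == 0) {
		/* unum now holds the physical name, len its reported length */
	}
#endif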
3361 
3362 /*
3363  * Routine to return memory information associated
3364  * with a physical address and syndrome.
3365  */
3366 int
3367 cpu_get_mem_info(uint64_t synd, uint64_t afar,
3368     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
3369     int *segsp, int *banksp, int *mcidp)
3370 {
3371 	int synd_status, synd_code;
3372 
3373 	if (afar == (uint64_t)-1)
3374 		return (ENXIO);
3375 
3376 	if (synd == (uint64_t)-1)
3377 		synd_status = AFLT_STAT_INVALID;
3378 	else
3379 		synd_status = AFLT_STAT_VALID;
3380 
3381 	synd_code = synd_to_synd_code(synd_status, synd, C_AFSR_CE);
3382 
3383 	if (p2get_mem_info != NULL)
3384 		return ((p2get_mem_info)(synd_code, afar,
3385 			mem_sizep, seg_sizep, bank_sizep,
3386 			segsp, banksp, mcidp));
3387 	else
3388 		return (ENOTSUP);
3389 }
3390 
3391 /*
3392  * Routine to return a string identifying the physical
3393  * name associated with a cpuid.
3394  */
3395 int
3396 cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
3397 {
3398 	int ret;
3399 	char unum[UNUM_NAMLEN];
3400 
3401 	if (&plat_get_cpu_unum) {
3402 		if ((ret = plat_get_cpu_unum(cpuid, unum, UNUM_NAMLEN, lenp))
3403 		    != 0)
3404 			return (ret);
3405 	} else {
3406 		return (ENOTSUP);
3407 	}
3408 
3409 	if (*lenp >= buflen)
3410 		return (ENAMETOOLONG);
3411 
3412 	(void) strncpy(buf, unum, buflen);
3413 
3414 	return (0);
3415 }
3416 
3417 /*
3418  * This routine exports the name buffer size.
3419  */
3420 size_t
3421 cpu_get_name_bufsize()
3422 {
3423 	return (UNUM_NAMLEN);
3424 }
3425 
3426 /*
3427  * Historical function, apparently not used.
3428  */
3429 /* ARGSUSED */
3430 void
3431 cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err)
3432 {}
3433 
3434 /*
3435  * Historical function only called for SBus errors in debugging.
3436  */
3437 /*ARGSUSED*/
3438 void
3439 read_ecc_data(struct async_flt *aflt, short verbose, short ce_err)
3440 {}
3441 
3442 /*
3443  * Clear the AFSR sticky bits.  The routine returns a non-zero value if
3444  * any of the AFSR's sticky errors are detected.  If a non-null pointer to
3445  * an async fault structure argument is passed in, the captured error state
3446  * (AFSR, AFAR) info will be returned in the structure.
3447  */
3448 int
3449 clear_errors(ch_async_flt_t *ch_flt)
3450 {
3451 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3452 	ch_cpu_errors_t	cpu_error_regs;
3453 
3454 	get_cpu_error_state(&cpu_error_regs);
3455 
3456 	if (ch_flt != NULL) {
3457 		aflt->flt_stat = cpu_error_regs.afsr & C_AFSR_MASK;
3458 		aflt->flt_addr = cpu_error_regs.afar;
3459 		ch_flt->afsr_ext = cpu_error_regs.afsr_ext;
3460 		ch_flt->afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3461 		    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS);
3462 #if defined(SERRANO)
3463 		ch_flt->afar2 = cpu_error_regs.afar2;
3464 #endif	/* SERRANO */
3465 	}
3466 
3467 	set_cpu_error_state(&cpu_error_regs);
3468 
3469 	return (((cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3470 	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS)) != 0);
3471 }
3472 
3473 /*
3474  * Clear any AFSR error bits, and check for persistence.
3475  *
3476  * It would be desirable to also insist that syndrome match.  PCI handling
3477  * has already filled flt_synd.  For errors trapped by CPU we only fill
3478  * flt_synd when we queue the event, so we do not have a valid flt_synd
3479  * during initial classification (it is valid if we're called as part of
3480  * subsequent low-pil additional classification attempts).  We could try
3481  * to determine which syndrome to use: we know we're only called for
3482  * CE/RCE (Jalapeno & Serrano) and CE/EMC (others) so the syndrome to use
3483  * would be esynd/none and esynd/msynd, respectively.  If that is
3484  * implemented then what do we do in the case that we do experience an
3485  * error on the same afar but with different syndrome?  At the very least
3486  * we should count such occurrences.  Anyway, for now, we'll leave it as
3487  * it has been for ages.
3488  */
3489 static int
3490 clear_ecc(struct async_flt *aflt)
3491 {
3492 	ch_cpu_errors_t	cpu_error_regs;
3493 
3494 	/*
3495 	 * Snapshot the AFSR and AFAR and clear any errors
3496 	 */
3497 	get_cpu_error_state(&cpu_error_regs);
3498 	set_cpu_error_state(&cpu_error_regs);
3499 
3500 	/*
3501 	 * If any of the same memory access error bits are still on and
3502 	 * the AFAR matches, return that the error is persistent.
3503 	 */
3504 	return ((cpu_error_regs.afsr & (C_AFSR_MEMORY & aflt->flt_stat)) != 0 &&
3505 	    cpu_error_regs.afar == aflt->flt_addr);
3506 }
3507 
3508 /*
3509  * Turn off all cpu error detection, normally only used for panics.
3510  */
3511 void
3512 cpu_disable_errors(void)
3513 {
3514 	xt_all(set_error_enable_tl1, EN_REG_DISABLE, EER_SET_ABSOLUTE);
3515 }
3516 
3517 /*
3518  * Enable errors.
3519  */
3520 void
3521 cpu_enable_errors(void)
3522 {
3523 	xt_all(set_error_enable_tl1, EN_REG_ENABLE, EER_SET_ABSOLUTE);
3524 }
3525 
3526 /*
3527  * Flush the entire ecache using displacement flush by reading through a
3528  * physical address range twice as large as the Ecache.
3529  */
3530 void
3531 cpu_flush_ecache(void)
3532 {
3533 	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size,
3534 	    cpunodes[CPU->cpu_id].ecache_linesize);
3535 }
3536 
3537 /*
3538  * Return CPU E$ set size - E$ size divided by the associativity.
3539  * We use this function in places where the CPU_PRIVATE ptr may not be
3540  * initialized yet.  Note that for send_mondo and in the Ecache scrubber,
3541  * we're guaranteed that CPU_PRIVATE is initialized.  Also, cpunodes is set
3542  * up before the kernel switches from OBP's to the kernel's trap table, so
3543  * we don't have to worry about cpunodes being uninitialized.
3544  */
3545 int
3546 cpu_ecache_set_size(struct cpu *cp)
3547 {
3548 	if (CPU_PRIVATE(cp))
3549 		return (CPU_PRIVATE_VAL(cp, chpr_ec_set_size));
3550 
3551 	return (cpunodes[cp->cpu_id].ecache_size / cpu_ecache_nway());
3552 }
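
/*
 * Worked example (hypothetical geometry): an 8 MB E$ organized as 2-way
 * set-associative has a set size of 8 MB / 2 = 4 MB.  That value is the
 * stride used by the flush, scrub and tag-match routines below.
 */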
3553 
3554 /*
3555  * Flush Ecache line.
3556  * Uses ASI_EC_DIAG for Cheetah+ and Jalapeno.
3557  * Uses normal displacement flush for Cheetah.
3558  */
3559 static void
3560 cpu_flush_ecache_line(ch_async_flt_t *ch_flt)
3561 {
3562 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3563 	int ec_set_size = cpu_ecache_set_size(CPU);
3564 
3565 	ecache_flush_line(aflt->flt_addr, ec_set_size);
3566 }
3567 
3568 /*
3569  * Scrub physical address.
3570  * Scrub code is different depending upon whether this is a Cheetah+ with 2-way
3571  * Ecache or direct-mapped Ecache.
3572  */
3573 static void
3574 cpu_scrubphys(struct async_flt *aflt)
3575 {
3576 	int ec_set_size = cpu_ecache_set_size(CPU);
3577 
3578 	scrubphys(aflt->flt_addr, ec_set_size);
3579 }
3580 
3581 /*
3582  * Clear physical address.
3583  * Scrub code is different depending upon whether this is a Cheetah+ with 2-way
3584  * Ecache or direct-mapped Ecache.
3585  */
3586 void
3587 cpu_clearphys(struct async_flt *aflt)
3588 {
3589 	int lsize = cpunodes[CPU->cpu_id].ecache_linesize;
3590 	int ec_set_size = cpu_ecache_set_size(CPU);
3591 
3592 
3593 	clearphys(P2ALIGN(aflt->flt_addr, lsize), ec_set_size, lsize);
3594 }
3595 
3596 #if defined(CPU_IMP_ECACHE_ASSOC)
3597 /*
3598  * Check for a matching valid line in all the ways.
3599  * If found, return set# + 1. Otherwise return 0.
3600  */
3601 static int
3602 cpu_ecache_line_valid(ch_async_flt_t *ch_flt)
3603 {
3604 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3605 	int totalsize = cpunodes[CPU->cpu_id].ecache_size;
3606 	int ec_set_size = cpu_ecache_set_size(CPU);
3607 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
3608 	int nway = cpu_ecache_nway();
3609 	int i;
3610 
3611 	for (i = 0; i < nway; i++, ecp++) {
3612 		if (!cpu_ectag_line_invalid(totalsize, ecp->ec_tag) &&
3613 		    (aflt->flt_addr & P2ALIGN(C_AFAR_PA, ec_set_size)) ==
3614 		    cpu_ectag_to_pa(ec_set_size, ecp->ec_tag))
3615 			return (i+1);
3616 	}
3617 	return (0);
3618 }
3619 #endif /* CPU_IMP_ECACHE_ASSOC */
3620 
3621 /*
3622  * Check whether a line in the given logout info matches the specified
3623  * fault address.  If reqval is set then the line must not be Invalid.
3624  * Returns 0 on failure; on success, (way + 1) is returned and *level is
3625  * set to 2 for l2$ or 3 for l3$.
3626  */
3627 static int
3628 cpu_matching_ecache_line(uint64_t faddr, void *data, int reqval, int *level)
3629 {
3630 	ch_diag_data_t *cdp = data;
3631 	ch_ec_data_t *ecp;
3632 	int totalsize, ec_set_size;
3633 	int i, ways;
3634 	int match = 0;
3635 	int tagvalid;
3636 	uint64_t addr, tagpa;
3637 	int ispanther = IS_PANTHER(cpunodes[CPU->cpu_id].implementation);
3638 
3639 	/*
3640 	 * Check the l2$ logout data
3641 	 */
3642 	if (ispanther) {
3643 		ecp = &cdp->chd_l2_data[0];
3644 		ec_set_size = PN_L2_SET_SIZE;
3645 		ways = PN_L2_NWAYS;
3646 	} else {
3647 		ecp = &cdp->chd_ec_data[0];
3648 		ec_set_size = cpu_ecache_set_size(CPU);
3649 		ways = cpu_ecache_nway();
3650 		totalsize = cpunodes[CPU->cpu_id].ecache_size;
3651 	}
3652 	/* remove low order PA bits from fault address not used in PA tag */
3653 	addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);
3654 	for (i = 0; i < ways; i++, ecp++) {
3655 		if (ispanther) {
3656 			tagpa = PN_L2TAG_TO_PA(ecp->ec_tag);
3657 			tagvalid = !PN_L2_LINE_INVALID(ecp->ec_tag);
3658 		} else {
3659 			tagpa = cpu_ectag_to_pa(ec_set_size, ecp->ec_tag);
3660 			tagvalid = !cpu_ectag_line_invalid(totalsize,
3661 			    ecp->ec_tag);
3662 		}
3663 		if (tagpa == addr && (!reqval || tagvalid)) {
3664 			match = i + 1;
3665 			*level = 2;
3666 			break;
3667 		}
3668 	}
3669 
3670 	if (match || !ispanther)
3671 		return (match);
3672 
3673 	/* For Panther we also check the l3$ */
3674 	ecp = &cdp->chd_ec_data[0];
3675 	ec_set_size = PN_L3_SET_SIZE;
3676 	ways = PN_L3_NWAYS;
3677 	addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);
3678 
3679 	for (i = 0; i < ways; i++, ecp++) {
3680 		if (PN_L3TAG_TO_PA(ecp->ec_tag) == addr && (!reqval ||
3681 		    !PN_L3_LINE_INVALID(ecp->ec_tag))) {
3682 			match = i + 1;
3683 			*level = 3;
3684 			break;
3685 		}
3686 	}
3687 
3688 	return (match);
3689 }
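
/*
 * Worked example (hypothetical 1 MB set size): P2ALIGN(C_AFAR_PA, 1 MB)
 * clears the low 20 bits of the AFAR mask, so a fault address of
 * 0x12345678 is compared against the tag-derived PA as 0x12300000; the
 * index and offset bits within a set are not part of the stored tag.
 */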
3690 
3691 #if defined(CPU_IMP_L1_CACHE_PARITY)
3692 /*
3693  * Record information related to the source of a Dcache Parity Error.
3694  */
3695 static void
3696 cpu_dcache_parity_info(ch_async_flt_t *ch_flt)
3697 {
3698 	int dc_set_size = dcache_size / CH_DCACHE_NWAY;
3699 	int index;
3700 
3701 	/*
3702 	 * Since instruction decode cannot be done at high PIL,
3703 	 * just examine the entire Dcache to locate the error.
3704 	 */
3705 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3706 		ch_flt->parity_data.dpe.cpl_way = -1;
3707 		ch_flt->parity_data.dpe.cpl_off = -1;
3708 	}
3709 	for (index = 0; index < dc_set_size; index += dcache_linesize)
3710 		cpu_dcache_parity_check(ch_flt, index);
3711 }
3712 
3713 /*
3714  * Check all ways of the Dcache at a specified index for good parity.
3715  */
3716 static void
3717 cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index)
3718 {
3719 	int dc_set_size = dcache_size / CH_DCACHE_NWAY;
3720 	uint64_t parity_bits, pbits, data_word;
3721 	static int parity_bits_popc[] = { 0, 1, 1, 0 };
3722 	int way, word, data_byte;
3723 	ch_dc_data_t *dcp = &ch_flt->parity_data.dpe.cpl_dc[0];
3724 	ch_dc_data_t tmp_dcp;
3725 
3726 	for (way = 0; way < CH_DCACHE_NWAY; way++, dcp++) {
3727 		/*
3728 		 * Perform diagnostic read.
3729 		 */
3730 		get_dcache_dtag(index + way * dc_set_size,
3731 				(uint64_t *)&tmp_dcp);
3732 
3733 		/*
3734 		 * Check tag for even parity.
3735 		 * Sum of 1 bits (including parity bit) should be even.
3736 		 */
3737 		if (popc64(tmp_dcp.dc_tag & CHP_DCTAG_PARMASK) & 1) {
3738 			/*
3739 			 * If this is the first error, log detailed information
3740 			 * about it and check the snoop tag. Otherwise just
3741 			 * record the fact that we found another error.
3742 			 */
3743 			if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3744 				ch_flt->parity_data.dpe.cpl_way = way;
3745 				ch_flt->parity_data.dpe.cpl_cache =
3746 				    CPU_DC_PARITY;
3747 				ch_flt->parity_data.dpe.cpl_tag |= CHP_DC_TAG;
3748 
3749 				if (popc64(tmp_dcp.dc_sntag &
3750 						CHP_DCSNTAG_PARMASK) & 1) {
3751 					ch_flt->parity_data.dpe.cpl_tag |=
3752 								CHP_DC_SNTAG;
3753 					ch_flt->parity_data.dpe.cpl_lcnt++;
3754 				}
3755 
3756 				bcopy(&tmp_dcp, dcp, sizeof (ch_dc_data_t));
3757 			}
3758 
3759 			ch_flt->parity_data.dpe.cpl_lcnt++;
3760 		}
3761 
3762 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
3763 			/*
3764 			 * Panther has more parity bits than the other
3765 			 * processors for covering dcache data and so each
3766 			 * byte of data in each word has its own parity bit.
3767 			 */
3768 			parity_bits = tmp_dcp.dc_pn_data_parity;
3769 			for (word = 0; word < 4; word++) {
3770 				data_word = tmp_dcp.dc_data[word];
3771 				pbits = parity_bits & PN_DC_DATA_PARITY_MASK;
3772 				for (data_byte = 0; data_byte < 8;
3773 				    data_byte++) {
3774 					if (((popc64(data_word &
3775 					    PN_DC_DATA_PARITY_MASK)) & 1) ^
3776 					    (pbits & 1)) {
3777 						cpu_record_dc_data_parity(
3778 						ch_flt, dcp, &tmp_dcp, way,
3779 						word);
3780 					}
3781 					pbits >>= 1;
3782 					data_word >>= 8;
3783 				}
3784 				parity_bits >>= 8;
3785 			}
3786 		} else {
3787 			/*
3788 			 * Check data array for even parity.
3789 			 * The 8 parity bits are grouped into 4 pairs each
3790 			 * of which covers a 64-bit word.  The endianness is
3791 			 * reversed -- the low-order parity bits cover the
3792 			 * high-order data words.
3793 			 */
3794 			parity_bits = tmp_dcp.dc_utag >> 8;
3795 			for (word = 0; word < 4; word++) {
3796 				pbits = (parity_bits >> (6 - word * 2)) & 3;
3797 				if ((popc64(tmp_dcp.dc_data[word]) +
3798 				    parity_bits_popc[pbits]) & 1) {
3799 					cpu_record_dc_data_parity(ch_flt, dcp,
3800 					    &tmp_dcp, way, word);
3801 				}
3802 			}
3803 		}
3804 	}
3805 }
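
/*
 * Worked example for the non-Panther check above (hypothetical values):
 * a data word containing five 1 bits together with a 2-bit parity pair of
 * 0b01 sums to 5 + parity_bits_popc[1] = 6, which is even, so the word
 * passes; if the pair read back as 0b00 the sum would be 5 (odd) and
 * cpu_record_dc_data_parity() would be called for that way and word.
 */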
3806 
3807 static void
3808 cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
3809     ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word)
3810 {
3811 	/*
3812 	 * If this is the first error, log detailed information about it.
3813 	 * Otherwise just record the fact that we found another error.
3814 	 */
3815 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3816 		ch_flt->parity_data.dpe.cpl_way = way;
3817 		ch_flt->parity_data.dpe.cpl_cache = CPU_DC_PARITY;
3818 		ch_flt->parity_data.dpe.cpl_off = word * 8;
3819 		bcopy(src_dcp, dest_dcp, sizeof (ch_dc_data_t));
3820 	}
3821 	ch_flt->parity_data.dpe.cpl_lcnt++;
3822 }
3823 
3824 /*
3825  * Record information related to the source of an Icache Parity Error.
3826  *
3827  * Called with the Icache disabled so any diagnostic accesses are safe.
3828  */
3829 static void
3830 cpu_icache_parity_info(ch_async_flt_t *ch_flt)
3831 {
3832 	int	ic_set_size;
3833 	int	ic_linesize;
3834 	int	index;
3835 
3836 	if (CPU_PRIVATE(CPU)) {
3837 		ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
3838 		    CH_ICACHE_NWAY;
3839 		ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
3840 	} else {
3841 		ic_set_size = icache_size / CH_ICACHE_NWAY;
3842 		ic_linesize = icache_linesize;
3843 	}
3844 
3845 	ch_flt->parity_data.ipe.cpl_way = -1;
3846 	ch_flt->parity_data.ipe.cpl_off = -1;
3847 
3848 	for (index = 0; index < ic_set_size; index += ic_linesize)
3849 		cpu_icache_parity_check(ch_flt, index);
3850 }
3851 
3852 /*
3853  * Check all ways of the Icache at a specified index for good parity.
3854  */
3855 static void
3856 cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index)
3857 {
3858 	uint64_t parmask, pn_inst_parity;
3859 	int ic_set_size;
3860 	int ic_linesize;
3861 	int flt_index, way, instr, num_instr;
3862 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3863 	ch_ic_data_t *icp = &ch_flt->parity_data.ipe.cpl_ic[0];
3864 	ch_ic_data_t tmp_icp;
3865 
3866 	if (CPU_PRIVATE(CPU)) {
3867 		ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
3868 		    CH_ICACHE_NWAY;
3869 		ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
3870 	} else {
3871 		ic_set_size = icache_size / CH_ICACHE_NWAY;
3872 		ic_linesize = icache_linesize;
3873 	}
3874 
3875 	/*
3876 	 * Panther has twice as many instructions per icache line and the
3877 	 * instruction parity bit is in a different location.
3878 	 */
3879 	if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
3880 		num_instr = PN_IC_DATA_REG_SIZE / sizeof (uint64_t);
3881 		pn_inst_parity = PN_ICDATA_PARITY_BIT_MASK;
3882 	} else {
3883 		num_instr = CH_IC_DATA_REG_SIZE / sizeof (uint64_t);
3884 		pn_inst_parity = 0;
3885 	}
3886 
3887 	/*
3888 	 * Index at which we expect to find the parity error.
3889 	 */
3890 	flt_index = P2ALIGN(aflt->flt_addr % ic_set_size, ic_linesize);
3891 
3892 	for (way = 0; way < CH_ICACHE_NWAY; way++, icp++) {
3893 		/*
3894 		 * Diagnostic reads expect address argument in ASI format.
3895 		 */
3896 		get_icache_dtag(2 * (index + way * ic_set_size),
3897 				(uint64_t *)&tmp_icp);
3898 
3899 		/*
3900 		 * If this is the index in which we expect to find the
3901 		 * error, log detailed information about each of the ways.
3902 		 * This information will be displayed later if we can't
3903 		 * determine the exact way in which the error is located.
3904 		 */
3905 		if (flt_index == index)
3906 			bcopy(&tmp_icp, icp, sizeof (ch_ic_data_t));
3907 
3908 		/*
3909 		 * Check tag for even parity.
3910 		 * Sum of 1 bits (including parity bit) should be even.
3911 		 */
3912 		if (popc64(tmp_icp.ic_patag & CHP_ICPATAG_PARMASK) & 1) {
3913 			/*
3914 			 * If this way is the one in which we expected
3915 			 * to find the error, record the way and check the
3916 			 * snoop tag. Otherwise just record the fact we
3917 			 * found another error.
3918 			 */
3919 			if (flt_index == index) {
3920 				ch_flt->parity_data.ipe.cpl_way = way;
3921 				ch_flt->parity_data.ipe.cpl_tag |= CHP_IC_TAG;
3922 
3923 				if (popc64(tmp_icp.ic_sntag &
3924 						CHP_ICSNTAG_PARMASK) & 1) {
3925 					ch_flt->parity_data.ipe.cpl_tag |=
3926 								CHP_IC_SNTAG;
3927 					ch_flt->parity_data.ipe.cpl_lcnt++;
3928 				}
3929 
3930 			}
3931 			ch_flt->parity_data.ipe.cpl_lcnt++;
3932 			continue;
3933 		}
3934 
3935 		/*
3936 		 * Check instruction data for even parity.
3937 		 * Bits participating in parity differ for PC-relative
3938 		 * versus non-PC-relative instructions.
3939 		 */
3940 		for (instr = 0; instr < num_instr; instr++) {
3941 			parmask = (tmp_icp.ic_data[instr] &
3942 					CH_ICDATA_PRED_ISPCREL) ?
3943 				(CHP_ICDATA_PCREL_PARMASK | pn_inst_parity) :
3944 				(CHP_ICDATA_NPCREL_PARMASK | pn_inst_parity);
3945 			if (popc64(tmp_icp.ic_data[instr] & parmask) & 1) {
3946 				/*
3947 				 * If this way is the one in which we expected
3948 				 * to find the error, record the way and offset.
3949 				 * Otherwise just log the fact we found another
3950 				 * error.
3951 				 */
3952 				if (flt_index == index) {
3953 					ch_flt->parity_data.ipe.cpl_way = way;
3954 					ch_flt->parity_data.ipe.cpl_off =
3955 								instr * 4;
3956 				}
3957 				ch_flt->parity_data.ipe.cpl_lcnt++;
3958 				continue;
3959 			}
3960 		}
3961 	}
3962 }
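
/*
 * Worked example for the flt_index computation above (hypothetical 32 KB
 * set size and 32-byte line): a fault address of 0x10008434 gives
 * 0x10008434 % 0x8000 = 0x434, which P2ALIGN() rounds down to the line
 * boundary 0x420; that is the index at which per-way data is captured.
 */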
3963 
3964 /*
3965  * Record information related to the source of a Pcache Parity Error.
3966  */
3967 static void
3968 cpu_pcache_parity_info(ch_async_flt_t *ch_flt)
3969 {
3970 	int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
3971 	int index;
3972 
3973 	/*
3974 	 * Since instruction decode cannot be done at high PIL, just
3975 	 * examine the entire Pcache to check for any parity errors.
3976 	 */
3977 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3978 		ch_flt->parity_data.dpe.cpl_way = -1;
3979 		ch_flt->parity_data.dpe.cpl_off = -1;
3980 	}
3981 	for (index = 0; index < pc_set_size; index += CH_PCACHE_LSIZE)
3982 		cpu_pcache_parity_check(ch_flt, index);
3983 }
3984 
3985 /*
3986  * Check all ways of the Pcache at a specified index for good parity.
3987  */
3988 static void
3989 cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index)
3990 {
3991 	int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
3992 	int pc_data_words = CH_PC_DATA_REG_SIZE / sizeof (uint64_t);
3993 	int way, word, pbit, parity_bits;
3994 	ch_pc_data_t *pcp = &ch_flt->parity_data.dpe.cpl_pc[0];
3995 	ch_pc_data_t tmp_pcp;
3996 
3997 	for (way = 0; way < CH_PCACHE_NWAY; way++, pcp++) {
3998 		/*
3999 		 * Perform diagnostic read.
4000 		 */
4001 		get_pcache_dtag(index + way * pc_set_size,
4002 				(uint64_t *)&tmp_pcp);
4003 		/*
4004 		 * Check data array for odd parity. There are 8 parity
4005 		 * bits (bits 57:50 of ASI_PCACHE_STATUS_DATA) and each
4006 		 * of those bits covers exactly 8 bytes of the data
4007 		 * array:
4008 		 *
4009 		 *	parity bit	P$ data bytes covered
4010 		 *	----------	---------------------
4011 		 *	50		63:56
4012 		 *	51		55:48
4013 		 *	52		47:40
4014 		 *	53		39:32
4015 		 *	54		31:24
4016 		 *	55		23:16
4017 		 *	56		15:8
4018 		 *	57		7:0
4019 		 */
4020 		parity_bits = PN_PC_PARITY_BITS(tmp_pcp.pc_status);
4021 		for (word = 0; word < pc_data_words; word++) {
4022 			pbit = (parity_bits >> (pc_data_words - word - 1)) & 1;
4023 			if ((popc64(tmp_pcp.pc_data[word]) & 1) ^ pbit) {
4024 				/*
4025 				 * If this is the first error, log detailed
4026 				 * information about it. Otherwise just record
4027 				 * the fact that we found another error.
4028 				 */
4029 				if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4030 					ch_flt->parity_data.dpe.cpl_way = way;
4031 					ch_flt->parity_data.dpe.cpl_cache =
4032 					    CPU_PC_PARITY;
4033 					ch_flt->parity_data.dpe.cpl_off =
4034 					    word * sizeof (uint64_t);
4035 					bcopy(&tmp_pcp, pcp,
4036 							sizeof (ch_pc_data_t));
4037 				}
4038 				ch_flt->parity_data.dpe.cpl_lcnt++;
4039 			}
4040 		}
4041 	}
4042 }
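
/*
 * Worked example (hypothetical values): for each 64-bit pc_data word the
 * status bit extracted by PN_PC_PARITY_BITS() must match
 * popc64(pc_data[word]) & 1, so a word with an odd number of 1 bits whose
 * status bit reads back as 0 is recorded as a parity error above.
 */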
4043 
4044 
4045 /*
4046  * Add L1 Data cache data to the ereport payload.
4047  */
4048 static void
4049 cpu_payload_add_dcache(struct async_flt *aflt, nvlist_t *nvl)
4050 {
4051 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4052 	ch_dc_data_t *dcp;
4053 	ch_dc_data_t dcdata[CH_DCACHE_NWAY];
4054 	uint_t nelem;
4055 	int i, ways_to_check, ways_logged = 0;
4056 
4057 	/*
4058 	 * If this is a D$ fault then there may be multiple
4059 	 * ways captured in the ch_parity_log_t structure.
4060 	 * Otherwise, there will be at most one way captured
4061 	 * in the ch_diag_data_t struct.
4062 	 * Check each way to see if it should be encoded.
4063 	 */
4064 	if (ch_flt->flt_type == CPU_DC_PARITY)
4065 		ways_to_check = CH_DCACHE_NWAY;
4066 	else
4067 		ways_to_check = 1;
4068 	for (i = 0; i < ways_to_check; i++) {
4069 		if (ch_flt->flt_type == CPU_DC_PARITY)
4070 			dcp = &ch_flt->parity_data.dpe.cpl_dc[i];
4071 		else
4072 			dcp = &ch_flt->flt_diag_data.chd_dc_data;
4073 		if (dcp->dc_logflag == DC_LOGFLAG_MAGIC) {
4074 			bcopy(dcp, &dcdata[ways_logged],
4075 				sizeof (ch_dc_data_t));
4076 			ways_logged++;
4077 		}
4078 	}
4079 
4080 	/*
4081 	 * Add the dcache data to the payload.
4082 	 */
4083 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_WAYS,
4084 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4085 	if (ways_logged != 0) {
4086 		nelem = sizeof (ch_dc_data_t) / sizeof (uint64_t) * ways_logged;
4087 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_DATA,
4088 		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)dcdata, NULL);
4089 	}
4090 }
4091 
4092 /*
4093  * Add L1 Instruction cache data to the ereport payload.
4094  */
4095 static void
4096 cpu_payload_add_icache(struct async_flt *aflt, nvlist_t *nvl)
4097 {
4098 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4099 	ch_ic_data_t *icp;
4100 	ch_ic_data_t icdata[CH_ICACHE_NWAY];
4101 	uint_t nelem;
4102 	int i, ways_to_check, ways_logged = 0;
4103 
4104 	/*
4105 	 * If this is an I$ fault then there may be multiple
4106 	 * ways captured in the ch_parity_log_t structure.
4107 	 * Otherwise, there will be at most one way captured
4108 	 * in the ch_diag_data_t struct.
4109 	 * Check each way to see if it should be encoded.
4110 	 */
4111 	if (ch_flt->flt_type == CPU_IC_PARITY)
4112 		ways_to_check = CH_ICACHE_NWAY;
4113 	else
4114 		ways_to_check = 1;
4115 	for (i = 0; i < ways_to_check; i++) {
4116 		if (ch_flt->flt_type == CPU_IC_PARITY)
4117 			icp = &ch_flt->parity_data.ipe.cpl_ic[i];
4118 		else
4119 			icp = &ch_flt->flt_diag_data.chd_ic_data;
4120 		if (icp->ic_logflag == IC_LOGFLAG_MAGIC) {
4121 			bcopy(icp, &icdata[ways_logged],
4122 				sizeof (ch_ic_data_t));
4123 			ways_logged++;
4124 		}
4125 	}
4126 
4127 	/*
4128 	 * Add the icache data to the payload.
4129 	 */
4130 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_WAYS,
4131 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4132 	if (ways_logged != 0) {
4133 		nelem = sizeof (ch_ic_data_t) / sizeof (uint64_t) * ways_logged;
4134 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_DATA,
4135 		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)icdata, NULL);
4136 	}
4137 }
4138 
4139 #endif	/* CPU_IMP_L1_CACHE_PARITY */
4140 
4141 /*
4142  * Add ecache data to payload.
4143  */
4144 static void
4145 cpu_payload_add_ecache(struct async_flt *aflt, nvlist_t *nvl)
4146 {
4147 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4148 	ch_ec_data_t *ecp;
4149 	ch_ec_data_t ecdata[CHD_EC_DATA_SETS];
4150 	uint_t nelem;
4151 	int i, ways_logged = 0;
4152 
4153 	/*
4154 	 * Check each way to see if it should be encoded
4155 	 * and concatenate it into a temporary buffer.
4156 	 */
4157 	for (i = 0; i < CHD_EC_DATA_SETS; i++) {
4158 		ecp = &ch_flt->flt_diag_data.chd_ec_data[i];
4159 		if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
4160 			bcopy(ecp, &ecdata[ways_logged],
4161 				sizeof (ch_ec_data_t));
4162 			ways_logged++;
4163 		}
4164 	}
4165 
4166 	/*
4167 	 * Panther CPUs have an additional level of cache and so
4168 	 * what we just collected was the L3 (ecache) and not the
4169 	 * L2 cache.
4170 	 */
4171 	if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
4172 		/*
4173 		 * Add the L3 (ecache) data to the payload.
4174 		 */
4175 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_WAYS,
4176 		    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4177 		if (ways_logged != 0) {
4178 			nelem = sizeof (ch_ec_data_t) /
4179 			    sizeof (uint64_t) * ways_logged;
4180 			fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_DATA,
4181 			    DATA_TYPE_UINT64_ARRAY, nelem,
4182 			    (uint64_t *)ecdata, NULL);
4183 		}
4184 
4185 		/*
4186 		 * Now collect the L2 cache.
4187 		 */
4188 		ways_logged = 0;
4189 		for (i = 0; i < PN_L2_NWAYS; i++) {
4190 			ecp = &ch_flt->flt_diag_data.chd_l2_data[i];
4191 			if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
4192 				bcopy(ecp, &ecdata[ways_logged],
4193 				    sizeof (ch_ec_data_t));
4194 				ways_logged++;
4195 			}
4196 		}
4197 	}
4198 
4199 	/*
4200 	 * Add the L2 cache data to the payload.
4201 	 */
4202 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_WAYS,
4203 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4204 	if (ways_logged != 0) {
4205 		nelem = sizeof (ch_ec_data_t) /
4206 			sizeof (uint64_t) * ways_logged;
4207 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_DATA,
4208 		    DATA_TYPE_UINT64_ARRAY, nelem,  (uint64_t *)ecdata, NULL);
4209 	}
4210 }
4211 
4212 /*
4213  * Encode the data saved in the ch_async_flt_t struct into
4214  * the FM ereport payload.
4215  */
4216 static void
4217 cpu_payload_add_aflt(struct async_flt *aflt, nvlist_t *payload,
4218 	nvlist_t *resource, int *afar_status, int *synd_status)
4219 {
4220 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4221 	*synd_status = AFLT_STAT_INVALID;
4222 	*afar_status = AFLT_STAT_INVALID;
4223 
4224 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR) {
4225 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR,
4226 		    DATA_TYPE_UINT64, aflt->flt_stat, NULL);
4227 	}
4228 
4229 	if ((aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR_EXT) &&
4230 	    IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
4231 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR_EXT,
4232 		    DATA_TYPE_UINT64, ch_flt->afsr_ext, NULL);
4233 	}
4234 
4235 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR_STATUS) {
4236 		*afar_status = afsr_to_afar_status(ch_flt->afsr_errs,
4237 		    ch_flt->flt_bit);
4238 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR_STATUS,
4239 		    DATA_TYPE_UINT8, (uint8_t)*afar_status, NULL);
4240 	}
4241 
4242 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR) {
4243 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR,
4244 		    DATA_TYPE_UINT64, aflt->flt_addr, NULL);
4245 	}
4246 
4247 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PC) {
4248 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PC,
4249 		    DATA_TYPE_UINT64, (uint64_t)aflt->flt_pc, NULL);
4250 	}
4251 
4252 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TL) {
4253 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TL,
4254 		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_tl, NULL);
4255 	}
4256 
4257 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TT) {
4258 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TT,
4259 		    DATA_TYPE_UINT8, flt_to_trap_type(aflt), NULL);
4260 	}
4261 
4262 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PRIV) {
4263 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PRIV,
4264 		    DATA_TYPE_BOOLEAN_VALUE,
4265 		    (aflt->flt_priv ? B_TRUE : B_FALSE), NULL);
4266 	}
4267 
4268 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ME) {
4269 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ME,
4270 		    DATA_TYPE_BOOLEAN_VALUE,
4271 		    (aflt->flt_stat & C_AFSR_ME) ? B_TRUE : B_FALSE, NULL);
4272 	}
4273 
4274 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND_STATUS) {
4275 		*synd_status = afsr_to_synd_status(aflt->flt_inst,
4276 		    ch_flt->afsr_errs, ch_flt->flt_bit);
4277 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND_STATUS,
4278 		    DATA_TYPE_UINT8, (uint8_t)*synd_status, NULL);
4279 	}
4280 
4281 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND) {
4282 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND,
4283 		    DATA_TYPE_UINT16, (uint16_t)aflt->flt_synd, NULL);
4284 	}
4285 
4286 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_TYPE) {
4287 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_TYPE,
4288 		    DATA_TYPE_STRING, flt_to_error_type(aflt), NULL);
4289 	}
4290 
4291 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_DISP) {
4292 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_DISP,
4293 		    DATA_TYPE_UINT64, aflt->flt_disp, NULL);
4294 	}
4295 
4296 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L2)
4297 		cpu_payload_add_ecache(aflt, payload);
4298 
4299 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_COPYFUNCTION) {
4300 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_COPYFUNCTION,
4301 		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_status & 0xff, NULL);
4302 	}
4303 
4304 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_HOWDETECTED) {
4305 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_HOWDETECTED,
4306 		    DATA_TYPE_UINT8, (uint8_t)(aflt->flt_status >> 8), NULL);
4307 	}
4308 
4309 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_INSTRBLOCK) {
4310 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_INSTRBLOCK,
4311 		    DATA_TYPE_UINT32_ARRAY, 16,
4312 		    (uint32_t *)&ch_flt->flt_fpdata, NULL);
4313 	}
4314 
4315 #if defined(CPU_IMP_L1_CACHE_PARITY)
4316 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1D)
4317 		cpu_payload_add_dcache(aflt, payload);
4318 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1I)
4319 		cpu_payload_add_icache(aflt, payload);
4320 #endif	/* CPU_IMP_L1_CACHE_PARITY */
4321 
4322 #if defined(CHEETAH_PLUS)
4323 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1P)
4324 		cpu_payload_add_pcache(aflt, payload);
4325 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_TLB)
4326 		cpu_payload_add_tlb(aflt, payload);
4327 #endif	/* CHEETAH_PLUS */
4328 	/*
4329 	 * Create the FMRI that goes into the payload
4330 	 * and contains the unum info if necessary.
4331 	 */
4332 	if ((aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_RESOURCE) &&
4333 	    (*afar_status == AFLT_STAT_VALID)) {
4334 		char unum[UNUM_NAMLEN];
4335 		int len;
4336 
4337 		if (cpu_get_mem_unum_aflt(*synd_status, aflt, unum,
4338 		    UNUM_NAMLEN, &len) == 0) {
4339 			fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION,
4340 			    NULL, unum, NULL);
4341 			fm_payload_set(payload,
4342 			    FM_EREPORT_PAYLOAD_NAME_RESOURCE,
4343 			    DATA_TYPE_NVLIST, resource, NULL);
4344 		}
4345 	}
4346 }
4347 
4348 /*
4349  * Initialize the way info if necessary.
4350  */
4351 void
4352 cpu_ereport_init(struct async_flt *aflt)
4353 {
4354 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4355 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
4356 	ch_ec_data_t *l2p = &ch_flt->flt_diag_data.chd_l2_data[0];
4357 	int i;
4358 
4359 	/*
4360 	 * Initialize the info in the CPU logout structure.
4361 	 * The I$/D$ way information is not initialized here
4362 	 * since it is captured in the logout assembly code.
4363 	 */
4364 	for (i = 0; i < CHD_EC_DATA_SETS; i++)
4365 		(ecp + i)->ec_way = i;
4366 
4367 	for (i = 0; i < PN_L2_NWAYS; i++)
4368 		(l2p + i)->ec_way = i;
4369 }
4370 
4371 /*
4372  * Returns whether fault address is valid for this error bit and
4373  * whether the address is "in memory" (i.e. pf_is_memory returns 1).
4374  */
4375 int
4376 cpu_flt_in_memory(ch_async_flt_t *ch_flt, uint64_t t_afsr_bit)
4377 {
4378 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4379 
4380 	return ((aflt->flt_stat & C_AFSR_MEMORY) &&
4381 	    afsr_to_afar_status(ch_flt->afsr_errs, t_afsr_bit) ==
4382 	    AFLT_STAT_VALID &&
4383 	    pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT));
4384 }
4385 
4386 static void
4387 cpu_log_diag_info(ch_async_flt_t *ch_flt)
4388 {
4389 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4390 	ch_dc_data_t *dcp = &ch_flt->flt_diag_data.chd_dc_data;
4391 	ch_ic_data_t *icp = &ch_flt->flt_diag_data.chd_ic_data;
4392 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
4393 #if defined(CPU_IMP_ECACHE_ASSOC)
4394 	int i, nway;
4395 #endif /* CPU_IMP_ECACHE_ASSOC */
4396 
4397 	/*
4398 	 * Check if the captured CPU logout is valid.
4399 	 */
4400 	if (ch_flt->flt_diag_data.chd_afar == LOGOUT_INVALID ||
4401 	    ch_flt->flt_data_incomplete)
4402 		return;
4403 
4404 #if defined(CPU_IMP_ECACHE_ASSOC)
4405 	nway = cpu_ecache_nway();
4406 	i =  cpu_ecache_line_valid(ch_flt);
4407 	if (i == 0 || i > nway) {
4408 		for (i = 0; i < nway; i++)
4409 			ecp[i].ec_logflag = EC_LOGFLAG_MAGIC;
4410 	} else
4411 		ecp[i - 1].ec_logflag = EC_LOGFLAG_MAGIC;
4412 #else /* CPU_IMP_ECACHE_ASSOC */
4413 	ecp->ec_logflag = EC_LOGFLAG_MAGIC;
4414 #endif /* CPU_IMP_ECACHE_ASSOC */
4415 
4416 #if defined(CHEETAH_PLUS)
4417 	pn_cpu_log_diag_l2_info(ch_flt);
4418 #endif /* CHEETAH_PLUS */
4419 
4420 	if (CH_DCTAG_MATCH(dcp->dc_tag, aflt->flt_addr)) {
4421 		dcp->dc_way = CH_DCIDX_TO_WAY(dcp->dc_idx);
4422 		dcp->dc_logflag = DC_LOGFLAG_MAGIC;
4423 	}
4424 
4425 	if (CH_ICTAG_MATCH(icp, aflt->flt_addr)) {
4426 		if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
4427 			icp->ic_way = PN_ICIDX_TO_WAY(icp->ic_idx);
4428 		else
4429 			icp->ic_way = CH_ICIDX_TO_WAY(icp->ic_idx);
4430 		icp->ic_logflag = IC_LOGFLAG_MAGIC;
4431 	}
4432 }
4433 
4434 /*
4435  * Cheetah ECC calculation.
4436  *
4437  * We only need to do the calculation on the data bits and can ignore check
4438  * bit and Mtag bit terms in the calculation.
4439  */
4440 static uint64_t ch_ecc_table[9][2] = {
4441 	/*
4442 	 * low order 64-bits   high-order 64-bits
4443 	 */
4444 	{ 0x46bffffeccd1177f, 0x488800022100014c },
4445 	{ 0x42fccc81331ff77f, 0x14424f1010249184 },
4446 	{ 0x8898827c222f1ffe, 0x22c1222808184aaf },
4447 	{ 0xf7632203e131ccf1, 0xe1241121848292b8 },
4448 	{ 0x7f5511421b113809, 0x901c88d84288aafe },
4449 	{ 0x1d49412184882487, 0x8f338c87c044c6ef },
4450 	{ 0xf552181014448344, 0x7ff8f4443e411911 },
4451 	{ 0x2189240808f24228, 0xfeeff8cc81333f42 },
4452 	{ 0x3280008440001112, 0xfee88b337ffffd62 },
4453 };
4454 
4455 /*
4456  * 64-bit population count, use well-known popcnt trick.
4457  * We could use the UltraSPARC V9 POPC instruction, but some
4458  * CPUs including Cheetahplus and Jaguar do not support that
4459  * instruction.
4460  */
4461 int
4462 popc64(uint64_t val)
4463 {
4464 	int cnt;
4465 
4466 	for (cnt = 0; val != 0; val &= val - 1)
4467 		cnt++;
4468 	return (cnt);
4469 }
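
/*
 * Worked example: val & (val - 1) clears the lowest set bit, so the loop
 * runs once per 1 bit.  For val = 0xb0 (binary 10110000) the value steps
 * through 0xa0, 0x80 and 0, giving a count of 3.
 */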
4470 
4471 /*
4472  * Generate the 9 ECC bits for the 128-bit chunk based on the table above.
4473  * Note that xor'ing an odd number of 1 bits == 1 and xor'ing an even number
4474  * of 1 bits == 0, so we can just use the least significant bit of the popcnt
4475  * instead of doing all the xor's.
4476  */
4477 uint32_t
4478 us3_gen_ecc(uint64_t data_low, uint64_t data_high)
4479 {
4480 	int bitno, s;
4481 	int synd = 0;
4482 
4483 	for (bitno = 0; bitno < 9; bitno++) {
4484 		s = (popc64(data_low & ch_ecc_table[bitno][0]) +
4485 		    popc64(data_high & ch_ecc_table[bitno][1])) & 1;
4486 		synd |= (s << bitno);
4487 	}
4488 	return (synd);
4489 
4490 }
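
/*
 * Worked example: us3_gen_ecc(0, 0) is 0, since every masked popcount is
 * even, and flipping a single data bit toggles exactly those check bits
 * whose rows in ch_ecc_table[] cover it, which is what gives each data
 * bit a distinct syndrome.  A hypothetical consistency check (example
 * only, never compiled; data_low, data_high and observed_ecc are
 * placeholders):
 */
#if 0
	if (us3_gen_ecc(data_low, data_high) != observed_ecc)
		/* the data and its stored check bits disagree */;
#endif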
4491 
4492 /*
4493  * Queue one event based on ecc_type_to_info entry.  If the event has an AFT1
4494  * tag associated with it or is a fatal event (flt_panic set), it is sent to
4495  * the UE event queue.  Otherwise it is dispatched to the CE event queue.
4496  */
4497 static void
4498 cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason,
4499     ecc_type_to_info_t *eccp, ch_diag_data_t *cdp)
4500 {
4501 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4502 
4503 	if (reason &&
4504 	    strlen(reason) + strlen(eccp->ec_reason) < MAX_REASON_STRING) {
4505 		(void) strcat(reason, eccp->ec_reason);
4506 	}
4507 
4508 	ch_flt->flt_bit = eccp->ec_afsr_bit;
4509 	ch_flt->flt_type = eccp->ec_flt_type;
4510 	if (cdp != NULL && cdp->chd_afar != LOGOUT_INVALID)
4511 		ch_flt->flt_diag_data = *cdp;
4512 	else
4513 		ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID;
4514 	aflt->flt_in_memory = cpu_flt_in_memory(ch_flt, ch_flt->flt_bit);
4515 
4516 	if (ch_flt->flt_bit & C_AFSR_MSYND_ERRS)
4517 		aflt->flt_synd = GET_M_SYND(aflt->flt_stat);
4518 	else if (ch_flt->flt_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS))
4519 		aflt->flt_synd = GET_E_SYND(aflt->flt_stat);
4520 	else
4521 		aflt->flt_synd = 0;
4522 
4523 	aflt->flt_payload = eccp->ec_err_payload;
4524 
4525 	if (aflt->flt_panic || (eccp->ec_afsr_bit &
4526 	    (C_AFSR_LEVEL1 | C_AFSR_EXT_LEVEL1)))
4527 		cpu_errorq_dispatch(eccp->ec_err_class,
4528 		    (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue,
4529 		    aflt->flt_panic);
4530 	else
4531 		cpu_errorq_dispatch(eccp->ec_err_class,
4532 		    (void *)ch_flt, sizeof (ch_async_flt_t), ce_queue,
4533 		    aflt->flt_panic);
4534 }
4535 
4536 /*
4537  * Queue events on async event queue one event per error bit.  First we
4538  * Queue events on the async event queue, one event per error bit.  First we
4539  * that we may not expect.  Return number of events queued.
4540  */
4541 int
4542 cpu_queue_events(ch_async_flt_t *ch_flt, char *reason, uint64_t t_afsr_errs,
4543     ch_cpu_logout_t *clop)
4544 {
4545 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4546 	ecc_type_to_info_t *eccp;
4547 	int nevents = 0;
4548 	uint64_t primary_afar = aflt->flt_addr, primary_afsr = aflt->flt_stat;
4549 #if defined(CHEETAH_PLUS)
4550 	uint64_t orig_t_afsr_errs;
4551 #endif
4552 	uint64_t primary_afsr_ext = ch_flt->afsr_ext;
4553 	uint64_t primary_afsr_errs = ch_flt->afsr_errs;
4554 	ch_diag_data_t *cdp = NULL;
4555 
4556 	t_afsr_errs &= ((C_AFSR_ALL_ERRS & ~C_AFSR_ME) | C_AFSR_EXT_ALL_ERRS);
4557 
4558 #if defined(CHEETAH_PLUS)
4559 	orig_t_afsr_errs = t_afsr_errs;
4560 
4561 	/*
4562 	 * For Cheetah+, log the shadow AFSR/AFAR bits first.
4563 	 */
4564 	if (clop != NULL) {
4565 		/*
4566 		 * Set the AFSR and AFAR fields to the shadow registers.  The
4567 		 * flt_addr and flt_stat fields will be reset to the primaries
4568 		 * below, but the sdw_addr and sdw_stat will stay as the
4569 		 * secondaries.
4570 		 */
4571 		cdp = &clop->clo_sdw_data;
4572 		aflt->flt_addr = ch_flt->flt_sdw_afar = cdp->chd_afar;
4573 		aflt->flt_stat = ch_flt->flt_sdw_afsr = cdp->chd_afsr;
4574 		ch_flt->afsr_ext = ch_flt->flt_sdw_afsr_ext = cdp->chd_afsr_ext;
4575 		ch_flt->afsr_errs = (cdp->chd_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
4576 		    (cdp->chd_afsr & C_AFSR_ALL_ERRS);
4577 
4578 		/*
4579 		 * If the primary and shadow AFSR differ, tag the shadow as
4580 		 * the first fault.
4581 		 */
4582 		if ((primary_afar != cdp->chd_afar) ||
4583 		    (primary_afsr_errs != ch_flt->afsr_errs)) {
4584 			aflt->flt_stat |= (1ull << C_AFSR_FIRSTFLT_SHIFT);
4585 		}
4586 
4587 		/*
4588 		 * Check AFSR bits as well as AFSR_EXT bits in order of
4589 		 * the AFAR overwrite priority. Our stored AFSR_EXT value
4590 		 * is expected to be zero for those CPUs which do not have
4591 		 * an AFSR_EXT register.
4592 		 */
4593 		for (eccp = ecc_type_to_info; eccp->ec_desc != NULL; eccp++) {
4594 			if ((eccp->ec_afsr_bit &
4595 			    (ch_flt->afsr_errs & t_afsr_errs)) &&
4596 			    ((eccp->ec_flags & aflt->flt_status) != 0)) {
4597 				cpu_queue_one_event(ch_flt, reason, eccp, cdp);
4598 				cdp = NULL;
4599 				t_afsr_errs &= ~eccp->ec_afsr_bit;
4600 				nevents++;
4601 			}
4602 		}
4603 
4604 		/*
4605 		 * If the ME bit is on in the primary AFSR, turn back on all
4606 		 * the error bits that may set the ME bit, to make sure we
4607 		 * see the ME AFSR error logs.
4608 		 */
4609 		if ((primary_afsr & C_AFSR_ME) != 0)
4610 			t_afsr_errs = (orig_t_afsr_errs & C_AFSR_ALL_ME_ERRS);
4611 	}
4612 #endif	/* CHEETAH_PLUS */
4613 
4614 	if (clop != NULL)
4615 		cdp = &clop->clo_data;
4616 
4617 	/*
4618 	 * Queue expected errors: the error bit and fault type must match
4619 	 * in the ecc_type_to_info table.
4620 	 */
4621 	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
4622 	    eccp++) {
4623 		if ((eccp->ec_afsr_bit & t_afsr_errs) != 0 &&
4624 		    (eccp->ec_flags & aflt->flt_status) != 0) {
4625 #if defined(SERRANO)
4626 			/*
4627 			 * For FRC/FRU errors on Serrano the afar2 captures
4628 			 * the address and the associated data is
4629 			 * in the shadow logout area.
4630 			 */
4631 			if (eccp->ec_afsr_bit  & (C_AFSR_FRC | C_AFSR_FRU)) {
4632 				if (clop != NULL)
4633 					cdp = &clop->clo_sdw_data;
4634 				aflt->flt_addr = ch_flt->afar2;
4635 			} else {
4636 				if (clop != NULL)
4637 					cdp = &clop->clo_data;
4638 				aflt->flt_addr = primary_afar;
4639 			}
4640 #else	/* SERRANO */
4641 			aflt->flt_addr = primary_afar;
4642 #endif	/* SERRANO */
4643 			aflt->flt_stat = primary_afsr;
4644 			ch_flt->afsr_ext = primary_afsr_ext;
4645 			ch_flt->afsr_errs = primary_afsr_errs;
4646 			cpu_queue_one_event(ch_flt, reason, eccp, cdp);
4647 			cdp = NULL;
4648 			t_afsr_errs &= ~eccp->ec_afsr_bit;
4649 			nevents++;
4650 		}
4651 	}
4652 
4653 	/*
4654 	 * Queue unexpected errors: match on the error bit only.
4655 	 */
4656 	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
4657 	    eccp++) {
4658 		if (eccp->ec_afsr_bit & t_afsr_errs) {
4659 #if defined(SERRANO)
4660 			/*
4661 			 * For FRC/FRU errors on Serrano the afar2 captures
4662 			 * the address and the associated data is
4663 			 * in the shadow logout area.
4664 			 */
4665 			if (eccp->ec_afsr_bit  & (C_AFSR_FRC | C_AFSR_FRU)) {
4666 				if (clop != NULL)
4667 					cdp = &clop->clo_sdw_data;
4668 				aflt->flt_addr = ch_flt->afar2;
4669 			} else {
4670 				if (clop != NULL)
4671 					cdp = &clop->clo_data;
4672 				aflt->flt_addr = primary_afar;
4673 			}
4674 #else	/* SERRANO */
4675 			aflt->flt_addr = primary_afar;
4676 #endif	/* SERRANO */
4677 			aflt->flt_stat = primary_afsr;
4678 			ch_flt->afsr_ext = primary_afsr_ext;
4679 			ch_flt->afsr_errs = primary_afsr_errs;
4680 			cpu_queue_one_event(ch_flt, reason, eccp, cdp);
4681 			cdp = NULL;
4682 			t_afsr_errs &= ~eccp->ec_afsr_bit;
4683 			nevents++;
4684 		}
4685 	}
4686 	return (nevents);
4687 }
4688 
4689 /*
4690  * Return trap type number.
4691  */
4692 uint8_t
4693 flt_to_trap_type(struct async_flt *aflt)
4694 {
4695 	if (aflt->flt_status & ECC_I_TRAP)
4696 		return (TRAP_TYPE_ECC_I);
4697 	if (aflt->flt_status & ECC_D_TRAP)
4698 		return (TRAP_TYPE_ECC_D);
4699 	if (aflt->flt_status & ECC_F_TRAP)
4700 		return (TRAP_TYPE_ECC_F);
4701 	if (aflt->flt_status & ECC_C_TRAP)
4702 		return (TRAP_TYPE_ECC_C);
4703 	if (aflt->flt_status & ECC_DP_TRAP)
4704 		return (TRAP_TYPE_ECC_DP);
4705 	if (aflt->flt_status & ECC_IP_TRAP)
4706 		return (TRAP_TYPE_ECC_IP);
4707 	if (aflt->flt_status & ECC_ITLB_TRAP)
4708 		return (TRAP_TYPE_ECC_ITLB);
4709 	if (aflt->flt_status & ECC_DTLB_TRAP)
4710 		return (TRAP_TYPE_ECC_DTLB);
4711 	return (TRAP_TYPE_UNKNOWN);
4712 }
4713 
4714 /*
4715  * Decide an error type based on detector and leaky/partner tests.
4716  * The following array is used for quick translation - it must
4717  * stay in sync with ce_dispact_t.
4718  */
4719 
4720 static char *cetypes[] = {
4721 	CE_DISP_DESC_U,
4722 	CE_DISP_DESC_I,
4723 	CE_DISP_DESC_PP,
4724 	CE_DISP_DESC_P,
4725 	CE_DISP_DESC_L,
4726 	CE_DISP_DESC_PS,
4727 	CE_DISP_DESC_S
4728 };
4729 
4730 char *
4731 flt_to_error_type(struct async_flt *aflt)
4732 {
4733 	ce_dispact_t dispact, disp;
4734 	uchar_t dtcrinfo, ptnrinfo, lkyinfo;
4735 
4736 	/*
4737 	 * The memory payload bundle is shared by some events that do
4738 	 * not perform any classification.  For those flt_disp will be
4739 	 * 0 and we will return "unknown".
4740 	 */
4741 	if (!ce_disp_inited || !aflt->flt_in_memory || aflt->flt_disp == 0)
4742 		return (cetypes[CE_DISP_UNKNOWN]);
4743 
4744 	dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp);
4745 
4746 	/*
4747 	 * It is also possible that no scrub/classification was performed
4748 	 * by the detector, for instance where a disrupting error logged
4749 	 * in the AFSR while CEEN was off in cpu_deferred_error.
4750 	 */
4751 	if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo))
4752 		return (cetypes[CE_DISP_UNKNOWN]);
4753 
4754 	/*
4755 	 * Lookup type in initial classification/action table
4756 	 */
4757 	dispact = CE_DISPACT(ce_disp_table,
4758 	    CE_XDIAG_AFARMATCHED(dtcrinfo),
4759 	    CE_XDIAG_STATE(dtcrinfo),
4760 	    CE_XDIAG_CE1SEEN(dtcrinfo),
4761 	    CE_XDIAG_CE2SEEN(dtcrinfo));
4762 
4763 	/*
4764 	 * A bad lookup is not something to panic production systems for.
4765 	 */
4766 	ASSERT(dispact != CE_DISP_BAD);
4767 	if (dispact == CE_DISP_BAD)
4768 		return (cetypes[CE_DISP_UNKNOWN]);
4769 
4770 	disp = CE_DISP(dispact);
4771 
4772 	switch (disp) {
4773 	case CE_DISP_UNKNOWN:
4774 	case CE_DISP_INTERMITTENT:
4775 		break;
4776 
4777 	case CE_DISP_POSS_PERS:
4778 		/*
4779 		 * "Possible persistent" errors to which we have applied a valid
4780 		 * leaky test can be separated into "persistent" or "leaky".
4781 		 */
4782 		lkyinfo = CE_XDIAG_LKYINFO(aflt->flt_disp);
4783 		if (CE_XDIAG_TESTVALID(lkyinfo)) {
4784 			if (CE_XDIAG_CE1SEEN(lkyinfo) ||
4785 			    CE_XDIAG_CE2SEEN(lkyinfo))
4786 				disp = CE_DISP_LEAKY;
4787 			else
4788 				disp = CE_DISP_PERS;
4789 		}
4790 		break;
4791 
4792 	case CE_DISP_POSS_STICKY:
4793 		/*
4794 		 * Promote "possible sticky" results that have been
4795 		 * confirmed by a partner test to "sticky".  Unconfirmed
4796 		 * "possible sticky" events are left at that status - we do not
4797 		 * guess at any bad reader/writer etc status here.
4798 		 */
4799 		ptnrinfo = CE_XDIAG_PTNRINFO(aflt->flt_disp);
4800 		if (CE_XDIAG_TESTVALID(ptnrinfo) &&
4801 		    CE_XDIAG_CE1SEEN(ptnrinfo) && CE_XDIAG_CE2SEEN(ptnrinfo))
4802 			disp = CE_DISP_STICKY;
4803 
4804 		/*
4805 		 * Promote "possible sticky" results on a uniprocessor
4806 		 * to "sticky"
4807 		 */
4808 		if (disp == CE_DISP_POSS_STICKY &&
4809 		    CE_XDIAG_SKIPCODE(disp) == CE_XDIAG_SKIP_UNIPROC)
4810 			disp = CE_DISP_STICKY;
4811 		break;
4812 
4813 	default:
4814 		disp = CE_DISP_UNKNOWN;
4815 		break;
4816 	}
4817 
4818 	return (cetypes[disp]);
4819 }
4820 
4821 /*
4822  * Given the entire afsr, the specific bit to check and a prioritized list of
4823  * error bits, determine the validity of the various overwrite priority
4824  * features of the AFSR/AFAR: AFAR, ESYND and MSYND, each of which have
4825  * features of the AFSR/AFAR: AFAR, ESYND and MSYND, each of which has
4826  *
4827  * Given a specific afsr error bit and the entire afsr, there are three cases:
4828  *   INVALID:	The specified bit is lower overwrite priority than some other
4829  *		error bit which is on in the afsr (or IVU/IVC).
4830  *   VALID:	The specified bit is higher priority than all other error bits
4831  *		which are on in the afsr.
4832  *   AMBIGUOUS: Another error bit (or bits) of equal priority to the specified
4833  *		bit is on in the afsr.
4834  */
4835 int
4836 afsr_to_overw_status(uint64_t afsr, uint64_t afsr_bit, uint64_t *ow_bits)
4837 {
4838 	uint64_t afsr_ow;
4839 
4840 	while ((afsr_ow = *ow_bits++) != 0) {
4841 		/*
4842 		 * If bit is in the priority class, check to see if another
4843 		 * bit in the same class is on => ambiguous.  Otherwise,
4844 		 * the value is valid.  If the bit is not on at this priority
4845 		 * class, but a higher priority bit is on, then the value is
4846 		 * invalid.
4847 		 */
4848 		if (afsr_ow & afsr_bit) {
4849 			/*
4850 			 * If equal pri bit is on, ambiguous.
4851 			 */
4852 			if (afsr & (afsr_ow & ~afsr_bit))
4853 				return (AFLT_STAT_AMBIGUOUS);
4854 			return (AFLT_STAT_VALID);
4855 		} else if (afsr & afsr_ow)
4856 			break;
4857 	}
4858 
4859 	/*
4860 	 * We didn't find a match or a higher priority bit was on.  Not
4861 	 * finding a match handles the case of invalid AFAR for IVC, IVU.
4862 	 */
4863 	return (AFLT_STAT_INVALID);
4864 }
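
/*
 * Worked example (hypothetical priority list of a UE class followed by a
 * CE class): with one bit from each class set in the AFSR, querying the
 * CE bit returns AFLT_STAT_INVALID because the higher-priority class has
 * overwritten the capture, querying the UE bit returns AFLT_STAT_VALID,
 * and if two bits of the same class were set the result for either of
 * them would be AFLT_STAT_AMBIGUOUS.
 */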
4865 
4866 static int
4867 afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit)
4868 {
4869 #if defined(SERRANO)
4870 	if (afsr_bit & (C_AFSR_FRC | C_AFSR_FRU))
4871 		return (afsr_to_overw_status(afsr, afsr_bit, afar2_overwrite));
4872 	else
4873 #endif	/* SERRANO */
4874 		return (afsr_to_overw_status(afsr, afsr_bit, afar_overwrite));
4875 }
4876 
4877 static int
4878 afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit)
4879 {
4880 	return (afsr_to_overw_status(afsr, afsr_bit, esynd_overwrite));
4881 }
4882 
4883 static int
4884 afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit)
4885 {
4886 	return (afsr_to_overw_status(afsr, afsr_bit, msynd_overwrite));
4887 }
4888 
4889 static int
4890 afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit)
4891 {
4892 #ifdef lint
4893 	cpuid = cpuid;
4894 #endif
4895 	if (afsr_bit & C_AFSR_MSYND_ERRS) {
4896 		return (afsr_to_msynd_status(afsr, afsr_bit));
4897 	} else if (afsr_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
4898 #if defined(CHEETAH_PLUS)
4899 		/*
4900 		 * The E_SYND overwrite policy is slightly different
4901 		 * for Panther CPUs.
4902 		 */
4903 		if (IS_PANTHER(cpunodes[cpuid].implementation))
4904 			return (afsr_to_pn_esynd_status(afsr, afsr_bit));
4905 		else
4906 			return (afsr_to_esynd_status(afsr, afsr_bit));
4907 #else /* CHEETAH_PLUS */
4908 		return (afsr_to_esynd_status(afsr, afsr_bit));
4909 #endif /* CHEETAH_PLUS */
4910 	} else {
4911 		return (AFLT_STAT_INVALID);
4912 	}
4913 }
4914 
4915 /*
4916  * Slave CPU stick synchronization.
4917  */
4918 void
4919 sticksync_slave(void)
4920 {
4921 	int 		i;
4922 	int		tries = 0;
4923 	int64_t		tskew;
4924 	int64_t		av_tskew;
4925 
4926 	kpreempt_disable();
4927 	/* wait for the master side */
4928 	while (stick_sync_cmd != SLAVE_START)
4929 		;
4930 	/*
4931 	 * Synchronization should only take a few tries at most. But in the
4932  * odd case where the cpu isn't cooperating, we'll keep trying. A cpu
4933  * without its stick synchronized wouldn't be a good citizen.
4934 	 */
4935 	while (slave_done == 0) {
4936 		/*
4937 		 * Time skew calculation.
4938 		 */
4939 		av_tskew = tskew = 0;
4940 
4941 		for (i = 0; i < stick_iter; i++) {
4942 			/* make location hot */
4943 			timestamp[EV_A_START] = 0;
4944 			stick_timestamp(&timestamp[EV_A_START]);
4945 
4946 			/* tell the master we're ready */
4947 			stick_sync_cmd = MASTER_START;
4948 
4949 			/* and wait */
4950 			while (stick_sync_cmd != SLAVE_CONT)
4951 				;
4952 			/* Event B end */
4953 			stick_timestamp(&timestamp[EV_B_END]);
4954 
4955 			/* calculate time skew */
4956 			tskew = ((timestamp[EV_B_END] - timestamp[EV_B_START])
4957 				- (timestamp[EV_A_END] -
4958 				timestamp[EV_A_START])) / 2;
4959 
4960 			/* keep running count */
4961 			av_tskew += tskew;
4962 		} /* for */
4963 
4964 		/*
4965 		 * Adjust stick for time skew if not within the max allowed;
4966 		 * otherwise we're all done.
4967 		 */
4968 		if (stick_iter != 0)
4969 			av_tskew = av_tskew/stick_iter;
4970 		if (ABS(av_tskew) > stick_tsk) {
4971 			/*
4972 			 * If the skew is 1 (the slave's STICK register
4973 			 * is 1 STICK ahead of the master's), stick_adj
4974 			 * could fail to adjust the slave's STICK register
4975 			 * if the STICK read on the slave happens to
4976 			 * align with the increment of the STICK.
4977 			 * Therefore, we increment the skew to 2.
4978 			 */
4979 			if (av_tskew == 1)
4980 				av_tskew++;
4981 			stick_adj(-av_tskew);
4982 		} else
4983 			slave_done = 1;
4984 #ifdef DEBUG
4985 		if (tries < DSYNC_ATTEMPTS)
4986 			stick_sync_stats[CPU->cpu_id].skew_val[tries] =
4987 				av_tskew;
4988 		++tries;
4989 #endif /* DEBUG */
4990 #ifdef lint
4991 		tries = tries;
4992 #endif
4993 
4994 	} /* while */
4995 
4996 	/* allow the master to finish */
4997 	stick_sync_cmd = EVENT_NULL;
4998 	kpreempt_enable();
4999 }
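
/*
 * Worked example of the skew formula above: event A spans slave -> master
 * and event B spans master -> slave, so with a symmetric crossing latency
 * the latency cancels in ((B_end - B_start) - (A_end - A_start)) / 2 and
 * only the clock offset remains.  If A measures 95 ticks and B measures
 * 105 ticks, the skew is (105 - 95) / 2 = 5: the slave's STICK is 5 ticks
 * ahead and stick_adj(-5) brings it back in line.
 */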
5000 
5001 /*
5002  * Master CPU side of stick synchronization.
5003  *  - timestamp end of Event A
5004  *  - timestamp beginning of Event B
5005  */
5006 void
5007 sticksync_master(void)
5008 {
5009 	int		i;
5010 
5011 	kpreempt_disable();
5012 	/* tell the slave we've started */
5013 	slave_done = 0;
5014 	stick_sync_cmd = SLAVE_START;
5015 
5016 	while (slave_done == 0) {
5017 		for (i = 0; i < stick_iter; i++) {
5018 			/* wait for the slave */
5019 			while (stick_sync_cmd != MASTER_START)
5020 				;
5021 			/* Event A end */
5022 			stick_timestamp(&timestamp[EV_A_END]);
5023 
5024 			/* make location hot */
5025 			timestamp[EV_B_START] = 0;
5026 			stick_timestamp(&timestamp[EV_B_START]);
5027 
5028 			/* tell the slave to continue */
5029 			stick_sync_cmd = SLAVE_CONT;
5030 		} /* for */
5031 
5032 		/* wait while slave calculates time skew */
5033 		while (stick_sync_cmd == SLAVE_CONT)
5034 			;
5035 	} /* while */
5036 	kpreempt_enable();
5037 }
5038 
5039 /*
5040  * Cheetah/Cheetah+ have disrupting errors for copybacks, so we don't need
5041  * the Spitfire hack of xcall'ing all the cpus to check for them.  Also,
5042  * in cpu_async_panic_callb, each cpu checks for CPU events on its way to
5043  * panic idle.
5044  */
5045 /*ARGSUSED*/
5046 void
5047 cpu_check_allcpus(struct async_flt *aflt)
5048 {}
5049 
5050 struct kmem_cache *ch_private_cache;
5051 
5052 /*
5053  * Cpu private uninitialization.  Uninitialize the Ecache scrubber and
5054  * deallocate the scrubber data structures and cpu_private data structure.
5055  */
5056 void
5057 cpu_uninit_private(struct cpu *cp)
5058 {
5059 	cheetah_private_t *chprp = CPU_PRIVATE(cp);
5060 
5061 	ASSERT(chprp);
5062 	cpu_uninit_ecache_scrub_dr(cp);
5063 	CPU_PRIVATE(cp) = NULL;
5064 	ch_err_tl1_paddrs[cp->cpu_id] = NULL;
5065 	kmem_cache_free(ch_private_cache, chprp);
5066 	cmp_delete_cpu(cp->cpu_id);
5067 
5068 }
5069 
5070 /*
5071  * Cheetah Cache Scrubbing
5072  *
5073  * The primary purpose of Cheetah cache scrubbing is to reduce the exposure
5074  * of E$ tags, D$ data, and I$ data to cosmic ray events since they are not
5075  * protected by either parity or ECC.
5076  *
5077  * We currently default the E$ and D$ scan rate to 100 (scan 10% of the
5078  * cache per second). Due to the specifics of how the I$ control
5079  * logic works with respect to the ASI used to scrub I$ lines, the entire
5080  * I$ is scanned at once.
5081  */
5082 
5083 /*
5084  * Tuneables to enable and disable the scrubbing of the caches, and to tune
5085  * scrubbing behavior.  These may be changed via /etc/system or using mdb
5086  * on a running system.
5087  */
5088 int dcache_scrub_enable = 1;		/* D$ scrubbing is on by default */
5089 
5090 /*
5091  * The following are the PIL levels that the softints/cross traps will fire at.
5092  */
5093 uint_t ecache_scrub_pil = PIL_9;	/* E$ scrub PIL for cross traps */
5094 uint_t dcache_scrub_pil = PIL_9;	/* D$ scrub PIL for cross traps */
5095 uint_t icache_scrub_pil = PIL_9;	/* I$ scrub PIL for cross traps */
5096 
5097 #if defined(JALAPENO)
5098 
5099 /*
5100  * Due to several errata (82, 85, 86), we don't enable the L2$ scrubber
5101  * on Jalapeno.
5102  */
5103 int ecache_scrub_enable = 0;
5104 
5105 #else	/* JALAPENO */
5106 
5107 /*
5108  * With all other cpu types, E$ scrubbing is on by default
5109  */
5110 int ecache_scrub_enable = 1;
5111 
5112 #endif	/* JALAPENO */
5113 
5114 
5115 #if defined(CHEETAH_PLUS) || defined(JALAPENO) || defined(SERRANO)
5116 
5117 /*
5118  * The I$ scrubber tends to cause latency problems for real-time SW, so it
5119  * is disabled by default on non-Cheetah systems
5120  */
5121 int icache_scrub_enable = 0;
5122 
5123 /*
5124  * Tuneables specifying the scrub calls per second and the scan rate
5125  * for each cache
5126  *
5127  * The cyclic times are set during boot based on the following values.
5128  * Changing these values in mdb after this time will have no effect.  If
5129  * a different value is desired, it must be set in /etc/system before a
5130  * reboot.
5131  */
5132 int ecache_calls_a_sec = 1;
5133 int dcache_calls_a_sec = 2;
5134 int icache_calls_a_sec = 2;
5135 
5136 int ecache_scan_rate_idle = 1;
5137 int ecache_scan_rate_busy = 1;
5138 int dcache_scan_rate_idle = 1;
5139 int dcache_scan_rate_busy = 1;
5140 int icache_scan_rate_idle = 1;
5141 int icache_scan_rate_busy = 1;
5142 
5143 #else	/* CHEETAH_PLUS || JALAPENO || SERRANO */
5144 
5145 int icache_scrub_enable = 1;		/* I$ scrubbing is on by default */
5146 
5147 int ecache_calls_a_sec = 100;		/* E$ scrub calls per second */
5148 int dcache_calls_a_sec = 100;		/* D$ scrub calls per second */
5149 int icache_calls_a_sec = 100;		/* I$ scrub calls per second */
5150 
5151 int ecache_scan_rate_idle = 100;	/* E$ scan rate (in tenths of a %) */
5152 int ecache_scan_rate_busy = 100;	/* E$ scan rate (in tenths of a %) */
5153 int dcache_scan_rate_idle = 100;	/* D$ scan rate (in tenths of a %) */
5154 int dcache_scan_rate_busy = 100;	/* D$ scan rate (in tenths of a %) */
5155 int icache_scan_rate_idle = 100;	/* I$ scan rate (in tenths of a %) */
5156 int icache_scan_rate_busy = 100;	/* I$ scan rate (in tenths of a %) */
5157 
5158 #endif	/* CHEETAH_PLUS || JALAPENO || SERRANO */
5159 
5160 /*
5161  * In order to scrub on offline cpus, a cross trap is sent.  The handler will
5162  * increment the outstanding request counter and schedule a softint to run
5163  * the scrubber.
5164  */
5165 extern xcfunc_t cache_scrubreq_tl1;
5166 
5167 /*
5168  * These are the softint functions for each cache scrubber
5169  */
5170 static uint_t scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2);
5171 static uint_t scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2);
5172 static uint_t scrub_icache_line_intr(caddr_t arg1, caddr_t arg2);
5173 
5174 /*
5175  * The cache scrub info table contains cache specific information
5176  * and allows for some of the scrub code to be table driven, reducing
5177  * duplication of similar cache code.
5178  *
5179  * This table keeps a copy of the value in the calls per second variable
5180  * (?cache_calls_a_sec).  This makes it much more difficult for someone
5181  * to cause us problems (for example, by setting ecache_calls_a_sec to 0 in
5182  * mdb in a misguided attempt to disable the scrubber).
5183  */
5184 struct scrub_info {
5185 	int		*csi_enable;	/* scrubber enable flag */
5186 	int		csi_freq;	/* scrubber calls per second */
5187 	int		csi_index;	/* index to chsm_outstanding[] */
5188 	uint_t		csi_inum;	/* scrubber interrupt number */
5189 	cyclic_id_t	csi_omni_cyc_id;	/* omni cyclic ID */
5190 	cyclic_id_t	csi_offline_cyc_id;	/* offline cyclic ID */
5191 	char		csi_name[3];	/* cache name for this scrub entry */
5192 } cache_scrub_info[] = {
5193 { &ecache_scrub_enable, 0, CACHE_SCRUBBER_INFO_E, 0, 0, 0, "E$"},
5194 { &dcache_scrub_enable, 0, CACHE_SCRUBBER_INFO_D, 0, 0, 0, "D$"},
5195 { &icache_scrub_enable, 0, CACHE_SCRUBBER_INFO_I, 0, 0, 0, "I$"}
5196 };
5197 
5198 /*
5199  * If scrubbing is enabled, increment the outstanding request counter.  If it
5200  * is 1 (meaning there were no previous requests outstanding), call
5201  * setsoftint_tl1 through xt_one_unchecked, which eventually ends up doing
5202  * a self trap.
5203  */
5204 static void
5205 do_scrub(struct scrub_info *csi)
5206 {
5207 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5208 	int index = csi->csi_index;
5209 	uint32_t *outstanding = &csmp->chsm_outstanding[index];
5210 
5211 	if (*(csi->csi_enable) && (csmp->chsm_enable[index])) {
5212 		if (atomic_add_32_nv(outstanding, 1) == 1) {
5213 			xt_one_unchecked(CPU->cpu_id, setsoftint_tl1,
5214 			    csi->csi_inum, 0);
5215 		}
5216 	}
5217 }
5218 
5219 /*
5220  * Omni cyclics don't fire on offline cpus, so we use another cyclic to
5221  * cross-trap the offline cpus.
5222  */
5223 static void
5224 do_scrub_offline(struct scrub_info *csi)
5225 {
5226 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5227 
5228 	if (CPUSET_ISNULL(cpu_offline_set)) {
5229 		/*
5230 		 * No offline cpus - nothing to do
5231 		 */
5232 		return;
5233 	}
5234 
5235 	if (*(csi->csi_enable) && (csmp->chsm_enable[csi->csi_index])) {
5236 		xt_some(cpu_offline_set, cache_scrubreq_tl1, csi->csi_inum,
5237 		    csi->csi_index);
5238 	}
5239 }
5240 
5241 /*
5242  * This is the initial setup for the scrubber cyclics - it sets the
5243  * interrupt level, frequency, and function to call.
5244  */
5245 /*ARGSUSED*/
5246 static void
5247 cpu_scrub_cyclic_setup(void *arg, cpu_t *cpu, cyc_handler_t *hdlr,
5248     cyc_time_t *when)
5249 {
5250 	struct scrub_info *csi = (struct scrub_info *)arg;
5251 
5252 	ASSERT(csi != NULL);
5253 	hdlr->cyh_func = (cyc_func_t)do_scrub;
5254 	hdlr->cyh_level = CY_LOW_LEVEL;
5255 	hdlr->cyh_arg = arg;
5256 
5257 	when->cyt_when = 0;	/* Start immediately */
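	/*
	 * Illustrative example: a csi_freq of 100 calls per second yields
	 * a cyclic interval of NANOSEC / 100 = 10,000,000 ns (10 ms).
	 */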
5258 	when->cyt_interval = NANOSEC / csi->csi_freq;
5259 }
5260 
5261 /*
5262  * Initialization for cache scrubbing.
5263  * This routine is called AFTER all cpus have had cpu_init_private called
5264  * to initialize their private data areas.
5265  */
5266 void
5267 cpu_init_cache_scrub(void)
5268 {
5269 	int i;
5270 	struct scrub_info *csi;
5271 	cyc_omni_handler_t omni_hdlr;
5272 	cyc_handler_t offline_hdlr;
5273 	cyc_time_t when;
5274 
5275 	/*
5276 	 * save away the maximum number of lines for the D$
5277 	 */
5278 	dcache_nlines = dcache_size / dcache_linesize;
5279 
5280 	/*
5281 	 * register the softints for the cache scrubbing
5282 	 */
5283 	cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_inum =
5284 	    add_softintr(ecache_scrub_pil, scrub_ecache_line_intr,
5285 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_E]);
5286 	cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_freq = ecache_calls_a_sec;
5287 
5288 	cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_inum =
5289 	    add_softintr(dcache_scrub_pil, scrub_dcache_line_intr,
5290 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_D]);
5291 	cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_freq = dcache_calls_a_sec;
5292 
5293 	cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_inum =
5294 	    add_softintr(icache_scrub_pil, scrub_icache_line_intr,
5295 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_I]);
5296 	cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_freq = icache_calls_a_sec;
5297 
5298 	/*
5299 	 * start the scrubbing for all the caches
5300 	 */
5301 	mutex_enter(&cpu_lock);
5302 	for (i = 0; i < CACHE_SCRUBBER_COUNT; i++) {
5303 
5304 		csi = &cache_scrub_info[i];
5305 
5306 		if (!(*csi->csi_enable))
5307 			continue;
5308 
5309 		/*
5310 		 * force the following to be true:
5311 		 *	1 <= calls_a_sec <= hz
5312 		 */
5313 		if (csi->csi_freq > hz) {
5314 			cmn_err(CE_NOTE, "%s scrub calls_a_sec set too high "
5315 				"(%d); resetting to hz (%d)", csi->csi_name,
5316 				csi->csi_freq, hz);
5317 			csi->csi_freq = hz;
5318 		} else if (csi->csi_freq < 1) {
5319 			cmn_err(CE_NOTE, "%s scrub calls_a_sec set too low "
5320 				"(%d); resetting to 1", csi->csi_name,
5321 				csi->csi_freq);
5322 			csi->csi_freq = 1;
5323 		}
5324 
5325 		omni_hdlr.cyo_online = cpu_scrub_cyclic_setup;
5326 		omni_hdlr.cyo_offline = NULL;
5327 		omni_hdlr.cyo_arg = (void *)csi;
5328 
5329 		offline_hdlr.cyh_func = (cyc_func_t)do_scrub_offline;
5330 		offline_hdlr.cyh_arg = (void *)csi;
5331 		offline_hdlr.cyh_level = CY_LOW_LEVEL;
5332 
5333 		when.cyt_when = 0;	/* Start immediately */
5334 		when.cyt_interval = NANOSEC / csi->csi_freq;
5335 
5336 		csi->csi_omni_cyc_id = cyclic_add_omni(&omni_hdlr);
5337 		csi->csi_offline_cyc_id = cyclic_add(&offline_hdlr, &when);
5338 	}
5339 	register_cpu_setup_func(cpu_scrub_cpu_setup, NULL);
5340 	mutex_exit(&cpu_lock);
5341 }
5342 
5343 /*
5344  * Indicate that the specified cpu is idle.
5345  */
5346 void
5347 cpu_idle_ecache_scrub(struct cpu *cp)
5348 {
5349 	if (CPU_PRIVATE(cp) != NULL) {
5350 		ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5351 		csmp->chsm_ecache_busy = ECACHE_CPU_IDLE;
5352 	}
5353 }
5354 
5355 /*
5356  * Indicate that the specified cpu is busy.
5357  */
5358 void
5359 cpu_busy_ecache_scrub(struct cpu *cp)
5360 {
5361 	if (CPU_PRIVATE(cp) != NULL) {
5362 		ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5363 		csmp->chsm_ecache_busy = ECACHE_CPU_BUSY;
5364 	}
5365 }
5366 
5367 /*
5368  * Initialization for cache scrubbing for the specified cpu.
5369  */
5370 void
5371 cpu_init_ecache_scrub_dr(struct cpu *cp)
5372 {
5373 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5374 	int cpuid = cp->cpu_id;
5375 
5376 	/* initialize the number of lines in the caches */
5377 	csmp->chsm_ecache_nlines = cpunodes[cpuid].ecache_size /
5378 	    cpunodes[cpuid].ecache_linesize;
5379 	csmp->chsm_icache_nlines = CPU_PRIVATE_VAL(cp, chpr_icache_size) /
5380 	    CPU_PRIVATE_VAL(cp, chpr_icache_linesize);
5381 
5382 	/*
5383 	 * do_scrub() and do_scrub_offline() check both the global
5384 	 * ?cache_scrub_enable and this per-cpu enable variable.  All scrubbers
5385 	 * check this value before scrubbing.  Currently, we use it to
5386 	 * disable the E$ scrubber on multi-core cpus or while running at
5387 	 * slowed speed.  For now, just turn everything on and allow
5388 	 * cpu_init_private() to change it if necessary.
5389 	 */
5390 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_E] = 1;
5391 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_D] = 1;
5392 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_I] = 1;
5393 
5394 	cpu_busy_ecache_scrub(cp);
5395 }
5396 
5397 /*
5398  * Un-initialization for cache scrubbing for the specified cpu.
5399  */
5400 static void
5401 cpu_uninit_ecache_scrub_dr(struct cpu *cp)
5402 {
5403 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5404 
5405 	/*
5406 	 * un-initialize bookkeeping for cache scrubbing
5407 	 */
5408 	bzero(csmp, sizeof (ch_scrub_misc_t));
5409 
5410 	cpu_idle_ecache_scrub(cp);
5411 }
5412 
5413 /*
5414  * Called periodically on each CPU to scrub the D$.
5415  */
5416 static void
5417 scrub_dcache(int how_many)
5418 {
5419 	int i;
5420 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5421 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D];
5422 
5423 	/*
5424 	 * scrub the desired number of lines
5425 	 */
5426 	for (i = 0; i < how_many; i++) {
5427 		/*
5428 		 * scrub a D$ line
5429 		 */
5430 		dcache_inval_line(index);
5431 
5432 		/*
5433 		 * calculate the next D$ line to scrub, assumes
5434 		 * that dcache_nlines is a power of 2
5435 		 */
5436 		index = (index + 1) & (dcache_nlines - 1);
5437 	}
5438 
5439 	/*
5440 	 * set the scrub index for the next visit
5441 	 */
5442 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D] = index;
5443 }
5444 
5445 /*
5446  * Handler for D$ scrub inum softint. Call scrub_dcache until
5447  * we decrement the outstanding request count to zero.
5448  */
5449 /*ARGSUSED*/
5450 static uint_t
5451 scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2)
5452 {
5453 	int i;
5454 	int how_many;
5455 	int outstanding;
5456 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5457 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_D];
5458 	struct scrub_info *csi = (struct scrub_info *)arg1;
5459 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5460 		dcache_scan_rate_idle : dcache_scan_rate_busy;
5461 
5462 	/*
5463 	 * The scan rates are expressed in units of tenths of a
5464 	 * percent.  A scan rate of 1000 (100%) means the whole
5465 	 * cache is scanned every second.
5466 	 */
5467 	how_many = (dcache_nlines * scan_rate) / (1000 * csi->csi_freq);
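	/*
	 * Illustrative example (hypothetical D$ geometry): with 2048 D$
	 * lines, a scan rate of 100 (10%) and 100 calls per second, each
	 * call scrubs (2048 * 100) / (1000 * 100) = 2 lines, i.e. roughly
	 * 10% of the D$ per second after integer truncation.
	 */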
5468 
5469 	do {
5470 		outstanding = *countp;
5471 		for (i = 0; i < outstanding; i++) {
5472 			scrub_dcache(how_many);
5473 		}
5474 	} while (atomic_add_32_nv(countp, -outstanding));
5475 
5476 	return (DDI_INTR_CLAIMED);
5477 }
5478 
5479 /*
5480  * Called periodically on each CPU to scrub the I$. The I$ is scrubbed
5481  * by invalidating lines. Due to the characteristics of the ASI which
5482  * is used to invalidate an I$ line, the entire I$ must be invalidated
5483  * rather than an individual I$ line.
5484  */
5485 static void
5486 scrub_icache(int how_many)
5487 {
5488 	int i;
5489 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5490 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I];
5491 	int icache_nlines = csmp->chsm_icache_nlines;
5492 
5493 	/*
5494 	 * scrub the desired number of lines
5495 	 */
5496 	for (i = 0; i < how_many; i++) {
5497 		/*
5498 		 * since the entire I$ must be scrubbed at once,
5499 		 * wait until the index wraps to zero to invalidate
5500 		 * the entire I$
5501 		 */
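		/*
		 * Illustrative note: since index advances how_many entries
		 * per call, the icache_inval_all() below runs roughly once
		 * every icache_nlines / how_many calls, which works out to
		 * about scan_rate / 1000 full invalidations per second.
		 */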
5502 		if (index == 0) {
5503 			icache_inval_all();
5504 		}
5505 
5506 		/*
5507 		 * calculate the next I$ line to scrub, assumes
5508 		 * that chsm_icache_nlines is a power of 2
5509 		 */
5510 		index = (index + 1) & (icache_nlines - 1);
5511 	}
5512 
5513 	/*
5514 	 * set the scrub index for the next visit
5515 	 */
5516 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I] = index;
5517 }
5518 
5519 /*
5520  * Handler for I$ scrub inum softint. Call scrub_icache until
5521  * we decrement the outstanding request count to zero.
5522  */
5523 /*ARGSUSED*/
5524 static uint_t
5525 scrub_icache_line_intr(caddr_t arg1, caddr_t arg2)
5526 {
5527 	int i;
5528 	int how_many;
5529 	int outstanding;
5530 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5531 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_I];
5532 	struct scrub_info *csi = (struct scrub_info *)arg1;
5533 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5534 	    icache_scan_rate_idle : icache_scan_rate_busy;
5535 	int icache_nlines = csmp->chsm_icache_nlines;
5536 
5537 	/*
5538 	 * The scan rates are expressed in units of tenths of a
5539 	 * percent.  A scan rate of 1000 (100%) means the whole
5540 	 * cache is scanned every second.
5541 	 */
5542 	how_many = (icache_nlines * scan_rate) / (1000 * csi->csi_freq);
5543 
5544 	do {
5545 		outstanding = *countp;
5546 		for (i = 0; i < outstanding; i++) {
5547 			scrub_icache(how_many);
5548 		}
5549 	} while (atomic_add_32_nv(countp, -outstanding));
5550 
5551 	return (DDI_INTR_CLAIMED);
5552 }
5553 
5554 /*
5555  * Called periodically on each CPU to scrub the E$.
5556  */
5557 static void
5558 scrub_ecache(int how_many)
5559 {
5560 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5561 	int i;
5562 	int cpuid = CPU->cpu_id;
5563 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E];
5564 	int nlines = csmp->chsm_ecache_nlines;
5565 	int linesize = cpunodes[cpuid].ecache_linesize;
5566 	int ec_set_size = cpu_ecache_set_size(CPU);
5567 
5568 	/*
5569 	 * scrub the desired number of lines
5570 	 */
5571 	for (i = 0; i < how_many; i++) {
5572 		/*
5573 		 * scrub the E$ line
5574 		 */
5575 		ecache_flush_line(ecache_flushaddr + (index * linesize),
5576 		    ec_set_size);
5577 
5578 		/*
5579 		 * calculate the next E$ line to scrub based on twice
5580 		 * the number of E$ lines (to displace lines containing
5581 		 * flush area data), assumes that the number of lines
5582 		 * is a power of 2
5583 		 */
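		/*
		 * Illustrative note: with N E$ lines the index wraps modulo
		 * 2N, so each E$-sized pass uses the half of the flush area
		 * not touched by the previous pass, displacing any flush
		 * area data still resident in the cache.
		 */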
5584 		index = (index + 1) & ((nlines << 1) - 1);
5585 	}
5586 
5587 	/*
5588 	 * set the ecache scrub index for the next visit
5589 	 */
5590 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E] = index;
5591 }
5592 
5593 /*
5594  * Handler for E$ scrub inum softint. Call the E$ scrubber until
5595  * we decrement the outstanding request count to zero.
5596  *
5597  * Due to interactions with cpu_scrub_cpu_setup(), the outstanding count may
5598  * become negative after the atomic_add_32_nv().  This is not a problem, as
5599  * the next trip around the loop won't scrub anything, and the next add will
5600  * reset the count back to zero.
5601  */
5602 /*ARGSUSED*/
5603 static uint_t
5604 scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2)
5605 {
5606 	int i;
5607 	int how_many;
5608 	int outstanding;
5609 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5610 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_E];
5611 	struct scrub_info *csi = (struct scrub_info *)arg1;
5612 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5613 		ecache_scan_rate_idle : ecache_scan_rate_busy;
5614 	int ecache_nlines = csmp->chsm_ecache_nlines;
5615 
5616 	/*
5617 	 * The scan rates are expressed in units of tenths of a
5618 	 * percent.  A scan rate of 1000 (100%) means the whole
5619 	 * cache is scanned every second.
5620 	 */
5621 	how_many = (ecache_nlines * scan_rate) / (1000 * csi->csi_freq);
5622 
5623 	do {
5624 		outstanding = *countp;
5625 		for (i = 0; i < outstanding; i++) {
5626 			scrub_ecache(how_many);
5627 		}
5628 	} while (atomic_add_32_nv(countp, -outstanding));
5629 
5630 	return (DDI_INTR_CLAIMED);
5631 }
5632 
5633 /*
5634  * Timeout function to reenable CEEN
5635  */
5636 static void
5637 cpu_delayed_check_ce_errors(void *arg)
5638 {
5639 	if (!taskq_dispatch(ch_check_ce_tq, cpu_check_ce_errors, arg,
5640 	    TQ_NOSLEEP)) {
5641 		(void) timeout(cpu_delayed_check_ce_errors, arg,
5642 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
5643 	}
5644 }
5645 
5646 /*
5647  * CE Deferred Re-enable after trap.
5648  *
5649  * When the CPU gets a disrupting trap for any of the errors
5650  * controlled by the CEEN bit, CEEN is disabled in the trap handler
5651  * immediately. To eliminate the possibility of multiple CEs causing
5652  * recursive stack overflow in the trap handler, we cannot
5653  * reenable CEEN while still running in the trap handler. Instead,
5654  * after a CE is logged on a CPU, we schedule a timeout function,
5655  * cpu_check_ce_errors(), to trigger after cpu_ceen_delay_secs
5656  * seconds. This function will check whether any further CEs
5657  * have occurred on that CPU, and if none have, will reenable CEEN.
5658  *
5659  * If further CEs have occurred while CEEN is disabled, another
5660  * timeout will be scheduled. This is to ensure that the CPU can
5661  * make progress in the face of CE 'storms', and that it does not
5662  * spend all its time logging CE errors.
5663  */
5664 static void
5665 cpu_check_ce_errors(void *arg)
5666 {
5667 	int	cpuid = (int)(uintptr_t)arg;
5668 	cpu_t	*cp;
5669 
5670 	/*
5671 	 * We acquire cpu_lock.
5672 	 */
5673 	ASSERT(curthread->t_pil == 0);
5674 
5675 	/*
5676 	 * verify that the cpu is still around, DR
5677 	 * could have got there first ...
5678 	 */
5679 	mutex_enter(&cpu_lock);
5680 	cp = cpu_get(cpuid);
5681 	if (cp == NULL) {
5682 		mutex_exit(&cpu_lock);
5683 		return;
5684 	}
5685 	/*
5686 	 * make sure we don't migrate across CPUs
5687 	 * while checking our CE status.
5688 	 */
5689 	kpreempt_disable();
5690 
5691 	/*
5692 	 * If we are running on the CPU that got the
5693 	 * CE, we can do the checks directly.
5694 	 */
5695 	if (cp->cpu_id == CPU->cpu_id) {
5696 		mutex_exit(&cpu_lock);
5697 		cpu_check_ce(TIMEOUT_CEEN_CHECK, 0, 0, 0);
5698 		kpreempt_enable();
5699 		return;
5700 	}
5701 	kpreempt_enable();
5702 
5703 	/*
5704 	 * send an x-call to get the CPU that originally
5705 	 * got the CE to do the necessary checks. If we can't
5706 	 * send the x-call, reschedule the timeout, otherwise we
5707 	 * lose CEEN forever on that CPU.
5708 	 */
5709 	if (CPU_XCALL_READY(cp->cpu_id) && (!(cp->cpu_flags & CPU_QUIESCED))) {
5710 		xc_one(cp->cpu_id, (xcfunc_t *)cpu_check_ce,
5711 		    TIMEOUT_CEEN_CHECK, 0);
5712 		mutex_exit(&cpu_lock);
5713 	} else {
5714 		/*
5715 		 * When the CPU is not accepting xcalls, or
5716 		 * the processor is offlined, we don't want to
5717 		 * incur the extra overhead of trying to schedule the
5718 		 * CE timeout indefinitely. However, we don't want to lose
5719 		 * CE checking forever.
5720 		 *
5721 		 * Keep rescheduling the timeout, accepting the additional
5722 		 * overhead as the cost of correctness in the case where we get
5723 		 * a CE, disable CEEN, offline the CPU during the
5724 		 * timeout interval, and then online it at some
5725 		 * point in the future. This is unlikely given the short
5726 		 * cpu_ceen_delay_secs.
5727 		 */
5728 		mutex_exit(&cpu_lock);
5729 		(void) timeout(cpu_delayed_check_ce_errors,
5730 		    (void *)(uintptr_t)cp->cpu_id,
5731 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
5732 	}
5733 }
5734 
5735 /*
5736  * This routine will check whether CEs have occurred while
5737  * CEEN is disabled. Any CEs detected will be logged and, if
5738  * possible, scrubbed.
5739  *
5740  * The memscrubber will also use this routine to clear any errors
5741  * caused by its scrubbing with CEEN disabled.
5742  *
5743  * flag == SCRUBBER_CEEN_CHECK
5744  *		called from memscrubber, just check/scrub, no reset
5745  *		pa	physical addr. for start of scrub pages
5746  *		va	virtual addr. for scrub area
5747  *		psz	page size of area to be scrubbed
5748  *
5749  * flag == TIMEOUT_CEEN_CHECK
5750  *		timeout function has triggered, reset timeout or CEEN
5751  *
5752  * Note: We must not migrate cpus during this function.  This can be
5753  * achieved by one of:
5754  *    - invoking as target of an x-call in which case we're at XCALL_PIL
5755  *	The flag value must be the first xcall argument.
5756  *    - disabling kernel preemption.  This should be done for very short
5757  *	periods so is not suitable for SCRUBBER_CEEN_CHECK where we might
5758  *	scrub an extended area with cpu_check_block.  The call for
5759  *	TIMEOUT_CEEN_CHECK uses this so cpu_check_ce must be kept
5760  *	brief for this case.
5761  *    - binding to a cpu, e.g. with thread_affinity_set().  This is used
5762  *	in the SCRUBBER_CEEN_CHECK case, but is not practical for
5763  *	the TIMEOUT_CEEN_CHECK because both need cpu_lock.
5764  */
5765 void
5766 cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t psz)
5767 {
5768 	ch_cpu_errors_t	cpu_error_regs;
5769 	uint64_t	ec_err_enable;
5770 	uint64_t	page_offset;
5771 
5772 	/* Read AFSR */
5773 	get_cpu_error_state(&cpu_error_regs);
5774 
5775 	/*
5776 	 * If no CEEN errors have occurred during the timeout
5777 	 * interval, it is safe to re-enable CEEN and exit.
5778 	 */
5779 	if ((cpu_error_regs.afsr & C_AFSR_CECC_ERRS) == 0) {
5780 		if (flag == TIMEOUT_CEEN_CHECK &&
5781 		    !((ec_err_enable = get_error_enable()) & EN_REG_CEEN))
5782 			set_error_enable(ec_err_enable | EN_REG_CEEN);
5783 		return;
5784 	}
5785 
5786 	/*
5787 	 * Ensure that CEEN was not reenabled (maybe by DR) before
5788 	 * we log/clear the error.
5789 	 */
5790 	if ((ec_err_enable = get_error_enable()) & EN_REG_CEEN)
5791 	    set_error_enable(ec_err_enable & ~EN_REG_CEEN);
5792 
5793 	/*
5794 	 * log/clear the CE. If CE_CEEN_DEFER is passed, the
5795 	 * timeout will be rescheduled when the error is logged.
5796 	 */
5797 	if (!(cpu_error_regs.afsr & cpu_ce_not_deferred))
5798 	    cpu_ce_detected(&cpu_error_regs,
5799 		CE_CEEN_DEFER | CE_CEEN_TIMEOUT);
5800 	else
5801 	    cpu_ce_detected(&cpu_error_regs, CE_CEEN_TIMEOUT);
5802 
5803 	/*
5804 	 * If the memory scrubber runs while CEEN is
5805 	 * disabled, (or if CEEN is disabled during the
5806 	 * scrub as a result of a CE being triggered by
5807 	 * it), the range being scrubbed will not be
5808 	 * completely cleaned. If there are multiple CEs
5809 	 * in the range, at most two of these will be dealt
5810 	 * with (one by the trap handler and one by the
5811 	 * timeout). It is also possible that none are dealt
5812 	 * with (CEEN disabled and another CE occurs before
5813 	 * the timeout triggers). So to ensure that the
5814 	 * memory is actually scrubbed, we have to access each
5815 	 * memory location in the range and then check whether
5816 	 * that access causes a CE.
5817 	 */
5818 	if (flag == SCRUBBER_CEEN_CHECK && va) {
5819 		if ((cpu_error_regs.afar >= pa) &&
5820 		    (cpu_error_regs.afar < (pa + psz))) {
5821 			/*
5822 			 * Force a load from physical memory for each
5823 			 * 64-byte block, then check AFSR to determine
5824 			 * whether this access caused an error.
5825 			 *
5826 			 * This is a slow way to do a scrub, but as it will
5827 			 * only be invoked when the memory scrubber actually
5828 			 * triggered a CE, it should not happen too
5829 			 * frequently.
5830 			 *
5831 			 * cut down what we need to check as the scrubber
5832 			 * has verified up to AFAR, so get its offset
5833 			 * into the page and start there.
5834 			 */
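			/*
			 * Illustrative example (hypothetical values): for an
			 * 8K page (psz == 8192) whose AFAR falls at page
			 * offset 0x1234, the restart point is
			 * P2ALIGN(0x1234, 64) == 0x1200, so only the
			 * remaining 0xe00 bytes of the page are re-checked.
			 */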
5835 			page_offset = (uint64_t)(cpu_error_regs.afar &
5836 			    (psz - 1));
5837 			va = (caddr_t)(va + (P2ALIGN(page_offset, 64)));
5838 			psz -= (uint_t)(P2ALIGN(page_offset, 64));
5839 			cpu_check_block((caddr_t)(P2ALIGN((uint64_t)va, 64)),
5840 			    psz);
5841 		}
5842 	}
5843 
5844 	/*
5845 	 * Reset error enable if this CE is not masked.
5846 	 */
5847 	if ((flag == TIMEOUT_CEEN_CHECK) &&
5848 	    (cpu_error_regs.afsr & cpu_ce_not_deferred))
5849 	    set_error_enable(ec_err_enable | EN_REG_CEEN);
5850 
5851 }
5852 
5853 /*
5854  * Attempt a cpu logout for an error that we did not trap for, such
5855  * as a CE noticed with CEEN off.  It is assumed that we are still running
5856  * on the cpu that took the error and that we cannot migrate.  Returns
5857  * 0 on success, otherwise nonzero.
5858  * nonzero on success, otherwise 0.
5859 static int
5860 cpu_ce_delayed_ec_logout(uint64_t afar)
5861 {
5862 	ch_cpu_logout_t *clop;
5863 
5864 	if (CPU_PRIVATE(CPU) == NULL)
5865 		return (0);
5866 
5867 	clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
5868 	if (cas64(&clop->clo_data.chd_afar, LOGOUT_INVALID, afar) !=
5869 	    LOGOUT_INVALID)
5870 		return (0);
5871 
5872 	cpu_delayed_logout(afar, clop);
5873 	return (1);
5874 }
5875 
5876 /*
5877  * We got an error while CEEN was disabled. We
5878  * need to clean up after it and log whatever
5879  * information we have on the CE.
5880  */
5881 void
5882 cpu_ce_detected(ch_cpu_errors_t *cpu_error_regs, int flag)
5883 {
5884 	ch_async_flt_t 	ch_flt;
5885 	struct async_flt *aflt;
5886 	char 		pr_reason[MAX_REASON_STRING];
5887 
5888 	bzero(&ch_flt, sizeof (ch_async_flt_t));
5889 	ch_flt.flt_trapped_ce = flag;
5890 	aflt = (struct async_flt *)&ch_flt;
5891 	aflt->flt_stat = cpu_error_regs->afsr & C_AFSR_MASK;
5892 	ch_flt.afsr_ext = cpu_error_regs->afsr_ext;
5893 	ch_flt.afsr_errs = (cpu_error_regs->afsr_ext & C_AFSR_EXT_ALL_ERRS) |
5894 	    (cpu_error_regs->afsr & C_AFSR_ALL_ERRS);
5895 	aflt->flt_addr = cpu_error_regs->afar;
5896 #if defined(SERRANO)
5897 	ch_flt.afar2 = cpu_error_regs->afar2;
5898 #endif	/* SERRANO */
5899 	aflt->flt_pc = NULL;
5900 	aflt->flt_priv = ((cpu_error_regs->afsr & C_AFSR_PRIV) != 0);
5901 	aflt->flt_tl = 0;
5902 	aflt->flt_panic = 0;
5903 	cpu_log_and_clear_ce(&ch_flt);
5904 
5905 	/*
5906 	 * check if we caused any errors during cleanup
5907 	 */
5908 	if (clear_errors(&ch_flt)) {
5909 		pr_reason[0] = '\0';
5910 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
5911 		    NULL);
5912 	}
5913 }
5914 
5915 /*
5916  * Log/clear CEEN-controlled disrupting errors
5917  */
5918 static void
5919 cpu_log_and_clear_ce(ch_async_flt_t *ch_flt)
5920 {
5921 	struct async_flt *aflt;
5922 	uint64_t afsr, afsr_errs;
5923 	ch_cpu_logout_t *clop;
5924 	char 		pr_reason[MAX_REASON_STRING];
5925 	on_trap_data_t	*otp = curthread->t_ontrap;
5926 
5927 	aflt = (struct async_flt *)ch_flt;
5928 	afsr = aflt->flt_stat;
5929 	afsr_errs = ch_flt->afsr_errs;
5930 	aflt->flt_id = gethrtime_waitfree();
5931 	aflt->flt_bus_id = getprocessorid();
5932 	aflt->flt_inst = CPU->cpu_id;
5933 	aflt->flt_prot = AFLT_PROT_NONE;
5934 	aflt->flt_class = CPU_FAULT;
5935 	aflt->flt_status = ECC_C_TRAP;
5936 
5937 	pr_reason[0] = '\0';
5938 	/*
5939 	 * Get the CPU logout info for the disrupting trap.
5940 	 */
5941 	if (CPU_PRIVATE(CPU) == NULL) {
5942 		clop = NULL;
5943 		ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID;
5944 	} else {
5945 		clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
5946 	}
5947 
5948 	if (clop && ch_flt->flt_trapped_ce & CE_CEEN_TIMEOUT) {
5949 		ch_cpu_errors_t cpu_error_regs;
5950 
5951 		get_cpu_error_state(&cpu_error_regs);
5952 		(void) cpu_ce_delayed_ec_logout(cpu_error_regs.afar);
5953 		clop->clo_data.chd_afsr = cpu_error_regs.afsr;
5954 		clop->clo_data.chd_afar = cpu_error_regs.afar;
5955 		clop->clo_data.chd_afsr_ext = cpu_error_regs.afsr_ext;
5956 		clop->clo_sdw_data.chd_afsr = cpu_error_regs.shadow_afsr;
5957 		clop->clo_sdw_data.chd_afar = cpu_error_regs.shadow_afar;
5958 		clop->clo_sdw_data.chd_afsr_ext =
5959 		    cpu_error_regs.shadow_afsr_ext;
5960 #if defined(SERRANO)
5961 		clop->clo_data.chd_afar2 = cpu_error_regs.afar2;
5962 #endif	/* SERRANO */
5963 		ch_flt->flt_data_incomplete = 1;
5964 
5965 		/*
5966 		 * The logging/clear code expects AFSR/AFAR to be cleared.
5967 		 * The trap handler does it for CEEN enabled errors
5968 		 * so we need to do it here.
5969 		 */
5970 		set_cpu_error_state(&cpu_error_regs);
5971 	}
5972 
5973 #if defined(JALAPENO) || defined(SERRANO)
5974 	/*
5975 	 * FRC: Can't scrub memory as we don't have AFAR for Jalapeno.
5976 	 * For Serrano, even though we do have the AFAR, we still do the
5977 	 * scrub on the RCE side since that's where the error type can
5978 	 * be properly classified as intermittent, persistent, etc.
5979 	 *
5980 	 * CE/RCE:  If error is in memory and AFAR is valid, scrub the memory.
5981 	 * Must scrub memory before cpu_queue_events, as scrubbing memory sets
5982 	 * the flt_status bits.
5983 	 */
5984 	if ((afsr & (C_AFSR_CE|C_AFSR_RCE)) &&
5985 	    (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) ||
5986 	    cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_RCE)))) {
5987 		cpu_ce_scrub_mem_err(aflt, B_TRUE);
5988 	}
5989 #else /* JALAPENO || SERRANO */
5990 	/*
5991 	 * CE/EMC:  If error is in memory and AFAR is valid, scrub the memory.
5992 	 * Must scrub memory before cpu_queue_events, as scrubbing memory sets
5993 	 * the flt_status bits.
5994 	 */
5995 	if (afsr & (C_AFSR_CE|C_AFSR_EMC)) {
5996 		if (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) ||
5997 		    cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_EMC))) {
5998 			cpu_ce_scrub_mem_err(aflt, B_TRUE);
5999 		}
6000 	}
6001 
6002 #endif /* JALAPENO || SERRANO */
6003 
6004 	/*
6005 	 * Update flt_prot if this error occurred under on_trap protection.
6006 	 */
6007 	if (otp != NULL && (otp->ot_prot & OT_DATA_EC))
6008 		aflt->flt_prot = AFLT_PROT_EC;
6009 
6010 	/*
6011 	 * Queue events on the async event queue, one event per error bit.
6012 	 */
6013 	if (cpu_queue_events(ch_flt, pr_reason, afsr_errs, clop) == 0 ||
6014 	    (afsr_errs & (C_AFSR_CECC_ERRS | C_AFSR_EXT_CECC_ERRS)) == 0) {
6015 		ch_flt->flt_type = CPU_INV_AFSR;
6016 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
6017 		    (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue,
6018 		    aflt->flt_panic);
6019 	}
6020 
6021 	/*
6022 	 * Zero out + invalidate CPU logout.
6023 	 */
6024 	if (clop) {
6025 		bzero(clop, sizeof (ch_cpu_logout_t));
6026 		clop->clo_data.chd_afar = LOGOUT_INVALID;
6027 	}
6028 
6029 	/*
6030 	 * If either a CPC, WDC or EDC error has occurred while CEEN
6031 	 * was disabled, we need to flush either the entire
6032 	 * E$ or an E$ line.
6033 	 */
6034 #if defined(JALAPENO) || defined(SERRANO)
6035 	if (afsr & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_CPU | C_AFSR_WDC))
6036 #else	/* JALAPENO || SERRANO */
6037 	if (afsr_errs & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_WDC | C_AFSR_L3_EDC |
6038 	    C_AFSR_L3_CPC | C_AFSR_L3_WDC))
6039 #endif	/* JALAPENO || SERRANO */
6040 		cpu_error_ecache_flush(ch_flt);
6041 
6042 }
6043 
6044 /*
6045  * Depending on the error type, we determine whether we
6046  * need to flush the entire ecache or just a line.
6047  */
6048 static int
6049 cpu_error_ecache_flush_required(ch_async_flt_t *ch_flt)
6050 {
6051 	struct async_flt *aflt;
6052 	uint64_t	afsr;
6053 	uint64_t	afsr_errs = ch_flt->afsr_errs;
6054 
6055 	aflt = (struct async_flt *)ch_flt;
6056 	afsr = aflt->flt_stat;
6057 
6058 	/*
6059 	 * If we got multiple errors, no point in trying
6060 	 * the individual cases, just flush the whole cache
6061 	 */
6062 	if (afsr & C_AFSR_ME) {
6063 		return (ECACHE_FLUSH_ALL);
6064 	}
6065 
6066 	/*
6067 	 * If either a CPC, WDC or EDC error has occurred while CEEN
6068 	 * was disabled, we need to flush the entire E$. We can't just
6069 	 * flush the cache line affected as the ME bit
6070 	 * is not set when multiple correctable errors of the same
6071 	 * type occur, so we might have multiple CPC or EDC errors,
6072 	 * with only the first recorded.
6073 	 */
6074 #if defined(JALAPENO) || defined(SERRANO)
6075 	if (afsr & (C_AFSR_CPC | C_AFSR_CPU | C_AFSR_EDC | C_AFSR_WDC)) {
6076 #else	/* JALAPENO || SERRANO */
6077 	if (afsr_errs & (C_AFSR_CPC | C_AFSR_EDC | C_AFSR_WDC | C_AFSR_L3_CPC |
6078 	    C_AFSR_L3_EDC | C_AFSR_L3_WDC)) {
6079 #endif	/* JALAPENO || SERRANO */
6080 		return (ECACHE_FLUSH_ALL);
6081 	}
6082 
6083 #if defined(JALAPENO) || defined(SERRANO)
6084 	/*
6085 	 * If only UE or RUE is set, flush the Ecache line, otherwise
6086 	 * flush the entire Ecache.
6087 	 */
6088 	if (afsr & (C_AFSR_UE|C_AFSR_RUE)) {
6089 		if ((afsr & C_AFSR_ALL_ERRS) == C_AFSR_UE ||
6090 		    (afsr & C_AFSR_ALL_ERRS) == C_AFSR_RUE) {
6091 			return (ECACHE_FLUSH_LINE);
6092 		} else {
6093 			return (ECACHE_FLUSH_ALL);
6094 		}
6095 	}
6096 #else /* JALAPENO || SERRANO */
6097 	/*
6098 	 * If UE only is set, flush the Ecache line, otherwise
6099 	 * flush the entire Ecache.
6100 	 */
6101 	if (afsr_errs & C_AFSR_UE) {
6102 		if ((afsr_errs & (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) ==
6103 		    C_AFSR_UE) {
6104 			return (ECACHE_FLUSH_LINE);
6105 		} else {
6106 			return (ECACHE_FLUSH_ALL);
6107 		}
6108 	}
6109 #endif /* JALAPENO || SERRANO */
6110 
6111 	/*
6112 	 * EDU: If EDU only is set, flush the ecache line, otherwise
6113 	 * flush the entire Ecache.
6114 	 */
6115 	if (afsr_errs & (C_AFSR_EDU | C_AFSR_L3_EDU)) {
6116 		if (((afsr_errs & ~C_AFSR_EDU) == 0) ||
6117 		    ((afsr_errs & ~C_AFSR_L3_EDU) == 0)) {
6118 			return (ECACHE_FLUSH_LINE);
6119 		} else {
6120 			return (ECACHE_FLUSH_ALL);
6121 		}
6122 	}
6123 
6124 	/*
6125 	 * BERR: If BERR only is set, flush the Ecache line, otherwise
6126 	 * flush the entire Ecache.
6127 	 */
6128 	if (afsr_errs & C_AFSR_BERR) {
6129 		if ((afsr_errs & ~C_AFSR_BERR) == 0) {
6130 			return (ECACHE_FLUSH_LINE);
6131 		} else {
6132 			return (ECACHE_FLUSH_ALL);
6133 		}
6134 	}
6135 
6136 	return (0);
6137 }
6138 
6139 void
6140 cpu_error_ecache_flush(ch_async_flt_t *ch_flt)
6141 {
6142 	int	ecache_flush_flag =
6143 	    cpu_error_ecache_flush_required(ch_flt);
6144 
6145 	/*
6146 	 * Flush Ecache line or entire Ecache based on above checks.
6147 	 */
6148 	if (ecache_flush_flag == ECACHE_FLUSH_ALL)
6149 		cpu_flush_ecache();
6150 	else if (ecache_flush_flag == ECACHE_FLUSH_LINE) {
6151 		cpu_flush_ecache_line(ch_flt);
6152 	}
6153 
6154 }
6155 
6156 /*
6157  * Extract the PA portion from the E$ tag.
6158  */
6159 uint64_t
6160 cpu_ectag_to_pa(int setsize, uint64_t tag)
6161 {
6162 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6163 		return (JG_ECTAG_TO_PA(setsize, tag));
6164 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6165 		return (PN_L3TAG_TO_PA(tag));
6166 	else
6167 		return (CH_ECTAG_TO_PA(setsize, tag));
6168 }
6169 
6170 /*
6171  * Convert the E$ tag PA into an E$ subblock index.
6172  */
6173 static int
6174 cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr)
6175 {
6176 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6177 		return (JG_ECTAG_PA_TO_SUBBLK(cachesize, subaddr));
6178 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6179 		/* Panther has only one subblock per line */
6180 		return (0);
6181 	else
6182 		return (CH_ECTAG_PA_TO_SUBBLK(cachesize, subaddr));
6183 }
6184 
6185 /*
6186  * All subblocks in an E$ line must be invalid for
6187  * the line to be invalid.
6188  */
6189 int
6190 cpu_ectag_line_invalid(int cachesize, uint64_t tag)
6191 {
6192 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6193 		return (JG_ECTAG_LINE_INVALID(cachesize, tag));
6194 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6195 		return (PN_L3_LINE_INVALID(tag));
6196 	else
6197 		return (CH_ECTAG_LINE_INVALID(cachesize, tag));
6198 }
6199 
6200 /*
6201  * Extract state bits for a subblock given the tag.  Note that for Panther
6202  * this works on both l2 and l3 tags.
6203  */
6204 static int
6205 cpu_ectag_pa_to_subblk_state(int cachesize, uint64_t subaddr, uint64_t tag)
6206 {
6207 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6208 		return (JG_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
6209 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6210 		return (tag & CH_ECSTATE_MASK);
6211 	else
6212 		return (CH_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
6213 }
6214 
6215 /*
6216  * Cpu specific initialization.
6217  */
6218 void
6219 cpu_mp_init(void)
6220 {
6221 #ifdef	CHEETAHPLUS_ERRATUM_25
6222 	if (cheetah_sendmondo_recover) {
6223 		cheetah_nudge_init();
6224 	}
6225 #endif
6226 }
6227 
6228 void
6229 cpu_ereport_post(struct async_flt *aflt)
6230 {
6231 	char *cpu_type, buf[FM_MAX_CLASS];
6232 	nv_alloc_t *nva = NULL;
6233 	nvlist_t *ereport, *detector, *resource;
6234 	errorq_elem_t *eqep;
6235 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
6236 	char unum[UNUM_NAMLEN];
6237 	int len = 0;
6238 	uint8_t  msg_type;
6239 	plat_ecc_ch_async_flt_t	plat_ecc_ch_flt;
6240 
6241 	if (aflt->flt_panic || panicstr) {
6242 		eqep = errorq_reserve(ereport_errorq);
6243 		if (eqep == NULL)
6244 			return;
6245 		ereport = errorq_elem_nvl(ereport_errorq, eqep);
6246 		nva = errorq_elem_nva(ereport_errorq, eqep);
6247 	} else {
6248 		ereport = fm_nvlist_create(nva);
6249 	}
6250 
6251 	/*
6252 	 * Create the scheme "cpu" FMRI.
6253 	 */
6254 	detector = fm_nvlist_create(nva);
6255 	resource = fm_nvlist_create(nva);
6256 	switch (cpunodes[aflt->flt_inst].implementation) {
6257 	case CHEETAH_IMPL:
6258 		cpu_type = FM_EREPORT_CPU_USIII;
6259 		break;
6260 	case CHEETAH_PLUS_IMPL:
6261 		cpu_type = FM_EREPORT_CPU_USIIIplus;
6262 		break;
6263 	case JALAPENO_IMPL:
6264 		cpu_type = FM_EREPORT_CPU_USIIIi;
6265 		break;
6266 	case SERRANO_IMPL:
6267 		cpu_type = FM_EREPORT_CPU_USIIIiplus;
6268 		break;
6269 	case JAGUAR_IMPL:
6270 		cpu_type = FM_EREPORT_CPU_USIV;
6271 		break;
6272 	case PANTHER_IMPL:
6273 		cpu_type = FM_EREPORT_CPU_USIVplus;
6274 		break;
6275 	default:
6276 		cpu_type = FM_EREPORT_CPU_UNSUPPORTED;
6277 		break;
6278 	}
6279 	(void) fm_fmri_cpu_set(detector, FM_CPU_SCHEME_VERSION, NULL,
6280 	    aflt->flt_inst, (uint8_t)cpunodes[aflt->flt_inst].version,
6281 	    cpunodes[aflt->flt_inst].device_id);
6282 
6283 	/*
6284 	 * Encode all the common data into the ereport.
6285 	 */
6286 	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s.%s",
6287 		FM_ERROR_CPU, cpu_type, aflt->flt_erpt_class);
6288 
6289 	fm_ereport_set(ereport, FM_EREPORT_VERSION, buf,
6290 	    fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst, FM_ENA_FMT1),
6291 	    detector, NULL);
6292 
6293 	/*
6294 	 * Encode the error specific data that was saved in
6295 	 * the async_flt structure into the ereport.
6296 	 */
6297 	cpu_payload_add_aflt(aflt, ereport, resource,
6298 	    &plat_ecc_ch_flt.ecaf_afar_status,
6299 	    &plat_ecc_ch_flt.ecaf_synd_status);
6300 
6301 	if (aflt->flt_panic || panicstr) {
6302 		errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC);
6303 	} else {
6304 		(void) fm_ereport_post(ereport, EVCH_TRYHARD);
6305 		fm_nvlist_destroy(ereport, FM_NVA_FREE);
6306 		fm_nvlist_destroy(detector, FM_NVA_FREE);
6307 		fm_nvlist_destroy(resource, FM_NVA_FREE);
6308 	}
6309 	/*
6310 	 * Send the enhanced error information (plat_ecc_error2_data_t)
6311 	 * to the SC only if it can process it.
6312 	 */
6313 
6314 	if (&plat_ecc_capability_sc_get &&
6315 	    plat_ecc_capability_sc_get(PLAT_ECC_ERROR2_MESSAGE)) {
6316 		msg_type = cpu_flt_bit_to_plat_error(aflt);
6317 		if (msg_type != PLAT_ECC_ERROR2_NONE) {
6318 			/*
6319 			 * If afar status is not invalid do a unum lookup.
6320 			 */
6321 			if (plat_ecc_ch_flt.ecaf_afar_status !=
6322 			    AFLT_STAT_INVALID) {
6323 				(void) cpu_get_mem_unum_aflt(
6324 				    plat_ecc_ch_flt.ecaf_synd_status, aflt,
6325 				    unum, UNUM_NAMLEN, &len);
6326 			} else {
6327 				unum[0] = '\0';
6328 			}
6329 			plat_ecc_ch_flt.ecaf_sdw_afar = ch_flt->flt_sdw_afar;
6330 			plat_ecc_ch_flt.ecaf_sdw_afsr = ch_flt->flt_sdw_afsr;
6331 			plat_ecc_ch_flt.ecaf_afsr_ext = ch_flt->afsr_ext;
6332 			plat_ecc_ch_flt.ecaf_sdw_afsr_ext =
6333 			    ch_flt->flt_sdw_afsr_ext;
6334 
6335 			if (&plat_log_fruid_error2)
6336 				plat_log_fruid_error2(msg_type, unum, aflt,
6337 				    &plat_ecc_ch_flt);
6338 		}
6339 	}
6340 }
6341 
6342 void
6343 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
6344 {
6345 	int status;
6346 	ddi_fm_error_t de;
6347 
6348 	bzero(&de, sizeof (ddi_fm_error_t));
6349 
6350 	de.fme_version = DDI_FME_VERSION;
6351 	de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst,
6352 	    FM_ENA_FMT1);
6353 	de.fme_flag = expected;
6354 	de.fme_bus_specific = (void *)aflt->flt_addr;
6355 	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
6356 	if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL))
6357 		aflt->flt_panic = 1;
6358 }
6359 
6360 void
6361 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz,
6362     errorq_t *eqp, uint_t flag)
6363 {
6364 	struct async_flt *aflt = (struct async_flt *)payload;
6365 
6366 	aflt->flt_erpt_class = error_class;
6367 	errorq_dispatch(eqp, payload, payload_sz, flag);
6368 }
6369 
6370 /*
6371  * This routine may be called by the IO module, but does not do
6372  * anything in this cpu module. The SERD algorithm is handled by
6373  * the cpumem-diagnosis engine instead.
6374  */
6375 /*ARGSUSED*/
6376 void
6377 cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum)
6378 {}
6379 
6380 void
6381 adjust_hw_copy_limits(int ecache_size)
6382 {
6383 	/*
6384 	 * Set hw copy limits.
6385 	 *
6386 	 * /etc/system will be parsed later and can override one or more
6387 	 * of these settings.
6388 	 *
6389 	 * At this time, ecache size seems only mildly relevant.
6390 	 * We seem to run into issues with the d-cache and stalls
6391 	 * we see on misses.
6392 	 *
6393 	 * Cycle measurement indicates that 2 byte aligned copies fare
6394 	 * little better than doing things with VIS at around 512 bytes.
6395 	 * 4 byte aligned shows promise until around 1024 bytes. 8 byte
6396 	 * aligned is faster whenever the source and destination data are
6397 	 * in cache and the total size is less than 2 Kbytes.  The 2K
6398 	 * limit seems to be driven by the 2K write cache.
6399 	 * When more than 2K of copies are done in non-VIS mode, stores
6400 	 * back up in the write cache.  In VIS mode, the write cache is
6401 	 * bypassed, allowing faster cache-line writes aligned on cache
6402 	 * boundaries.
6403 	 *
6404 	 * In addition, in non-VIS mode, there is no prefetching, so
6405 	 * for larger copies, the advantage of prefetching to avoid even
6406 	 * occasional cache misses is enough to justify using the VIS code.
6407 	 *
6408 	 * During testing, it was discovered that netbench ran 3% slower
6409 	 * when hw_copy_limit_8 was 2K or larger.  Apparently for server
6410 	 * applications, data is only used once (copied to the output
6411 	 * buffer, then copied by the network device off the system).  Using
6412 	 * the VIS copy saves more L2 cache state.  Network copies are
6413 	 * around 1.3K to 1.5K in size for historical reasons.
6414 	 *
6415 	 * Therefore, a limit of 1K bytes will be used for the 8 byte
6416 	 * aligned copy even for large caches and 8 MB ecache.  The
6417 	 * infrastructure to allow different limits for different sized
6418 	 * caches is kept to allow further tuning in later releases.
6419 	 */
6420 
6421 	if (min_ecache_size == 0 && use_hw_bcopy) {
6422 		/*
6423 		 * First time through - should be before /etc/system
6424 		 * is read.
6425 		 * Could skip the checks for zero but this lets us
6426 		 * preserve any debugger rewrites.
6427 		 */
6428 		if (hw_copy_limit_1 == 0) {
6429 			hw_copy_limit_1 = VIS_COPY_THRESHOLD;
6430 			priv_hcl_1 = hw_copy_limit_1;
6431 		}
6432 		if (hw_copy_limit_2 == 0) {
6433 			hw_copy_limit_2 = 2 * VIS_COPY_THRESHOLD;
6434 			priv_hcl_2 = hw_copy_limit_2;
6435 		}
6436 		if (hw_copy_limit_4 == 0) {
6437 			hw_copy_limit_4 = 4 * VIS_COPY_THRESHOLD;
6438 			priv_hcl_4 = hw_copy_limit_4;
6439 		}
6440 		if (hw_copy_limit_8 == 0) {
6441 			hw_copy_limit_8 = 4 * VIS_COPY_THRESHOLD;
6442 			priv_hcl_8 = hw_copy_limit_8;
6443 		}
6444 		min_ecache_size = ecache_size;
6445 	} else {
6446 		/*
6447 		 * MP initialization. Called *after* /etc/system has
6448 		 * been parsed. One CPU has already been initialized.
6449 		 * Need to cater for /etc/system having scragged one
6450 		 * of our values.
6451 		 */
6452 		if (ecache_size == min_ecache_size) {
6453 			/*
6454 			 * Same size ecache. We do nothing unless we
6455 			 * have a pessimistic ecache setting. In that
6456 			 * case we become more optimistic (if the cache is
6457 			 * large enough).
6458 			 */
6459 			if (hw_copy_limit_8 == 4 * VIS_COPY_THRESHOLD) {
6460 				/*
6461 				 * Need to adjust hw_copy_limit* from our
6462 				 * pessimistic uniprocessor value to a more
6463 				 * optimistic MP value *iff* it hasn't been
6464 				 * reset.
6465 				 */
6466 				if ((ecache_size > 1048576) &&
6467 				    (priv_hcl_8 == hw_copy_limit_8)) {
6468 					if (ecache_size <= 2097152)
6469 						hw_copy_limit_8 = 4 *
6470 						    VIS_COPY_THRESHOLD;
6471 					else if (ecache_size <= 4194304)
6472 						hw_copy_limit_8 = 4 *
6473 						    VIS_COPY_THRESHOLD;
6474 					else
6475 						hw_copy_limit_8 = 4 *
6476 						    VIS_COPY_THRESHOLD;
6477 					priv_hcl_8 = hw_copy_limit_8;
6478 				}
6479 			}
6480 		} else if (ecache_size < min_ecache_size) {
6481 			/*
6482 			 * A different ecache size. Can this even happen?
6483 			 */
6484 			if (priv_hcl_8 == hw_copy_limit_8) {
6485 				/*
6486 				 * The previous value that we set
6487 				 * is unchanged (i.e., it hasn't been
6488 				 * scragged by /etc/system). Rewrite it.
6489 				 */
6490 				if (ecache_size <= 1048576)
6491 					hw_copy_limit_8 = 8 *
6492 					    VIS_COPY_THRESHOLD;
6493 				else if (ecache_size <= 2097152)
6494 					hw_copy_limit_8 = 8 *
6495 					    VIS_COPY_THRESHOLD;
6496 				else if (ecache_size <= 4194304)
6497 					hw_copy_limit_8 = 8 *
6498 					    VIS_COPY_THRESHOLD;
6499 				else
6500 					hw_copy_limit_8 = 10 *
6501 					    VIS_COPY_THRESHOLD;
6502 				priv_hcl_8 = hw_copy_limit_8;
6503 				min_ecache_size = ecache_size;
6504 			}
6505 		}
6506 	}
6507 }
6508 
6509 /*
6510  * Called from illegal instruction trap handler to see if we can attribute
6511  * the trap to an fpras check.
6512  */
6513 int
6514 fpras_chktrap(struct regs *rp)
6515 {
6516 	int op;
6517 	struct fpras_chkfngrp *cgp;
6518 	uintptr_t tpc = (uintptr_t)rp->r_pc;
6519 
6520 	if (fpras_chkfngrps == NULL)
6521 		return (0);
6522 
6523 	cgp = &fpras_chkfngrps[CPU->cpu_id];
6524 	for (op = 0; op < FPRAS_NCOPYOPS; ++op) {
6525 		if (tpc >= (uintptr_t)&cgp->fpras_fn[op].fpras_blk0 &&
6526 		    tpc < (uintptr_t)&cgp->fpras_fn[op].fpras_chkresult)
6527 			break;
6528 	}
6529 	if (op == FPRAS_NCOPYOPS)
6530 		return (0);
6531 
6532 	/*
6533 	 * This is an fpRAS failure caught through an illegal
6534 	 * instruction; redirect to this check function's trampoline.
6535 	 */
6536 	rp->r_pc = (uintptr_t)&cgp->fpras_fn[op].fpras_trampoline;
6537 	rp->r_npc = rp->r_pc + 4;
6538 	return (1);
6539 }
6540 
6541 /*
6542  * fpras_failure is called when a fpras check detects a bad calculation
6543  * result or an illegal instruction trap is attributed to an fpras
6544  * check.  In all cases we are still bound to CPU.
6545  */
6546 int
6547 fpras_failure(int op, int how)
6548 {
6549 	int use_hw_bcopy_orig, use_hw_bzero_orig;
6550 	uint_t hcl1_orig, hcl2_orig, hcl4_orig, hcl8_orig;
6551 	ch_async_flt_t ch_flt;
6552 	struct async_flt *aflt = (struct async_flt *)&ch_flt;
6553 	struct fpras_chkfn *sfp, *cfp;
6554 	uint32_t *sip, *cip;
6555 	int i;
6556 
6557 	/*
6558 	 * We're running on a sick CPU.  Avoid further FPU use at least for
6559 	 * the time in which we dispatch an ereport and (if applicable) panic.
6560 	 */
6561 	use_hw_bcopy_orig = use_hw_bcopy;
6562 	use_hw_bzero_orig = use_hw_bzero;
6563 	hcl1_orig = hw_copy_limit_1;
6564 	hcl2_orig = hw_copy_limit_2;
6565 	hcl4_orig = hw_copy_limit_4;
6566 	hcl8_orig = hw_copy_limit_8;
6567 	use_hw_bcopy = use_hw_bzero = 0;
6568 	hw_copy_limit_1 = hw_copy_limit_2 = hw_copy_limit_4 =
6569 	    hw_copy_limit_8 = 0;
6570 
6571 	bzero(&ch_flt, sizeof (ch_async_flt_t));
6572 	aflt->flt_id = gethrtime_waitfree();
6573 	aflt->flt_class = CPU_FAULT;
6574 	aflt->flt_inst = CPU->cpu_id;
6575 	aflt->flt_status = (how << 8) | op;
6576 	aflt->flt_payload = FM_EREPORT_PAYLOAD_FPU_HWCOPY;
6577 	ch_flt.flt_type = CPU_FPUERR;
6578 
6579 	/*
6580 	 * We must panic if the copy operation had no lofault protection -
6581 	 * i.e., don't panic for copyin, copyout, kcopy and bcopy called
6582 	 * under on_fault, but do panic for unprotected bcopy and hwblkpagecopy.
6583 	 */
6584 	aflt->flt_panic = (curthread->t_lofault == NULL);
6585 
6586 	/*
6587 	 * XOR the source instruction block with the copied instruction
6588 	 * block - this will show us which bit(s) are corrupted.
6589 	 */
6590 	sfp = (struct fpras_chkfn *)fpras_chkfn_type1;
6591 	cfp = &fpras_chkfngrps[CPU->cpu_id].fpras_fn[op];
6592 	if (op == FPRAS_BCOPY || op == FPRAS_COPYOUT) {
6593 		sip = &sfp->fpras_blk0[0];
6594 		cip = &cfp->fpras_blk0[0];
6595 	} else {
6596 		sip = &sfp->fpras_blk1[0];
6597 		cip = &cfp->fpras_blk1[0];
6598 	}
6599 	for (i = 0; i < 16; ++i, ++sip, ++cip)
6600 		ch_flt.flt_fpdata[i] = *sip ^ *cip;
6601 
6602 	cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_FPU_HWCOPY, (void *)&ch_flt,
6603 	    sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);
6604 
6605 	if (aflt->flt_panic)
6606 		fm_panic("FPU failure on CPU %d", CPU->cpu_id);
6607 
6608 	/*
6609 	 * We get here for copyin/copyout and kcopy or bcopy where the
6610 	 * caller has used on_fault.  We will flag the error so that
6611 	 * the process may be killed.  The trap_async_hwerr mechanism will
6612 	 * take appropriate further action (such as a reboot, contract
6613 	 * notification, etc.).  Since we may be continuing, we will
6614 	 * restore the global hardware copy acceleration switches.
6615 	 *
6616 	 * When we return from this function to the copy function we want to
6617 	 * avoid potentially bad data being used, i.e., we want the affected
6618 	 * copy function to return an error.  The caller should therefore
6619 	 * invoke its lofault handler (which always exists for these functions)
6620 	 * which will return the appropriate error.
6621 	 */
6622 	ttolwp(curthread)->lwp_pcb.pcb_flags |= ASYNC_HWERR;
6623 	aston(curthread);
6624 
6625 	use_hw_bcopy = use_hw_bcopy_orig;
6626 	use_hw_bzero = use_hw_bzero_orig;
6627 	hw_copy_limit_1 = hcl1_orig;
6628 	hw_copy_limit_2 = hcl2_orig;
6629 	hw_copy_limit_4 = hcl4_orig;
6630 	hw_copy_limit_8 = hcl8_orig;
6631 
6632 	return (1);
6633 }
6634 
6635 #define	VIS_BLOCKSIZE		64
6636 
6637 int
6638 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
6639 {
6640 	int ret, watched;
6641 
6642 	watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
6643 	ret = dtrace_blksuword32(addr, data, 0);
6644 	if (watched)
6645 		watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
6646 
6647 	return (ret);
6648 }
6649 
6650 /*
6651  * Called when a cpu enters the CPU_FAULTED state (by the cpu placing the
6652  * faulted cpu into that state).  Cross-trap to the faulted cpu to clear
6653  * CEEN from the EER to disable traps for further disrupting error types
6654  * on that cpu.  We could cross-call instead, but that has a larger
6655  * instruction and data footprint than cross-trapping, and the cpu is known
6656  * to be faulted.
6657  */
6658 
6659 void
6660 cpu_faulted_enter(struct cpu *cp)
6661 {
6662 	xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_CLRBITS);
6663 }
6664 
6665 /*
6666  * Called when a cpu leaves the CPU_FAULTED state to return to one of
6667  * offline, spare, or online (by the cpu requesting this state change).
6668  * First we cross-call to clear the AFSR (and AFSR_EXT on Panther) of
6669  * disrupting error bits that have accumulated without trapping, then
6670  * we cross-trap to re-enable CEEN controlled traps.
6671  */
6672 void
6673 cpu_faulted_exit(struct cpu *cp)
6674 {
6675 	ch_cpu_errors_t cpu_error_regs;
6676 
6677 	cpu_error_regs.afsr = C_AFSR_CECC_ERRS;
6678 	if (IS_PANTHER(cpunodes[cp->cpu_id].implementation))
6679 		cpu_error_regs.afsr_ext = C_AFSR_EXT_CECC_ERRS;
6680 	xc_one(cp->cpu_id, (xcfunc_t *)set_cpu_error_state,
6681 	    (uint64_t)&cpu_error_regs, 0);
6682 
6683 	xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_SETBITS);
6684 }
6685 
6686 /*
6687  * Return 1 if the errors in ch_flt's AFSR are secondary errors caused by
6688  * the errors in the original AFSR, 0 otherwise.
6689  *
6690  * For all procs, if the initial error was a BERR or TO, then it is possible
6691  * that we may have caused a secondary BERR or TO in the process of logging the
6692  * initial error via cpu_run_bus_error_handlers().  If this is the case and
6693  * the request was protected, then a panic is still not necessary; if not
6694  * protected, then aft_panic is already set - so either way there's no need
6695  * to set aft_panic for the secondary error.
6696  *
6697  * For Cheetah and Jalapeno, if the original error was a UE which occurred on
6698  * a store merge, then the error handling code will call cpu_deferred_error().
6699  * When clear_errors() is called, it will determine that secondary errors have
6700  * occurred - in particular, the store merge also caused a EDU and WDU that
6701  * weren't discovered until this point.
6702  *
6703  * We do three checks to verify that we are in this case.  If we pass all three
6704  * checks, we return 1 to indicate that we should not panic.  If any unexpected
6705  * errors occur, we return 0.
6706  *
6707  * For Cheetah+ and derivative procs, the store merge causes a DUE, which is
6708  * handled in cpu_disrupting_errors().  Since this function is not even called
6709  * in the case we are interested in, we just return 0 for these processors.
6710  */
6711 /*ARGSUSED*/
6712 static int
6713 cpu_check_secondary_errors(ch_async_flt_t *ch_flt, uint64_t t_afsr_errs,
6714     uint64_t t_afar)
6715 {
6716 #if defined(CHEETAH_PLUS)
6717 #else	/* CHEETAH_PLUS */
6718 	struct async_flt *aflt = (struct async_flt *)ch_flt;
6719 #endif	/* CHEETAH_PLUS */
6720 
6721 	/*
6722 	 * Was the original error a BERR or TO and only a BERR or TO
6723 	 * (multiple errors are also OK)
6724 	 */
6725 	if ((t_afsr_errs & ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0) {
6726 		/*
6727 		 * Is the new error a BERR or TO and only a BERR or TO
6728 		 * (multiple errors are also OK)
6729 		 */
6730 		if ((ch_flt->afsr_errs &
6731 		    ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0)
6732 			return (1);
6733 	}
6734 
6735 #if defined(CHEETAH_PLUS)
6736 	return (0);
6737 #else	/* CHEETAH_PLUS */
6738 	/*
6739 	 * Now look for secondary effects of a UE on cheetah/jalapeno
6740 	 *
6741 	 * Check the original error was a UE, and only a UE.  Note that
6742 	 * the ME bit will cause us to fail this check.
6743 	 */
6744 	if (t_afsr_errs != C_AFSR_UE)
6745 		return (0);
6746 
6747 	/*
6748 	 * Check the secondary errors were exclusively an EDU and/or WDU.
6749 	 */
6750 	if ((ch_flt->afsr_errs & ~(C_AFSR_EDU|C_AFSR_WDU)) != 0)
6751 		return (0);
6752 
6753 	/*
6754 	 * Check the AFAR of the original error and secondary errors
6755 	 * match to the 64-byte boundary
6756 	 */
6757 	if (P2ALIGN(aflt->flt_addr, 64) != P2ALIGN(t_afar, 64))
6758 		return (0);
6759 
6760 	/*
6761 	 * We've passed all the checks, so it's a secondary error!
6762 	 */
6763 	return (1);
6764 #endif	/* CHEETAH_PLUS */
6765 }
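
/*
 * A worked example of the non-CHEETAH_PLUS path above: if the initial trap
 * recorded t_afsr_errs == C_AFSR_UE, and the store merge later surfaces
 * ch_flt->afsr_errs == (C_AFSR_EDU | C_AFSR_WDU) with aflt->flt_addr in the
 * same 64-byte line as t_afar, then all three checks pass and the routine
 * returns 1, so no panic is forced for the secondary errors.
 */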
6766 
6767 /*
6768  * Translate the flt_bit or flt_type into a platform error type.  First,
6769  * flt_bit is checked for any valid AFSR errors; if found, the error type
6770  * is returned.  Otherwise flt_type is checked for L1$ and TLB parity errors.
6771  */
6772 /*ARGSUSED*/
6773 static uint8_t
6774 cpu_flt_bit_to_plat_error(struct async_flt *aflt)
6775 {
6776 #if defined(JALAPENO)
6777 	/*
6778 	 * Currently, logging errors to the SC is not supported on Jalapeno
6779 	 */
6780 	return (PLAT_ECC_ERROR2_NONE);
6781 #else
6782 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
6783 
6784 	switch (ch_flt->flt_bit) {
6785 	case C_AFSR_CE:
6786 		return (PLAT_ECC_ERROR2_CE);
6787 	case C_AFSR_UCC:
6788 	case C_AFSR_EDC:
6789 	case C_AFSR_WDC:
6790 	case C_AFSR_CPC:
6791 		return (PLAT_ECC_ERROR2_L2_CE);
6792 	case C_AFSR_EMC:
6793 		return (PLAT_ECC_ERROR2_EMC);
6794 	case C_AFSR_IVC:
6795 		return (PLAT_ECC_ERROR2_IVC);
6796 	case C_AFSR_UE:
6797 		return (PLAT_ECC_ERROR2_UE);
6798 	case C_AFSR_UCU:
6799 	case C_AFSR_EDU:
6800 	case C_AFSR_WDU:
6801 	case C_AFSR_CPU:
6802 		return (PLAT_ECC_ERROR2_L2_UE);
6803 	case C_AFSR_IVU:
6804 		return (PLAT_ECC_ERROR2_IVU);
6805 	case C_AFSR_TO:
6806 		return (PLAT_ECC_ERROR2_TO);
6807 	case C_AFSR_BERR:
6808 		return (PLAT_ECC_ERROR2_BERR);
6809 #if defined(CHEETAH_PLUS)
6810 	case C_AFSR_L3_EDC:
6811 	case C_AFSR_L3_UCC:
6812 	case C_AFSR_L3_CPC:
6813 	case C_AFSR_L3_WDC:
6814 		return (PLAT_ECC_ERROR2_L3_CE);
6815 	case C_AFSR_IMC:
6816 		return (PLAT_ECC_ERROR2_IMC);
6817 	case C_AFSR_TSCE:
6818 		return (PLAT_ECC_ERROR2_L2_TSCE);
6819 	case C_AFSR_THCE:
6820 		return (PLAT_ECC_ERROR2_L2_THCE);
6821 	case C_AFSR_L3_MECC:
6822 		return (PLAT_ECC_ERROR2_L3_MECC);
6823 	case C_AFSR_L3_THCE:
6824 		return (PLAT_ECC_ERROR2_L3_THCE);
6825 	case C_AFSR_L3_CPU:
6826 	case C_AFSR_L3_EDU:
6827 	case C_AFSR_L3_UCU:
6828 	case C_AFSR_L3_WDU:
6829 		return (PLAT_ECC_ERROR2_L3_UE);
6830 	case C_AFSR_DUE:
6831 		return (PLAT_ECC_ERROR2_DUE);
6832 	case C_AFSR_DTO:
6833 		return (PLAT_ECC_ERROR2_DTO);
6834 	case C_AFSR_DBERR:
6835 		return (PLAT_ECC_ERROR2_DBERR);
6836 #endif	/* CHEETAH_PLUS */
6837 	default:
6838 		switch (ch_flt->flt_type) {
6839 #if defined(CPU_IMP_L1_CACHE_PARITY)
6840 		case CPU_IC_PARITY:
6841 			return (PLAT_ECC_ERROR2_IPE);
6842 		case CPU_DC_PARITY:
6843 			if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
6844 				if (ch_flt->parity_data.dpe.cpl_cache ==
6845 				    CPU_PC_PARITY) {
6846 					return (PLAT_ECC_ERROR2_PCACHE);
6847 				}
6848 			}
6849 			return (PLAT_ECC_ERROR2_DPE);
6850 #endif /* CPU_IMP_L1_CACHE_PARITY */
6851 		case CPU_ITLB_PARITY:
6852 			return (PLAT_ECC_ERROR2_ITLB);
6853 		case CPU_DTLB_PARITY:
6854 			return (PLAT_ECC_ERROR2_DTLB);
6855 		default:
6856 			return (PLAT_ECC_ERROR2_NONE);
6857 		}
6858 	}
6859 #endif	/* JALAPENO */
6860 }
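
/*
 * Two illustrative lookups through the translation above: a Cheetah+ fault
 * with flt_bit == C_AFSR_L3_WDC maps to PLAT_ECC_ERROR2_L3_CE in the flt_bit
 * switch, while a fault whose flt_bit matches nothing but whose flt_type is
 * CPU_DTLB_PARITY falls through to the flt_type switch and maps to
 * PLAT_ECC_ERROR2_DTLB; anything else yields PLAT_ECC_ERROR2_NONE.
 */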
6861