/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * eversholt rules for generic-sparc sparc cpu errors.
 *
 * Most propagations are generated by preprocessor macros.  The event
 * declarations are deliberately not part of the propagation macros
 * so that we know we have full coverage - propagations defined without
 * events, or events not used in propagations, will produce compiler
 * whinges.
 */

/*
 * True when the ereport payload carries a "diagnose" member whose value
 * is non-zero.  Every rule below produces a fault (or defect) when this
 * holds and an upset otherwise.
 */
#define	DIAGNOSE_ERPT	(payloadprop_defined("diagnose") &&		\
			    payloadprop("diagnose") != 0x0)

/*
 * Apply the SERD T / N values supplied in the ereport payload, if any.
 * Each expression is true when the payload member is absent; otherwise
 * its value is whatever setserdt()/setserdn() yields.
 */
#define	SET_SERDT	(!payloadprop_defined("serd_t") ||		\
			    setserdt(payloadprop("serd_t")))

#define	SET_SERDN	(!payloadprop_defined("serd_n") ||		\
			    setserdn(payloadprop("serd_n")))

/*
 * Set the SERD increment from the "filter_ratio" payload member when it
 * is present and non-zero; otherwise the expression is simply 1.
 */
#define	SET_RATIO							\
	((payloadprop_defined("filter_ratio") &&			\
	    payloadprop("filter_ratio") != 0x0) ?			\
	    (setserdincrement(payloadprop("filter_ratio"))) : 1)

/*
 * The ereport and fault events are declared at multiple levels:
 * some set of @chip, @core, and @strand resources since this is
 * a generic DE and must be flexible and handle present and future
 * sun4v platforms.  For example, one processor may have an L2
 * cache per chip, another may have an L2 per core.
 *
 * For UE errors, faults are produced immediately.
 *
 * For CE errors, the errors are put through a SERD engine.  If
 * the SERD engine trips, the fault is produced.  SERD engine
 * names are of the format:
 *	serd.cpu.generic-sparc.<resource><suffix>
 *	Ex: serd.cpu.generic-sparc.chipitlb
 * NOTE(review): the chip-level engine declared in this file has the
 * leaf "chip-nr", so the example name above looks out of date - confirm.
 * SERD N/T values are set to default values, but can be
 * overridden via the ereport or the eft.conf file.  The
 * order of precedence of the SERD N/T values is:
 *	- the 'serd_override' tunable via eft.conf
 *	- the 'serd_n' and 'serd_t' payload members in the
 *	  incoming ereport
 *	- the built-in default values
 *
 * The increment rate of the SERD engines can also be
 * controlled via the ereport payload using the 'filter_ratio'
 * payload member.  N is incremented by the value of
 * 'filter_ratio' if the payload member is present, 1 otherwise.
 */

/*
 * Ereport event for cpu errors.
 *
 * Declares ereport.cpu.generic-sparc.<leafclass>@<level> with a
 * within(1s) property.
 */
#define	ERPT_EVENT(level, leafclass)					\
	event ereport.cpu.generic-sparc.leafclass@level { within(1s) }

/*
 * Ereports for uncorrectable cpu errors
 */
ERPT_EVENT(chip, itlb-uc);
ERPT_EVENT(core, itlb-uc);
ERPT_EVENT(strand, itlb-uc);
ERPT_EVENT(chip, dtlb-uc);
ERPT_EVENT(core, dtlb-uc);
ERPT_EVENT(strand, dtlb-uc);
ERPT_EVENT(chip, icache-uc);
ERPT_EVENT(core, icache-uc);
ERPT_EVENT(chip, dcache-uc);
ERPT_EVENT(core, dcache-uc);
ERPT_EVENT(chip, ireg-uc);
ERPT_EVENT(core, ireg-uc);
ERPT_EVENT(strand, ireg-uc);
ERPT_EVENT(chip, freg-uc);
ERPT_EVENT(core, freg-uc);
ERPT_EVENT(strand, freg-uc);
ERPT_EVENT(chip, mreg-uc);
ERPT_EVENT(core, mreg-uc);
ERPT_EVENT(strand, mreg-uc);
ERPT_EVENT(chip, l2data-uc);
ERPT_EVENT(core, l2data-uc);
ERPT_EVENT(chip, l2tagctl-uc);
ERPT_EVENT(core, l2tagctl-uc);
ERPT_EVENT(chip, l3data-uc);
ERPT_EVENT(core, l3data-uc);
ERPT_EVENT(chip, l3tagctl-uc);
ERPT_EVENT(core, l3tagctl-uc);
ERPT_EVENT(chip, int-mem-ue);
ERPT_EVENT(core, int-mem-ue);
ERPT_EVENT(strand, int-mem-ue);
ERPT_EVENT(chip, gchip-uc);
ERPT_EVENT(core, gcore-uc);
ERPT_EVENT(strand, gstrand-uc);

/*
 * Propagations for CPU UE errors
 * A fault is produced immediately for a CPU UE error.
 *
 * FLT_CPU_UE(level, erptleaf, fltleaf) declares
 * fault.cpu.generic-sparc.<fltleaf>@<level> and the matching upset, and
 * propagates each to ereport.cpu.generic-sparc.<erptleaf>@<level>:
 * the fault when DIAGNOSE_ERPT holds, the upset when it does not.
 */
#define	FLT_CPU_UE(level, erptleaf, fltleaf)				\
	event fault.cpu.generic-sparc.fltleaf@level;			\
									\
	prop fault.cpu.generic-sparc.fltleaf@level			\
	    { DIAGNOSE_ERPT } (0)->					\
	    ereport.cpu.generic-sparc.erptleaf@level;			\
									\
	event upset.cpu.generic-sparc.fltleaf@level;			\
									\
	prop upset.cpu.generic-sparc.fltleaf@level			\
	    { !DIAGNOSE_ERPT } (0)->					\
	    ereport.cpu.generic-sparc.erptleaf@level

FLT_CPU_UE(chip, itlb-uc, chip-uc);
FLT_CPU_UE(core, itlb-uc, core-uc);
FLT_CPU_UE(strand, itlb-uc, strand-uc);
FLT_CPU_UE(chip, dtlb-uc, chip-uc);
FLT_CPU_UE(core, dtlb-uc, core-uc);
FLT_CPU_UE(strand, dtlb-uc, strand-uc);
FLT_CPU_UE(chip, icache-uc, chip-uc);
FLT_CPU_UE(core, icache-uc, core-uc);
FLT_CPU_UE(chip, dcache-uc, chip-uc);
FLT_CPU_UE(core, dcache-uc, core-uc);
FLT_CPU_UE(chip, ireg-uc, chip-uc);
FLT_CPU_UE(core, ireg-uc, core-uc);
FLT_CPU_UE(strand, ireg-uc, strand-uc);
FLT_CPU_UE(chip, mreg-uc, chip-uc);
FLT_CPU_UE(core, mreg-uc, core-uc);
FLT_CPU_UE(strand, mreg-uc, strand-uc);
FLT_CPU_UE(chip, freg-uc, chip-uc);
FLT_CPU_UE(core, freg-uc, core-uc);
FLT_CPU_UE(strand, freg-uc, strand-uc);
FLT_CPU_UE(chip, l2data-uc, chip-uc);
FLT_CPU_UE(core, l2data-uc, core-uc);
FLT_CPU_UE(chip, l2tagctl-uc, chip-uc);
FLT_CPU_UE(core, l2tagctl-uc, core-uc);
FLT_CPU_UE(chip, l3data-uc, chip-uc);
FLT_CPU_UE(core, l3data-uc, core-uc);
FLT_CPU_UE(chip, l3tagctl-uc, chip-uc);
FLT_CPU_UE(core, l3tagctl-uc, core-uc);
FLT_CPU_UE(chip, gchip-uc, chip-uc);
FLT_CPU_UE(core, gcore-uc, core-uc);
FLT_CPU_UE(strand, gstrand-uc, strand-uc);

/*
 * Same shape as FLT_CPU_UE, except that the fault event is declared
 * with retire=0.
 */
#define	FLT_CPU_UE_UNRETIRED(level, erptleaf, fltleaf)			\
	event fault.cpu.generic-sparc.fltleaf@level,			\
	    retire=0;							\
									\
	prop fault.cpu.generic-sparc.fltleaf@level			\
	    { DIAGNOSE_ERPT } (0)->					\
	    ereport.cpu.generic-sparc.erptleaf@level;			\
									\
	event upset.cpu.generic-sparc.fltleaf@level;			\
									\
	prop upset.cpu.generic-sparc.fltleaf@level			\
	    { !DIAGNOSE_ERPT } (0)->					\
	    ereport.cpu.generic-sparc.erptleaf@level

FLT_CPU_UE_UNRETIRED(chip, int-mem-ue, chip-uc-nr);
FLT_CPU_UE_UNRETIRED(core, int-mem-ue, core-uc-nr);
FLT_CPU_UE_UNRETIRED(strand, int-mem-ue, strand-uc-nr);

/*
 * Ereport events for correctable errors.
 */
ERPT_EVENT(chip, itlb);
ERPT_EVENT(core, itlb);
ERPT_EVENT(strand, itlb);
ERPT_EVENT(chip, dtlb);
ERPT_EVENT(core, dtlb);
ERPT_EVENT(strand, dtlb);
ERPT_EVENT(chip, icache);
ERPT_EVENT(core, icache);
ERPT_EVENT(chip, dcache);
ERPT_EVENT(core, dcache);
ERPT_EVENT(chip, ireg);
ERPT_EVENT(core, ireg);
ERPT_EVENT(strand, ireg);
ERPT_EVENT(chip, freg);
ERPT_EVENT(core, freg);
ERPT_EVENT(strand, freg);
ERPT_EVENT(chip, mreg);
ERPT_EVENT(core, mreg);
ERPT_EVENT(strand, mreg);
ERPT_EVENT(chip, l2data);
ERPT_EVENT(core, l2data);
ERPT_EVENT(chip, l2tagctl);
ERPT_EVENT(core, l2tagctl);
ERPT_EVENT(chip, l3data);
ERPT_EVENT(core, l3data);
ERPT_EVENT(chip, l3tagctl);
ERPT_EVENT(core, l3tagctl);
ERPT_EVENT(chip, int-mem);
ERPT_EVENT(core, int-mem);
ERPT_EVENT(strand, int-mem);
ERPT_EVENT(chip, gchip);
ERPT_EVENT(core, gcore);
ERPT_EVENT(strand, gstrand);

/*
 * Propagations for CE errors
 * Errors are serded and fault is generated when the SERD engine trips
 * The serd name & the N & T values are set at run time.
 */
engine serd.cpu.generic-sparc.core@core, N=8, T=1week;
engine serd.cpu.generic-sparc.strand@strand, N=8, T=1week;

/*
 * FLT_CPU_CE(erptleaf, level, fltleaf, n, t) declares
 * fault.cpu.generic-sparc.<fltleaf>@<level> bound to the SERD engine of
 * the same leaf and level.  The fault constraint sets the engine suffix
 * to the ereport leaf, applies the built-in n/t, then lets the payload
 * (SET_SERDN / SET_SERDT / SET_RATIO) adjust N, T and the increment.
 * An upset is propagated instead when DIAGNOSE_ERPT does not hold.
 *
 * NOTE(review): setserdsuffix("erptleaf") only names the engine after
 * the ereport leaf if the preprocessor substitutes macro parameters
 * inside string literals - confirm against the build's cpp mode.
 */
#define	FLT_CPU_CE(erptleaf, level, fltleaf, n, t)			\
									\
	/* Simple fault event */					\
	event fault.cpu.generic-sparc.fltleaf@level,			\
	    engine=serd.cpu.generic-sparc.fltleaf@level;		\
									\
	/* When the correctable engine trips, diagnose a fault */	\
	prop fault.cpu.generic-sparc.fltleaf@level			\
	    { DIAGNOSE_ERPT && setserdsuffix("erptleaf") &&		\
	    setserdn(n) && setserdt(t) && SET_SERDN &&			\
	    SET_SERDT && SET_RATIO } (0) ->				\
	    ereport.cpu.generic-sparc.erptleaf@level;			\
									\
	event upset.cpu.generic-sparc.fltleaf@level;			\
									\
	prop upset.cpu.generic-sparc.fltleaf@level			\
	    { !DIAGNOSE_ERPT } (0) ->					\
	    ereport.cpu.generic-sparc.erptleaf@level

FLT_CPU_CE(itlb, core, core, 8, 1week);
FLT_CPU_CE(itlb, strand, strand, 8, 1week);
FLT_CPU_CE(dtlb, core, core, 8, 1week);
FLT_CPU_CE(dtlb, strand, strand, 8, 1week);
FLT_CPU_CE(icache, core, core, 8, 1week);
FLT_CPU_CE(dcache, core, core, 8, 1week);
FLT_CPU_CE(ireg, core, core, 8, 1week);
FLT_CPU_CE(ireg, strand, strand, 8, 1week);
FLT_CPU_CE(freg, core, core, 8, 1week);
FLT_CPU_CE(freg, strand, strand, 8, 1week);
FLT_CPU_CE(mreg, core, core, 8, 1week);
FLT_CPU_CE(mreg, strand, strand, 8, 1week);
FLT_CPU_CE(l2data, core, core, 8, 1week);
FLT_CPU_CE(l2tagctl, core, core, 8, 1week);
FLT_CPU_CE(l3data, core, core, 8, 1week);
FLT_CPU_CE(l3tagctl, core, core, 8, 1week);
FLT_CPU_CE(gcore, core, core, 8, 1week);
FLT_CPU_CE(gstrand, strand, strand, 8, 1week);

engine serd.cpu.generic-sparc.chip-nr@chip, N=8, T=1week;
engine serd.cpu.generic-sparc.core-nr@core, N=8, T=1week;
engine serd.cpu.generic-sparc.strand-nr@strand, N=8, T=1week;

/*
 * Same shape as FLT_CPU_CE, except that the fault event is declared
 * with retire=0.
 *
 * NOTE(review): the upset here is named upset.fault.cpu.generic-sparc.*
 * whereas the other macros in this file use upset.cpu.generic-sparc.*.
 * The name is left untouched; confirm whether the extra "fault"
 * component is intentional.
 */
#define	FLT_CPU_CE_UNRETIRED(erptleaf, level, fltleaf, n, t)		\
									\
	/* Simple fault event */					\
	event fault.cpu.generic-sparc.fltleaf@level,			\
	    retire=0,							\
	    engine=serd.cpu.generic-sparc.fltleaf@level;		\
									\
	/* When the correctable engine trips, diagnose a fault */	\
	prop fault.cpu.generic-sparc.fltleaf@level			\
	    { DIAGNOSE_ERPT && setserdsuffix("erptleaf") && setserdn(n) && \
	    setserdt(t) && SET_SERDN && SET_SERDT && SET_RATIO } (0) ->	\
	    ereport.cpu.generic-sparc.erptleaf@level;			\
									\
	event upset.fault.cpu.generic-sparc.fltleaf@level;		\
									\
	prop upset.fault.cpu.generic-sparc.fltleaf@level		\
	    { !DIAGNOSE_ERPT } (0)->					\
	    ereport.cpu.generic-sparc.erptleaf@level

FLT_CPU_CE_UNRETIRED(itlb, chip, chip-nr, 8, 1week);
FLT_CPU_CE_UNRETIRED(dtlb, chip, chip-nr, 8, 1week);
FLT_CPU_CE_UNRETIRED(icache, chip, chip-nr, 8, 1week);
FLT_CPU_CE_UNRETIRED(dcache, chip, chip-nr, 8, 1week);
FLT_CPU_CE_UNRETIRED(ireg, chip, chip-nr, 8, 1week);
FLT_CPU_CE_UNRETIRED(freg, chip, chip-nr, 8, 1week);
FLT_CPU_CE_UNRETIRED(mreg, chip, chip-nr, 8, 1week);
FLT_CPU_CE_UNRETIRED(l2data, chip, chip-nr, 8, 2h);
FLT_CPU_CE_UNRETIRED(l2tagctl, chip, chip-nr, 8, 2h);
FLT_CPU_CE_UNRETIRED(l3data, chip, chip-nr, 8, 2h);
FLT_CPU_CE_UNRETIRED(l3tagctl, chip, chip-nr, 8, 2h);
FLT_CPU_CE_UNRETIRED(gchip, chip, chip-nr, 8, 1week);
FLT_CPU_CE_UNRETIRED(int-mem, chip, chip-nr, 8, 1week);
FLT_CPU_CE_UNRETIRED(int-mem, core, core-nr, 8, 1week);
FLT_CPU_CE_UNRETIRED(int-mem, strand, strand-nr, 8, 1week);

/*
 * c2c-link-uc, c2c-prot-uc, c2c-failover errors
 * The detector and sender are faulted immediately.
 * If ereport does not have a sender, all chips are faulted
 *
 * Each ereport class gets three fault propagations:
 *	- sender:    the faulted chip is named in the "sender" payload
 *		     member; the ereport may come from any chip (chip<>)
 *	- detector:  the faulted chip is the chip the ereport is against
 *	- no sender: every chip is faulted for an ereport from any
 *		     chip (chip<>)
 *
 * NOTE(review): CONTAINS_DET is not referenced anywhere in the visible
 * rules.
 */
#define	CONTAINS_SENDER	(payloadprop_contains("sender", asru(chip)))
#define	HAS_SENDER	(payloadprop_defined("sender"))
#define	HAS_C2C_FAULT(c)	has_fault(c, "fault.cpu.generic-sparc.c2c")
#define	CONTAINS_DET	(payloadprop_contains("detector", asru(chip)))

event ereport.cpu.generic-sparc.c2c-link-uc@chip { within(1s) };
event ereport.cpu.generic-sparc.c2c-prot-uc@chip { within(1s) };
event ereport.cpu.generic-sparc.c2c-failover@chip { within(1s) };

event fault.cpu.generic-sparc.c2c-uc@chip, retire=0;
event fault.cpu.generic-sparc.c2c-failover@chip, retire=0;

event upset.cpu.generic-sparc.c2c-uc@chip;
event upset.cpu.generic-sparc.c2c-failover@chip;

prop fault.cpu.generic-sparc.c2c-uc@chip
    { DIAGNOSE_ERPT && CONTAINS_SENDER } (0) ->
    ereport.cpu.generic-sparc.c2c-link-uc@chip<>;

prop fault.cpu.generic-sparc.c2c-uc@chip
    { DIAGNOSE_ERPT } (0) ->
    ereport.cpu.generic-sparc.c2c-link-uc@chip;

prop fault.cpu.generic-sparc.c2c-uc@chip
    { DIAGNOSE_ERPT && !HAS_SENDER } (0) ->
    ereport.cpu.generic-sparc.c2c-link-uc@chip<>;

prop fault.cpu.generic-sparc.c2c-uc@chip
    { DIAGNOSE_ERPT && CONTAINS_SENDER } (0) ->
    ereport.cpu.generic-sparc.c2c-prot-uc@chip<>;

prop fault.cpu.generic-sparc.c2c-uc@chip
    { DIAGNOSE_ERPT } (0) ->
    ereport.cpu.generic-sparc.c2c-prot-uc@chip;

prop fault.cpu.generic-sparc.c2c-uc@chip
    { DIAGNOSE_ERPT && !HAS_SENDER } (0) ->
    ereport.cpu.generic-sparc.c2c-prot-uc@chip<>;

prop upset.cpu.generic-sparc.c2c-uc@chip
    { !DIAGNOSE_ERPT } (0) ->
    ereport.cpu.generic-sparc.c2c-prot-uc@chip,
    ereport.cpu.generic-sparc.c2c-link-uc@chip;

prop fault.cpu.generic-sparc.c2c-failover@chip
    { DIAGNOSE_ERPT && CONTAINS_SENDER } (0) ->
    ereport.cpu.generic-sparc.c2c-failover@chip<>;

prop fault.cpu.generic-sparc.c2c-failover@chip
    { DIAGNOSE_ERPT } (0) ->
    ereport.cpu.generic-sparc.c2c-failover@chip;

prop fault.cpu.generic-sparc.c2c-failover@chip
    { DIAGNOSE_ERPT && !HAS_SENDER } (0) ->
    ereport.cpu.generic-sparc.c2c-failover@chip<>;

prop upset.cpu.generic-sparc.c2c-failover@chip
    { !DIAGNOSE_ERPT } (0) ->
    ereport.cpu.generic-sparc.c2c-failover@chip;

/*
 * c2c-link, c2c-prot.  Errors are serded.  When the serd trips,
 * the detector & sender will be faulted.
 * If ereport does not have a sender, all chips will be faulted
 * If ereport sender or detector was faulted, discard ereport
 *
 * The sender / detector / no-sender propagations follow the same
 * pattern as the uncorrectable c2c rules, with the SERD suffix set to
 * "link" or "prot" according to the ereport class.  The unconstrained
 * c2c_discard upset is propagated to both ereport classes.
 */
event ereport.cpu.generic-sparc.c2c-link@chip { within(1s) };
event ereport.cpu.generic-sparc.c2c-prot@chip { within(1s) };

engine serd.cpu.generic-sparc.c2c@chip, N=120, T=30min;

event fault.cpu.generic-sparc.c2c@chip, retire=0,
    engine=serd.cpu.generic-sparc.c2c@chip;

event upset.cpu.generic-sparc.c2c-link@chip;
event upset.cpu.generic-sparc.c2c_discard@chip;

prop fault.cpu.generic-sparc.c2c@chip
    { DIAGNOSE_ERPT && CONTAINS_SENDER && !HAS_C2C_FAULT(asru(chip)) &&
    !HAS_C2C_FAULT(payloadprop("detector")) &&
    setserdsuffix("link") && SET_SERDN && SET_SERDT && SET_RATIO } (0) ->
    ereport.cpu.generic-sparc.c2c-link@chip<>;

prop fault.cpu.generic-sparc.c2c@chip
    { DIAGNOSE_ERPT && !HAS_C2C_FAULT(asru(chip)) &&
    !HAS_C2C_FAULT(payloadprop("sender")) &&
    setserdsuffix("link") && SET_SERDN && SET_SERDT && SET_RATIO } (0) ->
    ereport.cpu.generic-sparc.c2c-link@chip;

prop fault.cpu.generic-sparc.c2c@chip
    { DIAGNOSE_ERPT && !HAS_SENDER &&
    setserdsuffix("link") && SET_SERDN && SET_SERDT && SET_RATIO } (0) ->
    ereport.cpu.generic-sparc.c2c-link@chip<>;

prop fault.cpu.generic-sparc.c2c@chip
    { DIAGNOSE_ERPT && CONTAINS_SENDER && !HAS_C2C_FAULT(asru(chip)) &&
    !HAS_C2C_FAULT(payloadprop("detector")) &&
    setserdsuffix("prot") && SET_SERDN && SET_SERDT && SET_RATIO } (0) ->
    ereport.cpu.generic-sparc.c2c-prot@chip<>;

prop fault.cpu.generic-sparc.c2c@chip
    { DIAGNOSE_ERPT && !HAS_C2C_FAULT(asru(chip)) &&
    !HAS_C2C_FAULT(payloadprop("sender")) &&
    setserdsuffix("prot") && SET_SERDN && SET_SERDT && SET_RATIO } (0) ->
    ereport.cpu.generic-sparc.c2c-prot@chip;

prop fault.cpu.generic-sparc.c2c@chip
    { DIAGNOSE_ERPT && !HAS_SENDER &&
    setserdsuffix("prot") && SET_SERDN && SET_SERDT && SET_RATIO } (0) ->
    ereport.cpu.generic-sparc.c2c-prot@chip<>;

prop upset.cpu.generic-sparc.c2c-link@chip
    { !DIAGNOSE_ERPT } (0) ->
    ereport.cpu.generic-sparc.c2c-prot@chip,
    ereport.cpu.generic-sparc.c2c-link@chip;

prop upset.cpu.generic-sparc.c2c_discard@chip (0) ->
    ereport.cpu.generic-sparc.c2c-link@chip,
    ereport.cpu.generic-sparc.c2c-prot@chip;

/*
 * addr-oob is a firmware error - there is no associated FRU/ASRU
 * and firmware is not represented in topology.  Rather than ignore
 * the error, the DE generates a defect with no FRU or ASRU.  It
 * is generated @chassis so no location (label) is picked up out
 * of the topology.  The associated knowledge article can instruct
 * users what steps to take to address the error.
 *
 * The "inconsistent" ereport is handled the same way through the
 * erpt-gen defect.
 */
fru NULL;
asru NULL;

event ereport.cpu.generic-sparc.addr-oob@chassis;
event defect.fw.generic-sparc.addr-oob@chassis,
    ASRU=NULL,
    FRU=NULL;
event upset.fw.generic-sparc.addr-oob@chassis;

prop defect.fw.generic-sparc.addr-oob@chassis
    { DIAGNOSE_ERPT } (0) ->
    ereport.cpu.generic-sparc.addr-oob@chassis;

prop upset.fw.generic-sparc.addr-oob@chassis
    { !DIAGNOSE_ERPT } (0) ->
    ereport.cpu.generic-sparc.addr-oob@chassis;

event ereport.cpu.generic-sparc.inconsistent@chassis;
event defect.fw.generic-sparc.erpt-gen@chassis,
    ASRU=NULL,
    FRU=NULL;
event upset.fw.generic-sparc.erpt-gen@chassis;

prop defect.fw.generic-sparc.erpt-gen@chassis
    { DIAGNOSE_ERPT } (0) ->
    ereport.cpu.generic-sparc.inconsistent@chassis;

prop upset.fw.generic-sparc.erpt-gen@chassis
    { !DIAGNOSE_ERPT } (0) ->
    ereport.cpu.generic-sparc.inconsistent@chassis;

/*
 * bootbus-prot, bootbus-to and bootbus-par errors.  Fault the detector.
 */
event ereport.cpu.generic-sparc.bootbus-to@chip;
event ereport.cpu.generic-sparc.bootbus-par@chip;
event ereport.cpu.generic-sparc.bootbus-prot@chip;

event upset.cpu.generic-sparc.bootbus@chip;

event fault.cpu.generic-sparc.bootbus@chip, retire=0;

prop fault.cpu.generic-sparc.bootbus@chip
    { DIAGNOSE_ERPT } (0) ->
    ereport.cpu.generic-sparc.bootbus-to@chip;

prop fault.cpu.generic-sparc.bootbus@chip
    { DIAGNOSE_ERPT } (0) ->
    ereport.cpu.generic-sparc.bootbus-par@chip;

prop fault.cpu.generic-sparc.bootbus@chip
    { DIAGNOSE_ERPT } (0) ->
    ereport.cpu.generic-sparc.bootbus-prot@chip;

prop upset.cpu.generic-sparc.bootbus@chip
    { !DIAGNOSE_ERPT } (0) ->
    ereport.cpu.generic-sparc.bootbus-to@chip,
    ereport.cpu.generic-sparc.bootbus-par@chip,
    ereport.cpu.generic-sparc.bootbus-prot@chip;

/*
 * ignore the pio-read error.
 */
event ereport.cpu.generic-sparc.pio-read@chip;
event upset.cpu.generic-sparc.discard@chip;

prop upset.cpu.generic-sparc.discard@chip (0) ->
    ereport.cpu.generic-sparc.pio-read@chip;