/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

#pragma dictionary "AMD"

/*
 * Eversholt rules for the AMD Opteron CPU/Memory
 */

/* Field-replaceable units referenced by the faults below. */
fru motherboard;
fru chip;
fru dimm;

/* Service-affected resources referenced by the faults below. */
asru chip/cpu;
asru dimm;
asru dimm/rank;
asru dram-channel;
asru chip/memory-controller/chip-select;

/* Arguments are fully parenthesised but are evaluated twice. */
#define MAX(x, y) ((x) >= (y) ? (x) : (y))
#define MIN(x, y) ((x) <= (y) ? (x) : (y))

/*
 * SET_ADDR and SET_OFFSET are used to set a payload value in the fault that
 * we diagnose for page faults, to record the physical address of the faulting
 * page. The "asru-" prefix is hooked in the "rewrite-ASRU" confcalls made on
 * diagnosis of associated faults when the libtopo mem scheme rewrites the
 * asru in "mem" scheme.
54 */ 55#define SET_ADDR (setpayloadprop("asru-physaddr", payloadprop("addr"))) 56 57#define SET_OFFSET (setpayloadprop("asru-offset", \ 58 payloadprop("resource[0].hc-specific.offset"))) 59 60/* 61 * RESOURCE_EXISTS is true if a member with name "resource" exists in the 62 * payload - regardless of type (e.g., nvlist or nvlist array) or value. 63 */ 64#define RESOURCE_EXISTS (payloadprop_defined("resource")) 65 66/* 67 * CONTAINS_RANK is true if the "resource" nvlist array (as used in memory 68 * ereports) exists and one if its members matches the path for the 69 * rank node. Our memory propogation are of the form 70 * 71 * "prop foo@chip/memory-controller/dimm/rank -> blah@chip/cpu" 72 * 73 * since cpus detect memory errors; in eversholt such a propogation, where 74 * the lhs path and rhs path do not match, expands to the cross-product of 75 * all dimms, ranks and cpus on the same chip (since chip appears in the 76 * path on both sides). We use CONTAINS_RANK to constrain the propogation 77 * such that it only happens if the payload resource matches the rank. 78 */ 79#define CONTAINS_RANK (payloadprop_contains("resource", \ 80 asru(chip/memory-controller/dimm/rank)) \ 81 || payloadprop_contains("resource", \ 82 asru(chip/memory-controller/dimm))) 83 84/* 85 * The following will tell us whether a syndrome that is known to be 86 * correctable (from a mem_ce ereport) is single-bit or multi-bit. For a 87 * correctable ChipKill syndrome the number of bits set in the lowest 88 * nibble indicates how many bits were in error. 
89 */ 90 91#define CBITMASK(synd) ((synd) & 0xf) 92 93#define CKSINGLE(synd) \ 94 ((synd) == 0 || \ 95 (CBITMASK(synd) == 0x1 || CBITMASK(synd) == 0x2 || \ 96 CBITMASK(synd) == 0x4 || CBITMASK(synd) == 0x8)) 97 98#define SINGLE_BIT_CE \ 99 (payloadprop("syndrome-type") == "E" || \ 100 (payloadprop("syndrome-type") == "C" && \ 101 CKSINGLE(payloadprop("syndrome")))) 102 103#define MULTI_BIT_CE \ 104 (payloadprop("syndrome-type") == "C" && \ 105 !CKSINGLE(payloadprop("syndrome"))) 106 107/* 108 * A single bit fault in a memory rank can cause: 109 * 110 * - mem_ce : reported by nb 111 * - inf_sys_ecc1: reported by ic or dc; inf_sys_ecc1 errors detected at the 112 * ic do not record a syndrome; these errors will not be triggered in 113 * ChipKill ECC mode (the NB corrects all ECC errors in that mode) 114 * - s_ecc1: reported by bu; this error will not be triggered in ChipKill 115 * ECC mode (the NB corrects all ECC in that mode) 116 * 117 * Single-bit errors are fed into a per-rank SERD engine; if a SERD engine 118 * trips we diagnose a fault.memory.page so that the response agent can 119 * retire the page that caused the trip. If the total number of pages 120 * faulted in this way on a single rank exceeds a threshold we will 121 * diagnose a fault.memory.dimm_sb against the containing. 122 * 123 * Multibit ChipKill-correctable errors are treated identically to 124 * single-bit errors, but via separate serd engines to allow distinct 125 * parameters if desired. 126 * 127 * Uncorrectable errors produce an immediate page fault and corresponding 128 * fault.memory.dimm_ue. 129 * 130 * Page faults are essentially internal - action is only required when 131 * they are accompanied by a dimm fault. As such we include message=0 132 * on page faults. 
133 */ 134 135event ereport.cpu.amd.ic.inf_sys_ecc1@chip/cpu{within(5s)}; 136event ereport.cpu.amd.dc.inf_sys_ecc1@chip/cpu{within(5s)}; 137event ereport.cpu.amd.bu.s_ecc1@chip/cpu{within(5s)}; 138event ereport.cpu.amd.nb.mem_ce@chip/cpu{within(5s)}; 139 140/* 141 * If the address is not valid then no resource member will be included 142 * in a nb.mem_ce or nb.mem_ue ereport. These cases should be rare. 143 * We will also discard all inf_sys_ecc1 events detected at the ic since they 144 * have no syndrome and therefore no resource information. 145 * We will discard such ereports. An alternative may be to SERD them 146 * on a per MC basis and trip if we see too many such events. 147 */ 148 149event upset.memory.discard1@chip/cpu; 150 151/* #PAGE# 152 * Single-bit correctable errors are diagnosed as upsets and feed into per-rank 153 * SERD engines which diagnose fault.memory.page_sb if they trip. 154 * 155 * Multi-bit correctable (via ChipKill) errors are diagnosed as upsets and feed 156 * into additional per-rank SERD engines which diagnose fault.memory.page_ck 157 * if they trip. 158 * 159 * The number of fault.memory.page and fault.memory.page_ck diagnosed is 160 * counted in stat engines for each type. These are used in deciding 161 * whether to declare a dimm faulty after repeated page faults. 162 */ 163 164#define PAGE_FIT 1 165#define PAGE_SB_COUNT 2 166#define PAGE_SB_TIME 72h 167#define PAGE_CK_COUNT 2 168#define PAGE_CK_TIME 72h 169 170/* 171 * The fraction of pages on a single rank that must be diagnosed as faulty 172 * with single correctable unit faults before we will fault the rank. 173 * Once we have faulted the rank we will continue to diagnose any further page 174 * faults on the rank up to some maximum multiple of the threshold at which 175 * we faulted the dimm. 
This allows us to potentially contain some fairly 176 * far-reaching but still limited-extent fault (such as a partial column 177 * failure) without getting carried away and allowing a single faulty rank to 178 * use up the entire system-imposed page retirenment limit (which, once 179 * reached, causes retirement request to have no effect other than to fill 180 * the fault manager cache and logs). 181 * 182 * This fraction is specified in basis points, where 100 basis points are 183 * equivalent to 1 percent. It is applied on a per-rank basis. 184 * 185 * The system imposes an absolute maximum on the number of pages it will 186 * retire; the current value is 10 basis points, or 0.1% of 'physmem'. Note 187 * that 'physmem' is reduced from installed memory pages by an amount 188 * reflecting permanent kernel memory allocations. This system page retire 189 * limit bounds the maximum real response to page faults across all ranks 190 * that fault manager response agents can effect, but it should not be confused 191 * with any diagnosis threshold (i.e., the number of faulty pages we are 192 * prepared to tolerate from a single rank before faulting the rank is 193 * distinct from the total number of pages we are prepared to retire from use 194 * in response to that and other faults). It is, however, desirable to 195 * arrange that the maximum number of pages we are prepared to fault from 196 * any one rank is less than the system-wide quota. 197 */ 198#define PAGE_RETIRE_LIMIT_BPS 5 /* or 0.05%; ~ 131 pages/GB %/ 199 200/* 201 * A macro to manipulate the above fraction. Given a size in bytes convert 202 * this to pages (4K pagesize) and calculate the number of those pages 203 * indicated by PAGE_RETIRE_LIMIT_BPS basis points. 204 */ 205#define _BPS_PGCNT(totalbytes) \ 206 ((((totalbytes) / 4096 ) * PAGE_RETIRE_LIMIT_BPS) / 10000) 207 208/* 209 * The single-correctable-unit threshold at which number of faulted pages 210 * on a rank we we fault the rank. 
We insist that this be at least 128 and 211 * never more than 512. 212 */ 213#define RANK_THRESH MIN(512, MAX(128, \ 214 _BPS_PGCNT(confprop(asru(chip/memory-controller/dimm/rank), "size")))) 215 216/* 217 * The maximum number of single-correctable-unit page faults we will diagnose 218 * on a single rank (must be greater than RANK_THRESH). We set 219 * this at twice the rank fault threshold. 220 */ 221#define RANK_PGFLT_MAX (2 * RANK_THRESH) 222 223engine stat.sbpgflt@chip/memory-controller/dimm/rank; 224engine stat.ckpgflt@chip/memory-controller/dimm/rank; 225 226event fault.memory.page_sb@chip/memory-controller/dimm/rank, 227 FITrate=PAGE_FIT, ASRU=dimm/rank, message=0, 228 count=stat.sbpgflt@chip/memory-controller/dimm/rank, 229 action=confcall("rewrite-ASRU"); /* rewrite ASRU to identify page in rank */ 230 231#define SB_PGFLTS (count(stat.sbpgflt@chip/memory-controller/dimm/rank)) 232 233event fault.memory.page_ck@chip/memory-controller/dimm/rank, 234 FITrate=PAGE_FIT, ASRU=dimm/rank, message=0, 235 count=stat.ckpgflt@chip/memory-controller/dimm/rank, 236 action=confcall("rewrite-ASRU"); /* rewrite ASRU to identify page in rank */ 237 238#define CK_PGFLTS (count(stat.ckpgflt@chip/memory-controller/dimm/rank)) 239 240#define RANK_PGFLT_LIMIT_REACHED \ 241 (SB_PGFLTS + CK_PGFLTS > RANK_PGFLT_MAX) 242 243event ereport.memory.page_sb_trip@chip/memory-controller/dimm/rank{within(5s)}; 244engine serd.memory.page_sb@chip/memory-controller/dimm/rank, 245 N=PAGE_SB_COUNT, T=PAGE_SB_TIME, method=persistent, 246 trip=ereport.memory.page_sb_trip@chip/memory-controller/dimm/rank; 247event upset.memory.page_sb@chip/memory-controller/dimm/rank, 248 engine=serd.memory.page_sb@chip/memory-controller/dimm/rank; 249 250event ereport.memory.page_ck_trip@chip/memory-controller/dimm/rank{within(5s)}; 251engine serd.memory.page_ck@chip/memory-controller/dimm/rank, 252 N=PAGE_CK_COUNT, T=PAGE_CK_TIME, method=persistent, 253 
trip=ereport.memory.page_ck_trip@chip/memory-controller/dimm/rank; 254event upset.memory.page_ck@chip/memory-controller/dimm/rank, 255 engine=serd.memory.page_ck@chip/memory-controller/dimm/rank; 256 257event upset.memory.overpgfltlimit@chip/memory-controller/dimm/rank; 258 259/* 260 * If we have not reached the per-rank limit on faulted pages then 261 * continue to explain ereport observations as upsets which can lead 262 * lead to page fault diagnoses if the serd engine trips. 263 */ 264prop upset.memory.page_sb@chip/memory-controller/dimm/rank 265 { CONTAINS_RANK && SINGLE_BIT_CE && !RANK_PGFLT_LIMIT_REACHED } (0)-> 266 ereport.cpu.amd.dc.inf_sys_ecc1@chip/cpu, 267 ereport.cpu.amd.bu.s_ecc1@chip/cpu, 268 ereport.cpu.amd.nb.mem_ce@chip/cpu; 269 270prop upset.memory.page_ck@chip/memory-controller/dimm/rank 271 { CONTAINS_RANK && MULTI_BIT_CE && !RANK_PGFLT_LIMIT_REACHED } (0)-> 272 /* no dc.inf_sys_ecc1 or bu.s_ecc1 in ChipKill mode */ 273 ereport.cpu.amd.nb.mem_ce@chip/cpu; 274 275/* 276 * If we have reached the per-rank limit on faulted pages then diagnose 277 * further observations on the rank to a engine-less upset (i.e., discard 278 * them). 
279 */ 280prop upset.memory.overpgfltlimit@chip/memory-controller/dimm/rank 281 { CONTAINS_RANK && RANK_PGFLT_LIMIT_REACHED } (1)-> 282 ereport.cpu.amd.dc.inf_sys_ecc1@chip/cpu, 283 ereport.cpu.amd.bu.s_ecc1@chip/cpu, 284 ereport.cpu.amd.nb.mem_ce@chip/cpu; 285 286prop fault.memory.page_sb@chip/memory-controller/dimm/rank (1)-> 287 ereport.memory.page_sb_trip@chip/memory-controller/dimm/rank; 288 289prop fault.memory.page_ck@chip/memory-controller/dimm/rank (1)-> 290 ereport.memory.page_ck_trip@chip/memory-controller/dimm/rank; 291 292prop fault.memory.page_sb@chip/memory-controller/dimm/rank 293 { CONTAINS_RANK && SET_ADDR && SET_OFFSET } (0)-> 294 ereport.cpu.amd.dc.inf_sys_ecc1@chip/cpu, 295 ereport.cpu.amd.bu.s_ecc1@chip/cpu, 296 ereport.cpu.amd.nb.mem_ce@chip/cpu; 297 298prop fault.memory.page_ck@chip/memory-controller/dimm/rank 299 { CONTAINS_RANK && SET_ADDR && SET_OFFSET } (0)-> 300 ereport.cpu.amd.dc.inf_sys_ecc1@chip/cpu, 301 ereport.cpu.amd.bu.s_ecc1@chip/cpu, 302 ereport.cpu.amd.nb.mem_ce@chip/cpu; 303 304/* 305 * Discard memory ereports that do not indicate a resource. 306 */ 307prop upset.memory.discard1@chip/cpu 308 { !RESOURCE_EXISTS } (1)-> 309 ereport.cpu.amd.ic.inf_sys_ecc1@chip/cpu, 310 ereport.cpu.amd.dc.inf_sys_ecc1@chip/cpu, 311 ereport.cpu.amd.bu.s_ecc1@chip/cpu, 312 ereport.cpu.amd.nb.mem_ce@chip/cpu; 313 314/* #DIMM_SCU# 315 * "Single-correctable-unit" DIMM faults are diagnosed when the total number of 316 * page faults (diagnosed from repeated single-bit or multibit-chipkills) 317 * from any one rank on that DIMM reaches a threshold. A "correctable unit" 318 * is a single bit in normal 64/8 ECC mode, or a single symbol in ChipKill 319 * 128/16 mode (i.e., nibble-aligned nibble for the code used on Opteron). 
320 * 321 * We do not stop diagnosing further single-bit page faults once we have 322 * declared a single-bit DIMM fault - we continue diagnosing them and 323 * response agents can continue to retire those pages up to the system-imposed 324 * retirement limit. 325 * 326 * Two distinct fault types may be diagnosed - fault.memory.dimm_sb and 327 * fault.memory.dimm_ck. Which one is diagnosed depends on whether we 328 * have reached the threshold for a majority of single-bit page faults or 329 * multibit page faults. 330 * 331 * Implementation: we maintain parallel SERD engines to the page_sb and 332 * page_ck engines, which trip in unison. On trip it generates a distinct 333 * ereport which we diagnose to a fault if the threshold has been 334 * reached, or to a throwaway upset if not. 335 * 336 */ 337 338#define DIMM_SB_FIT 2000 339#define DIMM_CK_FIT 4000 340 341event fault.memory.dimm_sb@chip/memory-controller/dimm/rank, 342 FITrate=DIMM_SB_FIT, FRU=dimm, ASRU=dimm, 343 action=confcall("rewrite-ASRU"); /* rewrite non-leaf ASRU in mem scheme */ 344 345event fault.memory.dimm_ck@chip/memory-controller/dimm/rank, 346 FITrate=DIMM_CK_FIT, FRU=dimm, ASRU=dimm, 347 action=confcall("rewrite-ASRU"); /* rewrite non-leaf ASRU in mem scheme */ 348 349event ereport.memory.dimm_sb_trip@chip/memory-controller/dimm/rank 350 { within(5s) }; 351engine serd.memory.dimm_sb@chip/memory-controller/dimm/rank, 352 N=PAGE_SB_COUNT, T=PAGE_SB_TIME, method=persistent, 353 trip=ereport.memory.dimm_sb_trip@chip/memory-controller/dimm/rank; 354event upset.memory.dimm_sb@chip/memory-controller/dimm/rank, 355 engine=serd.memory.dimm_sb@chip/memory-controller/dimm/rank; 356 357event ereport.memory.dimm_ck_trip@chip/memory-controller/dimm/rank 358 { within(5s) }; 359engine serd.memory.dimm_ck@chip/memory-controller/dimm/rank, 360 N=PAGE_CK_COUNT, T=PAGE_CK_TIME, method=persistent, 361 trip=ereport.memory.dimm_ck_trip@chip/memory-controller/dimm/rank; 362event 
upset.memory.dimm_ck@chip/memory-controller/dimm/rank, 363 engine=serd.memory.dimm_ck@chip/memory-controller/dimm/rank; 364 365event upset.memory.discard2@chip/memory-controller/dimm/rank; 366 367prop upset.memory.dimm_sb@chip/memory-controller/dimm/rank 368 { CONTAINS_RANK && SINGLE_BIT_CE } (0)-> 369 ereport.cpu.amd.dc.inf_sys_ecc1@chip/cpu, 370 ereport.cpu.amd.bu.s_ecc1@chip/cpu, 371 ereport.cpu.amd.nb.mem_ce@chip/cpu; 372 373prop upset.memory.dimm_ck@chip/memory-controller/dimm/rank 374 { CONTAINS_RANK && MULTI_BIT_CE } (0)-> 375 ereport.cpu.amd.nb.mem_ce@chip/cpu; 376 377/* 378 * The following two propogations diagnose a fault.memory.dimm_sb when 379 * either the dimm_sb or dimm_ck engine trips (for a new page fault) 380 * and the total number of page faults (sb and ck) exceeds the threshold 381 * value with the majority being from sb page faults. 382 */ 383prop fault.memory.dimm_sb@chip/memory-controller/dimm/rank (0)-> 384 ereport.memory.dimm_sb_trip@chip/memory-controller/dimm/rank 385 { SB_PGFLTS + CK_PGFLTS > RANK_THRESH && SB_PGFLTS > RANK_THRESH / 2 }; 386 387prop fault.memory.dimm_sb@chip/memory-controller/dimm/rank (0)-> 388 ereport.memory.dimm_ck_trip@chip/memory-controller/dimm/rank 389 { SB_PGFLTS + CK_PGFLTS > RANK_THRESH && SB_PGFLTS > RANK_THRESH / 2 }; 390 391/* 392 * The following two propogation diagnose a fault.memory.dimm_ck when 393 * either the dimm_sb or dimm_ck engine trip (for a new page fault) 394 * and the total number of page faults (sb and ck) exceeds the threshold 395 * value with the majority being from ck page faults. 
396 */ 397prop fault.memory.dimm_ck@chip/memory-controller/dimm/rank (0)-> 398 ereport.memory.dimm_sb_trip@chip/memory-controller/dimm/rank 399 { SB_PGFLTS + CK_PGFLTS > RANK_THRESH && CK_PGFLTS > RANK_THRESH / 2 }; 400 401prop fault.memory.dimm_ck@chip/memory-controller/dimm/rank (0)-> 402 ereport.memory.dimm_ck_trip@chip/memory-controller/dimm/rank 403 { SB_PGFLTS + CK_PGFLTS > RANK_THRESH && CK_PGFLTS > RANK_THRESH / 2 }; 404 405prop upset.memory.discard2@chip/memory-controller/dimm/rank (1)-> 406 ereport.memory.dimm_sb_trip@chip/memory-controller/dimm/rank, 407 ereport.memory.dimm_ck_trip@chip/memory-controller/dimm/rank; 408 409/* #DIMM_UE# 410 * #PAGE_UE# 411 * An uncorrectable multi-bit fault in a memory dimm can cause: 412 * 413 * - mem_ue : reported by nb for an access from a remote cpu 414 * - inf_sys_eccm : reported by ic or dc; the ic does not report a syndrome 415 * - s_eccm : reported by bu 416 * 417 * Note we use a SERD engine here simply as a way of ensuring that we get 418 * both dimm and page faults reported. 419 * 420 * Since on production systems we force HT Sync Flood on uncorrectable 421 * memory errors (if not already set as such by the BIOS, as it should be) 422 * we won't actually receive these ereports since the system will be reset. 
423 */ 424 425#define DIMM_UE_FIT 6000 426 427event ereport.cpu.amd.ic.inf_sys_eccm@chip/cpu{within(5s)}; 428event ereport.cpu.amd.dc.inf_sys_eccm@chip/cpu{within(5s)}; 429event ereport.cpu.amd.bu.s_eccm@chip/cpu{within(5s)}; 430event ereport.cpu.amd.nb.mem_ue@chip/cpu{within(5s)}; 431 432event fault.memory.dimm_ue@chip/memory-controller/dimm/rank, 433 FITrate=DIMM_UE_FIT, FRU=dimm, ASRU=dimm, 434 action=confcall("rewrite-ASRU"); /* rewrite non-leaf ASRU in mem scheme */ 435 436event fault.memory.page_ue@chip/memory-controller/dimm/rank, 437 FITrate=PAGE_FIT, ASRU=dimm/rank, message=0, 438 action=confcall("rewrite-ASRU"); /* rewrite ASRU to identify page in rank */ 439 440event ereport.memory.dimm_ue_trip@chip/memory-controller/dimm/rank{within(5s)}; 441engine serd.memory.dimm_ue@chip/memory-controller/dimm/rank, 442 N=0, T=1h, method=persistent, 443 trip=ereport.memory.dimm_ue_trip@chip/memory-controller/dimm/rank; 444event upset.memory.dimm_ue@chip/memory-controller/dimm/rank, 445 engine=serd.memory.dimm_ue@chip/memory-controller/dimm/rank; 446 447event ereport.memory.page_ue_trip@chip/memory-controller/dimm/rank{within(5s)}; 448engine serd.memory.page_ue@chip/memory-controller/dimm/rank, 449 N=0, T=1h, method=persistent, 450 trip=ereport.memory.page_ue_trip@chip/memory-controller/dimm/rank; 451event upset.memory.page_ue@chip/memory-controller/dimm/rank, 452 engine=serd.memory.page_ue@chip/memory-controller/dimm/rank; 453 454event upset.memory.discard3@chip/cpu; 455 456prop upset.memory.page_ue@chip/memory-controller/dimm/rank 457 { CONTAINS_RANK } (0)-> 458 ereport.cpu.amd.ic.inf_sys_eccm@chip/cpu, 459 ereport.cpu.amd.dc.inf_sys_eccm@chip/cpu, 460 ereport.cpu.amd.bu.s_eccm@chip/cpu, 461 ereport.cpu.amd.nb.mem_ue@chip/cpu; 462 463prop upset.memory.dimm_ue@chip/memory-controller/dimm/rank 464 { CONTAINS_RANK } (0)-> 465 ereport.cpu.amd.ic.inf_sys_eccm@chip/cpu, 466 ereport.cpu.amd.dc.inf_sys_eccm@chip/cpu, 467 ereport.cpu.amd.bu.s_eccm@chip/cpu, 468 
ereport.cpu.amd.nb.mem_ue@chip/cpu; 469 470prop fault.memory.page_ue@chip/memory-controller/dimm/rank (1)-> 471 ereport.memory.page_ue_trip@chip/memory-controller/dimm/rank; 472 473prop fault.memory.page_ue@chip/memory-controller/dimm/rank 474 { CONTAINS_RANK && SET_ADDR && SET_OFFSET } (0)-> 475 ereport.cpu.amd.ic.inf_sys_eccm@chip/cpu, 476 ereport.cpu.amd.dc.inf_sys_eccm@chip/cpu, 477 ereport.cpu.amd.bu.s_eccm@chip/cpu, 478 ereport.cpu.amd.nb.mem_ue@chip/cpu; 479 480prop fault.memory.dimm_ue@chip/memory-controller/dimm/rank (1)-> 481 ereport.memory.dimm_ue_trip@chip/memory-controller/dimm/rank; 482 483prop upset.memory.discard3@chip/cpu 484 { !RESOURCE_EXISTS } (1)-> 485 ereport.cpu.amd.ic.inf_sys_eccm@chip/cpu, 486 ereport.cpu.amd.dc.inf_sys_eccm@chip/cpu, 487 ereport.cpu.amd.bu.s_eccm@chip/cpu, 488 ereport.cpu.amd.nb.mem_ce@chip/cpu; 489 490/* #CSTESTFAIL# 491 * If the BIOS fails a chip-select during POST, or perhaps after a 492 * sync flood from an uncorrectable error, then on revision F and G it 493 * should mark that chip-select as TestFail in the CS Base register. 494 * When the memory-controller driver discovers all the MC configuration 495 * it notes such failed chip-selects and creates topology nodes for the 496 * chip-select and associated dimms and ranks, and produces an ereport for each 497 * failed chip-select with detector set to the memory-controller node 498 * and resource indicating the failed chip-select. 
499 */ 500 501event ereport.cpu.amd.mc.cs_testfail@chip/memory-controller{within(5s)}; 502 503event fault.memory.dimm_testfail@chip/memory-controller/dimm/rank, 504 FITrate=1000, ASRU=dimm, FRU=dimm, 505 action=confcall("rewrite-ASRU"); /* rewrite non-leaf ASRU in mem scheme */ 506 507event error.memory.cs_testfail@chip/memory-controller/chip-select; 508 509#define CONTAINS_CS (payloadprop_contains("resource", \ 510 asru(chip/memory-controller/chip-select))) 511 512prop error.memory.cs_testfail@chip/memory-controller/chip-select (1)-> 513 ereport.cpu.amd.mc.cs_testfail@chip/memory-controller 514 { CONTAINS_CS }; 515 516#define CSMATCH(s) \ 517 (confprop_defined(asru(chip/memory-controller/chip-select), s) && \ 518 confprop(asru(chip/memory-controller/chip-select), s) == \ 519 confprop(asru(chip/memory-controller/dimm/rank), "csname")) 520 521prop fault.memory.dimm_testfail@chip/memory-controller/dimm/rank (1)-> 522 error.memory.cs_testfail@chip/memory-controller/chip-select 523 { CSMATCH("dimm1-csname") || CSMATCH("dimm2-csname")}; 524 525/* #ADDRPAR# 526 * DRAM Command/Address Parity Errors. 527 * 528 * - dramaddr_par : reported by the nb; the NB status register includes 529 * a bit indicating which dram controller channel (A or B) experienced 530 * the error. 531 */ 532 533event ereport.cpu.amd.nb.dramaddr_par@chip/cpu{within(5s)}; 534 535event fault.cpu.amd.dramchannel@chip/memory-controller/dram-channel, 536 FITrate=1000, ASRU=dram-channel; 537 538#define GET_CHANNEL ($chan = (payloadprop("bank-status") >> 32 & 0x200) ? \ 539 1 : 0) 540 541prop fault.cpu.amd.dramchannel@chip/memory-controller/dram-channel[y] (0)-> 542 ereport.cpu.amd.nb.dramaddr_par@chip/cpu { GET_CHANNEL && $chan == y }; 543 544/* 545 * l2 cache data errors. 
546 */ 547 548#define L2CACHEDATA_FIT 1000 549#define L2CACHEDATA_SB_COUNT 3 550#define L2CACHEDATA_SB_TIME 12h 551 552event fault.cpu.amd.l2cachedata@chip/cpu, FITrate=L2CACHEDATA_FIT, 553 FRU=chip, ASRU=chip/cpu; 554event error.cpu.amd.l2cachedata_sb@chip/cpu; 555event error.cpu.amd.l2cachedata_mb@chip/cpu; 556 557prop fault.cpu.amd.l2cachedata@chip/cpu (1)-> 558 error.cpu.amd.l2cachedata_sb@chip/cpu, 559 error.cpu.amd.l2cachedata_mb@chip/cpu; 560 561/* #L2D_SINGLE# 562 * A single bit data array fault in an l2 cache can cause: 563 * 564 * - inf_l2_ecc1 : reported by ic on this cpu 565 * - inf_l2_ecc1 : reported by dc on this cpu 566 * - l2d_ecc1 : reported by bu on copyback or on snoop from another cpu 567 * 568 * Single-bit errors are diagnosed to cache upsets. SERD engines are used 569 * to count upsets resulting from CEs. 570 */ 571 572event ereport.cpu.amd.ic.inf_l2_ecc1@chip/cpu{within(5s)}; 573event ereport.cpu.amd.dc.inf_l2_ecc1@chip/cpu{within(5s)}; 574event ereport.cpu.amd.bu.l2d_ecc1@chip/cpu{within(5s)}; 575event ereport.cpu.amd.l2d_sb_trip@chip/cpu{within(5s)}; 576 577engine serd.cpu.amd.l2d_sb@chip/cpu, 578 N=L2CACHEDATA_SB_COUNT, T=L2CACHEDATA_SB_TIME, method=persistent, 579 trip=ereport.cpu.amd.l2d_sb_trip@chip/cpu; 580 581event upset.cpu.amd.l2d_sb@chip/cpu, 582 engine=serd.cpu.amd.l2d_sb@chip/cpu; 583 584prop upset.cpu.amd.l2d_sb@chip/cpu (1)-> 585 ereport.cpu.amd.ic.inf_l2_ecc1@chip/cpu, 586 ereport.cpu.amd.dc.inf_l2_ecc1@chip/cpu, 587 ereport.cpu.amd.bu.l2d_ecc1@chip/cpu; 588 589prop error.cpu.amd.l2cachedata_sb@chip/cpu (1)-> 590 ereport.cpu.amd.l2d_sb_trip@chip/cpu; 591 592prop fault.cpu.amd.l2cachedata@chip/cpu (0)-> 593 ereport.cpu.amd.ic.inf_l2_ecc1@chip/cpu, 594 ereport.cpu.amd.dc.inf_l2_ecc1@chip/cpu, 595 ereport.cpu.amd.bu.l2d_ecc1@chip/cpu; 596 597/* #L2D_MULTI# 598 * A multi-bit data array fault in an l2 cache can cause: 599 * 600 * - inf_l2_eccm : reported by ic on this cpu 601 * - inf_l2_eccm : reported by dc on this cpu 602 * - 
l2d_eccm : reported by bu on copyback or on snoop from another cpu 603 */ 604 605event ereport.cpu.amd.ic.inf_l2_eccm@chip/cpu{within(5s)}; 606event ereport.cpu.amd.dc.inf_l2_eccm@chip/cpu{within(5s)}; 607event ereport.cpu.amd.bu.l2d_eccm@chip/cpu{within(5s)}; 608 609prop error.cpu.amd.l2cachedata_mb@chip/cpu (1)-> 610 ereport.cpu.amd.ic.inf_l2_eccm@chip/cpu, 611 ereport.cpu.amd.dc.inf_l2_eccm@chip/cpu, 612 ereport.cpu.amd.bu.l2d_eccm@chip/cpu; 613 614prop fault.cpu.amd.l2cachedata@chip/cpu (0)-> 615 ereport.cpu.amd.ic.inf_l2_eccm@chip/cpu, 616 ereport.cpu.amd.dc.inf_l2_eccm@chip/cpu, 617 ereport.cpu.amd.bu.l2d_eccm@chip/cpu; 618 619/* 620 * l2 cache main tag errors 621 */ 622 623#define L2CACHETAG_FIT 1000 624#define L2CACHETAG_SB_COUNT 3 625#define L2CACHETAG_SB_TIME 12h 626 627event fault.cpu.amd.l2cachetag@chip/cpu, FITrate=L2CACHETAG_FIT, 628 FRU=chip, ASRU=chip/cpu; 629event error.cpu.amd.l2cachetag_sb@chip/cpu; 630event error.cpu.amd.l2cachetag_mb@chip/cpu; 631 632prop fault.cpu.amd.l2cachetag@chip/cpu (1)-> 633 error.cpu.amd.l2cachetag_sb@chip/cpu, 634 error.cpu.amd.l2cachetag_mb@chip/cpu; 635 636/* #L2T_SINGLE# 637 * A single bit tag array fault in an l2 cache can cause: 638 * 639 * - l2t_ecc1 : reported by bu on this cpu when detected during snoop 640 * - l2t_par : reported by bu on this cpu when detected other than during snoop 641 * 642 * Note that the bu.l2t_par ereport could be due to a single bit or multi bit 643 * event. If the l2t_sb_trip has already triggered it will be treated as another 644 * ce, otherwise it will be treated as a ue event. 
645 */ 646 647event ereport.cpu.amd.bu.l2t_ecc1@chip/cpu{within(5s)}; 648event ereport.cpu.amd.bu.l2t_par@chip/cpu{within(5s)}; 649event ereport.cpu.amd.l2t_sb_trip@chip/cpu{within(5s)}; 650 651engine serd.cpu.amd.l2t_sb@chip/cpu, 652 N=L2CACHETAG_SB_COUNT, T=L2CACHETAG_SB_TIME, method=persistent, 653 trip=ereport.cpu.amd.l2t_sb_trip@chip/cpu; 654 655event upset.cpu.amd.l2t_sb@chip/cpu, 656 engine=serd.cpu.amd.l2t_sb@chip/cpu; 657 658prop upset.cpu.amd.l2t_sb@chip/cpu (1)-> 659 ereport.cpu.amd.bu.l2t_ecc1@chip/cpu, 660 ereport.cpu.amd.bu.l2t_par@chip/cpu; 661 662prop error.cpu.amd.l2cachetag_sb@chip/cpu (1)-> 663 ereport.cpu.amd.l2t_sb_trip@chip/cpu; 664 665prop fault.cpu.amd.l2cachetag@chip/cpu (0)-> 666 ereport.cpu.amd.bu.l2t_ecc1@chip/cpu, 667 ereport.cpu.amd.bu.l2t_par@chip/cpu; 668 669/* #L2T_MULTI# 670 * A multi-bit tag array fault in an l2 cache can cause: 671 * 672 * - l2t_eccm : reported by bu on this cpu when detected during snoop 673 * - l2t_par : reported by bu on this cpu when detected other than during snoop 674 */ 675 676event ereport.cpu.amd.bu.l2t_eccm@chip/cpu{within(5s)}; 677 678prop error.cpu.amd.l2cachetag_mb@chip/cpu (1)-> 679 ereport.cpu.amd.bu.l2t_eccm@chip/cpu, 680 ereport.cpu.amd.bu.l2t_par@chip/cpu; 681 682prop fault.cpu.amd.l2cachetag@chip/cpu (0)-> 683 ereport.cpu.amd.bu.l2t_eccm@chip/cpu, 684 ereport.cpu.amd.bu.l2t_par@chip/cpu; 685 686/* #ICD_PAR# 687 * A data array parity fault in an I cache can cause: 688 * 689 * - data_par : reported by ic on this cpu 690 */ 691 692#define ICACHEDATA_FIT 1000 693#define ICACHEDATA_SB_COUNT 2 694#define ICACHEDATA_SB_TIME 168h 695 696event ereport.cpu.amd.ic.data_par@chip/cpu{within(5s)}; 697event ereport.cpu.amd.ic_dp_trip@chip/cpu{within(5s)}; 698 699event fault.cpu.amd.icachedata@chip/cpu, FITrate=ICACHEDATA_FIT, 700 FRU=chip, ASRU=chip/cpu; 701 702engine serd.cpu.amd.icachedata@chip/cpu, 703 N=ICACHEDATA_SB_COUNT, T=ICACHEDATA_SB_TIME, method=persistent, 704 
trip=ereport.cpu.amd.ic_dp_trip@chip/cpu; 705 706event upset.cpu.amd.icachedata@chip/cpu, 707 engine=serd.cpu.amd.icachedata@chip/cpu; 708 709prop upset.cpu.amd.icachedata@chip/cpu (1)-> 710 ereport.cpu.amd.ic.data_par@chip/cpu; 711 712prop fault.cpu.amd.icachedata@chip/cpu (1)-> 713 ereport.cpu.amd.ic_dp_trip@chip/cpu; 714 715prop fault.cpu.amd.icachedata@chip/cpu (0)-> 716 ereport.cpu.amd.ic.data_par@chip/cpu; 717 718/* #ICT_PAR# 719 * A tag array parity fault in an I cache can cause: 720 * 721 * - tag_par : reported by ic on this cpu 722 */ 723 724#define ICACHETAG_FIT 1000 725#define ICACHETAG_SB_COUNT 2 726#define ICACHETAG_SB_TIME 168h 727 728event ereport.cpu.amd.ic.tag_par@chip/cpu{within(5s)}; 729event ereport.cpu.amd.ic_tp_trip@chip/cpu{within(5s)}; 730 731event fault.cpu.amd.icachetag@chip/cpu, FITrate=ICACHETAG_FIT, 732 FRU=chip, ASRU=chip/cpu; 733 734engine serd.cpu.amd.icachetag@chip/cpu, 735 N=ICACHETAG_SB_COUNT, T=ICACHETAG_SB_TIME, method=persistent, 736 trip=ereport.cpu.amd.ic_tp_trip@chip/cpu; 737 738event upset.cpu.amd.icachetag@chip/cpu, 739 engine=serd.cpu.amd.icachetag@chip/cpu; 740 741prop upset.cpu.amd.icachetag@chip/cpu (1)-> 742 ereport.cpu.amd.ic.tag_par@chip/cpu; 743 744prop fault.cpu.amd.icachetag@chip/cpu (1)-> 745 ereport.cpu.amd.ic_tp_trip@chip/cpu; 746 747prop fault.cpu.amd.icachetag@chip/cpu (0)-> 748 ereport.cpu.amd.ic.tag_par@chip/cpu; 749 750/* #ICT_SNOOP# 751 * A snoop tag array parity fault in an I cache can cause: 752 * 753 * - stag_par : reported by ic on this cpu 754 */ 755 756#define ICACHESTAG_FIT 1000 757 758event ereport.cpu.amd.ic.stag_par@chip/cpu{within(5s)}; 759 760event fault.cpu.amd.icachestag@chip/cpu, FITrate=ICACHESTAG_FIT, 761 FRU=chip, ASRU=chip/cpu; 762 763prop fault.cpu.amd.icachestag@chip/cpu (1)-> 764 ereport.cpu.amd.ic.stag_par@chip/cpu; 765 766/* #ICTLB_1# 767 * An l1tlb parity fault in an I cache can cause: 768 * 769 * - l1tlb_par : reported by ic on this cpu 770 */ 771 772#define ICACHEL1TLB_FIT 1000 
#define ICACHEL1TLB_SB_COUNT 2
#define ICACHEL1TLB_SB_TIME 168h

event ereport.cpu.amd.ic.l1tlb_par@chip/cpu{within(5s)};
event ereport.cpu.amd.ic_l1tlb_trip@chip/cpu{within(5s)};

event fault.cpu.amd.l1itlb@chip/cpu, FITrate=ICACHEL1TLB_FIT,
    FRU=chip, ASRU=chip/cpu;

engine serd.cpu.amd.l1itlb@chip/cpu,
    N=ICACHEL1TLB_SB_COUNT, T=ICACHEL1TLB_SB_TIME, method=persistent,
    trip=ereport.cpu.amd.ic_l1tlb_trip@chip/cpu;

event upset.cpu.amd.l1itlb@chip/cpu,
    engine=serd.cpu.amd.l1itlb@chip/cpu;

prop upset.cpu.amd.l1itlb@chip/cpu (1)->
    ereport.cpu.amd.ic.l1tlb_par@chip/cpu;

prop fault.cpu.amd.l1itlb@chip/cpu (1)->
    ereport.cpu.amd.ic_l1tlb_trip@chip/cpu;

prop fault.cpu.amd.l1itlb@chip/cpu (0)->
    ereport.cpu.amd.ic.l1tlb_par@chip/cpu;

/* #ICTLB_2#
 * An l2tlb parity fault in an I cache can cause:
 *
 * - l2tlb_par : reported by ic on this cpu
 */

#define ICACHEL2TLB_FIT 1000
#define ICACHEL2TLB_SB_COUNT 2
#define ICACHEL2TLB_SB_TIME 168h

event ereport.cpu.amd.ic.l2tlb_par@chip/cpu{within(5s)};
event ereport.cpu.amd.ic_l2tlb_trip@chip/cpu{within(5s)};

event fault.cpu.amd.l2itlb@chip/cpu, FITrate=ICACHEL2TLB_FIT,
    FRU=chip, ASRU=chip/cpu;

engine serd.cpu.amd.l2itlb@chip/cpu,
    N=ICACHEL2TLB_SB_COUNT, T=ICACHEL2TLB_SB_TIME, method=persistent,
    trip=ereport.cpu.amd.ic_l2tlb_trip@chip/cpu;

event upset.cpu.amd.l2itlb@chip/cpu,
    engine=serd.cpu.amd.l2itlb@chip/cpu;

prop upset.cpu.amd.l2itlb@chip/cpu (1)->
    ereport.cpu.amd.ic.l2tlb_par@chip/cpu;

prop fault.cpu.amd.l2itlb@chip/cpu (1)->
    ereport.cpu.amd.ic_l2tlb_trip@chip/cpu;

prop fault.cpu.amd.l2itlb@chip/cpu (0)->
    ereport.cpu.amd.ic.l2tlb_par@chip/cpu;

/*
 * dcache data errors
 */

#define DCACHEDATA_FIT 1000
#define DCACHEDATA_SB_COUNT 2
#define DCACHEDATA_SB_TIME 168h

event fault.cpu.amd.dcachedata@chip/cpu, FITrate=DCACHEDATA_FIT,
    FRU=chip, ASRU=chip/cpu;
event error.cpu.amd.dcachedata_sb@chip/cpu;
event error.cpu.amd.dcachedata_mb@chip/cpu;

prop fault.cpu.amd.dcachedata@chip/cpu (1)->
    error.cpu.amd.dcachedata_sb@chip/cpu,
    error.cpu.amd.dcachedata_mb@chip/cpu;

/* #DCD_SINGLE#
 * A single bit data array fault in a D cache can cause:
 *
 * - data_ecc1 : reported by dc on this cpu by scrubber
 * - data_ecc1_uc : reported by dc on this cpu other than by scrubber
 *
 * Make data_ecc1_uc fault immediately as it may have caused a panic
 */

event ereport.cpu.amd.dc.data_ecc1@chip/cpu{within(5s)};
event ereport.cpu.amd.dc.data_ecc1_uc@chip/cpu{within(5s)};
event ereport.cpu.amd.dc_sb_trip@chip/cpu{within(5s)};

engine serd.cpu.amd.dc_sb@chip/cpu,
    N=DCACHEDATA_SB_COUNT, T=DCACHEDATA_SB_TIME, method=persistent,
    trip=ereport.cpu.amd.dc_sb_trip@chip/cpu;

/*
 * The _uc engine is declared with N=0 and shares the dc_sb trip ereport.
 * Its window is written as 1h to match the unit spelling used by every
 * other engine in this file.
 */
engine serd.cpu.amd.dc_sb_uc@chip/cpu,
    N=0, T=1h, method=persistent,
    trip=ereport.cpu.amd.dc_sb_trip@chip/cpu;

event upset.cpu.amd.dc_sb@chip/cpu,
    engine=serd.cpu.amd.dc_sb@chip/cpu;

event upset.cpu.amd.dc_sb_uc@chip/cpu,
    engine=serd.cpu.amd.dc_sb_uc@chip/cpu;

prop upset.cpu.amd.dc_sb@chip/cpu (1)->
    ereport.cpu.amd.dc.data_ecc1@chip/cpu;

prop upset.cpu.amd.dc_sb_uc@chip/cpu (1)->
    ereport.cpu.amd.dc.data_ecc1_uc@chip/cpu;

prop error.cpu.amd.dcachedata_sb@chip/cpu (1)->
    ereport.cpu.amd.dc_sb_trip@chip/cpu;

prop fault.cpu.amd.dcachedata@chip/cpu (0)->
    ereport.cpu.amd.dc.data_ecc1@chip/cpu,
    ereport.cpu.amd.dc.data_ecc1_uc@chip/cpu;

/* #DCD_MULTI#
 * A multi-bit data array fault in a D cache can cause:
 *
 * - data_eccm : reported by dc on this cpu
 */

event ereport.cpu.amd.dc.data_eccm@chip/cpu{within(5s)};

prop error.cpu.amd.dcachedata_mb@chip/cpu (1)->
    ereport.cpu.amd.dc.data_eccm@chip/cpu;

prop fault.cpu.amd.dcachedata@chip/cpu (0)->
    ereport.cpu.amd.dc.data_eccm@chip/cpu;

/* #DCT_PAR#
 * A tag array parity fault in a D cache can cause:
 *
 * - tag_par : reported by dc on this cpu
 */

#define DCACHETAG_FIT 1000

event ereport.cpu.amd.dc.tag_par@chip/cpu{within(5s)};

event fault.cpu.amd.dcachetag@chip/cpu, FITrate=DCACHETAG_FIT,
    FRU=chip, ASRU=chip/cpu;

prop fault.cpu.amd.dcachetag@chip/cpu (1)->
    ereport.cpu.amd.dc.tag_par@chip/cpu;

/* #DCT_SNOOP#
 * A snoop tag array parity fault in a D cache can cause:
 *
 * - stag_par : reported by dc on this cpu
 */

#define DCACHESTAG_FIT 1000

event ereport.cpu.amd.dc.stag_par@chip/cpu{within(5s)};

event fault.cpu.amd.dcachestag@chip/cpu, FITrate=DCACHESTAG_FIT,
    FRU=chip, ASRU=chip/cpu;

prop fault.cpu.amd.dcachestag@chip/cpu (1)->
    ereport.cpu.amd.dc.stag_par@chip/cpu;

/* #DCTLB_1#
 * An l1tlb parity fault in a D cache can cause:
 *
 * - l1tlb_par : reported by dc on this cpu
 */

#define L1DTLB_FIT 1000

event ereport.cpu.amd.dc.l1tlb_par@chip/cpu{within(5s)};

event fault.cpu.amd.l1dtlb@chip/cpu, FITrate=L1DTLB_FIT,
    FRU=chip, ASRU=chip/cpu;

prop fault.cpu.amd.l1dtlb@chip/cpu (1)->
    ereport.cpu.amd.dc.l1tlb_par@chip/cpu;

/* #DCTLB_2#
 * An l2tlb parity fault in a D cache can cause:
 *
 * - l2tlb_par : reported by dc on this cpu
 */

#define L2DTLB_FIT 1000

event ereport.cpu.amd.dc.l2tlb_par@chip/cpu{within(5s)};

event fault.cpu.amd.l2dtlb@chip/cpu, FITrate=L2DTLB_FIT,
    FRU=chip, ASRU=chip/cpu;

prop fault.cpu.amd.l2dtlb@chip/cpu (1)->
    ereport.cpu.amd.dc.l2tlb_par@chip/cpu;

/* #MISC#
 * Ereports that should not normally happen and which we will discard
 * without diagnosis if they do.
These fall into a few categories: 968 * 969 * - the corresponding detector is not enabled, typically because 970 * detection/handling of the event is taking place elsewhere 971 * (nb.ma, nb.ta, ls.rde, ic.rdde, bu.s_rde, nb.gart_walk) 972 * - the event is associated with a sync flood so even if the detector is 973 * enabled we will never handle the event and generate an ereport *and* 974 * even if the ereport did arrive we could perform no useful diagnosis 975 * e.g., the NB can be configured for sync flood on nb.mem_eccm 976 * but we don't choose to discard that ereport here since we could have 977 * made a useful diagnosis from it had it been delivered 978 * (nb.ht_sync, nb.ht_crc) 979 * - events that will be accompanied by an immediate panic and 980 * delivery of the ereport during subsequent reboot but from 981 * which no useful diagnosis can be made. (nb.rmw, nb.wdog) 982 * 983 * Ereports for all of these can be generated by error simulation and 984 * injection. We will perform a null diagnosos of all these ereports in order 985 * to avoid "no subscription" complaints during test harness runs. 
986 */ 987 988event ereport.cpu.amd.nb.ma@cpu{within(5s)}; 989event ereport.cpu.amd.nb.ta@cpu{within(5s)}; 990event ereport.cpu.amd.ls.s_rde@cpu{within(5s)}; 991event ereport.cpu.amd.ic.rdde@cpu{within(5s)}; 992event ereport.cpu.amd.bu.s_rde@cpu{within(5s)}; 993event ereport.cpu.amd.nb.gart_walk@cpu{within(5s)}; 994event ereport.cpu.amd.nb.ht_sync@cpu{within(5s)}; 995event ereport.cpu.amd.nb.ht_crc@cpu{within(5s)}; 996event ereport.cpu.amd.nb.rmw@cpu{within(5s)}; 997event ereport.cpu.amd.nb.wdog@cpu{within(5s)}; 998event ereport.cpu.amd.unknown@cpu{within(5s)}; 999 1000event upset.null_diag@cpu; 1001 1002prop upset.null_diag@cpu (1)-> 1003 ereport.cpu.amd.nb.ma@cpu, 1004 ereport.cpu.amd.nb.ta@cpu, 1005 ereport.cpu.amd.ls.s_rde@cpu, 1006 ereport.cpu.amd.ic.rdde@cpu, 1007 ereport.cpu.amd.bu.s_rde@cpu, 1008 ereport.cpu.amd.nb.gart_walk@cpu, 1009 ereport.cpu.amd.nb.ht_sync@cpu, 1010 ereport.cpu.amd.nb.ht_crc@cpu, 1011 ereport.cpu.amd.nb.rmw@cpu, 1012 ereport.cpu.amd.nb.wdog@cpu, 1013 ereport.cpu.amd.unknown@cpu; 1014