xref: /illumos-gate/usr/src/cmd/fm/eversholt/files/i386/i86pc/amd64.esc (revision b5d991cd75971f6bf7a801452ede3fe1667ddfde)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#pragma ident	"%Z%%M%	%I%	%E% SMI"
28
29#pragma dictionary "AMD"
30
31/*
32 * Eversholt rules for the AMD Opteron CPU/Memory
33 */
34
35fru motherboard;
36fru chip;
37fru dimm;
38
39asru chip/cpu;
40asru dimm;
41asru dimm/rank;
42asru dram-channel;
43asru chip/memory-controller/chip-select;
44
45#define	MAX(x, y) ((x) >= (y) ? (x) : (y))
46#define	MIN(x, y) ((x) <= (y) ? (x) : (y))
47
48/*
49 * SET_ADDR and SET_OFFSET are used to set a payload value in the fault that
50 * we diagnose for page faults, to record the physical address of the faulting
51 * page.  The "asru-" prefix is hooked in the "rewrite-ASRU" confcalls made on
52 * diagnosis of associated faults when the libtopo mem scheme rewrites the
53 * asru in "mem" scheme.
54 */
55#define	SET_ADDR (setpayloadprop("asru-physaddr", payloadprop("IA32_MCi_ADDR")))
56
57#define	SET_OFFSET (setpayloadprop("asru-offset", \
58	payloadprop("resource[0].hc-specific.offset")))
59
60/*
61 * RESOURCE_EXISTS is true if a member with name "resource" exists in the
62 * payload - regardless of type (e.g., nvlist or nvlist array) or value.
63 */
64#define	RESOURCE_EXISTS	(payloadprop_defined("resource"))
65
66/*
67 * CONTAINS_RANK is true if the "resource" nvlist array (as used in memory
68 * ereports) exists and one of its members matches the path for the
69 * rank node or its parent dimm node.  Our memory propagations are of the form
70 *
71 * "prop foo@chip/memory-controller/dimm/rank -> blah@chip/cpu"
72 *
73 * since cpus detect memory errors;  in eversholt such a propagation, where
74 * the lhs path and rhs path do not match, expands to the cross-product of
75 * all dimms, ranks and cpus on the same chip (since chip appears in the
76 * path on both sides).  We use CONTAINS_RANK to constrain the propagation
77 * such that it only happens if the payload resource matches the rank.
78 */
79#define	CONTAINS_RANK (payloadprop_contains("resource", \
80	asru(chip/memory-controller/dimm/rank)) \
81	|| payloadprop_contains("resource", \
82	asru(chip/memory-controller/dimm)))
83
84/*
85 * The following will tell us whether a syndrome that is known to be
86 * correctable (from a mem_ce ereport) is single-bit or multi-bit.  For a
87 * correctable ChipKill syndrome the number of bits set in the lowest
88 * nibble indicates how many bits were in error.
89 */
90
91#define	CBITMASK(synd) ((synd) & 0xf)
92
93#define	CKSINGLE(synd)							\
94	((synd) == 0 ||							\
95	(CBITMASK(synd) == 0x1 || CBITMASK(synd) == 0x2 ||		\
96	CBITMASK(synd) == 0x4 || CBITMASK(synd) == 0x8))
97
98#define	SINGLE_BIT_CE							\
99	(payloadprop("syndrome-type") == "E" ||				\
100	(payloadprop("syndrome-type") == "C" &&				\
101	CKSINGLE(payloadprop("syndrome"))))
102
103#define	MULTI_BIT_CE							\
104	(payloadprop("syndrome-type") == "C" &&				\
105	!CKSINGLE(payloadprop("syndrome")))
106
107/*
108 * A single bit fault in a memory rank can cause:
109 *
110 *  - mem_ce : reported by nb
111 *  - inf_sys_ecc1: reported by ic or dc; inf_sys_ecc1 errors detected at the
112 *    ic do not record a syndrome; these errors will not be triggered in
113 *    ChipKill ECC mode (the NB corrects all ECC errors in that mode)
114 *  - s_ecc1: reported by bu; this error will not be triggered in ChipKill
115 *    ECC mode (the NB corrects all ECC in that mode)
116 *
117 * Single-bit errors are fed into a per-rank SERD engine; if a SERD engine
118 * trips we diagnose a fault.memory.page so that the response agent can
119 * retire the page that caused the trip.  If the total number of pages
120 * faulted in this way on a single rank exceeds a threshold we will
121 * diagnose a fault.memory.dimm_sb against the containing dimm.
122 *
123 * Multibit ChipKill-correctable errors are treated identically to
124 * single-bit errors, but via separate serd engines to allow distinct
125 * parameters if desired.
126 *
127 * Uncorrectable errors produce an immediate page fault and corresponding
128 * fault.memory.dimm_ue.
129 *
130 * Page faults are essentially internal - action is only required when
131 * they are accompanied by a dimm fault.  As such we include message=0
132 * on page faults.
133 */
134
135event ereport.cpu.amd.ic.inf_sys_ecc1@chip/cpu{within(5s)};
136event ereport.cpu.amd.dc.inf_sys_ecc1@chip/cpu{within(5s)};
137event ereport.cpu.amd.bu.s_ecc1@chip/cpu{within(5s)};
138event ereport.cpu.amd.nb.mem_ce@chip/cpu{within(5s)};
139
140/*
141 * If the address is not valid then no resource member will be included
142 * in a nb.mem_ce or nb.mem_ue ereport.  These cases should be rare.
143 * We will also discard all inf_sys_ecc1 events detected at the ic since they
144 * have no syndrome and therefore no resource information.
145 * We will discard such ereports.  An alternative may be to SERD them
146 * on a per MC basis and trip if we see too many such events.
147 */
148
149event upset.memory.discard1@chip/cpu;
150
151/*								#PAGE#
152 * Single-bit correctable errors are diagnosed as upsets and feed into per-rank
153 * SERD engines which diagnose fault.memory.page_sb if they trip.
154 *
155 * Multi-bit correctable (via ChipKill) errors are diagnosed as upsets and feed
156 * into additional per-rank SERD engines which diagnose fault.memory.page_ck
157 * if they trip.
158 *
159 * The number of fault.memory.page_sb and fault.memory.page_ck diagnosed is
160 * counted in stat engines for each type.  These are used in deciding
161 * whether to declare a dimm faulty after repeated page faults.
162 */
163
164#define PAGE_FIT		1	/* FITrate of the page_sb/page_ck/page_ue faults */
165#define PAGE_SB_COUNT		2	/* N of serd.memory.page_sb and dimm_sb */
166#define PAGE_SB_TIME		72h	/* T of serd.memory.page_sb and dimm_sb */
167#define	PAGE_CK_COUNT		2	/* N of serd.memory.page_ck and dimm_ck */
168#define	PAGE_CK_TIME		72h	/* T of serd.memory.page_ck and dimm_ck */
169
170/*
171 * The fraction of pages on a single rank that must be diagnosed as faulty
172 * with single correctable unit faults before we will fault the rank.
173 * Once we have faulted the rank we will continue to diagnose any further page
174 * faults on the rank up to some maximum multiple of the threshold at which
175 * we faulted the dimm.  This allows us to potentially contain some fairly
176 * far-reaching but still limited-extent fault (such as a partial column
177 * failure) without getting carried away and allowing a single faulty rank to
178 * use up the entire system-imposed page retirement limit (which, once
179 * reached, causes retirement requests to have no effect other than to fill
180 * the fault manager cache and logs).
181 *
182 * This fraction is specified in basis points, where 100 basis points are
183 * equivalent to 1 percent.  It is applied on a per-rank basis.
184 *
185 * The system imposes an absolute maximum on the number of pages it will
186 * retire;  the current value is 10 basis points, or 0.1% of 'physmem'.  Note
187 * that 'physmem' is reduced from installed memory pages by an amount
188 * reflecting permanent kernel memory allocations.  This system page retire
189 * limit bounds the maximum real response to page faults across all ranks
190 * that fault manager response agents can effect, but it should not be confused
191 * with any diagnosis threshold (i.e., the number of faulty pages we are
192 * prepared to tolerate from a single rank before faulting the rank is
193 * distinct from the total number of pages we are prepared to retire from use
194 * in response to that and other faults).  It is, however, desirable to
195 * arrange that the maximum number of pages we are prepared to fault from
196 * any one rank is less than the system-wide quota.
197 */
198#define	PAGE_RETIRE_LIMIT_BPS	5		/* or 0.05%; ~ 131 pages/GB */
199
200/*
201 * A macro to manipulate the above fraction.  Given a size in bytes convert
202 * this to pages (4K pagesize) and calculate the number of those pages
203 * indicated by PAGE_RETIRE_LIMIT_BPS basis points.
204 */
205#define	_BPS_PGCNT(totalbytes) \
206	((((totalbytes) / 4096 ) * PAGE_RETIRE_LIMIT_BPS) / 10000)
207
208/*
209 * The single-correctable-unit threshold: the number of faulted pages on
210 * a rank at which we fault the rank.  We insist that this be at least 128
211 * and never more than 512.
212 */
213#define	RANK_THRESH MIN(512, MAX(128, \
214	_BPS_PGCNT(confprop(asru(chip/memory-controller/dimm/rank), "size"))))
215
216/*
217 * The maximum number of single-correctable-unit page faults we will diagnose
218 * on a single rank (must be greater than RANK_THRESH).  We set
219 * this at twice the rank fault threshold.
220 */
221#define	RANK_PGFLT_MAX (2 * RANK_THRESH)
222
223engine stat.sbpgflt@chip/memory-controller/dimm/rank;
224engine stat.ckpgflt@chip/memory-controller/dimm/rank;
225
226event fault.memory.page_sb@chip/memory-controller/dimm/rank,
227    FITrate=PAGE_FIT, ASRU=dimm/rank, message=0,
228    count=stat.sbpgflt@chip/memory-controller/dimm/rank,
229    action=confcall("rewrite-ASRU"); /* rewrite ASRU to identify page in rank */
230
231#define	SB_PGFLTS (count(stat.sbpgflt@chip/memory-controller/dimm/rank))
232
233event fault.memory.page_ck@chip/memory-controller/dimm/rank,
234    FITrate=PAGE_FIT, ASRU=dimm/rank, message=0,
235    count=stat.ckpgflt@chip/memory-controller/dimm/rank,
236    action=confcall("rewrite-ASRU"); /* rewrite ASRU to identify page in rank */
237
238#define	CK_PGFLTS (count(stat.ckpgflt@chip/memory-controller/dimm/rank))
239
240#define	RANK_PGFLT_LIMIT_REACHED \
241    (SB_PGFLTS + CK_PGFLTS > RANK_PGFLT_MAX)
242
243event ereport.memory.page_sb_trip@chip/memory-controller/dimm/rank{within(5s)};
244engine serd.memory.page_sb@chip/memory-controller/dimm/rank,
245    N=PAGE_SB_COUNT, T=PAGE_SB_TIME, method=persistent,
246    trip=ereport.memory.page_sb_trip@chip/memory-controller/dimm/rank;
247event upset.memory.page_sb@chip/memory-controller/dimm/rank,
248    engine=serd.memory.page_sb@chip/memory-controller/dimm/rank;
249
250event ereport.memory.page_ck_trip@chip/memory-controller/dimm/rank{within(5s)};
251engine serd.memory.page_ck@chip/memory-controller/dimm/rank,
252    N=PAGE_CK_COUNT, T=PAGE_CK_TIME, method=persistent,
253    trip=ereport.memory.page_ck_trip@chip/memory-controller/dimm/rank;
254event upset.memory.page_ck@chip/memory-controller/dimm/rank,
255    engine=serd.memory.page_ck@chip/memory-controller/dimm/rank;
256
257event upset.memory.overpgfltlimit@chip/memory-controller/dimm/rank;
258
259/*
260 * If we have not reached the per-rank limit on faulted pages then
261 * continue to explain ereport observations as upsets which can lead
262 * to page fault diagnoses if the serd engine trips.
263 */
264prop upset.memory.page_sb@chip/memory-controller/dimm/rank
265    { CONTAINS_RANK && SINGLE_BIT_CE && !RANK_PGFLT_LIMIT_REACHED } (0)->
266    ereport.cpu.amd.dc.inf_sys_ecc1@chip/cpu,
267    ereport.cpu.amd.bu.s_ecc1@chip/cpu,
268    ereport.cpu.amd.nb.mem_ce@chip/cpu;
269
270prop upset.memory.page_ck@chip/memory-controller/dimm/rank
271    { CONTAINS_RANK && MULTI_BIT_CE && !RANK_PGFLT_LIMIT_REACHED } (0)->
272    /* no dc.inf_sys_ecc1 or bu.s_ecc1 in ChipKill mode */
273    ereport.cpu.amd.nb.mem_ce@chip/cpu;
274
275/*
276 * If we have reached the per-rank limit on faulted pages then diagnose
277 * further observations on the rank to an engine-less upset (i.e., discard
278 * them).
279 */
280prop upset.memory.overpgfltlimit@chip/memory-controller/dimm/rank
281    { CONTAINS_RANK && RANK_PGFLT_LIMIT_REACHED } (1)->
282    ereport.cpu.amd.dc.inf_sys_ecc1@chip/cpu,
283    ereport.cpu.amd.bu.s_ecc1@chip/cpu,
284    ereport.cpu.amd.nb.mem_ce@chip/cpu;
285
286prop fault.memory.page_sb@chip/memory-controller/dimm/rank (1)->
287    ereport.memory.page_sb_trip@chip/memory-controller/dimm/rank;
288
289prop fault.memory.page_ck@chip/memory-controller/dimm/rank (1)->
290    ereport.memory.page_ck_trip@chip/memory-controller/dimm/rank;
291
292prop fault.memory.page_sb@chip/memory-controller/dimm/rank
293    { CONTAINS_RANK && SET_ADDR && SET_OFFSET } (0)->
294    ereport.cpu.amd.dc.inf_sys_ecc1@chip/cpu,
295    ereport.cpu.amd.bu.s_ecc1@chip/cpu,
296    ereport.cpu.amd.nb.mem_ce@chip/cpu;
297
298prop fault.memory.page_ck@chip/memory-controller/dimm/rank
299    { CONTAINS_RANK && SET_ADDR && SET_OFFSET } (0)->
300    ereport.cpu.amd.dc.inf_sys_ecc1@chip/cpu,
301    ereport.cpu.amd.bu.s_ecc1@chip/cpu,
302    ereport.cpu.amd.nb.mem_ce@chip/cpu;
303
304/*
305 * Discard memory ereports that do not indicate a resource.
306 */
307prop upset.memory.discard1@chip/cpu
308    { !RESOURCE_EXISTS } (1)->
309    ereport.cpu.amd.ic.inf_sys_ecc1@chip/cpu,
310    ereport.cpu.amd.dc.inf_sys_ecc1@chip/cpu,
311    ereport.cpu.amd.bu.s_ecc1@chip/cpu,
312    ereport.cpu.amd.nb.mem_ce@chip/cpu;
313
314/*								#DIMM_SCU#
315 * "Single-correctable-unit" DIMM faults are diagnosed when the total number of
316 * page faults (diagnosed from repeated single-bit or multibit-chipkills)
317 * from any one rank on that DIMM reaches a threshold.  A "correctable unit"
318 * is a single bit in normal 64/8 ECC mode, or a single symbol in ChipKill
319 * 128/16 mode (i.e., nibble-aligned nibble for the code used on Opteron).
320 *
321 * We do not stop diagnosing further single-bit page faults once we have
322 * declared a single-bit DIMM fault - we continue diagnosing them and
323 * response agents can continue to retire those pages up to the system-imposed
324 * retirement limit.
325 *
326 * Two distinct fault types may be diagnosed - fault.memory.dimm_sb and
327 * fault.memory.dimm_ck.  Which one is diagnosed depends on whether we
328 * have reached the threshold for a majority of single-bit page faults or
329 * multibit page faults.
330 *
331 * Implementation: we maintain parallel SERD engines to the page_sb and
332 * page_ck engines, which trip in unison.  On trip it generates a distinct
333 * ereport which we diagnose to a fault if the threshold has been
334 * reached, or to a throwaway upset if not.
335 *
336 */
337
338#define DIMM_SB_FIT		2000
339#define DIMM_CK_FIT		4000
340
341event fault.memory.dimm_sb@chip/memory-controller/dimm/rank,
342    FITrate=DIMM_SB_FIT, FRU=dimm, ASRU=dimm,
343    action=confcall("rewrite-ASRU"); /* rewrite non-leaf ASRU in mem scheme */
344
345event fault.memory.dimm_ck@chip/memory-controller/dimm/rank,
346    FITrate=DIMM_CK_FIT, FRU=dimm, ASRU=dimm,
347    action=confcall("rewrite-ASRU"); /* rewrite non-leaf ASRU in mem scheme */
348
349event ereport.memory.dimm_sb_trip@chip/memory-controller/dimm/rank
350	{ within(5s) };
351engine serd.memory.dimm_sb@chip/memory-controller/dimm/rank,
352    N=PAGE_SB_COUNT, T=PAGE_SB_TIME, method=persistent,
353    trip=ereport.memory.dimm_sb_trip@chip/memory-controller/dimm/rank;
354event upset.memory.dimm_sb@chip/memory-controller/dimm/rank,
355    engine=serd.memory.dimm_sb@chip/memory-controller/dimm/rank;
356
357event ereport.memory.dimm_ck_trip@chip/memory-controller/dimm/rank
358	{ within(5s) };
359engine serd.memory.dimm_ck@chip/memory-controller/dimm/rank,
360    N=PAGE_CK_COUNT, T=PAGE_CK_TIME, method=persistent,
361    trip=ereport.memory.dimm_ck_trip@chip/memory-controller/dimm/rank;
362event upset.memory.dimm_ck@chip/memory-controller/dimm/rank,
363    engine=serd.memory.dimm_ck@chip/memory-controller/dimm/rank;
364
365event upset.memory.discard2@chip/memory-controller/dimm/rank;
366
367prop upset.memory.dimm_sb@chip/memory-controller/dimm/rank
368    { CONTAINS_RANK && SINGLE_BIT_CE } (0)->
369    ereport.cpu.amd.dc.inf_sys_ecc1@chip/cpu,
370    ereport.cpu.amd.bu.s_ecc1@chip/cpu,
371    ereport.cpu.amd.nb.mem_ce@chip/cpu;
372
373prop upset.memory.dimm_ck@chip/memory-controller/dimm/rank
374    { CONTAINS_RANK && MULTI_BIT_CE } (0)->
375    ereport.cpu.amd.nb.mem_ce@chip/cpu;
376
377/*
378 * The following two propagations diagnose a fault.memory.dimm_sb when
379 * either the dimm_sb or dimm_ck engine trips (for a new page fault)
380 * and the total number of page faults (sb and ck) exceeds the threshold
381 * value with the majority being from sb page faults.
382 */
383prop fault.memory.dimm_sb@chip/memory-controller/dimm/rank (0)->
384    ereport.memory.dimm_sb_trip@chip/memory-controller/dimm/rank
385    { SB_PGFLTS + CK_PGFLTS > RANK_THRESH && SB_PGFLTS > RANK_THRESH / 2 };
386
387prop fault.memory.dimm_sb@chip/memory-controller/dimm/rank (0)->
388    ereport.memory.dimm_ck_trip@chip/memory-controller/dimm/rank
389    { SB_PGFLTS + CK_PGFLTS > RANK_THRESH && SB_PGFLTS > RANK_THRESH / 2 };
390
391/*
392 * The following two propagations diagnose a fault.memory.dimm_ck when
393 * either the dimm_sb or dimm_ck engine trips (for a new page fault)
394 * and the total number of page faults (sb and ck) exceeds the threshold
395 * value with the majority being from ck page faults.
396 */
397prop fault.memory.dimm_ck@chip/memory-controller/dimm/rank (0)->
398    ereport.memory.dimm_sb_trip@chip/memory-controller/dimm/rank
399    { SB_PGFLTS + CK_PGFLTS > RANK_THRESH && CK_PGFLTS > RANK_THRESH / 2 };
400
401prop fault.memory.dimm_ck@chip/memory-controller/dimm/rank (0)->
402    ereport.memory.dimm_ck_trip@chip/memory-controller/dimm/rank
403    { SB_PGFLTS + CK_PGFLTS > RANK_THRESH && CK_PGFLTS > RANK_THRESH / 2 };
404
405prop upset.memory.discard2@chip/memory-controller/dimm/rank (1)->
406    ereport.memory.dimm_sb_trip@chip/memory-controller/dimm/rank,
407    ereport.memory.dimm_ck_trip@chip/memory-controller/dimm/rank;
408
409/* 								#DIMM_UE#
410 *								#PAGE_UE#
411 * An uncorrectable multi-bit fault in a memory dimm can cause:
412 *
413 *  - mem_ue    	   : reported by nb for an access from a remote cpu
414 *  - inf_sys_eccm : reported by ic or dc; the ic does not report a syndrome
415 *  - s_eccm	   : reported by bu
416 *
417 * Note we use a SERD engine here simply as a way of ensuring that we get
418 * both dimm and page faults reported.
419 *
420 * Since on production systems we force HT Sync Flood on uncorrectable
421 * memory errors (if not already set as such by the BIOS, as it should be)
422 * we won't actually receive these ereports since the system will be reset.
423 */
424
425#define DIMM_UE_FIT		6000
426
427event ereport.cpu.amd.ic.inf_sys_eccm@chip/cpu{within(5s)};
428event ereport.cpu.amd.dc.inf_sys_eccm@chip/cpu{within(5s)};
429event ereport.cpu.amd.bu.s_eccm@chip/cpu{within(5s)};
430event ereport.cpu.amd.nb.mem_ue@chip/cpu{within(5s)};
431
432event fault.memory.dimm_ue@chip/memory-controller/dimm/rank,
433    FITrate=DIMM_UE_FIT, FRU=dimm, ASRU=dimm,
434    action=confcall("rewrite-ASRU"); /* rewrite non-leaf ASRU in mem scheme */
435
436event fault.memory.page_ue@chip/memory-controller/dimm/rank,
437    FITrate=PAGE_FIT, ASRU=dimm/rank, message=0,
438    action=confcall("rewrite-ASRU"); /* rewrite ASRU to identify page in rank */
439
440event ereport.memory.dimm_ue_trip@chip/memory-controller/dimm/rank{within(5s)};
441engine serd.memory.dimm_ue@chip/memory-controller/dimm/rank,
442    N=0, T=1h, method=persistent,
443    trip=ereport.memory.dimm_ue_trip@chip/memory-controller/dimm/rank;
444event upset.memory.dimm_ue@chip/memory-controller/dimm/rank,
445    engine=serd.memory.dimm_ue@chip/memory-controller/dimm/rank;
446
447event ereport.memory.page_ue_trip@chip/memory-controller/dimm/rank{within(5s)};
448engine serd.memory.page_ue@chip/memory-controller/dimm/rank,
449    N=0, T=1h, method=persistent,
450    trip=ereport.memory.page_ue_trip@chip/memory-controller/dimm/rank;
451event upset.memory.page_ue@chip/memory-controller/dimm/rank,
452    engine=serd.memory.page_ue@chip/memory-controller/dimm/rank;
453
454event upset.memory.discard3@chip/cpu;
455
456prop upset.memory.page_ue@chip/memory-controller/dimm/rank
457    { CONTAINS_RANK } (0)->
458    ereport.cpu.amd.ic.inf_sys_eccm@chip/cpu,
459    ereport.cpu.amd.dc.inf_sys_eccm@chip/cpu,
460    ereport.cpu.amd.bu.s_eccm@chip/cpu,
461    ereport.cpu.amd.nb.mem_ue@chip/cpu;
462
463prop upset.memory.dimm_ue@chip/memory-controller/dimm/rank
464    { CONTAINS_RANK } (0)->
465    ereport.cpu.amd.ic.inf_sys_eccm@chip/cpu,
466    ereport.cpu.amd.dc.inf_sys_eccm@chip/cpu,
467    ereport.cpu.amd.bu.s_eccm@chip/cpu,
468    ereport.cpu.amd.nb.mem_ue@chip/cpu;
469
470prop fault.memory.page_ue@chip/memory-controller/dimm/rank (1)->
471    ereport.memory.page_ue_trip@chip/memory-controller/dimm/rank;
472
473prop fault.memory.page_ue@chip/memory-controller/dimm/rank
474    { CONTAINS_RANK && SET_ADDR && SET_OFFSET } (0)->
475    ereport.cpu.amd.ic.inf_sys_eccm@chip/cpu,
476    ereport.cpu.amd.dc.inf_sys_eccm@chip/cpu,
477    ereport.cpu.amd.bu.s_eccm@chip/cpu,
478    ereport.cpu.amd.nb.mem_ue@chip/cpu;
479
480prop fault.memory.dimm_ue@chip/memory-controller/dimm/rank (1)->
481    ereport.memory.dimm_ue_trip@chip/memory-controller/dimm/rank;
482
483prop upset.memory.discard3@chip/cpu
484    { !RESOURCE_EXISTS } (1)->
485    ereport.cpu.amd.ic.inf_sys_eccm@chip/cpu,
486    ereport.cpu.amd.dc.inf_sys_eccm@chip/cpu,
487    ereport.cpu.amd.bu.s_eccm@chip/cpu,
488    ereport.cpu.amd.nb.mem_ue@chip/cpu;
489
490/*								#CSTESTFAIL#
491 * If the BIOS fails a chip-select during POST, or perhaps after a
492 * sync flood from an uncorrectable error, then on revision F and G it
493 * should mark that chip-select as TestFail in the CS Base register.
494 * When the memory-controller driver discovers all the MC configuration
495 * it notes such failed chip-selects and creates topology nodes for the
496 * chip-select and associated dimms and ranks, and produces an ereport for each
497 * failed chip-select with detector set to the memory-controller node
498 * and resource indicating the failed chip-select.
499 */
500
501event ereport.cpu.amd.mc.cs_testfail@chip/memory-controller{within(5s)};
502
503event fault.memory.dimm_testfail@chip/memory-controller/dimm/rank,
504    FITrate=1000, ASRU=dimm, FRU=dimm,
505    action=confcall("rewrite-ASRU"); /* rewrite non-leaf ASRU in mem scheme */
506
507event error.memory.cs_testfail@chip/memory-controller/chip-select;
508
509#define	CONTAINS_CS (payloadprop_contains("resource", \
510	asru(chip/memory-controller/chip-select)))
511
512prop error.memory.cs_testfail@chip/memory-controller/chip-select (1)->
513    ereport.cpu.amd.mc.cs_testfail@chip/memory-controller
514    { CONTAINS_CS };
515
516#define CSMATCH(s) \
517	(confprop_defined(asru(chip/memory-controller/chip-select), s) && \
518	confprop(asru(chip/memory-controller/chip-select), s) == \
519	confprop(asru(chip/memory-controller/dimm/rank), "csname"))
520
521prop fault.memory.dimm_testfail@chip/memory-controller/dimm/rank (1)->
522    error.memory.cs_testfail@chip/memory-controller/chip-select
523    { CSMATCH("dimm1-csname") || CSMATCH("dimm2-csname")};
524
525/*								#ADDRPAR#
526 * DRAM Command/Address Parity Errors.
527 *
528 *  - dramaddr_par : reported by the nb; the NB status register includes
529 *    a bit indicating which dram controller channel (A or B) experienced
530 *    the error.
531 */
532
533event ereport.cpu.amd.nb.dramaddr_par@chip/cpu{within(5s)};
534
535event fault.cpu.amd.dramchannel@chip/memory-controller/dram-channel,
536    FITrate=1000, ASRU=dram-channel;
537
538#define GET_CHANNEL ($chan = (payloadprop("IA32_MCi_STATUS") >> 32 & 0x200) ? \
539    1 : 0)	/* $chan = 1 if bit 41 (bit 9 of the high word) is set, else 0 */
540
541prop fault.cpu.amd.dramchannel@chip/memory-controller/dram-channel[y] (0)->
542    ereport.cpu.amd.nb.dramaddr_par@chip/cpu { GET_CHANNEL && $chan == y };
543
544/*
545 * l2 cache data errors.
546 */
547
548#define L2CACHEDATA_FIT		1000
549#define L2CACHEDATA_SB_COUNT	3
550#define L2CACHEDATA_SB_TIME	12h
551
552event fault.cpu.amd.l2cachedata@chip/cpu, FITrate=L2CACHEDATA_FIT,
553	FRU=chip, ASRU=chip/cpu;
554event error.cpu.amd.l2cachedata_sb@chip/cpu;
555event error.cpu.amd.l2cachedata_mb@chip/cpu;
556
557prop fault.cpu.amd.l2cachedata@chip/cpu (1)->
558    error.cpu.amd.l2cachedata_sb@chip/cpu,
559    error.cpu.amd.l2cachedata_mb@chip/cpu;
560
561/* 								#L2D_SINGLE#
562 * A single bit data array fault in an l2 cache can cause:
563 *
564 *  - inf_l2_ecc1 : reported by ic on this cpu
565 *  - inf_l2_ecc1 : reported by dc on this cpu
566 *  - l2d_ecc1 : reported by bu on copyback or on snoop from another cpu
567 *
568 * Single-bit errors are diagnosed to cache upsets.  SERD engines are used
569 * to count upsets resulting from CEs.
570 */
571
572event ereport.cpu.amd.ic.inf_l2_ecc1@chip/cpu{within(5s)};
573event ereport.cpu.amd.dc.inf_l2_ecc1@chip/cpu{within(5s)};
574event ereport.cpu.amd.bu.l2d_ecc1@chip/cpu{within(5s)};
575event ereport.cpu.amd.l2d_sb_trip@chip/cpu{within(5s)};
576
577engine serd.cpu.amd.l2d_sb@chip/cpu,
578    N=L2CACHEDATA_SB_COUNT, T=L2CACHEDATA_SB_TIME, method=persistent,
579    trip=ereport.cpu.amd.l2d_sb_trip@chip/cpu;
580
581event upset.cpu.amd.l2d_sb@chip/cpu,
582	engine=serd.cpu.amd.l2d_sb@chip/cpu;
583
584prop upset.cpu.amd.l2d_sb@chip/cpu (1)->
585    ereport.cpu.amd.ic.inf_l2_ecc1@chip/cpu,
586    ereport.cpu.amd.dc.inf_l2_ecc1@chip/cpu,
587    ereport.cpu.amd.bu.l2d_ecc1@chip/cpu;
588
589prop error.cpu.amd.l2cachedata_sb@chip/cpu (1)->
590    ereport.cpu.amd.l2d_sb_trip@chip/cpu;
591
592prop fault.cpu.amd.l2cachedata@chip/cpu (0)->
593    ereport.cpu.amd.ic.inf_l2_ecc1@chip/cpu,
594    ereport.cpu.amd.dc.inf_l2_ecc1@chip/cpu,
595    ereport.cpu.amd.bu.l2d_ecc1@chip/cpu;
596
597/* 								#L2D_MULTI#
598 * A multi-bit data array fault in an l2 cache can cause:
599 *
600 *  - inf_l2_eccm : reported by ic on this cpu
601 *  - inf_l2_eccm : reported by dc on this cpu
602 *  - l2d_eccm : reported by bu on copyback or on snoop from another cpu
603 */
604
605event ereport.cpu.amd.ic.inf_l2_eccm@chip/cpu{within(5s)};
606event ereport.cpu.amd.dc.inf_l2_eccm@chip/cpu{within(5s)};
607event ereport.cpu.amd.bu.l2d_eccm@chip/cpu{within(5s)};
608
609prop error.cpu.amd.l2cachedata_mb@chip/cpu (1)->
610    ereport.cpu.amd.ic.inf_l2_eccm@chip/cpu,
611    ereport.cpu.amd.dc.inf_l2_eccm@chip/cpu,
612    ereport.cpu.amd.bu.l2d_eccm@chip/cpu;
613
614prop fault.cpu.amd.l2cachedata@chip/cpu (0)->
615    ereport.cpu.amd.ic.inf_l2_eccm@chip/cpu,
616    ereport.cpu.amd.dc.inf_l2_eccm@chip/cpu,
617    ereport.cpu.amd.bu.l2d_eccm@chip/cpu;
618
619/*
620 * l2 cache main tag errors
621 */
622
623#define L2CACHETAG_FIT		1000
624#define L2CACHETAG_SB_COUNT	3
625#define L2CACHETAG_SB_TIME	12h
626
627event fault.cpu.amd.l2cachetag@chip/cpu, FITrate=L2CACHETAG_FIT,
628	FRU=chip, ASRU=chip/cpu;
629event error.cpu.amd.l2cachetag_sb@chip/cpu;
630event error.cpu.amd.l2cachetag_mb@chip/cpu;
631
632prop fault.cpu.amd.l2cachetag@chip/cpu (1)->
633    error.cpu.amd.l2cachetag_sb@chip/cpu,
634    error.cpu.amd.l2cachetag_mb@chip/cpu;
635
636/* 								#L2T_SINGLE#
637 * A single bit tag array fault in an l2 cache can cause:
638 *
639 *  - l2t_ecc1 : reported by bu on this cpu when detected during snoop
640 *  - l2t_par : reported by bu on this cpu when detected other than during snoop
641 *
642 * Note that the bu.l2t_par ereport could be due to a single bit or multi bit
643 * event. If the l2t_sb_trip has already triggered it will be treated as another
644 * ce, otherwise it will be treated as a ue event.
645 */
646
647event ereport.cpu.amd.bu.l2t_ecc1@chip/cpu{within(5s)};
648event ereport.cpu.amd.bu.l2t_par@chip/cpu{within(5s)};
649event ereport.cpu.amd.l2t_sb_trip@chip/cpu{within(5s)};
650
651engine serd.cpu.amd.l2t_sb@chip/cpu,
652    N=L2CACHETAG_SB_COUNT, T=L2CACHETAG_SB_TIME, method=persistent,
653    trip=ereport.cpu.amd.l2t_sb_trip@chip/cpu;
654
655event upset.cpu.amd.l2t_sb@chip/cpu,
656	engine=serd.cpu.amd.l2t_sb@chip/cpu;
657
658prop upset.cpu.amd.l2t_sb@chip/cpu (1)->
659    ereport.cpu.amd.bu.l2t_ecc1@chip/cpu,
660    ereport.cpu.amd.bu.l2t_par@chip/cpu;
661
662prop error.cpu.amd.l2cachetag_sb@chip/cpu (1)->
663    ereport.cpu.amd.l2t_sb_trip@chip/cpu;
664
665prop fault.cpu.amd.l2cachetag@chip/cpu (0)->
666    ereport.cpu.amd.bu.l2t_ecc1@chip/cpu,
667    ereport.cpu.amd.bu.l2t_par@chip/cpu;
668
669/* 								#L2T_MULTI#
670 * A multi-bit tag array fault in an l2 cache can cause:
671 *
672 *  - l2t_eccm : reported by bu on this cpu when detected during snoop
673 *  - l2t_par : reported by bu on this cpu when detected other than during snoop
674 */
675
676event ereport.cpu.amd.bu.l2t_eccm@chip/cpu{within(5s)};
677
678prop error.cpu.amd.l2cachetag_mb@chip/cpu (1)->
679    ereport.cpu.amd.bu.l2t_eccm@chip/cpu,
680    ereport.cpu.amd.bu.l2t_par@chip/cpu;
681
682prop fault.cpu.amd.l2cachetag@chip/cpu (0)->
683    ereport.cpu.amd.bu.l2t_eccm@chip/cpu,
684    ereport.cpu.amd.bu.l2t_par@chip/cpu;
685
686/* 								#ICD_PAR#
687 * A data array parity fault in an I cache can cause:
688 *
689 *  - data_par : reported by ic on this cpu
690 */
691
692#define ICACHEDATA_FIT		1000
693#define ICACHEDATA_SB_COUNT	2
694#define ICACHEDATA_SB_TIME	168h
695
696event ereport.cpu.amd.ic.data_par@chip/cpu{within(5s)};
697event ereport.cpu.amd.ic_dp_trip@chip/cpu{within(5s)};
698
699event fault.cpu.amd.icachedata@chip/cpu, FITrate=ICACHEDATA_FIT,
700	FRU=chip, ASRU=chip/cpu;
701
702engine serd.cpu.amd.icachedata@chip/cpu,
703    N=ICACHEDATA_SB_COUNT, T=ICACHEDATA_SB_TIME, method=persistent,
704    trip=ereport.cpu.amd.ic_dp_trip@chip/cpu;
705
706event upset.cpu.amd.icachedata@chip/cpu,
707	engine=serd.cpu.amd.icachedata@chip/cpu;
708
709prop upset.cpu.amd.icachedata@chip/cpu (1)->
710    ereport.cpu.amd.ic.data_par@chip/cpu;
711
712prop fault.cpu.amd.icachedata@chip/cpu (1)->
713    ereport.cpu.amd.ic_dp_trip@chip/cpu;
714
715prop fault.cpu.amd.icachedata@chip/cpu (0)->
716    ereport.cpu.amd.ic.data_par@chip/cpu;
717
718/* 								#ICT_PAR#
719 * A tag array parity fault in an I cache can cause:
720 *
721 *  - tag_par : reported by ic on this cpu
722 */
723
724#define ICACHETAG_FIT		1000
725#define ICACHETAG_SB_COUNT	2
726#define ICACHETAG_SB_TIME	168h
727
728event ereport.cpu.amd.ic.tag_par@chip/cpu{within(5s)};
729event ereport.cpu.amd.ic_tp_trip@chip/cpu{within(5s)};
730
731event fault.cpu.amd.icachetag@chip/cpu, FITrate=ICACHETAG_FIT,
732	FRU=chip, ASRU=chip/cpu;
733
734engine serd.cpu.amd.icachetag@chip/cpu,
735    N=ICACHETAG_SB_COUNT, T=ICACHETAG_SB_TIME, method=persistent,
736    trip=ereport.cpu.amd.ic_tp_trip@chip/cpu;
737
738event upset.cpu.amd.icachetag@chip/cpu,
739	engine=serd.cpu.amd.icachetag@chip/cpu;
740
741prop upset.cpu.amd.icachetag@chip/cpu (1)->
742    ereport.cpu.amd.ic.tag_par@chip/cpu;
743
744prop fault.cpu.amd.icachetag@chip/cpu (1)->
745    ereport.cpu.amd.ic_tp_trip@chip/cpu;
746
747prop fault.cpu.amd.icachetag@chip/cpu (0)->
748    ereport.cpu.amd.ic.tag_par@chip/cpu;
749
750/* 								#ICT_SNOOP#
751 * A snoop tag array parity fault in an I cache can cause:
752 *
753 *  - stag_par : reported by ic on this cpu
754 */
755
756#define ICACHESTAG_FIT		1000
757
758event ereport.cpu.amd.ic.stag_par@chip/cpu{within(5s)};
759
760event fault.cpu.amd.icachestag@chip/cpu, FITrate=ICACHESTAG_FIT,
761	FRU=chip, ASRU=chip/cpu;
762
763prop fault.cpu.amd.icachestag@chip/cpu (1)->
764    ereport.cpu.amd.ic.stag_par@chip/cpu;
765
766/* 								#ICTLB_1#
767 * An l1tlb parity fault in an I cache can cause:
768 *
769 *  - l1tlb_par : reported by ic on this cpu
 *
 * l1tlb_par ereports are attributed to upset.cpu.amd.l1itlb, which is
 * attached to the serd.cpu.amd.l1itlb engine.  That engine names
 * ic_l1tlb_trip as its trip ereport, and fault.cpu.amd.l1itlb is tied to
 * that trip ereport.
770 */
771
772#define ICACHEL1TLB_FIT		1000
773#define ICACHEL1TLB_SB_COUNT	2
774#define ICACHEL1TLB_SB_TIME	168h
775
776event ereport.cpu.amd.ic.l1tlb_par@chip/cpu{within(5s)};
777event ereport.cpu.amd.ic_l1tlb_trip@chip/cpu{within(5s)};
778
/* The fault names the chip as FRU and the chip/cpu as ASRU. */
779event fault.cpu.amd.l1itlb@chip/cpu, FITrate=ICACHEL1TLB_FIT,
780	FRU=chip, ASRU=chip/cpu;
781
/* SERD engine configured from the ICACHEL1TLB_SB_* values above. */
782engine serd.cpu.amd.l1itlb@chip/cpu,
783    N=ICACHEL1TLB_SB_COUNT, T=ICACHEL1TLB_SB_TIME, method=persistent,
784    trip=ereport.cpu.amd.ic_l1tlb_trip@chip/cpu;
785
786event upset.cpu.amd.l1itlb@chip/cpu,
787	engine=serd.cpu.amd.l1itlb@chip/cpu;
788
/* Raw l1tlb_par ereports are attributed to the upset. */
789prop upset.cpu.amd.l1itlb@chip/cpu (1)->
790    ereport.cpu.amd.ic.l1tlb_par@chip/cpu;
791
/* The fault is tied to the engine's trip ereport. */
792prop fault.cpu.amd.l1itlb@chip/cpu (1)->
793    ereport.cpu.amd.ic_l1tlb_trip@chip/cpu;
794
/*
 * NOTE(review): the (0) edge presumably lets the fault account for the raw
 * l1tlb_par ereports without requiring any of them -- confirm against the
 * Eversholt prop semantics.
 */
795prop fault.cpu.amd.l1itlb@chip/cpu (0)->
796    ereport.cpu.amd.ic.l1tlb_par@chip/cpu;
797
798/* 								#ICTLB_2#
799 * An l2tlb parity fault in an I cache can cause:
800 *
801 *  - l2tlb_par : reported by ic on this cpu
 *
 * l2tlb_par ereports are attributed to upset.cpu.amd.l2itlb, which is
 * attached to the serd.cpu.amd.l2itlb engine.  That engine names
 * ic_l2tlb_trip as its trip ereport, and fault.cpu.amd.l2itlb is tied to
 * that trip ereport.
802 */
803
804#define ICACHEL2TLB_FIT		1000
805#define ICACHEL2TLB_SB_COUNT	2
806#define ICACHEL2TLB_SB_TIME	168h
807
808event ereport.cpu.amd.ic.l2tlb_par@chip/cpu{within(5s)};
809event ereport.cpu.amd.ic_l2tlb_trip@chip/cpu{within(5s)};
810
/* The fault names the chip as FRU and the chip/cpu as ASRU. */
811event fault.cpu.amd.l2itlb@chip/cpu, FITrate=ICACHEL2TLB_FIT,
812	FRU=chip, ASRU=chip/cpu;
813
/* SERD engine configured from the ICACHEL2TLB_SB_* values above. */
814engine serd.cpu.amd.l2itlb@chip/cpu,
815    N=ICACHEL2TLB_SB_COUNT, T=ICACHEL2TLB_SB_TIME, method=persistent,
816    trip=ereport.cpu.amd.ic_l2tlb_trip@chip/cpu;
817
818event upset.cpu.amd.l2itlb@chip/cpu,
819	engine=serd.cpu.amd.l2itlb@chip/cpu;
820
/* Raw l2tlb_par ereports are attributed to the upset. */
821prop upset.cpu.amd.l2itlb@chip/cpu (1)->
822    ereport.cpu.amd.ic.l2tlb_par@chip/cpu;
823
/* The fault is tied to the engine's trip ereport. */
824prop fault.cpu.amd.l2itlb@chip/cpu (1)->
825    ereport.cpu.amd.ic_l2tlb_trip@chip/cpu;
826
/*
 * NOTE(review): the (0) edge presumably lets the fault account for the raw
 * l2tlb_par ereports without requiring any of them -- confirm against the
 * Eversholt prop semantics.
 */
827prop fault.cpu.amd.l2itlb@chip/cpu (0)->
828    ereport.cpu.amd.ic.l2tlb_par@chip/cpu;
829
830/*
831 * dcache data errors
 *
 * A single fault, fault.cpu.amd.dcachedata, is shared by the single-bit
 * and multi-bit cases.  It propagates to two intermediate error events,
 * dcachedata_sb and dcachedata_mb, which the #DCD_SINGLE# and #DCD_MULTI#
 * sections connect to ereports.
832 */
833
834#define DCACHEDATA_FIT		1000
835#define DCACHEDATA_SB_COUNT	2
836#define DCACHEDATA_SB_TIME	168h
837
/* The fault names the chip as FRU and the chip/cpu as ASRU. */
838event fault.cpu.amd.dcachedata@chip/cpu, FITrate=DCACHEDATA_FIT,
839	FRU=chip, ASRU=chip/cpu;
840event error.cpu.amd.dcachedata_sb@chip/cpu;
841event error.cpu.amd.dcachedata_mb@chip/cpu;
842
/* One (1) edge from the fault to the pair of intermediate error events. */
843prop fault.cpu.amd.dcachedata@chip/cpu (1)->
844    error.cpu.amd.dcachedata_sb@chip/cpu,
845    error.cpu.amd.dcachedata_mb@chip/cpu;
846
847/* 								#DCD_SINGLE#
848 * A single-bit data array fault in a D cache can cause:
849 *
850 *  - data_ecc1 : reported by dc on this cpu by scrubber
851 *  - data_ecc1_uc : reported by dc on this cpu other than by scrubber
852 *
853 * Make data_ecc1_uc fault immediately as it may have caused a panic
 *
 * Two SERD engines are declared below, one per ereport class, and both
 * name the same trip ereport (dc_sb_trip).  That trip ereport is tied to
 * error.cpu.amd.dcachedata_sb.
854 */
855
856event ereport.cpu.amd.dc.data_ecc1@chip/cpu{within(5s)};
857event ereport.cpu.amd.dc.data_ecc1_uc@chip/cpu{within(5s)};
858event ereport.cpu.amd.dc_sb_trip@chip/cpu{within(5s)};
859
/* Engine for scrubber-reported data_ecc1, using the DCACHEDATA_SB_* values. */
860engine serd.cpu.amd.dc_sb@chip/cpu,
861    N=DCACHEDATA_SB_COUNT, T=DCACHEDATA_SB_TIME, method=persistent,
862    trip=ereport.cpu.amd.dc_sb_trip@chip/cpu;
863
/*
 * Engine for data_ecc1_uc.  NOTE(review): N=0 presumably makes this engine
 * trip on the first event, matching the "fault immediately" intent in the
 * header comment -- confirm against Eversholt SERD semantics.
 */
864engine serd.cpu.amd.dc_sb_uc@chip/cpu,
865    N=0, T=1hr, method=persistent,
866    trip=ereport.cpu.amd.dc_sb_trip@chip/cpu;
867
868event upset.cpu.amd.dc_sb@chip/cpu,
869	engine=serd.cpu.amd.dc_sb@chip/cpu;
870
871event upset.cpu.amd.dc_sb_uc@chip/cpu,
872	engine=serd.cpu.amd.dc_sb_uc@chip/cpu;
873
/* Each raw ereport class is attributed to its own upset. */
874prop upset.cpu.amd.dc_sb@chip/cpu (1)->
875    ereport.cpu.amd.dc.data_ecc1@chip/cpu;
876
877prop upset.cpu.amd.dc_sb_uc@chip/cpu (1)->
878    ereport.cpu.amd.dc.data_ecc1_uc@chip/cpu;
879
/* The shared trip ereport is tied to the single-bit error event. */
880prop error.cpu.amd.dcachedata_sb@chip/cpu (1)->
881    ereport.cpu.amd.dc_sb_trip@chip/cpu;
882
/*
 * NOTE(review): the (0) edge presumably lets the fault account for the raw
 * single-bit ereports without requiring any of them -- confirm against the
 * Eversholt prop semantics.
 */
883prop fault.cpu.amd.dcachedata@chip/cpu (0)->
884    ereport.cpu.amd.dc.data_ecc1@chip/cpu,
885    ereport.cpu.amd.dc.data_ecc1_uc@chip/cpu;
886
887/* 								#DCD_MULTI#
888 * A multi-bit data array fault in a D cache can cause:
889 *
890 *  - data_eccm : reported by dc on this cpu
 *
 * No SERD engine is declared for this case; the data_eccm ereport is tied
 * directly to error.cpu.amd.dcachedata_mb.
891 */
892
893event ereport.cpu.amd.dc.data_eccm@chip/cpu{within(5s)};
894
895prop error.cpu.amd.dcachedata_mb@chip/cpu (1)->
896    ereport.cpu.amd.dc.data_eccm@chip/cpu;
897
/*
 * NOTE(review): additional (0) edge from the shared dcachedata fault to the
 * same ereport; presumably optional -- confirm against the Eversholt prop
 * semantics.
 */
898prop fault.cpu.amd.dcachedata@chip/cpu (0)->
899    ereport.cpu.amd.dc.data_eccm@chip/cpu;
900
901/* 								#DCT_PAR#
902 * A tag array parity fault in a D cache can cause:
903 *
904 *  - tag_par : reported by dc on this cpu
 *
 * No SERD engine or upset is declared for this case; the fault is tied
 * directly to the tag_par ereport.
905 */
906
907#define DCACHETAG_FIT		1000
908
909event ereport.cpu.amd.dc.tag_par@chip/cpu{within(5s)};
910
/* The fault names the chip as FRU and the chip/cpu as ASRU. */
911event fault.cpu.amd.dcachetag@chip/cpu, FITrate=DCACHETAG_FIT,
912	FRU=chip, ASRU=chip/cpu;
913
914prop fault.cpu.amd.dcachetag@chip/cpu (1)->
915    ereport.cpu.amd.dc.tag_par@chip/cpu;
916
917/* 								#DCT_SNOOP#
918 * A snoop tag array parity fault in a D cache can cause:
919 *
920 *  - stag_par : reported by dc on this cpu
 *
 * No SERD engine or upset is declared for this case; the fault is tied
 * directly to the stag_par ereport.
921 */
922
923#define DCACHESTAG_FIT		1000
924
925event ereport.cpu.amd.dc.stag_par@chip/cpu{within(5s)};
926
/* The fault names the chip as FRU and the chip/cpu as ASRU. */
927event fault.cpu.amd.dcachestag@chip/cpu, FITrate=DCACHESTAG_FIT,
928	FRU=chip, ASRU=chip/cpu;
929
930prop fault.cpu.amd.dcachestag@chip/cpu (1)->
931    ereport.cpu.amd.dc.stag_par@chip/cpu;
932
933/* 								#DCTLB_1#
934 * An l1tlb parity fault in a D cache can cause:
935 *
936 *  - l1tlb_par : reported by dc on this cpu
 *
 * No SERD engine or upset is declared for this case; the fault is tied
 * directly to the l1tlb_par ereport.
937 */
938
939#define L1DTLB_FIT		1000
940
941event ereport.cpu.amd.dc.l1tlb_par@chip/cpu{within(5s)};
942
/* The fault names the chip as FRU and the chip/cpu as ASRU. */
943event fault.cpu.amd.l1dtlb@chip/cpu, FITrate=L1DTLB_FIT,
944	FRU=chip, ASRU=chip/cpu;
945
946prop fault.cpu.amd.l1dtlb@chip/cpu (1)->
947    ereport.cpu.amd.dc.l1tlb_par@chip/cpu;
948
949/* 								#DCTLB_2#
950 * An l2tlb parity fault in a D cache can cause:
951 *
952 *  - l2tlb_par : reported by dc on this cpu
 *
 * No SERD engine or upset is declared for this case; the fault is tied
 * directly to the l2tlb_par ereport.
953 */
954
955#define L2DTLB_FIT		1000
956
957event ereport.cpu.amd.dc.l2tlb_par@chip/cpu{within(5s)};
958
/* The fault names the chip as FRU and the chip/cpu as ASRU. */
959event fault.cpu.amd.l2dtlb@chip/cpu, FITrate=L2DTLB_FIT,
960	FRU=chip, ASRU=chip/cpu;
961
962prop fault.cpu.amd.l2dtlb@chip/cpu (1)->
963    ereport.cpu.amd.dc.l2tlb_par@chip/cpu;
964
965/*								#MISC#
966 * Ereports that should not normally happen and which we will discard
967 * without diagnosis if they do.  These fall into a few categories:
968 *
969 *	- the corresponding detector is not enabled, typically because
970 *	  detection/handling of the event is taking place elsewhere
971 *	  (nb.ma, nb.ta, ls.s_rde, ic.rdde, bu.s_rde, nb.gart_walk)
972 *	- the event is associated with a sync flood so even if the detector is
973 *	  enabled we will never handle the event and generate an ereport *and*
974 *	  even if the ereport did arrive we could perform no useful diagnosis;
975 *	  e.g., the NB can be configured for sync flood on nb.mem_eccm
976 *	  but we don't choose to discard that ereport here since we could have
977 *	  made a useful diagnosis from it had it been delivered
978 *	  (nb.ht_sync, nb.ht_crc)
979 *	- events that will be accompanied by an immediate panic and
980 *	  delivery of the ereport during subsequent reboot but from
981 *	  which no useful diagnosis can be made. (nb.rmw, nb.wdog)
982 *
983 * Ereports for all of these can be generated by error simulation and
984 * injection.  We will perform a null diagnosis of all these ereports in order
985 * to avoid "no subscription" complaints during test harness runs.
 *
 * ereport.cpu.amd.unknown is also routed to the null diagnosis below.
986 */
987
/* Note that these ereports are declared at @cpu, not @chip/cpu. */
988event ereport.cpu.amd.nb.ma@cpu{within(5s)};
989event ereport.cpu.amd.nb.ta@cpu{within(5s)};
990event ereport.cpu.amd.ls.s_rde@cpu{within(5s)};
991event ereport.cpu.amd.ic.rdde@cpu{within(5s)};
992event ereport.cpu.amd.bu.s_rde@cpu{within(5s)};
993event ereport.cpu.amd.nb.gart_walk@cpu{within(5s)};
994event ereport.cpu.amd.nb.ht_sync@cpu{within(5s)};
995event ereport.cpu.amd.nb.ht_crc@cpu{within(5s)};
996event ereport.cpu.amd.nb.rmw@cpu{within(5s)};
997event ereport.cpu.amd.nb.wdog@cpu{within(5s)};
998event ereport.cpu.amd.unknown@cpu{within(5s)};
999
/* Upset with no engine= property; it exists only to absorb the ereports. */
1000event upset.null_diag@cpu;
1001
/* Every ereport declared above is attributed to the null-diagnosis upset. */
1002prop upset.null_diag@cpu (1)->
1003    ereport.cpu.amd.nb.ma@cpu,
1004    ereport.cpu.amd.nb.ta@cpu,
1005    ereport.cpu.amd.ls.s_rde@cpu,
1006    ereport.cpu.amd.ic.rdde@cpu,
1007    ereport.cpu.amd.bu.s_rde@cpu,
1008    ereport.cpu.amd.nb.gart_walk@cpu,
1009    ereport.cpu.amd.nb.ht_sync@cpu,
1010    ereport.cpu.amd.nb.ht_crc@cpu,
1011    ereport.cpu.amd.nb.rmw@cpu,
1012    ereport.cpu.amd.nb.wdog@cpu,
1013    ereport.cpu.amd.unknown@cpu;
1014