xref: /illumos-gate/usr/src/cmd/fm/eversholt/files/i386/i86pc/intel.esc (revision 56e2cc86321ec889bf83a888d902c60d6fb2ef8d)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#pragma dictionary "INTEL"
28
29/*
30 * Eversholt rules for the intel CPU/Memory
31 */
32
33/*
34 * Ereports for Simple error codes.
35 */
36
/*
 * SMPL_EVENT(leafclass, t): declare ereport.cpu.intel.<leafclass> at
 * chip/core/strand with a within(t) property.  No trailing ';' is part of
 * the macro; each use supplies its own.
 */
#define	SMPL_EVENT(leafclass, t)					\
	event ereport.cpu.intel.leafclass@chip/core/strand { within(t) }

/* The eight simple error classes, each declared with within(1s). */
SMPL_EVENT(unknown, 1s);
SMPL_EVENT(unclassified, 1s);
SMPL_EVENT(microcode_rom_parity, 1s);
SMPL_EVENT(external, 1s);
SMPL_EVENT(frc, 1s);
SMPL_EVENT(internal_timer, 1s);
SMPL_EVENT(internal_parity, 1s);
SMPL_EVENT(internal_unclassified, 1s);
48
/*
 * Propagations for all but "external" and "unknown" simple errors.
 * If the error is uncorrected we produce a fault immediately, otherwise
 * we diagnose it to an upset and declare a fault when the SERD engine
 * trips.
 */
55
/* One SERD engine (N=3 within T=72h) backs fault.cpu.intel.internal. */
engine serd.cpu.intel.simple@chip/core/strand,
    N=3, T=72h;

event fault.cpu.intel.internal@chip/core/strand,
    engine=serd.cpu.intel.simple@chip/core/strand;

/*
 * Six of the simple ereports propagate from fault.cpu.intel.internal.
 * When the payload carries error_uncorrected == 1 the constraint calls
 * setserdincrement(4), which is larger than the engine N of 3; in every
 * other case the constraint evaluates to the constant 1.
 */
prop fault.cpu.intel.internal@chip/core/strand
    { payloadprop("error_uncorrected") == 1 ?
    setserdincrement(4) : 1 } (0)->
    ereport.cpu.intel.microcode_rom_parity@chip/core/strand,
    ereport.cpu.intel.internal_timer@chip/core/strand,
    ereport.cpu.intel.internal_parity@chip/core/strand,
    ereport.cpu.intel.unclassified@chip/core/strand,
    ereport.cpu.intel.internal_unclassified@chip/core/strand,
    ereport.cpu.intel.frc@chip/core/strand;
68
69/*
70 * Ereports for Compound error codes.  These are in pairs "foo" and "foo_uc"
71 * for the corrected and uncorrected version of each error type.  All are
72 * detected at chip/core/strand.
73 */
74
/*
 * CMPND_EVENT(leafclass, t): declare the ereport pair
 * ereport.cpu.intel.<leafclass> and ereport.cpu.intel.<leafclass>_uc at
 * chip/core/strand, each with a within(t) property.  The empty comment
 * between leafclass and _uc joins the argument to the suffix and must be
 * kept exactly as written.  The second declaration is left unterminated;
 * the ';' comes from the point of use.
 */
#define	CMPND_EVENT(leafclass, t)					\
	event ereport.cpu.intel.leafclass@chip/core/strand		\
	    { within(t) };						\
	event ereport.cpu.intel.leafclass/**/_uc@chip/core/strand	\
	    { within(t) }

/*
 * Compound error codes - cache errors (l0cache, l1cache, l2cache, cache)
 */
CMPND_EVENT(l0cache, 1s);
CMPND_EVENT(l1cache, 1s);
CMPND_EVENT(l2cache, 1s);
CMPND_EVENT(cache, 1s);

/*
 * Compound error codes - TLB errors: data TLB, instruction TLB, then the
 * unqualified TLB classes.
 */
CMPND_EVENT(l0dtlb, 1s);
CMPND_EVENT(l1dtlb, 1s);
CMPND_EVENT(l2dtlb, 1s);
CMPND_EVENT(dtlb, 1s);

CMPND_EVENT(l0itlb, 1s);
CMPND_EVENT(l1itlb, 1s);
CMPND_EVENT(l2itlb, 1s);
CMPND_EVENT(itlb, 1s);

CMPND_EVENT(l0tlb, 1s);
CMPND_EVENT(l1tlb, 1s);
CMPND_EVENT(l2tlb, 1s);
CMPND_EVENT(tlb, 1s);

/*
 * Compound error codes - memory hierarchy errors (dcache and icache
 * classes)
 */
CMPND_EVENT(l0dcache, 1s);
CMPND_EVENT(l1dcache, 1s);
CMPND_EVENT(l2dcache, 1s);
CMPND_EVENT(dcache, 1s);

CMPND_EVENT(l0icache, 1s);
CMPND_EVENT(l1icache, 1s);
CMPND_EVENT(l2icache, 1s);
CMPND_EVENT(icache, 1s);

/*
 * Compound error codes - bus and interconnect errors
 */
CMPND_EVENT(bus_interconnect, 1s);
CMPND_EVENT(bus_interconnect_memory, 1s);
CMPND_EVENT(bus_interconnect_io, 1s);
124
/*
 * Compound error propagations.
 *
 * We resist the temptation to propagate, for example, a single dcache fault
 * to all ereports mentioning dcache (l0dcache, l1dcache, l2dcache, dcache).
 * Instead we will diagnose a distinct fault for each possible cache level,
 * whether or not current chips have dcaches at all levels.
 *
 * Corrected errors are SERDed and produce a fault when the engine fires;
 * the same fault is diagnosed immediately for a corresponding uncorrected
 * error.
 */
137
/*
 * CMPND_FLT_PROP_1(erptleaf, fltleaf, n, t) emits, all at chip/core/strand:
 *   - engine serd.cpu.intel.<fltleaf> with N=n, T=t;
 *   - event fault.cpu.intel.<fltleaf> bound to that engine;
 *   - a prop from the fault to ereport.cpu.intel.<erptleaf>;
 *   - a prop from the fault to ereport.cpu.intel.<erptleaf>_uc whose
 *     constraint calls setserdincrement(n + 1).
 * The last statement is left unterminated; the ';' comes from the use.
 */
#define	CMPND_FLT_PROP_1(erptleaf, fltleaf, n, t)			\
	engine serd.cpu.intel.fltleaf@chip/core/strand,			\
	    N=n, T=t;							\
	event fault.cpu.intel.fltleaf@chip/core/strand,			\
	    engine=serd.cpu.intel.fltleaf@chip/core/strand;		\
									\
	prop fault.cpu.intel.fltleaf@chip/core/strand			\
	    (0)-> ereport.cpu.intel.erptleaf@chip/core/strand;		\
									\
	prop fault.cpu.intel.fltleaf@chip/core/strand			\
	    { setserdincrement(n + 1) }					\
	    (0)-> ereport.cpu.intel.erptleaf/**/_uc@chip/core/strand

/*
 * CMPND_FLT_PROP_2 has the same shape as CMPND_FLT_PROP_1; the only
 * difference is that the fault event is declared with retire=0 and
 * response=0.
 */
#define	CMPND_FLT_PROP_2(erptleaf, fltleaf, n, t)			\
	engine serd.cpu.intel.fltleaf@chip/core/strand,			\
	    N=n, T=t;							\
	event fault.cpu.intel.fltleaf@chip/core/strand,			\
	    retire=0, response=0,					\
	    engine=serd.cpu.intel.fltleaf@chip/core/strand;		\
									\
	prop fault.cpu.intel.fltleaf@chip/core/strand			\
	    (0)-> ereport.cpu.intel.erptleaf@chip/core/strand;		\
									\
	prop fault.cpu.intel.fltleaf@chip/core/strand			\
	    { setserdincrement(n + 1) }					\
	    (0)-> ereport.cpu.intel.erptleaf/**/_uc@chip/core/strand
161
/*
 * Fault/ereport pairings.  Every row uses the same leaf name for the
 * ereport and for the fault, so each error class gets its own SERD engine
 * and its own fault.  Level-specific classes use N=3, the level-less
 * classes use N=12, the bus/interconnect classes use N=10; T is 72h
 * throughout.
 */

/* cache */
CMPND_FLT_PROP_1(l0cache, l0cache, 3, 72h);
CMPND_FLT_PROP_1(l1cache, l1cache, 3, 72h);
CMPND_FLT_PROP_1(l2cache, l2cache, 3, 72h);
CMPND_FLT_PROP_1(cache, cache, 12, 72h);

/* data TLB */
CMPND_FLT_PROP_1(l0dtlb, l0dtlb, 3, 72h);
CMPND_FLT_PROP_1(l1dtlb, l1dtlb, 3, 72h);
CMPND_FLT_PROP_1(l2dtlb, l2dtlb, 3, 72h);
CMPND_FLT_PROP_1(dtlb, dtlb, 12, 72h);

/* instruction TLB */
CMPND_FLT_PROP_1(l0itlb, l0itlb, 3, 72h);
CMPND_FLT_PROP_1(l1itlb, l1itlb, 3, 72h);
CMPND_FLT_PROP_1(l2itlb, l2itlb, 3, 72h);
CMPND_FLT_PROP_1(itlb, itlb, 12, 72h);

/*
 * Unqualified TLB.  These three rows previously all named the fault leaf
 * "litlb", which declared one engine and one fault three times over and
 * merged the three levels into a single diagnosis.  Each level now has its
 * own fault, like every other group in this table.
 * NOTE(review): confirm the INTEL dictionary carries entries for
 * fault.cpu.intel.l0tlb, fault.cpu.intel.l1tlb and fault.cpu.intel.l2tlb.
 */
CMPND_FLT_PROP_1(l0tlb, l0tlb, 3, 72h);
CMPND_FLT_PROP_1(l1tlb, l1tlb, 3, 72h);
CMPND_FLT_PROP_1(l2tlb, l2tlb, 3, 72h);
CMPND_FLT_PROP_1(tlb, tlb, 12, 72h);

/* data cache */
CMPND_FLT_PROP_1(l0dcache, l0dcache, 3, 72h);
CMPND_FLT_PROP_1(l1dcache, l1dcache, 3, 72h);
CMPND_FLT_PROP_1(l2dcache, l2dcache, 3, 72h);
CMPND_FLT_PROP_1(dcache, dcache, 12, 72h);

/* instruction cache */
CMPND_FLT_PROP_1(l0icache, l0icache, 3, 72h);
CMPND_FLT_PROP_1(l1icache, l1icache, 3, 72h);
CMPND_FLT_PROP_1(l2icache, l2icache, 3, 72h);
CMPND_FLT_PROP_1(icache, icache, 12, 72h);

/* bus and interconnect: faults declared with retire=0, response=0 */
CMPND_FLT_PROP_2(bus_interconnect, bus_interconnect, 10, 72h);
CMPND_FLT_PROP_2(bus_interconnect_memory, bus_interconnect_memory, 10, 72h);
CMPND_FLT_PROP_2(bus_interconnect_io, bus_interconnect_io, 10, 72h);
195
/*
 * The "external" and "unknown" simple ereports are attached to
 * upset.discard rather than to any fault.
 */
event upset.discard@chip/core/strand;

prop upset.discard@chip/core/strand
    (0)-> ereport.cpu.intel.external@chip/core/strand,
    ereport.cpu.intel.unknown@chip/core/strand;
201
202/* errors detected in northbridge */
203
204
205/*
206 * SET_ADDR and SET_OFFSET are used to set a payload value in the fault that
207 * we diagnose for page faults, to record the physical address of the faulting
208 * page.
209 */
/*
 * SET_ADDR: true when the ereport has no "physaddr" payload member;
 * otherwise the result of copying it into "asru-physaddr" with
 * setpayloadprop().
 */
#define	SET_ADDR							\
	(!payloadprop_defined("physaddr") ||				\
	setpayloadprop("asru-physaddr", payloadprop("physaddr")))

/* SET_OFFSET: same pattern, copying "offset" into "asru-offset". */
#define	SET_OFFSET							\
	(!payloadprop_defined("offset") ||				\
	setpayloadprop("asru-offset", payloadprop("offset")))

/*
 * EREPORT_BUS_ERROR: the list of chip/core/strand ereports that the
 * northbridge and memory rules name on the right-hand side of their
 * (0)-> propagations.  No trailing ';' is included.
 */
#define	EREPORT_BUS_ERROR						\
	ereport.cpu.intel.bus_interconnect_memory_uc@chip/core/strand,	\
	ereport.cpu.intel.bus_interconnect_uc@chip/core/strand,		\
	ereport.cpu.intel.bus_interconnect_memory@chip/core/strand,	\
	ereport.cpu.intel.bus_interconnect@chip/core/strand,		\
	ereport.cpu.intel.external@chip/core/strand
222
/*
 * Statistics engine that fault.memory.intel.page_ce names through count=.
 * NOTE(review): declared here without the motherboard/ prefix that the
 * count= reference uses - confirm the two paths are meant to match.
 */
engine stat.ce_pgflt@memory-controller/dram-channel/dimm;

/* mem_ue and fbd.ma ereports are both raised at the memory controller. */
event ereport.cpu.intel.nb.mem_ue@motherboard/memory-controller
    { within(12s) };
event ereport.cpu.intel.nb.fbd.ma@motherboard/memory-controller
    { within(12s) };

/* Both UE faults live at rank level; page_ue has message=0, response=0. */
event fault.memory.intel.page_ue@
    motherboard/memory-controller/dram-channel/dimm/rank,
    message=0,
    response=0;
event fault.memory.intel.dimm_ue@
    motherboard/memory-controller/dram-channel/dimm/rank;

/*
 * page_ue: the rank instance number must equal the "rank" payload member,
 * and the payload must carry "physaddr" or "offset"; SET_ADDR and
 * SET_OFFSET then copy whichever is present into the fault payload.
 */
prop fault.memory.intel.page_ue@
    motherboard/memory-controller/dram-channel/dimm/rank[rank_num]
    {
	payloadprop_defined("rank") &&
	rank_num == payloadprop("rank") &&
	(payloadprop_defined("physaddr") || payloadprop_defined("offset")) &&
	SET_ADDR && SET_OFFSET
    } (1)->
    ereport.cpu.intel.nb.mem_ue@motherboard/memory-controller,
    ereport.cpu.intel.nb.fbd.ma@motherboard/memory-controller;

/* dimm_ue: only the rank number has to match the payload. */
prop fault.memory.intel.dimm_ue@
    motherboard/memory-controller/dram-channel<channel_num>/dimm/rank[rank_num]
    {
	payloadprop_defined("rank") &&
	rank_num == payloadprop("rank")
    } (1)->
    ereport.cpu.intel.nb.mem_ue@motherboard/memory-controller,
    ereport.cpu.intel.nb.fbd.ma@motherboard/memory-controller;

/*
 * upset.memory.intel.discard also propagates, with (0)-> arrows, to the
 * same two ereports and to the EREPORT_BUS_ERROR list.
 */
event upset.memory.intel.discard@motherboard/memory-controller
    { within(1s) };

prop upset.memory.intel.discard@motherboard/memory-controller
    (0)-> ereport.cpu.intel.nb.mem_ue@motherboard/memory-controller,
    ereport.cpu.intel.nb.fbd.ma@motherboard/memory-controller;

prop upset.memory.intel.discard@motherboard/memory-controller
    (0)-> EREPORT_BUS_ERROR;
255
/* Default SERD parameters for the page and DIMM corrected-error engines. */
#define	PAGE_CE_COUNT	2
#define	PAGE_CE_TIME	72h
#define	DIMM_CE_COUNT	10
#define	DIMM_CE_TIME	1week

event ereport.cpu.intel.nb.mem_ce@
    motherboard/memory-controller/dram-channel/dimm/rank
    { within(12s) };

/*
 * page_ce: SERD engine at rank level; the fault has message=0 and
 * response=0 and names the per-DIMM stat.ce_pgflt engine through count=.
 */
engine serd.memory.intel.page_ce@
    motherboard/memory-controller/dram-channel/dimm/rank,
    N=PAGE_CE_COUNT,
    T=PAGE_CE_TIME;
event fault.memory.intel.page_ce@
    motherboard/memory-controller/dram-channel/dimm/rank,
    message=0,
    response=0,
    count=stat.ce_pgflt@motherboard/memory-controller/dram-channel/dimm,
    engine=serd.memory.intel.page_ce@
    motherboard/memory-controller/dram-channel/dimm/rank;

/*
 * NOTE(review): this propagation uses (0)-> while the corresponding
 * quickpath page_ce rule later in the file uses (1)-> - confirm the
 * difference is intended.
 */
prop fault.memory.intel.page_ce@
    motherboard/memory-controller/dram-channel/dimm/rank
    {
	(payloadprop_defined("physaddr") || payloadprop_defined("offset")) &&
	SET_ADDR && SET_OFFSET
    } (0)->
    ereport.cpu.intel.nb.mem_ce@
    motherboard/memory-controller/dram-channel/dimm/rank;

/*
 * dimm_ce: SERD engine and fault at rank level, plus an intermediate
 * error event at DIMM level.
 */
engine serd.memory.intel.dimm_ce@
    motherboard/memory-controller/dram-channel/dimm/rank,
    N=DIMM_CE_COUNT,
    T=DIMM_CE_TIME;
event fault.memory.intel.dimm_ce@
    motherboard/memory-controller/dram-channel/dimm/rank,
    engine=serd.memory.intel.dimm_ce@
    motherboard/memory-controller/dram-channel/dimm/rank;
event error.memory.intel.dimm_ce@
    motherboard/memory-controller/dram-channel/dimm;

/* Direct propagation from the fault to the rank ereport. */
prop fault.memory.intel.dimm_ce@
    motherboard/memory-controller/dram-channel/dimm/rank
    (1)->
    ereport.cpu.intel.nb.mem_ce@
    motherboard/memory-controller/dram-channel/dimm/rank;

/*
 * Rules used when the DIMM has no "dimm-size" configuration property:
 * the fault reaches the DIMM-level error, and the error reaches the
 * ereport only once count(stat.ce_pgflt) for the DIMM exceeds 512.
 */
prop fault.memory.intel.dimm_ce@
    motherboard/memory-controller/dram-channel/dimm/rank
    {
	!confprop_defined(motherboard/memory-controller/dram-channel/dimm,
	"dimm-size")
    } (1)->
    error.memory.intel.dimm_ce@
    motherboard/memory-controller/dram-channel/dimm;
prop error.memory.intel.dimm_ce@
    motherboard/memory-controller/dram-channel/dimm
    {
	!confprop_defined(motherboard/memory-controller/dram-channel/dimm,
	"dimm-size") &&
	count(stat.ce_pgflt@
	motherboard/memory-controller/dram-channel/dimm) > 512
    } (1)->
    ereport.cpu.intel.nb.mem_ce@
    motherboard/memory-controller/dram-channel/dimm/rank<>;
305
/*
 * DIMM_CE(dimm_size, n, t, fault_rate) emits the dimm_ce rule pair for
 * DIMMs whose "dimm-size" configuration property equals dimm_size:
 *   - fault -> error, with a constraint that also calls setserdn(n) and
 *     setserdt(t) (combined with '&');
 *   - error -> ereport, once count(stat.ce_pgflt) for the DIMM is greater
 *     than fault_rate.
 * Unlike the other macros in this file the body ends with its own ';', so
 * the uses below carry none.
 */
#define	DIMM_CE(dimm_size, n, t, fault_rate)				\
	prop fault.memory.intel.dimm_ce@				\
	    motherboard/memory-controller/dram-channel/dimm/rank	\
	    {								\
	    confprop(motherboard/memory-controller/dram-channel/dimm,	\
	    "dimm-size") == dimm_size &&				\
	    setserdn(n) & setserdt(t)					\
	    } (1)->							\
	    error.memory.intel.dimm_ce@					\
	    motherboard/memory-controller/dram-channel/dimm;		\
	prop error.memory.intel.dimm_ce@				\
	    motherboard/memory-controller/dram-channel/dimm		\
	    {								\
	    confprop(motherboard/memory-controller/dram-channel/dimm,	\
	    "dimm-size") == dimm_size &&				\
	    count(stat.ce_pgflt@					\
	    motherboard/memory-controller/dram-channel/dimm) >		\
	    fault_rate							\
	    } (1)->							\
	    ereport.cpu.intel.nb.mem_ce@				\
	    motherboard/memory-controller/dram-channel/dimm/rank<>;

/* Arguments: dimm-size value, SERD N, SERD T, page-fault count threshold. */
DIMM_CE("8G", 8, 1week, 2000)
DIMM_CE("4G", 4, 1week, 1500)
DIMM_CE("2G", 4, 2week, 1000)
DIMM_CE("1G", 4, 4week, 500)
DIMM_CE("512M", 4, 8week, 250)
DIMM_CE("256M", 4, 16week, 125)
330
/*
 * FBD rules.  Each fault has a (1)-> propagation to its own nb.fbd
 * ereport; all except fbd.otf also have a (0)-> propagation to the
 * EREPORT_BUS_ERROR list.
 */

/* fbd.alert, at rank */
event ereport.cpu.intel.nb.fbd.alert@rank { within(12s) };
event fault.memory.intel.fbd.alert@rank,
    retire=0;

prop fault.memory.intel.fbd.alert@rank
    (1)-> ereport.cpu.intel.nb.fbd.alert@rank;

prop fault.memory.intel.fbd.alert@rank
    (0)-> EREPORT_BUS_ERROR;

/* fbd.crc, at rank */
event ereport.cpu.intel.nb.fbd.crc@rank { within(12s) };
event fault.memory.intel.fbd.crc@rank,
    retire=0;

prop fault.memory.intel.fbd.crc@rank
    (1)-> ereport.cpu.intel.nb.fbd.crc@rank;

prop fault.memory.intel.fbd.crc@rank
    (0)-> EREPORT_BUS_ERROR;

/* fbd.reset_timeout, at memory-controller */
event ereport.cpu.intel.nb.fbd.reset_timeout@memory-controller
    { within(12s) };
event fault.memory.intel.fbd.reset_timeout@memory-controller,
    retire=0;

prop fault.memory.intel.fbd.reset_timeout@memory-controller
    (1)-> ereport.cpu.intel.nb.fbd.reset_timeout@memory-controller;

prop fault.memory.intel.fbd.reset_timeout@memory-controller
    (0)-> EREPORT_BUS_ERROR;

/* fbd.ch, at dram-channel, behind a SERD engine (N=2, T=1month) */
event ereport.cpu.intel.nb.fbd.ch@dram-channel { within(12s) };
engine serd.cpu.intel.nb.fbd.ch@dram-channel,
    N=2, T=1month;
event fault.memory.intel.fbd.ch@dram-channel,
    retire=0,
    engine=serd.cpu.intel.nb.fbd.ch@dram-channel;

prop fault.memory.intel.fbd.ch@dram-channel
    (1)-> ereport.cpu.intel.nb.fbd.ch@dram-channel;

prop fault.memory.intel.fbd.ch@dram-channel
    (0)-> EREPORT_BUS_ERROR;

/* fbd.otf, at dram-channel, behind a SERD engine (N=2, T=1week) */
event ereport.cpu.intel.nb.fbd.otf@dram-channel { within(12s) };
engine serd.cpu.intel.nb.fbd_otf@dram-channel,
    N=2, T=1week;
event fault.memory.intel.fbd.otf@dram-channel,
    retire=0, response=0,
    engine=serd.cpu.intel.nb.fbd_otf@dram-channel;

prop fault.memory.intel.fbd.otf@dram-channel
    (1)-> ereport.cpu.intel.nb.fbd.otf@dram-channel;
375
/* nb.otf, at motherboard: fault declared with retire=0, response=0. */
event ereport.cpu.intel.nb.otf@motherboard { within(12s) };
event fault.cpu.intel.nb.otf@motherboard,
    retire=0, response=0;

prop fault.cpu.intel.nb.otf@motherboard
    (1)-> ereport.cpu.intel.nb.otf@motherboard;

/*
 * nb.unknown (at memory-controller and at dram-channel) and nb.spd are
 * attached to upset.discard@memory-controller.
 */
event ereport.cpu.intel.nb.unknown@memory-controller
    { within(12s) };
event ereport.cpu.intel.nb.unknown@memory-controller/dram-channel
    { within(12s) };
event ereport.cpu.intel.nb.spd@memory-controller/dram-channel
    { within(12s) };
event upset.discard@memory-controller;

prop upset.discard@memory-controller
    (0)-> ereport.cpu.intel.nb.unknown@memory-controller,
    ereport.cpu.intel.nb.unknown@memory-controller/dram-channel,
    ereport.cpu.intel.nb.spd@memory-controller/dram-channel;

/*
 * mem_ds: ereport at the memory controller, fault at rank level; the rank
 * instance number must equal the "rank" payload member.
 */
event ereport.cpu.intel.nb.mem_ds@memory-controller { within(30s) };
event fault.memory.intel.fbd.mem_ds@
    memory-controller/dram-channel/dimm/rank,
    retire=0;

prop fault.memory.intel.fbd.mem_ds@
    memory-controller/dram-channel/dimm/rank[rank_num]
    {
	payloadprop_defined("rank") &&
	rank_num == payloadprop("rank")
    } (1)->
    ereport.cpu.intel.nb.mem_ds@memory-controller;
400
/* nb.fsb, at chip */
event ereport.cpu.intel.nb.fsb@chip { within(12s) };
event fault.cpu.intel.nb.fsb@chip,
    retire=0;

prop fault.cpu.intel.nb.fsb@chip
    (1)-> ereport.cpu.intel.nb.fsb@chip;

prop fault.cpu.intel.nb.fsb@chip
    (0)-> EREPORT_BUS_ERROR;

/*
 * nb.ie, at motherboard.  The "intel-error-list" payload member selects
 * the diagnosis: a value of "B6" goes to the ie_ce upset, any other value
 * goes to the fault.
 */
event ereport.cpu.intel.nb.ie@motherboard { within(12s) };
event fault.cpu.intel.nb.ie@motherboard,
    retire=0;
event upset.cpu.intel.nb.ie_ce@motherboard { within(12s) };

prop upset.cpu.intel.nb.ie_ce@motherboard
    { payloadprop("intel-error-list") == "B6" }
    (0)-> ereport.cpu.intel.nb.ie@motherboard;

prop fault.cpu.intel.nb.ie@motherboard
    { payloadprop("intel-error-list") != "B6" }
    (1)-> ereport.cpu.intel.nb.ie@motherboard;

prop fault.cpu.intel.nb.ie@motherboard
    (0)-> EREPORT_BUS_ERROR;

/* nb.dma, at motherboard: fault declared with retire=0, response=0. */
event ereport.cpu.intel.nb.dma@motherboard { within(12s) };
event fault.cpu.intel.nb.dma@motherboard,
    retire=0, response=0;

prop fault.cpu.intel.nb.dma@motherboard
    (1)-> ereport.cpu.intel.nb.dma@motherboard;

prop fault.cpu.intel.nb.dma@motherboard
    (0)-> EREPORT_BUS_ERROR;

/* nb.esi and nb.pex are both attached to the pex upset at hostbridge. */
event ereport.cpu.intel.nb.esi@motherboard { within(12s) };
event ereport.cpu.intel.nb.pex@hostbridge { within(12s) };
event upset.cpu.intel.nb.pex@hostbridge;

prop upset.cpu.intel.nb.pex@hostbridge
    (1)-> ereport.cpu.intel.nb.esi@motherboard,
    ereport.cpu.intel.nb.pex@hostbridge;

prop upset.cpu.intel.nb.pex@hostbridge
    (0)-> EREPORT_BUS_ERROR;

/* nb.unknown at rank is attached to upset.discard@rank. */
event ereport.cpu.intel.nb.unknown@rank { within(12s) };
event upset.discard@rank;

prop upset.discard@rank
    (1)-> ereport.cpu.intel.nb.unknown@rank;

prop upset.discard@rank
    (0)-> EREPORT_BUS_ERROR;
448
449/*
450 * CPU integrated memory controller
451 */
452
/*
 * CONTAINS_RANK: true when the "resource" payload member contains the
 * ASRU of either the rank or its DIMM.
 */
#define CONTAINS_RANK (payloadprop_contains("resource", \
    asru(motherboard/chip/memory-controller/dram-channel/dimm/rank)) || \
    payloadprop_contains("resource", \
    asru(motherboard/chip/memory-controller/dram-channel/dimm)))

/*
 * STAT_CPU_MEM_CE_PGFLTS names the per-DIMM statistics engine;
 * CPU_MEM_CE_PGFLTS is its current count.  The continuation backslash has
 * to be the last character on the #define line: it used to be followed by
 * the macro body on the same line, which made the backslash part of the
 * expansion instead of a line continuation.
 */
#define	STAT_CPU_MEM_CE_PGFLTS \
    stat.ce_pgflt@motherboard/chip/memory-controller/dram-channel/dimm
#define	CPU_MEM_CE_PGFLTS \
    (count(STAT_CPU_MEM_CE_PGFLTS))

/*
 * SET_RES_OFFSET: true when the payload has no
 * "resource[0].hc-specific.offset" member; otherwise the result of copying
 * it into "asru-offset" with setpayloadprop().
 */
#define SET_RES_OFFSET \
    (!payloadprop_defined("resource[0].hc-specific.offset") || \
    setpayloadprop("asru-offset", \
    payloadprop("resource[0].hc-specific.offset")))

engine STAT_CPU_MEM_CE_PGFLTS;
467engine STAT_CPU_MEM_CE_PGFLTS;
468
/*
 * Integrated memory controller, mem_ue: the ereport is raised at the
 * memory controller and the faults are declared at rank level.
 */
event ereport.cpu.intel.quickpath.mem_ue@
    motherboard/chip/memory-controller
    { within(12s) };

/* do not message individual pageflts */
event fault.memory.intel.page_ue@
    motherboard/chip/memory-controller/dram-channel/dimm/rank,
    message=0,
    response=0;

/*
 * page_ue: the resource payload must contain this rank (or its DIMM) and
 * the payload must carry "physaddr" or the resource offset; SET_ADDR and
 * SET_RES_OFFSET then copy whichever is present into the fault payload.
 */
prop fault.memory.intel.page_ue@
    motherboard/chip/memory-controller/dram-channel/dimm/rank
    {
	CONTAINS_RANK &&
	(payloadprop_defined("physaddr") ||
	payloadprop_defined("resource[0].hc-specific.offset")) &&
	SET_ADDR && SET_RES_OFFSET
    } (1)->
    ereport.cpu.intel.quickpath.mem_ue@motherboard/chip/memory-controller;

event fault.memory.intel.dimm_ue@
    motherboard/chip/memory-controller/dram-channel/dimm/rank;

/* dimm_ue: only CONTAINS_RANK has to hold. */
prop fault.memory.intel.dimm_ue@
    motherboard/chip/memory-controller/dram-channel/dimm/rank
    { CONTAINS_RANK }
    (1)->
    ereport.cpu.intel.quickpath.mem_ue@motherboard/chip/memory-controller;

prop fault.memory.intel.dimm_ue@
    motherboard/chip/memory-controller/dram-channel/dimm/rank
    (0)-> EREPORT_BUS_ERROR;
494
/*
 * Integrated memory controller, mem_ce: the ereport is raised at the
 * memory controller; page_ce is diagnosed at rank level and dimm_ce at
 * DIMM level.
 */
event ereport.cpu.intel.quickpath.mem_ce@
    motherboard/chip/memory-controller{within(12s)};

engine serd.memory.intel.page_ce@
    motherboard/chip/memory-controller/dram-channel/dimm/rank,
    N=PAGE_CE_COUNT, T=PAGE_CE_TIME;

/* page_ce has message=0, response=0 and counts into the per-DIMM stat. */
event fault.memory.intel.page_ce@
    motherboard/chip/memory-controller/dram-channel/dimm/rank,
    message=0, response=0,
    count=STAT_CPU_MEM_CE_PGFLTS,
    engine=serd.memory.intel.page_ce@
    motherboard/chip/memory-controller/dram-channel/dimm/rank;

prop fault.memory.intel.page_ce@
    motherboard/chip/memory-controller/dram-channel/dimm/rank
    { CONTAINS_RANK && (payloadprop_defined("physaddr") ||
    payloadprop_defined("resource[0].hc-specific.offset")) &&
    SET_ADDR && SET_RES_OFFSET } (1)->
    ereport.cpu.intel.quickpath.mem_ce@motherboard/chip/memory-controller;

/*
 * Default SERD parameters for the DIMM engine.  These are the DIMM_CE_*
 * values, matching the northbridge dimm_ce engine; the engine previously
 * reused the page-level PAGE_CE_COUNT/PAGE_CE_TIME values.  The
 * CPU_MEM_DIMM_CE rules call setserdn()/setserdt() for DIMMs whose
 * "dimm-size" they recognize.
 */
engine serd.memory.intel.dimm_ce@
    motherboard/chip/memory-controller/dram-channel/dimm,
    N=DIMM_CE_COUNT, T=DIMM_CE_TIME;
event fault.memory.intel.dimm_ce@
    motherboard/chip/memory-controller/dram-channel/dimm,
    engine=serd.memory.intel.dimm_ce@
    motherboard/chip/memory-controller/dram-channel/dimm;
event error.memory.intel.dimm_ce@
    motherboard/chip/memory-controller/dram-channel/dimm;

/*
 * Rules used when the DIMM has no "dimm-size" configuration property:
 * the fault reaches the error, and the error reaches the ereport only
 * once count(STAT_CPU_MEM_CE_PGFLTS) exceeds 512.
 */
prop fault.memory.intel.dimm_ce@
    motherboard/chip/memory-controller/dram-channel/dimm
    { !confprop_defined(motherboard/chip/memory-controller/dram-channel/dimm,
    "dimm-size") } (1)->
    error.memory.intel.dimm_ce@
    motherboard/chip/memory-controller/dram-channel/dimm;
prop error.memory.intel.dimm_ce@
    motherboard/chip/memory-controller/dram-channel/dimm
    { !confprop_defined(motherboard/chip/memory-controller/dram-channel/dimm,
    "dimm-size") &&
    count(STAT_CPU_MEM_CE_PGFLTS) > 512 } (1)->
    ereport.cpu.intel.quickpath.mem_ce@motherboard/chip/memory-controller;
537
/*
 * CPU_MEM_DIMM_CE(dimm_size, n, t, fault_rate) emits the dimm_ce rule pair
 * for DIMMs whose "dimm-size" configuration property equals dimm_size:
 *   - fault -> error, with a constraint that also calls setserdn(n) and
 *     setserdt(t) (combined with '&');
 *   - error -> ereport, once count(STAT_CPU_MEM_CE_PGFLTS) is greater
 *     than fault_rate.
 * The body ends with its own ';', so the uses below carry none.
 */
#define	CPU_MEM_DIMM_CE(dimm_size, n, t, fault_rate)			\
	prop fault.memory.intel.dimm_ce@				\
	    motherboard/chip/memory-controller/dram-channel/dimm	\
	    {								\
	    confprop(							\
	    motherboard/chip/memory-controller/dram-channel/dimm,	\
	    "dimm-size") == dimm_size &&				\
	    setserdn(n) & setserdt(t)					\
	    } (1)->							\
	    error.memory.intel.dimm_ce@					\
	    motherboard/chip/memory-controller/dram-channel/dimm;	\
	prop error.memory.intel.dimm_ce@				\
	    motherboard/chip/memory-controller/dram-channel/dimm	\
	    {								\
	    confprop(							\
	    motherboard/chip/memory-controller/dram-channel/dimm,	\
	    "dimm-size") == dimm_size &&				\
	    count(STAT_CPU_MEM_CE_PGFLTS) > fault_rate			\
	    } (1)->							\
	    ereport.cpu.intel.quickpath.mem_ce@				\
	    motherboard/chip/memory-controller;

/* Arguments: dimm-size value, SERD N, SERD T, page-fault count threshold. */
CPU_MEM_DIMM_CE("16G", 16, 1week, 2000)
CPU_MEM_DIMM_CE("8G", 8, 1week, 2000)
CPU_MEM_DIMM_CE("4G", 4, 1week, 1500)
CPU_MEM_DIMM_CE("2G", 4, 2week, 1000)
CPU_MEM_DIMM_CE("1G", 4, 4week, 500)
CPU_MEM_DIMM_CE("512M", 4, 8week, 250)
560
/*
 * mem_unknown ereports are declared at memory-controller, dram-channel and
 * rank level; all of them are attached to upset.discard.
 */
event ereport.cpu.intel.quickpath.mem_unknown@
    motherboard/chip/memory-controller
    { within(12s) };
event ereport.cpu.intel.quickpath.mem_unknown@
    motherboard/chip/memory-controller/dram-channel
    { within(12s) };
event ereport.cpu.intel.quickpath.mem_unknown@
    motherboard/chip/memory-controller/dram-channel/dimm/rank
    { within(12s) };
event upset.discard@motherboard/chip/memory-controller;
event upset.discard@motherboard/chip/memory-controller/dram-channel/dimm/rank;

/* Controller-level upset covers the controller and channel ereports. */
prop upset.discard@motherboard/chip/memory-controller
    (0)->
    ereport.cpu.intel.quickpath.mem_unknown@motherboard/chip/memory-controller,
    ereport.cpu.intel.quickpath.mem_unknown@
    motherboard/chip/memory-controller/dram-channel;

/* Rank-level upset covers the rank ereport. */
prop upset.discard@
    motherboard/chip/memory-controller/dram-channel/dimm/rank
    (1)->
    ereport.cpu.intel.quickpath.mem_unknown@
    motherboard/chip/memory-controller/dram-channel/dimm/rank;
578
/*
 * Remaining integrated memory controller ereports.  Every ereport in this
 * group is raised at motherboard/chip/memory-controller with within(1s).
 */

/* mem_parity: fault at the memory controller. */
event ereport.cpu.intel.quickpath.mem_parity@
    motherboard/chip/memory-controller
    { within(1s) };
event fault.cpu.intel.quickpath.mem_parity@
    motherboard/chip/memory-controller;

prop fault.cpu.intel.quickpath.mem_parity@
    motherboard/chip/memory-controller
    (1)->
    ereport.cpu.intel.quickpath.mem_parity@
    motherboard/chip/memory-controller;

/*
 * mem_addr_parity: one fault at the memory controller and one at DIMM
 * level; the DIMM fault applies only when the "resource" payload member
 * contains that DIMM.
 */
event ereport.cpu.intel.quickpath.mem_addr_parity@
    motherboard/chip/memory-controller
    { within(1s) };
event fault.cpu.intel.quickpath.mem_addr_parity@
    motherboard/chip/memory-controller/dram-channel/dimm;
event fault.cpu.intel.quickpath.mem_addr_parity@
    motherboard/chip/memory-controller;

prop fault.cpu.intel.quickpath.mem_addr_parity@
    motherboard/chip/memory-controller
    (1)->
    ereport.cpu.intel.quickpath.mem_addr_parity@
    motherboard/chip/memory-controller;

prop fault.cpu.intel.quickpath.mem_addr_parity@
    motherboard/chip/memory-controller/dram-channel/dimm
    {
	payloadprop_contains("resource",
	asru(motherboard/chip/memory-controller/dram-channel/dimm))
    } (1)->
    ereport.cpu.intel.quickpath.mem_addr_parity@
    motherboard/chip/memory-controller;

/* mem_bad_addr: fault at the memory controller. */
event ereport.cpu.intel.quickpath.mem_bad_addr@
    motherboard/chip/memory-controller
    { within(1s) };
event fault.cpu.intel.quickpath.mem_bad_addr@
    motherboard/chip/memory-controller;

prop fault.cpu.intel.quickpath.mem_bad_addr@
    motherboard/chip/memory-controller
    (1)->
    ereport.cpu.intel.quickpath.mem_bad_addr@
    motherboard/chip/memory-controller;

/*
 * mem_spare: fault at DIMM level.
 * NOTE(review): unlike mem_addr_parity and mem_redundant there is no
 * payloadprop_contains("resource", ...) constraint here - confirm that
 * is intended.
 */
event ereport.cpu.intel.quickpath.mem_spare@
    motherboard/chip/memory-controller
    { within(1s) };
event fault.cpu.intel.quickpath.mem_spare@
    motherboard/chip/memory-controller/dram-channel/dimm;

prop fault.cpu.intel.quickpath.mem_spare@
    motherboard/chip/memory-controller/dram-channel/dimm
    (1)->
    ereport.cpu.intel.quickpath.mem_spare@
    motherboard/chip/memory-controller;

/* mem_bad_id: fault at the memory controller. */
event ereport.cpu.intel.quickpath.mem_bad_id@
    motherboard/chip/memory-controller
    { within(1s) };
event fault.cpu.intel.quickpath.mem_bad_id@
    motherboard/chip/memory-controller;

prop fault.cpu.intel.quickpath.mem_bad_id@
    motherboard/chip/memory-controller
    (1)->
    ereport.cpu.intel.quickpath.mem_bad_id@
    motherboard/chip/memory-controller;

/*
 * mem_redundant: fault at DIMM level behind a SERD engine (N=2, T=72h),
 * applied only when the "resource" payload member contains that DIMM.
 */
event ereport.cpu.intel.quickpath.mem_redundant@
    motherboard/chip/memory-controller
    { within(1s) };
engine serd.cpu.intel.quickpath.mem_redundant@
    motherboard/chip/memory-controller/dram-channel/dimm,
    N=2,
    T=72h;
event fault.cpu.intel.quickpath.mem_redundant@
    motherboard/chip/memory-controller/dram-channel/dimm,
    engine=serd.cpu.intel.quickpath.mem_redundant@
    motherboard/chip/memory-controller/dram-channel/dimm;

prop fault.cpu.intel.quickpath.mem_redundant@
    motherboard/chip/memory-controller/dram-channel/dimm
    {
	payloadprop_contains("resource",
	asru(motherboard/chip/memory-controller/dram-channel/dimm))
    } (1)->
    ereport.cpu.intel.quickpath.mem_redundant@
    motherboard/chip/memory-controller;
635
/*
 * QuickPath interconnect ereport, raised at motherboard/chip.
 * NOTE(review): STATUS_UC is used by both constraints below but is not
 * defined in the part of the file visible here - confirm where it comes
 * from.
 */
event ereport.cpu.intel.quickpath.interconnect@motherboard/chip
    { within(1s) };

/* Diagnose corrected events to upsets */
event upset.cpu.intel.quickpath.interconnect@motherboard/chip;

prop upset.cpu.intel.quickpath.interconnect@motherboard/chip
    { !STATUS_UC }
    (1)-> ereport.cpu.intel.quickpath.interconnect@motherboard/chip;

/* The fault is bound to a SERD engine with N=3, T=72h. */
engine serd.cpu.intel.quickpath.interconnect@motherboard/chip,
    N=3,
    T=72h;
event fault.cpu.intel.quickpath.interconnect@motherboard/chip,
    engine=serd.cpu.intel.quickpath.interconnect@motherboard/chip;

/* Diagnose uncorrected events to faults */
prop fault.cpu.intel.quickpath.interconnect@motherboard/chip
    { STATUS_UC }
    (0)-> ereport.cpu.intel.quickpath.interconnect@motherboard/chip;
654