xref: /illumos-gate/usr/src/cmd/fm/eversholt/files/i386/i86pc/intel.esc (revision a6d4d7d5d0e34964282f736f7bade0574645f1fd)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#pragma dictionary "INTEL"
28
29/*
30 * Eversholt rules for the intel CPU/Memory
31 */
32
33/*
34 * Ereports for Simple error codes.
35 */
36
/*
 * SMPL_EVENT(leaf, window) declares ereport.cpu.intel.<leaf> at both
 * detector paths used by the CPU rules in this file (chip/cpu and
 * chip/core/strand), each carrying a within(<window>) constraint.
 */
#define	SMPL_EVENT(leaf, window)					\
	event ereport.cpu.intel.leaf@chip/cpu { within(window) };	\
	event ereport.cpu.intel.leaf@chip/core/strand { within(window) }

/* These two propagate only to upset.discard. */
SMPL_EVENT(unknown, 1s);
SMPL_EVENT(external, 1s);

/* These six propagate to fault.cpu.intel.internal. */
SMPL_EVENT(unclassified, 1s);
SMPL_EVENT(microcode_rom_parity, 1s);
SMPL_EVENT(frc, 1s);
SMPL_EVENT(internal_timer, 1s);
SMPL_EVENT(internal_parity, 1s);
SMPL_EVENT(internal_unclassified, 1s);
49
/*
 * Propagations for all but "external" and "unknown" simple errors.
 * If the error is uncorrected we produce a fault immediately, otherwise
 * we diagnose it to an upset and declare a fault when the SERD engine
 * trips.
 */
56
/*
 * SIMPLE_FLT_PROP(loc) emits, for detector path <loc>:
 *   - SERD engine serd.cpu.intel.simple (N=3, T=72h),
 *   - fault.cpu.intel.internal bound to that engine, and
 *   - the (0)-> propagation from that fault to the six simple ereports.
 * When the ereport's "error_uncorrected" payload member equals 1 the
 * constraint calls setserdincrement(4); otherwise it is the constant 1.
 */
#define	SIMPLE_FLT_PROP(loc)						\
	engine serd.cpu.intel.simple@loc, N=3, T=72h;			\
	event fault.cpu.intel.internal@loc,				\
	    engine=serd.cpu.intel.simple@loc;				\
	prop fault.cpu.intel.internal@loc				\
	    { payloadprop("error_uncorrected") == 1 ?			\
	    setserdincrement(4) : 1 } (0)->				\
	    ereport.cpu.intel.microcode_rom_parity@loc,			\
	    ereport.cpu.intel.internal_timer@loc,			\
	    ereport.cpu.intel.internal_parity@loc,			\
	    ereport.cpu.intel.unclassified@loc,				\
	    ereport.cpu.intel.internal_unclassified@loc,		\
	    ereport.cpu.intel.frc@loc

SIMPLE_FLT_PROP(chip/cpu);
SIMPLE_FLT_PROP(chip/core/strand);
79
80/*
81 * Ereports for Compound error codes.  These are in pairs "foo" and "foo_uc"
82 * for the corrected and uncorrected version of each error type.  All are
83 * detected at chip/cpu and chip/core/strand.
84 */
85
/*
 * CMPND_EVENT(leaf, window) declares four ereports for one compound
 * error class: <leaf> and <leaf>_uc, each at chip/cpu and at
 * chip/core/strand, all with within(<window>).  The empty comment in
 * the body splices "_uc" onto the substituted leaf name.
 */
#define	CMPND_EVENT(leaf, window)					\
	event ereport.cpu.intel.leaf@chip/cpu { within(window) };	\
	event ereport.cpu.intel.leaf/**/_uc@chip/cpu { within(window) };	\
	event ereport.cpu.intel.leaf@chip/core/strand { within(window) }; \
	event ereport.cpu.intel.leaf/**/_uc@chip/core/strand { within(window) }

/* Cache errors (l0, l1, l2, and level-less). */
CMPND_EVENT(l0cache, 1s);
CMPND_EVENT(l1cache, 1s);
CMPND_EVENT(l2cache, 1s);
CMPND_EVENT(cache, 1s);

/* TLB errors: data TLB. */
CMPND_EVENT(l0dtlb, 1s);
CMPND_EVENT(l1dtlb, 1s);
CMPND_EVENT(l2dtlb, 1s);
CMPND_EVENT(dtlb, 1s);

/* TLB errors: instruction TLB. */
CMPND_EVENT(l0itlb, 1s);
CMPND_EVENT(l1itlb, 1s);
CMPND_EVENT(l2itlb, 1s);
CMPND_EVENT(itlb, 1s);

/* TLB errors: tlb classes without a d/i qualifier. */
CMPND_EVENT(l0tlb, 1s);
CMPND_EVENT(l1tlb, 1s);
CMPND_EVENT(l2tlb, 1s);
CMPND_EVENT(tlb, 1s);

/* Memory hierarchy errors: data cache. */
CMPND_EVENT(l0dcache, 1s);
CMPND_EVENT(l1dcache, 1s);
CMPND_EVENT(l2dcache, 1s);
CMPND_EVENT(dcache, 1s);

/* Memory hierarchy errors: instruction cache. */
CMPND_EVENT(l0icache, 1s);
CMPND_EVENT(l1icache, 1s);
CMPND_EVENT(l2icache, 1s);
CMPND_EVENT(icache, 1s);

/* Bus and interconnect errors. */
CMPND_EVENT(bus_interconnect, 1s);
CMPND_EVENT(bus_interconnect_memory, 1s);
CMPND_EVENT(bus_interconnect_io, 1s);
137
/*
 * Compound error propagations.
 *
 * We resist the temptation to propagate, for example, a single dcache fault
 * to all ereports mentioning dcache (l0dcache, l1dcache, l2dcache, dcache).
 * Instead we will diagnose a distinct fault for each possible cache level,
 * whether or not current chips have dcaches at all levels.
 *
 * Corrected errors are SERDed and produce a fault when the engine fires;
 * the same fault is diagnosed immediately for a corresponding uncorrected
 * error.
 */
150
/*
 * CMPND_PROPS(erpt, flt, serd_n) holds the propagations common to both
 * CMPND_FLT_PROP_* macros, emitted at chip/cpu and chip/core/strand:
 *   fault.cpu.intel.<flt> (0)-> ereport.cpu.intel.<erpt>
 *   fault.cpu.intel.<flt> (0)-> ereport.cpu.intel.<erpt>_uc
 * The _uc edges call setserdincrement(<serd_n> + 1).
 */
#define	CMPND_PROPS(erpt, flt, serd_n)					\
	prop fault.cpu.intel.flt@chip/cpu (0)->				\
	    ereport.cpu.intel.erpt@chip/cpu;				\
	prop fault.cpu.intel.flt@chip/core/strand (0)->			\
	    ereport.cpu.intel.erpt@chip/core/strand;			\
	prop fault.cpu.intel.flt@chip/cpu				\
	    { setserdincrement(serd_n + 1) } (0)->			\
	    ereport.cpu.intel.erpt/**/_uc@chip/cpu;			\
	prop fault.cpu.intel.flt@chip/core/strand			\
	    { setserdincrement(serd_n + 1) } (0)->			\
	    ereport.cpu.intel.erpt/**/_uc@chip/core/strand

/*
 * CMPND_FLT_PROP_1(erpt, flt, serd_n, serd_t): declare the SERD engine
 * serd.cpu.intel.<flt> (N=<serd_n>, T=<serd_t>) and the fault
 * fault.cpu.intel.<flt> bound to it, at both detector paths, then add
 * the shared propagations.
 */
#define	CMPND_FLT_PROP_1(erpt, flt, serd_n, serd_t)			\
	engine serd.cpu.intel.flt@chip/cpu, N=serd_n, T=serd_t;		\
	event fault.cpu.intel.flt@chip/cpu,				\
	    engine=serd.cpu.intel.flt@chip/cpu;				\
	engine serd.cpu.intel.flt@chip/core/strand, N=serd_n, T=serd_t;	\
	event fault.cpu.intel.flt@chip/core/strand,			\
	    engine=serd.cpu.intel.flt@chip/core/strand;			\
	CMPND_PROPS(erpt, flt, serd_n)

/*
 * CMPND_FLT_PROP_2 is CMPND_FLT_PROP_1 with retire=0 and response=0
 * added to each fault declaration.
 */
#define	CMPND_FLT_PROP_2(erpt, flt, serd_n, serd_t)			\
	engine serd.cpu.intel.flt@chip/cpu, N=serd_n, T=serd_t;		\
	event fault.cpu.intel.flt@chip/cpu, retire=0, response=0,	\
	    engine=serd.cpu.intel.flt@chip/cpu;				\
	engine serd.cpu.intel.flt@chip/core/strand, N=serd_n, T=serd_t;	\
	event fault.cpu.intel.flt@chip/core/strand, retire=0, response=0, \
	    engine=serd.cpu.intel.flt@chip/core/strand;			\
	CMPND_PROPS(erpt, flt, serd_n)
190
/*
 * Instantiate the compound fault rules.  The first argument is the
 * ereport leaf and the second the fault/SERD leaf; every row uses the
 * same name for both so that each level gets its own fault class.
 * Level-specific classes use N=3, level-less classes N=12, and the
 * bus/interconnect classes N=10; all use T=72h.
 */
CMPND_FLT_PROP_1(l0cache, l0cache, 3, 72h);
CMPND_FLT_PROP_1(l1cache, l1cache, 3, 72h);
CMPND_FLT_PROP_1(l2cache, l2cache, 3, 72h);
CMPND_FLT_PROP_1(cache, cache, 12, 72h);

CMPND_FLT_PROP_1(l0dtlb, l0dtlb, 3, 72h);
CMPND_FLT_PROP_1(l1dtlb, l1dtlb, 3, 72h);
CMPND_FLT_PROP_1(l2dtlb, l2dtlb, 3, 72h);
CMPND_FLT_PROP_1(dtlb, dtlb, 12, 72h);

CMPND_FLT_PROP_1(l0itlb, l0itlb, 3, 72h);
CMPND_FLT_PROP_1(l1itlb, l1itlb, 3, 72h);
CMPND_FLT_PROP_1(l2itlb, l2itlb, 3, 72h);
CMPND_FLT_PROP_1(itlb, itlb, 12, 72h);

/*
 * These three rows previously all named the fault leaf "litlb", which
 * funnelled l0tlb, l1tlb and l2tlb ereports into a single fault class
 * and SERD engine.
 * NOTE(review): confirm the INTEL dictionary has entries for
 * fault.cpu.intel.l0tlb, .l1tlb and .l2tlb before shipping.
 */
CMPND_FLT_PROP_1(l0tlb, l0tlb, 3, 72h);
CMPND_FLT_PROP_1(l1tlb, l1tlb, 3, 72h);
CMPND_FLT_PROP_1(l2tlb, l2tlb, 3, 72h);
CMPND_FLT_PROP_1(tlb, tlb, 12, 72h);

CMPND_FLT_PROP_1(l0dcache, l0dcache, 3, 72h);
CMPND_FLT_PROP_1(l1dcache, l1dcache, 3, 72h);
CMPND_FLT_PROP_1(l2dcache, l2dcache, 3, 72h);
CMPND_FLT_PROP_1(dcache, dcache, 12, 72h);

CMPND_FLT_PROP_1(l0icache, l0icache, 3, 72h);
CMPND_FLT_PROP_1(l1icache, l1icache, 3, 72h);
CMPND_FLT_PROP_1(l2icache, l2icache, 3, 72h);
CMPND_FLT_PROP_1(icache, icache, 12, 72h);

/* Bus/interconnect faults are declared with retire=0, response=0. */
CMPND_FLT_PROP_2(bus_interconnect, bus_interconnect, 10, 72h);
CMPND_FLT_PROP_2(bus_interconnect_memory, bus_interconnect_memory, 10, 72h);
CMPND_FLT_PROP_2(bus_interconnect_io, bus_interconnect_io, 10, 72h);
224
/*
 * "external" and "unknown" simple ereports are diagnosed only to
 * upset.discard, at whichever of the two CPU paths reported them.
 */
event upset.discard@chip/cpu;
event upset.discard@chip/core/strand;

prop upset.discard@chip/cpu (0)->
    ereport.cpu.intel.external@chip/cpu,
    ereport.cpu.intel.unknown@chip/cpu;

prop upset.discard@chip/core/strand (0)->
    ereport.cpu.intel.external@chip/core/strand,
    ereport.cpu.intel.unknown@chip/core/strand;
236
237/* errors detected in northbridge */
238
239
/*
 * SET_ADDR / SET_OFFSET: when the ereport payload carries "physaddr"
 * (respectively "offset"), copy it into the diagnosed fault under
 * "asru-physaddr" (respectively "asru-offset").  When the member is
 * absent the !payloadprop_defined() arm of the || is true and
 * setpayloadprop() is not reached.
 */
#define	SET_ADDR							\
	(!payloadprop_defined("physaddr") ||				\
	setpayloadprop("asru-physaddr", payloadprop("physaddr")))

#define	SET_OFFSET							\
	(!payloadprop_defined("offset") ||				\
	setpayloadprop("asru-offset", payloadprop("offset")))

/*
 * EREPORT_BUS_ERROR: the CPU-side ereports (bus_interconnect,
 * bus_interconnect_memory, their _uc forms, and external) at both
 * chip/cpu and chip/core/strand.  Rules below list it on the right of
 * a (0)-> edge.
 */
#define	EREPORT_BUS_ERROR						\
	ereport.cpu.intel.bus_interconnect_memory_uc@chip/cpu,		\
	ereport.cpu.intel.bus_interconnect_uc@chip/cpu,			\
	ereport.cpu.intel.bus_interconnect_memory@chip/cpu,		\
	ereport.cpu.intel.bus_interconnect@chip/cpu,			\
	ereport.cpu.intel.external@chip/cpu,				\
	ereport.cpu.intel.bus_interconnect_memory_uc@chip/core/strand,	\
	ereport.cpu.intel.bus_interconnect_uc@chip/core/strand,		\
	ereport.cpu.intel.bus_interconnect_memory@chip/core/strand,	\
	ereport.cpu.intel.bus_interconnect@chip/core/strand,		\
	ereport.cpu.intel.external@chip/core/strand
262
engine stat.ce_pgflt@memory-controller/dram-channel/dimm;

/*
 * Northbridge uncorrectable memory errors.  Two ereports, both raised
 * against the memory controller, feed every rule in this group.
 */
event ereport.cpu.intel.nb.mem_ue@motherboard/memory-controller
    {within(12s)};
event ereport.cpu.intel.nb.fbd.ma@motherboard/memory-controller
    {within(12s)};

#define	EREPORT_NB_MEM_UE						\
	ereport.cpu.intel.nb.mem_ue@motherboard/memory-controller,	\
	ereport.cpu.intel.nb.fbd.ma@motherboard/memory-controller

/* The page fault is declared with message=0, response=0. */
event fault.memory.intel.page_ue@
    motherboard/memory-controller/dram-channel/dimm/rank,
    message=0, response=0;
event fault.memory.intel.dimm_ue@
    motherboard/memory-controller/dram-channel/dimm/rank;
event upset.memory.intel.discard@motherboard/memory-controller{within(1s)};

/*
 * page_ue: the constrained rule requires a "rank" payload member equal
 * to the rank instance and at least one of "physaddr"/"offset", and
 * records the address/offset in the fault.
 */
prop fault.memory.intel.page_ue@
    motherboard/memory-controller/dram-channel/dimm/rank[rank_num]
    {
	payloadprop_defined("rank") && rank_num == payloadprop("rank") &&
	(payloadprop_defined("physaddr") || payloadprop_defined("offset")) &&
	SET_ADDR && SET_OFFSET
    } (1)->
    EREPORT_NB_MEM_UE;

prop fault.memory.intel.page_ue@
    motherboard/memory-controller/dram-channel/dimm/rank (1)->
    EREPORT_NB_MEM_UE;

prop fault.memory.intel.page_ue@
    motherboard/memory-controller/dram-channel/dimm/rank (0)->
    EREPORT_BUS_ERROR;

/* dimm_ue: same shape as page_ue, without the address requirement. */
prop fault.memory.intel.dimm_ue@
    motherboard/memory-controller/dram-channel<channel_num>/dimm/rank[rank_num]
    {
	payloadprop_defined("rank") && rank_num == payloadprop("rank")
    } (1)->
    EREPORT_NB_MEM_UE;

prop fault.memory.intel.dimm_ue@
    motherboard/memory-controller/dram-channel/dimm/rank (1)->
    EREPORT_NB_MEM_UE;

prop fault.memory.intel.dimm_ue@
    motherboard/memory-controller/dram-channel/dimm/rank (0)->
    EREPORT_BUS_ERROR;

/* An ereport with no "rank" member is diagnosed to an upset. */
prop upset.memory.intel.discard@motherboard/memory-controller
    { !payloadprop_defined("rank") } (1)->
    EREPORT_NB_MEM_UE;

prop upset.memory.intel.discard@motherboard/memory-controller (0)->
    EREPORT_BUS_ERROR;
314
/* Default SERD parameters for page-level and DIMM-level CE faults. */
#define	PAGE_CE_COUNT	2
#define	PAGE_CE_TIME	72h
#define	DIMM_CE_COUNT	10
#define	DIMM_CE_TIME	1week

/* Northbridge correctable memory error, reported against a rank. */
event ereport.cpu.intel.nb.mem_ce@dimm/rank{within(12s)};

/*
 * Page-level CE fault: declared with message=0 and response=0, counted
 * in stat.ce_pgflt@dimm, and only diagnosed when the ereport carries a
 * "physaddr" or "offset" member, which is copied into the fault.
 */
engine serd.memory.intel.page_ce@dimm/rank,
    N=PAGE_CE_COUNT, T=PAGE_CE_TIME;
event fault.memory.intel.page_ce@dimm/rank,
    message=0, response=0,
    count=stat.ce_pgflt@dimm,
    engine=serd.memory.intel.page_ce@dimm/rank;

prop fault.memory.intel.page_ce@dimm/rank
    {
	(payloadprop_defined("physaddr") || payloadprop_defined("offset")) &&
	SET_ADDR && SET_OFFSET
    } (0)->
    ereport.cpu.intel.nb.mem_ce@dimm/rank;

/*
 * DIMM-level CE fault.  The rules through error.memory.intel.dimm_ce
 * below apply when the dimm has no "dimm-size" config property; they
 * require more than 512 counted page faults on the dimm.
 */
engine serd.memory.intel.dimm_ce@dimm/rank,
    N=DIMM_CE_COUNT, T=DIMM_CE_TIME;
event fault.memory.intel.dimm_ce@dimm/rank,
    engine=serd.memory.intel.dimm_ce@dimm/rank;
event error.memory.intel.dimm_ce@dimm;

prop fault.memory.intel.dimm_ce@dimm/rank (1)->
    ereport.cpu.intel.nb.mem_ce@dimm/rank;

prop fault.memory.intel.dimm_ce@dimm/rank
    { !confprop_defined(dimm, "dimm-size") } (1)->
    error.memory.intel.dimm_ce@dimm;

prop error.memory.intel.dimm_ce@dimm
    {
	!confprop_defined(dimm, "dimm-size") &&
	count(stat.ce_pgflt@dimm) > 512
    } (1)->
    ereport.cpu.intel.nb.mem_ce@dimm/rank;
343
/*
 * DIMM_CE(capacity, serd_n, serd_t, pgflt_limit): size-specific DIMM CE
 * rules.  For a dimm whose "dimm-size" config property equals
 * <capacity>, set the SERD N and T to <serd_n>/<serd_t> and require
 * more than <pgflt_limit> counted page faults on that dimm.
 * The two setserd calls are joined with a single '&'.
 */
#define	DIMM_CE(capacity, serd_n, serd_t, pgflt_limit)			\
	prop fault.memory.intel.dimm_ce@dimm/rank {			\
	    confprop(dimm, "dimm-size") == capacity &&			\
	    setserdn(serd_n) & setserdt(serd_t) } (1)->			\
	    error.memory.intel.dimm_ce@dimm;				\
	prop error.memory.intel.dimm_ce@dimm {				\
	    confprop(dimm, "dimm-size") == capacity &&			\
	    count(stat.ce_pgflt@dimm) > pgflt_limit } (1)->		\
	    ereport.cpu.intel.nb.mem_ce@dimm/rank

DIMM_CE("8G", 8, 1week, 2000);
DIMM_CE("4G", 4, 1week, 1500);
DIMM_CE("2G", 4, 2week, 1000);
DIMM_CE("1G", 4, 4week, 500);
DIMM_CE("512M", 4, 8week, 250);
DIMM_CE("256M", 4, 16week, 125);
360
/*
 * FBD (nb.fbd.*) ereports, all with within(12s).
 */
event ereport.cpu.intel.nb.fbd.alert@rank{within(12s)};
event ereport.cpu.intel.nb.fbd.crc@rank{within(12s)};
event ereport.cpu.intel.nb.fbd.reset_timeout@memory-controller {within(12s)};
event ereport.cpu.intel.nb.fbd.ch@dram-channel {within(12s)};
event ereport.cpu.intel.nb.fbd.otf@dram-channel {within(12s)};

/* SERD engines for the two channel-level faults. */
engine serd.cpu.intel.nb.fbd.ch@dram-channel, N=2, T=1month;
engine serd.cpu.intel.nb.fbd_otf@dram-channel, N=2, T=1week;

/* Faults; every one is declared with retire=0. */
event fault.memory.intel.fbd.alert@rank, retire=0;
event fault.memory.intel.fbd.crc@rank, retire=0;
event fault.memory.intel.fbd.reset_timeout@memory-controller, retire=0;
event fault.memory.intel.fbd.ch@dram-channel, retire=0,
    engine=serd.cpu.intel.nb.fbd.ch@dram-channel;
event fault.memory.intel.fbd.otf@dram-channel, retire=0, response=0,
    engine=serd.cpu.intel.nb.fbd_otf@dram-channel;

/* alert */
prop fault.memory.intel.fbd.alert@rank (1)->
    ereport.cpu.intel.nb.fbd.alert@rank;
prop fault.memory.intel.fbd.alert@rank (0)->
    EREPORT_BUS_ERROR;

/* crc */
prop fault.memory.intel.fbd.crc@rank (1)->
    ereport.cpu.intel.nb.fbd.crc@rank;
prop fault.memory.intel.fbd.crc@rank (0)->
    EREPORT_BUS_ERROR;

/* reset_timeout */
prop fault.memory.intel.fbd.reset_timeout@memory-controller (1)->
    ereport.cpu.intel.nb.fbd.reset_timeout@memory-controller;
prop fault.memory.intel.fbd.reset_timeout@memory-controller (0)->
    EREPORT_BUS_ERROR;

/* ch */
prop fault.memory.intel.fbd.ch@dram-channel (1)->
    ereport.cpu.intel.nb.fbd.ch@dram-channel;
prop fault.memory.intel.fbd.ch@dram-channel (0)->
    EREPORT_BUS_ERROR;

/* otf: no EREPORT_BUS_ERROR edge. */
prop fault.memory.intel.fbd.otf@dram-channel (1)->
    ereport.cpu.intel.nb.fbd.otf@dram-channel;
405
/* nb.otf at the motherboard: fault declared with retire=0, response=0. */
event ereport.cpu.intel.nb.otf@motherboard {within(12s)};
event fault.cpu.intel.nb.otf@motherboard, retire=0, response=0;

prop fault.cpu.intel.nb.otf@motherboard (1)->
    ereport.cpu.intel.nb.otf@motherboard;

/*
 * nb.unknown (controller and channel level) and nb.spd are diagnosed
 * only to upset.discard at the memory controller.
 */
event ereport.cpu.intel.nb.unknown@memory-controller {within(12s)};
event ereport.cpu.intel.nb.unknown@memory-controller/dram-channel
    {within(12s)};
event ereport.cpu.intel.nb.spd@memory-controller/dram-channel
    {within(12s)};
event upset.discard@memory-controller;

prop upset.discard@memory-controller (0)->
    ereport.cpu.intel.nb.unknown@memory-controller,
    ereport.cpu.intel.nb.unknown@memory-controller/dram-channel,
    ereport.cpu.intel.nb.spd@memory-controller/dram-channel;

/*
 * nb.mem_ds: diagnosed to the rank whose instance number equals the
 * ereport's "rank" payload member.
 */
event ereport.cpu.intel.nb.mem_ds@memory-controller{within(30s)};
event fault.memory.intel.fbd.mem_ds@
    memory-controller/dram-channel/dimm/rank, retire=0;

prop fault.memory.intel.fbd.mem_ds@
    memory-controller/dram-channel/dimm/rank[rank_num]
    {
	payloadprop_defined("rank") && rank_num == payloadprop("rank")
    } (1)->
    ereport.cpu.intel.nb.mem_ds@memory-controller;
430
/*
 * Remaining northbridge ereports, all with within(12s).
 */
event ereport.cpu.intel.nb.fsb@chip{within(12s)};
event ereport.cpu.intel.nb.ie@motherboard{within(12s)};
event ereport.cpu.intel.nb.dma@motherboard{within(12s)};
event ereport.cpu.intel.nb.esi@motherboard{within(12s)};
event ereport.cpu.intel.nb.pex@hostbridge{within(12s)};
event ereport.cpu.intel.nb.unknown@rank{within(12s)};

/* What they are diagnosed to. */
event fault.cpu.intel.nb.fsb@chip, retire=0;
event fault.cpu.intel.nb.ie@motherboard, retire=0;
event fault.cpu.intel.nb.dma@motherboard, retire=0, response=0;
event upset.cpu.intel.nb.pex@hostbridge;
event upset.discard@rank;

/* fsb */
prop fault.cpu.intel.nb.fsb@chip (1)->
    ereport.cpu.intel.nb.fsb@chip;
prop fault.cpu.intel.nb.fsb@chip (0)->
    EREPORT_BUS_ERROR;

/* ie */
prop fault.cpu.intel.nb.ie@motherboard (1)->
    ereport.cpu.intel.nb.ie@motherboard;
prop fault.cpu.intel.nb.ie@motherboard (0)->
    EREPORT_BUS_ERROR;

/* dma */
prop fault.cpu.intel.nb.dma@motherboard (1)->
    ereport.cpu.intel.nb.dma@motherboard;
prop fault.cpu.intel.nb.dma@motherboard (0)->
    EREPORT_BUS_ERROR;

/* esi and pex both lead to the pex upset at the hostbridge. */
prop upset.cpu.intel.nb.pex@hostbridge (1)->
    ereport.cpu.intel.nb.esi@motherboard,
    ereport.cpu.intel.nb.pex@hostbridge;
prop upset.cpu.intel.nb.pex@hostbridge (0)->
    EREPORT_BUS_ERROR;

/* unknown@rank is discarded. */
prop upset.discard@rank (1)->
    ereport.cpu.intel.nb.unknown@rank;
prop upset.discard@rank (0)->
    EREPORT_BUS_ERROR;
472
473/*
474 * CPU integrated memory controller
475 */
476
/*
 * CONTAINS_RANK: true when the ereport's "resource" payload member
 * contains the asru of the rank being considered, or the asru of its
 * dimm.
 */
#define	CONTAINS_RANK							\
	(payloadprop_contains("resource",				\
	asru(motherboard/chip/memory-controller/dram-channel/dimm/rank)) || \
	payloadprop_contains("resource",				\
	asru(motherboard/chip/memory-controller/dram-channel/dimm)))

/* Count held by the per-dimm stat.ce_pgflt engine declared below. */
#define	CPU_MEM_CE_PGFLTS						\
	(count(stat.ce_pgflt@motherboard/chip/memory-controller/dram-channel/dimm))

engine stat.ce_pgflt@motherboard/chip/memory-controller/dram-channel/dimm;
486
/*
 * Integrated memory controller: uncorrectable memory error.
 */
event ereport.cpu.intel.quickpath.mem_ue@
    motherboard/chip/memory-controller {within(12s)};

/* The page fault is declared with message=0, response=0. */
event fault.memory.intel.page_ue@
    motherboard/chip/memory-controller/dram-channel/dimm/rank,
    message=0, response=0;
event fault.memory.intel.dimm_ue@
    motherboard/chip/memory-controller/dram-channel/dimm/rank;

/*
 * page_ue needs the rank (or its dimm) named in "resource" and at least
 * one of "physaddr"/"offset"; whichever is present is copied into the
 * fault.
 */
prop fault.memory.intel.page_ue@
    motherboard/chip/memory-controller/dram-channel/dimm/rank
    {
	CONTAINS_RANK &&
	(payloadprop_defined("physaddr") || payloadprop_defined("offset")) &&
	SET_ADDR && SET_OFFSET
    } (1)->
    ereport.cpu.intel.quickpath.mem_ue@motherboard/chip/memory-controller;

/* dimm_ue only needs the resource match. */
prop fault.memory.intel.dimm_ue@
    motherboard/chip/memory-controller/dram-channel/dimm/rank
    { CONTAINS_RANK } (1)->
    ereport.cpu.intel.quickpath.mem_ue@motherboard/chip/memory-controller;

prop fault.memory.intel.dimm_ue@
    motherboard/chip/memory-controller/dram-channel/dimm/rank (0)->
    EREPORT_BUS_ERROR;
512
/*
 * Integrated memory controller: correctable memory error, page level.
 */
event ereport.cpu.intel.quickpath.mem_ce@
    motherboard/chip/memory-controller {within(12s)};

engine serd.memory.intel.page_ce@
    motherboard/chip/memory-controller/dram-channel/dimm/rank,
    N=PAGE_CE_COUNT, T=PAGE_CE_TIME;

/*
 * Declared with message=0 and response=0; each page_ce fault is
 * counted in the per-dimm stat.ce_pgflt engine.
 */
event fault.memory.intel.page_ce@
    motherboard/chip/memory-controller/dram-channel/dimm/rank,
    message=0, response=0,
    count=stat.ce_pgflt@motherboard/chip/memory-controller/dram-channel/dimm,
    engine=serd.memory.intel.page_ce@
    motherboard/chip/memory-controller/dram-channel/dimm/rank;

prop fault.memory.intel.page_ce@
    motherboard/chip/memory-controller/dram-channel/dimm/rank
    {
	CONTAINS_RANK &&
	(payloadprop_defined("physaddr") || payloadprop_defined("offset")) &&
	SET_ADDR && SET_OFFSET
    } (1)->
    ereport.cpu.intel.quickpath.mem_ce@motherboard/chip/memory-controller;
533
/*
 * Integrated memory controller: correctable memory error, DIMM level.
 *
 * The engine defaults are the DIMM-level constants, matching the
 * northbridge serd.memory.intel.dimm_ce@dimm/rank engine; this
 * declaration previously used PAGE_CE_COUNT/PAGE_CE_TIME.
 */
engine serd.memory.intel.dimm_ce@
    motherboard/chip/memory-controller/dram-channel/dimm,
    N=DIMM_CE_COUNT, T=DIMM_CE_TIME;
event fault.memory.intel.dimm_ce@
    motherboard/chip/memory-controller/dram-channel/dimm,
    engine=serd.memory.intel.dimm_ce@
    motherboard/chip/memory-controller/dram-channel/dimm;
event error.memory.intel.dimm_ce@
    motherboard/chip/memory-controller/dram-channel/dimm;

/*
 * Rules for a dimm with no "dimm-size" config property: the fault
 * requires the intermediate error event, which in turn requires more
 * than 512 counted page faults.
 * NOTE(review): the count below names stat.ce_pgflt@dimm while the
 * engine is declared with the full motherboard/chip/... path (see
 * CPU_MEM_CE_PGFLTS) -- confirm both resolve to the same engine.
 */
prop fault.memory.intel.dimm_ce@
    motherboard/chip/memory-controller/dram-channel/dimm
    { !confprop_defined(dimm, "dimm-size") } (1)->
    error.memory.intel.dimm_ce@
    motherboard/chip/memory-controller/dram-channel/dimm;
prop error.memory.intel.dimm_ce@
    motherboard/chip/memory-controller/dram-channel/dimm
    { !confprop_defined(dimm, "dimm-size") &&
    count(stat.ce_pgflt@dimm) > 512 } (1)->
    ereport.cpu.intel.quickpath.mem_ce@motherboard/chip/memory-controller;
553
/*
 * CPU_MEM_DIMM_CE(capacity, serd_n, serd_t, pgflt_limit): size-specific
 * DIMM CE rules for the integrated memory controller.  For a dimm whose
 * "dimm-size" config property equals <capacity>, set the SERD N and T
 * to <serd_n>/<serd_t> and require more than <pgflt_limit> counted page
 * faults.  The two setserd calls are joined with a single '&'.
 */
#define	CPU_MEM_DIMM_CE(capacity, serd_n, serd_t, pgflt_limit)		\
	prop fault.memory.intel.dimm_ce@				\
	    motherboard/chip/memory-controller/dram-channel/dimm {	\
	    confprop(dimm, "dimm-size") == capacity &&			\
	    setserdn(serd_n) & setserdt(serd_t) } (1)->			\
	    error.memory.intel.dimm_ce@					\
	    motherboard/chip/memory-controller/dram-channel/dimm;	\
	prop error.memory.intel.dimm_ce@				\
	    motherboard/chip/memory-controller/dram-channel/dimm {	\
	    confprop(dimm, "dimm-size") == capacity &&			\
	    count(stat.ce_pgflt@dimm) > pgflt_limit } (1)->		\
	    ereport.cpu.intel.quickpath.mem_ce@				\
	    motherboard/chip/memory-controller

CPU_MEM_DIMM_CE("16G", 16, 1week, 2000);
CPU_MEM_DIMM_CE("8G", 8, 1week, 2000);
CPU_MEM_DIMM_CE("4G", 4, 1week, 1500);
CPU_MEM_DIMM_CE("2G", 4, 2week, 1000);
CPU_MEM_DIMM_CE("1G", 4, 4week, 500);
CPU_MEM_DIMM_CE("512M", 4, 8week, 250);
574
/*
 * quickpath.mem_unknown can be reported at the memory controller, at a
 * dram-channel, or at a rank; every form is diagnosed to upset.discard.
 */
event ereport.cpu.intel.quickpath.mem_unknown@
    motherboard/chip/memory-controller {within(12s)};
event ereport.cpu.intel.quickpath.mem_unknown@
    motherboard/chip/memory-controller/dram-channel {within(12s)};
event ereport.cpu.intel.quickpath.mem_unknown@
    motherboard/chip/memory-controller/dram-channel/dimm/rank {within(12s)};

event upset.discard@motherboard/chip/memory-controller;
event upset.discard@motherboard/chip/memory-controller/dram-channel/dimm/rank;

/* Controller- and channel-level reports discard at the controller. */
prop upset.discard@motherboard/chip/memory-controller (0)->
    ereport.cpu.intel.quickpath.mem_unknown@
    motherboard/chip/memory-controller,
    ereport.cpu.intel.quickpath.mem_unknown@
    motherboard/chip/memory-controller/dram-channel;

/* Rank-level reports discard at the rank. */
prop upset.discard@
    motherboard/chip/memory-controller/dram-channel/dimm/rank (1)->
    ereport.cpu.intel.quickpath.mem_unknown@
    motherboard/chip/memory-controller/dram-channel/dimm/rank;
592
/*
 * QP_MC_FAULT(leaf): an ereport.cpu.intel.quickpath.<leaf> raised at
 * the memory controller (within 1s) is diagnosed to the like-named
 * fault at the memory controller.
 */
#define	QP_MC_FAULT(leaf)						\
	event ereport.cpu.intel.quickpath.leaf@				\
	    motherboard/chip/memory-controller {within(1s)};		\
	event fault.cpu.intel.quickpath.leaf@				\
	    motherboard/chip/memory-controller;				\
	prop fault.cpu.intel.quickpath.leaf@				\
	    motherboard/chip/memory-controller (1)->			\
	    ereport.cpu.intel.quickpath.leaf@				\
	    motherboard/chip/memory-controller

QP_MC_FAULT(mem_parity);
QP_MC_FAULT(mem_bad_addr);
QP_MC_FAULT(mem_bad_id);

/*
 * mem_addr_parity: diagnosed to the memory controller and, when the
 * ereport's "resource" member contains a dimm's asru, to that dimm.
 */
event ereport.cpu.intel.quickpath.mem_addr_parity@
    motherboard/chip/memory-controller {within(1s)};
event fault.cpu.intel.quickpath.mem_addr_parity@
    motherboard/chip/memory-controller/dram-channel/dimm;
event fault.cpu.intel.quickpath.mem_addr_parity@
    motherboard/chip/memory-controller;

prop fault.cpu.intel.quickpath.mem_addr_parity@
    motherboard/chip/memory-controller (1)->
    ereport.cpu.intel.quickpath.mem_addr_parity@
    motherboard/chip/memory-controller;

prop fault.cpu.intel.quickpath.mem_addr_parity@
    motherboard/chip/memory-controller/dram-channel/dimm
    {
	payloadprop_contains("resource",
	    asru(motherboard/chip/memory-controller/dram-channel/dimm))
    } (1)->
    ereport.cpu.intel.quickpath.mem_addr_parity@
    motherboard/chip/memory-controller;

/* mem_spare: reported at the controller, diagnosed to a dimm. */
event ereport.cpu.intel.quickpath.mem_spare@
    motherboard/chip/memory-controller {within(1s)};
event fault.cpu.intel.quickpath.mem_spare@
    motherboard/chip/memory-controller/dram-channel/dimm;

prop fault.cpu.intel.quickpath.mem_spare@
    motherboard/chip/memory-controller/dram-channel/dimm (1)->
    ereport.cpu.intel.quickpath.mem_spare@
    motherboard/chip/memory-controller;

/*
 * mem_redundant: reported at the controller, diagnosed to a dimm; the
 * fault is bound to a controller-level SERD engine (N=2, T=72h).
 */
event ereport.cpu.intel.quickpath.mem_redundant@
    motherboard/chip/memory-controller {within(1s)};
engine serd.cpu.intel.quickpath.mem_redundant@
    motherboard/chip/memory-controller,
    N=2, T=72h;
event fault.cpu.intel.quickpath.mem_redundant@
    motherboard/chip/memory-controller/dram-channel/dimm,
    engine=serd.cpu.intel.quickpath.mem_redundant@
    motherboard/chip/memory-controller;

prop fault.cpu.intel.quickpath.mem_redundant@
    motherboard/chip/memory-controller/dram-channel/dimm (1)->
    ereport.cpu.intel.quickpath.mem_redundant@
    motherboard/chip/memory-controller;
646
/*
 * STATUS_UC: true when the ereport payload marks the error as
 * uncorrected, using the same "error_uncorrected" == 1 test as the
 * simple-error rules.  Nothing else in this file defines it, so without
 * this the two constraints below would reach the rule compiler as a
 * bare identifier.  The guard lets an earlier definition take
 * precedence.
 */
#ifndef	STATUS_UC
#define	STATUS_UC	(payloadprop("error_uncorrected") == 1)
#endif

event ereport.cpu.intel.quickpath.interconnect@motherboard/chip
    {within(1s)};
event upset.cpu.intel.quickpath.interconnect@motherboard/chip;

/* Diagnose corrected events to upsets */
prop upset.cpu.intel.quickpath.interconnect@motherboard/chip
    { !STATUS_UC } (1)->
    ereport.cpu.intel.quickpath.interconnect@motherboard/chip;

/* The fault is bound to a SERD engine with N=3, T=72h. */
engine serd.cpu.intel.quickpath.interconnect@motherboard/chip,
    N=3, T=72h;
event fault.cpu.intel.quickpath.interconnect@motherboard/chip,
    engine=serd.cpu.intel.quickpath.interconnect@motherboard/chip;

/* Diagnose uncorrected events to faults */
prop fault.cpu.intel.quickpath.interconnect@motherboard/chip
    { STATUS_UC } (0)->
    ereport.cpu.intel.quickpath.interconnect@motherboard/chip;
665