xref: /titanic_52/usr/src/cmd/fm/eversholt/files/sparc/sun4u/schizo.esc (revision b7d3956b92a285d8dac2c7f5f7e28d2ef5347ef8)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#pragma ident	"%Z%%M%	%I%	%E% SMI"
27
28#pragma dictionary "SUN4U"
29
/*
 * AGENT_ID_SHIFT and AGENT_ID_MASK are applied to the "ecc-afsr" ereport
 * payload property by the cpu[cpuid] propagations later in this file; the
 * extracted value is compared against the cpu instance number.
 */
30#define	AGENT_ID_MASK	0x1f
31#define	AGENT_ID_SHIFT	24
32
/* FIT rates referenced by the fault event declarations in this file. */
33#define	HB_FIT		1000
34#define HBUS_FIT	1000
35#define PCI_BUS_FIT	500
36#define PCI_DEV_FIT	1000
37#define	CPU_FIT		500
38
/*
 * Instance path (pcidev 32, pcifn 0 under the hostbridge) at which the
 * propagations in this file expect the PCI-side schizo ereports and the
 * device-ta error. The device-interr propagations explicitly exclude
 * pcidev 32.
 */
39#define	PCI_HB_DEV_PATH hostbridge/pcibus/pcidev[32]/pcifn[0]
40
/*
 * hostbridge is declared as both a FRU and an ASRU; fault.io.schizo names it
 * in both roles.
 */
41fru hostbridge;
42asru hostbridge;
43
44event fault.io.schizo@hostbridge,
45    FITrate=HB_FIT, FRU=hostbridge, ASRU=hostbridge;
46
/*
 * Intermediate error events. They sit between the faults and the ereports in
 * the propagations that follow; device-ta is the hook into the generic PCI
 * rules.
 */
47event error.io.sch.saf.dstat@hostbridge;
48event error.io.sch.saf.to@hostbridge;
49event error.io.sch.saf.bus@hostbridge;
50event error.io.sch.ecc.thresh@hostbridge;
51event error.io.pci.device-ta@hostbridge/pcibus/pcidev/pcifn;
52
/* Hostbridge ereports; each one carries a within(5s) timing constraint. */
53event ereport.io.sch.saf.to@hostbridge{within(5s)};
54event ereport.io.sch.saf.bus@hostbridge{within(5s)};
55event ereport.io.sch.saf.bca@hostbridge{within(5s)};
56event ereport.io.sch.saf.bcb@hostbridge{within(5s)};
57event ereport.io.sch.saf.ciq-to@hostbridge{within(5s)};
58event ereport.io.sch.saf.lpq-to@hostbridge{within(5s)};
59event ereport.io.sch.saf.sfpq-to@hostbridge{within(5s)};
60event ereport.io.sch.saf.ufpq-to@hostbridge{within(5s)};
61event ereport.io.sch.saf.ape@hostbridge{within(5s)};
62event ereport.io.sch.ecc.pce@hostbridge{within(5s)};
63event ereport.io.sch.ecc.pue@hostbridge{within(5s)};
64event ereport.io.sch.ecc.s-pce@hostbridge{within(5s)};
65event ereport.io.sch.ecc.s-pue@hostbridge{within(5s)};
66event ereport.io.sch.ecc.thresh@hostbridge{within(5s)};
67event ereport.io.sch.saf.dstat@hostbridge{within(5s)};
68
69/*
70 * A faulty Schizo hostbridge may cause:
71 *
72 *  - bca:	bad safari command from PCI block A.
73 *  - bcb:	bad safari command from PCI block B.
74 *  - ciq-to:	coherent input queue timeout.
75 *  - lpq-to:	local PIO queue timeout.
76 *  - sfpq-to:	safari foreign PIO queue timeout.
77 *  - ufpq-to:	UPA foreign PIO queue timeout.
78 *  - ape:	address parity error.
79 *  - pue:	PIO uncorrectable error, bad reader.
80 *  - s-pue:	secondary PIO UE, bad reader.
81 *  - ecc:	multiple PIO CEs.
82 *  - to:	safari bus timeout.
83 *  - bus:	safari bus error.
84 *  - dstat:	errant dstat on incoming data.
85 *
86 * The to, bus and dstat errors can cause a target abort to be sent onto the
87 * pci bus in response to a dma request. We represent this using a device-ta
88 * error to propagate into the generic pci.esc rules.
89 */
90prop fault.io.schizo@hostbridge (0)->
91    ereport.io.sch.saf.bca@hostbridge,
92    ereport.io.sch.saf.bcb@hostbridge,
93    ereport.io.sch.saf.ciq-to@hostbridge,
94    ereport.io.sch.saf.lpq-to@hostbridge,
95    ereport.io.sch.saf.sfpq-to@hostbridge,
96    ereport.io.sch.saf.ufpq-to@hostbridge,
97    ereport.io.sch.saf.ape@hostbridge,
98    ereport.io.sch.ecc.pue@hostbridge,
99    ereport.io.sch.ecc.s-pue@hostbridge,
100    error.io.sch.ecc.thresh@hostbridge,
101    error.io.sch.saf.to@hostbridge,
102    error.io.sch.saf.bus@hostbridge,
103    error.io.sch.saf.dstat@hostbridge;
104
/*
 * The ecc threshold error lists two ereports with a count of (2), i.e. at
 * least two of the two: both the thresh and the pce ereport.
 */
105prop error.io.sch.ecc.thresh@hostbridge (2)->
106    ereport.io.sch.ecc.thresh@hostbridge,
107    ereport.io.sch.ecc.pce@hostbridge;
108
/*
 * NOTE(review): to and bus use (2)-> over two effects, so the device-ta
 * error is required alongside the ereport. dstat, below, requires only its
 * ereport via (1)-> and lists device-ta separately under (0)->. Confirm that
 * this asymmetry is intended.
 */
109prop error.io.sch.saf.to@hostbridge (2)->
110    ereport.io.sch.saf.to@hostbridge,
111    error.io.pci.device-ta@PCI_HB_DEV_PATH;
112
113prop error.io.sch.saf.bus@hostbridge (2)->
114    ereport.io.sch.saf.bus@hostbridge,
115    error.io.pci.device-ta@PCI_HB_DEV_PATH;
116
117prop error.io.sch.saf.dstat@hostbridge (1)->
118    ereport.io.sch.saf.dstat@hostbridge;
119
120prop error.io.sch.saf.dstat@hostbridge (0)->
121    error.io.pci.device-ta@PCI_HB_DEV_PATH;
122
/*
 * SERD engine for PIO correctable errors: N=3 within T=1day, persistent.
 * When it trips it generates ereport.io.sch.ecc.thresh@hostbridge. The
 * engine is attached to upset.io.schizo, declared right after it.
 */
123engine serd.io.schizo.ecc@hostbridge,
124    N=3, T=1day, method=persistent,
125    trip=ereport.io.sch.ecc.thresh@hostbridge;
126
127event upset.io.schizo@hostbridge,
128    engine=serd.io.schizo.ecc@hostbridge;
129
130/*
131 * An upset schizo may cause:
132 *
133 *  - pce:	PIO correctable error.
134 */
135prop upset.io.schizo@hostbridge (0)->
136    ereport.io.sch.ecc.pce@hostbridge;
137
/*
 * cpu is declared as a FRU only. fault.io.datapath names no ASRU and sets
 * retire=0.
 */
138fru cpu;
139
140event fault.io.datapath@cpu, FITrate=CPU_FIT, FRU=cpu, retire=0;
141
142event error.io.cpu.ecc.thresh@cpu;
143event ereport.io.sch.saf.ssm-dis@hostbridge{within(5s)};
144event ereport.io.sch.saf.cpu0-par@hostbridge{within(5s)};
145event ereport.io.sch.saf.cpu0-bidi@hostbridge{within(5s)};
146event ereport.io.sch.saf.cpu1-par@hostbridge{within(5s)};
147event ereport.io.sch.saf.cpu1-bidi@hostbridge{within(5s)};
148
149/*
150 * A faulty xcal CPU[0] may cause:
151 *
152 *  - cpu0-par:		parity error on the unidirectional signals.
153 *  - cpu0-bidi:	parity error on the bidirectional signals.
154 */
155prop fault.io.datapath@cpu[0] (0)->
156    ereport.io.sch.saf.cpu0-par@hostbridge,
157    ereport.io.sch.saf.cpu0-bidi@hostbridge;
158
159/*
160 * A faulty xcal CPU[1] may cause:
161 *
162 *  - cpu1-par:		parity error on the unidirectional signals.
163 *  - cpu1-bidi:	parity error on the bidirectional signals.
164 */
165prop fault.io.datapath@cpu[1] (0)->
166    ereport.io.sch.saf.cpu1-par@hostbridge,
167    ereport.io.sch.saf.cpu1-bidi@hostbridge;
168
169/*
170 * A faulty CPU may cause:
171 *
172 *  - to:	safari bus timeout.
173 *  - bus:	safari bus error.
174 *  - dstat:	incorrect dstat sent to hostbridge.
175 *  - ssm-dis:	ssm command sent to hostbridge when not enabled.
176 *  - ape:	safari address parity error.
177 *  - pue:	PIO uncorrectable error.
178 *  - ecc:	multiple PIO CEs.
179 */
180prop fault.io.datapath@cpu (0)->
181    error.io.sch.saf.to@hostbridge,
182    error.io.sch.saf.bus@hostbridge,
183    error.io.sch.saf.dstat@hostbridge,
184    ereport.io.sch.saf.ssm-dis@hostbridge,
185    ereport.io.sch.saf.ape@hostbridge;
186
/*
 * pue is tied to a cpu only when the agent id field of the ereport's
 * "ecc-afsr" payload ((afsr >> AGENT_ID_SHIFT) & AGENT_ID_MASK) equals that
 * cpu's instance number.
 */
187prop fault.io.datapath@cpu[cpuid] (0)->
188    ereport.io.sch.ecc.pue@hostbridge
189    {((payloadprop("ecc-afsr") >> AGENT_ID_SHIFT) & AGENT_ID_MASK) == cpuid};
190
/*
 * The "ecc" case: a per-cpu threshold error that requires the hostbridge
 * thresh ereport and, for the cpu matching the agent id in "ecc-afsr", the
 * pce ereport.
 */
191prop fault.io.datapath@cpu (0)->
192    error.io.cpu.ecc.thresh@cpu;
193
194prop error.io.cpu.ecc.thresh@cpu (1)->
195    ereport.io.sch.ecc.thresh@hostbridge<>;
196
197prop error.io.cpu.ecc.thresh@cpu[cpuid] (1)->
198    ereport.io.sch.ecc.pce@hostbridge<>
199    {((payloadprop("ecc-afsr") >> AGENT_ID_SHIFT) & AGENT_ID_MASK) == cpuid};
200
/*
 * Host bus fault. It is declared against the hostbridge, which is used as
 * both its FRU and its ASRU.
 */
201event fault.io.hbus@hostbridge,
202    FITrate=HBUS_FIT, FRU=hostbridge, ASRU=hostbridge;
203
204/*
205 * A faulty host bus may cause:
206 *
207 *  - ape:		address parity error.
208 *  - cpu0-par:		parity error on the unidirectional signals.
209 *  - cpu0-bidi:	parity error on the bidirectional signals.
210 *  - cpu1-par:		parity error on the unidirectional signals.
211 *  - cpu1-bidi:	parity error on the bidirectional signals.
212 *  - pue:		PIO uncorrectable error.
213 *  - s-pue:		secondary PIO UE.
214 *  - ecc:		multiple PIO CEs (via error.io.sch.ecc.thresh).
215 */
216prop fault.io.hbus@hostbridge (0)->
217    ereport.io.sch.saf.ape@hostbridge,
218    ereport.io.sch.saf.cpu0-par@hostbridge,
219    ereport.io.sch.saf.cpu0-bidi@hostbridge,
220    ereport.io.sch.saf.cpu1-par@hostbridge,
221    ereport.io.sch.saf.cpu1-bidi@hostbridge,
222    ereport.io.sch.ecc.pue@hostbridge,
223    ereport.io.sch.ecc.s-pue@hostbridge,
224    error.io.sch.ecc.thresh@hostbridge;
225
226/*
227 * A bad request from a downstream device/driver may cause:
228 *
229 *  - um:	safari unmapped address error.
230 *  - mmu:	an iommu translation error.
231 */
232event error.io.pci.badreq-pw-u@hostbridge/pcibus/pcidev/pcifn;
233event error.io.pci.badreq-drw-u@hostbridge/pcibus/pcidev/pcifn;
234
/*
 * rserr is declared here but is only used later, in the serr-u propagation.
 * um is reported against the hostbridge itself; mmu is expected at
 * PCI_HB_DEV_PATH.
 */
235event ereport.io.pci.rserr@hostbridge/pcibus/pcidev/pcifn{within(5s)};
236event ereport.io.sch.mmu@hostbridge/pcibus/pcidev/pcifn{within(5s)};
237event ereport.io.sch.saf.um@hostbridge{within(5s)};
238
/* Both badreq errors may produce either ereport; neither is required. */
239prop error.io.pci.badreq-pw-u@hostbridge/pcibus/pcidev/pcifn (0)->
240    ereport.io.sch.saf.um@hostbridge;
241
242prop error.io.pci.badreq-pw-u@hostbridge/pcibus/pcidev/pcifn (0)->
243    ereport.io.sch.mmu@PCI_HB_DEV_PATH;
244
245prop error.io.pci.badreq-drw-u@hostbridge/pcibus/pcidev/pcifn (0)->
246    ereport.io.sch.saf.um@hostbridge;
247
248prop error.io.pci.badreq-drw-u@hostbridge/pcibus/pcidev/pcifn (0)->
249    ereport.io.sch.mmu@PCI_HB_DEV_PATH;
250
/* pcibus is both the FRU and the ASRU for fault.io.pci.bus. */
251fru pcibus;
252asru pcibus;
253
254event fault.io.pci.bus@hostbridge/pcibus,
255    FITrate=PCI_BUS_FIT, FRU=pcibus, ASRU=pcibus;
256
257event ereport.io.sch.bu@hostbridge/pcibus/pcidev/pcifn{within(5s)};
258event ereport.io.sch.s-bu@hostbridge/pcibus/pcidev/pcifn{within(5s)};
259
260/*
261 * A faulty PCI bus may cause:
262 *
263 *  - bu:	PCI bus unusable error.
264 *  - s-bu:	secondary PCI bus unusable error.
265 *
 * Both ereports are expected at PCI_HB_DEV_PATH.
 */
266prop fault.io.pci.bus@hostbridge/pcibus (0)->
267    ereport.io.sch.bu@PCI_HB_DEV_PATH,
268    ereport.io.sch.s-bu@PCI_HB_DEV_PATH;
269
/*
 * For PCI device faults the FRU is the device (pcibus/pcidev) and the ASRU
 * is the individual function (pcibus/pcidev/pcifn).
 */
270fru pcibus/pcidev;
271asru pcibus/pcidev/pcifn;
272
/*
 * fault.io.pci.device-interr is declared at two paths, one rooted at
 * hostbridge and one rooted at pcibus, with identical properties.
 */
273event fault.io.pci.device-interr@hostbridge/pcibus/pcidev/pcifn,
274    FITrate=PCI_DEV_FIT, FRU=pcibus/pcidev, ASRU=pcibus/pcidev/pcifn;
275
276event fault.io.pci.device-interr@pcibus/pcidev/pcifn,
277    FITrate=PCI_DEV_FIT, FRU=pcibus/pcidev, ASRU=pcibus/pcidev/pcifn;
278
/*
 * Intermediate error events. The rl, target-rl and target-tto errors are
 * also declared at nested pcibus/pcidev/pcifn/pcibus/pcidev/pcifn paths,
 * which the propagations use to link a device to a child device.
 */
279event error.io.sch.pbm.rl@hostbridge/pcibus/pcidev/pcifn;
280event error.io.sch.pbm.rl@pcibus/pcidev/pcifn;
281event error.io.sch.pbm.rl@pcibus/pcidev/pcifn/pcibus/pcidev/pcifn;
282event error.io.sch.pbm.target-rl@pcibus/pcidev/pcifn;
283event error.io.sch.pbm.target-rl@pcibus/pcidev/pcifn/pcibus/pcidev/pcifn;
284event error.io.sch.pbm.tto@hostbridge/pcibus/pcidev/pcifn;
285event error.io.sch.pbm.target-tto@hostbridge/pcibus/pcidev/pcifn;
286event error.io.sch.pbm.target-tto@pcibus/pcidev/pcifn;
287event error.io.sch.pbm.target-tto@pcibus/pcidev/pcifn/pcibus/pcidev/pcifn;
288event error.sch.cpu.berr@cpu;
289event error.io.pci.ma-u@hostbridge/pcibus/pcidev/pcifn;
290event error.io.pci.perr-pw-u@hostbridge/pcibus/pcidev/pcifn;
291event error.io.pci.perr-dw-u@hostbridge/pcibus/pcidev/pcifn;
292event error.io.pci.dpdata-dr-u@hostbridge/pcibus/pcidev/pcifn;
293event error.io.pci.ta-u@hostbridge/pcibus/pcidev/pcifn;
294event error.io.pci.serr-u@hostbridge/pcibus/pcidev/pcifn;
295event error.io.pci.retry-to-d@hostbridge/pcibus/pcidev/pcifn;
296
/* PCI-side and cpu ereports; each one carries a within(5s) constraint. */
297event ereport.io.sch.sbh@hostbridge/pcibus/pcidev/pcifn{within(5s)};
298event ereport.io.sch.pbm.rl@hostbridge/pcibus/pcidev/pcifn{within(5s)};
299event ereport.io.sch.pbm.tto@hostbridge/pcibus/pcidev/pcifn{within(5s)};
300event ereport.io.sch.pbm.s-rl@hostbridge/pcibus/pcidev/pcifn{within(5s)};
301event ereport.io.sch.pbm.s-tto@hostbridge/pcibus/pcidev/pcifn{within(5s)};
302event ereport.io.sch.pbm.s-ma@hostbridge/pcibus/pcidev/pcifn{within(5s)};
303event ereport.io.sch.pbm.s-rta@hostbridge/pcibus/pcidev/pcifn{within(5s)};
304event ereport.io.sch.pbm.s-mdpe@hostbridge/pcibus/pcidev/pcifn{within(5s)};
305event ereport.io.sch.pbm.target-rl@pcibus/pcidev/pcifn{within(5s)};
306event ereport.io.sch.pbm.target-tto@pcibus/pcidev/pcifn{within(5s)};
307event ereport.io.pci.sserr@hostbridge/pcibus/pcidev/pcifn{within(5s)};
308event ereport.cpu.ultraSPARC-III.berr@cpu{within(5s)};
309event ereport.cpu.ultraSPARC-IIIplus.berr@cpu{within(5s)};
310event ereport.cpu.ultraSPARC-IV.berr@cpu{within(5s)};
311event ereport.cpu.ultraSPARC-IVplus.berr@cpu{within(5s)};
312
313/*
314 * A faulty PCI device may cause:
315 *
316 *  - sbh:	a streaming byte hole error.
317 *  - rl:	it to exceed the number of retries for a transaction.
318 *  - tto:	it to not assert trdy# within the allotted timeout.
319 *
320 * For rl and tto, there may be a target- ereport on a child device. For rl,
321 * there may also be an associated dto - the retry-to-d error propagates into
322 * the pci.esc rules to handle this.
323 */
324prop fault.io.pci.device-interr@hostbridge/pcibus/pcidev/pcifn (0)->
325    ereport.io.sch.sbh@PCI_HB_DEV_PATH;
326
/*
 * The {fromdev == todev && fromdev != 32} constraints limit these
 * propagations to the same pcidev instance number and skip pcidev 32, the
 * instance named by PCI_HB_DEV_PATH.
 */
327prop fault.io.pci.device-interr@pcibus/pcidev[fromdev]/pcifn (0)->
328    error.io.sch.pbm.rl@pcibus/pcidev<todev>/pcifn {
329	fromdev == todev && fromdev != 32 },
330    error.io.sch.pbm.target-rl@pcibus/pcidev<todev>/pcifn {
331	fromdev == todev && fromdev != 32 };
332
/* An rl error at a nested device requires the rl error at its parent path. */
333prop error.io.sch.pbm.rl@pcibus/pcidev/pcifn/pcibus/pcidev/pcifn (1)->
334    error.io.sch.pbm.rl@pcibus/pcidev/pcifn;
335
/* At least one of the primary (rl) or secondary (s-rl) ereports is required. */
336prop error.io.sch.pbm.rl@hostbridge/pcibus/pcidev/pcifn (1)->
337    ereport.io.sch.pbm.rl@PCI_HB_DEV_PATH,
338    ereport.io.sch.pbm.s-rl@PCI_HB_DEV_PATH;
339
340prop error.io.sch.pbm.target-rl@pcibus/pcidev/pcifn (1)->
341    error.io.sch.pbm.target-rl@pcibus/pcidev/pcifn/pcibus<>/pcidev<>/pcifn<>;
342
343prop error.io.sch.pbm.target-rl@pcibus/pcidev/pcifn (0)->
344    ereport.io.sch.pbm.target-rl@pcibus/pcidev/pcifn;
345
/* Optional side effects of rl: the retry-to-d error and a cpu bus error. */
346prop error.io.sch.pbm.rl@hostbridge/pcibus/pcidev/pcifn (0)->
347    error.io.pci.retry-to-d@hostbridge/pcibus/pcidev/pcifn;
348
349prop error.io.sch.pbm.rl@hostbridge/pcibus/pcidev/pcifn (0)->
350    error.sch.cpu.berr@cpu;
351
/* tto uses the same device-number constraint as the rl rules. */
352prop fault.io.pci.device-interr@hostbridge/pcibus/pcidev[fromdev]/pcifn (0)->
353    error.io.sch.pbm.tto@hostbridge/pcibus/pcidev<todev>/pcifn {
354	fromdev == todev && fromdev != 32 };
355
/*
 * tto has three separate (1)-> propagations: one of the tto/s-tto ereports,
 * the target-tto error, and (last rule of this group) the bu ereport.
 */
356prop error.io.sch.pbm.tto@hostbridge/pcibus/pcidev/pcifn (1)->
357    ereport.io.sch.pbm.tto@PCI_HB_DEV_PATH,
358    ereport.io.sch.pbm.s-tto@PCI_HB_DEV_PATH;
359
360prop error.io.sch.pbm.tto@hostbridge/pcibus/pcidev/pcifn (1)->
361    error.io.sch.pbm.target-tto@hostbridge/pcibus/pcidev/pcifn;
362
363prop error.io.sch.pbm.target-tto@pcibus/pcidev/pcifn (0)->
364    ereport.io.sch.pbm.target-tto@pcibus/pcidev/pcifn;
365
366prop error.io.sch.pbm.target-tto@pcibus/pcidev/pcifn (1)->
367    error.io.sch.pbm.target-tto@pcibus/pcidev/pcifn/pcibus<>/pcidev<>/pcifn<>;
368
369prop error.io.sch.pbm.tto@hostbridge/pcibus/pcidev/pcifn (1)->
370    ereport.io.sch.bu@PCI_HB_DEV_PATH;
371
372/*
373 * Need to add the following schizo specific propagations to complete the PCI
374 * fault tree. These are to allow propagations to secondary errors and cpu
375 * bus errors, and to represent the way the chip can raise both rserr and sserr
376 * on detection of SERR#.
377 */
/* Optional secondary (s-*) ereports for the generic PCI errors. */
378prop error.io.pci.ma-u@hostbridge/pcibus/pcidev/pcifn (0)->
379    ereport.io.sch.pbm.s-ma@PCI_HB_DEV_PATH;
380
381prop error.io.pci.ta-u@hostbridge/pcibus/pcidev/pcifn (0)->
382    ereport.io.sch.pbm.s-rta@PCI_HB_DEV_PATH;
383
384prop error.io.pci.perr-pw-u@hostbridge/pcibus/pcidev/pcifn (0)->
385    ereport.io.sch.pbm.s-mdpe@PCI_HB_DEV_PATH;
386
387prop error.io.pci.perr-dw-u@hostbridge/pcibus/pcidev/pcifn (0)->
388    ereport.io.sch.pbm.s-mdpe@PCI_HB_DEV_PATH;
389
390prop error.io.pci.dpdata-dr-u@hostbridge/pcibus/pcidev/pcifn (0)->
391    ereport.io.sch.pbm.s-mdpe@PCI_HB_DEV_PATH;
392
/* ta-u, dpdata-dr-u and ma-u may each also lead to a cpu bus error. */
393prop error.io.pci.ta-u@hostbridge/pcibus/pcidev/pcifn (0)->
394    error.sch.cpu.berr@cpu;
395
396prop error.io.pci.dpdata-dr-u@hostbridge/pcibus/pcidev/pcifn (0)->
397    error.sch.cpu.berr@cpu;
398
399prop error.io.pci.ma-u@hostbridge/pcibus/pcidev/pcifn (0)->
400    error.sch.cpu.berr@cpu;
401
/* For serr-u, rserr is required (1) while sserr is optional (0). */
402prop error.io.pci.serr-u@hostbridge/pcibus/pcidev/pcifn (1)->
403    ereport.io.pci.rserr@PCI_HB_DEV_PATH;
404
405prop error.io.pci.serr-u@hostbridge/pcibus/pcidev/pcifn (0)->
406    ereport.io.pci.sserr@PCI_HB_DEV_PATH;
407
/*
 * A cpu bus error requires at least one of the UltraSPARC III, III+, IV or
 * IV+ berr ereports.
 */
408prop error.sch.cpu.berr@cpu (1)->
409    ereport.cpu.ultraSPARC-III.berr@cpu,
410    ereport.cpu.ultraSPARC-IIIplus.berr@cpu,
411    ereport.cpu.ultraSPARC-IV.berr@cpu,
412    ereport.cpu.ultraSPARC-IVplus.berr@cpu;
413
/* Note: ereport.io.sch.nodiag is declared without a within() constraint. */
414event error.io.sch.ecc.drue@hostbridge;
415event ereport.io.sch.ecc.drue@hostbridge{within(5s)};
416event ereport.io.sch.nodiag@hostbridge;
417
418/*
419 * Upset used to hide ereports that cannot currently be diagnosed.
420 *
421 * The drue error can cause a target abort to be sent onto the
422 * pci bus in response to a dma request. We represent this using a device-ta
423 * error to propagate into the generic pci.esc rules.
424 *
 * The SERD engine attached to the upset is set to N=1000 within T=1hour and
 * trips with ereport.io.sch.nodiag.
 */
425engine serd.io.sch.nodiag@hostbridge,
426    N=1000, T=1hour, method=persistent,
427    trip=ereport.io.sch.nodiag@hostbridge;
428
429event upset.io.sch.nodiag@hostbridge,
430    engine=serd.io.sch.nodiag@hostbridge;
431
432prop upset.io.sch.nodiag@hostbridge (0)->
433    ereport.io.sch.ecc.s-pce@hostbridge,
434    error.io.sch.ecc.drue@hostbridge,
435    ereport.io.sch.nodiag@hostbridge;
436
/* drue requires its ereport; the device-ta error is optional. */
437prop error.io.sch.ecc.drue@hostbridge (1)->
438    ereport.io.sch.ecc.drue@hostbridge;
439
440prop error.io.sch.ecc.drue@hostbridge (0)->
441    error.io.pci.device-ta@PCI_HB_DEV_PATH;
442