xref: /illumos-gate/usr/src/cmd/fm/eversholt/files/common/pciexrc.esc (revision 8119dad84d6416f13557b0ba8e2aaf9064cbcfd3)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25#pragma dictionary "PCIEX"
26
27#include <px_err.h>
28
29/*
30 * generic root complex/root port diagnosis rules
31 */
32
/*
 * Path shorthands for a PCI Express function node.  PCIEXFNHZ is the same
 * path with an empty <> index on every component (presumably Eversholt
 * horizontal iteration, hence "HZ" -- confirm against the language spec).
 */
33#define	PCIEXFN		pciexbus/pciexdev/pciexfn
34#define	PCIEXFNHZ	pciexbus<>/pciexdev<>/pciexfn<>
35
/* N and T parameters of the serd.io.pciex.rc.generic-ce engine. */
36#define RC_N    5
37#define RC_T    72h
38
/* FITrate values for the software, firmware and hostbridge faults. */
39#define SW_FIT    5000
40#define FW_FIT    5000
41#define HB_FIT    400
42
/*
 * Decoding of the epkt "desc" payload member.  Bits 12 and up hold the
 * block, op, phase, condition and direction fields, packed at 4-bit offsets
 * with the block field most significant.  The B, C and H flags are single
 * bits (7, 5 and 4) in the low 12 bits.  EPKT_H_BIT is not referenced by
 * any rule visible in this file.
 */
43#define EPKT_DESC       (payloadprop("desc") >> 12)
44#define EPKT_B_BIT      (payloadprop("desc") & (1 << 7))
45#define EPKT_C_BIT      (payloadprop("desc") & (1 << 5))
46#define EPKT_H_BIT      (payloadprop("desc") & (1 << 4))
47
/*
 * True when the descriptor fields equal the given block/op/phase/cond/dir
 * values.  Every argument is parenthesized so that an argument which is
 * itself an expression cannot regroup with the shifts and ORs.
 */
48#define MATCHES_DESC(b, o, p, c, d) \
49    (EPKT_DESC == ((b) << 16 | (o) << 12 | (p) << 8 | (c) << 4 | (d)))
50
/*
 * The C bit separates correctable (set) from uncorrectable (clear) epkts.
 * IS_CE additionally calls setserdsuffix() with the descriptor; presumably
 * this gives each descriptor its own SERD engine instance -- confirm.
 */
51#define IS_CE (EPKT_C_BIT != 0 && setserdsuffix(EPKT_DESC))
52#define IS_UE (EPKT_C_BIT == 0)
53
54/*
55 * BLOCK bit set means the error may cause a pipe stall and thus a CTO
56 * in the fabric
57 */
58#define IS_BLOCKED (EPKT_B_BIT != 0)
59
/*
 * Shorthand for an epkt ereport on the hostbridge that is constrained to
 * one specific block/op/phase/cond/dir descriptor.
 */
60#define EPKT(b, o, p, c, d) \
61    ereport.io.pciex.rc.epkt@hostbridge { MATCHES_DESC(b, o, p, c, d) }
62
63/*
 * Errors that will be diagnosed by the fabric DE (pciex.esc).
 * True when the epkt descriptor matches any of the eight entries below.
 * The rules in this file use it only to route such epkts to the rc.stall /
 * rc.discard upsets and to exclude them from error.io.pciex.rc.stall.
 */
64#define	DIAG_BY_FAB \
65	(MATCHES_DESC(BLOCK_INTR,OP_MSI32,PH_DATA,CND_ILL,DIR_IRR) ||	\
66	MATCHES_DESC(BLOCK_PORT,OP_LINK,PH_FC,CND_TO,DIR_IRR) ||	\
67	MATCHES_DESC(BLOCK_PORT,OP_PIO,PH_IRR,CND_INV,DIR_RDWR) ||	\
68	MATCHES_DESC(BLOCK_PORT,OP_PIO,PH_IRR,CND_RCA,DIR_WRITE) ||	\
69	MATCHES_DESC(BLOCK_PORT,OP_PIO,PH_IRR,CND_RUR,DIR_WRITE) ||	\
70	MATCHES_DESC(BLOCK_PORT,OP_PIO,PH_IRR,CND_TO,DIR_READ) ||	\
71	MATCHES_DESC(BLOCK_PORT,OP_PIO,PH_IRR,CND_TO,DIR_WRITE) ||	\
72	MATCHES_DESC(BLOCK_PORT,OP_PIO,PH_IRR,CND_UC,DIR_IRR))
73
74/* Ereport Events: the root complex epkt, declared with within(5s) */
75event ereport.io.pciex.rc.epkt@hostbridge {within(5s)};
76
77/*
 * Internal Events
 * The rc.* errors live on the hostbridge; the others are declared on
 * functions below a pciexrc node.
 */
78event error.io.pciex.rc.stall@hostbridge;
79event error.io.pciex.rc.poiscomp@hostbridge;
80event error.io.pciex.nr-d@hostbridge/pciexrc/PCIEXFN;
81event error.io.pciex.badreq-u@hostbridge/pciexrc/PCIEXFN;
82event error.io.pciex.poiscomp-d@hostbridge/pciexrc/PCIEXFN;
83event error.io.pciex.noimpact-d@hostbridge/pciexrc/PCIEXFN;
84event error.io.pciex.lost-d@hostbridge/pciexrc/PCIEXFN;
85event error.io.pciex.degraded-d@hostbridge/pciexrc/PCIEXFN;
86
87/* Upset events: sinks for epkts that DIAG_BY_FAB leaves to pciex.esc */
88event upset.io.pciex.rc.stall@hostbridge;
89event upset.io.pciex.rc.discard@hostbridge;
90
91/*
92 * Fault Events
93 * Do not retire any FRUs for SW/FW faults
94 */
/* Note: generic-ue is declared with retire=0, response=0 as well. */
95event fault.io.pciex.rc.generic-ue@hostbridge,
96    FITrate=HB_FIT, retire=0, response=0;
97event fault.io.pciex.rc.generic-sw@hostbridge,
98    FITrate=SW_FIT, retire=0, response=0;
99event fault.io.pciex.rc.generic-fw@hostbridge,
100    FITrate=FW_FIT, retire=0, response=0;
101
102/* Serd engine for CE errors, parameters N=RC_N (5) and T=RC_T (72h) */
103engine serd.io.pciex.rc.generic-ce@hostbridge, N=RC_N, T=RC_T;
104event fault.io.pciex.rc.generic-ce@hostbridge, FITrate=HB_FIT,
105    engine=serd.io.pciex.rc.generic-ce@hostbridge;
106
107/* Fire faults: indicted by the event queue overflow rules in this file */
108event fault.io.fire.pciex.device@PCIEXFN, FITrate=1000;
109event fault.io.fire.pci.device@pcibus/pcidev/pcifn, FITrate=1000;
110
111/*
 * Generic Root Complex Software faults
 * Matches three BLOCK_INTR descriptors (fixed, MSI32 and PCIe message ops),
 * all with PH_UNKNOWN and CND_ILL.
 */
112prop fault.io.pciex.rc.generic-sw@hostbridge ->
113    ereport.io.pciex.rc.epkt@hostbridge {
114	MATCHES_DESC(BLOCK_INTR,OP_FIXED,PH_UNKNOWN,CND_ILL,DIR_INGRESS)  ||
115	MATCHES_DESC(BLOCK_INTR,OP_MSI32,PH_UNKNOWN,CND_ILL,DIR_IRR)  ||
116	MATCHES_DESC(BLOCK_INTR,OP_PCIEMSG,PH_UNKNOWN,CND_ILL,DIR_INGRESS)
117    };
118
119/*
 * Generic Root Complex Firmware faults
 * Matches a single descriptor: BLOCK_HOSTBUS, OP_PIO, PH_ADDR, CND_UNMAP,
 * DIR_WRITE.
 */
120prop fault.io.pciex.rc.generic-fw@hostbridge ->
121    ereport.io.pciex.rc.epkt@hostbridge {
122	MATCHES_DESC(BLOCK_HOSTBUS,OP_PIO,PH_ADDR,CND_UNMAP,DIR_WRITE)
123    };
124
125/*
 * Generic Root Complex CE faults
 * The IS_CE constraint sits on the fault side and the ereport carries no
 * descriptor filter, so any epkt with the C bit set is a candidate.  The
 * fault is declared with the generic-ce SERD engine.
 */
126prop fault.io.pciex.rc.generic-ce@hostbridge { IS_CE } ->
127    ereport.io.pciex.rc.epkt@hostbridge;
128
129/*
 * Generic Root Complex UE faults from propagations
 * generic-ue1 and generic-ue2 are intermediate errors that carry the
 * descriptor lists; rc.stall and rc.poiscomp link the fault to effects in
 * the fabric.
 */
130event error.io.pciex.rc.generic-ue1@hostbridge;
131event error.io.pciex.rc.generic-ue2@hostbridge;
132
133prop fault.io.pciex.rc.generic-ue@hostbridge ->
134    error.io.pciex.rc.generic-ue1@hostbridge,
135    error.io.pciex.rc.generic-ue2@hostbridge,
136    error.io.pciex.rc.stall@hostbridge,
137    error.io.pciex.rc.poiscomp@hostbridge;
138
139/*
 * Generic Root Complex UE propagations
 *
 * generic-ue1 covers uncorrectable epkts with the B bit clear whose
 * descriptor names the host bus or interrupt block.
 * The descriptor BLOCK_HOSTBUS,OP_UNKNOWN,PH_DATA,CND_INT,DIR_UNKNOWN used
 * to be listed twice; the redundant copy is dropped here.
 * NOTE(review): the second copy may have been intended as a different
 * descriptor -- confirm against the px_err.h epkt table.
 */
140prop error.io.pciex.rc.generic-ue1@hostbridge { IS_UE && !IS_BLOCKED } ->
141    ereport.io.pciex.rc.epkt@hostbridge {
142	MATCHES_DESC(BLOCK_HOSTBUS,OP_DMA,PH_DATA,CND_INT,DIR_READ) ||
143	MATCHES_DESC(BLOCK_HOSTBUS,OP_DMA,PH_DATA,CND_INT,DIR_UNKNOWN) ||
144	MATCHES_DESC(BLOCK_HOSTBUS,OP_DMA,PH_DATA,CND_INT,DIR_WRITE) ||
145	MATCHES_DESC(BLOCK_HOSTBUS,OP_DMA,PH_DATA,CND_TO,DIR_READ) ||
146	MATCHES_DESC(BLOCK_HOSTBUS,OP_DMA,PH_DATA,CND_TO,DIR_WRITE) ||
147	MATCHES_DESC(BLOCK_HOSTBUS,OP_PIO,PH_DATA,CND_INT,DIR_UNKNOWN) ||
148	MATCHES_DESC(BLOCK_HOSTBUS,OP_UNKNOWN,PH_DATA,CND_INT,DIR_UNKNOWN) ||
150	MATCHES_DESC(BLOCK_INTR,OP_MSI32,PH_DATA,CND_INT,DIR_UNKNOWN) ||
151	MATCHES_DESC(BLOCK_INTR,OP_MSIQ,PH_DATA,CND_INT,DIR_UNKNOWN)
152    };
153
/*
 * generic-ue2 covers uncorrectable epkts with the B bit clear whose
 * descriptor names the MMU or port block.
 * The descriptor BLOCK_PORT,OP_UNKNOWN,PH_DATA,CND_INT,DIR_UNKNOWN used to
 * be listed twice; the redundant copy is dropped here.
 * NOTE(review): the second copy may have been intended as a different
 * descriptor -- confirm against the px_err.h epkt table.
 */
154prop error.io.pciex.rc.generic-ue2@hostbridge { IS_UE && !IS_BLOCKED } ->
155    ereport.io.pciex.rc.epkt@hostbridge {
156	MATCHES_DESC(BLOCK_MMU,OP_TBW,PH_ADDR,CND_UNKNOWN,DIR_UNKNOWN) ||
157	MATCHES_DESC(BLOCK_MMU,OP_TBW,PH_ADDR,CND_UNMAP,DIR_UNKNOWN) ||
158	MATCHES_DESC(BLOCK_MMU,OP_TBW,PH_DATA,CND_INT,DIR_IRR) ||
159	MATCHES_DESC(BLOCK_MMU,OP_TBW,PH_UNKNOWN,CND_UNKNOWN,DIR_UNKNOWN) ||
160	MATCHES_DESC(BLOCK_MMU,OP_XLAT,PH_DATA,CND_INT,DIR_UNKNOWN) ||
161	MATCHES_DESC(BLOCK_PORT,OP_DMA,PH_DATA,CND_INT,DIR_READ) ||
162	MATCHES_DESC(BLOCK_PORT,OP_PIO,PH_DATA,CND_INT,DIR_READ) ||
163	MATCHES_DESC(BLOCK_PORT,OP_PIO,PH_DATA,CND_INT,DIR_UNKNOWN) ||
165	MATCHES_DESC(BLOCK_PORT,OP_UNKNOWN,PH_DATA,CND_INT,DIR_UNKNOWN)
166    };
167
168/*
 * Errors that will cause a pipe stall and thus a CTO in the fabric
 *
 * First rule: the stall may show up as nr-d on functions below the root
 * complex; the (0) count presumably makes that effect optional -- confirm
 * against the Eversholt prop count semantics.
 * Second rule: the stall is evidenced by an uncorrectable epkt with the B
 * bit set whose descriptor is not one of the DIAG_BY_FAB entries.
 */
169prop error.io.pciex.rc.stall@hostbridge (0) ->
170    error.io.pciex.nr-d@hostbridge/pciexrc<>/PCIEXFNHZ;
171prop error.io.pciex.rc.stall@hostbridge { IS_UE && IS_BLOCKED } ->
172    ereport.io.pciex.rc.epkt@hostbridge { !DIAG_BY_FAB };
173
174/*
175 * Errors that will send a poisoned data to the fabric
176 * Also the poiscomp-d could represent a fault that a hardened driver
177 * handled and reported a service impact.
178 */
/*
 * Downstream effects of poisoned data; the (0) count presumably makes each
 * of them optional -- confirm against the Eversholt prop count semantics.
 */
179prop error.io.pciex.rc.poiscomp@hostbridge (0) ->
180    error.io.pciex.poiscomp-d@hostbridge/pciexrc<>/PCIEXFNHZ,
181    error.io.pciex.noimpact-d@hostbridge/pciexrc<>/PCIEXFNHZ,
182    error.io.pciex.lost-d@hostbridge/pciexrc<>/PCIEXFNHZ,
183    error.io.pciex.degraded-d@hostbridge/pciexrc<>/PCIEXFNHZ;
184
/*
 * The only epkt tied to poiscomp is the host bus DMA read data error; the
 * same descriptor is also listed under error.io.pciex.rc.generic-ue1.
 */
185prop error.io.pciex.rc.poiscomp@hostbridge { IS_UE && !IS_BLOCKED } ->
186    ereport.io.pciex.rc.epkt@hostbridge {
187	MATCHES_DESC(BLOCK_HOSTBUS,OP_DMA,PH_DATA,CND_INT,DIR_READ)
188    };
189
/*
 * MMU translation epkts (unmapped address, invalid or protected data) are
 * attributed to badreq-u on a function below the root complex rather than
 * to a root complex fault.  The constraint requires an uncorrectable epkt
 * with the B bit clear; the (0) count presumably makes the ereport
 * optional -- confirm against the Eversholt prop count semantics.
 */
190prop error.io.pciex.badreq-u@hostbridge/pciexrc/PCIEXFN { IS_UE && !IS_BLOCKED } (0) ->
191    ereport.io.pciex.rc.epkt@hostbridge {
192	MATCHES_DESC(BLOCK_MMU,OP_XLAT,PH_ADDR,CND_UNMAP,DIR_RDWR) ||
193	MATCHES_DESC(BLOCK_MMU,OP_XLAT,PH_DATA,CND_INV,DIR_RDWR) ||
194	MATCHES_DESC(BLOCK_MMU,OP_XLAT,PH_DATA,CND_PROT,DIR_RDWR)
195    };
196
197/*
198 * The errors will be diagnosed by pciex.esc but may also cause a CTO
199 * in the fabric.
 * Applies to DIAG_BY_FAB epkts that have the B bit set; the upset accounts
 * for the epkt here and may also explain nr-d on functions below the root
 * complex.
200 */
201prop upset.io.pciex.rc.stall@hostbridge ->
202    ereport.io.pciex.rc.epkt@hostbridge { IS_BLOCKED && DIAG_BY_FAB };
203prop upset.io.pciex.rc.stall@hostbridge (0) ->
204    error.io.pciex.nr-d@hostbridge/pciexrc<>/PCIEXFNHZ;
205
206/*
 * The errors will be discarded here and diagnosed by pciex.esc.
 * Applies to DIAG_BY_FAB epkts that have the B bit clear.
 */
207prop upset.io.pciex.rc.discard@hostbridge ->
208    ereport.io.pciex.rc.epkt@hostbridge { !IS_BLOCKED && DIAG_BY_FAB };
209
210/*
 * Event queue overflow
 * The GET_*_FRU macros read the "FRU" configuration property from the ASRU
 * of the hostbridge, of pciexfn[0] and of pcifn[0] respectively.  The [b]
 * and [d] indices are bound by the prop statement that uses the macro.
 */
211#define PROP_PLAT_FRU "FRU"
212#define GET_HB_FRU (confprop(asru(hostbridge), PROP_PLAT_FRU))
213#define GET_PCIE_FRU (confprop(asru(pciexbus[b]/pciexdev[d]/pciexfn[0]), PROP_PLAT_FRU))
214#define GET_PCI_FRU (confprop(asru(pcibus[b]/pcidev[d]/pcifn[0]), PROP_PLAT_FRU))
215
/*
 * An MSIQ overflow epkt (BLOCK_INTR, OP_MSIQ, CND_OV) implicates function 0
 * of each PCI-E device under the hostbridge whose FRU differs from the
 * hostbridge FRU.
 */
216prop fault.io.fire.pciex.device@pciexbus[b]/pciexdev[d]/pciexfn[0]
217    {
218        /*
219         * Indict PCI-E FRU(s) under this root complex excluding the
220         * one that the Fire ASIC resides on.
221         */
222        is_under(hostbridge, pciexbus[b]/pciexdev[d]/pciexfn[0]) &&
223	(GET_HB_FRU != GET_PCIE_FRU)
224    } (0) -> EPKT(BLOCK_INTR,OP_MSIQ,PH_UNKNOWN,CND_OV,DIR_IRR);
225
/*
 * PCI counterpart of the PCI-E overflow rule: the same MSIQ overflow epkt
 * implicates function 0 of each PCI device under the hostbridge whose FRU
 * differs from the hostbridge FRU.
 */
226prop fault.io.fire.pci.device@pcibus[b]/pcidev[d]/pcifn[0]
227    {
228        /*
229         * Indict PCI FRU(s) under this root complex excluding the
230         * one that the Fire ASIC resides on.
231         */
232        is_under(hostbridge, pcibus[b]/pcidev[d]/pcifn[0]) &&
233	    (GET_HB_FRU != GET_PCI_FRU)
234    } (0) -> EPKT(BLOCK_INTR,OP_MSIQ,PH_UNKNOWN,CND_OV,DIR_IRR);
235