1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /*
27 * Ereport-handling routines for memory errors
28 */
29
30 #include <cmd_mem.h>
31 #include <cmd_dimm.h>
32 #include <cmd_bank.h>
33 #include <cmd_page.h>
34 #include <cmd_cpu.h>
35 #include <cmd.h>
36
37 #include <strings.h>
38 #include <string.h>
39 #include <errno.h>
40 #include <limits.h>
41 #include <unistd.h>
42 #include <fm/fmd_api.h>
43 #include <sys/fm/protocol.h>
44 #include <sys/fm/cpu/UltraSPARC-III.h>
45 #include <sys/async.h>
46 #include <sys/cheetahregs.h>
47 #include <sys/errclassify.h>
48 #include <sys/fm/io/sun4upci.h>
49 #include <sys/pci/pcisch.h>
50
/* Jalapeno-specific values from cheetahregs.h */
#define USIIIi_AFSR_AID 0x0000000000003e00ull /* AID causing UE/CE */
#define USIIIi_AFSR_AID_SHIFT 9
#define USIIIi_AFSR_JREQ 0x0000000007000000ull /* Active JBus req */
#define USIIIi_AFSR_JREQ_SHIFT 24
/*
 * Mask applied to both agent ids when pairing an IOxE with an FRx; it drops
 * the least significant bit, which is aliased in Tomatillo's jpid.
 */
#define TOM_AID_MATCH_MASK 0xe

/* Agent id recorded as the AFSR agent id for Fire IOxE ereports */
#define FIRE_AID 0xe
/* Bits of the jbc-jitel1 payload value that are kept as the AFAR */
#define FIRE_JBC_ADDR_MASK 0x000007ffffffffffull
/* Name of the Fire payload member the AFAR is derived from */
#define FIRE_JBC_JITEL1 "jbc-jitel1"
61
62 /*ARGSUSED*/
63 cmd_evdisp_t
cmd_mem_synd_check(fmd_hdl_t * hdl,uint64_t afar,uint8_t afar_status,uint16_t synd,uint8_t synd_status,cmd_cpu_t * cpu)64 cmd_mem_synd_check(fmd_hdl_t *hdl, uint64_t afar, uint8_t afar_status,
65 uint16_t synd, uint8_t synd_status, cmd_cpu_t *cpu)
66 {
67 if (synd == CH_POISON_SYND_FROM_XXU_WRITE ||
68 ((cpu->cpu_type == CPU_ULTRASPARC_IIIi ||
69 cpu->cpu_type == CPU_ULTRASPARC_IIIiplus) &&
70 synd == CH_POISON_SYND_FROM_XXU_WRMERGE)) {
71 fmd_hdl_debug(hdl,
72 "discarding UE due to magic syndrome %x\n", synd);
73 return (CMD_EVD_UNUSED);
74 }
75 return (CMD_EVD_OK);
76 }
77
78 static cmd_evdisp_t
xe_common(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_xe_handler_f * hdlr)79 xe_common(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
80 const char *class, cmd_xe_handler_f *hdlr)
81 {
82 uint64_t afar;
83 uint16_t synd;
84 uint8_t afar_status, synd_status;
85 nvlist_t *rsrc;
86 char *typenm;
87 uint64_t disp;
88 int minorvers = 1;
89
90 if (nvlist_lookup_pairs(nvl, 0,
91 FM_EREPORT_PAYLOAD_NAME_AFAR, DATA_TYPE_UINT64, &afar,
92 FM_EREPORT_PAYLOAD_NAME_AFAR_STATUS, DATA_TYPE_UINT8, &afar_status,
93 FM_EREPORT_PAYLOAD_NAME_SYND, DATA_TYPE_UINT16, &synd,
94 FM_EREPORT_PAYLOAD_NAME_SYND_STATUS, DATA_TYPE_UINT8, &synd_status,
95 FM_EREPORT_PAYLOAD_NAME_ERR_TYPE, DATA_TYPE_STRING, &typenm,
96 FM_EREPORT_PAYLOAD_NAME_RESOURCE, DATA_TYPE_NVLIST, &rsrc,
97 NULL) != 0)
98 return (CMD_EVD_BAD);
99
100 if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_NAME_ERR_DISP,
101 &disp) != 0)
102 minorvers = 0;
103
104 return (hdlr(hdl, ep, nvl, class, afar, afar_status, synd,
105 synd_status, cmd_mem_name2type(typenm, minorvers), disp, rsrc));
106 }
107
108 /*ARGSUSED*/
109 cmd_evdisp_t
cmd_ce(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_errcl_t clcode)110 cmd_ce(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
111 cmd_errcl_t clcode)
112 {
113 return (xe_common(hdl, ep, nvl, class, cmd_ce_common));
114 }
115
116 /*ARGSUSED*/
117 cmd_evdisp_t
cmd_ue(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_errcl_t clcode)118 cmd_ue(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
119 cmd_errcl_t clcode)
120 {
121 return (xe_common(hdl, ep, nvl, class, cmd_ue_common));
122 }
123
124 cmd_evdisp_t
cmd_frx(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_errcl_t clcode)125 cmd_frx(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
126 cmd_errcl_t clcode)
127 {
128 cmd_errcl_t matchmask = (clcode == CMD_ERRCL_FRC ? (CMD_ERRCL_RCE |
129 CMD_ERRCL_IOCE) : (CMD_ERRCL_RUE | CMD_ERRCL_IOUE));
130
131 return (cmd_rxefrx_common(hdl, ep, nvl, class, clcode, matchmask));
132 }
133
134 /*
135 * When we complete an IOxE/RxE FRx pair, we have enough information to
136 * create either a CE or a UE, as appropriate. Before dispatching the
137 * joined event to the xE handler, we need to generate the FMRI for the
138 * named DIMM. While one of the events may already contain a resource FMRI,
139 * said FMRI is incomplete. The detector didn't have the necessary
140 * information (the AFAR, the AFSR, *and* the syndrome) needed to create
141 * a DIMM-level FMRI.
142 */
143 static cmd_evdisp_t
iorxefrx_synthesize(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,uint64_t afar,uint8_t afar_status,uint64_t afsr,uint16_t synd,uint8_t synd_status,ce_dispact_t type,uint64_t disp,cmd_xe_handler_f * hdlr)144 iorxefrx_synthesize(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
145 const char *class, uint64_t afar, uint8_t afar_status, uint64_t afsr,
146 uint16_t synd, uint8_t synd_status, ce_dispact_t type, uint64_t disp,
147 cmd_xe_handler_f *hdlr)
148 {
149 nvlist_t *fmri;
150 int rc;
151
152 if ((fmri = cmd_dimm_fmri_derive(hdl, afar, synd, afsr)) == NULL)
153 return (CMD_EVD_UNUSED);
154
155 rc = hdlr(hdl, ep, nvl, class, afar, afar_status, synd, synd_status,
156 type, disp, fmri);
157
158 nvlist_free(fmri);
159
160 return (rc);
161 }
162
163 static cmd_iorxefrx_t *
iorxefrx_match(fmd_hdl_t * hdl,cmd_errcl_t errcl,cmd_errcl_t matchmask,uint_t det_agentid,uint_t afsr_agentid)164 iorxefrx_match(fmd_hdl_t *hdl, cmd_errcl_t errcl, cmd_errcl_t matchmask,
165 uint_t det_agentid, uint_t afsr_agentid)
166 {
167 cmd_iorxefrx_t *rf;
168
169 for (rf = cmd_list_next(&cmd.cmd_iorxefrx); rf != NULL;
170 rf = cmd_list_next(rf)) {
171
172 fmd_hdl_debug(hdl, "rf->rf_errcl = %llx, matchmask = %llx\n"
173 "rf->rf_det_agentid = %lx, afsr_agentid = %lx\n"
174 "rf->rf_afsr_agentid = %lx, det_agentid = %lx\n",
175 rf->rf_errcl, matchmask, rf->rf_det_agentid, afsr_agentid,
176 rf->rf_afsr_agentid, det_agentid);
177
178 if ((rf->rf_errcl & matchmask) == 0)
179 continue;
180
181 /*
182 * For IOxEs we are unable to match based on both the detector
183 * and the captured Agent Id in the AFSR, because the bridge
184 * captures it's own Agent Id instead of the remote CPUs.
185 *
186 * Also, the LSB of Tomatillo's jpid is aliased for each chip
187 * and therefore needs to be factored out of our matching.
188 */
189 if ((CMD_ERRCL_ISIOXE(rf->rf_errcl) ||
190 CMD_ERRCL_ISIOXE(errcl)) &&
191 ((rf->rf_afsr_agentid & TOM_AID_MATCH_MASK) ==
192 (afsr_agentid & TOM_AID_MATCH_MASK)))
193 return (rf);
194
195 /*
196 * Check for both here since IOxE is not involved
197 */
198 if ((rf->rf_afsr_agentid == det_agentid) &&
199 (rf->rf_det_agentid == afsr_agentid))
200 return (rf);
201 }
202
203 return (NULL);
204 }
205
206 /*
207 * Got an RxE or an FRx. FRx ereports can be matched with RxE ereports and
208 * vice versa. FRx ereports can also be matched with IOxE ereports.
209 */
210 cmd_evdisp_t
cmd_rxefrx_common(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_errcl_t clcode,cmd_errcl_t matchmask)211 cmd_rxefrx_common(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
212 const char *class, cmd_errcl_t clcode, cmd_errcl_t matchmask)
213 {
214 cmd_xe_handler_f *hdlr;
215 cmd_iorxefrx_t *rfmatch, *rferr;
216 cmd_cpu_t *cpu;
217 char *typenm;
218 int isrxe = CMD_ERRCL_MATCH(clcode, CMD_ERRCL_RCE | CMD_ERRCL_RUE);
219 int isce = CMD_ERRCL_MATCH(clcode, CMD_ERRCL_RCE | CMD_ERRCL_FRC);
220 int rc;
221 int minorvers = 1;
222 uint8_t level = clcode & CMD_ERRCL_LEVEL_EXTRACT;
223
224 clcode &= CMD_ERRCL_LEVEL_MASK;
225 rferr = fmd_hdl_zalloc(hdl, sizeof (cmd_iorxefrx_t), FMD_SLEEP);
226
227 if (nvlist_lookup_pairs(nvl, 0,
228 FM_EREPORT_PAYLOAD_NAME_SYND, DATA_TYPE_UINT16, &rferr->rf_synd,
229 FM_EREPORT_PAYLOAD_NAME_SYND_STATUS, DATA_TYPE_UINT8,
230 &rferr->rf_synd_status,
231 FM_EREPORT_PAYLOAD_NAME_AFAR, DATA_TYPE_UINT64, &rferr->rf_afar,
232 FM_EREPORT_PAYLOAD_NAME_AFAR_STATUS, DATA_TYPE_UINT8,
233 &rferr->rf_afar_status,
234 FM_EREPORT_PAYLOAD_NAME_AFSR, DATA_TYPE_UINT64, &rferr->rf_afsr,
235 FM_EREPORT_PAYLOAD_NAME_ERR_TYPE, DATA_TYPE_STRING, &typenm,
236 NULL) != 0) {
237 fmd_hdl_free(hdl, rferr, sizeof (cmd_iorxefrx_t));
238 return (CMD_EVD_BAD);
239 }
240 if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_NAME_ERR_DISP,
241 &rferr->rf_disp) != 0)
242 minorvers = 0;
243
244 rferr->rf_type = cmd_mem_name2type(typenm, minorvers);
245
246 if ((cpu = cmd_cpu_lookup_from_detector(hdl, nvl, class,
247 level)) == NULL) {
248 fmd_hdl_free(hdl, rferr, sizeof (cmd_iorxefrx_t));
249 return (CMD_EVD_UNUSED);
250 }
251
252 if (!isrxe && rferr->rf_synd_status != AFLT_STAT_VALID) {
253 fmd_hdl_free(hdl, rferr, sizeof (cmd_iorxefrx_t));
254 return (CMD_EVD_UNUSED);
255 }
256
257 if (isrxe) {
258 rferr->rf_afsr_agentid = (rferr->rf_afsr &
259 USIIIi_AFSR_JREQ) >> USIIIi_AFSR_JREQ_SHIFT;
260 } else {
261 rferr->rf_afsr_agentid = (rferr->rf_afsr &
262 USIIIi_AFSR_AID) >> USIIIi_AFSR_AID_SHIFT;
263 }
264
265 rferr->rf_errcl = clcode;
266 rferr->rf_det_agentid = cpu->cpu_cpuid;
267
268 if ((rfmatch = iorxefrx_match(hdl, clcode, matchmask,
269 rferr->rf_det_agentid, rferr->rf_afsr_agentid)) == NULL) {
270 cmd_iorxefrx_queue(hdl, rferr);
271 return (CMD_EVD_OK);
272 }
273
274 /*
275 * Found a match. Send a synthesized ereport to the appropriate
276 * routine.
277 */
278 fmd_hdl_debug(hdl, "matched %cE %llx with %llx", "UC"[isce],
279 rferr->rf_errcl, rfmatch->rf_errcl);
280
281 hdlr = (isce ? cmd_ce_common : cmd_ue_common);
282 if (isrxe) {
283 rc = iorxefrx_synthesize(hdl, ep, nvl, class, rferr->rf_afar,
284 rferr->rf_afar_status, rfmatch->rf_afsr, rfmatch->rf_synd,
285 rfmatch->rf_synd_status, rferr->rf_type, rferr->rf_disp,
286 hdlr);
287 } else {
288 rc = iorxefrx_synthesize(hdl, ep, nvl, class, rfmatch->rf_afar,
289 rfmatch->rf_afar_status, rferr->rf_afsr, rferr->rf_synd,
290 rferr->rf_synd_status, rfmatch->rf_type, rferr->rf_disp,
291 hdlr);
292 }
293
294 cmd_iorxefrx_free(hdl, rfmatch);
295 fmd_hdl_free(hdl, rferr, sizeof (cmd_iorxefrx_t));
296
297 return (rc);
298 }
299
300 /*
301 * This fire IOxE must be matched with an FRx before UE/CE processing
302 * is possible.
303 *
304 * Note that for fire ereports we don't receive AFSR, AFAR, AFAR-Status
305 * and SYND values but we can derive the AFAR from the payload value
306 * FIRE_JBC_JITEL1. We may receive a TYPNM value.
307 */
308 static cmd_evdisp_t
cmd_ioxefrx_fire(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_errcl_t errcl,cmd_errcl_t matchmask)309 cmd_ioxefrx_fire(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
310 const char *class, cmd_errcl_t errcl, cmd_errcl_t matchmask)
311 {
312 cmd_xe_handler_f *hdlr;
313 cmd_iorxefrx_t *rfmatch, *rferr;
314 uint64_t afar;
315 int isce = CMD_ERRCL_MATCH(errcl, CMD_ERRCL_IOCE);
316 char *portid_str;
317 char *path = NULL;
318 char *typenm = NULL;
319 nvlist_t *det = NULL;
320 int rc;
321 int minorvers = 1;
322
323 rferr = fmd_hdl_zalloc(hdl, sizeof (cmd_iorxefrx_t), FMD_SLEEP);
324
325 /*
326 * Lookup device path of host bridge.
327 */
328 (void) nvlist_lookup_nvlist(nvl, FM_EREPORT_DETECTOR, &det);
329 (void) nvlist_lookup_string(det, FM_FMRI_DEV_PATH, &path);
330
331 /*
332 * get Jbus port id from the device path
333 */
334 portid_str = strrchr(path, '@') + 1;
335 rferr->rf_det_agentid = strtol(portid_str, NULL, 16);
336
337 rferr->rf_errcl = errcl;
338 rferr->rf_afsr_agentid = FIRE_AID;
339 rferr->rf_afar_status = AFLT_STAT_VALID;
340 rferr->rf_synd_status = AFLT_STAT_VALID;
341
342 /*
343 * Extract the afar from the payload
344 */
345 (void) nvlist_lookup_uint64(nvl, FIRE_JBC_JITEL1, &afar);
346 rferr->rf_afar = afar & FIRE_JBC_ADDR_MASK;
347
348 rferr->rf_afsr = NULL;
349 rferr->rf_synd = NULL;
350
351 if (nvlist_lookup_string(nvl, FM_EREPORT_PAYLOAD_NAME_ERR_TYPE,
352 &typenm) == 0)
353 rferr->rf_type = cmd_mem_name2type(typenm, minorvers);
354
355 /*
356 * Need to send in the io_jpid that we get from the device path above
357 * for both the det_agentid and the afsr_agentid, since the CPU does not
358 * capture the same address as the bridge. The bridge has the LSB
359 * aliased and the CPU is missing the MSB.
360 */
361 if ((rfmatch = iorxefrx_match(hdl, rferr->rf_errcl, matchmask,
362 rferr->rf_det_agentid, rferr->rf_afsr_agentid)) == NULL) {
363 cmd_iorxefrx_queue(hdl, rferr);
364 return (CMD_EVD_OK);
365 }
366
367 /* Found a match. Synthesize an ereport for UE/CE processing. */
368 fmd_hdl_debug(hdl, "matched %cE %llx with %llx\n", "UC"[isce],
369 rferr->rf_errcl, rfmatch->rf_errcl);
370
371 hdlr = (isce ? cmd_ce_common : cmd_ue_common);
372 rc = iorxefrx_synthesize(hdl, ep, nvl, class, rferr->rf_afar,
373 rferr->rf_afar_status, rfmatch->rf_afsr, rfmatch->rf_synd,
374 rfmatch->rf_synd_status, rferr->rf_type, rferr->rf_disp, hdlr);
375
376 cmd_iorxefrx_free(hdl, rfmatch);
377 fmd_hdl_free(hdl, rferr, sizeof (cmd_iorxefrx_t));
378
379 return (rc);
380 }
381
382 /* This IOxE must be matched with an FRx before UE/CE processing is possible */
383 static cmd_evdisp_t
cmd_ioxefrx_common(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_errcl_t errcl,cmd_errcl_t matchmask)384 cmd_ioxefrx_common(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
385 const char *class, cmd_errcl_t errcl, cmd_errcl_t matchmask)
386 {
387 cmd_xe_handler_f *hdlr;
388 cmd_iorxefrx_t *rfmatch, *rferr;
389 char *typenm;
390 int isce = CMD_ERRCL_MATCH(errcl, CMD_ERRCL_IOCE);
391 char *portid_str;
392 char *path = NULL;
393 nvlist_t *det = NULL;
394 int rc;
395 int minorvers = 1;
396
397 rferr = fmd_hdl_zalloc(hdl, sizeof (cmd_iorxefrx_t), FMD_SLEEP);
398
399 if (nvlist_lookup_pairs(nvl, 0,
400 PCI_ECC_AFAR, DATA_TYPE_UINT64, &rferr->rf_afar,
401 PCI_ECC_AFSR, DATA_TYPE_UINT64, &rferr->rf_afsr,
402 PCI_ECC_SYND, DATA_TYPE_UINT16, &rferr->rf_synd,
403 PCI_ECC_TYPE, DATA_TYPE_STRING, &typenm,
404 NULL) != 0) {
405 fmd_hdl_free(hdl, rferr, sizeof (cmd_iorxefrx_t));
406 return (CMD_EVD_BAD);
407 }
408
409 if (nvlist_lookup_uint64(nvl, PCI_ECC_DISP, &rferr->rf_disp) != 0)
410 minorvers = 0;
411
412 rferr->rf_type = cmd_mem_name2type(typenm, minorvers);
413 rferr->rf_errcl = errcl;
414
415 /*
416 * Lookup device path of host bridge.
417 */
418 (void) nvlist_lookup_nvlist(nvl, FM_EREPORT_DETECTOR, &det);
419 (void) nvlist_lookup_string(det, FM_FMRI_DEV_PATH, &path);
420
421 /*
422 * get Jbus port id from the device path
423 */
424 portid_str = strrchr(path, '@') + 1;
425 rferr->rf_det_agentid = strtol(portid_str, NULL, 16);
426
427 rferr->rf_afsr_agentid = (rferr->rf_afsr &
428 SCHIZO_ECC_UE_AFSR_AGENT_MID) >> SCHIZO_ECC_UE_AFSR_AGENT_MID_SHIFT;
429
430 /*
431 * Only 4 bits of the Jbus AID are sent on the Jbus. MSB is the one
432 * that is chosen not to make the trip. This is not in any of the Jbus
433 * or Tomatillo documents and was discovered during testing and verified
434 * by Jalapeno H/W designer.
435 */
436 rferr->rf_afsr_agentid &= 0xf;
437 rferr->rf_afar_status = AFLT_STAT_VALID;
438 rferr->rf_synd_status = AFLT_STAT_VALID;
439
440 /*
441 * Need to send in the io_jpid that we get from the device path above
442 * for both the det_agentid and the afsr_agentid, since the CPU does not
443 * capture the same address as the bridge. The bridge has the LSB
444 * aliased and the CPU is missing the MSB.
445 */
446 if ((rfmatch = iorxefrx_match(hdl, rferr->rf_errcl, matchmask,
447 rferr->rf_det_agentid, rferr->rf_afsr_agentid)) == NULL) {
448 cmd_iorxefrx_queue(hdl, rferr);
449 return (CMD_EVD_OK);
450 }
451
452 /* Found a match. Synthesize an ereport for UE/CE processing. */
453 fmd_hdl_debug(hdl, "matched %cE %llx with %llx\n", "UC"[isce],
454 rferr->rf_errcl, rfmatch->rf_errcl);
455
456 hdlr = (isce ? cmd_ce_common : cmd_ue_common);
457 rc = iorxefrx_synthesize(hdl, ep, nvl, class, rferr->rf_afar,
458 rferr->rf_afar_status, rfmatch->rf_afsr, rfmatch->rf_synd,
459 rfmatch->rf_synd_status, rferr->rf_type, rferr->rf_disp, hdlr);
460
461 cmd_iorxefrx_free(hdl, rfmatch);
462 fmd_hdl_free(hdl, rferr, sizeof (cmd_iorxefrx_t));
463
464 return (rc);
465 }
466
467 /* IOxE ereports that don't need matching with FRx ereports */
468 static cmd_evdisp_t
ioxe_common(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_errcl_t clcode)469 ioxe_common(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
470 cmd_errcl_t clcode)
471 {
472 int isce = CMD_ERRCL_MATCH(clcode, CMD_ERRCL_IOCE);
473 cmd_xe_handler_f *hdlr = isce ? cmd_ce_common : cmd_ue_common;
474 uint64_t afar;
475 uint16_t synd;
476 nvlist_t *rsrc;
477 char *typenm;
478 uint64_t disp;
479 int minorvers = 1;
480
481 if (nvlist_lookup_pairs(nvl, 0,
482 PCI_ECC_AFAR, DATA_TYPE_UINT64, &afar,
483 PCI_ECC_SYND, DATA_TYPE_UINT16, &synd,
484 PCI_ECC_TYPE, DATA_TYPE_STRING, &typenm,
485 PCI_ECC_RESOURCE, DATA_TYPE_NVLIST, &rsrc,
486 NULL) != 0)
487 return (CMD_EVD_BAD);
488
489 if (nvlist_lookup_uint64(nvl, PCI_ECC_DISP, &disp) != 0)
490 minorvers = 0;
491
492 return (hdlr(hdl, ep, nvl, class, afar, AFLT_STAT_VALID, synd,
493 AFLT_STAT_VALID, cmd_mem_name2type(typenm, minorvers), disp,
494 rsrc));
495 }
496
497 cmd_evdisp_t
cmd_rxe(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_errcl_t clcode)498 cmd_rxe(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
499 cmd_errcl_t clcode)
500 {
501 cmd_errcl_t matchmask = (clcode == CMD_ERRCL_RCE ? CMD_ERRCL_FRC :
502 CMD_ERRCL_FRU);
503
504 return (cmd_rxefrx_common(hdl, ep, nvl, class, clcode, matchmask));
505 }
506
507 cmd_evdisp_t
cmd_ioxe(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_errcl_t clcode)508 cmd_ioxe(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
509 cmd_errcl_t clcode)
510 {
511 cmd_errcl_t matchmask = (clcode == CMD_ERRCL_IOCE ? CMD_ERRCL_FRC :
512 CMD_ERRCL_FRU);
513
514 if (fmd_nvl_class_match(hdl, nvl, "ereport.io.tom.*")) {
515 return (cmd_ioxefrx_common(hdl, ep, nvl, class, clcode,
516 matchmask));
517 } else if (fmd_nvl_class_match(hdl, nvl, "ereport.io.fire.*")) {
518 return (cmd_ioxefrx_fire(hdl, ep, nvl, class, clcode,
519 matchmask));
520 } else
521 return (ioxe_common(hdl, ep, nvl, class, clcode));
522 }
523
524 /*ARGSUSED*/
525 cmd_evdisp_t
cmd_ioxe_sec(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_errcl_t clcode)526 cmd_ioxe_sec(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
527 cmd_errcl_t clcode)
528 {
529 /*
530 * Secondary IOxE's can't be used to identify failed or failing
531 * resources, as they don't contain enough information. Ignore them.
532 */
533 return (CMD_EVD_OK);
534 }
535
536 /*ARGSUSED*/
537 ulong_t
cmd_mem_get_phys_pages(fmd_hdl_t * hdl)538 cmd_mem_get_phys_pages(fmd_hdl_t *hdl)
539 {
540 return (sysconf(_SC_PHYS_PAGES));
541 }
542
/*
 * sun4u bit position as function of e_synd,
 * from JPS1 Implementation Supplement table P-7
 * Encode bit positions as follows:
 * 0-127 data bits 0-127
 * 128-136 check bits 0-8 (Cn = 128+n)
 * no error or multibit error = -1 (not valid CE)
 */

int esynd2bit [] = {
	-1, 128, 129, -1, 130, -1, -1, 47,
	131, -1, -1, 53, -1, 41, 29, -1, /* 000-00F */
	132, -1, -1, 50, -1, 38, 25, -1,
	-1, 33, 24, -1, 11, -1, -1, 16, /* 010-01F */
	133, -1, -1, 46, -1, 37, 19, -1,
	-1, 31, 32, -1, 7, -1, -1, 10, /* 020-02F */
	-1, 40, 13, -1, 59, -1, -1, 66,
	-1, -1, -1, 0, -1, 67, 71, -1, /* 030-03F */
	134, -1, -1, 43, -1, 36, 18, -1,
	-1, 49, 15, -1, 63, -1, -1, 6, /* 040-04F */
	-1, 44, 28, -1, -1, -1, -1, 52,
	68, -1, -1, 62, -1, -1, -1, -1, /* 050-05F */
	-1, 26, 106, -1, 64, -1, -1, 2,
	120, -1, -1, -1, -1, -1, -1, -1, /* 060-06F */
	116, -1, -1, -1, -1, -1, -1, -1,
	-1, 58, 54, -1, -1, -1, -1, -1, /* 070-07F */
	135, -1, -1, 42, -1, 35, 17, -1,
	-1, 45, 14, -1, 21, -1, -1, 5, /* 080-08F */
	-1, 27, -1, -1, 99, -1, -1, 3,
	114, -1, -1, 20, -1, -1, -1, -1, /* 090-09F */
	-1, 23, 113, -1, 112, -1, -1, 51,
	95, -1, -1, -1, -1, -1, -1, -1, /* 0A0-0AF */
	103, -1, -1, -1, -1, -1, -1, -1,
	-1, 48, -1, -1, 73, -1, -1, -1, /* 0B0-0BF */
	-1, 22, 110, -1, 109, -1, -1, 9,
	108, -1, -1, -1, -1, -1, -1, -1, /* 0C0-0CF */
	102, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, /* 0D0-0DF */
	98, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, /* 0E0-0EF */
	-1, -1, -1, -1, -1, -1, -1, -1,
	56, -1, -1, -1, -1, -1, -1, -1, /* 0F0-0FF */
	136, -1, -1, 39, -1, 34, 105, -1,
	-1, 30, 104, -1, 101, -1, -1, 4, /* 100-10F */
	-1, -1, 100, -1, 83, -1, -1, 12,
	87, -1, -1, 57, -1, -1, -1, -1, /* 110-11F */
	-1, 97, 82, -1, 78, -1, -1, 1,
	96, -1, -1, -1, -1, -1, -1, -1, /* 120-12F */
	94, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, 79, -1, 69, -1, -1, -1, /* 130-13F */
	-1, 93, 92, -1, 91, -1, -1, 8,
	90, -1, -1, -1, -1, -1, -1, -1, /* 140-14F */
	89, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, /* 150-15F */
	86, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, /* 160-16F */
	-1, -1, -1, -1, -1, -1, -1, -1,
	60, -1, -1, -1, -1, -1, -1, -1, /* 170-17F */
	-1, 88, 85, -1, 84, -1, -1, 55,
	81, -1, -1, -1, -1, -1, -1, -1, /* 180-18F */
	77, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, /* 190-19F */
	74, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, /* 1A0-1AF */
	-1, 70, 107, -1, 65, -1, -1, -1,
	127, -1, -1, -1, -1, -1, -1, -1, /* 1B0-1BF */
	80, -1, -1, 72, -1, 119, 118, -1,
	-1, 126, 76, -1, 125, -1, -1, -1, /* 1C0-1CF */
	-1, 115, 124, -1, 75, -1, -1, -1,
	61, -1, -1, -1, -1, -1, -1, -1, /* 1D0-1DF */
	-1, 123, 122, -1, 121, -1, -1, -1,
	117, -1, -1, -1, -1, -1, -1, -1, /* 1E0-1EF */
	111, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1 /* 1F0-1FF */
};

int msynd2bit [] = { /* msynd 0-F */
	-1, 140, 141, -1,
	142, -1, -1, 137,
	143, -1, -1, 138,
	-1, 139, -1, -1
};

/*
 * Translate an E$ syndrome into its unit (bit) position using esynd2bit.
 *
 * Returns the encoded bit position, or -1 when the syndrome does not
 * identify a single bit.  Syndromes beyond the end of the table (0x200 and
 * above) are also reported as -1 instead of indexing past the array.
 */
int
cmd_synd2upos(uint16_t syndrome)
{
	if (syndrome >= sizeof (esynd2bit) / sizeof (esynd2bit[0]))
		return (-1);

	return (esynd2bit[syndrome]);
}
630
/* Supplies the platform name that cmd_get_platform() looks up in id_plat. */
const char *fmd_fmri_get_platform();

/* Size of the buffer in which a board FRU label is formatted */
#define DP_MAX 25

/* FRU labels, indexed by board id, used for platform ids 3 and 4 */
const char *slotname[] = {
	"Slot A", "Slot B", "Slot C", "Slot D"};

/* One suspect in a datapath fault, with the number of reports it made */
typedef struct fault_info {
	uint32_t id;	/* board id or cpu id of the suspect */
	int count;	/* number of reports attributed to this id */
} fault_info_t;

/*
 * Maps platform names onto the small integer ids that select the cpuid to
 * board id conversion and the FRU label format.  The table is terminated by
 * an entry whose platnm is NULL.
 */
struct plat2id_map {
	char *platnm;
	int id;
} id_plat[] = {
	{"SUNW,Sun-Fire-15000", 1},
	{"SUNW,Sun-Fire", 2},
	{"SUNW,Netra-T12", 2},
	{"SUNW,Sun-Fire-480R", 3},
	{"SUNW,Sun-Fire-V490", 3},
	{"SUNW,Sun-Fire-V440", 3},
	{"SUNW,Sun-Fire-V445", 3},
	{"SUNW,Netra-440", 3},
	{"SUNW,Sun-Fire-880", 4},
	{"SUNW,Sun-Fire-V890", 4},
	{NULL, 0}
};
659
/*
 * Store the address to be used for afar in *addr.  In this implementation
 * the AFAR is stored unmodified; class is not used.
 */
/*ARGSUSED*/
void
cmd_to_hashed_addr(uint64_t *addr, uint64_t afar, const char *class)
{
	uint64_t hashed = afar;

	addr[0] = hashed;
}
666
667 /*ARGSUSED*/
668 int
cmd_same_datapath_dimms(cmd_dimm_t * d1,cmd_dimm_t * d2)669 cmd_same_datapath_dimms(cmd_dimm_t *d1, cmd_dimm_t *d2)
670 {
671 return (1);
672 }
673
674 static int
cmd_get_platform()675 cmd_get_platform()
676 {
677 const char *platname;
678 int id = -1;
679 int i;
680
681 platname = fmd_fmri_get_platform();
682 for (i = 0; id_plat[i].platnm != NULL; i++) {
683 if (strcmp(platname, id_plat[i].platnm) == 0) {
684 id = id_plat[i].id;
685 break;
686 }
687 }
688 return (id);
689 }
690
/*
 * Convert a cpuid into a board id, using the rule selected by the
 * platform id that cmd_get_platform() returns:
 *
 *   1  - bits 5-9 of the cpuid
 *   2  - low five bits of the cpuid, divided by four
 *   3  - 0 for an even cpuid, 1 for an odd one (low three bits only)
 *   4  - low three bits: even ids map to 0 (below 4) or 2, odd ids map
 *        to 1 (below 5) or 3
 *
 * Any other platform id yields 5.
 */
static int
cmd_get_boardid(uint32_t cpuid)
{
	uint32_t low3 = cpuid & 0x07;
	int odd = ((low3 % 2) != 0);

	switch (cmd_get_platform()) {
	case 1:
		return ((cpuid >> 5) & 0x1f);
	case 2:
		return ((cpuid & 0x1f) / 4);
	case 3:
		return (odd ? 1 : 0);
	case 4:
		if (odd)
			return ((low3 < 5) ? 1 : 3);
		return ((low3 < 4) ? 0 : 2);
	default:
		return (5);
	}
}
723
724 static void
cmd_get_faulted_comp(fmd_hdl_t * hdl,cmd_dimm_t * d1,cmd_dimm_t * d2,uint16_t upos,fault_info_t ** fault_list,int cpu)725 cmd_get_faulted_comp(fmd_hdl_t *hdl, cmd_dimm_t *d1, cmd_dimm_t *d2,
726 uint16_t upos, fault_info_t **fault_list, int cpu)
727 {
728 cmd_mq_t *ip;
729 int i, j, k, idj;
730 uint32_t id;
731 uint32_t *cpuid = NULL;
732 int max_rpt;
733
734 max_rpt = 2 * cmd.cmd_nupos;
735
736 cpuid = fmd_hdl_alloc(hdl, max_rpt * sizeof (uint32_t), FMD_SLEEP);
737
738 if (cpuid == NULL)
739 return;
740
741 for (i = 0, j = 0; i < CMD_MAX_CKWDS; i++) {
742 for (ip = cmd_list_next(&d1->mq_root[i]); ip != NULL;
743 ip = cmd_list_next(ip)) {
744 if (upos == ip->mq_unit_position) {
745 cpuid[j] = ip->mq_cpuid;
746 j++;
747 }
748 if (j >= cmd.cmd_nupos)
749 break;
750 }
751 if (j >= cmd.cmd_nupos)
752 break;
753 }
754
755 for (i = 0; i < CMD_MAX_CKWDS; i++) {
756 for (ip = cmd_list_next(&d2->mq_root[i]); ip != NULL;
757 ip = cmd_list_next(ip)) {
758 if (upos == ip->mq_unit_position) {
759 cpuid[j] = ip->mq_cpuid;
760 j++;
761 }
762 if (j >= max_rpt)
763 break;
764 }
765 if (j >= max_rpt)
766 break;
767 }
768
769 for (i = 0, k = 0; i < max_rpt; i++) {
770 if (cpuid[i] == ULONG_MAX)
771 continue;
772 id = (cpu == 0) ? cmd_get_boardid(cpuid[i]) : cpuid[i];
773 fault_list[k] = fmd_hdl_alloc(hdl,
774 sizeof (fault_info_t), FMD_SLEEP);
775 if (fault_list[k] == NULL)
776 break;
777 fault_list[k]->count = 1;
778 fault_list[k]->id = id;
779 for (j = i + 1; j < max_rpt; j++) {
780 if (cpuid[j] == ULONG_MAX)
781 continue;
782 idj = (cpu == 0) ? cmd_get_boardid(cpuid[j]) : cpuid[j];
783 if (id == idj) {
784 fault_list[k]->count++;
785 cpuid[j] = ULONG_MAX;
786 }
787 }
788 k++;
789 }
790
791 fmd_hdl_free(hdl, cpuid, max_rpt * sizeof (uint32_t));
792 }
793
794 /*ARGSUSED*/
795 static nvlist_t *
cmd_board_mkfru(fmd_hdl_t * hdl,char * frustr)796 cmd_board_mkfru(fmd_hdl_t *hdl, char *frustr)
797 {
798 nvlist_t *hcel, *fru;
799 int err;
800
801 if (frustr == NULL)
802 return (NULL);
803
804 if (nvlist_alloc(&hcel, NV_UNIQUE_NAME, 0) != 0)
805 return (NULL);
806
807 err = nvlist_add_string(hcel, FM_FMRI_HC_NAME,
808 FM_FMRI_LEGACY_HC);
809 err |= nvlist_add_string(hcel, FM_FMRI_HC_ID, frustr);
810 if (err != 0) {
811 nvlist_free(hcel);
812 return (NULL);
813 }
814
815 if (nvlist_alloc(&fru, NV_UNIQUE_NAME, 0) != 0) {
816 nvlist_free(hcel);
817 return (NULL);
818 }
819 err = nvlist_add_uint8(fru, FM_VERSION, FM_HC_SCHEME_VERSION);
820 err |= nvlist_add_string(fru, FM_FMRI_SCHEME,
821 FM_FMRI_SCHEME_HC);
822 err |= nvlist_add_string(fru, FM_FMRI_HC_ROOT, "");
823 err |= nvlist_add_uint32(fru, FM_FMRI_HC_LIST_SZ, 1);
824 err |= nvlist_add_nvlist_array(fru, FM_FMRI_HC_LIST, &hcel, 1);
825 if (err != 0) {
826 nvlist_free(fru);
827 nvlist_free(hcel);
828 return (NULL);
829 }
830 nvlist_free(hcel);
831 return (fru);
832 }
833
834 /*
835 * Startcat, Serengeti, V4xx, and V8xx: fault the system boards of
836 * the detectors in proportion to the number of ereports out of 8
837 * Other systems: fault the detectors in proportion to the number of
838 * ereports out of 8
839 */
840 void
cmd_gen_datapath_fault(fmd_hdl_t * hdl,cmd_dimm_t * d1,cmd_dimm_t * d2,uint16_t upos,nvlist_t * det)841 cmd_gen_datapath_fault(fmd_hdl_t *hdl, cmd_dimm_t *d1, cmd_dimm_t *d2,
842 uint16_t upos, nvlist_t *det)
843 {
844 char frustr[DP_MAX];
845 fmd_case_t *cp;
846 int i, ratio, type, fault_cpu, max_rpt;
847 uint32_t id;
848 uint8_t cpumask;
849 char *cpustr;
850 fault_info_t **fault_list = NULL;
851 nvlist_t *fru = NULL, *asru = NULL, *flt = NULL;
852
853 max_rpt = cmd.cmd_nupos * 2;
854 fault_list = fmd_hdl_alloc(hdl,
855 max_rpt * sizeof (fault_info_t *), FMD_SLEEP);
856
857 if (fault_list == NULL)
858 return;
859
860 for (i = 0; i < max_rpt; i++)
861 fault_list[i] = NULL;
862
863 type = cmd_get_platform();
864
865 fault_cpu = (type == -1) ? 1 : 0;
866
867 cmd_get_faulted_comp(hdl, d1, d2, upos, fault_list, fault_cpu);
868
869 cp = fmd_case_open(hdl, NULL);
870
871 for (i = 0; i < max_rpt; i++) {
872 if (fault_list[i] == NULL)
873 continue;
874 id = fault_list[i]->id;
875
876 switch (type) {
877 case 1:
878 (void) snprintf(frustr, DP_MAX, "EX%d", id);
879 break;
880 case 2:
881 (void) snprintf(frustr, DP_MAX, "/N0/SB%d", id);
882 break;
883 case 3:
884 case 4:
885 (void) snprintf(frustr, DP_MAX, slotname[id]);
886 break;
887 default:
888 cpustr = cmd_cpu_getfrustr_by_id(hdl, id);
889 if (nvlist_lookup_uint8(det, FM_FMRI_CPU_MASK, &cpumask)
890 == 0) {
891 asru = cmd_cpu_fmri_create(id, cpumask);
892 (void) fmd_nvl_fmri_expand(hdl, asru);
893 }
894 break;
895 }
896
897 ratio = (fault_list[i]->count * 100) / (cmd.cmd_nupos * 2);
898
899 if (fault_cpu) {
900 fru = cmd_cpu_mkfru(hdl, cpustr, NULL, NULL);
901 fmd_hdl_strfree(hdl, cpustr);
902 if (fru == NULL) {
903 nvlist_free(asru);
904 break;
905 }
906 flt = cmd_nvl_create_fault(hdl, "fault.memory.datapath",
907 ratio, asru, fru, asru);
908 nvlist_free(asru);
909 } else {
910 fru = cmd_board_mkfru(hdl, frustr);
911 if (fru == NULL)
912 break;
913 flt = cmd_nvl_create_fault(hdl, "fault.memory.datapath",
914 ratio, fru, fru, fru);
915 }
916
917 fmd_case_add_suspect(hdl, cp, flt);
918
919 /* free up memory */
920 nvlist_free(fru);
921 }
922
923 fmd_case_solve(hdl, cp);
924
925 for (i = 0; i < max_rpt; i++) {
926 if (fault_list[i] != NULL)
927 fmd_hdl_free(hdl, fault_list[i], sizeof (fault_info_t));
928 }
929
930 fmd_hdl_free(hdl, fault_list, sizeof (fault_info_t *) * max_rpt);
931 }
932