1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 /* 27 * Ereport-handling routines for memory errors 28 */ 29 30 #include <cmd_mem.h> 31 #include <cmd_dimm.h> 32 #include <cmd_bank.h> 33 #include <cmd_page.h> 34 #include <cmd_cpu.h> 35 #include <cmd.h> 36 37 #include <strings.h> 38 #include <string.h> 39 #include <errno.h> 40 #include <limits.h> 41 #include <unistd.h> 42 #include <fm/fmd_api.h> 43 #include <sys/fm/protocol.h> 44 #include <sys/fm/cpu/UltraSPARC-III.h> 45 #include <sys/async.h> 46 #include <sys/cheetahregs.h> 47 #include <sys/errclassify.h> 48 #include <sys/fm/io/sun4upci.h> 49 #include <sys/pci/pcisch.h> 50 51 /* Jalapeno-specific values from cheetahregs.h */ 52 #define USIIIi_AFSR_AID 0x0000000000003e00ull /* AID causing UE/CE */ 53 #define USIIIi_AFSR_AID_SHIFT 9 54 #define USIIIi_AFSR_JREQ 0x0000000007000000ull /* Active JBus req */ 55 #define USIIIi_AFSR_JREQ_SHIFT 24 56 #define TOM_AID_MATCH_MASK 0xe 57 58 #define FIRE_AID 0xe 59 #define FIRE_JBC_ADDR_MASK 0x000007ffffffffffull 60 #define FIRE_JBC_JITEL1 "jbc-jitel1" 61 62 /*ARGSUSED*/ 63 cmd_evdisp_t 64 cmd_mem_synd_check(fmd_hdl_t *hdl, uint64_t afar, uint8_t afar_status, 65 uint16_t synd, uint8_t synd_status, cmd_cpu_t *cpu) 66 { 67 if (synd == CH_POISON_SYND_FROM_XXU_WRITE || 68 ((cpu->cpu_type == CPU_ULTRASPARC_IIIi || 69 cpu->cpu_type == CPU_ULTRASPARC_IIIiplus) && 70 synd == CH_POISON_SYND_FROM_XXU_WRMERGE)) { 71 fmd_hdl_debug(hdl, 72 "discarding UE due to magic syndrome %x\n", synd); 73 return (CMD_EVD_UNUSED); 74 } 75 return (CMD_EVD_OK); 76 } 77 78 static cmd_evdisp_t 79 xe_common(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, 80 const char *class, cmd_xe_handler_f *hdlr) 81 { 82 uint64_t afar; 83 uint16_t synd; 84 uint8_t afar_status, synd_status; 85 nvlist_t *rsrc; 86 char *typenm; 87 uint64_t disp; 88 int minorvers = 1; 89 90 if (nvlist_lookup_pairs(nvl, 0, 91 FM_EREPORT_PAYLOAD_NAME_AFAR, DATA_TYPE_UINT64, &afar, 92 FM_EREPORT_PAYLOAD_NAME_AFAR_STATUS, DATA_TYPE_UINT8, &afar_status, 93 FM_EREPORT_PAYLOAD_NAME_SYND, DATA_TYPE_UINT16, &synd, 94 FM_EREPORT_PAYLOAD_NAME_SYND_STATUS, DATA_TYPE_UINT8, &synd_status, 95 FM_EREPORT_PAYLOAD_NAME_ERR_TYPE, DATA_TYPE_STRING, &typenm, 96 FM_EREPORT_PAYLOAD_NAME_RESOURCE, DATA_TYPE_NVLIST, &rsrc, 97 NULL) != 0) 98 return (CMD_EVD_BAD); 99 100 if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_NAME_ERR_DISP, 101 &disp) != 0) 102 minorvers = 0; 103 104 return (hdlr(hdl, ep, nvl, class, afar, afar_status, synd, 105 synd_status, cmd_mem_name2type(typenm, minorvers), disp, rsrc)); 106 } 107 108 /*ARGSUSED*/ 109 cmd_evdisp_t 110 cmd_ce(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class, 111 cmd_errcl_t clcode) 112 { 113 return (xe_common(hdl, ep, nvl, class, cmd_ce_common)); 114 } 115 116 /*ARGSUSED*/ 117 cmd_evdisp_t 118 cmd_ue(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class, 119 cmd_errcl_t clcode) 120 { 121 return (xe_common(hdl, ep, nvl, class, cmd_ue_common)); 122 } 123 124 cmd_evdisp_t 125 cmd_frx(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class, 126 cmd_errcl_t clcode) 127 { 128 cmd_errcl_t matchmask = (clcode == CMD_ERRCL_FRC ? (CMD_ERRCL_RCE | 129 CMD_ERRCL_IOCE) : (CMD_ERRCL_RUE | CMD_ERRCL_IOUE)); 130 131 return (cmd_rxefrx_common(hdl, ep, nvl, class, clcode, matchmask)); 132 } 133 134 /* 135 * When we complete an IOxE/RxE FRx pair, we have enough information to 136 * create either a CE or a UE, as appropriate. Before dispatching the 137 * joined event to the xE handler, we need to generate the FMRI for the 138 * named DIMM. While one of the events may already contain a resource FMRI, 139 * said FMRI is incomplete. The detector didn't have the necessary 140 * information (the AFAR, the AFSR, *and* the syndrome) needed to create 141 * a DIMM-level FMRI. 142 */ 143 static cmd_evdisp_t 144 iorxefrx_synthesize(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, 145 const char *class, uint64_t afar, uint8_t afar_status, uint64_t afsr, 146 uint16_t synd, uint8_t synd_status, ce_dispact_t type, uint64_t disp, 147 cmd_xe_handler_f *hdlr) 148 { 149 nvlist_t *fmri; 150 int rc; 151 152 if ((fmri = cmd_dimm_fmri_derive(hdl, afar, synd, afsr)) == NULL) 153 return (CMD_EVD_UNUSED); 154 155 rc = hdlr(hdl, ep, nvl, class, afar, afar_status, synd, synd_status, 156 type, disp, fmri); 157 158 nvlist_free(fmri); 159 160 return (rc); 161 } 162 163 static cmd_iorxefrx_t * 164 iorxefrx_match(fmd_hdl_t *hdl, cmd_errcl_t errcl, cmd_errcl_t matchmask, 165 uint_t det_agentid, uint_t afsr_agentid) 166 { 167 cmd_iorxefrx_t *rf; 168 169 for (rf = cmd_list_next(&cmd.cmd_iorxefrx); rf != NULL; 170 rf = cmd_list_next(rf)) { 171 172 fmd_hdl_debug(hdl, "rf->rf_errcl = %llx, matchmask = %llx\n" 173 "rf->rf_det_agentid = %lx, afsr_agentid = %lx\n" 174 "rf->rf_afsr_agentid = %lx, det_agentid = %lx\n", 175 rf->rf_errcl, matchmask, rf->rf_det_agentid, afsr_agentid, 176 rf->rf_afsr_agentid, det_agentid); 177 178 if ((rf->rf_errcl & matchmask) == 0) 179 continue; 180 181 /* 182 * For IOxEs we are unable to match based on both the detector 183 * and the captured Agent Id in the AFSR, because the bridge 184 * captures it's own Agent Id instead of the remote CPUs. 185 * 186 * Also, the LSB of Tomatillo's jpid is aliased for each chip 187 * and therefore needs to be factored out of our matching. 188 */ 189 if ((CMD_ERRCL_ISIOXE(rf->rf_errcl) || 190 CMD_ERRCL_ISIOXE(errcl)) && 191 ((rf->rf_afsr_agentid & TOM_AID_MATCH_MASK) == 192 (afsr_agentid & TOM_AID_MATCH_MASK))) 193 return (rf); 194 195 /* 196 * Check for both here since IOxE is not involved 197 */ 198 if ((rf->rf_afsr_agentid == det_agentid) && 199 (rf->rf_det_agentid == afsr_agentid)) 200 return (rf); 201 } 202 203 return (NULL); 204 } 205 206 /* 207 * Got an RxE or an FRx. FRx ereports can be matched with RxE ereports and 208 * vice versa. FRx ereports can also be matched with IOxE ereports. 209 */ 210 cmd_evdisp_t 211 cmd_rxefrx_common(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, 212 const char *class, cmd_errcl_t clcode, cmd_errcl_t matchmask) 213 { 214 cmd_xe_handler_f *hdlr; 215 cmd_iorxefrx_t *rfmatch, *rferr; 216 cmd_cpu_t *cpu; 217 char *typenm; 218 int isrxe = CMD_ERRCL_MATCH(clcode, CMD_ERRCL_RCE | CMD_ERRCL_RUE); 219 int isce = CMD_ERRCL_MATCH(clcode, CMD_ERRCL_RCE | CMD_ERRCL_FRC); 220 int rc; 221 int minorvers = 1; 222 uint8_t level = clcode & CMD_ERRCL_LEVEL_EXTRACT; 223 224 clcode &= CMD_ERRCL_LEVEL_MASK; 225 rferr = fmd_hdl_zalloc(hdl, sizeof (cmd_iorxefrx_t), FMD_SLEEP); 226 227 if (nvlist_lookup_pairs(nvl, 0, 228 FM_EREPORT_PAYLOAD_NAME_SYND, DATA_TYPE_UINT16, &rferr->rf_synd, 229 FM_EREPORT_PAYLOAD_NAME_SYND_STATUS, DATA_TYPE_UINT8, 230 &rferr->rf_synd_status, 231 FM_EREPORT_PAYLOAD_NAME_AFAR, DATA_TYPE_UINT64, &rferr->rf_afar, 232 FM_EREPORT_PAYLOAD_NAME_AFAR_STATUS, DATA_TYPE_UINT8, 233 &rferr->rf_afar_status, 234 FM_EREPORT_PAYLOAD_NAME_AFSR, DATA_TYPE_UINT64, &rferr->rf_afsr, 235 FM_EREPORT_PAYLOAD_NAME_ERR_TYPE, DATA_TYPE_STRING, &typenm, 236 NULL) != 0) { 237 fmd_hdl_free(hdl, rferr, sizeof (cmd_iorxefrx_t)); 238 return (CMD_EVD_BAD); 239 } 240 if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_NAME_ERR_DISP, 241 &rferr->rf_disp) != 0) 242 minorvers = 0; 243 244 rferr->rf_type = cmd_mem_name2type(typenm, minorvers); 245 246 if ((cpu = cmd_cpu_lookup_from_detector(hdl, nvl, class, 247 level)) == NULL) { 248 fmd_hdl_free(hdl, rferr, sizeof (cmd_iorxefrx_t)); 249 return (CMD_EVD_UNUSED); 250 } 251 252 if (!isrxe && rferr->rf_synd_status != AFLT_STAT_VALID) { 253 fmd_hdl_free(hdl, rferr, sizeof (cmd_iorxefrx_t)); 254 return (CMD_EVD_UNUSED); 255 } 256 257 if (isrxe) { 258 rferr->rf_afsr_agentid = (rferr->rf_afsr & 259 USIIIi_AFSR_JREQ) >> USIIIi_AFSR_JREQ_SHIFT; 260 } else { 261 rferr->rf_afsr_agentid = (rferr->rf_afsr & 262 USIIIi_AFSR_AID) >> USIIIi_AFSR_AID_SHIFT; 263 } 264 265 rferr->rf_errcl = clcode; 266 rferr->rf_det_agentid = cpu->cpu_cpuid; 267 268 if ((rfmatch = iorxefrx_match(hdl, clcode, matchmask, 269 rferr->rf_det_agentid, rferr->rf_afsr_agentid)) == NULL) { 270 cmd_iorxefrx_queue(hdl, rferr); 271 return (CMD_EVD_OK); 272 } 273 274 /* 275 * Found a match. Send a synthesized ereport to the appropriate 276 * routine. 277 */ 278 fmd_hdl_debug(hdl, "matched %cE %llx with %llx", "UC"[isce], 279 rferr->rf_errcl, rfmatch->rf_errcl); 280 281 hdlr = (isce ? cmd_ce_common : cmd_ue_common); 282 if (isrxe) { 283 rc = iorxefrx_synthesize(hdl, ep, nvl, class, rferr->rf_afar, 284 rferr->rf_afar_status, rfmatch->rf_afsr, rfmatch->rf_synd, 285 rfmatch->rf_synd_status, rferr->rf_type, rferr->rf_disp, 286 hdlr); 287 } else { 288 rc = iorxefrx_synthesize(hdl, ep, nvl, class, rfmatch->rf_afar, 289 rfmatch->rf_afar_status, rferr->rf_afsr, rferr->rf_synd, 290 rferr->rf_synd_status, rfmatch->rf_type, rferr->rf_disp, 291 hdlr); 292 } 293 294 cmd_iorxefrx_free(hdl, rfmatch); 295 fmd_hdl_free(hdl, rferr, sizeof (cmd_iorxefrx_t)); 296 297 return (rc); 298 } 299 300 /* 301 * This fire IOxE must be matched with an FRx before UE/CE processing 302 * is possible. 303 * 304 * Note that for fire ereports we don't receive AFSR, AFAR, AFAR-Status 305 * and SYND values but we can derive the AFAR from the payload value 306 * FIRE_JBC_JITEL1. We may receive a TYPNM value. 307 */ 308 static cmd_evdisp_t 309 cmd_ioxefrx_fire(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, 310 const char *class, cmd_errcl_t errcl, cmd_errcl_t matchmask) 311 { 312 cmd_xe_handler_f *hdlr; 313 cmd_iorxefrx_t *rfmatch, *rferr; 314 uint64_t afar; 315 int isce = CMD_ERRCL_MATCH(errcl, CMD_ERRCL_IOCE); 316 char *portid_str; 317 char *path = NULL; 318 char *typenm = NULL; 319 nvlist_t *det = NULL; 320 int rc; 321 int minorvers = 1; 322 323 rferr = fmd_hdl_zalloc(hdl, sizeof (cmd_iorxefrx_t), FMD_SLEEP); 324 325 /* 326 * Lookup device path of host bridge. 327 */ 328 (void) nvlist_lookup_nvlist(nvl, FM_EREPORT_DETECTOR, &det); 329 (void) nvlist_lookup_string(det, FM_FMRI_DEV_PATH, &path); 330 331 /* 332 * get Jbus port id from the device path 333 */ 334 portid_str = strrchr(path, '@') + 1; 335 rferr->rf_det_agentid = strtol(portid_str, NULL, 16); 336 337 rferr->rf_errcl = errcl; 338 rferr->rf_afsr_agentid = FIRE_AID; 339 rferr->rf_afar_status = AFLT_STAT_VALID; 340 rferr->rf_synd_status = AFLT_STAT_VALID; 341 342 /* 343 * Extract the afar from the payload 344 */ 345 (void) nvlist_lookup_uint64(nvl, FIRE_JBC_JITEL1, &afar); 346 rferr->rf_afar = afar & FIRE_JBC_ADDR_MASK; 347 348 rferr->rf_afsr = NULL; 349 rferr->rf_synd = NULL; 350 351 if (nvlist_lookup_string(nvl, FM_EREPORT_PAYLOAD_NAME_ERR_TYPE, 352 &typenm) == 0) 353 rferr->rf_type = cmd_mem_name2type(typenm, minorvers); 354 355 /* 356 * Need to send in the io_jpid that we get from the device path above 357 * for both the det_agentid and the afsr_agentid, since the CPU does not 358 * capture the same address as the bridge. The bridge has the LSB 359 * aliased and the CPU is missing the MSB. 360 */ 361 if ((rfmatch = iorxefrx_match(hdl, rferr->rf_errcl, matchmask, 362 rferr->rf_det_agentid, rferr->rf_afsr_agentid)) == NULL) { 363 cmd_iorxefrx_queue(hdl, rferr); 364 return (CMD_EVD_OK); 365 } 366 367 /* Found a match. Synthesize an ereport for UE/CE processing. */ 368 fmd_hdl_debug(hdl, "matched %cE %llx with %llx\n", "UC"[isce], 369 rferr->rf_errcl, rfmatch->rf_errcl); 370 371 hdlr = (isce ? cmd_ce_common : cmd_ue_common); 372 rc = iorxefrx_synthesize(hdl, ep, nvl, class, rferr->rf_afar, 373 rferr->rf_afar_status, rfmatch->rf_afsr, rfmatch->rf_synd, 374 rfmatch->rf_synd_status, rferr->rf_type, rferr->rf_disp, hdlr); 375 376 cmd_iorxefrx_free(hdl, rfmatch); 377 fmd_hdl_free(hdl, rferr, sizeof (cmd_iorxefrx_t)); 378 379 return (rc); 380 } 381 382 /* This IOxE must be matched with an FRx before UE/CE processing is possible */ 383 static cmd_evdisp_t 384 cmd_ioxefrx_common(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, 385 const char *class, cmd_errcl_t errcl, cmd_errcl_t matchmask) 386 { 387 cmd_xe_handler_f *hdlr; 388 cmd_iorxefrx_t *rfmatch, *rferr; 389 char *typenm; 390 int isce = CMD_ERRCL_MATCH(errcl, CMD_ERRCL_IOCE); 391 char *portid_str; 392 char *path = NULL; 393 nvlist_t *det = NULL; 394 int rc; 395 int minorvers = 1; 396 397 rferr = fmd_hdl_zalloc(hdl, sizeof (cmd_iorxefrx_t), FMD_SLEEP); 398 399 if (nvlist_lookup_pairs(nvl, 0, 400 PCI_ECC_AFAR, DATA_TYPE_UINT64, &rferr->rf_afar, 401 PCI_ECC_AFSR, DATA_TYPE_UINT64, &rferr->rf_afsr, 402 PCI_ECC_SYND, DATA_TYPE_UINT16, &rferr->rf_synd, 403 PCI_ECC_TYPE, DATA_TYPE_STRING, &typenm, 404 NULL) != 0) { 405 fmd_hdl_free(hdl, rferr, sizeof (cmd_iorxefrx_t)); 406 return (CMD_EVD_BAD); 407 } 408 409 if (nvlist_lookup_uint64(nvl, PCI_ECC_DISP, &rferr->rf_disp) != 0) 410 minorvers = 0; 411 412 rferr->rf_type = cmd_mem_name2type(typenm, minorvers); 413 rferr->rf_errcl = errcl; 414 415 /* 416 * Lookup device path of host bridge. 417 */ 418 (void) nvlist_lookup_nvlist(nvl, FM_EREPORT_DETECTOR, &det); 419 (void) nvlist_lookup_string(det, FM_FMRI_DEV_PATH, &path); 420 421 /* 422 * get Jbus port id from the device path 423 */ 424 portid_str = strrchr(path, '@') + 1; 425 rferr->rf_det_agentid = strtol(portid_str, NULL, 16); 426 427 rferr->rf_afsr_agentid = (rferr->rf_afsr & 428 SCHIZO_ECC_UE_AFSR_AGENT_MID) >> SCHIZO_ECC_UE_AFSR_AGENT_MID_SHIFT; 429 430 /* 431 * Only 4 bits of the Jbus AID are sent on the Jbus. MSB is the one 432 * that is chosen not to make the trip. This is not in any of the Jbus 433 * or Tomatillo documents and was discovered during testing and verified 434 * by Jalapeno H/W designer. 435 */ 436 rferr->rf_afsr_agentid &= 0xf; 437 rferr->rf_afar_status = AFLT_STAT_VALID; 438 rferr->rf_synd_status = AFLT_STAT_VALID; 439 440 /* 441 * Need to send in the io_jpid that we get from the device path above 442 * for both the det_agentid and the afsr_agentid, since the CPU does not 443 * capture the same address as the bridge. The bridge has the LSB 444 * aliased and the CPU is missing the MSB. 445 */ 446 if ((rfmatch = iorxefrx_match(hdl, rferr->rf_errcl, matchmask, 447 rferr->rf_det_agentid, rferr->rf_afsr_agentid)) == NULL) { 448 cmd_iorxefrx_queue(hdl, rferr); 449 return (CMD_EVD_OK); 450 } 451 452 /* Found a match. Synthesize an ereport for UE/CE processing. */ 453 fmd_hdl_debug(hdl, "matched %cE %llx with %llx\n", "UC"[isce], 454 rferr->rf_errcl, rfmatch->rf_errcl); 455 456 hdlr = (isce ? cmd_ce_common : cmd_ue_common); 457 rc = iorxefrx_synthesize(hdl, ep, nvl, class, rferr->rf_afar, 458 rferr->rf_afar_status, rfmatch->rf_afsr, rfmatch->rf_synd, 459 rfmatch->rf_synd_status, rferr->rf_type, rferr->rf_disp, hdlr); 460 461 cmd_iorxefrx_free(hdl, rfmatch); 462 fmd_hdl_free(hdl, rferr, sizeof (cmd_iorxefrx_t)); 463 464 return (rc); 465 } 466 467 /* IOxE ereports that don't need matching with FRx ereports */ 468 static cmd_evdisp_t 469 ioxe_common(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class, 470 cmd_errcl_t clcode) 471 { 472 int isce = CMD_ERRCL_MATCH(clcode, CMD_ERRCL_IOCE); 473 cmd_xe_handler_f *hdlr = isce ? cmd_ce_common : cmd_ue_common; 474 uint64_t afar; 475 uint16_t synd; 476 nvlist_t *rsrc; 477 char *typenm; 478 uint64_t disp; 479 int minorvers = 1; 480 481 if (nvlist_lookup_pairs(nvl, 0, 482 PCI_ECC_AFAR, DATA_TYPE_UINT64, &afar, 483 PCI_ECC_SYND, DATA_TYPE_UINT16, &synd, 484 PCI_ECC_TYPE, DATA_TYPE_STRING, &typenm, 485 PCI_ECC_RESOURCE, DATA_TYPE_NVLIST, &rsrc, 486 NULL) != 0) 487 return (CMD_EVD_BAD); 488 489 if (nvlist_lookup_uint64(nvl, PCI_ECC_DISP, &disp) != 0) 490 minorvers = 0; 491 492 return (hdlr(hdl, ep, nvl, class, afar, AFLT_STAT_VALID, synd, 493 AFLT_STAT_VALID, cmd_mem_name2type(typenm, minorvers), disp, 494 rsrc)); 495 } 496 497 cmd_evdisp_t 498 cmd_rxe(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class, 499 cmd_errcl_t clcode) 500 { 501 cmd_errcl_t matchmask = (clcode == CMD_ERRCL_RCE ? CMD_ERRCL_FRC : 502 CMD_ERRCL_FRU); 503 504 return (cmd_rxefrx_common(hdl, ep, nvl, class, clcode, matchmask)); 505 } 506 507 cmd_evdisp_t 508 cmd_ioxe(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class, 509 cmd_errcl_t clcode) 510 { 511 cmd_errcl_t matchmask = (clcode == CMD_ERRCL_IOCE ? CMD_ERRCL_FRC : 512 CMD_ERRCL_FRU); 513 514 if (fmd_nvl_class_match(hdl, nvl, "ereport.io.tom.*")) { 515 return (cmd_ioxefrx_common(hdl, ep, nvl, class, clcode, 516 matchmask)); 517 } else if (fmd_nvl_class_match(hdl, nvl, "ereport.io.fire.*")) { 518 return (cmd_ioxefrx_fire(hdl, ep, nvl, class, clcode, 519 matchmask)); 520 } else 521 return (ioxe_common(hdl, ep, nvl, class, clcode)); 522 } 523 524 /*ARGSUSED*/ 525 cmd_evdisp_t 526 cmd_ioxe_sec(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class, 527 cmd_errcl_t clcode) 528 { 529 /* 530 * Secondary IOxE's can't be used to identify failed or failing 531 * resources, as they don't contain enough information. Ignore them. 532 */ 533 return (CMD_EVD_OK); 534 } 535 536 /*ARGSUSED*/ 537 ulong_t 538 cmd_mem_get_phys_pages(fmd_hdl_t *hdl) 539 { 540 return (sysconf(_SC_PHYS_PAGES)); 541 } 542 543 /* 544 * sun4u bit position as function of e_synd, 545 * from JPS1 Implementation Supplement table P-7 546 * Encode bit positions as follows: 547 * 0-127 data bits 0-127 548 * 128-136 check bits 0-8 (Cn = 128+n) 549 * no error or multibit error = -1 (not valid CE) 550 */ 551 552 int esynd2bit [] = { 553 -1, 128, 129, -1, 130, -1, -1, 47, 554 131, -1, -1, 53, -1, 41, 29, -1, /* 000-00F */ 555 132, -1, -1, 50, -1, 38, 25, -1, 556 -1, 33, 24, -1, 11, -1, -1, 16, /* 010-01F */ 557 133, -1, -1, 46, -1, 37, 19, -1, 558 -1, 31, 32, -1, 7, -1, -1, 10, /* 020-02F */ 559 -1, 40, 13, -1, 59, -1, -1, 66, 560 -1, -1, -1, 0, -1, 67, 71, -1, /* 030-03F */ 561 134, -1, -1, 43, -1, 36, 18, -1, 562 -1, 49, 15, -1, 63, -1, -1, 6, /* 040-04F */ 563 -1, 44, 28, -1, -1, -1, -1, 52, 564 68, -1, -1, 62, -1, -1, -1, -1, /* 050-05F */ 565 -1, 26, 106, -1, 64, -1, -1, 2, 566 120, -1, -1, -1, -1, -1, -1, -1, /* 060-06F */ 567 116, -1, -1, -1, -1, -1, -1, -1, 568 -1, 58, 54, -1, -1, -1, -1, -1, /* 070-07F */ 569 135, -1, -1, 42, -1, 35, 17, -1, 570 -1, 45, 14, -1, 21, -1, -1, 5, /* 080-08F */ 571 -1, 27, -1, -1, 99, -1, -1, 3, 572 114, -1, -1, 20, -1, -1, -1, -1, /* 090-09F */ 573 -1, 23, 113, -1, 112, -1, -1, 51, 574 95, -1, -1, -1, -1, -1, -1, -1, /* 0A0-0AF */ 575 103, -1, -1, -1, -1, -1, -1, -1, 576 -1, 48, -1, -1, 73, -1, -1, -1, /* 0B0-0BF */ 577 -1, 22, 110, -1, 109, -1, -1, 9, 578 108, -1, -1, -1, -1, -1, -1, -1, /* 0C0-0CF */ 579 102, -1, -1, -1, -1, -1, -1, -1, 580 -1, -1, -1, -1, -1, -1, -1, -1, /* 0D0-0DF */ 581 98, -1, -1, -1, -1, -1, -1, -1, 582 -1, -1, -1, -1, -1, -1, -1, -1, /* 0E0-0EF */ 583 -1, -1, -1, -1, -1, -1, -1, -1, 584 56, -1, -1, -1, -1, -1, -1, -1, /* 0F0-0FF */ 585 136, -1, -1, 39, -1, 34, 105, -1, 586 -1, 30, 104, -1, 101, -1, -1, 4, /* 100-10F */ 587 -1, -1, 100, -1, 83, -1, -1, 12, 588 87, -1, -1, 57, -1, -1, -1, -1, /* 110-11F */ 589 -1, 97, 82, -1, 78, -1, -1, 1, 590 96, -1, -1, -1, -1, -1, -1, -1, /* 120-12F */ 591 94, -1, -1, -1, -1, -1, -1, -1, 592 -1, -1, 79, -1, 69, -1, -1, -1, /* 130-13F */ 593 -1, 93, 92, -1, 91, -1, -1, 8, 594 90, -1, -1, -1, -1, -1, -1, -1, /* 140-14F */ 595 89, -1, -1, -1, -1, -1, -1, -1, 596 -1, -1, -1, -1, -1, -1, -1, -1, /* 150-15F */ 597 86, -1, -1, -1, -1, -1, -1, -1, 598 -1, -1, -1, -1, -1, -1, -1, -1, /* 160-16F */ 599 -1, -1, -1, -1, -1, -1, -1, -1, 600 60, -1, -1, -1, -1, -1, -1, -1, /* 170-17F */ 601 -1, 88, 85, -1, 84, -1, -1, 55, 602 81, -1, -1, -1, -1, -1, -1, -1, /* 180-18F */ 603 77, -1, -1, -1, -1, -1, -1, -1, 604 -1, -1, -1, -1, -1, -1, -1, -1, /* 190-19F */ 605 74, -1, -1, -1, -1, -1, -1, -1, 606 -1, -1, -1, -1, -1, -1, -1, -1, /* 1A0-1AF */ 607 -1, 70, 107, -1, 65, -1, -1, -1, 608 127, -1, -1, -1, -1, -1, -1, -1, /* 1B0-1BF */ 609 80, -1, -1, 72, -1, 119, 118, -1, 610 -1, 126, 76, -1, 125, -1, -1, -1, /* 1C0-1CF */ 611 -1, 115, 124, -1, 75, -1, -1, -1, 612 61, -1, -1, -1, -1, -1, -1, -1, /* 1D0-1DF */ 613 -1, 123, 122, -1, 121, -1, -1, -1, 614 117, -1, -1, -1, -1, -1, -1, -1, /* 1E0-1EF */ 615 111, -1, -1, -1, -1, -1, -1, -1, 616 -1, -1, -1, -1, -1, -1, -1, -1 /* 1F0-1FF */ 617 }; 618 619 int msynd2bit [] = { /* msynd 0-F */ 620 -1, 140, 141, -1, 621 142, -1, -1, 137, 622 143, -1, -1, 138, 623 -1, 139, -1, -1 624 }; 625 626 int 627 cmd_synd2upos(uint16_t syndrome) { 628 return (esynd2bit[syndrome]); 629 } 630 631 const char *fmd_fmri_get_platform(); 632 633 #define DP_MAX 25 634 635 const char *slotname[] = { 636 "Slot A", "Slot B", "Slot C", "Slot D"}; 637 638 typedef struct fault_info { 639 uint32_t id; 640 int count; 641 } fault_info_t; 642 643 struct plat2id_map { 644 char *platnm; 645 int id; 646 } id_plat[] = { 647 {"SUNW,Sun-Fire-15000", 1}, 648 {"SUNW,Sun-Fire", 2}, 649 {"SUNW,Netra-T12", 2}, 650 {"SUNW,Sun-Fire-480R", 3}, 651 {"SUNW,Sun-Fire-V490", 3}, 652 {"SUNW,Sun-Fire-V440", 3}, 653 {"SUNW,Sun-Fire-V445", 3}, 654 {"SUNW,Netra-440", 3}, 655 {"SUNW,Sun-Fire-880", 4}, 656 {"SUNW,Sun-Fire-V890", 4}, 657 {NULL, 0} 658 }; 659 660 /*ARGSUSED*/ 661 void 662 cmd_to_hashed_addr(uint64_t *addr, uint64_t afar, const char *class) 663 { 664 *addr = afar; 665 } 666 667 /*ARGSUSED*/ 668 int 669 cmd_same_datapath_dimms(cmd_dimm_t *d1, cmd_dimm_t *d2) 670 { 671 return (1); 672 } 673 674 static int 675 cmd_get_platform() 676 { 677 const char *platname; 678 int id = -1; 679 int i; 680 681 platname = fmd_fmri_get_platform(); 682 for (i = 0; id_plat[i].platnm != NULL; i++) { 683 if (strcmp(platname, id_plat[i].platnm) == 0) { 684 id = id_plat[i].id; 685 break; 686 } 687 } 688 return (id); 689 } 690 691 static int 692 cmd_get_boardid(uint32_t cpuid) 693 { 694 int boardid; 695 int id = cmd_get_platform(); 696 697 switch (id) { 698 case 1: 699 boardid = ((cpuid >> 5) & 0x1f); 700 break; 701 case 2: 702 boardid = ((cpuid & 0x1f) / 4); 703 break; 704 705 case 3: 706 cpuid = cpuid & 0x07; 707 boardid = ((cpuid % 2) == 0) ? 0 : 1; 708 break; 709 case 4: 710 cpuid = cpuid & 0x07; 711 if ((cpuid % 2) == 0) 712 boardid = (cpuid < 4) ? 0 : 2; 713 else 714 boardid = (cpuid < 5) ? 1 : 3; 715 break; 716 default: 717 boardid = 5; 718 break; 719 } 720 721 return (boardid); 722 } 723 724 static void 725 cmd_get_faulted_comp(fmd_hdl_t *hdl, cmd_dimm_t *d1, cmd_dimm_t *d2, 726 uint16_t upos, fault_info_t **fault_list, int cpu) 727 { 728 cmd_mq_t *ip; 729 int i, j, k, idj; 730 uint32_t id; 731 uint32_t *cpuid = NULL; 732 int max_rpt; 733 734 max_rpt = 2 * cmd.cmd_nupos; 735 736 cpuid = fmd_hdl_alloc(hdl, max_rpt * sizeof (uint32_t), FMD_SLEEP); 737 738 if (cpuid == NULL) 739 return; 740 741 for (i = 0, j = 0; i < CMD_MAX_CKWDS; i++) { 742 for (ip = cmd_list_next(&d1->mq_root[i]); ip != NULL; 743 ip = cmd_list_next(ip)) { 744 if (upos == ip->mq_unit_position) { 745 cpuid[j] = ip->mq_cpuid; 746 j++; 747 } 748 if (j >= cmd.cmd_nupos) 749 break; 750 } 751 if (j >= cmd.cmd_nupos) 752 break; 753 } 754 755 for (i = 0; i < CMD_MAX_CKWDS; i++) { 756 for (ip = cmd_list_next(&d2->mq_root[i]); ip != NULL; 757 ip = cmd_list_next(ip)) { 758 if (upos == ip->mq_unit_position) { 759 cpuid[j] = ip->mq_cpuid; 760 j++; 761 } 762 if (j >= max_rpt) 763 break; 764 } 765 if (j >= max_rpt) 766 break; 767 } 768 769 for (i = 0, k = 0; i < max_rpt; i++) { 770 if (cpuid[i] == ULONG_MAX) 771 continue; 772 id = (cpu == 0) ? cmd_get_boardid(cpuid[i]) : cpuid[i]; 773 fault_list[k] = fmd_hdl_alloc(hdl, 774 sizeof (fault_info_t), FMD_SLEEP); 775 if (fault_list[k] == NULL) 776 break; 777 fault_list[k]->count = 1; 778 fault_list[k]->id = id; 779 for (j = i + 1; j < max_rpt; j++) { 780 if (cpuid[j] == ULONG_MAX) 781 continue; 782 idj = (cpu == 0) ? cmd_get_boardid(cpuid[j]) : cpuid[j]; 783 if (id == idj) { 784 fault_list[k]->count++; 785 cpuid[j] = ULONG_MAX; 786 } 787 } 788 k++; 789 } 790 791 fmd_hdl_free(hdl, cpuid, max_rpt * sizeof (uint32_t)); 792 } 793 794 /*ARGSUSED*/ 795 static nvlist_t * 796 cmd_board_mkfru(fmd_hdl_t *hdl, char *frustr) 797 { 798 nvlist_t *hcel, *fru; 799 int err; 800 801 if (frustr == NULL) 802 return (NULL); 803 804 if (nvlist_alloc(&hcel, NV_UNIQUE_NAME, 0) != 0) 805 return (NULL); 806 807 err = nvlist_add_string(hcel, FM_FMRI_HC_NAME, 808 FM_FMRI_LEGACY_HC); 809 err |= nvlist_add_string(hcel, FM_FMRI_HC_ID, frustr); 810 if (err != 0) { 811 nvlist_free(hcel); 812 return (NULL); 813 } 814 815 if (nvlist_alloc(&fru, NV_UNIQUE_NAME, 0) != 0) { 816 nvlist_free(hcel); 817 return (NULL); 818 } 819 err = nvlist_add_uint8(fru, FM_VERSION, FM_HC_SCHEME_VERSION); 820 err |= nvlist_add_string(fru, FM_FMRI_SCHEME, 821 FM_FMRI_SCHEME_HC); 822 err |= nvlist_add_string(fru, FM_FMRI_HC_ROOT, ""); 823 err |= nvlist_add_uint32(fru, FM_FMRI_HC_LIST_SZ, 1); 824 err |= nvlist_add_nvlist_array(fru, FM_FMRI_HC_LIST, &hcel, 1); 825 if (err != 0) { 826 nvlist_free(fru); 827 nvlist_free(hcel); 828 return (NULL); 829 } 830 nvlist_free(hcel); 831 return (fru); 832 } 833 834 /* 835 * Startcat, Serengeti, V4xx, and V8xx: fault the system boards of 836 * the detectors in proportion to the number of ereports out of 8 837 * Other systems: fault the detectors in proportion to the number of 838 * ereports out of 8 839 */ 840 void 841 cmd_gen_datapath_fault(fmd_hdl_t *hdl, cmd_dimm_t *d1, cmd_dimm_t *d2, 842 uint16_t upos, nvlist_t *det) 843 { 844 char frustr[DP_MAX]; 845 fmd_case_t *cp; 846 int i, ratio, type, fault_cpu, max_rpt; 847 uint32_t id; 848 uint8_t cpumask; 849 char *cpustr; 850 fault_info_t **fault_list = NULL; 851 nvlist_t *fru = NULL, *asru = NULL, *flt = NULL; 852 853 max_rpt = cmd.cmd_nupos * 2; 854 fault_list = fmd_hdl_alloc(hdl, 855 max_rpt * sizeof (fault_info_t *), FMD_SLEEP); 856 857 if (fault_list == NULL) 858 return; 859 860 for (i = 0; i < max_rpt; i++) 861 fault_list[i] = NULL; 862 863 type = cmd_get_platform(); 864 865 fault_cpu = (type == -1) ? 1 : 0; 866 867 cmd_get_faulted_comp(hdl, d1, d2, upos, fault_list, fault_cpu); 868 869 cp = fmd_case_open(hdl, NULL); 870 871 for (i = 0; i < max_rpt; i++) { 872 if (fault_list[i] == NULL) 873 continue; 874 id = fault_list[i]->id; 875 876 switch (type) { 877 case 1: 878 (void) snprintf(frustr, DP_MAX, "EX%d", id); 879 break; 880 case 2: 881 (void) snprintf(frustr, DP_MAX, "/N0/SB%d", id); 882 break; 883 case 3: 884 case 4: 885 (void) snprintf(frustr, DP_MAX, slotname[id]); 886 break; 887 default: 888 cpustr = cmd_cpu_getfrustr_by_id(hdl, id); 889 if (nvlist_lookup_uint8(det, FM_FMRI_CPU_MASK, &cpumask) 890 == 0) { 891 asru = cmd_cpu_fmri_create(id, cpumask); 892 (void) fmd_nvl_fmri_expand(hdl, asru); 893 } 894 break; 895 } 896 897 ratio = (fault_list[i]->count * 100) / (cmd.cmd_nupos * 2); 898 899 if (fault_cpu) { 900 fru = cmd_cpu_mkfru(hdl, cpustr, NULL, NULL); 901 fmd_hdl_strfree(hdl, cpustr); 902 if (fru == NULL) { 903 nvlist_free(asru); 904 break; 905 } 906 flt = cmd_nvl_create_fault(hdl, "fault.memory.datapath", 907 ratio, asru, fru, asru); 908 nvlist_free(asru); 909 } else { 910 fru = cmd_board_mkfru(hdl, frustr); 911 if (fru == NULL) 912 break; 913 flt = cmd_nvl_create_fault(hdl, "fault.memory.datapath", 914 ratio, fru, fru, fru); 915 } 916 917 fmd_case_add_suspect(hdl, cp, flt); 918 919 /* free up memory */ 920 nvlist_free(fru); 921 } 922 923 fmd_case_solve(hdl, cp); 924 925 for (i = 0; i < max_rpt; i++) { 926 if (fault_list[i] != NULL) 927 fmd_hdl_free(hdl, fault_list[i], sizeof (fault_info_t)); 928 } 929 930 fmd_hdl_free(hdl, fault_list, sizeof (fault_info_t *) * max_rpt); 931 } 932