1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 /* 26 * Ereport-handling routines for memory errors 27 */ 28 29 #include <cmd_mem.h> 30 #include <cmd_dimm.h> 31 #include <cmd_bank.h> 32 #include <cmd_page.h> 33 #include <cmd_cpu.h> 34 #include <cmd_branch.h> 35 #include <cmd_state.h> 36 #include <cmd.h> 37 #include <cmd_hc_sun4v.h> 38 39 #include <assert.h> 40 #include <strings.h> 41 #include <string.h> 42 #include <errno.h> 43 #include <unistd.h> 44 #include <fm/fmd_api.h> 45 #include <sys/fm/ldom.h> 46 #include <sys/fm/protocol.h> 47 48 #include <sys/fm/cpu/UltraSPARC-T1.h> 49 #include <sys/mdesc.h> 50 #include <sys/async.h> 51 #include <sys/errclassify.h> 52 #include <sys/niagararegs.h> 53 #include <sys/fm/ldom.h> 54 #include <ctype.h> 55 56 #define VF_TS3_FCR 0x000000000000FFFFULL 57 #define VF_L2ESYR_C2C 0x8000000000000000ULL 58 #define OFFBIT 0xFFFFFFFFFFFC07FFULL 59 #define BIT28_32 0x00000001F0000000ULL 60 #define BIT13_17 0x000000000003E000ULL 61 #define BIT18_19 0x00000000000C0000ULL 62 #define BIT11_12 0x0000000000001800ULL 63 #define UTS2_CPUS_PER_CHIP 64 64 #define FBR_ERROR ".fbr" 65 #define DSU_ERROR ".dsu" 66 #define FERG_INVALID ".invalid" 67 #define DBU_ERROR ".dbu" 68 69 extern ldom_hdl_t *cpumem_diagnosis_lhp; 70 71 static fmd_hdl_t *cpumem_hdl = NULL; 72 73 #define ERR_CLASS(x, y) (strcmp(strrchr(x, '.'), y)) 74 75 static void * 76 cpumem_alloc(size_t size) 77 { 78 assert(cpumem_hdl != NULL); 79 80 return (fmd_hdl_alloc(cpumem_hdl, size, FMD_SLEEP)); 81 } 82 83 static void 84 cpumem_free(void *addr, size_t size) 85 { 86 assert(cpumem_hdl != NULL); 87 88 fmd_hdl_free(cpumem_hdl, addr, size); 89 } 90 91 /*ARGSUSED*/ 92 cmd_evdisp_t 93 cmd_mem_synd_check(fmd_hdl_t *hdl, uint64_t afar, uint8_t afar_status, 94 uint16_t synd, uint8_t synd_status, cmd_cpu_t *cpu) 95 { 96 /* 97 * Niagara writebacks from L2 containing UEs are placed in memory 98 * with the poison syndrome NI_DRAM_POISON_SYND_FROM_LDWU. 99 * Memory UE ereports showing this syndrome are dropped because they 100 * indicate an L2 problem, which should be diagnosed from the 101 * corresponding L2 cache ereport. 102 */ 103 switch (cpu->cpu_type) { 104 case CPU_ULTRASPARC_T1: 105 if (synd == NI_DRAM_POISON_SYND_FROM_LDWU) { 106 fmd_hdl_debug(hdl, 107 "discarding UE due to magic syndrome %x\n", 108 synd); 109 return (CMD_EVD_UNUSED); 110 } 111 break; 112 case CPU_ULTRASPARC_T2: 113 case CPU_ULTRASPARC_T2plus: 114 if (synd == N2_DRAM_POISON_SYND_FROM_LDWU) { 115 fmd_hdl_debug(hdl, 116 "discarding UE due to magic syndrome %x\n", 117 synd); 118 return (CMD_EVD_UNUSED); 119 } 120 break; 121 default: 122 break; 123 } 124 return (CMD_EVD_OK); 125 } 126 127 static int 128 cpu_present(fmd_hdl_t *hdl, nvlist_t *asru, uint32_t *cpuid) 129 { 130 nvlist_t *cp_asru; 131 uint32_t i; 132 133 if (nvlist_dup(asru, &cp_asru, 0) != 0) { 134 fmd_hdl_debug(hdl, "unable to alloc asru for thread\n"); 135 return (-1); 136 } 137 138 for (i = *cpuid; i < *cpuid + UTS2_CPUS_PER_CHIP; i++) { 139 140 (void) nvlist_remove_all(cp_asru, FM_FMRI_CPU_ID); 141 142 if (nvlist_add_uint32(cp_asru, FM_FMRI_CPU_ID, i) == 0) { 143 if (fmd_nvl_fmri_present(hdl, cp_asru) && 144 !fmd_nvl_fmri_unusable(hdl, cp_asru)) { 145 nvlist_free(cp_asru); 146 *cpuid = i; 147 return (0); 148 } 149 } 150 } 151 nvlist_free(cp_asru); 152 return (-1); 153 } 154 155 /*ARGSUSED*/ 156 cmd_evdisp_t 157 cmd_c2c(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class, 158 cmd_errcl_t clcode) 159 { 160 uint32_t cpuid; 161 nvlist_t *det; 162 int rc; 163 164 (void) nvlist_lookup_nvlist(nvl, FM_EREPORT_DETECTOR, &det); 165 if (nvlist_lookup_uint32(det, FM_FMRI_CPU_ID, &cpuid) == 0) { 166 167 /* 168 * If the c2c bit is set, the sending cache of the 169 * cpu must be faulted instead of the memory. 170 * If the detector is chip0, the cache of the chip1 171 * is faulted and vice versa. 172 */ 173 if (cpuid < UTS2_CPUS_PER_CHIP) 174 cpuid = UTS2_CPUS_PER_CHIP; 175 else 176 cpuid = 0; 177 178 rc = cpu_present(hdl, det, &cpuid); 179 180 if (rc != -1) { 181 (void) nvlist_remove(det, FM_FMRI_CPU_ID, 182 DATA_TYPE_UINT32); 183 if (nvlist_add_uint32(det, 184 FM_FMRI_CPU_ID, cpuid) == 0) { 185 clcode |= CMD_CPU_LEVEL_CHIP; 186 return (cmd_l2u(hdl, ep, nvl, class, clcode)); 187 } 188 189 } 190 } 191 fmd_hdl_debug(hdl, "cmd_c2c: no cpuid discarding C2C error"); 192 return (CMD_EVD_BAD); 193 } 194 195 /* 196 * sun4v's xe_common routine has an extra argument, clcode, compared 197 * to routine of same name in sun4u. 198 */ 199 200 static cmd_evdisp_t 201 xe_common(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, 202 const char *class, cmd_errcl_t clcode, cmd_xe_handler_f *hdlr) 203 { 204 uint64_t afar, l2_afar, dram_afar; 205 uint64_t l2_afsr, dram_afsr, l2_esyr; 206 uint16_t synd; 207 uint8_t afar_status, synd_status; 208 nvlist_t *rsrc; 209 char *typenm; 210 uint64_t disp = 0; 211 int minorvers = 1; 212 213 if (nvlist_lookup_uint64(nvl, 214 FM_EREPORT_PAYLOAD_NAME_L2_AFSR, &l2_afsr) != 0 && 215 nvlist_lookup_uint64(nvl, 216 FM_EREPORT_PAYLOAD_NAME_L2_ESR, &l2_afsr) != 0) 217 return (CMD_EVD_BAD); 218 219 if (nvlist_lookup_uint64(nvl, 220 FM_EREPORT_PAYLOAD_NAME_DRAM_AFSR, &dram_afsr) != 0 && 221 nvlist_lookup_uint64(nvl, 222 FM_EREPORT_PAYLOAD_NAME_DRAM_ESR, &dram_afsr) != 0) 223 return (CMD_EVD_BAD); 224 225 if (nvlist_lookup_uint64(nvl, 226 FM_EREPORT_PAYLOAD_NAME_L2_AFAR, &l2_afar) != 0 && 227 nvlist_lookup_uint64(nvl, 228 FM_EREPORT_PAYLOAD_NAME_L2_EAR, &l2_afar) != 0) 229 return (CMD_EVD_BAD); 230 231 if (nvlist_lookup_uint64(nvl, 232 FM_EREPORT_PAYLOAD_NAME_DRAM_AFAR, &dram_afar) != 0 && 233 nvlist_lookup_uint64(nvl, 234 FM_EREPORT_PAYLOAD_NAME_DRAM_EAR, &dram_afar) != 0) 235 return (CMD_EVD_BAD); 236 237 if (nvlist_lookup_pairs(nvl, 0, 238 FM_EREPORT_PAYLOAD_NAME_ERR_TYPE, DATA_TYPE_STRING, &typenm, 239 FM_EREPORT_PAYLOAD_NAME_RESOURCE, DATA_TYPE_NVLIST, &rsrc, 240 NULL) != 0) 241 return (CMD_EVD_BAD); 242 243 synd = dram_afsr; 244 245 /* 246 * Niagara afar and synd validity. 247 * For a given set of error registers, the payload value is valid if 248 * no higher priority error status bit is set. See UltraSPARC-T1.h for 249 * error status bit values and priority settings. Note that for DAC 250 * and DAU, afar value is taken from l2 error registers, syndrome 251 * from dram error * registers; for DSC and DSU, both afar and 252 * syndrome are taken from dram * error registers. DSU afar and 253 * syndrome are always valid because no 254 * higher priority error will override. 255 */ 256 switch (clcode) { 257 case CMD_ERRCL_DAC: 258 afar = l2_afar; 259 afar_status = ((l2_afsr & NI_L2AFSR_P10) == 0) ? 260 AFLT_STAT_VALID : AFLT_STAT_INVALID; 261 synd_status = ((dram_afsr & NI_DMAFSR_P01) == 0) ? 262 AFLT_STAT_VALID : AFLT_STAT_INVALID; 263 break; 264 case CMD_ERRCL_DSC: 265 afar = dram_afar; 266 afar_status = ((dram_afsr & NI_DMAFSR_P01) == 0) ? 267 AFLT_STAT_VALID : AFLT_STAT_INVALID; 268 synd_status = afar_status; 269 break; 270 case CMD_ERRCL_DAU: 271 afar = l2_afar; 272 afar_status = ((l2_afsr & NI_L2AFSR_P05) == 0) ? 273 AFLT_STAT_VALID : AFLT_STAT_INVALID; 274 synd_status = AFLT_STAT_VALID; 275 276 if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_NAME_L2_ESYR, 277 &l2_esyr) == 0) { 278 if (l2_esyr & VF_L2ESYR_C2C) { 279 return (cmd_c2c(hdl, ep, nvl, class, clcode)); 280 } 281 } 282 break; 283 case CMD_ERRCL_DSU: 284 afar = dram_afar; 285 afar_status = synd_status = AFLT_STAT_VALID; 286 break; 287 default: 288 fmd_hdl_debug(hdl, "Niagara unrecognized mem error %llx\n", 289 clcode); 290 return (CMD_EVD_UNUSED); 291 } 292 293 return (hdlr(hdl, ep, nvl, class, afar, afar_status, synd, 294 synd_status, cmd_mem_name2type(typenm, minorvers), disp, rsrc)); 295 } 296 297 298 /*ARGSUSED*/ 299 cmd_evdisp_t 300 cmd_ce(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class, 301 cmd_errcl_t clcode) 302 { 303 if (strcmp(class, "ereport.cpu.ultraSPARC-T2plus.dsc") == 0) 304 return (CMD_EVD_UNUSED); /* drop VF dsc's */ 305 else 306 return (xe_common(hdl, ep, nvl, class, clcode, cmd_ce_common)); 307 } 308 309 /*ARGSUSED*/ 310 cmd_evdisp_t 311 cmd_ue_train(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class, 312 cmd_errcl_t clcode) 313 { 314 cmd_evdisp_t rc, rc1; 315 316 /* 317 * The DAU is cause of the DAU->DCDP/ICDP train: 318 * - process the cause of the event. 319 * - register the error to the nop event train, so the effected errors 320 * (DCDP/ICDP) will be dropped. 321 */ 322 rc = xe_common(hdl, ep, nvl, class, clcode, cmd_ue_common); 323 324 rc1 = cmd_xxcu_initial(hdl, ep, nvl, class, clcode, CMD_XR_HDLR_NOP); 325 if (rc1 != 0) 326 fmd_hdl_debug(hdl, 327 "Fail to add error (%llx) to the train, rc = %d", 328 clcode, rc1); 329 330 return (rc); 331 } 332 333 /*ARGSUSED*/ 334 cmd_evdisp_t 335 cmd_ue(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class, 336 cmd_errcl_t clcode) 337 { 338 if (strcmp(class, "ereport.cpu.ultraSPARC-T2plus.dsu") == 0) 339 /* 340 * VF dsu's need to be treated like branch errors, 341 * because we can't localize to a single DIMM or pair of 342 * DIMMs given missing/invalid parts of the dram-ear. 343 */ 344 return (cmd_fb(hdl, ep, nvl, class, clcode)); 345 else 346 return (xe_common(hdl, ep, nvl, class, clcode, cmd_ue_common)); 347 } 348 349 /*ARGSUSED*/ 350 cmd_evdisp_t 351 cmd_frx(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class, 352 cmd_errcl_t clcode) 353 { 354 return (CMD_EVD_UNUSED); 355 } 356 357 358 /*ARGSUSED*/ 359 cmd_evdisp_t 360 cmd_fb(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class, 361 cmd_errcl_t clcode) 362 { 363 cmd_branch_t *branch; 364 const char *uuid; 365 nvlist_t *asru, *det; 366 uint64_t ts3_fcr; 367 368 if (nvlist_lookup_nvlist(nvl, FM_RSRC_RESOURCE, &asru) < 0) { 369 CMD_STAT_BUMP(bad_mem_asru); 370 return (NULL); 371 } 372 373 if (nvlist_lookup_nvlist(nvl, FM_EREPORT_DETECTOR, &det) < 0) { 374 CMD_STAT_BUMP(bad_mem_asru); 375 return (NULL); 376 } 377 378 if (fmd_nvl_fmri_expand(hdl, det) < 0) { 379 fmd_hdl_debug(hdl, "Failed to expand detector"); 380 return (NULL); 381 } 382 383 branch = cmd_branch_lookup(hdl, asru); 384 if (branch == NULL) { 385 if ((branch = cmd_branch_create(hdl, asru)) == NULL) 386 return (CMD_EVD_UNUSED); 387 } 388 389 if (branch->branch_case.cc_cp != NULL && 390 fmd_case_solved(hdl, branch->branch_case.cc_cp)) { 391 fmd_hdl_debug(hdl, "Case solved\n"); 392 return (CMD_EVD_REDUND); 393 } 394 395 if (branch->branch_case.cc_cp == NULL) { 396 branch->branch_case.cc_cp = cmd_case_create(hdl, 397 &branch->branch_header, CMD_PTR_BRANCH_CASE, &uuid); 398 } 399 400 if (ERR_CLASS(class, FBR_ERROR) == 0) { 401 if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_NAME_TS3_FCR, 402 &ts3_fcr) == 0 && (ts3_fcr != VF_TS3_FCR)) { 403 fmd_hdl_debug(hdl, 404 "Processing fbr with lane failover\n"); 405 cmd_branch_create_fault(hdl, branch, 406 "fault.memory.link-f", det); 407 408 } else { 409 fmd_hdl_debug(hdl, "Adding fbr event to serd engine\n"); 410 if (branch->branch_case.cc_serdnm == NULL) { 411 branch->branch_case.cc_serdnm = 412 cmd_mem_serdnm_create(hdl, 413 "branch", branch->branch_unum); 414 415 fmd_serd_create(hdl, 416 branch->branch_case.cc_serdnm, 417 fmd_prop_get_int32(hdl, "fbr_n"), 418 fmd_prop_get_int64(hdl, "fbr_t")); 419 } 420 421 if (fmd_serd_record(hdl, 422 branch->branch_case.cc_serdnm, ep) == FMD_B_FALSE) 423 return (CMD_EVD_OK); /* engine hasn't fired */ 424 425 fmd_hdl_debug(hdl, "fbr serd fired\n"); 426 427 fmd_case_add_serd(hdl, branch->branch_case.cc_cp, 428 branch->branch_case.cc_serdnm); 429 430 cmd_branch_create_fault(hdl, branch, 431 "fault.memory.link-c", det); 432 } 433 } else if (ERR_CLASS(class, DSU_ERROR) == 0) { 434 fmd_hdl_debug(hdl, "Processing dsu event"); 435 cmd_branch_create_fault(hdl, branch, "fault.memory.bank", det); 436 } else { 437 fmd_hdl_debug(hdl, "Processing fbu event"); 438 cmd_branch_create_fault(hdl, branch, "fault.memory.link-u", 439 det); 440 } 441 442 branch->branch_flags |= CMD_MEM_F_FAULTING; 443 444 if (branch->branch_case.cc_serdnm != NULL) { 445 fmd_serd_destroy(hdl, branch->branch_case.cc_serdnm); 446 fmd_hdl_strfree(hdl, branch->branch_case.cc_serdnm); 447 branch->branch_case.cc_serdnm = NULL; 448 } 449 450 fmd_case_add_ereport(hdl, branch->branch_case.cc_cp, ep); 451 fmd_case_solve(hdl, branch->branch_case.cc_cp); 452 cmd_branch_dirty(hdl, branch); 453 454 return (CMD_EVD_OK); 455 } 456 457 /*ARGSUSED*/ 458 cmd_evdisp_t 459 cmd_fb_train(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class, 460 cmd_errcl_t clcode) 461 { 462 cmd_evdisp_t rc, rc1; 463 464 /* 465 * The FBU is cause of the FBU->DCDP/ICDP train: 466 * - process the cause of the event. 467 * - register the error to the nop event train, so the effected errors 468 * (DCDP/ICDP) will be dropped. 469 */ 470 rc = cmd_fb(hdl, ep, nvl, class, clcode); 471 472 rc1 = cmd_xxcu_initial(hdl, ep, nvl, class, clcode, CMD_XR_HDLR_NOP); 473 if (rc1 != 0) 474 fmd_hdl_debug(hdl, 475 "Fail to add error (%llx) to the train, rc = %d", 476 clcode, rc1); 477 478 return (rc); 479 } 480 481 482 /*ARGSUSED*/ 483 cmd_evdisp_t 484 cmd_fw_defect(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class, 485 cmd_errcl_t clcode) 486 { 487 const char *fltclass = NULL; 488 nvlist_t *rsc = NULL; 489 int solve = 0; 490 491 if ((rsc = init_mb(hdl)) == NULL) 492 return (CMD_EVD_UNUSED); 493 494 if (ERR_CLASS(class, FERG_INVALID) == 0) { 495 fltclass = "defect.fw.generic-sparc.erpt-gen"; 496 } else if (ERR_CLASS(class, DBU_ERROR) == 0) { 497 cmd_evdisp_t rc; 498 fltclass = "defect.fw.generic-sparc.addr-oob"; 499 /* 500 * add dbu to nop error train 501 */ 502 rc = cmd_xxcu_initial(hdl, ep, nvl, class, clcode, 503 CMD_XR_HDLR_NOP); 504 if (rc != 0) 505 fmd_hdl_debug(hdl, 506 "Failed to add error (%llx) to the train, rc = %d", 507 clcode, rc); 508 } else { 509 fmd_hdl_debug(hdl, "Unexpected fw defect event %s", class); 510 } 511 512 if (fltclass) { 513 fmd_case_t *cp = NULL; 514 nvlist_t *fault = NULL; 515 516 fault = fmd_nvl_create_fault(hdl, fltclass, 100, NULL, 517 NULL, rsc); 518 if (fault != NULL) { 519 cp = fmd_case_open(hdl, NULL); 520 fmd_case_add_ereport(hdl, cp, ep); 521 fmd_case_add_suspect(hdl, cp, fault); 522 fmd_case_solve(hdl, cp); 523 solve = 1; 524 } 525 } 526 527 nvlist_free(rsc); 528 529 return (solve ? CMD_EVD_OK : CMD_EVD_UNUSED); 530 } 531 532 void 533 cmd_branch_close(fmd_hdl_t *hdl, void *arg) 534 { 535 cmd_branch_destroy(hdl, arg); 536 } 537 538 539 /*ARGSUSED*/ 540 ulong_t 541 cmd_mem_get_phys_pages(fmd_hdl_t *hdl) 542 { 543 /* 544 * Compute and return the total physical memory in pages from the 545 * MD/PRI. 546 * Cache its value. 547 */ 548 static ulong_t npage = 0; 549 md_t *mdp; 550 mde_cookie_t *listp; 551 uint64_t bmem, physmem = 0; 552 ssize_t bufsiz = 0; 553 uint64_t *bufp; 554 int num_nodes, nmblocks, i; 555 556 if (npage > 0) { 557 return (npage); 558 } 559 560 if (cpumem_hdl == NULL) { 561 cpumem_hdl = hdl; 562 } 563 564 if ((bufsiz = ldom_get_core_md(cpumem_diagnosis_lhp, &bufp)) <= 0) { 565 return (0); 566 } 567 if ((mdp = md_init_intern(bufp, cpumem_alloc, cpumem_free)) == NULL || 568 (num_nodes = md_node_count(mdp)) <= 0) { 569 cpumem_free(bufp, (size_t)bufsiz); 570 return (0); 571 } 572 573 listp = (mde_cookie_t *)cpumem_alloc(sizeof (mde_cookie_t) * 574 num_nodes); 575 nmblocks = md_scan_dag(mdp, MDE_INVAL_ELEM_COOKIE, 576 md_find_name(mdp, "mblock"), 577 md_find_name(mdp, "fwd"), listp); 578 for (i = 0; i < nmblocks; i++) { 579 if (md_get_prop_val(mdp, listp[i], "size", &bmem) < 0) { 580 physmem = 0; 581 break; 582 } 583 physmem += bmem; 584 } 585 npage = (ulong_t)(physmem / cmd.cmd_pagesize); 586 587 cpumem_free(listp, sizeof (mde_cookie_t) * num_nodes); 588 cpumem_free(bufp, (size_t)bufsiz); 589 (void) md_fini(mdp); 590 591 return (npage); 592 } 593 594 static int galois_mul[16][16] = { 595 /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ 596 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, /* 0 */ 597 { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, /* 1 */ 598 { 0, 2, 4, 6, 8, 10, 12, 14, 3, 1, 7, 5, 11, 9, 15, 13}, /* 2 */ 599 { 0, 3, 6, 5, 12, 15, 10, 9, 11, 8, 13, 14, 7, 4, 1, 2}, /* 3 */ 600 { 0, 4, 8, 12, 3, 7, 11, 15, 6, 2, 14, 10, 5, 1, 13, 9}, /* 4 */ 601 { 0, 5, 10, 15, 7, 2, 13, 8, 14, 11, 4, 1, 9, 12, 3, 6}, /* 5 */ 602 { 0, 6, 12, 10, 11, 13, 7, 1, 5, 3, 9, 15, 14, 8, 2, 4}, /* 6 */ 603 { 0, 7, 14, 9, 15, 8, 1, 6, 13, 10, 3, 4, 2, 5, 12, 11}, /* 7 */ 604 { 0, 8, 3, 11, 6, 14, 5, 13, 12, 4, 15, 7, 10, 2, 9, 1}, /* 8 */ 605 { 0, 9, 1, 8, 2, 11, 3, 10, 4, 13, 5, 12, 6, 15, 7, 14}, /* 9 */ 606 { 0, 10, 7, 13, 14, 4, 9, 3, 15, 5, 8, 2, 1, 11, 6, 12}, /* A */ 607 { 0, 11, 5, 14, 10, 1, 15, 4, 7, 12, 2, 9, 13, 6, 8, 3}, /* B */ 608 { 0, 12, 11, 7, 5, 9, 14, 2, 10, 6, 1, 13, 15, 3, 4, 8}, /* C */ 609 { 0, 13, 9, 4, 1, 12, 8, 5, 2, 15, 11, 6, 3, 14, 10, 7}, /* D */ 610 { 0, 14, 15, 1, 13, 3, 2, 12, 9, 7, 6, 8, 4, 10, 11, 5}, /* E */ 611 { 0, 15, 13, 2, 9, 6, 4, 11, 1, 14, 12, 3, 8, 7, 5, 10} /* F */ 612 }; 613 614 static int 615 galois_div(int num, int denom) { 616 int i; 617 618 for (i = 0; i < 16; i++) { 619 if (galois_mul[denom][i] == num) 620 return (i); 621 } 622 return (-1); 623 } 624 625 /* 626 * Data nibbles N0-N31 => 0-31 627 * check nibbles C0-3 => 32-35 628 */ 629 630 int 631 cmd_synd2upos(uint16_t syndrome) { 632 633 uint16_t s0, s1, s2, s3; 634 635 if (syndrome == 0) 636 return (-1); /* clean syndrome, not a CE */ 637 638 s0 = syndrome & 0xF; 639 s1 = (syndrome >> 4) & 0xF; 640 s2 = (syndrome >> 8) & 0xF; 641 s3 = (syndrome >> 12) & 0xF; 642 643 if (s3 == 0) { 644 if (s2 == 0 && s1 == 0) 645 return (32); /* 0 0 0 e => C0 */ 646 if (s2 == 0 && s0 == 0) 647 return (33); /* 0 0 e 0 => C1 */ 648 if (s1 == 0 && s0 == 0) 649 return (34); /* 0 e 0 0 => C2 */ 650 if (s2 == s1 && s1 == s0) 651 return (31); /* 0 d d d => N31 */ 652 return (-1); /* multibit error */ 653 } else if (s2 == 0) { 654 if (s1 == 0 && s0 == 0) 655 return (35); /* e 0 0 0 => C4 */ 656 if (s1 == 0 || s0 == 0) 657 return (-1); /* not a 0 b c */ 658 if (s3 != galois_div(galois_mul[s1][s1], s0)) 659 return (-1); /* check nibble not valid */ 660 return (galois_div(s0, s1) - 1); /* N0 - N14 */ 661 } else if (s1 == 0) { 662 if (s2 == 0 || s0 == 0) 663 return (-1); /* not a b 0 c */ 664 if (s3 != galois_div(galois_mul[s2][s2], s0)) 665 return (-1); /* check nibble not valid */ 666 return (galois_div(s0, s2) + 14); /* N15 - N29 */ 667 } else if (s0 == 0) { 668 if (s3 == s2 && s2 == s1) 669 return (30); /* d d d 0 => N30 */ 670 return (-1); 671 } else return (-1); 672 } 673 674 nvlist_t * 675 cmd_mem2hc(fmd_hdl_t *hdl, nvlist_t *mem_fmri) { 676 677 char **snp; 678 uint_t n; 679 680 if (nvlist_lookup_string_array(mem_fmri, FM_FMRI_HC_SERIAL_ID, 681 &snp, &n) != 0) 682 return (NULL); /* doesn't have serial id */ 683 684 return (cmd_find_dimm_by_sn(hdl, FM_FMRI_SCHEME_HC, *snp)); 685 } 686 687 /* 688 * formula to convert an unhashed address to hashed address 689 * PA[17:11] = (PA[32:28] xor PA[17:13]) :: ((PA[19:18] xor PA[12:11]) 690 */ 691 void 692 cmd_to_hashed_addr(uint64_t *addr, uint64_t afar, const char *class) 693 { 694 695 if (strstr(class, "ultraSPARC-T1") != NULL) 696 *addr = afar; 697 else { 698 *addr = (afar & OFFBIT) | 699 ((afar & BIT28_32) >> 15) ^ (afar & BIT13_17) | 700 ((afar & BIT18_19) >> 7) ^ (afar & BIT11_12); 701 } 702 } 703 704 int 705 cmd_same_datapath_dimms(cmd_dimm_t *d1, cmd_dimm_t *d2) 706 { 707 char *p, *q; 708 709 p = strstr(d1->dimm_unum, "CMP"); 710 q = strstr(d2->dimm_unum, "CMP"); 711 if (p != NULL && q != NULL) { 712 if (strncmp(p, q, 4) == 0) 713 return (1); 714 } 715 return (0); 716 } 717 718 /* 719 * fault the FRU of the common CMP 720 */ 721 /*ARGSUSED*/ 722 void 723 cmd_gen_datapath_fault(fmd_hdl_t *hdl, cmd_dimm_t *d1, cmd_dimm_t *d2, 724 uint16_t upos, nvlist_t *det) 725 { 726 fmd_case_t *cp; 727 char *frustr; 728 nvlist_t *rsrc, *fltlist; 729 char *s; 730 char const *str1, *str2; 731 uint_t len, i; 732 733 s = strstr(d1->dimm_unum, "CMP"); 734 if (s == NULL) 735 return; 736 737 frustr = fmd_hdl_zalloc(hdl, strlen(d1->dimm_unum), FMD_SLEEP); 738 len = strlen(d1->dimm_unum) - strlen(s); 739 740 if (strncmp(d1->dimm_unum, d2->dimm_unum, len) != 0) { 741 for (i = 0, str1 = d1->dimm_unum, str2 = d2->dimm_unum; 742 *str1 == *str2 && i <= len; 743 str1++, str2++, i++) 744 ; 745 len = i; 746 } 747 748 (void) strncpy(frustr, d1->dimm_unum, len); 749 750 rsrc = cmd_mkboard_fru(hdl, frustr, NULL, NULL); 751 752 fmd_hdl_free(hdl, frustr, strlen(d1->dimm_unum)); 753 754 if (rsrc == NULL) 755 return; 756 757 (void) nvlist_add_nvlist(rsrc, FM_FMRI_AUTHORITY, cmd.cmd_auth); 758 759 cp = fmd_case_open(hdl, NULL); 760 761 fltlist = fmd_nvl_create_fault(hdl, "fault.memory.datapath", 100, 762 rsrc, NULL, rsrc); 763 764 fmd_case_add_suspect(hdl, cp, fltlist); 765 fmd_case_solve(hdl, cp); 766 767 nvlist_free(rsrc); 768 } 769