/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Ereport-handling routines for memory errors
 */

#include <gmem_mem.h>
#include <gmem_dimm.h>
#include <gmem_page.h>
#include <gmem.h>

#include <strings.h>
#include <string.h>
#include <errno.h>
#include <assert.h>
#include <fm/fmd_api.h>
#include <fm/libtopo.h>
#include <sys/fm/protocol.h>
#include <sys/async.h>
#include <sys/errclassify.h>

#define OFFBIT          0xFFFFFFFFFFFC07FFULL
#define BIT28_32        0x00000001F0000000ULL
#define BIT13_17        0x000000000003E000ULL
#define BIT18_19        0x00000000000C0000ULL
#define BIT11_12        0x0000000000001800ULL

struct ce_name2type {
        const char *name;
        ce_dispact_t type;
};

nvlist_t *fru_nvl;

static ce_dispact_t
gmem_mem_name2type(const char *name)
{
        static const struct ce_name2type new[] = {
                { "mem-unk", CE_DISP_UNKNOWN },
                { "mem-is", CE_DISP_INTERMITTENT },
                { "mem-cs", CE_DISP_PERS },
                { "mem-ss", CE_DISP_STICKY },
                { NULL }
        };
        const struct ce_name2type *names = &new[0];
        const struct ce_name2type *tp;

        for (tp = names; tp->name != NULL; tp++) {
                if (strcasecmp(name, tp->name) == 0)
                        return (tp->type);
        }

        return (CE_DISP_UNKNOWN);
}

/*ARGSUSED*/
static int
find_fault_fru(topo_hdl_t *thp, tnode_t *node, void *arg)
{
        nvlist_t *nvl = (nvlist_t *)arg;
        nvlist_t *rsc = NULL, *fru = NULL;
        nvlist_t **hcl, **topo_hcl;
        uint_t n1, n2;
        char *name, *name1, *name2;
        char *id1, *id2;
        int err, i;

        if (topo_node_resource(node, &rsc, &err) < 0)
                return (TOPO_WALK_NEXT);

        err = nvlist_lookup_nvlist_array(rsc, FM_FMRI_HC_LIST, &topo_hcl, &n1);

        if (err != 0) {
                nvlist_free(rsc);
                return (TOPO_WALK_NEXT);
        }

        (void) nvlist_lookup_string(topo_hcl[n1 - 1], FM_FMRI_HC_NAME, &name);
        if (strcmp(name, "chip") != 0) {
                nvlist_free(rsc);
                return (TOPO_WALK_NEXT);
        }

        (void) nvlist_lookup_nvlist_array(nvl, FM_FMRI_HC_LIST, &hcl, &n2);

        if (n1 != n2) {
                nvlist_free(rsc);
                return (TOPO_WALK_NEXT);
        }

        for (i = 0; i < n1; i++) {
                (void) nvlist_lookup_string(topo_hcl[i], FM_FMRI_HC_NAME,
                    &name1);
                (void) nvlist_lookup_string(topo_hcl[i], FM_FMRI_HC_ID, &id1);
                (void) nvlist_lookup_string(hcl[i], FM_FMRI_HC_NAME, &name2);
                (void) nvlist_lookup_string(hcl[i], FM_FMRI_HC_ID, &id2);
                if (strcmp(name1, name2) != 0 || strcmp(id1, id2) != 0) {
                        nvlist_free(rsc);
                        return (TOPO_WALK_NEXT);
                }
        }

        (void) topo_node_fru(node, &fru, NULL, &err);
        if (fru != NULL) {
                (void) nvlist_dup(fru, &fru_nvl, NV_UNIQUE_NAME);
                nvlist_free(fru);
        }
        nvlist_free(rsc);
        return (TOPO_WALK_TERMINATE);
}
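
/*
 * Descriptive note (added, not in the original source): walk the
 * hc-scheme topology looking for the node whose hc-list matches the
 * given resource FMRI up through its "chip" component (see
 * find_fault_fru() above).  On a match, a duplicate of that node's FRU
 * is stashed in fru_nvl and returned; the caller is responsible for
 * freeing it with nvlist_free().  NULL is returned if no match is found.
 */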
nvlist_t *
gmem_find_fault_fru(fmd_hdl_t *hdl, nvlist_t *nvl)
{
        topo_hdl_t *thp;
        topo_walk_t *twp;
        int err;
        fru_nvl = NULL;

        if ((thp = fmd_hdl_topo_hold(hdl, TOPO_VERSION)) == NULL)
                return (NULL);

        if ((twp = topo_walk_init(thp, FM_FMRI_SCHEME_HC,
            find_fault_fru, nvl, &err)) == NULL) {
                fmd_hdl_topo_rele(hdl, thp);
                return (NULL);
        }

        (void) topo_walk_step(twp, TOPO_WALK_CHILD);
        topo_walk_fini(twp);
        fmd_hdl_topo_rele(hdl, thp);
        return (fru_nvl);
}

/*
 * fault the FRU of the common detector between two DIMMs
 */
void
gmem_gen_datapath_fault(fmd_hdl_t *hdl, nvlist_t *det)
{
        char *name, *id;
        nvlist_t **hcl1, **hcl;
        uint_t n;
        int i, j;
        fmd_case_t *cp;
        nvlist_t *fltlist, *rsrc;
        nvlist_t *fru = NULL;

        if (nvlist_lookup_nvlist_array(det, FM_FMRI_HC_LIST, &hcl1, &n) < 0)
                return;

        for (i = 0; i < n; i++) {
                (void) nvlist_lookup_string(hcl1[i], FM_FMRI_HC_NAME, &name);
                if (strcmp(name, "chip") == 0)
                        break;
        }

        n = i + 1;
        hcl = fmd_hdl_zalloc(hdl, sizeof (nvlist_t *) * n, FMD_SLEEP);
        if (hcl == NULL)
                return;

        for (i = 0; i < n; i++) {
                (void) nvlist_alloc(&hcl[i],
                    NV_UNIQUE_NAME|NV_UNIQUE_NAME_TYPE, 0);
        }

        for (i = 0, j = 0; i < n; i++) {
                (void) nvlist_lookup_string(hcl1[i], FM_FMRI_HC_NAME, &name);
                (void) nvlist_lookup_string(hcl1[i], FM_FMRI_HC_ID, &id);
                (void) nvlist_add_string(hcl[j], FM_FMRI_HC_NAME, name);
                (void) nvlist_add_string(hcl[j], FM_FMRI_HC_ID, id);
                j++;
                if (strcmp(name, "chip") == 0)
                        break;
        }

        if (nvlist_alloc(&rsrc, NV_UNIQUE_NAME|NV_UNIQUE_NAME_TYPE, 0) != 0) {
                for (i = 0; i < n; i++) {
                        nvlist_free(hcl[i]);
                }
                fmd_hdl_free(hdl, hcl, sizeof (nvlist_t *) * n);
                return;
        }

        if (nvlist_add_uint8(rsrc, FM_VERSION, FM_HC_SCHEME_VERSION) != 0 ||
            nvlist_add_string(rsrc, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC) != 0 ||
            nvlist_add_string(rsrc, FM_FMRI_HC_ROOT, "") != 0 ||
            nvlist_add_uint32(rsrc, FM_FMRI_HC_LIST_SZ, n) != 0 ||
            nvlist_add_nvlist_array(rsrc, FM_FMRI_HC_LIST, hcl, n) != 0) {
                for (i = 0; i < n; i++) {
                        nvlist_free(hcl[i]);
                }
                fmd_hdl_free(hdl, hcl, sizeof (nvlist_t *) * n);
                nvlist_free(rsrc);
                return;
        }

        fru = gmem_find_fault_fru(hdl, rsrc);
        if (fru != NULL) {
                cp = fmd_case_open(hdl, NULL);
                fltlist = fmd_nvl_create_fault(hdl, "fault.memory.datapath",
                    100, fru, fru, fru);
                fmd_case_add_suspect(hdl, cp, fltlist);
                fmd_case_solve(hdl, cp);
                nvlist_free(fru);
        }

        for (i = 0; i < n; i++) {
                nvlist_free(hcl[i]);
        }

        fmd_hdl_free(hdl, hcl, sizeof (nvlist_t *) * n);
        nvlist_free(rsrc);
}

/*
 * formula to convert an unhashed address to a hashed address
 * PA[17:11] = (PA[32:28] xor PA[17:13]) :: (PA[19:18] xor PA[12:11])
 */
static void
gmem_to_hashed_addr(uint64_t *addr, uint64_t afar)
{
        *addr = (afar & OFFBIT) | ((afar & BIT28_32) >> 15) ^ (afar & BIT13_17)
            | ((afar & BIT18_19) >> 7) ^ (afar & BIT11_12);
}
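
/*
 * Illustrative example (added, not in the original source), tracing the
 * hash above: for afar = 0x1500CCDA5 we have PA[32:28] = 0b10101,
 * PA[17:13] = 0b00110, PA[19:18] = 0b11 and PA[12:11] = 0b01.  The two
 * xors give PA[17:13]' = 0b10011 and PA[12:11]' = 0b10, so bits 17:11
 * become 0x26000 | 0x1000 while OFFBIT preserves every other bit, and
 * the hashed address is 0x1500E75A5.
 */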

/*
 * check if a dimm has n CEs that have the same symbol-in-error
 */
int
upos_thresh_check(gmem_dimm_t *dimm, uint16_t upos, uint32_t threshold)
{
        int i;
        gmem_mq_t *ip, *next;
        int count = 0;

        for (i = 0; i < GMEM_MAX_CKWDS; i++) {
                for (ip = gmem_list_next(&dimm->mq_root[i]); ip != NULL;
                    ip = next) {
                        next = gmem_list_next(ip);
                        if (ip->mq_unit_position == upos) {
                                count++;
                                if (count >= threshold)
                                        return (1);
                        }
                }
        }
        return (0);
}

/*
 * check if smaller number of retired pages > 1/16 of larger number of
 * retired pages
 */
int
check_bad_rw_retired_pages(fmd_hdl_t *hdl, gmem_dimm_t *d1, gmem_dimm_t *d2)
{
        uint_t sret, lret;
        double ratio;

        sret = lret = 0;

        if (d2->dimm_nretired < d1->dimm_nretired) {
                sret = d2->dimm_nretired;
                lret = d1->dimm_nretired;
        } else if (d2->dimm_nretired > d1->dimm_nretired) {
                sret = d1->dimm_nretired;
                lret = d2->dimm_nretired;
        } else
                return (0);

        ratio = lret * GMEM_MQ_RATIO;

        if (sret > ratio) {
                fmd_hdl_debug(hdl, "sret=%d lret=%d ratio=%.3f",
                    sret, lret, ratio);
                return (1);
        }
        return (0);
}
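
/*
 * Illustrative example (added, not in the original source), assuming
 * GMEM_MQ_RATIO expresses the 1/16 threshold described above: if one
 * DIMM has 160 retired pages and the other has 20, the threshold is
 * 160 * 1/16 = 10, and since 20 > 10 the pair is treated as a possible
 * bad reader/writer candidate and the check returns 1.
 */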

/*
 * check bad rw on any two DIMMs. The check succeeds if
 * - each DIMM has n CEs which have the same symbol-in-error,
 * - the smaller number of retired pages > 1/16 of the larger number of
 *   retired pages
 */
static int
check_bad_rw_between_dimms(fmd_hdl_t *hdl, gmem_dimm_t *d1, gmem_dimm_t *d2,
    uint16_t *rupos)
{
        int i;
        gmem_mq_t *ip, *next;
        uint16_t upos;

        for (i = 0; i < GMEM_MAX_CKWDS; i++) {
                for (ip = gmem_list_next(&d1->mq_root[i]); ip != NULL;
                    ip = next) {
                        next = gmem_list_next(ip);
                        upos = ip->mq_unit_position;
                        if (upos_thresh_check(d1, upos, gmem.gm_nupos)) {
                                if (upos_thresh_check(d2, upos,
                                    gmem.gm_nupos)) {
                                        if (check_bad_rw_retired_pages(hdl,
                                            d1, d2)) {
                                                *rupos = upos;
                                                return (1);
                                        }
                                }
                        }
                }
        }

        return (0);
}

static void
bad_reader_writer_check(fmd_hdl_t *hdl, nvlist_t *det, gmem_dimm_t *ce_dimm)
{
        gmem_dimm_t *d, *next;
        uint16_t upos;

        for (d = gmem_list_next(&gmem.gm_dimms); d != NULL; d = next) {
                next = gmem_list_next(d);
                if (d == ce_dimm)
                        continue;
                if (!gmem_same_datapath_dimms(hdl, ce_dimm, d))
                        continue;
                if (check_bad_rw_between_dimms(hdl, ce_dimm, d, &upos)) {
                        gmem_gen_datapath_fault(hdl, det);
                        gmem_save_symbol_error(hdl, ce_dimm, upos);
                        fmd_hdl_debug(hdl,
                            "check_bad_rw_dimms succeeded: %s %s\n",
                            ce_dimm->dimm_serial, d->dimm_serial);
                        return;
                }
        }
}

/*
 * rule 5a checking. The check succeeds if
 * - nretired >= 512, or
 * - nretired >= 128 and (addr_hi - addr_low) / (nretired - 1) > 512KB
 */
static void
ce_thresh_check(fmd_hdl_t *hdl, gmem_dimm_t *dimm)
{
        nvlist_t *flt, *rsrc;
        fmd_case_t *cp;
        uint_t nret;
        uint64_t delta_addr = 0;

        if (dimm->dimm_flags & GMEM_F_FAULTING)
                return;

        nret = dimm->dimm_nretired;

        if (nret < gmem.gm_low_ce_thresh)
                return;

        if (dimm->dimm_phys_addr_hi >= dimm->dimm_phys_addr_low)
                delta_addr =
                    (dimm->dimm_phys_addr_hi - dimm->dimm_phys_addr_low) /
                    (nret - 1);

        if (nret >= gmem.gm_max_retired_pages || delta_addr > GMEM_MQ_512KB) {
                fmd_hdl_debug(hdl, "ce_thresh_check succeeded nret=%d", nret);
                dimm->dimm_flags |= GMEM_F_FAULTING;
                gmem_dimm_dirty(hdl, dimm);

                cp = fmd_case_open(hdl, NULL);
                rsrc = gmem_find_dimm_rsc(hdl, dimm->dimm_serial);
                flt = fmd_nvl_create_fault(hdl, GMEM_FAULT_DIMM_PAGES,
                    GMEM_FLTMAXCONF, NULL, gmem_dimm_fru(dimm), rsrc);
                fmd_case_add_suspect(hdl, cp, flt);
                fmd_case_solve(hdl, cp);
                nvlist_free(rsrc);
        }
}
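
/*
 * Illustrative example (added, not in the original source), assuming
 * gmem.gm_low_ce_thresh and gmem.gm_max_retired_pages correspond to the
 * 128 and 512 figures in the rule 5a comment: a DIMM with 200 retired
 * pages whose hashed addresses span 256MB gives delta_addr = 256MB / 199,
 * roughly 1.3MB per retirement.  That exceeds 512KB, so ce_thresh_check()
 * above faults the DIMM even though fewer than 512 pages were retired.
 */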

/*
 * rule 5b checking. The check succeeds if more than 120
 * non-intermittent CEs are reported against one symbol
 * position of one afar in 72 hours
 */
static void
mq_5b_check(fmd_hdl_t *hdl, gmem_dimm_t *dimm)
{
        nvlist_t *flt, *rsrc;
        fmd_case_t *cp;
        gmem_mq_t *ip, *next;
        int cw;

        for (cw = 0; cw < GMEM_MAX_CKWDS; cw++) {
                for (ip = gmem_list_next(&dimm->mq_root[cw]);
                    ip != NULL; ip = next) {
                        next = gmem_list_next(ip);
                        if (ip->mq_dupce_count >= gmem.gm_dupce) {
                                fmd_hdl_debug(hdl,
                                    "mq_5b_check succeeded: duplicate CE=%d",
                                    ip->mq_dupce_count);
                                cp = fmd_case_open(hdl, NULL);
                                rsrc = gmem_find_dimm_rsc(hdl,
                                    dimm->dimm_serial);
                                flt = fmd_nvl_create_fault(hdl,
                                    GMEM_FAULT_DIMM_PAGES, GMEM_FLTMAXCONF,
                                    NULL, gmem_dimm_fru(dimm), rsrc);
                                dimm->dimm_flags |= GMEM_F_FAULTING;
                                gmem_dimm_dirty(hdl, dimm);
                                fmd_case_add_suspect(hdl, cp, flt);
                                fmd_case_solve(hdl, cp);
                                nvlist_free(rsrc);
                                return;
                        }
                }
        }
}

/*
 * delete the expired duplicate CE time stamps
 */
static void
mq_prune_dup(fmd_hdl_t *hdl, gmem_mq_t *ip, uint64_t now)
{
        tstamp_t *tsp, *next;

        for (tsp = gmem_list_next(&ip->mq_dupce_tstamp); tsp != NULL;
            tsp = next) {
                next = gmem_list_next(tsp);
                if (tsp->tstamp < now - GMEM_MQ_TIMELIM) {
                        gmem_list_delete(&ip->mq_dupce_tstamp, &tsp->ts_l);
                        fmd_hdl_free(hdl, tsp, sizeof (tstamp_t));
                        ip->mq_dupce_count--;
                }
        }
}

static void
mq_update(fmd_hdl_t *hdl, fmd_event_t *ep, gmem_mq_t *ip, uint64_t now)
{
        tstamp_t *tsp;

        ip->mq_tstamp = now;
        ip->mq_ep = ep;
        if (fmd_serd_exists(hdl, ip->mq_serdnm))
                fmd_serd_destroy(hdl, ip->mq_serdnm);

        fmd_serd_create(hdl, ip->mq_serdnm, GMEM_MQ_SERDN, GMEM_MQ_SERDT);
        (void) fmd_serd_record(hdl, ip->mq_serdnm, ep);

        tsp = fmd_hdl_zalloc(hdl, sizeof (tstamp_t), FMD_SLEEP);
        tsp->tstamp = now;
        gmem_list_append(&ip->mq_dupce_tstamp, tsp);
        ip->mq_dupce_count++;
}

/*
 * Create a fresh index block for MQSC CE correlation.
 */
gmem_mq_t *
mq_create(fmd_hdl_t *hdl, fmd_event_t *ep,
    uint64_t afar, uint16_t upos, uint16_t ckwd, uint64_t now)
{
        gmem_mq_t *cp;
        tstamp_t *tsp;

        cp = fmd_hdl_zalloc(hdl, sizeof (gmem_mq_t), FMD_SLEEP);
        cp->mq_tstamp = now;
        cp->mq_ckwd = ckwd;
        cp->mq_phys_addr = afar;
        cp->mq_unit_position = upos;
        cp->mq_ep = ep;
        cp->mq_serdnm =
            gmem_mq_serdnm_create(hdl, "mq", afar, ckwd, upos);

        tsp = fmd_hdl_zalloc(hdl, sizeof (tstamp_t), FMD_SLEEP);
        tsp->tstamp = now;
        gmem_list_append(&cp->mq_dupce_tstamp, tsp);
        cp->mq_dupce_count = 1;

        /*
         * Create SERD to keep this event from being removed
         * by fmd which may not know there is an event pointer
         * saved here. This SERD is *never* meant to fire.
         */
        if (fmd_serd_exists(hdl, cp->mq_serdnm))
                fmd_serd_destroy(hdl, cp->mq_serdnm);

        fmd_serd_create(hdl, cp->mq_serdnm, GMEM_MQ_SERDN, GMEM_MQ_SERDT);
        (void) fmd_serd_record(hdl, cp->mq_serdnm, ep);

        return (cp);
}

gmem_mq_t *
mq_destroy(fmd_hdl_t *hdl, gmem_list_t *lp, gmem_mq_t *ip)
{
        gmem_mq_t *jp = gmem_list_next(ip);
        tstamp_t *tsp, *next;

        if (ip->mq_serdnm != NULL) {
                if (fmd_serd_exists(hdl, ip->mq_serdnm))
                        fmd_serd_destroy(hdl, ip->mq_serdnm);
                fmd_hdl_strfree(hdl, ip->mq_serdnm);
                ip->mq_serdnm = NULL;
        }

        for (tsp = gmem_list_next(&ip->mq_dupce_tstamp); tsp != NULL;
            tsp = next) {
                next = gmem_list_next(tsp);
                gmem_list_delete(&ip->mq_dupce_tstamp, &tsp->ts_l);
                fmd_hdl_free(hdl, tsp, sizeof (tstamp_t));
        }

        gmem_list_delete(lp, &ip->mq_l);
        fmd_hdl_free(hdl, ip, sizeof (gmem_mq_t));

        return (jp);
}

/*
 * Add an index block for a new CE, sorted
 * a) by ascending unit position
 * b) order of arrival (~= time order)
 */
void
mq_add(fmd_hdl_t *hdl, gmem_dimm_t *dimm, fmd_event_t *ep,
    uint64_t afar, uint16_t unit_position, uint16_t ckwd,
    uint64_t now)
{
        gmem_mq_t *ip, *jp;
        int cw = (int)ckwd;

        for (ip = gmem_list_next(&dimm->mq_root[cw]); ip != NULL; ) {
                if (ip->mq_unit_position > unit_position) {
                        /* list is in unit position order */
                        break;
                } else if (ip->mq_unit_position == unit_position &&
                    ip->mq_phys_addr == afar) {
                        /*
                         * Found a duplicate cw, unit_position, and afar.
                         * Instead of adding a new node, refresh the
                         * existing mq_t structure for this event.
                         */
                        mq_update(hdl, ep, ip, now);
                        return;
                } else {
                        ip = gmem_list_next(ip);
                }
        }

        jp = mq_create(hdl, ep, afar, unit_position, cw, now);
        if (ip == NULL)
                gmem_list_append(&dimm->mq_root[cw], jp);
        else
                gmem_list_insert_before(&dimm->mq_root[cw], ip, jp);
}
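
/*
 * Illustrative example (added, not in the original source): with index
 * blocks at unit positions 3, 7 and 12 already on the ckword-0 list, a
 * CE at unit position 7 with a new afar is linked in by mq_add() above
 * as 3, 7, 7(new), 12, preserving arrival order within a unit position;
 * a CE whose unit position and afar are already present just refreshes
 * the existing block via mq_update().
 */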

/*
 * Prune the MQSC index lists (one for each checkword), by deleting
 * outdated index blocks from each list.
 */
void
mq_prune(fmd_hdl_t *hdl, gmem_dimm_t *dimm, uint64_t now)
{
        gmem_mq_t *ip;
        int cw;

        for (cw = 0; cw < GMEM_MAX_CKWDS; cw++) {
                for (ip = gmem_list_next(&dimm->mq_root[cw]); ip != NULL; ) {
                        if (ip->mq_tstamp < now - GMEM_MQ_TIMELIM) {
                                /*
                                 * This event has timed out - delete the
                                 * mq block as well as the serd for the event.
                                 */
                                ip = mq_destroy(hdl, &dimm->mq_root[cw], ip);
                        } else {
                                mq_prune_dup(hdl, ip, now);
                                /* tstamp < now - ce_t */
                                ip = gmem_list_next(ip);
                        }
                } /* per checkword */
        } /* cw = 0...3 */
}

/*
 * Check the MQSC index lists (one for each checkword) by making a
 * complete pass through each list, checking if the criteria for
 * Rule 4A have been met.  Rule 4A checking is done for each checkword.
 *
 * Rule 4A: fault a DIMM "whenever Solaris reports two or more CEs from
 * two or more different physical addresses on each of two or more different
 * bit positions from the same DIMM within 72 hours of each other, and all
 * the addresses are in the same relative checkword (that is, the AFARs
 * are all the same modulo 64).  [Note: This means at least 4 CEs; two
 * from one bit position, with unique addresses, and two from another,
 * also with unique addresses, and the lower 6 bits of all the addresses
 * are the same.]"
 */
void
mq_check(fmd_hdl_t *hdl, gmem_dimm_t *dimm)
{
        int upos_pairs, curr_upos, cw, i, j;
        nvlist_t *flt, *rsc;
        typedef struct upos_pair {
                int upos;
                gmem_mq_t *mq1;
                gmem_mq_t *mq2;
        } upos_pair_t;
        upos_pair_t upos_array[16]; /* max 2 pairs per cw * 8 cw's */
        gmem_mq_t *ip;

        /*
         * Each upos_array[] member represents a pair of CEs for the same
         * unit position (symbol) which is a 4 bit nibble.
         * MQSC rule 4 requires pairs of CEs from the same symbol (same DIMM
         * for rule 4A, and same DRAM for rule 4B) for a violation - this
         * is why CE pairs are tracked.
         */
        upos_pairs = 0;
        upos_array[0].mq1 = NULL;

        for (cw = 0; cw < GMEM_MAX_CKWDS; cw++) {
                i = upos_pairs;
                curr_upos = -1;

                /*
                 * mq_root[] is an array of cumulative lists of CEs
                 * indexed by checkword where the list is in unit position
                 * order. Loop through checking for duplicate unit position
                 * entries (filled in at mq_create()).
                 * The upos_array[] is filled in each time a duplicate
                 * unit position is found; the first time through the loop
                 * of a unit position sets curr_upos but does not fill in
                 * upos_array[] until the second symbol is found.
                 */
                for (ip = gmem_list_next(&dimm->mq_root[cw]); ip != NULL;
                    ip = gmem_list_next(ip)) {
                        if (curr_upos != ip->mq_unit_position) {
                                /* Set initial current position */
                                curr_upos = ip->mq_unit_position;
                        } else if (i > upos_pairs &&
                            curr_upos == upos_array[i-1].upos) {
                                /*
                                 * Only keep track of CE pairs; skip
                                 * triples, quads, etc...
                                 */
                                continue;
                        } else if (upos_array[i].mq1 == NULL) {
                                /* Have a pair.  Add to upos_array[] */
                                fmd_hdl_debug(hdl, "pair:upos=%d",
                                    curr_upos);
                                upos_array[i].upos = curr_upos;
                                upos_array[i].mq1 = gmem_list_prev(ip);
                                upos_array[i].mq2 = ip;
                                upos_array[++i].mq1 = NULL;
                        }
                }
                if (i - upos_pairs >= 2) {
                        /* Rule 4A violation */
                        rsc = gmem_find_dimm_rsc(hdl, dimm->dimm_serial);
                        flt = fmd_nvl_create_fault(hdl, GMEM_FAULT_DIMM_4A,
                            GMEM_FLTMAXCONF, NULL, gmem_dimm_fru(dimm), rsc);
                        for (j = upos_pairs; j < i; j++) {
                                fmd_case_add_ereport(hdl,
                                    dimm->dimm_case.cc_cp,
                                    upos_array[j].mq1->mq_ep);
                                fmd_case_add_ereport(hdl,
                                    dimm->dimm_case.cc_cp,
                                    upos_array[j].mq2->mq_ep);
                        }
                        dimm->dimm_flags |= GMEM_F_FAULTING;
                        gmem_dimm_dirty(hdl, dimm);
                        fmd_case_add_suspect(hdl, dimm->dimm_case.cc_cp, flt);
                        fmd_case_solve(hdl, dimm->dimm_case.cc_cp);
                        nvlist_free(rsc);
                        return;
                }
                upos_pairs = i;
                assert(upos_pairs < 16);
        }
}
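
/*
 * Illustrative example (added, not in the original source): four CEs on
 * one DIMM within 72 hours, two at distinct addresses A1 != A2 on symbol
 * position 5 and two at distinct addresses A3 != A4 on symbol position 9,
 * where A1..A4 are all equal modulo 64 (same relative checkword), yield
 * two entries in upos_array[] for that checkword and cause mq_check()
 * above to fault the DIMM under Rule 4A.
 */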

/*ARGSUSED*/
gmem_evdisp_t
gmem_ce(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
{
        uint16_t symbol_pos, cw;
        uint64_t phyaddr, offset, addr;
        uint32_t filter_ratio = 0;
        gmem_dimm_t *dimm;
        gmem_page_t *page;
        nvlist_t *fru = NULL;
        nvlist_t *topo_rsc = NULL;
        nvlist_t *rsrc, *det;
        const char *uuid;
        ce_dispact_t type;
        boolean_t diagnose;
        char *sn;
        int err, rc;
        uint64_t *now;
        uint_t nelem;
        int skip_error = 0;

        err = nvlist_lookup_boolean_value(nvl, GMEM_ERPT_PAYLOAD_DIAGNOSE,
            &diagnose);
        if (err != 0 || diagnose == 0)
                return (GMEM_EVD_UNUSED);

        if ((nvlist_lookup_uint64(nvl, GMEM_ERPT_PAYLOAD_PHYSADDR,
            &phyaddr) != 0) ||
            (nvlist_lookup_uint64(nvl, GMEM_ERPT_PAYLOAD_OFFSET,
            &offset) != 0)) {
                fmd_hdl_debug(hdl, "Can't get page phyaddr or offset");
                return (GMEM_EVD_BAD);
        }

        fmd_hdl_debug(hdl, "phyaddr %llx offset %llx", phyaddr, offset);

        if ((page = gmem_page_lookup(phyaddr)) != NULL &&
            page->page_case.cc_cp != NULL &&
            fmd_case_solved(hdl, page->page_case.cc_cp))
                return (GMEM_EVD_REDUND);

        if (nvlist_lookup_nvlist(nvl, GMEM_ERPT_PAYLOAD_RESOURCE,
            &rsrc) != 0 ||
            nvlist_lookup_string(rsrc, FM_FMRI_HC_SERIAL_ID, &sn) != 0) {
                fmd_hdl_debug(hdl, "Can't get dimm serial\n");
                return (GMEM_EVD_BAD);
        }

        fmd_hdl_debug(hdl, "serial %s", sn);

        if (nvlist_lookup_nvlist(nvl, GMEM_ERPT_PAYLOAD_DETECTOR, &det) != 0)
                return (GMEM_EVD_BAD);

        /*
         * Find dimm fru by serial number.
         */
        fru = gmem_find_dimm_fru(hdl, sn);

        if (fru == NULL) {
                fmd_hdl_debug(hdl, "Dimm is not present\n");
                return (GMEM_EVD_UNUSED);
        }

        if ((dimm = gmem_dimm_lookup(hdl, fru)) == NULL &&
            (dimm = gmem_dimm_create(hdl, fru, det)) == NULL) {
                nvlist_free(fru);
                return (GMEM_EVD_UNUSED);
        }

        if (dimm->dimm_case.cc_cp == NULL) {
                dimm->dimm_case.cc_cp = gmem_case_create(hdl,
                    &dimm->dimm_header, GMEM_PTR_DIMM_CASE, &uuid);
        }

        /*
         * Add to MQSC correlation lists all CEs which pass the validity
         * checks above. If there is no symbol_pos or relative ckword
         * in the ereport, skip rule 4A checking.
         */
        err = nvlist_lookup_uint16(nvl, GMEM_ERPT_PAYLOAD_SYMBOLPOS,
            &symbol_pos);
        err |= nvlist_lookup_uint16(nvl, GMEM_ERPT_PAYLOAD_CKW, &cw);

        if (err == 0) {
                fmd_hdl_debug(hdl, "symbol_pos=%d cw=%d", symbol_pos, cw);

                if (nvlist_lookup_uint64_array(nvl,
                    "__tod", &now, &nelem) == 0) {
                        skip_error = gmem_check_symbol_error(hdl, dimm,
                            symbol_pos);

                        if (!skip_error ||
                            !(dimm->dimm_flags & GMEM_F_FAULTING))
                                mq_add(hdl, dimm, ep, phyaddr, symbol_pos,
                                    cw, *now);

                        mq_prune(hdl, dimm, *now);

                        if (!skip_error)
                                bad_reader_writer_check(hdl, det, dimm);
                        if (!(dimm->dimm_flags & GMEM_F_FAULTING)) {
                                mq_check(hdl, dimm);
                                mq_5b_check(hdl, dimm);
                        }
                }
        }

        type = gmem_mem_name2type(strstr(class, "mem"));

        switch (type) {
        case CE_DISP_UNKNOWN:
                GMEM_STAT_BUMP(ce_unknown);
                nvlist_free(fru);
                return (GMEM_EVD_UNUSED);
        case CE_DISP_INTERMITTENT:
                GMEM_STAT_BUMP(ce_interm);
                nvlist_free(fru);
                return (GMEM_EVD_UNUSED);
        case CE_DISP_PERS:
                GMEM_STAT_BUMP(ce_clearable_persis);
                break;
        case CE_DISP_STICKY:
                GMEM_STAT_BUMP(ce_sticky);
                break;
        default:
                nvlist_free(fru);
                return (GMEM_EVD_BAD);
        }

        if (gmem_check_symbol_error(hdl, dimm, symbol_pos)) {
                nvlist_free(fru);
                return (GMEM_EVD_REDUND);
        }

        if (page == NULL) {
                page = gmem_page_create(hdl, fru, phyaddr, offset);
                if (page == NULL) {
                        nvlist_free(fru);
                        return (GMEM_EVD_UNUSED);
                }
        }

        nvlist_free(fru);

        if (page->page_case.cc_cp == NULL) {
                page->page_case.cc_cp = gmem_case_create(hdl,
                    &page->page_header, GMEM_PTR_PAGE_CASE, &uuid);
        }

        switch (type) {
        case CE_DISP_PERS:
                fmd_hdl_debug(hdl, "adding persistent event to CE serd");
                if (page->page_case.cc_serdnm == NULL)
                        gmem_page_serd_create(hdl, page, nvl);

                filter_ratio = gmem_get_serd_filter_ratio(nvl);

                fmd_hdl_debug(hdl, "filter_ratio %d\n", filter_ratio);

                if (gmem_serd_record(hdl, page->page_case.cc_serdnm,
                    filter_ratio, ep) == FMD_B_FALSE) {
                        return (GMEM_EVD_OK); /* engine hasn't fired */
                }

                fmd_hdl_debug(hdl, "ce page serd fired\n");
                fmd_case_add_serd(hdl, page->page_case.cc_cp,
                    page->page_case.cc_serdnm);
                fmd_serd_reset(hdl, page->page_case.cc_serdnm);
                break; /* to retire */

        case CE_DISP_STICKY:
                fmd_case_add_ereport(hdl, page->page_case.cc_cp, ep);
                break; /* to retire */
        }

        topo_rsc = gmem_find_dimm_rsc(hdl, dimm->dimm_serial);
        rc = gmem_page_fault(hdl, gmem_dimm_fru(dimm), topo_rsc,
            ep, phyaddr, offset);

        if (rc) {
                gmem_to_hashed_addr(&addr, phyaddr);

                if (addr > dimm->dimm_phys_addr_hi)
                        dimm->dimm_phys_addr_hi = addr;
                if (addr < dimm->dimm_phys_addr_low)
                        dimm->dimm_phys_addr_low = addr;

                dimm->dimm_nretired++;
                dimm->dimm_retstat.fmds_value.ui64++;
                gmem_dimm_dirty(hdl, dimm);
                ce_thresh_check(hdl, dimm);
        }
        return (GMEM_EVD_OK);
}

void
gmem_dimm_close(fmd_hdl_t *hdl, void *arg)
{
        gmem_dimm_destroy(hdl, arg);
}