1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 /* 26 * Support routines for DIMMs. 27 */ 28 29 #include <cmd_mem.h> 30 #include <limits.h> 31 #include <cmd_dimm.h> 32 #include <cmd_bank.h> 33 #include <cmd.h> 34 35 #include <errno.h> 36 #include <string.h> 37 #include <strings.h> 38 #include <fcntl.h> 39 #include <unistd.h> 40 #include <fm/fmd_api.h> 41 #include <sys/fm/protocol.h> 42 #include <sys/mem.h> 43 #include <sys/nvpair.h> 44 #ifdef sun4v 45 #include <cmd_hc_sun4v.h> 46 #include <cmd_branch.h> 47 #endif /* sun4v */ 48 49 /* 50 * Some errors (RxE/FRx pairs) don't have accurate DIMM (resource) FMRIs, 51 * because sufficient information was unavailable prior to correlation. 52 * When the DE completes the pair, it uses this routine to retrieve the 53 * correct FMRI. 54 */ 55 nvlist_t * 56 cmd_dimm_fmri_derive(fmd_hdl_t *hdl, uint64_t afar, uint16_t synd, 57 uint64_t afsr) 58 { 59 nvlist_t *fmri; 60 61 if ((fmri = cmd_mem_fmri_derive(hdl, afar, afsr, synd)) == NULL) 62 return (NULL); 63 64 if (fmd_nvl_fmri_expand(hdl, fmri) < 0) { 65 nvlist_free(fmri); 66 return (NULL); 67 } 68 69 return (fmri); 70 } 71 72 nvlist_t * 73 cmd_dimm_fru(cmd_dimm_t *dimm) 74 { 75 return (dimm->dimm_asru_nvl); 76 } 77 78 nvlist_t * 79 cmd_dimm_create_fault(fmd_hdl_t *hdl, cmd_dimm_t *dimm, const char *fltnm, 80 uint_t cert) 81 { 82 #ifdef sun4v 83 nvlist_t *flt, *nvlfru; 84 /* 85 * Do NOT issue hc scheme FRU FMRIs for ultraSPARC-T1 platforms. 86 * The SP will misinterpret the FRU. Instead, reuse the ASRU FMRI 87 * 88 * Use the BR string as a distinguisher. BR (branch) is only 89 * present in ultraSPARC-T2/T2plus DIMM unums 90 */ 91 if (strstr(dimm->dimm_unum, "BR") == NULL) { 92 flt = cmd_nvl_create_fault(hdl, fltnm, cert, 93 dimm->dimm_asru_nvl, dimm->dimm_asru_nvl, NULL); 94 } else { 95 nvlfru = cmd_mem2hc(hdl, dimm->dimm_asru_nvl); 96 flt = cmd_nvl_create_fault(hdl, fltnm, cert, 97 dimm->dimm_asru_nvl, nvlfru, NULL); 98 nvlist_free(nvlfru); 99 } 100 return (cmd_fault_add_location(hdl, flt, dimm->dimm_unum)); 101 #else 102 return (cmd_nvl_create_fault(hdl, fltnm, cert, dimm->dimm_asru_nvl, 103 dimm->dimm_asru_nvl, NULL)); 104 #endif /* sun4v */ 105 } 106 107 static void 108 cmd_dimm_free(fmd_hdl_t *hdl, cmd_dimm_t *dimm, int destroy) 109 { 110 cmd_case_t *cc = &dimm->dimm_case; 111 int i; 112 cmd_mq_t *q; 113 tstamp_t *tsp, *next; 114 115 #ifdef sun4v 116 cmd_branch_t *branch; 117 #endif 118 if (cc->cc_cp != NULL) { 119 cmd_case_fini(hdl, cc->cc_cp, destroy); 120 if (cc->cc_serdnm != NULL) { 121 if (fmd_serd_exists(hdl, cc->cc_serdnm) && 122 destroy) 123 fmd_serd_destroy(hdl, cc->cc_serdnm); 124 fmd_hdl_strfree(hdl, cc->cc_serdnm); 125 } 126 } 127 128 for (i = 0; i < CMD_MAX_CKWDS; i++) { 129 while ((q = cmd_list_next(&dimm->mq_root[i])) != NULL) { 130 if (q->mq_serdnm != NULL) { 131 if (fmd_serd_exists(hdl, q->mq_serdnm)) { 132 fmd_serd_destroy(hdl, q->mq_serdnm); 133 } 134 fmd_hdl_strfree(hdl, q->mq_serdnm); 135 q->mq_serdnm = NULL; 136 } 137 138 for (tsp = cmd_list_next(&q->mq_dupce_tstamp); 139 tsp != NULL; tsp = next) { 140 next = cmd_list_next(tsp); 141 cmd_list_delete(&q->mq_dupce_tstamp, 142 &tsp->ts_l); 143 fmd_hdl_free(hdl, tsp, sizeof (tstamp_t)); 144 } 145 146 cmd_list_delete(&dimm->mq_root[i], q); 147 fmd_hdl_free(hdl, q, sizeof (cmd_mq_t)); 148 } 149 } 150 151 if (dimm->dimm_bank != NULL) 152 cmd_bank_remove_dimm(hdl, dimm->dimm_bank, dimm); 153 154 #ifdef sun4v 155 branch = cmd_branch_lookup_by_unum(hdl, dimm->dimm_unum); 156 if (branch != NULL) 157 cmd_branch_remove_dimm(hdl, branch, dimm); 158 #endif 159 160 cmd_fmri_fini(hdl, &dimm->dimm_asru, destroy); 161 162 if (destroy) 163 fmd_buf_destroy(hdl, NULL, dimm->dimm_bufname); 164 165 cmd_list_delete(&cmd.cmd_dimms, dimm); 166 fmd_hdl_free(hdl, dimm, sizeof (cmd_dimm_t)); 167 } 168 169 void 170 cmd_dimm_destroy(fmd_hdl_t *hdl, cmd_dimm_t *dimm) 171 { 172 173 fmd_stat_destroy(hdl, 1, &(dimm->dimm_retstat)); 174 cmd_dimm_free(hdl, dimm, FMD_B_TRUE); 175 } 176 177 static cmd_dimm_t * 178 dimm_lookup_by_unum(const char *unum) 179 { 180 cmd_dimm_t *dimm; 181 182 for (dimm = cmd_list_next(&cmd.cmd_dimms); dimm != NULL; 183 dimm = cmd_list_next(dimm)) { 184 if (strcmp(dimm->dimm_unum, unum) == 0) 185 return (dimm); 186 } 187 188 return (NULL); 189 } 190 191 static void 192 dimm_attach_to_bank(fmd_hdl_t *hdl, cmd_dimm_t *dimm) 193 { 194 cmd_bank_t *bank; 195 196 for (bank = cmd_list_next(&cmd.cmd_banks); bank != NULL; 197 bank = cmd_list_next(bank)) { 198 if (fmd_nvl_fmri_contains(hdl, bank->bank_asru_nvl, 199 dimm->dimm_asru_nvl)) { 200 cmd_bank_add_dimm(hdl, bank, dimm); 201 return; 202 } 203 } 204 } 205 206 cmd_dimm_t * 207 cmd_dimm_create(fmd_hdl_t *hdl, nvlist_t *asru) 208 { 209 cmd_dimm_t *dimm; 210 const char *unum; 211 nvlist_t *fmri; 212 size_t nserids = 0; 213 char **serids = NULL; 214 215 if (!fmd_nvl_fmri_present(hdl, asru)) { 216 fmd_hdl_debug(hdl, "dimm_lookup: discarding old ereport\n"); 217 return (NULL); 218 } 219 220 if ((unum = cmd_fmri_get_unum(asru)) == NULL) { 221 CMD_STAT_BUMP(bad_mem_asru); 222 return (NULL); 223 } 224 225 #ifdef sun4v 226 if (nvlist_lookup_string_array(asru, FM_FMRI_HC_SERIAL_ID, &serids, 227 &nserids) != 0) { 228 fmd_hdl_debug(hdl, "sun4v mem: FMRI does not" 229 " have serial_ids\n"); 230 CMD_STAT_BUMP(bad_mem_asru); 231 return (NULL); 232 } 233 #endif 234 fmri = cmd_mem_fmri_create(unum, serids, nserids); 235 if (fmd_nvl_fmri_expand(hdl, fmri) < 0) { 236 CMD_STAT_BUMP(bad_mem_asru); 237 nvlist_free(fmri); 238 return (NULL); 239 } 240 241 fmd_hdl_debug(hdl, "dimm_create: creating new DIMM %s\n", unum); 242 CMD_STAT_BUMP(dimm_creat); 243 244 dimm = fmd_hdl_zalloc(hdl, sizeof (cmd_dimm_t), FMD_SLEEP); 245 dimm->dimm_nodetype = CMD_NT_DIMM; 246 dimm->dimm_version = CMD_DIMM_VERSION; 247 dimm->dimm_phys_addr_low = ULLONG_MAX; 248 dimm->dimm_phys_addr_hi = 0; 249 dimm->dimm_syl_error = USHRT_MAX; 250 251 cmd_bufname(dimm->dimm_bufname, sizeof (dimm->dimm_bufname), "dimm_%s", 252 unum); 253 cmd_fmri_init(hdl, &dimm->dimm_asru, fmri, "dimm_asru_%s", unum); 254 255 nvlist_free(fmri); 256 257 (void) nvlist_lookup_string(dimm->dimm_asru_nvl, FM_FMRI_MEM_UNUM, 258 (char **)&dimm->dimm_unum); 259 260 dimm_attach_to_bank(hdl, dimm); 261 262 cmd_mem_retirestat_create(hdl, &dimm->dimm_retstat, dimm->dimm_unum, 0, 263 CMD_DIMM_STAT_PREFIX); 264 265 cmd_list_append(&cmd.cmd_dimms, dimm); 266 cmd_dimm_dirty(hdl, dimm); 267 268 return (dimm); 269 } 270 271 cmd_dimm_t * 272 cmd_dimm_lookup(fmd_hdl_t *hdl, nvlist_t *asru) 273 { 274 cmd_dimm_t *dimm; 275 const char *unum; 276 277 if ((unum = cmd_fmri_get_unum(asru)) == NULL) { 278 CMD_STAT_BUMP(bad_mem_asru); 279 return (NULL); 280 } 281 282 dimm = dimm_lookup_by_unum(unum); 283 284 if (dimm != NULL && !fmd_nvl_fmri_present(hdl, dimm->dimm_asru_nvl)) { 285 /* 286 * The DIMM doesn't exist anymore, so we need to delete the 287 * state structure, which is now out of date. The containing 288 * bank (if any) is also out of date, so blow it away too. 289 */ 290 fmd_hdl_debug(hdl, "dimm_lookup: discarding old dimm\n"); 291 292 if (dimm->dimm_bank != NULL) 293 cmd_bank_destroy(hdl, dimm->dimm_bank); 294 cmd_dimm_destroy(hdl, dimm); 295 296 return (NULL); 297 } 298 299 return (dimm); 300 } 301 302 static cmd_dimm_t * 303 dimm_v0tov2(fmd_hdl_t *hdl, cmd_dimm_0_t *old, size_t oldsz) 304 { 305 cmd_dimm_t *new; 306 307 if (oldsz != sizeof (cmd_dimm_0_t)) { 308 fmd_hdl_abort(hdl, "size of state doesn't match size of " 309 "version 0 state (%u bytes).\n", sizeof (cmd_dimm_0_t)); 310 } 311 312 new = fmd_hdl_zalloc(hdl, sizeof (cmd_dimm_t), FMD_SLEEP); 313 new->dimm_header = old->dimm0_header; 314 new->dimm_version = CMD_DIMM_VERSION; 315 new->dimm_asru = old->dimm0_asru; 316 new->dimm_nretired = old->dimm0_nretired; 317 new->dimm_phys_addr_hi = 0; 318 new->dimm_phys_addr_low = ULLONG_MAX; 319 320 fmd_hdl_free(hdl, old, oldsz); 321 return (new); 322 } 323 324 static cmd_dimm_t * 325 dimm_v1tov2(fmd_hdl_t *hdl, cmd_dimm_1_t *old, size_t oldsz) 326 { 327 328 cmd_dimm_t *new; 329 330 if (oldsz != sizeof (cmd_dimm_1_t)) { 331 fmd_hdl_abort(hdl, "size of state doesn't match size of " 332 "version 1 state (%u bytes).\n", sizeof (cmd_dimm_1_t)); 333 } 334 335 new = fmd_hdl_zalloc(hdl, sizeof (cmd_dimm_t), FMD_SLEEP); 336 337 new->dimm_header = old->dimm1_header; 338 new->dimm_version = CMD_DIMM_VERSION; 339 new->dimm_asru = old->dimm1_asru; 340 new->dimm_nretired = old->dimm1_nretired; 341 new->dimm_flags = old->dimm1_flags; 342 new->dimm_phys_addr_hi = 0; 343 new->dimm_phys_addr_low = ULLONG_MAX; 344 345 fmd_hdl_free(hdl, old, oldsz); 346 return (new); 347 } 348 349 static cmd_dimm_t * 350 dimm_wrapv2(fmd_hdl_t *hdl, cmd_dimm_pers_t *pers, size_t psz) 351 { 352 cmd_dimm_t *dimm; 353 354 if (psz != sizeof (cmd_dimm_pers_t)) { 355 fmd_hdl_abort(hdl, "size of state doesn't match size of " 356 "version 1 state (%u bytes).\n", sizeof (cmd_dimm_pers_t)); 357 } 358 359 dimm = fmd_hdl_zalloc(hdl, sizeof (cmd_dimm_t), FMD_SLEEP); 360 bcopy(pers, dimm, sizeof (cmd_dimm_pers_t)); 361 fmd_hdl_free(hdl, pers, psz); 362 return (dimm); 363 } 364 365 void * 366 cmd_dimm_restore(fmd_hdl_t *hdl, fmd_case_t *cp, cmd_case_ptr_t *ptr) 367 { 368 cmd_dimm_t *dimm; 369 370 for (dimm = cmd_list_next(&cmd.cmd_dimms); dimm != NULL; 371 dimm = cmd_list_next(dimm)) { 372 if (strcmp(dimm->dimm_bufname, ptr->ptr_name) == 0) 373 break; 374 } 375 376 if (dimm == NULL) { 377 int migrated = 0; 378 size_t dimmsz; 379 380 fmd_hdl_debug(hdl, "restoring dimm from %s\n", ptr->ptr_name); 381 382 if ((dimmsz = fmd_buf_size(hdl, NULL, ptr->ptr_name)) == 0) { 383 fmd_hdl_abort(hdl, "dimm referenced by case %s does " 384 "not exist in saved state\n", 385 fmd_case_uuid(hdl, cp)); 386 } else if (dimmsz > CMD_DIMM_MAXSIZE || 387 dimmsz < CMD_DIMM_MINSIZE) { 388 fmd_hdl_abort(hdl, 389 "dimm buffer referenced by case %s " 390 "is out of bounds (is %u bytes, max %u, min %u)\n", 391 fmd_case_uuid(hdl, cp), dimmsz, 392 CMD_DIMM_MAXSIZE, CMD_DIMM_MINSIZE); 393 } 394 395 if ((dimm = cmd_buf_read(hdl, NULL, ptr->ptr_name, 396 dimmsz)) == NULL) { 397 fmd_hdl_abort(hdl, "failed to read dimm buf %s", 398 ptr->ptr_name); 399 } 400 401 fmd_hdl_debug(hdl, "found %d in version field\n", 402 dimm->dimm_version); 403 404 if (CMD_DIMM_VERSIONED(dimm)) { 405 switch (dimm->dimm_version) { 406 case CMD_DIMM_VERSION_1: 407 dimm = dimm_v1tov2(hdl, (cmd_dimm_1_t *)dimm, 408 dimmsz); 409 break; 410 case CMD_DIMM_VERSION_2: 411 dimm = dimm_wrapv2(hdl, (cmd_dimm_pers_t *)dimm, 412 dimmsz); 413 break; 414 default: 415 fmd_hdl_abort(hdl, "unknown version (found %d) " 416 "for dimm state referenced by case %s.\n", 417 dimm->dimm_version, fmd_case_uuid(hdl, cp)); 418 break; 419 } 420 } else { 421 dimm = dimm_v0tov2(hdl, (cmd_dimm_0_t *)dimm, dimmsz); 422 migrated = 1; 423 } 424 425 if (migrated) { 426 CMD_STAT_BUMP(dimm_migrat); 427 cmd_dimm_dirty(hdl, dimm); 428 } 429 430 cmd_fmri_restore(hdl, &dimm->dimm_asru); 431 432 if ((errno = nvlist_lookup_string(dimm->dimm_asru_nvl, 433 FM_FMRI_MEM_UNUM, (char **)&dimm->dimm_unum)) != 0) 434 fmd_hdl_abort(hdl, "failed to retrieve unum from asru"); 435 436 dimm_attach_to_bank(hdl, dimm); 437 438 cmd_mem_retirestat_create(hdl, &dimm->dimm_retstat, 439 dimm->dimm_unum, dimm->dimm_nretired, CMD_DIMM_STAT_PREFIX); 440 441 cmd_list_append(&cmd.cmd_dimms, dimm); 442 } 443 444 switch (ptr->ptr_subtype) { 445 case BUG_PTR_DIMM_CASE: 446 fmd_hdl_debug(hdl, "recovering from out of order dimm ptr\n"); 447 cmd_case_redirect(hdl, cp, CMD_PTR_DIMM_CASE); 448 /*FALLTHROUGH*/ 449 case CMD_PTR_DIMM_CASE: 450 cmd_mem_case_restore(hdl, &dimm->dimm_case, cp, "dimm", 451 dimm->dimm_unum); 452 break; 453 default: 454 fmd_hdl_abort(hdl, "invalid %s subtype %d\n", 455 ptr->ptr_name, ptr->ptr_subtype); 456 } 457 458 return (dimm); 459 } 460 461 void 462 cmd_dimm_validate(fmd_hdl_t *hdl) 463 { 464 cmd_dimm_t *dimm, *next; 465 466 for (dimm = cmd_list_next(&cmd.cmd_dimms); dimm != NULL; dimm = next) { 467 next = cmd_list_next(dimm); 468 469 if (!fmd_nvl_fmri_present(hdl, dimm->dimm_asru_nvl)) 470 cmd_dimm_destroy(hdl, dimm); 471 } 472 } 473 474 void 475 cmd_dimm_dirty(fmd_hdl_t *hdl, cmd_dimm_t *dimm) 476 { 477 if (fmd_buf_size(hdl, NULL, dimm->dimm_bufname) != 478 sizeof (cmd_dimm_pers_t)) 479 fmd_buf_destroy(hdl, NULL, dimm->dimm_bufname); 480 481 /* No need to rewrite the FMRIs in the dimm - they don't change */ 482 fmd_buf_write(hdl, NULL, dimm->dimm_bufname, &dimm->dimm_pers, 483 sizeof (cmd_dimm_pers_t)); 484 } 485 486 void 487 cmd_dimm_gc(fmd_hdl_t *hdl) 488 { 489 cmd_dimm_validate(hdl); 490 } 491 492 void 493 cmd_dimm_fini(fmd_hdl_t *hdl) 494 { 495 cmd_dimm_t *dimm; 496 497 while ((dimm = cmd_list_next(&cmd.cmd_dimms)) != NULL) 498 cmd_dimm_free(hdl, dimm, FMD_B_FALSE); 499 } 500 501 502 void 503 cmd_dimm_save_symbol_error(cmd_dimm_t *dimm, uint16_t upos) 504 { 505 cmd_dimm_t *d = NULL, *next = NULL; 506 507 for (d = cmd_list_next(&cmd.cmd_dimms); d != NULL; d = next) { 508 next = cmd_list_next(d); 509 if (cmd_same_datapath_dimms(dimm, d)) 510 d->dimm_syl_error = upos; 511 } 512 } 513 514 int 515 cmd_dimm_check_symbol_error(cmd_dimm_t *dimm, uint16_t synd) 516 { 517 int upos; 518 cmd_dimm_t *d, *next; 519 520 if ((upos = cmd_synd2upos(synd)) < 0) 521 return (0); 522 523 for (d = cmd_list_next(&cmd.cmd_dimms); d != NULL; d = next) { 524 next = cmd_list_next(d); 525 if (cmd_same_datapath_dimms(dimm, d) && 526 (d->dimm_syl_error == upos)) 527 return (1); 528 } 529 530 return (0); 531 } 532