1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <mem.h> 30 #include <fm/fmd_fmri.h> 31 32 #include <fcntl.h> 33 #include <unistd.h> 34 #include <string.h> 35 #include <strings.h> 36 #include <time.h> 37 #include <sys/mem.h> 38 39 #ifdef sparc 40 #include <sys/fm/ldom.h> 41 ldom_hdl_t *mem_scheme_lhp; 42 #endif /* sparc */ 43 44 mem_t mem; 45 46 #ifdef sparc 47 48 extern int mem_update_mdesc(void); 49 50 /* 51 * Retry values for handling the case where the kernel is not yet ready 52 * to provide DIMM serial ids. Some platforms acquire DIMM serial id 53 * information from their System Controller via a mailbox interface. 54 * The values chosen are for 10 retries 3 seconds apart to approximate the 55 * possible 30 second timeout length of a mailbox message request. 56 */ 57 #define MAX_MEM_SID_RETRIES 10 58 #define MEM_SID_RETRY_WAIT 3 59 60 static mem_dimm_map_t * 61 dm_lookup(const char *name) 62 { 63 mem_dimm_map_t *dm; 64 65 for (dm = mem.mem_dm; dm != NULL; dm = dm->dm_next) { 66 if (strcmp(name, dm->dm_label) == 0) 67 return (dm); 68 } 69 70 return (NULL); 71 } 72 73 /* 74 * Returns 0 with serial numbers if found, -1 (with errno set) for errors. If 75 * the unum (or a component of same) wasn't found, -1 is returned with errno 76 * set to ENOENT. If the kernel doesn't have support for serial numbers, 77 * -1 is returned with errno set to ENOTSUP. 78 */ 79 static int 80 mem_get_serids_from_kernel(const char *unum, char ***seridsp, size_t *nseridsp) 81 { 82 char **dimms, **serids; 83 size_t ndimms, nserids; 84 int i, rc = 0; 85 int fd; 86 int retries = MAX_MEM_SID_RETRIES; 87 mem_name_t mn; 88 struct timespec rqt; 89 90 if ((fd = open("/dev/mem", O_RDONLY)) < 0) 91 return (-1); 92 93 if (mem_unum_burst(unum, &dimms, &ndimms) < 0) { 94 (void) close(fd); 95 return (-1); /* errno is set for us */ 96 } 97 98 serids = fmd_fmri_zalloc(sizeof (char *) * ndimms); 99 nserids = ndimms; 100 101 bzero(&mn, sizeof (mn)); 102 103 for (i = 0; i < ndimms; i++) { 104 mn.m_namelen = strlen(dimms[i]) + 1; 105 mn.m_sidlen = MEM_SERID_MAXLEN; 106 107 mn.m_name = fmd_fmri_alloc(mn.m_namelen); 108 mn.m_sid = fmd_fmri_alloc(mn.m_sidlen); 109 110 (void) strcpy(mn.m_name, dimms[i]); 111 112 do { 113 rc = ioctl(fd, MEM_SID, &mn); 114 115 if (rc >= 0 || errno != EAGAIN) 116 break; 117 118 if (retries == 0) { 119 errno = ETIMEDOUT; 120 break; 121 } 122 123 /* 124 * EAGAIN indicates the kernel is 125 * not ready to provide DIMM serial 126 * ids. Sleep MEM_SID_RETRY_WAIT seconds 127 * and try again. 128 * nanosleep() is used instead of sleep() 129 * to avoid interfering with fmd timers. 130 */ 131 rqt.tv_sec = MEM_SID_RETRY_WAIT; 132 rqt.tv_nsec = 0; 133 (void) nanosleep(&rqt, NULL); 134 135 } while (retries--); 136 137 if (rc < 0) { 138 /* 139 * ENXIO can happen if the kernel memory driver 140 * doesn't have the MEM_SID ioctl (e.g. if the 141 * kernel hasn't been patched to provide the 142 * support). 143 * 144 * If the MEM_SID ioctl is available but the 145 * particular platform doesn't support providing 146 * serial ids, ENOTSUP will be returned by the ioctl. 147 */ 148 if (errno == ENXIO) 149 errno = ENOTSUP; 150 fmd_fmri_free(mn.m_name, mn.m_namelen); 151 fmd_fmri_free(mn.m_sid, mn.m_sidlen); 152 mem_strarray_free(serids, nserids); 153 mem_strarray_free(dimms, ndimms); 154 (void) close(fd); 155 return (-1); 156 } 157 158 serids[i] = fmd_fmri_strdup(mn.m_sid); 159 160 fmd_fmri_free(mn.m_name, mn.m_namelen); 161 fmd_fmri_free(mn.m_sid, mn.m_sidlen); 162 } 163 164 mem_strarray_free(dimms, ndimms); 165 166 (void) close(fd); 167 168 *seridsp = serids; 169 *nseridsp = nserids; 170 171 return (0); 172 } 173 174 /* 175 * Returns 0 with serial numbers if found, -1 (with errno set) for errors. If 176 * the unum (or a component of same) wasn't found, -1 is returned with errno 177 * set to ENOENT. 178 */ 179 static int 180 mem_get_serids_from_cache(const char *unum, char ***seridsp, size_t *nseridsp) 181 { 182 uint64_t drgen = fmd_fmri_get_drgen(); 183 char **dimms, **serids; 184 size_t ndimms, nserids; 185 mem_dimm_map_t *dm; 186 int i, rc = 0; 187 188 if (mem_unum_burst(unum, &dimms, &ndimms) < 0) 189 return (-1); /* errno is set for us */ 190 191 serids = fmd_fmri_zalloc(sizeof (char *) * ndimms); 192 nserids = ndimms; 193 194 for (i = 0; i < ndimms; i++) { 195 if ((dm = dm_lookup(dimms[i])) == NULL) { 196 rc = fmd_fmri_set_errno(EINVAL); 197 break; 198 } 199 200 if (*dm->dm_serid == '\0' || dm->dm_drgen != drgen) { 201 /* 202 * We don't have a cached copy, or the copy we've got is 203 * out of date. Look it up again. 204 */ 205 if (mem_get_serid(dm->dm_device, dm->dm_serid, 206 sizeof (dm->dm_serid)) < 0) { 207 rc = -1; /* errno is set for us */ 208 break; 209 } 210 211 dm->dm_drgen = drgen; 212 } 213 214 serids[i] = fmd_fmri_strdup(dm->dm_serid); 215 } 216 217 mem_strarray_free(dimms, ndimms); 218 219 if (rc == 0) { 220 *seridsp = serids; 221 *nseridsp = nserids; 222 } else { 223 mem_strarray_free(serids, nserids); 224 } 225 226 return (rc); 227 } 228 229 /* 230 * Returns 0 with serial numbers if found, -1 (with errno set) for errors. If 231 * the unum (or a component of same) wasn't found, -1 is returned with errno 232 * set to ENOENT. 233 */ 234 static int 235 mem_get_serids_from_mdesc(const char *unum, char ***seridsp, size_t *nseridsp) 236 { 237 uint64_t drgen = fmd_fmri_get_drgen(); 238 char **dimms, **serids; 239 size_t ndimms, nserids; 240 mem_dimm_map_t *dm; 241 int i, rc = 0; 242 243 if (mem_unum_burst(unum, &dimms, &ndimms) < 0) 244 return (-1); /* errno is set for us */ 245 246 serids = fmd_fmri_zalloc(sizeof (char *) * ndimms); 247 nserids = ndimms; 248 249 /* 250 * first go through dimms and see if dm_drgen entries are outdated 251 */ 252 for (i = 0; i < ndimms; i++) { 253 if ((dm = dm_lookup(dimms[i])) == NULL || 254 dm->dm_drgen != drgen) 255 break; 256 } 257 258 if (i < ndimms && mem_update_mdesc() != 0) { 259 mem_strarray_free(dimms, ndimms); 260 return (-1); 261 } 262 263 /* 264 * get to this point if an up-to-date mdesc (and corresponding 265 * entries in the global mem list) exists 266 */ 267 for (i = 0; i < ndimms; i++) { 268 if ((dm = dm_lookup(dimms[i])) == NULL) { 269 rc = fmd_fmri_set_errno(EINVAL); 270 break; 271 } 272 273 if (dm->dm_drgen != drgen) 274 dm->dm_drgen = drgen; 275 276 /* 277 * mdesc and dm entry was updated by an earlier call to 278 * mem_update_mdesc, so we go ahead and dup the serid 279 */ 280 serids[i] = fmd_fmri_strdup(dm->dm_serid); 281 } 282 283 mem_strarray_free(dimms, ndimms); 284 285 if (rc == 0) { 286 *seridsp = serids; 287 *nseridsp = nserids; 288 } else { 289 mem_strarray_free(serids, nserids); 290 } 291 292 return (rc); 293 } 294 295 #endif /* sparc */ 296 297 /*ARGSUSED*/ 298 static int 299 mem_get_serids_by_unum(const char *unum, char ***seridsp, size_t *nseridsp) 300 { 301 /* 302 * Some platforms do not support the caching of serial ids by the 303 * mem scheme plugin but instead support making serial ids available 304 * via the kernel. 305 */ 306 #ifdef sparc 307 if (mem.mem_dm == NULL) 308 return (mem_get_serids_from_kernel(unum, seridsp, nseridsp)); 309 else if (mem_get_serids_from_mdesc(unum, seridsp, nseridsp) == 0) 310 return (0); 311 else 312 return (mem_get_serids_from_cache(unum, seridsp, nseridsp)); 313 #else 314 errno = ENOTSUP; 315 return (-1); 316 #endif /* sparc */ 317 } 318 319 static int 320 mem_fmri_get_unum(nvlist_t *nvl, char **unump) 321 { 322 uint8_t version; 323 char *unum; 324 325 if (nvlist_lookup_uint8(nvl, FM_VERSION, &version) != 0 || 326 version > FM_MEM_SCHEME_VERSION || 327 nvlist_lookup_string(nvl, FM_FMRI_MEM_UNUM, &unum) != 0) 328 return (fmd_fmri_set_errno(EINVAL)); 329 330 *unump = unum; 331 332 return (0); 333 } 334 335 ssize_t 336 fmd_fmri_nvl2str(nvlist_t *nvl, char *buf, size_t buflen) 337 { 338 char format[64]; 339 ssize_t size, presz; 340 char *rawunum, *preunum, *escunum, *prefix; 341 uint64_t val; 342 int i; 343 344 if (mem_fmri_get_unum(nvl, &rawunum) < 0) 345 return (-1); /* errno is set for us */ 346 347 /* 348 * If we have a well-formed unum (hc-FMRI), use the string verbatim 349 * to form the initial mem:/// components. Otherwise use unum=%s. 350 */ 351 if (strncmp(rawunum, "hc://", 5) != 0) 352 prefix = FM_FMRI_MEM_UNUM "="; 353 else 354 prefix = ""; 355 356 /* 357 * If we have a DIMM offset, include it in the string. If we have a PA 358 * then use that. Otherwise just format the unum element. 359 */ 360 if (nvlist_lookup_uint64(nvl, FM_FMRI_MEM_OFFSET, &val) == 0) { 361 (void) snprintf(format, sizeof (format), 362 "%s:///%s%%1$s/%s=%%2$llx", 363 FM_FMRI_SCHEME_MEM, prefix, FM_FMRI_MEM_OFFSET); 364 } else if (nvlist_lookup_uint64(nvl, FM_FMRI_MEM_PHYSADDR, &val) == 0) { 365 (void) snprintf(format, sizeof (format), 366 "%s:///%s%%1$s/%s=%%2$llx", 367 FM_FMRI_SCHEME_MEM, prefix, FM_FMRI_MEM_PHYSADDR); 368 } else { 369 (void) snprintf(format, sizeof (format), 370 "%s:///%s%%1$s", FM_FMRI_SCHEME_MEM, prefix); 371 } 372 373 /* 374 * If we have a well-formed unum (hc-FMRI), we skip over the 375 * the scheme and authority prefix. 376 * Otherwise, the spaces and colons will be escaped, 377 * rendering the resulting FMRI pretty much unreadable. 378 * We're therefore going to do some escaping of our own first. 379 */ 380 if (strncmp(rawunum, "hc://", 5) == 0) { 381 rawunum += 5; 382 rawunum = strchr(rawunum, '/'); 383 ++rawunum; 384 /* LINTED: variable format specifier */ 385 size = snprintf(buf, buflen, format, rawunum, val); 386 } else { 387 preunum = fmd_fmri_strdup(rawunum); 388 presz = strlen(preunum) + 1; 389 390 for (i = 0; i < presz - 1; i++) { 391 if (preunum[i] == ':' && preunum[i + 1] == ' ') { 392 bcopy(preunum + i + 2, preunum + i + 1, 393 presz - (i + 2)); 394 } else if (preunum[i] == ' ') { 395 preunum[i] = ','; 396 } 397 } 398 399 escunum = fmd_fmri_strescape(preunum); 400 fmd_fmri_free(preunum, presz); 401 402 /* LINTED: variable format specifier */ 403 size = snprintf(buf, buflen, format, escunum, val); 404 fmd_fmri_strfree(escunum); 405 } 406 407 return (size); 408 } 409 410 int 411 fmd_fmri_expand(nvlist_t *nvl) 412 { 413 char *unum, **serids; 414 uint_t nnvlserids; 415 size_t nserids; 416 int rc; 417 418 if (mem_fmri_get_unum(nvl, &unum) < 0) 419 return (fmd_fmri_set_errno(EINVAL)); 420 421 if ((rc = nvlist_lookup_string_array(nvl, FM_FMRI_MEM_SERIAL_ID, 422 &serids, &nnvlserids)) == 0) 423 return (0); /* fmri is already expanded */ 424 else if (rc != ENOENT) 425 return (fmd_fmri_set_errno(EINVAL)); 426 427 if (mem_get_serids_by_unum(unum, &serids, &nserids) < 0) { 428 /* errno is set for us */ 429 if (errno == ENOTSUP) 430 return (0); /* nothing to add - no s/n support */ 431 else 432 return (-1); 433 } 434 435 rc = nvlist_add_string_array(nvl, FM_FMRI_MEM_SERIAL_ID, serids, 436 nserids); 437 438 mem_strarray_free(serids, nserids); 439 440 if (rc != 0) 441 return (fmd_fmri_set_errno(EINVAL)); 442 443 return (0); 444 } 445 446 static int 447 serids_eq(char **serids1, uint_t nserids1, char **serids2, uint_t nserids2) 448 { 449 int i; 450 451 if (nserids1 != nserids2) 452 return (0); 453 454 for (i = 0; i < nserids1; i++) { 455 if (strcmp(serids1[i], serids2[i]) != 0) 456 return (0); 457 } 458 459 return (1); 460 } 461 462 int 463 fmd_fmri_present(nvlist_t *nvl) 464 { 465 char *unum, **nvlserids, **serids; 466 uint_t nnvlserids; 467 size_t nserids; 468 uint64_t memconfig; 469 int rc; 470 471 if (mem_fmri_get_unum(nvl, &unum) < 0) 472 return (-1); /* errno is set for us */ 473 474 if (nvlist_lookup_string_array(nvl, FM_FMRI_MEM_SERIAL_ID, &nvlserids, 475 &nnvlserids) != 0) { 476 /* 477 * Some mem scheme FMRIs don't have serial ids because 478 * either the platform does not support them, or because 479 * the FMRI was created before support for serial ids was 480 * introduced. If this is the case, assume it is there. 481 */ 482 if (mem.mem_dm == NULL) 483 return (1); 484 else 485 return (fmd_fmri_set_errno(EINVAL)); 486 } 487 488 /* 489 * Hypervisor will change the memconfig value when the mapping of 490 * pages to DIMMs changes, e.g. for change in DIMM size or interleave. 491 * If we detect such a change, we discard ereports associated with a 492 * previous memconfig value as invalid. 493 * 494 * The test (mem.mem_memconfig != 0) means we run on a system that 495 * actually suplies a memconfig value. 496 */ 497 498 if ((nvlist_lookup_uint64(nvl, FM_FMRI_MEM_MEMCONFIG, 499 &memconfig) == 0) && (mem.mem_memconfig != 0) && 500 (memconfig != mem.mem_memconfig)) 501 return (0); 502 503 if (mem_get_serids_by_unum(unum, &serids, &nserids) < 0) { 504 if (errno == ENOTSUP) 505 return (1); /* assume it's there, no s/n support here */ 506 if (errno != ENOENT) { 507 /* 508 * Errors are only signalled to the caller if they're 509 * the caller's fault. This isn't - it's a failure on 510 * our part to burst or read the serial numbers. We'll 511 * whine about it, and tell the caller the named 512 * module(s) isn't/aren't there. 513 */ 514 fmd_fmri_warn("failed to retrieve serial number for " 515 "unum %s", unum); 516 } 517 return (0); 518 } 519 520 rc = serids_eq(serids, nserids, nvlserids, nnvlserids); 521 522 mem_strarray_free(serids, nserids); 523 524 return (rc); 525 } 526 527 int 528 fmd_fmri_contains(nvlist_t *er, nvlist_t *ee) 529 { 530 char *erunum, *eeunum; 531 uint64_t erval = 0, eeval = 0; 532 533 if (mem_fmri_get_unum(er, &erunum) < 0 || 534 mem_fmri_get_unum(ee, &eeunum) < 0) 535 return (-1); /* errno is set for us */ 536 537 if (mem_unum_contains(erunum, eeunum) <= 0) 538 return (0); /* can't parse/match, so assume no containment */ 539 540 if (nvlist_lookup_uint64(er, FM_FMRI_MEM_OFFSET, &erval) == 0) { 541 return (nvlist_lookup_uint64(ee, 542 FM_FMRI_MEM_OFFSET, &eeval) == 0 && erval == eeval); 543 } 544 545 if (nvlist_lookup_uint64(er, FM_FMRI_MEM_PHYSADDR, &erval) == 0) { 546 return (nvlist_lookup_uint64(ee, 547 FM_FMRI_MEM_PHYSADDR, &eeval) == 0 && erval == eeval); 548 } 549 550 return (1); 551 } 552 553 /* 554 * We can only make a usable/unusable determination for pages. Mem FMRIs 555 * without page addresses will be reported as usable since Solaris has no 556 * way at present to dynamically disable an entire DIMM or DIMM pair. 557 */ 558 int 559 fmd_fmri_unusable(nvlist_t *nvl) 560 { 561 uint64_t val; 562 uint8_t version; 563 int rc, err1, err2; 564 nvlist_t *nvlcp = NULL; 565 int retval; 566 567 if (nvlist_lookup_uint8(nvl, FM_VERSION, &version) != 0 || 568 version > FM_MEM_SCHEME_VERSION) 569 return (fmd_fmri_set_errno(EINVAL)); 570 571 err1 = nvlist_lookup_uint64(nvl, FM_FMRI_MEM_OFFSET, &val); 572 err2 = nvlist_lookup_uint64(nvl, FM_FMRI_MEM_PHYSADDR, &val); 573 574 if (err1 == ENOENT && err2 == ENOENT) 575 return (0); /* no page, so assume it's still usable */ 576 577 if ((err1 != 0 && err1 != ENOENT) || (err2 != 0 && err2 != ENOENT)) 578 return (fmd_fmri_set_errno(EINVAL)); 579 580 if ((err1 = mem_unum_rewrite(nvl, &nvlcp)) != 0) 581 return (fmd_fmri_set_errno(err1)); 582 583 /* 584 * Ask the kernel if the page is retired, using either the rewritten 585 * hc FMRI or the original mem FMRI with the specified offset or PA. 586 * Refer to the kernel's page_retire_check() for the error codes. 587 */ 588 rc = mem_page_cmd(MEM_PAGE_FMRI_ISRETIRED, nvlcp ? nvlcp : nvl); 589 590 if (rc == -1 && errno == EIO) { 591 /* 592 * The page is not retired and is not scheduled for retirement 593 * (i.e. no request pending and has not seen any errors) 594 */ 595 retval = 0; 596 } else if (rc == 0 || errno == EAGAIN || errno == EINVAL) { 597 /* 598 * The page has been retired, is in the process of being 599 * retired, or doesn't exist. The latter is valid if the page 600 * existed in the past but has been DR'd out. 601 */ 602 retval = 1; 603 } else { 604 /* 605 * Errors are only signalled to the caller if they're the 606 * caller's fault. This isn't - it's a failure of the 607 * retirement-check code. We'll whine about it and tell 608 * the caller the page is unusable. 609 */ 610 fmd_fmri_warn("failed to determine page %s=%llx usability: " 611 "rc=%d errno=%d\n", err1 == 0 ? FM_FMRI_MEM_OFFSET : 612 FM_FMRI_MEM_PHYSADDR, (u_longlong_t)val, rc, errno); 613 retval = 1; 614 } 615 616 if (nvlcp) 617 nvlist_free(nvlcp); 618 619 return (retval); 620 } 621 622 int 623 fmd_fmri_init(void) 624 { 625 #ifdef sparc 626 mem_scheme_lhp = ldom_init(fmd_fmri_alloc, fmd_fmri_free); 627 #endif /* sparc */ 628 return (mem_discover()); 629 } 630 631 void 632 fmd_fmri_fini(void) 633 { 634 mem_dimm_map_t *dm, *em; 635 636 for (dm = mem.mem_dm; dm != NULL; dm = em) { 637 em = dm->dm_next; 638 fmd_fmri_strfree(dm->dm_label); 639 fmd_fmri_strfree(dm->dm_device); 640 fmd_fmri_free(dm, sizeof (mem_dimm_map_t)); 641 } 642 #ifdef sparc 643 ldom_fini(mem_scheme_lhp); 644 #endif /* sparc */ 645 } 646