1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <mem.h> 30 #include <fm/fmd_fmri.h> 31 32 #include <fcntl.h> 33 #include <unistd.h> 34 #include <string.h> 35 #include <strings.h> 36 #include <time.h> 37 #include <sys/mem.h> 38 39 /* 40 * The scheme plugin for mem FMRIs. 41 */ 42 43 mem_t mem; 44 45 /* 46 * Retry values for handling the case where the kernel is not yet ready 47 * to provide DIMM serial ids. Some platforms acquire DIMM serial id 48 * information from their System Controller via a mailbox interface. 49 * The values chosen are for 10 retries 3 seconds apart to approximate the 50 * possible 30 second timeout length of a mailbox message request. 51 */ 52 #define MAX_MEM_SID_RETRIES 10 53 #define MEM_SID_RETRY_WAIT 3 54 55 static mem_dimm_map_t * 56 dm_lookup(const char *name) 57 { 58 mem_dimm_map_t *dm; 59 60 for (dm = mem.mem_dm; dm != NULL; dm = dm->dm_next) { 61 if (strcmp(name, dm->dm_label) == 0) 62 return (dm); 63 } 64 65 return (NULL); 66 } 67 68 /* 69 * Returns 0 with serial numbers if found, -1 (with errno set) for errors. If 70 * the unum (or a component of same) wasn't found, -1 is returned with errno 71 * set to ENOENT. If the kernel doesn't have support for serial numbers, 72 * -1 is returned with errno set to ENOTSUP. 73 */ 74 static int 75 mem_get_serids_from_kernel(const char *unum, char ***seridsp, size_t *nseridsp) 76 { 77 char **dimms, **serids; 78 size_t ndimms, nserids; 79 int i, rc = 0; 80 int fd; 81 int retries = MAX_MEM_SID_RETRIES; 82 mem_name_t mn; 83 struct timespec rqt; 84 85 if ((fd = open("/dev/mem", O_RDONLY)) < 0) 86 return (-1); 87 88 if (mem_unum_burst(unum, &dimms, &ndimms) < 0) { 89 (void) close(fd); 90 return (-1); /* errno is set for us */ 91 } 92 93 serids = fmd_fmri_zalloc(sizeof (char *) * ndimms); 94 nserids = ndimms; 95 96 bzero(&mn, sizeof (mn)); 97 98 for (i = 0; i < ndimms; i++) { 99 mn.m_namelen = strlen(dimms[i]) + 1; 100 mn.m_sidlen = MEM_SERID_MAXLEN; 101 102 mn.m_name = fmd_fmri_alloc(mn.m_namelen); 103 mn.m_sid = fmd_fmri_alloc(mn.m_sidlen); 104 105 (void) strcpy(mn.m_name, dimms[i]); 106 107 do { 108 rc = ioctl(fd, MEM_SID, &mn); 109 110 if (rc >= 0 || errno != EAGAIN) 111 break; 112 113 if (retries == 0) { 114 errno = ETIMEDOUT; 115 break; 116 } 117 118 /* 119 * EAGAIN indicates the kernel is 120 * not ready to provide DIMM serial 121 * ids. Sleep MEM_SID_RETRY_WAIT seconds 122 * and try again. 123 * nanosleep() is used instead of sleep() 124 * to avoid interfering with fmd timers. 125 */ 126 rqt.tv_sec = MEM_SID_RETRY_WAIT; 127 rqt.tv_nsec = 0; 128 (void) nanosleep(&rqt, NULL); 129 130 } while (retries--); 131 132 if (rc < 0) { 133 /* 134 * ENXIO can happen if the kernel memory driver 135 * doesn't have the MEM_SID ioctl (e.g. if the 136 * kernel hasn't been patched to provide the 137 * support). 138 * 139 * If the MEM_SID ioctl is available but the 140 * particular platform doesn't support providing 141 * serial ids, ENOTSUP will be returned by the ioctl. 142 */ 143 if (errno == ENXIO) 144 errno = ENOTSUP; 145 fmd_fmri_free(mn.m_name, mn.m_namelen); 146 fmd_fmri_free(mn.m_sid, mn.m_sidlen); 147 mem_strarray_free(serids, nserids); 148 mem_strarray_free(dimms, ndimms); 149 (void) close(fd); 150 return (-1); 151 } 152 153 serids[i] = fmd_fmri_strdup(mn.m_sid); 154 155 fmd_fmri_free(mn.m_name, mn.m_namelen); 156 fmd_fmri_free(mn.m_sid, mn.m_sidlen); 157 } 158 159 mem_strarray_free(dimms, ndimms); 160 161 (void) close(fd); 162 163 *seridsp = serids; 164 *nseridsp = nserids; 165 166 return (0); 167 } 168 169 /* 170 * Returns 0 with serial numbers if found, -1 (with errno set) for errors. If 171 * the unum (or a component of same) wasn't found, -1 is returned with errno 172 * set to ENOENT. 173 */ 174 static int 175 mem_get_serids_from_cache(const char *unum, char ***seridsp, size_t *nseridsp) 176 { 177 uint64_t drgen = fmd_fmri_get_drgen(); 178 char **dimms, **serids; 179 size_t ndimms, nserids; 180 mem_dimm_map_t *dm; 181 int i, rc = 0; 182 183 if (mem_unum_burst(unum, &dimms, &ndimms) < 0) 184 return (-1); /* errno is set for us */ 185 186 serids = fmd_fmri_zalloc(sizeof (char *) * ndimms); 187 nserids = ndimms; 188 189 for (i = 0; i < ndimms; i++) { 190 if ((dm = dm_lookup(dimms[i])) == NULL) { 191 rc = fmd_fmri_set_errno(EINVAL); 192 break; 193 } 194 195 if (*dm->dm_serid == '\0' || dm->dm_drgen != drgen) { 196 /* 197 * We don't have a cached copy, or the copy we've got is 198 * out of date. Look it up again. 199 */ 200 if (mem_get_serid(dm->dm_device, dm->dm_serid, 201 sizeof (dm->dm_serid)) < 0) { 202 rc = -1; /* errno is set for us */ 203 break; 204 } 205 206 dm->dm_drgen = drgen; 207 } 208 209 serids[i] = fmd_fmri_strdup(dm->dm_serid); 210 } 211 212 mem_strarray_free(dimms, ndimms); 213 214 if (rc == 0) { 215 *seridsp = serids; 216 *nseridsp = nserids; 217 } else { 218 mem_strarray_free(serids, nserids); 219 } 220 221 return (rc); 222 } 223 224 static int 225 mem_get_serids_by_unum(const char *unum, char ***seridsp, size_t *nseridsp) 226 { 227 /* 228 * Some platforms do not support the caching of serial ids by the 229 * mem scheme plugin but instead support making serial ids available 230 * via the kernel. 231 */ 232 if (mem.mem_dm == NULL) 233 return (mem_get_serids_from_kernel(unum, seridsp, nseridsp)); 234 else 235 return (mem_get_serids_from_cache(unum, seridsp, nseridsp)); 236 } 237 238 static int 239 mem_fmri_get_unum(nvlist_t *nvl, char **unump) 240 { 241 uint8_t version; 242 char *unum; 243 244 if (nvlist_lookup_uint8(nvl, FM_VERSION, &version) != 0 || 245 version > FM_MEM_SCHEME_VERSION || 246 nvlist_lookup_string(nvl, FM_FMRI_MEM_UNUM, &unum) != 0) 247 return (fmd_fmri_set_errno(EINVAL)); 248 249 *unump = unum; 250 251 return (0); 252 } 253 254 ssize_t 255 fmd_fmri_nvl2str(nvlist_t *nvl, char *buf, size_t buflen) 256 { 257 const char *fmt = "mem:///component=%1$s"; 258 ssize_t size, presz; 259 uint64_t pa; 260 char *rawunum, *preunum, *escunum; 261 int i; 262 263 if (mem_fmri_get_unum(nvl, &rawunum) < 0) 264 return (-1); /* errno is set for us */ 265 266 if (nvlist_lookup_uint64(nvl, FM_FMRI_MEM_PHYSADDR, &pa) == 0) 267 fmt = "mem:///pa=%2$llx/component=%1$s"; 268 269 /* 270 * If we leave the unum as-is, the spaces and colons will be escaped, 271 * rendering the resulting FMRI pretty much unreadable. We're therefore 272 * going to do some escaping of our own first. 273 */ 274 preunum = fmd_fmri_strdup(rawunum); 275 presz = strlen(preunum) + 1; 276 277 for (i = 0; i < presz - 1; i++) { 278 if (preunum[i] == ':' && preunum[i + 1] == ' ') { 279 bcopy(preunum + i + 2, preunum + i + 1, 280 presz - (i + 2)); 281 } else if (preunum[i] == ' ') { 282 preunum[i] = ','; 283 } 284 } 285 286 escunum = fmd_fmri_strescape(preunum); 287 fmd_fmri_free(preunum, presz); 288 289 size = snprintf(buf, buflen, fmt, escunum, (u_longlong_t)pa); 290 fmd_fmri_strfree(escunum); 291 292 return (size); 293 } 294 295 int 296 fmd_fmri_expand(nvlist_t *nvl) 297 { 298 char *unum, **serids; 299 uint_t nnvlserids; 300 size_t nserids; 301 int rc; 302 303 if (mem_fmri_get_unum(nvl, &unum) < 0) 304 return (fmd_fmri_set_errno(EINVAL)); 305 306 if ((rc = nvlist_lookup_string_array(nvl, FM_FMRI_MEM_SERIAL_ID, 307 &serids, &nnvlserids)) == 0) 308 return (0); /* fmri is already expanded */ 309 else if (rc != ENOENT) 310 return (fmd_fmri_set_errno(EINVAL)); 311 312 if (mem_get_serids_by_unum(unum, &serids, &nserids) < 0) { 313 /* errno is set for us */ 314 if (errno == ENOTSUP) 315 return (0); /* nothing to add - no s/n support */ 316 else 317 return (-1); 318 } 319 320 rc = nvlist_add_string_array(nvl, FM_FMRI_MEM_SERIAL_ID, serids, 321 nserids); 322 323 mem_strarray_free(serids, nserids); 324 325 if (rc != 0) 326 return (fmd_fmri_set_errno(EINVAL)); 327 328 return (0); 329 } 330 331 static int 332 serids_eq(char **serids1, uint_t nserids1, char **serids2, uint_t nserids2) 333 { 334 int i; 335 336 if (nserids1 != nserids2) 337 return (0); 338 339 for (i = 0; i < nserids1; i++) { 340 if (strcmp(serids1[i], serids2[i]) != 0) 341 return (0); 342 } 343 344 return (1); 345 } 346 347 int 348 fmd_fmri_present(nvlist_t *nvl) 349 { 350 char *unum, **nvlserids, **serids; 351 uint_t nnvlserids; 352 size_t nserids; 353 uint64_t memconfig; 354 int rc; 355 356 if (mem_fmri_get_unum(nvl, &unum) < 0) 357 return (-1); /* errno is set for us */ 358 359 if (nvlist_lookup_string_array(nvl, FM_FMRI_MEM_SERIAL_ID, &nvlserids, 360 &nnvlserids) != 0) { 361 /* 362 * Some mem scheme FMRIs don't have serial ids because 363 * either the platform does not support them, or because 364 * the FMRI was created before support for serial ids was 365 * introduced. If this is the case, assume it is there. 366 */ 367 if (mem.mem_dm == NULL) 368 return (1); 369 else 370 return (fmd_fmri_set_errno(EINVAL)); 371 } 372 373 /* 374 * Hypervisor will change the memconfig value when the mapping of 375 * pages to DIMMs changes, e.g. for change in DIMM size or interleave. 376 * If we detect such a change, we discard ereports associated with a 377 * previous memconfig value as invalid. 378 */ 379 380 if ((nvlist_lookup_uint64(nvl, FM_FMRI_MEM_MEMCONFIG, 381 &memconfig) == 0) && memconfig != mem.mem_memconfig) 382 return (0); 383 384 if (mem_get_serids_by_unum(unum, &serids, &nserids) < 0) { 385 if (errno == ENOTSUP) 386 return (1); /* assume it's there, no s/n support here */ 387 if (errno != ENOENT) { 388 /* 389 * Errors are only signalled to the caller if they're 390 * the caller's fault. This isn't - it's a failure on 391 * our part to burst or read the serial numbers. We'll 392 * whine about it, and tell the caller the named 393 * module(s) isn't/aren't there. 394 */ 395 fmd_fmri_warn("failed to retrieve serial number for " 396 "unum %s", unum); 397 } 398 return (0); 399 } 400 401 rc = serids_eq(serids, nserids, nvlserids, nnvlserids); 402 403 mem_strarray_free(serids, nserids); 404 405 return (rc); 406 } 407 408 int 409 fmd_fmri_contains(nvlist_t *er, nvlist_t *ee) 410 { 411 char *erunum, *eeunum; 412 uint64_t erpa = 0, eepa = 0; 413 414 if (mem_fmri_get_unum(er, &erunum) < 0 || 415 mem_fmri_get_unum(ee, &eeunum) < 0) 416 return (-1); /* errno is set for us */ 417 418 if (mem_unum_contains(erunum, eeunum) <= 0) 419 return (0); /* can't parse/match, so assume no containment */ 420 421 if (nvlist_lookup_uint64(er, FM_FMRI_MEM_PHYSADDR, &erpa) == 0) { 422 /* container has a PA; only match if containee has same PA */ 423 return (nvlist_lookup_uint64(ee, FM_FMRI_MEM_PHYSADDR, 424 &eepa) == 0 && erpa == eepa); 425 } 426 427 return (1); 428 } 429 430 int 431 fmd_fmri_unusable(nvlist_t *nvl) 432 { 433 uint64_t pageaddr; 434 uint8_t version; 435 int rc, err; 436 437 /* 438 * We can only make a usable/unusable determination for pages. FMRIs 439 * without page addresses will be reported as usable. 440 */ 441 442 if (nvlist_lookup_uint8(nvl, FM_VERSION, &version) != 0 || 443 version > FM_MEM_SCHEME_VERSION) 444 return (fmd_fmri_set_errno(EINVAL)); 445 446 if ((err = nvlist_lookup_uint64(nvl, FM_FMRI_MEM_PHYSADDR, 447 &pageaddr)) == ENOENT) 448 return (0); /* no page, so assume it's still usable */ 449 else if (err != 0) 450 return (fmd_fmri_set_errno(EINVAL)); 451 452 if ((rc = mem_page_cmd(MEM_PAGE_FMRI_ISRETIRED, nvl)) < 0 && 453 errno == EIO) { 454 return (0); /* the page wonders, "why all the fuss?" */ 455 } else if (rc == 0 || errno == EAGAIN || errno == EINVAL) { 456 /* 457 * The page has been retired, is in the process of being 458 * retired, or doesn't exist. The latter is valid if the page 459 * existed in the past but has been DR'd out. 460 */ 461 return (1); 462 } else { 463 /* 464 * Errors are only signalled to the caller if they're the 465 * caller's fault. This isn't - it's a failure of the 466 * retirement-check code. We'll whine about it and tell 467 * the caller the page is unusable. 468 */ 469 fmd_fmri_warn("failed to determine usability of page %llx", 470 pageaddr); 471 return (1); 472 } 473 } 474 475 int 476 fmd_fmri_init(void) 477 { 478 bzero(&mem, sizeof (mem_t)); 479 return (mem_discover()); 480 } 481 482 void 483 fmd_fmri_fini(void) 484 { 485 mem_destroy(); 486 } 487