1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <mem.h> 30 #include <fm/fmd_fmri.h> 31 32 #include <fcntl.h> 33 #include <unistd.h> 34 #include <string.h> 35 #include <strings.h> 36 #include <time.h> 37 #include <sys/mem.h> 38 39 /* 40 * The scheme plugin for mem FMRIs. 41 */ 42 43 mem_t mem; 44 45 /* 46 * Retry values for handling the case where the kernel is not yet ready 47 * to provide DIMM serial ids. Some platforms acquire DIMM serial id 48 * information from their System Controller via a mailbox interface. 49 * The values chosen are for 10 retries 3 seconds apart to approximate the 50 * possible 30 second timeout length of a mailbox message request. 51 */ 52 #define MAX_MEM_SID_RETRIES 10 53 #define MEM_SID_RETRY_WAIT 3 54 55 static mem_dimm_map_t * 56 dm_lookup(const char *name) 57 { 58 mem_dimm_map_t *dm; 59 60 for (dm = mem.mem_dm; dm != NULL; dm = dm->dm_next) { 61 if (strcmp(name, dm->dm_label) == 0) 62 return (dm); 63 } 64 65 return (NULL); 66 } 67 68 /* 69 * Returns 0 with serial numbers if found, -1 (with errno set) for errors. If 70 * the unum (or a component of same) wasn't found, -1 is returned with errno 71 * set to ENOENT. If the kernel doesn't have support for serial numbers, 72 * -1 is returned with errno set to ENOTSUP. 73 */ 74 static int 75 mem_get_serids_from_kernel(const char *unum, char ***seridsp, size_t *nseridsp) 76 { 77 char **dimms, **serids; 78 size_t ndimms, nserids; 79 int i, rc = 0; 80 int fd; 81 int retries = MAX_MEM_SID_RETRIES; 82 mem_name_t mn; 83 struct timespec rqt; 84 85 if ((fd = open("/dev/mem", O_RDONLY)) < 0) 86 return (-1); 87 88 if (mem_unum_burst(unum, &dimms, &ndimms) < 0) { 89 (void) close(fd); 90 return (-1); /* errno is set for us */ 91 } 92 93 serids = fmd_fmri_zalloc(sizeof (char *) * ndimms); 94 nserids = ndimms; 95 96 bzero(&mn, sizeof (mn)); 97 98 for (i = 0; i < ndimms; i++) { 99 mn.m_namelen = strlen(dimms[i]) + 1; 100 mn.m_sidlen = MEM_SERID_MAXLEN; 101 102 mn.m_name = fmd_fmri_alloc(mn.m_namelen); 103 mn.m_sid = fmd_fmri_alloc(mn.m_sidlen); 104 105 (void) strcpy(mn.m_name, dimms[i]); 106 107 do { 108 rc = ioctl(fd, MEM_SID, &mn); 109 110 if (rc >= 0 || errno != EAGAIN) 111 break; 112 113 if (retries == 0) { 114 errno = ETIMEDOUT; 115 break; 116 } 117 118 /* 119 * EAGAIN indicates the kernel is 120 * not ready to provide DIMM serial 121 * ids. Sleep MEM_SID_RETRY_WAIT seconds 122 * and try again. 123 * nanosleep() is used instead of sleep() 124 * to avoid interfering with fmd timers. 125 */ 126 rqt.tv_sec = MEM_SID_RETRY_WAIT; 127 rqt.tv_nsec = 0; 128 (void) nanosleep(&rqt, NULL); 129 130 } while (retries--); 131 132 if (rc < 0) { 133 /* 134 * ENXIO can happen if the kernel memory driver 135 * doesn't have the MEM_SID ioctl (e.g. if the 136 * kernel hasn't been patched to provide the 137 * support). 138 * 139 * If the MEM_SID ioctl is available but the 140 * particular platform doesn't support providing 141 * serial ids, ENOTSUP will be returned by the ioctl. 142 */ 143 if (errno == ENXIO) 144 errno = ENOTSUP; 145 fmd_fmri_free(mn.m_name, mn.m_namelen); 146 fmd_fmri_free(mn.m_sid, mn.m_sidlen); 147 mem_strarray_free(serids, nserids); 148 mem_strarray_free(dimms, ndimms); 149 (void) close(fd); 150 return (-1); 151 } 152 153 serids[i] = fmd_fmri_strdup(mn.m_sid); 154 155 fmd_fmri_free(mn.m_name, mn.m_namelen); 156 fmd_fmri_free(mn.m_sid, mn.m_sidlen); 157 } 158 159 mem_strarray_free(dimms, ndimms); 160 161 (void) close(fd); 162 163 *seridsp = serids; 164 *nseridsp = nserids; 165 166 return (0); 167 } 168 169 /* 170 * Returns 0 with serial numbers if found, -1 (with errno set) for errors. If 171 * the unum (or a component of same) wasn't found, -1 is returned with errno 172 * set to ENOENT. 173 */ 174 static int 175 mem_get_serids_from_cache(const char *unum, char ***seridsp, size_t *nseridsp) 176 { 177 uint64_t drgen = fmd_fmri_get_drgen(); 178 char **dimms, **serids; 179 size_t ndimms, nserids; 180 mem_dimm_map_t *dm; 181 int i, rc = 0; 182 183 if (mem_unum_burst(unum, &dimms, &ndimms) < 0) 184 return (-1); /* errno is set for us */ 185 186 serids = fmd_fmri_zalloc(sizeof (char *) * ndimms); 187 nserids = ndimms; 188 189 for (i = 0; i < ndimms; i++) { 190 if ((dm = dm_lookup(dimms[i])) == NULL) { 191 rc = fmd_fmri_set_errno(EINVAL); 192 break; 193 } 194 195 if (*dm->dm_serid == '\0' || dm->dm_drgen != drgen) { 196 /* 197 * We don't have a cached copy, or the copy we've got is 198 * out of date. Look it up again. 199 */ 200 if (mem_get_serid(dm->dm_device, dm->dm_serid, 201 sizeof (dm->dm_serid)) < 0) { 202 rc = -1; /* errno is set for us */ 203 break; 204 } 205 206 dm->dm_drgen = drgen; 207 } 208 209 serids[i] = fmd_fmri_strdup(dm->dm_serid); 210 } 211 212 mem_strarray_free(dimms, ndimms); 213 214 if (rc == 0) { 215 *seridsp = serids; 216 *nseridsp = nserids; 217 } else { 218 mem_strarray_free(serids, nserids); 219 } 220 221 return (rc); 222 } 223 224 static int 225 mem_get_serids_by_unum(const char *unum, char ***seridsp, size_t *nseridsp) 226 { 227 /* 228 * Some platforms do not support the caching of serial ids by the 229 * mem scheme plugin but instead support making serial ids available 230 * via the kernel. 231 */ 232 if (mem.mem_dm == NULL) 233 return (mem_get_serids_from_kernel(unum, seridsp, nseridsp)); 234 else 235 return (mem_get_serids_from_cache(unum, seridsp, nseridsp)); 236 } 237 238 static int 239 mem_fmri_get_unum(nvlist_t *nvl, char **unump) 240 { 241 uint8_t version; 242 char *unum; 243 244 if (nvlist_lookup_uint8(nvl, FM_VERSION, &version) != 0 || 245 version > FM_MEM_SCHEME_VERSION || 246 nvlist_lookup_string(nvl, FM_FMRI_MEM_UNUM, &unum) != 0) 247 return (fmd_fmri_set_errno(EINVAL)); 248 249 *unump = unum; 250 251 return (0); 252 } 253 254 ssize_t 255 fmd_fmri_nvl2str(nvlist_t *nvl, char *buf, size_t buflen) 256 { 257 const char *fmt = "mem:///component=%1$s"; 258 ssize_t size, presz; 259 uint64_t pa; 260 char *rawunum, *preunum, *escunum; 261 int i; 262 263 if (mem_fmri_get_unum(nvl, &rawunum) < 0) 264 return (-1); /* errno is set for us */ 265 266 if (nvlist_lookup_uint64(nvl, FM_FMRI_MEM_PHYSADDR, &pa) == 0) 267 fmt = "mem:///pa=%2$llx/component=%1$s"; 268 269 /* 270 * If we leave the unum as-is, the spaces and colons will be escaped, 271 * rendering the resulting FMRI pretty much unreadable. We're therefore 272 * going to do some escaping of our own first. 273 */ 274 preunum = fmd_fmri_strdup(rawunum); 275 presz = strlen(preunum) + 1; 276 277 for (i = 0; i < presz - 1; i++) { 278 if (preunum[i] == ':' && preunum[i + 1] == ' ') { 279 bcopy(preunum + i + 2, preunum + i + 1, 280 presz - (i + 2)); 281 } else if (preunum[i] == ' ') { 282 preunum[i] = ','; 283 } 284 } 285 286 escunum = fmd_fmri_strescape(preunum); 287 fmd_fmri_free(preunum, presz); 288 289 size = snprintf(buf, buflen, fmt, escunum, (u_longlong_t)pa); 290 fmd_fmri_strfree(escunum); 291 292 return (size); 293 } 294 295 int 296 fmd_fmri_expand(nvlist_t *nvl) 297 { 298 char *unum, **serids; 299 uint_t nserids; 300 int rc; 301 302 if (mem_fmri_get_unum(nvl, &unum) < 0) 303 return (fmd_fmri_set_errno(EINVAL)); 304 305 if ((rc = nvlist_lookup_string_array(nvl, FM_FMRI_MEM_SERIAL_ID, 306 &serids, &nserids)) == 0) 307 return (0); /* fmri is already expanded */ 308 else if (rc != ENOENT) 309 return (fmd_fmri_set_errno(EINVAL)); 310 311 if (mem_get_serids_by_unum(unum, &serids, &nserids) < 0) { 312 /* errno is set for us */ 313 if (errno == ENOTSUP) 314 return (0); /* nothing to add - no s/n support */ 315 else 316 return (-1); 317 } 318 319 rc = nvlist_add_string_array(nvl, FM_FMRI_MEM_SERIAL_ID, serids, 320 nserids); 321 322 mem_strarray_free(serids, nserids); 323 324 if (rc != 0) 325 return (fmd_fmri_set_errno(EINVAL)); 326 327 return (0); 328 } 329 330 static int 331 serids_eq(char **serids1, uint_t nserids1, char **serids2, uint_t nserids2) 332 { 333 int i; 334 335 if (nserids1 != nserids2) 336 return (0); 337 338 for (i = 0; i < nserids1; i++) { 339 if (strcmp(serids1[i], serids2[i]) != 0) 340 return (0); 341 } 342 343 return (1); 344 } 345 346 int 347 fmd_fmri_present(nvlist_t *nvl) 348 { 349 char *unum, **nvlserids, **serids; 350 uint_t nnvlserids, nserids; 351 uint64_t memconfig; 352 int rc; 353 354 if (mem_fmri_get_unum(nvl, &unum) < 0) 355 return (-1); /* errno is set for us */ 356 357 if (nvlist_lookup_string_array(nvl, FM_FMRI_MEM_SERIAL_ID, &nvlserids, 358 &nnvlserids) != 0) { 359 /* 360 * Some mem scheme FMRIs don't have serial ids because 361 * either the platform does not support them, or because 362 * the FMRI was created before support for serial ids was 363 * introduced. If this is the case, assume it is there. 364 */ 365 if (mem.mem_dm == NULL) 366 return (1); 367 else 368 return (fmd_fmri_set_errno(EINVAL)); 369 } 370 371 /* 372 * Hypervisor will change the memconfig value when the mapping of 373 * pages to DIMMs changes, e.g. for change in DIMM size or interleave. 374 * If we detect such a change, we discard ereports associated with a 375 * previous memconfig value as invalid. 376 */ 377 378 if ((nvlist_lookup_uint64(nvl, FM_FMRI_MEM_MEMCONFIG, 379 &memconfig) == 0) && memconfig != mem.mem_memconfig) 380 return (0); 381 382 if (mem_get_serids_by_unum(unum, &serids, &nserids) < 0) { 383 if (errno == ENOTSUP) 384 return (1); /* assume it's there, no s/n support here */ 385 if (errno != ENOENT) { 386 /* 387 * Errors are only signalled to the caller if they're 388 * the caller's fault. This isn't - it's a failure on 389 * our part to burst or read the serial numbers. We'll 390 * whine about it, and tell the caller the named 391 * module(s) isn't/aren't there. 392 */ 393 fmd_fmri_warn("failed to retrieve serial number for " 394 "unum %s", unum); 395 } 396 return (0); 397 } 398 399 rc = serids_eq(serids, nserids, nvlserids, nnvlserids); 400 401 mem_strarray_free(serids, nserids); 402 403 return (rc); 404 } 405 406 int 407 fmd_fmri_contains(nvlist_t *er, nvlist_t *ee) 408 { 409 char *erunum, *eeunum; 410 uint64_t erpa = 0, eepa = 0; 411 412 if (mem_fmri_get_unum(er, &erunum) < 0 || 413 mem_fmri_get_unum(ee, &eeunum) < 0) 414 return (-1); /* errno is set for us */ 415 416 if (mem_unum_contains(erunum, eeunum) <= 0) 417 return (0); /* can't parse/match, so assume no containment */ 418 419 if (nvlist_lookup_uint64(er, FM_FMRI_MEM_PHYSADDR, &erpa) == 0) { 420 /* container has a PA; only match if containee has same PA */ 421 return (nvlist_lookup_uint64(ee, FM_FMRI_MEM_PHYSADDR, 422 &eepa) == 0 && erpa == eepa); 423 } 424 425 return (1); 426 } 427 428 int 429 fmd_fmri_unusable(nvlist_t *nvl) 430 { 431 uint64_t pageaddr; 432 uint8_t version; 433 int rc, err; 434 435 /* 436 * We can only make a usable/unusable determination for pages. FMRIs 437 * without page addresses will be reported as usable. 438 */ 439 440 if (nvlist_lookup_uint8(nvl, FM_VERSION, &version) != 0 || 441 version > FM_MEM_SCHEME_VERSION) 442 return (fmd_fmri_set_errno(EINVAL)); 443 444 if ((err = nvlist_lookup_uint64(nvl, FM_FMRI_MEM_PHYSADDR, 445 &pageaddr)) == ENOENT) 446 return (0); /* no page, so assume it's still usable */ 447 else if (err != 0) 448 return (fmd_fmri_set_errno(EINVAL)); 449 450 if ((rc = mem_page_cmd(MEM_PAGE_FMRI_ISRETIRED, nvl)) < 0 && 451 errno == EIO) { 452 return (0); /* the page wonders, "why all the fuss?" */ 453 } else if (rc == 0 || errno == EAGAIN || errno == EINVAL) { 454 /* 455 * The page has been retired, is in the process of being 456 * retired, or doesn't exist. The latter is valid if the page 457 * existed in the past but has been DR'd out. 458 */ 459 return (1); 460 } else { 461 /* 462 * Errors are only signalled to the caller if they're the 463 * caller's fault. This isn't - it's a failure of the 464 * retirement-check code. We'll whine about it and tell 465 * the caller the page is unusable. 466 */ 467 fmd_fmri_warn("failed to determine usability of page %llx", 468 pageaddr); 469 return (1); 470 } 471 } 472 473 int 474 fmd_fmri_init(void) 475 { 476 bzero(&mem, sizeof (mem_t)); 477 return (mem_discover()); 478 } 479 480 void 481 fmd_fmri_fini(void) 482 { 483 mem_destroy(); 484 } 485