xref: /titanic_41/usr/src/cmd/fm/schemes/mem/mem.c (revision 60405de4d8688d96dd05157c28db3ade5c9bc234)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <mem.h>
30 #include <fm/fmd_fmri.h>
31 
32 #include <fcntl.h>
33 #include <unistd.h>
34 #include <string.h>
35 #include <strings.h>
36 #include <time.h>
37 #include <sys/mem.h>
38 
#ifdef	sparc
#include <sys/fm/ldom.h>
/*
 * LDOM library handle for the mem scheme.  It is obtained from ldom_init()
 * in fmd_fmri_init() and handed back to ldom_fini() in fmd_fmri_fini().
 */
ldom_hdl_t *mem_scheme_lhp;
#endif	/* sparc */

/*
 * Global state of the mem scheme plugin.  The code in this file consults
 * its DIMM map list (mem_dm) and its memconfig value (mem_memconfig).
 */
mem_t mem;

#ifdef	sparc

/*
 * Defined outside this file.  mem_get_serids_from_mdesc() calls it when a
 * DIMM map entry is missing or stale and treats a non-zero return as failure.
 */
extern int mem_update_mdesc(void);

/*
 * Retry values for handling the case where the kernel is not yet ready
 * to provide DIMM serial ids.  Some platforms acquire DIMM serial id
 * information from their System Controller via a mailbox interface.
 * The values chosen are for 10 retries 3 seconds apart to approximate the
 * possible 30 second timeout length of a mailbox message request.
 */
#define	MAX_MEM_SID_RETRIES	10
#define	MEM_SID_RETRY_WAIT	3
59 
60 static mem_dimm_map_t *
61 dm_lookup(const char *name)
62 {
63 	mem_dimm_map_t *dm;
64 
65 	for (dm = mem.mem_dm; dm != NULL; dm = dm->dm_next) {
66 		if (strcmp(name, dm->dm_label) == 0)
67 			return (dm);
68 	}
69 
70 	return (NULL);
71 }
72 
73 /*
74  * Returns 0 with serial numbers if found, -1 (with errno set) for errors.  If
75  * the unum (or a component of same) wasn't found, -1 is returned with errno
76  * set to ENOENT.  If the kernel doesn't have support for serial numbers,
77  * -1 is returned with errno set to ENOTSUP.
78  */
79 static int
80 mem_get_serids_from_kernel(const char *unum, char ***seridsp, size_t *nseridsp)
81 {
82 	char **dimms, **serids;
83 	size_t ndimms, nserids;
84 	int i, rc = 0;
85 	int fd;
86 	int retries = MAX_MEM_SID_RETRIES;
87 	mem_name_t mn;
88 	struct timespec rqt;
89 
90 	if ((fd = open("/dev/mem", O_RDONLY)) < 0)
91 		return (-1);
92 
93 	if (mem_unum_burst(unum, &dimms, &ndimms) < 0) {
94 		(void) close(fd);
95 		return (-1); /* errno is set for us */
96 	}
97 
98 	serids = fmd_fmri_zalloc(sizeof (char *) * ndimms);
99 	nserids = ndimms;
100 
101 	bzero(&mn, sizeof (mn));
102 
103 	for (i = 0; i < ndimms; i++) {
104 		mn.m_namelen = strlen(dimms[i]) + 1;
105 		mn.m_sidlen = MEM_SERID_MAXLEN;
106 
107 		mn.m_name = fmd_fmri_alloc(mn.m_namelen);
108 		mn.m_sid = fmd_fmri_alloc(mn.m_sidlen);
109 
110 		(void) strcpy(mn.m_name, dimms[i]);
111 
112 		do {
113 			rc = ioctl(fd, MEM_SID, &mn);
114 
115 			if (rc >= 0 || errno != EAGAIN)
116 				break;
117 
118 			if (retries == 0) {
119 				errno = ETIMEDOUT;
120 				break;
121 			}
122 
123 			/*
124 			 * EAGAIN indicates the kernel is
125 			 * not ready to provide DIMM serial
126 			 * ids.  Sleep MEM_SID_RETRY_WAIT seconds
127 			 * and try again.
128 			 * nanosleep() is used instead of sleep()
129 			 * to avoid interfering with fmd timers.
130 			 */
131 			rqt.tv_sec = MEM_SID_RETRY_WAIT;
132 			rqt.tv_nsec = 0;
133 			(void) nanosleep(&rqt, NULL);
134 
135 		} while (retries--);
136 
137 		if (rc < 0) {
138 			/*
139 			 * ENXIO can happen if the kernel memory driver
140 			 * doesn't have the MEM_SID ioctl (e.g. if the
141 			 * kernel hasn't been patched to provide the
142 			 * support).
143 			 *
144 			 * If the MEM_SID ioctl is available but the
145 			 * particular platform doesn't support providing
146 			 * serial ids, ENOTSUP will be returned by the ioctl.
147 			 */
148 			if (errno == ENXIO)
149 				errno = ENOTSUP;
150 			fmd_fmri_free(mn.m_name, mn.m_namelen);
151 			fmd_fmri_free(mn.m_sid, mn.m_sidlen);
152 			mem_strarray_free(serids, nserids);
153 			mem_strarray_free(dimms, ndimms);
154 			(void) close(fd);
155 			return (-1);
156 		}
157 
158 		serids[i] = fmd_fmri_strdup(mn.m_sid);
159 
160 		fmd_fmri_free(mn.m_name, mn.m_namelen);
161 		fmd_fmri_free(mn.m_sid, mn.m_sidlen);
162 	}
163 
164 	mem_strarray_free(dimms, ndimms);
165 
166 	(void) close(fd);
167 
168 	*seridsp = serids;
169 	*nseridsp = nserids;
170 
171 	return (0);
172 }
173 
174 /*
175  * Returns 0 with serial numbers if found, -1 (with errno set) for errors.  If
176  * the unum (or a component of same) wasn't found, -1 is returned with errno
177  * set to ENOENT.
178  */
179 static int
180 mem_get_serids_from_cache(const char *unum, char ***seridsp, size_t *nseridsp)
181 {
182 	uint64_t drgen = fmd_fmri_get_drgen();
183 	char **dimms, **serids;
184 	size_t ndimms, nserids;
185 	mem_dimm_map_t *dm;
186 	int i, rc = 0;
187 
188 	if (mem_unum_burst(unum, &dimms, &ndimms) < 0)
189 		return (-1); /* errno is set for us */
190 
191 	serids = fmd_fmri_zalloc(sizeof (char *) * ndimms);
192 	nserids = ndimms;
193 
194 	for (i = 0; i < ndimms; i++) {
195 		if ((dm = dm_lookup(dimms[i])) == NULL) {
196 			rc = fmd_fmri_set_errno(EINVAL);
197 			break;
198 		}
199 
200 		if (*dm->dm_serid == '\0' || dm->dm_drgen != drgen) {
201 			/*
202 			 * We don't have a cached copy, or the copy we've got is
203 			 * out of date.  Look it up again.
204 			 */
205 			if (mem_get_serid(dm->dm_device, dm->dm_serid,
206 			    sizeof (dm->dm_serid)) < 0) {
207 				rc = -1; /* errno is set for us */
208 				break;
209 			}
210 
211 			dm->dm_drgen = drgen;
212 		}
213 
214 		serids[i] = fmd_fmri_strdup(dm->dm_serid);
215 	}
216 
217 	mem_strarray_free(dimms, ndimms);
218 
219 	if (rc == 0) {
220 		*seridsp = serids;
221 		*nseridsp = nserids;
222 	} else {
223 		mem_strarray_free(serids, nserids);
224 	}
225 
226 	return (rc);
227 }
228 
229 /*
230  * Returns 0 with serial numbers if found, -1 (with errno set) for errors.  If
231  * the unum (or a component of same) wasn't found, -1 is returned with errno
232  * set to ENOENT.
233  */
234 static int
235 mem_get_serids_from_mdesc(const char *unum, char ***seridsp, size_t *nseridsp)
236 {
237 	uint64_t drgen = fmd_fmri_get_drgen();
238 	char **dimms, **serids;
239 	size_t ndimms, nserids;
240 	mem_dimm_map_t *dm;
241 	int i, rc = 0;
242 
243 	if (mem_unum_burst(unum, &dimms, &ndimms) < 0)
244 		return (-1); /* errno is set for us */
245 
246 	serids = fmd_fmri_zalloc(sizeof (char *) * ndimms);
247 	nserids = ndimms;
248 
249 	/*
250 	 * first go through dimms and see if dm_drgen entries are outdated
251 	 */
252 	for (i = 0; i < ndimms; i++) {
253 		if ((dm = dm_lookup(dimms[i])) == NULL ||
254 		    dm->dm_drgen != drgen)
255 			break;
256 	}
257 
258 	if (i < ndimms && mem_update_mdesc() != 0) {
259 		mem_strarray_free(dimms, ndimms);
260 		return (-1);
261 	}
262 
263 	/*
264 	 * get to this point if an up-to-date mdesc (and corresponding
265 	 * entries in the global mem list) exists
266 	 */
267 	for (i = 0; i < ndimms; i++) {
268 		if ((dm = dm_lookup(dimms[i])) == NULL) {
269 			rc = fmd_fmri_set_errno(EINVAL);
270 			break;
271 		}
272 
273 		if (dm->dm_drgen != drgen)
274 			dm->dm_drgen = drgen;
275 
276 		/*
277 		 * mdesc and dm entry was updated by an earlier call to
278 		 * mem_update_mdesc, so we go ahead and dup the serid
279 		 */
280 		serids[i] = fmd_fmri_strdup(dm->dm_serid);
281 	}
282 
283 	mem_strarray_free(dimms, ndimms);
284 
285 	if (rc == 0) {
286 		*seridsp = serids;
287 		*nseridsp = nserids;
288 	} else {
289 		mem_strarray_free(serids, nserids);
290 	}
291 
292 	return (rc);
293 }
294 
295 #endif	/* sparc */
296 
/*
 * Look up the serial ids for 'unum', choosing the source by platform.
 * Without sparc support compiled in, this always fails with errno set to
 * ENOTSUP.  Returns 0 on success, -1 with errno set otherwise.
 */
/*ARGSUSED*/
static int
mem_get_serids_by_unum(const char *unum, char ***seridsp, size_t *nseridsp)
{
#ifdef	sparc
	/*
	 * Some platforms do not support the caching of serial ids by the
	 * mem scheme plugin but instead support making serial ids available
	 * via the kernel.  An empty DIMM map means there is nothing cached,
	 * so go straight to the kernel.
	 */
	if (mem.mem_dm == NULL)
		return (mem_get_serids_from_kernel(unum, seridsp, nseridsp));

	/* Prefer the mdesc-backed lookup; fall back to the cache path. */
	if (mem_get_serids_from_mdesc(unum, seridsp, nseridsp) != 0)
		return (mem_get_serids_from_cache(unum, seridsp, nseridsp));

	return (0);
#else
	errno = ENOTSUP;
	return (-1);
#endif	/* sparc */
}
318 
319 static int
320 mem_fmri_get_unum(nvlist_t *nvl, char **unump)
321 {
322 	uint8_t version;
323 	char *unum;
324 
325 	if (nvlist_lookup_uint8(nvl, FM_VERSION, &version) != 0 ||
326 	    version > FM_MEM_SCHEME_VERSION ||
327 	    nvlist_lookup_string(nvl, FM_FMRI_MEM_UNUM, &unum) != 0)
328 		return (fmd_fmri_set_errno(EINVAL));
329 
330 	*unump = unum;
331 
332 	return (0);
333 }
334 
335 ssize_t
336 fmd_fmri_nvl2str(nvlist_t *nvl, char *buf, size_t buflen)
337 {
338 	char format[64];
339 	ssize_t size, presz;
340 	char *rawunum, *preunum, *escunum, *prefix;
341 	uint64_t val;
342 	int i;
343 
344 	if (mem_fmri_get_unum(nvl, &rawunum) < 0)
345 		return (-1); /* errno is set for us */
346 
347 	/*
348 	 * If we have a well-formed unum (hc-FMRI), use the string verbatim
349 	 * to form the initial mem:/// components.  Otherwise use unum=%s.
350 	 */
351 	if (strncmp(rawunum, "hc:///", 6) != 0)
352 		prefix = FM_FMRI_MEM_UNUM "=";
353 	else
354 		prefix = "";
355 
356 	/*
357 	 * If we have a DIMM offset, include it in the string.  If we have a PA
358 	 * then use that.  Otherwise just format the unum element.
359 	 */
360 	if (nvlist_lookup_uint64(nvl, FM_FMRI_MEM_OFFSET, &val) == 0) {
361 		(void) snprintf(format, sizeof (format),
362 		    "%s:///%s%%1$s/%s=%%2$llx",
363 		    FM_FMRI_SCHEME_MEM, prefix, FM_FMRI_MEM_OFFSET);
364 	} else if (nvlist_lookup_uint64(nvl, FM_FMRI_MEM_PHYSADDR, &val) == 0) {
365 		(void) snprintf(format, sizeof (format),
366 		    "%s:///%s%%1$s/%s=%%2$llx",
367 		    FM_FMRI_SCHEME_MEM, prefix, FM_FMRI_MEM_PHYSADDR);
368 	} else {
369 		(void) snprintf(format, sizeof (format),
370 		    "%s:///%s%%1$s", FM_FMRI_SCHEME_MEM, prefix);
371 	}
372 
373 	/*
374 	 * If we have a well-formed unum (hc-FMRI), leave it as is.
375 	 * Otherwise, the spaces and colons will be escaped,
376 	 * rendering the resulting FMRI pretty much unreadable.
377 	 * We're therefore going to do some escaping of our own first.
378 	 */
379 	if (strncmp(rawunum, "hc:///", 6) == 0) {
380 		/* LINTED: variable format specifier */
381 		size = snprintf(buf, buflen, format, rawunum + 6, val);
382 	} else {
383 		preunum = fmd_fmri_strdup(rawunum);
384 		presz = strlen(preunum) + 1;
385 
386 		for (i = 0; i < presz - 1; i++) {
387 			if (preunum[i] == ':' && preunum[i + 1] == ' ') {
388 				bcopy(preunum + i + 2, preunum + i + 1,
389 				    presz - (i + 2));
390 			} else if (preunum[i] == ' ') {
391 				preunum[i] = ',';
392 			}
393 		}
394 
395 		escunum = fmd_fmri_strescape(preunum);
396 		fmd_fmri_free(preunum, presz);
397 
398 		/* LINTED: variable format specifier */
399 		size = snprintf(buf, buflen, format, escunum, val);
400 		fmd_fmri_strfree(escunum);
401 	}
402 
403 	return (size);
404 }
405 
406 int
407 fmd_fmri_expand(nvlist_t *nvl)
408 {
409 	char *unum, **serids;
410 	uint_t nnvlserids;
411 	size_t nserids;
412 	int rc;
413 
414 	if (mem_fmri_get_unum(nvl, &unum) < 0)
415 		return (fmd_fmri_set_errno(EINVAL));
416 
417 	if ((rc = nvlist_lookup_string_array(nvl, FM_FMRI_MEM_SERIAL_ID,
418 	    &serids, &nnvlserids)) == 0)
419 		return (0); /* fmri is already expanded */
420 	else if (rc != ENOENT)
421 		return (fmd_fmri_set_errno(EINVAL));
422 
423 	if (mem_get_serids_by_unum(unum, &serids, &nserids) < 0) {
424 		/* errno is set for us */
425 		if (errno == ENOTSUP)
426 			return (0); /* nothing to add - no s/n support */
427 		else
428 			return (-1);
429 	}
430 
431 	rc = nvlist_add_string_array(nvl, FM_FMRI_MEM_SERIAL_ID, serids,
432 	    nserids);
433 
434 	mem_strarray_free(serids, nserids);
435 
436 	if (rc != 0)
437 		return (fmd_fmri_set_errno(EINVAL));
438 
439 	return (0);
440 }
441 
442 static int
443 serids_eq(char **serids1, uint_t nserids1, char **serids2, uint_t nserids2)
444 {
445 	int i;
446 
447 	if (nserids1 != nserids2)
448 		return (0);
449 
450 	for (i = 0; i < nserids1; i++) {
451 		if (strcmp(serids1[i], serids2[i]) != 0)
452 			return (0);
453 	}
454 
455 	return (1);
456 }
457 
458 int
459 fmd_fmri_present(nvlist_t *nvl)
460 {
461 	char *unum, **nvlserids, **serids;
462 	uint_t nnvlserids;
463 	size_t nserids;
464 	uint64_t memconfig;
465 	int rc;
466 
467 	if (mem_fmri_get_unum(nvl, &unum) < 0)
468 		return (-1); /* errno is set for us */
469 
470 	if (nvlist_lookup_string_array(nvl, FM_FMRI_MEM_SERIAL_ID, &nvlserids,
471 	    &nnvlserids) != 0) {
472 		/*
473 		 * Some mem scheme FMRIs don't have serial ids because
474 		 * either the platform does not support them, or because
475 		 * the FMRI was created before support for serial ids was
476 		 * introduced.  If this is the case, assume it is there.
477 		 */
478 		if (mem.mem_dm == NULL)
479 			return (1);
480 		else
481 			return (fmd_fmri_set_errno(EINVAL));
482 	}
483 
484 	/*
485 	 * Hypervisor will change the memconfig value when the mapping of
486 	 * pages to DIMMs changes, e.g. for change in DIMM size or interleave.
487 	 * If we detect such a change, we discard ereports associated with a
488 	 * previous memconfig value as invalid.
489 	 *
490 	 * The test (mem.mem_memconfig != 0) means we run on a system that
491 	 * actually suplies a memconfig value.
492 	 */
493 
494 	if ((nvlist_lookup_uint64(nvl, FM_FMRI_MEM_MEMCONFIG,
495 	    &memconfig) == 0) && (mem.mem_memconfig != 0) &&
496 	    (memconfig != mem.mem_memconfig))
497 		return (0);
498 
499 	if (mem_get_serids_by_unum(unum, &serids, &nserids) < 0) {
500 		if (errno == ENOTSUP)
501 			return (1); /* assume it's there, no s/n support here */
502 		if (errno != ENOENT) {
503 			/*
504 			 * Errors are only signalled to the caller if they're
505 			 * the caller's fault.  This isn't - it's a failure on
506 			 * our part to burst or read the serial numbers.  We'll
507 			 * whine about it, and tell the caller the named
508 			 * module(s) isn't/aren't there.
509 			 */
510 			fmd_fmri_warn("failed to retrieve serial number for "
511 			    "unum %s", unum);
512 		}
513 		return (0);
514 	}
515 
516 	rc = serids_eq(serids, nserids, nvlserids, nnvlserids);
517 
518 	mem_strarray_free(serids, nserids);
519 
520 	return (rc);
521 }
522 
523 int
524 fmd_fmri_contains(nvlist_t *er, nvlist_t *ee)
525 {
526 	char *erunum, *eeunum;
527 	uint64_t erval = 0, eeval = 0;
528 
529 	if (mem_fmri_get_unum(er, &erunum) < 0 ||
530 	    mem_fmri_get_unum(ee, &eeunum) < 0)
531 		return (-1); /* errno is set for us */
532 
533 	if (mem_unum_contains(erunum, eeunum) <= 0)
534 		return (0); /* can't parse/match, so assume no containment */
535 
536 	if (nvlist_lookup_uint64(er, FM_FMRI_MEM_OFFSET, &erval) == 0) {
537 		return (nvlist_lookup_uint64(ee,
538 		    FM_FMRI_MEM_OFFSET, &eeval) == 0 && erval == eeval);
539 	}
540 
541 	if (nvlist_lookup_uint64(er, FM_FMRI_MEM_PHYSADDR, &erval) == 0) {
542 		return (nvlist_lookup_uint64(ee,
543 		    FM_FMRI_MEM_PHYSADDR, &eeval) == 0 && erval == eeval);
544 	}
545 
546 	return (1);
547 }
548 
549 /*
550  * We can only make a usable/unusable determination for pages.  Mem FMRIs
551  * without page addresses will be reported as usable since Solaris has no
552  * way at present to dynamically disable an entire DIMM or DIMM pair.
553  */
554 int
555 fmd_fmri_unusable(nvlist_t *nvl)
556 {
557 	uint64_t val;
558 	uint8_t version;
559 	int rc, err1, err2;
560 	nvlist_t *nvlcp = NULL;
561 	int retval;
562 
563 	if (nvlist_lookup_uint8(nvl, FM_VERSION, &version) != 0 ||
564 	    version > FM_MEM_SCHEME_VERSION)
565 		return (fmd_fmri_set_errno(EINVAL));
566 
567 	err1 = nvlist_lookup_uint64(nvl, FM_FMRI_MEM_OFFSET, &val);
568 	err2 = nvlist_lookup_uint64(nvl, FM_FMRI_MEM_PHYSADDR, &val);
569 
570 	if (err1 == ENOENT && err2 == ENOENT)
571 		return (0); /* no page, so assume it's still usable */
572 
573 	if ((err1 != 0 && err1 != ENOENT) || (err2 != 0 && err2 != ENOENT))
574 		return (fmd_fmri_set_errno(EINVAL));
575 
576 	if ((err1 = mem_unum_rewrite(nvl, &nvlcp)) != 0)
577 		return (fmd_fmri_set_errno(err1));
578 
579 	/*
580 	 * Ask the kernel if the page is retired, using either the rewritten
581 	 * hc FMRI or the original mem FMRI with the specified offset or PA.
582 	 * Refer to the kernel's page_retire_check() for the error codes.
583 	 */
584 	rc = mem_page_cmd(MEM_PAGE_FMRI_ISRETIRED, nvlcp ? nvlcp : nvl);
585 
586 	if (rc == -1 && errno == EIO) {
587 		/*
588 		 * The page is not retired and is not scheduled for retirement
589 		 * (i.e. no request pending and has not seen any errors)
590 		 */
591 		retval = 0;
592 	} else if (rc == 0 || errno == EAGAIN || errno == EINVAL) {
593 		/*
594 		 * The page has been retired, is in the process of being
595 		 * retired, or doesn't exist.  The latter is valid if the page
596 		 * existed in the past but has been DR'd out.
597 		 */
598 		retval = 1;
599 	} else {
600 		/*
601 		 * Errors are only signalled to the caller if they're the
602 		 * caller's fault.  This isn't - it's a failure of the
603 		 * retirement-check code.  We'll whine about it and tell
604 		 * the caller the page is unusable.
605 		 */
606 		fmd_fmri_warn("failed to determine page %s=%llx usability: "
607 		    "rc=%d errno=%d\n", err1 == 0 ? FM_FMRI_MEM_OFFSET :
608 		    FM_FMRI_MEM_PHYSADDR, (u_longlong_t)val, rc, errno);
609 		retval = 1;
610 	}
611 
612 	if (nvlcp)
613 		nvlist_free(nvlcp);
614 
615 	return (retval);
616 }
617 
/*
 * Scheme initialization entry point.  On sparc the LDOM library handle is
 * set up first, using the fmd_fmri allocator routines; the result of
 * mem_discover() is then returned to the caller.
 */
int
fmd_fmri_init(void)
{
	int status;

#ifdef	sparc
	mem_scheme_lhp = ldom_init(fmd_fmri_alloc, fmd_fmri_free);
#endif	/* sparc */

	status = mem_discover();

	return (status);
}
626 
627 void
628 fmd_fmri_fini(void)
629 {
630 	mem_dimm_map_t *dm, *em;
631 
632 	for (dm = mem.mem_dm; dm != NULL; dm = em) {
633 		em = dm->dm_next;
634 		fmd_fmri_strfree(dm->dm_label);
635 		fmd_fmri_strfree(dm->dm_device);
636 		fmd_fmri_free(dm, sizeof (mem_dimm_map_t));
637 	}
638 #ifdef	sparc
639 	ldom_fini(mem_scheme_lhp);
640 #endif	/* sparc */
641 }
642