xref: /titanic_44/usr/src/cmd/fm/schemes/mem/mem.c (revision 2fcbc377041d659446ded306a92901b4b0753b68)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <mem.h>
30 #include <fm/fmd_fmri.h>
31 
32 #include <fcntl.h>
33 #include <unistd.h>
34 #include <string.h>
35 #include <strings.h>
36 #include <time.h>
37 #include <sys/mem.h>
38 
#ifdef	sparc
#include <sys/fm/ldom.h>
/*
 * LDOM library handle.  Assigned from ldom_init() in fmd_fmri_init() and
 * passed to ldom_fini() in fmd_fmri_fini().
 */
ldom_hdl_t *mem_scheme_lhp;
#endif	/* sparc */

/*
 * Global state for the mem scheme.  Within this file, mem.mem_dm is used as
 * the head of the DIMM map list and mem.mem_memconfig is compared against
 * the memconfig value carried in an FMRI.
 */
mem_t mem;
45 
46 #ifdef	sparc
47 
/*
 * Defined outside this file.  mem_get_serids_from_mdesc() calls it when a
 * DIMM entry is missing or out of date, and treats a non-zero return as
 * failure.
 */
extern int mem_update_mdesc(void);

/*
 * Retry values for handling the case where the kernel is not yet ready
 * to provide DIMM serial ids.  Some platforms acquire DIMM serial id
 * information from their System Controller via a mailbox interface.
 * The values chosen are for 10 retries 3 seconds apart to approximate the
 * possible 30 second timeout length of a mailbox message request.
 */
#define	MAX_MEM_SID_RETRIES	10	/* retries after the first attempt */
#define	MEM_SID_RETRY_WAIT	3	/* seconds slept between attempts */
59 
60 static mem_dimm_map_t *
61 dm_lookup(const char *name)
62 {
63 	mem_dimm_map_t *dm;
64 
65 	for (dm = mem.mem_dm; dm != NULL; dm = dm->dm_next) {
66 		if (strcmp(name, dm->dm_label) == 0)
67 			return (dm);
68 	}
69 
70 	return (NULL);
71 }
72 
73 /*
74  * Returns 0 with serial numbers if found, -1 (with errno set) for errors.  If
75  * the unum (or a component of same) wasn't found, -1 is returned with errno
76  * set to ENOENT.  If the kernel doesn't have support for serial numbers,
77  * -1 is returned with errno set to ENOTSUP.
78  */
79 static int
80 mem_get_serids_from_kernel(const char *unum, char ***seridsp, size_t *nseridsp)
81 {
82 	char **dimms, **serids;
83 	size_t ndimms, nserids;
84 	int i, rc = 0;
85 	int fd;
86 	int retries = MAX_MEM_SID_RETRIES;
87 	mem_name_t mn;
88 	struct timespec rqt;
89 
90 	if ((fd = open("/dev/mem", O_RDONLY)) < 0)
91 		return (-1);
92 
93 	if (mem_unum_burst(unum, &dimms, &ndimms) < 0) {
94 		(void) close(fd);
95 		return (-1); /* errno is set for us */
96 	}
97 
98 	serids = fmd_fmri_zalloc(sizeof (char *) * ndimms);
99 	nserids = ndimms;
100 
101 	bzero(&mn, sizeof (mn));
102 
103 	for (i = 0; i < ndimms; i++) {
104 		mn.m_namelen = strlen(dimms[i]) + 1;
105 		mn.m_sidlen = MEM_SERID_MAXLEN;
106 
107 		mn.m_name = fmd_fmri_alloc(mn.m_namelen);
108 		mn.m_sid = fmd_fmri_alloc(mn.m_sidlen);
109 
110 		(void) strcpy(mn.m_name, dimms[i]);
111 
112 		do {
113 			rc = ioctl(fd, MEM_SID, &mn);
114 
115 			if (rc >= 0 || errno != EAGAIN)
116 				break;
117 
118 			if (retries == 0) {
119 				errno = ETIMEDOUT;
120 				break;
121 			}
122 
123 			/*
124 			 * EAGAIN indicates the kernel is
125 			 * not ready to provide DIMM serial
126 			 * ids.  Sleep MEM_SID_RETRY_WAIT seconds
127 			 * and try again.
128 			 * nanosleep() is used instead of sleep()
129 			 * to avoid interfering with fmd timers.
130 			 */
131 			rqt.tv_sec = MEM_SID_RETRY_WAIT;
132 			rqt.tv_nsec = 0;
133 			(void) nanosleep(&rqt, NULL);
134 
135 		} while (retries--);
136 
137 		if (rc < 0) {
138 			/*
139 			 * ENXIO can happen if the kernel memory driver
140 			 * doesn't have the MEM_SID ioctl (e.g. if the
141 			 * kernel hasn't been patched to provide the
142 			 * support).
143 			 *
144 			 * If the MEM_SID ioctl is available but the
145 			 * particular platform doesn't support providing
146 			 * serial ids, ENOTSUP will be returned by the ioctl.
147 			 */
148 			if (errno == ENXIO)
149 				errno = ENOTSUP;
150 			fmd_fmri_free(mn.m_name, mn.m_namelen);
151 			fmd_fmri_free(mn.m_sid, mn.m_sidlen);
152 			mem_strarray_free(serids, nserids);
153 			mem_strarray_free(dimms, ndimms);
154 			(void) close(fd);
155 			return (-1);
156 		}
157 
158 		serids[i] = fmd_fmri_strdup(mn.m_sid);
159 
160 		fmd_fmri_free(mn.m_name, mn.m_namelen);
161 		fmd_fmri_free(mn.m_sid, mn.m_sidlen);
162 	}
163 
164 	mem_strarray_free(dimms, ndimms);
165 
166 	(void) close(fd);
167 
168 	*seridsp = serids;
169 	*nseridsp = nserids;
170 
171 	return (0);
172 }
173 
174 /*
175  * Returns 0 with serial numbers if found, -1 (with errno set) for errors.  If
176  * the unum (or a component of same) wasn't found, -1 is returned with errno
177  * set to ENOENT.
178  */
179 static int
180 mem_get_serids_from_cache(const char *unum, char ***seridsp, size_t *nseridsp)
181 {
182 	uint64_t drgen = fmd_fmri_get_drgen();
183 	char **dimms, **serids;
184 	size_t ndimms, nserids;
185 	mem_dimm_map_t *dm;
186 	int i, rc = 0;
187 
188 	if (mem_unum_burst(unum, &dimms, &ndimms) < 0)
189 		return (-1); /* errno is set for us */
190 
191 	serids = fmd_fmri_zalloc(sizeof (char *) * ndimms);
192 	nserids = ndimms;
193 
194 	for (i = 0; i < ndimms; i++) {
195 		if ((dm = dm_lookup(dimms[i])) == NULL) {
196 			rc = fmd_fmri_set_errno(EINVAL);
197 			break;
198 		}
199 
200 		if (*dm->dm_serid == '\0' || dm->dm_drgen != drgen) {
201 			/*
202 			 * We don't have a cached copy, or the copy we've got is
203 			 * out of date.  Look it up again.
204 			 */
205 			if (mem_get_serid(dm->dm_device, dm->dm_serid,
206 			    sizeof (dm->dm_serid)) < 0) {
207 				rc = -1; /* errno is set for us */
208 				break;
209 			}
210 
211 			dm->dm_drgen = drgen;
212 		}
213 
214 		serids[i] = fmd_fmri_strdup(dm->dm_serid);
215 	}
216 
217 	mem_strarray_free(dimms, ndimms);
218 
219 	if (rc == 0) {
220 		*seridsp = serids;
221 		*nseridsp = nserids;
222 	} else {
223 		mem_strarray_free(serids, nserids);
224 	}
225 
226 	return (rc);
227 }
228 
229 /*
230  * Returns 0 with serial numbers if found, -1 (with errno set) for errors.  If
231  * the unum (or a component of same) wasn't found, -1 is returned with errno
232  * set to ENOENT.
233  */
234 static int
235 mem_get_serids_from_mdesc(const char *unum, char ***seridsp, size_t *nseridsp)
236 {
237 	uint64_t drgen = fmd_fmri_get_drgen();
238 	char **dimms, **serids;
239 	size_t ndimms, nserids;
240 	mem_dimm_map_t *dm;
241 	int i, rc = 0;
242 
243 	if (mem_unum_burst(unum, &dimms, &ndimms) < 0)
244 		return (-1); /* errno is set for us */
245 
246 	serids = fmd_fmri_zalloc(sizeof (char *) * ndimms);
247 	nserids = ndimms;
248 
249 	/*
250 	 * first go through dimms and see if dm_drgen entries are outdated
251 	 */
252 	for (i = 0; i < ndimms; i++) {
253 		if ((dm = dm_lookup(dimms[i])) == NULL ||
254 		    dm->dm_drgen != drgen)
255 			break;
256 	}
257 
258 	if (i < ndimms && mem_update_mdesc() != 0) {
259 		mem_strarray_free(dimms, ndimms);
260 		return (-1);
261 	}
262 
263 	/*
264 	 * get to this point if an up-to-date mdesc (and corresponding
265 	 * entries in the global mem list) exists
266 	 */
267 	for (i = 0; i < ndimms; i++) {
268 		if ((dm = dm_lookup(dimms[i])) == NULL) {
269 			rc = fmd_fmri_set_errno(EINVAL);
270 			break;
271 		}
272 
273 		if (dm->dm_drgen != drgen)
274 			dm->dm_drgen = drgen;
275 
276 		/*
277 		 * mdesc and dm entry was updated by an earlier call to
278 		 * mem_update_mdesc, so we go ahead and dup the serid
279 		 */
280 		serids[i] = fmd_fmri_strdup(dm->dm_serid);
281 	}
282 
283 	mem_strarray_free(dimms, ndimms);
284 
285 	if (rc == 0) {
286 		*seridsp = serids;
287 		*nseridsp = nserids;
288 	} else {
289 		mem_strarray_free(serids, nserids);
290 	}
291 
292 	return (rc);
293 }
294 
295 #endif	/* sparc */
296 
/*
 * Pick the serial id source for unum.
 *
 * On sparc: with no DIMM map (mem.mem_dm == NULL) the kernel is queried;
 * otherwise the mdesc-based lookup is tried first and the cache-based
 * lookup is used only if that does not return 0.
 *
 * On every other build no serial id support exists: errno is set to
 * ENOTSUP and -1 is returned without touching seridsp or nseridsp.
 */
/*ARGSUSED*/
static int
mem_get_serids_by_unum(const char *unum, char ***seridsp, size_t *nseridsp)
{
#ifdef	sparc
	/*
	 * Some platforms do not cache serial ids in the plugin and expose
	 * them through the kernel instead; those have no DIMM map.
	 */
	if (mem.mem_dm == NULL)
		return (mem_get_serids_from_kernel(unum, seridsp, nseridsp));

	if (mem_get_serids_from_mdesc(unum, seridsp, nseridsp) != 0)
		return (mem_get_serids_from_cache(unum, seridsp, nseridsp));

	return (0);
#else
	errno = ENOTSUP;
	return (-1);
#endif	/* sparc */
}
318 
319 static int
320 mem_fmri_get_unum(nvlist_t *nvl, char **unump)
321 {
322 	uint8_t version;
323 	char *unum;
324 
325 	if (nvlist_lookup_uint8(nvl, FM_VERSION, &version) != 0 ||
326 	    version > FM_MEM_SCHEME_VERSION ||
327 	    nvlist_lookup_string(nvl, FM_FMRI_MEM_UNUM, &unum) != 0)
328 		return (fmd_fmri_set_errno(EINVAL));
329 
330 	*unump = unum;
331 
332 	return (0);
333 }
334 
335 ssize_t
336 fmd_fmri_nvl2str(nvlist_t *nvl, char *buf, size_t buflen)
337 {
338 	char format[64];
339 	ssize_t size, presz;
340 	char *rawunum, *preunum, *escunum, *prefix;
341 	uint64_t val;
342 	int i;
343 
344 	if (mem_fmri_get_unum(nvl, &rawunum) < 0)
345 		return (-1); /* errno is set for us */
346 
347 	/*
348 	 * If we have a well-formed unum (hc-FMRI), use the string verbatim
349 	 * to form the initial mem:/// components.  Otherwise use unum=%s.
350 	 */
351 	if (strncmp(rawunum, "hc://", 5) != 0)
352 		prefix = FM_FMRI_MEM_UNUM "=";
353 	else
354 		prefix = "";
355 
356 	/*
357 	 * If we have a DIMM offset, include it in the string.  If we have a PA
358 	 * then use that.  Otherwise just format the unum element.
359 	 */
360 	if (nvlist_lookup_uint64(nvl, FM_FMRI_MEM_OFFSET, &val) == 0) {
361 		(void) snprintf(format, sizeof (format),
362 		    "%s:///%s%%1$s/%s=%%2$llx",
363 		    FM_FMRI_SCHEME_MEM, prefix, FM_FMRI_MEM_OFFSET);
364 	} else if (nvlist_lookup_uint64(nvl, FM_FMRI_MEM_PHYSADDR, &val) == 0) {
365 		(void) snprintf(format, sizeof (format),
366 		    "%s:///%s%%1$s/%s=%%2$llx",
367 		    FM_FMRI_SCHEME_MEM, prefix, FM_FMRI_MEM_PHYSADDR);
368 	} else {
369 		(void) snprintf(format, sizeof (format),
370 		    "%s:///%s%%1$s", FM_FMRI_SCHEME_MEM, prefix);
371 	}
372 
373 	/*
374 	 * If we have a well-formed unum (hc-FMRI), we skip over the
375 	 * the scheme and authority prefix.
376 	 * Otherwise, the spaces and colons will be escaped,
377 	 * rendering the resulting FMRI pretty much unreadable.
378 	 * We're therefore going to do some escaping of our own first.
379 	 */
380 	if (strncmp(rawunum, "hc://", 5) == 0) {
381 		rawunum += 5;
382 		rawunum = strchr(rawunum, '/');
383 		++rawunum;
384 		/* LINTED: variable format specifier */
385 		size = snprintf(buf, buflen, format, rawunum, val);
386 	} else {
387 		preunum = fmd_fmri_strdup(rawunum);
388 		presz = strlen(preunum) + 1;
389 
390 		for (i = 0; i < presz - 1; i++) {
391 			if (preunum[i] == ':' && preunum[i + 1] == ' ') {
392 				bcopy(preunum + i + 2, preunum + i + 1,
393 				    presz - (i + 2));
394 			} else if (preunum[i] == ' ') {
395 				preunum[i] = ',';
396 			}
397 		}
398 
399 		escunum = fmd_fmri_strescape(preunum);
400 		fmd_fmri_free(preunum, presz);
401 
402 		/* LINTED: variable format specifier */
403 		size = snprintf(buf, buflen, format, escunum, val);
404 		fmd_fmri_strfree(escunum);
405 	}
406 
407 	return (size);
408 }
409 
410 int
411 fmd_fmri_expand(nvlist_t *nvl)
412 {
413 	char *unum, **serids;
414 	uint_t nnvlserids;
415 	size_t nserids;
416 	int rc;
417 
418 	if (mem_fmri_get_unum(nvl, &unum) < 0)
419 		return (fmd_fmri_set_errno(EINVAL));
420 
421 	if ((rc = nvlist_lookup_string_array(nvl, FM_FMRI_MEM_SERIAL_ID,
422 	    &serids, &nnvlserids)) == 0)
423 		return (0); /* fmri is already expanded */
424 	else if (rc != ENOENT)
425 		return (fmd_fmri_set_errno(EINVAL));
426 
427 	if (mem_get_serids_by_unum(unum, &serids, &nserids) < 0) {
428 		/* errno is set for us */
429 		if (errno == ENOTSUP)
430 			return (0); /* nothing to add - no s/n support */
431 		else
432 			return (-1);
433 	}
434 
435 	rc = nvlist_add_string_array(nvl, FM_FMRI_MEM_SERIAL_ID, serids,
436 	    nserids);
437 
438 	mem_strarray_free(serids, nserids);
439 
440 	if (rc != 0)
441 		return (fmd_fmri_set_errno(EINVAL));
442 
443 	return (0);
444 }
445 
/*
 * Compare two serial id arrays.  Returns 1 when both have the same number
 * of entries and every pair of entries at the same index is an identical
 * string; returns 0 otherwise.  Two empty arrays compare equal.
 */
static int
serids_eq(char **serids1, uint_t nserids1, char **serids2, uint_t nserids2)
{
	uint_t idx = 0;

	if (nserids1 != nserids2)
		return (0);

	/* Advance while the entries agree; stop at the first mismatch. */
	while (idx < nserids1 && strcmp(serids1[idx], serids2[idx]) == 0)
		idx++;

	return (idx == nserids1 ? 1 : 0);
}
461 
462 int
463 fmd_fmri_present(nvlist_t *nvl)
464 {
465 	char *unum, **nvlserids, **serids;
466 	uint_t nnvlserids;
467 	size_t nserids;
468 	uint64_t memconfig;
469 	int rc;
470 
471 	if (mem_fmri_get_unum(nvl, &unum) < 0)
472 		return (-1); /* errno is set for us */
473 
474 	if (nvlist_lookup_string_array(nvl, FM_FMRI_MEM_SERIAL_ID, &nvlserids,
475 	    &nnvlserids) != 0) {
476 		/*
477 		 * Some mem scheme FMRIs don't have serial ids because
478 		 * either the platform does not support them, or because
479 		 * the FMRI was created before support for serial ids was
480 		 * introduced.  If this is the case, assume it is there.
481 		 */
482 		if (mem.mem_dm == NULL)
483 			return (1);
484 		else
485 			return (fmd_fmri_set_errno(EINVAL));
486 	}
487 
488 	/*
489 	 * Hypervisor will change the memconfig value when the mapping of
490 	 * pages to DIMMs changes, e.g. for change in DIMM size or interleave.
491 	 * If we detect such a change, we discard ereports associated with a
492 	 * previous memconfig value as invalid.
493 	 *
494 	 * The test (mem.mem_memconfig != 0) means we run on a system that
495 	 * actually suplies a memconfig value.
496 	 */
497 
498 	if ((nvlist_lookup_uint64(nvl, FM_FMRI_MEM_MEMCONFIG,
499 	    &memconfig) == 0) && (mem.mem_memconfig != 0) &&
500 	    (memconfig != mem.mem_memconfig))
501 		return (0);
502 
503 	if (mem_get_serids_by_unum(unum, &serids, &nserids) < 0) {
504 		if (errno == ENOTSUP)
505 			return (1); /* assume it's there, no s/n support here */
506 		if (errno != ENOENT) {
507 			/*
508 			 * Errors are only signalled to the caller if they're
509 			 * the caller's fault.  This isn't - it's a failure on
510 			 * our part to burst or read the serial numbers.  We'll
511 			 * whine about it, and tell the caller the named
512 			 * module(s) isn't/aren't there.
513 			 */
514 			fmd_fmri_warn("failed to retrieve serial number for "
515 			    "unum %s", unum);
516 		}
517 		return (0);
518 	}
519 
520 	rc = serids_eq(serids, nserids, nvlserids, nnvlserids);
521 
522 	mem_strarray_free(serids, nserids);
523 
524 	return (rc);
525 }
526 
527 int
528 fmd_fmri_contains(nvlist_t *er, nvlist_t *ee)
529 {
530 	char *erunum, *eeunum;
531 	uint64_t erval = 0, eeval = 0;
532 
533 	if (mem_fmri_get_unum(er, &erunum) < 0 ||
534 	    mem_fmri_get_unum(ee, &eeunum) < 0)
535 		return (-1); /* errno is set for us */
536 
537 	if (mem_unum_contains(erunum, eeunum) <= 0)
538 		return (0); /* can't parse/match, so assume no containment */
539 
540 	if (nvlist_lookup_uint64(er, FM_FMRI_MEM_OFFSET, &erval) == 0) {
541 		return (nvlist_lookup_uint64(ee,
542 		    FM_FMRI_MEM_OFFSET, &eeval) == 0 && erval == eeval);
543 	}
544 
545 	if (nvlist_lookup_uint64(er, FM_FMRI_MEM_PHYSADDR, &erval) == 0) {
546 		return (nvlist_lookup_uint64(ee,
547 		    FM_FMRI_MEM_PHYSADDR, &eeval) == 0 && erval == eeval);
548 	}
549 
550 	return (1);
551 }
552 
553 /*
554  * We can only make a usable/unusable determination for pages.  Mem FMRIs
555  * without page addresses will be reported as usable since Solaris has no
556  * way at present to dynamically disable an entire DIMM or DIMM pair.
557  */
558 int
559 fmd_fmri_unusable(nvlist_t *nvl)
560 {
561 	uint64_t val;
562 	uint8_t version;
563 	int rc, err1, err2;
564 	nvlist_t *nvlcp = NULL;
565 	int retval;
566 
567 	if (nvlist_lookup_uint8(nvl, FM_VERSION, &version) != 0 ||
568 	    version > FM_MEM_SCHEME_VERSION)
569 		return (fmd_fmri_set_errno(EINVAL));
570 
571 	err1 = nvlist_lookup_uint64(nvl, FM_FMRI_MEM_OFFSET, &val);
572 	err2 = nvlist_lookup_uint64(nvl, FM_FMRI_MEM_PHYSADDR, &val);
573 
574 	if (err1 == ENOENT && err2 == ENOENT)
575 		return (0); /* no page, so assume it's still usable */
576 
577 	if ((err1 != 0 && err1 != ENOENT) || (err2 != 0 && err2 != ENOENT))
578 		return (fmd_fmri_set_errno(EINVAL));
579 
580 	if ((err1 = mem_unum_rewrite(nvl, &nvlcp)) != 0)
581 		return (fmd_fmri_set_errno(err1));
582 
583 	/*
584 	 * Ask the kernel if the page is retired, using either the rewritten
585 	 * hc FMRI or the original mem FMRI with the specified offset or PA.
586 	 * Refer to the kernel's page_retire_check() for the error codes.
587 	 */
588 	rc = mem_page_cmd(MEM_PAGE_FMRI_ISRETIRED, nvlcp ? nvlcp : nvl);
589 
590 	if (rc == -1 && errno == EIO) {
591 		/*
592 		 * The page is not retired and is not scheduled for retirement
593 		 * (i.e. no request pending and has not seen any errors)
594 		 */
595 		retval = 0;
596 	} else if (rc == 0 || errno == EAGAIN || errno == EINVAL) {
597 		/*
598 		 * The page has been retired, is in the process of being
599 		 * retired, or doesn't exist.  The latter is valid if the page
600 		 * existed in the past but has been DR'd out.
601 		 */
602 		retval = 1;
603 	} else {
604 		/*
605 		 * Errors are only signalled to the caller if they're the
606 		 * caller's fault.  This isn't - it's a failure of the
607 		 * retirement-check code.  We'll whine about it and tell
608 		 * the caller the page is unusable.
609 		 */
610 		fmd_fmri_warn("failed to determine page %s=%llx usability: "
611 		    "rc=%d errno=%d\n", err1 == 0 ? FM_FMRI_MEM_OFFSET :
612 		    FM_FMRI_MEM_PHYSADDR, (u_longlong_t)val, rc, errno);
613 		retval = 1;
614 	}
615 
616 	if (nvlcp)
617 		nvlist_free(nvlcp);
618 
619 	return (retval);
620 }
621 
/*
 * Scheme initialization entry point.  On sparc the LDOM handle is obtained
 * first, using the fmd_fmri allocator routines; then mem_discover() is run
 * and its result is returned unchanged.
 */
int
fmd_fmri_init(void)
{
	int status;

#ifdef	sparc
	mem_scheme_lhp = ldom_init(fmd_fmri_alloc, fmd_fmri_free);
#endif	/* sparc */

	status = mem_discover();

	return (status);
}
630 
631 void
632 fmd_fmri_fini(void)
633 {
634 	mem_dimm_map_t *dm, *em;
635 
636 	for (dm = mem.mem_dm; dm != NULL; dm = em) {
637 		em = dm->dm_next;
638 		fmd_fmri_strfree(dm->dm_label);
639 		fmd_fmri_strfree(dm->dm_device);
640 		fmd_fmri_free(dm, sizeof (mem_dimm_map_t));
641 	}
642 #ifdef	sparc
643 	ldom_fini(mem_scheme_lhp);
644 #endif	/* sparc */
645 }
646