xref: /titanic_44/usr/src/cmd/fm/schemes/mem/mem.c (revision 5e1c24c3b8bea565f7bfcd11a154db168c5d2643)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <mem.h>
30 #include <fm/fmd_fmri.h>
31 
32 #include <fcntl.h>
33 #include <unistd.h>
34 #include <string.h>
35 #include <strings.h>
36 #include <time.h>
37 #include <sys/mem.h>
38 
/*
 * Global state of the mem scheme plugin.  Within this file, mem.mem_dm is
 * used as the head of the DIMM map list (walked by dm_lookup() and released
 * by fmd_fmri_fini()), and mem.mem_memconfig is compared against the
 * memconfig member of an FMRI in fmd_fmri_present().
 */
mem_t mem;
40 
41 #ifdef	sparc
/*
 * Retry values for handling the case where the kernel is not yet ready
 * to provide DIMM serial ids.  Some platforms acquire DIMM serial id
 * information from their System Controller via a mailbox interface.
 * The values chosen are for 10 retries 3 seconds apart to approximate the
 * possible 30 second timeout length of a mailbox message request.
 */
/* Number of times the MEM_SID ioctl is retried after it fails with EAGAIN. */
#define	MAX_MEM_SID_RETRIES	10
/* Seconds to sleep between two consecutive MEM_SID attempts. */
#define	MEM_SID_RETRY_WAIT	3
51 
52 static mem_dimm_map_t *
53 dm_lookup(const char *name)
54 {
55 	mem_dimm_map_t *dm;
56 
57 	for (dm = mem.mem_dm; dm != NULL; dm = dm->dm_next) {
58 		if (strcmp(name, dm->dm_label) == 0)
59 			return (dm);
60 	}
61 
62 	return (NULL);
63 }
64 
65 /*
66  * Returns 0 with serial numbers if found, -1 (with errno set) for errors.  If
67  * the unum (or a component of same) wasn't found, -1 is returned with errno
68  * set to ENOENT.  If the kernel doesn't have support for serial numbers,
69  * -1 is returned with errno set to ENOTSUP.
70  */
71 static int
72 mem_get_serids_from_kernel(const char *unum, char ***seridsp, size_t *nseridsp)
73 {
74 	char **dimms, **serids;
75 	size_t ndimms, nserids;
76 	int i, rc = 0;
77 	int fd;
78 	int retries = MAX_MEM_SID_RETRIES;
79 	mem_name_t mn;
80 	struct timespec rqt;
81 
82 	if ((fd = open("/dev/mem", O_RDONLY)) < 0)
83 		return (-1);
84 
85 	if (mem_unum_burst(unum, &dimms, &ndimms) < 0) {
86 		(void) close(fd);
87 		return (-1); /* errno is set for us */
88 	}
89 
90 	serids = fmd_fmri_zalloc(sizeof (char *) * ndimms);
91 	nserids = ndimms;
92 
93 	bzero(&mn, sizeof (mn));
94 
95 	for (i = 0; i < ndimms; i++) {
96 		mn.m_namelen = strlen(dimms[i]) + 1;
97 		mn.m_sidlen = MEM_SERID_MAXLEN;
98 
99 		mn.m_name = fmd_fmri_alloc(mn.m_namelen);
100 		mn.m_sid = fmd_fmri_alloc(mn.m_sidlen);
101 
102 		(void) strcpy(mn.m_name, dimms[i]);
103 
104 		do {
105 			rc = ioctl(fd, MEM_SID, &mn);
106 
107 			if (rc >= 0 || errno != EAGAIN)
108 				break;
109 
110 			if (retries == 0) {
111 				errno = ETIMEDOUT;
112 				break;
113 			}
114 
115 			/*
116 			 * EAGAIN indicates the kernel is
117 			 * not ready to provide DIMM serial
118 			 * ids.  Sleep MEM_SID_RETRY_WAIT seconds
119 			 * and try again.
120 			 * nanosleep() is used instead of sleep()
121 			 * to avoid interfering with fmd timers.
122 			 */
123 			rqt.tv_sec = MEM_SID_RETRY_WAIT;
124 			rqt.tv_nsec = 0;
125 			(void) nanosleep(&rqt, NULL);
126 
127 		} while (retries--);
128 
129 		if (rc < 0) {
130 			/*
131 			 * ENXIO can happen if the kernel memory driver
132 			 * doesn't have the MEM_SID ioctl (e.g. if the
133 			 * kernel hasn't been patched to provide the
134 			 * support).
135 			 *
136 			 * If the MEM_SID ioctl is available but the
137 			 * particular platform doesn't support providing
138 			 * serial ids, ENOTSUP will be returned by the ioctl.
139 			 */
140 			if (errno == ENXIO)
141 				errno = ENOTSUP;
142 			fmd_fmri_free(mn.m_name, mn.m_namelen);
143 			fmd_fmri_free(mn.m_sid, mn.m_sidlen);
144 			mem_strarray_free(serids, nserids);
145 			mem_strarray_free(dimms, ndimms);
146 			(void) close(fd);
147 			return (-1);
148 		}
149 
150 		serids[i] = fmd_fmri_strdup(mn.m_sid);
151 
152 		fmd_fmri_free(mn.m_name, mn.m_namelen);
153 		fmd_fmri_free(mn.m_sid, mn.m_sidlen);
154 	}
155 
156 	mem_strarray_free(dimms, ndimms);
157 
158 	(void) close(fd);
159 
160 	*seridsp = serids;
161 	*nseridsp = nserids;
162 
163 	return (0);
164 }
165 
166 /*
167  * Returns 0 with serial numbers if found, -1 (with errno set) for errors.  If
168  * the unum (or a component of same) wasn't found, -1 is returned with errno
169  * set to ENOENT.
170  */
171 static int
172 mem_get_serids_from_cache(const char *unum, char ***seridsp, size_t *nseridsp)
173 {
174 	uint64_t drgen = fmd_fmri_get_drgen();
175 	char **dimms, **serids;
176 	size_t ndimms, nserids;
177 	mem_dimm_map_t *dm;
178 	int i, rc = 0;
179 
180 	if (mem_unum_burst(unum, &dimms, &ndimms) < 0)
181 		return (-1); /* errno is set for us */
182 
183 	serids = fmd_fmri_zalloc(sizeof (char *) * ndimms);
184 	nserids = ndimms;
185 
186 	for (i = 0; i < ndimms; i++) {
187 		if ((dm = dm_lookup(dimms[i])) == NULL) {
188 			rc = fmd_fmri_set_errno(EINVAL);
189 			break;
190 		}
191 
192 		if (*dm->dm_serid == '\0' || dm->dm_drgen != drgen) {
193 			/*
194 			 * We don't have a cached copy, or the copy we've got is
195 			 * out of date.  Look it up again.
196 			 */
197 			if (mem_get_serid(dm->dm_device, dm->dm_serid,
198 			    sizeof (dm->dm_serid)) < 0) {
199 				rc = -1; /* errno is set for us */
200 				break;
201 			}
202 
203 			dm->dm_drgen = drgen;
204 		}
205 
206 		serids[i] = fmd_fmri_strdup(dm->dm_serid);
207 	}
208 
209 	mem_strarray_free(dimms, ndimms);
210 
211 	if (rc == 0) {
212 		*seridsp = serids;
213 		*nseridsp = nserids;
214 	} else {
215 		mem_strarray_free(serids, nserids);
216 	}
217 
218 	return (rc);
219 }
220 
221 #endif	/* sparc */
222 
/*
 * Fetch the serial ids for `unum'.  On sparc, the DIMM map is consulted when
 * one has been built (mem.mem_dm is non-NULL); some platforms do not support
 * caching of serial ids by the mem scheme plugin and instead make them
 * available via the kernel, so the kernel is asked when there is no map.
 * On all other builds the request fails with errno set to ENOTSUP.
 */
/*ARGSUSED*/
static int
mem_get_serids_by_unum(const char *unum, char ***seridsp, size_t *nseridsp)
{
#ifdef	sparc
	if (mem.mem_dm != NULL)
		return (mem_get_serids_from_cache(unum, seridsp, nseridsp));

	return (mem_get_serids_from_kernel(unum, seridsp, nseridsp));
#else
	errno = ENOTSUP;
	return (-1);
#endif	/* sparc */
}
242 
243 static int
244 mem_fmri_get_unum(nvlist_t *nvl, char **unump)
245 {
246 	uint8_t version;
247 	char *unum;
248 
249 	if (nvlist_lookup_uint8(nvl, FM_VERSION, &version) != 0 ||
250 	    version > FM_MEM_SCHEME_VERSION ||
251 	    nvlist_lookup_string(nvl, FM_FMRI_MEM_UNUM, &unum) != 0)
252 		return (fmd_fmri_set_errno(EINVAL));
253 
254 	*unump = unum;
255 
256 	return (0);
257 }
258 
259 ssize_t
260 fmd_fmri_nvl2str(nvlist_t *nvl, char *buf, size_t buflen)
261 {
262 	char format[64];
263 	ssize_t size, presz;
264 	char *rawunum, *preunum, *escunum, *prefix;
265 	uint64_t val;
266 	int i;
267 
268 	if (mem_fmri_get_unum(nvl, &rawunum) < 0)
269 		return (-1); /* errno is set for us */
270 
271 	/*
272 	 * If we have a well-formed unum (hc-FMRI), use the string verbatim
273 	 * to form the initial mem:/// components.  Otherwise use unum=%s.
274 	 */
275 	if (strncmp(rawunum, "hc:///", 6) != 0)
276 		prefix = FM_FMRI_MEM_UNUM "=";
277 	else
278 		prefix = "";
279 
280 	/*
281 	 * If we have a DIMM offset, include it in the string.  If we have a PA
282 	 * then use that.  Otherwise just format the unum element.
283 	 */
284 	if (nvlist_lookup_uint64(nvl, FM_FMRI_MEM_OFFSET, &val) == 0) {
285 		(void) snprintf(format, sizeof (format),
286 		    "%s:///%s%%1$s/%s=%%2$llx",
287 		    FM_FMRI_SCHEME_MEM, prefix, FM_FMRI_MEM_OFFSET);
288 	} else if (nvlist_lookup_uint64(nvl, FM_FMRI_MEM_PHYSADDR, &val) == 0) {
289 		(void) snprintf(format, sizeof (format),
290 		    "%s:///%s%%1$s/%s=%%2$llx",
291 		    FM_FMRI_SCHEME_MEM, prefix, FM_FMRI_MEM_PHYSADDR);
292 	} else {
293 		(void) snprintf(format, sizeof (format),
294 		    "%s:///%s%%1$s", FM_FMRI_SCHEME_MEM, prefix);
295 	}
296 
297 	/*
298 	 * If we have a well-formed unum (hc-FMRI), leave it as is.
299 	 * Otherwise, the spaces and colons will be escaped,
300 	 * rendering the resulting FMRI pretty much unreadable.
301 	 * We're therefore going to do some escaping of our own first.
302 	 */
303 	if (strncmp(rawunum, "hc:///", 6) == 0) {
304 		/* LINTED: variable format specifier */
305 		size = snprintf(buf, buflen, format, rawunum + 6, val);
306 	} else {
307 		preunum = fmd_fmri_strdup(rawunum);
308 		presz = strlen(preunum) + 1;
309 
310 		for (i = 0; i < presz - 1; i++) {
311 			if (preunum[i] == ':' && preunum[i + 1] == ' ') {
312 				bcopy(preunum + i + 2, preunum + i + 1,
313 				    presz - (i + 2));
314 			} else if (preunum[i] == ' ') {
315 				preunum[i] = ',';
316 			}
317 		}
318 
319 		escunum = fmd_fmri_strescape(preunum);
320 		fmd_fmri_free(preunum, presz);
321 
322 		/* LINTED: variable format specifier */
323 		size = snprintf(buf, buflen, format, escunum, val);
324 		fmd_fmri_strfree(escunum);
325 	}
326 
327 	return (size);
328 }
329 
330 int
331 fmd_fmri_expand(nvlist_t *nvl)
332 {
333 	char *unum, **serids;
334 	uint_t nnvlserids;
335 	size_t nserids;
336 	int rc;
337 
338 	if (mem_fmri_get_unum(nvl, &unum) < 0)
339 		return (fmd_fmri_set_errno(EINVAL));
340 
341 	if ((rc = nvlist_lookup_string_array(nvl, FM_FMRI_MEM_SERIAL_ID,
342 	    &serids, &nnvlserids)) == 0)
343 		return (0); /* fmri is already expanded */
344 	else if (rc != ENOENT)
345 		return (fmd_fmri_set_errno(EINVAL));
346 
347 	if (mem_get_serids_by_unum(unum, &serids, &nserids) < 0) {
348 		/* errno is set for us */
349 		if (errno == ENOTSUP)
350 			return (0); /* nothing to add - no s/n support */
351 		else
352 			return (-1);
353 	}
354 
355 	rc = nvlist_add_string_array(nvl, FM_FMRI_MEM_SERIAL_ID, serids,
356 	    nserids);
357 
358 	mem_strarray_free(serids, nserids);
359 
360 	if (rc != 0)
361 		return (fmd_fmri_set_errno(EINVAL));
362 
363 	return (0);
364 }
365 
/*
 * Compare two serial id arrays.  Returns 1 if both arrays have the same
 * number of entries and the strings at every index are equal, 0 otherwise.
 * Two empty arrays compare equal.
 */
static int
serids_eq(char **serids1, uint_t nserids1, char **serids2, uint_t nserids2)
{
	uint_t idx = 0;

	if (nserids1 != nserids2)
		return (0);

	while (idx < nserids1) {
		if (strcmp(serids1[idx], serids2[idx]) != 0)
			return (0);
		idx++;
	}

	return (1);
}
381 
382 int
383 fmd_fmri_present(nvlist_t *nvl)
384 {
385 	char *unum, **nvlserids, **serids;
386 	uint_t nnvlserids;
387 	size_t nserids;
388 	uint64_t memconfig;
389 	int rc;
390 
391 	if (mem_fmri_get_unum(nvl, &unum) < 0)
392 		return (-1); /* errno is set for us */
393 
394 	if (nvlist_lookup_string_array(nvl, FM_FMRI_MEM_SERIAL_ID, &nvlserids,
395 	    &nnvlserids) != 0) {
396 		/*
397 		 * Some mem scheme FMRIs don't have serial ids because
398 		 * either the platform does not support them, or because
399 		 * the FMRI was created before support for serial ids was
400 		 * introduced.  If this is the case, assume it is there.
401 		 */
402 		if (mem.mem_dm == NULL)
403 			return (1);
404 		else
405 			return (fmd_fmri_set_errno(EINVAL));
406 	}
407 
408 	/*
409 	 * Hypervisor will change the memconfig value when the mapping of
410 	 * pages to DIMMs changes, e.g. for change in DIMM size or interleave.
411 	 * If we detect such a change, we discard ereports associated with a
412 	 * previous memconfig value as invalid.
413 	 */
414 
415 	if ((nvlist_lookup_uint64(nvl, FM_FMRI_MEM_MEMCONFIG,
416 	    &memconfig) == 0) && memconfig != mem.mem_memconfig)
417 		return (0);
418 
419 	if (mem_get_serids_by_unum(unum, &serids, &nserids) < 0) {
420 		if (errno == ENOTSUP)
421 			return (1); /* assume it's there, no s/n support here */
422 		if (errno != ENOENT) {
423 			/*
424 			 * Errors are only signalled to the caller if they're
425 			 * the caller's fault.  This isn't - it's a failure on
426 			 * our part to burst or read the serial numbers.  We'll
427 			 * whine about it, and tell the caller the named
428 			 * module(s) isn't/aren't there.
429 			 */
430 			fmd_fmri_warn("failed to retrieve serial number for "
431 			    "unum %s", unum);
432 		}
433 		return (0);
434 	}
435 
436 	rc = serids_eq(serids, nserids, nvlserids, nnvlserids);
437 
438 	mem_strarray_free(serids, nserids);
439 
440 	return (rc);
441 }
442 
443 int
444 fmd_fmri_contains(nvlist_t *er, nvlist_t *ee)
445 {
446 	char *erunum, *eeunum;
447 	uint64_t erval = 0, eeval = 0;
448 
449 	if (mem_fmri_get_unum(er, &erunum) < 0 ||
450 	    mem_fmri_get_unum(ee, &eeunum) < 0)
451 		return (-1); /* errno is set for us */
452 
453 	if (mem_unum_contains(erunum, eeunum) <= 0)
454 		return (0); /* can't parse/match, so assume no containment */
455 
456 	if (nvlist_lookup_uint64(er, FM_FMRI_MEM_OFFSET, &erval) == 0) {
457 		return (nvlist_lookup_uint64(ee,
458 		    FM_FMRI_MEM_OFFSET, &eeval) == 0 && erval == eeval);
459 	}
460 
461 	if (nvlist_lookup_uint64(er, FM_FMRI_MEM_PHYSADDR, &erval) == 0) {
462 		return (nvlist_lookup_uint64(ee,
463 		    FM_FMRI_MEM_PHYSADDR, &eeval) == 0 && erval == eeval);
464 	}
465 
466 	return (1);
467 }
468 
469 /*
470  * We can only make a usable/unusable determination for pages.  Mem FMRIs
471  * without page addresses will be reported as usable since Solaris has no
472  * way at present to dynamically disable an entire DIMM or DIMM pair.
473  */
474 int
475 fmd_fmri_unusable(nvlist_t *nvl)
476 {
477 	uint64_t val;
478 	uint8_t version;
479 	int rc, err1, err2;
480 	nvlist_t *nvlcp = NULL;
481 	int retval;
482 
483 	if (nvlist_lookup_uint8(nvl, FM_VERSION, &version) != 0 ||
484 	    version > FM_MEM_SCHEME_VERSION)
485 		return (fmd_fmri_set_errno(EINVAL));
486 
487 	err1 = nvlist_lookup_uint64(nvl, FM_FMRI_MEM_OFFSET, &val);
488 	err2 = nvlist_lookup_uint64(nvl, FM_FMRI_MEM_PHYSADDR, &val);
489 
490 	if (err1 == ENOENT && err2 == ENOENT)
491 		return (0); /* no page, so assume it's still usable */
492 
493 	if ((err1 != 0 && err1 != ENOENT) || (err2 != 0 && err2 != ENOENT))
494 		return (fmd_fmri_set_errno(EINVAL));
495 
496 	if ((err1 = mem_unum_rewrite(nvl, &nvlcp)) != 0)
497 		return (fmd_fmri_set_errno(err1));
498 
499 	/*
500 	 * Ask the kernel if the page is retired, using either the rewritten
501 	 * hc FMRI or the original mem FMRI with the specified offset or PA.
502 	 * Refer to the kernel's page_retire_check() for the error codes.
503 	 */
504 	rc = mem_page_cmd(MEM_PAGE_FMRI_ISRETIRED, nvlcp ? nvlcp : nvl);
505 
506 	if (rc == -1 && errno == EIO) {
507 		/*
508 		 * The page is not retired and is not scheduled for retirement
509 		 * (i.e. no request pending and has not seen any errors)
510 		 */
511 		retval = 0;
512 	} else if (rc == 0 || errno == EAGAIN || errno == EINVAL) {
513 		/*
514 		 * The page has been retired, is in the process of being
515 		 * retired, or doesn't exist.  The latter is valid if the page
516 		 * existed in the past but has been DR'd out.
517 		 */
518 		retval = 1;
519 	} else {
520 		/*
521 		 * Errors are only signalled to the caller if they're the
522 		 * caller's fault.  This isn't - it's a failure of the
523 		 * retirement-check code.  We'll whine about it and tell
524 		 * the caller the page is unusable.
525 		 */
526 		fmd_fmri_warn("failed to determine page %s=%llx usability: "
527 		    "rc=%d errno=%d\n", err1 == 0 ? FM_FMRI_MEM_OFFSET :
528 		    FM_FMRI_MEM_PHYSADDR, (u_longlong_t)val, rc, errno);
529 		retval = 1;
530 	}
531 
532 	if (nvlcp)
533 		nvlist_free(nvlcp);
534 
535 	return (retval);
536 }
537 
/*
 * Scheme initialization entry point: runs mem_discover() and passes its
 * return value back to the caller unchanged.
 */
int
fmd_fmri_init(void)
{
	int rc = mem_discover();

	return (rc);
}
543 
544 void
545 fmd_fmri_fini(void)
546 {
547 	mem_dimm_map_t *dm, *em;
548 
549 	for (dm = mem.mem_dm; dm != NULL; dm = em) {
550 		em = dm->dm_next;
551 		fmd_fmri_strfree(dm->dm_label);
552 		fmd_fmri_strfree(dm->dm_device);
553 		fmd_fmri_free(dm, sizeof (mem_dimm_map_t));
554 	}
555 }
556