xref: /illumos-gate/usr/src/cmd/fm/schemes/mem/mem.c (revision fe3e2633be44d2f5361a7bba26abeb80fcc04fbc)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <mem.h>
30 #include <fm/fmd_fmri.h>
31 #include <fm/libtopo.h>
32 
33 #include <string.h>
34 #include <strings.h>
35 #include <sys/mem.h>
36 
/*
 * Global state for the mem scheme plugin.  The lists hanging off it
 * (mem_dm, mem_bank, mem_group and mem_seg) are released by
 * fmd_fmri_fini(); mem_dm is also consulted on sparc to tell whether DIMM
 * information is available.
 * NOTE(review): presumably populated by mem_discover() from
 * fmd_fmri_init() -- confirm against mem_discover().
 */
mem_t mem;
38 
39 static int
40 mem_fmri_get_unum(nvlist_t *nvl, char **unump)
41 {
42 	uint8_t version;
43 	char *unum;
44 
45 	if (nvlist_lookup_uint8(nvl, FM_VERSION, &version) != 0 ||
46 	    version > FM_MEM_SCHEME_VERSION ||
47 	    nvlist_lookup_string(nvl, FM_FMRI_MEM_UNUM, &unum) != 0)
48 		return (fmd_fmri_set_errno(EINVAL));
49 
50 	*unump = unum;
51 
52 	return (0);
53 }
54 
55 ssize_t
56 fmd_fmri_nvl2str(nvlist_t *nvl, char *buf, size_t buflen)
57 {
58 	char format[64];
59 	ssize_t size, presz;
60 	char *rawunum, *preunum, *escunum, *prefix;
61 	uint64_t val;
62 	int i;
63 
64 	if (mem_fmri_get_unum(nvl, &rawunum) < 0)
65 		return (-1); /* errno is set for us */
66 
67 	/*
68 	 * If we have a well-formed unum (hc-FMRI), use the string verbatim
69 	 * to form the initial mem:/// components.  Otherwise use unum=%s.
70 	 */
71 	if (strncmp(rawunum, "hc://", 5) != 0)
72 		prefix = FM_FMRI_MEM_UNUM "=";
73 	else
74 		prefix = "";
75 
76 	/*
77 	 * If we have a DIMM offset, include it in the string.  If we have a PA
78 	 * then use that.  Otherwise just format the unum element.
79 	 */
80 	if (nvlist_lookup_uint64(nvl, FM_FMRI_MEM_OFFSET, &val) == 0) {
81 		(void) snprintf(format, sizeof (format),
82 		    "%s:///%s%%1$s/%s=%%2$llx",
83 		    FM_FMRI_SCHEME_MEM, prefix, FM_FMRI_MEM_OFFSET);
84 	} else if (nvlist_lookup_uint64(nvl, FM_FMRI_MEM_PHYSADDR, &val) == 0) {
85 		(void) snprintf(format, sizeof (format),
86 		    "%s:///%s%%1$s/%s=%%2$llx",
87 		    FM_FMRI_SCHEME_MEM, prefix, FM_FMRI_MEM_PHYSADDR);
88 	} else {
89 		(void) snprintf(format, sizeof (format),
90 		    "%s:///%s%%1$s", FM_FMRI_SCHEME_MEM, prefix);
91 	}
92 
93 	/*
94 	 * If we have a well-formed unum (hc-FMRI), we skip over the
95 	 * the scheme and authority prefix.
96 	 * Otherwise, the spaces and colons will be escaped,
97 	 * rendering the resulting FMRI pretty much unreadable.
98 	 * We're therefore going to do some escaping of our own first.
99 	 */
100 	if (strncmp(rawunum, "hc://", 5) == 0) {
101 		rawunum += 5;
102 		rawunum = strchr(rawunum, '/');
103 		++rawunum;
104 		/* LINTED: variable format specifier */
105 		size = snprintf(buf, buflen, format, rawunum, val);
106 	} else {
107 		preunum = fmd_fmri_strdup(rawunum);
108 		presz = strlen(preunum) + 1;
109 
110 		for (i = 0; i < presz - 1; i++) {
111 			if (preunum[i] == ':' && preunum[i + 1] == ' ') {
112 				bcopy(preunum + i + 2, preunum + i + 1,
113 				    presz - (i + 2));
114 			} else if (preunum[i] == ' ') {
115 				preunum[i] = ',';
116 			}
117 		}
118 
119 		escunum = fmd_fmri_strescape(preunum);
120 		fmd_fmri_free(preunum, presz);
121 
122 		/* LINTED: variable format specifier */
123 		size = snprintf(buf, buflen, format, escunum, val);
124 		fmd_fmri_strfree(escunum);
125 	}
126 
127 	return (size);
128 }
129 
130 int
131 fmd_fmri_expand(nvlist_t *nvl)
132 {
133 	char *unum, **serids;
134 	uint_t nnvlserids;
135 	size_t nserids;
136 	int rc, err = 0;
137 	topo_hdl_t *thp;
138 
139 	if ((mem_fmri_get_unum(nvl, &unum) < 0) || (*unum == '\0'))
140 		return (fmd_fmri_set_errno(EINVAL));
141 
142 	/*
143 	 * If the mem-scheme topology exports this method expand(), invoke it.
144 	 */
145 	if ((thp = fmd_fmri_topo_hold(TOPO_VERSION)) == NULL)
146 		return (fmd_fmri_set_errno(EINVAL));
147 	rc = topo_fmri_expand(thp, nvl, &err);
148 	fmd_fmri_topo_rele(thp);
149 	if (err != ETOPO_METHOD_NOTSUP)
150 		return (rc);
151 
152 	if ((rc = nvlist_lookup_string_array(nvl, FM_FMRI_MEM_SERIAL_ID,
153 	    &serids, &nnvlserids)) == 0) { /* already have serial #s */
154 		return (0);
155 	} else if (rc != ENOENT)
156 		return (fmd_fmri_set_errno(EINVAL));
157 
158 	if (mem_get_serids_by_unum(unum, &serids, &nserids) < 0) {
159 		/* errno is set for us */
160 		if (errno == ENOTSUP)
161 			return (0); /* nothing to add - no s/n support */
162 		else
163 			return (-1);
164 	}
165 
166 	rc = nvlist_add_string_array(nvl, FM_FMRI_MEM_SERIAL_ID, serids,
167 	    nserids);
168 
169 	mem_strarray_free(serids, nserids);
170 
171 	if (rc != 0)
172 		return (fmd_fmri_set_errno(EINVAL));
173 	else
174 		return (0);
175 }
176 
177 #ifdef sparc
178 static int
179 serids_eq(char **serids1, uint_t nserids1, char **serids2, uint_t nserids2)
180 {
181 	int i;
182 
183 	if (nserids1 != nserids2)
184 		return (0);
185 
186 	for (i = 0; i < nserids1; i++) {
187 		if (strcmp(serids1[i], serids2[i]) != 0)
188 			return (0);
189 	}
190 
191 	return (1);
192 }
193 #endif /* sparc */
194 
195 int
196 fmd_fmri_present(nvlist_t *nvl)
197 {
198 	char *unum = NULL;
199 	int rc, err = 0;
200 	struct topo_hdl *thp;
201 #ifdef sparc
202 	char **nvlserids, **serids;
203 	uint_t nnvlserids;
204 	size_t nserids;
205 #else
206 	nvlist_t *unum_nvl;
207 	nvlist_t *nvlcp = NULL;
208 	uint64_t val;
209 #endif /* sparc */
210 
211 	if (mem_fmri_get_unum(nvl, &unum) < 0)
212 		return (-1); /* errno is set for us */
213 
214 #ifdef sparc
215 	/*
216 	 * If the mem-scheme topology exports this method present(), invoke it.
217 	 */
218 	if ((thp = fmd_fmri_topo_hold(TOPO_VERSION)) == NULL)
219 		return (fmd_fmri_set_errno(EINVAL));
220 	rc = topo_fmri_present(thp, nvl, &err);
221 	fmd_fmri_topo_rele(thp);
222 	if (err != ETOPO_METHOD_NOTSUP)
223 		return (rc);
224 
225 	if (nvlist_lookup_string_array(nvl, FM_FMRI_MEM_SERIAL_ID, &nvlserids,
226 	    &nnvlserids) != 0) {
227 		/*
228 		 * Some mem scheme FMRIs don't have serial ids because
229 		 * either the platform does not support them, or because
230 		 * the FMRI was created before support for serial ids was
231 		 * introduced.  If this is the case, assume it is there.
232 		 */
233 		if (mem.mem_dm == NULL)
234 			return (1);
235 		else
236 			return (fmd_fmri_set_errno(EINVAL));
237 	}
238 
239 	if (mem_get_serids_by_unum(unum, &serids, &nserids) < 0) {
240 		if (errno == ENOTSUP)
241 			return (1); /* assume it's there, no s/n support here */
242 		if (errno != ENOENT) {
243 			/*
244 			 * Errors are only signalled to the caller if they're
245 			 * the caller's fault.  This isn't - it's a failure on
246 			 * our part to burst or read the serial numbers.  We'll
247 			 * whine about it, and tell the caller the named
248 			 * module(s) isn't/aren't there.
249 			 */
250 			fmd_fmri_warn("failed to retrieve serial number for "
251 			    "unum %s", unum);
252 		}
253 		return (0);
254 	}
255 
256 	rc = serids_eq(serids, nserids, nvlserids, nnvlserids);
257 
258 	mem_strarray_free(serids, nserids);
259 #else
260 	/*
261 	 * On X86 we will invoke the topo is_present method passing in the
262 	 * unum, which is in hc scheme.  The libtopo hc-scheme is_present method
263 	 * will invoke the node-specific is_present method, which is implemented
264 	 * by the chip enumerator for rank nodes.  The rank node's is_present
265 	 * method will compare the serial number in the unum with the current
266 	 * serial to determine if the same DIMM is present.
267 	 */
268 	if ((thp = fmd_fmri_topo_hold(TOPO_VERSION)) == NULL) {
269 		fmd_fmri_warn("failed to get handle to topology");
270 		return (-1);
271 	}
272 	if (topo_fmri_str2nvl(thp, unum, &unum_nvl, &err) == 0) {
273 		rc = topo_fmri_present(thp, unum_nvl, &err);
274 		nvlist_free(unum_nvl);
275 	} else
276 		rc = fmd_fmri_set_errno(EINVAL);
277 	fmd_fmri_topo_rele(thp);
278 
279 	/*
280 	 * Need to check if this is a valid page too. if "isretired" returns
281 	 * EINVAL, assume page invalid and return not_present.
282 	 */
283 	if (rc == 1 && nvlist_lookup_uint64(nvl, FM_FMRI_MEM_OFFSET, &val) ==
284 	    0 && nvlist_lookup_uint64(nvl, FM_FMRI_MEM_PHYSADDR, &val) == 0 &&
285 	    mem_unum_rewrite(nvl, &nvlcp) == 0 && nvlcp != NULL) {
286 		int rval = mem_page_cmd(MEM_PAGE_FMRI_ISRETIRED, nvlcp);
287 		if (rval == -1 && errno == EINVAL)
288 			rc = 0;
289 		nvlist_free(nvlcp);
290 	}
291 #endif	/* sparc */
292 	return (rc);
293 }
294 
295 int
296 fmd_fmri_replaced(nvlist_t *nvl)
297 {
298 	char *unum = NULL;
299 	int rc, err = 0;
300 	struct topo_hdl *thp;
301 #ifdef sparc
302 	char **nvlserids, **serids;
303 	uint_t nnvlserids;
304 	size_t nserids;
305 #else
306 	nvlist_t *unum_nvl;
307 	nvlist_t *nvlcp = NULL;
308 	uint64_t val;
309 #endif /* sparc */
310 
311 	if (mem_fmri_get_unum(nvl, &unum) < 0)
312 		return (-1); /* errno is set for us */
313 
314 #ifdef sparc
315 	/*
316 	 * If the mem-scheme topology exports this method replaced(), invoke it.
317 	 */
318 	if ((thp = fmd_fmri_topo_hold(TOPO_VERSION)) == NULL)
319 		return (fmd_fmri_set_errno(EINVAL));
320 	rc = topo_fmri_replaced(thp, nvl, &err);
321 	fmd_fmri_topo_rele(thp);
322 	if (err != ETOPO_METHOD_NOTSUP)
323 		return (rc);
324 
325 	if (nvlist_lookup_string_array(nvl, FM_FMRI_MEM_SERIAL_ID, &nvlserids,
326 	    &nnvlserids) != 0) {
327 		/*
328 		 * Some mem scheme FMRIs don't have serial ids because
329 		 * either the platform does not support them, or because
330 		 * the FMRI was created before support for serial ids was
331 		 * introduced.  If this is the case, assume it is there.
332 		 */
333 		if (mem.mem_dm == NULL)
334 			return (FMD_OBJ_STATE_UNKNOWN);
335 		else
336 			return (fmd_fmri_set_errno(EINVAL));
337 	}
338 
339 	if (mem_get_serids_by_unum(unum, &serids, &nserids) < 0) {
340 		if (errno == ENOTSUP)
341 			return (FMD_OBJ_STATE_UNKNOWN);
342 		if (errno != ENOENT) {
343 			/*
344 			 * Errors are only signalled to the caller if they're
345 			 * the caller's fault.  This isn't - it's a failure on
346 			 * our part to burst or read the serial numbers.  We'll
347 			 * whine about it, and tell the caller the named
348 			 * module(s) isn't/aren't there.
349 			 */
350 			fmd_fmri_warn("failed to retrieve serial number for "
351 			    "unum %s", unum);
352 		}
353 		return (FMD_OBJ_STATE_NOT_PRESENT);
354 	}
355 
356 	rc = serids_eq(serids, nserids, nvlserids, nnvlserids) ?
357 	    FMD_OBJ_STATE_STILL_PRESENT : FMD_OBJ_STATE_REPLACED;
358 
359 	mem_strarray_free(serids, nserids);
360 #else
361 	/*
362 	 * On X86 we will invoke the topo is_replaced method passing in the
363 	 * unum, which is in hc scheme.  The libtopo hc-scheme is_replaced
364 	 * method will invoke the node-specific is_replaced method, which is
365 	 * implemented by the chip enumerator for rank nodes.  The rank node's
366 	 * is_replaced method will compare the serial number in the unum with
367 	 * the current serial to determine if the same DIMM is replaced.
368 	 */
369 	if ((thp = fmd_fmri_topo_hold(TOPO_VERSION)) == NULL) {
370 		fmd_fmri_warn("failed to get handle to topology");
371 		return (-1);
372 	}
373 	if (topo_fmri_str2nvl(thp, unum, &unum_nvl, &err) == 0) {
374 		rc = topo_fmri_replaced(thp, unum_nvl, &err);
375 		nvlist_free(unum_nvl);
376 	} else
377 		rc = fmd_fmri_set_errno(EINVAL);
378 	fmd_fmri_topo_rele(thp);
379 
380 	/*
381 	 * Need to check if this is a valid page too. if "isretired" returns
382 	 * EINVAL, assume page invalid and return not_present.
383 	 */
384 	if ((rc == FMD_OBJ_STATE_STILL_PRESENT ||
385 	    rc == FMD_OBJ_STATE_UNKNOWN) &&
386 	    nvlist_lookup_uint64(nvl, FM_FMRI_MEM_OFFSET, &val) == 0 &&
387 	    nvlist_lookup_uint64(nvl, FM_FMRI_MEM_PHYSADDR, &val) == 0 &&
388 	    mem_unum_rewrite(nvl, &nvlcp) == 0 && nvlcp != NULL) {
389 		int rval = mem_page_cmd(MEM_PAGE_FMRI_ISRETIRED, nvlcp);
390 		if (rval == -1 && errno == EINVAL)
391 			rc = FMD_OBJ_STATE_NOT_PRESENT;
392 		nvlist_free(nvlcp);
393 	}
394 #endif	/* sparc */
395 	return (rc);
396 }
397 
398 int
399 fmd_fmri_contains(nvlist_t *er, nvlist_t *ee)
400 {
401 	int rc, err = 0;
402 	struct topo_hdl *thp;
403 	char *erunum, *eeunum;
404 	uint64_t erval = 0, eeval = 0;
405 
406 	/*
407 	 * If the mem-scheme topology exports this method contains(), invoke it.
408 	 */
409 	if ((thp = fmd_fmri_topo_hold(TOPO_VERSION)) == NULL)
410 		return (fmd_fmri_set_errno(EINVAL));
411 	rc = topo_fmri_contains(thp, er, ee, &err);
412 	fmd_fmri_topo_rele(thp);
413 	if (err != ETOPO_METHOD_NOTSUP)
414 		return (rc);
415 
416 	if (mem_fmri_get_unum(er, &erunum) < 0 ||
417 	    mem_fmri_get_unum(ee, &eeunum) < 0)
418 		return (-1); /* errno is set for us */
419 
420 	if (mem_unum_contains(erunum, eeunum) <= 0)
421 		return (0); /* can't parse/match, so assume no containment */
422 
423 	if (nvlist_lookup_uint64(er, FM_FMRI_MEM_OFFSET, &erval) == 0) {
424 		return (nvlist_lookup_uint64(ee,
425 		    FM_FMRI_MEM_OFFSET, &eeval) == 0 && erval == eeval);
426 	}
427 
428 	if (nvlist_lookup_uint64(er, FM_FMRI_MEM_PHYSADDR, &erval) == 0) {
429 		return (nvlist_lookup_uint64(ee,
430 		    FM_FMRI_MEM_PHYSADDR, &eeval) == 0 && erval == eeval);
431 	}
432 
433 	return (1);
434 }
435 
436 /*
437  * We can only make a usable/unusable determination for pages.  Mem FMRIs
438  * without page addresses will be reported as usable since Solaris has no
439  * way at present to dynamically disable an entire DIMM or DIMM pair.
440  */
441 int
442 fmd_fmri_unusable(nvlist_t *nvl)
443 {
444 	uint64_t val;
445 	uint8_t version;
446 	int rc, err1 = 0, err2;
447 	nvlist_t *nvlcp = NULL;
448 	int retval;
449 	topo_hdl_t *thp;
450 
451 	if (nvlist_lookup_uint8(nvl, FM_VERSION, &version) != 0 ||
452 	    version > FM_MEM_SCHEME_VERSION)
453 		return (fmd_fmri_set_errno(EINVAL));
454 
455 	/*
456 	 * If the mem-scheme topology exports this method unusable(), invoke it.
457 	 */
458 	if ((thp = fmd_fmri_topo_hold(TOPO_VERSION)) == NULL)
459 		return (fmd_fmri_set_errno(EINVAL));
460 	rc = topo_fmri_unusable(thp, nvl, &err1);
461 	fmd_fmri_topo_rele(thp);
462 	if (err1 != ETOPO_METHOD_NOTSUP)
463 		return (rc);
464 
465 	err1 = nvlist_lookup_uint64(nvl, FM_FMRI_MEM_OFFSET, &val);
466 	err2 = nvlist_lookup_uint64(nvl, FM_FMRI_MEM_PHYSADDR, &val);
467 
468 	if (err1 == ENOENT && err2 == ENOENT)
469 		return (0); /* no page, so assume it's still usable */
470 
471 	if ((err1 != 0 && err1 != ENOENT) || (err2 != 0 && err2 != ENOENT))
472 		return (fmd_fmri_set_errno(EINVAL));
473 
474 	if ((err1 = mem_unum_rewrite(nvl, &nvlcp)) != 0)
475 		return (fmd_fmri_set_errno(err1));
476 
477 	/*
478 	 * Ask the kernel if the page is retired, using either the rewritten
479 	 * hc FMRI or the original mem FMRI with the specified offset or PA.
480 	 * Refer to the kernel's page_retire_check() for the error codes.
481 	 */
482 	rc = mem_page_cmd(MEM_PAGE_FMRI_ISRETIRED, nvlcp ? nvlcp : nvl);
483 
484 	if (rc == -1 && errno == EIO) {
485 		/*
486 		 * The page is not retired and is not scheduled for retirement
487 		 * (i.e. no request pending and has not seen any errors)
488 		 */
489 		retval = 0;
490 	} else if (rc == 0 || errno == EAGAIN || errno == EINVAL) {
491 		/*
492 		 * The page has been retired, is in the process of being
493 		 * retired, or doesn't exist.  The latter is valid if the page
494 		 * existed in the past but has been DR'd out.
495 		 */
496 		retval = 1;
497 	} else {
498 		/*
499 		 * Errors are only signalled to the caller if they're the
500 		 * caller's fault.  This isn't - it's a failure of the
501 		 * retirement-check code.  We'll whine about it and tell
502 		 * the caller the page is unusable.
503 		 */
504 		fmd_fmri_warn("failed to determine page %s=%llx usability: "
505 		    "rc=%d errno=%d\n", err1 == 0 ? FM_FMRI_MEM_OFFSET :
506 		    FM_FMRI_MEM_PHYSADDR, (u_longlong_t)val, rc, errno);
507 		retval = 1;
508 	}
509 
510 	if (nvlcp)
511 		nvlist_free(nvlcp);
512 
513 	return (retval);
514 }
515 
/*
 * Scheme initialization entry point: runs mem_discover() and hands its
 * result back to the caller unchanged.
 */
int
fmd_fmri_init(void)
{
	int rv;

	rv = mem_discover();

	return (rv);
}
521 
522 void
523 fmd_fmri_fini(void)
524 {
525 	mem_dimm_map_t *dm, *em;
526 	mem_bank_map_t *bm, *cm;
527 	mem_grp_t *gm, *hm;
528 	mem_seg_map_t *sm, *tm;
529 
530 	for (dm = mem.mem_dm; dm != NULL; dm = em) {
531 		em = dm->dm_next;
532 		fmd_fmri_strfree(dm->dm_label);
533 		fmd_fmri_strfree(dm->dm_part);
534 		fmd_fmri_strfree(dm->dm_device);
535 		fmd_fmri_free(dm, sizeof (mem_dimm_map_t));
536 	}
537 	for (bm = mem.mem_bank; bm != NULL; bm = cm) {
538 		cm = bm->bm_next;
539 		fmd_fmri_free(bm, sizeof (mem_bank_map_t));
540 	}
541 	for (gm = mem.mem_group; gm != NULL; gm = hm) {
542 		hm = gm->mg_next;
543 		fmd_fmri_free(gm, sizeof (mem_grp_t));
544 	}
545 	for (sm = mem.mem_seg; sm != NULL; sm = tm) {
546 		tm = sm->sm_next;
547 		fmd_fmri_free(sm, sizeof (mem_seg_map_t));
548 	}
549 }
550