xref: /illumos-gate/usr/src/cmd/fm/schemes/mem/mem.c (revision 96d9f183facd90dbbc2268c9a51689be0b6a0b46)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <mem.h>
28 #include <fm/fmd_fmri.h>
29 #include <fm/libtopo.h>
30 #include <fm/fmd_agent.h>
31 
32 #include <string.h>
33 #include <strings.h>
34 #include <sys/mem.h>
35 
/*
 * Scheme-wide state shared by the entry points in this file.  Its mem_dm,
 * mem_bank, mem_group and mem_seg list heads are torn down by
 * fmd_fmri_fini().
 */
mem_t mem;
37 
38 static int
39 mem_fmri_get_unum(nvlist_t *nvl, char **unump)
40 {
41 	uint8_t version;
42 	char *unum;
43 
44 	if (nvlist_lookup_uint8(nvl, FM_VERSION, &version) != 0 ||
45 	    version > FM_MEM_SCHEME_VERSION ||
46 	    nvlist_lookup_string(nvl, FM_FMRI_MEM_UNUM, &unum) != 0)
47 		return (fmd_fmri_set_errno(EINVAL));
48 
49 	*unump = unum;
50 
51 	return (0);
52 }
53 
54 static int
55 page_isretired(nvlist_t *fmri, int *errp)
56 {
57 	fmd_agent_hdl_t *hdl;
58 	int rc, err;
59 
60 	if ((hdl = fmd_agent_open(FMD_AGENT_VERSION)) == NULL)
61 		return (-1);
62 	rc = fmd_agent_page_isretired(hdl, fmri);
63 	err = fmd_agent_errno(hdl);
64 	fmd_agent_close(hdl);
65 
66 	if (errp != NULL)
67 		*errp = err;
68 	return (rc);
69 }
70 
71 ssize_t
72 fmd_fmri_nvl2str(nvlist_t *nvl, char *buf, size_t buflen)
73 {
74 	char format[64];
75 	ssize_t size, presz;
76 	char *rawunum, *preunum, *escunum, *prefix;
77 	uint64_t val;
78 	int i;
79 
80 	if (mem_fmri_get_unum(nvl, &rawunum) < 0)
81 		return (-1); /* errno is set for us */
82 
83 	/*
84 	 * If we have a well-formed unum (hc-FMRI), use the string verbatim
85 	 * to form the initial mem:/// components.  Otherwise use unum=%s.
86 	 */
87 	if (strncmp(rawunum, "hc://", 5) != 0)
88 		prefix = FM_FMRI_MEM_UNUM "=";
89 	else
90 		prefix = "";
91 
92 	/*
93 	 * If we have a DIMM offset, include it in the string.  If we have a PA
94 	 * then use that.  Otherwise just format the unum element.
95 	 */
96 	if (nvlist_lookup_uint64(nvl, FM_FMRI_MEM_OFFSET, &val) == 0) {
97 		(void) snprintf(format, sizeof (format),
98 		    "%s:///%s%%1$s/%s=%%2$llx",
99 		    FM_FMRI_SCHEME_MEM, prefix, FM_FMRI_MEM_OFFSET);
100 	} else if (nvlist_lookup_uint64(nvl, FM_FMRI_MEM_PHYSADDR, &val) == 0) {
101 		(void) snprintf(format, sizeof (format),
102 		    "%s:///%s%%1$s/%s=%%2$llx",
103 		    FM_FMRI_SCHEME_MEM, prefix, FM_FMRI_MEM_PHYSADDR);
104 	} else {
105 		(void) snprintf(format, sizeof (format),
106 		    "%s:///%s%%1$s", FM_FMRI_SCHEME_MEM, prefix);
107 	}
108 
109 	/*
110 	 * If we have a well-formed unum (hc-FMRI), we skip over the
111 	 * the scheme and authority prefix.
112 	 * Otherwise, the spaces and colons will be escaped,
113 	 * rendering the resulting FMRI pretty much unreadable.
114 	 * We're therefore going to do some escaping of our own first.
115 	 */
116 	if (strncmp(rawunum, "hc://", 5) == 0) {
117 		rawunum += 5;
118 		rawunum = strchr(rawunum, '/');
119 		++rawunum;
120 		/* LINTED: variable format specifier */
121 		size = snprintf(buf, buflen, format, rawunum, val);
122 	} else {
123 		preunum = fmd_fmri_strdup(rawunum);
124 		presz = strlen(preunum) + 1;
125 
126 		for (i = 0; i < presz - 1; i++) {
127 			if (preunum[i] == ':' && preunum[i + 1] == ' ') {
128 				bcopy(preunum + i + 2, preunum + i + 1,
129 				    presz - (i + 2));
130 			} else if (preunum[i] == ' ') {
131 				preunum[i] = ',';
132 			}
133 		}
134 
135 		escunum = fmd_fmri_strescape(preunum);
136 		fmd_fmri_free(preunum, presz);
137 
138 		/* LINTED: variable format specifier */
139 		size = snprintf(buf, buflen, format, escunum, val);
140 		fmd_fmri_strfree(escunum);
141 	}
142 
143 	return (size);
144 }
145 
146 int
147 fmd_fmri_expand(nvlist_t *nvl)
148 {
149 	char *unum, **serids;
150 	uint_t nnvlserids;
151 	size_t nserids;
152 	int rc, err = 0;
153 	topo_hdl_t *thp;
154 
155 	if ((mem_fmri_get_unum(nvl, &unum) < 0) || (*unum == '\0'))
156 		return (fmd_fmri_set_errno(EINVAL));
157 
158 	/*
159 	 * If the mem-scheme topology exports this method expand(), invoke it.
160 	 */
161 	if ((thp = fmd_fmri_topo_hold(TOPO_VERSION)) == NULL)
162 		return (fmd_fmri_set_errno(EINVAL));
163 	rc = topo_fmri_expand(thp, nvl, &err);
164 	fmd_fmri_topo_rele(thp);
165 	if (err != ETOPO_METHOD_NOTSUP)
166 		return (rc);
167 
168 	if ((rc = nvlist_lookup_string_array(nvl, FM_FMRI_MEM_SERIAL_ID,
169 	    &serids, &nnvlserids)) == 0) { /* already have serial #s */
170 		return (0);
171 	} else if (rc != ENOENT)
172 		return (fmd_fmri_set_errno(EINVAL));
173 
174 	if (mem_get_serids_by_unum(unum, &serids, &nserids) < 0) {
175 		/* errno is set for us */
176 		if (errno == ENOTSUP)
177 			return (0); /* nothing to add - no s/n support */
178 		else
179 			return (-1);
180 	}
181 
182 	rc = nvlist_add_string_array(nvl, FM_FMRI_MEM_SERIAL_ID, serids,
183 	    nserids);
184 
185 	mem_strarray_free(serids, nserids);
186 
187 	if (rc != 0)
188 		return (fmd_fmri_set_errno(EINVAL));
189 	else
190 		return (0);
191 }
192 
193 #ifdef sparc
194 static int
195 serids_eq(char **serids1, uint_t nserids1, char **serids2, uint_t nserids2)
196 {
197 	int i;
198 
199 	if (nserids1 != nserids2)
200 		return (0);
201 
202 	for (i = 0; i < nserids1; i++) {
203 		if (strcmp(serids1[i], serids2[i]) != 0)
204 			return (0);
205 	}
206 
207 	return (1);
208 }
209 #endif /* sparc */
210 
211 int
212 fmd_fmri_present(nvlist_t *nvl)
213 {
214 	char *unum = NULL;
215 	int rc, err = 0;
216 	struct topo_hdl *thp;
217 #ifdef sparc
218 	char **nvlserids, **serids;
219 	uint_t nnvlserids;
220 	size_t nserids;
221 #else
222 	nvlist_t *unum_nvl;
223 	nvlist_t *nvlcp = NULL;
224 	uint64_t val;
225 #endif /* sparc */
226 
227 	if (mem_fmri_get_unum(nvl, &unum) < 0)
228 		return (-1); /* errno is set for us */
229 
230 #ifdef sparc
231 	/*
232 	 * If the mem-scheme topology exports this method present(), invoke it.
233 	 */
234 	if ((thp = fmd_fmri_topo_hold(TOPO_VERSION)) == NULL)
235 		return (fmd_fmri_set_errno(EINVAL));
236 	rc = topo_fmri_present(thp, nvl, &err);
237 	fmd_fmri_topo_rele(thp);
238 	if (err != ETOPO_METHOD_NOTSUP)
239 		return (rc);
240 
241 	if (nvlist_lookup_string_array(nvl, FM_FMRI_MEM_SERIAL_ID, &nvlserids,
242 	    &nnvlserids) != 0) {
243 		/*
244 		 * Some mem scheme FMRIs don't have serial ids because
245 		 * either the platform does not support them, or because
246 		 * the FMRI was created before support for serial ids was
247 		 * introduced.  If this is the case, assume it is there.
248 		 */
249 		if (mem.mem_dm == NULL)
250 			return (1);
251 		else
252 			return (fmd_fmri_set_errno(EINVAL));
253 	}
254 
255 	if (mem_get_serids_by_unum(unum, &serids, &nserids) < 0) {
256 		if (errno == ENOTSUP)
257 			return (1); /* assume it's there, no s/n support here */
258 		if (errno != ENOENT) {
259 			/*
260 			 * Errors are only signalled to the caller if they're
261 			 * the caller's fault.  This isn't - it's a failure on
262 			 * our part to burst or read the serial numbers.  We'll
263 			 * whine about it, and tell the caller the named
264 			 * module(s) isn't/aren't there.
265 			 */
266 			fmd_fmri_warn("failed to retrieve serial number for "
267 			    "unum %s", unum);
268 		}
269 		return (0);
270 	}
271 
272 	rc = serids_eq(serids, nserids, nvlserids, nnvlserids);
273 
274 	mem_strarray_free(serids, nserids);
275 #else
276 	/*
277 	 * On X86 we will invoke the topo is_present method passing in the
278 	 * unum, which is in hc scheme.  The libtopo hc-scheme is_present method
279 	 * will invoke the node-specific is_present method, which is implemented
280 	 * by the chip enumerator for rank nodes.  The rank node's is_present
281 	 * method will compare the serial number in the unum with the current
282 	 * serial to determine if the same DIMM is present.
283 	 */
284 	if ((thp = fmd_fmri_topo_hold(TOPO_VERSION)) == NULL) {
285 		fmd_fmri_warn("failed to get handle to topology");
286 		return (-1);
287 	}
288 	if (topo_fmri_str2nvl(thp, unum, &unum_nvl, &err) == 0) {
289 		rc = topo_fmri_present(thp, unum_nvl, &err);
290 		nvlist_free(unum_nvl);
291 	} else
292 		rc = fmd_fmri_set_errno(EINVAL);
293 	fmd_fmri_topo_rele(thp);
294 
295 	/*
296 	 * Need to check if this is a valid page too. if "isretired" returns
297 	 * EINVAL, assume page invalid and return not_present.
298 	 */
299 	if (rc == 1 && nvlist_lookup_uint64(nvl, FM_FMRI_MEM_OFFSET, &val) ==
300 	    0 && nvlist_lookup_uint64(nvl, FM_FMRI_MEM_PHYSADDR, &val) == 0 &&
301 	    mem_unum_rewrite(nvl, &nvlcp) == 0 && nvlcp != NULL) {
302 		int page_err, rval = page_isretired(nvlcp, &page_err);
303 		if (rval == FMD_AGENT_RETIRE_DONE && page_err == EINVAL)
304 			rc = 0;
305 		nvlist_free(nvlcp);
306 	}
307 #endif	/* sparc */
308 	return (rc);
309 }
310 
311 int
312 fmd_fmri_replaced(nvlist_t *nvl)
313 {
314 	char *unum = NULL;
315 	int rc, err = 0;
316 	struct topo_hdl *thp;
317 #ifdef sparc
318 	char **nvlserids, **serids;
319 	uint_t nnvlserids;
320 	size_t nserids;
321 #else
322 	nvlist_t *unum_nvl;
323 	nvlist_t *nvlcp = NULL;
324 	uint64_t val;
325 #endif /* sparc */
326 
327 	if (mem_fmri_get_unum(nvl, &unum) < 0)
328 		return (-1); /* errno is set for us */
329 
330 #ifdef sparc
331 	/*
332 	 * If the mem-scheme topology exports this method replaced(), invoke it.
333 	 */
334 	if ((thp = fmd_fmri_topo_hold(TOPO_VERSION)) == NULL)
335 		return (fmd_fmri_set_errno(EINVAL));
336 	rc = topo_fmri_replaced(thp, nvl, &err);
337 	fmd_fmri_topo_rele(thp);
338 	if (err != ETOPO_METHOD_NOTSUP)
339 		return (rc);
340 
341 	if (nvlist_lookup_string_array(nvl, FM_FMRI_MEM_SERIAL_ID, &nvlserids,
342 	    &nnvlserids) != 0) {
343 		/*
344 		 * Some mem scheme FMRIs don't have serial ids because
345 		 * either the platform does not support them, or because
346 		 * the FMRI was created before support for serial ids was
347 		 * introduced.  If this is the case, assume it is there.
348 		 */
349 		if (mem.mem_dm == NULL)
350 			return (FMD_OBJ_STATE_UNKNOWN);
351 		else
352 			return (fmd_fmri_set_errno(EINVAL));
353 	}
354 
355 	if (mem_get_serids_by_unum(unum, &serids, &nserids) < 0) {
356 		if (errno == ENOTSUP)
357 			return (FMD_OBJ_STATE_UNKNOWN);
358 		if (errno != ENOENT) {
359 			/*
360 			 * Errors are only signalled to the caller if they're
361 			 * the caller's fault.  This isn't - it's a failure on
362 			 * our part to burst or read the serial numbers.  We'll
363 			 * whine about it, and tell the caller the named
364 			 * module(s) isn't/aren't there.
365 			 */
366 			fmd_fmri_warn("failed to retrieve serial number for "
367 			    "unum %s", unum);
368 		}
369 		return (FMD_OBJ_STATE_NOT_PRESENT);
370 	}
371 
372 	rc = serids_eq(serids, nserids, nvlserids, nnvlserids) ?
373 	    FMD_OBJ_STATE_STILL_PRESENT : FMD_OBJ_STATE_REPLACED;
374 
375 	mem_strarray_free(serids, nserids);
376 #else
377 	/*
378 	 * On X86 we will invoke the topo is_replaced method passing in the
379 	 * unum, which is in hc scheme.  The libtopo hc-scheme is_replaced
380 	 * method will invoke the node-specific is_replaced method, which is
381 	 * implemented by the chip enumerator for rank nodes.  The rank node's
382 	 * is_replaced method will compare the serial number in the unum with
383 	 * the current serial to determine if the same DIMM is replaced.
384 	 */
385 	if ((thp = fmd_fmri_topo_hold(TOPO_VERSION)) == NULL) {
386 		fmd_fmri_warn("failed to get handle to topology");
387 		return (-1);
388 	}
389 	if (topo_fmri_str2nvl(thp, unum, &unum_nvl, &err) == 0) {
390 		rc = topo_fmri_replaced(thp, unum_nvl, &err);
391 		nvlist_free(unum_nvl);
392 	} else
393 		rc = fmd_fmri_set_errno(EINVAL);
394 	fmd_fmri_topo_rele(thp);
395 
396 	/*
397 	 * Need to check if this is a valid page too. if "isretired" returns
398 	 * EINVAL, assume page invalid and return not_present.
399 	 */
400 	if ((rc == FMD_OBJ_STATE_STILL_PRESENT ||
401 	    rc == FMD_OBJ_STATE_UNKNOWN) &&
402 	    nvlist_lookup_uint64(nvl, FM_FMRI_MEM_OFFSET, &val) == 0 &&
403 	    nvlist_lookup_uint64(nvl, FM_FMRI_MEM_PHYSADDR, &val) == 0 &&
404 	    mem_unum_rewrite(nvl, &nvlcp) == 0 && nvlcp != NULL) {
405 		int page_err, rval = page_isretired(nvlcp, &page_err);
406 		if (rval == FMD_AGENT_RETIRE_DONE && page_err == EINVAL)
407 			rc = FMD_OBJ_STATE_NOT_PRESENT;
408 		nvlist_free(nvlcp);
409 	}
410 #endif	/* sparc */
411 	return (rc);
412 }
413 
414 int
415 fmd_fmri_contains(nvlist_t *er, nvlist_t *ee)
416 {
417 	int rc, err = 0;
418 	struct topo_hdl *thp;
419 	char *erunum, *eeunum;
420 	uint64_t erval = 0, eeval = 0;
421 
422 	/*
423 	 * If the mem-scheme topology exports this method contains(), invoke it.
424 	 */
425 	if ((thp = fmd_fmri_topo_hold(TOPO_VERSION)) == NULL)
426 		return (fmd_fmri_set_errno(EINVAL));
427 	rc = topo_fmri_contains(thp, er, ee, &err);
428 	fmd_fmri_topo_rele(thp);
429 	if (err != ETOPO_METHOD_NOTSUP)
430 		return (rc);
431 
432 	if (mem_fmri_get_unum(er, &erunum) < 0 ||
433 	    mem_fmri_get_unum(ee, &eeunum) < 0)
434 		return (-1); /* errno is set for us */
435 
436 	if (mem_unum_contains(erunum, eeunum) <= 0)
437 		return (0); /* can't parse/match, so assume no containment */
438 
439 	if (nvlist_lookup_uint64(er, FM_FMRI_MEM_OFFSET, &erval) == 0) {
440 		return (nvlist_lookup_uint64(ee,
441 		    FM_FMRI_MEM_OFFSET, &eeval) == 0 && erval == eeval);
442 	}
443 
444 	if (nvlist_lookup_uint64(er, FM_FMRI_MEM_PHYSADDR, &erval) == 0) {
445 		return (nvlist_lookup_uint64(ee,
446 		    FM_FMRI_MEM_PHYSADDR, &eeval) == 0 && erval == eeval);
447 	}
448 
449 	return (1);
450 }
451 
452 /*
453  * We can only make a usable/unusable determination for pages.  Mem FMRIs
454  * without page addresses will be reported as usable since Solaris has no
455  * way at present to dynamically disable an entire DIMM or DIMM pair.
456  */
457 int
458 fmd_fmri_unusable(nvlist_t *nvl)
459 {
460 	uint64_t val1, val2;
461 	uint8_t version;
462 	int rc, err1 = 0, err2;
463 	nvlist_t *nvlcp = NULL;
464 	int retval;
465 	topo_hdl_t *thp;
466 
467 	if (nvlist_lookup_uint8(nvl, FM_VERSION, &version) != 0 ||
468 	    version > FM_MEM_SCHEME_VERSION)
469 		return (fmd_fmri_set_errno(EINVAL));
470 
471 	/*
472 	 * If the mem-scheme topology exports this method unusable(), invoke it.
473 	 */
474 	if ((thp = fmd_fmri_topo_hold(TOPO_VERSION)) == NULL)
475 		return (fmd_fmri_set_errno(EINVAL));
476 	rc = topo_fmri_unusable(thp, nvl, &err1);
477 	fmd_fmri_topo_rele(thp);
478 	if (err1 != ETOPO_METHOD_NOTSUP)
479 		return (rc);
480 
481 	err1 = nvlist_lookup_uint64(nvl, FM_FMRI_MEM_OFFSET, &val1);
482 	err2 = nvlist_lookup_uint64(nvl, FM_FMRI_MEM_PHYSADDR, &val2);
483 
484 	if (err1 == ENOENT && err2 == ENOENT)
485 		return (0); /* no page, so assume it's still usable */
486 
487 	if ((err1 != 0 && err1 != ENOENT) || (err2 != 0 && err2 != ENOENT))
488 		return (fmd_fmri_set_errno(EINVAL));
489 
490 	if ((rc = mem_unum_rewrite(nvl, &nvlcp)) != 0)
491 		return (fmd_fmri_set_errno(rc));
492 
493 	/*
494 	 * Ask the kernel if the page is retired, using either the rewritten
495 	 * hc FMRI or the original mem FMRI with the specified offset or PA.
496 	 * Refer to the kernel's page_retire_check() for the error codes.
497 	 */
498 	rc = page_isretired(nvlcp ? nvlcp : nvl, NULL);
499 
500 	if (rc == FMD_AGENT_RETIRE_FAIL) {
501 		/*
502 		 * The page is not retired and is not scheduled for retirement
503 		 * (i.e. no request pending and has not seen any errors)
504 		 */
505 		retval = 0;
506 	} else if (rc == FMD_AGENT_RETIRE_DONE ||
507 	    rc == FMD_AGENT_RETIRE_ASYNC) {
508 		/*
509 		 * The page has been retired, is in the process of being
510 		 * retired, or doesn't exist.  The latter is valid if the page
511 		 * existed in the past but has been DR'd out.
512 		 */
513 		retval = 1;
514 	} else {
515 		/*
516 		 * Errors are only signalled to the caller if they're the
517 		 * caller's fault.  This isn't - it's a failure of the
518 		 * retirement-check code.  We'll whine about it and tell
519 		 * the caller the page is unusable.
520 		 */
521 		fmd_fmri_warn("failed to determine page %s=%llx usability: "
522 		    "rc=%d errno=%d\n", err1 == 0 ? FM_FMRI_MEM_OFFSET :
523 		    FM_FMRI_MEM_PHYSADDR, err1 == 0 ? (u_longlong_t)val1 :
524 		    (u_longlong_t)val2, rc, errno);
525 		retval = 1;
526 	}
527 
528 	if (nvlcp)
529 		nvlist_free(nvlcp);
530 
531 	return (retval);
532 }
533 
/*
 * Scheme initialisation entry point: runs mem_discover() and hands its
 * result back to the caller unchanged.
 */
int
fmd_fmri_init(void)
{
	int rc;

	rc = mem_discover();
	return (rc);
}
539 
540 void
541 fmd_fmri_fini(void)
542 {
543 	mem_dimm_map_t *dm, *em;
544 	mem_bank_map_t *bm, *cm;
545 	mem_grp_t *gm, *hm;
546 	mem_seg_map_t *sm, *tm;
547 
548 	for (dm = mem.mem_dm; dm != NULL; dm = em) {
549 		em = dm->dm_next;
550 		fmd_fmri_strfree(dm->dm_label);
551 		fmd_fmri_strfree(dm->dm_part);
552 		fmd_fmri_strfree(dm->dm_device);
553 		fmd_fmri_free(dm, sizeof (mem_dimm_map_t));
554 	}
555 	for (bm = mem.mem_bank; bm != NULL; bm = cm) {
556 		cm = bm->bm_next;
557 		fmd_fmri_free(bm, sizeof (mem_bank_map_t));
558 	}
559 	for (gm = mem.mem_group; gm != NULL; gm = hm) {
560 		hm = gm->mg_next;
561 		fmd_fmri_free(gm, sizeof (mem_grp_t));
562 	}
563 	for (sm = mem.mem_seg; sm != NULL; sm = tm) {
564 		tm = sm->sm_next;
565 		fmd_fmri_free(sm, sizeof (mem_seg_map_t));
566 	}
567 }
568