xref: /titanic_44/usr/src/cmd/fm/modules/sun4v/generic-mem/gmem_dimm.c (revision e2dcee5754c56d91c6e1ff847db294541069ca0d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 
26 /*
27  * Support routines for DIMMs.
28  */
29 
30 #include <gmem_mem.h>
31 #include <gmem_dimm.h>
32 #include <gmem.h>
33 #include <errno.h>
34 #include <limits.h>
35 #include <string.h>
36 #include <strings.h>
37 #include <fcntl.h>
38 #include <unistd.h>
39 #include <fm/fmd_api.h>
40 #include <fm/libtopo.h>
41 #include <sys/fm/protocol.h>
42 #include <sys/mem.h>
43 #include <sys/nvpair.h>
44 
45 nvlist_t *dimm_nvl;
46 
/*
 * Search key passed as the walker argument to find_dimm_hc_fmri().
 */
typedef struct dimmid {
	char serial[100];	/* serial number compared with each node's FRU */
	int type;		/* FINDFRU, FINDRSC or FINDASRU: FMRI to copy */
} dimmid_t;
51 
52 static int gmem_find_dimm_chip(nvlist_t *, uint32_t *);
53 
54 nvlist_t *
gmem_dimm_fru(gmem_dimm_t * dimm)55 gmem_dimm_fru(gmem_dimm_t *dimm)
56 {
57 	return (dimm->dimm_asru_nvl);
58 }
59 
60 static void
gmem_dimm_free(fmd_hdl_t * hdl,gmem_dimm_t * dimm,int destroy)61 gmem_dimm_free(fmd_hdl_t *hdl, gmem_dimm_t *dimm, int destroy)
62 {
63 	gmem_case_t *cc = &dimm->dimm_case;
64 	int i;
65 	gmem_mq_t *q;
66 	tstamp_t *tsp, *next;
67 
68 	if (cc->cc_cp != NULL) {
69 		gmem_case_fini(hdl, cc->cc_cp, destroy);
70 		if (cc->cc_serdnm != NULL) {
71 			if (fmd_serd_exists(hdl, cc->cc_serdnm) &&
72 			    destroy)
73 				fmd_serd_destroy(hdl, cc->cc_serdnm);
74 			fmd_hdl_strfree(hdl, cc->cc_serdnm);
75 		}
76 	}
77 
78 	gmem_fmri_fini(hdl, &dimm->dimm_asru, destroy);
79 
80 	for (i = 0; i < GMEM_MAX_CKWDS; i++) {
81 		while ((q = gmem_list_next(&dimm->mq_root[i])) != NULL) {
82 			if (q->mq_serdnm != NULL) {
83 				if (fmd_serd_exists(hdl, q->mq_serdnm))
84 					fmd_serd_destroy(hdl, q->mq_serdnm);
85 				fmd_hdl_strfree(hdl, q->mq_serdnm);
86 				q->mq_serdnm = NULL;
87 			}
88 
89 			for (tsp = gmem_list_next(&q->mq_dupce_tstamp);
90 			    tsp != NULL; tsp = next) {
91 				next = gmem_list_next(tsp);
92 				gmem_list_delete(&q->mq_dupce_tstamp,
93 				    &tsp->ts_l);
94 				fmd_hdl_free(hdl, tsp, sizeof (tstamp_t));
95 			}
96 
97 			gmem_list_delete(&dimm->mq_root[i], q);
98 			fmd_hdl_free(hdl, q, sizeof (gmem_mq_t));
99 		}
100 	}
101 
102 	if (destroy)
103 		fmd_buf_destroy(hdl, NULL, dimm->dimm_bufname);
104 
105 	gmem_list_delete(&gmem.gm_dimms, dimm);
106 	fmd_hdl_free(hdl, dimm, sizeof (gmem_dimm_t));
107 }
108 
109 void
gmem_dimm_destroy(fmd_hdl_t * hdl,gmem_dimm_t * dimm)110 gmem_dimm_destroy(fmd_hdl_t *hdl, gmem_dimm_t *dimm)
111 {
112 	fmd_stat_destroy(hdl, 1, &(dimm->dimm_retstat));
113 	gmem_dimm_free(hdl, dimm, FMD_B_TRUE);
114 }
115 
116 static gmem_dimm_t *
dimm_lookup_by_serial(const char * serial)117 dimm_lookup_by_serial(const char *serial)
118 {
119 	gmem_dimm_t *dimm;
120 
121 	for (dimm = gmem_list_next(&gmem.gm_dimms); dimm != NULL;
122 	    dimm = gmem_list_next(dimm)) {
123 		if (strcmp(dimm->dimm_serial, serial) == 0)
124 			return (dimm);
125 	}
126 
127 	return (NULL);
128 }
129 
130 gmem_dimm_t *
gmem_dimm_create(fmd_hdl_t * hdl,nvlist_t * asru,nvlist_t * det)131 gmem_dimm_create(fmd_hdl_t *hdl, nvlist_t *asru, nvlist_t *det)
132 {
133 	gmem_dimm_t *dimm;
134 	nvlist_t *fmri;
135 	char *serial;
136 	uint32_t chip_id;
137 
138 	if (nvlist_lookup_string(asru, FM_FMRI_HC_SERIAL_ID, &serial) != 0) {
139 		fmd_hdl_debug(hdl, "Unable to get dimm serial\n");
140 		return (NULL);
141 	}
142 
143 	if (nvlist_dup(asru, &fmri, 0) != 0) {
144 		fmd_hdl_debug(hdl, "dimm create nvlist dup failed");
145 		return (NULL);
146 	}
147 
148 	(void) gmem_find_dimm_chip(det, &chip_id);
149 
150 	fmd_hdl_debug(hdl, "dimm_create: creating new DIMM serial=%s\n",
151 	    serial);
152 	GMEM_STAT_BUMP(dimm_creat);
153 
154 	dimm = fmd_hdl_zalloc(hdl, sizeof (gmem_dimm_t), FMD_SLEEP);
155 	dimm->dimm_nodetype = GMEM_NT_DIMM;
156 	dimm->dimm_version = GMEM_DIMM_VERSION;
157 	dimm->dimm_phys_addr_low = ULLONG_MAX;
158 	dimm->dimm_phys_addr_hi = 0;
159 	dimm->dimm_syl_error = USHRT_MAX;
160 	dimm->dimm_chipid = chip_id;
161 
162 	gmem_bufname(dimm->dimm_bufname, sizeof (dimm->dimm_bufname), "dimm_%s",
163 	    serial);
164 	gmem_fmri_init(hdl, &dimm->dimm_asru, fmri, "dimm_asru_%s", serial);
165 
166 	nvlist_free(fmri);
167 
168 	(void) nvlist_lookup_string(dimm->dimm_asru_nvl, FM_FMRI_HC_SERIAL_ID,
169 	    (char **)&dimm->dimm_serial);
170 
171 	gmem_mem_retirestat_create(hdl, &dimm->dimm_retstat, dimm->dimm_serial,
172 	    0, GMEM_DIMM_STAT_PREFIX);
173 
174 	gmem_list_append(&gmem.gm_dimms, dimm);
175 	gmem_dimm_dirty(hdl, dimm);
176 
177 	return (dimm);
178 }
179 
180 gmem_dimm_t *
gmem_dimm_lookup(fmd_hdl_t * hdl,nvlist_t * asru)181 gmem_dimm_lookup(fmd_hdl_t *hdl, nvlist_t *asru)
182 {
183 	gmem_dimm_t *dimm;
184 	char *serial;
185 	int err;
186 
187 	err = nvlist_lookup_string(asru, FM_FMRI_HC_SERIAL_ID, &serial);
188 
189 	if (err != 0) {
190 		fmd_hdl_debug(hdl, "Can't get dimm serial number\n");
191 		GMEM_STAT_BUMP(bad_mem_resource);
192 		return (NULL);
193 	}
194 
195 	dimm = dimm_lookup_by_serial(serial);
196 	return (dimm);
197 }
198 
199 
200 static gmem_dimm_t *
gmem_dimm_v0tov1(fmd_hdl_t * hdl,gmem_dimm_0_t * old,size_t oldsz)201 gmem_dimm_v0tov1(fmd_hdl_t *hdl, gmem_dimm_0_t *old, size_t oldsz)
202 {
203 	gmem_dimm_t *new;
204 	if (oldsz != sizeof (gmem_dimm_0_t)) {
205 		fmd_hdl_abort(hdl, "size of state doesn't match size of "
206 		    "version 0 state (%u bytes).\n", sizeof (gmem_dimm_0_t));
207 	}
208 
209 	new = fmd_hdl_zalloc(hdl, sizeof (gmem_dimm_t), FMD_SLEEP);
210 	new->dimm_header = old->dimm0_header;
211 	new->dimm_version = GMEM_DIMM_VERSION;
212 	new->dimm_asru = old->dimm0_asru;
213 	new->dimm_nretired = old->dimm0_nretired;
214 	new->dimm_phys_addr_hi = 0;
215 	new->dimm_phys_addr_low = ULLONG_MAX;
216 
217 	fmd_hdl_free(hdl, old, oldsz);
218 	return (new);
219 }
220 
221 static gmem_dimm_t *
gmem_dimm_wrapv1(fmd_hdl_t * hdl,gmem_dimm_pers_t * pers,size_t psz)222 gmem_dimm_wrapv1(fmd_hdl_t *hdl, gmem_dimm_pers_t *pers, size_t psz)
223 {
224 	gmem_dimm_t *dimm;
225 
226 	if (psz != sizeof (gmem_dimm_pers_t)) {
227 		fmd_hdl_abort(hdl, "size of state doesn't match size of "
228 		    "version 0 state (%u bytes).\n", sizeof (gmem_dimm_pers_t));
229 	}
230 
231 	dimm = fmd_hdl_zalloc(hdl, sizeof (gmem_dimm_t), FMD_SLEEP);
232 	bcopy(pers, dimm, sizeof (gmem_dimm_pers_t));
233 	fmd_hdl_free(hdl, pers, psz);
234 	return (dimm);
235 }
236 
237 void *
gmem_dimm_restore(fmd_hdl_t * hdl,fmd_case_t * cp,gmem_case_ptr_t * ptr)238 gmem_dimm_restore(fmd_hdl_t *hdl, fmd_case_t *cp, gmem_case_ptr_t *ptr)
239 {
240 	gmem_dimm_t *dimm;
241 
242 	for (dimm = gmem_list_next(&gmem.gm_dimms); dimm != NULL;
243 	    dimm = gmem_list_next(dimm)) {
244 		if (strcmp(dimm->dimm_bufname, ptr->ptr_name) == 0)
245 			break;
246 	}
247 
248 	if (dimm == NULL) {
249 		int migrated = 0;
250 		size_t dimmsz;
251 
252 		fmd_hdl_debug(hdl, "restoring dimm from %s\n", ptr->ptr_name);
253 
254 		if ((dimmsz = fmd_buf_size(hdl, NULL, ptr->ptr_name)) == 0) {
255 			fmd_hdl_abort(hdl, "dimm referenced by case %s does "
256 			    "not exist in saved state\n",
257 			    fmd_case_uuid(hdl, cp));
258 		} else if (dimmsz > GMEM_DIMM_MAXSIZE ||
259 		    dimmsz < GMEM_DIMM_MINSIZE) {
260 			fmd_hdl_abort(hdl, "dimm buffer referenced by case %s "
261 			    "is out of bounds (is %u bytes, max %u, min %u)\n",
262 			    fmd_case_uuid(hdl, cp), dimmsz,
263 			    GMEM_DIMM_MAXSIZE, GMEM_DIMM_MINSIZE);
264 		}
265 
266 		if ((dimm = gmem_buf_read(hdl, NULL, ptr->ptr_name,
267 		    dimmsz)) == NULL) {
268 			fmd_hdl_abort(hdl, "failed to read dimm buf %s",
269 			    ptr->ptr_name);
270 		}
271 
272 		fmd_hdl_debug(hdl, "found %d in version field\n",
273 		    dimm->dimm_version);
274 
275 		if (GMEM_DIMM_VERSIONED(dimm)) {
276 
277 			switch (dimm->dimm_version) {
278 			case GMEM_DIMM_VERSION_1:
279 				dimm = gmem_dimm_wrapv1(hdl,
280 				    (gmem_dimm_pers_t *)dimm, dimmsz);
281 				break;
282 			default:
283 				fmd_hdl_abort(hdl, "unknown version (found %d) "
284 				    "for dimm state referenced by case %s.\n",
285 				    dimm->dimm_version, fmd_case_uuid(hdl, cp));
286 				break;
287 			}
288 		} else {
289 			dimm = gmem_dimm_v0tov1(hdl, (gmem_dimm_0_t *)dimm,
290 			    dimmsz);
291 			migrated = 1;
292 		}
293 
294 		if (migrated) {
295 			GMEM_STAT_BUMP(dimm_migrat);
296 			gmem_dimm_dirty(hdl, dimm);
297 		}
298 
299 		gmem_fmri_restore(hdl, &dimm->dimm_asru);
300 
301 		if ((errno = nvlist_lookup_string(dimm->dimm_asru_nvl,
302 		    FM_FMRI_HC_SERIAL_ID, (char **)&dimm->dimm_serial)) != 0)
303 			fmd_hdl_abort(hdl,
304 			    "failed to retrieve serial from asru");
305 
306 
307 		gmem_mem_retirestat_create(hdl, &dimm->dimm_retstat,
308 		    dimm->dimm_serial, dimm->dimm_nretired,
309 		    GMEM_DIMM_STAT_PREFIX);
310 
311 		gmem_list_append(&gmem.gm_dimms, dimm);
312 	}
313 
314 	switch (ptr->ptr_subtype) {
315 	case GMEM_PTR_DIMM_CASE:
316 		gmem_mem_case_restore(hdl, &dimm->dimm_case, cp, "dimm",
317 		    dimm->dimm_serial);
318 		break;
319 	default:
320 		fmd_hdl_abort(hdl, "invalid %s subtype %d\n",
321 		    ptr->ptr_name, ptr->ptr_subtype);
322 	}
323 
324 	return (dimm);
325 }
326 
327 void
gmem_dimm_validate(fmd_hdl_t * hdl)328 gmem_dimm_validate(fmd_hdl_t *hdl)
329 {
330 	gmem_dimm_t *dimm, *next;
331 
332 	for (dimm = gmem_list_next(&gmem.gm_dimms); dimm != NULL; dimm = next) {
333 		next = gmem_list_next(dimm);
334 
335 		if (!gmem_dimm_present(hdl, dimm->dimm_asru_nvl))
336 			gmem_dimm_destroy(hdl, dimm);
337 	}
338 }
339 
340 void
gmem_dimm_dirty(fmd_hdl_t * hdl,gmem_dimm_t * dimm)341 gmem_dimm_dirty(fmd_hdl_t *hdl, gmem_dimm_t *dimm)
342 {
343 	if (fmd_buf_size(hdl, NULL, dimm->dimm_bufname) !=
344 	    sizeof (gmem_dimm_pers_t))
345 		fmd_buf_destroy(hdl, NULL, dimm->dimm_bufname);
346 
347 	/* No need to rewrite the FMRIs in the dimm - they don't change */
348 	fmd_buf_write(hdl, NULL, dimm->dimm_bufname, &dimm->dimm_pers,
349 	    sizeof (gmem_dimm_pers_t));
350 }
351 
352 void
gmem_dimm_gc(fmd_hdl_t * hdl)353 gmem_dimm_gc(fmd_hdl_t *hdl)
354 {
355 	gmem_dimm_validate(hdl);
356 }
357 
358 void
gmem_dimm_fini(fmd_hdl_t * hdl)359 gmem_dimm_fini(fmd_hdl_t *hdl)
360 {
361 	gmem_dimm_t *dimm;
362 
363 	while ((dimm = gmem_list_next(&gmem.gm_dimms)) != NULL)
364 		gmem_dimm_free(hdl, dimm, FMD_B_FALSE);
365 }
366 
367 
368 /*ARGSUSED*/
369 static int
find_dimm_hc_fmri(topo_hdl_t * thp,tnode_t * node,void * arg)370 find_dimm_hc_fmri(topo_hdl_t *thp, tnode_t *node, void *arg)
371 {
372 
373 	char *topo_sn;
374 	dimmid_t *dimmid = (dimmid_t *)arg;
375 	nvlist_t *fru = NULL;
376 	nvlist_t *rsc = NULL;
377 	nvlist_t *asru = NULL;
378 	int err;
379 
380 	if (topo_node_fru(node, &fru, NULL, &err) < 0)
381 		return (TOPO_WALK_NEXT);
382 
383 	err = nvlist_lookup_string(fru, FM_FMRI_HC_SERIAL_ID, &topo_sn);
384 	if (err != 0) {
385 		nvlist_free(fru);
386 		return (TOPO_WALK_NEXT);
387 	}
388 
389 	if (strcmp(dimmid->serial, topo_sn) != 0) {
390 		nvlist_free(fru);
391 		return (TOPO_WALK_NEXT);
392 	}
393 
394 	switch (dimmid->type) {
395 		case FINDFRU:
396 			(void) nvlist_dup(fru, &dimm_nvl, NV_UNIQUE_NAME);
397 			break;
398 		case FINDRSC:
399 			(void) topo_node_resource(node, &rsc, &err);
400 			if (rsc != NULL) {
401 				(void) nvlist_dup(rsc, &dimm_nvl,
402 				    NV_UNIQUE_NAME);
403 				nvlist_free(rsc);
404 			}
405 			break;
406 		case FINDASRU:
407 			(void) topo_node_asru(node, &asru, NULL, &err);
408 			if (asru != NULL) {
409 				(void) nvlist_dup(asru, &dimm_nvl,
410 				    NV_UNIQUE_NAME);
411 				nvlist_free(asru);
412 			}
413 			break;
414 		default:
415 			break;
416 	}
417 	nvlist_free(fru);
418 	return (TOPO_WALK_TERMINATE);
419 }
420 
421 nvlist_t *
gmem_find_dimm_by_sn(fmd_hdl_t * hdl,dimmid_t * dimmid)422 gmem_find_dimm_by_sn(fmd_hdl_t *hdl, dimmid_t *dimmid) {
423 	topo_hdl_t *thp;
424 	topo_walk_t *twp;
425 	int err;
426 	dimm_nvl = NULL;
427 
428 	if ((thp = fmd_hdl_topo_hold(hdl, TOPO_VERSION)) == NULL)
429 		return (NULL);
430 
431 	if ((twp = topo_walk_init(thp, FM_FMRI_SCHEME_HC,
432 	    find_dimm_hc_fmri, dimmid, &err)) == NULL) {
433 		fmd_hdl_topo_rele(hdl, thp);
434 		return (NULL);
435 	}
436 
437 	(void) topo_walk_step(twp, TOPO_WALK_CHILD);
438 	topo_walk_fini(twp);
439 	fmd_hdl_topo_rele(hdl, thp);
440 	return (dimm_nvl);
441 }
442 
443 nvlist_t *
gmem_find_dimm_fru(fmd_hdl_t * hdl,char * sn)444 gmem_find_dimm_fru(fmd_hdl_t *hdl, char *sn)
445 {
446 	dimmid_t fru;
447 	(void) strcpy(fru.serial, sn);
448 	fru.type = FINDFRU;
449 	return (gmem_find_dimm_by_sn(hdl, &fru));
450 }
451 
452 nvlist_t *
gmem_find_dimm_rsc(fmd_hdl_t * hdl,char * sn)453 gmem_find_dimm_rsc(fmd_hdl_t *hdl, char *sn)
454 {
455 	dimmid_t rsc;
456 	(void) strcpy(rsc.serial, sn);
457 	rsc.type = FINDRSC;
458 	return (gmem_find_dimm_by_sn(hdl, &rsc));
459 }
460 
461 nvlist_t *
gmem_find_dimm_asru(fmd_hdl_t * hdl,char * sn)462 gmem_find_dimm_asru(fmd_hdl_t *hdl, char *sn)
463 {
464 	dimmid_t asru;
465 	(void) strcpy(asru.serial, sn);
466 	asru.type = FINDASRU;
467 	return (gmem_find_dimm_by_sn(hdl, &asru));
468 }
469 
470 int
gmem_dimm_present(fmd_hdl_t * hdl,nvlist_t * asru)471 gmem_dimm_present(fmd_hdl_t *hdl, nvlist_t *asru)
472 {
473 	char *sn;
474 	nvlist_t *dimm = NULL;
475 
476 	if (nvlist_lookup_string(asru, FM_FMRI_HC_SERIAL_ID, &sn) != 0) {
477 		fmd_hdl_debug(hdl, "Unable to get dimm serial\n");
478 		return (0);
479 	}
480 	dimm = gmem_find_dimm_fru(hdl, sn);
481 	if (dimm == NULL) {
482 		fmd_hdl_debug(hdl, "Dimm sn=%s is not present\n", sn);
483 		return (0);
484 	}
485 	nvlist_free(dimm);
486 	return (1);
487 }
488 
489 static int
gmem_find_dimm_chip(nvlist_t * nvl,uint32_t * chip)490 gmem_find_dimm_chip(nvlist_t *nvl, uint32_t *chip)
491 {
492 
493 	char *name, *id, *end;
494 	nvlist_t **hcl;
495 	uint_t n;
496 	int i;
497 	int rc = 0;
498 	*chip = ULONG_MAX;
499 
500 	if (nvlist_lookup_nvlist_array(nvl, FM_FMRI_HC_LIST, &hcl, &n) < 0)
501 		return (0);
502 	for (i = 0; i < n; i++) {
503 		(void) nvlist_lookup_string(hcl[i], FM_FMRI_HC_NAME, &name);
504 		(void) nvlist_lookup_string(hcl[i], FM_FMRI_HC_ID, &id);
505 
506 		if (strcmp(name, "chip") == 0) {
507 			*chip = (uint32_t)strtoul(id, &end, 10);
508 			rc = 1;
509 			break;
510 		}
511 	}
512 	return (rc);
513 }
514 
515 /*ARGSUSED*/
516 int
gmem_same_datapath_dimms(fmd_hdl_t * hdl,gmem_dimm_t * d1,gmem_dimm_t * d2)517 gmem_same_datapath_dimms(fmd_hdl_t *hdl, gmem_dimm_t *d1, gmem_dimm_t *d2)
518 {
519 
520 	if (d1->dimm_chipid == ULONG_MAX || d2->dimm_chipid == ULONG_MAX)
521 		return (0);
522 
523 	if (d1->dimm_chipid == d2->dimm_chipid)
524 		return (1);
525 
526 	return (0);
527 }
528 
529 int
gmem_check_symbol_error(fmd_hdl_t * hdl,gmem_dimm_t * d,uint16_t upos)530 gmem_check_symbol_error(fmd_hdl_t *hdl, gmem_dimm_t *d, uint16_t upos)
531 {
532 	gmem_dimm_t *dimm = NULL, *next = NULL;
533 
534 	for (dimm = gmem_list_next(&gmem.gm_dimms); dimm != NULL;
535 	    dimm = next) {
536 		next = gmem_list_next(dimm);
537 		if (gmem_same_datapath_dimms(hdl, dimm, d) &&
538 		    dimm->dimm_syl_error == upos)
539 			return (1);
540 	}
541 	return (0);
542 }
543 
544 void
gmem_save_symbol_error(fmd_hdl_t * hdl,gmem_dimm_t * d,uint16_t upos)545 gmem_save_symbol_error(fmd_hdl_t *hdl, gmem_dimm_t *d, uint16_t upos)
546 {
547 	gmem_dimm_t *dimm = NULL, *next = NULL;
548 
549 	for (dimm = gmem_list_next(&gmem.gm_dimms); dimm != NULL;
550 	    dimm = next) {
551 		next = gmem_list_next(dimm);
552 		if (gmem_same_datapath_dimms(hdl, dimm, d))
553 			dimm->dimm_syl_error = upos;
554 	}
555 }
556