xref: /titanic_41/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_bank.c (revision 19d61fc7991644175873937566d932d8cf52912a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <cmd_mem.h>
29 #include <cmd_bank.h>
30 #include <cmd_dimm.h>
31 #include <cmd.h>
32 
33 #include <errno.h>
34 #include <string.h>
35 #include <strings.h>
36 #include <fcntl.h>
37 #include <unistd.h>
38 #include <fm/fmd_api.h>
39 #include <sys/fm/protocol.h>
40 #include <sys/mem.h>
41 #include <sys/nvpair.h>
42 
43 void
cmd_bank_add_dimm(fmd_hdl_t * hdl,cmd_bank_t * bank,cmd_dimm_t * dimm)44 cmd_bank_add_dimm(fmd_hdl_t *hdl, cmd_bank_t *bank, cmd_dimm_t *dimm)
45 {
46 	cmd_bank_memb_t *bm;
47 
48 	fmd_hdl_debug(hdl, "attaching dimm %s to bank %s\n", dimm->dimm_unum,
49 	    bank->bank_unum);
50 
51 	dimm->dimm_bank = bank;
52 
53 	bm = fmd_hdl_zalloc(hdl, sizeof (cmd_bank_memb_t), FMD_SLEEP);
54 	bm->bm_dimm = dimm;
55 	cmd_list_append(&bank->bank_dimms, bm);
56 }
57 
58 void
cmd_bank_remove_dimm(fmd_hdl_t * hdl,cmd_bank_t * bank,cmd_dimm_t * dimm)59 cmd_bank_remove_dimm(fmd_hdl_t *hdl, cmd_bank_t *bank, cmd_dimm_t *dimm)
60 {
61 	cmd_bank_memb_t *bm;
62 
63 	fmd_hdl_debug(hdl, "detaching dimm %s from bank %s\n", dimm->dimm_unum,
64 	    bank->bank_unum);
65 
66 	for (bm = cmd_list_next(&bank->bank_dimms); bm != NULL;
67 	    bm = cmd_list_next(bm)) {
68 		if (bm->bm_dimm != dimm)
69 			continue;
70 
71 		cmd_list_delete(&bank->bank_dimms, bm);
72 		dimm->dimm_bank = NULL;
73 		fmd_hdl_free(hdl, bm, sizeof (cmd_bank_memb_t));
74 		return;
75 	}
76 
77 	fmd_hdl_abort(hdl, "attempt to disconnect dimm from non-parent bank\n");
78 }
79 
80 static void
bank_dimmlist_create(fmd_hdl_t * hdl,cmd_bank_t * bank)81 bank_dimmlist_create(fmd_hdl_t *hdl, cmd_bank_t *bank)
82 {
83 	cmd_dimm_t *dimm;
84 
85 	for (dimm = cmd_list_next(&cmd.cmd_dimms); dimm != NULL;
86 	    dimm = cmd_list_next(dimm)) {
87 		if (fmd_nvl_fmri_contains(hdl, bank->bank_asru_nvl,
88 		    dimm->dimm_asru_nvl))
89 			cmd_bank_add_dimm(hdl, bank, dimm);
90 	}
91 }
92 
93 static void
bank_dimmlist_free(fmd_hdl_t * hdl,cmd_bank_t * bank)94 bank_dimmlist_free(fmd_hdl_t *hdl, cmd_bank_t *bank)
95 {
96 	cmd_bank_memb_t *bm;
97 
98 	while ((bm = cmd_list_next(&bank->bank_dimms)) != NULL) {
99 		cmd_list_delete(&bank->bank_dimms, bm);
100 		bm->bm_dimm->dimm_bank = NULL;
101 		fmd_hdl_free(hdl, bm, sizeof (cmd_bank_memb_t));
102 	}
103 }
104 
105 nvlist_t *
cmd_bank_fru(cmd_bank_t * bank)106 cmd_bank_fru(cmd_bank_t *bank)
107 {
108 	return (bank->bank_asru_nvl);
109 }
110 
111 nvlist_t *
cmd_bank_create_fault(fmd_hdl_t * hdl,cmd_bank_t * bank,const char * fltnm,uint_t cert)112 cmd_bank_create_fault(fmd_hdl_t *hdl, cmd_bank_t *bank, const char *fltnm,
113     uint_t cert)
114 {
115 	return (cmd_nvl_create_fault(hdl, fltnm, cert, bank->bank_asru_nvl,
116 	    bank->bank_asru_nvl, NULL));
117 }
118 
119 static void
bank_free(fmd_hdl_t * hdl,cmd_bank_t * bank,int destroy)120 bank_free(fmd_hdl_t *hdl, cmd_bank_t *bank, int destroy)
121 {
122 	if (bank->bank_case.cc_cp != NULL)
123 		cmd_case_fini(hdl, bank->bank_case.cc_cp, destroy);
124 
125 	bank_dimmlist_free(hdl, bank);
126 	cmd_fmri_fini(hdl, &bank->bank_asru, destroy);
127 
128 	if (destroy)
129 		fmd_buf_destroy(hdl, NULL, bank->bank_bufname);
130 	cmd_list_delete(&cmd.cmd_banks, bank);
131 	fmd_hdl_free(hdl, bank, sizeof (cmd_bank_t));
132 }
133 
134 void
cmd_bank_destroy(fmd_hdl_t * hdl,cmd_bank_t * bank)135 cmd_bank_destroy(fmd_hdl_t *hdl, cmd_bank_t *bank)
136 {
137 	fmd_stat_destroy(hdl, 1, &(bank->bank_retstat));
138 	bank_free(hdl, bank, FMD_B_TRUE);
139 }
140 
141 static cmd_bank_t *
bank_lookup_by_unum(const char * unum)142 bank_lookup_by_unum(const char *unum)
143 {
144 	cmd_bank_t *bank;
145 
146 	for (bank = cmd_list_next(&cmd.cmd_banks); bank != NULL;
147 	    bank = cmd_list_next(bank)) {
148 		if (strcmp(bank->bank_unum, unum) == 0)
149 			return (bank);
150 	}
151 
152 	return (NULL);
153 }
154 
155 cmd_bank_t *
cmd_bank_create(fmd_hdl_t * hdl,nvlist_t * asru)156 cmd_bank_create(fmd_hdl_t *hdl, nvlist_t *asru)
157 {
158 	cmd_bank_t *bank;
159 	const char *unum;
160 
161 	if (!fmd_nvl_fmri_present(hdl, asru)) {
162 		fmd_hdl_debug(hdl, "dimm_lookup: discarding old ereport\n");
163 		return (NULL);
164 	}
165 
166 	if ((unum = cmd_fmri_get_unum(asru)) == NULL) {
167 		CMD_STAT_BUMP(bad_mem_asru);
168 		return (NULL);
169 	}
170 
171 	fmd_hdl_debug(hdl, "bank_create: creating new bank %s\n", unum);
172 	CMD_STAT_BUMP(bank_creat);
173 
174 	bank = fmd_hdl_zalloc(hdl, sizeof (cmd_bank_t), FMD_SLEEP);
175 	bank->bank_nodetype = CMD_NT_BANK;
176 	bank->bank_version = CMD_BANK_VERSION;
177 
178 	cmd_bufname(bank->bank_bufname, sizeof (bank->bank_bufname), "bank_%s",
179 	    unum);
180 	cmd_fmri_init(hdl, &bank->bank_asru, asru, "bank_asru_%s", unum);
181 
182 	(void) nvlist_lookup_string(bank->bank_asru_nvl, FM_FMRI_MEM_UNUM,
183 	    (char **)&bank->bank_unum);
184 
185 	bank_dimmlist_create(hdl, bank);
186 
187 	cmd_mem_retirestat_create(hdl, &bank->bank_retstat, bank->bank_unum, 0,
188 	    CMD_BANK_STAT_PREFIX);
189 
190 	cmd_list_append(&cmd.cmd_banks, bank);
191 	cmd_bank_dirty(hdl, bank);
192 
193 	return (bank);
194 }
195 
196 cmd_bank_t *
cmd_bank_lookup(fmd_hdl_t * hdl,nvlist_t * asru)197 cmd_bank_lookup(fmd_hdl_t *hdl, nvlist_t *asru)
198 {
199 	cmd_bank_t *bank;
200 	const char *unum;
201 
202 	if ((unum = cmd_fmri_get_unum(asru)) == NULL) {
203 		CMD_STAT_BUMP(bad_mem_asru);
204 		return (NULL);
205 	}
206 
207 	bank = bank_lookup_by_unum(unum);
208 
209 	if (bank != NULL && !fmd_nvl_fmri_present(hdl, bank->bank_asru_nvl)) {
210 		fmd_hdl_debug(hdl, "bank_lookup: discarding old bank\n");
211 		cmd_bank_destroy(hdl, bank);
212 		return (NULL);
213 	}
214 
215 	return (bank);
216 }
217 
218 static cmd_bank_t *
bank_v0tov1(fmd_hdl_t * hdl,cmd_bank_0_t * old,size_t oldsz)219 bank_v0tov1(fmd_hdl_t *hdl, cmd_bank_0_t *old, size_t oldsz)
220 {
221 	cmd_bank_t *new;
222 
223 	if (oldsz != sizeof (cmd_bank_0_t)) {
224 		fmd_hdl_abort(hdl, "size of state doesn't match size of "
225 		    "version 0 state (%u bytes).\n", sizeof (cmd_bank_0_t));
226 	}
227 
228 	new = fmd_hdl_zalloc(hdl, sizeof (cmd_bank_t), FMD_SLEEP);
229 	new->bank_header = old->bank0_header;
230 	new->bank_version = CMD_BANK_VERSION;
231 	new->bank_asru = old->bank0_asru;
232 	new->bank_nretired = old->bank0_nretired;
233 
234 	fmd_hdl_free(hdl, old, oldsz);
235 	return (new);
236 }
237 
238 static cmd_bank_t *
bank_wrapv1(fmd_hdl_t * hdl,cmd_bank_pers_t * pers,size_t psz)239 bank_wrapv1(fmd_hdl_t *hdl, cmd_bank_pers_t *pers, size_t psz)
240 {
241 	cmd_bank_t *bank;
242 
243 	if (psz != sizeof (cmd_bank_pers_t)) {
244 		fmd_hdl_abort(hdl, "size of state doesn't match size of "
245 		    "version 1 state (%u bytes).\n", sizeof (cmd_bank_pers_t));
246 	}
247 
248 	bank = fmd_hdl_zalloc(hdl, sizeof (cmd_bank_t), FMD_SLEEP);
249 	bcopy(pers, bank, sizeof (cmd_bank_pers_t));
250 	fmd_hdl_free(hdl, pers, psz);
251 	return (bank);
252 }
253 
254 void *
cmd_bank_restore(fmd_hdl_t * hdl,fmd_case_t * cp,cmd_case_ptr_t * ptr)255 cmd_bank_restore(fmd_hdl_t *hdl, fmd_case_t *cp, cmd_case_ptr_t *ptr)
256 {
257 	cmd_bank_t *bank;
258 
259 	for (bank = cmd_list_next(&cmd.cmd_banks); bank != NULL;
260 	    bank = cmd_list_next(bank)) {
261 		if (strcmp(bank->bank_bufname, ptr->ptr_name) == 0)
262 			break;
263 	}
264 
265 	if (bank == NULL) {
266 		int migrated = 0;
267 		size_t banksz;
268 
269 		fmd_hdl_debug(hdl, "restoring bank from %s\n", ptr->ptr_name);
270 
271 		if ((banksz = fmd_buf_size(hdl, NULL, ptr->ptr_name)) == 0) {
272 			fmd_hdl_abort(hdl, "bank referenced by case %s does "
273 			    "not exist in saved state\n",
274 			    fmd_case_uuid(hdl, cp));
275 		} else if (banksz > CMD_BANK_MAXSIZE ||
276 		    banksz < CMD_BANK_MINSIZE) {
277 			fmd_hdl_abort(hdl, "bank buffer referenced by case %s "
278 			    "is out of bounds (is %u bytes, max %u, min %u)\n",
279 			    fmd_case_uuid(hdl, cp), banksz,
280 			    CMD_BANK_MAXSIZE, CMD_BANK_MAXSIZE);
281 		}
282 
283 		if ((bank = cmd_buf_read(hdl, NULL, ptr->ptr_name,
284 		    banksz)) == NULL) {
285 			fmd_hdl_abort(hdl, "failed to read bank buf %s",
286 			    ptr->ptr_name);
287 		}
288 
289 		fmd_hdl_debug(hdl, "found %d in version field\n",
290 		    bank->bank_version);
291 
292 		if (CMD_BANK_VERSIONED(bank)) {
293 			switch (bank->bank_version) {
294 			case CMD_BANK_VERSION_1:
295 				bank = bank_wrapv1(hdl, (cmd_bank_pers_t *)bank,
296 				    banksz);
297 				break;
298 			default:
299 				fmd_hdl_abort(hdl, "unknown version (found %d) "
300 				    "for bank state referenced by case %s.\n",
301 				    bank->bank_version, fmd_case_uuid(hdl, cp));
302 				break;
303 			}
304 		} else {
305 			bank = bank_v0tov1(hdl, (cmd_bank_0_t *)bank, banksz);
306 			migrated = 1;
307 		}
308 
309 		if (migrated) {
310 			CMD_STAT_BUMP(bank_migrat);
311 			cmd_bank_dirty(hdl, bank);
312 		}
313 
314 		cmd_fmri_restore(hdl, &bank->bank_asru);
315 
316 		if ((errno = nvlist_lookup_string(bank->bank_asru_nvl,
317 		    FM_FMRI_MEM_UNUM, (char **)&bank->bank_unum)) != 0)
318 			fmd_hdl_abort(hdl, "failed to retrieve nuum from asru");
319 
320 		bank_dimmlist_create(hdl, bank);
321 
322 		cmd_mem_retirestat_create(hdl, &bank->bank_retstat,
323 		    bank->bank_unum, bank->bank_nretired, CMD_BANK_STAT_PREFIX);
324 
325 		cmd_list_append(&cmd.cmd_banks, bank);
326 	}
327 
328 	switch (ptr->ptr_subtype) {
329 	case BUG_PTR_BANK_CASE:
330 		fmd_hdl_debug(hdl, "recovering from out of order page ptr\n");
331 		cmd_case_redirect(hdl, cp, CMD_PTR_BANK_CASE);
332 		/*FALLTHROUGH*/
333 	case CMD_PTR_BANK_CASE:
334 		cmd_mem_case_restore(hdl, &bank->bank_case, cp, "bank",
335 		    bank->bank_unum);
336 		break;
337 	default:
338 		fmd_hdl_abort(hdl, "invalid %s subtype %d\n",
339 		    ptr->ptr_name, ptr->ptr_subtype);
340 	}
341 
342 	return (bank);
343 }
344 
345 void
cmd_bank_validate(fmd_hdl_t * hdl)346 cmd_bank_validate(fmd_hdl_t *hdl)
347 {
348 	cmd_bank_t *bank, *next;
349 
350 	for (bank = cmd_list_next(&cmd.cmd_banks); bank != NULL; bank = next) {
351 		next = cmd_list_next(bank);
352 
353 		if (!fmd_nvl_fmri_present(hdl, bank->bank_asru_nvl))
354 			cmd_bank_destroy(hdl, bank);
355 	}
356 }
357 
358 void
cmd_bank_dirty(fmd_hdl_t * hdl,cmd_bank_t * bank)359 cmd_bank_dirty(fmd_hdl_t *hdl, cmd_bank_t *bank)
360 {
361 	if (fmd_buf_size(hdl, NULL, bank->bank_bufname) !=
362 	    sizeof (cmd_bank_pers_t))
363 		fmd_buf_destroy(hdl, NULL, bank->bank_bufname);
364 
365 	/* No need to rewrite the FMRIs in the bank - they don't change */
366 	fmd_buf_write(hdl, NULL, bank->bank_bufname, &bank->bank_pers,
367 	    sizeof (cmd_bank_pers_t));
368 }
369 
370 void
cmd_bank_gc(fmd_hdl_t * hdl)371 cmd_bank_gc(fmd_hdl_t *hdl)
372 {
373 	cmd_bank_validate(hdl);
374 }
375 
376 void
cmd_bank_fini(fmd_hdl_t * hdl)377 cmd_bank_fini(fmd_hdl_t *hdl)
378 {
379 	cmd_bank_t *bank;
380 
381 	while ((bank = cmd_list_next(&cmd.cmd_banks)) != NULL)
382 		bank_free(hdl, bank, FMD_B_FALSE);
383 }
384