1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25 /*
26 * Support routines for DIMMs.
27 */
28
29 #include <cmd_mem.h>
30 #include <limits.h>
31 #include <cmd_dimm.h>
32 #include <cmd_bank.h>
33 #include <cmd.h>
34
35 #include <errno.h>
36 #include <string.h>
37 #include <strings.h>
38 #include <fcntl.h>
39 #include <unistd.h>
40 #include <fm/fmd_api.h>
41 #include <sys/fm/protocol.h>
42 #include <sys/mem.h>
43 #include <sys/nvpair.h>
44 #ifdef sun4v
45 #include <cmd_hc_sun4v.h>
46 #include <cmd_branch.h>
47 #endif /* sun4v */
48
49 /*
50 * Some errors (RxE/FRx pairs) don't have accurate DIMM (resource) FMRIs,
51 * because sufficient information was unavailable prior to correlation.
52 * When the DE completes the pair, it uses this routine to retrieve the
53 * correct FMRI.
54 */
55 nvlist_t *
cmd_dimm_fmri_derive(fmd_hdl_t * hdl,uint64_t afar,uint16_t synd,uint64_t afsr)56 cmd_dimm_fmri_derive(fmd_hdl_t *hdl, uint64_t afar, uint16_t synd,
57 uint64_t afsr)
58 {
59 nvlist_t *fmri;
60
61 if ((fmri = cmd_mem_fmri_derive(hdl, afar, afsr, synd)) == NULL)
62 return (NULL);
63
64 if (fmd_nvl_fmri_expand(hdl, fmri) < 0) {
65 nvlist_free(fmri);
66 return (NULL);
67 }
68
69 return (fmri);
70 }
71
72 nvlist_t *
cmd_dimm_fru(cmd_dimm_t * dimm)73 cmd_dimm_fru(cmd_dimm_t *dimm)
74 {
75 return (dimm->dimm_asru_nvl);
76 }
77
78 nvlist_t *
cmd_dimm_create_fault(fmd_hdl_t * hdl,cmd_dimm_t * dimm,const char * fltnm,uint_t cert)79 cmd_dimm_create_fault(fmd_hdl_t *hdl, cmd_dimm_t *dimm, const char *fltnm,
80 uint_t cert)
81 {
82 #ifdef sun4v
83 nvlist_t *flt, *nvlfru;
84 /*
85 * Do NOT issue hc scheme FRU FMRIs for ultraSPARC-T1 platforms.
86 * The SP will misinterpret the FRU. Instead, reuse the ASRU FMRI
87 *
88 * Use the BR string as a distinguisher. BR (branch) is only
89 * present in ultraSPARC-T2/T2plus DIMM unums
90 */
91 if (strstr(dimm->dimm_unum, "BR") == NULL) {
92 flt = cmd_nvl_create_fault(hdl, fltnm, cert,
93 dimm->dimm_asru_nvl, dimm->dimm_asru_nvl, NULL);
94 } else {
95 nvlfru = cmd_mem2hc(hdl, dimm->dimm_asru_nvl);
96 flt = cmd_nvl_create_fault(hdl, fltnm, cert,
97 dimm->dimm_asru_nvl, nvlfru, NULL);
98 nvlist_free(nvlfru);
99 }
100 return (cmd_fault_add_location(hdl, flt, dimm->dimm_unum));
101 #else
102 return (cmd_nvl_create_fault(hdl, fltnm, cert, dimm->dimm_asru_nvl,
103 dimm->dimm_asru_nvl, NULL));
104 #endif /* sun4v */
105 }
106
107 static void
cmd_dimm_free(fmd_hdl_t * hdl,cmd_dimm_t * dimm,int destroy)108 cmd_dimm_free(fmd_hdl_t *hdl, cmd_dimm_t *dimm, int destroy)
109 {
110 cmd_case_t *cc = &dimm->dimm_case;
111 int i;
112 cmd_mq_t *q;
113 tstamp_t *tsp, *next;
114
115 #ifdef sun4v
116 cmd_branch_t *branch;
117 #endif
118 if (cc->cc_cp != NULL) {
119 cmd_case_fini(hdl, cc->cc_cp, destroy);
120 if (cc->cc_serdnm != NULL) {
121 if (fmd_serd_exists(hdl, cc->cc_serdnm) &&
122 destroy)
123 fmd_serd_destroy(hdl, cc->cc_serdnm);
124 fmd_hdl_strfree(hdl, cc->cc_serdnm);
125 }
126 }
127
128 for (i = 0; i < CMD_MAX_CKWDS; i++) {
129 while ((q = cmd_list_next(&dimm->mq_root[i])) != NULL) {
130 if (q->mq_serdnm != NULL) {
131 if (fmd_serd_exists(hdl, q->mq_serdnm)) {
132 fmd_serd_destroy(hdl, q->mq_serdnm);
133 }
134 fmd_hdl_strfree(hdl, q->mq_serdnm);
135 q->mq_serdnm = NULL;
136 }
137
138 for (tsp = cmd_list_next(&q->mq_dupce_tstamp);
139 tsp != NULL; tsp = next) {
140 next = cmd_list_next(tsp);
141 cmd_list_delete(&q->mq_dupce_tstamp,
142 &tsp->ts_l);
143 fmd_hdl_free(hdl, tsp, sizeof (tstamp_t));
144 }
145
146 cmd_list_delete(&dimm->mq_root[i], q);
147 fmd_hdl_free(hdl, q, sizeof (cmd_mq_t));
148 }
149 }
150
151 if (dimm->dimm_bank != NULL)
152 cmd_bank_remove_dimm(hdl, dimm->dimm_bank, dimm);
153
154 #ifdef sun4v
155 branch = cmd_branch_lookup_by_unum(hdl, dimm->dimm_unum);
156 if (branch != NULL)
157 cmd_branch_remove_dimm(hdl, branch, dimm);
158 #endif
159
160 cmd_fmri_fini(hdl, &dimm->dimm_asru, destroy);
161
162 if (destroy)
163 fmd_buf_destroy(hdl, NULL, dimm->dimm_bufname);
164
165 cmd_list_delete(&cmd.cmd_dimms, dimm);
166 fmd_hdl_free(hdl, dimm, sizeof (cmd_dimm_t));
167 }
168
169 void
cmd_dimm_destroy(fmd_hdl_t * hdl,cmd_dimm_t * dimm)170 cmd_dimm_destroy(fmd_hdl_t *hdl, cmd_dimm_t *dimm)
171 {
172
173 fmd_stat_destroy(hdl, 1, &(dimm->dimm_retstat));
174 cmd_dimm_free(hdl, dimm, FMD_B_TRUE);
175 }
176
177 static cmd_dimm_t *
dimm_lookup_by_unum(const char * unum)178 dimm_lookup_by_unum(const char *unum)
179 {
180 cmd_dimm_t *dimm;
181
182 for (dimm = cmd_list_next(&cmd.cmd_dimms); dimm != NULL;
183 dimm = cmd_list_next(dimm)) {
184 if (strcmp(dimm->dimm_unum, unum) == 0)
185 return (dimm);
186 }
187
188 return (NULL);
189 }
190
191 static void
dimm_attach_to_bank(fmd_hdl_t * hdl,cmd_dimm_t * dimm)192 dimm_attach_to_bank(fmd_hdl_t *hdl, cmd_dimm_t *dimm)
193 {
194 cmd_bank_t *bank;
195
196 for (bank = cmd_list_next(&cmd.cmd_banks); bank != NULL;
197 bank = cmd_list_next(bank)) {
198 if (fmd_nvl_fmri_contains(hdl, bank->bank_asru_nvl,
199 dimm->dimm_asru_nvl)) {
200 cmd_bank_add_dimm(hdl, bank, dimm);
201 return;
202 }
203 }
204 }
205
206 cmd_dimm_t *
cmd_dimm_create(fmd_hdl_t * hdl,nvlist_t * asru)207 cmd_dimm_create(fmd_hdl_t *hdl, nvlist_t *asru)
208 {
209 cmd_dimm_t *dimm;
210 const char *unum;
211 nvlist_t *fmri;
212 size_t nserids = 0;
213 char **serids = NULL;
214
215 if (!fmd_nvl_fmri_present(hdl, asru)) {
216 fmd_hdl_debug(hdl, "dimm_lookup: discarding old ereport\n");
217 return (NULL);
218 }
219
220 if ((unum = cmd_fmri_get_unum(asru)) == NULL) {
221 CMD_STAT_BUMP(bad_mem_asru);
222 return (NULL);
223 }
224
225 #ifdef sun4v
226 if (nvlist_lookup_string_array(asru, FM_FMRI_HC_SERIAL_ID, &serids,
227 &nserids) != 0) {
228 fmd_hdl_debug(hdl, "sun4v mem: FMRI does not"
229 " have serial_ids\n");
230 CMD_STAT_BUMP(bad_mem_asru);
231 return (NULL);
232 }
233 #endif
234 fmri = cmd_mem_fmri_create(unum, serids, nserids);
235 if (fmd_nvl_fmri_expand(hdl, fmri) < 0) {
236 CMD_STAT_BUMP(bad_mem_asru);
237 nvlist_free(fmri);
238 return (NULL);
239 }
240
241 fmd_hdl_debug(hdl, "dimm_create: creating new DIMM %s\n", unum);
242 CMD_STAT_BUMP(dimm_creat);
243
244 dimm = fmd_hdl_zalloc(hdl, sizeof (cmd_dimm_t), FMD_SLEEP);
245 dimm->dimm_nodetype = CMD_NT_DIMM;
246 dimm->dimm_version = CMD_DIMM_VERSION;
247 dimm->dimm_phys_addr_low = ULLONG_MAX;
248 dimm->dimm_phys_addr_hi = 0;
249 dimm->dimm_syl_error = USHRT_MAX;
250
251 cmd_bufname(dimm->dimm_bufname, sizeof (dimm->dimm_bufname), "dimm_%s",
252 unum);
253 cmd_fmri_init(hdl, &dimm->dimm_asru, fmri, "dimm_asru_%s", unum);
254
255 nvlist_free(fmri);
256
257 (void) nvlist_lookup_string(dimm->dimm_asru_nvl, FM_FMRI_MEM_UNUM,
258 (char **)&dimm->dimm_unum);
259
260 dimm_attach_to_bank(hdl, dimm);
261
262 cmd_mem_retirestat_create(hdl, &dimm->dimm_retstat, dimm->dimm_unum, 0,
263 CMD_DIMM_STAT_PREFIX);
264
265 cmd_list_append(&cmd.cmd_dimms, dimm);
266 cmd_dimm_dirty(hdl, dimm);
267
268 return (dimm);
269 }
270
271 cmd_dimm_t *
cmd_dimm_lookup(fmd_hdl_t * hdl,nvlist_t * asru)272 cmd_dimm_lookup(fmd_hdl_t *hdl, nvlist_t *asru)
273 {
274 cmd_dimm_t *dimm;
275 const char *unum;
276
277 if ((unum = cmd_fmri_get_unum(asru)) == NULL) {
278 CMD_STAT_BUMP(bad_mem_asru);
279 return (NULL);
280 }
281
282 dimm = dimm_lookup_by_unum(unum);
283
284 if (dimm != NULL && !fmd_nvl_fmri_present(hdl, dimm->dimm_asru_nvl)) {
285 /*
286 * The DIMM doesn't exist anymore, so we need to delete the
287 * state structure, which is now out of date. The containing
288 * bank (if any) is also out of date, so blow it away too.
289 */
290 fmd_hdl_debug(hdl, "dimm_lookup: discarding old dimm\n");
291
292 if (dimm->dimm_bank != NULL)
293 cmd_bank_destroy(hdl, dimm->dimm_bank);
294 cmd_dimm_destroy(hdl, dimm);
295
296 return (NULL);
297 }
298
299 return (dimm);
300 }
301
302 static cmd_dimm_t *
dimm_v0tov2(fmd_hdl_t * hdl,cmd_dimm_0_t * old,size_t oldsz)303 dimm_v0tov2(fmd_hdl_t *hdl, cmd_dimm_0_t *old, size_t oldsz)
304 {
305 cmd_dimm_t *new;
306
307 if (oldsz != sizeof (cmd_dimm_0_t)) {
308 fmd_hdl_abort(hdl, "size of state doesn't match size of "
309 "version 0 state (%u bytes).\n", sizeof (cmd_dimm_0_t));
310 }
311
312 new = fmd_hdl_zalloc(hdl, sizeof (cmd_dimm_t), FMD_SLEEP);
313 new->dimm_header = old->dimm0_header;
314 new->dimm_version = CMD_DIMM_VERSION;
315 new->dimm_asru = old->dimm0_asru;
316 new->dimm_nretired = old->dimm0_nretired;
317 new->dimm_phys_addr_hi = 0;
318 new->dimm_phys_addr_low = ULLONG_MAX;
319
320 fmd_hdl_free(hdl, old, oldsz);
321 return (new);
322 }
323
324 static cmd_dimm_t *
dimm_v1tov2(fmd_hdl_t * hdl,cmd_dimm_1_t * old,size_t oldsz)325 dimm_v1tov2(fmd_hdl_t *hdl, cmd_dimm_1_t *old, size_t oldsz)
326 {
327
328 cmd_dimm_t *new;
329
330 if (oldsz != sizeof (cmd_dimm_1_t)) {
331 fmd_hdl_abort(hdl, "size of state doesn't match size of "
332 "version 1 state (%u bytes).\n", sizeof (cmd_dimm_1_t));
333 }
334
335 new = fmd_hdl_zalloc(hdl, sizeof (cmd_dimm_t), FMD_SLEEP);
336
337 new->dimm_header = old->dimm1_header;
338 new->dimm_version = CMD_DIMM_VERSION;
339 new->dimm_asru = old->dimm1_asru;
340 new->dimm_nretired = old->dimm1_nretired;
341 new->dimm_flags = old->dimm1_flags;
342 new->dimm_phys_addr_hi = 0;
343 new->dimm_phys_addr_low = ULLONG_MAX;
344
345 fmd_hdl_free(hdl, old, oldsz);
346 return (new);
347 }
348
349 static cmd_dimm_t *
dimm_wrapv2(fmd_hdl_t * hdl,cmd_dimm_pers_t * pers,size_t psz)350 dimm_wrapv2(fmd_hdl_t *hdl, cmd_dimm_pers_t *pers, size_t psz)
351 {
352 cmd_dimm_t *dimm;
353
354 if (psz != sizeof (cmd_dimm_pers_t)) {
355 fmd_hdl_abort(hdl, "size of state doesn't match size of "
356 "version 1 state (%u bytes).\n", sizeof (cmd_dimm_pers_t));
357 }
358
359 dimm = fmd_hdl_zalloc(hdl, sizeof (cmd_dimm_t), FMD_SLEEP);
360 bcopy(pers, dimm, sizeof (cmd_dimm_pers_t));
361 fmd_hdl_free(hdl, pers, psz);
362 return (dimm);
363 }
364
365 void *
cmd_dimm_restore(fmd_hdl_t * hdl,fmd_case_t * cp,cmd_case_ptr_t * ptr)366 cmd_dimm_restore(fmd_hdl_t *hdl, fmd_case_t *cp, cmd_case_ptr_t *ptr)
367 {
368 cmd_dimm_t *dimm;
369
370 for (dimm = cmd_list_next(&cmd.cmd_dimms); dimm != NULL;
371 dimm = cmd_list_next(dimm)) {
372 if (strcmp(dimm->dimm_bufname, ptr->ptr_name) == 0)
373 break;
374 }
375
376 if (dimm == NULL) {
377 int migrated = 0;
378 size_t dimmsz;
379
380 fmd_hdl_debug(hdl, "restoring dimm from %s\n", ptr->ptr_name);
381
382 if ((dimmsz = fmd_buf_size(hdl, NULL, ptr->ptr_name)) == 0) {
383 fmd_hdl_abort(hdl, "dimm referenced by case %s does "
384 "not exist in saved state\n",
385 fmd_case_uuid(hdl, cp));
386 } else if (dimmsz > CMD_DIMM_MAXSIZE ||
387 dimmsz < CMD_DIMM_MINSIZE) {
388 fmd_hdl_abort(hdl,
389 "dimm buffer referenced by case %s "
390 "is out of bounds (is %u bytes, max %u, min %u)\n",
391 fmd_case_uuid(hdl, cp), dimmsz,
392 CMD_DIMM_MAXSIZE, CMD_DIMM_MINSIZE);
393 }
394
395 if ((dimm = cmd_buf_read(hdl, NULL, ptr->ptr_name,
396 dimmsz)) == NULL) {
397 fmd_hdl_abort(hdl, "failed to read dimm buf %s",
398 ptr->ptr_name);
399 }
400
401 fmd_hdl_debug(hdl, "found %d in version field\n",
402 dimm->dimm_version);
403
404 if (CMD_DIMM_VERSIONED(dimm)) {
405 switch (dimm->dimm_version) {
406 case CMD_DIMM_VERSION_1:
407 dimm = dimm_v1tov2(hdl, (cmd_dimm_1_t *)dimm,
408 dimmsz);
409 break;
410 case CMD_DIMM_VERSION_2:
411 dimm = dimm_wrapv2(hdl, (cmd_dimm_pers_t *)dimm,
412 dimmsz);
413 break;
414 default:
415 fmd_hdl_abort(hdl, "unknown version (found %d) "
416 "for dimm state referenced by case %s.\n",
417 dimm->dimm_version, fmd_case_uuid(hdl, cp));
418 break;
419 }
420 } else {
421 dimm = dimm_v0tov2(hdl, (cmd_dimm_0_t *)dimm, dimmsz);
422 migrated = 1;
423 }
424
425 if (migrated) {
426 CMD_STAT_BUMP(dimm_migrat);
427 cmd_dimm_dirty(hdl, dimm);
428 }
429
430 cmd_fmri_restore(hdl, &dimm->dimm_asru);
431
432 if ((errno = nvlist_lookup_string(dimm->dimm_asru_nvl,
433 FM_FMRI_MEM_UNUM, (char **)&dimm->dimm_unum)) != 0)
434 fmd_hdl_abort(hdl, "failed to retrieve unum from asru");
435
436 dimm_attach_to_bank(hdl, dimm);
437
438 cmd_mem_retirestat_create(hdl, &dimm->dimm_retstat,
439 dimm->dimm_unum, dimm->dimm_nretired, CMD_DIMM_STAT_PREFIX);
440
441 cmd_list_append(&cmd.cmd_dimms, dimm);
442 }
443
444 switch (ptr->ptr_subtype) {
445 case BUG_PTR_DIMM_CASE:
446 fmd_hdl_debug(hdl, "recovering from out of order dimm ptr\n");
447 cmd_case_redirect(hdl, cp, CMD_PTR_DIMM_CASE);
448 /*FALLTHROUGH*/
449 case CMD_PTR_DIMM_CASE:
450 cmd_mem_case_restore(hdl, &dimm->dimm_case, cp, "dimm",
451 dimm->dimm_unum);
452 break;
453 default:
454 fmd_hdl_abort(hdl, "invalid %s subtype %d\n",
455 ptr->ptr_name, ptr->ptr_subtype);
456 }
457
458 return (dimm);
459 }
460
461 void
cmd_dimm_validate(fmd_hdl_t * hdl)462 cmd_dimm_validate(fmd_hdl_t *hdl)
463 {
464 cmd_dimm_t *dimm, *next;
465
466 for (dimm = cmd_list_next(&cmd.cmd_dimms); dimm != NULL; dimm = next) {
467 next = cmd_list_next(dimm);
468
469 if (!fmd_nvl_fmri_present(hdl, dimm->dimm_asru_nvl))
470 cmd_dimm_destroy(hdl, dimm);
471 }
472 }
473
474 void
cmd_dimm_dirty(fmd_hdl_t * hdl,cmd_dimm_t * dimm)475 cmd_dimm_dirty(fmd_hdl_t *hdl, cmd_dimm_t *dimm)
476 {
477 if (fmd_buf_size(hdl, NULL, dimm->dimm_bufname) !=
478 sizeof (cmd_dimm_pers_t))
479 fmd_buf_destroy(hdl, NULL, dimm->dimm_bufname);
480
481 /* No need to rewrite the FMRIs in the dimm - they don't change */
482 fmd_buf_write(hdl, NULL, dimm->dimm_bufname, &dimm->dimm_pers,
483 sizeof (cmd_dimm_pers_t));
484 }
485
486 void
cmd_dimm_gc(fmd_hdl_t * hdl)487 cmd_dimm_gc(fmd_hdl_t *hdl)
488 {
489 cmd_dimm_validate(hdl);
490 }
491
492 void
cmd_dimm_fini(fmd_hdl_t * hdl)493 cmd_dimm_fini(fmd_hdl_t *hdl)
494 {
495 cmd_dimm_t *dimm;
496
497 while ((dimm = cmd_list_next(&cmd.cmd_dimms)) != NULL)
498 cmd_dimm_free(hdl, dimm, FMD_B_FALSE);
499 }
500
501
502 void
cmd_dimm_save_symbol_error(cmd_dimm_t * dimm,uint16_t upos)503 cmd_dimm_save_symbol_error(cmd_dimm_t *dimm, uint16_t upos)
504 {
505 cmd_dimm_t *d = NULL, *next = NULL;
506
507 for (d = cmd_list_next(&cmd.cmd_dimms); d != NULL; d = next) {
508 next = cmd_list_next(d);
509 if (cmd_same_datapath_dimms(dimm, d))
510 d->dimm_syl_error = upos;
511 }
512 }
513
514 int
cmd_dimm_check_symbol_error(cmd_dimm_t * dimm,uint16_t synd)515 cmd_dimm_check_symbol_error(cmd_dimm_t *dimm, uint16_t synd)
516 {
517 int upos;
518 cmd_dimm_t *d, *next;
519
520 if ((upos = cmd_synd2upos(synd)) < 0)
521 return (0);
522
523 for (d = cmd_list_next(&cmd.cmd_dimms); d != NULL; d = next) {
524 next = cmd_list_next(d);
525 if (cmd_same_datapath_dimms(dimm, d) &&
526 (d->dimm_syl_error == upos))
527 return (1);
528 }
529
530 return (0);
531 }
532