/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
 */
24
/*
 * Support routines for DIMMs.
 */
28
29 #include <cmd_mem.h>
30 #include <limits.h>
31 #include <cmd_dimm.h>
32 #include <cmd_bank.h>
33 #include <cmd.h>
34
35 #include <errno.h>
36 #include <string.h>
37 #include <strings.h>
38 #include <fcntl.h>
39 #include <unistd.h>
40 #include <fm/fmd_api.h>
41 #include <sys/fm/protocol.h>
42 #include <sys/mem.h>
43 #include <sys/nvpair.h>
44 #ifdef sun4v
45 #include <cmd_hc_sun4v.h>
46 #include <cmd_branch.h>
47 #endif /* sun4v */
48
49 /*
50 * Some errors (RxE/FRx pairs) don't have accurate DIMM (resource) FMRIs,
51 * because sufficient information was unavailable prior to correlation.
52 * When the DE completes the pair, it uses this routine to retrieve the
53 * correct FMRI.
54 */
55 nvlist_t *
cmd_dimm_fmri_derive(fmd_hdl_t * hdl,uint64_t afar,uint16_t synd,uint64_t afsr)56 cmd_dimm_fmri_derive(fmd_hdl_t *hdl, uint64_t afar, uint16_t synd,
57 uint64_t afsr)
58 {
59 nvlist_t *fmri;
60
61 if ((fmri = cmd_mem_fmri_derive(hdl, afar, afsr, synd)) == NULL)
62 return (NULL);
63
64 if (fmd_nvl_fmri_expand(hdl, fmri) < 0) {
65 nvlist_free(fmri);
66 return (NULL);
67 }
68
69 return (fmri);
70 }
71
72 nvlist_t *
cmd_dimm_fru(cmd_dimm_t * dimm)73 cmd_dimm_fru(cmd_dimm_t *dimm)
74 {
75 return (dimm->dimm_asru_nvl);
76 }
77
78 nvlist_t *
cmd_dimm_create_fault(fmd_hdl_t * hdl,cmd_dimm_t * dimm,const char * fltnm,uint_t cert)79 cmd_dimm_create_fault(fmd_hdl_t *hdl, cmd_dimm_t *dimm, const char *fltnm,
80 uint_t cert)
81 {
82 #ifdef sun4v
83 nvlist_t *flt, *nvlfru;
84 /*
85 * Do NOT issue hc scheme FRU FMRIs for ultraSPARC-T1 platforms.
86 * The SP will misinterpret the FRU. Instead, reuse the ASRU FMRI
87 *
88 * Use the BR string as a distinguisher. BR (branch) is only
89 * present in ultraSPARC-T2/T2plus DIMM unums
90 */
91 if (strstr(dimm->dimm_unum, "BR") == NULL) {
92 flt = cmd_nvl_create_fault(hdl, fltnm, cert,
93 dimm->dimm_asru_nvl, dimm->dimm_asru_nvl, NULL);
94 } else {
95 nvlfru = cmd_mem2hc(hdl, dimm->dimm_asru_nvl);
96 flt = cmd_nvl_create_fault(hdl, fltnm, cert,
97 dimm->dimm_asru_nvl, nvlfru, NULL);
98 if (nvlfru != NULL)
99 nvlist_free(nvlfru);
100 }
101 return (cmd_fault_add_location(hdl, flt, dimm->dimm_unum));
102 #else
103 return (cmd_nvl_create_fault(hdl, fltnm, cert, dimm->dimm_asru_nvl,
104 dimm->dimm_asru_nvl, NULL));
105 #endif /* sun4v */
106 }
107
108 static void
cmd_dimm_free(fmd_hdl_t * hdl,cmd_dimm_t * dimm,int destroy)109 cmd_dimm_free(fmd_hdl_t *hdl, cmd_dimm_t *dimm, int destroy)
110 {
111 cmd_case_t *cc = &dimm->dimm_case;
112 int i;
113 cmd_mq_t *q;
114 tstamp_t *tsp, *next;
115
116 #ifdef sun4v
117 cmd_branch_t *branch;
118 #endif
119 if (cc->cc_cp != NULL) {
120 cmd_case_fini(hdl, cc->cc_cp, destroy);
121 if (cc->cc_serdnm != NULL) {
122 if (fmd_serd_exists(hdl, cc->cc_serdnm) &&
123 destroy)
124 fmd_serd_destroy(hdl, cc->cc_serdnm);
125 fmd_hdl_strfree(hdl, cc->cc_serdnm);
126 }
127 }
128
129 for (i = 0; i < CMD_MAX_CKWDS; i++) {
130 while ((q = cmd_list_next(&dimm->mq_root[i])) != NULL) {
131 if (q->mq_serdnm != NULL) {
132 if (fmd_serd_exists(hdl, q->mq_serdnm)) {
133 fmd_serd_destroy(hdl, q->mq_serdnm);
134 }
135 fmd_hdl_strfree(hdl, q->mq_serdnm);
136 q->mq_serdnm = NULL;
137 }
138
139 for (tsp = cmd_list_next(&q->mq_dupce_tstamp);
140 tsp != NULL; tsp = next) {
141 next = cmd_list_next(tsp);
142 cmd_list_delete(&q->mq_dupce_tstamp,
143 &tsp->ts_l);
144 fmd_hdl_free(hdl, tsp, sizeof (tstamp_t));
145 }
146
147 cmd_list_delete(&dimm->mq_root[i], q);
148 fmd_hdl_free(hdl, q, sizeof (cmd_mq_t));
149 }
150 }
151
152 if (dimm->dimm_bank != NULL)
153 cmd_bank_remove_dimm(hdl, dimm->dimm_bank, dimm);
154
155 #ifdef sun4v
156 branch = cmd_branch_lookup_by_unum(hdl, dimm->dimm_unum);
157 if (branch != NULL)
158 cmd_branch_remove_dimm(hdl, branch, dimm);
159 #endif
160
161 cmd_fmri_fini(hdl, &dimm->dimm_asru, destroy);
162
163 if (destroy)
164 fmd_buf_destroy(hdl, NULL, dimm->dimm_bufname);
165
166 cmd_list_delete(&cmd.cmd_dimms, dimm);
167 fmd_hdl_free(hdl, dimm, sizeof (cmd_dimm_t));
168 }
169
170 void
cmd_dimm_destroy(fmd_hdl_t * hdl,cmd_dimm_t * dimm)171 cmd_dimm_destroy(fmd_hdl_t *hdl, cmd_dimm_t *dimm)
172 {
173
174 fmd_stat_destroy(hdl, 1, &(dimm->dimm_retstat));
175 cmd_dimm_free(hdl, dimm, FMD_B_TRUE);
176 }
177
178 static cmd_dimm_t *
dimm_lookup_by_unum(const char * unum)179 dimm_lookup_by_unum(const char *unum)
180 {
181 cmd_dimm_t *dimm;
182
183 for (dimm = cmd_list_next(&cmd.cmd_dimms); dimm != NULL;
184 dimm = cmd_list_next(dimm)) {
185 if (strcmp(dimm->dimm_unum, unum) == 0)
186 return (dimm);
187 }
188
189 return (NULL);
190 }
191
192 static void
dimm_attach_to_bank(fmd_hdl_t * hdl,cmd_dimm_t * dimm)193 dimm_attach_to_bank(fmd_hdl_t *hdl, cmd_dimm_t *dimm)
194 {
195 cmd_bank_t *bank;
196
197 for (bank = cmd_list_next(&cmd.cmd_banks); bank != NULL;
198 bank = cmd_list_next(bank)) {
199 if (fmd_nvl_fmri_contains(hdl, bank->bank_asru_nvl,
200 dimm->dimm_asru_nvl)) {
201 cmd_bank_add_dimm(hdl, bank, dimm);
202 return;
203 }
204 }
205 }
206
207 cmd_dimm_t *
cmd_dimm_create(fmd_hdl_t * hdl,nvlist_t * asru)208 cmd_dimm_create(fmd_hdl_t *hdl, nvlist_t *asru)
209 {
210 cmd_dimm_t *dimm;
211 const char *unum;
212 nvlist_t *fmri;
213 size_t nserids = 0;
214 char **serids = NULL;
215
216 if (!fmd_nvl_fmri_present(hdl, asru)) {
217 fmd_hdl_debug(hdl, "dimm_lookup: discarding old ereport\n");
218 return (NULL);
219 }
220
221 if ((unum = cmd_fmri_get_unum(asru)) == NULL) {
222 CMD_STAT_BUMP(bad_mem_asru);
223 return (NULL);
224 }
225
226 #ifdef sun4v
227 if (nvlist_lookup_string_array(asru, FM_FMRI_HC_SERIAL_ID, &serids,
228 &nserids) != 0) {
229 fmd_hdl_debug(hdl, "sun4v mem: FMRI does not"
230 " have serial_ids\n");
231 CMD_STAT_BUMP(bad_mem_asru);
232 return (NULL);
233 }
234 #endif
235 fmri = cmd_mem_fmri_create(unum, serids, nserids);
236 if (fmd_nvl_fmri_expand(hdl, fmri) < 0) {
237 CMD_STAT_BUMP(bad_mem_asru);
238 nvlist_free(fmri);
239 return (NULL);
240 }
241
242 fmd_hdl_debug(hdl, "dimm_create: creating new DIMM %s\n", unum);
243 CMD_STAT_BUMP(dimm_creat);
244
245 dimm = fmd_hdl_zalloc(hdl, sizeof (cmd_dimm_t), FMD_SLEEP);
246 dimm->dimm_nodetype = CMD_NT_DIMM;
247 dimm->dimm_version = CMD_DIMM_VERSION;
248 dimm->dimm_phys_addr_low = ULLONG_MAX;
249 dimm->dimm_phys_addr_hi = 0;
250 dimm->dimm_syl_error = USHRT_MAX;
251
252 cmd_bufname(dimm->dimm_bufname, sizeof (dimm->dimm_bufname), "dimm_%s",
253 unum);
254 cmd_fmri_init(hdl, &dimm->dimm_asru, fmri, "dimm_asru_%s", unum);
255
256 nvlist_free(fmri);
257
258 (void) nvlist_lookup_string(dimm->dimm_asru_nvl, FM_FMRI_MEM_UNUM,
259 (char **)&dimm->dimm_unum);
260
261 dimm_attach_to_bank(hdl, dimm);
262
263 cmd_mem_retirestat_create(hdl, &dimm->dimm_retstat, dimm->dimm_unum, 0,
264 CMD_DIMM_STAT_PREFIX);
265
266 cmd_list_append(&cmd.cmd_dimms, dimm);
267 cmd_dimm_dirty(hdl, dimm);
268
269 return (dimm);
270 }
271
272 cmd_dimm_t *
cmd_dimm_lookup(fmd_hdl_t * hdl,nvlist_t * asru)273 cmd_dimm_lookup(fmd_hdl_t *hdl, nvlist_t *asru)
274 {
275 cmd_dimm_t *dimm;
276 const char *unum;
277
278 if ((unum = cmd_fmri_get_unum(asru)) == NULL) {
279 CMD_STAT_BUMP(bad_mem_asru);
280 return (NULL);
281 }
282
283 dimm = dimm_lookup_by_unum(unum);
284
285 if (dimm != NULL && !fmd_nvl_fmri_present(hdl, dimm->dimm_asru_nvl)) {
286 /*
287 * The DIMM doesn't exist anymore, so we need to delete the
288 * state structure, which is now out of date. The containing
289 * bank (if any) is also out of date, so blow it away too.
290 */
291 fmd_hdl_debug(hdl, "dimm_lookup: discarding old dimm\n");
292
293 if (dimm->dimm_bank != NULL)
294 cmd_bank_destroy(hdl, dimm->dimm_bank);
295 cmd_dimm_destroy(hdl, dimm);
296
297 return (NULL);
298 }
299
300 return (dimm);
301 }
302
303 static cmd_dimm_t *
dimm_v0tov2(fmd_hdl_t * hdl,cmd_dimm_0_t * old,size_t oldsz)304 dimm_v0tov2(fmd_hdl_t *hdl, cmd_dimm_0_t *old, size_t oldsz)
305 {
306 cmd_dimm_t *new;
307
308 if (oldsz != sizeof (cmd_dimm_0_t)) {
309 fmd_hdl_abort(hdl, "size of state doesn't match size of "
310 "version 0 state (%u bytes).\n", sizeof (cmd_dimm_0_t));
311 }
312
313 new = fmd_hdl_zalloc(hdl, sizeof (cmd_dimm_t), FMD_SLEEP);
314 new->dimm_header = old->dimm0_header;
315 new->dimm_version = CMD_DIMM_VERSION;
316 new->dimm_asru = old->dimm0_asru;
317 new->dimm_nretired = old->dimm0_nretired;
318 new->dimm_phys_addr_hi = 0;
319 new->dimm_phys_addr_low = ULLONG_MAX;
320
321 fmd_hdl_free(hdl, old, oldsz);
322 return (new);
323 }
324
325 static cmd_dimm_t *
dimm_v1tov2(fmd_hdl_t * hdl,cmd_dimm_1_t * old,size_t oldsz)326 dimm_v1tov2(fmd_hdl_t *hdl, cmd_dimm_1_t *old, size_t oldsz)
327 {
328
329 cmd_dimm_t *new;
330
331 if (oldsz != sizeof (cmd_dimm_1_t)) {
332 fmd_hdl_abort(hdl, "size of state doesn't match size of "
333 "version 1 state (%u bytes).\n", sizeof (cmd_dimm_1_t));
334 }
335
336 new = fmd_hdl_zalloc(hdl, sizeof (cmd_dimm_t), FMD_SLEEP);
337
338 new->dimm_header = old->dimm1_header;
339 new->dimm_version = CMD_DIMM_VERSION;
340 new->dimm_asru = old->dimm1_asru;
341 new->dimm_nretired = old->dimm1_nretired;
342 new->dimm_flags = old->dimm1_flags;
343 new->dimm_phys_addr_hi = 0;
344 new->dimm_phys_addr_low = ULLONG_MAX;
345
346 fmd_hdl_free(hdl, old, oldsz);
347 return (new);
348 }
349
350 static cmd_dimm_t *
dimm_wrapv2(fmd_hdl_t * hdl,cmd_dimm_pers_t * pers,size_t psz)351 dimm_wrapv2(fmd_hdl_t *hdl, cmd_dimm_pers_t *pers, size_t psz)
352 {
353 cmd_dimm_t *dimm;
354
355 if (psz != sizeof (cmd_dimm_pers_t)) {
356 fmd_hdl_abort(hdl, "size of state doesn't match size of "
357 "version 1 state (%u bytes).\n", sizeof (cmd_dimm_pers_t));
358 }
359
360 dimm = fmd_hdl_zalloc(hdl, sizeof (cmd_dimm_t), FMD_SLEEP);
361 bcopy(pers, dimm, sizeof (cmd_dimm_pers_t));
362 fmd_hdl_free(hdl, pers, psz);
363 return (dimm);
364 }
365
366 void *
cmd_dimm_restore(fmd_hdl_t * hdl,fmd_case_t * cp,cmd_case_ptr_t * ptr)367 cmd_dimm_restore(fmd_hdl_t *hdl, fmd_case_t *cp, cmd_case_ptr_t *ptr)
368 {
369 cmd_dimm_t *dimm;
370
371 for (dimm = cmd_list_next(&cmd.cmd_dimms); dimm != NULL;
372 dimm = cmd_list_next(dimm)) {
373 if (strcmp(dimm->dimm_bufname, ptr->ptr_name) == 0)
374 break;
375 }
376
377 if (dimm == NULL) {
378 int migrated = 0;
379 size_t dimmsz;
380
381 fmd_hdl_debug(hdl, "restoring dimm from %s\n", ptr->ptr_name);
382
383 if ((dimmsz = fmd_buf_size(hdl, NULL, ptr->ptr_name)) == 0) {
384 fmd_hdl_abort(hdl, "dimm referenced by case %s does "
385 "not exist in saved state\n",
386 fmd_case_uuid(hdl, cp));
387 } else if (dimmsz > CMD_DIMM_MAXSIZE ||
388 dimmsz < CMD_DIMM_MINSIZE) {
389 fmd_hdl_abort(hdl,
390 "dimm buffer referenced by case %s "
391 "is out of bounds (is %u bytes, max %u, min %u)\n",
392 fmd_case_uuid(hdl, cp), dimmsz,
393 CMD_DIMM_MAXSIZE, CMD_DIMM_MINSIZE);
394 }
395
396 if ((dimm = cmd_buf_read(hdl, NULL, ptr->ptr_name,
397 dimmsz)) == NULL) {
398 fmd_hdl_abort(hdl, "failed to read dimm buf %s",
399 ptr->ptr_name);
400 }
401
402 fmd_hdl_debug(hdl, "found %d in version field\n",
403 dimm->dimm_version);
404
405 if (CMD_DIMM_VERSIONED(dimm)) {
406 switch (dimm->dimm_version) {
407 case CMD_DIMM_VERSION_1:
408 dimm = dimm_v1tov2(hdl, (cmd_dimm_1_t *)dimm,
409 dimmsz);
410 break;
411 case CMD_DIMM_VERSION_2:
412 dimm = dimm_wrapv2(hdl, (cmd_dimm_pers_t *)dimm,
413 dimmsz);
414 break;
415 default:
416 fmd_hdl_abort(hdl, "unknown version (found %d) "
417 "for dimm state referenced by case %s.\n",
418 dimm->dimm_version, fmd_case_uuid(hdl, cp));
419 break;
420 }
421 } else {
422 dimm = dimm_v0tov2(hdl, (cmd_dimm_0_t *)dimm, dimmsz);
423 migrated = 1;
424 }
425
426 if (migrated) {
427 CMD_STAT_BUMP(dimm_migrat);
428 cmd_dimm_dirty(hdl, dimm);
429 }
430
431 cmd_fmri_restore(hdl, &dimm->dimm_asru);
432
433 if ((errno = nvlist_lookup_string(dimm->dimm_asru_nvl,
434 FM_FMRI_MEM_UNUM, (char **)&dimm->dimm_unum)) != 0)
435 fmd_hdl_abort(hdl, "failed to retrieve unum from asru");
436
437 dimm_attach_to_bank(hdl, dimm);
438
439 cmd_mem_retirestat_create(hdl, &dimm->dimm_retstat,
440 dimm->dimm_unum, dimm->dimm_nretired, CMD_DIMM_STAT_PREFIX);
441
442 cmd_list_append(&cmd.cmd_dimms, dimm);
443 }
444
445 switch (ptr->ptr_subtype) {
446 case BUG_PTR_DIMM_CASE:
447 fmd_hdl_debug(hdl, "recovering from out of order dimm ptr\n");
448 cmd_case_redirect(hdl, cp, CMD_PTR_DIMM_CASE);
449 /*FALLTHROUGH*/
450 case CMD_PTR_DIMM_CASE:
451 cmd_mem_case_restore(hdl, &dimm->dimm_case, cp, "dimm",
452 dimm->dimm_unum);
453 break;
454 default:
455 fmd_hdl_abort(hdl, "invalid %s subtype %d\n",
456 ptr->ptr_name, ptr->ptr_subtype);
457 }
458
459 return (dimm);
460 }
461
462 void
cmd_dimm_validate(fmd_hdl_t * hdl)463 cmd_dimm_validate(fmd_hdl_t *hdl)
464 {
465 cmd_dimm_t *dimm, *next;
466
467 for (dimm = cmd_list_next(&cmd.cmd_dimms); dimm != NULL; dimm = next) {
468 next = cmd_list_next(dimm);
469
470 if (!fmd_nvl_fmri_present(hdl, dimm->dimm_asru_nvl))
471 cmd_dimm_destroy(hdl, dimm);
472 }
473 }
474
475 void
cmd_dimm_dirty(fmd_hdl_t * hdl,cmd_dimm_t * dimm)476 cmd_dimm_dirty(fmd_hdl_t *hdl, cmd_dimm_t *dimm)
477 {
478 if (fmd_buf_size(hdl, NULL, dimm->dimm_bufname) !=
479 sizeof (cmd_dimm_pers_t))
480 fmd_buf_destroy(hdl, NULL, dimm->dimm_bufname);
481
482 /* No need to rewrite the FMRIs in the dimm - they don't change */
483 fmd_buf_write(hdl, NULL, dimm->dimm_bufname, &dimm->dimm_pers,
484 sizeof (cmd_dimm_pers_t));
485 }
486
487 void
cmd_dimm_gc(fmd_hdl_t * hdl)488 cmd_dimm_gc(fmd_hdl_t *hdl)
489 {
490 cmd_dimm_validate(hdl);
491 }
492
493 void
cmd_dimm_fini(fmd_hdl_t * hdl)494 cmd_dimm_fini(fmd_hdl_t *hdl)
495 {
496 cmd_dimm_t *dimm;
497
498 while ((dimm = cmd_list_next(&cmd.cmd_dimms)) != NULL)
499 cmd_dimm_free(hdl, dimm, FMD_B_FALSE);
500 }
501
502
503 void
cmd_dimm_save_symbol_error(cmd_dimm_t * dimm,uint16_t upos)504 cmd_dimm_save_symbol_error(cmd_dimm_t *dimm, uint16_t upos)
505 {
506 cmd_dimm_t *d = NULL, *next = NULL;
507
508 for (d = cmd_list_next(&cmd.cmd_dimms); d != NULL; d = next) {
509 next = cmd_list_next(d);
510 if (cmd_same_datapath_dimms(dimm, d))
511 d->dimm_syl_error = upos;
512 }
513 }
514
515 int
cmd_dimm_check_symbol_error(cmd_dimm_t * dimm,uint16_t synd)516 cmd_dimm_check_symbol_error(cmd_dimm_t *dimm, uint16_t synd)
517 {
518 int upos;
519 cmd_dimm_t *d, *next;
520
521 if ((upos = cmd_synd2upos(synd)) < 0)
522 return (0);
523
524 for (d = cmd_list_next(&cmd.cmd_dimms); d != NULL; d = next) {
525 next = cmd_list_next(d);
526 if (cmd_same_datapath_dimms(dimm, d) &&
527 (d->dimm_syl_error == upos))
528 return (1);
529 }
530
531 return (0);
532 }
533