1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25 /*
26 * Ereport-handling routines for memory errors
27 */
28
29 #include <cmd_mem.h>
30 #include <cmd_dimm.h>
31 #include <cmd_bank.h>
32 #include <cmd_page.h>
33 #include <cmd_cpu.h>
34 #include <cmd_branch.h>
35 #include <cmd_state.h>
36 #include <cmd.h>
37 #include <cmd_hc_sun4v.h>
38
39 #include <assert.h>
40 #include <strings.h>
41 #include <string.h>
42 #include <errno.h>
43 #include <unistd.h>
44 #include <fm/fmd_api.h>
45 #include <sys/fm/ldom.h>
46 #include <sys/fm/protocol.h>
47
48 #include <sys/fm/cpu/UltraSPARC-T1.h>
49 #include <sys/mdesc.h>
50 #include <sys/async.h>
51 #include <sys/errclassify.h>
52 #include <sys/niagararegs.h>
53 #include <sys/fm/ldom.h>
54 #include <ctype.h>
55
56 #define VF_TS3_FCR 0x000000000000FFFFULL
57 #define VF_L2ESYR_C2C 0x8000000000000000ULL
58 #define OFFBIT 0xFFFFFFFFFFFC07FFULL
59 #define BIT28_32 0x00000001F0000000ULL
60 #define BIT13_17 0x000000000003E000ULL
61 #define BIT18_19 0x00000000000C0000ULL
62 #define BIT11_12 0x0000000000001800ULL
63 #define UTS2_CPUS_PER_CHIP 64
64 #define FBR_ERROR ".fbr"
65 #define DSU_ERROR ".dsu"
66 #define FERG_INVALID ".invalid"
67 #define DBU_ERROR ".dbu"
68
69 extern ldom_hdl_t *cpumem_diagnosis_lhp;
70
71 static fmd_hdl_t *cpumem_hdl = NULL;
72
/*
 * Compare the last dot-separated component of an ereport class string
 * (including its leading '.') against `suffix'.  Returns 0 when they are
 * equal and non-zero otherwise, like strcmp().  A class string without any
 * '.' is reported as "no match" instead of passing the NULL result of
 * strrchr() to strcmp().
 */
static int
cmd_err_class_cmp(const char *class, const char *suffix)
{
	const char *leaf = strrchr(class, '.');

	if (leaf == NULL)
		return (1);

	return (strcmp(leaf, suffix));
}

#define	ERR_CLASS(x, y)	(cmd_err_class_cmp((x), (y)))
74
75 static void *
cpumem_alloc(size_t size)76 cpumem_alloc(size_t size)
77 {
78 assert(cpumem_hdl != NULL);
79
80 return (fmd_hdl_alloc(cpumem_hdl, size, FMD_SLEEP));
81 }
82
83 static void
cpumem_free(void * addr,size_t size)84 cpumem_free(void *addr, size_t size)
85 {
86 assert(cpumem_hdl != NULL);
87
88 fmd_hdl_free(cpumem_hdl, addr, size);
89 }
90
91 /*ARGSUSED*/
92 cmd_evdisp_t
cmd_mem_synd_check(fmd_hdl_t * hdl,uint64_t afar,uint8_t afar_status,uint16_t synd,uint8_t synd_status,cmd_cpu_t * cpu)93 cmd_mem_synd_check(fmd_hdl_t *hdl, uint64_t afar, uint8_t afar_status,
94 uint16_t synd, uint8_t synd_status, cmd_cpu_t *cpu)
95 {
96 /*
97 * Niagara writebacks from L2 containing UEs are placed in memory
98 * with the poison syndrome NI_DRAM_POISON_SYND_FROM_LDWU.
99 * Memory UE ereports showing this syndrome are dropped because they
100 * indicate an L2 problem, which should be diagnosed from the
101 * corresponding L2 cache ereport.
102 */
103 switch (cpu->cpu_type) {
104 case CPU_ULTRASPARC_T1:
105 if (synd == NI_DRAM_POISON_SYND_FROM_LDWU) {
106 fmd_hdl_debug(hdl,
107 "discarding UE due to magic syndrome %x\n",
108 synd);
109 return (CMD_EVD_UNUSED);
110 }
111 break;
112 case CPU_ULTRASPARC_T2:
113 case CPU_ULTRASPARC_T2plus:
114 if (synd == N2_DRAM_POISON_SYND_FROM_LDWU) {
115 fmd_hdl_debug(hdl,
116 "discarding UE due to magic syndrome %x\n",
117 synd);
118 return (CMD_EVD_UNUSED);
119 }
120 break;
121 default:
122 break;
123 }
124 return (CMD_EVD_OK);
125 }
126
127 static int
cpu_present(fmd_hdl_t * hdl,nvlist_t * asru,uint32_t * cpuid)128 cpu_present(fmd_hdl_t *hdl, nvlist_t *asru, uint32_t *cpuid)
129 {
130 nvlist_t *cp_asru;
131 uint32_t i;
132
133 if (nvlist_dup(asru, &cp_asru, 0) != 0) {
134 fmd_hdl_debug(hdl, "unable to alloc asru for thread\n");
135 return (-1);
136 }
137
138 for (i = *cpuid; i < *cpuid + UTS2_CPUS_PER_CHIP; i++) {
139
140 (void) nvlist_remove_all(cp_asru, FM_FMRI_CPU_ID);
141
142 if (nvlist_add_uint32(cp_asru, FM_FMRI_CPU_ID, i) == 0) {
143 if (fmd_nvl_fmri_present(hdl, cp_asru) &&
144 !fmd_nvl_fmri_unusable(hdl, cp_asru)) {
145 nvlist_free(cp_asru);
146 *cpuid = i;
147 return (0);
148 }
149 }
150 }
151 nvlist_free(cp_asru);
152 return (-1);
153 }
154
155 /*ARGSUSED*/
156 cmd_evdisp_t
cmd_c2c(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_errcl_t clcode)157 cmd_c2c(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
158 cmd_errcl_t clcode)
159 {
160 uint32_t cpuid;
161 nvlist_t *det;
162 int rc;
163
164 (void) nvlist_lookup_nvlist(nvl, FM_EREPORT_DETECTOR, &det);
165 if (nvlist_lookup_uint32(det, FM_FMRI_CPU_ID, &cpuid) == 0) {
166
167 /*
168 * If the c2c bit is set, the sending cache of the
169 * cpu must be faulted instead of the memory.
170 * If the detector is chip0, the cache of the chip1
171 * is faulted and vice versa.
172 */
173 if (cpuid < UTS2_CPUS_PER_CHIP)
174 cpuid = UTS2_CPUS_PER_CHIP;
175 else
176 cpuid = 0;
177
178 rc = cpu_present(hdl, det, &cpuid);
179
180 if (rc != -1) {
181 (void) nvlist_remove(det, FM_FMRI_CPU_ID,
182 DATA_TYPE_UINT32);
183 if (nvlist_add_uint32(det,
184 FM_FMRI_CPU_ID, cpuid) == 0) {
185 clcode |= CMD_CPU_LEVEL_CHIP;
186 return (cmd_l2u(hdl, ep, nvl, class, clcode));
187 }
188
189 }
190 }
191 fmd_hdl_debug(hdl, "cmd_c2c: no cpuid discarding C2C error");
192 return (CMD_EVD_BAD);
193 }
194
195 /*
196 * sun4v's xe_common routine has an extra argument, clcode, compared
197 * to routine of same name in sun4u.
198 */
199
200 static cmd_evdisp_t
xe_common(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_errcl_t clcode,cmd_xe_handler_f * hdlr)201 xe_common(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
202 const char *class, cmd_errcl_t clcode, cmd_xe_handler_f *hdlr)
203 {
204 uint64_t afar, l2_afar, dram_afar;
205 uint64_t l2_afsr, dram_afsr, l2_esyr;
206 uint16_t synd;
207 uint8_t afar_status, synd_status;
208 nvlist_t *rsrc;
209 char *typenm;
210 uint64_t disp = 0;
211 int minorvers = 1;
212
213 if (nvlist_lookup_uint64(nvl,
214 FM_EREPORT_PAYLOAD_NAME_L2_AFSR, &l2_afsr) != 0 &&
215 nvlist_lookup_uint64(nvl,
216 FM_EREPORT_PAYLOAD_NAME_L2_ESR, &l2_afsr) != 0)
217 return (CMD_EVD_BAD);
218
219 if (nvlist_lookup_uint64(nvl,
220 FM_EREPORT_PAYLOAD_NAME_DRAM_AFSR, &dram_afsr) != 0 &&
221 nvlist_lookup_uint64(nvl,
222 FM_EREPORT_PAYLOAD_NAME_DRAM_ESR, &dram_afsr) != 0)
223 return (CMD_EVD_BAD);
224
225 if (nvlist_lookup_uint64(nvl,
226 FM_EREPORT_PAYLOAD_NAME_L2_AFAR, &l2_afar) != 0 &&
227 nvlist_lookup_uint64(nvl,
228 FM_EREPORT_PAYLOAD_NAME_L2_EAR, &l2_afar) != 0)
229 return (CMD_EVD_BAD);
230
231 if (nvlist_lookup_uint64(nvl,
232 FM_EREPORT_PAYLOAD_NAME_DRAM_AFAR, &dram_afar) != 0 &&
233 nvlist_lookup_uint64(nvl,
234 FM_EREPORT_PAYLOAD_NAME_DRAM_EAR, &dram_afar) != 0)
235 return (CMD_EVD_BAD);
236
237 if (nvlist_lookup_pairs(nvl, 0,
238 FM_EREPORT_PAYLOAD_NAME_ERR_TYPE, DATA_TYPE_STRING, &typenm,
239 FM_EREPORT_PAYLOAD_NAME_RESOURCE, DATA_TYPE_NVLIST, &rsrc,
240 NULL) != 0)
241 return (CMD_EVD_BAD);
242
243 synd = dram_afsr;
244
245 /*
246 * Niagara afar and synd validity.
247 * For a given set of error registers, the payload value is valid if
248 * no higher priority error status bit is set. See UltraSPARC-T1.h for
249 * error status bit values and priority settings. Note that for DAC
250 * and DAU, afar value is taken from l2 error registers, syndrome
251 * from dram error * registers; for DSC and DSU, both afar and
252 * syndrome are taken from dram * error registers. DSU afar and
253 * syndrome are always valid because no
254 * higher priority error will override.
255 */
256 switch (clcode) {
257 case CMD_ERRCL_DAC:
258 afar = l2_afar;
259 afar_status = ((l2_afsr & NI_L2AFSR_P10) == 0) ?
260 AFLT_STAT_VALID : AFLT_STAT_INVALID;
261 synd_status = ((dram_afsr & NI_DMAFSR_P01) == 0) ?
262 AFLT_STAT_VALID : AFLT_STAT_INVALID;
263 break;
264 case CMD_ERRCL_DSC:
265 afar = dram_afar;
266 afar_status = ((dram_afsr & NI_DMAFSR_P01) == 0) ?
267 AFLT_STAT_VALID : AFLT_STAT_INVALID;
268 synd_status = afar_status;
269 break;
270 case CMD_ERRCL_DAU:
271 afar = l2_afar;
272 afar_status = ((l2_afsr & NI_L2AFSR_P05) == 0) ?
273 AFLT_STAT_VALID : AFLT_STAT_INVALID;
274 synd_status = AFLT_STAT_VALID;
275
276 if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_NAME_L2_ESYR,
277 &l2_esyr) == 0) {
278 if (l2_esyr & VF_L2ESYR_C2C) {
279 return (cmd_c2c(hdl, ep, nvl, class, clcode));
280 }
281 }
282 break;
283 case CMD_ERRCL_DSU:
284 afar = dram_afar;
285 afar_status = synd_status = AFLT_STAT_VALID;
286 break;
287 default:
288 fmd_hdl_debug(hdl, "Niagara unrecognized mem error %llx\n",
289 clcode);
290 return (CMD_EVD_UNUSED);
291 }
292
293 return (hdlr(hdl, ep, nvl, class, afar, afar_status, synd,
294 synd_status, cmd_mem_name2type(typenm, minorvers), disp, rsrc));
295 }
296
297
298 /*ARGSUSED*/
299 cmd_evdisp_t
cmd_ce(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_errcl_t clcode)300 cmd_ce(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
301 cmd_errcl_t clcode)
302 {
303 if (strcmp(class, "ereport.cpu.ultraSPARC-T2plus.dsc") == 0)
304 return (CMD_EVD_UNUSED); /* drop VF dsc's */
305 else
306 return (xe_common(hdl, ep, nvl, class, clcode, cmd_ce_common));
307 }
308
309 /*ARGSUSED*/
310 cmd_evdisp_t
cmd_ue_train(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_errcl_t clcode)311 cmd_ue_train(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
312 cmd_errcl_t clcode)
313 {
314 cmd_evdisp_t rc, rc1;
315
316 /*
317 * The DAU is cause of the DAU->DCDP/ICDP train:
318 * - process the cause of the event.
319 * - register the error to the nop event train, so the effected errors
320 * (DCDP/ICDP) will be dropped.
321 */
322 rc = xe_common(hdl, ep, nvl, class, clcode, cmd_ue_common);
323
324 rc1 = cmd_xxcu_initial(hdl, ep, nvl, class, clcode, CMD_XR_HDLR_NOP);
325 if (rc1 != 0)
326 fmd_hdl_debug(hdl,
327 "Fail to add error (%llx) to the train, rc = %d",
328 clcode, rc1);
329
330 return (rc);
331 }
332
333 /*ARGSUSED*/
334 cmd_evdisp_t
cmd_ue(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_errcl_t clcode)335 cmd_ue(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
336 cmd_errcl_t clcode)
337 {
338 if (strcmp(class, "ereport.cpu.ultraSPARC-T2plus.dsu") == 0)
339 /*
340 * VF dsu's need to be treated like branch errors,
341 * because we can't localize to a single DIMM or pair of
342 * DIMMs given missing/invalid parts of the dram-ear.
343 */
344 return (cmd_fb(hdl, ep, nvl, class, clcode));
345 else
346 return (xe_common(hdl, ep, nvl, class, clcode, cmd_ue_common));
347 }
348
349 /*ARGSUSED*/
350 cmd_evdisp_t
cmd_frx(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_errcl_t clcode)351 cmd_frx(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
352 cmd_errcl_t clcode)
353 {
354 return (CMD_EVD_UNUSED);
355 }
356
357
358 /*ARGSUSED*/
359 cmd_evdisp_t
cmd_fb(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_errcl_t clcode)360 cmd_fb(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
361 cmd_errcl_t clcode)
362 {
363 cmd_branch_t *branch;
364 const char *uuid;
365 nvlist_t *asru, *det;
366 uint64_t ts3_fcr;
367
368 if (nvlist_lookup_nvlist(nvl, FM_RSRC_RESOURCE, &asru) < 0) {
369 CMD_STAT_BUMP(bad_mem_asru);
370 return (NULL);
371 }
372
373 if (nvlist_lookup_nvlist(nvl, FM_EREPORT_DETECTOR, &det) < 0) {
374 CMD_STAT_BUMP(bad_mem_asru);
375 return (NULL);
376 }
377
378 if (fmd_nvl_fmri_expand(hdl, det) < 0) {
379 fmd_hdl_debug(hdl, "Failed to expand detector");
380 return (NULL);
381 }
382
383 branch = cmd_branch_lookup(hdl, asru);
384 if (branch == NULL) {
385 if ((branch = cmd_branch_create(hdl, asru)) == NULL)
386 return (CMD_EVD_UNUSED);
387 }
388
389 if (branch->branch_case.cc_cp != NULL &&
390 fmd_case_solved(hdl, branch->branch_case.cc_cp)) {
391 fmd_hdl_debug(hdl, "Case solved\n");
392 return (CMD_EVD_REDUND);
393 }
394
395 if (branch->branch_case.cc_cp == NULL) {
396 branch->branch_case.cc_cp = cmd_case_create(hdl,
397 &branch->branch_header, CMD_PTR_BRANCH_CASE, &uuid);
398 }
399
400 if (ERR_CLASS(class, FBR_ERROR) == 0) {
401 if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_NAME_TS3_FCR,
402 &ts3_fcr) == 0 && (ts3_fcr != VF_TS3_FCR)) {
403 fmd_hdl_debug(hdl,
404 "Processing fbr with lane failover\n");
405 cmd_branch_create_fault(hdl, branch,
406 "fault.memory.link-f", det);
407
408 } else {
409 fmd_hdl_debug(hdl, "Adding fbr event to serd engine\n");
410 if (branch->branch_case.cc_serdnm == NULL) {
411 branch->branch_case.cc_serdnm =
412 cmd_mem_serdnm_create(hdl,
413 "branch", branch->branch_unum);
414
415 fmd_serd_create(hdl,
416 branch->branch_case.cc_serdnm,
417 fmd_prop_get_int32(hdl, "fbr_n"),
418 fmd_prop_get_int64(hdl, "fbr_t"));
419 }
420
421 if (fmd_serd_record(hdl,
422 branch->branch_case.cc_serdnm, ep) == FMD_B_FALSE)
423 return (CMD_EVD_OK); /* engine hasn't fired */
424
425 fmd_hdl_debug(hdl, "fbr serd fired\n");
426
427 fmd_case_add_serd(hdl, branch->branch_case.cc_cp,
428 branch->branch_case.cc_serdnm);
429
430 cmd_branch_create_fault(hdl, branch,
431 "fault.memory.link-c", det);
432 }
433 } else if (ERR_CLASS(class, DSU_ERROR) == 0) {
434 fmd_hdl_debug(hdl, "Processing dsu event");
435 cmd_branch_create_fault(hdl, branch, "fault.memory.bank", det);
436 } else {
437 fmd_hdl_debug(hdl, "Processing fbu event");
438 cmd_branch_create_fault(hdl, branch, "fault.memory.link-u",
439 det);
440 }
441
442 branch->branch_flags |= CMD_MEM_F_FAULTING;
443
444 if (branch->branch_case.cc_serdnm != NULL) {
445 fmd_serd_destroy(hdl, branch->branch_case.cc_serdnm);
446 fmd_hdl_strfree(hdl, branch->branch_case.cc_serdnm);
447 branch->branch_case.cc_serdnm = NULL;
448 }
449
450 fmd_case_add_ereport(hdl, branch->branch_case.cc_cp, ep);
451 fmd_case_solve(hdl, branch->branch_case.cc_cp);
452 cmd_branch_dirty(hdl, branch);
453
454 return (CMD_EVD_OK);
455 }
456
457 /*ARGSUSED*/
458 cmd_evdisp_t
cmd_fb_train(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_errcl_t clcode)459 cmd_fb_train(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
460 cmd_errcl_t clcode)
461 {
462 cmd_evdisp_t rc, rc1;
463
464 /*
465 * The FBU is cause of the FBU->DCDP/ICDP train:
466 * - process the cause of the event.
467 * - register the error to the nop event train, so the effected errors
468 * (DCDP/ICDP) will be dropped.
469 */
470 rc = cmd_fb(hdl, ep, nvl, class, clcode);
471
472 rc1 = cmd_xxcu_initial(hdl, ep, nvl, class, clcode, CMD_XR_HDLR_NOP);
473 if (rc1 != 0)
474 fmd_hdl_debug(hdl,
475 "Fail to add error (%llx) to the train, rc = %d",
476 clcode, rc1);
477
478 return (rc);
479 }
480
481
482 /*ARGSUSED*/
483 cmd_evdisp_t
cmd_fw_defect(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_errcl_t clcode)484 cmd_fw_defect(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
485 cmd_errcl_t clcode)
486 {
487 const char *fltclass = NULL;
488 nvlist_t *rsc = NULL;
489 int solve = 0;
490
491 if ((rsc = init_mb(hdl)) == NULL)
492 return (CMD_EVD_UNUSED);
493
494 if (ERR_CLASS(class, FERG_INVALID) == 0) {
495 fltclass = "defect.fw.generic-sparc.erpt-gen";
496 } else if (ERR_CLASS(class, DBU_ERROR) == 0) {
497 cmd_evdisp_t rc;
498 fltclass = "defect.fw.generic-sparc.addr-oob";
499 /*
500 * add dbu to nop error train
501 */
502 rc = cmd_xxcu_initial(hdl, ep, nvl, class, clcode,
503 CMD_XR_HDLR_NOP);
504 if (rc != 0)
505 fmd_hdl_debug(hdl,
506 "Failed to add error (%llx) to the train, rc = %d",
507 clcode, rc);
508 } else {
509 fmd_hdl_debug(hdl, "Unexpected fw defect event %s", class);
510 }
511
512 if (fltclass) {
513 fmd_case_t *cp = NULL;
514 nvlist_t *fault = NULL;
515
516 fault = fmd_nvl_create_fault(hdl, fltclass, 100, NULL,
517 NULL, rsc);
518 if (fault != NULL) {
519 cp = fmd_case_open(hdl, NULL);
520 fmd_case_add_ereport(hdl, cp, ep);
521 fmd_case_add_suspect(hdl, cp, fault);
522 fmd_case_solve(hdl, cp);
523 solve = 1;
524 }
525 }
526
527 if (rsc)
528 nvlist_free(rsc);
529
530 return (solve ? CMD_EVD_OK : CMD_EVD_UNUSED);
531 }
532
533 void
cmd_branch_close(fmd_hdl_t * hdl,void * arg)534 cmd_branch_close(fmd_hdl_t *hdl, void *arg)
535 {
536 cmd_branch_destroy(hdl, arg);
537 }
538
539
540 /*ARGSUSED*/
541 ulong_t
cmd_mem_get_phys_pages(fmd_hdl_t * hdl)542 cmd_mem_get_phys_pages(fmd_hdl_t *hdl)
543 {
544 /*
545 * Compute and return the total physical memory in pages from the
546 * MD/PRI.
547 * Cache its value.
548 */
549 static ulong_t npage = 0;
550 md_t *mdp;
551 mde_cookie_t *listp;
552 uint64_t bmem, physmem = 0;
553 ssize_t bufsiz = 0;
554 uint64_t *bufp;
555 int num_nodes, nmblocks, i;
556
557 if (npage > 0) {
558 return (npage);
559 }
560
561 if (cpumem_hdl == NULL) {
562 cpumem_hdl = hdl;
563 }
564
565 if ((bufsiz = ldom_get_core_md(cpumem_diagnosis_lhp, &bufp)) <= 0) {
566 return (0);
567 }
568 if ((mdp = md_init_intern(bufp, cpumem_alloc, cpumem_free)) == NULL ||
569 (num_nodes = md_node_count(mdp)) <= 0) {
570 cpumem_free(bufp, (size_t)bufsiz);
571 return (0);
572 }
573
574 listp = (mde_cookie_t *)cpumem_alloc(sizeof (mde_cookie_t) *
575 num_nodes);
576 nmblocks = md_scan_dag(mdp, MDE_INVAL_ELEM_COOKIE,
577 md_find_name(mdp, "mblock"),
578 md_find_name(mdp, "fwd"), listp);
579 for (i = 0; i < nmblocks; i++) {
580 if (md_get_prop_val(mdp, listp[i], "size", &bmem) < 0) {
581 physmem = 0;
582 break;
583 }
584 physmem += bmem;
585 }
586 npage = (ulong_t)(physmem / cmd.cmd_pagesize);
587
588 cpumem_free(listp, sizeof (mde_cookie_t) * num_nodes);
589 cpumem_free(bufp, (size_t)bufsiz);
590 (void) md_fini(mdp);
591
592 return (npage);
593 }
594
/*
 * Multiplication table for the 4-bit Galois field used by the syndrome
 * decoder below: galois_mul[a][b] is the product of nibbles a and b.
 */
static const int galois_mul[16][16] = {
/*    0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F */
	{ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0}, /* 0 */
	{ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15}, /* 1 */
	{ 0,  2,  4,  6,  8, 10, 12, 14,  3,  1,  7,  5, 11,  9, 15, 13}, /* 2 */
	{ 0,  3,  6,  5, 12, 15, 10,  9, 11,  8, 13, 14,  7,  4,  1,  2}, /* 3 */
	{ 0,  4,  8, 12,  3,  7, 11, 15,  6,  2, 14, 10,  5,  1, 13,  9}, /* 4 */
	{ 0,  5, 10, 15,  7,  2, 13,  8, 14, 11,  4,  1,  9, 12,  3,  6}, /* 5 */
	{ 0,  6, 12, 10, 11, 13,  7,  1,  5,  3,  9, 15, 14,  8,  2,  4}, /* 6 */
	{ 0,  7, 14,  9, 15,  8,  1,  6, 13, 10,  3,  4,  2,  5, 12, 11}, /* 7 */
	{ 0,  8,  3, 11,  6, 14,  5, 13, 12,  4, 15,  7, 10,  2,  9,  1}, /* 8 */
	{ 0,  9,  1,  8,  2, 11,  3, 10,  4, 13,  5, 12,  6, 15,  7, 14}, /* 9 */
	{ 0, 10,  7, 13, 14,  4,  9,  3, 15,  5,  8,  2,  1, 11,  6, 12}, /* A */
	{ 0, 11,  5, 14, 10,  1, 15,  4,  7, 12,  2,  9, 13,  6,  8,  3}, /* B */
	{ 0, 12, 11,  7,  5,  9, 14,  2, 10,  6,  1, 13, 15,  3,  4,  8}, /* C */
	{ 0, 13,  9,  4,  1, 12,  8,  5,  2, 15, 11,  6,  3, 14, 10,  7}, /* D */
	{ 0, 14, 15,  1, 13,  3,  2, 12,  9,  7,  6,  8,  4, 10, 11,  5}, /* E */
	{ 0, 15, 13,  2,  9,  6,  4, 11,  1, 14, 12,  3,  8,  7,  5, 10}  /* F */
};

/*
 * Galois-field division: return the first q in 0..15 for which
 * galois_mul[denom][q] == num, or -1 if no such q exists.  Both arguments
 * must be in the range 0..15.
 */
static int
galois_div(int num, int denom)
{
	const int *row = galois_mul[denom];
	int q = 0;

	while (q < 16) {
		if (row[q] == num)
			return (q);
		q++;
	}

	return (-1);
}

/*
 * Map a 16-bit syndrome to the position of the failing nibble:
 *
 *	data nibbles  N0-N31 => 0-31
 *	check nibbles C0-C3  => 32-35
 *
 * The syndrome is treated as four nibbles, written below from most to
 * least significant.  Returns -1 for a clean (zero) syndrome and for any
 * pattern that does not identify a single nibble.
 */
int
cmd_synd2upos(uint16_t syndrome)
{
	int n0, n1, n2, n3;

	if (syndrome == 0)
		return (-1);		/* clean syndrome, not a CE */

	n0 = syndrome & 0xF;
	n1 = (syndrome >> 4) & 0xF;
	n2 = (syndrome >> 8) & 0xF;
	n3 = (syndrome >> 12) & 0xF;

	if (n3 == 0) {
		if (n2 == 0 && n1 == 0)
			return (32);	/* 0 0 0 e => C0 */
		if (n2 == 0 && n0 == 0)
			return (33);	/* 0 0 e 0 => C1 */
		if (n1 == 0 && n0 == 0)
			return (34);	/* 0 e 0 0 => C2 */

		/* 0 d d d => N31; anything else is a multibit error */
		return ((n2 == n1 && n1 == n0) ? 31 : -1);
	}

	if (n2 == 0) {
		if (n1 == 0 && n0 == 0)
			return (35);	/* e 0 0 0 => C3 */
		if (n1 == 0 || n0 == 0)
			return (-1);	/* not a 0 b c */
		if (galois_div(galois_mul[n1][n1], n0) != n3)
			return (-1);	/* check nibble not valid */

		return (galois_div(n0, n1) - 1);	/* N0 - N14 */
	}

	if (n1 == 0) {
		/* n2 is known to be non-zero here */
		if (n0 == 0)
			return (-1);	/* not a b 0 c */
		if (galois_div(galois_mul[n2][n2], n0) != n3)
			return (-1);	/* check nibble not valid */

		return (galois_div(n0, n2) + 14);	/* N15 - N29 */
	}

	if (n0 == 0) {
		/* d d d 0 => N30 */
		return ((n3 == n2 && n2 == n1) ? 30 : -1);
	}

	return (-1);
}
674
675 nvlist_t *
cmd_mem2hc(fmd_hdl_t * hdl,nvlist_t * mem_fmri)676 cmd_mem2hc(fmd_hdl_t *hdl, nvlist_t *mem_fmri) {
677
678 char **snp;
679 uint_t n;
680
681 if (nvlist_lookup_string_array(mem_fmri, FM_FMRI_HC_SERIAL_ID,
682 &snp, &n) != 0)
683 return (NULL); /* doesn't have serial id */
684
685 return (cmd_find_dimm_by_sn(hdl, FM_FMRI_SCHEME_HC, *snp));
686 }
687
688 /*
689 * formula to convert an unhashed address to hashed address
690 * PA[17:11] = (PA[32:28] xor PA[17:13]) :: ((PA[19:18] xor PA[12:11])
691 */
692 void
cmd_to_hashed_addr(uint64_t * addr,uint64_t afar,const char * class)693 cmd_to_hashed_addr(uint64_t *addr, uint64_t afar, const char *class)
694 {
695
696 if (strstr(class, "ultraSPARC-T1") != NULL)
697 *addr = afar;
698 else {
699 *addr = (afar & OFFBIT) |
700 ((afar & BIT28_32) >> 15) ^ (afar & BIT13_17) |
701 ((afar & BIT18_19) >> 7) ^ (afar & BIT11_12);
702 }
703 }
704
705 int
cmd_same_datapath_dimms(cmd_dimm_t * d1,cmd_dimm_t * d2)706 cmd_same_datapath_dimms(cmd_dimm_t *d1, cmd_dimm_t *d2)
707 {
708 char *p, *q;
709
710 p = strstr(d1->dimm_unum, "CMP");
711 q = strstr(d2->dimm_unum, "CMP");
712 if (p != NULL && q != NULL) {
713 if (strncmp(p, q, 4) == 0)
714 return (1);
715 }
716 return (0);
717 }
718
719 /*
720 * fault the FRU of the common CMP
721 */
722 /*ARGSUSED*/
723 void
cmd_gen_datapath_fault(fmd_hdl_t * hdl,cmd_dimm_t * d1,cmd_dimm_t * d2,uint16_t upos,nvlist_t * det)724 cmd_gen_datapath_fault(fmd_hdl_t *hdl, cmd_dimm_t *d1, cmd_dimm_t *d2,
725 uint16_t upos, nvlist_t *det)
726 {
727 fmd_case_t *cp;
728 char *frustr;
729 nvlist_t *rsrc, *fltlist;
730 char *s;
731 char const *str1, *str2;
732 uint_t len, i;
733
734 s = strstr(d1->dimm_unum, "CMP");
735 if (s == NULL)
736 return;
737
738 frustr = fmd_hdl_zalloc(hdl, strlen(d1->dimm_unum), FMD_SLEEP);
739 len = strlen(d1->dimm_unum) - strlen(s);
740
741 if (strncmp(d1->dimm_unum, d2->dimm_unum, len) != 0) {
742 for (i = 0, str1 = d1->dimm_unum, str2 = d2->dimm_unum;
743 *str1 == *str2 && i <= len;
744 str1++, str2++, i++)
745 ;
746 len = i;
747 }
748
749 (void) strncpy(frustr, d1->dimm_unum, len);
750
751 rsrc = cmd_mkboard_fru(hdl, frustr, NULL, NULL);
752
753 fmd_hdl_free(hdl, frustr, strlen(d1->dimm_unum));
754
755 if (rsrc == NULL)
756 return;
757
758 (void) nvlist_add_nvlist(rsrc, FM_FMRI_AUTHORITY, cmd.cmd_auth);
759
760 cp = fmd_case_open(hdl, NULL);
761
762 fltlist = fmd_nvl_create_fault(hdl, "fault.memory.datapath", 100,
763 rsrc, NULL, rsrc);
764
765 fmd_case_add_suspect(hdl, cp, fltlist);
766 fmd_case_solve(hdl, cp);
767
768 nvlist_free(rsrc);
769 }
770