xref: /titanic_50/usr/src/cmd/fm/modules/sun4v/cpumem-diagnosis/cmd_memerr_arch.c (revision aab83bb83be7342f6cfccaed8d5fe0b2f404855d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 /*
26  * Ereport-handling routines for memory errors
27  */
28 
29 #include <cmd_mem.h>
30 #include <cmd_dimm.h>
31 #include <cmd_bank.h>
32 #include <cmd_page.h>
33 #include <cmd_cpu.h>
34 #include <cmd_branch.h>
35 #include <cmd_state.h>
36 #include <cmd.h>
37 #include <cmd_hc_sun4v.h>
38 
39 #include <assert.h>
40 #include <strings.h>
41 #include <string.h>
42 #include <errno.h>
43 #include <unistd.h>
44 #include <fm/fmd_api.h>
45 #include <sys/fm/ldom.h>
46 #include <sys/fm/protocol.h>
47 
48 #include <sys/fm/cpu/UltraSPARC-T1.h>
49 #include <sys/mdesc.h>
50 #include <sys/async.h>
51 #include <sys/errclassify.h>
52 #include <sys/niagararegs.h>
53 #include <sys/fm/ldom.h>
54 #include <ctype.h>
55 
56 #define	VF_TS3_FCR	0x000000000000FFFFULL
57 #define	VF_L2ESYR_C2C	0x8000000000000000ULL
58 #define	OFFBIT		0xFFFFFFFFFFFC07FFULL
59 #define	BIT28_32	0x00000001F0000000ULL
60 #define	BIT13_17	0x000000000003E000ULL
61 #define	BIT18_19	0x00000000000C0000ULL
62 #define	BIT11_12	0x0000000000001800ULL
63 #define	UTS2_CPUS_PER_CHIP	64
64 #define	FBR_ERROR	".fbr"
65 #define	DSU_ERROR	".dsu"
66 #define	FERG_INVALID	".invalid"
67 #define	DBU_ERROR 	".dbu"
68 
69 extern ldom_hdl_t *cpumem_diagnosis_lhp;
70 
71 static fmd_hdl_t *cpumem_hdl = NULL;
72 
/*
 * Compare the last dot-separated component of an ereport class (including
 * the leading '.') against a suffix such as ".fbr".
 *
 * Returns 0 on a match and non-zero otherwise, like strcmp().  A class
 * string that contains no '.' at all never matches; previously the NULL
 * returned by strrchr() was passed straight to strcmp().
 */
static int
cmd_errclass_suffix_cmp(const char *class, const char *suffix)
{
	const char *dot = strrchr(class, '.');

	if (dot == NULL)
		return (1);

	return (strcmp(dot, suffix));
}

#define	ERR_CLASS(x, y)	(cmd_errclass_suffix_cmp((x), (y)))
74 
75 static void *
cpumem_alloc(size_t size)76 cpumem_alloc(size_t size)
77 {
78 	assert(cpumem_hdl != NULL);
79 
80 	return (fmd_hdl_alloc(cpumem_hdl, size, FMD_SLEEP));
81 }
82 
83 static void
cpumem_free(void * addr,size_t size)84 cpumem_free(void *addr, size_t size)
85 {
86 	assert(cpumem_hdl != NULL);
87 
88 	fmd_hdl_free(cpumem_hdl, addr, size);
89 }
90 
91 /*ARGSUSED*/
92 cmd_evdisp_t
cmd_mem_synd_check(fmd_hdl_t * hdl,uint64_t afar,uint8_t afar_status,uint16_t synd,uint8_t synd_status,cmd_cpu_t * cpu)93 cmd_mem_synd_check(fmd_hdl_t *hdl, uint64_t afar, uint8_t afar_status,
94     uint16_t synd, uint8_t synd_status, cmd_cpu_t *cpu)
95 {
96 	/*
97 	 * Niagara writebacks from L2 containing UEs are placed in memory
98 	 * with the poison syndrome NI_DRAM_POISON_SYND_FROM_LDWU.
99 	 * Memory UE ereports showing this syndrome are dropped because they
100 	 * indicate an L2 problem, which should be diagnosed from the
101 	 * corresponding L2 cache ereport.
102 	 */
103 	switch (cpu->cpu_type) {
104 		case CPU_ULTRASPARC_T1:
105 			if (synd == NI_DRAM_POISON_SYND_FROM_LDWU) {
106 				fmd_hdl_debug(hdl,
107 				    "discarding UE due to magic syndrome %x\n",
108 				    synd);
109 				return (CMD_EVD_UNUSED);
110 			}
111 			break;
112 		case CPU_ULTRASPARC_T2:
113 		case CPU_ULTRASPARC_T2plus:
114 			if (synd == N2_DRAM_POISON_SYND_FROM_LDWU) {
115 				fmd_hdl_debug(hdl,
116 				    "discarding UE due to magic syndrome %x\n",
117 				    synd);
118 				return (CMD_EVD_UNUSED);
119 			}
120 			break;
121 		default:
122 			break;
123 	}
124 	return (CMD_EVD_OK);
125 }
126 
127 static int
cpu_present(fmd_hdl_t * hdl,nvlist_t * asru,uint32_t * cpuid)128 cpu_present(fmd_hdl_t *hdl, nvlist_t *asru, uint32_t *cpuid)
129 {
130 	nvlist_t *cp_asru;
131 	uint32_t i;
132 
133 	if (nvlist_dup(asru, &cp_asru, 0) != 0) {
134 		fmd_hdl_debug(hdl, "unable to alloc asru for thread\n");
135 		return (-1);
136 	}
137 
138 	for (i = *cpuid; i < *cpuid + UTS2_CPUS_PER_CHIP; i++) {
139 
140 		(void) nvlist_remove_all(cp_asru, FM_FMRI_CPU_ID);
141 
142 		if (nvlist_add_uint32(cp_asru, FM_FMRI_CPU_ID, i) == 0) {
143 			if (fmd_nvl_fmri_present(hdl, cp_asru) &&
144 			    !fmd_nvl_fmri_unusable(hdl, cp_asru)) {
145 				nvlist_free(cp_asru);
146 				*cpuid = i;
147 				return (0);
148 			}
149 		}
150 	}
151 	nvlist_free(cp_asru);
152 	return (-1);
153 }
154 
155 /*ARGSUSED*/
156 cmd_evdisp_t
cmd_c2c(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_errcl_t clcode)157 cmd_c2c(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
158     cmd_errcl_t clcode)
159 {
160 	uint32_t cpuid;
161 	nvlist_t *det;
162 	int rc;
163 
164 	(void) nvlist_lookup_nvlist(nvl, FM_EREPORT_DETECTOR, &det);
165 	if (nvlist_lookup_uint32(det, FM_FMRI_CPU_ID, &cpuid) == 0) {
166 
167 		/*
168 		 * If the c2c bit is set, the sending cache of the
169 		 * cpu must be faulted instead of the memory.
170 		 * If the detector is chip0, the cache of the chip1
171 		 * is faulted and vice versa.
172 		 */
173 		if (cpuid < UTS2_CPUS_PER_CHIP)
174 			cpuid = UTS2_CPUS_PER_CHIP;
175 		else
176 			cpuid = 0;
177 
178 		rc = cpu_present(hdl, det, &cpuid);
179 
180 		if (rc != -1) {
181 			(void) nvlist_remove(det, FM_FMRI_CPU_ID,
182 			    DATA_TYPE_UINT32);
183 			if (nvlist_add_uint32(det,
184 			    FM_FMRI_CPU_ID, cpuid) == 0) {
185 				clcode |= CMD_CPU_LEVEL_CHIP;
186 				return (cmd_l2u(hdl, ep, nvl, class, clcode));
187 			}
188 
189 		}
190 	}
191 	fmd_hdl_debug(hdl, "cmd_c2c: no cpuid discarding C2C error");
192 	return (CMD_EVD_BAD);
193 }
194 
195 /*
196  * sun4v's xe_common routine has an extra argument, clcode, compared
197  * to routine of same name in sun4u.
198  */
199 
200 static cmd_evdisp_t
xe_common(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_errcl_t clcode,cmd_xe_handler_f * hdlr)201 xe_common(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
202     const char *class, cmd_errcl_t clcode, cmd_xe_handler_f *hdlr)
203 {
204 	uint64_t afar, l2_afar, dram_afar;
205 	uint64_t l2_afsr, dram_afsr, l2_esyr;
206 	uint16_t synd;
207 	uint8_t afar_status, synd_status;
208 	nvlist_t *rsrc;
209 	char *typenm;
210 	uint64_t disp = 0;
211 	int minorvers = 1;
212 
213 	if (nvlist_lookup_uint64(nvl,
214 	    FM_EREPORT_PAYLOAD_NAME_L2_AFSR, &l2_afsr) != 0 &&
215 	    nvlist_lookup_uint64(nvl,
216 	    FM_EREPORT_PAYLOAD_NAME_L2_ESR, &l2_afsr) != 0)
217 		return (CMD_EVD_BAD);
218 
219 	if (nvlist_lookup_uint64(nvl,
220 	    FM_EREPORT_PAYLOAD_NAME_DRAM_AFSR, &dram_afsr) != 0 &&
221 	    nvlist_lookup_uint64(nvl,
222 	    FM_EREPORT_PAYLOAD_NAME_DRAM_ESR, &dram_afsr) != 0)
223 		return (CMD_EVD_BAD);
224 
225 	if (nvlist_lookup_uint64(nvl,
226 	    FM_EREPORT_PAYLOAD_NAME_L2_AFAR, &l2_afar) != 0 &&
227 	    nvlist_lookup_uint64(nvl,
228 	    FM_EREPORT_PAYLOAD_NAME_L2_EAR, &l2_afar) != 0)
229 		return (CMD_EVD_BAD);
230 
231 	if (nvlist_lookup_uint64(nvl,
232 	    FM_EREPORT_PAYLOAD_NAME_DRAM_AFAR, &dram_afar) != 0 &&
233 	    nvlist_lookup_uint64(nvl,
234 	    FM_EREPORT_PAYLOAD_NAME_DRAM_EAR, &dram_afar) != 0)
235 		return (CMD_EVD_BAD);
236 
237 	if (nvlist_lookup_pairs(nvl, 0,
238 	    FM_EREPORT_PAYLOAD_NAME_ERR_TYPE, DATA_TYPE_STRING, &typenm,
239 	    FM_EREPORT_PAYLOAD_NAME_RESOURCE, DATA_TYPE_NVLIST, &rsrc,
240 	    NULL) != 0)
241 		return (CMD_EVD_BAD);
242 
243 	synd = dram_afsr;
244 
245 	/*
246 	 * Niagara afar and synd validity.
247 	 * For a given set of error registers, the payload value is valid if
248 	 * no higher priority error status bit is set.  See UltraSPARC-T1.h for
249 	 * error status bit values and priority settings.  Note that for DAC
250 	 * and DAU, afar value is taken from l2 error registers, syndrome
251 	 * from dram error * registers; for DSC and DSU, both afar and
252 	 * syndrome are taken from dram * error registers.  DSU afar and
253 	 * syndrome are always valid because no
254 	 * higher priority error will override.
255 	 */
256 	switch (clcode) {
257 	case CMD_ERRCL_DAC:
258 		afar = l2_afar;
259 		afar_status = ((l2_afsr & NI_L2AFSR_P10) == 0) ?
260 		    AFLT_STAT_VALID : AFLT_STAT_INVALID;
261 		synd_status = ((dram_afsr & NI_DMAFSR_P01) == 0) ?
262 		    AFLT_STAT_VALID : AFLT_STAT_INVALID;
263 		break;
264 	case CMD_ERRCL_DSC:
265 		afar = dram_afar;
266 		afar_status = ((dram_afsr & NI_DMAFSR_P01) == 0) ?
267 		    AFLT_STAT_VALID : AFLT_STAT_INVALID;
268 		synd_status = afar_status;
269 		break;
270 	case CMD_ERRCL_DAU:
271 		afar = l2_afar;
272 		afar_status = ((l2_afsr & NI_L2AFSR_P05) == 0) ?
273 		    AFLT_STAT_VALID : AFLT_STAT_INVALID;
274 		synd_status = AFLT_STAT_VALID;
275 
276 		if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_NAME_L2_ESYR,
277 		    &l2_esyr) == 0) {
278 			if (l2_esyr & VF_L2ESYR_C2C) {
279 				return (cmd_c2c(hdl, ep, nvl, class, clcode));
280 			}
281 		}
282 		break;
283 	case CMD_ERRCL_DSU:
284 		afar = dram_afar;
285 		afar_status = synd_status = AFLT_STAT_VALID;
286 		break;
287 	default:
288 		fmd_hdl_debug(hdl, "Niagara unrecognized mem error %llx\n",
289 		    clcode);
290 		return (CMD_EVD_UNUSED);
291 	}
292 
293 	return (hdlr(hdl, ep, nvl, class, afar, afar_status, synd,
294 	    synd_status, cmd_mem_name2type(typenm, minorvers), disp, rsrc));
295 }
296 
297 
298 /*ARGSUSED*/
299 cmd_evdisp_t
cmd_ce(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_errcl_t clcode)300 cmd_ce(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
301     cmd_errcl_t clcode)
302 {
303 	if (strcmp(class, "ereport.cpu.ultraSPARC-T2plus.dsc") == 0)
304 		return (CMD_EVD_UNUSED); /* drop VF dsc's */
305 	else
306 		return (xe_common(hdl, ep, nvl, class, clcode, cmd_ce_common));
307 }
308 
309 /*ARGSUSED*/
310 cmd_evdisp_t
cmd_ue_train(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_errcl_t clcode)311 cmd_ue_train(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
312     cmd_errcl_t clcode)
313 {
314 	cmd_evdisp_t rc, rc1;
315 
316 	/*
317 	 * The DAU is cause of the DAU->DCDP/ICDP train:
318 	 * - process the cause of the event.
319 	 * - register the error to the nop event train, so the effected errors
320 	 * (DCDP/ICDP) will be dropped.
321 	 */
322 	rc = xe_common(hdl, ep, nvl, class, clcode, cmd_ue_common);
323 
324 	rc1 = cmd_xxcu_initial(hdl, ep, nvl, class, clcode, CMD_XR_HDLR_NOP);
325 	if (rc1 != 0)
326 		fmd_hdl_debug(hdl,
327 		    "Fail to add error (%llx) to the train, rc = %d",
328 		    clcode, rc1);
329 
330 	return (rc);
331 }
332 
333 /*ARGSUSED*/
334 cmd_evdisp_t
cmd_ue(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_errcl_t clcode)335 cmd_ue(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
336     cmd_errcl_t clcode)
337 {
338 	if (strcmp(class, "ereport.cpu.ultraSPARC-T2plus.dsu") == 0)
339 		/*
340 		 * VF dsu's need to be treated like branch errors,
341 		 * because we can't localize to a single DIMM or pair of
342 		 * DIMMs given missing/invalid parts of the dram-ear.
343 		 */
344 		return (cmd_fb(hdl, ep, nvl, class, clcode));
345 	else
346 		return (xe_common(hdl, ep, nvl, class, clcode, cmd_ue_common));
347 }
348 
349 /*ARGSUSED*/
350 cmd_evdisp_t
cmd_frx(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_errcl_t clcode)351 cmd_frx(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
352     cmd_errcl_t clcode)
353 {
354 	return (CMD_EVD_UNUSED);
355 }
356 
357 
358 /*ARGSUSED*/
359 cmd_evdisp_t
cmd_fb(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_errcl_t clcode)360 cmd_fb(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
361     cmd_errcl_t clcode)
362 {
363 	cmd_branch_t *branch;
364 	const char *uuid;
365 	nvlist_t *asru, *det;
366 	uint64_t ts3_fcr;
367 
368 	if (nvlist_lookup_nvlist(nvl, FM_RSRC_RESOURCE, &asru) < 0) {
369 		CMD_STAT_BUMP(bad_mem_asru);
370 		return (NULL);
371 	}
372 
373 	if (nvlist_lookup_nvlist(nvl, FM_EREPORT_DETECTOR, &det) < 0) {
374 		CMD_STAT_BUMP(bad_mem_asru);
375 		return (NULL);
376 	}
377 
378 	if (fmd_nvl_fmri_expand(hdl, det) < 0) {
379 		fmd_hdl_debug(hdl, "Failed to expand detector");
380 		return (NULL);
381 	}
382 
383 	branch = cmd_branch_lookup(hdl, asru);
384 	if (branch == NULL) {
385 		if ((branch = cmd_branch_create(hdl, asru)) == NULL)
386 			return (CMD_EVD_UNUSED);
387 	}
388 
389 	if (branch->branch_case.cc_cp != NULL &&
390 	    fmd_case_solved(hdl, branch->branch_case.cc_cp)) {
391 		fmd_hdl_debug(hdl, "Case solved\n");
392 		return (CMD_EVD_REDUND);
393 	}
394 
395 	if (branch->branch_case.cc_cp == NULL) {
396 		branch->branch_case.cc_cp = cmd_case_create(hdl,
397 		    &branch->branch_header, CMD_PTR_BRANCH_CASE, &uuid);
398 	}
399 
400 	if (ERR_CLASS(class, FBR_ERROR) == 0) {
401 		if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_NAME_TS3_FCR,
402 		    &ts3_fcr) == 0 && (ts3_fcr != VF_TS3_FCR)) {
403 			fmd_hdl_debug(hdl,
404 			    "Processing fbr with lane failover\n");
405 			cmd_branch_create_fault(hdl, branch,
406 			    "fault.memory.link-f", det);
407 
408 		} else {
409 			fmd_hdl_debug(hdl, "Adding fbr event to serd engine\n");
410 			if (branch->branch_case.cc_serdnm == NULL) {
411 				branch->branch_case.cc_serdnm =
412 				    cmd_mem_serdnm_create(hdl,
413 				    "branch", branch->branch_unum);
414 
415 				fmd_serd_create(hdl,
416 				    branch->branch_case.cc_serdnm,
417 				    fmd_prop_get_int32(hdl, "fbr_n"),
418 				    fmd_prop_get_int64(hdl, "fbr_t"));
419 			}
420 
421 			if (fmd_serd_record(hdl,
422 			    branch->branch_case.cc_serdnm, ep) == FMD_B_FALSE)
423 				return (CMD_EVD_OK); /* engine hasn't fired */
424 
425 			fmd_hdl_debug(hdl, "fbr serd fired\n");
426 
427 			fmd_case_add_serd(hdl, branch->branch_case.cc_cp,
428 			    branch->branch_case.cc_serdnm);
429 
430 			cmd_branch_create_fault(hdl, branch,
431 			    "fault.memory.link-c", det);
432 		}
433 	} else if (ERR_CLASS(class, DSU_ERROR) == 0) {
434 		fmd_hdl_debug(hdl, "Processing dsu event");
435 		cmd_branch_create_fault(hdl, branch, "fault.memory.bank", det);
436 	} else {
437 		fmd_hdl_debug(hdl, "Processing fbu event");
438 		cmd_branch_create_fault(hdl, branch, "fault.memory.link-u",
439 		    det);
440 	}
441 
442 	branch->branch_flags |= CMD_MEM_F_FAULTING;
443 
444 	if (branch->branch_case.cc_serdnm != NULL) {
445 		fmd_serd_destroy(hdl, branch->branch_case.cc_serdnm);
446 		fmd_hdl_strfree(hdl, branch->branch_case.cc_serdnm);
447 		branch->branch_case.cc_serdnm = NULL;
448 	}
449 
450 	fmd_case_add_ereport(hdl, branch->branch_case.cc_cp, ep);
451 	fmd_case_solve(hdl, branch->branch_case.cc_cp);
452 	cmd_branch_dirty(hdl, branch);
453 
454 	return (CMD_EVD_OK);
455 }
456 
457 /*ARGSUSED*/
458 cmd_evdisp_t
cmd_fb_train(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_errcl_t clcode)459 cmd_fb_train(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
460     cmd_errcl_t clcode)
461 {
462 	cmd_evdisp_t rc, rc1;
463 
464 	/*
465 	 * The FBU is cause of the FBU->DCDP/ICDP train:
466 	 * - process the cause of the event.
467 	 * - register the error to the nop event train, so the effected errors
468 	 * (DCDP/ICDP) will be dropped.
469 	 */
470 	rc = cmd_fb(hdl, ep, nvl, class, clcode);
471 
472 	rc1 = cmd_xxcu_initial(hdl, ep, nvl, class, clcode, CMD_XR_HDLR_NOP);
473 	if (rc1 != 0)
474 		fmd_hdl_debug(hdl,
475 		    "Fail to add error (%llx) to the train, rc = %d",
476 		    clcode, rc1);
477 
478 	return (rc);
479 }
480 
481 
482 /*ARGSUSED*/
483 cmd_evdisp_t
cmd_fw_defect(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_errcl_t clcode)484 cmd_fw_defect(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
485     cmd_errcl_t clcode)
486 {
487 	const char *fltclass = NULL;
488 	nvlist_t *rsc = NULL;
489 	int solve = 0;
490 
491 	if ((rsc = init_mb(hdl)) == NULL)
492 		return (CMD_EVD_UNUSED);
493 
494 	if (ERR_CLASS(class, FERG_INVALID) == 0) {
495 		fltclass = "defect.fw.generic-sparc.erpt-gen";
496 	} else if (ERR_CLASS(class, DBU_ERROR) == 0) {
497 		cmd_evdisp_t rc;
498 		fltclass = "defect.fw.generic-sparc.addr-oob";
499 		/*
500 		 * add dbu to nop error train
501 		 */
502 		rc = cmd_xxcu_initial(hdl, ep, nvl, class, clcode,
503 		    CMD_XR_HDLR_NOP);
504 		if (rc != 0)
505 			fmd_hdl_debug(hdl,
506 			    "Failed to add error (%llx) to the train, rc = %d",
507 			    clcode, rc);
508 	} else {
509 		fmd_hdl_debug(hdl, "Unexpected fw defect event %s", class);
510 	}
511 
512 	if (fltclass) {
513 		fmd_case_t *cp = NULL;
514 		nvlist_t *fault = NULL;
515 
516 		fault = fmd_nvl_create_fault(hdl, fltclass, 100, NULL,
517 		    NULL, rsc);
518 		if (fault != NULL) {
519 			cp = fmd_case_open(hdl, NULL);
520 			fmd_case_add_ereport(hdl, cp, ep);
521 			fmd_case_add_suspect(hdl, cp, fault);
522 			fmd_case_solve(hdl, cp);
523 			solve = 1;
524 		}
525 	}
526 
527 	nvlist_free(rsc);
528 
529 	return (solve ? CMD_EVD_OK : CMD_EVD_UNUSED);
530 }
531 
532 void
cmd_branch_close(fmd_hdl_t * hdl,void * arg)533 cmd_branch_close(fmd_hdl_t *hdl, void *arg)
534 {
535 	cmd_branch_destroy(hdl, arg);
536 }
537 
538 
539 /*ARGSUSED*/
540 ulong_t
cmd_mem_get_phys_pages(fmd_hdl_t * hdl)541 cmd_mem_get_phys_pages(fmd_hdl_t *hdl)
542 {
543 	/*
544 	 * Compute and return the total physical memory in pages from the
545 	 * MD/PRI.
546 	 * Cache its value.
547 	 */
548 	static ulong_t npage = 0;
549 	md_t *mdp;
550 	mde_cookie_t *listp;
551 	uint64_t bmem, physmem = 0;
552 	ssize_t bufsiz = 0;
553 	uint64_t *bufp;
554 	int num_nodes, nmblocks, i;
555 
556 	if (npage > 0) {
557 		return (npage);
558 	}
559 
560 	if (cpumem_hdl == NULL) {
561 		cpumem_hdl = hdl;
562 	}
563 
564 	if ((bufsiz = ldom_get_core_md(cpumem_diagnosis_lhp, &bufp)) <= 0) {
565 		return (0);
566 	}
567 	if ((mdp = md_init_intern(bufp, cpumem_alloc, cpumem_free)) == NULL ||
568 	    (num_nodes = md_node_count(mdp)) <= 0) {
569 		cpumem_free(bufp, (size_t)bufsiz);
570 		return (0);
571 	}
572 
573 	listp = (mde_cookie_t *)cpumem_alloc(sizeof (mde_cookie_t) *
574 	    num_nodes);
575 	nmblocks = md_scan_dag(mdp, MDE_INVAL_ELEM_COOKIE,
576 	    md_find_name(mdp, "mblock"),
577 	    md_find_name(mdp, "fwd"), listp);
578 	for (i = 0; i < nmblocks; i++) {
579 		if (md_get_prop_val(mdp, listp[i], "size", &bmem) < 0) {
580 			physmem = 0;
581 			break;
582 		}
583 		physmem += bmem;
584 	}
585 	npage = (ulong_t)(physmem / cmd.cmd_pagesize);
586 
587 	cpumem_free(listp, sizeof (mde_cookie_t) * num_nodes);
588 	cpumem_free(bufp, (size_t)bufsiz);
589 	(void) md_fini(mdp);
590 
591 	return (npage);
592 }
593 
/*
 * Multiplication table for 4-bit symbols (nibbles): galois_mul[a][b] is
 * the product a * b.  The entries are consistent with GF(2^4) arithmetic
 * (for example 2 * 8 = 3).
 */
static int galois_mul[16][16] = {
	/* 0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F */
	{  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0 },
	{  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
	{  0,  2,  4,  6,  8, 10, 12, 14,  3,  1,  7,  5, 11,  9, 15, 13 },
	{  0,  3,  6,  5, 12, 15, 10,  9, 11,  8, 13, 14,  7,  4,  1,  2 },
	{  0,  4,  8, 12,  3,  7, 11, 15,  6,  2, 14, 10,  5,  1, 13,  9 },
	{  0,  5, 10, 15,  7,  2, 13,  8, 14, 11,  4,  1,  9, 12,  3,  6 },
	{  0,  6, 12, 10, 11, 13,  7,  1,  5,  3,  9, 15, 14,  8,  2,  4 },
	{  0,  7, 14,  9, 15,  8,  1,  6, 13, 10,  3,  4,  2,  5, 12, 11 },
	{  0,  8,  3, 11,  6, 14,  5, 13, 12,  4, 15,  7, 10,  2,  9,  1 },
	{  0,  9,  1,  8,  2, 11,  3, 10,  4, 13,  5, 12,  6, 15,  7, 14 },
	{  0, 10,  7, 13, 14,  4,  9,  3, 15,  5,  8,  2,  1, 11,  6, 12 },
	{  0, 11,  5, 14, 10,  1, 15,  4,  7, 12,  2,  9, 13,  6,  8,  3 },
	{  0, 12, 11,  7,  5,  9, 14,  2, 10,  6,  1, 13, 15,  3,  4,  8 },
	{  0, 13,  9,  4,  1, 12,  8,  5,  2, 15, 11,  6,  3, 14, 10,  7 },
	{  0, 14, 15,  1, 13,  3,  2, 12,  9,  7,  6,  8,  4, 10, 11,  5 },
	{  0, 15, 13,  2,  9,  6,  4, 11,  1, 14, 12,  3,  8,  7,  5, 10 }
};

/*
 * Divide num by denom using galois_mul: returns the smallest q in 0..15
 * with denom * q == num, or -1 if there is none (e.g. a non-zero num with
 * denom == 0).  Both arguments must be in the range 0..15.
 */
static int
galois_div(int num, int denom)
{
	const int *row = galois_mul[denom];
	int q = 0;

	while (q < 16 && row[q] != num)
		q++;

	return (q < 16 ? q : -1);
}

/*
 * Map a 16-bit memory CE syndrome to the position of the failing nibble.
 *
 *   data nibbles  N0-N31 => 0-31
 *   check nibbles C0-C3  => 32-35
 *
 * The syndrome is treated as four nibbles s3 s2 s1 s0 (s0 least
 * significant); which of them are non-zero selects the decoding rule.
 * Returns -1 for a clean (zero) syndrome and for any pattern that does not
 * correspond to a single failing nibble.
 */
int
cmd_synd2upos(uint16_t syndrome)
{
	int n0 = syndrome & 0xF;
	int n1 = (syndrome >> 4) & 0xF;
	int n2 = (syndrome >> 8) & 0xF;
	int n3 = (syndrome >> 12) & 0xF;
	int nonzero;

	if (syndrome == 0)
		return (-1);	/* clean syndrome, not a CE */

	/* bit k of `nonzero' is set when nibble k of the syndrome is */
	nonzero = ((n3 != 0) << 3) | ((n2 != 0) << 2) |
	    ((n1 != 0) << 1) | (n0 != 0);

	switch (nonzero) {
	case 0x1:			/* 0 0 0 e => C0 */
		return (32);
	case 0x2:			/* 0 0 e 0 => C1 */
		return (33);
	case 0x4:			/* 0 e 0 0 => C2 */
		return (34);
	case 0x8:			/* e 0 0 0 => C3 */
		return (35);

	case 0x7:			/* 0 d d d => N31 */
		if (n2 == n1 && n1 == n0)
			return (31);
		return (-1);		/* multibit error */

	case 0xE:			/* d d d 0 => N30 */
		if (n3 == n2 && n2 == n1)
			return (30);
		return (-1);

	case 0xB:			/* a 0 b c => N0 - N14 */
		if (n3 != galois_div(galois_mul[n1][n1], n0))
			return (-1);	/* check nibble not valid */
		return (galois_div(n0, n1) - 1);

	case 0xD:			/* a b 0 c => N15 - N29 */
		if (n3 != galois_div(galois_mul[n2][n2], n0))
			return (-1);	/* check nibble not valid */
		return (galois_div(n0, n2) + 14);

	default:			/* anything else is a multibit error */
		return (-1);
	}
}
673 
674 nvlist_t *
cmd_mem2hc(fmd_hdl_t * hdl,nvlist_t * mem_fmri)675 cmd_mem2hc(fmd_hdl_t *hdl, nvlist_t *mem_fmri) {
676 
677 	char **snp;
678 	uint_t n;
679 
680 	if (nvlist_lookup_string_array(mem_fmri, FM_FMRI_HC_SERIAL_ID,
681 	    &snp, &n) != 0)
682 		return (NULL); /* doesn't have serial id */
683 
684 	return (cmd_find_dimm_by_sn(hdl, FM_FMRI_SCHEME_HC, *snp));
685 }
686 
687 /*
688  * formula to convert an unhashed address to hashed address
689  * PA[17:11] = (PA[32:28] xor PA[17:13]) :: ((PA[19:18] xor PA[12:11])
690  */
691 void
cmd_to_hashed_addr(uint64_t * addr,uint64_t afar,const char * class)692 cmd_to_hashed_addr(uint64_t *addr, uint64_t afar, const char *class)
693 {
694 
695 	if (strstr(class, "ultraSPARC-T1") != NULL)
696 		*addr = afar;
697 	else {
698 		*addr = (afar & OFFBIT) |
699 		    ((afar & BIT28_32) >> 15) ^ (afar & BIT13_17) |
700 		    ((afar & BIT18_19) >> 7) ^ (afar & BIT11_12);
701 	}
702 }
703 
704 int
cmd_same_datapath_dimms(cmd_dimm_t * d1,cmd_dimm_t * d2)705 cmd_same_datapath_dimms(cmd_dimm_t *d1, cmd_dimm_t *d2)
706 {
707 	char *p, *q;
708 
709 	p = strstr(d1->dimm_unum, "CMP");
710 	q = strstr(d2->dimm_unum, "CMP");
711 	if (p != NULL && q != NULL) {
712 		if (strncmp(p, q, 4) == 0)
713 			return (1);
714 	}
715 	return (0);
716 }
717 
718 /*
719  * fault the FRU of the common CMP
720  */
721 /*ARGSUSED*/
722 void
cmd_gen_datapath_fault(fmd_hdl_t * hdl,cmd_dimm_t * d1,cmd_dimm_t * d2,uint16_t upos,nvlist_t * det)723 cmd_gen_datapath_fault(fmd_hdl_t *hdl, cmd_dimm_t *d1, cmd_dimm_t *d2,
724     uint16_t upos, nvlist_t *det)
725 {
726 	fmd_case_t *cp;
727 	char *frustr;
728 	nvlist_t *rsrc, *fltlist;
729 	char *s;
730 	char const *str1, *str2;
731 	uint_t len, i;
732 
733 	s = strstr(d1->dimm_unum, "CMP");
734 	if (s == NULL)
735 		return;
736 
737 	frustr = fmd_hdl_zalloc(hdl, strlen(d1->dimm_unum), FMD_SLEEP);
738 	len = strlen(d1->dimm_unum) -  strlen(s);
739 
740 	if (strncmp(d1->dimm_unum, d2->dimm_unum, len) != 0) {
741 		for (i = 0, str1 = d1->dimm_unum, str2 = d2->dimm_unum;
742 		    *str1 == *str2 && i <= len;
743 		    str1++, str2++, i++)
744 			;
745 		len = i;
746 	}
747 
748 	(void) strncpy(frustr, d1->dimm_unum, len);
749 
750 	rsrc = cmd_mkboard_fru(hdl, frustr, NULL, NULL);
751 
752 	fmd_hdl_free(hdl, frustr, strlen(d1->dimm_unum));
753 
754 	if (rsrc == NULL)
755 		return;
756 
757 	(void) nvlist_add_nvlist(rsrc, FM_FMRI_AUTHORITY, cmd.cmd_auth);
758 
759 	cp = fmd_case_open(hdl, NULL);
760 
761 	fltlist = fmd_nvl_create_fault(hdl, "fault.memory.datapath", 100,
762 	    rsrc, NULL, rsrc);
763 
764 	fmd_case_add_suspect(hdl, cp, fltlist);
765 	fmd_case_solve(hdl, cp);
766 
767 	nvlist_free(rsrc);
768 }
769