xref: /titanic_41/usr/src/cmd/fm/modules/sun4v/generic-mem/gmem_memerr.c (revision 6a634c9dca3093f3922e4b7ab826d7bdf17bf78e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 
26 /*
27  * Ereport-handling routines for memory errors
28  */
29 
30 #include <gmem_mem.h>
31 #include <gmem_dimm.h>
32 #include <gmem_page.h>
33 #include <gmem.h>
34 
35 #include <strings.h>
36 #include <string.h>
37 #include <errno.h>
38 #include <assert.h>
39 #include <fm/fmd_api.h>
40 #include <fm/libtopo.h>
41 #include <sys/fm/protocol.h>
42 #include <sys/async.h>
43 #include <sys/errclassify.h>
44 
/*
 * Bit-field masks used by gmem_to_hashed_addr().  OFFBIT keeps every bit of
 * a physical address except PA[17:11]; each BITx_y mask selects PA[y:x].
 */
#define	OFFBIT		(~(0x7FULL << 11))
#define	BIT28_32	(0x1FULL << 28)
#define	BIT13_17	(0x1FULL << 13)
#define	BIT18_19	(0x3ULL << 18)
#define	BIT11_12	(0x3ULL << 11)
50 
51 struct ce_name2type {
52 	const char *name;
53 	ce_dispact_t type;
54 };
55 
56 nvlist_t *fru_nvl;
57 
58 static ce_dispact_t
gmem_mem_name2type(const char * name)59 gmem_mem_name2type(const char *name)
60 {
61 	static const struct ce_name2type new[] = {
62 		{ "mem-unk",		CE_DISP_UNKNOWN },
63 		{ "mem-is",		CE_DISP_INTERMITTENT },
64 		{ "mem-cs",		CE_DISP_PERS },
65 		{ "mem-ss",		CE_DISP_STICKY },
66 		{ NULL }
67 	};
68 	const struct ce_name2type *names = &new[0];
69 	const struct ce_name2type *tp;
70 
71 	for (tp = names; tp->name != NULL; tp++) {
72 		if (strcasecmp(name, tp->name) == 0)
73 			return (tp->type);
74 	}
75 
76 	return (CE_DISP_UNKNOWN);
77 }
78 
79 /*ARGSUSED*/
80 static int
find_fault_fru(topo_hdl_t * thp,tnode_t * node,void * arg)81 find_fault_fru(topo_hdl_t *thp, tnode_t *node, void *arg)
82 {
83 	nvlist_t *nvl = (nvlist_t *)arg;
84 	nvlist_t *rsc = NULL, *fru = NULL;
85 	nvlist_t **hcl, **topo_hcl;
86 	uint_t n1, n2;
87 	char *name, *name1, *name2;
88 	char *id1, *id2;
89 	int err, i;
90 
91 	if (topo_node_resource(node, &rsc, &err) < 0)
92 		return (TOPO_WALK_NEXT);
93 
94 	err = nvlist_lookup_nvlist_array(rsc, FM_FMRI_HC_LIST, &topo_hcl, &n1);
95 
96 	if (err != 0) {
97 		nvlist_free(rsc);
98 		return (TOPO_WALK_NEXT);
99 	}
100 
101 	(void) nvlist_lookup_string(topo_hcl[n1 - 1], FM_FMRI_HC_NAME, &name);
102 	if (strcmp(name, "chip") != 0) {
103 		nvlist_free(rsc);
104 		return (TOPO_WALK_NEXT);
105 	}
106 
107 	(void) nvlist_lookup_nvlist_array(nvl, FM_FMRI_HC_LIST, &hcl, &n2);
108 
109 	if (n1 != n2) {
110 		nvlist_free(rsc);
111 		return (TOPO_WALK_NEXT);
112 	}
113 
114 	for (i = 0; i < n1; i++) {
115 		(void) nvlist_lookup_string(topo_hcl[i], FM_FMRI_HC_NAME,
116 		    &name1);
117 		(void) nvlist_lookup_string(topo_hcl[i], FM_FMRI_HC_ID, &id1);
118 		(void) nvlist_lookup_string(hcl[i], FM_FMRI_HC_NAME, &name2);
119 		(void) nvlist_lookup_string(hcl[i], FM_FMRI_HC_ID, &id2);
120 		if (strcmp(name1, name2) != 0 || strcmp(id1, id2) != 0) {
121 			nvlist_free(rsc);
122 			return (TOPO_WALK_NEXT);
123 		}
124 	}
125 
126 	(void) topo_node_fru(node, &fru, NULL, &err);
127 	if (fru != NULL) {
128 		(void) nvlist_dup(fru, &fru_nvl, NV_UNIQUE_NAME);
129 		nvlist_free(fru);
130 	}
131 	nvlist_free(rsc);
132 	return (TOPO_WALK_TERMINATE);
133 }
134 
135 nvlist_t *
gmem_find_fault_fru(fmd_hdl_t * hdl,nvlist_t * nvl)136 gmem_find_fault_fru(fmd_hdl_t *hdl, nvlist_t *nvl) {
137 	topo_hdl_t *thp;
138 	topo_walk_t *twp;
139 	int err;
140 	fru_nvl = NULL;
141 
142 	if ((thp = fmd_hdl_topo_hold(hdl, TOPO_VERSION)) == NULL)
143 		return (NULL);
144 
145 	if ((twp = topo_walk_init(thp, FM_FMRI_SCHEME_HC,
146 	    find_fault_fru, nvl, &err)) == NULL) {
147 		fmd_hdl_topo_rele(hdl, thp);
148 		return (NULL);
149 	}
150 
151 	(void) topo_walk_step(twp, TOPO_WALK_CHILD);
152 	topo_walk_fini(twp);
153 	fmd_hdl_topo_rele(hdl, thp);
154 	return (fru_nvl);
155 }
156 
157 /*
158  * fault the FRU of the common detector between two DIMMs
159  */
160 void
gmem_gen_datapath_fault(fmd_hdl_t * hdl,nvlist_t * det)161 gmem_gen_datapath_fault(fmd_hdl_t *hdl, nvlist_t *det)
162 {
163 	char *name, *id;
164 	nvlist_t **hcl1, **hcl;
165 	uint_t n;
166 	int i, j;
167 	fmd_case_t *cp;
168 	nvlist_t *fltlist, *rsrc;
169 	nvlist_t *fru = NULL;
170 
171 	if (nvlist_lookup_nvlist_array(det, FM_FMRI_HC_LIST, &hcl1, &n) < 0)
172 		return;
173 
174 	for (i = 0; i < n; i++) {
175 		(void) nvlist_lookup_string(hcl1[i], FM_FMRI_HC_NAME, &name);
176 		if (strcmp(name, "chip") == 0)
177 			break;
178 	}
179 
180 	n = i + 1;
181 	hcl = fmd_hdl_zalloc(hdl, sizeof (nvlist_t *) * n, FMD_SLEEP);
182 	if (hcl == NULL)
183 		return;
184 
185 	for (i = 0; i < n; i++) {
186 		(void) nvlist_alloc(&hcl[i],
187 		    NV_UNIQUE_NAME|NV_UNIQUE_NAME_TYPE, 0);
188 	}
189 
190 	for (i = 0, j = 0; i < n; i++) {
191 		(void) nvlist_lookup_string(hcl1[i], FM_FMRI_HC_NAME, &name);
192 		(void) nvlist_lookup_string(hcl1[i], FM_FMRI_HC_ID, &id);
193 		(void) nvlist_add_string(hcl[j], FM_FMRI_HC_NAME, name);
194 		(void) nvlist_add_string(hcl[j], FM_FMRI_HC_ID, id);
195 		j++;
196 		if (strcmp(name, "chip") == 0)
197 			break;
198 	}
199 
200 	if (nvlist_alloc(&rsrc,  NV_UNIQUE_NAME|NV_UNIQUE_NAME_TYPE, 0) != 0) {
201 		for (i = 0; i < n; i++) {
202 			if (hcl[i] != NULL)
203 				nvlist_free(hcl[i]);
204 		}
205 		fmd_hdl_free(hdl, hcl, sizeof (nvlist_t *) * n);
206 	}
207 
208 	if (nvlist_add_uint8(rsrc, FM_VERSION, FM_HC_SCHEME_VERSION) != 0 ||
209 	    nvlist_add_string(rsrc, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC) != 0 ||
210 	    nvlist_add_string(rsrc, FM_FMRI_HC_ROOT, "") != 0 ||
211 	    nvlist_add_uint32(rsrc, FM_FMRI_HC_LIST_SZ, n) != 0 ||
212 	    nvlist_add_nvlist_array(rsrc, FM_FMRI_HC_LIST, hcl, n) != 0) {
213 		for (i = 0; i < n; i++) {
214 			if (hcl[i] != NULL)
215 				nvlist_free(hcl[i]);
216 		}
217 		fmd_hdl_free(hdl, hcl, sizeof (nvlist_t *) * n);
218 		nvlist_free(rsrc);
219 	}
220 
221 	fru = gmem_find_fault_fru(hdl, rsrc);
222 	if (fru != NULL) {
223 		cp = fmd_case_open(hdl, NULL);
224 		fltlist = fmd_nvl_create_fault(hdl, "fault.memory.datapath",
225 		    100, fru, fru, fru);
226 		fmd_case_add_suspect(hdl, cp, fltlist);
227 		fmd_case_solve(hdl, cp);
228 		nvlist_free(fru);
229 	}
230 
231 	for (i = 0; i < n; i++) {
232 		if (hcl[i] != NULL)
233 			nvlist_free(hcl[i]);
234 	}
235 
236 	fmd_hdl_free(hdl, hcl, sizeof (nvlist_t *) * n);
237 	nvlist_free(rsrc);
238 }
239 
240 /*
241  * formula to conver an unhashed address to hashed address
242  * PA[17:11] = (PA[32:28] xor PA[17:13]) :: ((PA[19:18] xor PA[12:11])
243  */
244 static void
gmem_to_hashed_addr(uint64_t * addr,uint64_t afar)245 gmem_to_hashed_addr(uint64_t *addr, uint64_t afar)
246 {
247 
248 	*addr = (afar & OFFBIT) | ((afar & BIT28_32) >> 15) ^ (afar & BIT13_17)
249 	    | ((afar & BIT18_19) >> 7) ^ (afar & BIT11_12);
250 }
251 
252 /*
253  * check if a dimm has n CEs that have the same symbol-in-error
254  */
255 int
upos_thresh_check(gmem_dimm_t * dimm,uint16_t upos,uint32_t threshold)256 upos_thresh_check(gmem_dimm_t *dimm, uint16_t upos, uint32_t threshold)
257 {
258 	int i;
259 	gmem_mq_t *ip, *next;
260 	int count = 0;
261 
262 	for (i = 0; i < GMEM_MAX_CKWDS; i++) {
263 		for (ip = gmem_list_next(&dimm->mq_root[i]); ip != NULL;
264 		    ip = next) {
265 			next = gmem_list_next(ip);
266 			if (ip->mq_unit_position == upos) {
267 				count++;
268 				if (count >= threshold)
269 					return (1);
270 			}
271 		}
272 	}
273 	return (0);
274 }
275 
276 /*
277  * check if smaller number of retired pages > 1/16 of larger number of
278  * retired pages
279  */
280 int
check_bad_rw_retired_pages(fmd_hdl_t * hdl,gmem_dimm_t * d1,gmem_dimm_t * d2)281 check_bad_rw_retired_pages(fmd_hdl_t *hdl, gmem_dimm_t *d1, gmem_dimm_t *d2)
282 {
283 	uint_t sret, lret;
284 	double ratio;
285 
286 	sret = lret = 0;
287 
288 	if (d2->dimm_nretired < d1->dimm_nretired) {
289 		sret = d2->dimm_nretired;
290 		lret = d1->dimm_nretired;
291 	} else if (d2->dimm_nretired > d1->dimm_nretired) {
292 		sret = d1->dimm_nretired;
293 		lret = d2->dimm_nretired;
294 	} else
295 		return (0);
296 
297 	ratio = lret * GMEM_MQ_RATIO;
298 
299 	if (sret > ratio) {
300 		fmd_hdl_debug(hdl, "sret=%d lret=%d ratio=%.3f",
301 		    sret, lret, ratio);
302 		return (1);
303 	}
304 	return (0);
305 }
306 
307 /*
308  * check bad rw on any two DIMMs. The check succeeds if
309  * - each DIMM has a n CEs which have the same symbol-in-error,
310  * - the smaller number of retired pages > 1/16 larger number of retired pages
311  */
312 static int
check_bad_rw_between_dimms(fmd_hdl_t * hdl,gmem_dimm_t * d1,gmem_dimm_t * d2,uint16_t * rupos)313 check_bad_rw_between_dimms(fmd_hdl_t *hdl, gmem_dimm_t *d1, gmem_dimm_t *d2,
314     uint16_t *rupos)
315 {
316 	int i;
317 	gmem_mq_t *ip, *next;
318 	uint16_t upos;
319 
320 	for (i = 0; i < GMEM_MAX_CKWDS; i++) {
321 		for (ip = gmem_list_next(&d1->mq_root[i]); ip != NULL;
322 		    ip = next) {
323 			next = gmem_list_next(ip);
324 			upos = ip->mq_unit_position;
325 			if (upos_thresh_check(d1, upos, gmem.gm_nupos)) {
326 				if (upos_thresh_check(d2, upos,
327 				    gmem.gm_nupos)) {
328 					if (check_bad_rw_retired_pages(hdl,
329 					    d1, d2)) {
330 						*rupos = upos;
331 						return (1);
332 					}
333 				}
334 			}
335 		}
336 	}
337 
338 	return (0);
339 }
340 
341 static void
bad_reader_writer_check(fmd_hdl_t * hdl,nvlist_t * det,gmem_dimm_t * ce_dimm)342 bad_reader_writer_check(fmd_hdl_t *hdl, nvlist_t *det, gmem_dimm_t *ce_dimm)
343 {
344 	gmem_dimm_t *d, *next;
345 	uint16_t upos;
346 
347 	for (d = gmem_list_next(&gmem.gm_dimms); d != NULL; d = next) {
348 		next = gmem_list_next(d);
349 		if (d == ce_dimm)
350 			continue;
351 		if (!gmem_same_datapath_dimms(hdl, ce_dimm, d))
352 			continue;
353 		if (check_bad_rw_between_dimms(hdl, ce_dimm, d, &upos)) {
354 			gmem_gen_datapath_fault(hdl, det);
355 			gmem_save_symbol_error(hdl, ce_dimm, upos);
356 			fmd_hdl_debug(hdl,
357 			    "check_bad_rw_dimms succeeded: %s %s\n",
358 			    ce_dimm->dimm_serial, d->dimm_serial);
359 			return;
360 		}
361 	}
362 }
363 
364 /*
365  * rule 5a checking. The check succeeds if
366  * - nretired >= 512
367  * - nretired >= 128 and (addr_hi - addr_low) / (nretired -1 ) > 512KB
368  */
369 static void
ce_thresh_check(fmd_hdl_t * hdl,gmem_dimm_t * dimm)370 ce_thresh_check(fmd_hdl_t *hdl, gmem_dimm_t *dimm)
371 {
372 	nvlist_t *flt, *rsrc;
373 	fmd_case_t *cp;
374 	uint_t nret;
375 	uint64_t delta_addr = 0;
376 
377 	if (dimm->dimm_flags & GMEM_F_FAULTING)
378 		return;
379 
380 	nret = dimm->dimm_nretired;
381 
382 	if (nret < gmem.gm_low_ce_thresh)
383 		return;
384 
385 	if (dimm->dimm_phys_addr_hi >= dimm->dimm_phys_addr_low)
386 		delta_addr =
387 		    (dimm->dimm_phys_addr_hi - dimm->dimm_phys_addr_low) /
388 		    (nret - 1);
389 
390 	if (nret >= gmem.gm_max_retired_pages || delta_addr > GMEM_MQ_512KB) {
391 
392 		fmd_hdl_debug(hdl, "ce_thresh_check succeeded nret=%d", nret);
393 		dimm->dimm_flags |= GMEM_F_FAULTING;
394 		gmem_dimm_dirty(hdl, dimm);
395 
396 		cp = fmd_case_open(hdl, NULL);
397 		rsrc = gmem_find_dimm_rsc(hdl, dimm->dimm_serial);
398 		flt = fmd_nvl_create_fault(hdl, GMEM_FAULT_DIMM_PAGES,
399 		    GMEM_FLTMAXCONF, NULL, gmem_dimm_fru(dimm), rsrc);
400 		fmd_case_add_suspect(hdl, cp, flt);
401 		fmd_case_solve(hdl, cp);
402 		if (rsrc != NULL)
403 			nvlist_free(rsrc);
404 	}
405 }
406 
407 /*
408  * rule 5b checking. The check succeeds if more than 120
409  * non-intermittent CEs are reported against one symbol
410  * position of one afar in 72 hours
411  */
412 static void
mq_5b_check(fmd_hdl_t * hdl,gmem_dimm_t * dimm)413 mq_5b_check(fmd_hdl_t *hdl, gmem_dimm_t *dimm)
414 {
415 	nvlist_t *flt, *rsrc;
416 	fmd_case_t *cp;
417 	gmem_mq_t *ip, *next;
418 	int cw;
419 
420 	for (cw = 0; cw < GMEM_MAX_CKWDS; cw++) {
421 		for (ip = gmem_list_next(&dimm->mq_root[cw]);
422 		    ip != NULL; ip = next) {
423 			next = gmem_list_next(ip);
424 			if (ip->mq_dupce_count >= gmem.gm_dupce) {
425 				fmd_hdl_debug(hdl,
426 				    "mq_5b_check succeeded: duplicate CE=%d",
427 				    ip->mq_dupce_count);
428 				cp = fmd_case_open(hdl, NULL);
429 				rsrc = gmem_find_dimm_rsc(hdl,
430 				    dimm->dimm_serial);
431 				flt = fmd_nvl_create_fault(hdl,
432 				    GMEM_FAULT_DIMM_PAGES, GMEM_FLTMAXCONF,
433 				    NULL, gmem_dimm_fru(dimm), rsrc);
434 				dimm->dimm_flags |= GMEM_F_FAULTING;
435 				gmem_dimm_dirty(hdl, dimm);
436 				fmd_case_add_suspect(hdl, cp, flt);
437 				fmd_case_solve(hdl, cp);
438 				if (rsrc != NULL)
439 					nvlist_free(rsrc);
440 				return;
441 			}
442 		}
443 	}
444 }
445 
446 /*
447  * delete the expired duplicate CE time stamps
448  */
449 static void
mq_prune_dup(fmd_hdl_t * hdl,gmem_mq_t * ip,uint64_t now)450 mq_prune_dup(fmd_hdl_t *hdl, gmem_mq_t *ip, uint64_t now)
451 {
452 	tstamp_t *tsp, *next;
453 
454 	for (tsp = gmem_list_next(&ip->mq_dupce_tstamp); tsp != NULL;
455 	    tsp = next) {
456 		next = gmem_list_next(tsp);
457 		if (tsp->tstamp < now - GMEM_MQ_TIMELIM) {
458 			gmem_list_delete(&ip->mq_dupce_tstamp, &tsp->ts_l);
459 			fmd_hdl_free(hdl, tsp, sizeof (tstamp_t));
460 			ip->mq_dupce_count--;
461 		}
462 	}
463 }
464 
465 static void
mq_update(fmd_hdl_t * hdl,fmd_event_t * ep,gmem_mq_t * ip,uint64_t now)466 mq_update(fmd_hdl_t *hdl, fmd_event_t *ep, gmem_mq_t *ip, uint64_t now)
467 {
468 	tstamp_t *tsp;
469 
470 	ip->mq_tstamp = now;
471 	ip->mq_ep = ep;
472 	if (fmd_serd_exists(hdl, ip->mq_serdnm))
473 		fmd_serd_destroy(hdl, ip->mq_serdnm);
474 
475 	fmd_serd_create(hdl, ip->mq_serdnm, GMEM_MQ_SERDN, GMEM_MQ_SERDT);
476 	(void) fmd_serd_record(hdl, ip->mq_serdnm, ep);
477 
478 	tsp = fmd_hdl_zalloc(hdl, sizeof (tstamp_t), FMD_SLEEP);
479 	tsp->tstamp = now;
480 	gmem_list_append(&ip->mq_dupce_tstamp, tsp);
481 	ip->mq_dupce_count++;
482 }
483 
484 /*
485  * Create a fresh index block for MQSC CE correlation.
486  */
487 gmem_mq_t *
mq_create(fmd_hdl_t * hdl,fmd_event_t * ep,uint64_t afar,uint16_t upos,uint16_t ckwd,uint64_t now)488 mq_create(fmd_hdl_t *hdl, fmd_event_t *ep,
489     uint64_t afar, uint16_t upos, uint16_t ckwd, uint64_t now)
490 {
491 	gmem_mq_t *cp;
492 	tstamp_t *tsp;
493 
494 	cp = fmd_hdl_zalloc(hdl, sizeof (gmem_mq_t), FMD_SLEEP);
495 	cp->mq_tstamp = now;
496 	cp->mq_ckwd = ckwd;
497 	cp->mq_phys_addr = afar;
498 	cp->mq_unit_position = upos;
499 	cp->mq_ep = ep;
500 	cp->mq_serdnm =
501 	    gmem_mq_serdnm_create(hdl, "mq", afar, ckwd, upos);
502 
503 	tsp = fmd_hdl_zalloc(hdl, sizeof (tstamp_t), FMD_SLEEP);
504 	tsp->tstamp = now;
505 	gmem_list_append(&cp->mq_dupce_tstamp, tsp);
506 	cp->mq_dupce_count = 1;
507 
508 	/*
509 	 * Create SERD to keep this event from being removed
510 	 * by fmd which may not know there is an event pointer
511 	 * saved here. This SERD is *never* meant to fire.
512 	 */
513 	if (fmd_serd_exists(hdl, cp->mq_serdnm))
514 		fmd_serd_destroy(hdl, cp->mq_serdnm);
515 
516 	fmd_serd_create(hdl, cp->mq_serdnm, GMEM_MQ_SERDN, GMEM_MQ_SERDT);
517 	(void) fmd_serd_record(hdl, cp->mq_serdnm, ep);
518 
519 	return (cp);
520 }
521 
522 gmem_mq_t *
mq_destroy(fmd_hdl_t * hdl,gmem_list_t * lp,gmem_mq_t * ip)523 mq_destroy(fmd_hdl_t *hdl, gmem_list_t *lp, gmem_mq_t *ip)
524 {
525 	gmem_mq_t *jp = gmem_list_next(ip);
526 	tstamp_t *tsp, *next;
527 
528 
529 	if (ip->mq_serdnm != NULL) {
530 		if (fmd_serd_exists(hdl, ip->mq_serdnm))
531 			fmd_serd_destroy(hdl, ip->mq_serdnm);
532 		fmd_hdl_strfree(hdl, ip->mq_serdnm);
533 		ip->mq_serdnm = NULL;
534 	}
535 
536 	for (tsp = gmem_list_next(&ip->mq_dupce_tstamp); tsp != NULL;
537 	    tsp = next) {
538 		next = gmem_list_next(tsp);
539 		gmem_list_delete(&ip->mq_dupce_tstamp, &tsp->ts_l);
540 		fmd_hdl_free(hdl, tsp, sizeof (tstamp_t));
541 	}
542 
543 	gmem_list_delete(lp, &ip->mq_l);
544 	fmd_hdl_free(hdl, ip, sizeof (gmem_mq_t));
545 
546 	return (jp);
547 }
548 
549 
550 /*
551  * Add an index block for a new CE, sorted
552  * a) by ascending unit position
553  * b) order of arrival (~= time order)
554  */
555 void
mq_add(fmd_hdl_t * hdl,gmem_dimm_t * dimm,fmd_event_t * ep,uint64_t afar,uint16_t unit_position,uint16_t ckwd,uint64_t now)556 mq_add(fmd_hdl_t *hdl, gmem_dimm_t *dimm, fmd_event_t *ep,
557     uint64_t afar, uint16_t unit_position, uint16_t ckwd,
558     uint64_t now)
559 {
560 	gmem_mq_t *ip, *jp;
561 	int cw = (int)ckwd;
562 
563 	for (ip = gmem_list_next(&dimm->mq_root[cw]); ip != NULL; ) {
564 		if (ip->mq_unit_position > unit_position) {
565 			/* list is in unit position order */
566 			break;
567 		} else if (ip->mq_unit_position == unit_position &&
568 		    ip->mq_phys_addr == afar) {
569 			/*
570 			 * Found a duplicate cw, unit_position, and afar.
571 			 * Delete this node, to be superseded by the new
572 			 * node added below.
573 			 * update the mq_t structure
574 			 */
575 			mq_update(hdl, ep, ip, now);
576 			return;
577 		} else {
578 			ip = gmem_list_next(ip);
579 		}
580 	}
581 
582 	jp = mq_create(hdl, ep, afar, unit_position, cw, now);
583 	if (ip == NULL)
584 		gmem_list_append(&dimm->mq_root[cw], jp);
585 	else
586 		gmem_list_insert_before(&dimm->mq_root[cw], ip, jp);
587 }
588 
589 /*
590  * Prune the MQSC index lists (one for each checkword), by deleting
591  * outdated index blocks from each list.
592  */
593 
594 void
mq_prune(fmd_hdl_t * hdl,gmem_dimm_t * dimm,uint64_t now)595 mq_prune(fmd_hdl_t *hdl, gmem_dimm_t *dimm, uint64_t now)
596 {
597 	gmem_mq_t *ip;
598 	int cw;
599 
600 	for (cw = 0; cw < GMEM_MAX_CKWDS; cw++) {
601 		for (ip = gmem_list_next(&dimm->mq_root[cw]); ip != NULL; ) {
602 			if (ip->mq_tstamp < now - GMEM_MQ_TIMELIM) {
603 				/*
604 				 * This event has timed out - delete the
605 				 * mq block as well as serd for the event.
606 				 */
607 				ip = mq_destroy(hdl, &dimm->mq_root[cw], ip);
608 			} else {
609 				mq_prune_dup(hdl, ip, now);
610 				/* tstamp < now - ce_t */
611 				ip = gmem_list_next(ip);
612 			}
613 		} /* per checkword */
614 	} /* cw = 0...3 */
615 }
616 
617 /*
618  * Check the MQSC index lists (one for each checkword) by making a
619  * complete pass through each list, checking if the criteria for
620  * Rule 4A has been met.  Rule 4A checking is done for each checkword.
621  *
622  * Rule 4A: fault a DIMM  "whenever Solaris reports two or more CEs from
623  * two or more different physical addresses on each of two or more different
624  * bit positions from the same DIMM within 72 hours of each other, and all
625  * the addresses are in the same relative checkword (that is, the AFARs
626  * are all the same modulo 64).  [Note: This means at least 4 CEs; two
627  * from one bit position, with unique addresses, and two from another,
628  * also with unique addresses, and the lower 6 bits of all the addresses
629  * are the same."
630  */
631 
632 void
mq_check(fmd_hdl_t * hdl,gmem_dimm_t * dimm)633 mq_check(fmd_hdl_t *hdl, gmem_dimm_t *dimm)
634 {
635 	int upos_pairs, curr_upos, cw, i, j;
636 	nvlist_t *flt, *rsc;
637 	typedef struct upos_pair {
638 		int upos;
639 		gmem_mq_t *mq1;
640 		gmem_mq_t *mq2;
641 	} upos_pair_t;
642 	upos_pair_t upos_array[16]; /* max per cw = 2, * 8 cw's */
643 	gmem_mq_t *ip;
644 
645 	/*
646 	 * Each upos_array[] member represents a pair of CEs for the same
647 	 * unit position (symbol) which is a 4 bit nibble.
648 	 * MQSC rule 4 requires pairs of CEs from the same symbol (same DIMM
649 	 * for rule 4A, and same DRAM for rule 4B) for a violation - this
650 	 * is why CE pairs are tracked.
651 	 */
652 	upos_pairs = 0;
653 	upos_array[0].mq1 = NULL;
654 
655 	for (cw = 0; cw < GMEM_MAX_CKWDS; cw++) {
656 		i = upos_pairs;
657 		curr_upos = -1;
658 
659 		/*
660 		 * mq_root[] is an array of cumulative lists of CEs
661 		 * indexed by checkword where the list is in unit position
662 		 * order. Loop through checking for duplicate unit position
663 		 * entries (filled in at mq_create()).
664 		 * The upos_array[] is filled in each time a duplicate
665 		 * unit position is found; the first time through the loop
666 		 * of a unit position sets curr_upos but does not fill in
667 		 * upos_array[] until the second symbol is found.
668 		 */
669 		for (ip = gmem_list_next(&dimm->mq_root[cw]); ip != NULL;
670 		    ip = gmem_list_next(ip)) {
671 			if (curr_upos != ip->mq_unit_position) {
672 				/* Set initial current position */
673 				curr_upos = ip->mq_unit_position;
674 			} else if (i > upos_pairs &&
675 			    curr_upos == upos_array[i-1].upos) {
676 				/*
677 				 * Only keep track of CE pairs; skip
678 				 * triples, quads, etc...
679 				 */
680 				continue;
681 			} else if (upos_array[i].mq1 == NULL) {
682 				/* Have a pair. Add to upos_array[] */
683 				fmd_hdl_debug(hdl, "pair:upos=%d",
684 				    curr_upos);
685 				upos_array[i].upos = curr_upos;
686 				upos_array[i].mq1 = gmem_list_prev(ip);
687 				upos_array[i].mq2 = ip;
688 				upos_array[++i].mq1 = NULL;
689 			}
690 		}
691 		if (i - upos_pairs >= 2) {
692 			/* Rule 4A violation */
693 			rsc = gmem_find_dimm_rsc(hdl, dimm->dimm_serial);
694 			flt = fmd_nvl_create_fault(hdl, GMEM_FAULT_DIMM_4A,
695 			    GMEM_FLTMAXCONF, NULL, gmem_dimm_fru(dimm), rsc);
696 			for (j = upos_pairs; j < i; j++) {
697 				fmd_case_add_ereport(hdl,
698 				    dimm->dimm_case.cc_cp,
699 				    upos_array[j].mq1->mq_ep);
700 				fmd_case_add_ereport(hdl,
701 				    dimm->dimm_case.cc_cp,
702 				    upos_array[j].mq2->mq_ep);
703 			}
704 			dimm->dimm_flags |= GMEM_F_FAULTING;
705 			gmem_dimm_dirty(hdl, dimm);
706 			fmd_case_add_suspect(hdl, dimm->dimm_case.cc_cp, flt);
707 			fmd_case_solve(hdl, dimm->dimm_case.cc_cp);
708 			if (rsc != NULL)
709 				nvlist_free(rsc);
710 			return;
711 		}
712 		upos_pairs = i;
713 		assert(upos_pairs < 16);
714 	}
715 }
716 
717 /*ARGSUSED*/
718 gmem_evdisp_t
gmem_ce(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class)719 gmem_ce(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
720 {
721 	uint16_t symbol_pos, cw;
722 	uint64_t phyaddr, offset, addr;
723 	uint32_t filter_ratio = 0;
724 	gmem_dimm_t *dimm;
725 	gmem_page_t *page;
726 	nvlist_t *fru = NULL;
727 	nvlist_t *topo_rsc = NULL;
728 	nvlist_t *rsrc, *det;
729 	const char *uuid;
730 	ce_dispact_t type;
731 	boolean_t diagnose;
732 	char *sn;
733 	int err, rc;
734 	uint64_t *now;
735 	uint_t nelem;
736 	int skip_error = 0;
737 
738 	err = nvlist_lookup_boolean_value(nvl, GMEM_ERPT_PAYLOAD_DIAGNOSE,
739 	    &diagnose);
740 	if (err != 0 || diagnose == 0)
741 		return (GMEM_EVD_UNUSED);
742 
743 	if ((nvlist_lookup_uint64(nvl, GMEM_ERPT_PAYLOAD_PHYSADDR,
744 	    &phyaddr) != 0) ||
745 	    (nvlist_lookup_uint64(nvl, GMEM_ERPT_PAYLOAD_OFFSET,
746 	    &offset) != 0)) {
747 		fmd_hdl_debug(hdl, "Can't get page phyaddr or offset");
748 		return (GMEM_EVD_BAD);
749 	}
750 
751 	fmd_hdl_debug(hdl, "phyaddr %llx offset %llx", phyaddr, offset);
752 
753 	if ((page = gmem_page_lookup(phyaddr)) != NULL &&
754 	    page->page_case.cc_cp != NULL &&
755 	    fmd_case_solved(hdl, page->page_case.cc_cp))
756 		return (GMEM_EVD_REDUND);
757 
758 	if (nvlist_lookup_nvlist(nvl, GMEM_ERPT_PAYLOAD_RESOURCE,
759 	    &rsrc) != 0 ||
760 	    nvlist_lookup_string(rsrc, FM_FMRI_HC_SERIAL_ID, &sn) != 0) {
761 		fmd_hdl_debug(hdl, "Can't get dimm serial\n");
762 		return (GMEM_EVD_BAD);
763 	}
764 
765 	fmd_hdl_debug(hdl, "serial %s", sn);
766 
767 	if (nvlist_lookup_nvlist(nvl, GMEM_ERPT_PAYLOAD_DETECTOR, &det) != 0)
768 		return (GMEM_EVD_BAD);
769 
770 	/*
771 	 * Find dimm fru by serial number.
772 	 */
773 	fru = gmem_find_dimm_fru(hdl, sn);
774 
775 	if (fru == NULL) {
776 		fmd_hdl_debug(hdl, "Dimm is not present\n");
777 		return (GMEM_EVD_UNUSED);
778 	}
779 
780 	if ((dimm = gmem_dimm_lookup(hdl, fru)) == NULL &&
781 	    (dimm = gmem_dimm_create(hdl, fru, det)) == NULL) {
782 		nvlist_free(fru);
783 		return (GMEM_EVD_UNUSED);
784 	}
785 
786 	if (dimm->dimm_case.cc_cp == NULL) {
787 		dimm->dimm_case.cc_cp = gmem_case_create(hdl,
788 		    &dimm->dimm_header, GMEM_PTR_DIMM_CASE, &uuid);
789 	}
790 
791 	/*
792 	 * Add to MQSC correlation lists all CEs which pass validity
793 	 * checks above. If there is no symbol_pos & relative ckword
794 	 * in the ereport, skip rule 4A checking.
795 	 */
796 
797 	err = nvlist_lookup_uint16(nvl, GMEM_ERPT_PAYLOAD_SYMBOLPOS,
798 	    &symbol_pos);
799 	err |= nvlist_lookup_uint16(nvl, GMEM_ERPT_PAYLOAD_CKW, &cw);
800 
801 	if (err == 0) {
802 		fmd_hdl_debug(hdl, "symbol_pos=%d cw=%d", symbol_pos, cw);
803 
804 		if (nvlist_lookup_uint64_array(nvl,
805 		    "__tod", &now, &nelem) == 0) {
806 			skip_error = gmem_check_symbol_error(hdl, dimm,
807 			    symbol_pos);
808 
809 			if (!skip_error ||
810 			    !(dimm->dimm_flags & GMEM_F_FAULTING))
811 				mq_add(hdl, dimm, ep, phyaddr, symbol_pos,
812 				    cw, *now);
813 
814 			mq_prune(hdl, dimm, *now);
815 
816 			if (!skip_error)
817 				bad_reader_writer_check(hdl, det, dimm);
818 			if (!(dimm->dimm_flags & GMEM_F_FAULTING)) {
819 				mq_check(hdl, dimm);
820 				mq_5b_check(hdl, dimm);
821 			}
822 		}
823 	}
824 
825 	type = gmem_mem_name2type(strstr(class, "mem"));
826 
827 	switch (type) {
828 	case CE_DISP_UNKNOWN:
829 		GMEM_STAT_BUMP(ce_unknown);
830 		nvlist_free(fru);
831 		return (GMEM_EVD_UNUSED);
832 	case CE_DISP_INTERMITTENT:
833 		GMEM_STAT_BUMP(ce_interm);
834 		nvlist_free(fru);
835 		return (GMEM_EVD_UNUSED);
836 	case CE_DISP_PERS:
837 		GMEM_STAT_BUMP(ce_clearable_persis);
838 		break;
839 	case CE_DISP_STICKY:
840 		GMEM_STAT_BUMP(ce_sticky);
841 		break;
842 	default:
843 		nvlist_free(fru);
844 		return (GMEM_EVD_BAD);
845 	}
846 
847 	if (gmem_check_symbol_error(hdl, dimm, symbol_pos)) {
848 		nvlist_free(fru);
849 		return (GMEM_EVD_REDUND);
850 	}
851 
852 	if (page == NULL) {
853 		page = gmem_page_create(hdl, fru, phyaddr, offset);
854 		if (page == NULL) {
855 			nvlist_free(fru);
856 			return (GMEM_EVD_UNUSED);
857 		}
858 	}
859 
860 	nvlist_free(fru);
861 
862 	if (page->page_case.cc_cp == NULL) {
863 		page->page_case.cc_cp = gmem_case_create(hdl,
864 		    &page->page_header, GMEM_PTR_PAGE_CASE, &uuid);
865 	}
866 
867 	switch (type) {
868 	case CE_DISP_PERS:
869 		fmd_hdl_debug(hdl, "adding persistent event to CE serd");
870 		if (page->page_case.cc_serdnm == NULL)
871 			gmem_page_serd_create(hdl, page, nvl);
872 
873 		filter_ratio = gmem_get_serd_filter_ratio(nvl);
874 
875 		fmd_hdl_debug(hdl, "filter_ratio %d\n", filter_ratio);
876 
877 		if (gmem_serd_record(hdl, page->page_case.cc_serdnm,
878 		    filter_ratio, ep) == FMD_B_FALSE) {
879 				return (GMEM_EVD_OK); /* engine hasn't fired */
880 		}
881 
882 		fmd_hdl_debug(hdl, "ce page serd fired\n");
883 		fmd_case_add_serd(hdl, page->page_case.cc_cp,
884 		    page->page_case.cc_serdnm);
885 		fmd_serd_reset(hdl, page->page_case.cc_serdnm);
886 		break;	/* to retire */
887 
888 	case CE_DISP_STICKY:
889 		fmd_case_add_ereport(hdl, page->page_case.cc_cp, ep);
890 		break;	/* to retire */
891 	}
892 
893 
894 	topo_rsc = gmem_find_dimm_rsc(hdl, dimm->dimm_serial);
895 	rc = gmem_page_fault(hdl, gmem_dimm_fru(dimm), topo_rsc,
896 	    ep, phyaddr, offset);
897 
898 	if (rc) {
899 		gmem_to_hashed_addr(&addr, phyaddr);
900 
901 		if (addr > dimm->dimm_phys_addr_hi)
902 			dimm->dimm_phys_addr_hi = addr;
903 		if (addr < dimm->dimm_phys_addr_low)
904 			dimm->dimm_phys_addr_low = addr;
905 
906 		dimm->dimm_nretired++;
907 		dimm->dimm_retstat.fmds_value.ui64++;
908 		gmem_dimm_dirty(hdl, dimm);
909 		ce_thresh_check(hdl, dimm);
910 	}
911 	return (GMEM_EVD_OK);
912 }
913 
914 void
gmem_dimm_close(fmd_hdl_t * hdl,void * arg)915 gmem_dimm_close(fmd_hdl_t *hdl, void *arg)
916 {
917 	gmem_dimm_destroy(hdl, arg);
918 }
919