1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25
26 /*
27 * Ereport-handling routines for memory errors
28 */
29
30 #include <gmem_mem.h>
31 #include <gmem_dimm.h>
32 #include <gmem_page.h>
33 #include <gmem.h>
34
35 #include <strings.h>
36 #include <string.h>
37 #include <errno.h>
38 #include <assert.h>
39 #include <fm/fmd_api.h>
40 #include <fm/libtopo.h>
41 #include <sys/fm/protocol.h>
42 #include <sys/async.h>
43 #include <sys/errclassify.h>
44
/*
 * Bit masks over a 64-bit physical address, used by gmem_to_hashed_addr()
 * to compute the hashed form of PA[17:11].
 */
/* every bit except PA[17:11] */
#define OFFBIT 0xFFFFFFFFFFFC07FFULL
/* PA[32:28] */
#define BIT28_32 0x00000001F0000000ULL
/* PA[17:13] */
#define BIT13_17 0x000000000003E000ULL
/* PA[19:18] */
#define BIT18_19 0x00000000000C0000ULL
/* PA[12:11] */
#define BIT11_12 0x0000000000001800ULL
50
/*
 * One entry of the table that maps a CE class-name suffix to its
 * disposition code; see gmem_mem_name2type().
 */
struct ce_name2type {
	const char *name;	/* class-name suffix, e.g. "mem-cs" */
	ce_dispact_t type;	/* disposition reported for that suffix */
};
55
/*
 * Result slot shared between gmem_find_fault_fru() and its topology walk
 * callback find_fault_fru(): reset to NULL before each walk and set by the
 * callback to a duplicate of the matching node's FRU.
 */
nvlist_t *fru_nvl;
57
58 static ce_dispact_t
gmem_mem_name2type(const char * name)59 gmem_mem_name2type(const char *name)
60 {
61 static const struct ce_name2type new[] = {
62 { "mem-unk", CE_DISP_UNKNOWN },
63 { "mem-is", CE_DISP_INTERMITTENT },
64 { "mem-cs", CE_DISP_PERS },
65 { "mem-ss", CE_DISP_STICKY },
66 { NULL }
67 };
68 const struct ce_name2type *names = &new[0];
69 const struct ce_name2type *tp;
70
71 for (tp = names; tp->name != NULL; tp++) {
72 if (strcasecmp(name, tp->name) == 0)
73 return (tp->type);
74 }
75
76 return (CE_DISP_UNKNOWN);
77 }
78
79 /*ARGSUSED*/
80 static int
find_fault_fru(topo_hdl_t * thp,tnode_t * node,void * arg)81 find_fault_fru(topo_hdl_t *thp, tnode_t *node, void *arg)
82 {
83 nvlist_t *nvl = (nvlist_t *)arg;
84 nvlist_t *rsc = NULL, *fru = NULL;
85 nvlist_t **hcl, **topo_hcl;
86 uint_t n1, n2;
87 char *name, *name1, *name2;
88 char *id1, *id2;
89 int err, i;
90
91 if (topo_node_resource(node, &rsc, &err) < 0)
92 return (TOPO_WALK_NEXT);
93
94 err = nvlist_lookup_nvlist_array(rsc, FM_FMRI_HC_LIST, &topo_hcl, &n1);
95
96 if (err != 0) {
97 nvlist_free(rsc);
98 return (TOPO_WALK_NEXT);
99 }
100
101 (void) nvlist_lookup_string(topo_hcl[n1 - 1], FM_FMRI_HC_NAME, &name);
102 if (strcmp(name, "chip") != 0) {
103 nvlist_free(rsc);
104 return (TOPO_WALK_NEXT);
105 }
106
107 (void) nvlist_lookup_nvlist_array(nvl, FM_FMRI_HC_LIST, &hcl, &n2);
108
109 if (n1 != n2) {
110 nvlist_free(rsc);
111 return (TOPO_WALK_NEXT);
112 }
113
114 for (i = 0; i < n1; i++) {
115 (void) nvlist_lookup_string(topo_hcl[i], FM_FMRI_HC_NAME,
116 &name1);
117 (void) nvlist_lookup_string(topo_hcl[i], FM_FMRI_HC_ID, &id1);
118 (void) nvlist_lookup_string(hcl[i], FM_FMRI_HC_NAME, &name2);
119 (void) nvlist_lookup_string(hcl[i], FM_FMRI_HC_ID, &id2);
120 if (strcmp(name1, name2) != 0 || strcmp(id1, id2) != 0) {
121 nvlist_free(rsc);
122 return (TOPO_WALK_NEXT);
123 }
124 }
125
126 (void) topo_node_fru(node, &fru, NULL, &err);
127 if (fru != NULL) {
128 (void) nvlist_dup(fru, &fru_nvl, NV_UNIQUE_NAME);
129 nvlist_free(fru);
130 }
131 nvlist_free(rsc);
132 return (TOPO_WALK_TERMINATE);
133 }
134
135 nvlist_t *
gmem_find_fault_fru(fmd_hdl_t * hdl,nvlist_t * nvl)136 gmem_find_fault_fru(fmd_hdl_t *hdl, nvlist_t *nvl) {
137 topo_hdl_t *thp;
138 topo_walk_t *twp;
139 int err;
140 fru_nvl = NULL;
141
142 if ((thp = fmd_hdl_topo_hold(hdl, TOPO_VERSION)) == NULL)
143 return (NULL);
144
145 if ((twp = topo_walk_init(thp, FM_FMRI_SCHEME_HC,
146 find_fault_fru, nvl, &err)) == NULL) {
147 fmd_hdl_topo_rele(hdl, thp);
148 return (NULL);
149 }
150
151 (void) topo_walk_step(twp, TOPO_WALK_CHILD);
152 topo_walk_fini(twp);
153 fmd_hdl_topo_rele(hdl, thp);
154 return (fru_nvl);
155 }
156
157 /*
158 * fault the FRU of the common detector between two DIMMs
159 */
160 void
gmem_gen_datapath_fault(fmd_hdl_t * hdl,nvlist_t * det)161 gmem_gen_datapath_fault(fmd_hdl_t *hdl, nvlist_t *det)
162 {
163 char *name, *id;
164 nvlist_t **hcl1, **hcl;
165 uint_t n;
166 int i, j;
167 fmd_case_t *cp;
168 nvlist_t *fltlist, *rsrc;
169 nvlist_t *fru = NULL;
170
171 if (nvlist_lookup_nvlist_array(det, FM_FMRI_HC_LIST, &hcl1, &n) < 0)
172 return;
173
174 for (i = 0; i < n; i++) {
175 (void) nvlist_lookup_string(hcl1[i], FM_FMRI_HC_NAME, &name);
176 if (strcmp(name, "chip") == 0)
177 break;
178 }
179
180 n = i + 1;
181 hcl = fmd_hdl_zalloc(hdl, sizeof (nvlist_t *) * n, FMD_SLEEP);
182 if (hcl == NULL)
183 return;
184
185 for (i = 0; i < n; i++) {
186 (void) nvlist_alloc(&hcl[i],
187 NV_UNIQUE_NAME|NV_UNIQUE_NAME_TYPE, 0);
188 }
189
190 for (i = 0, j = 0; i < n; i++) {
191 (void) nvlist_lookup_string(hcl1[i], FM_FMRI_HC_NAME, &name);
192 (void) nvlist_lookup_string(hcl1[i], FM_FMRI_HC_ID, &id);
193 (void) nvlist_add_string(hcl[j], FM_FMRI_HC_NAME, name);
194 (void) nvlist_add_string(hcl[j], FM_FMRI_HC_ID, id);
195 j++;
196 if (strcmp(name, "chip") == 0)
197 break;
198 }
199
200 if (nvlist_alloc(&rsrc, NV_UNIQUE_NAME|NV_UNIQUE_NAME_TYPE, 0) != 0) {
201 for (i = 0; i < n; i++) {
202 nvlist_free(hcl[i]);
203 }
204 fmd_hdl_free(hdl, hcl, sizeof (nvlist_t *) * n);
205 }
206
207 if (nvlist_add_uint8(rsrc, FM_VERSION, FM_HC_SCHEME_VERSION) != 0 ||
208 nvlist_add_string(rsrc, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC) != 0 ||
209 nvlist_add_string(rsrc, FM_FMRI_HC_ROOT, "") != 0 ||
210 nvlist_add_uint32(rsrc, FM_FMRI_HC_LIST_SZ, n) != 0 ||
211 nvlist_add_nvlist_array(rsrc, FM_FMRI_HC_LIST, hcl, n) != 0) {
212 for (i = 0; i < n; i++) {
213 nvlist_free(hcl[i]);
214 }
215 fmd_hdl_free(hdl, hcl, sizeof (nvlist_t *) * n);
216 nvlist_free(rsrc);
217 }
218
219 fru = gmem_find_fault_fru(hdl, rsrc);
220 if (fru != NULL) {
221 cp = fmd_case_open(hdl, NULL);
222 fltlist = fmd_nvl_create_fault(hdl, "fault.memory.datapath",
223 100, fru, fru, fru);
224 fmd_case_add_suspect(hdl, cp, fltlist);
225 fmd_case_solve(hdl, cp);
226 nvlist_free(fru);
227 }
228
229 for (i = 0; i < n; i++) {
230 nvlist_free(hcl[i]);
231 }
232
233 fmd_hdl_free(hdl, hcl, sizeof (nvlist_t *) * n);
234 nvlist_free(rsrc);
235 }
236
/*
 * Formula to convert an unhashed address to a hashed address:
 *	PA[17:11] = (PA[32:28] xor PA[17:13]) :: (PA[19:18] xor PA[12:11])
 * All other address bits are passed through unchanged.  The result is
 * stored in *addr.
 */
static void
gmem_to_hashed_addr(uint64_t *addr, uint64_t afar)
{
	/* everything outside PA[17:11] */
	uint64_t passthru = afar & OFFBIT;
	/* PA[32:28] moved down to [17:13], xor'ed with the original [17:13] */
	uint64_t upper = ((afar & BIT28_32) >> 15) ^ (afar & BIT13_17);
	/* PA[19:18] moved down to [12:11], xor'ed with the original [12:11] */
	uint64_t lower = ((afar & BIT18_19) >> 7) ^ (afar & BIT11_12);

	*addr = passthru | upper | lower;
}
248
249 /*
250 * check if a dimm has n CEs that have the same symbol-in-error
251 */
252 int
upos_thresh_check(gmem_dimm_t * dimm,uint16_t upos,uint32_t threshold)253 upos_thresh_check(gmem_dimm_t *dimm, uint16_t upos, uint32_t threshold)
254 {
255 int i;
256 gmem_mq_t *ip, *next;
257 int count = 0;
258
259 for (i = 0; i < GMEM_MAX_CKWDS; i++) {
260 for (ip = gmem_list_next(&dimm->mq_root[i]); ip != NULL;
261 ip = next) {
262 next = gmem_list_next(ip);
263 if (ip->mq_unit_position == upos) {
264 count++;
265 if (count >= threshold)
266 return (1);
267 }
268 }
269 }
270 return (0);
271 }
272
273 /*
274 * check if smaller number of retired pages > 1/16 of larger number of
275 * retired pages
276 */
277 int
check_bad_rw_retired_pages(fmd_hdl_t * hdl,gmem_dimm_t * d1,gmem_dimm_t * d2)278 check_bad_rw_retired_pages(fmd_hdl_t *hdl, gmem_dimm_t *d1, gmem_dimm_t *d2)
279 {
280 uint_t sret, lret;
281 double ratio;
282
283 sret = lret = 0;
284
285 if (d2->dimm_nretired < d1->dimm_nretired) {
286 sret = d2->dimm_nretired;
287 lret = d1->dimm_nretired;
288 } else if (d2->dimm_nretired > d1->dimm_nretired) {
289 sret = d1->dimm_nretired;
290 lret = d2->dimm_nretired;
291 } else
292 return (0);
293
294 ratio = lret * GMEM_MQ_RATIO;
295
296 if (sret > ratio) {
297 fmd_hdl_debug(hdl, "sret=%d lret=%d ratio=%.3f",
298 sret, lret, ratio);
299 return (1);
300 }
301 return (0);
302 }
303
304 /*
305 * check bad rw on any two DIMMs. The check succeeds if
306 * - each DIMM has a n CEs which have the same symbol-in-error,
307 * - the smaller number of retired pages > 1/16 larger number of retired pages
308 */
309 static int
check_bad_rw_between_dimms(fmd_hdl_t * hdl,gmem_dimm_t * d1,gmem_dimm_t * d2,uint16_t * rupos)310 check_bad_rw_between_dimms(fmd_hdl_t *hdl, gmem_dimm_t *d1, gmem_dimm_t *d2,
311 uint16_t *rupos)
312 {
313 int i;
314 gmem_mq_t *ip, *next;
315 uint16_t upos;
316
317 for (i = 0; i < GMEM_MAX_CKWDS; i++) {
318 for (ip = gmem_list_next(&d1->mq_root[i]); ip != NULL;
319 ip = next) {
320 next = gmem_list_next(ip);
321 upos = ip->mq_unit_position;
322 if (upos_thresh_check(d1, upos, gmem.gm_nupos)) {
323 if (upos_thresh_check(d2, upos,
324 gmem.gm_nupos)) {
325 if (check_bad_rw_retired_pages(hdl,
326 d1, d2)) {
327 *rupos = upos;
328 return (1);
329 }
330 }
331 }
332 }
333 }
334
335 return (0);
336 }
337
338 static void
bad_reader_writer_check(fmd_hdl_t * hdl,nvlist_t * det,gmem_dimm_t * ce_dimm)339 bad_reader_writer_check(fmd_hdl_t *hdl, nvlist_t *det, gmem_dimm_t *ce_dimm)
340 {
341 gmem_dimm_t *d, *next;
342 uint16_t upos;
343
344 for (d = gmem_list_next(&gmem.gm_dimms); d != NULL; d = next) {
345 next = gmem_list_next(d);
346 if (d == ce_dimm)
347 continue;
348 if (!gmem_same_datapath_dimms(hdl, ce_dimm, d))
349 continue;
350 if (check_bad_rw_between_dimms(hdl, ce_dimm, d, &upos)) {
351 gmem_gen_datapath_fault(hdl, det);
352 gmem_save_symbol_error(hdl, ce_dimm, upos);
353 fmd_hdl_debug(hdl,
354 "check_bad_rw_dimms succeeded: %s %s\n",
355 ce_dimm->dimm_serial, d->dimm_serial);
356 return;
357 }
358 }
359 }
360
361 /*
362 * rule 5a checking. The check succeeds if
363 * - nretired >= 512
364 * - nretired >= 128 and (addr_hi - addr_low) / (nretired -1 ) > 512KB
365 */
366 static void
ce_thresh_check(fmd_hdl_t * hdl,gmem_dimm_t * dimm)367 ce_thresh_check(fmd_hdl_t *hdl, gmem_dimm_t *dimm)
368 {
369 nvlist_t *flt, *rsrc;
370 fmd_case_t *cp;
371 uint_t nret;
372 uint64_t delta_addr = 0;
373
374 if (dimm->dimm_flags & GMEM_F_FAULTING)
375 return;
376
377 nret = dimm->dimm_nretired;
378
379 if (nret < gmem.gm_low_ce_thresh)
380 return;
381
382 if (dimm->dimm_phys_addr_hi >= dimm->dimm_phys_addr_low)
383 delta_addr =
384 (dimm->dimm_phys_addr_hi - dimm->dimm_phys_addr_low) /
385 (nret - 1);
386
387 if (nret >= gmem.gm_max_retired_pages || delta_addr > GMEM_MQ_512KB) {
388
389 fmd_hdl_debug(hdl, "ce_thresh_check succeeded nret=%d", nret);
390 dimm->dimm_flags |= GMEM_F_FAULTING;
391 gmem_dimm_dirty(hdl, dimm);
392
393 cp = fmd_case_open(hdl, NULL);
394 rsrc = gmem_find_dimm_rsc(hdl, dimm->dimm_serial);
395 flt = fmd_nvl_create_fault(hdl, GMEM_FAULT_DIMM_PAGES,
396 GMEM_FLTMAXCONF, NULL, gmem_dimm_fru(dimm), rsrc);
397 fmd_case_add_suspect(hdl, cp, flt);
398 fmd_case_solve(hdl, cp);
399 nvlist_free(rsrc);
400 }
401 }
402
403 /*
404 * rule 5b checking. The check succeeds if more than 120
405 * non-intermittent CEs are reported against one symbol
406 * position of one afar in 72 hours
407 */
408 static void
mq_5b_check(fmd_hdl_t * hdl,gmem_dimm_t * dimm)409 mq_5b_check(fmd_hdl_t *hdl, gmem_dimm_t *dimm)
410 {
411 nvlist_t *flt, *rsrc;
412 fmd_case_t *cp;
413 gmem_mq_t *ip, *next;
414 int cw;
415
416 for (cw = 0; cw < GMEM_MAX_CKWDS; cw++) {
417 for (ip = gmem_list_next(&dimm->mq_root[cw]);
418 ip != NULL; ip = next) {
419 next = gmem_list_next(ip);
420 if (ip->mq_dupce_count >= gmem.gm_dupce) {
421 fmd_hdl_debug(hdl,
422 "mq_5b_check succeeded: duplicate CE=%d",
423 ip->mq_dupce_count);
424 cp = fmd_case_open(hdl, NULL);
425 rsrc = gmem_find_dimm_rsc(hdl,
426 dimm->dimm_serial);
427 flt = fmd_nvl_create_fault(hdl,
428 GMEM_FAULT_DIMM_PAGES, GMEM_FLTMAXCONF,
429 NULL, gmem_dimm_fru(dimm), rsrc);
430 dimm->dimm_flags |= GMEM_F_FAULTING;
431 gmem_dimm_dirty(hdl, dimm);
432 fmd_case_add_suspect(hdl, cp, flt);
433 fmd_case_solve(hdl, cp);
434 nvlist_free(rsrc);
435 return;
436 }
437 }
438 }
439 }
440
441 /*
442 * delete the expired duplicate CE time stamps
443 */
444 static void
mq_prune_dup(fmd_hdl_t * hdl,gmem_mq_t * ip,uint64_t now)445 mq_prune_dup(fmd_hdl_t *hdl, gmem_mq_t *ip, uint64_t now)
446 {
447 tstamp_t *tsp, *next;
448
449 for (tsp = gmem_list_next(&ip->mq_dupce_tstamp); tsp != NULL;
450 tsp = next) {
451 next = gmem_list_next(tsp);
452 if (tsp->tstamp < now - GMEM_MQ_TIMELIM) {
453 gmem_list_delete(&ip->mq_dupce_tstamp, &tsp->ts_l);
454 fmd_hdl_free(hdl, tsp, sizeof (tstamp_t));
455 ip->mq_dupce_count--;
456 }
457 }
458 }
459
460 static void
mq_update(fmd_hdl_t * hdl,fmd_event_t * ep,gmem_mq_t * ip,uint64_t now)461 mq_update(fmd_hdl_t *hdl, fmd_event_t *ep, gmem_mq_t *ip, uint64_t now)
462 {
463 tstamp_t *tsp;
464
465 ip->mq_tstamp = now;
466 ip->mq_ep = ep;
467 if (fmd_serd_exists(hdl, ip->mq_serdnm))
468 fmd_serd_destroy(hdl, ip->mq_serdnm);
469
470 fmd_serd_create(hdl, ip->mq_serdnm, GMEM_MQ_SERDN, GMEM_MQ_SERDT);
471 (void) fmd_serd_record(hdl, ip->mq_serdnm, ep);
472
473 tsp = fmd_hdl_zalloc(hdl, sizeof (tstamp_t), FMD_SLEEP);
474 tsp->tstamp = now;
475 gmem_list_append(&ip->mq_dupce_tstamp, tsp);
476 ip->mq_dupce_count++;
477 }
478
479 /*
480 * Create a fresh index block for MQSC CE correlation.
481 */
482 gmem_mq_t *
mq_create(fmd_hdl_t * hdl,fmd_event_t * ep,uint64_t afar,uint16_t upos,uint16_t ckwd,uint64_t now)483 mq_create(fmd_hdl_t *hdl, fmd_event_t *ep,
484 uint64_t afar, uint16_t upos, uint16_t ckwd, uint64_t now)
485 {
486 gmem_mq_t *cp;
487 tstamp_t *tsp;
488
489 cp = fmd_hdl_zalloc(hdl, sizeof (gmem_mq_t), FMD_SLEEP);
490 cp->mq_tstamp = now;
491 cp->mq_ckwd = ckwd;
492 cp->mq_phys_addr = afar;
493 cp->mq_unit_position = upos;
494 cp->mq_ep = ep;
495 cp->mq_serdnm =
496 gmem_mq_serdnm_create(hdl, "mq", afar, ckwd, upos);
497
498 tsp = fmd_hdl_zalloc(hdl, sizeof (tstamp_t), FMD_SLEEP);
499 tsp->tstamp = now;
500 gmem_list_append(&cp->mq_dupce_tstamp, tsp);
501 cp->mq_dupce_count = 1;
502
503 /*
504 * Create SERD to keep this event from being removed
505 * by fmd which may not know there is an event pointer
506 * saved here. This SERD is *never* meant to fire.
507 */
508 if (fmd_serd_exists(hdl, cp->mq_serdnm))
509 fmd_serd_destroy(hdl, cp->mq_serdnm);
510
511 fmd_serd_create(hdl, cp->mq_serdnm, GMEM_MQ_SERDN, GMEM_MQ_SERDT);
512 (void) fmd_serd_record(hdl, cp->mq_serdnm, ep);
513
514 return (cp);
515 }
516
517 gmem_mq_t *
mq_destroy(fmd_hdl_t * hdl,gmem_list_t * lp,gmem_mq_t * ip)518 mq_destroy(fmd_hdl_t *hdl, gmem_list_t *lp, gmem_mq_t *ip)
519 {
520 gmem_mq_t *jp = gmem_list_next(ip);
521 tstamp_t *tsp, *next;
522
523
524 if (ip->mq_serdnm != NULL) {
525 if (fmd_serd_exists(hdl, ip->mq_serdnm))
526 fmd_serd_destroy(hdl, ip->mq_serdnm);
527 fmd_hdl_strfree(hdl, ip->mq_serdnm);
528 ip->mq_serdnm = NULL;
529 }
530
531 for (tsp = gmem_list_next(&ip->mq_dupce_tstamp); tsp != NULL;
532 tsp = next) {
533 next = gmem_list_next(tsp);
534 gmem_list_delete(&ip->mq_dupce_tstamp, &tsp->ts_l);
535 fmd_hdl_free(hdl, tsp, sizeof (tstamp_t));
536 }
537
538 gmem_list_delete(lp, &ip->mq_l);
539 fmd_hdl_free(hdl, ip, sizeof (gmem_mq_t));
540
541 return (jp);
542 }
543
544
545 /*
546 * Add an index block for a new CE, sorted
547 * a) by ascending unit position
548 * b) order of arrival (~= time order)
549 */
550 void
mq_add(fmd_hdl_t * hdl,gmem_dimm_t * dimm,fmd_event_t * ep,uint64_t afar,uint16_t unit_position,uint16_t ckwd,uint64_t now)551 mq_add(fmd_hdl_t *hdl, gmem_dimm_t *dimm, fmd_event_t *ep,
552 uint64_t afar, uint16_t unit_position, uint16_t ckwd,
553 uint64_t now)
554 {
555 gmem_mq_t *ip, *jp;
556 int cw = (int)ckwd;
557
558 for (ip = gmem_list_next(&dimm->mq_root[cw]); ip != NULL; ) {
559 if (ip->mq_unit_position > unit_position) {
560 /* list is in unit position order */
561 break;
562 } else if (ip->mq_unit_position == unit_position &&
563 ip->mq_phys_addr == afar) {
564 /*
565 * Found a duplicate cw, unit_position, and afar.
566 * Delete this node, to be superseded by the new
567 * node added below.
568 * update the mq_t structure
569 */
570 mq_update(hdl, ep, ip, now);
571 return;
572 } else {
573 ip = gmem_list_next(ip);
574 }
575 }
576
577 jp = mq_create(hdl, ep, afar, unit_position, cw, now);
578 if (ip == NULL)
579 gmem_list_append(&dimm->mq_root[cw], jp);
580 else
581 gmem_list_insert_before(&dimm->mq_root[cw], ip, jp);
582 }
583
584 /*
585 * Prune the MQSC index lists (one for each checkword), by deleting
586 * outdated index blocks from each list.
587 */
588
589 void
mq_prune(fmd_hdl_t * hdl,gmem_dimm_t * dimm,uint64_t now)590 mq_prune(fmd_hdl_t *hdl, gmem_dimm_t *dimm, uint64_t now)
591 {
592 gmem_mq_t *ip;
593 int cw;
594
595 for (cw = 0; cw < GMEM_MAX_CKWDS; cw++) {
596 for (ip = gmem_list_next(&dimm->mq_root[cw]); ip != NULL; ) {
597 if (ip->mq_tstamp < now - GMEM_MQ_TIMELIM) {
598 /*
599 * This event has timed out - delete the
600 * mq block as well as serd for the event.
601 */
602 ip = mq_destroy(hdl, &dimm->mq_root[cw], ip);
603 } else {
604 mq_prune_dup(hdl, ip, now);
605 /* tstamp < now - ce_t */
606 ip = gmem_list_next(ip);
607 }
608 } /* per checkword */
609 } /* cw = 0...3 */
610 }
611
612 /*
613 * Check the MQSC index lists (one for each checkword) by making a
614 * complete pass through each list, checking if the criteria for
615 * Rule 4A has been met. Rule 4A checking is done for each checkword.
616 *
617 * Rule 4A: fault a DIMM "whenever Solaris reports two or more CEs from
618 * two or more different physical addresses on each of two or more different
619 * bit positions from the same DIMM within 72 hours of each other, and all
620 * the addresses are in the same relative checkword (that is, the AFARs
621 * are all the same modulo 64). [Note: This means at least 4 CEs; two
622 * from one bit position, with unique addresses, and two from another,
623 * also with unique addresses, and the lower 6 bits of all the addresses
624 * are the same."
625 */
626
627 void
mq_check(fmd_hdl_t * hdl,gmem_dimm_t * dimm)628 mq_check(fmd_hdl_t *hdl, gmem_dimm_t *dimm)
629 {
630 int upos_pairs, curr_upos, cw, i, j;
631 nvlist_t *flt, *rsc;
632 typedef struct upos_pair {
633 int upos;
634 gmem_mq_t *mq1;
635 gmem_mq_t *mq2;
636 } upos_pair_t;
637 upos_pair_t upos_array[16]; /* max per cw = 2, * 8 cw's */
638 gmem_mq_t *ip;
639
640 /*
641 * Each upos_array[] member represents a pair of CEs for the same
642 * unit position (symbol) which is a 4 bit nibble.
643 * MQSC rule 4 requires pairs of CEs from the same symbol (same DIMM
644 * for rule 4A, and same DRAM for rule 4B) for a violation - this
645 * is why CE pairs are tracked.
646 */
647 upos_pairs = 0;
648 upos_array[0].mq1 = NULL;
649
650 for (cw = 0; cw < GMEM_MAX_CKWDS; cw++) {
651 i = upos_pairs;
652 curr_upos = -1;
653
654 /*
655 * mq_root[] is an array of cumulative lists of CEs
656 * indexed by checkword where the list is in unit position
657 * order. Loop through checking for duplicate unit position
658 * entries (filled in at mq_create()).
659 * The upos_array[] is filled in each time a duplicate
660 * unit position is found; the first time through the loop
661 * of a unit position sets curr_upos but does not fill in
662 * upos_array[] until the second symbol is found.
663 */
664 for (ip = gmem_list_next(&dimm->mq_root[cw]); ip != NULL;
665 ip = gmem_list_next(ip)) {
666 if (curr_upos != ip->mq_unit_position) {
667 /* Set initial current position */
668 curr_upos = ip->mq_unit_position;
669 } else if (i > upos_pairs &&
670 curr_upos == upos_array[i-1].upos) {
671 /*
672 * Only keep track of CE pairs; skip
673 * triples, quads, etc...
674 */
675 continue;
676 } else if (upos_array[i].mq1 == NULL) {
677 /* Have a pair. Add to upos_array[] */
678 fmd_hdl_debug(hdl, "pair:upos=%d",
679 curr_upos);
680 upos_array[i].upos = curr_upos;
681 upos_array[i].mq1 = gmem_list_prev(ip);
682 upos_array[i].mq2 = ip;
683 upos_array[++i].mq1 = NULL;
684 }
685 }
686 if (i - upos_pairs >= 2) {
687 /* Rule 4A violation */
688 rsc = gmem_find_dimm_rsc(hdl, dimm->dimm_serial);
689 flt = fmd_nvl_create_fault(hdl, GMEM_FAULT_DIMM_4A,
690 GMEM_FLTMAXCONF, NULL, gmem_dimm_fru(dimm), rsc);
691 for (j = upos_pairs; j < i; j++) {
692 fmd_case_add_ereport(hdl,
693 dimm->dimm_case.cc_cp,
694 upos_array[j].mq1->mq_ep);
695 fmd_case_add_ereport(hdl,
696 dimm->dimm_case.cc_cp,
697 upos_array[j].mq2->mq_ep);
698 }
699 dimm->dimm_flags |= GMEM_F_FAULTING;
700 gmem_dimm_dirty(hdl, dimm);
701 fmd_case_add_suspect(hdl, dimm->dimm_case.cc_cp, flt);
702 fmd_case_solve(hdl, dimm->dimm_case.cc_cp);
703 nvlist_free(rsc);
704 return;
705 }
706 upos_pairs = i;
707 assert(upos_pairs < 16);
708 }
709 }
710
711 /*ARGSUSED*/
712 gmem_evdisp_t
gmem_ce(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class)713 gmem_ce(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
714 {
715 uint16_t symbol_pos, cw;
716 uint64_t phyaddr, offset, addr;
717 uint32_t filter_ratio = 0;
718 gmem_dimm_t *dimm;
719 gmem_page_t *page;
720 nvlist_t *fru = NULL;
721 nvlist_t *topo_rsc = NULL;
722 nvlist_t *rsrc, *det;
723 const char *uuid;
724 ce_dispact_t type;
725 boolean_t diagnose;
726 char *sn;
727 int err, rc;
728 uint64_t *now;
729 uint_t nelem;
730 int skip_error = 0;
731
732 err = nvlist_lookup_boolean_value(nvl, GMEM_ERPT_PAYLOAD_DIAGNOSE,
733 &diagnose);
734 if (err != 0 || diagnose == 0)
735 return (GMEM_EVD_UNUSED);
736
737 if ((nvlist_lookup_uint64(nvl, GMEM_ERPT_PAYLOAD_PHYSADDR,
738 &phyaddr) != 0) ||
739 (nvlist_lookup_uint64(nvl, GMEM_ERPT_PAYLOAD_OFFSET,
740 &offset) != 0)) {
741 fmd_hdl_debug(hdl, "Can't get page phyaddr or offset");
742 return (GMEM_EVD_BAD);
743 }
744
745 fmd_hdl_debug(hdl, "phyaddr %llx offset %llx", phyaddr, offset);
746
747 if ((page = gmem_page_lookup(phyaddr)) != NULL &&
748 page->page_case.cc_cp != NULL &&
749 fmd_case_solved(hdl, page->page_case.cc_cp))
750 return (GMEM_EVD_REDUND);
751
752 if (nvlist_lookup_nvlist(nvl, GMEM_ERPT_PAYLOAD_RESOURCE,
753 &rsrc) != 0 ||
754 nvlist_lookup_string(rsrc, FM_FMRI_HC_SERIAL_ID, &sn) != 0) {
755 fmd_hdl_debug(hdl, "Can't get dimm serial\n");
756 return (GMEM_EVD_BAD);
757 }
758
759 fmd_hdl_debug(hdl, "serial %s", sn);
760
761 if (nvlist_lookup_nvlist(nvl, GMEM_ERPT_PAYLOAD_DETECTOR, &det) != 0)
762 return (GMEM_EVD_BAD);
763
764 /*
765 * Find dimm fru by serial number.
766 */
767 fru = gmem_find_dimm_fru(hdl, sn);
768
769 if (fru == NULL) {
770 fmd_hdl_debug(hdl, "Dimm is not present\n");
771 return (GMEM_EVD_UNUSED);
772 }
773
774 if ((dimm = gmem_dimm_lookup(hdl, fru)) == NULL &&
775 (dimm = gmem_dimm_create(hdl, fru, det)) == NULL) {
776 nvlist_free(fru);
777 return (GMEM_EVD_UNUSED);
778 }
779
780 if (dimm->dimm_case.cc_cp == NULL) {
781 dimm->dimm_case.cc_cp = gmem_case_create(hdl,
782 &dimm->dimm_header, GMEM_PTR_DIMM_CASE, &uuid);
783 }
784
785 /*
786 * Add to MQSC correlation lists all CEs which pass validity
787 * checks above. If there is no symbol_pos & relative ckword
788 * in the ereport, skip rule 4A checking.
789 */
790
791 err = nvlist_lookup_uint16(nvl, GMEM_ERPT_PAYLOAD_SYMBOLPOS,
792 &symbol_pos);
793 err |= nvlist_lookup_uint16(nvl, GMEM_ERPT_PAYLOAD_CKW, &cw);
794
795 if (err == 0) {
796 fmd_hdl_debug(hdl, "symbol_pos=%d cw=%d", symbol_pos, cw);
797
798 if (nvlist_lookup_uint64_array(nvl,
799 "__tod", &now, &nelem) == 0) {
800 skip_error = gmem_check_symbol_error(hdl, dimm,
801 symbol_pos);
802
803 if (!skip_error ||
804 !(dimm->dimm_flags & GMEM_F_FAULTING))
805 mq_add(hdl, dimm, ep, phyaddr, symbol_pos,
806 cw, *now);
807
808 mq_prune(hdl, dimm, *now);
809
810 if (!skip_error)
811 bad_reader_writer_check(hdl, det, dimm);
812 if (!(dimm->dimm_flags & GMEM_F_FAULTING)) {
813 mq_check(hdl, dimm);
814 mq_5b_check(hdl, dimm);
815 }
816 }
817 }
818
819 type = gmem_mem_name2type(strstr(class, "mem"));
820
821 switch (type) {
822 case CE_DISP_UNKNOWN:
823 GMEM_STAT_BUMP(ce_unknown);
824 nvlist_free(fru);
825 return (GMEM_EVD_UNUSED);
826 case CE_DISP_INTERMITTENT:
827 GMEM_STAT_BUMP(ce_interm);
828 nvlist_free(fru);
829 return (GMEM_EVD_UNUSED);
830 case CE_DISP_PERS:
831 GMEM_STAT_BUMP(ce_clearable_persis);
832 break;
833 case CE_DISP_STICKY:
834 GMEM_STAT_BUMP(ce_sticky);
835 break;
836 default:
837 nvlist_free(fru);
838 return (GMEM_EVD_BAD);
839 }
840
841 if (gmem_check_symbol_error(hdl, dimm, symbol_pos)) {
842 nvlist_free(fru);
843 return (GMEM_EVD_REDUND);
844 }
845
846 if (page == NULL) {
847 page = gmem_page_create(hdl, fru, phyaddr, offset);
848 if (page == NULL) {
849 nvlist_free(fru);
850 return (GMEM_EVD_UNUSED);
851 }
852 }
853
854 nvlist_free(fru);
855
856 if (page->page_case.cc_cp == NULL) {
857 page->page_case.cc_cp = gmem_case_create(hdl,
858 &page->page_header, GMEM_PTR_PAGE_CASE, &uuid);
859 }
860
861 switch (type) {
862 case CE_DISP_PERS:
863 fmd_hdl_debug(hdl, "adding persistent event to CE serd");
864 if (page->page_case.cc_serdnm == NULL)
865 gmem_page_serd_create(hdl, page, nvl);
866
867 filter_ratio = gmem_get_serd_filter_ratio(nvl);
868
869 fmd_hdl_debug(hdl, "filter_ratio %d\n", filter_ratio);
870
871 if (gmem_serd_record(hdl, page->page_case.cc_serdnm,
872 filter_ratio, ep) == FMD_B_FALSE) {
873 return (GMEM_EVD_OK); /* engine hasn't fired */
874 }
875
876 fmd_hdl_debug(hdl, "ce page serd fired\n");
877 fmd_case_add_serd(hdl, page->page_case.cc_cp,
878 page->page_case.cc_serdnm);
879 fmd_serd_reset(hdl, page->page_case.cc_serdnm);
880 break; /* to retire */
881
882 case CE_DISP_STICKY:
883 fmd_case_add_ereport(hdl, page->page_case.cc_cp, ep);
884 break; /* to retire */
885 }
886
887
888 topo_rsc = gmem_find_dimm_rsc(hdl, dimm->dimm_serial);
889 rc = gmem_page_fault(hdl, gmem_dimm_fru(dimm), topo_rsc,
890 ep, phyaddr, offset);
891
892 if (rc) {
893 gmem_to_hashed_addr(&addr, phyaddr);
894
895 if (addr > dimm->dimm_phys_addr_hi)
896 dimm->dimm_phys_addr_hi = addr;
897 if (addr < dimm->dimm_phys_addr_low)
898 dimm->dimm_phys_addr_low = addr;
899
900 dimm->dimm_nretired++;
901 dimm->dimm_retstat.fmds_value.ui64++;
902 gmem_dimm_dirty(hdl, dimm);
903 ce_thresh_check(hdl, dimm);
904 }
905 return (GMEM_EVD_OK);
906 }
907
/*
 * Close routine for a DIMM: hands arg straight to gmem_dimm_destroy().
 * NOTE(review): arg is presumably the gmem_dimm_t being closed - confirm
 * against the caller.
 */
void
gmem_dimm_close(fmd_hdl_t *hdl, void *arg)
{
	gmem_dimm_destroy(hdl, arg);
}
913