1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25
26 /*
27 * Ereport-handling routines for memory errors
28 */
29
30 #include <gmem_mem.h>
31 #include <gmem_dimm.h>
32 #include <gmem_page.h>
33 #include <gmem.h>
34
35 #include <strings.h>
36 #include <string.h>
37 #include <errno.h>
38 #include <assert.h>
39 #include <fm/fmd_api.h>
40 #include <fm/libtopo.h>
41 #include <sys/fm/protocol.h>
42 #include <sys/async.h>
43 #include <sys/errclassify.h>
44
/*
 * Physical-address bit-field masks used by gmem_to_hashed_addr().
 * Each mask is written as "field of ones shifted to its low bit" so the
 * bit range it selects can be read directly from the definition.
 */
#define	OFFBIT		(~(0x7FULL << 11))	/* all bits except PA[17:11] */
#define	BIT28_32	(0x1FULL << 28)		/* PA[32:28] */
#define	BIT13_17	(0x1FULL << 13)		/* PA[17:13] */
#define	BIT18_19	(0x3ULL << 18)		/* PA[19:18] */
#define	BIT11_12	(0x3ULL << 11)		/* PA[12:11] */
50
51 struct ce_name2type {
52 const char *name;
53 ce_dispact_t type;
54 };
55
56 nvlist_t *fru_nvl;
57
58 static ce_dispact_t
gmem_mem_name2type(const char * name)59 gmem_mem_name2type(const char *name)
60 {
61 static const struct ce_name2type new[] = {
62 { "mem-unk", CE_DISP_UNKNOWN },
63 { "mem-is", CE_DISP_INTERMITTENT },
64 { "mem-cs", CE_DISP_PERS },
65 { "mem-ss", CE_DISP_STICKY },
66 { NULL }
67 };
68 const struct ce_name2type *names = &new[0];
69 const struct ce_name2type *tp;
70
71 for (tp = names; tp->name != NULL; tp++) {
72 if (strcasecmp(name, tp->name) == 0)
73 return (tp->type);
74 }
75
76 return (CE_DISP_UNKNOWN);
77 }
78
79 /*ARGSUSED*/
80 static int
find_fault_fru(topo_hdl_t * thp,tnode_t * node,void * arg)81 find_fault_fru(topo_hdl_t *thp, tnode_t *node, void *arg)
82 {
83 nvlist_t *nvl = (nvlist_t *)arg;
84 nvlist_t *rsc = NULL, *fru = NULL;
85 nvlist_t **hcl, **topo_hcl;
86 uint_t n1, n2;
87 char *name, *name1, *name2;
88 char *id1, *id2;
89 int err, i;
90
91 if (topo_node_resource(node, &rsc, &err) < 0)
92 return (TOPO_WALK_NEXT);
93
94 err = nvlist_lookup_nvlist_array(rsc, FM_FMRI_HC_LIST, &topo_hcl, &n1);
95
96 if (err != 0) {
97 nvlist_free(rsc);
98 return (TOPO_WALK_NEXT);
99 }
100
101 (void) nvlist_lookup_string(topo_hcl[n1 - 1], FM_FMRI_HC_NAME, &name);
102 if (strcmp(name, "chip") != 0) {
103 nvlist_free(rsc);
104 return (TOPO_WALK_NEXT);
105 }
106
107 (void) nvlist_lookup_nvlist_array(nvl, FM_FMRI_HC_LIST, &hcl, &n2);
108
109 if (n1 != n2) {
110 nvlist_free(rsc);
111 return (TOPO_WALK_NEXT);
112 }
113
114 for (i = 0; i < n1; i++) {
115 (void) nvlist_lookup_string(topo_hcl[i], FM_FMRI_HC_NAME,
116 &name1);
117 (void) nvlist_lookup_string(topo_hcl[i], FM_FMRI_HC_ID, &id1);
118 (void) nvlist_lookup_string(hcl[i], FM_FMRI_HC_NAME, &name2);
119 (void) nvlist_lookup_string(hcl[i], FM_FMRI_HC_ID, &id2);
120 if (strcmp(name1, name2) != 0 || strcmp(id1, id2) != 0) {
121 nvlist_free(rsc);
122 return (TOPO_WALK_NEXT);
123 }
124 }
125
126 (void) topo_node_fru(node, &fru, NULL, &err);
127 if (fru != NULL) {
128 (void) nvlist_dup(fru, &fru_nvl, NV_UNIQUE_NAME);
129 nvlist_free(fru);
130 }
131 nvlist_free(rsc);
132 return (TOPO_WALK_TERMINATE);
133 }
134
135 nvlist_t *
gmem_find_fault_fru(fmd_hdl_t * hdl,nvlist_t * nvl)136 gmem_find_fault_fru(fmd_hdl_t *hdl, nvlist_t *nvl) {
137 topo_hdl_t *thp;
138 topo_walk_t *twp;
139 int err;
140 fru_nvl = NULL;
141
142 if ((thp = fmd_hdl_topo_hold(hdl, TOPO_VERSION)) == NULL)
143 return (NULL);
144
145 if ((twp = topo_walk_init(thp, FM_FMRI_SCHEME_HC,
146 find_fault_fru, nvl, &err)) == NULL) {
147 fmd_hdl_topo_rele(hdl, thp);
148 return (NULL);
149 }
150
151 (void) topo_walk_step(twp, TOPO_WALK_CHILD);
152 topo_walk_fini(twp);
153 fmd_hdl_topo_rele(hdl, thp);
154 return (fru_nvl);
155 }
156
157 /*
158 * fault the FRU of the common detector between two DIMMs
159 */
160 void
gmem_gen_datapath_fault(fmd_hdl_t * hdl,nvlist_t * det)161 gmem_gen_datapath_fault(fmd_hdl_t *hdl, nvlist_t *det)
162 {
163 char *name, *id;
164 nvlist_t **hcl1, **hcl;
165 uint_t n;
166 int i, j;
167 fmd_case_t *cp;
168 nvlist_t *fltlist, *rsrc;
169 nvlist_t *fru = NULL;
170
171 if (nvlist_lookup_nvlist_array(det, FM_FMRI_HC_LIST, &hcl1, &n) < 0)
172 return;
173
174 for (i = 0; i < n; i++) {
175 (void) nvlist_lookup_string(hcl1[i], FM_FMRI_HC_NAME, &name);
176 if (strcmp(name, "chip") == 0)
177 break;
178 }
179
180 n = i + 1;
181 hcl = fmd_hdl_zalloc(hdl, sizeof (nvlist_t *) * n, FMD_SLEEP);
182 if (hcl == NULL)
183 return;
184
185 for (i = 0; i < n; i++) {
186 (void) nvlist_alloc(&hcl[i],
187 NV_UNIQUE_NAME|NV_UNIQUE_NAME_TYPE, 0);
188 }
189
190 for (i = 0, j = 0; i < n; i++) {
191 (void) nvlist_lookup_string(hcl1[i], FM_FMRI_HC_NAME, &name);
192 (void) nvlist_lookup_string(hcl1[i], FM_FMRI_HC_ID, &id);
193 (void) nvlist_add_string(hcl[j], FM_FMRI_HC_NAME, name);
194 (void) nvlist_add_string(hcl[j], FM_FMRI_HC_ID, id);
195 j++;
196 if (strcmp(name, "chip") == 0)
197 break;
198 }
199
200 if (nvlist_alloc(&rsrc, NV_UNIQUE_NAME|NV_UNIQUE_NAME_TYPE, 0) != 0) {
201 for (i = 0; i < n; i++) {
202 if (hcl[i] != NULL)
203 nvlist_free(hcl[i]);
204 }
205 fmd_hdl_free(hdl, hcl, sizeof (nvlist_t *) * n);
206 }
207
208 if (nvlist_add_uint8(rsrc, FM_VERSION, FM_HC_SCHEME_VERSION) != 0 ||
209 nvlist_add_string(rsrc, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC) != 0 ||
210 nvlist_add_string(rsrc, FM_FMRI_HC_ROOT, "") != 0 ||
211 nvlist_add_uint32(rsrc, FM_FMRI_HC_LIST_SZ, n) != 0 ||
212 nvlist_add_nvlist_array(rsrc, FM_FMRI_HC_LIST, hcl, n) != 0) {
213 for (i = 0; i < n; i++) {
214 if (hcl[i] != NULL)
215 nvlist_free(hcl[i]);
216 }
217 fmd_hdl_free(hdl, hcl, sizeof (nvlist_t *) * n);
218 nvlist_free(rsrc);
219 }
220
221 fru = gmem_find_fault_fru(hdl, rsrc);
222 if (fru != NULL) {
223 cp = fmd_case_open(hdl, NULL);
224 fltlist = fmd_nvl_create_fault(hdl, "fault.memory.datapath",
225 100, fru, fru, fru);
226 fmd_case_add_suspect(hdl, cp, fltlist);
227 fmd_case_solve(hdl, cp);
228 nvlist_free(fru);
229 }
230
231 for (i = 0; i < n; i++) {
232 if (hcl[i] != NULL)
233 nvlist_free(hcl[i]);
234 }
235
236 fmd_hdl_free(hdl, hcl, sizeof (nvlist_t *) * n);
237 nvlist_free(rsrc);
238 }
239
240 /*
241 * formula to conver an unhashed address to hashed address
242 * PA[17:11] = (PA[32:28] xor PA[17:13]) :: ((PA[19:18] xor PA[12:11])
243 */
244 static void
gmem_to_hashed_addr(uint64_t * addr,uint64_t afar)245 gmem_to_hashed_addr(uint64_t *addr, uint64_t afar)
246 {
247
248 *addr = (afar & OFFBIT) | ((afar & BIT28_32) >> 15) ^ (afar & BIT13_17)
249 | ((afar & BIT18_19) >> 7) ^ (afar & BIT11_12);
250 }
251
252 /*
253 * check if a dimm has n CEs that have the same symbol-in-error
254 */
255 int
upos_thresh_check(gmem_dimm_t * dimm,uint16_t upos,uint32_t threshold)256 upos_thresh_check(gmem_dimm_t *dimm, uint16_t upos, uint32_t threshold)
257 {
258 int i;
259 gmem_mq_t *ip, *next;
260 int count = 0;
261
262 for (i = 0; i < GMEM_MAX_CKWDS; i++) {
263 for (ip = gmem_list_next(&dimm->mq_root[i]); ip != NULL;
264 ip = next) {
265 next = gmem_list_next(ip);
266 if (ip->mq_unit_position == upos) {
267 count++;
268 if (count >= threshold)
269 return (1);
270 }
271 }
272 }
273 return (0);
274 }
275
276 /*
277 * check if smaller number of retired pages > 1/16 of larger number of
278 * retired pages
279 */
280 int
check_bad_rw_retired_pages(fmd_hdl_t * hdl,gmem_dimm_t * d1,gmem_dimm_t * d2)281 check_bad_rw_retired_pages(fmd_hdl_t *hdl, gmem_dimm_t *d1, gmem_dimm_t *d2)
282 {
283 uint_t sret, lret;
284 double ratio;
285
286 sret = lret = 0;
287
288 if (d2->dimm_nretired < d1->dimm_nretired) {
289 sret = d2->dimm_nretired;
290 lret = d1->dimm_nretired;
291 } else if (d2->dimm_nretired > d1->dimm_nretired) {
292 sret = d1->dimm_nretired;
293 lret = d2->dimm_nretired;
294 } else
295 return (0);
296
297 ratio = lret * GMEM_MQ_RATIO;
298
299 if (sret > ratio) {
300 fmd_hdl_debug(hdl, "sret=%d lret=%d ratio=%.3f",
301 sret, lret, ratio);
302 return (1);
303 }
304 return (0);
305 }
306
307 /*
308 * check bad rw on any two DIMMs. The check succeeds if
309 * - each DIMM has a n CEs which have the same symbol-in-error,
310 * - the smaller number of retired pages > 1/16 larger number of retired pages
311 */
312 static int
check_bad_rw_between_dimms(fmd_hdl_t * hdl,gmem_dimm_t * d1,gmem_dimm_t * d2,uint16_t * rupos)313 check_bad_rw_between_dimms(fmd_hdl_t *hdl, gmem_dimm_t *d1, gmem_dimm_t *d2,
314 uint16_t *rupos)
315 {
316 int i;
317 gmem_mq_t *ip, *next;
318 uint16_t upos;
319
320 for (i = 0; i < GMEM_MAX_CKWDS; i++) {
321 for (ip = gmem_list_next(&d1->mq_root[i]); ip != NULL;
322 ip = next) {
323 next = gmem_list_next(ip);
324 upos = ip->mq_unit_position;
325 if (upos_thresh_check(d1, upos, gmem.gm_nupos)) {
326 if (upos_thresh_check(d2, upos,
327 gmem.gm_nupos)) {
328 if (check_bad_rw_retired_pages(hdl,
329 d1, d2)) {
330 *rupos = upos;
331 return (1);
332 }
333 }
334 }
335 }
336 }
337
338 return (0);
339 }
340
341 static void
bad_reader_writer_check(fmd_hdl_t * hdl,nvlist_t * det,gmem_dimm_t * ce_dimm)342 bad_reader_writer_check(fmd_hdl_t *hdl, nvlist_t *det, gmem_dimm_t *ce_dimm)
343 {
344 gmem_dimm_t *d, *next;
345 uint16_t upos;
346
347 for (d = gmem_list_next(&gmem.gm_dimms); d != NULL; d = next) {
348 next = gmem_list_next(d);
349 if (d == ce_dimm)
350 continue;
351 if (!gmem_same_datapath_dimms(hdl, ce_dimm, d))
352 continue;
353 if (check_bad_rw_between_dimms(hdl, ce_dimm, d, &upos)) {
354 gmem_gen_datapath_fault(hdl, det);
355 gmem_save_symbol_error(hdl, ce_dimm, upos);
356 fmd_hdl_debug(hdl,
357 "check_bad_rw_dimms succeeded: %s %s\n",
358 ce_dimm->dimm_serial, d->dimm_serial);
359 return;
360 }
361 }
362 }
363
364 /*
365 * rule 5a checking. The check succeeds if
366 * - nretired >= 512
367 * - nretired >= 128 and (addr_hi - addr_low) / (nretired -1 ) > 512KB
368 */
369 static void
ce_thresh_check(fmd_hdl_t * hdl,gmem_dimm_t * dimm)370 ce_thresh_check(fmd_hdl_t *hdl, gmem_dimm_t *dimm)
371 {
372 nvlist_t *flt, *rsrc;
373 fmd_case_t *cp;
374 uint_t nret;
375 uint64_t delta_addr = 0;
376
377 if (dimm->dimm_flags & GMEM_F_FAULTING)
378 return;
379
380 nret = dimm->dimm_nretired;
381
382 if (nret < gmem.gm_low_ce_thresh)
383 return;
384
385 if (dimm->dimm_phys_addr_hi >= dimm->dimm_phys_addr_low)
386 delta_addr =
387 (dimm->dimm_phys_addr_hi - dimm->dimm_phys_addr_low) /
388 (nret - 1);
389
390 if (nret >= gmem.gm_max_retired_pages || delta_addr > GMEM_MQ_512KB) {
391
392 fmd_hdl_debug(hdl, "ce_thresh_check succeeded nret=%d", nret);
393 dimm->dimm_flags |= GMEM_F_FAULTING;
394 gmem_dimm_dirty(hdl, dimm);
395
396 cp = fmd_case_open(hdl, NULL);
397 rsrc = gmem_find_dimm_rsc(hdl, dimm->dimm_serial);
398 flt = fmd_nvl_create_fault(hdl, GMEM_FAULT_DIMM_PAGES,
399 GMEM_FLTMAXCONF, NULL, gmem_dimm_fru(dimm), rsrc);
400 fmd_case_add_suspect(hdl, cp, flt);
401 fmd_case_solve(hdl, cp);
402 if (rsrc != NULL)
403 nvlist_free(rsrc);
404 }
405 }
406
407 /*
408 * rule 5b checking. The check succeeds if more than 120
409 * non-intermittent CEs are reported against one symbol
410 * position of one afar in 72 hours
411 */
412 static void
mq_5b_check(fmd_hdl_t * hdl,gmem_dimm_t * dimm)413 mq_5b_check(fmd_hdl_t *hdl, gmem_dimm_t *dimm)
414 {
415 nvlist_t *flt, *rsrc;
416 fmd_case_t *cp;
417 gmem_mq_t *ip, *next;
418 int cw;
419
420 for (cw = 0; cw < GMEM_MAX_CKWDS; cw++) {
421 for (ip = gmem_list_next(&dimm->mq_root[cw]);
422 ip != NULL; ip = next) {
423 next = gmem_list_next(ip);
424 if (ip->mq_dupce_count >= gmem.gm_dupce) {
425 fmd_hdl_debug(hdl,
426 "mq_5b_check succeeded: duplicate CE=%d",
427 ip->mq_dupce_count);
428 cp = fmd_case_open(hdl, NULL);
429 rsrc = gmem_find_dimm_rsc(hdl,
430 dimm->dimm_serial);
431 flt = fmd_nvl_create_fault(hdl,
432 GMEM_FAULT_DIMM_PAGES, GMEM_FLTMAXCONF,
433 NULL, gmem_dimm_fru(dimm), rsrc);
434 dimm->dimm_flags |= GMEM_F_FAULTING;
435 gmem_dimm_dirty(hdl, dimm);
436 fmd_case_add_suspect(hdl, cp, flt);
437 fmd_case_solve(hdl, cp);
438 if (rsrc != NULL)
439 nvlist_free(rsrc);
440 return;
441 }
442 }
443 }
444 }
445
446 /*
447 * delete the expired duplicate CE time stamps
448 */
449 static void
mq_prune_dup(fmd_hdl_t * hdl,gmem_mq_t * ip,uint64_t now)450 mq_prune_dup(fmd_hdl_t *hdl, gmem_mq_t *ip, uint64_t now)
451 {
452 tstamp_t *tsp, *next;
453
454 for (tsp = gmem_list_next(&ip->mq_dupce_tstamp); tsp != NULL;
455 tsp = next) {
456 next = gmem_list_next(tsp);
457 if (tsp->tstamp < now - GMEM_MQ_TIMELIM) {
458 gmem_list_delete(&ip->mq_dupce_tstamp, &tsp->ts_l);
459 fmd_hdl_free(hdl, tsp, sizeof (tstamp_t));
460 ip->mq_dupce_count--;
461 }
462 }
463 }
464
465 static void
mq_update(fmd_hdl_t * hdl,fmd_event_t * ep,gmem_mq_t * ip,uint64_t now)466 mq_update(fmd_hdl_t *hdl, fmd_event_t *ep, gmem_mq_t *ip, uint64_t now)
467 {
468 tstamp_t *tsp;
469
470 ip->mq_tstamp = now;
471 ip->mq_ep = ep;
472 if (fmd_serd_exists(hdl, ip->mq_serdnm))
473 fmd_serd_destroy(hdl, ip->mq_serdnm);
474
475 fmd_serd_create(hdl, ip->mq_serdnm, GMEM_MQ_SERDN, GMEM_MQ_SERDT);
476 (void) fmd_serd_record(hdl, ip->mq_serdnm, ep);
477
478 tsp = fmd_hdl_zalloc(hdl, sizeof (tstamp_t), FMD_SLEEP);
479 tsp->tstamp = now;
480 gmem_list_append(&ip->mq_dupce_tstamp, tsp);
481 ip->mq_dupce_count++;
482 }
483
484 /*
485 * Create a fresh index block for MQSC CE correlation.
486 */
487 gmem_mq_t *
mq_create(fmd_hdl_t * hdl,fmd_event_t * ep,uint64_t afar,uint16_t upos,uint16_t ckwd,uint64_t now)488 mq_create(fmd_hdl_t *hdl, fmd_event_t *ep,
489 uint64_t afar, uint16_t upos, uint16_t ckwd, uint64_t now)
490 {
491 gmem_mq_t *cp;
492 tstamp_t *tsp;
493
494 cp = fmd_hdl_zalloc(hdl, sizeof (gmem_mq_t), FMD_SLEEP);
495 cp->mq_tstamp = now;
496 cp->mq_ckwd = ckwd;
497 cp->mq_phys_addr = afar;
498 cp->mq_unit_position = upos;
499 cp->mq_ep = ep;
500 cp->mq_serdnm =
501 gmem_mq_serdnm_create(hdl, "mq", afar, ckwd, upos);
502
503 tsp = fmd_hdl_zalloc(hdl, sizeof (tstamp_t), FMD_SLEEP);
504 tsp->tstamp = now;
505 gmem_list_append(&cp->mq_dupce_tstamp, tsp);
506 cp->mq_dupce_count = 1;
507
508 /*
509 * Create SERD to keep this event from being removed
510 * by fmd which may not know there is an event pointer
511 * saved here. This SERD is *never* meant to fire.
512 */
513 if (fmd_serd_exists(hdl, cp->mq_serdnm))
514 fmd_serd_destroy(hdl, cp->mq_serdnm);
515
516 fmd_serd_create(hdl, cp->mq_serdnm, GMEM_MQ_SERDN, GMEM_MQ_SERDT);
517 (void) fmd_serd_record(hdl, cp->mq_serdnm, ep);
518
519 return (cp);
520 }
521
522 gmem_mq_t *
mq_destroy(fmd_hdl_t * hdl,gmem_list_t * lp,gmem_mq_t * ip)523 mq_destroy(fmd_hdl_t *hdl, gmem_list_t *lp, gmem_mq_t *ip)
524 {
525 gmem_mq_t *jp = gmem_list_next(ip);
526 tstamp_t *tsp, *next;
527
528
529 if (ip->mq_serdnm != NULL) {
530 if (fmd_serd_exists(hdl, ip->mq_serdnm))
531 fmd_serd_destroy(hdl, ip->mq_serdnm);
532 fmd_hdl_strfree(hdl, ip->mq_serdnm);
533 ip->mq_serdnm = NULL;
534 }
535
536 for (tsp = gmem_list_next(&ip->mq_dupce_tstamp); tsp != NULL;
537 tsp = next) {
538 next = gmem_list_next(tsp);
539 gmem_list_delete(&ip->mq_dupce_tstamp, &tsp->ts_l);
540 fmd_hdl_free(hdl, tsp, sizeof (tstamp_t));
541 }
542
543 gmem_list_delete(lp, &ip->mq_l);
544 fmd_hdl_free(hdl, ip, sizeof (gmem_mq_t));
545
546 return (jp);
547 }
548
549
550 /*
551 * Add an index block for a new CE, sorted
552 * a) by ascending unit position
553 * b) order of arrival (~= time order)
554 */
555 void
mq_add(fmd_hdl_t * hdl,gmem_dimm_t * dimm,fmd_event_t * ep,uint64_t afar,uint16_t unit_position,uint16_t ckwd,uint64_t now)556 mq_add(fmd_hdl_t *hdl, gmem_dimm_t *dimm, fmd_event_t *ep,
557 uint64_t afar, uint16_t unit_position, uint16_t ckwd,
558 uint64_t now)
559 {
560 gmem_mq_t *ip, *jp;
561 int cw = (int)ckwd;
562
563 for (ip = gmem_list_next(&dimm->mq_root[cw]); ip != NULL; ) {
564 if (ip->mq_unit_position > unit_position) {
565 /* list is in unit position order */
566 break;
567 } else if (ip->mq_unit_position == unit_position &&
568 ip->mq_phys_addr == afar) {
569 /*
570 * Found a duplicate cw, unit_position, and afar.
571 * Delete this node, to be superseded by the new
572 * node added below.
573 * update the mq_t structure
574 */
575 mq_update(hdl, ep, ip, now);
576 return;
577 } else {
578 ip = gmem_list_next(ip);
579 }
580 }
581
582 jp = mq_create(hdl, ep, afar, unit_position, cw, now);
583 if (ip == NULL)
584 gmem_list_append(&dimm->mq_root[cw], jp);
585 else
586 gmem_list_insert_before(&dimm->mq_root[cw], ip, jp);
587 }
588
589 /*
590 * Prune the MQSC index lists (one for each checkword), by deleting
591 * outdated index blocks from each list.
592 */
593
594 void
mq_prune(fmd_hdl_t * hdl,gmem_dimm_t * dimm,uint64_t now)595 mq_prune(fmd_hdl_t *hdl, gmem_dimm_t *dimm, uint64_t now)
596 {
597 gmem_mq_t *ip;
598 int cw;
599
600 for (cw = 0; cw < GMEM_MAX_CKWDS; cw++) {
601 for (ip = gmem_list_next(&dimm->mq_root[cw]); ip != NULL; ) {
602 if (ip->mq_tstamp < now - GMEM_MQ_TIMELIM) {
603 /*
604 * This event has timed out - delete the
605 * mq block as well as serd for the event.
606 */
607 ip = mq_destroy(hdl, &dimm->mq_root[cw], ip);
608 } else {
609 mq_prune_dup(hdl, ip, now);
610 /* tstamp < now - ce_t */
611 ip = gmem_list_next(ip);
612 }
613 } /* per checkword */
614 } /* cw = 0...3 */
615 }
616
617 /*
618 * Check the MQSC index lists (one for each checkword) by making a
619 * complete pass through each list, checking if the criteria for
620 * Rule 4A has been met. Rule 4A checking is done for each checkword.
621 *
622 * Rule 4A: fault a DIMM "whenever Solaris reports two or more CEs from
623 * two or more different physical addresses on each of two or more different
624 * bit positions from the same DIMM within 72 hours of each other, and all
625 * the addresses are in the same relative checkword (that is, the AFARs
626 * are all the same modulo 64). [Note: This means at least 4 CEs; two
627 * from one bit position, with unique addresses, and two from another,
628 * also with unique addresses, and the lower 6 bits of all the addresses
629 * are the same."
630 */
631
632 void
mq_check(fmd_hdl_t * hdl,gmem_dimm_t * dimm)633 mq_check(fmd_hdl_t *hdl, gmem_dimm_t *dimm)
634 {
635 int upos_pairs, curr_upos, cw, i, j;
636 nvlist_t *flt, *rsc;
637 typedef struct upos_pair {
638 int upos;
639 gmem_mq_t *mq1;
640 gmem_mq_t *mq2;
641 } upos_pair_t;
642 upos_pair_t upos_array[16]; /* max per cw = 2, * 8 cw's */
643 gmem_mq_t *ip;
644
645 /*
646 * Each upos_array[] member represents a pair of CEs for the same
647 * unit position (symbol) which is a 4 bit nibble.
648 * MQSC rule 4 requires pairs of CEs from the same symbol (same DIMM
649 * for rule 4A, and same DRAM for rule 4B) for a violation - this
650 * is why CE pairs are tracked.
651 */
652 upos_pairs = 0;
653 upos_array[0].mq1 = NULL;
654
655 for (cw = 0; cw < GMEM_MAX_CKWDS; cw++) {
656 i = upos_pairs;
657 curr_upos = -1;
658
659 /*
660 * mq_root[] is an array of cumulative lists of CEs
661 * indexed by checkword where the list is in unit position
662 * order. Loop through checking for duplicate unit position
663 * entries (filled in at mq_create()).
664 * The upos_array[] is filled in each time a duplicate
665 * unit position is found; the first time through the loop
666 * of a unit position sets curr_upos but does not fill in
667 * upos_array[] until the second symbol is found.
668 */
669 for (ip = gmem_list_next(&dimm->mq_root[cw]); ip != NULL;
670 ip = gmem_list_next(ip)) {
671 if (curr_upos != ip->mq_unit_position) {
672 /* Set initial current position */
673 curr_upos = ip->mq_unit_position;
674 } else if (i > upos_pairs &&
675 curr_upos == upos_array[i-1].upos) {
676 /*
677 * Only keep track of CE pairs; skip
678 * triples, quads, etc...
679 */
680 continue;
681 } else if (upos_array[i].mq1 == NULL) {
682 /* Have a pair. Add to upos_array[] */
683 fmd_hdl_debug(hdl, "pair:upos=%d",
684 curr_upos);
685 upos_array[i].upos = curr_upos;
686 upos_array[i].mq1 = gmem_list_prev(ip);
687 upos_array[i].mq2 = ip;
688 upos_array[++i].mq1 = NULL;
689 }
690 }
691 if (i - upos_pairs >= 2) {
692 /* Rule 4A violation */
693 rsc = gmem_find_dimm_rsc(hdl, dimm->dimm_serial);
694 flt = fmd_nvl_create_fault(hdl, GMEM_FAULT_DIMM_4A,
695 GMEM_FLTMAXCONF, NULL, gmem_dimm_fru(dimm), rsc);
696 for (j = upos_pairs; j < i; j++) {
697 fmd_case_add_ereport(hdl,
698 dimm->dimm_case.cc_cp,
699 upos_array[j].mq1->mq_ep);
700 fmd_case_add_ereport(hdl,
701 dimm->dimm_case.cc_cp,
702 upos_array[j].mq2->mq_ep);
703 }
704 dimm->dimm_flags |= GMEM_F_FAULTING;
705 gmem_dimm_dirty(hdl, dimm);
706 fmd_case_add_suspect(hdl, dimm->dimm_case.cc_cp, flt);
707 fmd_case_solve(hdl, dimm->dimm_case.cc_cp);
708 if (rsc != NULL)
709 nvlist_free(rsc);
710 return;
711 }
712 upos_pairs = i;
713 assert(upos_pairs < 16);
714 }
715 }
716
717 /*ARGSUSED*/
718 gmem_evdisp_t
gmem_ce(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class)719 gmem_ce(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
720 {
721 uint16_t symbol_pos, cw;
722 uint64_t phyaddr, offset, addr;
723 uint32_t filter_ratio = 0;
724 gmem_dimm_t *dimm;
725 gmem_page_t *page;
726 nvlist_t *fru = NULL;
727 nvlist_t *topo_rsc = NULL;
728 nvlist_t *rsrc, *det;
729 const char *uuid;
730 ce_dispact_t type;
731 boolean_t diagnose;
732 char *sn;
733 int err, rc;
734 uint64_t *now;
735 uint_t nelem;
736 int skip_error = 0;
737
738 err = nvlist_lookup_boolean_value(nvl, GMEM_ERPT_PAYLOAD_DIAGNOSE,
739 &diagnose);
740 if (err != 0 || diagnose == 0)
741 return (GMEM_EVD_UNUSED);
742
743 if ((nvlist_lookup_uint64(nvl, GMEM_ERPT_PAYLOAD_PHYSADDR,
744 &phyaddr) != 0) ||
745 (nvlist_lookup_uint64(nvl, GMEM_ERPT_PAYLOAD_OFFSET,
746 &offset) != 0)) {
747 fmd_hdl_debug(hdl, "Can't get page phyaddr or offset");
748 return (GMEM_EVD_BAD);
749 }
750
751 fmd_hdl_debug(hdl, "phyaddr %llx offset %llx", phyaddr, offset);
752
753 if ((page = gmem_page_lookup(phyaddr)) != NULL &&
754 page->page_case.cc_cp != NULL &&
755 fmd_case_solved(hdl, page->page_case.cc_cp))
756 return (GMEM_EVD_REDUND);
757
758 if (nvlist_lookup_nvlist(nvl, GMEM_ERPT_PAYLOAD_RESOURCE,
759 &rsrc) != 0 ||
760 nvlist_lookup_string(rsrc, FM_FMRI_HC_SERIAL_ID, &sn) != 0) {
761 fmd_hdl_debug(hdl, "Can't get dimm serial\n");
762 return (GMEM_EVD_BAD);
763 }
764
765 fmd_hdl_debug(hdl, "serial %s", sn);
766
767 if (nvlist_lookup_nvlist(nvl, GMEM_ERPT_PAYLOAD_DETECTOR, &det) != 0)
768 return (GMEM_EVD_BAD);
769
770 /*
771 * Find dimm fru by serial number.
772 */
773 fru = gmem_find_dimm_fru(hdl, sn);
774
775 if (fru == NULL) {
776 fmd_hdl_debug(hdl, "Dimm is not present\n");
777 return (GMEM_EVD_UNUSED);
778 }
779
780 if ((dimm = gmem_dimm_lookup(hdl, fru)) == NULL &&
781 (dimm = gmem_dimm_create(hdl, fru, det)) == NULL) {
782 nvlist_free(fru);
783 return (GMEM_EVD_UNUSED);
784 }
785
786 if (dimm->dimm_case.cc_cp == NULL) {
787 dimm->dimm_case.cc_cp = gmem_case_create(hdl,
788 &dimm->dimm_header, GMEM_PTR_DIMM_CASE, &uuid);
789 }
790
791 /*
792 * Add to MQSC correlation lists all CEs which pass validity
793 * checks above. If there is no symbol_pos & relative ckword
794 * in the ereport, skip rule 4A checking.
795 */
796
797 err = nvlist_lookup_uint16(nvl, GMEM_ERPT_PAYLOAD_SYMBOLPOS,
798 &symbol_pos);
799 err |= nvlist_lookup_uint16(nvl, GMEM_ERPT_PAYLOAD_CKW, &cw);
800
801 if (err == 0) {
802 fmd_hdl_debug(hdl, "symbol_pos=%d cw=%d", symbol_pos, cw);
803
804 if (nvlist_lookup_uint64_array(nvl,
805 "__tod", &now, &nelem) == 0) {
806 skip_error = gmem_check_symbol_error(hdl, dimm,
807 symbol_pos);
808
809 if (!skip_error ||
810 !(dimm->dimm_flags & GMEM_F_FAULTING))
811 mq_add(hdl, dimm, ep, phyaddr, symbol_pos,
812 cw, *now);
813
814 mq_prune(hdl, dimm, *now);
815
816 if (!skip_error)
817 bad_reader_writer_check(hdl, det, dimm);
818 if (!(dimm->dimm_flags & GMEM_F_FAULTING)) {
819 mq_check(hdl, dimm);
820 mq_5b_check(hdl, dimm);
821 }
822 }
823 }
824
825 type = gmem_mem_name2type(strstr(class, "mem"));
826
827 switch (type) {
828 case CE_DISP_UNKNOWN:
829 GMEM_STAT_BUMP(ce_unknown);
830 nvlist_free(fru);
831 return (GMEM_EVD_UNUSED);
832 case CE_DISP_INTERMITTENT:
833 GMEM_STAT_BUMP(ce_interm);
834 nvlist_free(fru);
835 return (GMEM_EVD_UNUSED);
836 case CE_DISP_PERS:
837 GMEM_STAT_BUMP(ce_clearable_persis);
838 break;
839 case CE_DISP_STICKY:
840 GMEM_STAT_BUMP(ce_sticky);
841 break;
842 default:
843 nvlist_free(fru);
844 return (GMEM_EVD_BAD);
845 }
846
847 if (gmem_check_symbol_error(hdl, dimm, symbol_pos)) {
848 nvlist_free(fru);
849 return (GMEM_EVD_REDUND);
850 }
851
852 if (page == NULL) {
853 page = gmem_page_create(hdl, fru, phyaddr, offset);
854 if (page == NULL) {
855 nvlist_free(fru);
856 return (GMEM_EVD_UNUSED);
857 }
858 }
859
860 nvlist_free(fru);
861
862 if (page->page_case.cc_cp == NULL) {
863 page->page_case.cc_cp = gmem_case_create(hdl,
864 &page->page_header, GMEM_PTR_PAGE_CASE, &uuid);
865 }
866
867 switch (type) {
868 case CE_DISP_PERS:
869 fmd_hdl_debug(hdl, "adding persistent event to CE serd");
870 if (page->page_case.cc_serdnm == NULL)
871 gmem_page_serd_create(hdl, page, nvl);
872
873 filter_ratio = gmem_get_serd_filter_ratio(nvl);
874
875 fmd_hdl_debug(hdl, "filter_ratio %d\n", filter_ratio);
876
877 if (gmem_serd_record(hdl, page->page_case.cc_serdnm,
878 filter_ratio, ep) == FMD_B_FALSE) {
879 return (GMEM_EVD_OK); /* engine hasn't fired */
880 }
881
882 fmd_hdl_debug(hdl, "ce page serd fired\n");
883 fmd_case_add_serd(hdl, page->page_case.cc_cp,
884 page->page_case.cc_serdnm);
885 fmd_serd_reset(hdl, page->page_case.cc_serdnm);
886 break; /* to retire */
887
888 case CE_DISP_STICKY:
889 fmd_case_add_ereport(hdl, page->page_case.cc_cp, ep);
890 break; /* to retire */
891 }
892
893
894 topo_rsc = gmem_find_dimm_rsc(hdl, dimm->dimm_serial);
895 rc = gmem_page_fault(hdl, gmem_dimm_fru(dimm), topo_rsc,
896 ep, phyaddr, offset);
897
898 if (rc) {
899 gmem_to_hashed_addr(&addr, phyaddr);
900
901 if (addr > dimm->dimm_phys_addr_hi)
902 dimm->dimm_phys_addr_hi = addr;
903 if (addr < dimm->dimm_phys_addr_low)
904 dimm->dimm_phys_addr_low = addr;
905
906 dimm->dimm_nretired++;
907 dimm->dimm_retstat.fmds_value.ui64++;
908 gmem_dimm_dirty(hdl, dimm);
909 ce_thresh_check(hdl, dimm);
910 }
911 return (GMEM_EVD_OK);
912 }
913
914 void
gmem_dimm_close(fmd_hdl_t * hdl,void * arg)915 gmem_dimm_close(fmd_hdl_t *hdl, void *arg)
916 {
917 gmem_dimm_destroy(hdl, arg);
918 }
919