xref: /titanic_41/usr/src/cmd/fm/modules/sun4u/cpumem-diagnosis/cmd_Lxcache.c (revision a62774df315360f02521d6470eab7d5080137dad)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 
27 /*
28  * Support routines for managing per-Lxcache state.
29  */
30 
31 #include <cmd_Lxcache.h>
32 #include <cmd_mem.h>
33 #include <cmd_cpu.h>
34 #include <cmd.h>
35 #include <errno.h>
36 #include <fcntl.h>
37 #include <unistd.h>
38 #include <stdio.h>
39 #include <strings.h>
40 #include <fm/fmd_api.h>
41 #include <sys/fm/protocol.h>
42 #include <sys/cheetahregs.h>
43 #include <sys/mem_cache.h>
44 
#define	PN_ECSTATE_NA	5

/*
 * Threshold values for SERDing CPUs, based on the number of times we
 * have retired a cache line in each category.
 */
#define	CMD_CPU_SERD_AGG_1	64
#define	CMD_CPU_SERD_AGG_2	64

/*
 * Lookup tables indexed by a 4-bit mask (0x0 through 0xf).
 *
 * cmd_lowest_way[m]  - position of the least significant set bit in m,
 *			or -1 when m has no bit set.
 * cmd_num_of_bits[m] - number of bits set in m.
 */
static int8_t cmd_lowest_way[16] = {
	-1,	/* 0x0 */
	0,	/* 0x1 */
	1,	/* 0x2 */
	0,	/* 0x3 */
	2,	/* 0x4 */
	0,	/* 0x5 */
	1,	/* 0x6 */
	0,	/* 0x7 */
	3,	/* 0x8 */
	0,	/* 0x9 */
	1,	/* 0xa */
	0,	/* 0xb */
	2,	/* 0xc */
	0,	/* 0xd */
	1,	/* 0xe */
	0	/* 0xf */
};

static int cmd_num_of_bits[16] = {
	0,	/* 0x0 */
	1,	/* 0x1 */
	1,	/* 0x2 */
	2,	/* 0x3 */
	1,	/* 0x4 */
	2,	/* 0x5 */
	2,	/* 0x6 */
	3,	/* 0x7 */
	1,	/* 0x8 */
	2,	/* 0x9 */
	2,	/* 0xa */
	3,	/* 0xb */
	2,	/* 0xc */
	3,	/* 0xd */
	3,	/* 0xe */
	4	/* 0xf */
};
60 
61 
62 void
cmd_Lxcache_write(fmd_hdl_t * hdl,cmd_Lxcache_t * Lxcache)63 cmd_Lxcache_write(fmd_hdl_t *hdl, cmd_Lxcache_t *Lxcache)
64 {
65 	fmd_buf_write(hdl, NULL, Lxcache->Lxcache_bufname, Lxcache,
66 	    sizeof (cmd_Lxcache_pers_t));
67 }
68 
69 const char *
cmd_type_to_str(cmd_ptrsubtype_t pstype)70 cmd_type_to_str(cmd_ptrsubtype_t pstype)
71 {
72 	switch (pstype) {
73 		case CMD_PTR_CPU_L2DATA:
74 			return ("l2data");
75 			break;
76 		case CMD_PTR_CPU_L3DATA:
77 			return ("l3data");
78 			break;
79 		case CMD_PTR_CPU_L2TAG:
80 			return ("l2tag");
81 			break;
82 		case CMD_PTR_CPU_L3TAG:
83 			return ("l3tag");
84 			break;
85 		default:
86 			return ("unknown");
87 			break;
88 	}
89 }
90 
91 const char *
cmd_flags_to_str(int flags)92 cmd_flags_to_str(int flags)
93 {
94 	switch (flags) {
95 		case CMD_LxCACHE_F_ACTIVE:
96 			return ("ACTIVE");
97 		case CMD_LxCACHE_F_FAULTING:
98 			return ("FAULTING");
99 		case CMD_LxCACHE_F_RETIRED:
100 			return ("RETIRED");
101 		case CMD_LxCACHE_F_UNRETIRED:
102 			return ("UNRETIRED");
103 		case CMD_LxCACHE_F_RERETIRED:
104 			return ("RERETIRED");
105 		default:
106 			return ("Unknown_flags");
107 	}
108 }
109 
110 const char *
cmd_reason_to_str(int reason)111 cmd_reason_to_str(int reason)
112 {
113 	switch (reason) {
114 		case CMD_LXSUSPECT_DATA:
115 			return ("SUSPECT_DATA");
116 		case CMD_LXSUSPECT_0_TAG:
117 			return ("SUSPECT_0_TAG");
118 		case CMD_LXSUSPECT_1_TAG:
119 			return ("SUSPECT_1_TAG");
120 		case CMD_LXCONVICTED:
121 			return ("CONVICTED");
122 		case CMD_LXFUNCTIONING:
123 			return ("FUNCTIONING");
124 		default:
125 			return ("Unknown_reason");
126 	}
127 }
128 
129 static void
cmd_pretty_print_Lxcache(fmd_hdl_t * hdl,cmd_Lxcache_t * Lxcache)130 cmd_pretty_print_Lxcache(fmd_hdl_t *hdl, cmd_Lxcache_t *Lxcache)
131 {
132 	fmd_hdl_debug(hdl,
133 	    "\n"
134 	    "	cpu	= %s\n"
135 	    "	type	= %s\n"
136 	    "	index	= %d\n"
137 	    "	way	= %d\n"
138 	    "	bit	= %d\n"
139 	    "	reason	= %s\n"
140 	    "	flags	= %s\n",
141 	    Lxcache->Lxcache_cpu_bufname,
142 	    cmd_type_to_str(Lxcache->Lxcache_type),
143 	    Lxcache->Lxcache_index,
144 	    Lxcache->Lxcache_way,
145 	    Lxcache->Lxcache_bit,
146 	    cmd_reason_to_str(Lxcache->Lxcache_reason),
147 	    cmd_flags_to_str(Lxcache->Lxcache_flags));
148 }
149 
150 void
cmd_Lxcache_free(fmd_hdl_t * hdl,cmd_cpu_t * cpu,cmd_Lxcache_t * Lxcache,int destroy)151 cmd_Lxcache_free(fmd_hdl_t *hdl, cmd_cpu_t *cpu, cmd_Lxcache_t *Lxcache,
152     int destroy)
153 {
154 	cmd_case_t *cc = &Lxcache->Lxcache_case;
155 
156 	fmd_hdl_debug(hdl, "Entering cmd_Lxcache_free for %s destroy = %d\n",
157 	    Lxcache->Lxcache_bufname, destroy);
158 
159 	if (cc->cc_cp != NULL)
160 		cmd_case_fini(hdl, cc->cc_cp, destroy);
161 	if (cc->cc_serdnm != NULL) {
162 		if (fmd_serd_exists(hdl, cc->cc_serdnm) && destroy) {
163 			fmd_serd_destroy(hdl, cc->cc_serdnm);
164 			fmd_hdl_strfree(hdl, cc->cc_serdnm);
165 			cc->cc_serdnm = NULL;
166 		}
167 	}
168 	if (Lxcache->Lxcache_nvl) {
169 		nvlist_free(Lxcache->Lxcache_nvl);
170 		Lxcache->Lxcache_nvl = NULL;
171 	}
172 	/*
173 	 * Clean up the SERD engine created to handle recheck of TAGS.
174 	 * This SERD engine was created to save the event pointer.
175 	 */
176 	if (Lxcache->Lxcache_serdnm != NULL) {
177 		if (fmd_serd_exists(hdl, Lxcache->Lxcache_serdnm) && destroy) {
178 			fmd_serd_destroy(hdl, Lxcache->Lxcache_serdnm);
179 			fmd_hdl_strfree(hdl, Lxcache->Lxcache_serdnm);
180 			Lxcache->Lxcache_serdnm = NULL;
181 		}
182 	}
183 	Lxcache->Lxcache_timeout_id = -1;
184 	Lxcache->Lxcache_ep = NULL;
185 	Lxcache->Lxcache_retry_count = 0;
186 	if (destroy)
187 		fmd_buf_destroy(hdl, NULL, Lxcache->Lxcache_bufname);
188 	cmd_fmri_fini(hdl, &Lxcache->Lxcache_asru, destroy);
189 	cmd_list_delete(&cpu->cpu_Lxcaches, Lxcache);
190 	fmd_hdl_free(hdl, Lxcache, sizeof (cmd_Lxcache_t));
191 }
192 
193 void
cmd_Lxcache_destroy(fmd_hdl_t * hdl,cmd_cpu_t * cpu,cmd_Lxcache_t * Lxcache)194 cmd_Lxcache_destroy(fmd_hdl_t *hdl, cmd_cpu_t *cpu, cmd_Lxcache_t *Lxcache)
195 {
196 	cmd_Lxcache_free(hdl, cpu, Lxcache, FMD_B_TRUE);
197 }
198 
199 cmd_Lxcache_t *
cmd_Lxcache_lookup_by_type_index_way_bit(cmd_cpu_t * cpu,cmd_ptrsubtype_t pstype,int32_t index,int8_t way,int16_t bit)200 cmd_Lxcache_lookup_by_type_index_way_bit(cmd_cpu_t *cpu,
201     cmd_ptrsubtype_t pstype, int32_t index, int8_t way, int16_t bit)
202 {
203 	cmd_Lxcache_t *Lxcache;
204 
205 	for (Lxcache = cmd_list_next(&cpu->cpu_Lxcaches); Lxcache != NULL;
206 	    Lxcache = cmd_list_next(Lxcache)) {
207 		if ((Lxcache->Lxcache_type == pstype) &&
208 		    (Lxcache->Lxcache_index == (uint32_t)index) &&
209 		    (Lxcache->Lxcache_way == (uint32_t)way) &&
210 		    (Lxcache->Lxcache_bit == (uint16_t)bit))
211 			return (Lxcache);
212 	}
213 
214 	return (NULL);
215 }
216 
217 cmd_Lxcache_t *
cmd_Lxcache_create(fmd_hdl_t * hdl,cmd_xr_t * xr,cmd_cpu_t * cpu,nvlist_t * modasru,cmd_ptrsubtype_t pstype,int32_t index,int8_t way,int16_t bit)218 cmd_Lxcache_create(fmd_hdl_t *hdl, cmd_xr_t *xr, cmd_cpu_t *cpu,
219     nvlist_t *modasru, cmd_ptrsubtype_t pstype, int32_t index,
220     int8_t way, int16_t bit)
221 {
222 	cmd_Lxcache_t *Lxcache;
223 	nvlist_t *asru;
224 	const char	*pstype_name;
225 	uint8_t	fmri_Lxcache_type;
226 
227 	pstype_name = cmd_type_to_str(pstype);
228 	fmd_hdl_debug(hdl,
229 	    "\n%s:cpu_id %d:Creating new Lxcache for index=%d way=%d bit=%d\n",
230 	    pstype_name, cpu->cpu_cpuid, index, way, bit);
231 
232 	CMD_CPU_STAT_BUMP(cpu, Lxcache_creat);
233 
234 	Lxcache = fmd_hdl_zalloc(hdl, sizeof (cmd_Lxcache_t), FMD_SLEEP);
235 	(void) strncpy(Lxcache->Lxcache_cpu_bufname,
236 	    cpu->cpu_bufname, CMD_BUFNMLEN);
237 	Lxcache->Lxcache_nodetype = CMD_NT_LxCACHE;
238 	Lxcache->Lxcache_version = CMD_LxCACHE_VERSION;
239 	Lxcache->Lxcache_type = pstype;
240 	Lxcache->Lxcache_index = (uint32_t)index;
241 	Lxcache->Lxcache_way = (uint32_t)way;
242 	Lxcache->Lxcache_bit = (uint16_t)bit;
243 	Lxcache->Lxcache_reason = CMD_LXFUNCTIONING;
244 	Lxcache->Lxcache_flags = CMD_LxCACHE_F_ACTIVE;
245 	Lxcache->Lxcache_timeout_id = -1;
246 	Lxcache->Lxcache_retry_count = 0;
247 	Lxcache->Lxcache_nvl = NULL;
248 	Lxcache->Lxcache_ep = NULL;
249 	Lxcache->Lxcache_serdnm = NULL;
250 	Lxcache->Lxcache_clcode = 0;
251 	Lxcache->xr = xr;
252 	Lxcache->Lxcache_retired_fmri[0] = '\0';
253 	switch (pstype) {
254 		case CMD_PTR_CPU_L2DATA:
255 			fmri_Lxcache_type = FM_FMRI_CPU_CACHE_TYPE_L2;
256 			break;
257 		case CMD_PTR_CPU_L3DATA:
258 			fmri_Lxcache_type = FM_FMRI_CPU_CACHE_TYPE_L3;
259 			break;
260 		case CMD_PTR_CPU_L2TAG:
261 			fmri_Lxcache_type = FM_FMRI_CPU_CACHE_TYPE_L2;
262 			break;
263 		case CMD_PTR_CPU_L3TAG:
264 			fmri_Lxcache_type = FM_FMRI_CPU_CACHE_TYPE_L3;
265 			break;
266 		default:
267 			break;
268 	}
269 
270 	cmd_bufname(Lxcache->Lxcache_bufname, sizeof (Lxcache->Lxcache_bufname),
271 	    "Lxcache_%s_%d_%d_%d_%d", pstype_name, cpu->cpu_cpuid,
272 	    index, way, bit);
273 	fmd_hdl_debug(hdl,
274 	    "\n%s:cpu_id %d: new Lxcache name is %s\n",
275 	    pstype_name, cpu->cpu_cpuid, Lxcache->Lxcache_bufname);
276 	if ((errno = nvlist_dup(modasru, &asru, 0)) != 0 ||
277 	    (errno = nvlist_add_uint32(asru, FM_FMRI_CPU_CACHE_INDEX,
278 	    index)) != 0 ||
279 	    (errno = nvlist_add_uint32(asru, FM_FMRI_CPU_CACHE_WAY,
280 	    (uint32_t)way)) != 0 ||
281 	    (errno = nvlist_add_uint16(asru, FM_FMRI_CPU_CACHE_BIT,
282 	    bit)) != 0 ||
283 	    (errno = nvlist_add_uint8(asru, FM_FMRI_CPU_CACHE_TYPE,
284 	    fmri_Lxcache_type)) != 0 ||
285 	    (errno = fmd_nvl_fmri_expand(hdl, asru)) != 0)
286 		fmd_hdl_abort(hdl, "failed to build Lxcache fmri");
287 	asru->nvl_nvflag |= NV_UNIQUE_NAME_TYPE;
288 
289 	cmd_fmri_init(hdl, &Lxcache->Lxcache_asru, asru,
290 	    "%s_asru_%d_%d_%d", pstype_name, index, way, bit);
291 
292 	nvlist_free(asru);
293 
294 	cmd_list_append(&cpu->cpu_Lxcaches, Lxcache);
295 	cmd_Lxcache_write(hdl, Lxcache);
296 
297 	return (Lxcache);
298 }
299 
300 cmd_Lxcache_t *
cmd_Lxcache_lookup_by_index_way(cmd_cpu_t * cpu,cmd_ptrsubtype_t pstype,int32_t index,int8_t way)301 cmd_Lxcache_lookup_by_index_way(cmd_cpu_t *cpu, cmd_ptrsubtype_t pstype,
302     int32_t index, int8_t way)
303 {
304 	cmd_Lxcache_t *cache;
305 
306 	for (cache = cmd_list_next(&cpu->cpu_Lxcaches); cache != NULL;
307 	    cache = cmd_list_next(cache)) {
308 	if ((cache->Lxcache_index == (uint32_t)index) &&
309 	    (cache->Lxcache_way == (uint32_t)way) &&
310 	    (cache->Lxcache_type == pstype)) {
311 		return (cache);
312 		}
313 	}
314 
315 	return (NULL);
316 }
317 
318 static cmd_Lxcache_t *
Lxcache_wrapv1(fmd_hdl_t * hdl,cmd_Lxcache_pers_t * pers,size_t psz)319 Lxcache_wrapv1(fmd_hdl_t *hdl, cmd_Lxcache_pers_t *pers, size_t psz)
320 {
321 	cmd_Lxcache_t *Lxcache;
322 
323 	if (psz != sizeof (cmd_Lxcache_pers_t)) {
324 		fmd_hdl_abort(hdl, "size of state doesn't match size of "
325 		    "version 1 state (%u bytes).\n",
326 		    sizeof (cmd_Lxcache_pers_t));
327 	}
328 
329 	Lxcache = fmd_hdl_zalloc(hdl, sizeof (cmd_Lxcache_t), FMD_SLEEP);
330 	bcopy(pers, Lxcache, sizeof (cmd_Lxcache_pers_t));
331 	fmd_hdl_free(hdl, pers, psz);
332 	return (Lxcache);
333 }
334 
335 void *
cmd_Lxcache_restore(fmd_hdl_t * hdl,fmd_case_t * cp,cmd_case_ptr_t * ptr)336 cmd_Lxcache_restore(fmd_hdl_t *hdl, fmd_case_t *cp, cmd_case_ptr_t *ptr)
337 {
338 	cmd_Lxcache_t *Lxcache;
339 	cmd_Lxcache_t *recovered_Lxcache;
340 	cmd_cpu_t	*cpu;
341 	size_t		Lxcachesz;
342 	char		*serdnm;
343 
344 	/*
345 	 * We need to first extract the cpu name by reading directly
346 	 * from fmd buffers in order to begin our search for Lxcache in
347 	 * the appropriate cpu list.
348 	 * After we identify the cpu list using buf name we look
349 	 * in cpu list for our Lxcache states.
350 	 */
351 	fmd_hdl_debug(hdl, "restoring Lxcache from %s\n", ptr->ptr_name);
352 
353 	if ((Lxcachesz = fmd_buf_size(hdl, NULL, ptr->ptr_name)) == 0) {
354 		fmd_hdl_abort(hdl, "Lxcache referenced by case %s does "
355 		    "not exist in saved state\n",
356 		    fmd_case_uuid(hdl, cp));
357 	} else if (Lxcachesz != sizeof (cmd_Lxcache_pers_t)) {
358 		fmd_hdl_abort(hdl, "Lxcache buffer referenced by case %s "
359 		    "is %d bytes. Expected size is %d bytes\n",
360 		    fmd_case_uuid(hdl, cp), Lxcachesz,
361 		    sizeof (cmd_Lxcache_pers_t));
362 	}
363 
364 	if ((Lxcache = cmd_buf_read(hdl, NULL, ptr->ptr_name,
365 	    Lxcachesz)) == NULL) {
366 		fmd_hdl_abort(hdl, "failed to read Lxcache buf %s",
367 		    ptr->ptr_name);
368 	}
369 	cmd_pretty_print_Lxcache(hdl, Lxcache);
370 
371 	fmd_hdl_debug(hdl, "found %d in version field\n",
372 	    Lxcache->Lxcache_version);
373 	cpu = cmd_restore_cpu_only(hdl, cp, Lxcache->Lxcache_cpu_bufname);
374 	if (cpu == NULL) {
375 		fmd_hdl_debug(hdl,
376 		    "\nCould not restore cpu %s\n",
377 		    Lxcache->Lxcache_cpu_bufname);
378 		return (NULL);
379 	}
380 	recovered_Lxcache = Lxcache;	/* save the recovered Lxcache */
381 
382 	for (Lxcache = cmd_list_next(&cpu->cpu_Lxcaches); Lxcache != NULL;
383 	    Lxcache = cmd_list_next(Lxcache)) {
384 		if (strcmp(Lxcache->Lxcache_bufname, ptr->ptr_name) == 0)
385 			break;
386 	}
387 
388 	if (Lxcache == NULL) {
389 
390 		switch (recovered_Lxcache->Lxcache_version) {
391 			case CMD_LxCACHE_VERSION_1:
392 				Lxcache = Lxcache_wrapv1(hdl,
393 				    (cmd_Lxcache_pers_t *)recovered_Lxcache,
394 				    Lxcachesz);
395 				break;
396 			default:
397 				fmd_hdl_abort(hdl, "unknown version (found %d) "
398 				"for Lxcache state referenced by case %s.\n",
399 				    recovered_Lxcache->Lxcache_version,
400 				    fmd_case_uuid(hdl, cp));
401 			break;
402 		}
403 
404 		cmd_fmri_restore(hdl, &Lxcache->Lxcache_asru);
405 		/*
406 		 * We need to cleanup the information associated with
407 		 * the timeout routine because these are not checkpointed
408 		 * and cannot be retored.
409 		 */
410 		Lxcache->Lxcache_timeout_id = -1;
411 		Lxcache->Lxcache_retry_count = 0;
412 		Lxcache->Lxcache_nvl = NULL;
413 		Lxcache->Lxcache_ep = NULL;
414 		Lxcache->Lxcache_serdnm = NULL;
415 
416 		cmd_list_append(&cpu->cpu_Lxcaches, Lxcache);
417 	}
418 	serdnm = cmd_Lxcache_serdnm_create(hdl, cpu->cpu_cpuid,
419 	    Lxcache->Lxcache_type, Lxcache->Lxcache_index,
420 	    Lxcache->Lxcache_way, Lxcache->Lxcache_bit);
421 	fmd_hdl_debug(hdl,
422 	    "cpu_id %d: serdname for the case is %s\n",
423 	    cpu->cpu_cpuid, serdnm);
424 	fmd_hdl_debug(hdl,
425 	    "cpu_id %d: restoring the case for index %d way %d bit %d\n",
426 	    cpu->cpu_cpuid, Lxcache->Lxcache_index,
427 	    Lxcache->Lxcache_way, Lxcache->Lxcache_bit);
428 	cmd_case_restore(hdl, &Lxcache->Lxcache_case, cp, serdnm);
429 
430 	return (Lxcache);
431 }
432 
433 /*ARGSUSED*/
434 void
cmd_Lxcache_validate(fmd_hdl_t * hdl,cmd_cpu_t * cpu)435 cmd_Lxcache_validate(fmd_hdl_t *hdl, cmd_cpu_t *cpu)
436 {
437 	cmd_Lxcache_t *Lxcache, *next;
438 
439 	for (Lxcache = cmd_list_next(&cpu->cpu_Lxcaches);
440 	    Lxcache != NULL; Lxcache = next) {
441 		next = cmd_list_next(Lxcache);
442 
443 		if (fmd_nvl_fmri_unusable(hdl, Lxcache->Lxcache_asru_nvl)) {
444 			cmd_Lxcache_destroy(hdl, cpu, Lxcache);
445 		}
446 	}
447 }
448 
449 void
cmd_Lxcache_dirty(fmd_hdl_t * hdl,cmd_Lxcache_t * Lxcache)450 cmd_Lxcache_dirty(fmd_hdl_t *hdl, cmd_Lxcache_t *Lxcache)
451 {
452 	if (fmd_buf_size(hdl, NULL, Lxcache->Lxcache_bufname) !=
453 	    sizeof (cmd_Lxcache_pers_t))
454 		fmd_buf_destroy(hdl, NULL, Lxcache->Lxcache_bufname);
455 
456 	/* No need to rewrite the FMRIs in the Lxcache - they don't change */
457 	fmd_buf_write(hdl, NULL,
458 	    Lxcache->Lxcache_bufname, &Lxcache->Lxcache_pers,
459 	    sizeof (cmd_Lxcache_pers_t));
460 }
461 
462 void
cmd_Lxcache_fini(fmd_hdl_t * hdl,cmd_cpu_t * cpu)463 cmd_Lxcache_fini(fmd_hdl_t *hdl, cmd_cpu_t *cpu)
464 {
465 	cmd_Lxcache_t *Lxcache;
466 
467 	while ((Lxcache = cmd_list_next(&cpu->cpu_Lxcaches)) != NULL)
468 		cmd_Lxcache_free(hdl, cpu, Lxcache, FMD_B_FALSE);
469 }
470 
471 char *
cmd_Lxcache_serdnm_create(fmd_hdl_t * hdl,uint32_t cpu_id,cmd_ptrsubtype_t pstype,int32_t index,int8_t way,int16_t bit)472 cmd_Lxcache_serdnm_create(fmd_hdl_t *hdl, uint32_t cpu_id,
473 			    cmd_ptrsubtype_t pstype,
474 			    int32_t index, int8_t way, int16_t bit)
475 {
476 	const char *fmt = "cpu_%d:%s_%d_%d_%d_serd";
477 	const char *serdbase;
478 	size_t sz;
479 	char	*nm;
480 
481 	serdbase = cmd_type_to_str(pstype);
482 	sz = (snprintf(NULL, 0, fmt, cpu_id, serdbase, index, way, bit) + 1);
483 	nm = fmd_hdl_alloc(hdl, sz, FMD_SLEEP);
484 	(void) snprintf(nm, sz, fmt, cpu_id, serdbase, index, way, bit);
485 	return (nm);
486 }
487 
488 char *
cmd_Lxcache_anonymous_serdnm_create(fmd_hdl_t * hdl,uint32_t cpu_id,cmd_ptrsubtype_t pstype,int32_t index,int8_t way,int16_t bit)489 cmd_Lxcache_anonymous_serdnm_create(fmd_hdl_t *hdl, uint32_t cpu_id,
490 			    cmd_ptrsubtype_t pstype,
491 			    int32_t index, int8_t way, int16_t bit)
492 {
493 	const char *fmt = "cpu_%d:%s_%d_%d_%d_anonymous_serd";
494 	const char *serdbase;
495 	size_t sz;
496 	char	*nm;
497 
498 	serdbase = cmd_type_to_str(pstype);
499 	sz = (snprintf(NULL, 0, fmt, cpu_id, serdbase, index, way, bit) + 1);
500 	nm = fmd_hdl_alloc(hdl, sz, FMD_SLEEP);
501 	(void) snprintf(nm, sz, fmt, cpu_id, serdbase, index, way, bit);
502 	return (nm);
503 }
504 
505 /*
506  * Count the number of SERD type 2 ways retired for a given cpu
507  * These are defined to be L3 Cache data retirements
508  */
509 
510 uint32_t
cmd_Lx_index_count_type2_ways(cmd_cpu_t * cpu)511 cmd_Lx_index_count_type2_ways(cmd_cpu_t *cpu)
512 {
513 	cmd_Lxcache_t *cache = NULL;
514 	uint32_t ret_count = 0;
515 
516 	for (cache = cmd_list_next(&cpu->cpu_Lxcaches); cache != NULL;
517 	    cache = cmd_list_next(cache)) {
518 		if ((cache->Lxcache_flags & CMD_LxCACHE_F_RETIRED) &&
519 		    (cache->Lxcache_type == CMD_PTR_CPU_L3DATA)) {
520 			ret_count++;
521 		}
522 	}
523 	return (ret_count);
524 }
525 /*
526  * Count the number of SERD type 1 ways retired for a given cpu
527  * These are defined to be L2 Data, tag and L3 Tag retirements
528  */
529 
530 uint32_t
cmd_Lx_index_count_type1_ways(cmd_cpu_t * cpu)531 cmd_Lx_index_count_type1_ways(cmd_cpu_t *cpu)
532 {
533 	cmd_Lxcache_t *cache = NULL;
534 	uint32_t ret_count = 0;
535 
536 	for (cache = cmd_list_next(&cpu->cpu_Lxcaches); cache != NULL;
537 	    cache = cmd_list_next(cache)) {
538 		if ((cache->Lxcache_flags & CMD_LxCACHE_F_RETIRED) &&
539 		    ((cache->Lxcache_type == CMD_PTR_CPU_L2DATA) ||
540 		    IS_TAG(cache->Lxcache_type))) {
541 			ret_count++;
542 		}
543 	}
544 	return (ret_count);
545 }
546 
547 void
cmd_fault_the_cpu(fmd_hdl_t * hdl,cmd_cpu_t * cpu,cmd_ptrsubtype_t pstype,const char * fltnm)548 cmd_fault_the_cpu(fmd_hdl_t *hdl, cmd_cpu_t *cpu, cmd_ptrsubtype_t pstype,
549     const char *fltnm)
550 {
551 	fmd_case_t	*cp;
552 	const char 	*uuid;
553 
554 	cp = cmd_case_create(hdl, &cpu->cpu_header, pstype,
555 	    &uuid);
556 	fmd_hdl_debug(hdl,
557 	    "\n%s:cpu_id %d Created case %s to retire CPU\n",
558 	    fltnm, cpu->cpu_cpuid);
559 
560 	if ((errno = fmd_nvl_fmri_expand(hdl, cpu->cpu_asru_nvl)) != 0)
561 		fmd_hdl_abort(hdl, "failed to build CPU fmri");
562 
563 	cmd_cpu_create_faultlist(hdl, cp, cpu, fltnm, NULL, HUNDRED_PERCENT);
564 	fmd_case_solve(hdl, cp);
565 }
566 
567 void
cmd_retire_cpu_if_limits_exceeded(fmd_hdl_t * hdl,cmd_cpu_t * cpu,cmd_ptrsubtype_t pstype,const char * fltnm)568 cmd_retire_cpu_if_limits_exceeded(fmd_hdl_t *hdl, cmd_cpu_t *cpu,
569     cmd_ptrsubtype_t pstype, const char *fltnm)
570 {
571 	int cpu_retired_1, cpu_retired_2;
572 
573 	/* Retrieve the number of retired ways for each category */
574 
575 	cpu_retired_1 = cmd_Lx_index_count_type1_ways(cpu);
576 	cpu_retired_2 = cmd_Lx_index_count_type2_ways(cpu);
577 	fmd_hdl_debug(hdl,
578 	    "\n%s:CPU %d retired Type 1 way count is: %d\n",
579 	    fltnm, cpu->cpu_cpuid, cpu_retired_1);
580 	fmd_hdl_debug(hdl, "\n%s:CPU %d retired Type 2 way count is: %d\n",
581 	    fltnm, cpu->cpu_cpuid, cpu_retired_2);
582 
583 	if (((cpu_retired_1 > CMD_CPU_SERD_AGG_1) ||
584 	    (cpu_retired_2 > CMD_CPU_SERD_AGG_2)) &&
585 	    (cpu->cpu_faulting != FMD_B_TRUE)) {
586 		cmd_fault_the_cpu(hdl, cpu, pstype, fltnm);
587 	}
588 }
589 
590 void
cmd_Lxcache_fault(fmd_hdl_t * hdl,cmd_cpu_t * cpu,cmd_Lxcache_t * Lxcache,const char * fltnm,nvlist_t * rsrc,uint_t cert)591 cmd_Lxcache_fault(fmd_hdl_t *hdl, cmd_cpu_t *cpu, cmd_Lxcache_t *Lxcache,
592 	const char *fltnm, nvlist_t *rsrc, uint_t cert)
593 {
594 	char fltmsg[64];
595 	nvlist_t *flt;
596 
597 	(void) snprintf(fltmsg, sizeof (fltmsg), "fault.cpu.%s.%s-line",
598 	    cmd_cpu_type2name(hdl, cpu->cpu_type), fltnm);
599 	fmd_hdl_debug(hdl,
600 	    "\n%s:cpu_id %d: fltmsg = %s\n",
601 	    fltnm, cpu->cpu_cpuid, fltmsg);
602 	if (Lxcache->Lxcache_flags & CMD_LxCACHE_F_FAULTING) {
603 		return;
604 	}
605 	Lxcache->Lxcache_flags |= CMD_LxCACHE_F_FAULTING;
606 	flt = fmd_nvl_create_fault(hdl, fltmsg, cert,
607 	    Lxcache->Lxcache_asru.fmri_nvl, cpu->cpu_fru_nvl, rsrc);
608 	if (nvlist_add_boolean_value(flt, FM_SUSPECT_MESSAGE, B_FALSE) != 0)
609 		fmd_hdl_abort(hdl, "failed to add no-message member to fault");
610 
611 	fmd_hdl_debug(hdl,
612 	    "\n%s:cpu_id %d: adding suspect list to case %s\n",
613 	    fltnm, cpu->cpu_cpuid,
614 	    fmd_case_uuid(hdl, Lxcache->Lxcache_case.cc_cp));
615 	fmd_case_add_suspect(hdl, Lxcache->Lxcache_case.cc_cp, flt);
616 	fmd_case_solve(hdl, Lxcache->Lxcache_case.cc_cp);
617 	if (Lxcache->Lxcache_retired_fmri[0] == 0) {
618 		if (cmd_fmri_nvl2str(hdl, Lxcache->Lxcache_asru.fmri_nvl,
619 		    Lxcache->Lxcache_retired_fmri,
620 		    sizeof (Lxcache->Lxcache_retired_fmri)) == -1)
621 			fmd_hdl_debug(hdl,
622 			    "\n%s:cpu_id %d: Failed to save the"
623 			    " retired fmri string\n",
624 			    fltnm, cpu->cpu_cpuid);
625 		else
626 			fmd_hdl_debug(hdl,
627 			    "\n%s:cpu_id %d:Saved the retired fmri string %s\n",
628 			    fltnm, cpu->cpu_cpuid,
629 			    Lxcache->Lxcache_retired_fmri);
630 	}
631 	Lxcache->Lxcache_flags &= ~(CMD_LxCACHE_F_FAULTING);
632 
633 }
634 
635 void
cmd_Lxcache_close(fmd_hdl_t * hdl,void * arg)636 cmd_Lxcache_close(fmd_hdl_t *hdl, void *arg)
637 {
638 	cmd_cpu_t *cpu;
639 	cmd_Lxcache_t *Lxcache;
640 	cmd_case_t *cc;
641 
642 	Lxcache = (cmd_Lxcache_t *)arg;
643 	fmd_hdl_debug(hdl, "cmd_Lxcache_close called  for %s\n",
644 	    Lxcache->Lxcache_bufname);
645 	cc = &Lxcache->Lxcache_case;
646 
647 	for (cpu = cmd_list_next(&cmd.cmd_cpus); cpu != NULL;
648 	    cpu = cmd_list_next(cpu)) {
649 		if (strcmp(cpu->cpu_bufname,
650 		    Lxcache->Lxcache_cpu_bufname) == 0)
651 			break;
652 	}
653 	if (cpu == NULL)
654 		fmd_hdl_abort(hdl, "failed to find the cpu %s for %s\n",
655 		    Lxcache->Lxcache_cpu_bufname,
656 		    Lxcache->Lxcache_bufname);
657 	/*
658 	 * We will destroy the case and serd engine.
659 	 * The rest will be destroyed when we retire the CPU
660 	 * until then we keep the Lxcache strutures alive.
661 	 */
662 	if (cc->cc_cp != NULL) {
663 		cmd_case_fini(hdl, cc->cc_cp, FMD_B_TRUE);
664 		cc->cc_cp = NULL;
665 	}
666 	if (cc->cc_serdnm != NULL) {
667 		if (fmd_serd_exists(hdl, cc->cc_serdnm))
668 			fmd_serd_destroy(hdl, cc->cc_serdnm);
669 		fmd_hdl_strfree(hdl, cc->cc_serdnm);
670 		cc->cc_serdnm = NULL;
671 	}
672 
673 }
674 
675 cmd_Lxcache_t *
cmd_Lxcache_lookup_by_timeout_id(id_t id)676 cmd_Lxcache_lookup_by_timeout_id(id_t id)
677 {
678 	cmd_cpu_t *cpu;
679 	cmd_Lxcache_t *cmd_Lxcache;
680 
681 	for (cpu = cmd_list_next(&cmd.cmd_cpus); cpu != NULL;
682 	    cpu = cmd_list_next(cpu)) {
683 		for (cmd_Lxcache = cmd_list_next(&cpu->cpu_Lxcaches);
684 		    cmd_Lxcache != NULL;
685 		    cmd_Lxcache = cmd_list_next(cmd_Lxcache)) {
686 			if (cmd_Lxcache->Lxcache_timeout_id == id)
687 				return (cmd_Lxcache);
688 		}
689 	}
690 	return (NULL);
691 }
692 
693 void
cmd_Lxcache_gc(fmd_hdl_t * hdl)694 cmd_Lxcache_gc(fmd_hdl_t *hdl)
695 {
696 	cmd_cpu_t *cpu;
697 
698 	for (cpu = cmd_list_next(&cmd.cmd_cpus); cpu != NULL;
699 	    cpu = cmd_list_next(cpu))
700 		cmd_Lxcache_validate(hdl, cpu);
701 }
702 
703 cmd_evdisp_t
get_tagdata(cmd_cpu_t * cpu,cmd_ptrsubtype_t pstype,int32_t index,uint64_t * tag_data)704 get_tagdata(cmd_cpu_t *cpu, cmd_ptrsubtype_t pstype,
705 	    int32_t index, uint64_t	*tag_data)
706 {
707 	int		fd;
708 	cache_info_t	cache_info;
709 
710 	fd = open(mem_cache_device, O_RDONLY);
711 	if (fd == -1) {
712 		(void) printf(
713 		    "cpu_id = %d could not open %s to read tag info.\n",
714 		    cpu->cpu_cpuid, mem_cache_device);
715 		return (CMD_EVD_BAD);
716 	}
717 	switch (pstype) {
718 		case CMD_PTR_CPU_L2TAG:
719 		case CMD_PTR_CPU_L2DATA:
720 			cache_info.cache = L2_CACHE_TAG;
721 			break;
722 		case CMD_PTR_CPU_L3TAG:
723 		case CMD_PTR_CPU_L3DATA:
724 			cache_info.cache = L3_CACHE_TAG;
725 			break;
726 	}
727 	cache_info.cpu_id = cpu->cpu_cpuid;
728 	cache_info.index = index;
729 	cache_info.datap = tag_data;
730 	cache_info.way = 0;
731 
732 	if (test_mode) {
733 
734 		if (ioctl(fd, MEM_CACHE_READ_ERROR_INJECTED_TAGS, &cache_info)
735 		    == -1) {
736 			(void) printf("cpu_id = %d ioctl"
737 			    " MEM_CACHE_READ_ERROR_INJECTED_TAGS failed"
738 			    " errno = %d\n",
739 			    cpu->cpu_cpuid, errno);
740 			(void) close(fd);
741 			return (CMD_EVD_BAD);
742 		}
743 	} else {
744 		if (ioctl(fd, MEM_CACHE_READ_TAGS, &cache_info)
745 		    == -1) {
746 			(void) printf("cpu_id = %d ioctl"
747 			    " MEM_CACHE_READ_TAGS failed"
748 			    " errno = %d\n",
749 			    cpu->cpu_cpuid, errno);
750 			(void) close(fd);
751 			return (CMD_EVD_BAD);
752 		}
753 	}
754 	(void) close(fd);
755 	return (CMD_EVD_OK);
756 }
757 
758 int
get_index_retired_ways(cmd_cpu_t * cpu,cmd_ptrsubtype_t pstype,int32_t index)759 get_index_retired_ways(cmd_cpu_t *cpu, cmd_ptrsubtype_t pstype, int32_t index)
760 {
761 	int		i, retired_ways;
762 	uint64_t	tag_data[PN_CACHE_NWAYS];
763 
764 	if (get_tagdata(cpu, pstype, index, tag_data) != 0) {
765 		return (-1);
766 	}
767 	retired_ways = 0;
768 	for (i = 0; i < PN_CACHE_NWAYS; i++) {
769 		if ((tag_data[i] & CH_ECSTATE_MASK) ==
770 		    PN_ECSTATE_NA)
771 			retired_ways++;
772 	}
773 	return (retired_ways);
774 }
775 
776 boolean_t
cmd_cache_way_retire(fmd_hdl_t * hdl,cmd_cpu_t * cpu,cmd_Lxcache_t * Lxcache)777 cmd_cache_way_retire(fmd_hdl_t *hdl, cmd_cpu_t *cpu, cmd_Lxcache_t *Lxcache)
778 {
779 	const char		*fltnm;
780 	cache_info_t    cache_info;
781 	int ret, fd;
782 
783 	fltnm = cmd_type_to_str(Lxcache->Lxcache_type);
784 	fd = open(mem_cache_device, O_RDWR);
785 	if (fd == -1) {
786 		fmd_hdl_debug(hdl,
787 		    "fltnm:cpu_id %d open of %s failed\n",
788 		    fltnm, cpu->cpu_cpuid, mem_cache_device);
789 		return (B_FALSE);
790 	}
791 	cache_info.cpu_id = cpu->cpu_cpuid;
792 	cache_info.way = Lxcache->Lxcache_way;
793 	cache_info.bit = Lxcache->Lxcache_bit;
794 	cache_info.index = Lxcache->Lxcache_index;
795 
796 	switch (Lxcache->Lxcache_type) {
797 		case CMD_PTR_CPU_L2TAG:
798 			cache_info.cache = L2_CACHE_TAG;
799 			break;
800 		case CMD_PTR_CPU_L2DATA:
801 			cache_info.cache = L2_CACHE_DATA;
802 			break;
803 		case CMD_PTR_CPU_L3TAG:
804 			cache_info.cache = L3_CACHE_TAG;
805 			break;
806 		case CMD_PTR_CPU_L3DATA:
807 			cache_info.cache = L3_CACHE_DATA;
808 			break;
809 	}
810 
811 	fmd_hdl_debug(hdl,
812 	    "\n%s:cpu %d: Retiring index %d, way %d bit %d\n",
813 	    fltnm, cpu->cpu_cpuid, cache_info.index, cache_info.way,
814 	    (int16_t)cache_info.bit);
815 	ret = ioctl(fd, MEM_CACHE_RETIRE, &cache_info);
816 	(void) close(fd);
817 	if (ret == -1) {
818 		fmd_hdl_debug(hdl,
819 		    "fltnm:cpu_id %d MEM_CACHE_RETIRE ioctl failed\n",
820 		    fltnm, cpu->cpu_cpuid);
821 		return (B_FALSE);
822 	}
823 
824 	return (B_TRUE);
825 }
826 
827 boolean_t
cmd_cache_way_unretire(fmd_hdl_t * hdl,cmd_cpu_t * cpu,cmd_Lxcache_t * Lxcache)828 cmd_cache_way_unretire(fmd_hdl_t *hdl, cmd_cpu_t *cpu, cmd_Lxcache_t *Lxcache)
829 {
830 	const char		*fltnm;
831 	cache_info_t    cache_info;
832 	int ret, fd;
833 
834 	fltnm = cmd_type_to_str(Lxcache->Lxcache_type);
835 	fd = open(mem_cache_device, O_RDWR);
836 	if (fd == -1) {
837 		fmd_hdl_debug(hdl,
838 		    "fltnm:cpu_id %d open of %s failed\n",
839 		    fltnm, cpu->cpu_cpuid, mem_cache_device);
840 		return (B_FALSE);
841 	}
842 	cache_info.cpu_id = cpu->cpu_cpuid;
843 	cache_info.way = Lxcache->Lxcache_way;
844 	cache_info.bit = Lxcache->Lxcache_bit;
845 	cache_info.index = Lxcache->Lxcache_index;
846 
847 	switch (Lxcache->Lxcache_type) {
848 		case CMD_PTR_CPU_L2TAG:
849 			cache_info.cache = L2_CACHE_TAG;
850 			break;
851 		case CMD_PTR_CPU_L2DATA:
852 			cache_info.cache = L2_CACHE_DATA;
853 			break;
854 		case CMD_PTR_CPU_L3TAG:
855 			cache_info.cache = L3_CACHE_TAG;
856 			break;
857 		case CMD_PTR_CPU_L3DATA:
858 			cache_info.cache = L3_CACHE_DATA;
859 			break;
860 	}
861 
862 	fmd_hdl_debug(hdl,
863 	    "\n%s:cpu %d: Unretiring index %d, way %d bit %d\n",
864 	    fltnm, cpu->cpu_cpuid, cache_info.index, cache_info.way,
865 	    (int16_t)cache_info.bit);
866 	ret = ioctl(fd, MEM_CACHE_UNRETIRE, &cache_info);
867 	(void) close(fd);
868 	if (ret == -1) {
869 		fmd_hdl_debug(hdl,
870 		    "fltnm:cpu_id %d MEM_CACHE_UNRETIRE ioctl failed\n",
871 		    fltnm, cpu->cpu_cpuid);
872 		return (B_FALSE);
873 	}
874 
875 	return (B_TRUE);
876 }
877 
878 static cmd_Lxcache_t *
cmd_Lxcache_lookup_by_type_index_way_flags(cmd_cpu_t * cpu,cmd_ptrsubtype_t type,int32_t index,int8_t way,int32_t flags)879 cmd_Lxcache_lookup_by_type_index_way_flags(cmd_cpu_t *cpu,
880     cmd_ptrsubtype_t type, int32_t index, int8_t way, int32_t flags)
881 {
882 	cmd_Lxcache_t *cmd_Lxcache;
883 
884 	for (cmd_Lxcache = cmd_list_next(&cpu->cpu_Lxcaches);
885 	    cmd_Lxcache != NULL;
886 	    cmd_Lxcache = cmd_list_next(cmd_Lxcache)) {
887 		if ((cmd_Lxcache->Lxcache_index == index) &&
888 		    (cmd_Lxcache->Lxcache_way == way) &&
889 		    (cmd_Lxcache->Lxcache_type == type) &&
890 		    (cmd_Lxcache->Lxcache_flags & flags))
891 			return (cmd_Lxcache);
892 	}
893 	return (NULL);
894 }
895 
896 static int8_t
cmd_Lxcache_get_bit_array_of_available_ways(cmd_cpu_t * cpu,cmd_ptrsubtype_t type,int32_t index)897 cmd_Lxcache_get_bit_array_of_available_ways(cmd_cpu_t *cpu,
898     cmd_ptrsubtype_t type, int32_t index)
899 {
900 	uint8_t bit_array_of_unavailable_ways;
901 	uint8_t bit_array_of_available_ways;
902 	cmd_ptrsubtype_t match_type;
903 	cmd_Lxcache_t *cmd_Lxcache;
904 	uint8_t bit_array_of_retired_ways;
905 
906 
907 	/*
908 	 * We scan the Lxcache structures for this CPU and collect
909 	 * the following 2 information.
910 	 * - bit_array_of_retired_ways
911 	 * - bit_array_of_unavailable_ways
912 	 * If type is Lx_TAG then unavailable_ways will not include ways that
913 	 * were retired due to DATA faults, because these ways can still be
914 	 * re-retired for TAG faults.
915 	 * If 3 ways have been retired then we protect the only remaining
916 	 * unretired way by marking it as unavailable.
917 	 */
918 	bit_array_of_unavailable_ways = 0;
919 	bit_array_of_retired_ways = 0;
920 	switch (type) {
921 		case CMD_PTR_CPU_L2TAG:
922 			match_type = CMD_PTR_CPU_L2DATA;
923 			break;
924 		case CMD_PTR_CPU_L2DATA:
925 			match_type = CMD_PTR_CPU_L2TAG;
926 			break;
927 		case CMD_PTR_CPU_L3TAG:
928 			match_type = CMD_PTR_CPU_L3DATA;
929 			break;
930 		case CMD_PTR_CPU_L3DATA:
931 			match_type = CMD_PTR_CPU_L3TAG;
932 			break;
933 	}
934 
935 	for (cmd_Lxcache = cmd_list_next(&cpu->cpu_Lxcaches);
936 	    cmd_Lxcache != NULL;
937 	    cmd_Lxcache = cmd_list_next(cmd_Lxcache)) {
938 		if ((cmd_Lxcache->Lxcache_index == index) &&
939 		    ((cmd_Lxcache->Lxcache_type == type) ||
940 		    (cmd_Lxcache->Lxcache_type == match_type)) &&
941 		    (cmd_Lxcache->Lxcache_flags &
942 		    (CMD_LxCACHE_F_RETIRED | CMD_LxCACHE_F_RERETIRED))) {
943 			bit_array_of_retired_ways |=
944 			    (1 << cmd_Lxcache->Lxcache_way);
945 			/*
946 			 * If we are calling this while handling TAG errors
947 			 * we can reretire the cachelines retired due to DATA
948 			 * errors. We will ignore the cachelnes that are
949 			 * retired due to DATA faults.
950 			 */
951 			if ((type == CMD_PTR_CPU_L2TAG) &&
952 			    (cmd_Lxcache->Lxcache_type == CMD_PTR_CPU_L2DATA))
953 				continue;
954 			if ((type == CMD_PTR_CPU_L3TAG) &&
955 			    (cmd_Lxcache->Lxcache_type == CMD_PTR_CPU_L3DATA))
956 				continue;
957 			bit_array_of_unavailable_ways |=
958 			    (1 << cmd_Lxcache->Lxcache_way);
959 		}
960 	}
961 	if (cmd_num_of_bits[bit_array_of_retired_ways & 0xf] == 3) {
962 		/*
963 		 * special case: 3 ways are already retired.
964 		 * The Lone unretired way is set as 1, rest are set as 0.
965 		 * We now OR this with bit_array_of_unavailable_ways
966 		 * so that this unretired way will not be allocated.
967 		 */
968 		bit_array_of_retired_ways ^= 0xf;
969 		bit_array_of_retired_ways &= 0xf;
970 		bit_array_of_unavailable_ways |= bit_array_of_retired_ways;
971 	}
972 	bit_array_of_available_ways =
973 	    ((bit_array_of_unavailable_ways ^ 0xf) & 0xf);
974 	return (bit_array_of_available_ways);
975 }
976 
977 
978 /*
979  * Look for a way next to the specified way that is
980  * not in a retired state.
981  * We stop when way 3 is reached.
982  */
983 int8_t
cmd_Lxcache_get_next_retirable_way(cmd_cpu_t * cpu,int32_t index,cmd_ptrsubtype_t pstype,int8_t specified_way)984 cmd_Lxcache_get_next_retirable_way(cmd_cpu_t *cpu,
985     int32_t index, cmd_ptrsubtype_t pstype, int8_t specified_way)
986 {
987 	uint8_t bit_array_of_ways;
988 	int8_t mask;
989 
990 	if (specified_way == 3)
991 		return (-1);
992 	bit_array_of_ways = cmd_Lxcache_get_bit_array_of_available_ways(
993 	    cpu,
994 	    pstype, index);
995 	if (specified_way == 2)
996 		mask = 0x8;
997 	else if (specified_way == 1)
998 		mask = 0xc;
999 	else
1000 		mask = 0xe;
1001 	return (cmd_lowest_way[bit_array_of_ways & mask]);
1002 }
1003 
1004 int8_t
cmd_Lxcache_get_lowest_retirable_way(cmd_cpu_t * cpu,int32_t index,cmd_ptrsubtype_t pstype)1005 cmd_Lxcache_get_lowest_retirable_way(cmd_cpu_t *cpu,
1006     int32_t index, cmd_ptrsubtype_t pstype)
1007 {
1008 	uint8_t bit_array_of_ways;
1009 
1010 	bit_array_of_ways = cmd_Lxcache_get_bit_array_of_available_ways(
1011 	    cpu,
1012 	    pstype, index);
1013 	return (cmd_lowest_way[bit_array_of_ways]);
1014 }
1015 
1016 cmd_Lxcache_t *
cmd_Lxcache_lookup_by_type_index_way_reason(cmd_cpu_t * cpu,cmd_ptrsubtype_t pstype,int32_t index,int8_t way,int32_t reason)1017 cmd_Lxcache_lookup_by_type_index_way_reason(cmd_cpu_t *cpu,
1018     cmd_ptrsubtype_t pstype, int32_t index, int8_t way, int32_t reason)
1019 {
1020 	cmd_Lxcache_t *cmd_Lxcache;
1021 
1022 	for (cmd_Lxcache = cmd_list_next(&cpu->cpu_Lxcaches);
1023 	    cmd_Lxcache != NULL;
1024 	    cmd_Lxcache = cmd_list_next(cmd_Lxcache)) {
1025 		if ((cmd_Lxcache->Lxcache_index == (uint32_t)index) &&
1026 		    (cmd_Lxcache->Lxcache_way == (uint32_t)way) &&
1027 		    (cmd_Lxcache->Lxcache_reason & reason) &&
1028 		    (cmd_Lxcache->Lxcache_type == pstype)) {
1029 			return (cmd_Lxcache);
1030 		}
1031 	}
1032 	return (NULL);
1033 }
1034 
1035 cmd_Lxcache_t *
cmd_Lxcache_lookup_by_type_index_bit_reason(cmd_cpu_t * cpu,cmd_ptrsubtype_t pstype,int32_t index,int16_t bit,int32_t reason)1036 cmd_Lxcache_lookup_by_type_index_bit_reason(cmd_cpu_t *cpu,
1037     cmd_ptrsubtype_t pstype, int32_t index, int16_t bit, int32_t reason)
1038 {
1039 	cmd_Lxcache_t *cmd_Lxcache;
1040 
1041 	for (cmd_Lxcache = cmd_list_next(&cpu->cpu_Lxcaches);
1042 	    cmd_Lxcache != NULL;
1043 	    cmd_Lxcache = cmd_list_next(cmd_Lxcache)) {
1044 		if ((cmd_Lxcache->Lxcache_index == (uint32_t)index) &&
1045 		    (cmd_Lxcache->Lxcache_bit == (uint16_t)bit) &&
1046 		    (cmd_Lxcache->Lxcache_reason & reason) &&
1047 		    (cmd_Lxcache->Lxcache_type == pstype)) {
1048 			return (cmd_Lxcache);
1049 		}
1050 	}
1051 	return (NULL);
1052 }
1053 
1054 void
cmd_Lxcache_destroy_anonymous_serd_engines(fmd_hdl_t * hdl,cmd_cpu_t * cpu,cmd_ptrsubtype_t type,int32_t index,int16_t bit)1055 cmd_Lxcache_destroy_anonymous_serd_engines(fmd_hdl_t *hdl, cmd_cpu_t *cpu,
1056     cmd_ptrsubtype_t type, int32_t index, int16_t bit)
1057 {
1058 	cmd_Lxcache_t *cmd_Lxcache;
1059 	cmd_case_t *cc;
1060 
1061 	for (cmd_Lxcache = cmd_list_next(&cpu->cpu_Lxcaches);
1062 	    cmd_Lxcache != NULL;
1063 	    cmd_Lxcache = cmd_list_next(cmd_Lxcache)) {
1064 		if ((cmd_Lxcache->Lxcache_type == type) &&
1065 		    (cmd_Lxcache->Lxcache_index == (uint32_t)index) &&
1066 		    (cmd_Lxcache->Lxcache_bit == (uint16_t)bit) &&
1067 		    (cmd_Lxcache->Lxcache_way == (uint32_t)CMD_ANON_WAY)) {
1068 			cc = &cmd_Lxcache->Lxcache_case;
1069 			if (cc == NULL)
1070 				continue;
1071 			if (cc->cc_serdnm != NULL) {
1072 				if (fmd_serd_exists(hdl, cc->cc_serdnm)) {
1073 					fmd_hdl_debug(hdl,
1074 					    "\n%s:cpu_id %d destroying SERD"
1075 					    " engine %s\n",
1076 					    cmd_type_to_str(type),
1077 					    cpu->cpu_cpuid, cc->cc_serdnm);
1078 					fmd_serd_destroy(hdl, cc->cc_serdnm);
1079 				}
1080 				fmd_hdl_strfree(hdl, cc->cc_serdnm);
1081 				cc->cc_serdnm = NULL;
1082 			}
1083 		}
1084 	}
1085 }
1086 
1087 ssize_t
cmd_fmri_nvl2str(fmd_hdl_t * hdl,nvlist_t * nvl,char * buf,size_t buflen)1088 cmd_fmri_nvl2str(fmd_hdl_t *hdl, nvlist_t *nvl, char *buf, size_t buflen)
1089 {
1090 	uint8_t type;
1091 	uint32_t cpuid, way;
1092 	uint32_t	index;
1093 	uint16_t	bit;
1094 	char *serstr = NULL;
1095 	char	missing_list[128];
1096 
1097 	missing_list[0] = 0;
1098 	if (nvlist_lookup_uint32(nvl, FM_FMRI_CPU_ID, &cpuid) != 0)
1099 		(void) strcat(missing_list, FM_FMRI_CPU_ID);
1100 	if (nvlist_lookup_string(nvl, FM_FMRI_CPU_SERIAL_ID, &serstr) != 0)
1101 		(void) strcat(missing_list, FM_FMRI_CPU_SERIAL_ID);
1102 	if (nvlist_lookup_uint32(nvl, FM_FMRI_CPU_CACHE_INDEX, &index) != 0)
1103 		(void) strcat(missing_list, FM_FMRI_CPU_CACHE_INDEX);
1104 	if (nvlist_lookup_uint32(nvl, FM_FMRI_CPU_CACHE_WAY, &way) != 0)
1105 		(void) strcat(missing_list, FM_FMRI_CPU_CACHE_WAY);
1106 	if (nvlist_lookup_uint16(nvl, FM_FMRI_CPU_CACHE_BIT, &bit) != 0)
1107 		(void) strcat(missing_list, FM_FMRI_CPU_CACHE_BIT);
1108 	if (nvlist_lookup_uint8(nvl, FM_FMRI_CPU_CACHE_TYPE, &type) != 0)
1109 		(void) strcat(missing_list, FM_FMRI_CPU_CACHE_TYPE);
1110 
1111 	if (strlen(missing_list) != 0) {
1112 		fmd_hdl_debug(hdl,
1113 		    "\ncmd_fmri_nvl2str: missing %s in fmri\n",
1114 		    missing_list);
1115 		return (-1);
1116 	}
1117 
1118 	return (snprintf(buf, buflen,
1119 	    "cpu:///%s=%u/%s=%s/%s=%u/%s=%u/%s=%d/%s=%d",
1120 	    FM_FMRI_CPU_ID, cpuid,
1121 	    FM_FMRI_CPU_SERIAL_ID, serstr,
1122 	    FM_FMRI_CPU_CACHE_INDEX, index,
1123 	    FM_FMRI_CPU_CACHE_WAY, way,
1124 	    FM_FMRI_CPU_CACHE_BIT, bit,
1125 	    FM_FMRI_CPU_CACHE_TYPE, type));
1126 }
1127 
1128 boolean_t
cmd_create_case_for_Lxcache(fmd_hdl_t * hdl,cmd_cpu_t * cpu,cmd_Lxcache_t * cmd_Lxcache)1129 cmd_create_case_for_Lxcache(fmd_hdl_t *hdl, cmd_cpu_t *cpu,
1130     cmd_Lxcache_t *cmd_Lxcache)
1131 {
1132 	const char *fltnm;
1133 	const char *uuid;
1134 
1135 	if (cmd_Lxcache->Lxcache_case.cc_cp != NULL)
1136 		return (B_TRUE);
1137 	cmd_Lxcache->Lxcache_case.cc_cp = cmd_case_create(hdl,
1138 	    &cmd_Lxcache->Lxcache_header, CMD_PTR_LxCACHE_CASE,
1139 	    &uuid);
1140 	fltnm = cmd_type_to_str(cmd_Lxcache->Lxcache_type);
1141 	if (cmd_Lxcache->Lxcache_case.cc_cp == NULL) {
1142 		fmd_hdl_debug(hdl,
1143 		    "\n%s:cpu_id %d:Failed to create a case for"
1144 		    " index %d way %d bit %d\n",
1145 		    fltnm, cpu->cpu_cpuid,
1146 		    cmd_Lxcache->Lxcache_index,
1147 		    cmd_Lxcache->Lxcache_way, cmd_Lxcache->Lxcache_bit);
1148 		return (B_FALSE);
1149 	}
1150 	fmd_hdl_debug(hdl,
1151 	    "\n%s:cpu_id %d: New case %s created.\n",
1152 	    fltnm, cpu->cpu_cpuid, uuid);
1153 	if (cmd_Lxcache->Lxcache_ep)
1154 		fmd_case_add_ereport(hdl, cmd_Lxcache->Lxcache_case.cc_cp,
1155 		    cmd_Lxcache->Lxcache_ep);
1156 	return (B_TRUE);
1157 }
1158 
1159 static int
cmd_repair_fmri(fmd_hdl_t * hdl,char * buf)1160 cmd_repair_fmri(fmd_hdl_t *hdl, char *buf)
1161 {
1162 	int err;
1163 
1164 	err = fmd_repair_asru(hdl, buf);
1165 	if (err) {
1166 		fmd_hdl_debug(hdl,
1167 		    "Failed to repair %s err = %d\n", buf, err);
1168 	}
1169 	return (err);
1170 }
1171 
1172 boolean_t
cmd_Lxcache_unretire(fmd_hdl_t * hdl,cmd_cpu_t * cpu,cmd_Lxcache_t * unretire_this_Lxcache,const char * fltnm)1173 cmd_Lxcache_unretire(fmd_hdl_t *hdl, cmd_cpu_t *cpu,
1174     cmd_Lxcache_t *unretire_this_Lxcache, const char *fltnm)
1175 {
1176 	cmd_ptrsubtype_t data_type;
1177 	cmd_Lxcache_t *previously_retired_Lxcache;
1178 	int	found_reretired_cacheline = 0;
1179 	int	certainty;
1180 
1181 	/*
1182 	 * If we are unretiring a cacheline retired due to suspected TAG
1183 	 * fault, then we must first check if we are using a cacheline
1184 	 * that was retired earlier for DATA fault.
1185 	 * If so we will not unretire the cacheline.
1186 	 * We will change the flags to reflect the current condition.
1187 	 * We will return success, though.
1188 	 */
1189 	if (IS_TAG(unretire_this_Lxcache->Lxcache_type)) {
1190 		if (unretire_this_Lxcache->Lxcache_type == CMD_PTR_CPU_L2TAG)
1191 			data_type = CMD_PTR_CPU_L2DATA;
1192 		if (unretire_this_Lxcache->Lxcache_type == CMD_PTR_CPU_L3TAG)
1193 			data_type = CMD_PTR_CPU_L3DATA;
1194 		fmd_hdl_debug(hdl,
1195 		    "\n%s:cpuid %d checking if there is a %s"
1196 		    " cacheline re-retired at this index %d and way %d\n",
1197 		    fltnm, cpu->cpu_cpuid, cmd_type_to_str(data_type),
1198 		    unretire_this_Lxcache->Lxcache_index,
1199 		    unretire_this_Lxcache->Lxcache_way);
1200 		previously_retired_Lxcache =
1201 		    cmd_Lxcache_lookup_by_type_index_way_flags(
1202 		    cpu, data_type, unretire_this_Lxcache->Lxcache_index,
1203 		    unretire_this_Lxcache->Lxcache_way,
1204 		    CMD_LxCACHE_F_RERETIRED);
1205 		if (previously_retired_Lxcache) {
1206 			fmd_hdl_debug(hdl,
1207 			    "\n%s:cpuid %d Found a %s cacheline re-retired at"
1208 			    " this index %d and way %d. Will mark this"
1209 			    " RETIRED\n",
1210 			    fltnm, cpu->cpu_cpuid, cmd_type_to_str(data_type),
1211 			    unretire_this_Lxcache->Lxcache_index,
1212 			    unretire_this_Lxcache->Lxcache_way);
1213 			/*
1214 			 * We call the cmd_Lxcache_fault to inform fmd
1215 			 * about the suspect fmri. The cacheline is already
1216 			 * retired but the existing suspect fmri is for TAG
1217 			 * fault which will be removed in this routine.
1218 			 */
1219 			if (previously_retired_Lxcache->Lxcache_reason
1220 			    == CMD_LXCONVICTED)
1221 				certainty = HUNDRED_PERCENT;
1222 			else
1223 				certainty = SUSPECT_PERCENT;
1224 			cmd_Lxcache_fault(hdl, cpu, previously_retired_Lxcache,
1225 			    fltnm, cpu->cpu_fru_nvl, certainty);
1226 			previously_retired_Lxcache->Lxcache_flags =
1227 			    CMD_LxCACHE_F_RETIRED;
1228 			/*
1229 			 * Update persistent storage
1230 			 */
1231 			cmd_Lxcache_write(hdl, previously_retired_Lxcache);
1232 			found_reretired_cacheline = 1;
1233 		}
1234 	} else {
1235 		/*
1236 		 * We have been called to unretire a cacheline retired
1237 		 * earlier due to DATA errors.
1238 		 * If this cacheline is marked RERETIRED then it means that
1239 		 * the cacheline has been retired due to TAG errors and
1240 		 * we should not be unretiring the cacheline.
1241 		 */
1242 		if (unretire_this_Lxcache->Lxcache_flags &
1243 		    CMD_LxCACHE_F_RERETIRED) {
1244 			fmd_hdl_debug(hdl,
1245 			    "\n%s:cpuid %d The cacheline at index %d and"
1246 			    " way %d  which we are attempting to unretire"
1247 			    " is in RERETIRED state. Therefore we will not"
1248 			    " unretire it but will mark it as RETIRED.\n",
1249 			    fltnm, cpu->cpu_cpuid,
1250 			    unretire_this_Lxcache->Lxcache_index,
1251 			    unretire_this_Lxcache->Lxcache_way);
1252 			found_reretired_cacheline = 1;
1253 		}
1254 	}
1255 	/*
1256 	 * if we did not find a RERETIRED cacheline above
1257 	 * unretire the cacheline.
1258 	 */
1259 	if (!found_reretired_cacheline) {
1260 		if (cmd_cache_way_unretire(hdl, cpu, unretire_this_Lxcache)
1261 		    == B_FALSE)
1262 			return (B_FALSE);
1263 	}
1264 	unretire_this_Lxcache->Lxcache_flags = CMD_LxCACHE_F_UNRETIRED;
1265 	/*
1266 	 * We have exonerated the cacheline. We need to inform the fmd
1267 	 * that we have repaired the suspect fmri that we retired earlier.
1268 	 * The cpumem agent will not unretire cacheline in response to
1269 	 * the list.repair events it receives.
1270 	 */
1271 	if (unretire_this_Lxcache->Lxcache_retired_fmri[0] != 0) {
1272 		fmd_hdl_debug(hdl,
1273 		    "\n%s:cpuid %d Repairing the retired fmri %s",
1274 		    fltnm, cpu->cpu_cpuid,
1275 		    unretire_this_Lxcache->Lxcache_retired_fmri);
1276 		if (cmd_repair_fmri(hdl,
1277 		    unretire_this_Lxcache->Lxcache_retired_fmri) != 0) {
1278 			fmd_hdl_debug(hdl,
1279 			    "\n%s:cpuid %d Failed to repair retired fmri.",
1280 			    fltnm, cpu->cpu_cpuid);
1281 			/*
1282 			 * We need to retire the cacheline that we just
1283 			 * unretired.
1284 			 */
1285 			if (cmd_cache_way_retire(hdl, cpu,
1286 			    unretire_this_Lxcache) == B_FALSE) {
1287 				/*
1288 				 * A hopeless situation.
1289 				 * cannot maintain consistency of cacheline
1290 				 * sate between fmd and DE.
1291 				 * Aborting the DE.
1292 				 */
1293 				fmd_hdl_abort(hdl,
1294 				    "\n%s:cpuid %d We are unable to repair"
1295 				    " the fmri we just unretired and are"
1296 				    " unable to restore the DE and fmd to"
1297 				    " a sane state.\n",
1298 				    fltnm, cpu->cpu_cpuid);
1299 			}
1300 			return (B_FALSE);
1301 		} else {
1302 			unretire_this_Lxcache->Lxcache_retired_fmri[0] = 0;
1303 		}
1304 	}
1305 	return (B_TRUE);
1306 }
1307 
1308 boolean_t
cmd_Lxcache_retire(fmd_hdl_t * hdl,cmd_cpu_t * cpu,cmd_Lxcache_t * retire_this_Lxcache,const char * fltnm,uint_t cert)1309 cmd_Lxcache_retire(fmd_hdl_t *hdl, cmd_cpu_t *cpu,
1310     cmd_Lxcache_t *retire_this_Lxcache, const char *fltnm, uint_t cert)
1311 {
1312 	cmd_Lxcache_t *previously_retired_Lxcache;
1313 	cmd_ptrsubtype_t data_type;
1314 	const char	*uuid;
1315 	char	suspect_list[128];
1316 
1317 	fmd_hdl_debug(hdl,
1318 	    "\n%s:cpu_id %d: cmd_Lxcache_retire called for index %d"
1319 	    " way %d bit %d\n",
1320 	    fltnm, cpu->cpu_cpuid, retire_this_Lxcache->Lxcache_index,
1321 	    retire_this_Lxcache->Lxcache_way, retire_this_Lxcache->Lxcache_bit);
1322 	if (fmd_case_solved(hdl, retire_this_Lxcache->Lxcache_case.cc_cp)) {
1323 		/*
1324 		 * Case solved implies that the cache line is already
1325 		 * retired as SUSPECT_0_TAG and we are here to retire this
1326 		 * as SUSPECT_1_TAG.
1327 		 * We will first repair the retired cacheline
1328 		 * so that it does not get retired during replay for
1329 		 *  wrong reason.
1330 		 * If we are able to repair the retired cacheline we close the
1331 		 * case and open a new case for it.
1332 		 */
1333 		if (retire_this_Lxcache->Lxcache_reason !=
1334 		    CMD_LXSUSPECT_0_TAG) {
1335 			fmd_hdl_debug(hdl,
1336 			    "\n%s:cpu_id %d: Unexpected condition encountered."
1337 			    " Expected the reason for retirement as"
1338 			    " SUSPECT_0_TAG however found the reason"
1339 			    " to be %s\n",
1340 			    fltnm, cpu->cpu_cpuid,
1341 			    cmd_reason_to_str(
1342 			    retire_this_Lxcache->Lxcache_reason));
1343 			return (B_FALSE);
1344 		}
1345 		fmd_hdl_debug(hdl,
1346 		    "\n%s:cpu_id %d: We are re-retiring SUSPECT_0_TAG as"
1347 		    " SUSPECT_1_TAG index %d way %d bit %d\n",
1348 		    fltnm, cpu->cpu_cpuid,
1349 		    retire_this_Lxcache->Lxcache_index,
1350 		    retire_this_Lxcache->Lxcache_way,
1351 		    retire_this_Lxcache->Lxcache_bit);
1352 		fmd_hdl_debug(hdl,
1353 		    "\n%s:cpu_id %d: The existing case for this Lxcache has"
1354 		    " has been already solved. We will first repair the suspect"
1355 		    " cacheline and if we are successful then close this case,"
1356 		    " and open a new case.\n",
1357 		    fltnm, cpu->cpu_cpuid);
1358 		/*
1359 		 * repair the retired cacheline.
1360 		 */
1361 		if (retire_this_Lxcache->Lxcache_retired_fmri[0] != 0) {
1362 			fmd_hdl_debug(hdl,
1363 			    "\n%s:cpuid %d Repairing the retired suspect"
1364 			    " cacheline %s\n",
1365 			    fltnm, cpu->cpu_cpuid,
1366 			    retire_this_Lxcache->Lxcache_retired_fmri);
1367 			if (cmd_repair_fmri(hdl,
1368 			    retire_this_Lxcache->Lxcache_retired_fmri) != 0) {
1369 				fmd_hdl_debug(hdl,
1370 				    "\n%s:cpuid %d Failed to repair the"
1371 				    " retired fmri.",
1372 				    fltnm, cpu->cpu_cpuid);
1373 				return (B_FALSE);
1374 			} else {
1375 				retire_this_Lxcache->Lxcache_retired_fmri[0] =
1376 				    0;
1377 			}
1378 		}
1379 		uuid = fmd_case_uuid(hdl,
1380 		    retire_this_Lxcache->Lxcache_case.cc_cp);
1381 		fmd_hdl_debug(hdl,
1382 		    "\n%s:cpuid %d: Closing the case %s\n",
1383 		    fltnm, cpu->cpu_cpuid, uuid);
1384 		cmd_case_fini(hdl, retire_this_Lxcache->Lxcache_case.cc_cp,
1385 		    FMD_B_TRUE);
1386 		retire_this_Lxcache->Lxcache_case.cc_cp = NULL;
1387 		if (cmd_create_case_for_Lxcache(hdl, cpu, retire_this_Lxcache)
1388 		    == B_FALSE)
1389 			return (B_FALSE);
1390 	} else {
1391 		/*
1392 		 * Not a SUSPECT_0_TAG.
1393 		 * We should be entering this path if the cacheline is
1394 		 * transitioning  from ACTIVE/UNRETIRED to RETIRED state.
1395 		 * If the cacheline state is not as expected we print debug
1396 		 * message and return failure.
1397 		 */
1398 		if ((retire_this_Lxcache->Lxcache_flags !=
1399 		    CMD_LxCACHE_F_ACTIVE) &&
1400 		    (retire_this_Lxcache->Lxcache_flags
1401 		    != CMD_LxCACHE_F_UNRETIRED)) {
1402 			/*
1403 			 * Unexpected condition.
1404 			 */
1405 			fmd_hdl_debug(hdl,
1406 			    "\n%s:cpu_id %d:Unexpected state %s for the"
1407 			    " cacheline at index %d way %d encountered.\n",
1408 			    fltnm, cpu->cpu_cpuid,
1409 			    cmd_flags_to_str(
1410 			    retire_this_Lxcache->Lxcache_flags),
1411 			    retire_this_Lxcache->Lxcache_index,
1412 			    retire_this_Lxcache->Lxcache_way);
1413 			return (B_FALSE);
1414 		}
1415 	}
1416 	suspect_list[0] = 0;
1417 	(void) cmd_fmri_nvl2str(hdl, retire_this_Lxcache->Lxcache_asru.fmri_nvl,
1418 	    suspect_list, sizeof (suspect_list));
1419 	fmd_hdl_debug(hdl,
1420 	    "\n%s:cpu_id %d:current suspect list is %s\n",
1421 	    fltnm, cpu->cpu_cpuid, suspect_list);
1422 	cmd_Lxcache_fault(hdl, cpu, retire_this_Lxcache, fltnm,
1423 	    cpu->cpu_fru_nvl,
1424 	    cert);
1425 	retire_this_Lxcache->Lxcache_flags = CMD_LxCACHE_F_RETIRED;
1426 	if (IS_TAG(retire_this_Lxcache->Lxcache_type)) {
1427 		/*
1428 		 * If the cacheline we just retired was retired earlier
1429 		 * due to DATA faults we mark the Lxcache
1430 		 * corresponding to DATA as RERETIRED.
1431 		 */
1432 		if (retire_this_Lxcache->Lxcache_type == CMD_PTR_CPU_L2TAG)
1433 			data_type = CMD_PTR_CPU_L2DATA;
1434 		if (retire_this_Lxcache->Lxcache_type == CMD_PTR_CPU_L3TAG)
1435 			data_type = CMD_PTR_CPU_L3DATA;
1436 		fmd_hdl_debug(hdl,
1437 		    "\n%s:cpuid %d checking if there is a %s"
1438 		    " cacheline retired at this index %d way %d\n",
1439 		    fltnm, cpu->cpu_cpuid,
1440 		    cmd_type_to_str(data_type),
1441 		    retire_this_Lxcache->Lxcache_index,
1442 		    retire_this_Lxcache->Lxcache_way);
1443 		previously_retired_Lxcache =
1444 		    cmd_Lxcache_lookup_by_type_index_way_flags(cpu,
1445 		    data_type, retire_this_Lxcache->Lxcache_index,
1446 		    retire_this_Lxcache->Lxcache_way, CMD_LxCACHE_F_RETIRED);
1447 		if (previously_retired_Lxcache) {
1448 			fmd_hdl_debug(hdl,
1449 			    "\n%s:cpu_id %d: Found  index %d way %d"
1450 			    " retired earlier. Will mark this Lxcache"
1451 			    " as RERETIRED.\n",
1452 			    fltnm, cpu->cpu_cpuid,
1453 			    retire_this_Lxcache->Lxcache_index,
1454 			    retire_this_Lxcache->Lxcache_way);
1455 			/*
1456 			 * First repair the retired cacheline and if successful
1457 			 * close the existing case and create a new case.
1458 			 */
1459 
1460 			/*
1461 			 * This cacheline has already been retired for
1462 			 * TAG fault.
1463 			 * Repair the previously retired DATA fault cacheline so
1464 			 * that it does not get retired by fmd during replay.
1465 			 */
1466 			if (previously_retired_Lxcache->Lxcache_retired_fmri[0]
1467 			    != 0) {
1468 				fmd_hdl_debug(hdl,
1469 				    "\n%s:cpuid %d Repairing the cacheline"
1470 				    " retired due to data errors. %s\n",
1471 				    fltnm, cpu->cpu_cpuid,
1472 				    previously_retired_Lxcache->
1473 				    Lxcache_retired_fmri);
1474 				if (cmd_repair_fmri(hdl,
1475 				    previously_retired_Lxcache->
1476 				    Lxcache_retired_fmri)
1477 				    != 0) {
1478 					fmd_hdl_debug(hdl,
1479 					    "\n%s:cpuid %d Failed to repair the"
1480 					    " retired fmri.",
1481 					    fltnm, cpu->cpu_cpuid);
1482 					return (B_FALSE);
1483 				} else {
1484 					previously_retired_Lxcache->
1485 					    Lxcache_retired_fmri[0] = 0;
1486 				}
1487 			}
1488 			cmd_case_fini(hdl,
1489 			    previously_retired_Lxcache->Lxcache_case.cc_cp,
1490 			    FMD_B_TRUE);
1491 			previously_retired_Lxcache->Lxcache_case.cc_cp = NULL;
1492 			previously_retired_Lxcache->Lxcache_flags =
1493 			    CMD_LxCACHE_F_RERETIRED;
1494 			/*
1495 			 * Update persistent storage
1496 			 */
1497 			cmd_Lxcache_write(hdl, previously_retired_Lxcache);
1498 			/*
1499 			 * Create a new case so that this Lxcache structure
1500 			 * gets restored on replay.
1501 			 */
1502 			if (cmd_create_case_for_Lxcache(hdl, cpu,
1503 			    previously_retired_Lxcache) == B_FALSE)
1504 				return (B_FALSE);
1505 		}
1506 	}
1507 	cmd_retire_cpu_if_limits_exceeded(hdl, cpu,
1508 	    retire_this_Lxcache->Lxcache_type,
1509 	    fltnm);
1510 	return (B_TRUE);
1511 }
1512