xref: /illumos-gate/usr/src/cmd/fm/modules/common/cpumem-retire/cma_main.c (revision 2aeafac3612e19716bf8164f89c3c9196342979c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 #include <cma.h>
27 
28 #include <unistd.h>
29 #include <fcntl.h>
30 #include <strings.h>
31 #include <errno.h>
32 #include <time.h>
33 #include <fm/fmd_api.h>
34 #include <sys/fm/protocol.h>
35 #include <sys/systeminfo.h>
36 #include <sys/utsname.h>
37 
38 #ifdef sun4v
39 #include <sys/fm/ldom.h>
40 
41 static fmd_hdl_t *init_hdl;
42 ldom_hdl_t *cma_lhp;
43 #endif
44 
45 #ifdef i386
46 boolean_t cma_is_native;
47 #endif
48 
49 extern const char *fmd_fmri_get_platform();
50 
51 cma_t cma;
52 
53 cma_stats_t cma_stats = {
54 	{ "cpu_flts", FMD_TYPE_UINT64, "cpu faults resolved" },
55 	{ "cpu_repairs", FMD_TYPE_UINT64, "cpu faults repaired" },
56 	{ "cpu_fails", FMD_TYPE_UINT64, "cpu faults unresolveable" },
57 	{ "cpu_blfails", FMD_TYPE_UINT64, "failed cpu blacklists" },
58 	{ "cpu_supp", FMD_TYPE_UINT64, "cpu offlines suppressed" },
59 	{ "cpu_blsupp", FMD_TYPE_UINT64, "cpu blacklists suppressed" },
60 	{ "page_flts", FMD_TYPE_UINT64, "page faults resolved" },
61 	{ "page_repairs", FMD_TYPE_UINT64, "page faults repaired" },
62 	{ "page_fails", FMD_TYPE_UINT64, "page faults unresolveable" },
63 	{ "page_supp", FMD_TYPE_UINT64, "page retires suppressed" },
64 	{ "page_nonent", FMD_TYPE_UINT64, "retires for non-existent fmris" },
65 	{ "bad_flts", FMD_TYPE_UINT64, "invalid fault events received" },
66 	{ "nop_flts", FMD_TYPE_UINT64, "inapplicable fault events received" },
67 	{ "auto_flts", FMD_TYPE_UINT64, "auto-close faults received" }
68 };
69 
70 typedef struct cma_subscriber {
71 	const char *subr_class;
72 	const char *subr_sname;
73 	uint_t subr_svers;
74 	int (*subr_func)(fmd_hdl_t *, nvlist_t *, nvlist_t *, const char *,
75 	    boolean_t);
76 } cma_subscriber_t;
77 
78 static const cma_subscriber_t cma_subrs[] = {
79 #if defined(i386)
80 	/*
81 	 * On x86, the ASRUs are expected to be in hc scheme.  When
82 	 * cpumem-retire wants to retire a cpu or mem page, it calls the
83 	 * methods registered in the topo node to do that.  The topo
84 	 * enumerator, which necessarily knows all the config info that
85 	 * we'd ever need in deciding what/how to retire etc.  This takes
86 	 * away much of that complexity from the agent into the entity
87 	 * that knows all config/topo information.
88 	 */
89 	{ "fault.memory.page", FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION,
90 	    cma_page_retire },
91 	{ "fault.memory.page_sb", FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION,
92 	    cma_page_retire },
93 	{ "fault.memory.page_ck", FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION,
94 	    cma_page_retire },
95 	{ "fault.memory.page_ue", FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION,
96 	    cma_page_retire },
97 	{ "fault.memory.generic-x86.page_ce", FM_FMRI_SCHEME_HC,
98 	    FM_HC_SCHEME_VERSION, cma_page_retire },
99 	{ "fault.memory.generic-x86.page_ue", FM_FMRI_SCHEME_HC,
100 	    FM_HC_SCHEME_VERSION, cma_page_retire },
101 	{ "fault.memory.intel.page_ce", FM_FMRI_SCHEME_HC,
102 	    FM_HC_SCHEME_VERSION, cma_page_retire },
103 	{ "fault.memory.intel.page_ue", FM_FMRI_SCHEME_HC,
104 	    FM_HC_SCHEME_VERSION, cma_page_retire },
105 	{ "fault.memory.dimm", FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION,
106 	    NULL },
107 	{ "fault.memory.dimm_sb", FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION,
108 	    NULL },
109 	{ "fault.memory.dimm_ck", FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION,
110 	    NULL },
111 	{ "fault.memory.dimm_ue", FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION,
112 	    NULL },
113 	{ "fault.memory.generic-x86.dimm_ce", FM_FMRI_SCHEME_HC,
114 	    FM_HC_SCHEME_VERSION, NULL },
115 	{ "fault.memory.generic-x86.dimm_ue", FM_FMRI_SCHEME_HC,
116 	    FM_HC_SCHEME_VERSION, NULL },
117 	{ "fault.memory.intel.dimm_ce", FM_FMRI_SCHEME_HC,
118 	    FM_HC_SCHEME_VERSION, NULL },
119 	{ "fault.memory.intel.dimm_ue", FM_FMRI_SCHEME_HC,
120 	    FM_HC_SCHEME_VERSION, NULL },
121 	{ "fault.memory.intel.fbd.*", FM_FMRI_SCHEME_HC,
122 	    FM_HC_SCHEME_VERSION, NULL },
123 	{ "fault.memory.dimm_testfail", FM_FMRI_SCHEME_HC,
124 	    FM_HC_SCHEME_VERSION, NULL },
125 	{ "fault.memory.bank", FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION,
126 	    NULL },
127 	{ "fault.memory.datapath", FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION,
128 	    NULL },
129 	{ "fault.cpu.intel.quickpath.mem_scrubbing", FM_FMRI_SCHEME_HC,
130 	    FM_HC_SCHEME_VERSION, cma_page_retire },
131 	{ "fault.cpu.intel.quickpath.*", FM_FMRI_SCHEME_HC,
132 	    FM_HC_SCHEME_VERSION, NULL },
133 	{ "fault.cpu.generic-x86.mc", FM_FMRI_SCHEME_HC,
134 	    FM_HC_SCHEME_VERSION, NULL },
135 	{ "fault.cpu.intel.dma", FM_FMRI_SCHEME_HC,
136 	    FM_HC_SCHEME_VERSION, NULL },
137 	{ "fault.cpu.intel.dma", FM_FMRI_SCHEME_CPU,
138 	    FM_CPU_SCHEME_VERSION, NULL },
139 
140 	/*
141 	 * The ASRU for cpu faults are in cpu scheme on native and in hc
142 	 * scheme on xpv.  So each cpu fault class needs to be listed twice.
143 	 */
144 
145 	/*
146 	 * The following faults do NOT retire a cpu thread,
147 	 * and therefore must be intercepted before
148 	 * the default "fault.cpu.*" dispatch to cma_cpu_hc_retire.
149 	 */
150 	{ "fault.cpu.amd.dramchannel", FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION,
151 	    NULL },
152 	{ "fault.cpu.amd.dramchannel", FM_FMRI_SCHEME_CPU,
153 	    FM_CPU_SCHEME_VERSION, NULL },
154 	{ "fault.cpu.generic-x86.bus_interconnect_memory", FM_FMRI_SCHEME_HC,
155 	    FM_HC_SCHEME_VERSION, NULL },
156 	{ "fault.cpu.generic-x86.bus_interconnect_memory", FM_FMRI_SCHEME_CPU,
157 	    FM_CPU_SCHEME_VERSION, NULL },
158 	{ "fault.cpu.generic-x86.bus_interconnect_io", FM_FMRI_SCHEME_HC,
159 	    FM_HC_SCHEME_VERSION, NULL },
160 	{ "fault.cpu.generic-x86.bus_interconnect_io", FM_FMRI_SCHEME_CPU,
161 	    FM_CPU_SCHEME_VERSION, NULL },
162 	{ "fault.cpu.generic-x86.bus_interconnect", FM_FMRI_SCHEME_HC,
163 	    FM_HC_SCHEME_VERSION, NULL },
164 	{ "fault.cpu.generic-x86.bus_interconnect", FM_FMRI_SCHEME_CPU,
165 	    FM_CPU_SCHEME_VERSION, NULL },
166 	{ "fault.cpu.intel.bus_interconnect_memory", FM_FMRI_SCHEME_HC,
167 	    FM_HC_SCHEME_VERSION, NULL },
168 	{ "fault.cpu.intel.bus_interconnect_memory", FM_FMRI_SCHEME_CPU,
169 	    FM_CPU_SCHEME_VERSION, NULL },
170 	{ "fault.cpu.intel.bus_interconnect_io", FM_FMRI_SCHEME_HC,
171 	    FM_HC_SCHEME_VERSION, NULL },
172 	{ "fault.cpu.intel.bus_interconnect_io", FM_FMRI_SCHEME_CPU,
173 	    FM_CPU_SCHEME_VERSION, NULL },
174 	{ "fault.cpu.intel.bus_interconnect", FM_FMRI_SCHEME_HC,
175 	    FM_HC_SCHEME_VERSION, NULL },
176 	{ "fault.cpu.intel.bus_interconnect", FM_FMRI_SCHEME_CPU,
177 	    FM_CPU_SCHEME_VERSION, NULL },
178 	{ "fault.cpu.intel.nb.*", FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION,
179 	    NULL },
180 	{ "fault.cpu.intel.nb.*", FM_FMRI_SCHEME_CPU, FM_CPU_SCHEME_VERSION,
181 	    NULL },
182 	{ "fault.cpu.intel.dma", FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION,
183 	    NULL },
184 	{ "fault.cpu.intel.dma", FM_FMRI_SCHEME_CPU, FM_CPU_SCHEME_VERSION,
185 	    NULL },
186 	{ "fault.cpu.*", FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION,
187 	    cma_cpu_hc_retire },
188 	{ "fault.cpu.*", FM_FMRI_SCHEME_CPU, FM_CPU_SCHEME_VERSION,
189 	    cma_cpu_hc_retire },
190 #elif defined(sun4v)
191 	/*
192 	 * The following are PI sun4v faults
193 	 */
194 	{ "fault.memory.memlink", FM_FMRI_SCHEME_HC,
195 	    FM_HC_SCHEME_VERSION, NULL },
196 	{ "fault.memory.memlink-uc", FM_FMRI_SCHEME_HC,
197 	    FM_HC_SCHEME_VERSION, NULL },
198 	{ "fault.memory.memlink-failover", FM_FMRI_SCHEME_HC,
199 	    FM_HC_SCHEME_VERSION, NULL },
200 	{ "fault.memory.dimm-ue-imminent", FM_FMRI_SCHEME_HC,
201 	    FM_HC_SCHEME_VERSION, NULL },
202 	{ "fault.memory.dram-ue-imminent", FM_FMRI_SCHEME_HC,
203 	    FM_HC_SCHEME_VERSION, NULL },
204 	{ "fault.memory.dimm-page-retires-excessive", FM_FMRI_SCHEME_HC,
205 	    FM_HC_SCHEME_VERSION, NULL },
206 	{ "fault.memory.page", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
207 	    cma_page_retire },
208 	{ "fault.memory.dimm", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
209 	    NULL },
210 	{ "fault.memory.dimm_sb", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
211 	    NULL },
212 	{ "fault.memory.dimm_ck", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
213 	    NULL },
214 	{ "fault.memory.dimm_ue", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
215 	    NULL },
216 	{ "fault.memory.dimm-page-retires-excessive", FM_FMRI_SCHEME_MEM,
217 	    FM_MEM_SCHEME_VERSION, NULL },
218 	{ "fault.memory.dimm-ue-imminent", FM_FMRI_SCHEME_MEM,
219 	    FM_MEM_SCHEME_VERSION, NULL },
220 	{ "fault.memory.dram-ue-imminent", FM_FMRI_SCHEME_MEM,
221 	    FM_MEM_SCHEME_VERSION, NULL },
222 	{ "fault.memory.bank", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
223 	    NULL },
224 	{ "fault.memory.datapath", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
225 	    NULL },
226 	{ "fault.memory.datapath", FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION,
227 	    NULL },
228 	{ "fault.memory.link-c", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
229 	    NULL },
230 	{ "fault.memory.link-u", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
231 	    NULL },
232 	{ "fault.memory.link-f", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
233 	    NULL },
234 	{ "fault.memory.link-c", FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION,
235 	    NULL },
236 	{ "fault.memory.link-u", FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION,
237 	    NULL },
238 	{ "fault.memory.link-f", FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION,
239 	    NULL },
240 
241 	/*
242 	 * The following ultraSPARC-T1/T2 faults do NOT retire a cpu thread,
243 	 * and therefore must be intercepted before
244 	 * the default "fault.cpu.*" dispatch to cma_cpu_hc_retire.
245 	 */
246 	{ "fault.cpu.*.l2cachedata", FM_FMRI_SCHEME_CPU,
247 	    FM_CPU_SCHEME_VERSION, NULL },
248 	{ "fault.cpu.*.l2cachetag", FM_FMRI_SCHEME_CPU,
249 	    FM_CPU_SCHEME_VERSION, NULL },
250 	{ "fault.cpu.*.l2cachectl", FM_FMRI_SCHEME_CPU,
251 	    FM_CPU_SCHEME_VERSION, NULL },
252 	{ "fault.cpu.*.l2data-c", FM_FMRI_SCHEME_CPU,
253 	    FM_CPU_SCHEME_VERSION, NULL },
254 	{ "fault.cpu.*.l2data-u", FM_FMRI_SCHEME_CPU,
255 	    FM_CPU_SCHEME_VERSION, NULL },
256 	{ "fault.cpu.*.mau", FM_FMRI_SCHEME_CPU,
257 	    FM_CPU_SCHEME_VERSION, NULL },
258 	{ "fault.cpu.*.lfu-u", FM_FMRI_SCHEME_CPU,
259 	    FM_CPU_SCHEME_VERSION, NULL },
260 	{ "fault.cpu.*.lfu-f", FM_FMRI_SCHEME_CPU,
261 	    FM_CPU_SCHEME_VERSION, NULL },
262 	{ "fault.cpu.*.lfu-p", FM_FMRI_SCHEME_CPU,
263 	    FM_CPU_SCHEME_VERSION, NULL },
264 	{ "fault.cpu.ultraSPARC-T1.freg", FM_FMRI_SCHEME_CPU,
265 	    FM_CPU_SCHEME_VERSION, NULL },
266 	{ "fault.cpu.ultraSPARC-T1.l2cachedata", FM_FMRI_SCHEME_CPU,
267 	    FM_CPU_SCHEME_VERSION, NULL },
268 	{ "fault.cpu.ultraSPARC-T1.l2cachetag", FM_FMRI_SCHEME_CPU,
269 	    FM_CPU_SCHEME_VERSION, NULL },
270 	{ "fault.cpu.ultraSPARC-T1.l2cachectl", FM_FMRI_SCHEME_CPU,
271 	    FM_CPU_SCHEME_VERSION, NULL },
272 	{ "fault.cpu.ultraSPARC-T1.mau", FM_FMRI_SCHEME_CPU,
273 	    FM_CPU_SCHEME_VERSION, NULL },
274 	{ "fault.cpu.ultraSPARC-T2plus.chip", FM_FMRI_SCHEME_HC,
275 	    FM_HC_SCHEME_VERSION, NULL },
276 	{ "fault.cpu.*", FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION,
277 	    cma_cpu_hc_retire },
278 	{ "fault.cpu.*", FM_FMRI_SCHEME_CPU, FM_CPU_SCHEME_VERSION,
279 	    cma_cpu_hc_retire },
280 #elif defined(opl)
281 	{ "fault.memory.page", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
282 	    cma_page_retire },
283 	{ "fault.memory.dimm", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
284 	    NULL },
285 	{ "fault.memory.dimm-page-retires-excessive", FM_FMRI_SCHEME_MEM,
286 	    FM_MEM_SCHEME_VERSION, NULL },
287 	{ "fault.memory.dimm-ue-imminent", FM_FMRI_SCHEME_MEM,
288 	    FM_MEM_SCHEME_VERSION, NULL },
289 	{ "fault.memory.dram-ue-imminent", FM_FMRI_SCHEME_MEM,
290 	    FM_MEM_SCHEME_VERSION, NULL },
291 	{ "fault.memory.bank", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
292 	    NULL },
293 	{ "fault.cpu.SPARC64-VI.*", FM_FMRI_SCHEME_CPU, FM_CPU_SCHEME_VERSION,
294 	    cma_cpu_cpu_retire },
295 	{ "fault.cpu.SPARC64-VII.*", FM_FMRI_SCHEME_CPU, FM_CPU_SCHEME_VERSION,
296 	    cma_cpu_cpu_retire },
297 	{ "fault.chassis.SPARC-Enterprise.cpu.SPARC64-VI.core.se",
298 		FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION, cma_cpu_hc_retire },
299 	{ "fault.chassis.SPARC-Enterprise.cpu.SPARC64-VI.core.se-offlinereq",
300 		FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION, cma_cpu_hc_retire },
301 	{ "fault.chassis.SPARC-Enterprise.cpu.SPARC64-VI.core.ce",
302 		FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION, cma_cpu_hc_retire },
303 	{ "fault.chassis.SPARC-Enterprise.cpu.SPARC64-VI.core.ce-offlinereq",
304 		FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION, cma_cpu_hc_retire },
305 	{ "fault.chassis.SPARC-Enterprise.cpu.SPARC64-VII.core.se",
306 		FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION, cma_cpu_hc_retire },
307 	{ "fault.chassis.SPARC-Enterprise.cpu.SPARC64-VII.core.se-offlinereq",
308 		FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION, cma_cpu_hc_retire },
309 	{ "fault.chassis.SPARC-Enterprise.cpu.SPARC64-VII.core.ce",
310 		FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION, cma_cpu_hc_retire },
311 	{ "fault.chassis.SPARC-Enterprise.cpu.SPARC64-VII.core.ce-offlinereq",
312 		FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION, cma_cpu_hc_retire },
313 #else
314 	/*
315 	 * For platforms excluding i386, sun4v and opl.
316 	 */
317 	{ "fault.memory.page", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
318 	    cma_page_retire },
319 	{ "fault.memory.page_sb", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
320 	    cma_page_retire },
321 	{ "fault.memory.page_ck", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
322 	    cma_page_retire },
323 	{ "fault.memory.page_ue", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
324 	    cma_page_retire },
325 	{ "fault.memory.dimm", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
326 	    NULL },
327 	{ "fault.memory.dimm_sb", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
328 	    NULL },
329 	{ "fault.memory.dimm_ck", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
330 	    NULL },
331 	{ "fault.memory.dimm_ue", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
332 	    NULL },
333 	{ "fault.memory.dimm-page-retires-excessive", FM_FMRI_SCHEME_MEM,
334 	    FM_MEM_SCHEME_VERSION, NULL },
335 	{ "fault.memory.dimm-ue-imminent", FM_FMRI_SCHEME_MEM,
336 	    FM_MEM_SCHEME_VERSION, NULL },
337 	{ "fault.memory.dram-ue-imminent", FM_FMRI_SCHEME_MEM,
338 	    FM_MEM_SCHEME_VERSION, NULL },
339 	{ "fault.memory.dimm_testfail", FM_FMRI_SCHEME_MEM,
340 	    FM_MEM_SCHEME_VERSION, NULL },
341 	{ "fault.memory.bank", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
342 	    NULL },
343 	{ "fault.memory.datapath", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
344 	    NULL },
345 	{ "fault.memory.datapath", FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION,
346 	    NULL },
347 	{ "fault.memory.datapath", FM_FMRI_SCHEME_CPU, FM_CPU_SCHEME_VERSION,
348 	    NULL },
349 
350 	/*
351 	 * The following faults do NOT retire a cpu thread,
352 	 * and therefore must be intercepted before
353 	 * the default "fault.cpu.*" dispatch to cma_cpu_cpu_retire.
354 	 */
355 	{ "fault.cpu.ultraSPARC-IVplus.l2cachedata-line",
356 	    FM_FMRI_SCHEME_CPU, FM_CPU_SCHEME_VERSION,
357 	    cma_cache_way_retire },
358 	{ "fault.cpu.ultraSPARC-IVplus.l3cachedata-line",
359 	    FM_FMRI_SCHEME_CPU, FM_CPU_SCHEME_VERSION,
360 	    cma_cache_way_retire },
361 	{ "fault.cpu.ultraSPARC-IVplus.l2cachetag-line",
362 	    FM_FMRI_SCHEME_CPU, FM_CPU_SCHEME_VERSION,
363 	    cma_cache_way_retire },
364 	{ "fault.cpu.ultraSPARC-IVplus.l3cachetag-line",
365 	    FM_FMRI_SCHEME_CPU, FM_CPU_SCHEME_VERSION,
366 	    cma_cache_way_retire },
367 
368 	/*
369 	 * Default "fault.cpu.*" for "cpu" scheme ASRU dispatch.
370 	 */
371 	{ "fault.cpu.*", FM_FMRI_SCHEME_CPU, FM_CPU_SCHEME_VERSION,
372 	    cma_cpu_cpu_retire },
373 #endif
374 	{ NULL, NULL, 0, NULL }
375 };
376 
377 static const cma_subscriber_t *
378 nvl2subr(fmd_hdl_t *hdl, nvlist_t *nvl, nvlist_t **asrup)
379 {
380 	const cma_subscriber_t *sp;
381 	nvlist_t *asru;
382 	char *scheme;
383 	uint8_t version;
384 	boolean_t retire;
385 
386 	if (nvlist_lookup_boolean_value(nvl, FM_SUSPECT_RETIRE, &retire) == 0 &&
387 	    retire == 0) {
388 		fmd_hdl_debug(hdl, "cma_recv: retire suppressed");
389 		return (NULL);
390 	}
391 
392 	if (nvlist_lookup_nvlist(nvl, FM_FAULT_ASRU, &asru) != 0 ||
393 	    nvlist_lookup_string(asru, FM_FMRI_SCHEME, &scheme) != 0 ||
394 	    nvlist_lookup_uint8(asru, FM_VERSION, &version) != 0) {
395 		cma_stats.bad_flts.fmds_value.ui64++;
396 		return (NULL);
397 	}
398 
399 	for (sp = cma_subrs; sp->subr_class != NULL; sp++) {
400 		if (fmd_nvl_class_match(hdl, nvl, sp->subr_class) &&
401 		    strcmp(scheme, sp->subr_sname) == 0 &&
402 		    version <= sp->subr_svers) {
403 			*asrup = asru;
404 			return (sp);
405 		}
406 	}
407 
408 	cma_stats.nop_flts.fmds_value.ui64++;
409 	return (NULL);
410 }
411 
412 static void
413 cma_recv_list(fmd_hdl_t *hdl, nvlist_t *nvl, const char *class)
414 {
415 	char *uuid = NULL;
416 	nvlist_t **nva, **save_nva;
417 	uint_t nvc = 0, save_nvc;
418 	uint_t keepopen;
419 	int err = 0;
420 	nvlist_t *asru = NULL;
421 	uint32_t index;
422 
423 	err |= nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid);
424 	err |= nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST,
425 	    &nva, &nvc);
426 	if (err != 0) {
427 		cma_stats.bad_flts.fmds_value.ui64++;
428 		return;
429 	}
430 
431 	save_nvc = keepopen = nvc;
432 	save_nva = nva;
433 	while (nvc-- != 0 && (strcmp(class, FM_LIST_SUSPECT_CLASS) != 0 ||
434 	    !fmd_case_uuclosed(hdl, uuid))) {
435 		nvlist_t *nvl = *nva++;
436 		const cma_subscriber_t *subr;
437 		int has_fault;
438 
439 		if ((subr = nvl2subr(hdl, nvl, &asru)) == NULL)
440 			continue;
441 
442 		/*
443 		 * A handler returns CMA_RA_SUCCESS to indicate that
444 		 * from this suspects  point-of-view the case may be
445 		 * closed, CMA_RA_FAILURE otherwise.
446 		 * A handler must not close the case itself.
447 		 */
448 		if (subr->subr_func != NULL) {
449 			has_fault = fmd_nvl_fmri_has_fault(hdl, asru,
450 			    FMD_HAS_FAULT_ASRU, NULL);
451 			if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0) {
452 				if (has_fault == 1)
453 					err = subr->subr_func(hdl, nvl, asru,
454 					    uuid, 0);
455 			} else {
456 				if (has_fault == 0)
457 					err = subr->subr_func(hdl, nvl, asru,
458 					    uuid, 1);
459 			}
460 			if (err == CMA_RA_SUCCESS)
461 				keepopen--;
462 		}
463 	}
464 
465 	/*
466 	 * Run though again to catch any new faults in list.updated.
467 	 */
468 	while (save_nvc-- != 0 && (strcmp(class, FM_LIST_UPDATED_CLASS) == 0)) {
469 		nvlist_t *nvl = *save_nva++;
470 		const cma_subscriber_t *subr;
471 		int has_fault;
472 
473 		if ((subr = nvl2subr(hdl, nvl, &asru)) == NULL)
474 			continue;
475 		if (subr->subr_func != NULL) {
476 			has_fault = fmd_nvl_fmri_has_fault(hdl, asru,
477 			    FMD_HAS_FAULT_ASRU, NULL);
478 			if (has_fault == 1)
479 				err = subr->subr_func(hdl, nvl, asru, uuid, 0);
480 		}
481 	}
482 
483 	/*
484 	 * Do not close the case if we are handling cache faults.
485 	 */
486 	if (asru != NULL) {
487 		if (nvlist_lookup_uint32(asru, FM_FMRI_CPU_CACHE_INDEX,
488 		    &index) != 0) {
489 			if (!keepopen && strcmp(class,
490 			    FM_LIST_SUSPECT_CLASS) == 0) {
491 				fmd_case_uuclose(hdl, uuid);
492 			}
493 		}
494 	}
495 
496 	if (!keepopen && strcmp(class, FM_LIST_REPAIRED_CLASS) == 0)
497 		fmd_case_uuresolved(hdl, uuid);
498 }
499 
500 static void
501 cma_recv_one(fmd_hdl_t *hdl, nvlist_t *nvl)
502 {
503 	const cma_subscriber_t *subr;
504 	nvlist_t *asru;
505 
506 	if ((subr = nvl2subr(hdl, nvl, &asru)) == NULL)
507 		return;
508 
509 	if (subr->subr_func != NULL) {
510 		if (fmd_nvl_fmri_has_fault(hdl, asru,
511 		    FMD_HAS_FAULT_ASRU, NULL) == 1)
512 			(void) subr->subr_func(hdl, nvl, asru, NULL, 0);
513 	}
514 }
515 
516 /*ARGSUSED*/
517 static void
518 cma_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
519 {
520 	fmd_hdl_debug(hdl, "received %s\n", class);
521 
522 	if (strcmp(class, FM_LIST_RESOLVED_CLASS) == 0)
523 		return;
524 
525 	if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0 ||
526 	    strcmp(class, FM_LIST_REPAIRED_CLASS) == 0 ||
527 	    strcmp(class, FM_LIST_UPDATED_CLASS) == 0)
528 		cma_recv_list(hdl, nvl, class);
529 	else
530 		cma_recv_one(hdl, nvl);
531 }
532 
533 /*ARGSUSED*/
534 static void
535 cma_timeout(fmd_hdl_t *hdl, id_t id, void *arg)
536 {
537 	if (id == cma.cma_page_timerid)
538 		cma_page_retry(hdl);
539 #ifdef sun4v
540 	/*
541 	 * cpu offline/online needs to be retried on sun4v because
542 	 * ldom request can be asynchronous.
543 	 */
544 	else if (id == cma.cma_cpu_timerid)
545 		cma_cpu_retry(hdl);
546 #endif
547 }
548 
#ifdef sun4v
/*
 * Allocation callback passed to ldom_init().  Allocates 'size' bytes
 * through fmd using the handle saved in init_hdl, with FMD_SLEEP.
 */
static void *
cma_init_alloc(size_t size)
{
	void *buf = fmd_hdl_alloc(init_hdl, size, FMD_SLEEP);

	return (buf);
}

/*
 * Free callback passed to ldom_init().  Releases 'size' bytes at 'addr'
 * through fmd using the handle saved in init_hdl.
 */
static void
cma_init_free(void *addr, size_t size)
{
	fmd_hdl_free(init_hdl, addr, size);
}
#endif
562 
563 static const fmd_hdl_ops_t fmd_ops = {
564 	cma_recv,	/* fmdo_recv */
565 	cma_timeout,	/* fmdo_timeout */
566 	NULL,		/* fmdo_close */
567 	NULL,		/* fmdo_stats */
568 	NULL,		/* fmdo_gc */
569 };
570 
571 static const fmd_prop_t fmd_props[] = {
572 	{ "cpu_tries", FMD_TYPE_UINT32, "10" },
573 	{ "cpu_delay", FMD_TYPE_TIME, "1sec" },
574 #ifdef sun4v
575 	{ "cpu_ret_mindelay", FMD_TYPE_TIME, "5sec" },
576 	{ "cpu_ret_maxdelay", FMD_TYPE_TIME, "5min" },
577 #endif /* sun4v */
578 	{ "cpu_offline_enable", FMD_TYPE_BOOL, "true" },
579 	{ "cpu_online_enable", FMD_TYPE_BOOL, "true" },
580 	{ "cpu_forced_offline", FMD_TYPE_BOOL, "true" },
581 #ifdef opl
582 	{ "cpu_blacklist_enable", FMD_TYPE_BOOL, "false" },
583 	{ "cpu_unblacklist_enable", FMD_TYPE_BOOL, "false" },
584 #else
585 	{ "cpu_blacklist_enable", FMD_TYPE_BOOL, "true" },
586 	{ "cpu_unblacklist_enable", FMD_TYPE_BOOL, "true" },
587 #endif /* opl */
588 	{ "page_ret_mindelay", FMD_TYPE_TIME, "1sec" },
589 	{ "page_ret_maxdelay", FMD_TYPE_TIME, "5min" },
590 	{ "page_retire_enable", FMD_TYPE_BOOL, "true" },
591 	{ "page_unretire_enable", FMD_TYPE_BOOL, "true" },
592 	{ NULL, 0, NULL }
593 };
594 
595 static const fmd_hdl_info_t fmd_info = {
596 	"CPU/Memory Retire Agent", CMA_VERSION, &fmd_ops, fmd_props
597 };
598 
599 void
600 _fmd_init(fmd_hdl_t *hdl)
601 {
602 	hrtime_t nsec;
603 #ifdef i386
604 	char buf[BUFSIZ];
605 	const char *dom0 = "control_d";
606 
607 	/*
608 	 * Abort the cpumem-retire module if Solaris is running under DomU.
609 	 */
610 	if (sysinfo(SI_PLATFORM, buf, sizeof (buf)) == -1)
611 		return;
612 
613 	if (strncmp(buf, "i86pc", sizeof (buf)) == 0) {
614 		cma_is_native = B_TRUE;
615 	} else if (strncmp(buf, "i86xpv", sizeof (buf)) != 0) {
616 		return;
617 	} else {
618 		int fd = open("/dev/xen/domcaps", O_RDONLY);
619 
620 		if (fd != -1) {
621 			if (read(fd, buf, sizeof (buf)) <= 0 ||
622 			    strncmp(buf, dom0, strlen(dom0)) != 0) {
623 				(void) close(fd);
624 				return;
625 			}
626 			(void) close(fd);
627 		}
628 		cma_is_native = B_FALSE;
629 	}
630 #endif /* i386 */
631 
632 	if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0)
633 		return; /* invalid data in configuration file */
634 
635 	fmd_hdl_subscribe(hdl, "fault.cpu.*");
636 	fmd_hdl_subscribe(hdl, "fault.memory.*");
637 #ifdef opl
638 	fmd_hdl_subscribe(hdl, "fault.chassis.SPARC-Enterprise.cpu.*");
639 #endif
640 
641 	(void) fmd_stat_create(hdl, FMD_STAT_NOALLOC, sizeof (cma_stats) /
642 	    sizeof (fmd_stat_t), (fmd_stat_t *)&cma_stats);
643 
644 	cma.cma_cpu_tries = fmd_prop_get_int32(hdl, "cpu_tries");
645 
646 	nsec = fmd_prop_get_int64(hdl, "cpu_delay");
647 	cma.cma_cpu_delay.tv_sec = nsec / NANOSEC;
648 	cma.cma_cpu_delay.tv_nsec = nsec % NANOSEC;
649 
650 	cma.cma_page_mindelay = fmd_prop_get_int64(hdl, "page_ret_mindelay");
651 	cma.cma_page_maxdelay = fmd_prop_get_int64(hdl, "page_ret_maxdelay");
652 
653 #ifdef sun4v
654 	cma.cma_cpu_mindelay = fmd_prop_get_int64(hdl, "cpu_ret_mindelay");
655 	cma.cma_cpu_maxdelay = fmd_prop_get_int64(hdl, "cpu_ret_maxdelay");
656 #endif
657 
658 	cma.cma_cpu_dooffline = fmd_prop_get_int32(hdl, "cpu_offline_enable");
659 	cma.cma_cpu_forcedoffline = fmd_prop_get_int32(hdl,
660 	    "cpu_forced_offline");
661 	cma.cma_cpu_doonline = fmd_prop_get_int32(hdl, "cpu_online_enable");
662 	cma.cma_cpu_doblacklist = fmd_prop_get_int32(hdl,
663 	    "cpu_blacklist_enable");
664 	cma.cma_cpu_dounblacklist = fmd_prop_get_int32(hdl,
665 	    "cpu_unblacklist_enable");
666 	cma.cma_page_doretire = fmd_prop_get_int32(hdl, "page_retire_enable");
667 	cma.cma_page_dounretire = fmd_prop_get_int32(hdl,
668 	    "page_unretire_enable");
669 
670 	if (cma.cma_page_maxdelay < cma.cma_page_mindelay)
671 		fmd_hdl_abort(hdl, "page retirement delays conflict\n");
672 
673 #ifdef sun4v
674 	init_hdl = hdl;
675 	cma_lhp = ldom_init(cma_init_alloc, cma_init_free);
676 #endif
677 }
678 
679 void
680 _fmd_fini(fmd_hdl_t *hdl)
681 {
682 #ifdef sun4v
683 	ldom_fini(cma_lhp);
684 	cma_cpu_fini(hdl);
685 #endif
686 	cma_page_fini(hdl);
687 }
688