xref: /illumos-gate/usr/src/cmd/fm/modules/common/cpumem-retire/cma_main.c (revision 9ffca3735a6d60d1994f185af63e16705e87d2c5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <cma.h>
28 
29 #include <strings.h>
30 #include <errno.h>
31 #include <time.h>
32 #include <fm/fmd_api.h>
33 #include <sys/fm/protocol.h>
34 #include <sys/systeminfo.h>
35 #include <sys/utsname.h>
36 
37 #ifdef sun4v
38 #include <sys/fm/ldom.h>
39 
40 static fmd_hdl_t *init_hdl;
41 ldom_hdl_t *cma_lhp;
42 #endif
43 
44 extern const char *fmd_fmri_get_platform();
45 
46 cma_t cma;
47 
48 cma_stats_t cma_stats = {
49 	{ "cpu_flts", FMD_TYPE_UINT64, "cpu faults resolved" },
50 	{ "cpu_repairs", FMD_TYPE_UINT64, "cpu faults repaired" },
51 	{ "cpu_fails", FMD_TYPE_UINT64, "cpu faults unresolveable" },
52 	{ "cpu_blfails", FMD_TYPE_UINT64, "failed cpu blacklists" },
53 	{ "cpu_supp", FMD_TYPE_UINT64, "cpu offlines suppressed" },
54 	{ "cpu_blsupp", FMD_TYPE_UINT64, "cpu blacklists suppressed" },
55 	{ "page_flts", FMD_TYPE_UINT64, "page faults resolved" },
56 	{ "page_repairs", FMD_TYPE_UINT64, "page faults repaired" },
57 	{ "page_fails", FMD_TYPE_UINT64, "page faults unresolveable" },
58 	{ "page_supp", FMD_TYPE_UINT64, "page retires suppressed" },
59 	{ "page_nonent", FMD_TYPE_UINT64, "retires for non-existent fmris" },
60 	{ "bad_flts", FMD_TYPE_UINT64, "invalid fault events received" },
61 	{ "nop_flts", FMD_TYPE_UINT64, "inapplicable fault events received" },
62 	{ "auto_flts", FMD_TYPE_UINT64, "auto-close faults received" }
63 };
64 
65 typedef struct cma_subscriber {
66 	const char *subr_class;
67 	const char *subr_sname;
68 	uint_t subr_svers;
69 	int (*subr_func)(fmd_hdl_t *, nvlist_t *, nvlist_t *, const char *,
70 	    boolean_t);
71 } cma_subscriber_t;
72 
73 static const cma_subscriber_t cma_subrs[] = {
74 #if defined(sun4v)
75 	{ "fault.memory.page", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
76 	    cma_page_retire },
77 	{ "fault.memory.dimm", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
78 	    NULL },
79 	{ "fault.memory.dimm_sb", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
80 	    NULL },
81 	{ "fault.memory.dimm_ck", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
82 	    NULL },
83 	{ "fault.memory.dimm_ue", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
84 	    NULL },
85 	{ "fault.memory.bank", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
86 	    NULL },
87 	{ "fault.memory.datapath", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
88 	    NULL },
89 	{ "fault.memory.link-c", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
90 	    NULL },
91 	{ "fault.memory.link-u", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
92 	    NULL },
93 	{ "fault.memory.link-f", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
94 	    NULL },
95 	{ "fault.memory.link-c", FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION,
96 	    NULL },
97 	{ "fault.memory.link-u", FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION,
98 	    NULL },
99 	{ "fault.memory.link-f", FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION,
100 	    NULL },
101 
102 	/*
103 	 * The following ultraSPARC-T1/T2 faults do NOT retire a cpu thread,
104 	 * and therefore must be intercepted before
105 	 * the default "fault.cpu.*" dispatch to cma_cpu_retire.
106 	 */
107 	{ "fault.cpu.*.l2cachedata", FM_FMRI_SCHEME_CPU,
108 	    FM_CPU_SCHEME_VERSION, NULL },
109 	{ "fault.cpu.*.l2cachetag", FM_FMRI_SCHEME_CPU,
110 	    FM_CPU_SCHEME_VERSION, NULL },
111 	{ "fault.cpu.*.l2cachectl", FM_FMRI_SCHEME_CPU,
112 	    FM_CPU_SCHEME_VERSION, NULL },
113 	{ "fault.cpu.*.l2data-c", FM_FMRI_SCHEME_CPU,
114 	    FM_CPU_SCHEME_VERSION, NULL },
115 	{ "fault.cpu.*.l2data-u", FM_FMRI_SCHEME_CPU,
116 	    FM_CPU_SCHEME_VERSION, NULL },
117 	{ "fault.cpu.*.mau", FM_FMRI_SCHEME_CPU,
118 	    FM_CPU_SCHEME_VERSION, NULL },
119 	{ "fault.cpu.*.lfu-u", FM_FMRI_SCHEME_CPU,
120 	    FM_CPU_SCHEME_VERSION, NULL },
121 	{ "fault.cpu.*.lfu-f", FM_FMRI_SCHEME_CPU,
122 	    FM_CPU_SCHEME_VERSION, NULL },
123 	{ "fault.cpu.*.lfu-p", FM_FMRI_SCHEME_CPU,
124 	    FM_CPU_SCHEME_VERSION, NULL },
125 	{ "fault.cpu.ultraSPARC-T1.freg", FM_FMRI_SCHEME_CPU,
126 	    FM_CPU_SCHEME_VERSION, NULL },
127 	{ "fault.cpu.ultraSPARC-T1.l2cachedata", FM_FMRI_SCHEME_CPU,
128 	    FM_CPU_SCHEME_VERSION, NULL },
129 	{ "fault.cpu.ultraSPARC-T1.l2cachetag", FM_FMRI_SCHEME_CPU,
130 	    FM_CPU_SCHEME_VERSION, NULL },
131 	{ "fault.cpu.ultraSPARC-T1.l2cachectl", FM_FMRI_SCHEME_CPU,
132 	    FM_CPU_SCHEME_VERSION, NULL },
133 	{ "fault.cpu.ultraSPARC-T1.mau", FM_FMRI_SCHEME_CPU,
134 	    FM_CPU_SCHEME_VERSION, NULL },
135 	{ "fault.cpu.ultraSPARC-T2plus.chip", FM_FMRI_SCHEME_HC,
136 	    FM_HC_SCHEME_VERSION, NULL },
137 	{ "fault.cpu.*", FM_FMRI_SCHEME_CPU, FM_CPU_SCHEME_VERSION,
138 	    cma_cpu_retire },
139 #elif defined(opl)
140 	{ "fault.memory.page", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
141 	    cma_page_retire },
142 	{ "fault.memory.dimm", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
143 	    NULL },
144 	{ "fault.memory.bank", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
145 	    NULL },
146 	{ "fault.cpu.SPARC64-VI.*", FM_FMRI_SCHEME_CPU, FM_CPU_SCHEME_VERSION,
147 	    cma_cpu_retire },
148 	{ "fault.cpu.SPARC64-VII.*", FM_FMRI_SCHEME_CPU, FM_CPU_SCHEME_VERSION,
149 	    cma_cpu_retire },
150 	{ "fault.chassis.SPARC-Enterprise.cpu.SPARC64-VI.core.se",
151 		FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION, cma_cpu_hc_retire },
152 	{ "fault.chassis.SPARC-Enterprise.cpu.SPARC64-VI.core.se-offlinereq",
153 		FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION, cma_cpu_hc_retire },
154 	{ "fault.chassis.SPARC-Enterprise.cpu.SPARC64-VI.core.ce",
155 		FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION, cma_cpu_hc_retire },
156 	{ "fault.chassis.SPARC-Enterprise.cpu.SPARC64-VI.core.ce-offlinereq",
157 		FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION, cma_cpu_hc_retire },
158 	{ "fault.chassis.SPARC-Enterprise.cpu.SPARC64-VII.core.se",
159 		FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION, cma_cpu_hc_retire },
160 	{ "fault.chassis.SPARC-Enterprise.cpu.SPARC64-VII.core.se-offlinereq",
161 		FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION, cma_cpu_hc_retire },
162 	{ "fault.chassis.SPARC-Enterprise.cpu.SPARC64-VII.core.ce",
163 		FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION, cma_cpu_hc_retire },
164 	{ "fault.chassis.SPARC-Enterprise.cpu.SPARC64-VII.core.ce-offlinereq",
165 		FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION, cma_cpu_hc_retire },
166 #else /* Generic */
167 	{ "fault.memory.page", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
168 	    cma_page_retire },
169 	{ "fault.memory.page_sb", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
170 	    cma_page_retire },
171 	{ "fault.memory.page_ck", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
172 	    cma_page_retire },
173 	{ "fault.memory.page_ue", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
174 	    cma_page_retire },
175 	{ "fault.memory.generic-x86.page_ce", FM_FMRI_SCHEME_MEM,
176 	    FM_MEM_SCHEME_VERSION, cma_page_retire },
177 	{ "fault.memory.generic-x86.page_ue", FM_FMRI_SCHEME_MEM,
178 	    FM_MEM_SCHEME_VERSION, cma_page_retire },
179 	{ "fault.memory.intel.page_ce", FM_FMRI_SCHEME_MEM,
180 	    FM_MEM_SCHEME_VERSION, cma_page_retire },
181 	{ "fault.memory.intel.page_ue", FM_FMRI_SCHEME_MEM,
182 	    FM_MEM_SCHEME_VERSION, cma_page_retire },
183 	{ "fault.memory.dimm", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
184 	    NULL },
185 	{ "fault.memory.dimm_sb", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
186 	    NULL },
187 	{ "fault.memory.dimm_ck", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
188 	    NULL },
189 	{ "fault.memory.dimm_ue", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
190 	    NULL },
191 	{ "fault.memory.generic-x86.dimm_ce", FM_FMRI_SCHEME_MEM,
192 	    FM_MEM_SCHEME_VERSION, NULL },
193 	{ "fault.memory.generic-x86.dimm_ue", FM_FMRI_SCHEME_MEM,
194 	    FM_MEM_SCHEME_VERSION, NULL },
195 	{ "fault.memory.intel.dimm_ce", FM_FMRI_SCHEME_MEM,
196 	    FM_MEM_SCHEME_VERSION, NULL },
197 	{ "fault.memory.intel.dimm_ue", FM_FMRI_SCHEME_MEM,
198 	    FM_MEM_SCHEME_VERSION, NULL },
199 	{ "fault.memory.intel.fbd.*", FM_FMRI_SCHEME_HC,
200 	    FM_HC_SCHEME_VERSION, NULL },
201 	{ "fault.memory.dimm_testfail", FM_FMRI_SCHEME_MEM,
202 	    FM_MEM_SCHEME_VERSION, NULL },
203 	{ "fault.memory.bank", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
204 	    NULL },
205 	{ "fault.memory.datapath", FM_FMRI_SCHEME_MEM, FM_MEM_SCHEME_VERSION,
206 	    NULL },
207 
208 	/*
209 	 * The following faults do NOT retire a cpu thread,
210 	 * and therefore must be intercepted before
211 	 * the default "fault.cpu.*" dispatch to cma_cpu_retire.
212 	 */
213 	{ "fault.cpu.amd.dramchannel", FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION,
214 	    NULL },
215 	{ "fault.cpu.generic-x86.bus_interconnect_memory", FM_FMRI_SCHEME_CPU,
216 	    FM_CPU_SCHEME_VERSION, NULL },
217 	{ "fault.cpu.generic-x86.bus_interconnect_io", FM_FMRI_SCHEME_CPU,
218 	    FM_CPU_SCHEME_VERSION, NULL },
219 	{ "fault.cpu.generic-x86.bus_interconnect", FM_FMRI_SCHEME_CPU,
220 	    FM_CPU_SCHEME_VERSION, NULL },
221 	{ "fault.cpu.intel.bus_interconnect_memory", FM_FMRI_SCHEME_CPU,
222 	    FM_CPU_SCHEME_VERSION, NULL },
223 	{ "fault.cpu.intel.bus_interconnect_io", FM_FMRI_SCHEME_CPU,
224 	    FM_CPU_SCHEME_VERSION, NULL },
225 	{ "fault.cpu.intel.bus_interconnect", FM_FMRI_SCHEME_CPU,
226 	    FM_CPU_SCHEME_VERSION, NULL },
227 	{ "fault.cpu.intel.nb.*", FM_FMRI_SCHEME_HC,
228 	    FM_HC_SCHEME_VERSION, NULL },
229 	{ "fault.cpu.intel.quickpath.*", FM_FMRI_SCHEME_HC,
230 	    FM_HC_SCHEME_VERSION, NULL },
231 	{ "fault.cpu.generic-x86.mc", FM_FMRI_SCHEME_HC,
232 	    FM_HC_SCHEME_VERSION, NULL },
233 	{ "fault.cpu.intel.dma", FM_FMRI_SCHEME_HC,
234 	    FM_HC_SCHEME_VERSION, NULL },
235 	{ "fault.cpu.intel.dma", FM_FMRI_SCHEME_CPU,
236 	    FM_CPU_SCHEME_VERSION, NULL },
237 	{ "fault.cpu.ultraSPARC-IVplus.l2cachedata-line",
238 	    FM_FMRI_SCHEME_CPU, FM_CPU_SCHEME_VERSION,
239 	    cma_cache_way_retire },
240 	{ "fault.cpu.ultraSPARC-IVplus.l3cachedata-line",
241 	    FM_FMRI_SCHEME_CPU, FM_CPU_SCHEME_VERSION,
242 	    cma_cache_way_retire },
243 	{ "fault.cpu.ultraSPARC-IVplus.l2cachetag-line",
244 	    FM_FMRI_SCHEME_CPU, FM_CPU_SCHEME_VERSION,
245 	    cma_cache_way_retire },
246 	{ "fault.cpu.ultraSPARC-IVplus.l3cachetag-line",
247 	    FM_FMRI_SCHEME_CPU, FM_CPU_SCHEME_VERSION,
248 	    cma_cache_way_retire },
249 
250 	/*
251 	 * Default "fault.cpu.*" for "mem" scheme ASRU dispatch.
252 	 */
253 	{ "fault.cpu.*", FM_FMRI_SCHEME_CPU, FM_CPU_SCHEME_VERSION,
254 	    cma_cpu_retire },
255 #endif
256 	{ NULL, NULL, 0, NULL }
257 };
258 
259 static const cma_subscriber_t *
260 nvl2subr(fmd_hdl_t *hdl, nvlist_t *nvl, nvlist_t **asrup)
261 {
262 	const cma_subscriber_t *sp;
263 	nvlist_t *asru;
264 	char *scheme;
265 	uint8_t version;
266 	boolean_t retire;
267 
268 	if (nvlist_lookup_boolean_value(nvl, FM_SUSPECT_RETIRE, &retire) == 0 &&
269 	    retire == 0) {
270 		fmd_hdl_debug(hdl, "cma_recv: retire suppressed");
271 		return (NULL);
272 	}
273 
274 	if (nvlist_lookup_nvlist(nvl, FM_FAULT_ASRU, &asru) != 0 ||
275 	    nvlist_lookup_string(asru, FM_FMRI_SCHEME, &scheme) != 0 ||
276 	    nvlist_lookup_uint8(asru, FM_VERSION, &version) != 0) {
277 		cma_stats.bad_flts.fmds_value.ui64++;
278 		return (NULL);
279 	}
280 
281 	for (sp = cma_subrs; sp->subr_class != NULL; sp++) {
282 		if (fmd_nvl_class_match(hdl, nvl, sp->subr_class) &&
283 		    strcmp(scheme, sp->subr_sname) == 0 &&
284 		    version <= sp->subr_svers) {
285 			*asrup = asru;
286 			return (sp);
287 		}
288 	}
289 
290 	cma_stats.nop_flts.fmds_value.ui64++;
291 	return (NULL);
292 }
293 
294 static void
295 cma_recv_list(fmd_hdl_t *hdl, nvlist_t *nvl, const char *class)
296 {
297 	char *uuid = NULL;
298 	nvlist_t **nva;
299 	uint_t nvc = 0;
300 	uint_t keepopen;
301 	int err = 0;
302 	nvlist_t *asru;
303 	uint32_t index;
304 
305 	err |= nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid);
306 	err |= nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST,
307 	    &nva, &nvc);
308 	if (err != 0) {
309 		cma_stats.bad_flts.fmds_value.ui64++;
310 		return;
311 	}
312 
313 	keepopen = nvc;
314 	while (nvc-- != 0 && (strcmp(class, FM_LIST_SUSPECT_CLASS) != 0 ||
315 	    !fmd_case_uuclosed(hdl, uuid))) {
316 		nvlist_t *nvl = *nva++;
317 		const cma_subscriber_t *subr;
318 		int has_fault;
319 
320 		if ((subr = nvl2subr(hdl, nvl, &asru)) == NULL)
321 			continue;
322 
323 		/*
324 		 * A handler returns CMA_RA_SUCCESS to indicate that
325 		 * from this suspects  point-of-view the case may be
326 		 * closed, CMA_RA_FAILURE otherwise.
327 		 * A handler must not close the case itself.
328 		 */
329 		if (subr->subr_func != NULL) {
330 			has_fault = fmd_nvl_fmri_has_fault(hdl, asru,
331 			    FMD_HAS_FAULT_ASRU, NULL);
332 			if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0) {
333 				if (has_fault == 1)
334 					err = subr->subr_func(hdl, nvl, asru,
335 					    uuid, 0);
336 			} else {
337 				if (has_fault == 0)
338 					err = subr->subr_func(hdl, nvl, asru,
339 					    uuid, 1);
340 			}
341 			if (err == CMA_RA_SUCCESS)
342 				keepopen--;
343 		}
344 	}
345 	/*
346 	 * Do not close the case if we are handling cache faults.
347 	 */
348 	if (nvlist_lookup_uint32(asru, FM_FMRI_CPU_CACHE_INDEX, &index) != 0) {
349 		if (!keepopen && strcmp(class, FM_LIST_SUSPECT_CLASS) == 0) {
350 			fmd_case_uuclose(hdl, uuid);
351 		}
352 	}
353 	if (!keepopen && strcmp(class, FM_LIST_REPAIRED_CLASS) == 0)
354 		fmd_case_uuresolved(hdl, uuid);
355 }
356 
357 static void
358 cma_recv_one(fmd_hdl_t *hdl, nvlist_t *nvl)
359 {
360 	const cma_subscriber_t *subr;
361 	nvlist_t *asru;
362 
363 	if ((subr = nvl2subr(hdl, nvl, &asru)) == NULL)
364 		return;
365 
366 	if (subr->subr_func != NULL) {
367 		if (fmd_nvl_fmri_has_fault(hdl, asru,
368 		    FMD_HAS_FAULT_ASRU, NULL) == 1)
369 			(void) subr->subr_func(hdl, nvl, asru, NULL, 0);
370 	}
371 }
372 
373 /*ARGSUSED*/
374 static void
375 cma_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
376 {
377 	fmd_hdl_debug(hdl, "received %s\n", class);
378 
379 	if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0 ||
380 	    strcmp(class, FM_LIST_REPAIRED_CLASS) == 0 ||
381 	    strcmp(class, FM_LIST_UPDATED_CLASS) == 0)
382 		cma_recv_list(hdl, nvl, class);
383 	else
384 		cma_recv_one(hdl, nvl);
385 }
386 
387 /*ARGSUSED*/
388 static void
389 cma_timeout(fmd_hdl_t *hdl, id_t id, void *arg)
390 {
391 	if (id == cma.cma_page_timerid)
392 		cma_page_retry(hdl);
393 #ifdef sun4v
394 	/*
395 	 * cpu offline/online needs to be retried on sun4v because
396 	 * ldom request can be asynchronous.
397 	 */
398 	else if (id == cma.cma_cpu_timerid)
399 		cma_cpu_retry(hdl);
400 #endif
401 }
402 
#ifdef sun4v
/*
 * Allocation callback handed to ldom_init(): allocates size bytes
 * through the module handle saved in init_hdl, with FMD_SLEEP.
 */
static void *
cma_init_alloc(size_t size)
{
	void *buf = fmd_hdl_alloc(init_hdl, size, FMD_SLEEP);

	return (buf);
}

/*
 * Free callback handed to ldom_init(): releases size bytes at addr
 * through the module handle saved in init_hdl.
 */
static void
cma_init_free(void *addr, size_t size)
{
	fmd_hdl_free(init_hdl, addr, size);
}
#endif
416 
417 static const fmd_hdl_ops_t fmd_ops = {
418 	cma_recv,	/* fmdo_recv */
419 	cma_timeout,	/* fmdo_timeout */
420 	NULL,		/* fmdo_close */
421 	NULL,		/* fmdo_stats */
422 	NULL,		/* fmdo_gc */
423 };
424 
425 static const fmd_prop_t fmd_props[] = {
426 	{ "cpu_tries", FMD_TYPE_UINT32, "10" },
427 	{ "cpu_delay", FMD_TYPE_TIME, "1sec" },
428 #ifdef sun4v
429 	{ "cpu_ret_mindelay", FMD_TYPE_TIME, "5sec" },
430 	{ "cpu_ret_maxdelay", FMD_TYPE_TIME, "5min" },
431 #endif /* sun4v */
432 	{ "cpu_offline_enable", FMD_TYPE_BOOL, "true" },
433 	{ "cpu_online_enable", FMD_TYPE_BOOL, "true" },
434 	{ "cpu_forced_offline", FMD_TYPE_BOOL, "true" },
435 #ifdef opl
436 	{ "cpu_blacklist_enable", FMD_TYPE_BOOL, "false" },
437 	{ "cpu_unblacklist_enable", FMD_TYPE_BOOL, "false" },
438 #else
439 	{ "cpu_blacklist_enable", FMD_TYPE_BOOL, "true" },
440 	{ "cpu_unblacklist_enable", FMD_TYPE_BOOL, "true" },
441 #endif /* opl */
442 	{ "page_ret_mindelay", FMD_TYPE_TIME, "1sec" },
443 	{ "page_ret_maxdelay", FMD_TYPE_TIME, "5min" },
444 	{ "page_retire_enable", FMD_TYPE_BOOL, "true" },
445 	{ "page_unretire_enable", FMD_TYPE_BOOL, "true" },
446 	{ NULL, 0, NULL }
447 };
448 
449 static const fmd_hdl_info_t fmd_info = {
450 	"CPU/Memory Retire Agent", CMA_VERSION, &fmd_ops, fmd_props
451 };
452 
453 void
454 _fmd_init(fmd_hdl_t *hdl)
455 {
456 	hrtime_t nsec;
457 #ifdef i386
458 	/*
459 	 * Abort the cpumem-retire module if Solaris is running under the Xen
460 	 * hypervisor.
461 	 */
462 	if (strcmp(fmd_fmri_get_platform(), "i86xpv") == 0)
463 		return;
464 #endif
465 
466 	if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0)
467 		return; /* invalid data in configuration file */
468 
469 	fmd_hdl_subscribe(hdl, "fault.cpu.*");
470 	fmd_hdl_subscribe(hdl, "fault.memory.*");
471 #ifdef opl
472 	fmd_hdl_subscribe(hdl, "fault.chassis.SPARC-Enterprise.cpu.*");
473 #endif
474 
475 	(void) fmd_stat_create(hdl, FMD_STAT_NOALLOC, sizeof (cma_stats) /
476 	    sizeof (fmd_stat_t), (fmd_stat_t *)&cma_stats);
477 
478 	cma.cma_cpu_tries = fmd_prop_get_int32(hdl, "cpu_tries");
479 
480 	nsec = fmd_prop_get_int64(hdl, "cpu_delay");
481 	cma.cma_cpu_delay.tv_sec = nsec / NANOSEC;
482 	cma.cma_cpu_delay.tv_nsec = nsec % NANOSEC;
483 
484 	cma.cma_page_mindelay = fmd_prop_get_int64(hdl, "page_ret_mindelay");
485 	cma.cma_page_maxdelay = fmd_prop_get_int64(hdl, "page_ret_maxdelay");
486 
487 #ifdef sun4v
488 	cma.cma_cpu_mindelay = fmd_prop_get_int64(hdl, "cpu_ret_mindelay");
489 	cma.cma_cpu_maxdelay = fmd_prop_get_int64(hdl, "cpu_ret_maxdelay");
490 #endif
491 
492 	cma.cma_cpu_dooffline = fmd_prop_get_int32(hdl, "cpu_offline_enable");
493 	cma.cma_cpu_forcedoffline = fmd_prop_get_int32(hdl,
494 	    "cpu_forced_offline");
495 	cma.cma_cpu_doonline = fmd_prop_get_int32(hdl, "cpu_online_enable");
496 	cma.cma_cpu_doblacklist = fmd_prop_get_int32(hdl,
497 	    "cpu_blacklist_enable");
498 	cma.cma_cpu_dounblacklist = fmd_prop_get_int32(hdl,
499 	    "cpu_unblacklist_enable");
500 	cma.cma_page_doretire = fmd_prop_get_int32(hdl, "page_retire_enable");
501 	cma.cma_page_dounretire = fmd_prop_get_int32(hdl,
502 	    "page_unretire_enable");
503 
504 	if (cma.cma_page_maxdelay < cma.cma_page_mindelay)
505 		fmd_hdl_abort(hdl, "page retirement delays conflict\n");
506 
507 #ifdef sun4v
508 	init_hdl = hdl;
509 	cma_lhp = ldom_init(cma_init_alloc, cma_init_free);
510 #endif
511 }
512 
513 void
514 _fmd_fini(fmd_hdl_t *hdl)
515 {
516 #ifdef sun4v
517 	ldom_fini(cma_lhp);
518 	cma_cpu_fini(hdl);
519 #endif
520 	cma_page_fini(hdl);
521 }
522