xref: /titanic_51/usr/src/cmd/fm/fmd/common/fmd.c (revision 8c8a8d17e0c6c68e047f0e531c3f8ce133b9aea6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/utsname.h>
31 #include <sys/param.h>
32 #include <sys/systeminfo.h>
33 #include <sys/fm/util.h>
34 
35 #include <smbios.h>
36 #include <limits.h>
37 #include <unistd.h>
38 #include <signal.h>
39 #include <stdlib.h>
40 #include <stdio.h>
41 #include <door.h>
42 
43 #include <fmd_conf.h>
44 #include <fmd_dispq.h>
45 #include <fmd_timerq.h>
46 #include <fmd_subr.h>
47 #include <fmd_error.h>
48 #include <fmd_module.h>
49 #include <fmd_thread.h>
50 #include <fmd_alloc.h>
51 #include <fmd_string.h>
52 #include <fmd_builtin.h>
53 #include <fmd_ustat.h>
54 #include <fmd_protocol.h>
55 #include <fmd_scheme.h>
56 #include <fmd_asru.h>
57 #include <fmd_case.h>
58 #include <fmd_log.h>
59 #include <fmd_idspace.h>
60 #include <fmd_rpc.h>
61 #include <fmd_dr.h>
62 #include <fmd_topo.h>
63 #include <fmd_xprt.h>
64 #include <fmd_ctl.h>
65 #include <sys/openpromio.h>
66 #include <libdevinfo.h>
67 
68 #include <fmd.h>
69 
70 extern const nv_alloc_ops_t fmd_nv_alloc_ops;	/* see fmd_nv.c */
71 
/*
 * Identity strings for this daemon instance.  The static buffers below are
 * filled in at the top of fmd_create() and are then used as the initial
 * values of the corresponding entries in the _fmd_conf[] property table
 * ("platform", "isaname", "machine", "osrelease", "osversion", "server",
 * "chassis" and "product").
 */
72 const char _fmd_version[] = "1.2";		/* daemon version string */
73 static char _fmd_plat[MAXNAMELEN];		/* native platform string */
74 static char _fmd_isa[MAXNAMELEN];		/* native instruction set */
75 static struct utsname _fmd_uts;			/* native uname(2) info */
76 static char _fmd_csn[MAXNAMELEN];		/* chassis serial number */
77 static char _fmd_prod[MAXNAMELEN];		/* product name string */
78 
79 /*
80  * Note: the configuration file path is ordered from most common to most host-
81  * specific because new conf files are merged into, and override, earlier ones.
82  * The module paths are in the opposite order, from most specific to most
83  * common, because once a module is loaded fmd will not load over the same name.
 *
 * The %r, %m and %i tokens are expanded by the fmd_conf.c path code.
 * NOTE(review): presumably %r is the root directory, %m the machine name
 * (uname -m) and %i the platform name (uname -i), matching the d_rootdir,
 * d_machine and d_platform values that fmd_create() sets up before opening
 * the configuration -- confirm against fmd_conf.c.
 *
 * _fmd_scheme_path carries no %r prefix: it is the default of the plain
 * string property "schemedir", and fmd_run() hands it to
 * fmd_scheme_hash_create() together with d_rootdir.
84  */
85 
86 static const char _fmd_conf_path[] =
87 	"%r/usr/lib/fm/fmd:"
88 	"%r/usr/platform/%m/lib/fm/fmd:"
89 	"%r/usr/platform/%i/lib/fm/fmd:"
90 	"%r/etc/fm/fmd";
91 
92 static const char _fmd_agent_path[] =
93 	"%r/usr/platform/%i/lib/fm/fmd/agents:"
94 	"%r/usr/platform/%m/lib/fm/fmd/agents:"
95 	"%r/usr/lib/fm/fmd/agents";
96 
97 static const char _fmd_plugin_path[] =
98 	"%r/usr/platform/%i/lib/fm/fmd/plugins:"
99 	"%r/usr/platform/%m/lib/fm/fmd/plugins:"
100 	"%r/usr/lib/fm/fmd/plugins";
101 
102 static const char _fmd_scheme_path[] =
103 	"usr/lib/fm/fmd/schemes";
104 
/*
 * Keywords accepted by the "client.error" property (see fmd_cerror_set()).
 * Each row appears to be { keyword, human-readable description, value };
 * the all-NULL row terminates the table.
 */
105 static const fmd_conf_mode_t _fmd_cerror_modes[] = {
106 	{ "unload", "unload offending client module", FMD_CERROR_UNLOAD },
107 	{ "stop", "stop daemon for debugger attach", FMD_CERROR_STOP },
108 	{ "abort", "abort daemon and force core dump", FMD_CERROR_ABORT },
109 	{ NULL, NULL, 0 }
110 };
111 
/*
 * Keywords accepted by the "dbout" and "client.dbout" properties (see
 * fmd_dbout_set()), selecting where debug messages are sent.  The all-NULL
 * row terminates the table.
 */
112 static const fmd_conf_mode_t _fmd_dbout_modes[] = {
113 	{ "stderr", "send debug messages to stderr", FMD_DBOUT_STDERR },
114 	{ "syslog", "send debug messages to syslog", FMD_DBOUT_SYSLOG },
115 	{ NULL, NULL, 0 }
116 };
117 
/*
 * Keywords accepted by the daemon "debug" property (see fmd_debug_set()).
 * Each keyword maps to one FMD_DBG_* value; the value resulting from a
 * successful set is cached in fmd.d_fmd_debug.  The all-NULL row terminates
 * the table.
 */
118 static const fmd_conf_mode_t _fmd_debug_modes[] = {
119 	{ "help", "display debugging modes and exit", FMD_DBG_HELP },
120 	{ "mod", "debug module load/unload/locking", FMD_DBG_MOD },
121 	{ "disp", "debug dispatch queue processing", FMD_DBG_DISP },
122 	{ "xprt", "debug transport-specific routines", FMD_DBG_XPRT },
123 	{ "evt", "debug event subsystem routines", FMD_DBG_EVT },
124 	{ "log", "debug log subsystem routines", FMD_DBG_LOG },
125 	{ "tmr", "debug timer subsystem routines", FMD_DBG_TMR },
126 	{ "fmri", "debug fmri subsystem routines", FMD_DBG_FMRI },
127 	{ "asru", "debug asru subsystem routines", FMD_DBG_ASRU },
128 	{ "case", "debug case subsystem routines", FMD_DBG_CASE },
129 	{ "ckpt", "debug checkpoint routines", FMD_DBG_CKPT },
130 	{ "rpc", "debug rpc service routines", FMD_DBG_RPC },
131 	{ "trace", "display matching trace calls", FMD_DBG_TRACE },
132 	{ "all", "enable all available debug modes", FMD_DBG_ALL },
133 	{ NULL, NULL, 0 }
134 };
135 
136 static int
137 fmd_cerror_set(fmd_conf_param_t *pp, const char *value)
138 {
139 	return (fmd_conf_mode_set(_fmd_cerror_modes, pp, value));
140 }
141 
142 static int
143 fmd_dbout_set(fmd_conf_param_t *pp, const char *value)
144 {
145 	return (fmd_conf_mode_set(_fmd_dbout_modes, pp, value));
146 }
147 
148 static int
149 fmd_debug_set(fmd_conf_param_t *pp, const char *value)
150 {
151 	int err = fmd_conf_mode_set(_fmd_debug_modes, pp, value);
152 
153 	if (err == 0)
154 		fmd.d_fmd_debug = pp->cp_value.cpv_num;
155 
156 	return (err);
157 }
158 
159 static int
160 fmd_trmode_set(fmd_conf_param_t *pp, const char *value)
161 {
162 	fmd_tracebuf_f *func;
163 
164 	if (strcasecmp(value, "none") == 0)
165 		func = fmd_trace_none;
166 	else if (strcasecmp(value, "lite") == 0)
167 		func = fmd_trace_lite;
168 	else if (strcasecmp(value, "full") == 0)
169 		func = fmd_trace_full;
170 	else
171 		return (fmd_set_errno(EFMD_CONF_INVAL));
172 
173 	fmd.d_thr_trace = (void (*)())func;
174 	pp->cp_value.cpv_ptr = (void *)func;
175 	return (0);
176 }
177 
178 static void
179 fmd_trmode_get(const fmd_conf_param_t *pp, void *ptr)
180 {
181 	*((void **)ptr) = pp->cp_value.cpv_ptr;
182 }
183 
184 static int
185 fmd_clkmode_set(fmd_conf_param_t *pp, const char *value)
186 {
187 	const fmd_timeops_t *ops;
188 
189 	if (strcasecmp(value, "native") == 0)
190 		ops = &fmd_timeops_native;
191 	else if (strcasecmp(value, "simulated") == 0)
192 		ops = &fmd_timeops_simulated;
193 	else
194 		return (fmd_set_errno(EFMD_CONF_INVAL));
195 
196 	fmd.d_clockops = ops;
197 	pp->cp_value.cpv_ptr = (void *)ops;
198 	return (0);
199 }
200 
201 static void
202 fmd_clkmode_get(const fmd_conf_param_t *pp, void *ptr)
203 {
204 	*((void **)ptr) = pp->cp_value.cpv_ptr;
205 }
206 
/*
 * Property ops vectors for the custom property types defined in this file.
 * The first member is the set routine and the second the get routine.
 * NOTE(review): the last two members (fmd_conf_notsup, fmd_conf_nop in every
 * vector here) are presumably the delete and free hooks -- confirm against
 * the fmd_conf_ops_t definition in fmd_conf.h.
 */
207 static const fmd_conf_ops_t fmd_cerror_ops = {
208 	fmd_cerror_set, fmd_conf_mode_get, fmd_conf_notsup, fmd_conf_nop
209 };
210 
211 static const fmd_conf_ops_t fmd_dbout_ops = {
212 	fmd_dbout_set, fmd_conf_mode_get, fmd_conf_notsup, fmd_conf_nop
213 };
214 
215 static const fmd_conf_ops_t fmd_debug_ops = {
216 	fmd_debug_set, fmd_conf_mode_get, fmd_conf_notsup, fmd_conf_nop
217 };
218 
219 static const fmd_conf_ops_t fmd_trmode_ops = {
220 	fmd_trmode_set, fmd_trmode_get, fmd_conf_notsup, fmd_conf_nop
221 };
222 
223 static const fmd_conf_ops_t fmd_clkmode_ops = {
224 	fmd_clkmode_set, fmd_clkmode_get, fmd_conf_notsup, fmd_conf_nop
225 };
226 
/*
 * Formal property table for the daemon.  fmd_create() passes this array and
 * its element count to fmd_conf_open().  Each row is { property name, ops
 * vector for the property's type, initial value string }, where a NULL
 * initial value means none is supplied.  Several initial values point at the
 * identity buffers that fmd_create() fills in before opening the
 * configuration.
 */
227 static const fmd_conf_formal_t _fmd_conf[] = {
228 { "agent.path", &fmd_conf_path, _fmd_agent_path }, /* path for agents */
229 { "alloc_msecs", &fmd_conf_uint32, "10" },	/* msecs before alloc retry */
230 { "alloc_tries", &fmd_conf_uint32, "3" },	/* max # of alloc retries */
231 { "chassis", &fmd_conf_string, _fmd_csn },	/* chassis serial number */
232 { "ckpt.dir", &fmd_conf_string, "var/fm/fmd/ckpt" }, /* ckpt directory path */
233 { "ckpt.dirmode", &fmd_conf_int32, "0700" },	/* ckpt directory perm mode */
234 { "ckpt.mode", &fmd_conf_int32, "0400" },	/* ckpt file perm mode */
235 { "ckpt.restore", &fmd_conf_bool, "true" },	/* restore checkpoints? */
236 { "ckpt.save", &fmd_conf_bool, "true" },	/* save checkpoints? */
237 { "ckpt.zero", &fmd_conf_bool, "false" },	/* zero checkpoints on start? */
238 { "client.buflim", &fmd_conf_size, "10m" },	/* client buffer space limit */
239 { "client.dbout", &fmd_dbout_ops, NULL },	/* client debug output sinks */
240 { "client.debug", &fmd_conf_bool, NULL },	/* client debug enable */
241 { "client.error", &fmd_cerror_ops, "unload" },	/* client error policy */
242 { "client.memlim", &fmd_conf_size, "10m" },	/* client allocation limit */
243 { "client.evqlim", &fmd_conf_uint32, "256" },	/* client event queue limit */
244 { "client.thrlim", &fmd_conf_uint32, "8" },	/* client aux thread limit */
245 { "client.thrsig", &fmd_conf_signal, "SIGUSR1" }, /* fmd_thr_signal() value */
246 { "client.tmrlim", &fmd_conf_uint32, "1024" },	/* client pending timer limit */
247 { "client.xprtlim", &fmd_conf_uint32, "256" },	/* client transport limit */
248 { "client.xprtlog", &fmd_conf_bool, NULL },	/* client transport logging? */
249 { "client.xprtqlim", &fmd_conf_uint32, "1024" }, /* client xprt queue limit */
250 { "clock", &fmd_clkmode_ops, "native" },	/* clock operation mode */
251 { "conf_path", &fmd_conf_path, _fmd_conf_path }, /* root config file path */
252 { "conf_file", &fmd_conf_string, "fmd.conf" },	/* root config file name */
253 { "core", &fmd_conf_bool, "false" },		/* force core dump on quit */
254 { "dbout", &fmd_dbout_ops, NULL },		/* daemon debug output sinks */
255 { "debug", &fmd_debug_ops, NULL },		/* daemon debugging flags */
256 { "dictdir", &fmd_conf_string, "usr/lib/fm/dict" }, /* default diagcode dir */
257 { "domain", &fmd_conf_string, NULL },		/* domain id for de auth */
258 { "fakenotpresent", &fmd_conf_uint32, "0" },	/* simulate rsrc not present */
259 { "fg", &fmd_conf_bool, "false" },		/* run daemon in foreground */
260 { "gc_interval", &fmd_conf_time, "1d" },	/* garbage collection intvl */
261 { "ids.avg", &fmd_conf_uint32, "4" },		/* desired idspace chain len */
262 { "ids.max", &fmd_conf_uint32, "1024" },	/* maximum idspace buckets */
263 { "isaname", &fmd_conf_string, _fmd_isa },	/* instruction set (uname -p) */
264 { "log.creator", &fmd_conf_string, "fmd" },	/* exacct log creator string */
265 { "log.error", &fmd_conf_string, "var/fm/fmd/errlog" }, /* error log path */
266 { "log.fault", &fmd_conf_string, "var/fm/fmd/fltlog" }, /* fault log path */
267 { "log.minfree", &fmd_conf_size, "2m" },	/* min log fsys free space */
268 { "log.rsrc", &fmd_conf_string, "var/fm/fmd/rsrc" }, /* asru log dir path */
269 { "log.tryrotate", &fmd_conf_uint32, "10" },	/* max log rotation attempts */
270 { "log.waitrotate", &fmd_conf_time, "200ms" },	/* log rotation retry delay */
271 { "log.xprt", &fmd_conf_string, "var/fm/fmd/xprt" }, /* transport log dir */
272 { "machine", &fmd_conf_string, _fmd_uts.machine }, /* machine name (uname -m) */
273 { "nodiagcode", &fmd_conf_string, "-" },	/* diagcode to use if error */
274 { "repaircode", &fmd_conf_string, "-" },	/* diagcode for list.repaired */
275 { "resolvecode", &fmd_conf_string, "-" },	/* diagcode for list.resolved */
276 { "updatecode", &fmd_conf_string, "-" },	/* diagcode for list.updated */
277 { "osrelease", &fmd_conf_string, _fmd_uts.release }, /* release (uname -r) */
278 { "osversion", &fmd_conf_string, _fmd_uts.version }, /* version (uname -v) */
279 { "platform", &fmd_conf_string, _fmd_plat },	/* platform string (uname -i) */
280 { "plugin.close", &fmd_conf_bool, "true" },	/* dlclose plugins on fini */
281 { "plugin.path", &fmd_conf_path, _fmd_plugin_path }, /* path for plugin mods */
282 { "product", &fmd_conf_string, _fmd_prod },	/* product name string */
283 { "rootdir", &fmd_conf_string, "" },		/* root directory for paths */
284 { "rpc.adm.path", &fmd_conf_string, NULL },	/* FMD_ADM rendezvous file */
285 { "rpc.adm.prog", &fmd_conf_uint32, "100169" },	/* FMD_ADM rpc program num */
286 { "rpc.api.path", &fmd_conf_string, NULL },	/* FMD_API rendezvous file */
287 { "rpc.api.prog", &fmd_conf_uint32, "100170" },	/* FMD_API rpc program num */
288 { "rpc.rcvsize", &fmd_conf_size, "128k" },	/* rpc receive buffer size */
289 { "rpc.sndsize", &fmd_conf_size, "128k" },	/* rpc send buffer size */
290 { "rsrc.age", &fmd_conf_time, "30d" },		/* max age of old rsrc log */
291 { "rsrc.zero", &fmd_conf_bool, "false" },	/* zero rsrc cache on start? */
292 { "schemedir", &fmd_conf_string, _fmd_scheme_path }, /* path for scheme mods */
293 { "self.name", &fmd_conf_string, "fmd-self-diagnosis" }, /* self-diag module */
294 { "self.dict", &fmd_conf_list, "FMD.dict" },	/* self-diag dictionary list */
295 { "server", &fmd_conf_string, _fmd_uts.nodename }, /* server id for de auth */
296 { "strbuckets", &fmd_conf_uint32, "211" },	/* size of string hashes */
297 #ifdef DEBUG
298 { "trace.mode", &fmd_trmode_ops, "full" },	/* trace mode: none/lite/full */
299 #else
300 { "trace.mode", &fmd_trmode_ops, "lite" },	/* trace mode: none/lite/full */
301 #endif
302 { "trace.recs", &fmd_conf_uint32, "128" },	/* trace records per thread */
303 { "trace.frames", &fmd_conf_uint32, "16" },	/* max trace rec stack frames */
304 { "uuidlen", &fmd_conf_uint32, "36" },		/* UUID ASCII string length */
305 { "xprt.ttl", &fmd_conf_uint8, "1" },		/* default event time-to-live */
306 };
307 
308 /*
309  * Statistics maintained by fmd itself on behalf of various global subsystems.
310  * NOTE: FMD_TYPE_STRING statistics should not be used here.  If they are
311  * required in the future, the FMD_ADM_MODGSTAT service routine must change.
 *
 * fmd_create() inserts this collection into the root module's ustat as a flat
 * array of fmd_stat_t (element count = sizeof (_fmd_stats) /
 * sizeof (fmd_stat_t)) and keeps the result in d_stats, through which
 * individual members such as ds_log_replayed are updated.
 * NOTE(review): the initializer order must therefore match the member order
 * of fmd_statistics_t -- confirm against fmd.h when adding entries.
312  */
313 static fmd_statistics_t _fmd_stats = {
314 { "errlog.replayed", FMD_TYPE_UINT64, "total events replayed from errlog" },
315 { "errlog.partials", FMD_TYPE_UINT64, "events partially committed in errlog" },
316 { "errlog.enospc", FMD_TYPE_UINT64, "events not appended to errlog (ENOSPC)" },
317 { "fltlog.enospc", FMD_TYPE_UINT64, "events not appended to fltlog (ENOSPC)" },
318 { "log.enospc", FMD_TYPE_UINT64, "events not appended to other logs (ENOSPC)" },
319 { "dr.gen", FMD_TYPE_UINT64, "dynamic reconfiguration generation" },
320 { "topo.gen", FMD_TYPE_UINT64, "topology snapshot generation" },
321 { "topo.drgen", FMD_TYPE_UINT64, "current topology DR generation number" },
322 };
323 
324 void
325 fmd_create(fmd_t *dp, const char *arg0, const char *root, const char *conf)
326 {
327 	fmd_conf_path_t *pap;
328 	char file[PATH_MAX];
329 	const char *name;
330 	fmd_stat_t *sp;
331 	int i;
332 
333 	smbios_hdl_t *shp;
334 	smbios_system_t s1;
335 	smbios_info_t s2;
336 	id_t id;
337 
338 	di_prom_handle_t promh = DI_PROM_HANDLE_NIL;
339 	di_node_t rooth = DI_NODE_NIL;
340 	char *bufp;
341 
342 	(void) sysinfo(SI_PLATFORM, _fmd_plat, sizeof (_fmd_plat));
343 	(void) sysinfo(SI_ARCHITECTURE, _fmd_isa, sizeof (_fmd_isa));
344 	(void) uname(&_fmd_uts);
345 
346 	if ((shp = smbios_open(NULL, SMB_VERSION, 0, NULL)) != NULL) {
347 		if ((id = smbios_info_system(shp, &s1)) != SMB_ERR &&
348 		    smbios_info_common(shp, id, &s2) != SMB_ERR) {
349 			(void) strlcpy(_fmd_prod, s2.smbi_product, MAXNAMELEN);
350 			(void) strlcpy(_fmd_csn, s2.smbi_serial, MAXNAMELEN);
351 		}
352 		smbios_close(shp);
353 	} else if ((rooth = di_init("/", DINFOPROP)) != DI_NODE_NIL &&
354 	    (promh = di_prom_init()) != DI_PROM_HANDLE_NIL) {
355 		if (di_prom_prop_lookup_bytes(promh, rooth, "chassis-sn",
356 		    (unsigned char **)&bufp) != -1) {
357 			(void) strlcpy(_fmd_csn, bufp, MAXNAMELEN);
358 		}
359 	}
360 
361 	if (promh != DI_PROM_HANDLE_NIL)
362 		di_prom_fini(promh);
363 	if (rooth != DI_NODE_NIL)
364 		di_fini(rooth);
365 
366 	bzero(dp, sizeof (fmd_t));
367 
368 	dp->d_version = _fmd_version;
369 	dp->d_pname = fmd_strbasename(arg0);
370 	dp->d_pid = getpid();
371 
372 	if (pthread_key_create(&dp->d_key, NULL) != 0)
373 		fmd_error(EFMD_EXIT, "failed to create pthread key");
374 
375 	(void) pthread_mutex_init(&dp->d_xprt_lock, NULL);
376 	(void) pthread_mutex_init(&dp->d_err_lock, NULL);
377 	(void) pthread_mutex_init(&dp->d_thr_lock, NULL);
378 	(void) pthread_mutex_init(&dp->d_mod_lock, NULL);
379 	(void) pthread_mutex_init(&dp->d_stats_lock, NULL);
380 	(void) pthread_mutex_init(&dp->d_topo_lock, NULL);
381 	(void) pthread_rwlock_init(&dp->d_log_lock, NULL);
382 	(void) pthread_mutex_init(&dp->d_fmd_lock, NULL);
383 	(void) pthread_cond_init(&dp->d_fmd_cv, NULL);
384 
385 	/*
386 	 * A small number of properties must be set manually before we open
387 	 * the root configuration file.  These include any settings for our
388 	 * memory allocator and path expansion token values, because these
389 	 * values are needed by the routines in fmd_conf.c itself.  After
390 	 * the root configuration file is processed, we reset these properties
391 	 * based upon the latest values from the configuration file.
392 	 */
393 	dp->d_alloc_msecs = 10;
394 	dp->d_alloc_tries = 3;
395 	dp->d_str_buckets = 211;
396 
397 	dp->d_rootdir = root ? root : "";
398 	dp->d_platform = _fmd_plat;
399 	dp->d_machine = _fmd_uts.machine;
400 	dp->d_isaname = _fmd_isa;
401 
402 	dp->d_conf = fmd_conf_open(conf, sizeof (_fmd_conf) /
403 	    sizeof (_fmd_conf[0]), _fmd_conf, FMD_CONF_DEFER);
404 
405 	if (dp->d_conf == NULL) {
406 		fmd_error(EFMD_EXIT,
407 		    "failed to load required configuration properties\n");
408 	}
409 
410 	(void) fmd_conf_getprop(dp->d_conf, "alloc.msecs", &dp->d_alloc_msecs);
411 	(void) fmd_conf_getprop(dp->d_conf, "alloc.tries", &dp->d_alloc_tries);
412 	(void) fmd_conf_getprop(dp->d_conf, "strbuckets", &dp->d_str_buckets);
413 
414 	(void) fmd_conf_getprop(dp->d_conf, "platform", &dp->d_platform);
415 	(void) fmd_conf_getprop(dp->d_conf, "machine", &dp->d_machine);
416 	(void) fmd_conf_getprop(dp->d_conf, "isaname", &dp->d_isaname);
417 
418 	/*
419 	 * Manually specified rootdirs override config files, so only update
420 	 * d_rootdir based on the config files we parsed if no 'root' was set.
421 	 */
422 	if (root == NULL)
423 		(void) fmd_conf_getprop(dp->d_conf, "rootdir", &dp->d_rootdir);
424 	else
425 		(void) fmd_conf_setprop(dp->d_conf, "rootdir", dp->d_rootdir);
426 
427 	/*
428 	 * Once the base conf file properties are loaded, lookup the values
429 	 * of $conf_path and $conf_file and merge in any other conf files.
430 	 */
431 	(void) fmd_conf_getprop(dp->d_conf, "conf_path", &pap);
432 	(void) fmd_conf_getprop(dp->d_conf, "conf_file", &name);
433 
434 	for (i = 0; i < pap->cpa_argc; i++) {
435 		(void) snprintf(file, sizeof (file),
436 		    "%s/%s", pap->cpa_argv[i], name);
437 		if (access(file, F_OK) == 0)
438 			fmd_conf_merge(dp->d_conf, file);
439 	}
440 
441 	/*
442 	 * Update the value of fmd.d_fg based on "fg".  We cache this property
443 	 * because it must be accessed deep within fmd at fmd_verror() time.
444 	 * Update any other properties that must be cached for performance.
445 	 */
446 	(void) fmd_conf_getprop(fmd.d_conf, "fg", &fmd.d_fg);
447 	(void) fmd_conf_getprop(fmd.d_conf, "xprt.ttl", &fmd.d_xprt_ttl);
448 
449 	/*
450 	 * Initialize our custom libnvpair allocator and create an nvlist for
451 	 * authority elements corresponding to this instance of the daemon.
452 	 */
453 	(void) nv_alloc_init(&dp->d_nva, &fmd_nv_alloc_ops);
454 	dp->d_auth = fmd_protocol_authority();
455 
456 	/*
457 	 * The fmd_module_t for the root module must be created manually.  Most
458 	 * of it remains unused and zero, except for the few things we fill in.
459 	 */
460 	dp->d_rmod = fmd_zalloc(sizeof (fmd_module_t), FMD_SLEEP);
461 	dp->d_rmod->mod_name = fmd_strdup(dp->d_pname, FMD_SLEEP);
462 	dp->d_rmod->mod_fmri = fmd_protocol_fmri_module(dp->d_rmod);
463 
464 	fmd_list_append(&dp->d_mod_list, dp->d_rmod);
465 	fmd_module_hold(dp->d_rmod);
466 
467 	(void) pthread_mutex_init(&dp->d_rmod->mod_lock, NULL);
468 	(void) pthread_cond_init(&dp->d_rmod->mod_cv, NULL);
469 	(void) pthread_mutex_init(&dp->d_rmod->mod_stats_lock, NULL);
470 
471 	dp->d_rmod->mod_thread = fmd_thread_xcreate(dp->d_rmod, pthread_self());
472 	dp->d_rmod->mod_stats = fmd_zalloc(sizeof (fmd_modstat_t), FMD_SLEEP);
473 	dp->d_rmod->mod_ustat = fmd_ustat_create();
474 
475 	if (pthread_setspecific(dp->d_key, dp->d_rmod->mod_thread) != 0)
476 		fmd_error(EFMD_EXIT, "failed to attach main thread key");
477 
478 	if ((dp->d_stats = (fmd_statistics_t *)fmd_ustat_insert(
479 	    dp->d_rmod->mod_ustat, FMD_USTAT_NOALLOC, sizeof (_fmd_stats) /
480 	    sizeof (fmd_stat_t), (fmd_stat_t *)&_fmd_stats, NULL)) == NULL)
481 		fmd_error(EFMD_EXIT, "failed to initialize statistics");
482 
483 	(void) pthread_mutex_lock(&dp->d_rmod->mod_lock);
484 	dp->d_rmod->mod_flags |= FMD_MOD_INIT;
485 	(void) pthread_mutex_unlock(&dp->d_rmod->mod_lock);
486 
487 	/*
488 	 * In addition to inserting the _fmd_stats collection of program-wide
489 	 * statistics, we also insert a statistic named after each of our
490 	 * errors and update these counts in fmd_verror() (see fmd_subr.c).
491 	 */
492 	dp->d_errstats = sp = fmd_zalloc(sizeof (fmd_stat_t) *
493 	    (EFMD_END - EFMD_UNKNOWN), FMD_SLEEP);
494 
495 	for (i = 0; i < EFMD_END - EFMD_UNKNOWN; i++, sp++) {
496 		(void) snprintf(sp->fmds_name, sizeof (sp->fmds_name), "err.%s",
497 		    strrchr(fmd_errclass(EFMD_UNKNOWN + i), '.') + 1);
498 		sp->fmds_type = FMD_TYPE_UINT64;
499 	}
500 
501 	(void) fmd_ustat_insert(dp->d_rmod->mod_ustat, FMD_USTAT_NOALLOC,
502 	    EFMD_END - EFMD_UNKNOWN, dp->d_errstats, NULL);
503 }
504 
505 void
506 fmd_destroy(fmd_t *dp)
507 {
508 	fmd_module_t *mp;
509 	fmd_case_t *cp;
510 	int core;
511 
512 	(void) fmd_conf_getprop(fmd.d_conf, "core", &core);
513 
514 	fmd_rpc_fini();
515 
516 	if (dp->d_xprt_ids != NULL)
517 		fmd_xprt_suspend_all();
518 
519 	/*
520 	 * Unload the self-diagnosis module first.  This ensures that it does
521 	 * not get confused as we start unloading other modules, etc.  We must
522 	 * hold the dispq lock as a writer while doing so since it uses d_self.
523 	 */
524 	if (dp->d_self != NULL) {
525 		fmd_module_t *self;
526 
527 		(void) pthread_rwlock_wrlock(&dp->d_disp->dq_lock);
528 		self = dp->d_self;
529 		dp->d_self = NULL;
530 		(void) pthread_rwlock_unlock(&dp->d_disp->dq_lock);
531 
532 		fmd_module_unload(self);
533 		fmd_module_rele(self);
534 	}
535 
536 	/*
537 	 * Unload modules in reverse order *except* for the root module, which
538 	 * is first in the list.  This allows it to keep its thread and trace.
539 	 */
540 	for (mp = fmd_list_prev(&dp->d_mod_list); mp != dp->d_rmod; ) {
541 		fmd_module_unload(mp);
542 		mp = fmd_list_prev(mp);
543 	}
544 
545 	if (dp->d_mod_hash != NULL) {
546 		fmd_modhash_destroy(dp->d_mod_hash);
547 		dp->d_mod_hash = NULL;
548 	}
549 
550 	/*
551 	 * Close both log files now that modules are no longer active.  We must
552 	 * set these pointers to NULL in case any subsequent errors occur.
553 	 */
554 	if (dp->d_errlog != NULL) {
555 		fmd_log_rele(dp->d_errlog);
556 		dp->d_errlog = NULL;
557 	}
558 
559 	if (dp->d_fltlog != NULL) {
560 		fmd_log_rele(dp->d_fltlog);
561 		dp->d_fltlog = NULL;
562 	}
563 
564 	/*
565 	 * Now destroy the resource cache: each ASRU contains a case reference,
566 	 * which may in turn contain a pointer to a referenced owning module.
567 	 */
568 	if (dp->d_asrus != NULL) {
569 		fmd_asru_hash_destroy(dp->d_asrus);
570 		dp->d_asrus = NULL;
571 	}
572 
573 	/*
574 	 * Now that all data structures that refer to modules are torn down,
575 	 * no modules should be remaining on the module list except for d_rmod.
576 	 * If we trip one of these assertions, we're missing a rele somewhere.
577 	 */
578 	ASSERT(fmd_list_prev(&dp->d_mod_list) == dp->d_rmod);
579 	ASSERT(fmd_list_next(&dp->d_mod_list) == dp->d_rmod);
580 
581 	/*
582 	 * Now destroy the root module.  We clear its thread key first so any
583 	 * calls to fmd_trace() inside of the module code will be ignored.
584 	 */
585 	(void) pthread_setspecific(dp->d_key, NULL);
586 	fmd_module_lock(dp->d_rmod);
587 
588 	while ((cp = fmd_list_next(&dp->d_rmod->mod_cases)) != NULL)
589 		fmd_case_discard(cp);
590 
591 	fmd_module_unlock(dp->d_rmod);
592 	fmd_free(dp->d_rmod->mod_stats, sizeof (fmd_modstat_t));
593 	dp->d_rmod->mod_stats = NULL;
594 
595 	(void) pthread_mutex_lock(&dp->d_rmod->mod_lock);
596 	dp->d_rmod->mod_flags |= FMD_MOD_FINI;
597 	(void) pthread_mutex_unlock(&dp->d_rmod->mod_lock);
598 
599 	fmd_module_rele(dp->d_rmod);
600 	ASSERT(fmd_list_next(&dp->d_mod_list) == NULL);
601 
602 	/*
603 	 * Now destroy the remaining global data structures.  If 'core' was
604 	 * set to true, force a core dump so we can check for memory leaks.
605 	 */
606 	if (dp->d_cases != NULL)
607 		fmd_case_hash_destroy(dp->d_cases);
608 	if (dp->d_disp != NULL)
609 		fmd_dispq_destroy(dp->d_disp);
610 	if (dp->d_timers != NULL)
611 		fmd_timerq_destroy(dp->d_timers);
612 	if (dp->d_schemes != NULL)
613 		fmd_scheme_hash_destroy(dp->d_schemes);
614 	if (dp->d_xprt_ids != NULL)
615 		fmd_idspace_destroy(dp->d_xprt_ids);
616 
617 	if (dp->d_errstats != NULL) {
618 		fmd_free(dp->d_errstats,
619 		    sizeof (fmd_stat_t) * (EFMD_END - EFMD_UNKNOWN));
620 	}
621 
622 	if (dp->d_conf != NULL)
623 		fmd_conf_close(dp->d_conf);
624 
625 	fmd_topo_fini();
626 
627 	nvlist_free(dp->d_auth);
628 	(void) nv_alloc_fini(&dp->d_nva);
629 	dp->d_clockops->fto_fini(dp->d_clockptr);
630 
631 	(void) pthread_key_delete(dp->d_key);
632 	bzero(dp, sizeof (fmd_t));
633 
634 	if (core)
635 		fmd_panic("forcing core dump at user request\n");
636 }
637 
638 /*ARGSUSED*/
639 static void
640 fmd_gc(fmd_t *dp, id_t id, hrtime_t hrt)
641 {
642 	hrtime_t delta;
643 
644 	if (id != 0) {
645 		TRACE((FMD_DBG_MOD, "garbage collect start"));
646 		fmd_modhash_apply(dp->d_mod_hash, fmd_module_gc);
647 		TRACE((FMD_DBG_MOD, "garbage collect end"));
648 
649 		(void) pthread_rwlock_rdlock(&dp->d_log_lock);
650 		fmd_log_update(dp->d_errlog);
651 		(void) pthread_rwlock_unlock(&dp->d_log_lock);
652 	}
653 
654 	(void) fmd_conf_getprop(dp->d_conf, "gc_interval", &delta);
655 	(void) fmd_timerq_install(dp->d_timers, dp->d_rmod->mod_timerids,
656 	    (fmd_timer_f *)fmd_gc, dp, NULL, delta);
657 }
658 
659 /*ARGSUSED*/
660 static void
661 fmd_clear_aged_rsrcs(fmd_t *dp, id_t id, hrtime_t hrt)
662 {
663 	hrtime_t delta;
664 
665 	fmd_asru_clear_aged_rsrcs();
666 	(void) fmd_conf_getprop(dp->d_conf, "rsrc.age", &delta);
667 	(void) fmd_timerq_install(dp->d_timers, dp->d_rmod->mod_timerids,
668 	    (fmd_timer_f *)fmd_clear_aged_rsrcs, dp, NULL, delta/10);
669 }
670 
671 /*
672  * Events are committed to the errlog after cases are checkpointed.  If fmd
673  * crashes before an event is ever associated with a module, this function will
674  * be called to replay it to all subscribers.  If fmd crashes in between the
675  * subscriber checkpointing and committing the event in the error log, the
676  * module will have seen the event and we don't want to replay it.  So we look
677  * for the event in all modules and transition it to the proper state.  If
678  * it is found, we commit it to the error log and do not replay it.  The in-
679  * memory case search used by fmd_module_contains() et al isn't particularly
680  * efficient, but it is faster than doing read i/o's on every case event to
681  * check their status or write i/o's on every event to replay to update states.
682  * We can improve the efficiency of this lookup algorithm later if necessary.
683  */
684 /*ARGSUSED*/
685 static void
686 fmd_err_replay(fmd_log_t *lp, fmd_event_t *ep, fmd_t *dp)
687 {
688 	fmd_module_t *mp;
689 	fmd_stat_t *sp;
690 
691 	(void) pthread_mutex_lock(&dp->d_mod_lock);
692 
693 	for (mp = fmd_list_next(&dp->d_mod_list);
694 	    mp != NULL; mp = fmd_list_next(mp)) {
695 		if (fmd_module_contains(mp, ep)) {
696 			fmd_module_hold(mp);
697 			break;
698 		}
699 	}
700 
701 	(void) pthread_mutex_unlock(&dp->d_mod_lock);
702 
703 	if (mp != NULL) {
704 		fmd_event_commit(ep);
705 		fmd_module_rele(mp);
706 		sp = &dp->d_stats->ds_log_partials;
707 	} else {
708 		fmd_dispq_dispatch(dp->d_disp, ep, FMD_EVENT_DATA(ep));
709 		sp = &dp->d_stats->ds_log_replayed;
710 	}
711 
712 	(void) pthread_mutex_lock(&dp->d_stats_lock);
713 	sp->fmds_value.ui64++;
714 	(void) pthread_mutex_unlock(&dp->d_stats_lock);
715 }
716 
/*
 * Start routine for the threads that fmd_door() creates to service doors.
 * 'dip' is the door_info_t pointer handed to fmd_door() and is used here
 * only in the debug message.  The thread disables cancellation for itself
 * and then calls door_return() with no result data and no descriptors.
 * NOTE(review): per door_server_create(3C) this is how a new server thread
 * makes itself available for door invocations -- confirm against the man page.
 */
717 void
718 fmd_door_server(void *dip)
719 {
720 	fmd_dprintf(FMD_DBG_XPRT, "door server starting for %p\n", dip);
721 	(void) pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
722 	(void) door_return(NULL, 0, NULL, 0);
723 }
724 
725 /*
726  * Custom door server create callback.  Any fmd services that use doors will
727  * require those threads to have their fmd-specific TSD initialized, etc.
728  */
729 static void
730 fmd_door(door_info_t *dip)
731 {
732 	if (fmd_thread_create(fmd.d_rmod, fmd_door_server, dip) == NULL)
733 		fmd_panic("failed to create server for door %p", (void *)dip);
734 }
735 
736 /*
737  * This signal handler is installed for the client.thrsig signal to be used to
738  * force an auxiliary thread to wake up from a system call and return EINTR in
739  * response to a module's use of fmd_thr_signal().  We also trace the event.
 *
 * The handler does nothing apart from recording the trace message; its only
 * purpose is to exist so that delivery of the signal interrupts the thread.
 * fmd_run() installs it with an empty signal mask and sa_flags set to 0.
740  */
741 static void
742 fmd_signal(int sig)
743 {
744 	TRACE((FMD_DBG_MOD, "module thread received sig #%d", sig));
745 }
746 
747 void
748 fmd_run(fmd_t *dp, int pfd)
749 {
750 	char *nodc_key[] = { FMD_FLT_NODC, NULL };
751 	char *repair_key[] = { FM_LIST_REPAIRED_CLASS, NULL };
752 	char *resolve_key[] = { FM_LIST_RESOLVED_CLASS, NULL };
753 	char *update_key[] = { FM_LIST_UPDATED_CLASS, NULL };
754 	char code_str[128];
755 	struct sigaction act;
756 
757 	int status = FMD_EXIT_SUCCESS;
758 	const char *name;
759 	fmd_conf_path_t *pap;
760 	fmd_event_t *e;
761 	int dbout;
762 
763 	/*
764 	 * Cache all the current debug property settings in d_fmd_debug,
765 	 * d_fmd_dbout, d_hdl_debug, and d_hdl_dbout.  If a given debug mask
766 	 * is non-zero and the corresponding dbout mask is zero, set dbout
767 	 * to a sensible default value based on whether we have daemonized.
768 	 */
769 	(void) fmd_conf_getprop(dp->d_conf, "dbout", &dbout);
770 
771 	if (dp->d_fmd_debug != 0 && dbout == 0)
772 		dp->d_fmd_dbout = dp->d_fg? FMD_DBOUT_STDERR : FMD_DBOUT_SYSLOG;
773 	else
774 		dp->d_fmd_dbout = dbout;
775 
776 	(void) fmd_conf_getprop(dp->d_conf, "client.debug", &dp->d_hdl_debug);
777 	(void) fmd_conf_getprop(dp->d_conf, "client.dbout", &dbout);
778 
779 	if (dp->d_hdl_debug != 0 && dbout == 0)
780 		dp->d_hdl_dbout = dp->d_fg? FMD_DBOUT_STDERR : FMD_DBOUT_SYSLOG;
781 	else
782 		dp->d_hdl_dbout = dbout;
783 
784 	/*
785 	 * Initialize remaining major program data structures such as the
786 	 * clock, dispatch queues, log files, module hash collections, etc.
787 	 * This work is done here rather than in fmd_create() to permit the -o
788 	 * command-line option to modify properties after fmd_create() is done.
789 	 */
790 	name = dp->d_rootdir != NULL &&
791 	    *dp->d_rootdir != '\0' ? dp->d_rootdir : NULL;
792 
793 	/*
794 	 * The clock must be initialized before fmd_topo_init() because
795 	 * fmd_topo_update() calls fmd_time_gethrtime().
796 	 */
797 	dp->d_clockptr = dp->d_clockops->fto_init();
798 
799 	fmd_topo_init();
800 
801 	dp->d_xprt_ids = fmd_idspace_create("xprt_ids", 1, INT_MAX);
802 	fmd_xprt_suspend_all();
803 
804 	(void) door_server_create(fmd_door);
805 
806 	dp->d_rmod->mod_timerids = fmd_idspace_create(dp->d_pname, 1, 16);
807 	dp->d_timers = fmd_timerq_create();
808 	dp->d_disp = fmd_dispq_create();
809 	dp->d_cases = fmd_case_hash_create();
810 
811 	/*
812 	 * The root module's mod_queue is created with limit zero, making it
813 	 * act like /dev/null; anything inserted here is simply ignored.
814 	 */
815 	dp->d_rmod->mod_queue = fmd_eventq_create(dp->d_rmod,
816 	    &dp->d_rmod->mod_stats->ms_evqstat, &dp->d_rmod->mod_stats_lock, 0);
817 
818 	/*
819 	 * Once our subsystems that use signals have been set up, install the
820 	 * signal handler for the fmd_thr_signal() API.  Verify that the signal
821 	 * being used for this purpose doesn't conflict with something else.
822 	 */
823 	(void) fmd_conf_getprop(dp->d_conf, "client.thrsig", &dp->d_thr_sig);
824 
825 	if (sigaction(dp->d_thr_sig, NULL, &act) != 0) {
826 		fmd_error(EFMD_EXIT, "invalid signal selected for "
827 		    "client.thrsig property: %d\n", dp->d_thr_sig);
828 	}
829 
830 	if (act.sa_handler != SIG_IGN && act.sa_handler != SIG_DFL) {
831 		fmd_error(EFMD_EXIT, "signal selected for client.thrsig "
832 		    "property is already in use: %d\n", dp->d_thr_sig);
833 	}
834 
835 	act.sa_handler = fmd_signal;
836 	act.sa_flags = 0;
837 
838 	(void) sigemptyset(&act.sa_mask);
839 	(void) sigaction(dp->d_thr_sig, &act, NULL);
840 
841 	(void) fmd_conf_getprop(dp->d_conf, "schemedir", &name);
842 	dp->d_schemes = fmd_scheme_hash_create(dp->d_rootdir, name);
843 
844 	(void) fmd_conf_getprop(dp->d_conf, "log.rsrc", &name);
845 	dp->d_asrus = fmd_asru_hash_create(dp->d_rootdir, name);
846 
847 	(void) fmd_conf_getprop(dp->d_conf, "log.error", &name);
848 	dp->d_errlog = fmd_log_open(dp->d_rootdir, name, FMD_LOG_ERROR);
849 
850 	(void) fmd_conf_getprop(dp->d_conf, "log.fault", &name);
851 	dp->d_fltlog = fmd_log_open(dp->d_rootdir, name, FMD_LOG_FAULT);
852 
853 	if (dp->d_asrus == NULL || dp->d_errlog == NULL || dp->d_fltlog == NULL)
854 		fmd_error(EFMD_EXIT, "failed to initialize log files\n");
855 
856 	/*
857 	 * Before loading modules, create an empty control event which will act
858 	 * as a global barrier for module event processing.  Each module we
859 	 * load successfully will insert it at their head of their event queue,
860 	 * and then pause inside of fmd_ctl_rele() after dequeuing the event.
861 	 * This module barrier is required for two reasons:
862 	 *
863 	 * (a) During module loading, the restoration of case checkpoints may
864 	 *    result in a list.* event being recreated for which the intended
865 	 *    subscriber has not yet loaded depending on the load order. Such
866 	 *    events could then result in spurious "no subscriber" errors.
867 	 *
868 	 * (b) During errlog replay, a sequence of errors from a long time ago
869 	 *    may be replayed, and the module may attempt to install relative
870 	 *    timers associated with one or more of these events.  If errlog
871 	 *    replay were "racing" with active module threads, an event E1
872 	 *    that resulted in a relative timer T at time E1 + N nsec could
873 	 *    fire prior to an event E2 being enqueued, even if the relative
874 	 *    time ordering was E1 < E2 < E1 + N, causing mis-diagnosis.
875 	 */
876 	dp->d_mod_event = e = fmd_event_create(FMD_EVT_CTL,
877 	    FMD_HRT_NOW, NULL, fmd_ctl_init(NULL));
878 
879 	fmd_event_hold(e);
880 
881 	/*
882 	 * Once all data structures are initialized, we load all of our modules
883 	 * in order according to class in order to load up any subscriptions.
884 	 * Once built-in modules are loaded, we detach from our waiting parent.
885 	 */
886 	dp->d_mod_hash = fmd_modhash_create();
887 
888 	if (fmd_builtin_loadall(dp->d_mod_hash) != 0 && !dp->d_fg)
889 		fmd_error(EFMD_EXIT, "failed to initialize fault manager\n");
890 
891 	(void) fmd_conf_getprop(dp->d_conf, "self.name", &name);
892 	dp->d_self = fmd_modhash_lookup(dp->d_mod_hash, name);
893 
894 	if (dp->d_self != NULL) {
895 		if (fmd_module_dc_key2code(dp->d_self, nodc_key, code_str,
896 		    sizeof (code_str)) == 0)
897 			(void) fmd_conf_setprop(dp->d_conf, "nodiagcode",
898 			    code_str);
899 		if (fmd_module_dc_key2code(dp->d_self, repair_key, code_str,
900 		    sizeof (code_str)) == 0)
901 			(void) fmd_conf_setprop(dp->d_conf, "repaircode",
902 			    code_str);
903 		if (fmd_module_dc_key2code(dp->d_self, resolve_key, code_str,
904 		    sizeof (code_str)) == 0)
905 			(void) fmd_conf_setprop(dp->d_conf, "resolvecode",
906 			    code_str);
907 		if (fmd_module_dc_key2code(dp->d_self, update_key, code_str,
908 		    sizeof (code_str)) == 0)
909 			(void) fmd_conf_setprop(dp->d_conf, "updatecode",
910 			    code_str);
911 	}
912 
913 	fmd_rpc_init();
914 	dp->d_running = 1; /* we are now officially an active fmd */
915 
916 	/*
917 	 * Now that we're running, if a pipe fd was specified, write an exit
918 	 * status to it to indicate that our parent process can safely detach.
919 	 * Then proceed to loading the remaining non-built-in modules.
920 	 */
921 	if (pfd >= 0)
922 		(void) write(pfd, &status, sizeof (status));
923 
924 	/*
925 	 * Before loading all modules, repopulate the ASRU cache from its
926 	 * persistent repository on disk.  Then during module loading, the
927 	 * restoration of checkpoint files will reparent any active cases.
928 	 */
929 	fmd_asru_hash_refresh(dp->d_asrus);
930 
931 	(void) fmd_conf_getprop(dp->d_conf, "plugin.path", &pap);
932 	fmd_modhash_loadall(dp->d_mod_hash, pap, &fmd_rtld_ops, ".so");
933 
934 	(void) fmd_conf_getprop(dp->d_conf, "agent.path", &pap);
935 	fmd_modhash_loadall(dp->d_mod_hash, pap, &fmd_proc_ops, NULL);
936 
937 	/*
938 	 * With all modules loaded, replay fault events from the ASRU cache for
939 	 * any ASRUs that must be retired, replay error events from the errlog
940 	 * that did not finish processing the last time we ran, and then release
941 	 * the global module barrier by executing a final rele on d_mod_event.
942 	 */
943 	fmd_asru_hash_replay(dp->d_asrus);
944 
945 	(void) pthread_rwlock_rdlock(&dp->d_log_lock);
946 	fmd_log_replay(dp->d_errlog, (fmd_log_f *)fmd_err_replay, dp);
947 	fmd_log_update(dp->d_errlog);
948 	(void) pthread_rwlock_unlock(&dp->d_log_lock);
949 
950 	dp->d_mod_event = NULL;
951 	fmd_event_rele(e);
952 
953 	/*
954 	 * Now replay list.updated and list.repaired events
955 	 */
956 	fmd_case_repair_replay();
957 
958 	/*
959 	 * Finally, awaken any threads associated with receiving events from
960 	 * open transports and tell them to proceed with fmd_xprt_recv().
961 	 */
962 	fmd_xprt_resume_all();
963 	fmd_gc(dp, 0, 0);
964 	fmd_clear_aged_rsrcs(dp, 0, 0);
965 
966 	(void) pthread_mutex_lock(&dp->d_fmd_lock);
967 	dp->d_booted = 1;
968 	(void) pthread_cond_broadcast(&dp->d_fmd_cv);
969 	(void) pthread_mutex_unlock(&dp->d_fmd_lock);
970 }
971 
972 void
973 fmd_help(fmd_t *dp)
974 {
975 	const fmd_conf_mode_t *cmp;
976 
977 	(void) printf("Usage: %s -o debug=mode[,mode]\n", dp->d_pname);
978 
979 	for (cmp = _fmd_debug_modes; cmp->cm_name != NULL; cmp++)
980 		(void) printf("\t%s\t%s\n", cmp->cm_name, cmp->cm_desc);
981 }
982