xref: /titanic_50/usr/src/cmd/fm/fmd/common/fmd_module.c (revision 0167b58cea98965c58fab4be4e690b6e456f7440)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <signal.h>
30 #include <dirent.h>
31 #include <limits.h>
32 #include <alloca.h>
33 #include <unistd.h>
34 #include <stdio.h>
35 
36 #include <fmd_string.h>
37 #include <fmd_alloc.h>
38 #include <fmd_module.h>
39 #include <fmd_error.h>
40 #include <fmd_conf.h>
41 #include <fmd_dispq.h>
42 #include <fmd_eventq.h>
43 #include <fmd_timerq.h>
44 #include <fmd_subr.h>
45 #include <fmd_thread.h>
46 #include <fmd_ustat.h>
47 #include <fmd_case.h>
48 #include <fmd_protocol.h>
49 #include <fmd_buf.h>
50 #include <fmd_ckpt.h>
51 #include <fmd_xprt.h>
52 
53 #include <fmd.h>
54 
55 /*
56  * Template for per-module statistics installed by fmd on behalf of each active
57  * module.  These are used to initialize the per-module mp->mod_stats below.
58  * NOTE: FMD_TYPE_STRING statistics should not be used here.  If they are
59  * required in the future, the FMD_ADM_MODDSTAT service routine must change.
60  */
61 static const fmd_modstat_t _fmd_modstat_tmpl = {
62 {
63 { "fmd.dispatched", FMD_TYPE_UINT64, "total events dispatched to module" },
64 { "fmd.dequeued", FMD_TYPE_UINT64, "total events dequeued by module" },
65 { "fmd.prdequeued", FMD_TYPE_UINT64, "protocol events dequeued by module" },
66 { "fmd.dropped", FMD_TYPE_UINT64, "total events dropped on queue overflow" },
67 { "fmd.wcnt", FMD_TYPE_UINT32, "count of events waiting on queue" },
68 { "fmd.wtime", FMD_TYPE_TIME, "total wait time on queue" },
69 { "fmd.wlentime", FMD_TYPE_TIME, "total wait length * time product" },
70 { "fmd.wlastupdate", FMD_TYPE_TIME, "hrtime of last wait queue update" },
71 { "fmd.dtime", FMD_TYPE_TIME, "total processing time after dequeue" },
72 { "fmd.dlastupdate", FMD_TYPE_TIME, "hrtime of last event dequeue completion" },
73 },
74 { "fmd.loadtime", FMD_TYPE_TIME, "hrtime at which module was loaded" },
75 { "fmd.snaptime", FMD_TYPE_TIME, "hrtime of last statistics snapshot" },
76 { "fmd.accepted", FMD_TYPE_UINT64, "total events accepted by module" },
77 { "fmd.debugdrop", FMD_TYPE_UINT64, "dropped debug messages" },
78 { "fmd.memtotal", FMD_TYPE_SIZE, "total memory allocated by module" },
79 { "fmd.memlimit", FMD_TYPE_SIZE, "limit on total memory allocated" },
80 { "fmd.buftotal", FMD_TYPE_SIZE, "total buffer space used by module" },
81 { "fmd.buflimit", FMD_TYPE_SIZE, "limit on total buffer space" },
82 { "fmd.thrtotal", FMD_TYPE_UINT32, "total number of auxiliary threads" },
83 { "fmd.thrlimit", FMD_TYPE_UINT32, "limit on number of auxiliary threads" },
84 { "fmd.caseopen", FMD_TYPE_UINT64, "cases currently open by module" },
85 { "fmd.casesolved", FMD_TYPE_UINT64, "total cases solved by module" },
86 { "fmd.caseclosed", FMD_TYPE_UINT64, "total cases closed by module" },
87 { "fmd.ckptsave", FMD_TYPE_BOOL, "save checkpoints for module" },
88 { "fmd.ckptrestore", FMD_TYPE_BOOL, "restore checkpoints for module" },
89 { "fmd.ckptzero", FMD_TYPE_BOOL, "zeroed checkpoint at startup" },
90 { "fmd.ckptcnt", FMD_TYPE_UINT64, "number of checkpoints taken" },
91 { "fmd.ckpttime", FMD_TYPE_TIME, "total checkpoint time" },
92 { "fmd.xprtopen", FMD_TYPE_UINT32, "total number of open transports" },
93 { "fmd.xprtlimit", FMD_TYPE_UINT32, "limit on number of open transports" },
94 { "fmd.xprtqlimit", FMD_TYPE_UINT32, "limit on transport event queue length" },
95 };
96 
97 static void
98 fmd_module_start(void *arg)
99 {
100 	fmd_module_t *mp = arg;
101 	fmd_event_t *ep;
102 	fmd_xprt_t *xp;
103 
104 	(void) pthread_mutex_lock(&mp->mod_lock);
105 
106 	if (mp->mod_ops->mop_init(mp) != 0 || mp->mod_error != 0) {
107 		if (mp->mod_error == 0)
108 			mp->mod_error = errno ? errno : EFMD_MOD_INIT;
109 		goto out;
110 	}
111 
112 	if (fmd.d_mod_event != NULL)
113 		fmd_eventq_insert_at_head(mp->mod_queue, fmd.d_mod_event);
114 
115 	ASSERT(MUTEX_HELD(&mp->mod_lock));
116 	mp->mod_flags |= FMD_MOD_INIT;
117 
118 	(void) pthread_cond_broadcast(&mp->mod_cv);
119 	(void) pthread_mutex_unlock(&mp->mod_lock);
120 
121 	/*
122 	 * If the module opened any transports while executing _fmd_init(),
123 	 * they are suspended. Now that _fmd_init() is done, wake them up.
124 	 */
125 	for (xp = fmd_list_next(&mp->mod_transports);
126 	    xp != NULL; xp = fmd_list_next(xp))
127 		fmd_xprt_xresume(xp, FMD_XPRT_ISUSPENDED);
128 
129 	/*
130 	 * Wait for events to arrive by checking mod_error and then sleeping in
131 	 * fmd_eventq_delete().  If a NULL event is returned, the eventq has
132 	 * been aborted and we continue on to call fini and exit the thread.
133 	 */
134 	while ((ep = fmd_eventq_delete(mp->mod_queue)) != NULL) {
135 		/*
136 		 * If the module has failed, discard the event without ever
137 		 * passing it to the module and go back to sleep.
138 		 */
139 		if (mp->mod_error != 0) {
140 			fmd_eventq_done(mp->mod_queue);
141 			fmd_event_rele(ep);
142 			continue;
143 		}
144 
145 		mp->mod_ops->mop_dispatch(mp, ep);
146 		fmd_eventq_done(mp->mod_queue);
147 
148 		/*
149 		 * Once mop_dispatch() is complete, grab the lock and perform
150 		 * any event-specific post-processing.  Finally, if necessary,
151 		 * checkpoint the state of the module after this event.
152 		 */
153 		fmd_module_lock(mp);
154 
155 		if (FMD_EVENT_TYPE(ep) == FMD_EVT_CLOSE)
156 			fmd_case_delete(FMD_EVENT_DATA(ep));
157 
158 		fmd_ckpt_save(mp);
159 		fmd_module_unlock(mp);
160 		fmd_event_rele(ep);
161 	}
162 
163 	if (mp->mod_ops->mop_fini(mp) != 0 && mp->mod_error == 0)
164 		mp->mod_error = errno ? errno : EFMD_MOD_FINI;
165 
166 	(void) pthread_mutex_lock(&mp->mod_lock);
167 	mp->mod_flags |= FMD_MOD_FINI;
168 
169 out:
170 	(void) pthread_cond_broadcast(&mp->mod_cv);
171 	(void) pthread_mutex_unlock(&mp->mod_lock);
172 }
173 
174 fmd_module_t *
175 fmd_module_create(const char *path, const fmd_modops_t *ops)
176 {
177 	fmd_module_t *mp = fmd_zalloc(sizeof (fmd_module_t), FMD_SLEEP);
178 
179 	char buf[PATH_MAX], *p;
180 	const char *dir;
181 	uint32_t limit;
182 	int err;
183 
184 	(void) strlcpy(buf, fmd_strbasename(path), sizeof (buf));
185 	if ((p = strrchr(buf, '.')) != NULL && strcmp(p, ".so") == 0)
186 		*p = '\0'; /* strip trailing .so from any module name */
187 
188 	(void) pthread_mutex_init(&mp->mod_lock, NULL);
189 	(void) pthread_cond_init(&mp->mod_cv, NULL);
190 	(void) pthread_mutex_init(&mp->mod_stats_lock, NULL);
191 
192 	mp->mod_name = fmd_strdup(buf, FMD_SLEEP);
193 	mp->mod_path = fmd_strdup(path, FMD_SLEEP);
194 	mp->mod_ops = ops;
195 	mp->mod_ustat = fmd_ustat_create();
196 
197 	(void) fmd_conf_getprop(fmd.d_conf, "ckpt.dir", &dir);
198 	(void) snprintf(buf, sizeof (buf),
199 	    "%s/%s/%s", fmd.d_rootdir, dir, mp->mod_name);
200 
201 	mp->mod_ckpt = fmd_strdup(buf, FMD_SLEEP);
202 
203 	(void) fmd_conf_getprop(fmd.d_conf, "client.tmrlim", &limit);
204 	mp->mod_timerids = fmd_idspace_create(mp->mod_name, 1, limit + 1);
205 	mp->mod_threads = fmd_idspace_create(mp->mod_name, 0, INT_MAX);
206 
207 	fmd_buf_hash_create(&mp->mod_bufs);
208 	fmd_serd_hash_create(&mp->mod_serds);
209 
210 	(void) pthread_mutex_lock(&fmd.d_mod_lock);
211 	fmd_list_append(&fmd.d_mod_list, mp);
212 	(void) pthread_mutex_unlock(&fmd.d_mod_lock);
213 
214 	/*
215 	 * Initialize the module statistics that are kept on its behalf by fmd.
216 	 * These are set up using a template defined at the top of this file.
217 	 */
218 	if ((mp->mod_stats = (fmd_modstat_t *)fmd_ustat_insert(mp->mod_ustat,
219 	    FMD_USTAT_ALLOC, sizeof (_fmd_modstat_tmpl) / sizeof (fmd_stat_t),
220 	    (fmd_stat_t *)&_fmd_modstat_tmpl, NULL)) == NULL) {
221 		fmd_error(EFMD_MOD_INIT, "failed to initialize per-mod stats");
222 		fmd_module_destroy(mp);
223 		return (NULL);
224 	}
225 
226 	(void) fmd_conf_getprop(fmd.d_conf, "client.evqlim", &limit);
227 
228 	mp->mod_queue = fmd_eventq_create(mp,
229 	    &mp->mod_stats->ms_evqstat, &mp->mod_stats_lock, limit);
230 
231 	(void) fmd_conf_getprop(fmd.d_conf, "client.memlim",
232 	    &mp->mod_stats->ms_memlimit.fmds_value.ui64);
233 
234 	(void) fmd_conf_getprop(fmd.d_conf, "client.buflim",
235 	    &mp->mod_stats->ms_buflimit.fmds_value.ui64);
236 
237 	(void) fmd_conf_getprop(fmd.d_conf, "client.thrlim",
238 	    &mp->mod_stats->ms_thrlimit.fmds_value.ui32);
239 
240 	(void) fmd_conf_getprop(fmd.d_conf, "client.xprtlim",
241 	    &mp->mod_stats->ms_xprtlimit.fmds_value.ui32);
242 
243 	(void) fmd_conf_getprop(fmd.d_conf, "client.xprtqlim",
244 	    &mp->mod_stats->ms_xprtqlimit.fmds_value.ui32);
245 
246 	(void) fmd_conf_getprop(fmd.d_conf, "ckpt.save",
247 	    &mp->mod_stats->ms_ckpt_save.fmds_value.bool);
248 
249 	(void) fmd_conf_getprop(fmd.d_conf, "ckpt.restore",
250 	    &mp->mod_stats->ms_ckpt_restore.fmds_value.bool);
251 
252 	(void) fmd_conf_getprop(fmd.d_conf, "ckpt.zero",
253 	    &mp->mod_stats->ms_ckpt_zeroed.fmds_value.bool);
254 
255 	if (mp->mod_stats->ms_ckpt_zeroed.fmds_value.bool)
256 		fmd_ckpt_delete(mp); /* blow away any pre-existing checkpoint */
257 
258 	/*
259 	 * Place a hold on the module and grab the module lock before creating
260 	 * the module's thread to ensure that it cannot destroy the module and
261 	 * that it cannot call ops->mop_init() before we're done setting up.
262 	 * NOTE: from now on, we must use fmd_module_rele() for error paths.
263 	 */
264 	fmd_module_hold(mp);
265 	(void) pthread_mutex_lock(&mp->mod_lock);
266 	mp->mod_stats->ms_loadtime.fmds_value.ui64 = gethrtime();
267 	mp->mod_thread = fmd_thread_create(mp, fmd_module_start, mp);
268 
269 	if (mp->mod_thread == NULL) {
270 		fmd_error(EFMD_MOD_THR, "failed to create thread for %s", path);
271 		(void) pthread_mutex_unlock(&mp->mod_lock);
272 		fmd_module_rele(mp);
273 		return (NULL);
274 	}
275 
276 	/*
277 	 * At this point our module structure is nearly finished and its thread
278 	 * is starting execution in fmd_module_start() above, which will begin
279 	 * by blocking for mod_lock.  We now drop mod_lock and wait for either
280 	 * FMD_MOD_INIT or mod_error to be set before proceeding.
281 	 */
282 	while (!(mp->mod_flags & FMD_MOD_INIT) && mp->mod_error == 0)
283 		(void) pthread_cond_wait(&mp->mod_cv, &mp->mod_lock);
284 
285 	/*
286 	 * If the module has failed to initialize, copy its errno to the errno
287 	 * of the caller, wait for it to unload, and then destroy it.
288 	 */
289 	if (!(mp->mod_flags & FMD_MOD_INIT)) {
290 		err = mp->mod_error;
291 		(void) pthread_mutex_unlock(&mp->mod_lock);
292 
293 		if (err == EFMD_CKPT_INVAL)
294 			fmd_ckpt_rename(mp); /* move aside bad checkpoint */
295 
296 		/*
297 		 * If we're in the background, keep quiet about failure to
298 		 * load because a handle wasn't registered: this is a module's
299 		 * way of telling us it didn't want to be loaded for some
300 		 * reason related to system configuration.  If we're in the
301 		 * foreground we log this too in order to inform developers.
302 		 */
303 		if (fmd.d_fg || err != EFMD_HDL_INIT) {
304 			fmd_error(EFMD_MOD_INIT, "failed to load %s: %s\n",
305 			    path, fmd_strerror(err));
306 		}
307 
308 		fmd_module_unload(mp);
309 		fmd_module_rele(mp);
310 
311 		(void) fmd_set_errno(err);
312 		return (NULL);
313 	}
314 
315 	(void) pthread_cond_broadcast(&mp->mod_cv);
316 	(void) pthread_mutex_unlock(&mp->mod_lock);
317 
318 	fmd_dprintf(FMD_DBG_MOD, "loaded module %s\n", mp->mod_name);
319 	return (mp);
320 }
321 
322 static void
323 fmd_module_untimeout(fmd_idspace_t *ids, id_t id, fmd_module_t *mp)
324 {
325 	void *arg = fmd_timerq_remove(fmd.d_timers, ids, id);
326 
327 	/*
328 	 * The root module calls fmd_timerq_install() directly and must take
329 	 * responsibility for any cleanup of timer arguments that is required.
330 	 * All other modules use fmd_modtimer_t's as the arg data; free them.
331 	 */
332 	if (arg != NULL && mp != fmd.d_rmod)
333 		fmd_free(arg, sizeof (fmd_modtimer_t));
334 }
335 
336 void
337 fmd_module_unload(fmd_module_t *mp)
338 {
339 	(void) pthread_mutex_lock(&mp->mod_lock);
340 
341 	if (mp->mod_flags & FMD_MOD_QUIT) {
342 		(void) pthread_mutex_unlock(&mp->mod_lock);
343 		return; /* module is already unloading */
344 	}
345 
346 	ASSERT(mp->mod_thread != NULL);
347 	mp->mod_flags |= FMD_MOD_QUIT;
348 
349 	if (mp->mod_queue != NULL)
350 		fmd_eventq_abort(mp->mod_queue);
351 
352 	/*
353 	 * Wait for the module's thread to stop processing events and call
354 	 * _fmd_fini() and exit.  We do this by waiting for FMD_MOD_FINI to be
355 	 * set if INIT was set, and then attempting to join with the thread.
356 	 */
357 	while ((mp->mod_flags & (FMD_MOD_INIT | FMD_MOD_FINI)) == FMD_MOD_INIT)
358 		(void) pthread_cond_wait(&mp->mod_cv, &mp->mod_lock);
359 
360 	(void) pthread_cond_broadcast(&mp->mod_cv);
361 	(void) pthread_mutex_unlock(&mp->mod_lock);
362 
363 	fmd_thread_destroy(mp->mod_thread, FMD_THREAD_JOIN);
364 	mp->mod_thread = NULL;
365 
366 	/*
367 	 * Once the module is no longer active, clean up any data structures
368 	 * that are only required when the module is loaded.
369 	 */
370 	fmd_module_lock(mp);
371 
372 	if (mp->mod_timerids != NULL) {
373 		fmd_idspace_apply(mp->mod_timerids,
374 		    (void (*)())fmd_module_untimeout, mp);
375 
376 		fmd_idspace_destroy(mp->mod_timerids);
377 		mp->mod_timerids = NULL;
378 	}
379 
380 	if (mp->mod_threads != NULL) {
381 		fmd_idspace_destroy(mp->mod_threads);
382 		mp->mod_threads = NULL;
383 	}
384 
385 	fmd_buf_hash_destroy(&mp->mod_bufs);
386 	fmd_serd_hash_destroy(&mp->mod_serds);
387 
388 	fmd_module_unlock(mp);
389 	fmd_dprintf(FMD_DBG_MOD, "unloaded module %s\n", mp->mod_name);
390 }
391 
392 void
393 fmd_module_destroy(fmd_module_t *mp)
394 {
395 	fmd_conf_formal_t *cfp = mp->mod_argv;
396 	int i;
397 
398 	ASSERT(MUTEX_HELD(&mp->mod_lock));
399 
400 	if (mp->mod_thread != NULL) {
401 		(void) pthread_mutex_unlock(&mp->mod_lock);
402 		fmd_module_unload(mp);
403 		(void) pthread_mutex_lock(&mp->mod_lock);
404 	}
405 
406 	ASSERT(mp->mod_thread == NULL);
407 	ASSERT(mp->mod_refs == 0);
408 
409 	/*
410 	 * Once the module's thread is dead, we can safely remove the module
411 	 * from global visibility and by removing it from d_mod_list.  Any
412 	 * modhash pointers are already gone by virtue of mod_refs being zero.
413 	 */
414 	(void) pthread_mutex_lock(&fmd.d_mod_lock);
415 	fmd_list_delete(&fmd.d_mod_list, mp);
416 	(void) pthread_mutex_unlock(&fmd.d_mod_lock);
417 
418 	/*
419 	 * Once the module is no longer processing events and no longer visible
420 	 * through any program data structures, we can free all of its content.
421 	 */
422 	if (mp->mod_queue != NULL) {
423 		fmd_eventq_destroy(mp->mod_queue);
424 		mp->mod_queue = NULL;
425 	}
426 
427 	if (mp->mod_ustat != NULL) {
428 		(void) pthread_mutex_lock(&mp->mod_stats_lock);
429 		fmd_ustat_destroy(mp->mod_ustat);
430 		mp->mod_ustat = NULL;
431 		mp->mod_stats = NULL;
432 		(void) pthread_mutex_unlock(&mp->mod_stats_lock);
433 	}
434 
435 	for (i = 0; i < mp->mod_dictc; i++)
436 		fm_dc_closedict(mp->mod_dictv[i]);
437 
438 	fmd_free(mp->mod_dictv, sizeof (struct fm_dc_handle *) * mp->mod_dictc);
439 
440 	if (mp->mod_conf != NULL)
441 		fmd_conf_close(mp->mod_conf);
442 
443 	for (i = 0; i < mp->mod_argc; i++, cfp++) {
444 		fmd_strfree((char *)cfp->cf_name);
445 		fmd_strfree((char *)cfp->cf_default);
446 	}
447 
448 	fmd_free(mp->mod_argv, sizeof (fmd_conf_formal_t) * mp->mod_argc);
449 
450 	fmd_strfree(mp->mod_name);
451 	fmd_strfree(mp->mod_path);
452 	fmd_strfree(mp->mod_ckpt);
453 	nvlist_free(mp->mod_fmri);
454 
455 	fmd_free(mp, sizeof (fmd_module_t));
456 }
457 
458 /*
459  * fmd_module_error() is called after the stack is unwound from a call to
460  * fmd_module_abort() to indicate that the module has failed.  The mod_error
461  * field is used to hold the error code of the first fatal error to the module.
462  * An EFMD_MOD_FAIL event is then created and sent to fmd-self-diagnosis.
463  */
464 static void
465 fmd_module_error(fmd_module_t *mp, int err)
466 {
467 	fmd_event_t *e;
468 	nvlist_t *nvl;
469 	char *class;
470 
471 	ASSERT(MUTEX_HELD(&mp->mod_lock));
472 	ASSERT(err != 0);
473 
474 	TRACE((FMD_DBG_MOD, "module aborted: err=%d", err));
475 
476 	if (mp->mod_error == 0)
477 		mp->mod_error = err;
478 
479 	if (mp == fmd.d_self)
480 		return; /* do not post event if fmd.d_self itself fails */
481 
482 	/*
483 	 * Send an error indicating the module has now failed to fmd.d_self.
484 	 * Since the error causing the failure has already been logged by
485 	 * fmd_api_xerror(), we do not need to bother logging this event.
486 	 * It only exists for the purpose of notifying fmd.d_self that it can
487 	 * close the case associated with this module because mod_error is set.
488 	 */
489 	nvl = fmd_protocol_moderror(mp, EFMD_MOD_FAIL, fmd_strerror(err));
490 	(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
491 	e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
492 	fmd_dispq_dispatch(fmd.d_disp, e, class);
493 }
494 
495 void
496 fmd_module_dispatch(fmd_module_t *mp, fmd_event_t *e)
497 {
498 	const fmd_hdl_ops_t *ops = mp->mod_info->fmdi_ops;
499 	fmd_event_impl_t *ep = (fmd_event_impl_t *)e;
500 	fmd_hdl_t *hdl = (fmd_hdl_t *)mp;
501 	fmd_modtimer_t *t;
502 	volatile int err;
503 
504 	/*
505 	 * Before calling the appropriate module callback, enter the module as
506 	 * if by fmd_module_enter() and establish mod_jmpbuf for any aborts.
507 	 */
508 	(void) pthread_mutex_lock(&mp->mod_lock);
509 
510 	ASSERT(!(mp->mod_flags & FMD_MOD_BUSY));
511 	mp->mod_flags |= FMD_MOD_BUSY;
512 
513 	if ((err = setjmp(mp->mod_jmpbuf)) != 0) {
514 		(void) pthread_mutex_lock(&mp->mod_lock);
515 		fmd_module_error(mp, err);
516 	}
517 
518 	(void) pthread_cond_broadcast(&mp->mod_cv);
519 	(void) pthread_mutex_unlock(&mp->mod_lock);
520 
521 	/*
522 	 * If it's the first time through fmd_module_dispatch(), call the
523 	 * appropriate module callback based on the event type.  If the call
524 	 * triggers an fmd_module_abort(), we'll return to setjmp() above with
525 	 * err set to a non-zero value and then bypass this before exiting.
526 	 */
527 	if (err == 0) {
528 		switch (ep->ev_type) {
529 		case FMD_EVT_PROTOCOL:
530 			ops->fmdo_recv(hdl, e, ep->ev_nvl, ep->ev_data);
531 			break;
532 		case FMD_EVT_TIMEOUT:
533 			t = ep->ev_data;
534 			ASSERT(t->mt_mod == mp);
535 			ops->fmdo_timeout(hdl, t->mt_id, t->mt_arg);
536 			break;
537 		case FMD_EVT_CLOSE:
538 			ops->fmdo_close(hdl, ep->ev_data);
539 			break;
540 		case FMD_EVT_STATS:
541 			ops->fmdo_stats(hdl);
542 			fmd_modstat_publish(mp);
543 			break;
544 		case FMD_EVT_GC:
545 			ops->fmdo_gc(hdl);
546 			break;
547 		case FMD_EVT_PUBLISH:
548 			fmd_case_publish(ep->ev_data, FMD_CASE_CURRENT);
549 			break;
550 		}
551 	}
552 
553 	fmd_module_exit(mp);
554 }
555 
556 int
557 fmd_module_transport(fmd_module_t *mp, fmd_xprt_t *xp, fmd_event_t *e)
558 {
559 	fmd_event_impl_t *ep = (fmd_event_impl_t *)e;
560 	fmd_hdl_t *hdl = (fmd_hdl_t *)mp;
561 
562 	ASSERT(ep->ev_type == FMD_EVT_PROTOCOL);
563 	return (mp->mod_info->fmdi_ops->fmdo_send(hdl, xp, e, ep->ev_nvl));
564 }
565 
566 void
567 fmd_module_timeout(fmd_modtimer_t *t, id_t id, hrtime_t hrt)
568 {
569 	fmd_event_t *e;
570 
571 	t->mt_id = id; /* save id in case we need to delete from eventq */
572 	e = fmd_event_create(FMD_EVT_TIMEOUT, hrt, NULL, t);
573 	fmd_eventq_insert_at_time(t->mt_mod->mod_queue, e);
574 }
575 
576 /*
577  * Garbage collection is initiated by a timer callback once per day or at the
578  * request of fmadm.  Purge old SERD entries and send the module a GC event.
579  */
580 void
581 fmd_module_gc(fmd_module_t *mp)
582 {
583 	fmd_hdl_info_t *info;
584 	fmd_event_t *e;
585 
586 	if (mp->mod_error != 0)
587 		return; /* do not do anything if the module has failed */
588 
589 	fmd_module_lock(mp);
590 
591 	if ((info = mp->mod_info) != NULL) {
592 		fmd_serd_hash_apply(&mp->mod_serds,
593 		    (fmd_serd_eng_f *)fmd_serd_eng_gc, NULL);
594 	}
595 
596 	fmd_module_unlock(mp);
597 
598 	if (info != NULL) {
599 		e = fmd_event_create(FMD_EVT_GC, FMD_HRT_NOW, NULL, NULL);
600 		fmd_eventq_insert_at_head(mp->mod_queue, e);
601 	}
602 }
603 
604 void
605 fmd_module_trygc(fmd_module_t *mp)
606 {
607 	if (fmd_module_trylock(mp)) {
608 		fmd_serd_hash_apply(&mp->mod_serds,
609 		    (fmd_serd_eng_f *)fmd_serd_eng_gc, NULL);
610 		fmd_module_unlock(mp);
611 	}
612 }
613 
614 int
615 fmd_module_contains(fmd_module_t *mp, fmd_event_t *ep)
616 {
617 	fmd_case_t *cp;
618 	int rv = 0;
619 
620 	fmd_module_lock(mp);
621 
622 	for (cp = fmd_list_next(&mp->mod_cases);
623 	    cp != NULL; cp = fmd_list_next(cp)) {
624 		if ((rv = fmd_case_contains(cp, ep)) != 0)
625 			break;
626 	}
627 
628 	if (rv == 0)
629 		rv = fmd_serd_hash_contains(&mp->mod_serds, ep);
630 
631 	fmd_module_unlock(mp);
632 	return (rv);
633 }
634 
635 void
636 fmd_module_setdirty(fmd_module_t *mp)
637 {
638 	(void) pthread_mutex_lock(&mp->mod_lock);
639 	mp->mod_flags |= FMD_MOD_MDIRTY;
640 	(void) pthread_mutex_unlock(&mp->mod_lock);
641 }
642 
643 void
644 fmd_module_setcdirty(fmd_module_t *mp)
645 {
646 	(void) pthread_mutex_lock(&mp->mod_lock);
647 	mp->mod_flags |= FMD_MOD_CDIRTY;
648 	(void) pthread_mutex_unlock(&mp->mod_lock);
649 }
650 
651 void
652 fmd_module_clrdirty(fmd_module_t *mp)
653 {
654 	fmd_case_t *cp;
655 
656 	fmd_module_lock(mp);
657 
658 	if (mp->mod_flags & FMD_MOD_CDIRTY) {
659 		for (cp = fmd_list_next(&mp->mod_cases);
660 		    cp != NULL; cp = fmd_list_next(cp))
661 			fmd_case_clrdirty(cp);
662 	}
663 
664 	if (mp->mod_flags & FMD_MOD_MDIRTY) {
665 		fmd_serd_hash_apply(&mp->mod_serds,
666 		    (fmd_serd_eng_f *)fmd_serd_eng_clrdirty, NULL);
667 		fmd_buf_hash_commit(&mp->mod_bufs);
668 	}
669 
670 	(void) pthread_mutex_lock(&mp->mod_lock);
671 	mp->mod_flags &= ~(FMD_MOD_MDIRTY | FMD_MOD_CDIRTY);
672 	(void) pthread_mutex_unlock(&mp->mod_lock);
673 
674 	fmd_module_unlock(mp);
675 }
676 
677 void
678 fmd_module_commit(fmd_module_t *mp)
679 {
680 	fmd_case_t *cp;
681 
682 	ASSERT(fmd_module_locked(mp));
683 
684 	if (mp->mod_flags & FMD_MOD_CDIRTY) {
685 		for (cp = fmd_list_next(&mp->mod_cases);
686 		    cp != NULL; cp = fmd_list_next(cp))
687 			fmd_case_commit(cp);
688 	}
689 
690 	if (mp->mod_flags & FMD_MOD_MDIRTY) {
691 		fmd_serd_hash_apply(&mp->mod_serds,
692 		    (fmd_serd_eng_f *)fmd_serd_eng_commit, NULL);
693 		fmd_buf_hash_commit(&mp->mod_bufs);
694 	}
695 
696 	(void) pthread_mutex_lock(&mp->mod_lock);
697 	mp->mod_flags &= ~(FMD_MOD_MDIRTY | FMD_MOD_CDIRTY);
698 	(void) pthread_mutex_unlock(&mp->mod_lock);
699 
700 	mp->mod_gen++;
701 }
702 
703 void
704 fmd_module_lock(fmd_module_t *mp)
705 {
706 	pthread_t self = pthread_self();
707 
708 	(void) pthread_mutex_lock(&mp->mod_lock);
709 
710 	while (mp->mod_flags & FMD_MOD_LOCK) {
711 		if (mp->mod_owner != self)
712 			(void) pthread_cond_wait(&mp->mod_cv, &mp->mod_lock);
713 		else
714 			fmd_panic("recursive module lock of %p\n", (void *)mp);
715 	}
716 
717 	mp->mod_owner = self;
718 	mp->mod_flags |= FMD_MOD_LOCK;
719 
720 	(void) pthread_cond_broadcast(&mp->mod_cv);
721 	(void) pthread_mutex_unlock(&mp->mod_lock);
722 }
723 
724 void
725 fmd_module_unlock(fmd_module_t *mp)
726 {
727 	(void) pthread_mutex_lock(&mp->mod_lock);
728 
729 	ASSERT(mp->mod_owner == pthread_self());
730 	ASSERT(mp->mod_flags & FMD_MOD_LOCK);
731 
732 	mp->mod_owner = 0;
733 	mp->mod_flags &= ~FMD_MOD_LOCK;
734 
735 	(void) pthread_cond_broadcast(&mp->mod_cv);
736 	(void) pthread_mutex_unlock(&mp->mod_lock);
737 }
738 
739 int
740 fmd_module_trylock(fmd_module_t *mp)
741 {
742 	(void) pthread_mutex_lock(&mp->mod_lock);
743 
744 	if (mp->mod_flags & FMD_MOD_LOCK) {
745 		(void) pthread_mutex_unlock(&mp->mod_lock);
746 		return (0);
747 	}
748 
749 	mp->mod_owner = pthread_self();
750 	mp->mod_flags |= FMD_MOD_LOCK;
751 
752 	(void) pthread_cond_broadcast(&mp->mod_cv);
753 	(void) pthread_mutex_unlock(&mp->mod_lock);
754 
755 	return (1);
756 }
757 
758 int
759 fmd_module_locked(fmd_module_t *mp)
760 {
761 	return ((mp->mod_flags & FMD_MOD_LOCK) &&
762 	    mp->mod_owner == pthread_self());
763 }
764 
765 int
766 fmd_module_enter(fmd_module_t *mp, void (*func)(fmd_hdl_t *))
767 {
768 	volatile int err;
769 
770 	(void) pthread_mutex_lock(&mp->mod_lock);
771 
772 	ASSERT(!(mp->mod_flags & FMD_MOD_BUSY));
773 	mp->mod_flags |= FMD_MOD_BUSY;
774 
775 	if ((err = setjmp(mp->mod_jmpbuf)) != 0) {
776 		(void) pthread_mutex_lock(&mp->mod_lock);
777 		fmd_module_error(mp, err);
778 	}
779 
780 	(void) pthread_cond_broadcast(&mp->mod_cv);
781 	(void) pthread_mutex_unlock(&mp->mod_lock);
782 
783 	/*
784 	 * If it's the first time through fmd_module_enter(), call the provided
785 	 * function on the module.  If no fmd_module_abort() results, we will
786 	 * fall through and return zero.  Otherwise we'll longjmp with an err,
787 	 * return to the setjmp() above, and return the error to our caller.
788 	 */
789 	if (err == 0 && func != NULL)
790 		(*func)((fmd_hdl_t *)mp);
791 
792 	return (err);
793 }
794 
795 void
796 fmd_module_exit(fmd_module_t *mp)
797 {
798 	(void) pthread_mutex_lock(&mp->mod_lock);
799 
800 	ASSERT(mp->mod_flags & FMD_MOD_BUSY);
801 	mp->mod_flags &= ~FMD_MOD_BUSY;
802 
803 	(void) pthread_cond_broadcast(&mp->mod_cv);
804 	(void) pthread_mutex_unlock(&mp->mod_lock);
805 }
806 
807 /*
808  * If the client.error policy has been set by a developer, stop or dump core
809  * based on the policy; if we stop and are resumed we'll continue and execute
810  * the default behavior to discard events in fmd_module_start().  If the caller
811  * is the primary module thread, we reach this state by longjmp'ing back to
812  * fmd_module_enter(), above.  If the caller is an auxiliary thread, we cancel
813  * ourself and arrange for the primary thread to call fmd_module_abort().
814  */
815 void
816 fmd_module_abort(fmd_module_t *mp, int err)
817 {
818 	uint_t policy = FMD_CERROR_UNLOAD;
819 	pthread_t tid = pthread_self();
820 
821 	(void) fmd_conf_getprop(fmd.d_conf, "client.error", &policy);
822 
823 	if (policy == FMD_CERROR_STOP) {
824 		fmd_error(err, "stopping after %s in client %s (%p)\n",
825 		    fmd_errclass(err), mp->mod_name, (void *)mp);
826 		(void) raise(SIGSTOP);
827 	} else if (policy == FMD_CERROR_ABORT) {
828 		fmd_panic("aborting due to %s in client %s (%p)\n",
829 		    fmd_errclass(err), mp->mod_name, (void *)mp);
830 	}
831 
832 	/*
833 	 * If the caller is an auxiliary thread, cancel the current thread.  We
834 	 * prefer to cancel because it affords developers the option of using
835 	 * the pthread_cleanup* APIs.  If cancellations have been disabled,
836 	 * fall through to forcing the current thread to exit.  In either case
837 	 * we update mod_error (if zero) to enter the failed state.  Once that
838 	 * is set, further events received by the module will be discarded.
839 	 *
840 	 * We also set the FMD_MOD_FAIL bit, indicating an unrecoverable error.
841 	 * When an auxiliary thread fails, the module is left in a delicate
842 	 * state where it is likely not able to continue execution (even to
843 	 * execute its _fmd_fini() routine) because our caller may hold locks
844 	 * that are private to the module and can no longer be released.  The
845 	 * FMD_MOD_FAIL bit forces fmd_api_module_lock() to abort if any other
846 	 * module threads reach an API call, in an attempt to get them to exit.
847 	 */
848 	if (tid != mp->mod_thread->thr_tid) {
849 		(void) pthread_mutex_lock(&mp->mod_lock);
850 
851 		if (mp->mod_error == 0)
852 			mp->mod_error = err;
853 
854 		mp->mod_flags |= FMD_MOD_FAIL;
855 		(void) pthread_mutex_unlock(&mp->mod_lock);
856 
857 		(void) pthread_cancel(tid);
858 		pthread_exit(NULL);
859 	}
860 
861 	ASSERT(mp->mod_flags & FMD_MOD_BUSY);
862 	longjmp(mp->mod_jmpbuf, err);
863 }
864 
865 void
866 fmd_module_hold(fmd_module_t *mp)
867 {
868 	(void) pthread_mutex_lock(&mp->mod_lock);
869 
870 	TRACE((FMD_DBG_MOD, "hold %p (%s/%u)\n",
871 	    (void *)mp, mp->mod_name, mp->mod_refs));
872 
873 	mp->mod_refs++;
874 	ASSERT(mp->mod_refs != 0);
875 
876 	(void) pthread_mutex_unlock(&mp->mod_lock);
877 }
878 
879 void
880 fmd_module_rele(fmd_module_t *mp)
881 {
882 	(void) pthread_mutex_lock(&mp->mod_lock);
883 
884 	TRACE((FMD_DBG_MOD, "rele %p (%s/%u)\n",
885 	    (void *)mp, mp->mod_name, mp->mod_refs));
886 
887 	ASSERT(mp->mod_refs != 0);
888 
889 	if (--mp->mod_refs == 0)
890 		fmd_module_destroy(mp);
891 	else
892 		(void) pthread_mutex_unlock(&mp->mod_lock);
893 }
894 
895 /*
896  * Wrapper around libdiagcode's fm_dc_opendict() to load module dictionaries.
897  * If the dictionary open is successful, the new dictionary is added to the
898  * mod_dictv[] array and mod_codelen is updated with the new maximum length.
899  */
900 int
901 fmd_module_dc_opendict(fmd_module_t *mp, const char *dict)
902 {
903 	struct fm_dc_handle *dcp, **dcv;
904 	char *dictdir, *dictnam, *p;
905 	size_t len;
906 
907 	ASSERT(fmd_module_locked(mp));
908 
909 	dictnam = alloca(strlen(dict) + 1);
910 	(void) strcpy(dictnam, fmd_strbasename(dict));
911 
912 	if ((p = strrchr(dictnam, '.')) != NULL &&
913 	    strcmp(p, ".dict") == 0)
914 		*p = '\0'; /* eliminate any trailing .dict suffix */
915 
916 	/*
917 	 * If 'dict' is an absolute path, dictdir = $rootdir/`dirname dict`
918 	 * If 'dict' is not an absolute path, dictdir = $dictdir/`dirname dict`
919 	 */
920 	if (dict[0] == '/') {
921 		len = strlen(fmd.d_rootdir) + strlen(dict) + 1;
922 		dictdir = alloca(len);
923 		(void) snprintf(dictdir, len, "%s%s", fmd.d_rootdir, dict);
924 		(void) fmd_strdirname(dictdir);
925 	} else {
926 		(void) fmd_conf_getprop(fmd.d_conf, "dictdir", &p);
927 		len = strlen(fmd.d_rootdir) + strlen(p) + strlen(dict) + 3;
928 		dictdir = alloca(len);
929 		(void) snprintf(dictdir, len,
930 		    "%s/%s/%s", fmd.d_rootdir, p, dict);
931 		(void) fmd_strdirname(dictdir);
932 	}
933 
934 	fmd_dprintf(FMD_DBG_MOD, "module %s opening %s -> %s/%s.dict\n",
935 	    mp->mod_name, dict, dictdir, dictnam);
936 
937 	if ((dcp = fm_dc_opendict(FM_DC_VERSION, dictdir, dictnam)) == NULL)
938 		return (-1); /* errno is set for us */
939 
940 	dcv = fmd_alloc(sizeof (dcp) * (mp->mod_dictc + 1), FMD_SLEEP);
941 	bcopy(mp->mod_dictv, dcv, sizeof (dcp) * mp->mod_dictc);
942 	fmd_free(mp->mod_dictv, sizeof (dcp) * mp->mod_dictc);
943 	mp->mod_dictv = dcv;
944 	mp->mod_dictv[mp->mod_dictc++] = dcp;
945 
946 	len = fm_dc_codelen(dcp);
947 	mp->mod_codelen = MAX(mp->mod_codelen, len);
948 
949 	return (0);
950 }
951 
952 /*
953  * Wrapper around libdiagcode's fm_dc_key2code() that examines all the module's
954  * dictionaries.  We adhere to the libdiagcode return values and semantics.
955  */
956 int
957 fmd_module_dc_key2code(fmd_module_t *mp,
958     char *const keys[], char *code, size_t codelen)
959 {
960 	int i, err;
961 
962 	for (i = 0; i < mp->mod_dictc; i++) {
963 		if ((err = fm_dc_key2code(mp->mod_dictv[i], (const char **)keys,
964 		    code, codelen)) == 0 || errno != ENOMSG)
965 			return (err);
966 	}
967 
968 	return (fmd_set_errno(ENOMSG));
969 }
970 
971 fmd_modhash_t *
972 fmd_modhash_create(void)
973 {
974 	fmd_modhash_t *mhp = fmd_alloc(sizeof (fmd_modhash_t), FMD_SLEEP);
975 
976 	(void) pthread_rwlock_init(&mhp->mh_lock, NULL);
977 	mhp->mh_hashlen = fmd.d_str_buckets;
978 	mhp->mh_hash = fmd_zalloc(sizeof (void *) * mhp->mh_hashlen, FMD_SLEEP);
979 	mhp->mh_nelems = 0;
980 
981 	return (mhp);
982 }
983 
984 void
985 fmd_modhash_destroy(fmd_modhash_t *mhp)
986 {
987 	fmd_module_t *mp, *nmp;
988 	uint_t i;
989 
990 	for (i = 0; i < mhp->mh_hashlen; i++) {
991 		for (mp = mhp->mh_hash[i]; mp != NULL; mp = nmp) {
992 			nmp = mp->mod_next;
993 			mp->mod_next = NULL;
994 			fmd_module_rele(mp);
995 		}
996 	}
997 
998 	fmd_free(mhp->mh_hash, sizeof (void *) * mhp->mh_hashlen);
999 	(void) pthread_rwlock_destroy(&mhp->mh_lock);
1000 	fmd_free(mhp, sizeof (fmd_modhash_t));
1001 }
1002 
1003 static void
1004 fmd_modhash_loaddir(fmd_modhash_t *mhp, const char *dir,
1005     const fmd_modops_t *ops)
1006 {
1007 	char path[PATH_MAX];
1008 	struct dirent *dp;
1009 	const char *p;
1010 	DIR *dirp;
1011 
1012 	if ((dirp = opendir(dir)) == NULL)
1013 		return; /* failed to open directory; just skip it */
1014 
1015 	while ((dp = readdir(dirp)) != NULL) {
1016 		if (dp->d_name[0] == '.')
1017 			continue; /* skip "." and ".." */
1018 
1019 		if ((p = strrchr(dp->d_name, '.')) != NULL &&
1020 		    strcmp(p, ".conf") == 0)
1021 			continue; /* skip .conf files */
1022 
1023 		(void) snprintf(path, sizeof (path), "%s/%s", dir, dp->d_name);
1024 		(void) fmd_modhash_load(mhp, path, ops);
1025 	}
1026 
1027 	(void) closedir(dirp);
1028 }
1029 
1030 void
1031 fmd_modhash_loadall(fmd_modhash_t *mhp, const fmd_conf_path_t *pap,
1032     const fmd_modops_t *ops)
1033 {
1034 	int i;
1035 
1036 	for (i = 0; i < pap->cpa_argc; i++)
1037 		fmd_modhash_loaddir(mhp, pap->cpa_argv[i], ops);
1038 }
1039 
1040 void
1041 fmd_modhash_apply(fmd_modhash_t *mhp, void (*func)(fmd_module_t *))
1042 {
1043 	fmd_module_t *mp, *np;
1044 	uint_t i;
1045 
1046 	(void) pthread_rwlock_rdlock(&mhp->mh_lock);
1047 
1048 	for (i = 0; i < mhp->mh_hashlen; i++) {
1049 		for (mp = mhp->mh_hash[i]; mp != NULL; mp = np) {
1050 			np = mp->mod_next;
1051 			func(mp);
1052 		}
1053 	}
1054 
1055 	(void) pthread_rwlock_unlock(&mhp->mh_lock);
1056 }
1057 
1058 void
1059 fmd_modhash_tryapply(fmd_modhash_t *mhp, void (*func)(fmd_module_t *))
1060 {
1061 	fmd_module_t *mp, *np;
1062 	uint_t i;
1063 
1064 	if (mhp == NULL || pthread_rwlock_tryrdlock(&mhp->mh_lock) != 0)
1065 		return; /* not initialized or couldn't grab lock */
1066 
1067 	for (i = 0; i < mhp->mh_hashlen; i++) {
1068 		for (mp = mhp->mh_hash[i]; mp != NULL; mp = np) {
1069 			np = mp->mod_next;
1070 			func(mp);
1071 		}
1072 	}
1073 
1074 	(void) pthread_rwlock_unlock(&mhp->mh_lock);
1075 }
1076 
1077 void
1078 fmd_modhash_dispatch(fmd_modhash_t *mhp, fmd_event_t *ep)
1079 {
1080 	fmd_module_t *mp;
1081 	uint_t i;
1082 
1083 	fmd_event_hold(ep);
1084 	(void) pthread_rwlock_rdlock(&mhp->mh_lock);
1085 
1086 	for (i = 0; i < mhp->mh_hashlen; i++) {
1087 		for (mp = mhp->mh_hash[i]; mp != NULL; mp = mp->mod_next) {
1088 			/*
1089 			 * If FMD_MOD_INIT is set but MOD_FINI, MOD_QUIT, and
1090 			 * mod_error are all zero, then the module is active:
1091 			 * enqueue the event in the corresponding event queue.
1092 			 */
1093 			(void) pthread_mutex_lock(&mp->mod_lock);
1094 
1095 			if ((mp->mod_flags & (FMD_MOD_INIT | FMD_MOD_FINI |
1096 			    FMD_MOD_QUIT)) == FMD_MOD_INIT && !mp->mod_error)
1097 				fmd_eventq_insert_at_time(mp->mod_queue, ep);
1098 
1099 			(void) pthread_mutex_unlock(&mp->mod_lock);
1100 		}
1101 	}
1102 
1103 	(void) pthread_rwlock_unlock(&mhp->mh_lock);
1104 	fmd_event_rele(ep);
1105 }
1106 
1107 fmd_module_t *
1108 fmd_modhash_lookup(fmd_modhash_t *mhp, const char *name)
1109 {
1110 	fmd_module_t *mp;
1111 	uint_t h;
1112 
1113 	(void) pthread_rwlock_rdlock(&mhp->mh_lock);
1114 	h = fmd_strhash(name) % mhp->mh_hashlen;
1115 
1116 	for (mp = mhp->mh_hash[h]; mp != NULL; mp = mp->mod_next) {
1117 		if (strcmp(name, mp->mod_name) == 0)
1118 			break;
1119 	}
1120 
1121 	if (mp != NULL)
1122 		fmd_module_hold(mp);
1123 	else
1124 		(void) fmd_set_errno(EFMD_MOD_NOMOD);
1125 
1126 	(void) pthread_rwlock_unlock(&mhp->mh_lock);
1127 	return (mp);
1128 }
1129 
1130 fmd_module_t *
1131 fmd_modhash_load(fmd_modhash_t *mhp, const char *path, const fmd_modops_t *ops)
1132 {
1133 	char name[PATH_MAX], *p;
1134 	fmd_module_t *mp;
1135 	int tries = 0;
1136 	uint_t h;
1137 
1138 	(void) strlcpy(name, fmd_strbasename(path), sizeof (name));
1139 	if ((p = strrchr(name, '.')) != NULL && strcmp(p, ".so") == 0)
1140 		*p = '\0'; /* strip trailing .so from any module name */
1141 
1142 	(void) pthread_rwlock_wrlock(&mhp->mh_lock);
1143 	h = fmd_strhash(name) % mhp->mh_hashlen;
1144 
1145 	/*
1146 	 * First check to see if a module is already present in the hash table
1147 	 * for this name.  If so, the module is already loaded: skip it.
1148 	 */
1149 	for (mp = mhp->mh_hash[h]; mp != NULL; mp = mp->mod_next) {
1150 		if (strcmp(name, mp->mod_name) == 0)
1151 			break;
1152 	}
1153 
1154 	if (mp != NULL) {
1155 		(void) pthread_rwlock_unlock(&mhp->mh_lock);
1156 		(void) fmd_set_errno(EFMD_MOD_LOADED);
1157 		return (NULL);
1158 	}
1159 
1160 	/*
1161 	 * fmd_module_create() will return a held (as if by fmd_module_hold())
1162 	 * module.  We leave this hold in place to correspond to the hash-in.
1163 	 */
1164 	while ((mp = fmd_module_create(path, ops)) == NULL) {
1165 		if (tries++ != 0 || errno != EFMD_CKPT_INVAL) {
1166 			(void) pthread_rwlock_unlock(&mhp->mh_lock);
1167 			return (NULL); /* errno is set for us */
1168 		}
1169 	}
1170 
1171 	mp->mod_hash = mhp;
1172 	mp->mod_next = mhp->mh_hash[h];
1173 
1174 	mhp->mh_hash[h] = mp;
1175 	mhp->mh_nelems++;
1176 
1177 	(void) pthread_rwlock_unlock(&mhp->mh_lock);
1178 	return (mp);
1179 }
1180 
1181 int
1182 fmd_modhash_unload(fmd_modhash_t *mhp, const char *name)
1183 {
1184 	fmd_module_t *mp, **pp;
1185 	uint_t h;
1186 
1187 	(void) pthread_rwlock_wrlock(&mhp->mh_lock);
1188 	h = fmd_strhash(name) % mhp->mh_hashlen;
1189 	pp = &mhp->mh_hash[h];
1190 
1191 	for (mp = *pp; mp != NULL; mp = mp->mod_next) {
1192 		if (strcmp(name, mp->mod_name) == 0)
1193 			break;
1194 		else
1195 			pp = &mp->mod_next;
1196 	}
1197 
1198 	if (mp == NULL) {
1199 		(void) pthread_rwlock_unlock(&mhp->mh_lock);
1200 		return (fmd_set_errno(EFMD_MOD_NOMOD));
1201 	}
1202 
1203 	*pp = mp->mod_next;
1204 	mp->mod_next = NULL;
1205 
1206 	ASSERT(mhp->mh_nelems != 0);
1207 	mhp->mh_nelems--;
1208 
1209 	(void) pthread_rwlock_unlock(&mhp->mh_lock);
1210 
1211 	fmd_module_unload(mp);
1212 	fmd_module_rele(mp);
1213 
1214 	return (0);
1215 }
1216 
1217 void
1218 fmd_modstat_publish(fmd_module_t *mp)
1219 {
1220 	(void) pthread_mutex_lock(&mp->mod_lock);
1221 
1222 	ASSERT(mp->mod_flags & FMD_MOD_STSUB);
1223 	mp->mod_flags |= FMD_MOD_STPUB;
1224 	(void) pthread_cond_broadcast(&mp->mod_cv);
1225 
1226 	while (mp->mod_flags & FMD_MOD_STPUB)
1227 		(void) pthread_cond_wait(&mp->mod_cv, &mp->mod_lock);
1228 
1229 	(void) pthread_mutex_unlock(&mp->mod_lock);
1230 }
1231 
1232 int
1233 fmd_modstat_snapshot(fmd_module_t *mp, fmd_ustat_snap_t *uss)
1234 {
1235 	fmd_event_t *e;
1236 	int err;
1237 
1238 	/*
1239 	 * Grab the module lock and wait for the STSUB bit to be clear.  Then
1240 	 * set it to indicate we are a subscriber and everyone else must wait.
1241 	 */
1242 	(void) pthread_mutex_lock(&mp->mod_lock);
1243 
1244 	while (mp->mod_error == 0 && (mp->mod_flags & FMD_MOD_STSUB))
1245 		(void) pthread_cond_wait(&mp->mod_cv, &mp->mod_lock);
1246 
1247 	if (mp->mod_error != 0) {
1248 		(void) pthread_mutex_unlock(&mp->mod_lock);
1249 		return (fmd_set_errno(EFMD_HDL_ABORT));
1250 	}
1251 
1252 	mp->mod_flags |= FMD_MOD_STSUB;
1253 	(void) pthread_cond_broadcast(&mp->mod_cv);
1254 	(void) pthread_mutex_unlock(&mp->mod_lock);
1255 
1256 	/*
1257 	 * Create a stats pseudo-event and dispatch it to the module, forcing
1258 	 * it to next execute its custom snapshot routine (or the empty one).
1259 	 */
1260 	e = fmd_event_create(FMD_EVT_STATS, FMD_HRT_NOW, NULL, NULL);
1261 	fmd_eventq_insert_at_head(mp->mod_queue, e);
1262 
1263 	/*
1264 	 * Grab the module lock and then wait on mod_cv for STPUB to be set,
1265 	 * indicating the snapshot routine is completed and the module is idle.
1266 	 */
1267 	(void) pthread_mutex_lock(&mp->mod_lock);
1268 
1269 	while (mp->mod_error == 0 && !(mp->mod_flags & FMD_MOD_STPUB))
1270 		(void) pthread_cond_wait(&mp->mod_cv, &mp->mod_lock);
1271 
1272 	if (mp->mod_error != 0) {
1273 		(void) pthread_mutex_unlock(&mp->mod_lock);
1274 		return (fmd_set_errno(EFMD_HDL_ABORT));
1275 	}
1276 
1277 	(void) pthread_cond_broadcast(&mp->mod_cv);
1278 	(void) pthread_mutex_unlock(&mp->mod_lock);
1279 
1280 	/*
1281 	 * Update ms_snaptime and take the actual snapshot of the various
1282 	 * statistics while the module is quiescent and waiting for us.
1283 	 */
1284 	(void) pthread_mutex_lock(&mp->mod_stats_lock);
1285 
1286 	if (mp->mod_stats != NULL) {
1287 		mp->mod_stats->ms_snaptime.fmds_value.ui64 = gethrtime();
1288 		err = fmd_ustat_snapshot(mp->mod_ustat, uss);
1289 	} else
1290 		err = fmd_set_errno(EFMD_HDL_ABORT);
1291 
1292 	(void) pthread_mutex_unlock(&mp->mod_stats_lock);
1293 
1294 	/*
1295 	 * With the snapshot complete, grab the module lock and clear both
1296 	 * STSUB and STPUB, permitting everyone to wake up and continue.
1297 	 */
1298 	(void) pthread_mutex_lock(&mp->mod_lock);
1299 
1300 	ASSERT(mp->mod_flags & FMD_MOD_STSUB);
1301 	ASSERT(mp->mod_flags & FMD_MOD_STPUB);
1302 	mp->mod_flags &= ~(FMD_MOD_STSUB | FMD_MOD_STPUB);
1303 
1304 	(void) pthread_cond_broadcast(&mp->mod_cv);
1305 	(void) pthread_mutex_unlock(&mp->mod_lock);
1306 
1307 	return (err);
1308 }
1309