xref: /titanic_44/usr/src/cmd/fm/fmd/common/fmd_module.c (revision fc33347812f84907261f6fd501e2409da108b8d8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 #include <signal.h>
27 #include <dirent.h>
28 #include <limits.h>
29 #include <alloca.h>
30 #include <unistd.h>
31 #include <stdio.h>
32 
33 #include <fmd_string.h>
34 #include <fmd_alloc.h>
35 #include <fmd_module.h>
36 #include <fmd_error.h>
37 #include <fmd_conf.h>
38 #include <fmd_dispq.h>
39 #include <fmd_eventq.h>
40 #include <fmd_timerq.h>
41 #include <fmd_subr.h>
42 #include <fmd_thread.h>
43 #include <fmd_ustat.h>
44 #include <fmd_case.h>
45 #include <fmd_protocol.h>
46 #include <fmd_buf.h>
47 #include <fmd_ckpt.h>
48 #include <fmd_xprt.h>
49 #include <fmd_topo.h>
50 
51 #include <fmd.h>
52 
/*
 * Template for per-module statistics installed by fmd on behalf of each active
 * module.  fmd_module_create() hands this template to fmd_ustat_insert() and
 * stores the result in mp->mod_stats.  Each entry is a statistic name, a
 * value type, and a human-readable description.
 * NOTE: FMD_TYPE_STRING statistics should not be used here.  If they are
 * required in the future, the FMD_ADM_MODDSTAT service routine must change.
 */
static const fmd_modstat_t _fmd_modstat_tmpl = {
{
/*
 * Nested group of event queue statistics -- presumably the ms_evqstat member
 * that fmd_module_create() passes to fmd_eventq_create(); confirm against the
 * fmd_modstat_t declaration.
 */
{ "fmd.dispatched", FMD_TYPE_UINT64, "total events dispatched to module" },
{ "fmd.dequeued", FMD_TYPE_UINT64, "total events dequeued by module" },
{ "fmd.prdequeued", FMD_TYPE_UINT64, "protocol events dequeued by module" },
{ "fmd.dropped", FMD_TYPE_UINT64, "total events dropped on queue overflow" },
{ "fmd.wcnt", FMD_TYPE_UINT32, "count of events waiting on queue" },
{ "fmd.wtime", FMD_TYPE_TIME, "total wait time on queue" },
{ "fmd.wlentime", FMD_TYPE_TIME, "total wait length * time product" },
{ "fmd.wlastupdate", FMD_TYPE_TIME, "hrtime of last wait queue update" },
{ "fmd.dtime", FMD_TYPE_TIME, "total processing time after dequeue" },
{ "fmd.dlastupdate", FMD_TYPE_TIME, "hrtime of last event dequeue completion" },
},
/* load/snapshot times, resource limits, case, checkpoint and transport stats */
{ "fmd.loadtime", FMD_TYPE_TIME, "hrtime at which module was loaded" },
{ "fmd.snaptime", FMD_TYPE_TIME, "hrtime of last statistics snapshot" },
{ "fmd.accepted", FMD_TYPE_UINT64, "total events accepted by module" },
{ "fmd.debugdrop", FMD_TYPE_UINT64, "dropped debug messages" },
{ "fmd.memtotal", FMD_TYPE_SIZE, "total memory allocated by module" },
{ "fmd.memlimit", FMD_TYPE_SIZE, "limit on total memory allocated" },
{ "fmd.buftotal", FMD_TYPE_SIZE, "total buffer space used by module" },
{ "fmd.buflimit", FMD_TYPE_SIZE, "limit on total buffer space" },
{ "fmd.thrtotal", FMD_TYPE_UINT32, "total number of auxiliary threads" },
{ "fmd.thrlimit", FMD_TYPE_UINT32, "limit on number of auxiliary threads" },
{ "fmd.caseopen", FMD_TYPE_UINT64, "cases currently open by module" },
{ "fmd.casesolved", FMD_TYPE_UINT64, "total cases solved by module" },
{ "fmd.caseclosed", FMD_TYPE_UINT64, "total cases closed by module" },
{ "fmd.ckptsave", FMD_TYPE_BOOL, "save checkpoints for module" },
{ "fmd.ckptrestore", FMD_TYPE_BOOL, "restore checkpoints for module" },
{ "fmd.ckptzero", FMD_TYPE_BOOL, "zeroed checkpoint at startup" },
{ "fmd.ckptcnt", FMD_TYPE_UINT64, "number of checkpoints taken" },
{ "fmd.ckpttime", FMD_TYPE_TIME, "total checkpoint time" },
{ "fmd.xprtopen", FMD_TYPE_UINT32, "total number of open transports" },
{ "fmd.xprtlimit", FMD_TYPE_UINT32, "limit on number of open transports" },
{ "fmd.xprtqlimit", FMD_TYPE_UINT32, "limit on transport event queue length" },
};
94 
95 static void
96 fmd_module_start(void *arg)
97 {
98 	fmd_module_t *mp = arg;
99 	fmd_event_t *ep;
100 	fmd_xprt_t *xp;
101 
102 	(void) pthread_mutex_lock(&mp->mod_lock);
103 
104 	if (mp->mod_ops->mop_init(mp) != 0 || mp->mod_error != 0) {
105 		if (mp->mod_error == 0)
106 			mp->mod_error = errno ? errno : EFMD_MOD_INIT;
107 		goto out;
108 	}
109 
110 	if (fmd.d_mod_event != NULL)
111 		fmd_eventq_insert_at_head(mp->mod_queue, fmd.d_mod_event);
112 
113 	ASSERT(MUTEX_HELD(&mp->mod_lock));
114 	mp->mod_flags |= FMD_MOD_INIT;
115 
116 	(void) pthread_cond_broadcast(&mp->mod_cv);
117 	(void) pthread_mutex_unlock(&mp->mod_lock);
118 
119 	/*
120 	 * If the module opened any transports while executing _fmd_init(),
121 	 * they are suspended. Now that _fmd_init() is done, wake them up.
122 	 */
123 	for (xp = fmd_list_next(&mp->mod_transports);
124 	    xp != NULL; xp = fmd_list_next(xp))
125 		fmd_xprt_xresume(xp, FMD_XPRT_ISUSPENDED);
126 
127 	/*
128 	 * Wait for events to arrive by checking mod_error and then sleeping in
129 	 * fmd_eventq_delete().  If a NULL event is returned, the eventq has
130 	 * been aborted and we continue on to call fini and exit the thread.
131 	 */
132 	while ((ep = fmd_eventq_delete(mp->mod_queue)) != NULL) {
133 		/*
134 		 * If the module has failed, discard the event without ever
135 		 * passing it to the module and go back to sleep.
136 		 */
137 		if (mp->mod_error != 0) {
138 			fmd_eventq_done(mp->mod_queue);
139 			fmd_event_rele(ep);
140 			continue;
141 		}
142 
143 		mp->mod_ops->mop_dispatch(mp, ep);
144 		fmd_eventq_done(mp->mod_queue);
145 
146 		/*
147 		 * Once mop_dispatch() is complete, grab the lock and perform
148 		 * any event-specific post-processing.  Finally, if necessary,
149 		 * checkpoint the state of the module after this event.
150 		 */
151 		fmd_module_lock(mp);
152 
153 		if (FMD_EVENT_TYPE(ep) == FMD_EVT_CLOSE)
154 			fmd_case_delete(FMD_EVENT_DATA(ep));
155 
156 		fmd_ckpt_save(mp);
157 		fmd_module_unlock(mp);
158 		fmd_event_rele(ep);
159 	}
160 
161 	if (mp->mod_ops->mop_fini(mp) != 0 && mp->mod_error == 0)
162 		mp->mod_error = errno ? errno : EFMD_MOD_FINI;
163 
164 	(void) pthread_mutex_lock(&mp->mod_lock);
165 	mp->mod_flags |= FMD_MOD_FINI;
166 
167 out:
168 	(void) pthread_cond_broadcast(&mp->mod_cv);
169 	(void) pthread_mutex_unlock(&mp->mod_lock);
170 }
171 
172 fmd_module_t *
173 fmd_module_create(const char *path, const fmd_modops_t *ops)
174 {
175 	fmd_module_t *mp = fmd_zalloc(sizeof (fmd_module_t), FMD_SLEEP);
176 
177 	char buf[PATH_MAX], *p;
178 	const char *dir;
179 	uint32_t limit;
180 	int err;
181 
182 	(void) strlcpy(buf, fmd_strbasename(path), sizeof (buf));
183 	if ((p = strrchr(buf, '.')) != NULL && strcmp(p, ".so") == 0)
184 		*p = '\0'; /* strip trailing .so from any module name */
185 
186 	(void) pthread_mutex_init(&mp->mod_lock, NULL);
187 	(void) pthread_cond_init(&mp->mod_cv, NULL);
188 	(void) pthread_mutex_init(&mp->mod_stats_lock, NULL);
189 
190 	mp->mod_name = fmd_strdup(buf, FMD_SLEEP);
191 	mp->mod_path = fmd_strdup(path, FMD_SLEEP);
192 	mp->mod_ops = ops;
193 	mp->mod_ustat = fmd_ustat_create();
194 
195 	(void) fmd_conf_getprop(fmd.d_conf, "ckpt.dir", &dir);
196 	(void) snprintf(buf, sizeof (buf),
197 	    "%s/%s/%s", fmd.d_rootdir, dir, mp->mod_name);
198 
199 	mp->mod_ckpt = fmd_strdup(buf, FMD_SLEEP);
200 
201 	(void) fmd_conf_getprop(fmd.d_conf, "client.tmrlim", &limit);
202 	mp->mod_timerids = fmd_idspace_create(mp->mod_name, 1, limit + 1);
203 	mp->mod_threads = fmd_idspace_create(mp->mod_name, 0, INT_MAX);
204 
205 	fmd_buf_hash_create(&mp->mod_bufs);
206 	fmd_serd_hash_create(&mp->mod_serds);
207 
208 	mp->mod_topo_current = fmd_topo_hold();
209 
210 	(void) pthread_mutex_lock(&fmd.d_mod_lock);
211 	fmd_list_append(&fmd.d_mod_list, mp);
212 	(void) pthread_mutex_unlock(&fmd.d_mod_lock);
213 
214 	/*
215 	 * Initialize the module statistics that are kept on its behalf by fmd.
216 	 * These are set up using a template defined at the top of this file.
217 	 */
218 	if ((mp->mod_stats = (fmd_modstat_t *)fmd_ustat_insert(mp->mod_ustat,
219 	    FMD_USTAT_ALLOC, sizeof (_fmd_modstat_tmpl) / sizeof (fmd_stat_t),
220 	    (fmd_stat_t *)&_fmd_modstat_tmpl, NULL)) == NULL) {
221 		fmd_error(EFMD_MOD_INIT, "failed to initialize per-mod stats");
222 		fmd_module_destroy(mp);
223 		return (NULL);
224 	}
225 
226 	if (nv_alloc_init(&mp->mod_nva_sleep,
227 	    &fmd_module_nva_ops_sleep, mp) != 0 ||
228 	    nv_alloc_init(&mp->mod_nva_nosleep,
229 	    &fmd_module_nva_ops_nosleep, mp) != 0) {
230 		fmd_error(EFMD_MOD_INIT, "failed to initialize nvlist "
231 		    "allocation routines");
232 		fmd_module_destroy(mp);
233 		return (NULL);
234 	}
235 
236 	(void) fmd_conf_getprop(fmd.d_conf, "client.evqlim", &limit);
237 
238 	mp->mod_queue = fmd_eventq_create(mp,
239 	    &mp->mod_stats->ms_evqstat, &mp->mod_stats_lock, limit);
240 
241 	(void) fmd_conf_getprop(fmd.d_conf, "client.memlim",
242 	    &mp->mod_stats->ms_memlimit.fmds_value.ui64);
243 
244 	(void) fmd_conf_getprop(fmd.d_conf, "client.buflim",
245 	    &mp->mod_stats->ms_buflimit.fmds_value.ui64);
246 
247 	(void) fmd_conf_getprop(fmd.d_conf, "client.thrlim",
248 	    &mp->mod_stats->ms_thrlimit.fmds_value.ui32);
249 
250 	(void) fmd_conf_getprop(fmd.d_conf, "client.xprtlim",
251 	    &mp->mod_stats->ms_xprtlimit.fmds_value.ui32);
252 
253 	(void) fmd_conf_getprop(fmd.d_conf, "client.xprtqlim",
254 	    &mp->mod_stats->ms_xprtqlimit.fmds_value.ui32);
255 
256 	(void) fmd_conf_getprop(fmd.d_conf, "ckpt.save",
257 	    &mp->mod_stats->ms_ckpt_save.fmds_value.bool);
258 
259 	(void) fmd_conf_getprop(fmd.d_conf, "ckpt.restore",
260 	    &mp->mod_stats->ms_ckpt_restore.fmds_value.bool);
261 
262 	(void) fmd_conf_getprop(fmd.d_conf, "ckpt.zero",
263 	    &mp->mod_stats->ms_ckpt_zeroed.fmds_value.bool);
264 
265 	if (mp->mod_stats->ms_ckpt_zeroed.fmds_value.bool)
266 		fmd_ckpt_delete(mp); /* blow away any pre-existing checkpoint */
267 
268 	/*
269 	 * Place a hold on the module and grab the module lock before creating
270 	 * the module's thread to ensure that it cannot destroy the module and
271 	 * that it cannot call ops->mop_init() before we're done setting up.
272 	 * NOTE: from now on, we must use fmd_module_rele() for error paths.
273 	 */
274 	fmd_module_hold(mp);
275 	(void) pthread_mutex_lock(&mp->mod_lock);
276 	mp->mod_stats->ms_loadtime.fmds_value.ui64 = gethrtime();
277 	mp->mod_thread = fmd_thread_create(mp, fmd_module_start, mp);
278 
279 	if (mp->mod_thread == NULL) {
280 		fmd_error(EFMD_MOD_THR, "failed to create thread for %s", path);
281 		(void) pthread_mutex_unlock(&mp->mod_lock);
282 		fmd_module_rele(mp);
283 		return (NULL);
284 	}
285 
286 	/*
287 	 * At this point our module structure is nearly finished and its thread
288 	 * is starting execution in fmd_module_start() above, which will begin
289 	 * by blocking for mod_lock.  We now drop mod_lock and wait for either
290 	 * FMD_MOD_INIT or mod_error to be set before proceeding.
291 	 */
292 	while (!(mp->mod_flags & FMD_MOD_INIT) && mp->mod_error == 0)
293 		(void) pthread_cond_wait(&mp->mod_cv, &mp->mod_lock);
294 
295 	/*
296 	 * If the module has failed to initialize, copy its errno to the errno
297 	 * of the caller, wait for it to unload, and then destroy it.
298 	 */
299 	if (!(mp->mod_flags & FMD_MOD_INIT)) {
300 		err = mp->mod_error;
301 		(void) pthread_mutex_unlock(&mp->mod_lock);
302 
303 		if (err == EFMD_CKPT_INVAL)
304 			fmd_ckpt_rename(mp); /* move aside bad checkpoint */
305 
306 		/*
307 		 * If we're in the background, keep quiet about failure to
308 		 * load because a handle wasn't registered: this is a module's
309 		 * way of telling us it didn't want to be loaded for some
310 		 * reason related to system configuration.  If we're in the
311 		 * foreground we log this too in order to inform developers.
312 		 */
313 		if (fmd.d_fg || err != EFMD_HDL_INIT) {
314 			fmd_error(EFMD_MOD_INIT, "failed to load %s: %s\n",
315 			    path, fmd_strerror(err));
316 		}
317 
318 		fmd_module_unload(mp);
319 		fmd_module_rele(mp);
320 
321 		(void) fmd_set_errno(err);
322 		return (NULL);
323 	}
324 
325 	(void) pthread_cond_broadcast(&mp->mod_cv);
326 	(void) pthread_mutex_unlock(&mp->mod_lock);
327 
328 	fmd_dprintf(FMD_DBG_MOD, "loaded module %s\n", mp->mod_name);
329 	return (mp);
330 }
331 
332 static void
333 fmd_module_untimeout(fmd_idspace_t *ids, id_t id, fmd_module_t *mp)
334 {
335 	void *arg = fmd_timerq_remove(fmd.d_timers, ids, id);
336 
337 	/*
338 	 * The root module calls fmd_timerq_install() directly and must take
339 	 * responsibility for any cleanup of timer arguments that is required.
340 	 * All other modules use fmd_modtimer_t's as the arg data; free them.
341 	 */
342 	if (arg != NULL && mp != fmd.d_rmod)
343 		fmd_free(arg, sizeof (fmd_modtimer_t));
344 }
345 
346 void
347 fmd_module_unload(fmd_module_t *mp)
348 {
349 	fmd_modtopo_t *mtp;
350 
351 	(void) pthread_mutex_lock(&mp->mod_lock);
352 
353 	if (mp->mod_flags & FMD_MOD_QUIT) {
354 		(void) pthread_mutex_unlock(&mp->mod_lock);
355 		return; /* module is already unloading */
356 	}
357 
358 	ASSERT(mp->mod_thread != NULL);
359 	mp->mod_flags |= FMD_MOD_QUIT;
360 
361 	if (mp->mod_queue != NULL)
362 		fmd_eventq_abort(mp->mod_queue);
363 
364 	/*
365 	 * Wait for the module's thread to stop processing events and call
366 	 * _fmd_fini() and exit.  We do this by waiting for FMD_MOD_FINI to be
367 	 * set if INIT was set, and then attempting to join with the thread.
368 	 */
369 	while ((mp->mod_flags & (FMD_MOD_INIT | FMD_MOD_FINI)) == FMD_MOD_INIT)
370 		(void) pthread_cond_wait(&mp->mod_cv, &mp->mod_lock);
371 
372 	(void) pthread_cond_broadcast(&mp->mod_cv);
373 	(void) pthread_mutex_unlock(&mp->mod_lock);
374 
375 	fmd_thread_destroy(mp->mod_thread, FMD_THREAD_JOIN);
376 	mp->mod_thread = NULL;
377 
378 	/*
379 	 * Once the module is no longer active, clean up any data structures
380 	 * that are only required when the module is loaded.
381 	 */
382 	fmd_module_lock(mp);
383 
384 	if (mp->mod_timerids != NULL) {
385 		fmd_idspace_apply(mp->mod_timerids,
386 		    (void (*)())fmd_module_untimeout, mp);
387 
388 		fmd_idspace_destroy(mp->mod_timerids);
389 		mp->mod_timerids = NULL;
390 	}
391 
392 	if (mp->mod_threads != NULL) {
393 		fmd_idspace_destroy(mp->mod_threads);
394 		mp->mod_threads = NULL;
395 	}
396 
397 	(void) fmd_buf_hash_destroy(&mp->mod_bufs);
398 	fmd_serd_hash_destroy(&mp->mod_serds);
399 
400 	while ((mtp = fmd_list_next(&mp->mod_topolist)) != NULL) {
401 		fmd_list_delete(&mp->mod_topolist, mtp);
402 		fmd_topo_rele(mtp->mt_topo);
403 		fmd_free(mtp, sizeof (fmd_modtopo_t));
404 	}
405 
406 	fmd_module_unlock(mp);
407 	fmd_dprintf(FMD_DBG_MOD, "unloaded module %s\n", mp->mod_name);
408 }
409 
410 void
411 fmd_module_destroy(fmd_module_t *mp)
412 {
413 	fmd_conf_formal_t *cfp = mp->mod_argv;
414 	int i;
415 
416 	ASSERT(MUTEX_HELD(&mp->mod_lock));
417 
418 	if (mp->mod_thread != NULL) {
419 		(void) pthread_mutex_unlock(&mp->mod_lock);
420 		fmd_module_unload(mp);
421 		(void) pthread_mutex_lock(&mp->mod_lock);
422 	}
423 
424 	ASSERT(mp->mod_thread == NULL);
425 	ASSERT(mp->mod_refs == 0);
426 
427 	/*
428 	 * Once the module's thread is dead, we can safely remove the module
429 	 * from global visibility and by removing it from d_mod_list.  Any
430 	 * modhash pointers are already gone by virtue of mod_refs being zero.
431 	 */
432 	(void) pthread_mutex_lock(&fmd.d_mod_lock);
433 	fmd_list_delete(&fmd.d_mod_list, mp);
434 	(void) pthread_mutex_unlock(&fmd.d_mod_lock);
435 
436 	if (mp->mod_topo_current != NULL)
437 		fmd_topo_rele(mp->mod_topo_current);
438 
439 	if (mp->mod_nva_sleep.nva_ops != NULL)
440 		nv_alloc_fini(&mp->mod_nva_sleep);
441 	if (mp->mod_nva_nosleep.nva_ops != NULL)
442 		nv_alloc_fini(&mp->mod_nva_nosleep);
443 
444 	/*
445 	 * Once the module is no longer processing events and no longer visible
446 	 * through any program data structures, we can free all of its content.
447 	 */
448 	if (mp->mod_queue != NULL) {
449 		fmd_eventq_destroy(mp->mod_queue);
450 		mp->mod_queue = NULL;
451 	}
452 
453 	if (mp->mod_ustat != NULL) {
454 		(void) pthread_mutex_lock(&mp->mod_stats_lock);
455 		fmd_ustat_destroy(mp->mod_ustat);
456 		mp->mod_ustat = NULL;
457 		mp->mod_stats = NULL;
458 		(void) pthread_mutex_unlock(&mp->mod_stats_lock);
459 	}
460 
461 	for (i = 0; i < mp->mod_dictc; i++)
462 		fm_dc_closedict(mp->mod_dictv[i]);
463 
464 	fmd_free(mp->mod_dictv, sizeof (struct fm_dc_handle *) * mp->mod_dictc);
465 
466 	if (mp->mod_conf != NULL)
467 		fmd_conf_close(mp->mod_conf);
468 
469 	for (i = 0; i < mp->mod_argc; i++, cfp++) {
470 		fmd_strfree((char *)cfp->cf_name);
471 		fmd_strfree((char *)cfp->cf_default);
472 	}
473 
474 	fmd_free(mp->mod_argv, sizeof (fmd_conf_formal_t) * mp->mod_argc);
475 
476 	fmd_strfree(mp->mod_name);
477 	fmd_strfree(mp->mod_path);
478 	fmd_strfree(mp->mod_ckpt);
479 	nvlist_free(mp->mod_fmri);
480 	fmd_strfree(mp->mod_vers);
481 
482 	fmd_free(mp, sizeof (fmd_module_t));
483 }
484 
485 /*
486  * fmd_module_error() is called after the stack is unwound from a call to
487  * fmd_module_abort() to indicate that the module has failed.  The mod_error
488  * field is used to hold the error code of the first fatal error to the module.
489  * An EFMD_MOD_FAIL event is then created and sent to fmd-self-diagnosis.
490  */
491 static void
492 fmd_module_error(fmd_module_t *mp, int err)
493 {
494 	fmd_event_t *e;
495 	nvlist_t *nvl;
496 	char *class;
497 
498 	ASSERT(MUTEX_HELD(&mp->mod_lock));
499 	ASSERT(err != 0);
500 
501 	TRACE((FMD_DBG_MOD, "module aborted: err=%d", err));
502 
503 	if (mp->mod_error == 0)
504 		mp->mod_error = err;
505 
506 	if (mp == fmd.d_self)
507 		return; /* do not post event if fmd.d_self itself fails */
508 
509 	/*
510 	 * Send an error indicating the module has now failed to fmd.d_self.
511 	 * Since the error causing the failure has already been logged by
512 	 * fmd_api_xerror(), we do not need to bother logging this event.
513 	 * It only exists for the purpose of notifying fmd.d_self that it can
514 	 * close the case associated with this module because mod_error is set.
515 	 */
516 	nvl = fmd_protocol_moderror(mp, EFMD_MOD_FAIL, fmd_strerror(err));
517 	(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
518 	e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
519 	fmd_dispq_dispatch(fmd.d_disp, e, class);
520 }
521 
/*
 * Deliver event 'e' to module 'mp' by invoking the module callback that
 * matches the event type.  The module is marked FMD_MOD_BUSY for the duration
 * of the call and a setjmp() context is established in mod_jmpbuf; if the
 * callback ends in fmd_module_abort() on this thread, control returns here
 * through longjmp(), the failure is recorded by fmd_module_error(), and the
 * callback is not invoked again.  FMD_MOD_BUSY is cleared by fmd_module_exit()
 * before returning.
 */
void
fmd_module_dispatch(fmd_module_t *mp, fmd_event_t *e)
{
	const fmd_hdl_ops_t *ops = mp->mod_info->fmdi_ops;
	fmd_event_impl_t *ep = (fmd_event_impl_t *)e;
	fmd_hdl_t *hdl = (fmd_hdl_t *)mp;
	fmd_modtimer_t *t;
	fmd_topo_t *old_topo;
	volatile int err;

	/*
	 * Before calling the appropriate module callback, enter the module as
	 * if by fmd_module_enter() and establish mod_jmpbuf for any aborts.
	 */
	(void) pthread_mutex_lock(&mp->mod_lock);

	ASSERT(!(mp->mod_flags & FMD_MOD_BUSY));
	mp->mod_flags |= FMD_MOD_BUSY;

	if ((err = setjmp(mp->mod_jmpbuf)) != 0) {
		/*
		 * We arrive here via longjmp() after mod_lock was dropped
		 * below, so it must be re-acquired before recording the error.
		 */
		(void) pthread_mutex_lock(&mp->mod_lock);
		fmd_module_error(mp, err);
	}

	(void) pthread_cond_broadcast(&mp->mod_cv);
	(void) pthread_mutex_unlock(&mp->mod_lock);

	/*
	 * If it's the first time through fmd_module_dispatch(), call the
	 * appropriate module callback based on the event type.  If the call
	 * triggers an fmd_module_abort(), we'll return to setjmp() above with
	 * err set to a non-zero value and then bypass this before exiting.
	 */
	if (err == 0) {
		switch (ep->ev_type) {
		case FMD_EVT_PROTOCOL:
			ops->fmdo_recv(hdl, e, ep->ev_nvl, ep->ev_data);
			break;
		case FMD_EVT_TIMEOUT:
			/* ev_data is the fmd_modtimer_t for the expired timer */
			t = ep->ev_data;
			ASSERT(t->mt_mod == mp);
			ops->fmdo_timeout(hdl, t->mt_id, t->mt_arg);
			break;
		case FMD_EVT_CLOSE:
			ops->fmdo_close(hdl, ep->ev_data);
			break;
		case FMD_EVT_STATS:
			ops->fmdo_stats(hdl);
			fmd_modstat_publish(mp);
			break;
		case FMD_EVT_GC:
			ops->fmdo_gc(hdl);
			break;
		case FMD_EVT_PUBLISH:
			/* handled by fmd itself; no module callback is made */
			fmd_case_publish(ep->ev_data, FMD_CASE_CURRENT);
			break;
		case FMD_EVT_TOPO:
			/*
			 * Save the pointer to the old topology and update
			 * the pointer with the updated topology.
			 * With this approach, other threads that reference the
			 * topology either
			 *  - finish with the old topology, since it is
			 *	released only after mod_topo_current has been
			 *	updated,
			 *  - or are blocked while mod_topo_current is updated.
			 */
			old_topo = mp->mod_topo_current;
			fmd_module_lock(mp);
			mp->mod_topo_current = (fmd_topo_t *)ep->ev_data;
			fmd_topo_addref(mp->mod_topo_current);
			fmd_module_unlock(mp);
			fmd_topo_rele(old_topo);
			ops->fmdo_topo(hdl, mp->mod_topo_current->ft_hdl);
			break;
		}
	}

	fmd_module_exit(mp);
}
602 
603 int
604 fmd_module_transport(fmd_module_t *mp, fmd_xprt_t *xp, fmd_event_t *e)
605 {
606 	fmd_event_impl_t *ep = (fmd_event_impl_t *)e;
607 	fmd_hdl_t *hdl = (fmd_hdl_t *)mp;
608 
609 	ASSERT(ep->ev_type == FMD_EVT_PROTOCOL);
610 	return (mp->mod_info->fmdi_ops->fmdo_send(hdl, xp, e, ep->ev_nvl));
611 }
612 
613 void
614 fmd_module_timeout(fmd_modtimer_t *t, id_t id, hrtime_t hrt)
615 {
616 	fmd_event_t *e;
617 
618 	t->mt_id = id; /* save id in case we need to delete from eventq */
619 	e = fmd_event_create(FMD_EVT_TIMEOUT, hrt, NULL, t);
620 	fmd_eventq_insert_at_time(t->mt_mod->mod_queue, e);
621 }
622 
623 /*
624  * Garbage collection is initiated by a timer callback once per day or at the
625  * request of fmadm.  Purge old SERD entries and send the module a GC event.
626  */
627 void
628 fmd_module_gc(fmd_module_t *mp)
629 {
630 	fmd_hdl_info_t *info;
631 	fmd_event_t *e;
632 
633 	if (mp->mod_error != 0)
634 		return; /* do not do anything if the module has failed */
635 
636 	fmd_module_lock(mp);
637 
638 	if ((info = mp->mod_info) != NULL) {
639 		fmd_serd_hash_apply(&mp->mod_serds,
640 		    (fmd_serd_eng_f *)fmd_serd_eng_gc, NULL);
641 	}
642 
643 	fmd_module_unlock(mp);
644 
645 	if (info != NULL) {
646 		e = fmd_event_create(FMD_EVT_GC, FMD_HRT_NOW, NULL, NULL);
647 		fmd_eventq_insert_at_head(mp->mod_queue, e);
648 	}
649 }
650 
651 void
652 fmd_module_trygc(fmd_module_t *mp)
653 {
654 	if (fmd_module_trylock(mp)) {
655 		fmd_serd_hash_apply(&mp->mod_serds,
656 		    (fmd_serd_eng_f *)fmd_serd_eng_gc, NULL);
657 		fmd_module_unlock(mp);
658 	}
659 }
660 
661 int
662 fmd_module_contains(fmd_module_t *mp, fmd_event_t *ep)
663 {
664 	fmd_case_t *cp;
665 	int rv = 0;
666 
667 	fmd_module_lock(mp);
668 
669 	for (cp = fmd_list_next(&mp->mod_cases);
670 	    cp != NULL; cp = fmd_list_next(cp)) {
671 		if ((rv = fmd_case_contains(cp, ep)) != 0)
672 			break;
673 	}
674 
675 	if (rv == 0)
676 		rv = fmd_serd_hash_contains(&mp->mod_serds, ep);
677 
678 	fmd_module_unlock(mp);
679 	return (rv);
680 }
681 
/*
 * Set the FMD_MOD_MDIRTY flag on the module under mod_lock.  The flag is
 * consumed by fmd_module_clrdirty() and fmd_module_commit(), which act on the
 * module's SERD engines and buffers when it is set.
 */
void
fmd_module_setdirty(fmd_module_t *mp)
{
	(void) pthread_mutex_lock(&mp->mod_lock);
	mp->mod_flags |= FMD_MOD_MDIRTY;
	(void) pthread_mutex_unlock(&mp->mod_lock);
}
689 
/*
 * Set the FMD_MOD_CDIRTY flag on the module under mod_lock.  The flag is
 * consumed by fmd_module_clrdirty() and fmd_module_commit(), which walk the
 * module's case list when it is set.
 */
void
fmd_module_setcdirty(fmd_module_t *mp)
{
	(void) pthread_mutex_lock(&mp->mod_lock);
	mp->mod_flags |= FMD_MOD_CDIRTY;
	(void) pthread_mutex_unlock(&mp->mod_lock);
}
697 
698 void
699 fmd_module_clrdirty(fmd_module_t *mp)
700 {
701 	fmd_case_t *cp;
702 
703 	fmd_module_lock(mp);
704 
705 	if (mp->mod_flags & FMD_MOD_CDIRTY) {
706 		for (cp = fmd_list_next(&mp->mod_cases);
707 		    cp != NULL; cp = fmd_list_next(cp))
708 			fmd_case_clrdirty(cp);
709 	}
710 
711 	if (mp->mod_flags & FMD_MOD_MDIRTY) {
712 		fmd_serd_hash_apply(&mp->mod_serds,
713 		    (fmd_serd_eng_f *)fmd_serd_eng_clrdirty, NULL);
714 		fmd_buf_hash_commit(&mp->mod_bufs);
715 	}
716 
717 	(void) pthread_mutex_lock(&mp->mod_lock);
718 	mp->mod_flags &= ~(FMD_MOD_MDIRTY | FMD_MOD_CDIRTY);
719 	(void) pthread_mutex_unlock(&mp->mod_lock);
720 
721 	fmd_module_unlock(mp);
722 }
723 
724 void
725 fmd_module_commit(fmd_module_t *mp)
726 {
727 	fmd_case_t *cp;
728 
729 	ASSERT(fmd_module_locked(mp));
730 
731 	if (mp->mod_flags & FMD_MOD_CDIRTY) {
732 		for (cp = fmd_list_next(&mp->mod_cases);
733 		    cp != NULL; cp = fmd_list_next(cp))
734 			fmd_case_commit(cp);
735 	}
736 
737 	if (mp->mod_flags & FMD_MOD_MDIRTY) {
738 		fmd_serd_hash_apply(&mp->mod_serds,
739 		    (fmd_serd_eng_f *)fmd_serd_eng_commit, NULL);
740 		fmd_buf_hash_commit(&mp->mod_bufs);
741 	}
742 
743 	(void) pthread_mutex_lock(&mp->mod_lock);
744 	mp->mod_flags &= ~(FMD_MOD_MDIRTY | FMD_MOD_CDIRTY);
745 	(void) pthread_mutex_unlock(&mp->mod_lock);
746 
747 	mp->mod_gen++;
748 }
749 
750 void
751 fmd_module_lock(fmd_module_t *mp)
752 {
753 	pthread_t self = pthread_self();
754 
755 	(void) pthread_mutex_lock(&mp->mod_lock);
756 
757 	while (mp->mod_flags & FMD_MOD_LOCK) {
758 		if (mp->mod_owner != self)
759 			(void) pthread_cond_wait(&mp->mod_cv, &mp->mod_lock);
760 		else
761 			fmd_panic("recursive module lock of %p\n", (void *)mp);
762 	}
763 
764 	mp->mod_owner = self;
765 	mp->mod_flags |= FMD_MOD_LOCK;
766 
767 	(void) pthread_cond_broadcast(&mp->mod_cv);
768 	(void) pthread_mutex_unlock(&mp->mod_lock);
769 }
770 
/*
 * Release the module lock taken by fmd_module_lock() or fmd_module_trylock().
 * The calling thread must be the current owner.  Ownership and the
 * FMD_MOD_LOCK flag are cleared under mod_lock, and mod_cv is broadcast so
 * that threads sleeping in fmd_module_lock() re-check the flag.
 */
void
fmd_module_unlock(fmd_module_t *mp)
{
	(void) pthread_mutex_lock(&mp->mod_lock);

	ASSERT(mp->mod_owner == pthread_self());
	ASSERT(mp->mod_flags & FMD_MOD_LOCK);

	mp->mod_owner = 0;
	mp->mod_flags &= ~FMD_MOD_LOCK;

	(void) pthread_cond_broadcast(&mp->mod_cv);
	(void) pthread_mutex_unlock(&mp->mod_lock);
}
785 
786 int
787 fmd_module_trylock(fmd_module_t *mp)
788 {
789 	(void) pthread_mutex_lock(&mp->mod_lock);
790 
791 	if (mp->mod_flags & FMD_MOD_LOCK) {
792 		(void) pthread_mutex_unlock(&mp->mod_lock);
793 		return (0);
794 	}
795 
796 	mp->mod_owner = pthread_self();
797 	mp->mod_flags |= FMD_MOD_LOCK;
798 
799 	(void) pthread_cond_broadcast(&mp->mod_cv);
800 	(void) pthread_mutex_unlock(&mp->mod_lock);
801 
802 	return (1);
803 }
804 
805 int
806 fmd_module_locked(fmd_module_t *mp)
807 {
808 	return ((mp->mod_flags & FMD_MOD_LOCK) &&
809 	    mp->mod_owner == pthread_self());
810 }
811 
/*
 * Enter the module: mark it FMD_MOD_BUSY, establish the setjmp() context in
 * mod_jmpbuf that fmd_module_abort() jumps back to, and then call 'func' (if
 * non-NULL) with the module as its handle.
 *
 * Returns 0 if 'func' completed (or was NULL), or the non-zero error passed
 * to longjmp() if the call was aborted, in which case the failure has already
 * been recorded through fmd_module_error().  FMD_MOD_BUSY is not cleared
 * here; fmd_module_exit() is the routine that clears it.
 */
int
fmd_module_enter(fmd_module_t *mp, void (*func)(fmd_hdl_t *))
{
	volatile int err;

	(void) pthread_mutex_lock(&mp->mod_lock);

	ASSERT(!(mp->mod_flags & FMD_MOD_BUSY));
	mp->mod_flags |= FMD_MOD_BUSY;

	if ((err = setjmp(mp->mod_jmpbuf)) != 0) {
		/*
		 * We arrive here via longjmp() after mod_lock was dropped
		 * below, so it must be re-acquired before recording the error.
		 */
		(void) pthread_mutex_lock(&mp->mod_lock);
		fmd_module_error(mp, err);
	}

	(void) pthread_cond_broadcast(&mp->mod_cv);
	(void) pthread_mutex_unlock(&mp->mod_lock);

	/*
	 * If it's the first time through fmd_module_enter(), call the provided
	 * function on the module.  If no fmd_module_abort() results, we will
	 * fall through and return zero.  Otherwise we'll longjmp with an err,
	 * return to the setjmp() above, and return the error to our caller.
	 */
	if (err == 0 && func != NULL)
		(*func)((fmd_hdl_t *)mp);

	return (err);
}
841 
/*
 * Leave the module: clear the FMD_MOD_BUSY flag that was set by
 * fmd_module_enter() or fmd_module_dispatch() and broadcast mod_cv.  The flag
 * must be set when this is called.
 */
void
fmd_module_exit(fmd_module_t *mp)
{
	(void) pthread_mutex_lock(&mp->mod_lock);

	ASSERT(mp->mod_flags & FMD_MOD_BUSY);
	mp->mod_flags &= ~FMD_MOD_BUSY;

	(void) pthread_cond_broadcast(&mp->mod_cv);
	(void) pthread_mutex_unlock(&mp->mod_lock);
}
853 
854 /*
855  * If the client.error policy has been set by a developer, stop or dump core
856  * based on the policy; if we stop and are resumed we'll continue and execute
857  * the default behavior to discard events in fmd_module_start().  If the caller
858  * is the primary module thread, we reach this state by longjmp'ing back to
859  * fmd_module_enter(), above.  If the caller is an auxiliary thread, we cancel
860  * ourself and arrange for the primary thread to call fmd_module_abort().
861  */
862 void
863 fmd_module_abort(fmd_module_t *mp, int err)
864 {
865 	uint_t policy = FMD_CERROR_UNLOAD;
866 	pthread_t tid = pthread_self();
867 
868 	(void) fmd_conf_getprop(fmd.d_conf, "client.error", &policy);
869 
870 	if (policy == FMD_CERROR_STOP) {
871 		fmd_error(err, "stopping after %s in client %s (%p)\n",
872 		    fmd_errclass(err), mp->mod_name, (void *)mp);
873 		(void) raise(SIGSTOP);
874 	} else if (policy == FMD_CERROR_ABORT) {
875 		fmd_panic("aborting due to %s in client %s (%p)\n",
876 		    fmd_errclass(err), mp->mod_name, (void *)mp);
877 	}
878 
879 	/*
880 	 * If the caller is an auxiliary thread, cancel the current thread.  We
881 	 * prefer to cancel because it affords developers the option of using
882 	 * the pthread_cleanup* APIs.  If cancellations have been disabled,
883 	 * fall through to forcing the current thread to exit.  In either case
884 	 * we update mod_error (if zero) to enter the failed state.  Once that
885 	 * is set, further events received by the module will be discarded.
886 	 *
887 	 * We also set the FMD_MOD_FAIL bit, indicating an unrecoverable error.
888 	 * When an auxiliary thread fails, the module is left in a delicate
889 	 * state where it is likely not able to continue execution (even to
890 	 * execute its _fmd_fini() routine) because our caller may hold locks
891 	 * that are private to the module and can no longer be released.  The
892 	 * FMD_MOD_FAIL bit forces fmd_api_module_lock() to abort if any other
893 	 * module threads reach an API call, in an attempt to get them to exit.
894 	 */
895 	if (tid != mp->mod_thread->thr_tid) {
896 		(void) pthread_mutex_lock(&mp->mod_lock);
897 
898 		if (mp->mod_error == 0)
899 			mp->mod_error = err;
900 
901 		mp->mod_flags |= FMD_MOD_FAIL;
902 		(void) pthread_mutex_unlock(&mp->mod_lock);
903 
904 		(void) pthread_cancel(tid);
905 		pthread_exit(NULL);
906 	}
907 
908 	ASSERT(mp->mod_flags & FMD_MOD_BUSY);
909 	longjmp(mp->mod_jmpbuf, err);
910 }
911 
/*
 * Take a reference on the module by incrementing mod_refs under mod_lock.
 * The trace message records the reference count before the increment.  Each
 * hold is undone by fmd_module_rele().
 */
void
fmd_module_hold(fmd_module_t *mp)
{
	(void) pthread_mutex_lock(&mp->mod_lock);

	TRACE((FMD_DBG_MOD, "hold %p (%s/%u)\n",
	    (void *)mp, mp->mod_name, mp->mod_refs));

	mp->mod_refs++;
	ASSERT(mp->mod_refs != 0); /* catch wrap-around of the count */

	(void) pthread_mutex_unlock(&mp->mod_lock);
}
925 
926 void
927 fmd_module_rele(fmd_module_t *mp)
928 {
929 	(void) pthread_mutex_lock(&mp->mod_lock);
930 
931 	TRACE((FMD_DBG_MOD, "rele %p (%s/%u)\n",
932 	    (void *)mp, mp->mod_name, mp->mod_refs));
933 
934 	ASSERT(mp->mod_refs != 0);
935 
936 	if (--mp->mod_refs == 0)
937 		fmd_module_destroy(mp);
938 	else
939 		(void) pthread_mutex_unlock(&mp->mod_lock);
940 }
941 
942 /*
943  * Wrapper around libdiagcode's fm_dc_opendict() to load module dictionaries.
944  * If the dictionary open is successful, the new dictionary is added to the
945  * mod_dictv[] array and mod_codelen is updated with the new maximum length.
946  */
947 int
948 fmd_module_dc_opendict(fmd_module_t *mp, const char *dict)
949 {
950 	struct fm_dc_handle *dcp, **dcv;
951 	char *dictdir, *dictnam, *p;
952 	size_t len;
953 
954 	ASSERT(fmd_module_locked(mp));
955 
956 	dictnam = alloca(strlen(dict) + 1);
957 	(void) strcpy(dictnam, fmd_strbasename(dict));
958 
959 	if ((p = strrchr(dictnam, '.')) != NULL &&
960 	    strcmp(p, ".dict") == 0)
961 		*p = '\0'; /* eliminate any trailing .dict suffix */
962 
963 	/*
964 	 * If 'dict' is an absolute path, dictdir = $rootdir/`dirname dict`
965 	 * If 'dict' is not an absolute path, dictdir = $dictdir/`dirname dict`
966 	 */
967 	if (dict[0] == '/') {
968 		len = strlen(fmd.d_rootdir) + strlen(dict) + 1;
969 		dictdir = alloca(len);
970 		(void) snprintf(dictdir, len, "%s%s", fmd.d_rootdir, dict);
971 		(void) fmd_strdirname(dictdir);
972 	} else {
973 		(void) fmd_conf_getprop(fmd.d_conf, "dictdir", &p);
974 		len = strlen(fmd.d_rootdir) + strlen(p) + strlen(dict) + 3;
975 		dictdir = alloca(len);
976 		(void) snprintf(dictdir, len,
977 		    "%s/%s/%s", fmd.d_rootdir, p, dict);
978 		(void) fmd_strdirname(dictdir);
979 	}
980 
981 	fmd_dprintf(FMD_DBG_MOD, "module %s opening %s -> %s/%s.dict\n",
982 	    mp->mod_name, dict, dictdir, dictnam);
983 
984 	if ((dcp = fm_dc_opendict(FM_DC_VERSION, dictdir, dictnam)) == NULL)
985 		return (-1); /* errno is set for us */
986 
987 	dcv = fmd_alloc(sizeof (dcp) * (mp->mod_dictc + 1), FMD_SLEEP);
988 	bcopy(mp->mod_dictv, dcv, sizeof (dcp) * mp->mod_dictc);
989 	fmd_free(mp->mod_dictv, sizeof (dcp) * mp->mod_dictc);
990 	mp->mod_dictv = dcv;
991 	mp->mod_dictv[mp->mod_dictc++] = dcp;
992 
993 	len = fm_dc_codelen(dcp);
994 	mp->mod_codelen = MAX(mp->mod_codelen, len);
995 
996 	return (0);
997 }
998 
999 /*
1000  * Wrapper around libdiagcode's fm_dc_key2code() that examines all the module's
1001  * dictionaries.  We adhere to the libdiagcode return values and semantics.
1002  */
1003 int
1004 fmd_module_dc_key2code(fmd_module_t *mp,
1005     char *const keys[], char *code, size_t codelen)
1006 {
1007 	int i, err;
1008 
1009 	for (i = 0; i < mp->mod_dictc; i++) {
1010 		if ((err = fm_dc_key2code(mp->mod_dictv[i], (const char **)keys,
1011 		    code, codelen)) == 0 || errno != ENOMSG)
1012 			return (err);
1013 	}
1014 
1015 	return (fmd_set_errno(ENOMSG));
1016 }
1017 
1018 fmd_modhash_t *
1019 fmd_modhash_create(void)
1020 {
1021 	fmd_modhash_t *mhp = fmd_alloc(sizeof (fmd_modhash_t), FMD_SLEEP);
1022 
1023 	(void) pthread_rwlock_init(&mhp->mh_lock, NULL);
1024 	mhp->mh_hashlen = fmd.d_str_buckets;
1025 	mhp->mh_hash = fmd_zalloc(sizeof (void *) * mhp->mh_hashlen, FMD_SLEEP);
1026 	mhp->mh_nelems = 0;
1027 
1028 	return (mhp);
1029 }
1030 
1031 void
1032 fmd_modhash_destroy(fmd_modhash_t *mhp)
1033 {
1034 	fmd_module_t *mp, *nmp;
1035 	uint_t i;
1036 
1037 	for (i = 0; i < mhp->mh_hashlen; i++) {
1038 		for (mp = mhp->mh_hash[i]; mp != NULL; mp = nmp) {
1039 			nmp = mp->mod_next;
1040 			mp->mod_next = NULL;
1041 			fmd_module_rele(mp);
1042 		}
1043 	}
1044 
1045 	fmd_free(mhp->mh_hash, sizeof (void *) * mhp->mh_hashlen);
1046 	(void) pthread_rwlock_destroy(&mhp->mh_lock);
1047 	fmd_free(mhp, sizeof (fmd_modhash_t));
1048 }
1049 
1050 static void
1051 fmd_modhash_loaddir(fmd_modhash_t *mhp, const char *dir,
1052     const fmd_modops_t *ops, const char *suffix)
1053 {
1054 	char path[PATH_MAX];
1055 	struct dirent *dp;
1056 	const char *p;
1057 	DIR *dirp;
1058 
1059 	if ((dirp = opendir(dir)) == NULL)
1060 		return; /* failed to open directory; just skip it */
1061 
1062 	while ((dp = readdir(dirp)) != NULL) {
1063 		if (dp->d_name[0] == '.')
1064 			continue; /* skip "." and ".." */
1065 
1066 		p = strrchr(dp->d_name, '.');
1067 
1068 		if (p != NULL && strcmp(p, ".conf") == 0)
1069 			continue; /* skip .conf files */
1070 
1071 		if (suffix != NULL && (p == NULL || strcmp(p, suffix) != 0))
1072 			continue; /* skip files with the wrong suffix */
1073 
1074 		(void) snprintf(path, sizeof (path), "%s/%s", dir, dp->d_name);
1075 		(void) fmd_modhash_load(mhp, path, ops);
1076 	}
1077 
1078 	(void) closedir(dirp);
1079 }
1080 
1081 void
1082 fmd_modhash_loadall(fmd_modhash_t *mhp, const fmd_conf_path_t *pap,
1083     const fmd_modops_t *ops, const char *suffix)
1084 {
1085 	int i;
1086 
1087 	for (i = 0; i < pap->cpa_argc; i++)
1088 		fmd_modhash_loaddir(mhp, pap->cpa_argv[i], ops, suffix);
1089 }
1090 
1091 void
1092 fmd_modhash_apply(fmd_modhash_t *mhp, void (*func)(fmd_module_t *))
1093 {
1094 	fmd_module_t *mp, *np;
1095 	uint_t i;
1096 
1097 	(void) pthread_rwlock_rdlock(&mhp->mh_lock);
1098 
1099 	for (i = 0; i < mhp->mh_hashlen; i++) {
1100 		for (mp = mhp->mh_hash[i]; mp != NULL; mp = np) {
1101 			np = mp->mod_next;
1102 			func(mp);
1103 		}
1104 	}
1105 
1106 	(void) pthread_rwlock_unlock(&mhp->mh_lock);
1107 }
1108 
1109 void
1110 fmd_modhash_tryapply(fmd_modhash_t *mhp, void (*func)(fmd_module_t *))
1111 {
1112 	fmd_module_t *mp, *np;
1113 	uint_t i;
1114 
1115 	if (mhp == NULL || pthread_rwlock_tryrdlock(&mhp->mh_lock) != 0)
1116 		return; /* not initialized or couldn't grab lock */
1117 
1118 	for (i = 0; i < mhp->mh_hashlen; i++) {
1119 		for (mp = mhp->mh_hash[i]; mp != NULL; mp = np) {
1120 			np = mp->mod_next;
1121 			func(mp);
1122 		}
1123 	}
1124 
1125 	(void) pthread_rwlock_unlock(&mhp->mh_lock);
1126 }
1127 
1128 void
1129 fmd_modhash_dispatch(fmd_modhash_t *mhp, fmd_event_t *ep)
1130 {
1131 	fmd_module_t *mp;
1132 	uint_t i;
1133 
1134 	fmd_event_hold(ep);
1135 	(void) pthread_rwlock_rdlock(&mhp->mh_lock);
1136 
1137 	for (i = 0; i < mhp->mh_hashlen; i++) {
1138 		for (mp = mhp->mh_hash[i]; mp != NULL; mp = mp->mod_next) {
1139 			/*
1140 			 * If FMD_MOD_INIT is set but MOD_FINI, MOD_QUIT, and
1141 			 * mod_error are all zero, then the module is active:
1142 			 * enqueue the event in the corresponding event queue.
1143 			 */
1144 			(void) pthread_mutex_lock(&mp->mod_lock);
1145 
1146 			if ((mp->mod_flags & (FMD_MOD_INIT | FMD_MOD_FINI |
1147 			    FMD_MOD_QUIT)) == FMD_MOD_INIT && !mp->mod_error) {
1148 
1149 				/*
1150 				 * If the event we're dispatching is of type
1151 				 * FMD_EVT_TOPO and there are already redundant
1152 				 * FMD_EVT_TOPO events in this module's queue,
1153 				 * then drop those before adding the new one.
1154 				 */
1155 				if (FMD_EVENT_TYPE(ep) == FMD_EVT_TOPO)
1156 					fmd_eventq_drop_topo(mp->mod_queue);
1157 
1158 				fmd_eventq_insert_at_time(mp->mod_queue, ep);
1159 
1160 			}
1161 			(void) pthread_mutex_unlock(&mp->mod_lock);
1162 		}
1163 	}
1164 
1165 	(void) pthread_rwlock_unlock(&mhp->mh_lock);
1166 	fmd_event_rele(ep);
1167 }
1168 
1169 fmd_module_t *
1170 fmd_modhash_lookup(fmd_modhash_t *mhp, const char *name)
1171 {
1172 	fmd_module_t *mp;
1173 	uint_t h;
1174 
1175 	(void) pthread_rwlock_rdlock(&mhp->mh_lock);
1176 	h = fmd_strhash(name) % mhp->mh_hashlen;
1177 
1178 	for (mp = mhp->mh_hash[h]; mp != NULL; mp = mp->mod_next) {
1179 		if (strcmp(name, mp->mod_name) == 0)
1180 			break;
1181 	}
1182 
1183 	if (mp != NULL)
1184 		fmd_module_hold(mp);
1185 	else
1186 		(void) fmd_set_errno(EFMD_MOD_NOMOD);
1187 
1188 	(void) pthread_rwlock_unlock(&mhp->mh_lock);
1189 	return (mp);
1190 }
1191 
1192 fmd_module_t *
1193 fmd_modhash_load(fmd_modhash_t *mhp, const char *path, const fmd_modops_t *ops)
1194 {
1195 	char name[PATH_MAX], *p;
1196 	fmd_module_t *mp;
1197 	int tries = 0;
1198 	uint_t h;
1199 
1200 	(void) strlcpy(name, fmd_strbasename(path), sizeof (name));
1201 	if ((p = strrchr(name, '.')) != NULL && strcmp(p, ".so") == 0)
1202 		*p = '\0'; /* strip trailing .so from any module name */
1203 
1204 	(void) pthread_rwlock_wrlock(&mhp->mh_lock);
1205 	h = fmd_strhash(name) % mhp->mh_hashlen;
1206 
1207 	/*
1208 	 * First check to see if a module is already present in the hash table
1209 	 * for this name.  If so, the module is already loaded: skip it.
1210 	 */
1211 	for (mp = mhp->mh_hash[h]; mp != NULL; mp = mp->mod_next) {
1212 		if (strcmp(name, mp->mod_name) == 0)
1213 			break;
1214 	}
1215 
1216 	if (mp != NULL) {
1217 		(void) pthread_rwlock_unlock(&mhp->mh_lock);
1218 		(void) fmd_set_errno(EFMD_MOD_LOADED);
1219 		return (NULL);
1220 	}
1221 
1222 	/*
1223 	 * fmd_module_create() will return a held (as if by fmd_module_hold())
1224 	 * module.  We leave this hold in place to correspond to the hash-in.
1225 	 */
1226 	while ((mp = fmd_module_create(path, ops)) == NULL) {
1227 		if (tries++ != 0 || errno != EFMD_CKPT_INVAL) {
1228 			(void) pthread_rwlock_unlock(&mhp->mh_lock);
1229 			return (NULL); /* errno is set for us */
1230 		}
1231 	}
1232 
1233 	mp->mod_hash = mhp;
1234 	mp->mod_next = mhp->mh_hash[h];
1235 
1236 	mhp->mh_hash[h] = mp;
1237 	mhp->mh_nelems++;
1238 
1239 	(void) pthread_rwlock_unlock(&mhp->mh_lock);
1240 	return (mp);
1241 }
1242 
1243 int
1244 fmd_modhash_unload(fmd_modhash_t *mhp, const char *name)
1245 {
1246 	fmd_module_t *mp, **pp;
1247 	uint_t h;
1248 
1249 	(void) pthread_rwlock_wrlock(&mhp->mh_lock);
1250 	h = fmd_strhash(name) % mhp->mh_hashlen;
1251 	pp = &mhp->mh_hash[h];
1252 
1253 	for (mp = *pp; mp != NULL; mp = mp->mod_next) {
1254 		if (strcmp(name, mp->mod_name) == 0)
1255 			break;
1256 		else
1257 			pp = &mp->mod_next;
1258 	}
1259 
1260 	if (mp == NULL) {
1261 		(void) pthread_rwlock_unlock(&mhp->mh_lock);
1262 		return (fmd_set_errno(EFMD_MOD_NOMOD));
1263 	}
1264 
1265 	*pp = mp->mod_next;
1266 	mp->mod_next = NULL;
1267 
1268 	ASSERT(mhp->mh_nelems != 0);
1269 	mhp->mh_nelems--;
1270 
1271 	(void) pthread_rwlock_unlock(&mhp->mh_lock);
1272 
1273 	fmd_module_unload(mp);
1274 	fmd_module_rele(mp);
1275 
1276 	return (0);
1277 }
1278 
1279 void
1280 fmd_modstat_publish(fmd_module_t *mp)
1281 {
1282 	(void) pthread_mutex_lock(&mp->mod_lock);
1283 
1284 	ASSERT(mp->mod_flags & FMD_MOD_STSUB);
1285 	mp->mod_flags |= FMD_MOD_STPUB;
1286 	(void) pthread_cond_broadcast(&mp->mod_cv);
1287 
1288 	while (mp->mod_flags & FMD_MOD_STPUB)
1289 		(void) pthread_cond_wait(&mp->mod_cv, &mp->mod_lock);
1290 
1291 	(void) pthread_mutex_unlock(&mp->mod_lock);
1292 }
1293 
1294 int
1295 fmd_modstat_snapshot(fmd_module_t *mp, fmd_ustat_snap_t *uss)
1296 {
1297 	fmd_event_t *e;
1298 	int err;
1299 
1300 	/*
1301 	 * Grab the module lock and wait for the STSUB bit to be clear.  Then
1302 	 * set it to indicate we are a subscriber and everyone else must wait.
1303 	 */
1304 	(void) pthread_mutex_lock(&mp->mod_lock);
1305 
1306 	while (mp->mod_error == 0 && (mp->mod_flags & FMD_MOD_STSUB))
1307 		(void) pthread_cond_wait(&mp->mod_cv, &mp->mod_lock);
1308 
1309 	if (mp->mod_error != 0) {
1310 		(void) pthread_mutex_unlock(&mp->mod_lock);
1311 		return (fmd_set_errno(EFMD_HDL_ABORT));
1312 	}
1313 
1314 	mp->mod_flags |= FMD_MOD_STSUB;
1315 	(void) pthread_cond_broadcast(&mp->mod_cv);
1316 	(void) pthread_mutex_unlock(&mp->mod_lock);
1317 
1318 	/*
1319 	 * Create a stats pseudo-event and dispatch it to the module, forcing
1320 	 * it to next execute its custom snapshot routine (or the empty one).
1321 	 */
1322 	e = fmd_event_create(FMD_EVT_STATS, FMD_HRT_NOW, NULL, NULL);
1323 	fmd_eventq_insert_at_head(mp->mod_queue, e);
1324 
1325 	/*
1326 	 * Grab the module lock and then wait on mod_cv for STPUB to be set,
1327 	 * indicating the snapshot routine is completed and the module is idle.
1328 	 */
1329 	(void) pthread_mutex_lock(&mp->mod_lock);
1330 
1331 	while (mp->mod_error == 0 && !(mp->mod_flags & FMD_MOD_STPUB)) {
1332 		struct timespec tms;
1333 
1334 		(void) pthread_cond_wait(&mp->mod_cv, &mp->mod_lock);
1335 		(void) pthread_mutex_unlock(&mp->mod_lock);
1336 		tms.tv_sec = 0;
1337 		tms.tv_nsec = 10000000;
1338 		(void) nanosleep(&tms, NULL);
1339 		(void) pthread_mutex_lock(&mp->mod_lock);
1340 	}
1341 
1342 	if (mp->mod_error != 0) {
1343 		(void) pthread_mutex_unlock(&mp->mod_lock);
1344 		return (fmd_set_errno(EFMD_HDL_ABORT));
1345 	}
1346 
1347 	(void) pthread_cond_broadcast(&mp->mod_cv);
1348 	(void) pthread_mutex_unlock(&mp->mod_lock);
1349 
1350 	/*
1351 	 * Update ms_snaptime and take the actual snapshot of the various
1352 	 * statistics while the module is quiescent and waiting for us.
1353 	 */
1354 	(void) pthread_mutex_lock(&mp->mod_stats_lock);
1355 
1356 	if (mp->mod_stats != NULL) {
1357 		mp->mod_stats->ms_snaptime.fmds_value.ui64 = gethrtime();
1358 		err = fmd_ustat_snapshot(mp->mod_ustat, uss);
1359 	} else
1360 		err = fmd_set_errno(EFMD_HDL_ABORT);
1361 
1362 	(void) pthread_mutex_unlock(&mp->mod_stats_lock);
1363 
1364 	/*
1365 	 * With the snapshot complete, grab the module lock and clear both
1366 	 * STSUB and STPUB, permitting everyone to wake up and continue.
1367 	 */
1368 	(void) pthread_mutex_lock(&mp->mod_lock);
1369 
1370 	ASSERT(mp->mod_flags & FMD_MOD_STSUB);
1371 	ASSERT(mp->mod_flags & FMD_MOD_STPUB);
1372 	mp->mod_flags &= ~(FMD_MOD_STSUB | FMD_MOD_STPUB);
1373 
1374 	(void) pthread_cond_broadcast(&mp->mod_cv);
1375 	(void) pthread_mutex_unlock(&mp->mod_lock);
1376 
1377 	return (err);
1378 }
1379 
1380 struct topo_hdl *
1381 fmd_module_topo_hold(fmd_module_t *mp)
1382 {
1383 	fmd_modtopo_t *mtp;
1384 
1385 	ASSERT(fmd_module_locked(mp));
1386 
1387 	mtp = fmd_zalloc(sizeof (fmd_modtopo_t), FMD_SLEEP);
1388 	mtp->mt_topo = mp->mod_topo_current;
1389 	fmd_topo_addref(mtp->mt_topo);
1390 	fmd_list_prepend(&mp->mod_topolist, mtp);
1391 
1392 	return (mtp->mt_topo->ft_hdl);
1393 }
1394 
1395 int
1396 fmd_module_topo_rele(fmd_module_t *mp, struct topo_hdl *hdl)
1397 {
1398 	fmd_modtopo_t *mtp;
1399 
1400 	ASSERT(fmd_module_locked(mp));
1401 
1402 	for (mtp = fmd_list_next(&mp->mod_topolist); mtp != NULL;
1403 	    mtp = fmd_list_next(mtp)) {
1404 		if (mtp->mt_topo->ft_hdl == hdl)
1405 			break;
1406 	}
1407 
1408 	if (mtp == NULL)
1409 		return (-1);
1410 
1411 	fmd_list_delete(&mp->mod_topolist, mtp);
1412 	fmd_topo_rele(mtp->mt_topo);
1413 	fmd_free(mtp, sizeof (fmd_modtopo_t));
1414 	return (0);
1415 }
1416