xref: /titanic_51/usr/src/cmd/fm/fmd/common/fmd_module.c (revision 657a8c206b913d1ee578fd725f0b25eca5b77253)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <signal.h>
30 #include <dirent.h>
31 #include <limits.h>
32 #include <alloca.h>
33 #include <unistd.h>
34 #include <stdio.h>
35 
36 #include <fmd_string.h>
37 #include <fmd_alloc.h>
38 #include <fmd_module.h>
39 #include <fmd_error.h>
40 #include <fmd_conf.h>
41 #include <fmd_dispq.h>
42 #include <fmd_eventq.h>
43 #include <fmd_timerq.h>
44 #include <fmd_subr.h>
45 #include <fmd_thread.h>
46 #include <fmd_ustat.h>
47 #include <fmd_case.h>
48 #include <fmd_protocol.h>
49 #include <fmd_buf.h>
50 #include <fmd_ckpt.h>
51 #include <fmd_xprt.h>
52 #include <fmd_topo.h>
53 
54 #include <fmd.h>
55 
56 /*
57  * Template for per-module statistics installed by fmd on behalf of each active
58  * module.  These are used to initialize the per-module mp->mod_stats below.
59  * NOTE: FMD_TYPE_STRING statistics should not be used here.  If they are
60  * required in the future, the FMD_ADM_MODDSTAT service routine must change.
61  */
62 static const fmd_modstat_t _fmd_modstat_tmpl = {
63 {
64 { "fmd.dispatched", FMD_TYPE_UINT64, "total events dispatched to module" },
65 { "fmd.dequeued", FMD_TYPE_UINT64, "total events dequeued by module" },
66 { "fmd.prdequeued", FMD_TYPE_UINT64, "protocol events dequeued by module" },
67 { "fmd.dropped", FMD_TYPE_UINT64, "total events dropped on queue overflow" },
68 { "fmd.wcnt", FMD_TYPE_UINT32, "count of events waiting on queue" },
69 { "fmd.wtime", FMD_TYPE_TIME, "total wait time on queue" },
70 { "fmd.wlentime", FMD_TYPE_TIME, "total wait length * time product" },
71 { "fmd.wlastupdate", FMD_TYPE_TIME, "hrtime of last wait queue update" },
72 { "fmd.dtime", FMD_TYPE_TIME, "total processing time after dequeue" },
73 { "fmd.dlastupdate", FMD_TYPE_TIME, "hrtime of last event dequeue completion" },
74 },
75 { "fmd.loadtime", FMD_TYPE_TIME, "hrtime at which module was loaded" },
76 { "fmd.snaptime", FMD_TYPE_TIME, "hrtime of last statistics snapshot" },
77 { "fmd.accepted", FMD_TYPE_UINT64, "total events accepted by module" },
78 { "fmd.debugdrop", FMD_TYPE_UINT64, "dropped debug messages" },
79 { "fmd.memtotal", FMD_TYPE_SIZE, "total memory allocated by module" },
80 { "fmd.memlimit", FMD_TYPE_SIZE, "limit on total memory allocated" },
81 { "fmd.buftotal", FMD_TYPE_SIZE, "total buffer space used by module" },
82 { "fmd.buflimit", FMD_TYPE_SIZE, "limit on total buffer space" },
83 { "fmd.thrtotal", FMD_TYPE_UINT32, "total number of auxiliary threads" },
84 { "fmd.thrlimit", FMD_TYPE_UINT32, "limit on number of auxiliary threads" },
85 { "fmd.caseopen", FMD_TYPE_UINT64, "cases currently open by module" },
86 { "fmd.casesolved", FMD_TYPE_UINT64, "total cases solved by module" },
87 { "fmd.caseclosed", FMD_TYPE_UINT64, "total cases closed by module" },
88 { "fmd.ckptsave", FMD_TYPE_BOOL, "save checkpoints for module" },
89 { "fmd.ckptrestore", FMD_TYPE_BOOL, "restore checkpoints for module" },
90 { "fmd.ckptzero", FMD_TYPE_BOOL, "zeroed checkpoint at startup" },
91 { "fmd.ckptcnt", FMD_TYPE_UINT64, "number of checkpoints taken" },
92 { "fmd.ckpttime", FMD_TYPE_TIME, "total checkpoint time" },
93 { "fmd.xprtopen", FMD_TYPE_UINT32, "total number of open transports" },
94 { "fmd.xprtlimit", FMD_TYPE_UINT32, "limit on number of open transports" },
95 { "fmd.xprtqlimit", FMD_TYPE_UINT32, "limit on transport event queue length" },
96 };
97 
98 static void
99 fmd_module_start(void *arg)
100 {
101 	fmd_module_t *mp = arg;
102 	fmd_event_t *ep;
103 	fmd_xprt_t *xp;
104 
105 	(void) pthread_mutex_lock(&mp->mod_lock);
106 
107 	if (mp->mod_ops->mop_init(mp) != 0 || mp->mod_error != 0) {
108 		if (mp->mod_error == 0)
109 			mp->mod_error = errno ? errno : EFMD_MOD_INIT;
110 		goto out;
111 	}
112 
113 	if (fmd.d_mod_event != NULL)
114 		fmd_eventq_insert_at_head(mp->mod_queue, fmd.d_mod_event);
115 
116 	ASSERT(MUTEX_HELD(&mp->mod_lock));
117 	mp->mod_flags |= FMD_MOD_INIT;
118 
119 	(void) pthread_cond_broadcast(&mp->mod_cv);
120 	(void) pthread_mutex_unlock(&mp->mod_lock);
121 
122 	/*
123 	 * If the module opened any transports while executing _fmd_init(),
124 	 * they are suspended. Now that _fmd_init() is done, wake them up.
125 	 */
126 	for (xp = fmd_list_next(&mp->mod_transports);
127 	    xp != NULL; xp = fmd_list_next(xp))
128 		fmd_xprt_xresume(xp, FMD_XPRT_ISUSPENDED);
129 
130 	/*
131 	 * Wait for events to arrive by checking mod_error and then sleeping in
132 	 * fmd_eventq_delete().  If a NULL event is returned, the eventq has
133 	 * been aborted and we continue on to call fini and exit the thread.
134 	 */
135 	while ((ep = fmd_eventq_delete(mp->mod_queue)) != NULL) {
136 		/*
137 		 * If the module has failed, discard the event without ever
138 		 * passing it to the module and go back to sleep.
139 		 */
140 		if (mp->mod_error != 0) {
141 			fmd_eventq_done(mp->mod_queue);
142 			fmd_event_rele(ep);
143 			continue;
144 		}
145 
146 		mp->mod_ops->mop_dispatch(mp, ep);
147 		fmd_eventq_done(mp->mod_queue);
148 
149 		/*
150 		 * Once mop_dispatch() is complete, grab the lock and perform
151 		 * any event-specific post-processing.  Finally, if necessary,
152 		 * checkpoint the state of the module after this event.
153 		 */
154 		fmd_module_lock(mp);
155 
156 		if (FMD_EVENT_TYPE(ep) == FMD_EVT_CLOSE)
157 			fmd_case_delete(FMD_EVENT_DATA(ep));
158 
159 		fmd_ckpt_save(mp);
160 		fmd_module_unlock(mp);
161 		fmd_event_rele(ep);
162 	}
163 
164 	if (mp->mod_ops->mop_fini(mp) != 0 && mp->mod_error == 0)
165 		mp->mod_error = errno ? errno : EFMD_MOD_FINI;
166 
167 	(void) pthread_mutex_lock(&mp->mod_lock);
168 	mp->mod_flags |= FMD_MOD_FINI;
169 
170 out:
171 	(void) pthread_cond_broadcast(&mp->mod_cv);
172 	(void) pthread_mutex_unlock(&mp->mod_lock);
173 }
174 
175 fmd_module_t *
176 fmd_module_create(const char *path, const fmd_modops_t *ops)
177 {
178 	fmd_module_t *mp = fmd_zalloc(sizeof (fmd_module_t), FMD_SLEEP);
179 
180 	char buf[PATH_MAX], *p;
181 	const char *dir;
182 	uint32_t limit;
183 	int err;
184 
185 	(void) strlcpy(buf, fmd_strbasename(path), sizeof (buf));
186 	if ((p = strrchr(buf, '.')) != NULL && strcmp(p, ".so") == 0)
187 		*p = '\0'; /* strip trailing .so from any module name */
188 
189 	(void) pthread_mutex_init(&mp->mod_lock, NULL);
190 	(void) pthread_cond_init(&mp->mod_cv, NULL);
191 	(void) pthread_mutex_init(&mp->mod_stats_lock, NULL);
192 
193 	mp->mod_name = fmd_strdup(buf, FMD_SLEEP);
194 	mp->mod_path = fmd_strdup(path, FMD_SLEEP);
195 	mp->mod_ops = ops;
196 	mp->mod_ustat = fmd_ustat_create();
197 
198 	(void) fmd_conf_getprop(fmd.d_conf, "ckpt.dir", &dir);
199 	(void) snprintf(buf, sizeof (buf),
200 	    "%s/%s/%s", fmd.d_rootdir, dir, mp->mod_name);
201 
202 	mp->mod_ckpt = fmd_strdup(buf, FMD_SLEEP);
203 
204 	(void) fmd_conf_getprop(fmd.d_conf, "client.tmrlim", &limit);
205 	mp->mod_timerids = fmd_idspace_create(mp->mod_name, 1, limit + 1);
206 	mp->mod_threads = fmd_idspace_create(mp->mod_name, 0, INT_MAX);
207 
208 	fmd_buf_hash_create(&mp->mod_bufs);
209 	fmd_serd_hash_create(&mp->mod_serds);
210 
211 	mp->mod_topo_current = fmd_topo_hold();
212 
213 	(void) pthread_mutex_lock(&fmd.d_mod_lock);
214 	fmd_list_append(&fmd.d_mod_list, mp);
215 	(void) pthread_mutex_unlock(&fmd.d_mod_lock);
216 
217 	/*
218 	 * Initialize the module statistics that are kept on its behalf by fmd.
219 	 * These are set up using a template defined at the top of this file.
220 	 */
221 	if ((mp->mod_stats = (fmd_modstat_t *)fmd_ustat_insert(mp->mod_ustat,
222 	    FMD_USTAT_ALLOC, sizeof (_fmd_modstat_tmpl) / sizeof (fmd_stat_t),
223 	    (fmd_stat_t *)&_fmd_modstat_tmpl, NULL)) == NULL) {
224 		fmd_error(EFMD_MOD_INIT, "failed to initialize per-mod stats");
225 		fmd_module_destroy(mp);
226 		return (NULL);
227 	}
228 
229 	if (nv_alloc_init(&mp->mod_nva_sleep,
230 	    &fmd_module_nva_ops_sleep, mp) != 0 ||
231 	    nv_alloc_init(&mp->mod_nva_nosleep,
232 	    &fmd_module_nva_ops_nosleep, mp) != 0) {
233 		fmd_error(EFMD_MOD_INIT, "failed to initialize nvlist "
234 		    "allocation routines");
235 		fmd_module_destroy(mp);
236 		return (NULL);
237 	}
238 
239 	(void) fmd_conf_getprop(fmd.d_conf, "client.evqlim", &limit);
240 
241 	mp->mod_queue = fmd_eventq_create(mp,
242 	    &mp->mod_stats->ms_evqstat, &mp->mod_stats_lock, limit);
243 
244 	(void) fmd_conf_getprop(fmd.d_conf, "client.memlim",
245 	    &mp->mod_stats->ms_memlimit.fmds_value.ui64);
246 
247 	(void) fmd_conf_getprop(fmd.d_conf, "client.buflim",
248 	    &mp->mod_stats->ms_buflimit.fmds_value.ui64);
249 
250 	(void) fmd_conf_getprop(fmd.d_conf, "client.thrlim",
251 	    &mp->mod_stats->ms_thrlimit.fmds_value.ui32);
252 
253 	(void) fmd_conf_getprop(fmd.d_conf, "client.xprtlim",
254 	    &mp->mod_stats->ms_xprtlimit.fmds_value.ui32);
255 
256 	(void) fmd_conf_getprop(fmd.d_conf, "client.xprtqlim",
257 	    &mp->mod_stats->ms_xprtqlimit.fmds_value.ui32);
258 
259 	(void) fmd_conf_getprop(fmd.d_conf, "ckpt.save",
260 	    &mp->mod_stats->ms_ckpt_save.fmds_value.bool);
261 
262 	(void) fmd_conf_getprop(fmd.d_conf, "ckpt.restore",
263 	    &mp->mod_stats->ms_ckpt_restore.fmds_value.bool);
264 
265 	(void) fmd_conf_getprop(fmd.d_conf, "ckpt.zero",
266 	    &mp->mod_stats->ms_ckpt_zeroed.fmds_value.bool);
267 
268 	if (mp->mod_stats->ms_ckpt_zeroed.fmds_value.bool)
269 		fmd_ckpt_delete(mp); /* blow away any pre-existing checkpoint */
270 
271 	/*
272 	 * Place a hold on the module and grab the module lock before creating
273 	 * the module's thread to ensure that it cannot destroy the module and
274 	 * that it cannot call ops->mop_init() before we're done setting up.
275 	 * NOTE: from now on, we must use fmd_module_rele() for error paths.
276 	 */
277 	fmd_module_hold(mp);
278 	(void) pthread_mutex_lock(&mp->mod_lock);
279 	mp->mod_stats->ms_loadtime.fmds_value.ui64 = gethrtime();
280 	mp->mod_thread = fmd_thread_create(mp, fmd_module_start, mp);
281 
282 	if (mp->mod_thread == NULL) {
283 		fmd_error(EFMD_MOD_THR, "failed to create thread for %s", path);
284 		(void) pthread_mutex_unlock(&mp->mod_lock);
285 		fmd_module_rele(mp);
286 		return (NULL);
287 	}
288 
289 	/*
290 	 * At this point our module structure is nearly finished and its thread
291 	 * is starting execution in fmd_module_start() above, which will begin
292 	 * by blocking for mod_lock.  We now drop mod_lock and wait for either
293 	 * FMD_MOD_INIT or mod_error to be set before proceeding.
294 	 */
295 	while (!(mp->mod_flags & FMD_MOD_INIT) && mp->mod_error == 0)
296 		(void) pthread_cond_wait(&mp->mod_cv, &mp->mod_lock);
297 
298 	/*
299 	 * If the module has failed to initialize, copy its errno to the errno
300 	 * of the caller, wait for it to unload, and then destroy it.
301 	 */
302 	if (!(mp->mod_flags & FMD_MOD_INIT)) {
303 		err = mp->mod_error;
304 		(void) pthread_mutex_unlock(&mp->mod_lock);
305 
306 		if (err == EFMD_CKPT_INVAL)
307 			fmd_ckpt_rename(mp); /* move aside bad checkpoint */
308 
309 		/*
310 		 * If we're in the background, keep quiet about failure to
311 		 * load because a handle wasn't registered: this is a module's
312 		 * way of telling us it didn't want to be loaded for some
313 		 * reason related to system configuration.  If we're in the
314 		 * foreground we log this too in order to inform developers.
315 		 */
316 		if (fmd.d_fg || err != EFMD_HDL_INIT) {
317 			fmd_error(EFMD_MOD_INIT, "failed to load %s: %s\n",
318 			    path, fmd_strerror(err));
319 		}
320 
321 		fmd_module_unload(mp);
322 		fmd_module_rele(mp);
323 
324 		(void) fmd_set_errno(err);
325 		return (NULL);
326 	}
327 
328 	(void) pthread_cond_broadcast(&mp->mod_cv);
329 	(void) pthread_mutex_unlock(&mp->mod_lock);
330 
331 	fmd_dprintf(FMD_DBG_MOD, "loaded module %s\n", mp->mod_name);
332 	return (mp);
333 }
334 
335 static void
336 fmd_module_untimeout(fmd_idspace_t *ids, id_t id, fmd_module_t *mp)
337 {
338 	void *arg = fmd_timerq_remove(fmd.d_timers, ids, id);
339 
340 	/*
341 	 * The root module calls fmd_timerq_install() directly and must take
342 	 * responsibility for any cleanup of timer arguments that is required.
343 	 * All other modules use fmd_modtimer_t's as the arg data; free them.
344 	 */
345 	if (arg != NULL && mp != fmd.d_rmod)
346 		fmd_free(arg, sizeof (fmd_modtimer_t));
347 }
348 
349 void
350 fmd_module_unload(fmd_module_t *mp)
351 {
352 	fmd_modtopo_t *mtp;
353 
354 	(void) pthread_mutex_lock(&mp->mod_lock);
355 
356 	if (mp->mod_flags & FMD_MOD_QUIT) {
357 		(void) pthread_mutex_unlock(&mp->mod_lock);
358 		return; /* module is already unloading */
359 	}
360 
361 	ASSERT(mp->mod_thread != NULL);
362 	mp->mod_flags |= FMD_MOD_QUIT;
363 
364 	if (mp->mod_queue != NULL)
365 		fmd_eventq_abort(mp->mod_queue);
366 
367 	/*
368 	 * Wait for the module's thread to stop processing events and call
369 	 * _fmd_fini() and exit.  We do this by waiting for FMD_MOD_FINI to be
370 	 * set if INIT was set, and then attempting to join with the thread.
371 	 */
372 	while ((mp->mod_flags & (FMD_MOD_INIT | FMD_MOD_FINI)) == FMD_MOD_INIT)
373 		(void) pthread_cond_wait(&mp->mod_cv, &mp->mod_lock);
374 
375 	(void) pthread_cond_broadcast(&mp->mod_cv);
376 	(void) pthread_mutex_unlock(&mp->mod_lock);
377 
378 	fmd_thread_destroy(mp->mod_thread, FMD_THREAD_JOIN);
379 	mp->mod_thread = NULL;
380 
381 	/*
382 	 * Once the module is no longer active, clean up any data structures
383 	 * that are only required when the module is loaded.
384 	 */
385 	fmd_module_lock(mp);
386 
387 	if (mp->mod_timerids != NULL) {
388 		fmd_idspace_apply(mp->mod_timerids,
389 		    (void (*)())fmd_module_untimeout, mp);
390 
391 		fmd_idspace_destroy(mp->mod_timerids);
392 		mp->mod_timerids = NULL;
393 	}
394 
395 	if (mp->mod_threads != NULL) {
396 		fmd_idspace_destroy(mp->mod_threads);
397 		mp->mod_threads = NULL;
398 	}
399 
400 	(void) fmd_buf_hash_destroy(&mp->mod_bufs);
401 	fmd_serd_hash_destroy(&mp->mod_serds);
402 
403 	while ((mtp = fmd_list_next(&mp->mod_topolist)) != NULL) {
404 		fmd_list_delete(&mp->mod_topolist, mtp);
405 		fmd_topo_rele(mtp->mt_topo);
406 		fmd_free(mtp, sizeof (fmd_modtopo_t));
407 	}
408 
409 	fmd_module_unlock(mp);
410 	fmd_dprintf(FMD_DBG_MOD, "unloaded module %s\n", mp->mod_name);
411 }
412 
413 void
414 fmd_module_destroy(fmd_module_t *mp)
415 {
416 	fmd_conf_formal_t *cfp = mp->mod_argv;
417 	int i;
418 
419 	ASSERT(MUTEX_HELD(&mp->mod_lock));
420 
421 	if (mp->mod_thread != NULL) {
422 		(void) pthread_mutex_unlock(&mp->mod_lock);
423 		fmd_module_unload(mp);
424 		(void) pthread_mutex_lock(&mp->mod_lock);
425 	}
426 
427 	ASSERT(mp->mod_thread == NULL);
428 	ASSERT(mp->mod_refs == 0);
429 
430 	/*
431 	 * Once the module's thread is dead, we can safely remove the module
432 	 * from global visibility and by removing it from d_mod_list.  Any
433 	 * modhash pointers are already gone by virtue of mod_refs being zero.
434 	 */
435 	(void) pthread_mutex_lock(&fmd.d_mod_lock);
436 	fmd_list_delete(&fmd.d_mod_list, mp);
437 	(void) pthread_mutex_unlock(&fmd.d_mod_lock);
438 
439 	if (mp->mod_topo_current != NULL)
440 		fmd_topo_rele(mp->mod_topo_current);
441 
442 	if (mp->mod_nva_sleep.nva_ops != NULL)
443 		nv_alloc_fini(&mp->mod_nva_sleep);
444 	if (mp->mod_nva_nosleep.nva_ops != NULL)
445 		nv_alloc_fini(&mp->mod_nva_nosleep);
446 
447 	/*
448 	 * Once the module is no longer processing events and no longer visible
449 	 * through any program data structures, we can free all of its content.
450 	 */
451 	if (mp->mod_queue != NULL) {
452 		fmd_eventq_destroy(mp->mod_queue);
453 		mp->mod_queue = NULL;
454 	}
455 
456 	if (mp->mod_ustat != NULL) {
457 		(void) pthread_mutex_lock(&mp->mod_stats_lock);
458 		fmd_ustat_destroy(mp->mod_ustat);
459 		mp->mod_ustat = NULL;
460 		mp->mod_stats = NULL;
461 		(void) pthread_mutex_unlock(&mp->mod_stats_lock);
462 	}
463 
464 	for (i = 0; i < mp->mod_dictc; i++)
465 		fm_dc_closedict(mp->mod_dictv[i]);
466 
467 	fmd_free(mp->mod_dictv, sizeof (struct fm_dc_handle *) * mp->mod_dictc);
468 
469 	if (mp->mod_conf != NULL)
470 		fmd_conf_close(mp->mod_conf);
471 
472 	for (i = 0; i < mp->mod_argc; i++, cfp++) {
473 		fmd_strfree((char *)cfp->cf_name);
474 		fmd_strfree((char *)cfp->cf_default);
475 	}
476 
477 	fmd_free(mp->mod_argv, sizeof (fmd_conf_formal_t) * mp->mod_argc);
478 
479 	fmd_strfree(mp->mod_name);
480 	fmd_strfree(mp->mod_path);
481 	fmd_strfree(mp->mod_ckpt);
482 	nvlist_free(mp->mod_fmri);
483 	fmd_strfree(mp->mod_vers);
484 
485 	fmd_free(mp, sizeof (fmd_module_t));
486 }
487 
488 /*
489  * fmd_module_error() is called after the stack is unwound from a call to
490  * fmd_module_abort() to indicate that the module has failed.  The mod_error
491  * field is used to hold the error code of the first fatal error to the module.
492  * An EFMD_MOD_FAIL event is then created and sent to fmd-self-diagnosis.
493  */
494 static void
495 fmd_module_error(fmd_module_t *mp, int err)
496 {
497 	fmd_event_t *e;
498 	nvlist_t *nvl;
499 	char *class;
500 
501 	ASSERT(MUTEX_HELD(&mp->mod_lock));
502 	ASSERT(err != 0);
503 
504 	TRACE((FMD_DBG_MOD, "module aborted: err=%d", err));
505 
506 	if (mp->mod_error == 0)
507 		mp->mod_error = err;
508 
509 	if (mp == fmd.d_self)
510 		return; /* do not post event if fmd.d_self itself fails */
511 
512 	/*
513 	 * Send an error indicating the module has now failed to fmd.d_self.
514 	 * Since the error causing the failure has already been logged by
515 	 * fmd_api_xerror(), we do not need to bother logging this event.
516 	 * It only exists for the purpose of notifying fmd.d_self that it can
517 	 * close the case associated with this module because mod_error is set.
518 	 */
519 	nvl = fmd_protocol_moderror(mp, EFMD_MOD_FAIL, fmd_strerror(err));
520 	(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
521 	e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
522 	fmd_dispq_dispatch(fmd.d_disp, e, class);
523 }
524 
525 void
526 fmd_module_dispatch(fmd_module_t *mp, fmd_event_t *e)
527 {
528 	const fmd_hdl_ops_t *ops = mp->mod_info->fmdi_ops;
529 	fmd_event_impl_t *ep = (fmd_event_impl_t *)e;
530 	fmd_hdl_t *hdl = (fmd_hdl_t *)mp;
531 	fmd_modtimer_t *t;
532 	volatile int err;
533 
534 	/*
535 	 * Before calling the appropriate module callback, enter the module as
536 	 * if by fmd_module_enter() and establish mod_jmpbuf for any aborts.
537 	 */
538 	(void) pthread_mutex_lock(&mp->mod_lock);
539 
540 	ASSERT(!(mp->mod_flags & FMD_MOD_BUSY));
541 	mp->mod_flags |= FMD_MOD_BUSY;
542 
543 	if ((err = setjmp(mp->mod_jmpbuf)) != 0) {
544 		(void) pthread_mutex_lock(&mp->mod_lock);
545 		fmd_module_error(mp, err);
546 	}
547 
548 	(void) pthread_cond_broadcast(&mp->mod_cv);
549 	(void) pthread_mutex_unlock(&mp->mod_lock);
550 
551 	/*
552 	 * If it's the first time through fmd_module_dispatch(), call the
553 	 * appropriate module callback based on the event type.  If the call
554 	 * triggers an fmd_module_abort(), we'll return to setjmp() above with
555 	 * err set to a non-zero value and then bypass this before exiting.
556 	 */
557 	if (err == 0) {
558 		switch (ep->ev_type) {
559 		case FMD_EVT_PROTOCOL:
560 			ops->fmdo_recv(hdl, e, ep->ev_nvl, ep->ev_data);
561 			break;
562 		case FMD_EVT_TIMEOUT:
563 			t = ep->ev_data;
564 			ASSERT(t->mt_mod == mp);
565 			ops->fmdo_timeout(hdl, t->mt_id, t->mt_arg);
566 			break;
567 		case FMD_EVT_CLOSE:
568 			ops->fmdo_close(hdl, ep->ev_data);
569 			break;
570 		case FMD_EVT_STATS:
571 			ops->fmdo_stats(hdl);
572 			fmd_modstat_publish(mp);
573 			break;
574 		case FMD_EVT_GC:
575 			ops->fmdo_gc(hdl);
576 			break;
577 		case FMD_EVT_PUBLISH:
578 			fmd_case_publish(ep->ev_data, FMD_CASE_CURRENT);
579 			break;
580 		case FMD_EVT_TOPO:
581 			fmd_topo_rele(mp->mod_topo_current);
582 			mp->mod_topo_current = (fmd_topo_t *)ep->ev_data;
583 			fmd_topo_addref(mp->mod_topo_current);
584 			ops->fmdo_topo(hdl, mp->mod_topo_current->ft_hdl);
585 			break;
586 		}
587 	}
588 
589 	fmd_module_exit(mp);
590 }
591 
592 int
593 fmd_module_transport(fmd_module_t *mp, fmd_xprt_t *xp, fmd_event_t *e)
594 {
595 	fmd_event_impl_t *ep = (fmd_event_impl_t *)e;
596 	fmd_hdl_t *hdl = (fmd_hdl_t *)mp;
597 
598 	ASSERT(ep->ev_type == FMD_EVT_PROTOCOL);
599 	return (mp->mod_info->fmdi_ops->fmdo_send(hdl, xp, e, ep->ev_nvl));
600 }
601 
602 void
603 fmd_module_timeout(fmd_modtimer_t *t, id_t id, hrtime_t hrt)
604 {
605 	fmd_event_t *e;
606 
607 	t->mt_id = id; /* save id in case we need to delete from eventq */
608 	e = fmd_event_create(FMD_EVT_TIMEOUT, hrt, NULL, t);
609 	fmd_eventq_insert_at_time(t->mt_mod->mod_queue, e);
610 }
611 
612 /*
613  * Garbage collection is initiated by a timer callback once per day or at the
614  * request of fmadm.  Purge old SERD entries and send the module a GC event.
615  */
616 void
617 fmd_module_gc(fmd_module_t *mp)
618 {
619 	fmd_hdl_info_t *info;
620 	fmd_event_t *e;
621 
622 	if (mp->mod_error != 0)
623 		return; /* do not do anything if the module has failed */
624 
625 	fmd_module_lock(mp);
626 
627 	if ((info = mp->mod_info) != NULL) {
628 		fmd_serd_hash_apply(&mp->mod_serds,
629 		    (fmd_serd_eng_f *)fmd_serd_eng_gc, NULL);
630 	}
631 
632 	fmd_module_unlock(mp);
633 
634 	if (info != NULL) {
635 		e = fmd_event_create(FMD_EVT_GC, FMD_HRT_NOW, NULL, NULL);
636 		fmd_eventq_insert_at_head(mp->mod_queue, e);
637 	}
638 }
639 
640 void
641 fmd_module_trygc(fmd_module_t *mp)
642 {
643 	if (fmd_module_trylock(mp)) {
644 		fmd_serd_hash_apply(&mp->mod_serds,
645 		    (fmd_serd_eng_f *)fmd_serd_eng_gc, NULL);
646 		fmd_module_unlock(mp);
647 	}
648 }
649 
650 int
651 fmd_module_contains(fmd_module_t *mp, fmd_event_t *ep)
652 {
653 	fmd_case_t *cp;
654 	int rv = 0;
655 
656 	fmd_module_lock(mp);
657 
658 	for (cp = fmd_list_next(&mp->mod_cases);
659 	    cp != NULL; cp = fmd_list_next(cp)) {
660 		if ((rv = fmd_case_contains(cp, ep)) != 0)
661 			break;
662 	}
663 
664 	if (rv == 0)
665 		rv = fmd_serd_hash_contains(&mp->mod_serds, ep);
666 
667 	fmd_module_unlock(mp);
668 	return (rv);
669 }
670 
671 void
672 fmd_module_setdirty(fmd_module_t *mp)
673 {
674 	(void) pthread_mutex_lock(&mp->mod_lock);
675 	mp->mod_flags |= FMD_MOD_MDIRTY;
676 	(void) pthread_mutex_unlock(&mp->mod_lock);
677 }
678 
679 void
680 fmd_module_setcdirty(fmd_module_t *mp)
681 {
682 	(void) pthread_mutex_lock(&mp->mod_lock);
683 	mp->mod_flags |= FMD_MOD_CDIRTY;
684 	(void) pthread_mutex_unlock(&mp->mod_lock);
685 }
686 
687 void
688 fmd_module_clrdirty(fmd_module_t *mp)
689 {
690 	fmd_case_t *cp;
691 
692 	fmd_module_lock(mp);
693 
694 	if (mp->mod_flags & FMD_MOD_CDIRTY) {
695 		for (cp = fmd_list_next(&mp->mod_cases);
696 		    cp != NULL; cp = fmd_list_next(cp))
697 			fmd_case_clrdirty(cp);
698 	}
699 
700 	if (mp->mod_flags & FMD_MOD_MDIRTY) {
701 		fmd_serd_hash_apply(&mp->mod_serds,
702 		    (fmd_serd_eng_f *)fmd_serd_eng_clrdirty, NULL);
703 		fmd_buf_hash_commit(&mp->mod_bufs);
704 	}
705 
706 	(void) pthread_mutex_lock(&mp->mod_lock);
707 	mp->mod_flags &= ~(FMD_MOD_MDIRTY | FMD_MOD_CDIRTY);
708 	(void) pthread_mutex_unlock(&mp->mod_lock);
709 
710 	fmd_module_unlock(mp);
711 }
712 
713 void
714 fmd_module_commit(fmd_module_t *mp)
715 {
716 	fmd_case_t *cp;
717 
718 	ASSERT(fmd_module_locked(mp));
719 
720 	if (mp->mod_flags & FMD_MOD_CDIRTY) {
721 		for (cp = fmd_list_next(&mp->mod_cases);
722 		    cp != NULL; cp = fmd_list_next(cp))
723 			fmd_case_commit(cp);
724 	}
725 
726 	if (mp->mod_flags & FMD_MOD_MDIRTY) {
727 		fmd_serd_hash_apply(&mp->mod_serds,
728 		    (fmd_serd_eng_f *)fmd_serd_eng_commit, NULL);
729 		fmd_buf_hash_commit(&mp->mod_bufs);
730 	}
731 
732 	(void) pthread_mutex_lock(&mp->mod_lock);
733 	mp->mod_flags &= ~(FMD_MOD_MDIRTY | FMD_MOD_CDIRTY);
734 	(void) pthread_mutex_unlock(&mp->mod_lock);
735 
736 	mp->mod_gen++;
737 }
738 
739 void
740 fmd_module_lock(fmd_module_t *mp)
741 {
742 	pthread_t self = pthread_self();
743 
744 	(void) pthread_mutex_lock(&mp->mod_lock);
745 
746 	while (mp->mod_flags & FMD_MOD_LOCK) {
747 		if (mp->mod_owner != self)
748 			(void) pthread_cond_wait(&mp->mod_cv, &mp->mod_lock);
749 		else
750 			fmd_panic("recursive module lock of %p\n", (void *)mp);
751 	}
752 
753 	mp->mod_owner = self;
754 	mp->mod_flags |= FMD_MOD_LOCK;
755 
756 	(void) pthread_cond_broadcast(&mp->mod_cv);
757 	(void) pthread_mutex_unlock(&mp->mod_lock);
758 }
759 
760 void
761 fmd_module_unlock(fmd_module_t *mp)
762 {
763 	(void) pthread_mutex_lock(&mp->mod_lock);
764 
765 	ASSERT(mp->mod_owner == pthread_self());
766 	ASSERT(mp->mod_flags & FMD_MOD_LOCK);
767 
768 	mp->mod_owner = 0;
769 	mp->mod_flags &= ~FMD_MOD_LOCK;
770 
771 	(void) pthread_cond_broadcast(&mp->mod_cv);
772 	(void) pthread_mutex_unlock(&mp->mod_lock);
773 }
774 
775 int
776 fmd_module_trylock(fmd_module_t *mp)
777 {
778 	(void) pthread_mutex_lock(&mp->mod_lock);
779 
780 	if (mp->mod_flags & FMD_MOD_LOCK) {
781 		(void) pthread_mutex_unlock(&mp->mod_lock);
782 		return (0);
783 	}
784 
785 	mp->mod_owner = pthread_self();
786 	mp->mod_flags |= FMD_MOD_LOCK;
787 
788 	(void) pthread_cond_broadcast(&mp->mod_cv);
789 	(void) pthread_mutex_unlock(&mp->mod_lock);
790 
791 	return (1);
792 }
793 
794 int
795 fmd_module_locked(fmd_module_t *mp)
796 {
797 	return ((mp->mod_flags & FMD_MOD_LOCK) &&
798 	    mp->mod_owner == pthread_self());
799 }
800 
801 int
802 fmd_module_enter(fmd_module_t *mp, void (*func)(fmd_hdl_t *))
803 {
804 	volatile int err;
805 
806 	(void) pthread_mutex_lock(&mp->mod_lock);
807 
808 	ASSERT(!(mp->mod_flags & FMD_MOD_BUSY));
809 	mp->mod_flags |= FMD_MOD_BUSY;
810 
811 	if ((err = setjmp(mp->mod_jmpbuf)) != 0) {
812 		(void) pthread_mutex_lock(&mp->mod_lock);
813 		fmd_module_error(mp, err);
814 	}
815 
816 	(void) pthread_cond_broadcast(&mp->mod_cv);
817 	(void) pthread_mutex_unlock(&mp->mod_lock);
818 
819 	/*
820 	 * If it's the first time through fmd_module_enter(), call the provided
821 	 * function on the module.  If no fmd_module_abort() results, we will
822 	 * fall through and return zero.  Otherwise we'll longjmp with an err,
823 	 * return to the setjmp() above, and return the error to our caller.
824 	 */
825 	if (err == 0 && func != NULL)
826 		(*func)((fmd_hdl_t *)mp);
827 
828 	return (err);
829 }
830 
831 void
832 fmd_module_exit(fmd_module_t *mp)
833 {
834 	(void) pthread_mutex_lock(&mp->mod_lock);
835 
836 	ASSERT(mp->mod_flags & FMD_MOD_BUSY);
837 	mp->mod_flags &= ~FMD_MOD_BUSY;
838 
839 	(void) pthread_cond_broadcast(&mp->mod_cv);
840 	(void) pthread_mutex_unlock(&mp->mod_lock);
841 }
842 
843 /*
844  * If the client.error policy has been set by a developer, stop or dump core
845  * based on the policy; if we stop and are resumed we'll continue and execute
846  * the default behavior to discard events in fmd_module_start().  If the caller
847  * is the primary module thread, we reach this state by longjmp'ing back to
848  * fmd_module_enter(), above.  If the caller is an auxiliary thread, we cancel
849  * ourself and arrange for the primary thread to call fmd_module_abort().
850  */
851 void
852 fmd_module_abort(fmd_module_t *mp, int err)
853 {
854 	uint_t policy = FMD_CERROR_UNLOAD;
855 	pthread_t tid = pthread_self();
856 
857 	(void) fmd_conf_getprop(fmd.d_conf, "client.error", &policy);
858 
859 	if (policy == FMD_CERROR_STOP) {
860 		fmd_error(err, "stopping after %s in client %s (%p)\n",
861 		    fmd_errclass(err), mp->mod_name, (void *)mp);
862 		(void) raise(SIGSTOP);
863 	} else if (policy == FMD_CERROR_ABORT) {
864 		fmd_panic("aborting due to %s in client %s (%p)\n",
865 		    fmd_errclass(err), mp->mod_name, (void *)mp);
866 	}
867 
868 	/*
869 	 * If the caller is an auxiliary thread, cancel the current thread.  We
870 	 * prefer to cancel because it affords developers the option of using
871 	 * the pthread_cleanup* APIs.  If cancellations have been disabled,
872 	 * fall through to forcing the current thread to exit.  In either case
873 	 * we update mod_error (if zero) to enter the failed state.  Once that
874 	 * is set, further events received by the module will be discarded.
875 	 *
876 	 * We also set the FMD_MOD_FAIL bit, indicating an unrecoverable error.
877 	 * When an auxiliary thread fails, the module is left in a delicate
878 	 * state where it is likely not able to continue execution (even to
879 	 * execute its _fmd_fini() routine) because our caller may hold locks
880 	 * that are private to the module and can no longer be released.  The
881 	 * FMD_MOD_FAIL bit forces fmd_api_module_lock() to abort if any other
882 	 * module threads reach an API call, in an attempt to get them to exit.
883 	 */
884 	if (tid != mp->mod_thread->thr_tid) {
885 		(void) pthread_mutex_lock(&mp->mod_lock);
886 
887 		if (mp->mod_error == 0)
888 			mp->mod_error = err;
889 
890 		mp->mod_flags |= FMD_MOD_FAIL;
891 		(void) pthread_mutex_unlock(&mp->mod_lock);
892 
893 		(void) pthread_cancel(tid);
894 		pthread_exit(NULL);
895 	}
896 
897 	ASSERT(mp->mod_flags & FMD_MOD_BUSY);
898 	longjmp(mp->mod_jmpbuf, err);
899 }
900 
901 void
902 fmd_module_hold(fmd_module_t *mp)
903 {
904 	(void) pthread_mutex_lock(&mp->mod_lock);
905 
906 	TRACE((FMD_DBG_MOD, "hold %p (%s/%u)\n",
907 	    (void *)mp, mp->mod_name, mp->mod_refs));
908 
909 	mp->mod_refs++;
910 	ASSERT(mp->mod_refs != 0);
911 
912 	(void) pthread_mutex_unlock(&mp->mod_lock);
913 }
914 
915 void
916 fmd_module_rele(fmd_module_t *mp)
917 {
918 	(void) pthread_mutex_lock(&mp->mod_lock);
919 
920 	TRACE((FMD_DBG_MOD, "rele %p (%s/%u)\n",
921 	    (void *)mp, mp->mod_name, mp->mod_refs));
922 
923 	ASSERT(mp->mod_refs != 0);
924 
925 	if (--mp->mod_refs == 0)
926 		fmd_module_destroy(mp);
927 	else
928 		(void) pthread_mutex_unlock(&mp->mod_lock);
929 }
930 
931 /*
932  * Wrapper around libdiagcode's fm_dc_opendict() to load module dictionaries.
933  * If the dictionary open is successful, the new dictionary is added to the
934  * mod_dictv[] array and mod_codelen is updated with the new maximum length.
935  */
936 int
937 fmd_module_dc_opendict(fmd_module_t *mp, const char *dict)
938 {
939 	struct fm_dc_handle *dcp, **dcv;
940 	char *dictdir, *dictnam, *p;
941 	size_t len;
942 
943 	ASSERT(fmd_module_locked(mp));
944 
945 	dictnam = alloca(strlen(dict) + 1);
946 	(void) strcpy(dictnam, fmd_strbasename(dict));
947 
948 	if ((p = strrchr(dictnam, '.')) != NULL &&
949 	    strcmp(p, ".dict") == 0)
950 		*p = '\0'; /* eliminate any trailing .dict suffix */
951 
952 	/*
953 	 * If 'dict' is an absolute path, dictdir = $rootdir/`dirname dict`
954 	 * If 'dict' is not an absolute path, dictdir = $dictdir/`dirname dict`
955 	 */
956 	if (dict[0] == '/') {
957 		len = strlen(fmd.d_rootdir) + strlen(dict) + 1;
958 		dictdir = alloca(len);
959 		(void) snprintf(dictdir, len, "%s%s", fmd.d_rootdir, dict);
960 		(void) fmd_strdirname(dictdir);
961 	} else {
962 		(void) fmd_conf_getprop(fmd.d_conf, "dictdir", &p);
963 		len = strlen(fmd.d_rootdir) + strlen(p) + strlen(dict) + 3;
964 		dictdir = alloca(len);
965 		(void) snprintf(dictdir, len,
966 		    "%s/%s/%s", fmd.d_rootdir, p, dict);
967 		(void) fmd_strdirname(dictdir);
968 	}
969 
970 	fmd_dprintf(FMD_DBG_MOD, "module %s opening %s -> %s/%s.dict\n",
971 	    mp->mod_name, dict, dictdir, dictnam);
972 
973 	if ((dcp = fm_dc_opendict(FM_DC_VERSION, dictdir, dictnam)) == NULL)
974 		return (-1); /* errno is set for us */
975 
976 	dcv = fmd_alloc(sizeof (dcp) * (mp->mod_dictc + 1), FMD_SLEEP);
977 	bcopy(mp->mod_dictv, dcv, sizeof (dcp) * mp->mod_dictc);
978 	fmd_free(mp->mod_dictv, sizeof (dcp) * mp->mod_dictc);
979 	mp->mod_dictv = dcv;
980 	mp->mod_dictv[mp->mod_dictc++] = dcp;
981 
982 	len = fm_dc_codelen(dcp);
983 	mp->mod_codelen = MAX(mp->mod_codelen, len);
984 
985 	return (0);
986 }
987 
988 /*
989  * Wrapper around libdiagcode's fm_dc_key2code() that examines all the module's
990  * dictionaries.  We adhere to the libdiagcode return values and semantics.
991  */
992 int
993 fmd_module_dc_key2code(fmd_module_t *mp,
994     char *const keys[], char *code, size_t codelen)
995 {
996 	int i, err;
997 
998 	for (i = 0; i < mp->mod_dictc; i++) {
999 		if ((err = fm_dc_key2code(mp->mod_dictv[i], (const char **)keys,
1000 		    code, codelen)) == 0 || errno != ENOMSG)
1001 			return (err);
1002 	}
1003 
1004 	return (fmd_set_errno(ENOMSG));
1005 }
1006 
1007 fmd_modhash_t *
1008 fmd_modhash_create(void)
1009 {
1010 	fmd_modhash_t *mhp = fmd_alloc(sizeof (fmd_modhash_t), FMD_SLEEP);
1011 
1012 	(void) pthread_rwlock_init(&mhp->mh_lock, NULL);
1013 	mhp->mh_hashlen = fmd.d_str_buckets;
1014 	mhp->mh_hash = fmd_zalloc(sizeof (void *) * mhp->mh_hashlen, FMD_SLEEP);
1015 	mhp->mh_nelems = 0;
1016 
1017 	return (mhp);
1018 }
1019 
1020 void
1021 fmd_modhash_destroy(fmd_modhash_t *mhp)
1022 {
1023 	fmd_module_t *mp, *nmp;
1024 	uint_t i;
1025 
1026 	for (i = 0; i < mhp->mh_hashlen; i++) {
1027 		for (mp = mhp->mh_hash[i]; mp != NULL; mp = nmp) {
1028 			nmp = mp->mod_next;
1029 			mp->mod_next = NULL;
1030 			fmd_module_rele(mp);
1031 		}
1032 	}
1033 
1034 	fmd_free(mhp->mh_hash, sizeof (void *) * mhp->mh_hashlen);
1035 	(void) pthread_rwlock_destroy(&mhp->mh_lock);
1036 	fmd_free(mhp, sizeof (fmd_modhash_t));
1037 }
1038 
1039 static void
1040 fmd_modhash_loaddir(fmd_modhash_t *mhp, const char *dir,
1041     const fmd_modops_t *ops, const char *suffix)
1042 {
1043 	char path[PATH_MAX];
1044 	struct dirent *dp;
1045 	const char *p;
1046 	DIR *dirp;
1047 
1048 	if ((dirp = opendir(dir)) == NULL)
1049 		return; /* failed to open directory; just skip it */
1050 
1051 	while ((dp = readdir(dirp)) != NULL) {
1052 		if (dp->d_name[0] == '.')
1053 			continue; /* skip "." and ".." */
1054 
1055 		p = strrchr(dp->d_name, '.');
1056 
1057 		if (p != NULL && strcmp(p, ".conf") == 0)
1058 			continue; /* skip .conf files */
1059 
1060 		if (suffix != NULL && (p == NULL || strcmp(p, suffix) != 0))
1061 			continue; /* skip files with the wrong suffix */
1062 
1063 		(void) snprintf(path, sizeof (path), "%s/%s", dir, dp->d_name);
1064 		(void) fmd_modhash_load(mhp, path, ops);
1065 	}
1066 
1067 	(void) closedir(dirp);
1068 }
1069 
1070 void
1071 fmd_modhash_loadall(fmd_modhash_t *mhp, const fmd_conf_path_t *pap,
1072     const fmd_modops_t *ops, const char *suffix)
1073 {
1074 	int i;
1075 
1076 	for (i = 0; i < pap->cpa_argc; i++)
1077 		fmd_modhash_loaddir(mhp, pap->cpa_argv[i], ops, suffix);
1078 }
1079 
1080 void
1081 fmd_modhash_apply(fmd_modhash_t *mhp, void (*func)(fmd_module_t *))
1082 {
1083 	fmd_module_t *mp, *np;
1084 	uint_t i;
1085 
1086 	(void) pthread_rwlock_rdlock(&mhp->mh_lock);
1087 
1088 	for (i = 0; i < mhp->mh_hashlen; i++) {
1089 		for (mp = mhp->mh_hash[i]; mp != NULL; mp = np) {
1090 			np = mp->mod_next;
1091 			func(mp);
1092 		}
1093 	}
1094 
1095 	(void) pthread_rwlock_unlock(&mhp->mh_lock);
1096 }
1097 
1098 void
1099 fmd_modhash_tryapply(fmd_modhash_t *mhp, void (*func)(fmd_module_t *))
1100 {
1101 	fmd_module_t *mp, *np;
1102 	uint_t i;
1103 
1104 	if (mhp == NULL || pthread_rwlock_tryrdlock(&mhp->mh_lock) != 0)
1105 		return; /* not initialized or couldn't grab lock */
1106 
1107 	for (i = 0; i < mhp->mh_hashlen; i++) {
1108 		for (mp = mhp->mh_hash[i]; mp != NULL; mp = np) {
1109 			np = mp->mod_next;
1110 			func(mp);
1111 		}
1112 	}
1113 
1114 	(void) pthread_rwlock_unlock(&mhp->mh_lock);
1115 }
1116 
1117 void
1118 fmd_modhash_dispatch(fmd_modhash_t *mhp, fmd_event_t *ep)
1119 {
1120 	fmd_module_t *mp;
1121 	uint_t i;
1122 
1123 	fmd_event_hold(ep);
1124 	(void) pthread_rwlock_rdlock(&mhp->mh_lock);
1125 
1126 	for (i = 0; i < mhp->mh_hashlen; i++) {
1127 		for (mp = mhp->mh_hash[i]; mp != NULL; mp = mp->mod_next) {
1128 			/*
1129 			 * If FMD_MOD_INIT is set but MOD_FINI, MOD_QUIT, and
1130 			 * mod_error are all zero, then the module is active:
1131 			 * enqueue the event in the corresponding event queue.
1132 			 */
1133 			(void) pthread_mutex_lock(&mp->mod_lock);
1134 
1135 			if ((mp->mod_flags & (FMD_MOD_INIT | FMD_MOD_FINI |
1136 			    FMD_MOD_QUIT)) == FMD_MOD_INIT && !mp->mod_error)
1137 				fmd_eventq_insert_at_time(mp->mod_queue, ep);
1138 
1139 			(void) pthread_mutex_unlock(&mp->mod_lock);
1140 		}
1141 	}
1142 
1143 	(void) pthread_rwlock_unlock(&mhp->mh_lock);
1144 	fmd_event_rele(ep);
1145 }
1146 
1147 fmd_module_t *
1148 fmd_modhash_lookup(fmd_modhash_t *mhp, const char *name)
1149 {
1150 	fmd_module_t *mp;
1151 	uint_t h;
1152 
1153 	(void) pthread_rwlock_rdlock(&mhp->mh_lock);
1154 	h = fmd_strhash(name) % mhp->mh_hashlen;
1155 
1156 	for (mp = mhp->mh_hash[h]; mp != NULL; mp = mp->mod_next) {
1157 		if (strcmp(name, mp->mod_name) == 0)
1158 			break;
1159 	}
1160 
1161 	if (mp != NULL)
1162 		fmd_module_hold(mp);
1163 	else
1164 		(void) fmd_set_errno(EFMD_MOD_NOMOD);
1165 
1166 	(void) pthread_rwlock_unlock(&mhp->mh_lock);
1167 	return (mp);
1168 }
1169 
1170 fmd_module_t *
1171 fmd_modhash_load(fmd_modhash_t *mhp, const char *path, const fmd_modops_t *ops)
1172 {
1173 	char name[PATH_MAX], *p;
1174 	fmd_module_t *mp;
1175 	int tries = 0;
1176 	uint_t h;
1177 
1178 	(void) strlcpy(name, fmd_strbasename(path), sizeof (name));
1179 	if ((p = strrchr(name, '.')) != NULL && strcmp(p, ".so") == 0)
1180 		*p = '\0'; /* strip trailing .so from any module name */
1181 
1182 	(void) pthread_rwlock_wrlock(&mhp->mh_lock);
1183 	h = fmd_strhash(name) % mhp->mh_hashlen;
1184 
1185 	/*
1186 	 * First check to see if a module is already present in the hash table
1187 	 * for this name.  If so, the module is already loaded: skip it.
1188 	 */
1189 	for (mp = mhp->mh_hash[h]; mp != NULL; mp = mp->mod_next) {
1190 		if (strcmp(name, mp->mod_name) == 0)
1191 			break;
1192 	}
1193 
1194 	if (mp != NULL) {
1195 		(void) pthread_rwlock_unlock(&mhp->mh_lock);
1196 		(void) fmd_set_errno(EFMD_MOD_LOADED);
1197 		return (NULL);
1198 	}
1199 
1200 	/*
1201 	 * fmd_module_create() will return a held (as if by fmd_module_hold())
1202 	 * module.  We leave this hold in place to correspond to the hash-in.
1203 	 */
1204 	while ((mp = fmd_module_create(path, ops)) == NULL) {
1205 		if (tries++ != 0 || errno != EFMD_CKPT_INVAL) {
1206 			(void) pthread_rwlock_unlock(&mhp->mh_lock);
1207 			return (NULL); /* errno is set for us */
1208 		}
1209 	}
1210 
1211 	mp->mod_hash = mhp;
1212 	mp->mod_next = mhp->mh_hash[h];
1213 
1214 	mhp->mh_hash[h] = mp;
1215 	mhp->mh_nelems++;
1216 
1217 	(void) pthread_rwlock_unlock(&mhp->mh_lock);
1218 	return (mp);
1219 }
1220 
1221 int
1222 fmd_modhash_unload(fmd_modhash_t *mhp, const char *name)
1223 {
1224 	fmd_module_t *mp, **pp;
1225 	uint_t h;
1226 
1227 	(void) pthread_rwlock_wrlock(&mhp->mh_lock);
1228 	h = fmd_strhash(name) % mhp->mh_hashlen;
1229 	pp = &mhp->mh_hash[h];
1230 
1231 	for (mp = *pp; mp != NULL; mp = mp->mod_next) {
1232 		if (strcmp(name, mp->mod_name) == 0)
1233 			break;
1234 		else
1235 			pp = &mp->mod_next;
1236 	}
1237 
1238 	if (mp == NULL) {
1239 		(void) pthread_rwlock_unlock(&mhp->mh_lock);
1240 		return (fmd_set_errno(EFMD_MOD_NOMOD));
1241 	}
1242 
1243 	*pp = mp->mod_next;
1244 	mp->mod_next = NULL;
1245 
1246 	ASSERT(mhp->mh_nelems != 0);
1247 	mhp->mh_nelems--;
1248 
1249 	(void) pthread_rwlock_unlock(&mhp->mh_lock);
1250 
1251 	fmd_module_unload(mp);
1252 	fmd_module_rele(mp);
1253 
1254 	return (0);
1255 }
1256 
1257 void
1258 fmd_modstat_publish(fmd_module_t *mp)
1259 {
1260 	(void) pthread_mutex_lock(&mp->mod_lock);
1261 
1262 	ASSERT(mp->mod_flags & FMD_MOD_STSUB);
1263 	mp->mod_flags |= FMD_MOD_STPUB;
1264 	(void) pthread_cond_broadcast(&mp->mod_cv);
1265 
1266 	while (mp->mod_flags & FMD_MOD_STPUB)
1267 		(void) pthread_cond_wait(&mp->mod_cv, &mp->mod_lock);
1268 
1269 	(void) pthread_mutex_unlock(&mp->mod_lock);
1270 }
1271 
1272 int
1273 fmd_modstat_snapshot(fmd_module_t *mp, fmd_ustat_snap_t *uss)
1274 {
1275 	fmd_event_t *e;
1276 	int err;
1277 
1278 	/*
1279 	 * Grab the module lock and wait for the STSUB bit to be clear.  Then
1280 	 * set it to indicate we are a subscriber and everyone else must wait.
1281 	 */
1282 	(void) pthread_mutex_lock(&mp->mod_lock);
1283 
1284 	while (mp->mod_error == 0 && (mp->mod_flags & FMD_MOD_STSUB))
1285 		(void) pthread_cond_wait(&mp->mod_cv, &mp->mod_lock);
1286 
1287 	if (mp->mod_error != 0) {
1288 		(void) pthread_mutex_unlock(&mp->mod_lock);
1289 		return (fmd_set_errno(EFMD_HDL_ABORT));
1290 	}
1291 
1292 	mp->mod_flags |= FMD_MOD_STSUB;
1293 	(void) pthread_cond_broadcast(&mp->mod_cv);
1294 	(void) pthread_mutex_unlock(&mp->mod_lock);
1295 
1296 	/*
1297 	 * Create a stats pseudo-event and dispatch it to the module, forcing
1298 	 * it to next execute its custom snapshot routine (or the empty one).
1299 	 */
1300 	e = fmd_event_create(FMD_EVT_STATS, FMD_HRT_NOW, NULL, NULL);
1301 	fmd_eventq_insert_at_head(mp->mod_queue, e);
1302 
1303 	/*
1304 	 * Grab the module lock and then wait on mod_cv for STPUB to be set,
1305 	 * indicating the snapshot routine is completed and the module is idle.
1306 	 */
1307 	(void) pthread_mutex_lock(&mp->mod_lock);
1308 
1309 	while (mp->mod_error == 0 && !(mp->mod_flags & FMD_MOD_STPUB))
1310 		(void) pthread_cond_wait(&mp->mod_cv, &mp->mod_lock);
1311 
1312 	if (mp->mod_error != 0) {
1313 		(void) pthread_mutex_unlock(&mp->mod_lock);
1314 		return (fmd_set_errno(EFMD_HDL_ABORT));
1315 	}
1316 
1317 	(void) pthread_cond_broadcast(&mp->mod_cv);
1318 	(void) pthread_mutex_unlock(&mp->mod_lock);
1319 
1320 	/*
1321 	 * Update ms_snaptime and take the actual snapshot of the various
1322 	 * statistics while the module is quiescent and waiting for us.
1323 	 */
1324 	(void) pthread_mutex_lock(&mp->mod_stats_lock);
1325 
1326 	if (mp->mod_stats != NULL) {
1327 		mp->mod_stats->ms_snaptime.fmds_value.ui64 = gethrtime();
1328 		err = fmd_ustat_snapshot(mp->mod_ustat, uss);
1329 	} else
1330 		err = fmd_set_errno(EFMD_HDL_ABORT);
1331 
1332 	(void) pthread_mutex_unlock(&mp->mod_stats_lock);
1333 
1334 	/*
1335 	 * With the snapshot complete, grab the module lock and clear both
1336 	 * STSUB and STPUB, permitting everyone to wake up and continue.
1337 	 */
1338 	(void) pthread_mutex_lock(&mp->mod_lock);
1339 
1340 	ASSERT(mp->mod_flags & FMD_MOD_STSUB);
1341 	ASSERT(mp->mod_flags & FMD_MOD_STPUB);
1342 	mp->mod_flags &= ~(FMD_MOD_STSUB | FMD_MOD_STPUB);
1343 
1344 	(void) pthread_cond_broadcast(&mp->mod_cv);
1345 	(void) pthread_mutex_unlock(&mp->mod_lock);
1346 
1347 	return (err);
1348 }
1349 
1350 struct topo_hdl *
1351 fmd_module_topo_hold(fmd_module_t *mp)
1352 {
1353 	fmd_modtopo_t *mtp;
1354 
1355 	ASSERT(fmd_module_locked(mp));
1356 
1357 	mtp = fmd_zalloc(sizeof (fmd_modtopo_t), FMD_SLEEP);
1358 	mtp->mt_topo = mp->mod_topo_current;
1359 	fmd_topo_addref(mtp->mt_topo);
1360 	fmd_list_prepend(&mp->mod_topolist, mtp);
1361 
1362 	return (mtp->mt_topo->ft_hdl);
1363 }
1364 
1365 int
1366 fmd_module_topo_rele(fmd_module_t *mp, struct topo_hdl *hdl)
1367 {
1368 	fmd_modtopo_t *mtp;
1369 
1370 	ASSERT(fmd_module_locked(mp));
1371 
1372 	for (mtp = fmd_list_next(&mp->mod_topolist); mtp != NULL;
1373 	    mtp = fmd_list_next(mtp)) {
1374 		if (mtp->mt_topo->ft_hdl == hdl)
1375 			break;
1376 	}
1377 
1378 	if (mtp == NULL)
1379 		return (-1);
1380 
1381 	fmd_list_delete(&mp->mod_topolist, mtp);
1382 	fmd_topo_rele(mtp->mt_topo);
1383 	fmd_free(mtp, sizeof (fmd_modtopo_t));
1384 	return (0);
1385 }
1386