xref: /titanic_50/usr/src/cmd/fm/fmd/common/fmd_module.c (revision 8eea8e29cc4374d1ee24c25a07f45af132db3499)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <signal.h>
30 #include <dirent.h>
31 #include <limits.h>
32 #include <alloca.h>
33 #include <unistd.h>
34 #include <stdio.h>
35 
36 #include <fmd_string.h>
37 #include <fmd_alloc.h>
38 #include <fmd_module.h>
39 #include <fmd_error.h>
40 #include <fmd_conf.h>
41 #include <fmd_dispq.h>
42 #include <fmd_eventq.h>
43 #include <fmd_timerq.h>
44 #include <fmd_subr.h>
45 #include <fmd_thread.h>
46 #include <fmd_ustat.h>
47 #include <fmd_case.h>
48 #include <fmd_protocol.h>
49 #include <fmd_buf.h>
50 #include <fmd_ckpt.h>
51 
52 #include <fmd.h>
53 
/*
 * Template for per-module statistics installed by fmd on behalf of each active
 * module.  These are used to initialize the per-module mp->mod_stats below.
 * NOTE: FMD_TYPE_STRING statistics should not be used here.  If they are
 * required in the future, the FMD_ADM_MODDSTAT service routine must change.
 *
 * Each initializer below supplies a statistic name, an FMD_TYPE_* type code,
 * and a human-readable description.  fmd_module_create() passes this template
 * to fmd_ustat_insert() and computes the number of statistics as
 * sizeof (_fmd_modstat_tmpl) / sizeof (fmd_stat_t), so the entries here are
 * treated as a flat array of fmd_stat_t.
 */
static const fmd_modstat_t _fmd_modstat_tmpl = {
{ "fmd.loadtime", FMD_TYPE_TIME, "hrtime at which module was loaded" },
{ "fmd.snaptime", FMD_TYPE_TIME, "hrtime of last statistics snapshot" },
{ "fmd.dispatched", FMD_TYPE_UINT64, "total events dispatched to module" },
{ "fmd.dequeued", FMD_TYPE_UINT64, "total events dequeued by module" },
{ "fmd.prdequeued", FMD_TYPE_UINT64, "protocol events dequeued by module" },
{ "fmd.accepted", FMD_TYPE_UINT64, "total events accepted by module" },
{ "fmd.dropped", FMD_TYPE_UINT64, "total events dropped on queue overflow" },
{ "fmd.wcnt", FMD_TYPE_UINT32, "count of events waiting on queue" },
{ "fmd.wtime", FMD_TYPE_TIME, "total wait time on queue" },
{ "fmd.wlentime", FMD_TYPE_TIME, "total wait length * time product" },
{ "fmd.wlastupdate", FMD_TYPE_TIME, "hrtime of last wait queue update" },
{ "fmd.dtime", FMD_TYPE_TIME, "total processing time after dequeue" },
{ "fmd.dlastupdate", FMD_TYPE_TIME, "hrtime of last event dequeue completion" },
{ "fmd.debugdrop", FMD_TYPE_UINT64, "dropped debug messages" },
{ "fmd.memtotal", FMD_TYPE_SIZE, "total memory allocated by module" },
{ "fmd.memlimit", FMD_TYPE_SIZE, "limit on total memory allocated" },
{ "fmd.buftotal", FMD_TYPE_SIZE, "total buffer space used by module" },
{ "fmd.buflimit", FMD_TYPE_SIZE, "limit on total buffer space" },
{ "fmd.thrtotal", FMD_TYPE_UINT32, "total number of auxiliary threads" },
{ "fmd.thrlimit", FMD_TYPE_UINT32, "limit on number of auxiliary threads" },
{ "fmd.caseopen", FMD_TYPE_UINT64, "cases currently open by module" },
{ "fmd.casesolved", FMD_TYPE_UINT64, "total cases solved by module" },
{ "fmd.caseclosed", FMD_TYPE_UINT64, "total cases closed by module" },
{ "fmd.ckptsave", FMD_TYPE_BOOL, "save checkpoints for module" },
{ "fmd.ckptrestore", FMD_TYPE_BOOL, "restore checkpoints for module" },
{ "fmd.ckptzero", FMD_TYPE_BOOL, "zeroed checkpoint at startup" },
{ "fmd.ckptcnt", FMD_TYPE_UINT64, "number of checkpoints taken" },
{ "fmd.ckpttime", FMD_TYPE_TIME, "total checkpoint time" },
};
90 
/*
 * Start routine for a module's primary thread; fmd_module_create() hands this
 * function and the module pointer to fmd_thread_create().  The thread calls
 * the module's mop_init() entry point while holding mod_lock, then repeatedly
 * dequeues events from mod_queue and passes them to mop_dispatch() until
 * fmd_eventq_delete() returns NULL, and finally calls mop_fini() and sets
 * FMD_MOD_FINI.  mod_cv is broadcast once init has either succeeded
 * (FMD_MOD_INIT set) or failed (mod_error set), and again on the way out.
 */
static void
fmd_module_start(void *arg)
{
	fmd_module_t *mp = arg;
	fmd_event_t *ep;

	(void) pthread_mutex_lock(&mp->mod_lock);

	/*
	 * Init has failed if mop_init() returns non-zero or if mod_error was
	 * set along the way.  If no error code was recorded, use errno when
	 * it is non-zero and EFMD_MOD_INIT otherwise.  FMD_MOD_INIT is left
	 * clear on this path, and we exit with mod_lock still held for 'out'.
	 */
	if (mp->mod_ops->mop_init(mp) != 0 || mp->mod_error != 0) {
		if (mp->mod_error == 0)
			mp->mod_error = errno ? errno : EFMD_MOD_INIT;
		goto out;
	}

	ASSERT(MUTEX_HELD(&mp->mod_lock));
	mp->mod_flags |= FMD_MOD_INIT;

	(void) pthread_mutex_unlock(&mp->mod_lock);
	(void) pthread_cond_broadcast(&mp->mod_cv);

	/*
	 * Wait for events to arrive by checking mod_error and then sleeping in
	 * fmd_eventq_delete().  If a NULL event is returned, the eventq has
	 * been aborted and we continue on to call fini and exit the thread.
	 */
	while ((ep = fmd_eventq_delete(mp->mod_queue)) != NULL) {
		fmd_event_impl_t *eip = (fmd_event_impl_t *)ep;

		/*
		 * If the event is a control event or the module has failed,
		 * discard the event without ever passing it to the module.
		 * If it's a control event, fmd_event_rele() will block in
		 * fmd_ctl_rele() until all modules have reached a barrier.
		 * The dequeue statistics are only updated for non-control
		 * events.
		 */
		if (eip->ev_type == FMD_EVT_CTL || mp->mod_error != 0) {
			if (eip->ev_type != FMD_EVT_CTL) {
				fmd_modstat_eventq_dequeue(mp, eip->ev_type);
				fmd_modstat_eventq_done(mp);
			}
			fmd_event_rele(ep);
			continue;
		}

		fmd_modstat_eventq_dequeue(mp, eip->ev_type);
		mp->mod_ops->mop_dispatch(mp, ep);
		fmd_modstat_eventq_done(mp);

		/*
		 * Once mop_dispatch() is complete, grab the lock and perform
		 * any event-specific post-processing.  Finally, if necessary,
		 * checkpoint the state of the module after this event.
		 */
		fmd_module_lock(mp);

		/*
		 * For a close event, ev_data is the case being closed: take
		 * it off the module's case list and drop a reference on it.
		 */
		if (eip->ev_type == FMD_EVT_CLOSE) {
			fmd_list_delete(&mp->mod_cases, eip->ev_data);
			fmd_case_rele(eip->ev_data);
		}

		fmd_ckpt_save(mp);
		fmd_module_unlock(mp);
		fmd_event_rele(ep);
	}

	/*
	 * A fini failure is recorded only if no earlier error is present.
	 * NOTE(review): mod_error is updated here without mod_lock held.
	 */
	if (mp->mod_ops->mop_fini(mp) != 0 && mp->mod_error == 0)
		mp->mod_error = errno ? errno : EFMD_MOD_FINI;

	(void) pthread_mutex_lock(&mp->mod_lock);
	mp->mod_flags |= FMD_MOD_FINI;

out:
	/* Both paths arrive here holding mod_lock; wake any mod_cv waiters. */
	(void) pthread_mutex_unlock(&mp->mod_lock);
	(void) pthread_cond_broadcast(&mp->mod_cv);
}
165 
166 fmd_module_t *
167 fmd_module_create(const char *path, const fmd_modops_t *ops)
168 {
169 	fmd_module_t *mp = fmd_zalloc(sizeof (fmd_module_t), FMD_SLEEP);
170 
171 	char buf[PATH_MAX], *p;
172 	const char *dir;
173 	uint32_t limit;
174 	int err;
175 
176 	(void) strlcpy(buf, fmd_strbasename(path), sizeof (buf));
177 	if ((p = strrchr(buf, '.')) != NULL && strcmp(p, ".so") == 0)
178 		*p = '\0'; /* strip trailing .so from any module name */
179 
180 	(void) pthread_mutex_init(&mp->mod_lock, NULL);
181 	(void) pthread_cond_init(&mp->mod_cv, NULL);
182 	(void) pthread_mutex_init(&mp->mod_stats_lock, NULL);
183 
184 	mp->mod_name = fmd_strdup(buf, FMD_SLEEP);
185 	mp->mod_path = fmd_strdup(path, FMD_SLEEP);
186 	mp->mod_ops = ops;
187 	mp->mod_ustat = fmd_ustat_create();
188 
189 	(void) fmd_conf_getprop(fmd.d_conf, "client.evqlim", &limit);
190 	mp->mod_queue = fmd_eventq_create(mp, limit);
191 
192 	(void) fmd_conf_getprop(fmd.d_conf, "ckpt.dir", &dir);
193 	(void) snprintf(buf, sizeof (buf),
194 	    "%s/%s/%s", fmd.d_rootdir, dir, mp->mod_name);
195 
196 	mp->mod_ckpt = fmd_strdup(buf, FMD_SLEEP);
197 
198 	(void) fmd_conf_getprop(fmd.d_conf, "client.tmrlim", &limit);
199 	mp->mod_timerids = fmd_idspace_create(mp->mod_name, 1, limit + 1);
200 	mp->mod_threads = fmd_idspace_create(mp->mod_name, 0, INT_MAX);
201 
202 	fmd_buf_hash_create(&mp->mod_bufs);
203 	fmd_serd_hash_create(&mp->mod_serds);
204 
205 	(void) pthread_mutex_lock(&fmd.d_mod_lock);
206 	fmd_list_append(&fmd.d_mod_list, mp);
207 	(void) pthread_mutex_unlock(&fmd.d_mod_lock);
208 
209 	/*
210 	 * Initialize the module statistics that are kept on its behalf by fmd.
211 	 * These are set up using a template defined at the top of this file.
212 	 */
213 	if ((mp->mod_stats = (fmd_modstat_t *)fmd_ustat_insert(mp->mod_ustat,
214 	    FMD_USTAT_ALLOC, sizeof (_fmd_modstat_tmpl) / sizeof (fmd_stat_t),
215 	    (fmd_stat_t *)&_fmd_modstat_tmpl, NULL)) == NULL) {
216 		fmd_error(EFMD_MOD_INIT, "failed to initialize per-mod stats");
217 		fmd_module_destroy(mp);
218 		return (NULL);
219 	}
220 
221 	(void) fmd_conf_getprop(fmd.d_conf, "client.memlim",
222 	    &mp->mod_stats->ms_memlimit.fmds_value.ui64);
223 
224 	(void) fmd_conf_getprop(fmd.d_conf, "client.buflim",
225 	    &mp->mod_stats->ms_buflimit.fmds_value.ui64);
226 
227 	(void) fmd_conf_getprop(fmd.d_conf, "client.thrlim",
228 	    &mp->mod_stats->ms_thrlimit.fmds_value.ui32);
229 
230 	(void) fmd_conf_getprop(fmd.d_conf, "ckpt.save",
231 	    &mp->mod_stats->ms_ckpt_save.fmds_value.bool);
232 
233 	(void) fmd_conf_getprop(fmd.d_conf, "ckpt.restore",
234 	    &mp->mod_stats->ms_ckpt_restore.fmds_value.bool);
235 
236 	(void) fmd_conf_getprop(fmd.d_conf, "ckpt.zero",
237 	    &mp->mod_stats->ms_ckpt_zeroed.fmds_value.bool);
238 
239 	if (mp->mod_stats->ms_ckpt_zeroed.fmds_value.bool)
240 		fmd_ckpt_delete(mp); /* blow away any pre-existing checkpoint */
241 
242 	/*
243 	 * Place a hold on the module and grab the module lock before creating
244 	 * the module's thread to ensure that it cannot destroy the module and
245 	 * that it cannot call ops->mop_init() before we're done setting up.
246 	 * NOTE: from now on, we must use fmd_module_rele() for error paths.
247 	 */
248 	fmd_module_hold(mp);
249 	(void) pthread_mutex_lock(&mp->mod_lock);
250 	mp->mod_stats->ms_loadtime.fmds_value.ui64 = gethrtime();
251 	mp->mod_thread = fmd_thread_create(mp, fmd_module_start, mp);
252 
253 	if (mp->mod_thread == NULL) {
254 		fmd_error(EFMD_MOD_THR, "failed to create thread for %s", path);
255 		(void) pthread_mutex_unlock(&mp->mod_lock);
256 		fmd_module_rele(mp);
257 		return (NULL);
258 	}
259 
260 	/*
261 	 * At this point our module structure is nearly finished and its thread
262 	 * is starting execution in fmd_module_start() above, which will begin
263 	 * by blocking for mod_lock.  We now drop mod_lock and wait for either
264 	 * FMD_MOD_INIT or mod_error to be set before proceeding.
265 	 */
266 	while (!(mp->mod_flags & FMD_MOD_INIT) && mp->mod_error == 0)
267 		(void) pthread_cond_wait(&mp->mod_cv, &mp->mod_lock);
268 
269 	/*
270 	 * If the module has failed to initialize, copy its errno to the errno
271 	 * of the caller, wait for it to unload, and then destroy it.
272 	 */
273 	if (!(mp->mod_flags & FMD_MOD_INIT)) {
274 		err = mp->mod_error;
275 		(void) pthread_mutex_unlock(&mp->mod_lock);
276 
277 		if (err == EFMD_CKPT_INVAL)
278 			fmd_ckpt_rename(mp); /* move aside bad checkpoint */
279 
280 		/*
281 		 * If we're in the background, keep quiet about failure to
282 		 * load because a handle wasn't registered: this is a module's
283 		 * way of telling us it didn't want to be loaded for some
284 		 * reason related to system configuration.  If we're in the
285 		 * foreground we log this too in order to inform developers.
286 		 */
287 		if (fmd.d_fg || err != EFMD_HDL_INIT) {
288 			fmd_error(EFMD_MOD_INIT, "failed to load %s: %s\n",
289 			    path, fmd_strerror(err));
290 		}
291 
292 		fmd_module_unload(mp);
293 		fmd_module_rele(mp);
294 
295 		(void) fmd_set_errno(err);
296 		return (NULL);
297 	}
298 
299 	(void) pthread_mutex_unlock(&mp->mod_lock);
300 	(void) pthread_cond_broadcast(&mp->mod_cv);
301 
302 	fmd_dprintf(FMD_DBG_MOD, "loaded module %s\n", mp->mod_name);
303 	return (mp);
304 }
305 
306 static void
307 fmd_module_untimeout(fmd_module_t *mp, id_t id)
308 {
309 	void *arg = fmd_timerq_remove(fmd.d_timers, mp->mod_timerids, id);
310 
311 	/*
312 	 * The root module calls fmd_timerq_install() directly and must take
313 	 * responsibility for any cleanup of timer arguments that is required.
314 	 * All other modules use fmd_modtimer_t's as the arg data; free them.
315 	 */
316 	if (arg != NULL && mp != fmd.d_rmod)
317 		fmd_free(arg, sizeof (fmd_modtimer_t));
318 }
319 
320 /*
321  * If an auxiliary thread exists for the specified module once _fmd_fini has
322  * completed, send it an asynchronous cancellation to force it to exit and then
323  * join with it (we expect this to either succeed quickly or return ESRCH).
324  * Once this is complete we can destroy the associated fmd_thread_t data.
325  */
326 static void
327 fmd_module_thrcancel(fmd_module_t *mp, id_t id)
328 {
329 	fmd_thread_t *tp = fmd_idspace_getspecific(mp->mod_threads, id);
330 
331 	fmd_dprintf(FMD_DBG_MOD, "cancelling %s auxiliary thread %u\n",
332 	    mp->mod_name, tp->thr_tid);
333 
334 	ASSERT(tp->thr_tid == id);
335 	(void) pthread_cancel(tp->thr_tid);
336 	(void) pthread_join(tp->thr_tid, NULL);
337 
338 	fmd_thread_destroy(tp, FMD_THREAD_NOJOIN);
339 }
340 
/*
 * Stop a running module: mark it FMD_MOD_QUIT, abort its event queue so that
 * its thread leaves the event loop in fmd_module_start(), wait for the thread
 * to finish, and then release the state that is only needed while the module
 * is loaded (cases, timers, auxiliary threads, buffers, and SERD engines).
 * If FMD_MOD_QUIT is already set the call returns immediately.  The module
 * structure itself is not freed here; see fmd_module_destroy().
 */
void
fmd_module_unload(fmd_module_t *mp)
{
	fmd_case_t *cp;

	(void) pthread_mutex_lock(&mp->mod_lock);

	if (mp->mod_flags & FMD_MOD_QUIT) {
		(void) pthread_mutex_unlock(&mp->mod_lock);
		return; /* module is already unloading */
	}

	ASSERT(mp->mod_thread != NULL);
	mp->mod_flags |= FMD_MOD_QUIT;

	if (mp->mod_queue != NULL)
		fmd_eventq_abort(mp->mod_queue);

	/*
	 * Wait for the module's thread to stop processing events and call
	 * _fmd_fini() and exit.  We do this by waiting for FMD_MOD_FINI to be
	 * set if INIT was set, and then attempting to join with the thread.
	 */
	while ((mp->mod_flags & (FMD_MOD_INIT | FMD_MOD_FINI)) == FMD_MOD_INIT)
		(void) pthread_cond_wait(&mp->mod_cv, &mp->mod_lock);

	(void) pthread_mutex_unlock(&mp->mod_lock);
	(void) pthread_cond_broadcast(&mp->mod_cv);

	fmd_thread_destroy(mp->mod_thread, FMD_THREAD_JOIN);
	mp->mod_thread = NULL;

	/*
	 * Once the module is no longer active, clean up any data structures
	 * that are only required when the module is loaded, such as cases.
	 */
	fmd_module_lock(mp);

	/* Drain the case list, dropping one reference per case removed. */
	while ((cp = fmd_list_next(&mp->mod_cases)) != NULL) {
		fmd_list_delete(&mp->mod_cases, cp);
		fmd_case_rele(cp);
	}

	/* Remove every outstanding timer before destroying the id space. */
	if (mp->mod_timerids != NULL) {
		fmd_idspace_apply(mp->mod_timerids,
		    (void (*)())fmd_module_untimeout, mp);

		fmd_idspace_destroy(mp->mod_timerids);
		mp->mod_timerids = NULL;
	}

	/* Cancel and join any auxiliary threads that are still registered. */
	if (mp->mod_threads != NULL) {
		fmd_idspace_apply(mp->mod_threads,
		    (void (*)())fmd_module_thrcancel, mp);

		fmd_idspace_destroy(mp->mod_threads);
		mp->mod_threads = NULL;
	}

	fmd_buf_hash_destroy(&mp->mod_bufs);
	fmd_serd_hash_destroy(&mp->mod_serds);

	fmd_module_unlock(mp);
	fmd_dprintf(FMD_DBG_MOD, "unloaded module %s\n", mp->mod_name);
}
406 
/*
 * Free a module and everything it still owns.  The caller must hold mod_lock
 * and the reference count must already be zero; fmd_module_rele() calls this
 * when the last hold is dropped.  If the module's thread still exists, the
 * module is unloaded first.  On return 'mp' has been freed and must not be
 * touched again; mod_lock is not released before the structure is freed.
 */
void
fmd_module_destroy(fmd_module_t *mp)
{
	fmd_conf_formal_t *cfp = mp->mod_argv;
	int i;

	ASSERT(MUTEX_HELD(&mp->mod_lock));

	/*
	 * fmd_module_unload() acquires mod_lock itself, so drop it around
	 * the call and re-acquire it afterward.
	 */
	if (mp->mod_thread != NULL) {
		(void) pthread_mutex_unlock(&mp->mod_lock);
		fmd_module_unload(mp);
		(void) pthread_mutex_lock(&mp->mod_lock);
	}

	ASSERT(mp->mod_thread == NULL);
	ASSERT(mp->mod_refs == 0);

	/*
	 * Once the module's thread is dead, we can safely remove the module
	 * from global visibility and by removing it from d_mod_list.  Any
	 * modhash pointers are already gone by virtue of mod_refs being zero.
	 */
	(void) pthread_mutex_lock(&fmd.d_mod_lock);
	fmd_list_delete(&fmd.d_mod_list, mp);
	(void) pthread_mutex_unlock(&fmd.d_mod_lock);

	/*
	 * Once the module is no longer processing events and no longer visible
	 * through any program data structures, we can free all of its content.
	 */
	if (mp->mod_queue != NULL) {
		fmd_eventq_destroy(mp->mod_queue);
		mp->mod_queue = NULL;
	}

	/* Close each open dictionary, then free the handle array itself. */
	for (i = 0; i < mp->mod_dictc; i++)
		fm_dc_closedict(mp->mod_dictv[i]);

	fmd_free(mp->mod_dictv, sizeof (struct fm_dc_handle *) * mp->mod_dictc);

	if (mp->mod_conf != NULL)
		fmd_conf_close(mp->mod_conf);

	/* Free the name and default strings of each formal in mod_argv. */
	for (i = 0; i < mp->mod_argc; i++, cfp++) {
		fmd_strfree((char *)cfp->cf_name);
		fmd_strfree((char *)cfp->cf_default);
	}

	fmd_free(mp->mod_argv, sizeof (fmd_conf_formal_t) * mp->mod_argc);

	if (mp->mod_ustat != NULL)
		fmd_ustat_destroy(mp->mod_ustat);

	fmd_strfree(mp->mod_name);
	fmd_strfree(mp->mod_path);
	fmd_strfree(mp->mod_ckpt);
	nvlist_free(mp->mod_fmri);

	fmd_free(mp, sizeof (fmd_module_t));
}
467 
468 /*
469  * fmd_module_error() is called after the stack is unwound from a call to
470  * fmd_module_abort() to indicate that the module has failed.  The mod_error
471  * field is used to hold the error code of the first fatal error to the module.
472  * An EFMD_MOD_FAIL event is then created and sent to fmd-self-diagnosis.
473  */
474 static void
475 fmd_module_error(fmd_module_t *mp, int err)
476 {
477 	fmd_event_t *e;
478 	nvlist_t *nvl;
479 	char *class;
480 
481 	ASSERT(MUTEX_HELD(&mp->mod_lock));
482 	ASSERT(err != 0);
483 
484 	TRACE((FMD_DBG_MOD, "module aborted: err=%d", err));
485 
486 	if (mp->mod_error == 0)
487 		mp->mod_error = err;
488 
489 	if (mp == fmd.d_self)
490 		return; /* do not post event if fmd.d_self itself fails */
491 
492 	/*
493 	 * Send an error indicating the module has now failed to fmd.d_self.
494 	 * Since the error causing the failure has already been logged by
495 	 * fmd_api_xerror(), we do not need to bother logging this event.
496 	 * It only exists for the purpose of notifying fmd.d_self that it can
497 	 * close the case associated with this module because mod_error is set.
498 	 */
499 	nvl = fmd_protocol_moderror(mp, EFMD_MOD_FAIL, fmd_strerror(err));
500 	(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
501 	e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
502 	fmd_dispq_dispatch(fmd.d_disp, e, class);
503 }
504 
/*
 * Deliver event 'e' to module 'mp' by invoking the callback from the module's
 * fmdi_ops table that corresponds to the event type.  The module is marked
 * FMD_MOD_BUSY for the duration of the call and mod_jmpbuf is armed so that
 * fmd_module_abort() can longjmp back here; in that case the error is
 * recorded with fmd_module_error() and the callback is not re-invoked.
 * Event types not listed in the switch are ignored.
 */
void
fmd_module_dispatch(fmd_module_t *mp, fmd_event_t *e)
{
	const fmd_hdl_ops_t *ops = mp->mod_info->fmdi_ops;
	fmd_event_impl_t *ep = (fmd_event_impl_t *)e;
	fmd_hdl_t *hdl = (fmd_hdl_t *)mp;
	fmd_modtimer_t *t;
	volatile int err;

	/*
	 * Before calling the appropriate module callback, enter the module as
	 * if by fmd_module_enter() and establish mod_jmpbuf for any aborts.
	 */
	(void) pthread_mutex_lock(&mp->mod_lock);

	ASSERT(!(mp->mod_flags & FMD_MOD_BUSY));
	mp->mod_flags |= FMD_MOD_BUSY;

	/*
	 * setjmp() returns zero when first called with mod_lock held.  When
	 * we arrive here by longjmp() the lock is taken again before the
	 * failure is recorded, so both paths reach the unlock below holding
	 * mod_lock.
	 */
	if ((err = setjmp(mp->mod_jmpbuf)) != 0) {
		(void) pthread_mutex_lock(&mp->mod_lock);
		fmd_module_error(mp, err);
	}

	(void) pthread_mutex_unlock(&mp->mod_lock);
	(void) pthread_cond_broadcast(&mp->mod_cv);

	/*
	 * If it's the first time through fmd_module_dispatch(), call the
	 * appropriate module callback based on the event type.  If the call
	 * triggers an fmd_module_abort(), we'll return to setjmp() above with
	 * err set to a non-zero value and then bypass this before exiting.
	 */
	if (err == 0) {
		switch (ep->ev_type) {
		case FMD_EVT_PROTOCOL:
			ops->fmdo_recv(hdl, e, ep->ev_nvl, ep->ev_data);
			break;
		case FMD_EVT_TIMEOUT:
			/* ev_data is the fmd_modtimer_t; freed after use. */
			t = ep->ev_data;
			ASSERT(t->mt_mod == mp);
			ops->fmdo_timeout(hdl, t->mt_id, t->mt_arg);
			fmd_free(t, sizeof (fmd_modtimer_t));
			break;
		case FMD_EVT_CLOSE:
			ops->fmdo_close(hdl, ep->ev_data);
			break;
		case FMD_EVT_STATS:
			ops->fmdo_stats(hdl);
			fmd_modstat_publish(mp);
			break;
		case FMD_EVT_GC:
			ops->fmdo_gc(hdl);
			break;
		}
	}

	/* Clear FMD_MOD_BUSY whether or not the callback aborted. */
	fmd_module_exit(mp);
}
563 
564 void
565 fmd_module_timeout(fmd_modtimer_t *t, id_t id, hrtime_t hrt)
566 {
567 	fmd_event_t *e;
568 
569 	t->mt_id = id; /* save id in case we need to delete from eventq */
570 	e = fmd_event_create(FMD_EVT_TIMEOUT, hrt, NULL, t);
571 	fmd_eventq_insert_at_time(t->mt_mod->mod_queue, e);
572 }
573 
574 /*
575  * Garbage collection is initiated by a timer callback once per day or at the
576  * request of fmadm.  Purge old SERD entries and send the module a GC event.
577  */
578 void
579 fmd_module_gc(fmd_module_t *mp)
580 {
581 	fmd_hdl_info_t *info;
582 	fmd_event_t *e;
583 
584 	if (mp->mod_error != 0)
585 		return; /* do not do anything if the module has failed */
586 
587 	fmd_module_lock(mp);
588 
589 	if ((info = mp->mod_info) != NULL) {
590 		fmd_serd_hash_apply(&mp->mod_serds,
591 		    (fmd_serd_eng_f *)fmd_serd_eng_gc, NULL);
592 	}
593 
594 	fmd_module_unlock(mp);
595 
596 	if (info != NULL) {
597 		e = fmd_event_create(FMD_EVT_GC, FMD_HRT_NOW, NULL, NULL);
598 		fmd_eventq_insert_at_head(mp->mod_queue, e);
599 	}
600 }
601 
602 void
603 fmd_module_trygc(fmd_module_t *mp)
604 {
605 	if (fmd_module_trylock(mp)) {
606 		fmd_serd_hash_apply(&mp->mod_serds,
607 		    (fmd_serd_eng_f *)fmd_serd_eng_gc, NULL);
608 		fmd_module_unlock(mp);
609 	}
610 }
611 
612 int
613 fmd_module_contains(fmd_module_t *mp, fmd_event_t *ep)
614 {
615 	fmd_case_t *cp;
616 	int rv = 0;
617 
618 	fmd_module_lock(mp);
619 
620 	for (cp = fmd_list_next(&mp->mod_cases);
621 	    cp != NULL; cp = fmd_list_next(cp)) {
622 		if ((rv = fmd_case_contains(cp, ep)) != 0)
623 			break;
624 	}
625 
626 	if (rv == 0)
627 		rv = fmd_serd_hash_contains(&mp->mod_serds, ep);
628 
629 	fmd_module_unlock(mp);
630 	return (rv);
631 }
632 
633 void
634 fmd_module_setdirty(fmd_module_t *mp)
635 {
636 	(void) pthread_mutex_lock(&mp->mod_lock);
637 	mp->mod_flags |= FMD_MOD_MDIRTY;
638 	(void) pthread_mutex_unlock(&mp->mod_lock);
639 }
640 
641 void
642 fmd_module_setcdirty(fmd_module_t *mp)
643 {
644 	(void) pthread_mutex_lock(&mp->mod_lock);
645 	mp->mod_flags |= FMD_MOD_CDIRTY;
646 	(void) pthread_mutex_unlock(&mp->mod_lock);
647 }
648 
649 void
650 fmd_module_clrdirty(fmd_module_t *mp)
651 {
652 	fmd_case_t *cp;
653 
654 	fmd_module_lock(mp);
655 
656 	if (mp->mod_flags & FMD_MOD_CDIRTY) {
657 		for (cp = fmd_list_next(&mp->mod_cases);
658 		    cp != NULL; cp = fmd_list_next(cp))
659 			fmd_case_clrdirty(cp);
660 	}
661 
662 	if (mp->mod_flags & FMD_MOD_MDIRTY) {
663 		fmd_serd_hash_apply(&mp->mod_serds,
664 		    (fmd_serd_eng_f *)fmd_serd_eng_clrdirty, NULL);
665 		fmd_buf_hash_commit(&mp->mod_bufs);
666 	}
667 
668 	(void) pthread_mutex_lock(&mp->mod_lock);
669 	mp->mod_flags &= ~(FMD_MOD_MDIRTY | FMD_MOD_CDIRTY);
670 	(void) pthread_mutex_unlock(&mp->mod_lock);
671 
672 	fmd_module_unlock(mp);
673 }
674 
675 void
676 fmd_module_commit(fmd_module_t *mp)
677 {
678 	fmd_case_t *cp;
679 
680 	ASSERT(fmd_module_locked(mp));
681 
682 	if (mp->mod_flags & FMD_MOD_CDIRTY) {
683 		for (cp = fmd_list_next(&mp->mod_cases);
684 		    cp != NULL; cp = fmd_list_next(cp))
685 			fmd_case_commit(cp);
686 	}
687 
688 	if (mp->mod_flags & FMD_MOD_MDIRTY) {
689 		fmd_serd_hash_apply(&mp->mod_serds,
690 		    (fmd_serd_eng_f *)fmd_serd_eng_commit, NULL);
691 		fmd_buf_hash_commit(&mp->mod_bufs);
692 	}
693 
694 	(void) pthread_mutex_lock(&mp->mod_lock);
695 	mp->mod_flags &= ~(FMD_MOD_MDIRTY | FMD_MOD_CDIRTY);
696 	(void) pthread_mutex_unlock(&mp->mod_lock);
697 
698 	mp->mod_gen++;
699 }
700 
701 void
702 fmd_module_lock(fmd_module_t *mp)
703 {
704 	pthread_t self = pthread_self();
705 
706 	(void) pthread_mutex_lock(&mp->mod_lock);
707 
708 	while (mp->mod_flags & FMD_MOD_LOCK) {
709 		if (mp->mod_owner != self)
710 			(void) pthread_cond_wait(&mp->mod_cv, &mp->mod_lock);
711 		else
712 			fmd_panic("recursive module lock of %p\n", (void *)mp);
713 	}
714 
715 	mp->mod_owner = self;
716 	mp->mod_flags |= FMD_MOD_LOCK;
717 
718 	(void) pthread_mutex_unlock(&mp->mod_lock);
719 	(void) pthread_cond_broadcast(&mp->mod_cv);
720 }
721 
722 void
723 fmd_module_unlock(fmd_module_t *mp)
724 {
725 	(void) pthread_mutex_lock(&mp->mod_lock);
726 
727 	ASSERT(mp->mod_owner == pthread_self());
728 	ASSERT(mp->mod_flags & FMD_MOD_LOCK);
729 
730 	mp->mod_owner = 0;
731 	mp->mod_flags &= ~FMD_MOD_LOCK;
732 
733 	(void) pthread_mutex_unlock(&mp->mod_lock);
734 	(void) pthread_cond_broadcast(&mp->mod_cv);
735 }
736 
737 int
738 fmd_module_trylock(fmd_module_t *mp)
739 {
740 	(void) pthread_mutex_lock(&mp->mod_lock);
741 
742 	if (mp->mod_flags & FMD_MOD_LOCK) {
743 		(void) pthread_mutex_unlock(&mp->mod_lock);
744 		return (0);
745 	}
746 
747 	mp->mod_owner = pthread_self();
748 	mp->mod_flags |= FMD_MOD_LOCK;
749 
750 	(void) pthread_mutex_unlock(&mp->mod_lock);
751 	(void) pthread_cond_broadcast(&mp->mod_cv);
752 
753 	return (1);
754 }
755 
756 int
757 fmd_module_locked(fmd_module_t *mp)
758 {
759 	return ((mp->mod_flags & FMD_MOD_LOCK) &&
760 	    mp->mod_owner == pthread_self());
761 }
762 
/*
 * Enter module 'mp' and optionally call 'func' on it.  The module is marked
 * FMD_MOD_BUSY and mod_jmpbuf is armed so that fmd_module_abort() can longjmp
 * back here.  Returns zero if no abort occurred, or the error code passed to
 * longjmp() otherwise, in which case fmd_module_error() has been called.
 * FMD_MOD_BUSY is still set on return; it is cleared by fmd_module_exit().
 */
int
fmd_module_enter(fmd_module_t *mp, void (*func)(fmd_hdl_t *))
{
	volatile int err;

	(void) pthread_mutex_lock(&mp->mod_lock);

	ASSERT(!(mp->mod_flags & FMD_MOD_BUSY));
	mp->mod_flags |= FMD_MOD_BUSY;

	/*
	 * setjmp() returns zero when first called with mod_lock held.  When
	 * we arrive here by longjmp() the lock is taken again before the
	 * failure is recorded, so both paths reach the unlock below holding
	 * mod_lock.
	 */
	if ((err = setjmp(mp->mod_jmpbuf)) != 0) {
		(void) pthread_mutex_lock(&mp->mod_lock);
		fmd_module_error(mp, err);
	}

	(void) pthread_mutex_unlock(&mp->mod_lock);
	(void) pthread_cond_broadcast(&mp->mod_cv);

	/*
	 * If it's the first time through fmd_module_enter(), call the provided
	 * function on the module.  If no fmd_module_abort() results, we will
	 * fall through and return zero.  Otherwise we'll longjmp with an err,
	 * return to the setjmp() above, and return the error to our caller.
	 */
	if (err == 0 && func != NULL)
		(*func)((fmd_hdl_t *)mp);

	return (err);
}
792 
793 void
794 fmd_module_exit(fmd_module_t *mp)
795 {
796 	(void) pthread_mutex_lock(&mp->mod_lock);
797 
798 	ASSERT(mp->mod_flags & FMD_MOD_BUSY);
799 	mp->mod_flags &= ~FMD_MOD_BUSY;
800 
801 	(void) pthread_mutex_unlock(&mp->mod_lock);
802 	(void) pthread_cond_broadcast(&mp->mod_cv);
803 }
804 
805 /*
806  * If the client.error policy has been set by a developer, stop or dump core
807  * based on the policy; if we stop and are resumed we'll continue and execute
808  * the default behavior to discard events in fmd_module_start().  If the caller
809  * is the primary module thread, we reach this state by longjmp'ing back to
810  * fmd_module_enter(), above.  If the caller is an auxiliary thread, we cancel
811  * ourself and arrange for the primary thread to call fmd_module_abort().
812  */
813 void
814 fmd_module_abort(fmd_module_t *mp, int err)
815 {
816 	uint_t policy = FMD_CERROR_UNLOAD;
817 	pthread_t tid = pthread_self();
818 
819 	(void) fmd_conf_getprop(fmd.d_conf, "client.error", &policy);
820 
821 	if (policy == FMD_CERROR_STOP) {
822 		fmd_error(err, "stopping after %s in client %s (%p)\n",
823 		    fmd_errclass(err), mp->mod_name, (void *)mp);
824 		(void) raise(SIGSTOP);
825 	} else if (policy == FMD_CERROR_ABORT) {
826 		fmd_panic("aborting due to %s in client %s (%p)\n",
827 		    fmd_errclass(err), mp->mod_name, (void *)mp);
828 	}
829 
830 	/*
831 	 * If the caller is an auxiliary thread, cancel the current thread.  We
832 	 * prefer to cancel because it affords developers the option of using
833 	 * the pthread_cleanup* APIs.  If cancellations have been disabled,
834 	 * fall through to forcing the current thread to exit.  In either case
835 	 * we update mod_error (if zero) to enter the failed state.  Once that
836 	 * is set, further events received by the module will be discarded.
837 	 *
838 	 * We also set the FMD_MOD_FAIL bit, indicating an unrecoverable error.
839 	 * When an auxiliary thread fails, the module is left in a delicate
840 	 * state where it is likely not able to continue execution (even to
841 	 * execute its _fmd_fini() routine) because our caller may hold locks
842 	 * that are private to the module and can no longer be released.  The
843 	 * FMD_MOD_FAIL bit forces fmd_api_module_lock() to abort if any other
844 	 * module threads reach an API call, in an attempt to get them to exit.
845 	 */
846 	if (tid != mp->mod_thread->thr_tid) {
847 		(void) pthread_mutex_lock(&mp->mod_lock);
848 
849 		if (mp->mod_error == 0)
850 			mp->mod_error = err;
851 
852 		mp->mod_flags |= FMD_MOD_FAIL;
853 		(void) pthread_mutex_unlock(&mp->mod_lock);
854 
855 		(void) pthread_cancel(tid);
856 		pthread_exit(NULL);
857 	}
858 
859 	ASSERT(mp->mod_flags & FMD_MOD_BUSY);
860 	longjmp(mp->mod_jmpbuf, err);
861 }
862 
863 void
864 fmd_module_hold(fmd_module_t *mp)
865 {
866 	(void) pthread_mutex_lock(&mp->mod_lock);
867 
868 	TRACE((FMD_DBG_MOD, "hold %p (%s/%u)\n",
869 	    (void *)mp, mp->mod_name, mp->mod_refs));
870 
871 	mp->mod_refs++;
872 	ASSERT(mp->mod_refs != 0);
873 
874 	(void) pthread_mutex_unlock(&mp->mod_lock);
875 	(void) pthread_cond_broadcast(&mp->mod_cv);
876 }
877 
878 void
879 fmd_module_rele(fmd_module_t *mp)
880 {
881 	(void) pthread_mutex_lock(&mp->mod_lock);
882 
883 	TRACE((FMD_DBG_MOD, "rele %p (%s/%u)\n",
884 	    (void *)mp, mp->mod_name, mp->mod_refs));
885 
886 	ASSERT(mp->mod_refs != 0);
887 	if (--mp->mod_refs == 0) {
888 		fmd_module_destroy(mp);
889 		return;
890 	}
891 
892 	(void) pthread_mutex_unlock(&mp->mod_lock);
893 	(void) pthread_cond_broadcast(&mp->mod_cv);
894 }
895 
896 /*
897  * Wrapper around libdiagcode's fm_dc_opendict() to load module dictionaries.
898  * If the dictionary open is successful, the new dictionary is added to the
899  * mod_dictv[] array and mod_codelen is updated with the new maximum length.
900  */
901 int
902 fmd_module_dc_opendict(fmd_module_t *mp, const char *dict)
903 {
904 	struct fm_dc_handle *dcp, **dcv;
905 	char *dictdir, *dictnam, *p;
906 	size_t len;
907 
908 	ASSERT(fmd_module_locked(mp));
909 
910 	dictnam = alloca(strlen(dict) + 1);
911 	(void) strcpy(dictnam, fmd_strbasename(dict));
912 
913 	if ((p = strrchr(dictnam, '.')) != NULL &&
914 	    strcmp(p, ".dict") == 0)
915 		*p = '\0'; /* eliminate any trailing .dict suffix */
916 
917 	/*
918 	 * If 'dict' is an absolute path, dictdir = $rootdir/`dirname dict`
919 	 * If 'dict' is not an absolute path, dictdir = $dictdir/`dirname dict`
920 	 */
921 	if (dict[0] == '/') {
922 		len = strlen(fmd.d_rootdir) + strlen(dict) + 1;
923 		dictdir = alloca(len);
924 		(void) snprintf(dictdir, len, "%s%s", fmd.d_rootdir, dict);
925 		(void) fmd_strdirname(dictdir);
926 	} else {
927 		(void) fmd_conf_getprop(fmd.d_conf, "dictdir", &p);
928 		len = strlen(fmd.d_rootdir) + strlen(p) + strlen(dict) + 3;
929 		dictdir = alloca(len);
930 		(void) snprintf(dictdir, len,
931 		    "%s/%s/%s", fmd.d_rootdir, p, dict);
932 		(void) fmd_strdirname(dictdir);
933 	}
934 
935 	fmd_dprintf(FMD_DBG_MOD, "module %s opening %s -> %s/%s.dict\n",
936 	    mp->mod_name, dict, dictdir, dictnam);
937 
938 	if ((dcp = fm_dc_opendict(FM_DC_VERSION, dictdir, dictnam)) == NULL)
939 		return (-1); /* errno is set for us */
940 
941 	dcv = fmd_alloc(sizeof (dcp) * (mp->mod_dictc + 1), FMD_SLEEP);
942 	bcopy(mp->mod_dictv, dcv, sizeof (dcp) * mp->mod_dictc);
943 	fmd_free(mp->mod_dictv, sizeof (dcp) * mp->mod_dictc);
944 	mp->mod_dictv = dcv;
945 	mp->mod_dictv[mp->mod_dictc++] = dcp;
946 
947 	len = fm_dc_codelen(dcp);
948 	mp->mod_codelen = MAX(mp->mod_codelen, len);
949 
950 	return (0);
951 }
952 
953 /*
954  * Wrapper around libdiagcode's fm_dc_key2code() that examines all the module's
955  * dictionaries.  We adhere to the libdiagcode return values and semantics.
956  */
957 int
958 fmd_module_dc_key2code(fmd_module_t *mp,
959     char *const keys[], char *code, size_t codelen)
960 {
961 	int i, err;
962 
963 	for (i = 0; i < mp->mod_dictc; i++) {
964 		if ((err = fm_dc_key2code(mp->mod_dictv[i], (const char **)keys,
965 		    code, codelen)) == 0 || errno != ENOMSG)
966 			return (err);
967 	}
968 
969 	return (fmd_set_errno(ENOMSG));
970 }
971 
972 fmd_modhash_t *
973 fmd_modhash_create(void)
974 {
975 	fmd_modhash_t *mhp = fmd_alloc(sizeof (fmd_modhash_t), FMD_SLEEP);
976 
977 	(void) pthread_rwlock_init(&mhp->mh_lock, NULL);
978 	mhp->mh_hashlen = fmd.d_str_buckets;
979 	mhp->mh_hash = fmd_zalloc(sizeof (void *) * mhp->mh_hashlen, FMD_SLEEP);
980 	mhp->mh_nelems = 0;
981 
982 	return (mhp);
983 }
984 
985 void
986 fmd_modhash_destroy(fmd_modhash_t *mhp)
987 {
988 	fmd_module_t *mp, *nmp;
989 	uint_t i;
990 
991 	for (i = 0; i < mhp->mh_hashlen; i++) {
992 		for (mp = mhp->mh_hash[i]; mp != NULL; mp = nmp) {
993 			nmp = mp->mod_next;
994 			mp->mod_next = NULL;
995 			fmd_module_rele(mp);
996 		}
997 	}
998 
999 	fmd_free(mhp->mh_hash, sizeof (void *) * mhp->mh_hashlen);
1000 	(void) pthread_rwlock_destroy(&mhp->mh_lock);
1001 	fmd_free(mhp, sizeof (fmd_modhash_t));
1002 }
1003 
1004 static void
1005 fmd_modhash_loaddir(fmd_modhash_t *mhp, const char *dir,
1006     const fmd_modops_t *ops)
1007 {
1008 	char path[PATH_MAX];
1009 	struct dirent *dp, *ep;
1010 	const char *p;
1011 	DIR *dirp;
1012 
1013 	if ((dirp = opendir(dir)) == NULL)
1014 		return; /* failed to open directory; just skip it */
1015 
1016 	ep = alloca(sizeof (struct dirent) + PATH_MAX + 1);
1017 	bzero(ep, sizeof (struct dirent) + PATH_MAX + 1);
1018 
1019 	while (readdir_r(dirp, ep, &dp) == 0 && dp != NULL) {
1020 		if (dp->d_name[0] == '.')
1021 			continue; /* skip "." and ".." */
1022 
1023 		if ((p = strrchr(dp->d_name, '.')) != NULL &&
1024 		    strcmp(p, ".conf") == 0)
1025 			continue; /* skip .conf files */
1026 
1027 		(void) snprintf(path, sizeof (path), "%s/%s", dir, dp->d_name);
1028 		(void) fmd_modhash_load(mhp, path, ops);
1029 	}
1030 
1031 	(void) closedir(dirp);
1032 }
1033 
1034 void
1035 fmd_modhash_loadall(fmd_modhash_t *mhp, const fmd_conf_path_t *pap,
1036     const fmd_modops_t *ops)
1037 {
1038 	int i;
1039 
1040 	for (i = 0; i < pap->cpa_argc; i++)
1041 		fmd_modhash_loaddir(mhp, pap->cpa_argv[i], ops);
1042 }
1043 
1044 void
1045 fmd_modhash_apply(fmd_modhash_t *mhp, void (*func)(fmd_module_t *))
1046 {
1047 	fmd_module_t *mp, *np;
1048 	uint_t i;
1049 
1050 	(void) pthread_rwlock_rdlock(&mhp->mh_lock);
1051 
1052 	for (i = 0; i < mhp->mh_hashlen; i++) {
1053 		for (mp = mhp->mh_hash[i]; mp != NULL; mp = np) {
1054 			np = mp->mod_next;
1055 			func(mp);
1056 		}
1057 	}
1058 
1059 	(void) pthread_rwlock_unlock(&mhp->mh_lock);
1060 }
1061 
1062 void
1063 fmd_modhash_tryapply(fmd_modhash_t *mhp, void (*func)(fmd_module_t *))
1064 {
1065 	fmd_module_t *mp, *np;
1066 	uint_t i;
1067 
1068 	if (mhp == NULL || pthread_rwlock_tryrdlock(&mhp->mh_lock) != 0)
1069 		return; /* not initialized or couldn't grab lock */
1070 
1071 	for (i = 0; i < mhp->mh_hashlen; i++) {
1072 		for (mp = mhp->mh_hash[i]; mp != NULL; mp = np) {
1073 			np = mp->mod_next;
1074 			func(mp);
1075 		}
1076 	}
1077 
1078 	(void) pthread_rwlock_unlock(&mhp->mh_lock);
1079 }
1080 
1081 void
1082 fmd_modhash_dispatch(fmd_modhash_t *mhp, fmd_event_t *ep)
1083 {
1084 	fmd_module_t *mp;
1085 	uint_t i;
1086 
1087 	fmd_event_hold(ep);
1088 	(void) pthread_rwlock_rdlock(&mhp->mh_lock);
1089 
1090 	for (i = 0; i < mhp->mh_hashlen; i++) {
1091 		for (mp = mhp->mh_hash[i]; mp != NULL; mp = mp->mod_next) {
1092 			/*
1093 			 * If FMD_MOD_INIT is set but MOD_FINI, MOD_QUIT, and
1094 			 * mod_error are all zero, then the module is active:
1095 			 * enqueue the event in the corresponding event queue.
1096 			 */
1097 			(void) pthread_mutex_lock(&mp->mod_lock);
1098 
1099 			if ((mp->mod_flags & (FMD_MOD_INIT | FMD_MOD_FINI |
1100 			    FMD_MOD_QUIT)) == FMD_MOD_INIT && !mp->mod_error)
1101 				fmd_eventq_insert_at_time(mp->mod_queue, ep);
1102 
1103 			(void) pthread_mutex_unlock(&mp->mod_lock);
1104 		}
1105 	}
1106 
1107 	(void) pthread_rwlock_unlock(&mhp->mh_lock);
1108 	fmd_event_rele(ep);
1109 }
1110 
1111 fmd_module_t *
1112 fmd_modhash_lookup(fmd_modhash_t *mhp, const char *name)
1113 {
1114 	fmd_module_t *mp;
1115 	uint_t h;
1116 
1117 	(void) pthread_rwlock_rdlock(&mhp->mh_lock);
1118 	h = fmd_strhash(name) % mhp->mh_hashlen;
1119 
1120 	for (mp = mhp->mh_hash[h]; mp != NULL; mp = mp->mod_next) {
1121 		if (strcmp(name, mp->mod_name) == 0)
1122 			break;
1123 	}
1124 
1125 	if (mp != NULL)
1126 		fmd_module_hold(mp);
1127 	else
1128 		(void) fmd_set_errno(EFMD_MOD_NOMOD);
1129 
1130 	(void) pthread_rwlock_unlock(&mhp->mh_lock);
1131 	return (mp);
1132 }
1133 
1134 fmd_module_t *
1135 fmd_modhash_load(fmd_modhash_t *mhp, const char *path, const fmd_modops_t *ops)
1136 {
1137 	char name[PATH_MAX], *p;
1138 	fmd_module_t *mp;
1139 	int tries = 0;
1140 	uint_t h;
1141 
1142 	(void) strlcpy(name, fmd_strbasename(path), sizeof (name));
1143 	if ((p = strrchr(name, '.')) != NULL && strcmp(p, ".so") == 0)
1144 		*p = '\0'; /* strip trailing .so from any module name */
1145 
1146 	(void) pthread_rwlock_wrlock(&mhp->mh_lock);
1147 	h = fmd_strhash(name) % mhp->mh_hashlen;
1148 
1149 	/*
1150 	 * First check to see if a module is already present in the hash table
1151 	 * for this name.  If so, the module is already loaded: skip it.
1152 	 */
1153 	for (mp = mhp->mh_hash[h]; mp != NULL; mp = mp->mod_next) {
1154 		if (strcmp(name, mp->mod_name) == 0)
1155 			break;
1156 	}
1157 
1158 	if (mp != NULL) {
1159 		(void) pthread_rwlock_unlock(&mhp->mh_lock);
1160 		(void) fmd_set_errno(EFMD_MOD_LOADED);
1161 		return (NULL);
1162 	}
1163 
1164 	/*
1165 	 * fmd_module_create() will return a held (as if by fmd_module_hold())
1166 	 * module.  We leave this hold in place to correspond to the hash-in.
1167 	 */
1168 	while ((mp = fmd_module_create(path, ops)) == NULL) {
1169 		if (tries++ != 0 || errno != EFMD_CKPT_INVAL) {
1170 			(void) pthread_rwlock_unlock(&mhp->mh_lock);
1171 			return (NULL); /* errno is set for us */
1172 		}
1173 	}
1174 
1175 	mp->mod_hash = mhp;
1176 	mp->mod_next = mhp->mh_hash[h];
1177 
1178 	mhp->mh_hash[h] = mp;
1179 	mhp->mh_nelems++;
1180 
1181 	(void) pthread_rwlock_unlock(&mhp->mh_lock);
1182 	return (mp);
1183 }
1184 
1185 int
1186 fmd_modhash_unload(fmd_modhash_t *mhp, const char *name)
1187 {
1188 	fmd_module_t *mp, **pp;
1189 	uint_t h;
1190 
1191 	(void) pthread_rwlock_wrlock(&mhp->mh_lock);
1192 	h = fmd_strhash(name) % mhp->mh_hashlen;
1193 	pp = &mhp->mh_hash[h];
1194 
1195 	for (mp = *pp; mp != NULL; mp = mp->mod_next) {
1196 		if (strcmp(name, mp->mod_name) == 0)
1197 			break;
1198 		else
1199 			pp = &mp->mod_next;
1200 	}
1201 
1202 	if (mp == NULL) {
1203 		(void) pthread_rwlock_unlock(&mhp->mh_lock);
1204 		return (fmd_set_errno(EFMD_MOD_NOMOD));
1205 	}
1206 
1207 	*pp = mp->mod_next;
1208 	mp->mod_next = NULL;
1209 
1210 	ASSERT(mhp->mh_nelems != 0);
1211 	mhp->mh_nelems--;
1212 
1213 	(void) pthread_rwlock_unlock(&mhp->mh_lock);
1214 
1215 	fmd_module_unload(mp);
1216 	fmd_module_rele(mp);
1217 
1218 	return (0);
1219 }
1220 
1221 /*
1222  * Update statistics when an event is dispatched and placed on a module's event
1223  * queue.  This is essentially the same code as kstat_waitq_enter(9F).
1224  */
1225 void
1226 fmd_modstat_eventq_dispatch(fmd_module_t *mp)
1227 {
1228 	fmd_modstat_t *msp;
1229 	hrtime_t new, delta;
1230 	uint32_t wcnt;
1231 
1232 	(void) pthread_mutex_lock(&mp->mod_stats_lock);
1233 
1234 	if ((msp = mp->mod_stats) == NULL) {
1235 		(void) pthread_mutex_unlock(&mp->mod_stats_lock);
1236 		return; /* module is no longer registered */
1237 	}
1238 
1239 	new = gethrtime();
1240 	delta = new - msp->ms_wlastupdate.fmds_value.ui64;
1241 	msp->ms_wlastupdate.fmds_value.ui64 = new;
1242 	wcnt = msp->ms_wcnt.fmds_value.ui32++;
1243 
1244 	if (wcnt != 0) {
1245 		msp->ms_wlentime.fmds_value.ui64 += delta * wcnt;
1246 		msp->ms_wtime.fmds_value.ui64 += delta;
1247 	}
1248 
1249 	msp->ms_dispatched.fmds_value.ui64++;
1250 	(void) pthread_mutex_unlock(&mp->mod_stats_lock);
1251 }
1252 
1253 /*
1254  * Update statistics when an event is dequeued by a module and is sent to the
1255  * dispatch entry point.  This is essentially kstat_waitq_to_runq(9F), except
1256  * simplified because our modules are single-threaded (i.e. runq len == 1).
1257  */
1258 void
1259 fmd_modstat_eventq_dequeue(fmd_module_t *mp, uint_t type)
1260 {
1261 	fmd_modstat_t *msp;
1262 	hrtime_t new, delta;
1263 	uint32_t wcnt;
1264 
1265 	(void) pthread_mutex_lock(&mp->mod_stats_lock);
1266 
1267 	if ((msp = mp->mod_stats) == NULL) {
1268 		(void) pthread_mutex_unlock(&mp->mod_stats_lock);
1269 		return; /* module is no longer registered */
1270 	}
1271 
1272 	new = gethrtime();
1273 	delta = new - msp->ms_wlastupdate.fmds_value.ui64;
1274 
1275 	msp->ms_wlastupdate.fmds_value.ui64 = new;
1276 	msp->ms_dlastupdate.fmds_value.ui64 = new;
1277 
1278 	ASSERT(msp->ms_wcnt.fmds_value.ui32 != 0);
1279 	wcnt = msp->ms_wcnt.fmds_value.ui32--;
1280 
1281 	msp->ms_wlentime.fmds_value.ui64 += delta * wcnt;
1282 	msp->ms_wtime.fmds_value.ui64 += delta;
1283 
1284 	if (type == FMD_EVT_PROTOCOL)
1285 		msp->ms_prdequeued.fmds_value.ui64++;
1286 
1287 	msp->ms_dequeued.fmds_value.ui64++;
1288 	(void) pthread_mutex_unlock(&mp->mod_stats_lock);
1289 }
1290 
1291 /*
1292  * Update statistics when an event is done being processed by the module's
1293  * dispatch entry point.  This is essentially kstat_runq_exit(9F) simplified
1294  * for our principle that modules are single-threaded (i.e. runq len == 1).
1295  */
1296 void
1297 fmd_modstat_eventq_done(fmd_module_t *mp)
1298 {
1299 	fmd_modstat_t *msp;
1300 	hrtime_t new, delta;
1301 
1302 	(void) pthread_mutex_lock(&mp->mod_stats_lock);
1303 
1304 	if ((msp = mp->mod_stats) == NULL) {
1305 		(void) pthread_mutex_unlock(&mp->mod_stats_lock);
1306 		return; /* module is no longer registered */
1307 	}
1308 
1309 	new = gethrtime();
1310 	delta = new - msp->ms_dlastupdate.fmds_value.ui64;
1311 
1312 	msp->ms_dlastupdate.fmds_value.ui64 = new;
1313 	msp->ms_dtime.fmds_value.ui64 += delta;
1314 
1315 	(void) pthread_mutex_unlock(&mp->mod_stats_lock);
1316 }
1317 
1318 void
1319 fmd_modstat_publish(fmd_module_t *mp)
1320 {
1321 	(void) pthread_mutex_lock(&mp->mod_lock);
1322 
1323 	ASSERT(mp->mod_flags & FMD_MOD_STSUB);
1324 	mp->mod_flags |= FMD_MOD_STPUB;
1325 	(void) pthread_cond_broadcast(&mp->mod_cv);
1326 
1327 	while (mp->mod_flags & FMD_MOD_STPUB)
1328 		(void) pthread_cond_wait(&mp->mod_cv, &mp->mod_lock);
1329 
1330 	(void) pthread_mutex_unlock(&mp->mod_lock);
1331 }
1332 
1333 int
1334 fmd_modstat_snapshot(fmd_module_t *mp, fmd_ustat_snap_t *uss)
1335 {
1336 	fmd_event_t *e;
1337 	int err;
1338 
1339 	/*
1340 	 * Grab the module lock and wait for the STSUB bit to be clear.  Then
1341 	 * set it to indicate we are a subscriber and everyone else must wait.
1342 	 */
1343 	(void) pthread_mutex_lock(&mp->mod_lock);
1344 
1345 	while (mp->mod_error == 0 && (mp->mod_flags & FMD_MOD_STSUB))
1346 		(void) pthread_cond_wait(&mp->mod_cv, &mp->mod_lock);
1347 
1348 	if (mp->mod_error != 0) {
1349 		(void) pthread_mutex_unlock(&mp->mod_lock);
1350 		return (fmd_set_errno(EFMD_HDL_ABORT));
1351 	}
1352 
1353 	mp->mod_flags |= FMD_MOD_STSUB;
1354 	(void) pthread_mutex_unlock(&mp->mod_lock);
1355 	(void) pthread_cond_broadcast(&mp->mod_cv);
1356 
1357 	/*
1358 	 * Create a stats pseudo-event and dispatch it to the module, forcing
1359 	 * it to next execute its custom snapshot routine (or the empty one).
1360 	 */
1361 	e = fmd_event_create(FMD_EVT_STATS, FMD_HRT_NOW, NULL, NULL);
1362 	fmd_eventq_insert_at_head(mp->mod_queue, e);
1363 
1364 	/*
1365 	 * Grab the module lock and then wait on mod_cv for STPUB to be set,
1366 	 * indicating the snapshot routine is completed and the module is idle.
1367 	 */
1368 	(void) pthread_mutex_lock(&mp->mod_lock);
1369 
1370 	while (mp->mod_error == 0 && !(mp->mod_flags & FMD_MOD_STPUB))
1371 		(void) pthread_cond_wait(&mp->mod_cv, &mp->mod_lock);
1372 
1373 	if (mp->mod_error != 0) {
1374 		(void) pthread_mutex_unlock(&mp->mod_lock);
1375 		return (fmd_set_errno(EFMD_HDL_ABORT));
1376 	}
1377 
1378 	(void) pthread_mutex_unlock(&mp->mod_lock);
1379 	(void) pthread_cond_broadcast(&mp->mod_cv);
1380 
1381 	/*
1382 	 * Update ms_snaptime and take the actual snapshot of the various
1383 	 * statistics while the module is quiescent and waiting for us.
1384 	 */
1385 	(void) pthread_mutex_lock(&mp->mod_stats_lock);
1386 
1387 	if (mp->mod_stats != NULL) {
1388 		mp->mod_stats->ms_snaptime.fmds_value.ui64 = gethrtime();
1389 		err = fmd_ustat_snapshot(mp->mod_ustat, uss);
1390 	} else
1391 		err = fmd_set_errno(EFMD_HDL_ABORT);
1392 
1393 	(void) pthread_mutex_unlock(&mp->mod_stats_lock);
1394 
1395 	/*
1396 	 * With the snapshot complete, grab the module lock and clear both
1397 	 * STSUB and STPUB, permitting everyone to wake up and continue.
1398 	 */
1399 	(void) pthread_mutex_lock(&mp->mod_lock);
1400 
1401 	ASSERT(mp->mod_flags & FMD_MOD_STSUB);
1402 	ASSERT(mp->mod_flags & FMD_MOD_STPUB);
1403 	mp->mod_flags &= ~(FMD_MOD_STSUB | FMD_MOD_STPUB);
1404 
1405 	(void) pthread_mutex_unlock(&mp->mod_lock);
1406 	(void) pthread_cond_broadcast(&mp->mod_cv);
1407 
1408 	return (err);
1409 }
1410