xref: /titanic_50/usr/src/cmd/fm/fmd/common/fmd_module.c (revision d2b5b2d357ee3172eacb6860be1891259902203d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 #include <signal.h>
27 #include <dirent.h>
28 #include <limits.h>
29 #include <alloca.h>
30 #include <unistd.h>
31 #include <stdio.h>
32 
33 #include <fmd_string.h>
34 #include <fmd_alloc.h>
35 #include <fmd_module.h>
36 #include <fmd_error.h>
37 #include <fmd_conf.h>
38 #include <fmd_dispq.h>
39 #include <fmd_eventq.h>
40 #include <fmd_timerq.h>
41 #include <fmd_subr.h>
42 #include <fmd_thread.h>
43 #include <fmd_ustat.h>
44 #include <fmd_case.h>
45 #include <fmd_protocol.h>
46 #include <fmd_buf.h>
47 #include <fmd_ckpt.h>
48 #include <fmd_xprt.h>
49 #include <fmd_topo.h>
50 
51 #include <fmd.h>
52 
53 /*
54  * Template for per-module statistics installed by fmd on behalf of each active
55  * module.  These are used to initialize the per-module mp->mod_stats below.
56  * NOTE: FMD_TYPE_STRING statistics should not be used here.  If they are
57  * required in the future, the FMD_ADM_MODDSTAT service routine must change.
58  */
59 static const fmd_modstat_t _fmd_modstat_tmpl = {
60 {
61 { "fmd.dispatched", FMD_TYPE_UINT64, "total events dispatched to module" },
62 { "fmd.dequeued", FMD_TYPE_UINT64, "total events dequeued by module" },
63 { "fmd.prdequeued", FMD_TYPE_UINT64, "protocol events dequeued by module" },
64 { "fmd.dropped", FMD_TYPE_UINT64, "total events dropped on queue overflow" },
65 { "fmd.wcnt", FMD_TYPE_UINT32, "count of events waiting on queue" },
66 { "fmd.wtime", FMD_TYPE_TIME, "total wait time on queue" },
67 { "fmd.wlentime", FMD_TYPE_TIME, "total wait length * time product" },
68 { "fmd.wlastupdate", FMD_TYPE_TIME, "hrtime of last wait queue update" },
69 { "fmd.dtime", FMD_TYPE_TIME, "total processing time after dequeue" },
70 { "fmd.dlastupdate", FMD_TYPE_TIME, "hrtime of last event dequeue completion" },
71 },
72 { "fmd.loadtime", FMD_TYPE_TIME, "hrtime at which module was loaded" },
73 { "fmd.snaptime", FMD_TYPE_TIME, "hrtime of last statistics snapshot" },
74 { "fmd.accepted", FMD_TYPE_UINT64, "total events accepted by module" },
75 { "fmd.debugdrop", FMD_TYPE_UINT64, "dropped debug messages" },
76 { "fmd.memtotal", FMD_TYPE_SIZE, "total memory allocated by module" },
77 { "fmd.memlimit", FMD_TYPE_SIZE, "limit on total memory allocated" },
78 { "fmd.buftotal", FMD_TYPE_SIZE, "total buffer space used by module" },
79 { "fmd.buflimit", FMD_TYPE_SIZE, "limit on total buffer space" },
80 { "fmd.thrtotal", FMD_TYPE_UINT32, "total number of auxiliary threads" },
81 { "fmd.thrlimit", FMD_TYPE_UINT32, "limit on number of auxiliary threads" },
82 { "fmd.doorthrtotal", FMD_TYPE_UINT32, "total number of door server threads" },
83 { "fmd.doorthrlimit", FMD_TYPE_UINT32, "limit on door server threads" },
84 { "fmd.caseopen", FMD_TYPE_UINT64, "cases currently open by module" },
85 { "fmd.casesolved", FMD_TYPE_UINT64, "total cases solved by module" },
86 { "fmd.caseclosed", FMD_TYPE_UINT64, "total cases closed by module" },
87 { "fmd.ckptsave", FMD_TYPE_BOOL, "save checkpoints for module" },
88 { "fmd.ckptrestore", FMD_TYPE_BOOL, "restore checkpoints for module" },
89 { "fmd.ckptzero", FMD_TYPE_BOOL, "zeroed checkpoint at startup" },
90 { "fmd.ckptcnt", FMD_TYPE_UINT64, "number of checkpoints taken" },
91 { "fmd.ckpttime", FMD_TYPE_TIME, "total checkpoint time" },
92 { "fmd.xprtopen", FMD_TYPE_UINT32, "total number of open transports" },
93 { "fmd.xprtlimit", FMD_TYPE_UINT32, "limit on number of open transports" },
94 { "fmd.xprtqlimit", FMD_TYPE_UINT32, "limit on transport event queue length" },
95 };
96 
/*
 * Entry point for a module's primary thread; fmd_module_create() passes this
 * function to fmd_thread_create() with the module as its argument.  The thread
 * runs the module's mop_init(), then loops delivering queued events through
 * mop_dispatch() until the event queue returns NULL, and finally runs
 * mop_fini().  Progress is reported to waiters by setting FMD_MOD_INIT or
 * FMD_MOD_FINI (or mod_error) under mod_lock and broadcasting on mod_cv.
 */
static void
fmd_module_start(void *arg)
{
	fmd_module_t *mp = arg;
	fmd_event_t *ep;
	fmd_xprt_t *xp;

	/*
	 * fmd_module_create() holds mod_lock while it creates this thread, so
	 * we block here until the creator drops the lock in pthread_cond_wait.
	 */
	(void) pthread_mutex_lock(&mp->mod_lock);

	/*
	 * Initialization fails if mop_init() returns non-zero or if mod_error
	 * was set along the way.  Preserve the first recorded error; otherwise
	 * fall back to errno and then to the generic EFMD_MOD_INIT.
	 */
	if (mp->mod_ops->mop_init(mp) != 0 || mp->mod_error != 0) {
		if (mp->mod_error == 0)
			mp->mod_error = errno ? errno : EFMD_MOD_INIT;
		goto out;
	}

	/* If a daemon-wide module event is set, make it the first one seen. */
	if (fmd.d_mod_event != NULL)
		fmd_eventq_insert_at_head(mp->mod_queue, fmd.d_mod_event);

	ASSERT(MUTEX_HELD(&mp->mod_lock));
	mp->mod_flags |= FMD_MOD_INIT;

	/* Wake fmd_module_create(), which waits for FMD_MOD_INIT or an error. */
	(void) pthread_cond_broadcast(&mp->mod_cv);
	(void) pthread_mutex_unlock(&mp->mod_lock);

	/*
	 * If the module opened any transports while executing _fmd_init(),
	 * they are suspended. Now that _fmd_init() is done, wake them up.
	 */
	for (xp = fmd_list_next(&mp->mod_transports);
	    xp != NULL; xp = fmd_list_next(xp))
		fmd_xprt_xresume(xp, FMD_XPRT_ISUSPENDED);

	/*
	 * Wait for events to arrive by checking mod_error and then sleeping in
	 * fmd_eventq_delete().  If a NULL event is returned, the eventq has
	 * been aborted and we continue on to call fini and exit the thread.
	 */
	while ((ep = fmd_eventq_delete(mp->mod_queue)) != NULL) {
		/*
		 * If the module has failed, discard the event without ever
		 * passing it to the module and go back to sleep.
		 */
		if (mp->mod_error != 0) {
			fmd_eventq_done(mp->mod_queue);
			fmd_event_rele(ep);
			continue;
		}

		mp->mod_ops->mop_dispatch(mp, ep);
		fmd_eventq_done(mp->mod_queue);

		/*
		 * Once mop_dispatch() is complete, grab the lock and perform
		 * any event-specific post-processing.  Finally, if necessary,
		 * checkpoint the state of the module after this event.
		 */
		fmd_module_lock(mp);

		if (FMD_EVENT_TYPE(ep) == FMD_EVT_CLOSE)
			fmd_case_delete(FMD_EVENT_DATA(ep));

		fmd_ckpt_save(mp);
		fmd_module_unlock(mp);
		fmd_event_rele(ep);
	}

	/* Record a fini failure only if no earlier error has been recorded. */
	if (mp->mod_ops->mop_fini(mp) != 0 && mp->mod_error == 0)
		mp->mod_error = errno ? errno : EFMD_MOD_FINI;

	(void) pthread_mutex_lock(&mp->mod_lock);
	mp->mod_flags |= FMD_MOD_FINI;

	/*
	 * Both the normal exit path and the init-failure path arrive here with
	 * mod_lock held; wake any waiters and drop the lock before returning.
	 */
out:
	(void) pthread_cond_broadcast(&mp->mod_cv);
	(void) pthread_mutex_unlock(&mp->mod_lock);
}
173 
174 fmd_module_t *
175 fmd_module_create(const char *path, const fmd_modops_t *ops)
176 {
177 	fmd_module_t *mp = fmd_zalloc(sizeof (fmd_module_t), FMD_SLEEP);
178 
179 	char buf[PATH_MAX], *p;
180 	const char *dir;
181 	uint32_t limit;
182 	int err;
183 
184 	(void) strlcpy(buf, fmd_strbasename(path), sizeof (buf));
185 	if ((p = strrchr(buf, '.')) != NULL && strcmp(p, ".so") == 0)
186 		*p = '\0'; /* strip trailing .so from any module name */
187 
188 	(void) pthread_mutex_init(&mp->mod_lock, NULL);
189 	(void) pthread_cond_init(&mp->mod_cv, NULL);
190 	(void) pthread_mutex_init(&mp->mod_stats_lock, NULL);
191 
192 	mp->mod_name = fmd_strdup(buf, FMD_SLEEP);
193 	mp->mod_path = fmd_strdup(path, FMD_SLEEP);
194 	mp->mod_ops = ops;
195 	mp->mod_ustat = fmd_ustat_create();
196 
197 	(void) fmd_conf_getprop(fmd.d_conf, "ckpt.dir", &dir);
198 	(void) snprintf(buf, sizeof (buf),
199 	    "%s/%s/%s", fmd.d_rootdir, dir, mp->mod_name);
200 
201 	mp->mod_ckpt = fmd_strdup(buf, FMD_SLEEP);
202 
203 	(void) fmd_conf_getprop(fmd.d_conf, "client.tmrlim", &limit);
204 	mp->mod_timerids = fmd_idspace_create(mp->mod_name, 1, limit + 1);
205 	mp->mod_threads = fmd_idspace_create(mp->mod_name, 0, INT_MAX);
206 
207 	fmd_buf_hash_create(&mp->mod_bufs);
208 	fmd_serd_hash_create(&mp->mod_serds);
209 
210 	mp->mod_topo_current = fmd_topo_hold();
211 
212 	(void) pthread_mutex_lock(&fmd.d_mod_lock);
213 	fmd_list_append(&fmd.d_mod_list, mp);
214 	(void) pthread_mutex_unlock(&fmd.d_mod_lock);
215 
216 	/*
217 	 * Initialize the module statistics that are kept on its behalf by fmd.
218 	 * These are set up using a template defined at the top of this file.
219 	 */
220 	if ((mp->mod_stats = (fmd_modstat_t *)fmd_ustat_insert(mp->mod_ustat,
221 	    FMD_USTAT_ALLOC, sizeof (_fmd_modstat_tmpl) / sizeof (fmd_stat_t),
222 	    (fmd_stat_t *)&_fmd_modstat_tmpl, NULL)) == NULL) {
223 		fmd_error(EFMD_MOD_INIT, "failed to initialize per-mod stats");
224 		fmd_module_destroy(mp);
225 		return (NULL);
226 	}
227 
228 	if (nv_alloc_init(&mp->mod_nva_sleep,
229 	    &fmd_module_nva_ops_sleep, mp) != 0 ||
230 	    nv_alloc_init(&mp->mod_nva_nosleep,
231 	    &fmd_module_nva_ops_nosleep, mp) != 0) {
232 		fmd_error(EFMD_MOD_INIT, "failed to initialize nvlist "
233 		    "allocation routines");
234 		fmd_module_destroy(mp);
235 		return (NULL);
236 	}
237 
238 	(void) fmd_conf_getprop(fmd.d_conf, "client.evqlim", &limit);
239 
240 	mp->mod_queue = fmd_eventq_create(mp,
241 	    &mp->mod_stats->ms_evqstat, &mp->mod_stats_lock, limit);
242 
243 	(void) fmd_conf_getprop(fmd.d_conf, "client.memlim",
244 	    &mp->mod_stats->ms_memlimit.fmds_value.ui64);
245 
246 	(void) fmd_conf_getprop(fmd.d_conf, "client.buflim",
247 	    &mp->mod_stats->ms_buflimit.fmds_value.ui64);
248 
249 	(void) fmd_conf_getprop(fmd.d_conf, "client.thrlim",
250 	    &mp->mod_stats->ms_thrlimit.fmds_value.ui32);
251 
252 	(void) fmd_conf_getprop(fmd.d_conf, "client.doorthrlim",
253 	    &mp->mod_stats->ms_doorthrlimit.fmds_value.ui32);
254 
255 	(void) fmd_conf_getprop(fmd.d_conf, "client.xprtlim",
256 	    &mp->mod_stats->ms_xprtlimit.fmds_value.ui32);
257 
258 	(void) fmd_conf_getprop(fmd.d_conf, "client.xprtqlim",
259 	    &mp->mod_stats->ms_xprtqlimit.fmds_value.ui32);
260 
261 	(void) fmd_conf_getprop(fmd.d_conf, "ckpt.save",
262 	    &mp->mod_stats->ms_ckpt_save.fmds_value.bool);
263 
264 	(void) fmd_conf_getprop(fmd.d_conf, "ckpt.restore",
265 	    &mp->mod_stats->ms_ckpt_restore.fmds_value.bool);
266 
267 	(void) fmd_conf_getprop(fmd.d_conf, "ckpt.zero",
268 	    &mp->mod_stats->ms_ckpt_zeroed.fmds_value.bool);
269 
270 	if (mp->mod_stats->ms_ckpt_zeroed.fmds_value.bool)
271 		fmd_ckpt_delete(mp); /* blow away any pre-existing checkpoint */
272 
273 	/*
274 	 * Place a hold on the module and grab the module lock before creating
275 	 * the module's thread to ensure that it cannot destroy the module and
276 	 * that it cannot call ops->mop_init() before we're done setting up.
277 	 * NOTE: from now on, we must use fmd_module_rele() for error paths.
278 	 */
279 	fmd_module_hold(mp);
280 	(void) pthread_mutex_lock(&mp->mod_lock);
281 	mp->mod_stats->ms_loadtime.fmds_value.ui64 = gethrtime();
282 	mp->mod_thread = fmd_thread_create(mp, fmd_module_start, mp);
283 
284 	if (mp->mod_thread == NULL) {
285 		fmd_error(EFMD_MOD_THR, "failed to create thread for %s", path);
286 		(void) pthread_mutex_unlock(&mp->mod_lock);
287 		fmd_module_rele(mp);
288 		return (NULL);
289 	}
290 
291 	/*
292 	 * At this point our module structure is nearly finished and its thread
293 	 * is starting execution in fmd_module_start() above, which will begin
294 	 * by blocking for mod_lock.  We now drop mod_lock and wait for either
295 	 * FMD_MOD_INIT or mod_error to be set before proceeding.
296 	 */
297 	while (!(mp->mod_flags & FMD_MOD_INIT) && mp->mod_error == 0)
298 		(void) pthread_cond_wait(&mp->mod_cv, &mp->mod_lock);
299 
300 	/*
301 	 * If the module has failed to initialize, copy its errno to the errno
302 	 * of the caller, wait for it to unload, and then destroy it.
303 	 */
304 	if (!(mp->mod_flags & FMD_MOD_INIT)) {
305 		err = mp->mod_error;
306 		(void) pthread_mutex_unlock(&mp->mod_lock);
307 
308 		if (err == EFMD_CKPT_INVAL)
309 			fmd_ckpt_rename(mp); /* move aside bad checkpoint */
310 
311 		/*
312 		 * If we're in the background, keep quiet about failure to
313 		 * load because a handle wasn't registered: this is a module's
314 		 * way of telling us it didn't want to be loaded for some
315 		 * reason related to system configuration.  If we're in the
316 		 * foreground we log this too in order to inform developers.
317 		 */
318 		if (fmd.d_fg || err != EFMD_HDL_INIT) {
319 			fmd_error(EFMD_MOD_INIT, "failed to load %s: %s\n",
320 			    path, fmd_strerror(err));
321 		}
322 
323 		fmd_module_unload(mp);
324 		fmd_module_rele(mp);
325 
326 		(void) fmd_set_errno(err);
327 		return (NULL);
328 	}
329 
330 	(void) pthread_cond_broadcast(&mp->mod_cv);
331 	(void) pthread_mutex_unlock(&mp->mod_lock);
332 
333 	fmd_dprintf(FMD_DBG_MOD, "loaded module %s\n", mp->mod_name);
334 	return (mp);
335 }
336 
337 static void
338 fmd_module_untimeout(fmd_idspace_t *ids, id_t id, fmd_module_t *mp)
339 {
340 	void *arg = fmd_timerq_remove(fmd.d_timers, ids, id);
341 
342 	/*
343 	 * The root module calls fmd_timerq_install() directly and must take
344 	 * responsibility for any cleanup of timer arguments that is required.
345 	 * All other modules use fmd_modtimer_t's as the arg data; free them.
346 	 */
347 	if (arg != NULL && mp != fmd.d_rmod)
348 		fmd_free(arg, sizeof (fmd_modtimer_t));
349 }
350 
/*
 * Stop a loaded module: mark it FMD_MOD_QUIT, abort its event queue so that
 * its thread leaves the dispatch loop, wait for the thread to finish, join
 * it, and then free the state that is only needed while the module is loaded
 * (timers, thread idspace, buffers, SERD engines, and held topologies).
 * A second call on a module that already has FMD_MOD_QUIT set returns
 * immediately.  The module structure itself is not freed here.
 */
void
fmd_module_unload(fmd_module_t *mp)
{
	fmd_modtopo_t *mtp;

	(void) pthread_mutex_lock(&mp->mod_lock);

	if (mp->mod_flags & FMD_MOD_QUIT) {
		(void) pthread_mutex_unlock(&mp->mod_lock);
		return; /* module is already unloading */
	}

	ASSERT(mp->mod_thread != NULL);
	mp->mod_flags |= FMD_MOD_QUIT;

	/* mod_queue may still be NULL if creation failed before it was set up */
	if (mp->mod_queue != NULL)
		fmd_eventq_abort(mp->mod_queue);

	/*
	 * Wait for the module's thread to stop processing events and call
	 * _fmd_fini() and exit.  We do this by waiting for FMD_MOD_FINI to be
	 * set if INIT was set, and then attempting to join with the thread.
	 */
	while ((mp->mod_flags & (FMD_MOD_INIT | FMD_MOD_FINI)) == FMD_MOD_INIT)
		(void) pthread_cond_wait(&mp->mod_cv, &mp->mod_lock);

	(void) pthread_cond_broadcast(&mp->mod_cv);
	(void) pthread_mutex_unlock(&mp->mod_lock);

	/* mod_lock is dropped before the join; the thread takes it on exit. */
	fmd_thread_destroy(mp->mod_thread, FMD_THREAD_JOIN);
	mp->mod_thread = NULL;

	/*
	 * Once the module is no longer active, clean up any data structures
	 * that are only required when the module is loaded.
	 */
	fmd_module_lock(mp);

	if (mp->mod_timerids != NULL) {
		/* remove every outstanding timer before destroying the ids */
		fmd_idspace_apply(mp->mod_timerids,
		    (void (*)())fmd_module_untimeout, mp);

		fmd_idspace_destroy(mp->mod_timerids);
		mp->mod_timerids = NULL;
	}

	if (mp->mod_threads != NULL) {
		fmd_idspace_destroy(mp->mod_threads);
		mp->mod_threads = NULL;
	}

	(void) fmd_buf_hash_destroy(&mp->mod_bufs);
	fmd_serd_hash_destroy(&mp->mod_serds);

	/* Drop the reference held by each entry on the module's topo list. */
	while ((mtp = fmd_list_next(&mp->mod_topolist)) != NULL) {
		fmd_list_delete(&mp->mod_topolist, mtp);
		fmd_topo_rele(mtp->mt_topo);
		fmd_free(mtp, sizeof (fmd_modtopo_t));
	}

	fmd_module_unlock(mp);
	fmd_dprintf(FMD_DBG_MOD, "unloaded module %s\n", mp->mod_name);
}
414 
415 void
416 fmd_module_destroy(fmd_module_t *mp)
417 {
418 	fmd_conf_formal_t *cfp = mp->mod_argv;
419 	int i;
420 
421 	ASSERT(MUTEX_HELD(&mp->mod_lock));
422 
423 	if (mp->mod_thread != NULL) {
424 		(void) pthread_mutex_unlock(&mp->mod_lock);
425 		fmd_module_unload(mp);
426 		(void) pthread_mutex_lock(&mp->mod_lock);
427 	}
428 
429 	ASSERT(mp->mod_thread == NULL);
430 	ASSERT(mp->mod_refs == 0);
431 
432 	/*
433 	 * Once the module's thread is dead, we can safely remove the module
434 	 * from global visibility and by removing it from d_mod_list.  Any
435 	 * modhash pointers are already gone by virtue of mod_refs being zero.
436 	 */
437 	(void) pthread_mutex_lock(&fmd.d_mod_lock);
438 	fmd_list_delete(&fmd.d_mod_list, mp);
439 	(void) pthread_mutex_unlock(&fmd.d_mod_lock);
440 
441 	if (mp->mod_topo_current != NULL)
442 		fmd_topo_rele(mp->mod_topo_current);
443 
444 	if (mp->mod_nva_sleep.nva_ops != NULL)
445 		nv_alloc_fini(&mp->mod_nva_sleep);
446 	if (mp->mod_nva_nosleep.nva_ops != NULL)
447 		nv_alloc_fini(&mp->mod_nva_nosleep);
448 
449 	/*
450 	 * Once the module is no longer processing events and no longer visible
451 	 * through any program data structures, we can free all of its content.
452 	 */
453 	if (mp->mod_queue != NULL) {
454 		fmd_eventq_destroy(mp->mod_queue);
455 		mp->mod_queue = NULL;
456 	}
457 
458 	if (mp->mod_ustat != NULL) {
459 		(void) pthread_mutex_lock(&mp->mod_stats_lock);
460 		fmd_ustat_destroy(mp->mod_ustat);
461 		mp->mod_ustat = NULL;
462 		mp->mod_stats = NULL;
463 		(void) pthread_mutex_unlock(&mp->mod_stats_lock);
464 	}
465 
466 	for (i = 0; i < mp->mod_dictc; i++)
467 		fm_dc_closedict(mp->mod_dictv[i]);
468 
469 	fmd_free(mp->mod_dictv, sizeof (struct fm_dc_handle *) * mp->mod_dictc);
470 
471 	if (mp->mod_conf != NULL)
472 		fmd_conf_close(mp->mod_conf);
473 
474 	for (i = 0; i < mp->mod_argc; i++, cfp++) {
475 		fmd_strfree((char *)cfp->cf_name);
476 		fmd_strfree((char *)cfp->cf_default);
477 	}
478 
479 	fmd_free(mp->mod_argv, sizeof (fmd_conf_formal_t) * mp->mod_argc);
480 
481 	fmd_strfree(mp->mod_name);
482 	fmd_strfree(mp->mod_path);
483 	fmd_strfree(mp->mod_ckpt);
484 	nvlist_free(mp->mod_fmri);
485 	fmd_strfree(mp->mod_vers);
486 
487 	fmd_free(mp, sizeof (fmd_module_t));
488 }
489 
490 /*
491  * fmd_module_error() is called after the stack is unwound from a call to
492  * fmd_module_abort() to indicate that the module has failed.  The mod_error
493  * field is used to hold the error code of the first fatal error to the module.
494  * An EFMD_MOD_FAIL event is then created and sent to fmd-self-diagnosis.
495  */
496 static void
497 fmd_module_error(fmd_module_t *mp, int err)
498 {
499 	fmd_event_t *e;
500 	nvlist_t *nvl;
501 	char *class;
502 
503 	ASSERT(MUTEX_HELD(&mp->mod_lock));
504 	ASSERT(err != 0);
505 
506 	TRACE((FMD_DBG_MOD, "module aborted: err=%d", err));
507 
508 	if (mp->mod_error == 0)
509 		mp->mod_error = err;
510 
511 	if (mp == fmd.d_self)
512 		return; /* do not post event if fmd.d_self itself fails */
513 
514 	/*
515 	 * Send an error indicating the module has now failed to fmd.d_self.
516 	 * Since the error causing the failure has already been logged by
517 	 * fmd_api_xerror(), we do not need to bother logging this event.
518 	 * It only exists for the purpose of notifying fmd.d_self that it can
519 	 * close the case associated with this module because mod_error is set.
520 	 */
521 	nvl = fmd_protocol_moderror(mp, EFMD_MOD_FAIL, fmd_strerror(err));
522 	(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
523 	e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
524 	fmd_dispq_dispatch(fmd.d_disp, e, class);
525 }
526 
/*
 * Deliver event 'e' to module 'mp' by invoking the callback in the module's
 * fmdi_ops vector that matches the event type.  The module is marked
 * FMD_MOD_BUSY for the duration of the call and mod_jmpbuf is armed so that
 * fmd_module_abort() can longjmp() back here; in that case the error is
 * recorded with fmd_module_error() and the callback is not invoked again.
 * Event types with no case in the switch below are ignored.
 */
void
fmd_module_dispatch(fmd_module_t *mp, fmd_event_t *e)
{
	const fmd_hdl_ops_t *ops = mp->mod_info->fmdi_ops;
	fmd_event_impl_t *ep = (fmd_event_impl_t *)e;
	fmd_hdl_t *hdl = (fmd_hdl_t *)mp;
	fmd_modtimer_t *t;
	fmd_topo_t *old_topo;
	volatile int err; /* tested again after a longjmp() back to setjmp() */

	/*
	 * Before calling the appropriate module callback, enter the module as
	 * if by fmd_module_enter() and establish mod_jmpbuf for any aborts.
	 */
	(void) pthread_mutex_lock(&mp->mod_lock);

	ASSERT(!(mp->mod_flags & FMD_MOD_BUSY));
	mp->mod_flags |= FMD_MOD_BUSY;

	/*
	 * A non-zero return means we arrived via longjmp() from
	 * fmd_module_abort().  mod_lock was dropped below before the callback
	 * ran, so it must be re-acquired before recording the error.
	 */
	if ((err = setjmp(mp->mod_jmpbuf)) != 0) {
		(void) pthread_mutex_lock(&mp->mod_lock);
		fmd_module_error(mp, err);
	}

	(void) pthread_cond_broadcast(&mp->mod_cv);
	(void) pthread_mutex_unlock(&mp->mod_lock);

	/*
	 * If it's the first time through fmd_module_dispatch(), call the
	 * appropriate module callback based on the event type.  If the call
	 * triggers an fmd_module_abort(), we'll return to setjmp() above with
	 * err set to a non-zero value and then bypass this before exiting.
	 */
	if (err == 0) {
		switch (ep->ev_type) {
		case FMD_EVT_PROTOCOL:
			ops->fmdo_recv(hdl, e, ep->ev_nvl, ep->ev_data);
			break;
		case FMD_EVT_TIMEOUT:
			/* ev_data is the fmd_modtimer_t of the expired timer */
			t = ep->ev_data;
			ASSERT(t->mt_mod == mp);
			ops->fmdo_timeout(hdl, t->mt_id, t->mt_arg);
			break;
		case FMD_EVT_CLOSE:
			ops->fmdo_close(hdl, ep->ev_data);
			break;
		case FMD_EVT_STATS:
			ops->fmdo_stats(hdl);
			fmd_modstat_publish(mp);
			break;
		case FMD_EVT_GC:
			ops->fmdo_gc(hdl);
			break;
		case FMD_EVT_PUBLISH:
			/* handled by fmd itself; no module callback is invoked */
			fmd_case_publish(ep->ev_data, FMD_CASE_CURRENT);
			break;
		case FMD_EVT_TOPO:
			/*
			 * Save the pointer to the old topology and update
			 * the pointer with the updated topology.
			 * With this approach, other threads that reference the
			 * topology either
			 *  - finishes with old topology since
			 *	it is released after updating
			 *	mod_topo_current.
			 *  - or is blocked while mod_topo_current is updated.
			 */
			old_topo = mp->mod_topo_current;
			fmd_module_lock(mp);
			mp->mod_topo_current = (fmd_topo_t *)ep->ev_data;
			fmd_topo_addref(mp->mod_topo_current);
			fmd_module_unlock(mp);
			fmd_topo_rele(old_topo);
			ops->fmdo_topo(hdl, mp->mod_topo_current->ft_hdl);
			break;
		}
	}

	/* Clear FMD_MOD_BUSY whether or not the callback aborted. */
	fmd_module_exit(mp);
}
607 
608 int
609 fmd_module_transport(fmd_module_t *mp, fmd_xprt_t *xp, fmd_event_t *e)
610 {
611 	fmd_event_impl_t *ep = (fmd_event_impl_t *)e;
612 	fmd_hdl_t *hdl = (fmd_hdl_t *)mp;
613 
614 	ASSERT(ep->ev_type == FMD_EVT_PROTOCOL);
615 	return (mp->mod_info->fmdi_ops->fmdo_send(hdl, xp, e, ep->ev_nvl));
616 }
617 
618 void
619 fmd_module_timeout(fmd_modtimer_t *t, id_t id, hrtime_t hrt)
620 {
621 	fmd_event_t *e;
622 
623 	t->mt_id = id; /* save id in case we need to delete from eventq */
624 	e = fmd_event_create(FMD_EVT_TIMEOUT, hrt, NULL, t);
625 	fmd_eventq_insert_at_time(t->mt_mod->mod_queue, e);
626 }
627 
628 /*
629  * Garbage collection is initiated by a timer callback once per day or at the
630  * request of fmadm.  Purge old SERD entries and send the module a GC event.
631  */
632 void
633 fmd_module_gc(fmd_module_t *mp)
634 {
635 	fmd_hdl_info_t *info;
636 	fmd_event_t *e;
637 
638 	if (mp->mod_error != 0)
639 		return; /* do not do anything if the module has failed */
640 
641 	fmd_module_lock(mp);
642 
643 	if ((info = mp->mod_info) != NULL) {
644 		fmd_serd_hash_apply(&mp->mod_serds,
645 		    (fmd_serd_eng_f *)fmd_serd_eng_gc, NULL);
646 	}
647 
648 	fmd_module_unlock(mp);
649 
650 	if (info != NULL) {
651 		e = fmd_event_create(FMD_EVT_GC, FMD_HRT_NOW, NULL, NULL);
652 		fmd_eventq_insert_at_head(mp->mod_queue, e);
653 	}
654 }
655 
656 void
657 fmd_module_trygc(fmd_module_t *mp)
658 {
659 	if (fmd_module_trylock(mp)) {
660 		fmd_serd_hash_apply(&mp->mod_serds,
661 		    (fmd_serd_eng_f *)fmd_serd_eng_gc, NULL);
662 		fmd_module_unlock(mp);
663 	}
664 }
665 
666 int
667 fmd_module_contains(fmd_module_t *mp, fmd_event_t *ep)
668 {
669 	fmd_case_t *cp;
670 	int rv = 0;
671 
672 	fmd_module_lock(mp);
673 
674 	for (cp = fmd_list_next(&mp->mod_cases);
675 	    cp != NULL; cp = fmd_list_next(cp)) {
676 		if ((rv = fmd_case_contains(cp, ep)) != 0)
677 			break;
678 	}
679 
680 	if (rv == 0)
681 		rv = fmd_serd_hash_contains(&mp->mod_serds, ep);
682 
683 	fmd_module_unlock(mp);
684 	return (rv);
685 }
686 
687 void
688 fmd_module_setdirty(fmd_module_t *mp)
689 {
690 	(void) pthread_mutex_lock(&mp->mod_lock);
691 	mp->mod_flags |= FMD_MOD_MDIRTY;
692 	(void) pthread_mutex_unlock(&mp->mod_lock);
693 }
694 
695 void
696 fmd_module_setcdirty(fmd_module_t *mp)
697 {
698 	(void) pthread_mutex_lock(&mp->mod_lock);
699 	mp->mod_flags |= FMD_MOD_CDIRTY;
700 	(void) pthread_mutex_unlock(&mp->mod_lock);
701 }
702 
703 void
704 fmd_module_clrdirty(fmd_module_t *mp)
705 {
706 	fmd_case_t *cp;
707 
708 	fmd_module_lock(mp);
709 
710 	if (mp->mod_flags & FMD_MOD_CDIRTY) {
711 		for (cp = fmd_list_next(&mp->mod_cases);
712 		    cp != NULL; cp = fmd_list_next(cp))
713 			fmd_case_clrdirty(cp);
714 	}
715 
716 	if (mp->mod_flags & FMD_MOD_MDIRTY) {
717 		fmd_serd_hash_apply(&mp->mod_serds,
718 		    (fmd_serd_eng_f *)fmd_serd_eng_clrdirty, NULL);
719 		fmd_buf_hash_commit(&mp->mod_bufs);
720 	}
721 
722 	(void) pthread_mutex_lock(&mp->mod_lock);
723 	mp->mod_flags &= ~(FMD_MOD_MDIRTY | FMD_MOD_CDIRTY);
724 	(void) pthread_mutex_unlock(&mp->mod_lock);
725 
726 	fmd_module_unlock(mp);
727 }
728 
729 void
730 fmd_module_commit(fmd_module_t *mp)
731 {
732 	fmd_case_t *cp;
733 
734 	ASSERT(fmd_module_locked(mp));
735 
736 	if (mp->mod_flags & FMD_MOD_CDIRTY) {
737 		for (cp = fmd_list_next(&mp->mod_cases);
738 		    cp != NULL; cp = fmd_list_next(cp))
739 			fmd_case_commit(cp);
740 	}
741 
742 	if (mp->mod_flags & FMD_MOD_MDIRTY) {
743 		fmd_serd_hash_apply(&mp->mod_serds,
744 		    (fmd_serd_eng_f *)fmd_serd_eng_commit, NULL);
745 		fmd_buf_hash_commit(&mp->mod_bufs);
746 	}
747 
748 	(void) pthread_mutex_lock(&mp->mod_lock);
749 	mp->mod_flags &= ~(FMD_MOD_MDIRTY | FMD_MOD_CDIRTY);
750 	(void) pthread_mutex_unlock(&mp->mod_lock);
751 
752 	mp->mod_gen++;
753 }
754 
755 void
756 fmd_module_lock(fmd_module_t *mp)
757 {
758 	pthread_t self = pthread_self();
759 
760 	(void) pthread_mutex_lock(&mp->mod_lock);
761 
762 	while (mp->mod_flags & FMD_MOD_LOCK) {
763 		if (mp->mod_owner != self)
764 			(void) pthread_cond_wait(&mp->mod_cv, &mp->mod_lock);
765 		else
766 			fmd_panic("recursive module lock of %p\n", (void *)mp);
767 	}
768 
769 	mp->mod_owner = self;
770 	mp->mod_flags |= FMD_MOD_LOCK;
771 
772 	(void) pthread_cond_broadcast(&mp->mod_cv);
773 	(void) pthread_mutex_unlock(&mp->mod_lock);
774 }
775 
776 void
777 fmd_module_unlock(fmd_module_t *mp)
778 {
779 	(void) pthread_mutex_lock(&mp->mod_lock);
780 
781 	ASSERT(mp->mod_owner == pthread_self());
782 	ASSERT(mp->mod_flags & FMD_MOD_LOCK);
783 
784 	mp->mod_owner = 0;
785 	mp->mod_flags &= ~FMD_MOD_LOCK;
786 
787 	(void) pthread_cond_broadcast(&mp->mod_cv);
788 	(void) pthread_mutex_unlock(&mp->mod_lock);
789 }
790 
791 int
792 fmd_module_trylock(fmd_module_t *mp)
793 {
794 	(void) pthread_mutex_lock(&mp->mod_lock);
795 
796 	if (mp->mod_flags & FMD_MOD_LOCK) {
797 		(void) pthread_mutex_unlock(&mp->mod_lock);
798 		return (0);
799 	}
800 
801 	mp->mod_owner = pthread_self();
802 	mp->mod_flags |= FMD_MOD_LOCK;
803 
804 	(void) pthread_cond_broadcast(&mp->mod_cv);
805 	(void) pthread_mutex_unlock(&mp->mod_lock);
806 
807 	return (1);
808 }
809 
810 int
811 fmd_module_locked(fmd_module_t *mp)
812 {
813 	return ((mp->mod_flags & FMD_MOD_LOCK) &&
814 	    mp->mod_owner == pthread_self());
815 }
816 
/*
 * Enter the module on the calling thread: mark it FMD_MOD_BUSY, arm
 * mod_jmpbuf so that fmd_module_abort() can unwind back here, and then call
 * 'func' (if non-NULL) with the module as its handle.
 *
 * Returns zero if 'func' ran to completion (or was NULL), or the non-zero
 * error passed to longjmp() if the module aborted; in the latter case the
 * error has already been recorded by fmd_module_error().  FMD_MOD_BUSY is
 * still set on return; fmd_module_exit() is the routine that clears it.
 */
int
fmd_module_enter(fmd_module_t *mp, void (*func)(fmd_hdl_t *))
{
	volatile int err; /* tested again after a longjmp() back to setjmp() */

	(void) pthread_mutex_lock(&mp->mod_lock);

	ASSERT(!(mp->mod_flags & FMD_MOD_BUSY));
	mp->mod_flags |= FMD_MOD_BUSY;

	/*
	 * A non-zero return means we arrived via longjmp().  mod_lock was
	 * released below before 'func' was called, so take it again before
	 * recording the error.
	 */
	if ((err = setjmp(mp->mod_jmpbuf)) != 0) {
		(void) pthread_mutex_lock(&mp->mod_lock);
		fmd_module_error(mp, err);
	}

	(void) pthread_cond_broadcast(&mp->mod_cv);
	(void) pthread_mutex_unlock(&mp->mod_lock);

	/*
	 * If it's the first time through fmd_module_enter(), call the provided
	 * function on the module.  If no fmd_module_abort() results, we will
	 * fall through and return zero.  Otherwise we'll longjmp with an err,
	 * return to the setjmp() above, and return the error to our caller.
	 */
	if (err == 0 && func != NULL)
		(*func)((fmd_hdl_t *)mp);

	return (err);
}
846 
847 void
848 fmd_module_exit(fmd_module_t *mp)
849 {
850 	(void) pthread_mutex_lock(&mp->mod_lock);
851 
852 	ASSERT(mp->mod_flags & FMD_MOD_BUSY);
853 	mp->mod_flags &= ~FMD_MOD_BUSY;
854 
855 	(void) pthread_cond_broadcast(&mp->mod_cv);
856 	(void) pthread_mutex_unlock(&mp->mod_lock);
857 }
858 
/*
 * If the client.error policy has been set by a developer, stop or dump core
 * based on the policy; if we stop and are resumed we'll continue and execute
 * the default behavior to discard events in fmd_module_start().  If the caller
 * is the primary module thread, we reach this state by longjmp'ing back to
 * fmd_module_enter(), above.  If the caller is an auxiliary thread, we record
 * the error, mark the module FMD_MOD_FAIL, and cancel and exit the calling
 * thread.
 *
 * This function does not return to its caller: every path ends in
 * fmd_panic(), pthread_exit(), or longjmp().
 */
void
fmd_module_abort(fmd_module_t *mp, int err)
{
	/* policy keeps this default unless fmd_conf_getprop() stores a value */
	uint_t policy = FMD_CERROR_UNLOAD;
	pthread_t tid = pthread_self();

	(void) fmd_conf_getprop(fmd.d_conf, "client.error", &policy);

	if (policy == FMD_CERROR_STOP) {
		fmd_error(err, "stopping after %s in client %s (%p)\n",
		    fmd_errclass(err), mp->mod_name, (void *)mp);
		(void) raise(SIGSTOP);
	} else if (policy == FMD_CERROR_ABORT) {
		fmd_panic("aborting due to %s in client %s (%p)\n",
		    fmd_errclass(err), mp->mod_name, (void *)mp);
	}

	/*
	 * If the caller is an auxiliary thread, cancel the current thread.  We
	 * prefer to cancel because it affords developers the option of using
	 * the pthread_cleanup* APIs.  If cancellations have been disabled,
	 * fall through to forcing the current thread to exit.  In either case
	 * we update mod_error (if zero) to enter the failed state.  Once that
	 * is set, further events received by the module will be discarded.
	 *
	 * We also set the FMD_MOD_FAIL bit, indicating an unrecoverable error.
	 * When an auxiliary thread fails, the module is left in a delicate
	 * state where it is likely not able to continue execution (even to
	 * execute its _fmd_fini() routine) because our caller may hold locks
	 * that are private to the module and can no longer be released.  The
	 * FMD_MOD_FAIL bit forces fmd_api_module_lock() to abort if any other
	 * module threads reach an API call, in an attempt to get them to exit.
	 */
	if (tid != mp->mod_thread->thr_tid) {
		(void) pthread_mutex_lock(&mp->mod_lock);

		if (mp->mod_error == 0)
			mp->mod_error = err;

		mp->mod_flags |= FMD_MOD_FAIL;
		(void) pthread_mutex_unlock(&mp->mod_lock);

		/* tid is the calling thread itself, obtained above */
		(void) pthread_cancel(tid);
		pthread_exit(NULL);
	}

	/*
	 * Primary thread: unwind to the setjmp() in fmd_module_enter() or
	 * fmd_module_dispatch(), which set FMD_MOD_BUSY before arming
	 * mod_jmpbuf.
	 */
	ASSERT(mp->mod_flags & FMD_MOD_BUSY);
	longjmp(mp->mod_jmpbuf, err);
}
916 
917 void
918 fmd_module_hold(fmd_module_t *mp)
919 {
920 	(void) pthread_mutex_lock(&mp->mod_lock);
921 
922 	TRACE((FMD_DBG_MOD, "hold %p (%s/%u)\n",
923 	    (void *)mp, mp->mod_name, mp->mod_refs));
924 
925 	mp->mod_refs++;
926 	ASSERT(mp->mod_refs != 0);
927 
928 	(void) pthread_mutex_unlock(&mp->mod_lock);
929 }
930 
931 void
932 fmd_module_rele(fmd_module_t *mp)
933 {
934 	(void) pthread_mutex_lock(&mp->mod_lock);
935 
936 	TRACE((FMD_DBG_MOD, "rele %p (%s/%u)\n",
937 	    (void *)mp, mp->mod_name, mp->mod_refs));
938 
939 	ASSERT(mp->mod_refs != 0);
940 
941 	if (--mp->mod_refs == 0)
942 		fmd_module_destroy(mp);
943 	else
944 		(void) pthread_mutex_unlock(&mp->mod_lock);
945 }
946 
947 /*
948  * Wrapper around libdiagcode's fm_dc_opendict() to load module dictionaries.
949  * If the dictionary open is successful, the new dictionary is added to the
950  * mod_dictv[] array and mod_codelen is updated with the new maximum length.
951  */
952 int
953 fmd_module_dc_opendict(fmd_module_t *mp, const char *dict)
954 {
955 	struct fm_dc_handle *dcp, **dcv;
956 	char *dictdir, *dictnam, *p;
957 	size_t len;
958 
959 	ASSERT(fmd_module_locked(mp));
960 
961 	dictnam = alloca(strlen(dict) + 1);
962 	(void) strcpy(dictnam, fmd_strbasename(dict));
963 
964 	if ((p = strrchr(dictnam, '.')) != NULL &&
965 	    strcmp(p, ".dict") == 0)
966 		*p = '\0'; /* eliminate any trailing .dict suffix */
967 
968 	/*
969 	 * If 'dict' is an absolute path, dictdir = $rootdir/`dirname dict`
970 	 * If 'dict' is not an absolute path, dictdir = $dictdir/`dirname dict`
971 	 */
972 	if (dict[0] == '/') {
973 		len = strlen(fmd.d_rootdir) + strlen(dict) + 1;
974 		dictdir = alloca(len);
975 		(void) snprintf(dictdir, len, "%s%s", fmd.d_rootdir, dict);
976 		(void) fmd_strdirname(dictdir);
977 	} else {
978 		(void) fmd_conf_getprop(fmd.d_conf, "dictdir", &p);
979 		len = strlen(fmd.d_rootdir) + strlen(p) + strlen(dict) + 3;
980 		dictdir = alloca(len);
981 		(void) snprintf(dictdir, len,
982 		    "%s/%s/%s", fmd.d_rootdir, p, dict);
983 		(void) fmd_strdirname(dictdir);
984 	}
985 
986 	fmd_dprintf(FMD_DBG_MOD, "module %s opening %s -> %s/%s.dict\n",
987 	    mp->mod_name, dict, dictdir, dictnam);
988 
989 	if ((dcp = fm_dc_opendict(FM_DC_VERSION, dictdir, dictnam)) == NULL)
990 		return (-1); /* errno is set for us */
991 
992 	dcv = fmd_alloc(sizeof (dcp) * (mp->mod_dictc + 1), FMD_SLEEP);
993 	bcopy(mp->mod_dictv, dcv, sizeof (dcp) * mp->mod_dictc);
994 	fmd_free(mp->mod_dictv, sizeof (dcp) * mp->mod_dictc);
995 	mp->mod_dictv = dcv;
996 	mp->mod_dictv[mp->mod_dictc++] = dcp;
997 
998 	len = fm_dc_codelen(dcp);
999 	mp->mod_codelen = MAX(mp->mod_codelen, len);
1000 
1001 	return (0);
1002 }
1003 
1004 /*
1005  * Wrapper around libdiagcode's fm_dc_key2code() that examines all the module's
1006  * dictionaries.  We adhere to the libdiagcode return values and semantics.
1007  */
1008 int
1009 fmd_module_dc_key2code(fmd_module_t *mp,
1010     char *const keys[], char *code, size_t codelen)
1011 {
1012 	int i, err;
1013 
1014 	for (i = 0; i < mp->mod_dictc; i++) {
1015 		if ((err = fm_dc_key2code(mp->mod_dictv[i], (const char **)keys,
1016 		    code, codelen)) == 0 || errno != ENOMSG)
1017 			return (err);
1018 	}
1019 
1020 	return (fmd_set_errno(ENOMSG));
1021 }
1022 
1023 fmd_modhash_t *
1024 fmd_modhash_create(void)
1025 {
1026 	fmd_modhash_t *mhp = fmd_alloc(sizeof (fmd_modhash_t), FMD_SLEEP);
1027 
1028 	(void) pthread_rwlock_init(&mhp->mh_lock, NULL);
1029 	mhp->mh_hashlen = fmd.d_str_buckets;
1030 	mhp->mh_hash = fmd_zalloc(sizeof (void *) * mhp->mh_hashlen, FMD_SLEEP);
1031 	mhp->mh_nelems = 0;
1032 
1033 	return (mhp);
1034 }
1035 
1036 void
1037 fmd_modhash_destroy(fmd_modhash_t *mhp)
1038 {
1039 	fmd_module_t *mp, *nmp;
1040 	uint_t i;
1041 
1042 	for (i = 0; i < mhp->mh_hashlen; i++) {
1043 		for (mp = mhp->mh_hash[i]; mp != NULL; mp = nmp) {
1044 			nmp = mp->mod_next;
1045 			mp->mod_next = NULL;
1046 			fmd_module_rele(mp);
1047 		}
1048 	}
1049 
1050 	fmd_free(mhp->mh_hash, sizeof (void *) * mhp->mh_hashlen);
1051 	(void) pthread_rwlock_destroy(&mhp->mh_lock);
1052 	fmd_free(mhp, sizeof (fmd_modhash_t));
1053 }
1054 
1055 static void
1056 fmd_modhash_loaddir(fmd_modhash_t *mhp, const char *dir,
1057     const fmd_modops_t *ops, const char *suffix)
1058 {
1059 	char path[PATH_MAX];
1060 	struct dirent *dp;
1061 	const char *p;
1062 	DIR *dirp;
1063 
1064 	if ((dirp = opendir(dir)) == NULL)
1065 		return; /* failed to open directory; just skip it */
1066 
1067 	while ((dp = readdir(dirp)) != NULL) {
1068 		if (dp->d_name[0] == '.')
1069 			continue; /* skip "." and ".." */
1070 
1071 		p = strrchr(dp->d_name, '.');
1072 
1073 		if (p != NULL && strcmp(p, ".conf") == 0)
1074 			continue; /* skip .conf files */
1075 
1076 		if (suffix != NULL && (p == NULL || strcmp(p, suffix) != 0))
1077 			continue; /* skip files with the wrong suffix */
1078 
1079 		(void) snprintf(path, sizeof (path), "%s/%s", dir, dp->d_name);
1080 		(void) fmd_modhash_load(mhp, path, ops);
1081 	}
1082 
1083 	(void) closedir(dirp);
1084 }
1085 
1086 void
1087 fmd_modhash_loadall(fmd_modhash_t *mhp, const fmd_conf_path_t *pap,
1088     const fmd_modops_t *ops, const char *suffix)
1089 {
1090 	int i;
1091 
1092 	for (i = 0; i < pap->cpa_argc; i++)
1093 		fmd_modhash_loaddir(mhp, pap->cpa_argv[i], ops, suffix);
1094 }
1095 
1096 void
1097 fmd_modhash_apply(fmd_modhash_t *mhp, void (*func)(fmd_module_t *))
1098 {
1099 	fmd_module_t *mp, *np;
1100 	uint_t i;
1101 
1102 	(void) pthread_rwlock_rdlock(&mhp->mh_lock);
1103 
1104 	for (i = 0; i < mhp->mh_hashlen; i++) {
1105 		for (mp = mhp->mh_hash[i]; mp != NULL; mp = np) {
1106 			np = mp->mod_next;
1107 			func(mp);
1108 		}
1109 	}
1110 
1111 	(void) pthread_rwlock_unlock(&mhp->mh_lock);
1112 }
1113 
1114 void
1115 fmd_modhash_tryapply(fmd_modhash_t *mhp, void (*func)(fmd_module_t *))
1116 {
1117 	fmd_module_t *mp, *np;
1118 	uint_t i;
1119 
1120 	if (mhp == NULL || pthread_rwlock_tryrdlock(&mhp->mh_lock) != 0)
1121 		return; /* not initialized or couldn't grab lock */
1122 
1123 	for (i = 0; i < mhp->mh_hashlen; i++) {
1124 		for (mp = mhp->mh_hash[i]; mp != NULL; mp = np) {
1125 			np = mp->mod_next;
1126 			func(mp);
1127 		}
1128 	}
1129 
1130 	(void) pthread_rwlock_unlock(&mhp->mh_lock);
1131 }
1132 
1133 void
1134 fmd_modhash_dispatch(fmd_modhash_t *mhp, fmd_event_t *ep)
1135 {
1136 	fmd_module_t *mp;
1137 	uint_t i;
1138 
1139 	fmd_event_hold(ep);
1140 	(void) pthread_rwlock_rdlock(&mhp->mh_lock);
1141 
1142 	for (i = 0; i < mhp->mh_hashlen; i++) {
1143 		for (mp = mhp->mh_hash[i]; mp != NULL; mp = mp->mod_next) {
1144 			/*
1145 			 * If FMD_MOD_INIT is set but MOD_FINI, MOD_QUIT, and
1146 			 * mod_error are all zero, then the module is active:
1147 			 * enqueue the event in the corresponding event queue.
1148 			 */
1149 			(void) pthread_mutex_lock(&mp->mod_lock);
1150 
1151 			if ((mp->mod_flags & (FMD_MOD_INIT | FMD_MOD_FINI |
1152 			    FMD_MOD_QUIT)) == FMD_MOD_INIT && !mp->mod_error) {
1153 
1154 				/*
1155 				 * If the event we're dispatching is of type
1156 				 * FMD_EVT_TOPO and there are already redundant
1157 				 * FMD_EVT_TOPO events in this module's queue,
1158 				 * then drop those before adding the new one.
1159 				 */
1160 				if (FMD_EVENT_TYPE(ep) == FMD_EVT_TOPO)
1161 					fmd_eventq_drop_topo(mp->mod_queue);
1162 
1163 				fmd_eventq_insert_at_time(mp->mod_queue, ep);
1164 
1165 			}
1166 			(void) pthread_mutex_unlock(&mp->mod_lock);
1167 		}
1168 	}
1169 
1170 	(void) pthread_rwlock_unlock(&mhp->mh_lock);
1171 	fmd_event_rele(ep);
1172 }
1173 
1174 fmd_module_t *
1175 fmd_modhash_lookup(fmd_modhash_t *mhp, const char *name)
1176 {
1177 	fmd_module_t *mp;
1178 	uint_t h;
1179 
1180 	(void) pthread_rwlock_rdlock(&mhp->mh_lock);
1181 	h = fmd_strhash(name) % mhp->mh_hashlen;
1182 
1183 	for (mp = mhp->mh_hash[h]; mp != NULL; mp = mp->mod_next) {
1184 		if (strcmp(name, mp->mod_name) == 0)
1185 			break;
1186 	}
1187 
1188 	if (mp != NULL)
1189 		fmd_module_hold(mp);
1190 	else
1191 		(void) fmd_set_errno(EFMD_MOD_NOMOD);
1192 
1193 	(void) pthread_rwlock_unlock(&mhp->mh_lock);
1194 	return (mp);
1195 }
1196 
1197 fmd_module_t *
1198 fmd_modhash_load(fmd_modhash_t *mhp, const char *path, const fmd_modops_t *ops)
1199 {
1200 	char name[PATH_MAX], *p;
1201 	fmd_module_t *mp;
1202 	int tries = 0;
1203 	uint_t h;
1204 
1205 	(void) strlcpy(name, fmd_strbasename(path), sizeof (name));
1206 	if ((p = strrchr(name, '.')) != NULL && strcmp(p, ".so") == 0)
1207 		*p = '\0'; /* strip trailing .so from any module name */
1208 
1209 	(void) pthread_rwlock_wrlock(&mhp->mh_lock);
1210 	h = fmd_strhash(name) % mhp->mh_hashlen;
1211 
1212 	/*
1213 	 * First check to see if a module is already present in the hash table
1214 	 * for this name.  If so, the module is already loaded: skip it.
1215 	 */
1216 	for (mp = mhp->mh_hash[h]; mp != NULL; mp = mp->mod_next) {
1217 		if (strcmp(name, mp->mod_name) == 0)
1218 			break;
1219 	}
1220 
1221 	if (mp != NULL) {
1222 		(void) pthread_rwlock_unlock(&mhp->mh_lock);
1223 		(void) fmd_set_errno(EFMD_MOD_LOADED);
1224 		return (NULL);
1225 	}
1226 
1227 	/*
1228 	 * fmd_module_create() will return a held (as if by fmd_module_hold())
1229 	 * module.  We leave this hold in place to correspond to the hash-in.
1230 	 */
1231 	while ((mp = fmd_module_create(path, ops)) == NULL) {
1232 		if (tries++ != 0 || errno != EFMD_CKPT_INVAL) {
1233 			(void) pthread_rwlock_unlock(&mhp->mh_lock);
1234 			return (NULL); /* errno is set for us */
1235 		}
1236 	}
1237 
1238 	mp->mod_hash = mhp;
1239 	mp->mod_next = mhp->mh_hash[h];
1240 
1241 	mhp->mh_hash[h] = mp;
1242 	mhp->mh_nelems++;
1243 
1244 	(void) pthread_rwlock_unlock(&mhp->mh_lock);
1245 	return (mp);
1246 }
1247 
1248 int
1249 fmd_modhash_unload(fmd_modhash_t *mhp, const char *name)
1250 {
1251 	fmd_module_t *mp, **pp;
1252 	uint_t h;
1253 
1254 	(void) pthread_rwlock_wrlock(&mhp->mh_lock);
1255 	h = fmd_strhash(name) % mhp->mh_hashlen;
1256 	pp = &mhp->mh_hash[h];
1257 
1258 	for (mp = *pp; mp != NULL; mp = mp->mod_next) {
1259 		if (strcmp(name, mp->mod_name) == 0)
1260 			break;
1261 		else
1262 			pp = &mp->mod_next;
1263 	}
1264 
1265 	if (mp == NULL) {
1266 		(void) pthread_rwlock_unlock(&mhp->mh_lock);
1267 		return (fmd_set_errno(EFMD_MOD_NOMOD));
1268 	}
1269 
1270 	*pp = mp->mod_next;
1271 	mp->mod_next = NULL;
1272 
1273 	ASSERT(mhp->mh_nelems != 0);
1274 	mhp->mh_nelems--;
1275 
1276 	(void) pthread_rwlock_unlock(&mhp->mh_lock);
1277 
1278 	fmd_module_unload(mp);
1279 	fmd_module_rele(mp);
1280 
1281 	return (0);
1282 }
1283 
1284 void
1285 fmd_modstat_publish(fmd_module_t *mp)
1286 {
1287 	(void) pthread_mutex_lock(&mp->mod_lock);
1288 
1289 	ASSERT(mp->mod_flags & FMD_MOD_STSUB);
1290 	mp->mod_flags |= FMD_MOD_STPUB;
1291 	(void) pthread_cond_broadcast(&mp->mod_cv);
1292 
1293 	while (mp->mod_flags & FMD_MOD_STPUB)
1294 		(void) pthread_cond_wait(&mp->mod_cv, &mp->mod_lock);
1295 
1296 	(void) pthread_mutex_unlock(&mp->mod_lock);
1297 }
1298 
1299 int
1300 fmd_modstat_snapshot(fmd_module_t *mp, fmd_ustat_snap_t *uss)
1301 {
1302 	fmd_event_t *e;
1303 	int err;
1304 
1305 	/*
1306 	 * Grab the module lock and wait for the STSUB bit to be clear.  Then
1307 	 * set it to indicate we are a subscriber and everyone else must wait.
1308 	 */
1309 	(void) pthread_mutex_lock(&mp->mod_lock);
1310 
1311 	while (mp->mod_error == 0 && (mp->mod_flags & FMD_MOD_STSUB))
1312 		(void) pthread_cond_wait(&mp->mod_cv, &mp->mod_lock);
1313 
1314 	if (mp->mod_error != 0) {
1315 		(void) pthread_mutex_unlock(&mp->mod_lock);
1316 		return (fmd_set_errno(EFMD_HDL_ABORT));
1317 	}
1318 
1319 	mp->mod_flags |= FMD_MOD_STSUB;
1320 	(void) pthread_cond_broadcast(&mp->mod_cv);
1321 	(void) pthread_mutex_unlock(&mp->mod_lock);
1322 
1323 	/*
1324 	 * Create a stats pseudo-event and dispatch it to the module, forcing
1325 	 * it to next execute its custom snapshot routine (or the empty one).
1326 	 */
1327 	e = fmd_event_create(FMD_EVT_STATS, FMD_HRT_NOW, NULL, NULL);
1328 	fmd_eventq_insert_at_head(mp->mod_queue, e);
1329 
1330 	/*
1331 	 * Grab the module lock and then wait on mod_cv for STPUB to be set,
1332 	 * indicating the snapshot routine is completed and the module is idle.
1333 	 */
1334 	(void) pthread_mutex_lock(&mp->mod_lock);
1335 
1336 	while (mp->mod_error == 0 && !(mp->mod_flags & FMD_MOD_STPUB)) {
1337 		struct timespec tms;
1338 
1339 		(void) pthread_cond_wait(&mp->mod_cv, &mp->mod_lock);
1340 		(void) pthread_mutex_unlock(&mp->mod_lock);
1341 		tms.tv_sec = 0;
1342 		tms.tv_nsec = 10000000;
1343 		(void) nanosleep(&tms, NULL);
1344 		(void) pthread_mutex_lock(&mp->mod_lock);
1345 	}
1346 
1347 	if (mp->mod_error != 0) {
1348 		(void) pthread_mutex_unlock(&mp->mod_lock);
1349 		return (fmd_set_errno(EFMD_HDL_ABORT));
1350 	}
1351 
1352 	(void) pthread_cond_broadcast(&mp->mod_cv);
1353 	(void) pthread_mutex_unlock(&mp->mod_lock);
1354 
1355 	/*
1356 	 * Update ms_snaptime and take the actual snapshot of the various
1357 	 * statistics while the module is quiescent and waiting for us.
1358 	 */
1359 	(void) pthread_mutex_lock(&mp->mod_stats_lock);
1360 
1361 	if (mp->mod_stats != NULL) {
1362 		mp->mod_stats->ms_snaptime.fmds_value.ui64 = gethrtime();
1363 		err = fmd_ustat_snapshot(mp->mod_ustat, uss);
1364 	} else
1365 		err = fmd_set_errno(EFMD_HDL_ABORT);
1366 
1367 	(void) pthread_mutex_unlock(&mp->mod_stats_lock);
1368 
1369 	/*
1370 	 * With the snapshot complete, grab the module lock and clear both
1371 	 * STSUB and STPUB, permitting everyone to wake up and continue.
1372 	 */
1373 	(void) pthread_mutex_lock(&mp->mod_lock);
1374 
1375 	ASSERT(mp->mod_flags & FMD_MOD_STSUB);
1376 	ASSERT(mp->mod_flags & FMD_MOD_STPUB);
1377 	mp->mod_flags &= ~(FMD_MOD_STSUB | FMD_MOD_STPUB);
1378 
1379 	(void) pthread_cond_broadcast(&mp->mod_cv);
1380 	(void) pthread_mutex_unlock(&mp->mod_lock);
1381 
1382 	return (err);
1383 }
1384 
1385 struct topo_hdl *
1386 fmd_module_topo_hold(fmd_module_t *mp)
1387 {
1388 	fmd_modtopo_t *mtp;
1389 
1390 	ASSERT(fmd_module_locked(mp));
1391 
1392 	mtp = fmd_zalloc(sizeof (fmd_modtopo_t), FMD_SLEEP);
1393 	mtp->mt_topo = mp->mod_topo_current;
1394 	fmd_topo_addref(mtp->mt_topo);
1395 	fmd_list_prepend(&mp->mod_topolist, mtp);
1396 
1397 	return (mtp->mt_topo->ft_hdl);
1398 }
1399 
1400 int
1401 fmd_module_topo_rele(fmd_module_t *mp, struct topo_hdl *hdl)
1402 {
1403 	fmd_modtopo_t *mtp;
1404 
1405 	ASSERT(fmd_module_locked(mp));
1406 
1407 	for (mtp = fmd_list_next(&mp->mod_topolist); mtp != NULL;
1408 	    mtp = fmd_list_next(mtp)) {
1409 		if (mtp->mt_topo->ft_hdl == hdl)
1410 			break;
1411 	}
1412 
1413 	if (mtp == NULL)
1414 		return (-1);
1415 
1416 	fmd_list_delete(&mp->mod_topolist, mtp);
1417 	fmd_topo_rele(mtp->mt_topo);
1418 	fmd_free(mtp, sizeof (fmd_modtopo_t));
1419 	return (0);
1420 }
1421