xref: /illumos-gate/usr/src/cmd/fm/fmd/common/fmd_xprt.c (revision 24f5a37652e188ebdcdd6da454511686935025df)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 /*
27  * FMD Transport Subsystem
28  *
29  * A transport module uses some underlying mechanism to transport events.
30  * This mechanism may use any underlying link-layer protocol and may support
31  * additional link-layer packets unrelated to FMA.  Some appropriate link-
32  * layer mechanism to create the underlying connection is expected to be
33  * called prior to calling fmd_xprt_open() itself.  Alternatively, a transport
34  * may be created in the suspended state by specifying the FMD_XPRT_SUSPENDED
35  * flag as part of the call to fmd_xprt_open(), and then may be resumed later.
36  * The underlying transport mechanism is *required* to provide ordering: that
37  * is, the sequences of bytes written across the transport must be read by
38  * the remote peer in the order that they are written, even across separate
39  * calls to fmdo_send().  As an example, the Internet TCP protocol would be
40  * a valid transport as it guarantees ordering, whereas the Internet UDP
41  * protocol would not because UDP datagrams may be delivered in any order
42  * as a result of delays introduced when datagrams pass through routers.
43  *
44  * Similar to sending events, a transport module receives events that are from
45  * its peer remote endpoint using some transport-specific mechanism that is
46  * unknown to FMD.  As each event is received, the transport module is
47  * responsible for constructing a valid nvlist_t object from the data and then
48  * calling fmd_xprt_post() to post the event to the containing FMD's dispatch
49  * queue, making it available to all local modules that are not transport
50  * modules that have subscribed to the event.
51  *
52  * The following state machine is used for each transport.  The initial state
53  * is either SYN, ACK, or RUN, depending on the flags specified to xprt_create.
54  *
55  *       FMD_XPRT_ACCEPT   !FMD_XPRT_ACCEPT
56  *             |                 |
57  * waiting  +--v--+           +--v--+  waiting
58  * for syn  | SYN |--+     --+| ACK |  for ack
59  * event    +-----+   \   /   +-----+  event
60  *             |       \ /       |
61  * drop all +--v--+     X     +--v--+  send subscriptions,
62  * events   | ERR |<---+ +--->| SUB |  recv subscriptions,
63  *          +-----+           +-----+  wait for run event
64  *             ^                 |
65  *             |     +-----+     |
66  *             +-----| RUN |<----+
67  *                   +--^--+
68  *                      |
69  *               FMD_XPRT_RDONLY
70  *
71  * When fmd_xprt_open() is called without FMD_XPRT_ACCEPT, the Common Transport
72  * Layer enqueues a "syn" event for the module in its event queue and sets the
73  * state to ACK.  In state ACK, we are waiting for the transport to get an
74  * "ack" event and call fmd_xprt_post() on this event.  Other events will be
75  * discarded.  If an "ack" is received, we transition to state SUB.  If a
76  * configurable timeout occurs or if the "ack" is invalid (e.g. invalid version
 * exchange), we transition to state ERR.  Once in state ERR, no further
 * operations are valid except fmd_xprt_close(); fmd_xprt_error() will then
 * return a non-zero value to the caller indicating the transport has failed.
80  *
81  * When fmd_xprt_open() is called with FMD_XPRT_ACCEPT, the Common Transport
82  * Layer assumes this transport is being used to accept a virtual connection
83  * from a remote peer that is sending a "syn", and sets the initial state to
84  * SYN.  In this state, the transport waits for a "syn" event, validates it,
85  * and then transitions to state SUB if it is valid or state ERR if it is not.
86  *
87  * Once in state SUB, the transport module is expected to receive a sequence of
88  * zero or more "subscribe" events from the remote peer, followed by a "run"
89  * event.  Once in state RUN, the transport is active and any events can be
90  * sent or received.  The transport module is free to call fmd_xprt_close()
91  * from any state.  The fmd_xprt_error() function will return zero if the
92  * transport is not in the ERR state, or non-zero if it is in the ERR state.
93  *
94  * Once the state machine reaches RUN, other FMA protocol events can be sent
95  * and received across the transport in addition to the various control events.
96  *
97  * Table of Common Transport Layer Control Events
98  * ==============================================
99  *
100  * FMA Class                     Payload
101  * ---------                     -------
102  * resource.fm.xprt.uuclose      string (uuid of case)
103  * resource.fm.xprt.uuresolved   string (uuid of case)
104  * resource.fm.xprt.updated      string (uuid of case)
105  * resource.fm.xprt.subscribe    string (class pattern)
106  * resource.fm.xprt.unsubscribe  string (class pattern)
107  * resource.fm.xprt.unsuback     string (class pattern)
108  * resource.fm.xprt.syn          version information
109  * resource.fm.xprt.ack          version information
110  * resource.fm.xprt.run          version information
111  *
112  * Control events are used to add and delete proxy subscriptions on the remote
113  * transport peer module, and to set up connections.  When a "syn" event is
114  * sent, FMD will include in the payload the highest version of the FMA event
115  * protocol that is supported by the sender.  When a "syn" event is received,
116  * the receiving FMD will use the minimum of this version and its version of
117  * the protocol, and reply with this new minimum version in the "ack" event.
118  * The receiver will then use this new minimum for subsequent event semantics.
119  */
120 
121 #include <sys/fm/protocol.h>
122 #include <strings.h>
123 #include <limits.h>
124 
125 #include <fmd_alloc.h>
126 #include <fmd_error.h>
127 #include <fmd_conf.h>
128 #include <fmd_subr.h>
129 #include <fmd_string.h>
130 #include <fmd_protocol.h>
131 #include <fmd_thread.h>
132 #include <fmd_eventq.h>
133 #include <fmd_dispq.h>
134 #include <fmd_ctl.h>
135 #include <fmd_log.h>
136 #include <fmd_ustat.h>
137 #include <fmd_case.h>
138 #include <fmd_api.h>
139 #include <fmd_fmri.h>
140 #include <fmd_asru.h>
141 #include <fmd_xprt.h>
142 
143 #include <fmd.h>
144 
145 /*
146  * The states shown above in the transport state machine diagram are encoded
147  * using arrays of class patterns and a corresponding action function.  These
148  * arrays are then passed to fmd_xprt_transition() to change transport states.
149  */
150 
151 const fmd_xprt_rule_t _fmd_xprt_state_syn[] = {
152 { "resource.fm.xprt.syn", fmd_xprt_event_syn },
153 { "*", fmd_xprt_event_error },
154 { NULL, NULL }
155 };
156 
157 const fmd_xprt_rule_t _fmd_xprt_state_ack[] = {
158 { "resource.fm.xprt.ack", fmd_xprt_event_ack },
159 { "*", fmd_xprt_event_error },
160 };
161 
162 const fmd_xprt_rule_t _fmd_xprt_state_err[] = {
163 { "*", fmd_xprt_event_drop },
164 { NULL, NULL }
165 };
166 
167 const fmd_xprt_rule_t _fmd_xprt_state_sub[] = {
168 { "resource.fm.xprt.subscribe", fmd_xprt_event_sub },
169 { "resource.fm.xprt.run", fmd_xprt_event_run },
170 { "resource.fm.xprt.*", fmd_xprt_event_error },
171 { "*", fmd_xprt_event_drop },
172 { NULL, NULL }
173 };
174 
175 const fmd_xprt_rule_t _fmd_xprt_state_run[] = {
176 { "resource.fm.xprt.subscribe", fmd_xprt_event_sub },
177 { "resource.fm.xprt.unsubscribe", fmd_xprt_event_unsub },
178 { "resource.fm.xprt.unsuback", fmd_xprt_event_unsuback },
179 { "resource.fm.xprt.uuclose", fmd_xprt_event_uuclose },
180 { "resource.fm.xprt.uuresolved", fmd_xprt_event_uuresolved },
181 { "resource.fm.xprt.updated", fmd_xprt_event_updated },
182 { "resource.fm.xprt.*", fmd_xprt_event_error },
183 { NULL, NULL }
184 };
185 
186 /*
187  * Template for per-transport statistics installed by fmd on behalf of each
188  * transport.  These are used to initialize the per-transport xi_stats.  For
189  * each statistic, the name is prepended with "fmd.xprt.%u", where %u is the
190  * transport ID (xi_id) and then are inserted into the per-module stats hash.
191  * The values in this array must match fmd_xprt_stat_t from <fmd_xprt.h>.
192  */
193 static const fmd_xprt_stat_t _fmd_xprt_stat_tmpl = {
194 {
195 { "dispatched", FMD_TYPE_UINT64, "total events dispatched to transport" },
196 { "dequeued", FMD_TYPE_UINT64, "total events dequeued by transport" },
197 { "prdequeued", FMD_TYPE_UINT64, "protocol events dequeued by transport" },
198 { "dropped", FMD_TYPE_UINT64, "total events dropped on queue overflow" },
199 { "wcnt", FMD_TYPE_UINT32, "count of events waiting on queue" },
200 { "wtime", FMD_TYPE_TIME, "total wait time on queue" },
201 { "wlentime", FMD_TYPE_TIME, "total wait length * time product" },
202 { "wlastupdate", FMD_TYPE_TIME, "hrtime of last wait queue update" },
203 { "dtime", FMD_TYPE_TIME, "total processing time after dequeue" },
204 { "dlastupdate", FMD_TYPE_TIME, "hrtime of last event dequeue completion" },
205 },
206 { "module", FMD_TYPE_STRING, "module that owns this transport" },
207 { "authority", FMD_TYPE_STRING, "authority associated with this transport" },
208 { "state", FMD_TYPE_STRING, "current transport state" },
209 { "received", FMD_TYPE_UINT64, "events received by transport" },
210 { "discarded", FMD_TYPE_UINT64, "bad events discarded by transport" },
211 { "retried", FMD_TYPE_UINT64, "retries requested of transport" },
212 { "replayed", FMD_TYPE_UINT64, "events replayed by transport" },
213 { "lost", FMD_TYPE_UINT64, "events lost by transport" },
214 { "timeouts", FMD_TYPE_UINT64, "events received by transport with ttl=0" },
215 { "subscriptions", FMD_TYPE_UINT64, "subscriptions registered to transport" },
216 };
217 
218 static void
219 fmd_xprt_class_hash_create(fmd_xprt_class_hash_t *xch, fmd_eventq_t *eq)
220 {
221 	uint_t hashlen = fmd.d_str_buckets;
222 
223 	xch->xch_queue = eq;
224 	xch->xch_hashlen = hashlen;
225 	xch->xch_hash = fmd_zalloc(sizeof (void *) * hashlen, FMD_SLEEP);
226 }
227 
228 static void
229 fmd_xprt_class_hash_destroy(fmd_xprt_class_hash_t *xch)
230 {
231 	fmd_eventq_t *eq = xch->xch_queue;
232 	fmd_xprt_class_t *xcp, *ncp;
233 	uint_t i;
234 
235 	for (i = 0; i < xch->xch_hashlen; i++) {
236 		for (xcp = xch->xch_hash[i]; xcp != NULL; xcp = ncp) {
237 			ncp = xcp->xc_next;
238 
239 			if (eq != NULL)
240 				fmd_dispq_delete(fmd.d_disp, eq, xcp->xc_class);
241 
242 			fmd_strfree(xcp->xc_class);
243 			fmd_free(xcp, sizeof (fmd_xprt_class_t));
244 		}
245 	}
246 
247 	fmd_free(xch->xch_hash, sizeof (void *) * xch->xch_hashlen);
248 }
249 
250 /*
251  * Insert the specified class into the specified class hash, and return the
252  * reference count.  A return value of one indicates this is the first insert.
253  * If an eventq is associated with the hash, insert a dispq subscription for it.
254  */
255 static uint_t
256 fmd_xprt_class_hash_insert(fmd_xprt_impl_t *xip,
257     fmd_xprt_class_hash_t *xch, const char *class)
258 {
259 	uint_t h = fmd_strhash(class) % xch->xch_hashlen;
260 	fmd_xprt_class_t *xcp;
261 
262 	ASSERT(MUTEX_HELD(&xip->xi_lock));
263 
264 	for (xcp = xch->xch_hash[h]; xcp != NULL; xcp = xcp->xc_next) {
265 		if (strcmp(class, xcp->xc_class) == 0)
266 			return (++xcp->xc_refs);
267 	}
268 
269 	xcp = fmd_alloc(sizeof (fmd_xprt_class_t), FMD_SLEEP);
270 	xcp->xc_class = fmd_strdup(class, FMD_SLEEP);
271 	xcp->xc_next = xch->xch_hash[h];
272 	xcp->xc_refs = 1;
273 	xch->xch_hash[h] = xcp;
274 
275 	if (xch->xch_queue != NULL)
276 		fmd_dispq_insert(fmd.d_disp, xch->xch_queue, class);
277 
278 	return (xcp->xc_refs);
279 }
280 
281 /*
282  * Delete the specified class from the specified class hash, and return the
283  * reference count.  A return value of zero indicates the class was deleted.
284  * If an eventq is associated with the hash, delete the dispq subscription.
285  */
286 static uint_t
287 fmd_xprt_class_hash_delete(fmd_xprt_impl_t *xip,
288     fmd_xprt_class_hash_t *xch, const char *class)
289 {
290 	uint_t h = fmd_strhash(class) % xch->xch_hashlen;
291 	fmd_xprt_class_t *xcp, **pp;
292 
293 	ASSERT(MUTEX_HELD(&xip->xi_lock));
294 	pp = &xch->xch_hash[h];
295 
296 	for (xcp = *pp; xcp != NULL; xcp = xcp->xc_next) {
297 		if (strcmp(class, xcp->xc_class) == 0)
298 			break;
299 		else
300 			pp = &xcp->xc_next;
301 	}
302 
303 	if (xcp == NULL)
304 		return (-1U); /* explicitly permit an invalid delete */
305 
306 	if (--xcp->xc_refs != 0)
307 		return (xcp->xc_refs);
308 
309 	ASSERT(xcp->xc_refs == 0);
310 	*pp = xcp->xc_next;
311 
312 	fmd_strfree(xcp->xc_class);
313 	fmd_free(xcp, sizeof (fmd_xprt_class_t));
314 
315 	if (xch->xch_queue != NULL)
316 		fmd_dispq_delete(fmd.d_disp, xch->xch_queue, class);
317 
318 	return (0);
319 }
320 
321 /*
322  * Queue subscribe events for the specified transport corresponding to all of
323  * the active module subscriptions.  This is an extremely heavyweight operation
324  * that we expect to take place rarely (i.e. when loading a transport module
325  * or when it establishes a connection).  We lock all of the known modules to
326  * prevent them from adding or deleting subscriptions, then snapshot their
327  * subscriptions, and then unlock all of the modules.  We hold the modhash
328  * lock for the duration of this operation to prevent new modules from loading.
329  */
330 static void
331 fmd_xprt_subscribe_modhash(fmd_xprt_impl_t *xip, fmd_modhash_t *mhp)
332 {
333 	fmd_xprt_t *xp = (fmd_xprt_t *)xip;
334 	const fmd_conf_path_t *pap;
335 	fmd_module_t *mp;
336 	uint_t i, j;
337 
338 	(void) pthread_rwlock_rdlock(&mhp->mh_lock);
339 
340 	for (i = 0; i < mhp->mh_hashlen; i++) {
341 		for (mp = mhp->mh_hash[i]; mp != NULL; mp = mp->mod_next)
342 			fmd_module_lock(mp);
343 	}
344 
345 	(void) pthread_mutex_lock(&xip->xi_lock);
346 	ASSERT(!(xip->xi_flags & FMD_XPRT_SUBSCRIBER));
347 	xip->xi_flags |= FMD_XPRT_SUBSCRIBER;
348 	(void) pthread_mutex_unlock(&xip->xi_lock);
349 
350 	for (i = 0; i < mhp->mh_hashlen; i++) {
351 		for (mp = mhp->mh_hash[i]; mp != NULL; mp = mp->mod_next) {
352 			(void) fmd_conf_getprop(mp->mod_conf,
353 			    FMD_PROP_SUBSCRIPTIONS, &pap);
354 			for (j = 0; j < pap->cpa_argc; j++)
355 				fmd_xprt_subscribe(xp, pap->cpa_argv[j]);
356 		}
357 	}
358 
359 	for (i = 0; i < mhp->mh_hashlen; i++) {
360 		for (mp = mhp->mh_hash[i]; mp != NULL; mp = mp->mod_next)
361 			fmd_module_unlock(mp);
362 	}
363 
364 	(void) pthread_rwlock_unlock(&mhp->mh_lock);
365 }
366 
367 static void
368 fmd_xprt_transition(fmd_xprt_impl_t *xip,
369     const fmd_xprt_rule_t *state, const char *tag)
370 {
371 	fmd_xprt_t *xp = (fmd_xprt_t *)xip;
372 	fmd_event_t *e;
373 	nvlist_t *nvl;
374 	char *s;
375 
376 	TRACE((FMD_DBG_XPRT, "xprt %u -> %s\n", xip->xi_id, tag));
377 
378 	xip->xi_state = state;
379 	s = fmd_strdup(tag, FMD_SLEEP);
380 
381 	(void) pthread_mutex_lock(&xip->xi_stats_lock);
382 	fmd_strfree(xip->xi_stats->xs_state.fmds_value.str);
383 	xip->xi_stats->xs_state.fmds_value.str = s;
384 	(void) pthread_mutex_unlock(&xip->xi_stats_lock);
385 
386 	/*
387 	 * If we've reached the SUB state, take out the big hammer and snapshot
388 	 * all of the subscriptions of all of the loaded modules.  Then queue a
389 	 * run event for our remote peer indicating that it can enter RUN.
390 	 */
391 	if (state == _fmd_xprt_state_sub) {
392 		fmd_xprt_subscribe_modhash(xip, fmd.d_mod_hash);
393 
394 		/*
395 		 * For read-write transports, we always want to set up remote
396 		 * subscriptions to the bultin list.* events, regardless of
397 		 * whether any agents have subscribed to them.
398 		 */
399 		if (xip->xi_flags & FMD_XPRT_RDWR) {
400 			fmd_xprt_subscribe(xp, FM_LIST_SUSPECT_CLASS);
401 			fmd_xprt_subscribe(xp, FM_LIST_ISOLATED_CLASS);
402 			fmd_xprt_subscribe(xp, FM_LIST_UPDATED_CLASS);
403 			fmd_xprt_subscribe(xp, FM_LIST_RESOLVED_CLASS);
404 			fmd_xprt_subscribe(xp, FM_LIST_REPAIRED_CLASS);
405 		}
406 
407 		nvl = fmd_protocol_xprt_ctl(xip->xi_queue->eq_mod,
408 		    "resource.fm.xprt.run", xip->xi_version);
409 
410 		(void) nvlist_lookup_string(nvl, FM_CLASS, &s);
411 		e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, s);
412 		fmd_eventq_insert_at_time(xip->xi_queue, e);
413 	}
414 }
415 
416 static void
417 fmd_xprt_authupdate(fmd_xprt_impl_t *xip)
418 {
419 	char *s = fmd_fmri_auth2str(xip->xi_auth);
420 
421 	(void) pthread_mutex_lock(&xip->xi_stats_lock);
422 	fmd_strfree(xip->xi_stats->xs_authority.fmds_value.str);
423 	xip->xi_stats->xs_authority.fmds_value.str = s;
424 	(void) pthread_mutex_unlock(&xip->xi_stats_lock);
425 }
426 
427 static int
428 fmd_xprt_vmismatch(fmd_xprt_impl_t *xip, nvlist_t *nvl, uint_t *rversionp)
429 {
430 	uint8_t rversion;
431 
432 	if (nvlist_lookup_uint8(nvl, FM_VERSION, &rversion) != 0) {
433 		(void) pthread_mutex_lock(&xip->xi_stats_lock);
434 		xip->xi_stats->xs_discarded.fmds_value.ui64++;
435 		(void) pthread_mutex_unlock(&xip->xi_stats_lock);
436 
437 		fmd_xprt_transition(xip, _fmd_xprt_state_err, "ERR");
438 		return (1);
439 	}
440 
441 	if (rversion > xip->xi_version) {
442 		fmd_dprintf(FMD_DBG_XPRT, "xprt %u protocol mismatch: %u>%u\n",
443 		    xip->xi_id, rversion, xip->xi_version);
444 
445 		(void) pthread_mutex_lock(&xip->xi_stats_lock);
446 		xip->xi_stats->xs_discarded.fmds_value.ui64++;
447 		(void) pthread_mutex_unlock(&xip->xi_stats_lock);
448 
449 		fmd_xprt_transition(xip, _fmd_xprt_state_err, "ERR");
450 		return (1);
451 	}
452 
453 	if (rversionp != NULL)
454 		*rversionp = rversion;
455 
456 	return (0);
457 }
458 
459 void
460 fmd_xprt_event_syn(fmd_xprt_impl_t *xip, nvlist_t *nvl)
461 {
462 	fmd_event_t *e;
463 	uint_t vers;
464 	char *class;
465 
466 	if (fmd_xprt_vmismatch(xip, nvl, &vers))
467 		return; /* transitioned to error state */
468 
469 	/*
470 	 * If the transport module didn't specify an authority, extract the
471 	 * one that is passed along with the xprt.syn event and use that.
472 	 */
473 	if (xip->xi_auth == NULL &&
474 	    nvlist_lookup_nvlist(nvl, FM_RSRC_RESOURCE, &nvl) == 0 &&
475 	    nvlist_lookup_nvlist(nvl, FM_FMRI_AUTHORITY, &nvl) == 0) {
476 		(void) nvlist_xdup(nvl, &xip->xi_auth, &fmd.d_nva);
477 		fmd_xprt_authupdate(xip);
478 	}
479 
480 	nvl = fmd_protocol_xprt_ctl(xip->xi_queue->eq_mod,
481 	    "resource.fm.xprt.ack", xip->xi_version);
482 
483 	(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
484 	e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
485 	fmd_eventq_insert_at_time(xip->xi_queue, e);
486 
487 	xip->xi_version = MIN(FM_RSRC_XPRT_VERSION, vers);
488 	fmd_xprt_transition(xip, _fmd_xprt_state_sub, "SUB");
489 }
490 
491 void
492 fmd_xprt_event_ack(fmd_xprt_impl_t *xip, nvlist_t *nvl)
493 {
494 	uint_t vers;
495 
496 	if (fmd_xprt_vmismatch(xip, nvl, &vers))
497 		return; /* transitioned to error state */
498 
499 	/*
500 	 * If the transport module didn't specify an authority, extract the
501 	 * one that is passed along with the xprt.syn event and use that.
502 	 */
503 	if (xip->xi_auth == NULL &&
504 	    nvlist_lookup_nvlist(nvl, FM_RSRC_RESOURCE, &nvl) == 0 &&
505 	    nvlist_lookup_nvlist(nvl, FM_FMRI_AUTHORITY, &nvl) == 0) {
506 		(void) nvlist_xdup(nvl, &xip->xi_auth, &fmd.d_nva);
507 		fmd_xprt_authupdate(xip);
508 	}
509 
510 	xip->xi_version = MIN(FM_RSRC_XPRT_VERSION, vers);
511 	fmd_xprt_transition(xip, _fmd_xprt_state_sub, "SUB");
512 }
513 
514 /*
515  * Upon transition to RUN, we take every solved case and resend a list.suspect
516  * event for it to our remote peer.  If a case transitions from solved to a
517  * future state (CLOSE_WAIT, CLOSED, or REPAIRED) while we are iterating over
518  * the case hash, we will get it as part of examining the resource cache, next.
519  */
520 static void
521 fmd_xprt_send_case(fmd_case_t *cp, void *arg)
522 {
523 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
524 	fmd_xprt_impl_t *xip = arg;
525 
526 	fmd_event_t *e;
527 	nvlist_t *nvl;
528 	char *class;
529 
530 	if (cip->ci_state != FMD_CASE_SOLVED)
531 		return;
532 
533 	nvl = fmd_case_mkevent(cp, FM_LIST_SUSPECT_CLASS);
534 	(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
535 	e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
536 
537 	fmd_dprintf(FMD_DBG_XPRT, "re-send %s for %s to transport %u\n",
538 	    FM_LIST_SUSPECT_CLASS, cip->ci_uuid, xip->xi_id);
539 
540 	fmd_dispq_dispatch_gid(fmd.d_disp, e, class, xip->xi_queue->eq_sgid);
541 }
542 
543 /*
544  * Similar to the above function, but for use with readonly transport. Puts
545  * the event on the module's queue such that it's fmdo_recv function can pick
546  * it up and send it if appropriate.
547  */
548 static void
549 fmd_xprt_send_case_ro(fmd_case_t *cp, void *arg)
550 {
551 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
552 	fmd_module_t *mp = arg;
553 
554 	fmd_event_t *e;
555 	nvlist_t *nvl;
556 	char *class;
557 
558 	if (cip->ci_state != FMD_CASE_SOLVED)
559 		return;
560 
561 	nvl = fmd_case_mkevent(cp, FM_LIST_SUSPECT_CLASS);
562 	(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
563 	e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
564 
565 	fmd_dprintf(FMD_DBG_XPRT, "re-send %s for %s to rdonly transport %s\n",
566 	    FM_LIST_SUSPECT_CLASS, cip->ci_uuid, mp->mod_name);
567 
568 	fmd_dispq_dispatch_gid(fmd.d_disp, e, class, mp->mod_queue->eq_sgid);
569 }
570 
571 void
572 fmd_xprt_event_run(fmd_xprt_impl_t *xip, nvlist_t *nvl)
573 {
574 	if (!fmd_xprt_vmismatch(xip, nvl, NULL)) {
575 		fmd_xprt_transition(xip, _fmd_xprt_state_run, "RUN");
576 		fmd_case_hash_apply(fmd.d_cases, fmd_xprt_send_case, xip);
577 	}
578 }
579 
580 void
581 fmd_xprt_event_sub(fmd_xprt_impl_t *xip, nvlist_t *nvl)
582 {
583 	char *class;
584 
585 	if (fmd_xprt_vmismatch(xip, nvl, NULL))
586 		return; /* transitioned to error state */
587 
588 	if (nvlist_lookup_string(nvl, FM_RSRC_XPRT_SUBCLASS, &class) != 0)
589 		return; /* malformed protocol event */
590 
591 	(void) pthread_mutex_lock(&xip->xi_lock);
592 	(void) fmd_xprt_class_hash_insert(xip, &xip->xi_lsub, class);
593 	(void) pthread_mutex_unlock(&xip->xi_lock);
594 
595 	(void) pthread_mutex_lock(&xip->xi_stats_lock);
596 	xip->xi_stats->xs_subscriptions.fmds_value.ui64++;
597 	(void) pthread_mutex_unlock(&xip->xi_stats_lock);
598 }
599 
600 void
601 fmd_xprt_event_unsub(fmd_xprt_impl_t *xip, nvlist_t *nvl)
602 {
603 	fmd_event_t *e;
604 	char *class;
605 
606 	if (fmd_xprt_vmismatch(xip, nvl, NULL))
607 		return; /* transitioned to error state */
608 
609 	if (nvlist_lookup_string(nvl, FM_RSRC_XPRT_SUBCLASS, &class) != 0)
610 		return; /* malformed protocol event */
611 
612 	(void) pthread_mutex_lock(&xip->xi_lock);
613 	(void) fmd_xprt_class_hash_delete(xip, &xip->xi_lsub, class);
614 	(void) pthread_mutex_unlock(&xip->xi_lock);
615 
616 	(void) pthread_mutex_lock(&xip->xi_stats_lock);
617 	xip->xi_stats->xs_subscriptions.fmds_value.ui64--;
618 	(void) pthread_mutex_unlock(&xip->xi_stats_lock);
619 
620 	nvl = fmd_protocol_xprt_sub(xip->xi_queue->eq_mod,
621 	    "resource.fm.xprt.unsuback", xip->xi_version, class);
622 
623 	(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
624 	e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
625 	fmd_eventq_insert_at_time(xip->xi_queue, e);
626 }
627 
628 void
629 fmd_xprt_event_unsuback(fmd_xprt_impl_t *xip, nvlist_t *nvl)
630 {
631 	char *class;
632 
633 	if (fmd_xprt_vmismatch(xip, nvl, NULL))
634 		return; /* transitioned to error state */
635 
636 	if (nvlist_lookup_string(nvl, FM_RSRC_XPRT_SUBCLASS, &class) != 0)
637 		return; /* malformed protocol event */
638 
639 	(void) pthread_mutex_lock(&xip->xi_lock);
640 	(void) fmd_xprt_class_hash_delete(xip, &xip->xi_usub, class);
641 	(void) pthread_mutex_unlock(&xip->xi_lock);
642 }
643 
644 /*
645  * on diagnosing side, receive a uuclose from the proxy.
646  */
647 void
648 fmd_xprt_event_uuclose(fmd_xprt_impl_t *xip, nvlist_t *nvl)
649 {
650 	fmd_case_t *cp;
651 	char *uuid;
652 
653 	if (fmd_xprt_vmismatch(xip, nvl, NULL))
654 		return; /* transitioned to error state */
655 
656 	if (nvlist_lookup_string(nvl, FM_RSRC_XPRT_UUID, &uuid) == 0 &&
657 	    (cp = fmd_case_hash_lookup(fmd.d_cases, uuid)) != NULL) {
658 		/*
659 		 * update resource cache status and transition case
660 		 */
661 		fmd_case_close_status(cp);
662 		fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, FMD_CF_ISOLATED);
663 		fmd_case_rele(cp);
664 	}
665 }
666 
667 /*
668  * on diagnosing side, receive a uuresolved from the proxy.
669  */
670 void
671 fmd_xprt_event_uuresolved(fmd_xprt_impl_t *xip, nvlist_t *nvl)
672 {
673 	fmd_case_t *cp;
674 	char *uuid;
675 
676 	if (fmd_xprt_vmismatch(xip, nvl, NULL))
677 		return; /* transitioned to error state */
678 
679 	if (nvlist_lookup_string(nvl, FM_RSRC_XPRT_UUID, &uuid) == 0 &&
680 	    (cp = fmd_case_hash_lookup(fmd.d_cases, uuid)) != NULL) {
681 		fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
682 
683 		fmd_case_transition(cp, (cip->ci_state == FMD_CASE_REPAIRED) ?
684 		    FMD_CASE_RESOLVED : (cip->ci_state == FMD_CASE_CLOSED) ?
685 		    FMD_CASE_REPAIRED : FMD_CASE_CLOSE_WAIT, FMD_CF_RESOLVED);
686 		fmd_case_rele(cp);
687 	}
688 }
689 
690 /*
691  * on diagnosing side, receive a repair/acquit from the proxy.
692  */
693 void
694 fmd_xprt_event_updated(fmd_xprt_impl_t *xip, nvlist_t *nvl)
695 {
696 	fmd_case_t *cp;
697 	char *uuid;
698 
699 	if (fmd_xprt_vmismatch(xip, nvl, NULL))
700 		return; /* transitioned to error state */
701 
702 	if (nvlist_lookup_string(nvl, FM_RSRC_XPRT_UUID, &uuid) == 0 &&
703 	    (cp = fmd_case_hash_lookup(fmd.d_cases, uuid)) != NULL) {
704 		uint8_t *statusp, *proxy_asrup = NULL;
705 		uint_t nelem = 0;
706 
707 		/*
708 		 * Only update status with new repairs if "no remote repair"
709 		 * is not set. Do the case_update anyway though (as this will
710 		 * refresh the status on the proxy side).
711 		 */
712 		if (!(xip->xi_flags & FMD_XPRT_NO_REMOTE_REPAIR)) {
713 			if (nvlist_lookup_uint8_array(nvl,
714 			    FM_RSRC_XPRT_FAULT_STATUS, &statusp, &nelem) == 0 &&
715 			    nelem != 0) {
716 				(void) nvlist_lookup_uint8_array(nvl,
717 				    FM_RSRC_XPRT_FAULT_HAS_ASRU, &proxy_asrup,
718 				    &nelem);
719 				fmd_case_update_status(cp, statusp,
720 				    proxy_asrup, NULL);
721 			}
722 			fmd_case_update_containees(cp);
723 		}
724 		fmd_case_update(cp);
725 		fmd_case_rele(cp);
726 	}
727 }
728 
729 void
730 fmd_xprt_event_error(fmd_xprt_impl_t *xip, nvlist_t *nvl)
731 {
732 	char *class = "<unknown>";
733 
734 	(void) pthread_mutex_lock(&xip->xi_stats_lock);
735 	xip->xi_stats->xs_discarded.fmds_value.ui64++;
736 	(void) pthread_mutex_unlock(&xip->xi_stats_lock);
737 
738 	(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
739 	TRACE((FMD_DBG_XPRT, "xprt %u bad event %s\n", xip->xi_id, class));
740 
741 	fmd_xprt_transition(xip, _fmd_xprt_state_err, "ERR");
742 }
743 
744 void
745 fmd_xprt_event_drop(fmd_xprt_impl_t *xip, nvlist_t *nvl)
746 {
747 	char *class = "<unknown>";
748 
749 	(void) pthread_mutex_lock(&xip->xi_stats_lock);
750 	xip->xi_stats->xs_discarded.fmds_value.ui64++;
751 	(void) pthread_mutex_unlock(&xip->xi_stats_lock);
752 
753 	(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
754 	TRACE((FMD_DBG_XPRT, "xprt %u drop event %s\n", xip->xi_id, class));
755 
756 }
757 
758 fmd_xprt_t *
759 fmd_xprt_create(fmd_module_t *mp, uint_t flags, nvlist_t *auth, void *data)
760 {
761 	fmd_xprt_impl_t *xip = fmd_zalloc(sizeof (fmd_xprt_impl_t), FMD_SLEEP);
762 	fmd_stat_t *statv;
763 	uint_t i, statc;
764 
765 	char buf[PATH_MAX];
766 	fmd_event_t *e;
767 	nvlist_t *nvl;
768 	char *s;
769 
770 	(void) pthread_mutex_init(&xip->xi_lock, NULL);
771 	(void) pthread_cond_init(&xip->xi_cv, NULL);
772 	(void) pthread_mutex_init(&xip->xi_stats_lock, NULL);
773 
774 	xip->xi_auth = auth;
775 	xip->xi_data = data;
776 	xip->xi_version = FM_RSRC_XPRT_VERSION;
777 	xip->xi_flags = flags;
778 
779 	/*
780 	 * Grab fmd.d_xprt_lock to block fmd_xprt_suspend_all() and then create
781 	 * a transport ID and make it visible in fmd.d_xprt_ids.  If transports
782 	 * were previously suspended, set the FMD_XPRT_DSUSPENDED flag on us to
783 	 * ensure that this transport will not run until fmd_xprt_resume_all().
784 	 */
785 	(void) pthread_mutex_lock(&fmd.d_xprt_lock);
786 	xip->xi_id = fmd_idspace_alloc(fmd.d_xprt_ids, xip);
787 
788 	if (fmd.d_xprt_suspend != 0)
789 		xip->xi_flags |= FMD_XPRT_DSUSPENDED;
790 
791 	(void) pthread_mutex_unlock(&fmd.d_xprt_lock);
792 
793 	/*
794 	 * If the module has not yet finished _fmd_init(), set the ISUSPENDED
795 	 * bit so that fmdo_send() is not called until _fmd_init() completes.
796 	 */
797 	if (!(mp->mod_flags & FMD_MOD_INIT))
798 		xip->xi_flags |= FMD_XPRT_ISUSPENDED;
799 
800 	/*
801 	 * Initialize the transport statistics that we keep on behalf of fmd.
802 	 * These are set up using a template defined at the top of this file.
803 	 * We rename each statistic with a prefix ensuring its uniqueness.
804 	 */
805 	statc = sizeof (_fmd_xprt_stat_tmpl) / sizeof (fmd_stat_t);
806 	statv = fmd_alloc(sizeof (_fmd_xprt_stat_tmpl), FMD_SLEEP);
807 	bcopy(&_fmd_xprt_stat_tmpl, statv, sizeof (_fmd_xprt_stat_tmpl));
808 
809 	for (i = 0; i < statc; i++) {
810 		(void) snprintf(statv[i].fmds_name,
811 		    sizeof (statv[i].fmds_name), "fmd.xprt.%u.%s", xip->xi_id,
812 		    ((fmd_stat_t *)&_fmd_xprt_stat_tmpl + i)->fmds_name);
813 	}
814 
815 	xip->xi_stats = (fmd_xprt_stat_t *)fmd_ustat_insert(
816 	    mp->mod_ustat, FMD_USTAT_NOALLOC, statc, statv, NULL);
817 
818 	if (xip->xi_stats == NULL)
819 		fmd_panic("failed to create xi_stats (%p)\n", (void *)statv);
820 
821 	xip->xi_stats->xs_module.fmds_value.str =
822 	    fmd_strdup(mp->mod_name, FMD_SLEEP);
823 
824 	if (xip->xi_auth != NULL)
825 		fmd_xprt_authupdate(xip);
826 
827 	/*
828 	 * Create the outbound eventq for this transport and link to its stats.
829 	 * If any suspend bits were set above, suspend the eventq immediately.
830 	 */
831 	xip->xi_queue = fmd_eventq_create(mp, &xip->xi_stats->xs_evqstat,
832 	    &xip->xi_stats_lock, mp->mod_stats->ms_xprtqlimit.fmds_value.ui32);
833 
834 	if (xip->xi_flags & FMD_XPRT_SMASK)
835 		fmd_eventq_suspend(xip->xi_queue);
836 
837 	/*
838 	 * Create our subscription hashes: local subscriptions go to xi_queue,
839 	 * remote subscriptions are tracked only for protocol requests, and
840 	 * pending unsubscriptions are associated with the /dev/null eventq.
841 	 */
842 	fmd_xprt_class_hash_create(&xip->xi_lsub, xip->xi_queue);
843 	fmd_xprt_class_hash_create(&xip->xi_rsub, NULL);
844 	fmd_xprt_class_hash_create(&xip->xi_usub, fmd.d_rmod->mod_queue);
845 
846 	/*
847 	 * Determine our initial state based upon the creation flags.  If we're
848 	 * read-only, go directly to RUN.  If we're accepting a new connection,
849 	 * wait for a SYN.  Otherwise send a SYN and wait for an ACK.
850 	 */
851 	if ((flags & FMD_XPRT_RDWR) == FMD_XPRT_RDONLY) {
852 		/*
853 		 * Send the list.suspects across here for readonly transports.
854 		 * For read-write transport they will be sent on transition to
855 		 * RUN state in fmd_xprt_event_run().
856 		 */
857 		fmd_case_hash_apply(fmd.d_cases, fmd_xprt_send_case_ro, mp);
858 		fmd_xprt_transition(xip, _fmd_xprt_state_run, "RUN");
859 	} else if (flags & FMD_XPRT_ACCEPT)
860 		fmd_xprt_transition(xip, _fmd_xprt_state_syn, "SYN");
861 	else
862 		fmd_xprt_transition(xip, _fmd_xprt_state_ack, "ACK");
863 
864 	/*
865 	 * If client.xprtlog is set to TRUE, create a debugging log for the
866 	 * events received by the transport in var/fm/fmd/xprt/.
867 	 */
868 	(void) fmd_conf_getprop(fmd.d_conf, "client.xprtlog", &i);
869 	(void) fmd_conf_getprop(fmd.d_conf, "log.xprt", &s);
870 
871 	if (i) {
872 		(void) snprintf(buf, sizeof (buf), "%s/%u.log", s, xip->xi_id);
873 		xip->xi_log = fmd_log_open(fmd.d_rootdir, buf, FMD_LOG_XPRT);
874 	}
875 
876 	ASSERT(fmd_module_locked(mp));
877 	fmd_list_append(&mp->mod_transports, xip);
878 
879 	(void) pthread_mutex_lock(&mp->mod_stats_lock);
880 	mp->mod_stats->ms_xprtopen.fmds_value.ui32++;
881 	(void) pthread_mutex_unlock(&mp->mod_stats_lock);
882 
883 	/*
884 	 * If this is a read-only transport, return without creating a send
885 	 * queue thread and setting up any connection events in our queue.
886 	 */
887 	if ((flags & FMD_XPRT_RDWR) == FMD_XPRT_RDONLY)
888 		goto out;
889 
890 	/*
891 	 * Once the transport is fully initialized, create a send queue thread
892 	 * and start any connect events flowing to complete our initialization.
893 	 */
894 	if ((xip->xi_thread = fmd_thread_create(mp,
895 	    (fmd_thread_f *)fmd_xprt_send, xip)) == NULL) {
896 
897 		fmd_error(EFMD_XPRT_THR,
898 		    "failed to create thread for transport %u", xip->xi_id);
899 
900 		fmd_xprt_destroy((fmd_xprt_t *)xip);
901 		(void) fmd_set_errno(EFMD_XPRT_THR);
902 		return (NULL);
903 	}
904 
905 	/*
906 	 * If the transport is not being opened to accept an inbound connect,
907 	 * start an outbound connection by enqueuing a SYN event for our peer.
908 	 */
909 	if (!(flags & FMD_XPRT_ACCEPT)) {
910 		nvl = fmd_protocol_xprt_ctl(mp,
911 		    "resource.fm.xprt.syn", FM_RSRC_XPRT_VERSION);
912 
913 		(void) nvlist_lookup_string(nvl, FM_CLASS, &s);
914 		e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, s);
915 		fmd_eventq_insert_at_time(xip->xi_queue, e);
916 	}
917 out:
918 	fmd_dprintf(FMD_DBG_XPRT, "opened transport %u\n", xip->xi_id);
919 	return ((fmd_xprt_t *)xip);
920 }
921 
922 void
923 fmd_xprt_destroy(fmd_xprt_t *xp)
924 {
925 	fmd_xprt_impl_t *xip = (fmd_xprt_impl_t *)xp;
926 	fmd_module_t *mp = xip->xi_queue->eq_mod;
927 	uint_t id = xip->xi_id;
928 
929 	fmd_case_impl_t *cip, *nip;
930 	fmd_stat_t *sp;
931 	uint_t i, n;
932 
933 	ASSERT(fmd_module_locked(mp));
934 	fmd_list_delete(&mp->mod_transports, xip);
935 
936 	(void) pthread_mutex_lock(&mp->mod_stats_lock);
937 	mp->mod_stats->ms_xprtopen.fmds_value.ui32--;
938 	(void) pthread_mutex_unlock(&mp->mod_stats_lock);
939 
940 	(void) pthread_mutex_lock(&xip->xi_lock);
941 
942 	while (xip->xi_busy != 0)
943 		(void) pthread_cond_wait(&xip->xi_cv, &xip->xi_lock);
944 
945 	/*
946 	 * Remove the transport from global visibility, cancel its send-side
947 	 * thread, join with it, and then remove the transport from module
948 	 * visibility.  Once all this is done, destroy and free the transport.
949 	 */
950 	(void) fmd_idspace_free(fmd.d_xprt_ids, xip->xi_id);
951 
952 	if (xip->xi_thread != NULL) {
953 		fmd_eventq_abort(xip->xi_queue);
954 		fmd_module_unlock(mp);
955 		fmd_thread_destroy(xip->xi_thread, FMD_THREAD_JOIN);
956 		fmd_module_lock(mp);
957 	}
958 
959 	if (xip->xi_log != NULL)
960 		fmd_log_rele(xip->xi_log);
961 
962 	/*
963 	 * Release every case handle in the module that was cached by this
964 	 * transport.  This will result in these cases disappearing from the
965 	 * local case hash so that fmd_case_uuclose() and fmd_case_repaired()
966 	 * etc can no longer be used.
967 	 */
968 	for (cip = fmd_list_next(&mp->mod_cases); cip != NULL; cip = nip) {
969 		nip = fmd_list_next(cip);
970 		if (cip->ci_xprt == xp)
971 			fmd_case_discard((fmd_case_t *)cip, B_TRUE);
972 	}
973 
974 	/*
975 	 * Destroy every class in the various subscription hashes and remove
976 	 * any corresponding subscriptions from the event dispatch queue.
977 	 */
978 	fmd_xprt_class_hash_destroy(&xip->xi_lsub);
979 	fmd_xprt_class_hash_destroy(&xip->xi_rsub);
980 	fmd_xprt_class_hash_destroy(&xip->xi_usub);
981 
982 	/*
983 	 * Uniquify the stat names exactly as was done in fmd_xprt_create()
984 	 * before calling fmd_ustat_insert(), otherwise fmd_ustat_delete()
985 	 * won't find the entries in the hash table.
986 	 */
987 	n = sizeof (_fmd_xprt_stat_tmpl) / sizeof (fmd_stat_t);
988 	sp = fmd_alloc(sizeof (_fmd_xprt_stat_tmpl), FMD_SLEEP);
989 	bcopy(&_fmd_xprt_stat_tmpl, sp, sizeof (_fmd_xprt_stat_tmpl));
990 	for (i = 0; i < n; i++) {
991 		(void) snprintf(sp[i].fmds_name,
992 		    sizeof (sp[i].fmds_name), "fmd.xprt.%u.%s", xip->xi_id,
993 		    ((fmd_stat_t *)&_fmd_xprt_stat_tmpl + i)->fmds_name);
994 	}
995 	fmd_ustat_delete(mp->mod_ustat, n, sp);
996 	fmd_free(sp, sizeof (_fmd_xprt_stat_tmpl));
997 
998 	fmd_free(xip->xi_stats, sizeof (fmd_xprt_stat_t));
999 	fmd_eventq_destroy(xip->xi_queue);
1000 	nvlist_free(xip->xi_auth);
1001 	fmd_free(xip, sizeof (fmd_xprt_impl_t));
1002 
1003 	fmd_dprintf(FMD_DBG_XPRT, "closed transport %u\n", id);
1004 }
1005 
1006 void
1007 fmd_xprt_xsuspend(fmd_xprt_t *xp, uint_t flags)
1008 {
1009 	fmd_xprt_impl_t *xip = (fmd_xprt_impl_t *)xp;
1010 	uint_t oflags;
1011 
1012 	ASSERT((flags & ~FMD_XPRT_SMASK) == 0);
1013 	(void) pthread_mutex_lock(&xip->xi_lock);
1014 
1015 	oflags = xip->xi_flags;
1016 	xip->xi_flags |= flags;
1017 
1018 	if (!(oflags & FMD_XPRT_SMASK) && (xip->xi_flags & FMD_XPRT_SMASK) != 0)
1019 		fmd_eventq_suspend(xip->xi_queue);
1020 
1021 	(void) pthread_cond_broadcast(&xip->xi_cv);
1022 
1023 	while (xip->xi_busy != 0)
1024 		(void) pthread_cond_wait(&xip->xi_cv, &xip->xi_lock);
1025 
1026 	(void) pthread_mutex_unlock(&xip->xi_lock);
1027 }
1028 
1029 void
1030 fmd_xprt_xresume(fmd_xprt_t *xp, uint_t flags)
1031 {
1032 	fmd_xprt_impl_t *xip = (fmd_xprt_impl_t *)xp;
1033 	uint_t oflags;
1034 
1035 	ASSERT((flags & ~FMD_XPRT_SMASK) == 0);
1036 	(void) pthread_mutex_lock(&xip->xi_lock);
1037 
1038 	oflags = xip->xi_flags;
1039 	xip->xi_flags &= ~flags;
1040 
1041 	if ((oflags & FMD_XPRT_SMASK) != 0 && !(xip->xi_flags & FMD_XPRT_SMASK))
1042 		fmd_eventq_resume(xip->xi_queue);
1043 
1044 	(void) pthread_cond_broadcast(&xip->xi_cv);
1045 	(void) pthread_mutex_unlock(&xip->xi_lock);
1046 }
1047 
1048 void
1049 fmd_xprt_send(fmd_xprt_t *xp)
1050 {
1051 	fmd_xprt_impl_t *xip = (fmd_xprt_impl_t *)xp;
1052 	fmd_module_t *mp = xip->xi_queue->eq_mod;
1053 	fmd_event_t *ep;
1054 	int err;
1055 
1056 	while ((ep = fmd_eventq_delete(xip->xi_queue)) != NULL) {
1057 		if (FMD_EVENT_TTL(ep) == 0) {
1058 			fmd_event_rele(ep);
1059 			continue;
1060 		}
1061 
1062 		fmd_dprintf(FMD_DBG_XPRT, "xprt %u sending %s\n",
1063 		    xip->xi_id, (char *)FMD_EVENT_DATA(ep));
1064 
1065 		err = mp->mod_ops->mop_transport(mp, xp, ep);
1066 		fmd_eventq_done(xip->xi_queue);
1067 
1068 		if (err == FMD_SEND_RETRY) {
1069 			fmd_eventq_insert_at_time(xip->xi_queue, ep);
1070 			(void) pthread_mutex_lock(&xip->xi_stats_lock);
1071 			xip->xi_stats->xs_retried.fmds_value.ui64++;
1072 			(void) pthread_mutex_unlock(&xip->xi_stats_lock);
1073 		}
1074 
1075 		if (err != FMD_SEND_SUCCESS && err != FMD_SEND_RETRY) {
1076 			(void) pthread_mutex_lock(&xip->xi_stats_lock);
1077 			xip->xi_stats->xs_lost.fmds_value.ui64++;
1078 			(void) pthread_mutex_unlock(&xip->xi_stats_lock);
1079 		}
1080 
1081 		fmd_event_rele(ep);
1082 	}
1083 }
1084 
1085 /*
1086  * This function creates a local suspect list. This is used when a suspect list
1087  * is created directly by an external source like fminject.
1088  */
1089 static void
1090 fmd_xprt_list_suspect_local(fmd_xprt_t *xp, nvlist_t *nvl)
1091 {
1092 	nvlist_t **nvlp;
1093 	nvlist_t *de_fmri, *de_fmri_dup = NULL;
1094 	int64_t *diag_time;
1095 	char *code = NULL;
1096 	fmd_xprt_impl_t *xip = (fmd_xprt_impl_t *)xp;
1097 	fmd_case_t *cp;
1098 	uint_t nelem = 0, nelem2 = 0, i;
1099 	boolean_t injected;
1100 
1101 	fmd_module_lock(xip->xi_queue->eq_mod);
1102 	cp = fmd_case_create(xip->xi_queue->eq_mod, NULL, NULL);
1103 	if (cp == NULL) {
1104 		fmd_module_unlock(xip->xi_queue->eq_mod);
1105 		return;
1106 	}
1107 
1108 	/*
1109 	 * copy diag_code if present
1110 	 */
1111 	(void) nvlist_lookup_string(nvl, FM_SUSPECT_DIAG_CODE, &code);
1112 	if (code != NULL) {
1113 		fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1114 
1115 		cip->ci_precanned = 1;
1116 		fmd_case_setcode(cp, code);
1117 	}
1118 
1119 	/*
1120 	 * copy suspects
1121 	 */
1122 	(void) nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST, &nvlp,
1123 	    &nelem);
1124 	for (i = 0; i < nelem; i++) {
1125 		nvlist_t *flt_copy, *asru = NULL, *fru = NULL, *rsrc = NULL;
1126 		topo_hdl_t *thp;
1127 		char *loc = NULL;
1128 		int err;
1129 
1130 		thp = fmd_fmri_topo_hold(TOPO_VERSION);
1131 		(void) nvlist_xdup(nvlp[i], &flt_copy, &fmd.d_nva);
1132 		(void) nvlist_lookup_nvlist(nvlp[i], FM_FAULT_RESOURCE, &rsrc);
1133 
1134 		/*
1135 		 * If no fru specified, get it from topo
1136 		 */
1137 		if (nvlist_lookup_nvlist(nvlp[i], FM_FAULT_FRU, &fru) != 0 &&
1138 		    rsrc && topo_fmri_fru(thp, rsrc, &fru, &err) == 0)
1139 			(void) nvlist_add_nvlist(flt_copy, FM_FAULT_FRU, fru);
1140 		/*
1141 		 * If no asru specified, get it from topo
1142 		 */
1143 		if (nvlist_lookup_nvlist(nvlp[i], FM_FAULT_ASRU, &asru) != 0 &&
1144 		    rsrc && topo_fmri_asru(thp, rsrc, &asru, &err) == 0)
1145 			(void) nvlist_add_nvlist(flt_copy, FM_FAULT_ASRU, asru);
1146 		/*
1147 		 * If no location specified, get it from topo
1148 		 */
1149 		if (nvlist_lookup_string(nvlp[i], FM_FAULT_LOCATION,
1150 		    &loc) != 0) {
1151 			if (fru && topo_fmri_label(thp, fru, &loc, &err) == 0)
1152 				(void) nvlist_add_string(flt_copy,
1153 				    FM_FAULT_LOCATION, loc);
1154 			else if (rsrc && topo_fmri_label(thp, rsrc, &loc,
1155 			    &err) == 0)
1156 				(void) nvlist_add_string(flt_copy,
1157 				    FM_FAULT_LOCATION, loc);
1158 			if (loc)
1159 				topo_hdl_strfree(thp, loc);
1160 		}
1161 		if (fru)
1162 			nvlist_free(fru);
1163 		if (asru)
1164 			nvlist_free(asru);
1165 		if (rsrc)
1166 			nvlist_free(rsrc);
1167 		fmd_fmri_topo_rele(thp);
1168 		fmd_case_insert_suspect(cp, flt_copy);
1169 	}
1170 
1171 	/*
1172 	 * copy diag_time if present
1173 	 */
1174 	if (nvlist_lookup_int64_array(nvl, FM_SUSPECT_DIAG_TIME, &diag_time,
1175 	    &nelem2) == 0 && nelem2 >= 2)
1176 		fmd_case_settime(cp, diag_time[0], diag_time[1]);
1177 
1178 	/*
1179 	 * copy DE fmri if present
1180 	 */
1181 	if (nvlist_lookup_nvlist(nvl, FM_SUSPECT_DE, &de_fmri) == 0) {
1182 		(void) nvlist_xdup(de_fmri, &de_fmri_dup, &fmd.d_nva);
1183 		fmd_case_set_de_fmri(cp, de_fmri_dup);
1184 	}
1185 
1186 	/*
1187 	 * copy injected if present
1188 	 */
1189 	if (nvlist_lookup_boolean_value(nvl, FM_SUSPECT_INJECTED,
1190 	    &injected) == 0 && injected)
1191 		fmd_case_set_injected(cp);
1192 
1193 	fmd_case_transition(cp, FMD_CASE_SOLVED, FMD_CF_SOLVED);
1194 	fmd_module_unlock(xip->xi_queue->eq_mod);
1195 }
1196 
1197 /*
1198  * This function is called to create a proxy case on receipt of a list.suspect
1199  * from the diagnosing side of the transport.
1200  */
1201 static void
1202 fmd_xprt_list_suspect(fmd_xprt_t *xp, nvlist_t *nvl)
1203 {
1204 	fmd_xprt_impl_t *xip = (fmd_xprt_impl_t *)xp;
1205 	nvlist_t **nvlp;
1206 	uint_t nelem = 0, nelem2 = 0, i;
1207 	int64_t *diag_time;
1208 	topo_hdl_t *thp;
1209 	char *class;
1210 	nvlist_t *rsrc, *asru, *de_fmri, *de_fmri_dup = NULL;
1211 	nvlist_t *flt_copy;
1212 	int err;
1213 	nvlist_t **asrua;
1214 	uint8_t *proxy_asru = NULL;
1215 	int got_proxy_asru = 0;
1216 	int got_hc_rsrc = 0;
1217 	int got_hc_asru = 0;
1218 	int got_present_rsrc = 0;
1219 	uint8_t *diag_asru = NULL;
1220 	char *scheme;
1221 	uint8_t *statusp;
1222 	char *uuid, *code;
1223 	fmd_case_t *cp;
1224 	fmd_case_impl_t *cip;
1225 	int need_update = 0;
1226 	boolean_t injected;
1227 
1228 	if (nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) != 0)
1229 		return;
1230 	if (nvlist_lookup_string(nvl, FM_SUSPECT_DIAG_CODE, &code) != 0)
1231 		return;
1232 	(void) nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST, &nvlp,
1233 	    &nelem);
1234 
1235 	/*
1236 	 * In order to implement FMD_XPRT_HCONLY and FMD_XPRT_HC_PRESENT_ONLY
1237 	 * etc we first scan the suspects to see if
1238 	 * - there was an asru in the received fault
1239 	 * - there was an hc-scheme resource in the received fault
1240 	 * - any hc-scheme resource in the received fault is present in the
1241 	 *   local topology
1242 	 * - any hc-scheme resource in the received fault has an asru in the
1243 	 *   local topology
1244 	 */
1245 	if (nelem > 0) {
1246 		asrua = fmd_zalloc(sizeof (nvlist_t *) * nelem, FMD_SLEEP);
1247 		proxy_asru = fmd_zalloc(sizeof (uint8_t) * nelem, FMD_SLEEP);
1248 		diag_asru = fmd_zalloc(sizeof (uint8_t) * nelem, FMD_SLEEP);
1249 		thp = fmd_fmri_topo_hold(TOPO_VERSION);
1250 		for (i = 0; i < nelem; i++) {
1251 			if (nvlist_lookup_nvlist(nvlp[i], FM_FAULT_ASRU,
1252 			    &asru) == 0 && asru != NULL)
1253 				diag_asru[i] = 1;
1254 			if (nvlist_lookup_string(nvlp[i], FM_CLASS,
1255 			    &class) != 0 || strncmp(class, "fault", 5) != 0)
1256 				continue;
1257 			/*
1258 			 * If there is an hc-scheme asru, use that to find the
1259 			 * real asru. Otherwise if there is an hc-scheme
1260 			 * resource, work out the old asru from that.
1261 			 * This order is to allow a two stage evaluation
1262 			 * of the asru where a fault in the diagnosing side
1263 			 * is in a component not visible to the proxy side,
1264 			 * but prevents a component that is visible from
1265 			 * working. So the diagnosing side sets the asru to
1266 			 * the latter component (in hc-scheme as the diagnosing
1267 			 * side doesn't know about the proxy side's virtual
1268 			 * schemes), and then the proxy side can convert that
1269 			 * to a suitable virtual scheme asru.
1270 			 */
1271 			if (nvlist_lookup_nvlist(nvlp[i], FM_FAULT_ASRU,
1272 			    &asru) == 0 && asru != NULL &&
1273 			    nvlist_lookup_string(asru, FM_FMRI_SCHEME,
1274 			    &scheme) == 0 &&
1275 			    strcmp(scheme, FM_FMRI_SCHEME_HC) == 0) {
1276 				got_hc_asru = 1;
1277 				if (xip->xi_flags & FMD_XPRT_EXTERNAL)
1278 					continue;
1279 				if (topo_fmri_present(thp, asru, &err) != 0)
1280 					got_present_rsrc = 1;
1281 				if (topo_fmri_asru(thp, asru, &asrua[i],
1282 				    &err) == 0) {
1283 					proxy_asru[i] =
1284 					    FMD_PROXY_ASRU_FROM_ASRU;
1285 					got_proxy_asru = 1;
1286 				}
1287 			} else if (nvlist_lookup_nvlist(nvlp[i],
1288 			    FM_FAULT_RESOURCE, &rsrc) == 0 && rsrc != NULL &&
1289 			    nvlist_lookup_string(rsrc, FM_FMRI_SCHEME,
1290 			    &scheme) == 0 &&
1291 			    strcmp(scheme, FM_FMRI_SCHEME_HC) == 0) {
1292 				got_hc_rsrc = 1;
1293 				if (xip->xi_flags & FMD_XPRT_EXTERNAL)
1294 					continue;
1295 				if (topo_fmri_present(thp, rsrc, &err) != 0)
1296 					got_present_rsrc = 1;
1297 				if (topo_fmri_asru(thp, rsrc, &asrua[i],
1298 				    &err) == 0) {
1299 					proxy_asru[i] =
1300 					    FMD_PROXY_ASRU_FROM_RSRC;
1301 					got_proxy_asru = 1;
1302 				}
1303 			}
1304 		}
1305 		fmd_fmri_topo_rele(thp);
1306 	}
1307 
1308 	/*
1309 	 * If we're set up only to report hc-scheme faults, and
1310 	 * there aren't any, then just drop the event.
1311 	 */
1312 	if (got_hc_rsrc == 0 && got_hc_asru == 0 &&
1313 	    (xip->xi_flags & FMD_XPRT_HCONLY)) {
1314 		if (nelem > 0) {
1315 			fmd_free(proxy_asru, sizeof (uint8_t) * nelem);
1316 			fmd_free(diag_asru, sizeof (uint8_t) * nelem);
1317 			fmd_free(asrua, sizeof (nvlist_t *) * nelem);
1318 		}
1319 		return;
1320 	}
1321 
1322 	/*
1323 	 * If we're set up only to report locally present hc-scheme
1324 	 * faults, and there aren't any, then just drop the event.
1325 	 */
1326 	if (got_present_rsrc == 0 &&
1327 	    (xip->xi_flags & FMD_XPRT_HC_PRESENT_ONLY)) {
1328 		if (nelem > 0) {
1329 			for (i = 0; i < nelem; i++)
1330 				if (asrua[i])
1331 					nvlist_free(asrua[i]);
1332 			fmd_free(proxy_asru, sizeof (uint8_t) * nelem);
1333 			fmd_free(diag_asru, sizeof (uint8_t) * nelem);
1334 			fmd_free(asrua, sizeof (nvlist_t *) * nelem);
1335 		}
1336 		return;
1337 	}
1338 
1339 	/*
1340 	 * If fmd_case_recreate() returns NULL, UUID is already known.
1341 	 */
1342 	fmd_module_lock(xip->xi_queue->eq_mod);
1343 	if ((cp = fmd_case_recreate(xip->xi_queue->eq_mod, xp,
1344 	    FMD_CASE_UNSOLVED, uuid, code)) == NULL) {
1345 		if (nelem > 0) {
1346 			for (i = 0; i < nelem; i++)
1347 				if (asrua[i])
1348 					nvlist_free(asrua[i]);
1349 			fmd_free(proxy_asru, sizeof (uint8_t) * nelem);
1350 			fmd_free(diag_asru, sizeof (uint8_t) * nelem);
1351 			fmd_free(asrua, sizeof (nvlist_t *) * nelem);
1352 		}
1353 		fmd_module_unlock(xip->xi_queue->eq_mod);
1354 		return;
1355 	}
1356 
1357 	cip = (fmd_case_impl_t *)cp;
1358 	cip->ci_diag_asru = diag_asru;
1359 	cip->ci_proxy_asru = proxy_asru;
1360 	for (i = 0; i < nelem; i++) {
1361 		(void) nvlist_xdup(nvlp[i], &flt_copy, &fmd.d_nva);
1362 		if (proxy_asru[i] != FMD_PROXY_ASRU_NOT_NEEDED) {
1363 			/*
1364 			 * Copy suspects, but remove/replace asru first. Also if
1365 			 * the original asru was hc-scheme use that as resource.
1366 			 */
1367 			if (proxy_asru[i] == FMD_PROXY_ASRU_FROM_ASRU) {
1368 				(void) nvlist_remove(flt_copy,
1369 				    FM_FAULT_RESOURCE, DATA_TYPE_NVLIST);
1370 				(void) nvlist_lookup_nvlist(flt_copy,
1371 				    FM_FAULT_ASRU, &asru);
1372 				(void) nvlist_add_nvlist(flt_copy,
1373 				    FM_FAULT_RESOURCE, asru);
1374 			}
1375 			(void) nvlist_remove(flt_copy, FM_FAULT_ASRU,
1376 			    DATA_TYPE_NVLIST);
1377 			(void) nvlist_add_nvlist(flt_copy, FM_FAULT_ASRU,
1378 			    asrua[i]);
1379 			nvlist_free(asrua[i]);
1380 		} else if (got_hc_asru == 0 &&
1381 		    nvlist_lookup_nvlist(flt_copy, FM_FAULT_ASRU,
1382 		    &asru) == 0 && asru != NULL) {
1383 			/*
1384 			 * If we have an asru from diag side, but it's not
1385 			 * in hc scheme, then we can't be sure what it
1386 			 * represents, so mark as no retire.
1387 			 */
1388 			(void) nvlist_add_boolean_value(flt_copy,
1389 			    FM_SUSPECT_RETIRE, B_FALSE);
1390 		}
1391 		fmd_case_insert_suspect(cp, flt_copy);
1392 	}
1393 	/*
1394 	 * copy diag_time
1395 	 */
1396 	if (nvlist_lookup_int64_array(nvl, FM_SUSPECT_DIAG_TIME, &diag_time,
1397 	    &nelem2) == 0 && nelem2 >= 2)
1398 		fmd_case_settime(cp, diag_time[0], diag_time[1]);
1399 	/*
1400 	 * copy DE fmri
1401 	 */
1402 	if (nvlist_lookup_nvlist(nvl, FM_SUSPECT_DE, &de_fmri) == 0) {
1403 		(void) nvlist_xdup(de_fmri, &de_fmri_dup, &fmd.d_nva);
1404 		fmd_case_set_de_fmri(cp, de_fmri_dup);
1405 	}
1406 
1407 	/*
1408 	 * copy injected if present
1409 	 */
1410 	if (nvlist_lookup_boolean_value(nvl, FM_SUSPECT_INJECTED,
1411 	    &injected) == 0 && injected)
1412 		fmd_case_set_injected(cp);
1413 
1414 	/*
1415 	 * Transition to solved. This will log the suspect list and create
1416 	 * the resource cache entries.
1417 	 */
1418 	fmd_case_transition(cp, FMD_CASE_SOLVED, FMD_CF_SOLVED);
1419 
1420 	/*
1421 	 * Update status if it is not simply "all faulty" (can happen if
1422 	 * list.suspects are being re-sent when the transport has reconnected).
1423 	 */
1424 	(void) nvlist_lookup_uint8_array(nvl, FM_SUSPECT_FAULT_STATUS, &statusp,
1425 	    &nelem);
1426 	for (i = 0; i < nelem; i++) {
1427 		if ((statusp[i] & (FM_SUSPECT_FAULTY | FM_SUSPECT_UNUSABLE |
1428 		    FM_SUSPECT_NOT_PRESENT | FM_SUSPECT_DEGRADED)) !=
1429 		    FM_SUSPECT_FAULTY)
1430 			need_update = 1;
1431 	}
1432 	if (need_update) {
1433 		fmd_case_update_status(cp, statusp, cip->ci_proxy_asru,
1434 		    cip->ci_diag_asru);
1435 		fmd_case_update_containees(cp);
1436 		fmd_case_update(cp);
1437 	}
1438 
1439 	/*
1440 	 * if asru on proxy side, send an update back to the diagnosing side to
1441 	 * update UNUSABLE/DEGRADED.
1442 	 */
1443 	if (got_proxy_asru)
1444 		fmd_case_xprt_updated(cp);
1445 
1446 	if (nelem > 0)
1447 		fmd_free(asrua, sizeof (nvlist_t *) * nelem);
1448 	fmd_module_unlock(xip->xi_queue->eq_mod);
1449 }
1450 
1451 void
1452 fmd_xprt_recv(fmd_xprt_t *xp, nvlist_t *nvl, hrtime_t hrt, boolean_t logonly)
1453 {
1454 	fmd_xprt_impl_t *xip = (fmd_xprt_impl_t *)xp;
1455 	const fmd_xprt_rule_t *xrp;
1456 	fmd_t *dp = &fmd;
1457 
1458 	fmd_event_t *e;
1459 	char *class, *uuid;
1460 	boolean_t isproto, isereport, isireport, ishvireport, issysevent;
1461 
1462 	uint64_t *tod;
1463 	uint8_t ttl;
1464 	uint_t n;
1465 	fmd_case_t *cp;
1466 
1467 	/*
1468 	 * Grab the transport lock and set the busy flag to indicate we are
1469 	 * busy receiving an event.  If [DI]SUSPEND is pending, wait until fmd
1470 	 * resumes the transport before continuing on with the receive.
1471 	 */
1472 	(void) pthread_mutex_lock(&xip->xi_lock);
1473 
1474 	while (xip->xi_flags & (FMD_XPRT_DSUSPENDED | FMD_XPRT_ISUSPENDED)) {
1475 
1476 		if (fmd.d_signal != 0) {
1477 			(void) pthread_mutex_unlock(&xip->xi_lock);
1478 			return; /* fmd_destroy() is in progress */
1479 		}
1480 
1481 		(void) pthread_cond_wait(&xip->xi_cv, &xip->xi_lock);
1482 	}
1483 
1484 	xip->xi_busy++;
1485 	ASSERT(xip->xi_busy != 0);
1486 
1487 	(void) pthread_mutex_unlock(&xip->xi_lock);
1488 
1489 	(void) pthread_mutex_lock(&xip->xi_stats_lock);
1490 	xip->xi_stats->xs_received.fmds_value.ui64++;
1491 	(void) pthread_mutex_unlock(&xip->xi_stats_lock);
1492 
1493 	if (nvlist_lookup_string(nvl, FM_CLASS, &class) != 0) {
1494 		fmd_error(EFMD_XPRT_PAYLOAD, "discarding nvlist %p: missing "
1495 		    "required \"%s\" payload element", (void *)nvl, FM_CLASS);
1496 
1497 		(void) pthread_mutex_lock(&xip->xi_stats_lock);
1498 		xip->xi_stats->xs_discarded.fmds_value.ui64++;
1499 		(void) pthread_mutex_unlock(&xip->xi_stats_lock);
1500 
1501 		nvlist_free(nvl);
1502 		goto done;
1503 	}
1504 
1505 	fmd_dprintf(FMD_DBG_XPRT, "xprt %u %s %s\n", xip->xi_id,
1506 	    ((logonly == FMD_B_TRUE) ? "logging" : "posting"), class);
1507 
1508 	isereport = (strncmp(class, FM_EREPORT_CLASS ".",
1509 	    sizeof (FM_EREPORT_CLASS)) == 0) ? FMD_B_TRUE : FMD_B_FALSE;
1510 
1511 	isireport = (strncmp(class, FM_IREPORT_CLASS ".",
1512 	    sizeof (FM_IREPORT_CLASS)) == 0) ?  FMD_B_TRUE : FMD_B_FALSE;
1513 
1514 	issysevent = (strncmp(class, SYSEVENT_RSRC_CLASS,
1515 	    sizeof (SYSEVENT_RSRC_CLASS) - 1)) == 0 ? FMD_B_TRUE : FMD_B_FALSE;
1516 
1517 	if (isireport) {
1518 		char *pri;
1519 
1520 		if (nvlist_lookup_string(nvl, FM_IREPORT_PRIORITY, &pri) == 0 &&
1521 		    strncmp(pri, "high", 5) == 0) {
1522 			ishvireport = 1;
1523 		} else {
1524 			ishvireport = 0;
1525 		}
1526 	}
1527 
1528 	/*
1529 	 * The logonly flag should only be set for ereports.
1530 	 */
1531 	if (logonly == FMD_B_TRUE && isereport == FMD_B_FALSE) {
1532 		fmd_error(EFMD_XPRT_INVAL, "discarding nvlist %p: "
1533 		    "logonly flag is not valid for class %s",
1534 		    (void *)nvl, class);
1535 
1536 		(void) pthread_mutex_lock(&xip->xi_stats_lock);
1537 		xip->xi_stats->xs_discarded.fmds_value.ui64++;
1538 		(void) pthread_mutex_unlock(&xip->xi_stats_lock);
1539 
1540 		nvlist_free(nvl);
1541 		goto done;
1542 	}
1543 
1544 	/*
1545 	 * If a time-to-live value is present in the event and is zero, drop
1546 	 * the event and bump xs_timeouts.  Otherwise decrement the TTL value.
1547 	 */
1548 	if (nvlist_lookup_uint8(nvl, FMD_EVN_TTL, &ttl) == 0) {
1549 		if (ttl == 0) {
1550 			fmd_dprintf(FMD_DBG_XPRT, "xprt %u nvlist %p (%s) "
1551 			    "timeout: event received with ttl=0\n",
1552 			    xip->xi_id, (void *)nvl, class);
1553 
1554 			(void) pthread_mutex_lock(&xip->xi_stats_lock);
1555 			xip->xi_stats->xs_timeouts.fmds_value.ui64++;
1556 			(void) pthread_mutex_unlock(&xip->xi_stats_lock);
1557 
1558 			nvlist_free(nvl);
1559 			goto done;
1560 		}
1561 		(void) nvlist_remove(nvl, FMD_EVN_TTL, DATA_TYPE_UINT8);
1562 		(void) nvlist_add_uint8(nvl, FMD_EVN_TTL, ttl - 1);
1563 	}
1564 
1565 	/*
1566 	 * If we are using the native system clock, the underlying transport
1567 	 * code can provide a tighter event time bound by telling us when the
1568 	 * event was enqueued.  If we're using simulated clocks, this time
1569 	 * has no meaning to us, so just reset the value to use HRT_NOW.
1570 	 */
1571 	if (dp->d_clockops != &fmd_timeops_native)
1572 		hrt = FMD_HRT_NOW;
1573 
1574 	/*
1575 	 * If an event's class is in the FMD_CTL_CLASS family, then create a
1576 	 * control event.  If a FMD_EVN_TOD member is found, create a protocol
1577 	 * event using this time.  Otherwise create a protocol event using hrt.
1578 	 */
1579 	isproto = (strncmp(class, FMD_CTL_CLASS, FMD_CTL_CLASS_LEN) == 0) ?
1580 	    FMD_B_FALSE : FMD_B_TRUE;
1581 	if (isproto == FMD_B_FALSE)
1582 		e = fmd_event_create(FMD_EVT_CTL, hrt, nvl, fmd_ctl_init(nvl));
1583 	else if (nvlist_lookup_uint64_array(nvl, FMD_EVN_TOD, &tod, &n) != 0)
1584 		e = fmd_event_create(FMD_EVT_PROTOCOL, hrt, nvl, class);
1585 	else {
1586 		e = fmd_event_recreate(FMD_EVT_PROTOCOL,
1587 		    NULL, nvl, class, NULL, 0, 0);
1588 	}
1589 
1590 	/*
1591 	 * If the debug log is enabled, create a temporary event, log it to the
1592 	 * debug log, and then reset the underlying state of the event.
1593 	 */
1594 	if (xip->xi_log != NULL) {
1595 		fmd_event_impl_t *ep = (fmd_event_impl_t *)e;
1596 
1597 		fmd_log_append(xip->xi_log, e, NULL);
1598 
1599 		ep->ev_flags |= FMD_EVF_VOLATILE;
1600 		ep->ev_off = 0;
1601 		ep->ev_len = 0;
1602 
1603 		if (ep->ev_log != NULL) {
1604 			fmd_log_rele(ep->ev_log);
1605 			ep->ev_log = NULL;
1606 		}
1607 	}
1608 
1609 	/*
1610 	 * Iterate over the rules for the current state trying to match the
1611 	 * event class to one of our special rules.  If a rule is matched, the
1612 	 * event is consumed and not dispatched to other modules.  If the rule
1613 	 * set ends without matching an event, we fall through to dispatching.
1614 	 */
1615 	for (xrp = xip->xi_state; xrp->xr_class != NULL; xrp++) {
1616 		if (fmd_event_match(e, FMD_EVT_PROTOCOL, xrp->xr_class)) {
1617 			fmd_event_hold(e);
1618 			xrp->xr_func(xip, nvl);
1619 			fmd_event_rele(e);
1620 			goto done;
1621 		}
1622 	}
1623 
1624 	/*
1625 	 * Record ereports and ireports in the log.  This code will
1626 	 * be replaced later with a per-transport intent log instead.
1627 	 */
1628 	if (isereport == FMD_B_TRUE || isireport == FMD_B_TRUE ||
1629 	    issysevent == B_TRUE) {
1630 		pthread_rwlock_t *lockp;
1631 		fmd_log_t *lp;
1632 
1633 		if (isereport == FMD_B_TRUE) {
1634 			lp = fmd.d_errlog;
1635 			lockp = &fmd.d_log_lock;
1636 		} else {
1637 			if (ishvireport || issysevent) {
1638 				lp = fmd.d_hvilog;
1639 				lockp = &fmd.d_hvilog_lock;
1640 			} else {
1641 				lp = fmd.d_ilog;
1642 				lockp = &fmd.d_ilog_lock;
1643 			}
1644 		}
1645 
1646 		(void) pthread_rwlock_rdlock(lockp);
1647 		fmd_log_append(lp, e, NULL);
1648 		(void) pthread_rwlock_unlock(lockp);
1649 	}
1650 
1651 	/*
1652 	 * If a list.suspect event is received, create a case for the specified
1653 	 * UUID in the case hash, with the transport module as its owner.
1654 	 */
1655 	if (fmd_event_match(e, FMD_EVT_PROTOCOL, FM_LIST_SUSPECT_CLASS)) {
1656 		if (xip->xi_flags & FMD_XPRT_CACHE_AS_LOCAL)
1657 			fmd_xprt_list_suspect_local(xp, nvl);
1658 		else
1659 			fmd_xprt_list_suspect(xp, nvl);
1660 		fmd_event_hold(e);
1661 		fmd_event_rele(e);
1662 		goto done;
1663 	}
1664 
1665 	/*
1666 	 * If a list.updated or list.repaired event is received, update the
1667 	 * resource cache status and the local case.
1668 	 */
1669 	if (fmd_event_match(e, FMD_EVT_PROTOCOL, FM_LIST_REPAIRED_CLASS) ||
1670 	    fmd_event_match(e, FMD_EVT_PROTOCOL, FM_LIST_UPDATED_CLASS)) {
1671 		uint8_t *statusp;
1672 		uint_t nelem = 0;
1673 
1674 		(void) nvlist_lookup_uint8_array(nvl, FM_SUSPECT_FAULT_STATUS,
1675 		    &statusp, &nelem);
1676 		fmd_module_lock(xip->xi_queue->eq_mod);
1677 		if (nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) == 0 &&
1678 		    (cp = fmd_case_hash_lookup(fmd.d_cases, uuid)) != NULL) {
1679 			fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1680 			if (cip->ci_xprt != NULL) {
1681 				fmd_case_update_status(cp, statusp,
1682 				    cip->ci_proxy_asru, cip->ci_diag_asru);
1683 				fmd_case_update_containees(cp);
1684 				fmd_case_update(cp);
1685 			}
1686 			fmd_case_rele(cp);
1687 		}
1688 		fmd_module_unlock(xip->xi_queue->eq_mod);
1689 		fmd_event_hold(e);
1690 		fmd_event_rele(e);
1691 		goto done;
1692 	}
1693 
1694 	/*
1695 	 * If a list.isolated event is received, update resource cache status
1696 	 */
1697 	if (fmd_event_match(e, FMD_EVT_PROTOCOL, FM_LIST_ISOLATED_CLASS)) {
1698 		uint8_t *statusp;
1699 		uint_t nelem = 0;
1700 
1701 		(void) nvlist_lookup_uint8_array(nvl, FM_SUSPECT_FAULT_STATUS,
1702 		    &statusp, &nelem);
1703 		fmd_module_lock(xip->xi_queue->eq_mod);
1704 		if (nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) == 0 &&
1705 		    (cp = fmd_case_hash_lookup(fmd.d_cases, uuid)) != NULL) {
1706 			fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1707 			if (cip->ci_xprt != NULL)
1708 				fmd_case_update_status(cp, statusp,
1709 				    cip->ci_proxy_asru, cip->ci_diag_asru);
1710 			fmd_case_rele(cp);
1711 		}
1712 		fmd_module_unlock(xip->xi_queue->eq_mod);
1713 		fmd_event_hold(e);
1714 		fmd_event_rele(e);
1715 		goto done;
1716 	}
1717 
1718 	/*
1719 	 * If a list.resolved event is received, resolve the local case.
1720 	 */
1721 	if (fmd_event_match(e, FMD_EVT_PROTOCOL, FM_LIST_RESOLVED_CLASS)) {
1722 		fmd_module_lock(xip->xi_queue->eq_mod);
1723 		if (nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) == 0 &&
1724 		    (cp = fmd_case_hash_lookup(fmd.d_cases, uuid)) != NULL) {
1725 			fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1726 			if (cip->ci_xprt != NULL)
1727 				fmd_case_transition(cp, (cip->ci_state ==
1728 				    FMD_CASE_REPAIRED) ? FMD_CASE_RESOLVED :
1729 				    (cip->ci_state == FMD_CASE_CLOSED) ?
1730 				    FMD_CASE_REPAIRED : FMD_CASE_CLOSE_WAIT,
1731 				    FMD_CF_RESOLVED);
1732 			fmd_case_rele(cp);
1733 		}
1734 		fmd_module_unlock(xip->xi_queue->eq_mod);
1735 		fmd_event_hold(e);
1736 		fmd_event_rele(e);
1737 		goto done;
1738 	}
1739 
1740 	if (logonly == FMD_B_TRUE || (xip->xi_flags & FMD_XPRT_EXTERNAL)) {
1741 		/*
1742 		 * Don't proxy ereports on an EXTERNAL transport - we won't
1743 		 * know how to diagnose them with the wrong topology. Note
1744 		 * that here (and above) we have to hold/release the event in
1745 		 * order for it to be freed.
1746 		 */
1747 		fmd_event_hold(e);
1748 		fmd_event_rele(e);
1749 	} else if (isproto == FMD_B_TRUE)
1750 		fmd_dispq_dispatch(dp->d_disp, e, class);
1751 	else
1752 		fmd_modhash_dispatch(dp->d_mod_hash, e);
1753 done:
1754 	(void) pthread_mutex_lock(&xip->xi_lock);
1755 
1756 	ASSERT(xip->xi_busy != 0);
1757 	xip->xi_busy--;
1758 
1759 	(void) pthread_cond_broadcast(&xip->xi_cv);
1760 	(void) pthread_mutex_unlock(&xip->xi_lock);
1761 }
1762 
1763 void
1764 fmd_xprt_uuclose(fmd_xprt_t *xp, const char *uuid)
1765 {
1766 	fmd_xprt_impl_t *xip = (fmd_xprt_impl_t *)xp;
1767 
1768 	fmd_event_t *e;
1769 	nvlist_t *nvl;
1770 	char *s;
1771 
1772 	if ((xip->xi_flags & FMD_XPRT_RDWR) == FMD_XPRT_RDONLY)
1773 		return; /* read-only transports do not proxy uuclose */
1774 
1775 	TRACE((FMD_DBG_XPRT, "xprt %u closing case %s\n", xip->xi_id, uuid));
1776 
1777 	nvl = fmd_protocol_xprt_uuclose(xip->xi_queue->eq_mod,
1778 	    "resource.fm.xprt.uuclose", xip->xi_version, uuid);
1779 
1780 	(void) nvlist_lookup_string(nvl, FM_CLASS, &s);
1781 	e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, s);
1782 	fmd_eventq_insert_at_time(xip->xi_queue, e);
1783 }
1784 
1785 /*
1786  * On proxy side, send back uuresolved request to diagnosing side
1787  */
1788 void
1789 fmd_xprt_uuresolved(fmd_xprt_t *xp, const char *uuid)
1790 {
1791 	fmd_xprt_impl_t *xip = (fmd_xprt_impl_t *)xp;
1792 
1793 	fmd_event_t *e;
1794 	nvlist_t *nvl;
1795 	char *s;
1796 
1797 	if ((xip->xi_flags & FMD_XPRT_RDWR) == FMD_XPRT_RDONLY)
1798 		return; /* read-only transports do not proxy uuresolved */
1799 
1800 	TRACE((FMD_DBG_XPRT, "xprt %u resolving case %s\n", xip->xi_id, uuid));
1801 
1802 	nvl = fmd_protocol_xprt_uuresolved(xip->xi_queue->eq_mod,
1803 	    "resource.fm.xprt.uuresolved", xip->xi_version, uuid);
1804 
1805 	(void) nvlist_lookup_string(nvl, FM_CLASS, &s);
1806 	e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, s);
1807 	fmd_eventq_insert_at_time(xip->xi_queue, e);
1808 }
1809 
1810 /*
1811  * On proxy side, send back repair/acquit/etc request to diagnosing side
1812  */
1813 void
1814 fmd_xprt_updated(fmd_xprt_t *xp, const char *uuid, uint8_t *statusp,
1815 	uint8_t *has_asrup, uint_t nelem)
1816 {
1817 	fmd_xprt_impl_t *xip = (fmd_xprt_impl_t *)xp;
1818 
1819 	fmd_event_t *e;
1820 	nvlist_t *nvl;
1821 	char *s;
1822 
1823 	if ((xip->xi_flags & FMD_XPRT_RDWR) == FMD_XPRT_RDONLY)
1824 		return; /* read-only transports do not support remote repairs */
1825 
1826 	TRACE((FMD_DBG_XPRT, "xprt %u updating case %s\n", xip->xi_id, uuid));
1827 
1828 	nvl = fmd_protocol_xprt_updated(xip->xi_queue->eq_mod,
1829 	    "resource.fm.xprt.updated", xip->xi_version, uuid, statusp,
1830 	    has_asrup, nelem);
1831 
1832 	(void) nvlist_lookup_string(nvl, FM_CLASS, &s);
1833 	e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, s);
1834 	fmd_eventq_insert_at_time(xip->xi_queue, e);
1835 }
1836 
1837 /*
1838  * Insert the specified class into our remote subscription hash.  If the class
1839  * is already present, bump the reference count; otherwise add it to the hash
1840  * and then enqueue an event for our remote peer to proxy our subscription.
1841  */
1842 void
1843 fmd_xprt_subscribe(fmd_xprt_t *xp, const char *class)
1844 {
1845 	fmd_xprt_impl_t *xip = (fmd_xprt_impl_t *)xp;
1846 
1847 	uint_t refs;
1848 	nvlist_t *nvl;
1849 	fmd_event_t *e;
1850 	char *s;
1851 
1852 	if ((xip->xi_flags & FMD_XPRT_RDWR) == FMD_XPRT_RDONLY)
1853 		return; /* read-only transports do not proxy subscriptions */
1854 
1855 	if (!(xip->xi_flags & FMD_XPRT_SUBSCRIBER))
1856 		return; /* transport is not yet an active subscriber */
1857 
1858 	(void) pthread_mutex_lock(&xip->xi_lock);
1859 	refs = fmd_xprt_class_hash_insert(xip, &xip->xi_rsub, class);
1860 	(void) pthread_mutex_unlock(&xip->xi_lock);
1861 
1862 	if (refs > 1)
1863 		return; /* we've already asked our peer for this subscription */
1864 
1865 	fmd_dprintf(FMD_DBG_XPRT,
1866 	    "xprt %u subscribing to %s\n", xip->xi_id, class);
1867 
1868 	nvl = fmd_protocol_xprt_sub(xip->xi_queue->eq_mod,
1869 	    "resource.fm.xprt.subscribe", xip->xi_version, class);
1870 
1871 	(void) nvlist_lookup_string(nvl, FM_CLASS, &s);
1872 	e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, s);
1873 	fmd_eventq_insert_at_time(xip->xi_queue, e);
1874 }
1875 
1876 /*
1877  * Delete the specified class from the remote subscription hash.  If the
1878  * reference count drops to zero, ask our remote peer to unsubscribe by proxy.
1879  */
1880 void
1881 fmd_xprt_unsubscribe(fmd_xprt_t *xp, const char *class)
1882 {
1883 	fmd_xprt_impl_t *xip = (fmd_xprt_impl_t *)xp;
1884 
1885 	uint_t refs;
1886 	nvlist_t *nvl;
1887 	fmd_event_t *e;
1888 	char *s;
1889 
1890 	if ((xip->xi_flags & FMD_XPRT_RDWR) == FMD_XPRT_RDONLY)
1891 		return; /* read-only transports do not proxy subscriptions */
1892 
1893 	if (!(xip->xi_flags & FMD_XPRT_SUBSCRIBER))
1894 		return; /* transport is not yet an active subscriber */
1895 
1896 	/*
1897 	 * If the subscription reference count drops to zero in xi_rsub, insert
1898 	 * an entry into the xi_usub hash indicating we await an unsuback event.
1899 	 */
1900 	(void) pthread_mutex_lock(&xip->xi_lock);
1901 
1902 	if ((refs = fmd_xprt_class_hash_delete(xip, &xip->xi_rsub, class)) == 0)
1903 		(void) fmd_xprt_class_hash_insert(xip, &xip->xi_usub, class);
1904 
1905 	(void) pthread_mutex_unlock(&xip->xi_lock);
1906 
1907 	if (refs != 0)
1908 		return; /* other subscriptions for this class still active */
1909 
1910 	fmd_dprintf(FMD_DBG_XPRT,
1911 	    "xprt %u unsubscribing from %s\n", xip->xi_id, class);
1912 
1913 	nvl = fmd_protocol_xprt_sub(xip->xi_queue->eq_mod,
1914 	    "resource.fm.xprt.unsubscribe", xip->xi_version, class);
1915 
1916 	(void) nvlist_lookup_string(nvl, FM_CLASS, &s);
1917 	e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, s);
1918 	fmd_eventq_insert_at_time(xip->xi_queue, e);
1919 }
1920 
1921 static void
1922 fmd_xprt_subscribe_xid(fmd_idspace_t *ids, id_t id, void *class)
1923 {
1924 	fmd_xprt_t *xp;
1925 
1926 	if ((xp = fmd_idspace_hold(ids, id)) != NULL) {
1927 		fmd_xprt_subscribe(xp, class);
1928 		fmd_idspace_rele(ids, id);
1929 	}
1930 }
1931 
1932 void
1933 fmd_xprt_subscribe_all(const char *class)
1934 {
1935 	fmd_idspace_t *ids = fmd.d_xprt_ids;
1936 
1937 	if (ids->ids_count != 0)
1938 		fmd_idspace_apply(ids, fmd_xprt_subscribe_xid, (void *)class);
1939 }
1940 
1941 static void
1942 fmd_xprt_unsubscribe_xid(fmd_idspace_t *ids, id_t id, void *class)
1943 {
1944 	fmd_xprt_t *xp;
1945 
1946 	if ((xp = fmd_idspace_hold(ids, id)) != NULL) {
1947 		fmd_xprt_unsubscribe(xp, class);
1948 		fmd_idspace_rele(ids, id);
1949 	}
1950 }
1951 
1952 void
1953 fmd_xprt_unsubscribe_all(const char *class)
1954 {
1955 	fmd_idspace_t *ids = fmd.d_xprt_ids;
1956 
1957 	if (ids->ids_count != 0)
1958 		fmd_idspace_apply(ids, fmd_xprt_unsubscribe_xid, (void *)class);
1959 }
1960 
1961 /*ARGSUSED*/
1962 static void
1963 fmd_xprt_suspend_xid(fmd_idspace_t *ids, id_t id, void *arg)
1964 {
1965 	fmd_xprt_t *xp;
1966 
1967 	if ((xp = fmd_idspace_hold(ids, id)) != NULL) {
1968 		fmd_xprt_xsuspend(xp, FMD_XPRT_DSUSPENDED);
1969 		fmd_idspace_rele(ids, id);
1970 	}
1971 }
1972 
1973 void
1974 fmd_xprt_suspend_all(void)
1975 {
1976 	fmd_idspace_t *ids = fmd.d_xprt_ids;
1977 
1978 	(void) pthread_mutex_lock(&fmd.d_xprt_lock);
1979 
1980 	if (fmd.d_xprt_suspend++ != 0) {
1981 		(void) pthread_mutex_unlock(&fmd.d_xprt_lock);
1982 		return; /* already suspended */
1983 	}
1984 
1985 	if (ids->ids_count != 0)
1986 		fmd_idspace_apply(ids, fmd_xprt_suspend_xid, NULL);
1987 
1988 	(void) pthread_mutex_unlock(&fmd.d_xprt_lock);
1989 }
1990 
1991 /*ARGSUSED*/
1992 static void
1993 fmd_xprt_resume_xid(fmd_idspace_t *ids, id_t id, void *arg)
1994 {
1995 	fmd_xprt_t *xp;
1996 
1997 	if ((xp = fmd_idspace_hold(ids, id)) != NULL) {
1998 		fmd_xprt_xresume(xp, FMD_XPRT_DSUSPENDED);
1999 		fmd_idspace_rele(ids, id);
2000 	}
2001 }
2002 
2003 void
2004 fmd_xprt_resume_all(void)
2005 {
2006 	fmd_idspace_t *ids = fmd.d_xprt_ids;
2007 
2008 	(void) pthread_mutex_lock(&fmd.d_xprt_lock);
2009 
2010 	if (fmd.d_xprt_suspend == 0)
2011 		fmd_panic("fmd_xprt_suspend/resume_all mismatch\n");
2012 
2013 	if (--fmd.d_xprt_suspend != 0) {
2014 		(void) pthread_mutex_unlock(&fmd.d_xprt_lock);
2015 		return; /* not ready to be resumed */
2016 	}
2017 
2018 	if (ids->ids_count != 0)
2019 		fmd_idspace_apply(ids, fmd_xprt_resume_xid, NULL);
2020 
2021 	(void) pthread_mutex_unlock(&fmd.d_xprt_lock);
2022 }
2023