xref: /illumos-gate/usr/src/cmd/fm/fmd/common/fmd_sysevent.c (revision cbf75e67acb6c32a2f4884f28a839d59f7988d37)
1d9638e54Smws /*
2d9638e54Smws  * CDDL HEADER START
3d9638e54Smws  *
4d9638e54Smws  * The contents of this file are subject to the terms of the
59f8ca725Sstephh  * Common Development and Distribution License (the "License").
69f8ca725Sstephh  * You may not use this file except in compliance with the License.
7d9638e54Smws  *
8d9638e54Smws  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9d9638e54Smws  * or http://www.opensolaris.org/os/licensing.
10d9638e54Smws  * See the License for the specific language governing permissions
11d9638e54Smws  * and limitations under the License.
12d9638e54Smws  *
13d9638e54Smws  * When distributing Covered Code, include this CDDL HEADER in each
14d9638e54Smws  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15d9638e54Smws  * If applicable, add the following below this CDDL HEADER, with the
16d9638e54Smws  * fields enclosed by brackets "[]" replaced with your own identifying
17d9638e54Smws  * information: Portions Copyright [yyyy] [name of copyright owner]
18d9638e54Smws  *
19d9638e54Smws  * CDDL HEADER END
20d9638e54Smws  */
21d9638e54Smws 
22d9638e54Smws /*
23*cbf75e67SStephen Hanson  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24d9638e54Smws  * Use is subject to license terms.
25d9638e54Smws  */
26d9638e54Smws 
27d9638e54Smws #include <sys/sysevent/eventdefs.h>
28d9638e54Smws #include <sys/sysevent.h>
29d9638e54Smws #include <sys/sysevent_impl.h>
30d9638e54Smws #include <sys/fm/protocol.h>
31d9638e54Smws #include <sys/sysmacros.h>
32d9638e54Smws #include <sys/dumphdr.h>
33d9638e54Smws #include <sys/dumpadm.h>
347aec1d6eScindi #include <sys/fm/util.h>
35d9638e54Smws 
36d9638e54Smws #include <libsysevent.h>
37d9638e54Smws #include <libnvpair.h>
38d9638e54Smws #include <alloca.h>
39d9638e54Smws #include <limits.h>
40d9638e54Smws #include <strings.h>
41d9638e54Smws #include <unistd.h>
42d9638e54Smws #include <fcntl.h>
43d9638e54Smws #include <errno.h>
44d9638e54Smws 
45d9638e54Smws #undef MUTEX_HELD
46d9638e54Smws #undef RW_READ_HELD
47d9638e54Smws #undef RW_WRITE_HELD
48d9638e54Smws 
49d9638e54Smws #include <fmd_api.h>
50d9638e54Smws #include <fmd_log.h>
51d9638e54Smws #include <fmd_subr.h>
52d9638e54Smws #include <fmd_dispq.h>
539af3851aSeschrock #include <fmd_dr.h>
54d9638e54Smws #include <fmd_module.h>
559af3851aSeschrock #include <fmd_protocol.h>
56d9638e54Smws #include <fmd_scheme.h>
57d9638e54Smws #include <fmd_error.h>
58d9638e54Smws 
59d9638e54Smws #include <fmd.h>
60d9638e54Smws 
61d9638e54Smws static char *sysev_channel;	/* event channel to which we are subscribed */
62d9638e54Smws static char *sysev_class;	/* event class to which we are subscribed */
63d9638e54Smws static char *sysev_device;	/* device path to use for replaying events */
64d9638e54Smws static char *sysev_sid;		/* event channel subscriber identifier */
65d9638e54Smws static void *sysev_evc;		/* event channel cookie from evc_bind */
66d9638e54Smws 
67d9638e54Smws static fmd_xprt_t *sysev_xprt;
6897c04605Scy152378 static int sysev_xprt_refcnt;
69d9638e54Smws static fmd_hdl_t *sysev_hdl;
70d9638e54Smws 
71d9638e54Smws static struct sysev_stats {
72d9638e54Smws 	fmd_stat_t dump_replay;
73d9638e54Smws 	fmd_stat_t dump_lost;
74d9638e54Smws 	fmd_stat_t bad_class;
75d9638e54Smws 	fmd_stat_t bad_attr;
76d9638e54Smws 	fmd_stat_t eagain;
77d9638e54Smws } sysev_stats = {
78d9638e54Smws 	{ "dump_replay", FMD_TYPE_UINT64, "events replayed from dump device" },
79d9638e54Smws 	{ "dump_lost", FMD_TYPE_UINT64, "events lost from dump device" },
80d9638e54Smws 	{ "bad_class", FMD_TYPE_UINT64, "events dropped due to invalid class" },
81d9638e54Smws 	{ "bad_attr", FMD_TYPE_UINT64, "events dropped due to invalid nvlist" },
82d9638e54Smws 	{ "eagain", FMD_TYPE_UINT64, "events retried due to low memory" },
83d9638e54Smws };
84d9638e54Smws 
/*
 * Synchronisation between the channel callback, the replay timer and module
 * teardown.  sysev_mutex protects sysev_xprt_refcnt, sysev_replay_wait and
 * sysev_exiting; sysev_cv is broadcast when the replay completes and when
 * the last in-flight callback drains during exit.
 */
static pthread_cond_t sysev_cv = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t sysev_mutex = PTHREAD_MUTEX_INITIALIZER;
static int sysev_replay_wait = 1;	/* cleared at end of sysev_replay() */
static int sysev_exiting;		/* set to 1 by sysev_fini() */
8997c04605Scy152378 
909af3851aSeschrock /*
919af3851aSeschrock  * Entry point for legacy sysevents.  This function is responsible for two
929af3851aSeschrock  * things: passing off interesting events to the DR handler, and converting
939af3851aSeschrock  * sysevents into resource events that modules can then subscribe to.
949af3851aSeschrock  */
959af3851aSeschrock static void
969af3851aSeschrock sysev_legacy(sysevent_t *sep)
979af3851aSeschrock {
989af3851aSeschrock 	const char *class = sysevent_get_class_name(sep);
999af3851aSeschrock 	const char *subclass = sysevent_get_subclass_name(sep);
1009af3851aSeschrock 	char *fullclass;
1019af3851aSeschrock 	size_t len;
1029af3851aSeschrock 	nvlist_t *attr, *nvl;
1039af3851aSeschrock 	fmd_event_t *e;
1049af3851aSeschrock 	hrtime_t hrt;
1059af3851aSeschrock 
1069af3851aSeschrock 	/* notify the DR subsystem of the event */
1079af3851aSeschrock 	fmd_dr_event(sep);
1089af3851aSeschrock 
1099af3851aSeschrock 	/* get the matching sysevent name */
1109af3851aSeschrock 	len = snprintf(NULL, 0, "%s%s.%s", SYSEVENT_RSRC_CLASS,
1119af3851aSeschrock 	    class, subclass);
1129af3851aSeschrock 	fullclass = alloca(len + 1);
1139af3851aSeschrock 	(void) snprintf(fullclass, len + 1, "%s%s.%s",
1149af3851aSeschrock 	    SYSEVENT_RSRC_CLASS, class, subclass);
1159af3851aSeschrock 
1169af3851aSeschrock 	/* construct the event payload */
1179af3851aSeschrock 	(void) nvlist_xalloc(&nvl, NV_UNIQUE_NAME, &fmd.d_nva);
1189af3851aSeschrock 	(void) nvlist_add_string(nvl, FM_CLASS, fullclass);
1199af3851aSeschrock 	(void) nvlist_add_uint8(nvl, FM_VERSION, FM_RSRC_VERSION);
120825ba0f2Srobj 	if (sysevent_get_attr_list(sep, &attr) == 0) {
1219af3851aSeschrock 		(void) nvlist_merge(nvl, attr, 0);
122825ba0f2Srobj 		nvlist_free(attr);
123825ba0f2Srobj 	}
1249af3851aSeschrock 
1259af3851aSeschrock 	/*
1269af3851aSeschrock 	 * Dispatch the event.  Ideally, we'd like to use the same transport
1279af3851aSeschrock 	 * interface as sysev_recv(), but because the legacy sysevent mechanism
1289af3851aSeschrock 	 * puts in a thread outside fmd's control, using the module APIs is
1299af3851aSeschrock 	 * impossible.
1309af3851aSeschrock 	 */
1319af3851aSeschrock 	sysevent_get_time(sep, &hrt);
1329af3851aSeschrock 	(void) nvlist_lookup_string(nvl, FM_CLASS, &fullclass);
1339af3851aSeschrock 	e = fmd_event_create(FMD_EVT_PROTOCOL, hrt, nvl, fullclass);
1349af3851aSeschrock 	fmd_dispq_dispatch(fmd.d_disp, e, fullclass);
1359af3851aSeschrock }
1369f8ca725Sstephh 
137d9638e54Smws /*
138d9638e54Smws  * Receive an event from the SysEvent channel and post it to our transport.
139d9638e54Smws  * Under extreme low-memory situations where we cannot event unpack the event,
140d9638e54Smws  * we can request that SysEvent redeliver the event later by returning EAGAIN.
141d9638e54Smws  * If we do this too many times, the kernel will drop the event.  Rather than
142d9638e54Smws  * keeping state per-event, we simply attempt a garbage-collect, hoping that
143d9638e54Smws  * enough free memory will be available by the time the event is redelivered.
144d9638e54Smws  */
145d9638e54Smws static int
146d9638e54Smws sysev_recv(sysevent_t *sep, void *arg)
147d9638e54Smws {
148d9638e54Smws 	uint64_t seq = sysevent_get_seq(sep);
149d9638e54Smws 	fmd_xprt_t *xp = arg;
150d9638e54Smws 	nvlist_t *nvl;
151d9638e54Smws 	hrtime_t hrt;
15297c04605Scy152378 	int rc = 0;
153d9638e54Smws 
15497c04605Scy152378 	(void) pthread_mutex_lock(&sysev_mutex);
15597c04605Scy152378 	if (sysev_exiting == 1) {
15697c04605Scy152378 		while (sysev_xprt_refcnt > 0)
15797c04605Scy152378 			(void) pthread_cond_wait(&sysev_cv, &sysev_mutex);
15897c04605Scy152378 		(void) pthread_mutex_unlock(&sysev_mutex);
15997c04605Scy152378 		return (EAGAIN);
16097c04605Scy152378 	}
16197c04605Scy152378 	sysev_xprt_refcnt++;
1629f8ca725Sstephh 	while (sysev_replay_wait)
16397c04605Scy152378 		(void) pthread_cond_wait(&sysev_cv, &sysev_mutex);
16497c04605Scy152378 	(void) pthread_mutex_unlock(&sysev_mutex);
1659f8ca725Sstephh 
166d9638e54Smws 	if (strcmp(sysevent_get_class_name(sep), EC_FM) != 0) {
167d9638e54Smws 		fmd_hdl_error(sysev_hdl, "discarding event 0x%llx: unexpected"
168d9638e54Smws 		    " transport class %s\n", seq, sysevent_get_class_name(sep));
169d9638e54Smws 		sysev_stats.bad_class.fmds_value.ui64++;
17097c04605Scy152378 	} else if (sysevent_get_attr_list(sep, &nvl) != 0) {
171d9638e54Smws 		if (errno == EAGAIN || errno == ENOMEM) {
172d9638e54Smws 			fmd_modhash_tryapply(fmd.d_mod_hash, fmd_module_trygc);
173d9638e54Smws 			fmd_scheme_hash_trygc(fmd.d_schemes);
174d9638e54Smws 			sysev_stats.eagain.fmds_value.ui64++;
17597c04605Scy152378 			rc = EAGAIN;
17697c04605Scy152378 		} else {
17797c04605Scy152378 			fmd_hdl_error(sysev_hdl, "discarding event 0x%llx: "
17897c04605Scy152378 			    "missing or invalid payload", seq);
179d9638e54Smws 			sysev_stats.bad_attr.fmds_value.ui64++;
180d9638e54Smws 		}
18197c04605Scy152378 	} else {
182d9638e54Smws 		sysevent_get_time(sep, &hrt);
183d9638e54Smws 		fmd_xprt_post(sysev_hdl, xp, nvl, hrt);
18497c04605Scy152378 	}
18597c04605Scy152378 
18697c04605Scy152378 	(void) pthread_mutex_lock(&sysev_mutex);
18797c04605Scy152378 	if (--sysev_xprt_refcnt == 0 && sysev_exiting == 1)
18897c04605Scy152378 		(void) pthread_cond_broadcast(&sysev_cv);
18997c04605Scy152378 	(void) pthread_mutex_unlock(&sysev_mutex);
19097c04605Scy152378 
19197c04605Scy152378 	return (rc);
192d9638e54Smws }
193d9638e54Smws 
/*
 * Checksum algorithm used by the dump transport for verifying the content of
 * error reports saved on the dump device (copy of the kernel's checksum32()).
 *
 * For each of the 'length' bytes at 'cp_arg', the running 32-bit sum is
 * rotated right by one bit and the byte value is added to it.  An empty
 * buffer yields zero.
 */
static uint32_t
sysev_checksum(void *cp_arg, size_t length)
{
	const unsigned char *bytes = cp_arg;
	uint32_t sum = 0;
	size_t i;

	for (i = 0; i < length; i++)
		sum = ((sum << 31) | (sum >> 1)) + bytes[i];

	return (sum);
}
209d9638e54Smws 
210d9638e54Smws /*
211d9638e54Smws  * Replay saved events from the dump transport.  This function is installed as
212d9638e54Smws  * the timer callback and is called only once during the module's lifetime.
213d9638e54Smws  */
214d9638e54Smws /*ARGSUSED*/
215d9638e54Smws static void
216d9638e54Smws sysev_replay(fmd_hdl_t *hdl, id_t id, void *arg)
217d9638e54Smws {
218d9638e54Smws 	char *dumpdev;
219d9638e54Smws 	off64_t off, off0;
220d9638e54Smws 	int fd, err;
221d9638e54Smws 
222d9638e54Smws 	/*
223d9638e54Smws 	 * Determine the appropriate dump device to use for replaying pending
224d9638e54Smws 	 * error reports.  If the device property is NULL (default), we
225d9638e54Smws 	 * open and query /dev/dump to determine the current dump device.
226d9638e54Smws 	 */
227d9638e54Smws 	if ((dumpdev = sysev_device) == NULL) {
228d9638e54Smws 		if ((fd = open("/dev/dump", O_RDONLY)) == -1) {
229d9638e54Smws 			fmd_hdl_error(hdl, "failed to open /dev/dump "
230d9638e54Smws 			    "to locate dump device for event replay");
2319f8ca725Sstephh 			goto done;
232d9638e54Smws 		}
233d9638e54Smws 
234d9638e54Smws 		dumpdev = alloca(PATH_MAX);
235d9638e54Smws 		err = ioctl(fd, DIOCGETDEV, dumpdev);
236d9638e54Smws 		(void) close(fd);
237d9638e54Smws 
238d9638e54Smws 		if (err == -1) {
239d9638e54Smws 			if (errno != ENODEV) {
240d9638e54Smws 				fmd_hdl_error(hdl, "failed to obtain "
241d9638e54Smws 				    "path to dump device for event replay");
242d9638e54Smws 			}
2439f8ca725Sstephh 			goto done;
244d9638e54Smws 		}
245d9638e54Smws 	}
246d9638e54Smws 
247d9638e54Smws 	if (strcmp(dumpdev, "/dev/null") == 0)
2489f8ca725Sstephh 		goto done; /* return silently and skip replay for /dev/null */
249d9638e54Smws 
250d9638e54Smws 	/*
251d9638e54Smws 	 * Open the appropriate device and then determine the offset of the
252d9638e54Smws 	 * start of the ereport dump region located at the end of the device.
253d9638e54Smws 	 */
254d9638e54Smws 	if ((fd = open64(dumpdev, O_RDWR | O_DSYNC)) == -1) {
255d9638e54Smws 		fmd_hdl_error(hdl, "failed to open dump transport %s "
256d9638e54Smws 		    "(pending events will not be replayed)", dumpdev);
2579f8ca725Sstephh 		goto done;
258d9638e54Smws 	}
259d9638e54Smws 
260d9638e54Smws 	off = DUMP_OFFSET + DUMP_LOGSIZE + DUMP_ERPTSIZE;
261d9638e54Smws 	off = off0 = lseek64(fd, -off, SEEK_END) & -DUMP_OFFSET;
262d9638e54Smws 
263d9638e54Smws 	if (off == (off64_t)-1LL) {
264d9638e54Smws 		fmd_hdl_error(hdl, "failed to seek dump transport %s "
265d9638e54Smws 		    "(pending events will not be replayed)", dumpdev);
266d9638e54Smws 		(void) close(fd);
2679f8ca725Sstephh 		goto done;
268d9638e54Smws 	}
269d9638e54Smws 
270d9638e54Smws 	/*
271d9638e54Smws 	 * The ereport dump region is a sequence of erpt_dump_t headers each of
272d9638e54Smws 	 * which is followed by packed nvlist data.  We iterate over them in
273d9638e54Smws 	 * order, unpacking and dispatching each one to our dispatch queue.
274d9638e54Smws 	 */
275d9638e54Smws 	for (;;) {
276d9638e54Smws 		char nvbuf[ERPT_DATA_SZ];
277d9638e54Smws 		uint32_t chksum;
278d9638e54Smws 		erpt_dump_t ed;
279d9638e54Smws 		nvlist_t *nvl;
280d9638e54Smws 
281d9638e54Smws 		fmd_timeval_t ftv, tod;
282d9638e54Smws 		hrtime_t hrt;
283d9638e54Smws 		uint64_t ena;
284d9638e54Smws 
285d9638e54Smws 		if (pread64(fd, &ed, sizeof (ed), off) != sizeof (ed)) {
286d9638e54Smws 			fmd_hdl_error(hdl, "failed to read from dump "
287d9638e54Smws 			    "transport %s (pending events lost)", dumpdev);
288d9638e54Smws 			break;
289d9638e54Smws 		}
290d9638e54Smws 
291d9638e54Smws 		if (ed.ed_magic == 0 && ed.ed_size == 0)
292d9638e54Smws 			break; /* end of list: all zero */
293d9638e54Smws 
294d9638e54Smws 		if (ed.ed_magic == 0) {
295d9638e54Smws 			off += sizeof (ed) + ed.ed_size;
296d9638e54Smws 			continue; /* continue searching */
297d9638e54Smws 		}
298d9638e54Smws 
299d9638e54Smws 		if (ed.ed_magic != ERPT_MAGIC) {
300d9638e54Smws 			/*
301d9638e54Smws 			 * Stop reading silently if the first record has the
302d9638e54Smws 			 * wrong magic number; this likely indicates that we
303d9638e54Smws 			 * rebooted from non-FMA bits or paged over the dump.
304d9638e54Smws 			 */
305d9638e54Smws 			if (off == off0)
306d9638e54Smws 				break;
307d9638e54Smws 
308d9638e54Smws 			fmd_hdl_error(hdl, "invalid dump transport "
309d9638e54Smws 			    "record at %llx (magic number %x, expected %x)\n",
310d9638e54Smws 			    (u_longlong_t)off, ed.ed_magic, ERPT_MAGIC);
311d9638e54Smws 			break;
312d9638e54Smws 		}
313d9638e54Smws 
314d9638e54Smws 		if (ed.ed_size > ERPT_DATA_SZ) {
315d9638e54Smws 			fmd_hdl_error(hdl, "invalid dump transport "
316d9638e54Smws 			    "record at %llx size (%u exceeds limit)\n",
317d9638e54Smws 			    (u_longlong_t)off, ed.ed_size);
318d9638e54Smws 			break;
319d9638e54Smws 		}
320d9638e54Smws 
321d9638e54Smws 		if (pread64(fd, nvbuf, ed.ed_size,
322d9638e54Smws 		    off + sizeof (ed)) != ed.ed_size) {
323d9638e54Smws 			fmd_hdl_error(hdl, "failed to read dump "
324d9638e54Smws 			    "transport event (offset %llx)", (u_longlong_t)off);
325d9638e54Smws 
326d9638e54Smws 			sysev_stats.dump_lost.fmds_value.ui64++;
327d9638e54Smws 			goto next;
328d9638e54Smws 		}
329d9638e54Smws 
330d9638e54Smws 		if ((chksum = sysev_checksum(nvbuf,
331d9638e54Smws 		    ed.ed_size)) != ed.ed_chksum) {
332d9638e54Smws 			fmd_hdl_error(hdl, "dump transport event at "
333d9638e54Smws 			    "offset %llx is corrupt (checksum %x != %x)\n",
334d9638e54Smws 			    (u_longlong_t)off, chksum, ed.ed_chksum);
335d9638e54Smws 
336d9638e54Smws 			sysev_stats.dump_lost.fmds_value.ui64++;
337d9638e54Smws 			goto next;
338d9638e54Smws 		}
339d9638e54Smws 
340d9638e54Smws 		if ((err = nvlist_xunpack(nvbuf,
341d9638e54Smws 		    ed.ed_size, &nvl, &fmd.d_nva)) != 0) {
342d9638e54Smws 			fmd_hdl_error(hdl, "failed to unpack dump "
343d9638e54Smws 			    "transport event at offset %llx: %s\n",
344d9638e54Smws 			    (u_longlong_t)off, fmd_strerror(err));
345d9638e54Smws 
346d9638e54Smws 			sysev_stats.dump_lost.fmds_value.ui64++;
347d9638e54Smws 			goto next;
348d9638e54Smws 		}
349d9638e54Smws 
350d9638e54Smws 		/*
351d9638e54Smws 		 * If ed_hrt_nsec is set it contains the gethrtime() value from
352d9638e54Smws 		 * when the event was originally enqueued for the transport.
353d9638e54Smws 		 * If it is zero, we use the weaker bound ed_hrt_base instead.
354d9638e54Smws 		 */
355d9638e54Smws 		if (ed.ed_hrt_nsec != 0)
356d9638e54Smws 			hrt = ed.ed_hrt_nsec;
357d9638e54Smws 		else
358d9638e54Smws 			hrt = ed.ed_hrt_base;
359d9638e54Smws 
360d9638e54Smws 		/*
361d9638e54Smws 		 * If this is an FMA protocol event of class "ereport.*" that
362d9638e54Smws 		 * contains valid ENA, we can improve the precision of 'hrt'.
363d9638e54Smws 		 */
364d9638e54Smws 		if (nvlist_lookup_uint64(nvl, FM_EREPORT_ENA, &ena) == 0)
365d9638e54Smws 			hrt = fmd_time_ena2hrt(hrt, ena);
366d9638e54Smws 
367d9638e54Smws 		/*
368d9638e54Smws 		 * Now convert 'hrt' to an adjustable TOD based on the values
369d9638e54Smws 		 * in ed_tod_base which correspond to one another and are
370d9638e54Smws 		 * sampled before reboot using the old gethrtime() clock.
371d9638e54Smws 		 * fmd_event_recreate() will use this TOD value to re-assign
372d9638e54Smws 		 * the event an updated gethrtime() value based on the current
373d9638e54Smws 		 * value of the non-adjustable gethrtime() clock.  Phew.
374d9638e54Smws 		 */
375d9638e54Smws 		tod.ftv_sec = ed.ed_tod_base.sec;
376d9638e54Smws 		tod.ftv_nsec = ed.ed_tod_base.nsec;
377d9638e54Smws 		fmd_time_hrt2tod(ed.ed_hrt_base, &tod, hrt, &ftv);
378d9638e54Smws 
379d9638e54Smws 		(void) nvlist_remove_all(nvl, FMD_EVN_TOD);
380d9638e54Smws 		(void) nvlist_add_uint64_array(nvl,
381d9638e54Smws 		    FMD_EVN_TOD, (uint64_t *)&ftv, 2);
382d9638e54Smws 
383d9638e54Smws 		fmd_xprt_post(hdl, sysev_xprt, nvl, 0);
384d9638e54Smws 		sysev_stats.dump_replay.fmds_value.ui64++;
385d9638e54Smws 
386d9638e54Smws next:
387d9638e54Smws 		/*
388d9638e54Smws 		 * Reset the magic number for the event record to zero so that
389d9638e54Smws 		 * we do not replay the same event multiple times.
390d9638e54Smws 		 */
391d9638e54Smws 		ed.ed_magic = 0;
392d9638e54Smws 
393d9638e54Smws 		if (pwrite64(fd, &ed, sizeof (ed), off) != sizeof (ed)) {
394d9638e54Smws 			fmd_hdl_error(hdl, "failed to mark dump "
395d9638e54Smws 			    "transport event (offset %llx)", (u_longlong_t)off);
396d9638e54Smws 		}
397d9638e54Smws 
398d9638e54Smws 		off += sizeof (ed) + ed.ed_size;
399d9638e54Smws 	}
400d9638e54Smws 
401d9638e54Smws 	(void) close(fd);
4029f8ca725Sstephh done:
40397c04605Scy152378 	(void) pthread_mutex_lock(&sysev_mutex);
4049f8ca725Sstephh 	sysev_replay_wait = 0;
40597c04605Scy152378 	(void) pthread_cond_broadcast(&sysev_cv);
40697c04605Scy152378 	(void) pthread_mutex_unlock(&sysev_mutex);
407d9638e54Smws }
408d9638e54Smws 
409d9638e54Smws static const fmd_prop_t sysev_props[] = {
410d9638e54Smws 	{ "class", FMD_TYPE_STRING, EC_ALL },		/* event class */
411d9638e54Smws 	{ "device", FMD_TYPE_STRING, NULL },		/* replay device */
412d9638e54Smws 	{ "channel", FMD_TYPE_STRING, FM_ERROR_CHAN },	/* channel name */
413d9638e54Smws 	{ "sid", FMD_TYPE_STRING, "fmd" },		/* subscriber id */
414d9638e54Smws 	{ NULL, 0, NULL }
415d9638e54Smws };
416d9638e54Smws 
417d9638e54Smws static const fmd_hdl_ops_t sysev_ops = {
418d9638e54Smws 	NULL,		/* fmdo_recv */
419d9638e54Smws 	sysev_replay,	/* fmdo_timeout */
420d9638e54Smws 	NULL,		/* fmdo_close */
421d9638e54Smws 	NULL,		/* fmdo_stats */
422d9638e54Smws 	NULL,		/* fmdo_gc */
423d9638e54Smws 	NULL,		/* fmdo_send */
424d9638e54Smws };
425d9638e54Smws 
426d9638e54Smws static const fmd_hdl_info_t sysev_info = {
427d9638e54Smws 	"SysEvent Transport Agent", "1.0", &sysev_ops, sysev_props
428d9638e54Smws };
429d9638e54Smws 
430d9638e54Smws /*
431d9638e54Smws  * Bind to the sysevent channel we use for listening for error events and then
4329af3851aSeschrock  * subscribe to appropriate events received over this channel.  Setup the
4339af3851aSeschrock  * legacy sysevent handler for creating sysevent resources and forwarding DR
4349af3851aSeschrock  * events.
435d9638e54Smws  */
436d9638e54Smws void
437d9638e54Smws sysev_init(fmd_hdl_t *hdl)
438d9638e54Smws {
439d9638e54Smws 	uint_t flags;
4409af3851aSeschrock 	const char *subclasses[] = { EC_SUB_ALL };
441d9638e54Smws 
442d9638e54Smws 	if (fmd_hdl_register(hdl, FMD_API_VERSION, &sysev_info) != 0)
443d9638e54Smws 		return; /* invalid property settings */
444d9638e54Smws 
445d9638e54Smws 	(void) fmd_stat_create(hdl, FMD_STAT_NOALLOC, sizeof (sysev_stats) /
446d9638e54Smws 	    sizeof (fmd_stat_t), (fmd_stat_t *)&sysev_stats);
447d9638e54Smws 
448d9638e54Smws 	sysev_channel = fmd_prop_get_string(hdl, "channel");
449d9638e54Smws 	sysev_class = fmd_prop_get_string(hdl, "class");
450d9638e54Smws 	sysev_device = fmd_prop_get_string(hdl, "device");
451d9638e54Smws 	sysev_sid = fmd_prop_get_string(hdl, "sid");
452d9638e54Smws 
453d9638e54Smws 	if (sysev_channel == NULL)
454d9638e54Smws 		fmd_hdl_abort(hdl, "channel property must be defined\n");
455d9638e54Smws 
456d9638e54Smws 	if (sysev_sid == NULL)
457d9638e54Smws 		fmd_hdl_abort(hdl, "sid property must be defined\n");
458d9638e54Smws 
459d9638e54Smws 	if ((errno = sysevent_evc_bind(sysev_channel, &sysev_evc,
460d9638e54Smws 	    EVCH_CREAT | EVCH_HOLD_PEND)) != 0) {
461d9638e54Smws 		fmd_hdl_abort(hdl, "failed to bind to event transport "
462d9638e54Smws 		    "channel %s", sysev_channel);
463d9638e54Smws 	}
464d9638e54Smws 
465*cbf75e67SStephen Hanson 	sysev_xprt = fmd_xprt_open(hdl, FMD_XPRT_RDONLY |
466*cbf75e67SStephen Hanson 	    FMD_XPRT_CACHE_AS_LOCAL, NULL, NULL);
467d9638e54Smws 	sysev_hdl = hdl;
468d9638e54Smws 
469d9638e54Smws 	/*
470d9638e54Smws 	 * If we're subscribing to the default channel, keep our subscription
471d9638e54Smws 	 * active even if we die unexpectedly so we continue queuing events.
472d9638e54Smws 	 * If we're not (e.g. running under fmsim), do not specify SUB_KEEP so
473d9638e54Smws 	 * that our event channel will be destroyed if we die unpleasantly.
474d9638e54Smws 	 */
475d9638e54Smws 	if (strcmp(sysev_channel, FM_ERROR_CHAN) == 0)
476d9638e54Smws 		flags = EVCH_SUB_KEEP | EVCH_SUB_DUMP;
477d9638e54Smws 	else
478d9638e54Smws 		flags = EVCH_SUB_DUMP;
479d9638e54Smws 
480d9638e54Smws 	errno = sysevent_evc_subscribe(sysev_evc,
481d9638e54Smws 	    sysev_sid, sysev_class, sysev_recv, sysev_xprt, flags);
482d9638e54Smws 
483d9638e54Smws 	if (errno != 0) {
484d9638e54Smws 		if (errno == EEXIST) {
485d9638e54Smws 			fmd_hdl_abort(hdl, "another fault management daemon is "
486d9638e54Smws 			    "active on transport channel %s\n", sysev_channel);
487d9638e54Smws 		} else {
488d9638e54Smws 			fmd_hdl_abort(hdl, "failed to subscribe to %s on "
489d9638e54Smws 			    "transport channel %s", sysev_class, sysev_channel);
490d9638e54Smws 		}
491d9638e54Smws 	}
492d9638e54Smws 
493d9638e54Smws 	/*
494d9638e54Smws 	 * Once the transport is open, install a single timer to fire at once
495d9638e54Smws 	 * in the context of the module's thread to run sysev_replay().  This
496d9638e54Smws 	 * thread will block in its first fmd_xprt_post() until fmd is ready.
497d9638e54Smws 	 */
498d9638e54Smws 	fmd_hdl_debug(hdl, "transport '%s' open\n", sysev_channel);
499d9638e54Smws 	(void) fmd_timer_install(hdl, NULL, NULL, 0);
5009af3851aSeschrock 
5019af3851aSeschrock 	/*
5029af3851aSeschrock 	 * Open the legacy sysevent handle and subscribe to all events.  These
5039af3851aSeschrock 	 * are automatically converted to "resource.sysevent.*" events so that
5049af3851aSeschrock 	 * modules can manage these events without additional infrastructure.
5059af3851aSeschrock 	 */
5069af3851aSeschrock 	if (geteuid() != 0)
5079af3851aSeschrock 		return;
5089af3851aSeschrock 
5099af3851aSeschrock 	if ((fmd.d_sysev_hdl =
5109af3851aSeschrock 	    sysevent_bind_handle(sysev_legacy)) == NULL)
5119af3851aSeschrock 		fmd_hdl_abort(hdl, "failed to bind to legacy sysevent channel");
5129af3851aSeschrock 
5139af3851aSeschrock 	if (sysevent_subscribe_event(fmd.d_sysev_hdl, EC_ALL,
5149af3851aSeschrock 	    subclasses, 1) != 0)
5159af3851aSeschrock 		fmd_hdl_abort(hdl, "failed to subscribe to legacy sysevents");
516d9638e54Smws }
517d9638e54Smws 
518d9638e54Smws /*
519d9638e54Smws  * Close the channel by unsubscribing and unbinding.  We only do this when a
520d9638e54Smws  * a non-default channel has been selected.  If we're using FM_ERROR_CHAN,
521d9638e54Smws  * the system default, we do *not* want to unsubscribe because the kernel will
522d9638e54Smws  * remove the subscriber queue and any events published in our absence will
523d9638e54Smws  * therefore be lost.  This scenario may occur when, for example, fmd is sent
524d9638e54Smws  * a SIGTERM by init(1M) during reboot but an error is detected and makes it
525d9638e54Smws  * into the sysevent channel queue before init(1M) manages to call uadmin(2).
526d9638e54Smws  */
527d9638e54Smws void
528d9638e54Smws sysev_fini(fmd_hdl_t *hdl)
529d9638e54Smws {
530d9638e54Smws 	if (strcmp(sysev_channel, FM_ERROR_CHAN) != 0) {
531d9638e54Smws 		sysevent_evc_unsubscribe(sysev_evc, sysev_sid);
532d9638e54Smws 		sysevent_evc_unbind(sysev_evc);
533d9638e54Smws 	}
534d9638e54Smws 
5359af3851aSeschrock 	if (fmd.d_sysev_hdl != NULL)
5369af3851aSeschrock 		sysevent_unbind_handle(fmd.d_sysev_hdl);
5379af3851aSeschrock 
53897c04605Scy152378 	if (sysev_xprt != NULL) {
53997c04605Scy152378 		/*
54097c04605Scy152378 		 * Wait callback returns before destroy the transport.
54197c04605Scy152378 		 */
54297c04605Scy152378 		(void) pthread_mutex_lock(&sysev_mutex);
54397c04605Scy152378 		sysev_exiting = 1;
54497c04605Scy152378 		while (sysev_xprt_refcnt > 0)
54597c04605Scy152378 			(void) pthread_cond_wait(&sysev_cv, &sysev_mutex);
54697c04605Scy152378 		(void) pthread_mutex_unlock(&sysev_mutex);
547d9638e54Smws 		fmd_xprt_close(hdl, sysev_xprt);
54897c04605Scy152378 	}
549d9638e54Smws 
550d9638e54Smws 	fmd_prop_free_string(hdl, sysev_class);
551d9638e54Smws 	fmd_prop_free_string(hdl, sysev_channel);
552d9638e54Smws 	fmd_prop_free_string(hdl, sysev_device);
553d9638e54Smws 	fmd_prop_free_string(hdl, sysev_sid);
554d9638e54Smws }
555