xref: /illumos-gate/usr/src/uts/common/os/evchannels.c (revision bde334a8dbd66dfa70ce4d7fc9dcad6e1ae45fe4)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 /*
26  * Copyright (c) 2018, Joyent, Inc.
27  */
28 
29 /*
30  * This file contains the source of the general purpose event channel extension
31  * to the sysevent framework. This implementation is made up mainly of four
32  * layers of functionality: the event queues (evch_evq_*()), the handling of
33  * channels (evch_ch*()), the kernel interface (sysevent_evc_*()) and the
34  * interface for the sysevent pseudo driver (evch_usr*()).
35  * Libsysevent.so uses the pseudo driver sysevent's ioctl to access the event
36  * channel extensions. The driver in turn uses the evch_usr*() functions below.
37  *
38  * The interfaces for user land and kernel are declared in sys/sysevent.h
39  * Internal data structures for event channels are defined in
40  * sys/sysevent_impl.h.
41  *
42  * The basic data structure for an event channel is of type evch_chan_t.
43  * All channels are maintained by a list named evch_list. The list head
44  * is of type evch_dlist_t.
45  */
46 
47 #include <sys/types.h>
48 #include <sys/errno.h>
49 #include <sys/stropts.h>
50 #include <sys/debug.h>
51 #include <sys/ddi.h>
52 #include <sys/vmem.h>
53 #include <sys/cmn_err.h>
54 #include <sys/callb.h>
55 #include <sys/sysevent.h>
56 #include <sys/sysevent_impl.h>
57 #include <sys/sysmacros.h>
58 #include <sys/disp.h>
59 #include <sys/atomic.h>
60 #include <sys/door.h>
61 #include <sys/zone.h>
62 #include <sys/sdt.h>
63 
/* Bounds of the back-off delay passed to delay() when delivery is retried */
#define	EVCH_MIN_PAUSE	8
#define	EVCH_MAX_PAUSE	128

/*
 * Map the address of an event payload back to the evch_gevent_t that
 * contains it. The argument is parenthesized so that an arbitrary
 * expression (e.g. a conditional) can be passed safely.
 */
#define	GEVENT(ev)	((evch_gevent_t *)((char *)(ev) - \
			    offsetof(evch_gevent_t, ge_payload)))

/* Current and high-water-mark event counts of the event queue x */
#define	EVCH_EVQ_EVCOUNT(x)	((&(x)->eq_eventq)->sq_count)
#define	EVCH_EVQ_HIGHWM(x)	((&(x)->eq_eventq)->sq_highwm)

/* Values stored in the ch_holdpend member of a channel */
#define	CH_HOLD_PEND		1
#define	CH_HOLD_PEND_INDEF	2
76 
/*
 * Per-zone event channel state. An instance is allocated by evch_zoneinit()
 * and released by evch_zonefree().
 */
struct evch_globals {
	evch_dlist_t evch_list;		/* channels of this zone */
	kmutex_t evch_list_lock;	/* held while evch_list is accessed */
};
81 
/* Variables used by event channel routines */

/* Set to 1 by evch_chinitthr() once delivery threads have been created */
static int		evq_initcomplete = 0;
/* Zone key under which the per-zone struct evch_globals is registered */
static zone_key_t	evch_zone_key;
/* Limit on the number of channels in a zone's list; set in evch_chinit() */
static uint32_t		evch_channels_max;
/* Copied into ch_maxbinds of every newly created channel */
static uint32_t		evch_bindings_max = EVCH_MAX_BINDS_PER_CHANNEL;
/* Upper bound for a new channel's ch_maxev; set in evch_chinit() */
static uint32_t		evch_events_max;

/* Forward declarations; both are used before their definition below */
static void evch_evq_unsub(evch_eventq_t *, evch_evqsub_t *);
static void evch_evq_destroy(evch_eventq_t *);
91 
92 /*
93  * List handling. These functions handle a doubly linked list. The list has
94  * to be protected by the calling functions. evch_dlist_t is the list head.
95  * Every node of the list has to put a evch_dlelem_t data type in its data
96  * structure as its first element.
97  *
98  * evch_dl_init		- Initialize list head
99  * evch_dl_fini		- Terminate list handling
100  * evch_dl_is_init	- Returns one if list is initialized
101  * evch_dl_add		- Add element to end of list
102  * evch_dl_del		- Remove given element from list
103  * evch_dl_search	- Lookup element in list
104  * evch_dl_getnum	- Get number of elements in list
 * evch_dl_next		- Get next element of list
106  */
107 
108 static void
109 evch_dl_init(evch_dlist_t *hp)
110 {
111 	hp->dh_head.dl_prev = hp->dh_head.dl_next = &hp->dh_head;
112 	hp->dh_count = 0;
113 }
114 
115 /*
116  * Assumes that list is empty.
117  */
118 static void
119 evch_dl_fini(evch_dlist_t *hp)
120 {
121 	hp->dh_head.dl_prev = hp->dh_head.dl_next = NULL;
122 }
123 
124 static int
125 evch_dl_is_init(evch_dlist_t *hp)
126 {
127 	return (hp->dh_head.dl_next != NULL ? 1 : 0);
128 }
129 
130 /*
131  * Add an element at the end of the list.
132  */
133 static void
134 evch_dl_add(evch_dlist_t *hp, evch_dlelem_t *el)
135 {
136 	evch_dlelem_t	*x = hp->dh_head.dl_prev;
137 	evch_dlelem_t	*y = &hp->dh_head;
138 
139 	x->dl_next = el;
140 	y->dl_prev = el;
141 	el->dl_next = y;
142 	el->dl_prev = x;
143 	hp->dh_count++;
144 }
145 
146 /*
147  * Remove arbitrary element out of dlist.
148  */
149 static void
150 evch_dl_del(evch_dlist_t *hp, evch_dlelem_t *p)
151 {
152 	ASSERT(hp->dh_count > 0 && p != &hp->dh_head);
153 	p->dl_prev->dl_next = p->dl_next;
154 	p->dl_next->dl_prev = p->dl_prev;
155 	p->dl_prev = NULL;
156 	p->dl_next = NULL;
157 	hp->dh_count--;
158 }
159 
160 /*
161  * Search an element in a list. Caller provides comparison callback function.
162  */
163 static evch_dlelem_t *
164 evch_dl_search(evch_dlist_t *hp, int (*cmp)(evch_dlelem_t *, char *), char *s)
165 {
166 	evch_dlelem_t *p;
167 
168 	for (p = hp->dh_head.dl_next; p != &hp->dh_head; p = p->dl_next) {
169 		if (cmp(p, s) == 0) {
170 			return (p);
171 		}
172 	}
173 	return (NULL);
174 }
175 
176 /*
177  * Return number of elements in the list.
178  */
179 static int
180 evch_dl_getnum(evch_dlist_t *hp)
181 {
182 	return (hp->dh_count);
183 }
184 
185 /*
186  * Find next element of a evch_dlist_t list. Find first element if el == NULL.
187  * Returns NULL if end of list is reached.
188  */
189 static void *
190 evch_dl_next(evch_dlist_t *hp, void *el)
191 {
192 	evch_dlelem_t *ep = (evch_dlelem_t *)el;
193 
194 	if (hp->dh_count == 0) {
195 		return (NULL);
196 	}
197 	if (ep == NULL) {
198 		return (hp->dh_head.dl_next);
199 	}
200 	if ((ep = ep->dl_next) == (evch_dlelem_t *)hp) {
201 		return (NULL);
202 	}
203 	return ((void *)ep);
204 }
205 
206 /*
207  * Queue handling routines. Mutexes have to be entered previously.
208  *
209  * evch_q_init	- Initialize queue head
210  * evch_q_in	- Put element into queue
211  * evch_q_out	- Get element out of queue
212  * evch_q_next	- Iterate over the elements of a queue
213  */
214 static void
215 evch_q_init(evch_squeue_t *q)
216 {
217 	q->sq_head = NULL;
218 	q->sq_tail = (evch_qelem_t *)q;
219 	q->sq_count = 0;
220 	q->sq_highwm = 0;
221 }
222 
223 /*
224  * Put element into the queue q
225  */
226 static void
227 evch_q_in(evch_squeue_t *q, evch_qelem_t *el)
228 {
229 	q->sq_tail->q_next = el;
230 	el->q_next = NULL;
231 	q->sq_tail = el;
232 	q->sq_count++;
233 	if (q->sq_count > q->sq_highwm) {
234 		q->sq_highwm = q->sq_count;
235 	}
236 }
237 
238 /*
239  * Returns NULL if queue is empty.
240  */
241 static evch_qelem_t *
242 evch_q_out(evch_squeue_t *q)
243 {
244 	evch_qelem_t *el;
245 
246 	if ((el = q->sq_head) != NULL) {
247 		q->sq_head = el->q_next;
248 		q->sq_count--;
249 		if (q->sq_head == NULL) {
250 			q->sq_tail = (evch_qelem_t *)q;
251 		}
252 	}
253 	return (el);
254 }
255 
256 /*
257  * Returns element after *el or first if el == NULL. NULL is returned
258  * if queue is empty or *el points to the last element in the queue.
259  */
260 static evch_qelem_t *
261 evch_q_next(evch_squeue_t *q, evch_qelem_t *el)
262 {
263 	if (el == NULL)
264 		return (q->sq_head);
265 	return (el->q_next);
266 }
267 
268 /*
269  * Event queue handling functions. An event queue is the basic building block
270  * of an event channel. One event queue makes up the publisher-side event queue.
271  * Further event queues build the per-subscriber queues of an event channel.
 * Each queue is associated with an event delivery thread.
273  * These functions support a two-step initialization. First step, when kernel
274  * memory is ready and second when threads are ready.
275  * Events consist of an administrating evch_gevent_t structure with the event
276  * data appended as variable length payload.
277  * The internal interface functions for the event queue handling are:
278  *
279  * evch_evq_create	- create an event queue
280  * evch_evq_thrcreate	- create thread for an event queue.
281  * evch_evq_destroy	- delete an event queue
282  * evch_evq_sub		- Subscribe to event delivery from an event queue
283  * evch_evq_unsub	- Unsubscribe
284  * evch_evq_pub		- Post an event into an event queue
285  * evch_evq_stop	- Put delivery thread on hold
286  * evch_evq_continue	- Resume event delivery thread
287  * evch_evq_status	- Return status of delivery thread, running or on hold
288  * evch_evq_evzalloc	- Allocate an event structure
289  * evch_evq_evfree	- Free an event structure
290  * evch_evq_evadd_dest	- Add a destructor function to an event structure
291  * evch_evq_evnext	- Iterate over events non-destructive
292  */
293 
294 /*ARGSUSED*/
295 static void *
296 evch_zoneinit(zoneid_t zoneid)
297 {
298 	struct evch_globals *eg;
299 
300 	eg = kmem_zalloc(sizeof (*eg), KM_SLEEP);
301 	evch_dl_init(&eg->evch_list);
302 	return (eg);
303 }
304 
305 /*ARGSUSED*/
306 static void
307 evch_zonefree(zoneid_t zoneid, void *arg)
308 {
309 	struct evch_globals *eg = arg;
310 	evch_chan_t *chp;
311 	evch_subd_t *sdp;
312 
313 	mutex_enter(&eg->evch_list_lock);
314 
315 	/*
316 	 * Keep picking the head element off the list until there are no
317 	 * more.
318 	 */
319 	while ((chp = evch_dl_next(&eg->evch_list, NULL)) != NULL) {
320 
321 		/*
322 		 * Since all processes are gone, all bindings should be gone,
323 		 * and only channels with SUB_KEEP subscribers should remain.
324 		 */
325 		mutex_enter(&chp->ch_mutex);
326 		ASSERT(chp->ch_bindings == 0);
327 		ASSERT(evch_dl_getnum(&chp->ch_subscr) != 0 ||
328 		    chp->ch_holdpend == CH_HOLD_PEND_INDEF);
329 
330 		/* Forcibly unsubscribe each remaining subscription */
331 		while ((sdp = evch_dl_next(&chp->ch_subscr, NULL)) != NULL) {
332 			/*
333 			 * We should only be tearing down persistent
334 			 * subscribers at this point, since all processes
335 			 * from this zone are gone.
336 			 */
337 			ASSERT(sdp->sd_active == 0);
338 			ASSERT((sdp->sd_persist & EVCH_SUB_KEEP) != 0);
339 			/*
340 			 * Disconnect subscriber queue from main event queue.
341 			 */
342 			evch_evq_unsub(chp->ch_queue, sdp->sd_msub);
343 
344 			/* Destruct per subscriber queue */
345 			evch_evq_unsub(sdp->sd_queue, sdp->sd_ssub);
346 			evch_evq_destroy(sdp->sd_queue);
347 			/*
348 			 * Eliminate the subscriber data from channel list.
349 			 */
350 			evch_dl_del(&chp->ch_subscr, &sdp->sd_link);
351 			kmem_free(sdp->sd_classname, sdp->sd_clnsize);
352 			kmem_free(sdp->sd_ident, strlen(sdp->sd_ident) + 1);
353 			kmem_free(sdp, sizeof (evch_subd_t));
354 		}
355 
356 		/* Channel must now have no subscribers */
357 		ASSERT(evch_dl_getnum(&chp->ch_subscr) == 0);
358 
359 		/* Just like unbind */
360 		mutex_exit(&chp->ch_mutex);
361 		evch_dl_del(&eg->evch_list, &chp->ch_link);
362 		evch_evq_destroy(chp->ch_queue);
363 		mutex_destroy(&chp->ch_mutex);
364 		mutex_destroy(&chp->ch_pubmx);
365 		cv_destroy(&chp->ch_pubcv);
366 		kmem_free(chp->ch_name, chp->ch_namelen);
367 		kmem_free(chp, sizeof (evch_chan_t));
368 	}
369 
370 	mutex_exit(&eg->evch_list_lock);
371 	/* all channels should now be gone */
372 	ASSERT(evch_dl_getnum(&eg->evch_list) == 0);
373 	kmem_free(eg, sizeof (*eg));
374 }
375 
376 /*
377  * Frees evch_gevent_t structure including the payload, if the reference count
378  * drops to or below zero. Below zero happens when the event is freed
379  * without beeing queued into a queue.
380  */
381 static void
382 evch_gevent_free(evch_gevent_t *evp)
383 {
384 	int32_t refcnt;
385 
386 	refcnt = (int32_t)atomic_dec_32_nv(&evp->ge_refcount);
387 	if (refcnt <= 0) {
388 		if (evp->ge_destruct != NULL) {
389 			evp->ge_destruct((void *)&(evp->ge_payload),
390 			    evp->ge_dstcookie);
391 		}
392 		kmem_free(evp, evp->ge_size);
393 	}
394 }
395 
396 /*
397  * Deliver is called for every subscription to the current event
398  * It calls the registered filter function and then the registered delivery
399  * callback routine. Returns 0 on success. The callback routine returns
400  * EVQ_AGAIN or EVQ_SLEEP in case the event could not be delivered.
401  */
402 static int
403 evch_deliver(evch_evqsub_t *sp, evch_gevent_t *ep)
404 {
405 	void		*uep = &ep->ge_payload;
406 	int		res = EVQ_DELIVER;
407 
408 	if (sp->su_filter != NULL) {
409 		res = sp->su_filter(uep, sp->su_fcookie);
410 	}
411 	if (res == EVQ_DELIVER) {
412 		return (sp->su_callb(uep, sp->su_cbcookie));
413 	}
414 	return (0);
415 }
416 
417 /*
418  * Holds event delivery in case of eq_holdmode set or in case the
419  * event queue is empty. Mutex must be held when called.
420  * Wakes up a thread waiting for the delivery thread reaching the hold mode.
421  */
422 static void
423 evch_delivery_hold(evch_eventq_t *eqp, callb_cpr_t *cpip)
424 {
425 	if (eqp->eq_tabortflag == 0) {
426 		do {
427 			if (eqp->eq_holdmode) {
428 				cv_signal(&eqp->eq_onholdcv);
429 			}
430 			CALLB_CPR_SAFE_BEGIN(cpip);
431 			cv_wait(&eqp->eq_thrsleepcv, &eqp->eq_queuemx);
432 			CALLB_CPR_SAFE_END(cpip, &eqp->eq_queuemx);
433 		} while (eqp->eq_holdmode);
434 	}
435 }
436 
437 /*
438  * Event delivery thread. Enumerates all subscribers and calls evch_deliver()
439  * for each one.
440  */
441 static void
442 evch_delivery_thr(evch_eventq_t *eqp)
443 {
444 	evch_qelem_t	*qep;
445 	callb_cpr_t	cprinfo;
446 	int		res;
447 	evch_evqsub_t	*sub;
448 	int		deltime;
449 	int		repeatcount;
450 	char		thnam[32];
451 
452 	(void) snprintf(thnam, sizeof (thnam), "sysevent_chan-%d",
453 	    (int)eqp->eq_thrid);
454 	CALLB_CPR_INIT(&cprinfo, &eqp->eq_queuemx, callb_generic_cpr, thnam);
455 	mutex_enter(&eqp->eq_queuemx);
456 	while (eqp->eq_tabortflag == 0) {
457 		while (eqp->eq_holdmode == 0 && eqp->eq_tabortflag == 0 &&
458 		    (qep = evch_q_out(&eqp->eq_eventq)) != NULL) {
459 
460 			/* Filter and deliver event to all subscribers */
461 			deltime = EVCH_MIN_PAUSE;
462 			repeatcount = EVCH_MAX_TRY_DELIVERY;
463 			eqp->eq_curevent = qep->q_objref;
464 			sub = evch_dl_next(&eqp->eq_subscr, NULL);
465 			while (sub != NULL) {
466 				eqp->eq_dactive = 1;
467 				mutex_exit(&eqp->eq_queuemx);
468 				res = evch_deliver(sub, qep->q_objref);
469 				mutex_enter(&eqp->eq_queuemx);
470 				eqp->eq_dactive = 0;
471 				cv_signal(&eqp->eq_dactivecv);
472 				switch (res) {
473 				case EVQ_SLEEP:
474 					/*
475 					 * Wait for subscriber to return.
476 					 */
477 					eqp->eq_holdmode = 1;
478 					evch_delivery_hold(eqp, &cprinfo);
479 					if (eqp->eq_tabortflag) {
480 						break;
481 					}
482 					continue;
483 				case EVQ_AGAIN:
484 					CALLB_CPR_SAFE_BEGIN(&cprinfo);
485 					mutex_exit(&eqp->eq_queuemx);
486 					delay(deltime);
487 					deltime =
488 					    deltime > EVCH_MAX_PAUSE ?
489 					    deltime : deltime << 1;
490 					mutex_enter(&eqp->eq_queuemx);
491 					CALLB_CPR_SAFE_END(&cprinfo,
492 					    &eqp->eq_queuemx);
493 					if (repeatcount-- > 0) {
494 						continue;
495 					}
496 					break;
497 				}
498 				if (eqp->eq_tabortflag) {
499 					break;
500 				}
501 				sub = evch_dl_next(&eqp->eq_subscr, sub);
502 				repeatcount = EVCH_MAX_TRY_DELIVERY;
503 			}
504 			eqp->eq_curevent = NULL;
505 
506 			/* Free event data and queue element */
507 			evch_gevent_free((evch_gevent_t *)qep->q_objref);
508 			kmem_free(qep, qep->q_objsize);
509 		}
510 
511 		/* Wait for next event or end of hold mode if set */
512 		evch_delivery_hold(eqp, &cprinfo);
513 	}
514 	CALLB_CPR_EXIT(&cprinfo);	/* Does mutex_exit of eqp->eq_queuemx */
515 	thread_exit();
516 }
517 
518 /*
519  * Create the event delivery thread for an existing event queue.
520  */
521 static void
522 evch_evq_thrcreate(evch_eventq_t *eqp)
523 {
524 	kthread_t *thp;
525 
526 	thp = thread_create(NULL, 0, evch_delivery_thr, (char *)eqp, 0, &p0,
527 	    TS_RUN, minclsyspri);
528 	eqp->eq_thrid = thp->t_did;
529 }
530 
531 /*
532  * Create event queue.
533  */
534 static evch_eventq_t *
535 evch_evq_create()
536 {
537 	evch_eventq_t *p;
538 
539 	/* Allocate and initialize event queue descriptor */
540 	p = kmem_zalloc(sizeof (evch_eventq_t), KM_SLEEP);
541 	mutex_init(&p->eq_queuemx, NULL, MUTEX_DEFAULT, NULL);
542 	cv_init(&p->eq_thrsleepcv, NULL, CV_DEFAULT, NULL);
543 	evch_q_init(&p->eq_eventq);
544 	evch_dl_init(&p->eq_subscr);
545 	cv_init(&p->eq_dactivecv, NULL, CV_DEFAULT, NULL);
546 	cv_init(&p->eq_onholdcv, NULL, CV_DEFAULT, NULL);
547 
548 	/* Create delivery thread */
549 	if (evq_initcomplete) {
550 		evch_evq_thrcreate(p);
551 	}
552 	return (p);
553 }
554 
555 /*
556  * Destroy an event queue. All subscribers have to be unsubscribed prior to
557  * this call.
558  */
559 static void
560 evch_evq_destroy(evch_eventq_t *eqp)
561 {
562 	evch_qelem_t *qep;
563 
564 	ASSERT(evch_dl_getnum(&eqp->eq_subscr) == 0);
565 	/* Kill delivery thread */
566 	if (eqp->eq_thrid != 0) {
567 		mutex_enter(&eqp->eq_queuemx);
568 		eqp->eq_tabortflag = 1;
569 		eqp->eq_holdmode = 0;
570 		cv_signal(&eqp->eq_thrsleepcv);
571 		mutex_exit(&eqp->eq_queuemx);
572 		thread_join(eqp->eq_thrid);
573 	}
574 
575 	/* Get rid of stale events in the event queue */
576 	while ((qep = (evch_qelem_t *)evch_q_out(&eqp->eq_eventq)) != NULL) {
577 		evch_gevent_free((evch_gevent_t *)qep->q_objref);
578 		kmem_free(qep, qep->q_objsize);
579 	}
580 
581 	/* Wrap up event queue structure */
582 	cv_destroy(&eqp->eq_onholdcv);
583 	cv_destroy(&eqp->eq_dactivecv);
584 	cv_destroy(&eqp->eq_thrsleepcv);
585 	evch_dl_fini(&eqp->eq_subscr);
586 	mutex_destroy(&eqp->eq_queuemx);
587 
588 	/* Free descriptor structure */
589 	kmem_free(eqp, sizeof (evch_eventq_t));
590 }
591 
592 /*
593  * Subscribe to an event queue. Every subscriber provides a filter callback
594  * routine and an event delivery callback routine.
595  */
596 static evch_evqsub_t *
597 evch_evq_sub(evch_eventq_t *eqp, filter_f filter, void *fcookie,
598     deliver_f callb, void *cbcookie)
599 {
600 	evch_evqsub_t *sp = kmem_zalloc(sizeof (evch_evqsub_t), KM_SLEEP);
601 
602 	/* Initialize subscriber structure */
603 	sp->su_filter = filter;
604 	sp->su_fcookie = fcookie;
605 	sp->su_callb = callb;
606 	sp->su_cbcookie = cbcookie;
607 
608 	/* Add subscription to queue */
609 	mutex_enter(&eqp->eq_queuemx);
610 	evch_dl_add(&eqp->eq_subscr, &sp->su_link);
611 	mutex_exit(&eqp->eq_queuemx);
612 	return (sp);
613 }
614 
615 /*
616  * Unsubscribe from an event queue.
617  */
618 static void
619 evch_evq_unsub(evch_eventq_t *eqp, evch_evqsub_t *sp)
620 {
621 	mutex_enter(&eqp->eq_queuemx);
622 
623 	/* Wait if delivery is just in progress */
624 	if (eqp->eq_dactive) {
625 		cv_wait(&eqp->eq_dactivecv, &eqp->eq_queuemx);
626 	}
627 	evch_dl_del(&eqp->eq_subscr, &sp->su_link);
628 	mutex_exit(&eqp->eq_queuemx);
629 	kmem_free(sp, sizeof (evch_evqsub_t));
630 }
631 
632 /*
633  * Publish an event. Returns 0 on success and -1 if memory alloc failed.
634  */
635 static int
636 evch_evq_pub(evch_eventq_t *eqp, void *ev, int flags)
637 {
638 	size_t size;
639 	evch_qelem_t	*qep;
640 	evch_gevent_t	*evp = GEVENT(ev);
641 
642 	size = sizeof (evch_qelem_t);
643 	if (flags & EVCH_TRYHARD) {
644 		qep = kmem_alloc_tryhard(size, &size, KM_NOSLEEP);
645 	} else {
646 		qep = kmem_alloc(size, flags & EVCH_NOSLEEP ?
647 		    KM_NOSLEEP : KM_SLEEP);
648 	}
649 	if (qep == NULL) {
650 		return (-1);
651 	}
652 	qep->q_objref = (void *)evp;
653 	qep->q_objsize = size;
654 	atomic_inc_32(&evp->ge_refcount);
655 	mutex_enter(&eqp->eq_queuemx);
656 	evch_q_in(&eqp->eq_eventq, qep);
657 
658 	/* Wakeup delivery thread */
659 	cv_signal(&eqp->eq_thrsleepcv);
660 	mutex_exit(&eqp->eq_queuemx);
661 	return (0);
662 }
663 
664 /*
665  * Enter hold mode of an event queue. Event delivery thread stops event
666  * handling after delivery of current event (if any).
667  */
668 static void
669 evch_evq_stop(evch_eventq_t *eqp)
670 {
671 	mutex_enter(&eqp->eq_queuemx);
672 	eqp->eq_holdmode = 1;
673 	if (evq_initcomplete) {
674 		cv_signal(&eqp->eq_thrsleepcv);
675 		cv_wait(&eqp->eq_onholdcv, &eqp->eq_queuemx);
676 	}
677 	mutex_exit(&eqp->eq_queuemx);
678 }
679 
680 /*
681  * Continue event delivery.
682  */
683 static void
684 evch_evq_continue(evch_eventq_t *eqp)
685 {
686 	mutex_enter(&eqp->eq_queuemx);
687 	eqp->eq_holdmode = 0;
688 	cv_signal(&eqp->eq_thrsleepcv);
689 	mutex_exit(&eqp->eq_queuemx);
690 }
691 
692 /*
693  * Returns status of delivery thread. 0 if running and 1 if on hold.
694  */
695 static int
696 evch_evq_status(evch_eventq_t *eqp)
697 {
698 	return (eqp->eq_holdmode);
699 }
700 
701 /*
702  * Add a destructor function to an event structure.
703  */
704 static void
705 evch_evq_evadd_dest(void *ev, destr_f destructor, void *cookie)
706 {
707 	evch_gevent_t *evp = GEVENT(ev);
708 
709 	evp->ge_destruct = destructor;
710 	evp->ge_dstcookie = cookie;
711 }
712 
713 /*
714  * Allocate evch_gevent_t structure. Return address of payload offset of
715  * evch_gevent_t.  If EVCH_TRYHARD allocation is requested, we use
716  * kmem_alloc_tryhard to alloc memory of at least paylsize bytes.
717  *
718  * If either memory allocation is unsuccessful, we return NULL.
719  */
720 static void *
721 evch_evq_evzalloc(size_t paylsize, int flag)
722 {
723 	evch_gevent_t	*evp;
724 	size_t		rsize, evsize, ge_size;
725 
726 	rsize = offsetof(evch_gevent_t, ge_payload) + paylsize;
727 	if (flag & EVCH_TRYHARD) {
728 		evp = kmem_alloc_tryhard(rsize, &evsize, KM_NOSLEEP);
729 		ge_size = evsize;
730 	} else {
731 		evp = kmem_alloc(rsize, flag & EVCH_NOSLEEP ? KM_NOSLEEP :
732 		    KM_SLEEP);
733 		ge_size = rsize;
734 	}
735 
736 	if (evp) {
737 		bzero(evp, rsize);
738 		evp->ge_size = ge_size;
739 		return (&evp->ge_payload);
740 	}
741 	return (evp);
742 }
743 
744 /*
745  * Free event structure. Argument ev is address of payload offset.
746  */
747 static void
748 evch_evq_evfree(void *ev)
749 {
750 	evch_gevent_free(GEVENT(ev));
751 }
752 
753 /*
754  * Iterate over all events in the event queue. Begin with an event
755  * which is currently being delivered. No mutexes are grabbed and no
756  * resources allocated so that this function can be called in panic
757  * context too. This function has to be called with ev == NULL initially.
758  * Actually argument ev is only a flag. Internally the member eq_nextev
759  * is used to determine the next event. But ev allows for the convenient
760  * use like
761  *	ev = NULL;
762  *	while ((ev = evch_evq_evnext(evp, ev)) != NULL) ...
763  */
764 static void *
765 evch_evq_evnext(evch_eventq_t *evq, void *ev)
766 {
767 	if (ev == NULL) {
768 		evq->eq_nextev = NULL;
769 		if (evq->eq_curevent != NULL)
770 			return (&evq->eq_curevent->ge_payload);
771 	}
772 	evq->eq_nextev = evch_q_next(&evq->eq_eventq, evq->eq_nextev);
773 	if (evq->eq_nextev == NULL)
774 		return (NULL);
775 	return (&((evch_gevent_t *)evq->eq_nextev->q_objref)->ge_payload);
776 }
777 
778 /*
779  * Channel handling functions. First some support functions. Functions belonging
780  * to the channel handling interface start with evch_ch. The following functions
781  * make up the channel handling internal interfaces:
782  *
783  * evch_chinit		- Initialize channel handling
784  * evch_chinitthr	- Second step init: initialize threads
785  * evch_chbind		- Bind to a channel
786  * evch_chunbind	- Unbind from a channel
787  * evch_chsubscribe	- Subscribe to a sysevent class
788  * evch_chunsubscribe	- Unsubscribe
789  * evch_chpublish	- Publish an event
790  * evch_chgetnames	- Get names of all channels
791  * evch_chgetchdata	- Get data of a channel
792  * evch_chrdevent_init  - Init event q traversal
793  * evch_chgetnextev	- Read out events queued for a subscriber
794  * evch_chrdevent_fini  - Finish event q traversal
795  */
796 
797 /*
798  * Compare channel name. Used for evch_dl_search to find a channel with the
799  * name s.
800  */
801 static int
802 evch_namecmp(evch_dlelem_t *ep, char *s)
803 {
804 	return (strcmp(((evch_chan_t *)ep)->ch_name, s));
805 }
806 
/*
 * Match the event class string 'class' against the subscription pattern
 * 'pat', in which '*' stands for any (possibly empty) sequence of
 * characters. Returns 1 on a match and 0 otherwise. Without a wildcard
 * in 'pat' this is a plain string comparison; recursion happens only
 * for the text following a wildcard.
 */
static int
evch_clsmatch(char *class, const char *pat)
{
	char pc;

	for (;;) {
		pc = *pat++;

		/* End of pattern: match only if the class ends here too */
		if (pc == '\0')
			return (*class == '\0');

		/* Literal character: has to agree with the class string */
		if (pc != '*') {
			if (pc != *class++)
				return (0);
			continue;
		}

		/* Wildcard; a run of *'s is equivalent to a single one */
		while (*pat == '*')
			pat++;

		/* A trailing wildcard matches whatever is left */
		if (*pat == '\0')
			return (1);

		/* Try the remaining pattern at every remaining position */
		for (; *class != '\0'; class++) {
			if (evch_clsmatch(class, pat) != 0)
				return (1);
		}
		return (0);
	}
}
839 
840 /*
841  * Sysevent filter callback routine. Enables event delivery only if it matches
842  * the event class pattern string given by parameter cookie.
843  */
844 static int
845 evch_class_filter(void *ev, void *cookie)
846 {
847 	const char *pat = (const char *)cookie;
848 
849 	if (pat == NULL || evch_clsmatch(SE_CLASS_NAME(ev), pat))
850 		return (EVQ_DELIVER);
851 
852 	return (EVQ_IGNORE);
853 }
854 
855 /*
856  * Callback routine to propagate the event into a per subscriber queue.
857  */
858 static int
859 evch_subq_deliver(void *evp, void *cookie)
860 {
861 	evch_subd_t *p = (evch_subd_t *)cookie;
862 
863 	(void) evch_evq_pub(p->sd_queue, evp, EVCH_SLEEP);
864 	return (EVQ_CONT);
865 }
866 
867 /*
868  * Call kernel callback routine for sysevent kernel delivery.
869  */
870 static int
871 evch_kern_deliver(void *evp, void *cookie)
872 {
873 	sysevent_impl_t	*ev = (sysevent_impl_t *)evp;
874 	evch_subd_t	*sdp = (evch_subd_t *)cookie;
875 
876 	return (sdp->sd_callback(ev, sdp->sd_cbcookie));
877 }
878 
879 /*
880  * Door upcall for user land sysevent delivery.
881  */
882 static int
883 evch_door_deliver(void *evp, void *cookie)
884 {
885 	int		error;
886 	size_t		size;
887 	sysevent_impl_t	*ev = (sysevent_impl_t *)evp;
888 	door_arg_t	darg;
889 	evch_subd_t	*sdp = (evch_subd_t *)cookie;
890 	int		nticks = EVCH_MIN_PAUSE;
891 	uint32_t	retval;
892 	int		retry = 20;
893 
894 	/* Initialize door args */
895 	size = sizeof (sysevent_impl_t) + SE_PAYLOAD_SZ(ev);
896 
897 	darg.rbuf = (char *)&retval;
898 	darg.rsize = sizeof (retval);
899 	darg.data_ptr = (char *)ev;
900 	darg.data_size = size;
901 	darg.desc_ptr = NULL;
902 	darg.desc_num = 0;
903 
904 	for (;;) {
905 		if ((error = door_ki_upcall_limited(sdp->sd_door, &darg,
906 		    NULL, SIZE_MAX, 0)) == 0) {
907 			break;
908 		}
909 		switch (error) {
910 		case EAGAIN:
911 			/* Cannot deliver event - process may be forking */
912 			delay(nticks);
913 			nticks <<= 1;
914 			if (nticks > EVCH_MAX_PAUSE) {
915 				nticks = EVCH_MAX_PAUSE;
916 			}
917 			if (retry-- <= 0) {
918 				cmn_err(CE_CONT, "event delivery thread: "
919 				    "door_ki_upcall error EAGAIN\n");
920 				return (EVQ_CONT);
921 			}
922 			break;
923 		case EINTR:
924 		case EBADF:
925 			/* Process died */
926 			return (EVQ_SLEEP);
927 		default:
928 			cmn_err(CE_CONT,
929 			    "event delivery thread: door_ki_upcall error %d\n",
930 			    error);
931 			return (EVQ_CONT);
932 		}
933 	}
934 	if (retval == EAGAIN) {
935 		return (EVQ_AGAIN);
936 	}
937 	return (EVQ_CONT);
938 }
939 
940 /*
941  * Callback routine for evch_dl_search() to compare subscriber id's. Used by
942  * evch_subscribe() and evch_chrdevent_init().
943  */
944 static int
945 evch_subidcmp(evch_dlelem_t *ep, char *s)
946 {
947 	return (strcmp(((evch_subd_t *)ep)->sd_ident, s));
948 }
949 
950 /*
951  * Callback routine for evch_dl_search() to find a subscriber with EVCH_SUB_DUMP
952  * set (indicated by sub->sd_dump != 0). Used by evch_chrdevent_init() and
953  * evch_subscribe(). Needs to returns 0 if subscriber with sd_dump set is
954  * found.
955  */
956 /*ARGSUSED1*/
957 static int
958 evch_dumpflgcmp(evch_dlelem_t *ep, char *s)
959 {
960 	return (((evch_subd_t *)ep)->sd_dump ? 0 : 1);
961 }
962 
963 /*
964  * Event destructor function. Used to maintain the number of events per channel.
965  */
966 /*ARGSUSED*/
967 static void
968 evch_destr_event(void *ev, void *ch)
969 {
970 	evch_chan_t *chp = (evch_chan_t *)ch;
971 
972 	mutex_enter(&chp->ch_pubmx);
973 	chp->ch_nevents--;
974 	cv_signal(&chp->ch_pubcv);
975 	mutex_exit(&chp->ch_pubmx);
976 }
977 
/*
 * Integer square root according to Newton's iteration. Returns the
 * largest integer whose square does not exceed n.
 *
 * The iteration starts at n / 2, which is not below the square root for
 * any n >= 4. From such a start value the sequence decreases strictly
 * until it reaches the result, so the last value seen before the sequence
 * stops decreasing is returned.
 */
static uint32_t
evch_isqrt(uint64_t n)
{
	uint64_t	x;
	uint64_t	xn;

	if (n < 4) {
		/* The root is 0 for n == 0 and 1 for n == 1, 2, 3 */
		return (n == 0 ? 0 : 1);
	}

	x = n >> 1;
	xn = (x + n / x) / 2;
	while (xn < x) {
		x = xn;
		xn = (x + n / x) / 2;
	}
	return ((uint32_t)x);
}
997 
998 /*
999  * First step sysevent channel initialization. Called when kernel memory
1000  * allocator is initialized.
1001  */
1002 static void
1003 evch_chinit()
1004 {
1005 	size_t k;
1006 
1007 	/*
1008 	 * Calculate limits: max no of channels and max no of events per
1009 	 * channel. The smallest machine with 128 MByte will allow for
1010 	 * >= 8 channels and an upper limit of 2048 events per channel.
1011 	 * The event limit is the number of channels times 256 (hence
1012 	 * the shift factor of 8). These number where selected arbitrarily.
1013 	 */
1014 	k = kmem_maxavail() >> 20;
1015 	evch_channels_max = min(evch_isqrt(k), EVCH_MAX_CHANNELS);
1016 	evch_events_max = evch_channels_max << 8;
1017 
1018 	/*
1019 	 * Will trigger creation of the global zone's evch state.
1020 	 */
1021 	zone_key_create(&evch_zone_key, evch_zoneinit, NULL, evch_zonefree);
1022 }
1023 
1024 /*
1025  * Second step sysevent channel initialization. Called when threads are ready.
1026  */
1027 static void
1028 evch_chinitthr()
1029 {
1030 	struct evch_globals *eg;
1031 	evch_chan_t	*chp;
1032 	evch_subd_t	*sdp;
1033 
1034 	/*
1035 	 * We're early enough in boot that we know that only the global
1036 	 * zone exists; we only need to initialize its threads.
1037 	 */
1038 	eg = zone_getspecific(evch_zone_key, global_zone);
1039 	ASSERT(eg != NULL);
1040 
1041 	for (chp = evch_dl_next(&eg->evch_list, NULL); chp != NULL;
1042 	    chp = evch_dl_next(&eg->evch_list, chp)) {
1043 		for (sdp = evch_dl_next(&chp->ch_subscr, NULL); sdp;
1044 		    sdp = evch_dl_next(&chp->ch_subscr, sdp)) {
1045 			evch_evq_thrcreate(sdp->sd_queue);
1046 		}
1047 		evch_evq_thrcreate(chp->ch_queue);
1048 	}
1049 	evq_initcomplete = 1;
1050 }
1051 
1052 /*
1053  * Sysevent channel bind. Create channel and allocate binding structure.
1054  */
1055 static int
1056 evch_chbind(const char *chnam, evch_bind_t **scpp, uint32_t flags)
1057 {
1058 	struct evch_globals *eg;
1059 	evch_bind_t	*bp;
1060 	evch_chan_t	*p;
1061 	char		*chn;
1062 	size_t		namlen;
1063 	int		rv;
1064 
1065 	eg = zone_getspecific(evch_zone_key, curproc->p_zone);
1066 	ASSERT(eg != NULL);
1067 
1068 	/* Create channel if it does not exist */
1069 	ASSERT(evch_dl_is_init(&eg->evch_list));
1070 	if ((namlen = strlen(chnam) + 1) > MAX_CHNAME_LEN) {
1071 		return (EINVAL);
1072 	}
1073 	mutex_enter(&eg->evch_list_lock);
1074 	if ((p = (evch_chan_t *)evch_dl_search(&eg->evch_list, evch_namecmp,
1075 	    (char *)chnam)) == NULL) {
1076 		if (flags & EVCH_CREAT) {
1077 			if (evch_dl_getnum(&eg->evch_list) >=
1078 			    evch_channels_max) {
1079 				mutex_exit(&eg->evch_list_lock);
1080 				return (ENOMEM);
1081 			}
1082 			chn = kmem_alloc(namlen, KM_SLEEP);
1083 			bcopy(chnam, chn, namlen);
1084 
1085 			/* Allocate and initialize channel descriptor */
1086 			p = kmem_zalloc(sizeof (evch_chan_t), KM_SLEEP);
1087 			p->ch_name = chn;
1088 			p->ch_namelen = namlen;
1089 			mutex_init(&p->ch_mutex, NULL, MUTEX_DEFAULT, NULL);
1090 			p->ch_queue = evch_evq_create();
1091 			evch_dl_init(&p->ch_subscr);
1092 			if (evq_initcomplete) {
1093 				p->ch_uid = crgetuid(curthread->t_cred);
1094 				p->ch_gid = crgetgid(curthread->t_cred);
1095 			}
1096 			cv_init(&p->ch_pubcv, NULL, CV_DEFAULT, NULL);
1097 			mutex_init(&p->ch_pubmx, NULL, MUTEX_DEFAULT, NULL);
1098 			p->ch_maxev = min(EVCH_DEFAULT_EVENTS, evch_events_max);
1099 			p->ch_maxsubscr = EVCH_MAX_SUBSCRIPTIONS;
1100 			p->ch_maxbinds = evch_bindings_max;
1101 			p->ch_ctime = gethrestime_sec();
1102 
1103 			if (flags & (EVCH_HOLD_PEND | EVCH_HOLD_PEND_INDEF)) {
1104 				if (flags & EVCH_HOLD_PEND_INDEF)
1105 					p->ch_holdpend = CH_HOLD_PEND_INDEF;
1106 				else
1107 					p->ch_holdpend = CH_HOLD_PEND;
1108 
1109 				evch_evq_stop(p->ch_queue);
1110 			}
1111 
1112 			/* Put new descriptor into channel list */
1113 			evch_dl_add(&eg->evch_list, (evch_dlelem_t *)p);
1114 		} else {
1115 			mutex_exit(&eg->evch_list_lock);
1116 			return (ENOENT);
1117 		}
1118 	}
1119 
1120 	/* Check for max binds and create binding */
1121 	mutex_enter(&p->ch_mutex);
1122 	if (p->ch_bindings >= p->ch_maxbinds) {
1123 		rv = ENOMEM;
1124 		/*
1125 		 * No need to destroy the channel because this call did not
1126 		 * create it. Other bindings will be present if ch_maxbinds
1127 		 * is exceeded.
1128 		 */
1129 		goto errorexit;
1130 	}
1131 	bp = kmem_alloc(sizeof (evch_bind_t), KM_SLEEP);
1132 	bp->bd_channel = p;
1133 	bp->bd_sublst = NULL;
1134 	p->ch_bindings++;
1135 	rv = 0;
1136 	*scpp = bp;
1137 errorexit:
1138 	mutex_exit(&p->ch_mutex);
1139 	mutex_exit(&eg->evch_list_lock);
1140 	return (rv);
1141 }
1142 
1143 /*
1144  * Unbind: Free bind structure. Remove channel if last binding was freed.
1145  */
1146 static void
1147 evch_chunbind(evch_bind_t *bp)
1148 {
1149 	struct evch_globals *eg;
1150 	evch_chan_t *chp = bp->bd_channel;
1151 
1152 	eg = zone_getspecific(evch_zone_key, curproc->p_zone);
1153 	ASSERT(eg != NULL);
1154 
1155 	mutex_enter(&eg->evch_list_lock);
1156 	mutex_enter(&chp->ch_mutex);
1157 	ASSERT(chp->ch_bindings > 0);
1158 	chp->ch_bindings--;
1159 	kmem_free(bp, sizeof (evch_bind_t));
1160 	if (chp->ch_bindings == 0 && evch_dl_getnum(&chp->ch_subscr) == 0 &&
1161 	    (chp->ch_nevents == 0 || chp->ch_holdpend != CH_HOLD_PEND_INDEF)) {
1162 		/*
1163 		 * No more bindings and no persistent subscriber(s).  If there
1164 		 * are no events in the channel then destroy the channel;
1165 		 * otherwise destroy the channel only if we're not holding
1166 		 * pending events indefinitely.
1167 		 */
1168 		mutex_exit(&chp->ch_mutex);
1169 		evch_dl_del(&eg->evch_list, &chp->ch_link);
1170 		evch_evq_destroy(chp->ch_queue);
1171 		nvlist_free(chp->ch_propnvl);
1172 		mutex_destroy(&chp->ch_mutex);
1173 		mutex_destroy(&chp->ch_pubmx);
1174 		cv_destroy(&chp->ch_pubcv);
1175 		kmem_free(chp->ch_name, chp->ch_namelen);
1176 		kmem_free(chp, sizeof (evch_chan_t));
1177 	} else
1178 		mutex_exit(&chp->ch_mutex);
1179 	mutex_exit(&eg->evch_list_lock);
1180 }
1181 
/*
 * Count the '*' characters in a class string. A NULL string counts as
 * having no wildcards.
 */
static int
wildcard_count(const char *class)
{
	const char *cp;
	int nstars = 0;

	if (class == NULL)
		return (0);

	for (cp = class; *cp != '\0'; cp++) {
		if (*cp == '*')
			nstars++;
	}

	return (nstars);
}
1198 
1199 /*
1200  * Subscribe to a channel. dtype is either EVCH_DELKERN for kernel callbacks
1201  * or EVCH_DELDOOR for door upcall delivery to user land. Depending on dtype
1202  * dinfo gives the call back routine address or the door handle.
1203  */
1204 static int
1205 evch_chsubscribe(evch_bind_t *bp, int dtype, const char *sid, const char *class,
1206     void *dinfo, void *cookie, int flags, pid_t pid)
1207 {
1208 	evch_chan_t	*chp = bp->bd_channel;
1209 	evch_eventq_t	*eqp = chp->ch_queue;
1210 	evch_subd_t	*sdp;
1211 	evch_subd_t	*esp;
1212 	int		(*delivfkt)();
1213 	char		*clb = NULL;
1214 	int		clblen = 0;
1215 	char		*subid;
1216 	int		subidblen;
1217 
1218 	/*
1219 	 * Check if only known flags are set.
1220 	 */
1221 	if (flags & ~(EVCH_SUB_KEEP | EVCH_SUB_DUMP))
1222 		return (EINVAL);
1223 
1224 	/*
1225 	 * Enforce a limit on the number of wildcards allowed in the class
1226 	 * subscription string (limits recursion in pattern matching).
1227 	 */
1228 	if (wildcard_count(class) > EVCH_WILDCARD_MAX)
1229 		return (EINVAL);
1230 
1231 	/*
1232 	 * Check if we have already a subscription with that name and if we
1233 	 * have to reconnect the subscriber to a persistent subscription.
1234 	 */
1235 	mutex_enter(&chp->ch_mutex);
1236 	if ((esp = (evch_subd_t *)evch_dl_search(&chp->ch_subscr,
1237 	    evch_subidcmp, (char *)sid)) != NULL) {
1238 		int error = 0;
1239 		if ((flags & EVCH_SUB_KEEP) && (esp->sd_active == 0)) {
1240 			/*
1241 			 * Subscription with the name on hold, reconnect to
1242 			 * existing queue.
1243 			 */
1244 			ASSERT(dtype == EVCH_DELDOOR);
1245 			esp->sd_subnxt = bp->bd_sublst;
1246 			bp->bd_sublst = esp;
1247 			esp->sd_pid = pid;
1248 			esp->sd_door = (door_handle_t)dinfo;
1249 			esp->sd_active++;
1250 			evch_evq_continue(esp->sd_queue);
1251 		} else {
1252 			/* Subscriber with given name already exists */
1253 			error = EEXIST;
1254 		}
1255 		mutex_exit(&chp->ch_mutex);
1256 		return (error);
1257 	}
1258 
1259 	if (evch_dl_getnum(&chp->ch_subscr) >= chp->ch_maxsubscr) {
1260 		mutex_exit(&chp->ch_mutex);
1261 		return (ENOMEM);
1262 	}
1263 
1264 	if (flags & EVCH_SUB_DUMP && evch_dl_search(&chp->ch_subscr,
1265 	    evch_dumpflgcmp, NULL) != NULL) {
1266 		/*
1267 		 * Subscription with EVCH_SUB_DUMP flagged already exists.
1268 		 * Only one subscription with EVCH_SUB_DUMP possible. Return
1269 		 * error.
1270 		 */
1271 		mutex_exit(&chp->ch_mutex);
1272 		return (EINVAL);
1273 	}
1274 
1275 	if (class != NULL) {
1276 		clblen = strlen(class) + 1;
1277 		clb = kmem_alloc(clblen, KM_SLEEP);
1278 		bcopy(class, clb, clblen);
1279 	}
1280 
1281 	subidblen = strlen(sid) + 1;
1282 	subid = kmem_alloc(subidblen, KM_SLEEP);
1283 	bcopy(sid, subid, subidblen);
1284 
1285 	/* Create per subscriber queue */
1286 	sdp = kmem_zalloc(sizeof (evch_subd_t), KM_SLEEP);
1287 	sdp->sd_queue = evch_evq_create();
1288 
1289 	/* Subscribe to subscriber queue */
1290 	sdp->sd_persist = flags & EVCH_SUB_KEEP ? 1 : 0;
1291 	sdp->sd_dump = flags & EVCH_SUB_DUMP ? 1 : 0;
1292 	sdp->sd_type = dtype;
1293 	sdp->sd_cbcookie = cookie;
1294 	sdp->sd_ident = subid;
1295 	if (dtype == EVCH_DELKERN) {
1296 		sdp->sd_callback = (kerndlv_f)dinfo;
1297 		delivfkt = evch_kern_deliver;
1298 	} else {
1299 		sdp->sd_door = (door_handle_t)dinfo;
1300 		delivfkt = evch_door_deliver;
1301 	}
1302 	sdp->sd_ssub =
1303 	    evch_evq_sub(sdp->sd_queue, NULL, NULL, delivfkt, (void *)sdp);
1304 
1305 	/* Connect per subscriber queue to main event queue */
1306 	sdp->sd_msub = evch_evq_sub(eqp, evch_class_filter, clb,
1307 	    evch_subq_deliver, (void *)sdp);
1308 	sdp->sd_classname = clb;
1309 	sdp->sd_clnsize = clblen;
1310 	sdp->sd_pid = pid;
1311 	sdp->sd_active++;
1312 
1313 	/* Add subscription to binding */
1314 	sdp->sd_subnxt = bp->bd_sublst;
1315 	bp->bd_sublst = sdp;
1316 
1317 	/* Add subscription to channel */
1318 	evch_dl_add(&chp->ch_subscr, &sdp->sd_link);
1319 	if (chp->ch_holdpend && evch_dl_getnum(&chp->ch_subscr) == 1) {
1320 
1321 		/* Let main event queue run in case of HOLDPEND */
1322 		evch_evq_continue(eqp);
1323 	}
1324 	mutex_exit(&chp->ch_mutex);
1325 
1326 	return (0);
1327 }
1328 
1329 /*
1330  * If flag == EVCH_SUB_KEEP only non-persistent subscriptions are deleted.
1331  * When sid == NULL all subscriptions except the ones with EVCH_SUB_KEEP set
1332  * are removed.
1333  */
1334 static void
1335 evch_chunsubscribe(evch_bind_t *bp, const char *sid, uint32_t flags)
1336 {
1337 	evch_subd_t	*sdp;
1338 	evch_subd_t	*next;
1339 	evch_subd_t	*prev;
1340 	evch_chan_t	*chp = bp->bd_channel;
1341 
1342 	mutex_enter(&chp->ch_mutex);
1343 	if (chp->ch_holdpend) {
1344 		evch_evq_stop(chp->ch_queue);	/* Hold main event queue */
1345 	}
1346 	prev = NULL;
1347 	for (sdp = bp->bd_sublst; sdp; sdp = next) {
1348 		if (sid == NULL || strcmp(sid, sdp->sd_ident) == 0) {
1349 			if (flags == 0 || sdp->sd_persist == 0) {
1350 				/*
1351 				 * Disconnect subscriber queue from main event
1352 				 * queue.
1353 				 */
1354 				evch_evq_unsub(chp->ch_queue, sdp->sd_msub);
1355 
1356 				/* Destruct per subscriber queue */
1357 				evch_evq_unsub(sdp->sd_queue, sdp->sd_ssub);
1358 				evch_evq_destroy(sdp->sd_queue);
1359 				/*
1360 				 * Eliminate the subscriber data from channel
1361 				 * list.
1362 				 */
1363 				evch_dl_del(&chp->ch_subscr, &sdp->sd_link);
1364 				kmem_free(sdp->sd_classname, sdp->sd_clnsize);
1365 				if (sdp->sd_type == EVCH_DELDOOR) {
1366 					door_ki_rele(sdp->sd_door);
1367 				}
1368 				next = sdp->sd_subnxt;
1369 				if (prev) {
1370 					prev->sd_subnxt = next;
1371 				} else {
1372 					bp->bd_sublst = next;
1373 				}
1374 				kmem_free(sdp->sd_ident,
1375 				    strlen(sdp->sd_ident) + 1);
1376 				kmem_free(sdp, sizeof (evch_subd_t));
1377 			} else {
1378 				/*
1379 				 * EVCH_SUB_KEEP case
1380 				 */
1381 				evch_evq_stop(sdp->sd_queue);
1382 				if (sdp->sd_type == EVCH_DELDOOR) {
1383 					door_ki_rele(sdp->sd_door);
1384 				}
1385 				sdp->sd_active--;
1386 				ASSERT(sdp->sd_active == 0);
1387 				next = sdp->sd_subnxt;
1388 				prev = sdp;
1389 			}
1390 			if (sid != NULL) {
1391 				break;
1392 			}
1393 		} else {
1394 			next = sdp->sd_subnxt;
1395 			prev = sdp;
1396 		}
1397 	}
1398 	if (!(chp->ch_holdpend && evch_dl_getnum(&chp->ch_subscr) == 0)) {
1399 		/*
1400 		 * Continue dispatch thread except if no subscribers are present
1401 		 * in HOLDPEND mode.
1402 		 */
1403 		evch_evq_continue(chp->ch_queue);
1404 	}
1405 	mutex_exit(&chp->ch_mutex);
1406 }
1407 
1408 /*
1409  * Publish an event. Returns zero on success and an error code else.
1410  */
1411 static int
1412 evch_chpublish(evch_bind_t *bp, sysevent_impl_t *ev, int flags)
1413 {
1414 	evch_chan_t *chp = bp->bd_channel;
1415 
1416 	DTRACE_SYSEVENT2(post, evch_bind_t *, bp, sysevent_impl_t *, ev);
1417 
1418 	mutex_enter(&chp->ch_pubmx);
1419 	if (chp->ch_nevents >= chp->ch_maxev) {
1420 		if (!(flags & EVCH_QWAIT)) {
1421 			evch_evq_evfree(ev);
1422 			mutex_exit(&chp->ch_pubmx);
1423 			return (EAGAIN);
1424 		} else {
1425 			while (chp->ch_nevents >= chp->ch_maxev) {
1426 				if (cv_wait_sig(&chp->ch_pubcv,
1427 				    &chp->ch_pubmx) == 0) {
1428 
1429 					/* Got Signal, return EINTR */
1430 					evch_evq_evfree(ev);
1431 					mutex_exit(&chp->ch_pubmx);
1432 					return (EINTR);
1433 				}
1434 			}
1435 		}
1436 	}
1437 	chp->ch_nevents++;
1438 	mutex_exit(&chp->ch_pubmx);
1439 	SE_TIME(ev) = gethrtime();
1440 	SE_SEQ(ev) = log_sysevent_new_id();
1441 	/*
1442 	 * Add the destructor function to the event structure, now that the
1443 	 * event is accounted for. The only task of the descructor is to
1444 	 * decrement the channel event count. The evq_*() routines (including
1445 	 * the event delivery thread) do not have knowledge of the channel
1446 	 * data. So the anonymous destructor handles the channel data for it.
1447 	 */
1448 	evch_evq_evadd_dest(ev, evch_destr_event, (void *)chp);
1449 	return (evch_evq_pub(chp->ch_queue, ev, flags) == 0 ? 0 : EAGAIN);
1450 }
1451 
1452 /*
1453  * Fills a buffer consecutive with the names of all available channels.
1454  * Returns the length of all name strings or -1 if buffer size was unsufficient.
1455  */
1456 static int
1457 evch_chgetnames(char *buf, size_t size)
1458 {
1459 	struct evch_globals *eg;
1460 	int		len = 0;
1461 	char		*addr = buf;
1462 	int		max = size;
1463 	evch_chan_t	*chp;
1464 
1465 	eg = zone_getspecific(evch_zone_key, curproc->p_zone);
1466 	ASSERT(eg != NULL);
1467 
1468 	mutex_enter(&eg->evch_list_lock);
1469 	for (chp = evch_dl_next(&eg->evch_list, NULL); chp != NULL;
1470 	    chp = evch_dl_next(&eg->evch_list, chp)) {
1471 		len += chp->ch_namelen;
1472 		if (len >= max) {
1473 			mutex_exit(&eg->evch_list_lock);
1474 			return (-1);
1475 		}
1476 		bcopy(chp->ch_name, addr, chp->ch_namelen);
1477 		addr += chp->ch_namelen;
1478 	}
1479 	mutex_exit(&eg->evch_list_lock);
1480 	addr[0] = 0;
1481 	return (len + 1);
1482 }
1483 
1484 /*
1485  * Fills the data of one channel and all subscribers of that channel into
1486  * a buffer. Returns -1 if the channel name is invalid and 0 on buffer overflow.
1487  */
1488 static int
1489 evch_chgetchdata(char *chname, void *buf, size_t size)
1490 {
1491 	struct evch_globals *eg;
1492 	char		*cpaddr;
1493 	int		bufmax;
1494 	int		buflen;
1495 	evch_chan_t	*chp;
1496 	sev_chinfo_t	*p = (sev_chinfo_t *)buf;
1497 	int		chdlen;
1498 	evch_subd_t	*sdp;
1499 	sev_subinfo_t	*subp;
1500 	int		idlen;
1501 	int		len;
1502 
1503 	eg = zone_getspecific(evch_zone_key, curproc->p_zone);
1504 	ASSERT(eg != NULL);
1505 
1506 	mutex_enter(&eg->evch_list_lock);
1507 	chp = (evch_chan_t *)evch_dl_search(&eg->evch_list, evch_namecmp,
1508 	    chname);
1509 	if (chp == NULL) {
1510 		mutex_exit(&eg->evch_list_lock);
1511 		return (-1);
1512 	}
1513 	chdlen = offsetof(sev_chinfo_t, cd_subinfo);
1514 	if (size < chdlen) {
1515 		mutex_exit(&eg->evch_list_lock);
1516 		return (0);
1517 	}
1518 	p->cd_version = 0;
1519 	p->cd_suboffs = chdlen;
1520 	p->cd_uid = chp->ch_uid;
1521 	p->cd_gid = chp->ch_gid;
1522 	p->cd_perms = 0;
1523 	p->cd_ctime = chp->ch_ctime;
1524 	p->cd_maxev = chp->ch_maxev;
1525 	p->cd_evhwm = EVCH_EVQ_HIGHWM(chp->ch_queue);
1526 	p->cd_nevents = EVCH_EVQ_EVCOUNT(chp->ch_queue);
1527 	p->cd_maxsub = chp->ch_maxsubscr;
1528 	p->cd_nsub = evch_dl_getnum(&chp->ch_subscr);
1529 	p->cd_maxbinds = chp->ch_maxbinds;
1530 	p->cd_nbinds = chp->ch_bindings;
1531 	p->cd_holdpend = chp->ch_holdpend;
1532 	p->cd_limev = evch_events_max;
1533 	cpaddr = (char *)p + chdlen;
1534 	bufmax = size - chdlen;
1535 	buflen = 0;
1536 
1537 	for (sdp = evch_dl_next(&chp->ch_subscr, NULL); sdp != NULL;
1538 	    sdp = evch_dl_next(&chp->ch_subscr, sdp)) {
1539 		idlen = strlen(sdp->sd_ident) + 1;
1540 		len = SE_ALIGN(offsetof(sev_subinfo_t, sb_strings) + idlen +
1541 		    sdp->sd_clnsize);
1542 		buflen += len;
1543 		if (buflen >= bufmax) {
1544 			mutex_exit(&eg->evch_list_lock);
1545 			return (0);
1546 		}
1547 		subp = (sev_subinfo_t *)cpaddr;
1548 		subp->sb_nextoff = len;
1549 		subp->sb_stroff = offsetof(sev_subinfo_t, sb_strings);
1550 		if (sdp->sd_classname) {
1551 			bcopy(sdp->sd_classname, subp->sb_strings + idlen,
1552 			    sdp->sd_clnsize);
1553 			subp->sb_clnamoff = idlen;
1554 		} else {
1555 			subp->sb_clnamoff = idlen - 1;
1556 		}
1557 		subp->sb_pid = sdp->sd_pid;
1558 		subp->sb_nevents = EVCH_EVQ_EVCOUNT(sdp->sd_queue);
1559 		subp->sb_evhwm = EVCH_EVQ_HIGHWM(sdp->sd_queue);
1560 		subp->sb_persist = sdp->sd_persist;
1561 		subp->sb_status = evch_evq_status(sdp->sd_queue);
1562 		subp->sb_active = sdp->sd_active;
1563 		subp->sb_dump = sdp->sd_dump;
1564 		bcopy(sdp->sd_ident, subp->sb_strings, idlen);
1565 		cpaddr += len;
1566 	}
1567 	mutex_exit(&eg->evch_list_lock);
1568 	return (chdlen + buflen);
1569 }
1570 
1571 static void
1572 evch_chsetpropnvl(evch_bind_t *bp, nvlist_t *nvl)
1573 {
1574 	evch_chan_t *chp = bp->bd_channel;
1575 
1576 	mutex_enter(&chp->ch_mutex);
1577 
1578 	nvlist_free(chp->ch_propnvl);
1579 
1580 	chp->ch_propnvl = nvl;
1581 	chp->ch_propnvlgen++;
1582 
1583 	mutex_exit(&chp->ch_mutex);
1584 }
1585 
1586 static int
1587 evch_chgetpropnvl(evch_bind_t *bp, nvlist_t **nvlp, int64_t *genp)
1588 {
1589 	evch_chan_t *chp = bp->bd_channel;
1590 	int rc = 0;
1591 
1592 	mutex_enter(&chp->ch_mutex);
1593 
1594 	if (chp->ch_propnvl != NULL)
1595 		rc = (nvlist_dup(chp->ch_propnvl, nvlp, 0) == 0) ? 0 : ENOMEM;
1596 	else
1597 		*nvlp = NULL;	/* rc still 0 */
1598 
1599 	if (genp)
1600 		*genp = chp->ch_propnvlgen;
1601 
1602 	mutex_exit(&chp->ch_mutex);
1603 
1604 	if (rc != 0)
1605 		*nvlp = NULL;
1606 
1607 	return (rc);
1608 
1609 }
1610 
1611 /*
1612  * Init iteration of all events of a channel. This function creates a new
1613  * event queue and puts all events from the channel into that queue.
1614  * Subsequent calls to evch_chgetnextev will deliver the events from that
1615  * queue. Only one thread per channel is allowed to read through the events.
1616  * Returns 0 on success and 1 if there is already someone reading the
1617  * events.
1618  * If argument subid == NULL, we look for a subscriber which has
1619  * flag EVCH_SUB_DUMP set.
1620  */
1621 /*
1622  * Static variables that are used to traverse events of a channel in panic case.
1623  */
1624 static evch_chan_t	*evch_chan;
1625 static evch_eventq_t	*evch_subq;
1626 static sysevent_impl_t	*evch_curev;
1627 
1628 static evchanq_t *
1629 evch_chrdevent_init(evch_chan_t *chp, char *subid)
1630 {
1631 	evch_subd_t	*sdp;
1632 	void		*ev;
1633 	int		pmqstat;	/* Prev status of main queue */
1634 	int		psqstat;	/* Prev status of subscriber queue */
1635 	evchanq_t	*snp;		/* Pointer to q with snapshot of ev */
1636 	compare_f	compfunc;
1637 
1638 	compfunc = subid == NULL ? evch_dumpflgcmp : evch_subidcmp;
1639 	if (panicstr != NULL) {
1640 		evch_chan = chp;
1641 		evch_subq = NULL;
1642 		evch_curev = NULL;
1643 		if ((sdp = (evch_subd_t *)evch_dl_search(&chp->ch_subscr,
1644 		    compfunc, subid)) != NULL) {
1645 			evch_subq = sdp->sd_queue;
1646 		}
1647 		return (NULL);
1648 	}
1649 	mutex_enter(&chp->ch_mutex);
1650 	sdp = (evch_subd_t *)evch_dl_search(&chp->ch_subscr, compfunc, subid);
1651 	/*
1652 	 * Stop main event queue and subscriber queue if not already
1653 	 * in stop mode.
1654 	 */
1655 	pmqstat = evch_evq_status(chp->ch_queue);
1656 	if (pmqstat == 0)
1657 		evch_evq_stop(chp->ch_queue);
1658 	if (sdp != NULL) {
1659 		psqstat = evch_evq_status(sdp->sd_queue);
1660 		if (psqstat == 0)
1661 			evch_evq_stop(sdp->sd_queue);
1662 	}
1663 	/*
1664 	 * Create event queue to make a snapshot of all events in the
1665 	 * channel.
1666 	 */
1667 	snp = kmem_alloc(sizeof (evchanq_t), KM_SLEEP);
1668 	snp->sn_queue = evch_evq_create();
1669 	evch_evq_stop(snp->sn_queue);
1670 	/*
1671 	 * Make a snapshot of the subscriber queue and the main event queue.
1672 	 */
1673 	if (sdp != NULL) {
1674 		ev = NULL;
1675 		while ((ev = evch_evq_evnext(sdp->sd_queue, ev)) != NULL) {
1676 			(void) evch_evq_pub(snp->sn_queue, ev, EVCH_SLEEP);
1677 		}
1678 	}
1679 	ev = NULL;
1680 	while ((ev = evch_evq_evnext(chp->ch_queue, ev)) != NULL) {
1681 		(void) evch_evq_pub(snp->sn_queue, ev, EVCH_SLEEP);
1682 	}
1683 	snp->sn_nxtev = NULL;
1684 	/*
1685 	 * Restart main and subscriber queue if previously stopped
1686 	 */
1687 	if (sdp != NULL && psqstat == 0)
1688 		evch_evq_continue(sdp->sd_queue);
1689 	if (pmqstat == 0)
1690 		evch_evq_continue(chp->ch_queue);
1691 	mutex_exit(&chp->ch_mutex);
1692 	return (snp);
1693 }
1694 
1695 /*
1696  * Free all resources of the event queue snapshot. In case of panic
1697  * context snp must be NULL and no resources need to be free'ed.
1698  */
1699 static void
1700 evch_chrdevent_fini(evchanq_t *snp)
1701 {
1702 	if (snp != NULL) {
1703 		evch_evq_destroy(snp->sn_queue);
1704 		kmem_free(snp, sizeof (evchanq_t));
1705 	}
1706 }
1707 
1708 /*
1709  * Get address of next event from an event channel.
1710  * This function might be called in a panic context. In that case
1711  * no resources will be allocated and no locks grabbed.
1712  * In normal operation context a snapshot of the event queues of the
1713  * specified event channel will be taken.
1714  */
1715 static sysevent_impl_t *
1716 evch_chgetnextev(evchanq_t *snp)
1717 {
1718 	if (panicstr != NULL) {
1719 		if (evch_chan == NULL)
1720 			return (NULL);
1721 		if (evch_subq != NULL) {
1722 			/*
1723 			 * We have a subscriber queue. Traverse this queue
1724 			 * first.
1725 			 */
1726 			if ((evch_curev = (sysevent_impl_t *)
1727 			    evch_evq_evnext(evch_subq, evch_curev)) != NULL) {
1728 				return (evch_curev);
1729 			} else {
1730 				/*
1731 				 * All subscriber events traversed. evch_subq
1732 				 * == NULL indicates to take the main event
1733 				 * queue now.
1734 				 */
1735 				evch_subq = NULL;
1736 			}
1737 		}
1738 		/*
1739 		 * Traverse the main event queue.
1740 		 */
1741 		if ((evch_curev = (sysevent_impl_t *)
1742 		    evch_evq_evnext(evch_chan->ch_queue, evch_curev)) ==
1743 		    NULL) {
1744 			evch_chan = NULL;
1745 		}
1746 		return (evch_curev);
1747 	}
1748 	ASSERT(snp != NULL);
1749 	snp->sn_nxtev = (sysevent_impl_t *)evch_evq_evnext(snp->sn_queue,
1750 	    snp->sn_nxtev);
1751 	return (snp->sn_nxtev);
1752 }
1753 
/*
 * The functions below build up the interface for the kernel to bind/unbind,
 * subscribe/unsubscribe and publish to event channels. It consists of the
 * following functions:
 *
 * sysevent_evc_bind	    - Bind to a channel. Create a channel if required
 * sysevent_evc_unbind	    - Unbind from a channel. Destroy ch. if last unbind
 * sysevent_evc_subscribe   - Subscribe to events from a channel
 * sysevent_evc_unsubscribe - Unsubscribe from an event class
 * sysevent_evc_publish	    - Publish an event to an event channel
 * sysevent_evc_control	    - Various control operations on an event channel
 * sysevent_evc_setpropnvl  - Set channel property nvlist
 * sysevent_evc_getpropnvl  - Get channel property nvlist
 *
 * The functions below are for evaluating a sysevent:
 *
 * sysevent_get_class_name  - Get pointer to event class string
 * sysevent_get_subclass_name - Get pointer to event subclass string
 * sysevent_get_seq	    - Get unique event sequence number
 * sysevent_get_time	    - Get hrtime timestamp of event publish
 * sysevent_get_size	    - Get size of event structure
 * sysevent_get_pub	    - Get publisher string
 * sysevent_get_attr_list   - Get copy of attribute list
 *
 * The following interfaces have stability level Project Private and allow
 * the events of an event channel to be saved even in the panic case:
 *
 * sysevent_evc_walk_init   - Take a snapshot of the events in a channel
 * sysevent_evc_walk_step   - Read next event from snapshot
 * sysevent_evc_walk_fini   - Free resources from event channel snapshot
 * sysevent_evc_event_attr  - Get event payload address and size
 */
1786 /*
1787  * allocate sysevent structure with optional space for attributes
1788  */
1789 static sysevent_impl_t *
1790 sysevent_evc_alloc(const char *class, const char *subclass, const char *pub,
1791     size_t pub_sz, size_t atsz, uint32_t flag)
1792 {
1793 	int		payload_sz;
1794 	int		class_sz, subclass_sz;
1795 	int 		aligned_class_sz, aligned_subclass_sz, aligned_pub_sz;
1796 	sysevent_impl_t	*ev;
1797 
1798 	/*
1799 	 * Calculate and reserve space for the class, subclass and
1800 	 * publisher strings in the event buffer
1801 	 */
1802 	class_sz = strlen(class) + 1;
1803 	subclass_sz = strlen(subclass) + 1;
1804 
1805 	ASSERT((class_sz <= MAX_CLASS_LEN) && (subclass_sz <=
1806 	    MAX_SUBCLASS_LEN) && (pub_sz <= MAX_PUB_LEN));
1807 
1808 	/* String sizes must be 64-bit aligned in the event buffer */
1809 	aligned_class_sz = SE_ALIGN(class_sz);
1810 	aligned_subclass_sz = SE_ALIGN(subclass_sz);
1811 	aligned_pub_sz = SE_ALIGN(pub_sz);
1812 
1813 	/*
1814 	 * Calculate payload size. Consider the space needed for alignment
1815 	 * and subtract the size of the uint64_t placeholder variables of
1816 	 * sysevent_impl_t.
1817 	 */
1818 	payload_sz = (aligned_class_sz - sizeof (uint64_t)) +
1819 	    (aligned_subclass_sz - sizeof (uint64_t)) +
1820 	    (aligned_pub_sz - sizeof (uint64_t)) - sizeof (uint64_t) +
1821 	    atsz;
1822 
1823 	/*
1824 	 * Allocate event buffer plus additional payload overhead
1825 	 */
1826 	if ((ev = evch_evq_evzalloc(sizeof (sysevent_impl_t) +
1827 	    payload_sz, flag)) == NULL) {
1828 		return (NULL);
1829 	}
1830 
1831 	/* Initialize the event buffer data */
1832 	SE_VERSION(ev) = SYS_EVENT_VERSION;
1833 	bcopy(class, SE_CLASS_NAME(ev), class_sz);
1834 
1835 	SE_SUBCLASS_OFF(ev) = SE_ALIGN(offsetof(sysevent_impl_t,
1836 	    se_class_name)) + aligned_class_sz;
1837 	bcopy(subclass, SE_SUBCLASS_NAME(ev), subclass_sz);
1838 
1839 	SE_PUB_OFF(ev) = SE_SUBCLASS_OFF(ev) + aligned_subclass_sz;
1840 	bcopy(pub, SE_PUB_NAME(ev), pub_sz);
1841 
1842 	SE_ATTR_PTR(ev) = (uint64_t)0;
1843 	SE_PAYLOAD_SZ(ev) = payload_sz;
1844 
1845 	return (ev);
1846 }
1847 
/*
 * Initialize event channel handling queues. This is the kernel entry point
 * for the first initialization step; all work is done by evch_chinit().
 */
void
sysevent_evc_init()
{
	evch_chinit();
}
1856 
/*
 * Second initialization step: create threads, if event channels are already
 * created. All work is done by evch_chinitthr().
 */
void
sysevent_evc_thrinit()
{
	evch_chinitthr();
}
1866 
1867 int
1868 sysevent_evc_bind(const char *ch_name, evchan_t **scpp, uint32_t flags)
1869 {
1870 	ASSERT(ch_name != NULL && scpp != NULL);
1871 	ASSERT((flags & ~EVCH_B_FLAGS) == 0);
1872 	return (evch_chbind(ch_name, (evch_bind_t **)scpp, flags));
1873 }
1874 
1875 int
1876 sysevent_evc_unbind(evchan_t *scp)
1877 {
1878 	evch_bind_t *bp = (evch_bind_t *)scp;
1879 
1880 	ASSERT(scp != NULL);
1881 	evch_chunsubscribe(bp, NULL, 0);
1882 	evch_chunbind(bp);
1883 
1884 	return (0);
1885 }
1886 
1887 int
1888 sysevent_evc_subscribe(evchan_t *scp, const char *sid, const char *class,
1889     int (*callb)(sysevent_t *ev, void *cookie),
1890     void *cookie, uint32_t flags)
1891 {
1892 	ASSERT(scp != NULL && sid != NULL && class != NULL && callb != NULL);
1893 	ASSERT(flags == 0);
1894 	if (strlen(sid) > MAX_SUBID_LEN) {
1895 		return (EINVAL);
1896 	}
1897 	if (strcmp(class, EC_ALL) == 0) {
1898 		class = NULL;
1899 	}
1900 	return (evch_chsubscribe((evch_bind_t *)scp, EVCH_DELKERN, sid, class,
1901 	    (void *)callb, cookie, 0, 0));
1902 }
1903 
1904 int
1905 sysevent_evc_unsubscribe(evchan_t *scp, const char *sid)
1906 {
1907 	ASSERT(scp != NULL && sid != NULL);
1908 	if (strcmp(sid, EVCH_ALLSUB) == 0) {
1909 		sid = NULL;
1910 	}
1911 	evch_chunsubscribe((evch_bind_t *)scp, sid, 0);
1912 
1913 	return (0);
1914 }
1915 
1916 /*
1917  * Publish kernel event. Returns 0 on success, error code else.
1918  * Optional attribute data is packed into the event structure.
1919  */
1920 int
1921 sysevent_evc_publish(evchan_t *scp, const char *class, const char *subclass,
1922     const char *vendor, const char *pubs, nvlist_t *attr, uint32_t flags)
1923 {
1924 	sysevent_impl_t	*evp;
1925 	char		pub[MAX_PUB_LEN];
1926 	int		pub_sz;		/* includes terminating 0 */
1927 	int		km_flags;
1928 	size_t		asz = 0;
1929 	uint64_t	attr_offset;
1930 	caddr_t		patt;
1931 	int		err;
1932 
1933 	ASSERT(scp != NULL && class != NULL && subclass != NULL &&
1934 	    vendor != NULL && pubs != NULL);
1935 
1936 	ASSERT((flags & ~(EVCH_SLEEP | EVCH_NOSLEEP | EVCH_TRYHARD |
1937 	    EVCH_QWAIT)) == 0);
1938 
1939 	km_flags = flags & (EVCH_SLEEP | EVCH_NOSLEEP | EVCH_TRYHARD);
1940 	ASSERT(km_flags == EVCH_SLEEP || km_flags == EVCH_NOSLEEP ||
1941 	    km_flags == EVCH_TRYHARD);
1942 
1943 	pub_sz = snprintf(pub, MAX_PUB_LEN, "%s:kern:%s", vendor, pubs) + 1;
1944 	if (pub_sz > MAX_PUB_LEN)
1945 		return (EINVAL);
1946 
1947 	if (attr != NULL) {
1948 		if ((err = nvlist_size(attr, &asz, NV_ENCODE_NATIVE)) != 0) {
1949 			return (err);
1950 		}
1951 	}
1952 	evp = sysevent_evc_alloc(class, subclass, pub, pub_sz, asz, km_flags);
1953 	if (evp == NULL) {
1954 		return (ENOMEM);
1955 	}
1956 	if (attr != NULL) {
1957 		/*
1958 		 * Pack attributes into event buffer. Event buffer already
1959 		 * has enough room for the packed nvlist.
1960 		 */
1961 		attr_offset = SE_ATTR_OFF(evp);
1962 		patt = (caddr_t)evp + attr_offset;
1963 
1964 		err = nvlist_pack(attr, &patt, &asz, NV_ENCODE_NATIVE,
1965 		    km_flags & EVCH_SLEEP ? KM_SLEEP : KM_NOSLEEP);
1966 
1967 		ASSERT(err != ENOMEM);
1968 
1969 		if (err != 0) {
1970 			return (EINVAL);
1971 		}
1972 
1973 		evp->seh_attr_off = attr_offset;
1974 		SE_FLAG(evp) = SE_PACKED_BUF;
1975 	}
1976 	return (evch_chpublish((evch_bind_t *)scp, evp, flags));
1977 }
1978 
1979 int
1980 sysevent_evc_control(evchan_t *scp, int cmd, ...)
1981 {
1982 	va_list		ap;
1983 	evch_chan_t	*chp;
1984 	uint32_t	*chlenp;
1985 	uint32_t	chlen;
1986 	uint32_t	ochlen;
1987 	int		rc = 0;
1988 
1989 	if (scp == NULL) {
1990 		return (EINVAL);
1991 	}
1992 
1993 	chp = ((evch_bind_t *)scp)->bd_channel;
1994 
1995 	va_start(ap, cmd);
1996 	mutex_enter(&chp->ch_mutex);
1997 	switch (cmd) {
1998 	case EVCH_GET_CHAN_LEN:
1999 		chlenp = va_arg(ap, uint32_t *);
2000 		*chlenp = chp->ch_maxev;
2001 		break;
2002 	case EVCH_SET_CHAN_LEN:
2003 		chlen = va_arg(ap, uint32_t);
2004 		ochlen = chp->ch_maxev;
2005 		chp->ch_maxev = min(chlen, evch_events_max);
2006 		if (ochlen < chp->ch_maxev) {
2007 			cv_signal(&chp->ch_pubcv);
2008 		}
2009 		break;
2010 	case EVCH_GET_CHAN_LEN_MAX:
2011 		*va_arg(ap, uint32_t *) = evch_events_max;
2012 		break;
2013 	default:
2014 		rc = EINVAL;
2015 	}
2016 
2017 	mutex_exit(&chp->ch_mutex);
2018 	va_end(ap);
2019 	return (rc);
2020 }
2021 
2022 int
2023 sysevent_evc_setpropnvl(evchan_t *scp, nvlist_t *nvl)
2024 {
2025 	nvlist_t *nvlcp = nvl;
2026 
2027 	if (nvl != NULL && nvlist_dup(nvl, &nvlcp, 0) != 0)
2028 		return (ENOMEM);
2029 
2030 	evch_chsetpropnvl((evch_bind_t *)scp, nvlcp);
2031 
2032 	return (0);
2033 }
2034 
2035 int
2036 sysevent_evc_getpropnvl(evchan_t *scp, nvlist_t **nvlp)
2037 {
2038 	return (evch_chgetpropnvl((evch_bind_t *)scp, nvlp, NULL));
2039 }
2040 
2041 /*
2042  * Project private interface to take a snapshot of all events of the
2043  * specified event channel. Argument subscr may be a subscriber id, the empty
2044  * string "", or NULL. The empty string indicates that no subscriber is
2045  * selected, for example if a previous subscriber died. sysevent_evc_walk_next()
2046  * will deliver events from the main event queue in this case. If subscr is
2047  * NULL, the subscriber with the EVCH_SUB_DUMP flag set (subd->sd_dump != 0)
2048  * will be selected.
2049  *
2050  * In panic case this function returns NULL. This is legal. The NULL has
2051  * to be delivered to sysevent_evc_walk_step() and sysevent_evc_walk_fini().
2052  */
2053 evchanq_t *
2054 sysevent_evc_walk_init(evchan_t *scp, char *subscr)
2055 {
2056 	if (panicstr != NULL && scp == NULL)
2057 		return (NULL);
2058 	ASSERT(scp != NULL);
2059 	return (evch_chrdevent_init(((evch_bind_t *)scp)->bd_channel, subscr));
2060 }
2061 
2062 /*
2063  * Project private interface to read events from a previously taken
2064  * snapshot (with sysevent_evc_walk_init). In case of panic events
2065  * are retrieved directly from the channel data structures. No resources
2066  * are allocated and no mutexes are grabbed in panic context.
2067  */
2068 sysevent_t *
2069 sysevent_evc_walk_step(evchanq_t *evcq)
2070 {
2071 	return ((sysevent_t *)evch_chgetnextev(evcq));
2072 }
2073 
/*
 * Project private interface to free a previously taken snapshot. The
 * snapshot handle is passed unchanged to evch_chrdevent_fini().
 */
void
sysevent_evc_walk_fini(evchanq_t *evcq)
{
	evch_chrdevent_fini(evcq);
}
2082 
2083 /*
2084  * Get address and size of an event payload. Returns NULL when no
2085  * payload present.
2086  */
2087 char *
2088 sysevent_evc_event_attr(sysevent_t *ev, size_t *plsize)
2089 {
2090 	char	*attrp;
2091 	size_t	aoff;
2092 	size_t	asz;
2093 
2094 	aoff = SE_ATTR_OFF(ev);
2095 	attrp = (char *)ev + aoff;
2096 	asz = *plsize = SE_SIZE(ev) - aoff;
2097 	return (asz ? attrp : NULL);
2098 }
2099 
2100 /*
2101  * sysevent_get_class_name - Get class name string
2102  */
2103 char *
2104 sysevent_get_class_name(sysevent_t *ev)
2105 {
2106 	return (SE_CLASS_NAME(ev));
2107 }
2108 
2109 /*
2110  * sysevent_get_subclass_name - Get subclass name string
2111  */
2112 char *
2113 sysevent_get_subclass_name(sysevent_t *ev)
2114 {
2115 	return (SE_SUBCLASS_NAME(ev));
2116 }
2117 
2118 /*
2119  * sysevent_get_seq - Get event sequence id
2120  */
2121 uint64_t
2122 sysevent_get_seq(sysevent_t *ev)
2123 {
2124 	return (SE_SEQ(ev));
2125 }
2126 
2127 /*
2128  * sysevent_get_time - Get event timestamp
2129  */
2130 void
2131 sysevent_get_time(sysevent_t *ev, hrtime_t *etime)
2132 {
2133 	*etime = SE_TIME(ev);
2134 }
2135 
2136 /*
2137  * sysevent_get_size - Get event buffer size
2138  */
2139 size_t
2140 sysevent_get_size(sysevent_t *ev)
2141 {
2142 	return ((size_t)SE_SIZE(ev));
2143 }
2144 
2145 /*
2146  * sysevent_get_pub - Get publisher name string
2147  */
2148 char *
2149 sysevent_get_pub(sysevent_t *ev)
2150 {
2151 	return (SE_PUB_NAME(ev));
2152 }
2153 
2154 /*
2155  * sysevent_get_attr_list - stores address of a copy of the attribute list
2156  * associated with the given sysevent buffer. The list must be freed by the
2157  * caller.
2158  */
2159 int
2160 sysevent_get_attr_list(sysevent_t *ev, nvlist_t **nvlist)
2161 {
2162 	int		error;
2163 	caddr_t		attr;
2164 	size_t		attr_len;
2165 	uint64_t	attr_offset;
2166 
2167 	*nvlist = NULL;
2168 	if (SE_FLAG(ev) != SE_PACKED_BUF) {
2169 		return (EINVAL);
2170 	}
2171 	attr_offset = SE_ATTR_OFF(ev);
2172 	if (SE_SIZE(ev) == attr_offset) {
2173 		return (EINVAL);
2174 	}
2175 
2176 	/* unpack nvlist */
2177 	attr = (caddr_t)ev + attr_offset;
2178 	attr_len = SE_SIZE(ev) - attr_offset;
2179 	if ((error = nvlist_unpack(attr, attr_len, nvlist, 0)) != 0) {
2180 		error = error != ENOMEM ? EINVAL : error;
2181 		return (error);
2182 	}
2183 	return (0);
2184 }
2185 
2186 /*
2187  * Functions called by the sysevent driver for general purpose event channels
2188  *
2189  * evch_usrchanopen	- Create/Bind to an event channel
2190  * evch_usrchanclose	- Unbind/Destroy event channel
2191  * evch_usrallocev	- Allocate event data structure
2192  * evch_usrfreeev	- Free event data structure
2193  * evch_usrpostevent	- Publish event
2194  * evch_usrsubscribe	- Subscribe (register callback function)
2195  * evch_usrunsubscribe	- Unsubscribe
2196  * evch_usrcontrol_set	- Set channel properties
2197  * evch_usrcontrol_get	- Get channel properties
2198  * evch_usrgetchnames	- Get list of channel names
2199  * evch_usrgetchdata	- Get data of an event channel
2200  * evch_usrsetpropnvl	- Set channel properties nvlist
2201  * evch_usrgetpropnvl	- Get channel properties nvlist
2202  */
2203 evchan_t *
2204 evch_usrchanopen(const char *name, uint32_t flags, int *err)
2205 {
2206 	evch_bind_t *bp = NULL;
2207 
2208 	*err = evch_chbind(name, &bp, flags);
2209 	return ((evchan_t *)bp);
2210 }
2211 
2212 /*
2213  * Unbind from the channel.
2214  */
2215 void
2216 evch_usrchanclose(evchan_t *cbp)
2217 {
2218 	evch_chunbind((evch_bind_t *)cbp);
2219 }
2220 
2221 /*
2222  * Allocates log_evch_eventq_t structure but returns the pointer of the embedded
2223  * sysevent_impl_t structure as the opaque sysevent_t * data type
2224  */
2225 sysevent_impl_t *
2226 evch_usrallocev(size_t evsize, uint32_t flags)
2227 {
2228 	return ((sysevent_impl_t *)evch_evq_evzalloc(evsize, flags));
2229 }
2230 
2231 /*
2232  * Free evch_eventq_t structure
2233  */
2234 void
2235 evch_usrfreeev(sysevent_impl_t *ev)
2236 {
2237 	evch_evq_evfree((void *)ev);
2238 }
2239 
2240 /*
2241  * Posts an event to the given channel. The event structure has to be
2242  * allocated by evch_usrallocev(). Returns zero on success and an error
2243  * code else. Attributes have to be packed and included in the event structure.
2244  *
2245  */
2246 int
2247 evch_usrpostevent(evchan_t *bp, sysevent_impl_t *ev, uint32_t flags)
2248 {
2249 	return (evch_chpublish((evch_bind_t *)bp, ev, flags));
2250 }
2251 
2252 /*
2253  * Subscribe function for user land subscriptions
2254  */
2255 int
2256 evch_usrsubscribe(evchan_t *bp, const char *sid, const char *class,
2257     int d, uint32_t flags)
2258 {
2259 	door_handle_t	dh = door_ki_lookup(d);
2260 	int		rv;
2261 
2262 	if (dh == NULL) {
2263 		return (EINVAL);
2264 	}
2265 	if ((rv = evch_chsubscribe((evch_bind_t *)bp, EVCH_DELDOOR, sid, class,
2266 	    (void *)dh, NULL, flags, curproc->p_pid)) != 0) {
2267 		door_ki_rele(dh);
2268 	}
2269 	return (rv);
2270 }
2271 
2272 /*
2273  * Flag can be EVCH_SUB_KEEP or 0. EVCH_SUB_KEEP preserves persistent
2274  * subscribers
2275  */
2276 void
2277 evch_usrunsubscribe(evchan_t *bp, const char *subid, uint32_t flags)
2278 {
2279 	evch_chunsubscribe((evch_bind_t *)bp, subid, flags);
2280 }
2281 
2282 /*ARGSUSED*/
2283 int
2284 evch_usrcontrol_set(evchan_t *bp, int cmd, uint32_t value)
2285 {
2286 	evch_chan_t	*chp = ((evch_bind_t *)bp)->bd_channel;
2287 	uid_t		uid = crgetuid(curthread->t_cred);
2288 	int		rc = 0;
2289 
2290 	mutex_enter(&chp->ch_mutex);
2291 	switch (cmd) {
2292 	case EVCH_SET_CHAN_LEN:
2293 		if (uid && uid != chp->ch_uid) {
2294 			rc = EACCES;
2295 			break;
2296 		}
2297 		chp->ch_maxev = min(value, evch_events_max);
2298 		break;
2299 	default:
2300 		rc = EINVAL;
2301 	}
2302 	mutex_exit(&chp->ch_mutex);
2303 	return (rc);
2304 }
2305 
2306 /*ARGSUSED*/
2307 int
2308 evch_usrcontrol_get(evchan_t *bp, int cmd, uint32_t *value)
2309 {
2310 	evch_chan_t	*chp = ((evch_bind_t *)bp)->bd_channel;
2311 	int		rc = 0;
2312 
2313 	mutex_enter(&chp->ch_mutex);
2314 	switch (cmd) {
2315 	case EVCH_GET_CHAN_LEN:
2316 		*value = chp->ch_maxev;
2317 		break;
2318 	case EVCH_GET_CHAN_LEN_MAX:
2319 		*value = evch_events_max;
2320 		break;
2321 	default:
2322 		rc = EINVAL;
2323 	}
2324 	mutex_exit(&chp->ch_mutex);
2325 	return (rc);
2326 }
2327 
/*
 * Get the list of channel names. buf and size are passed unchanged to
 * evch_chgetnames(), whose result is returned.
 */
int
evch_usrgetchnames(char *buf, size_t size)
{
	int	rc;

	rc = evch_chgetnames(buf, size);
	return (rc);
}
2333 
/*
 * Get the data of the event channel named chname. All arguments are passed
 * unchanged to evch_chgetchdata(), whose result is returned.
 */
int
evch_usrgetchdata(char *chname, void *buf, size_t size)
{
	int	rc;

	rc = evch_chgetchdata(chname, buf, size);
	return (rc);
}
2339 
2340 void
2341 evch_usrsetpropnvl(evchan_t *bp, nvlist_t *nvl)
2342 {
2343 	evch_chsetpropnvl((evch_bind_t *)bp, nvl);
2344 }
2345 
2346 int
2347 evch_usrgetpropnvl(evchan_t *bp, nvlist_t **nvlp, int64_t *genp)
2348 {
2349 	return (evch_chgetpropnvl((evch_bind_t *)bp, nvlp, genp));
2350 }
2351