xref: /illumos-gate/usr/src/uts/common/os/evchannels.c (revision e77c795bcbe51aebd7579fe13cbf2a6d56eca47f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 /*
26  * This file contains the source of the general purpose event channel extension
27  * to the sysevent framework. This implementation is made up mainly of four
28  * layers of functionality: the event queues (evch_evq_*()), the handling of
29  * channels (evch_ch*()), the kernel interface (sysevent_evc_*()) and the
30  * interface for the sysevent pseudo driver (evch_usr*()).
31  * Libsysevent.so uses the pseudo driver sysevent's ioctl to access the event
32  * channel extensions. The driver in turn uses the evch_usr*() functions below.
33  *
34  * The interfaces for user land and kernel are declared in sys/sysevent.h
35  * Internal data structures for event channels are defined in
36  * sys/sysevent_impl.h.
37  *
38  * The basic data structure for an event channel is of type evch_chan_t.
39  * All channels are maintained by a list named evch_list. The list head
40  * is of type evch_dlist_t.
41  */
42 
43 #include <sys/types.h>
44 #include <sys/errno.h>
45 #include <sys/stropts.h>
46 #include <sys/debug.h>
47 #include <sys/ddi.h>
48 #include <sys/vmem.h>
49 #include <sys/cmn_err.h>
50 #include <sys/callb.h>
51 #include <sys/sysevent.h>
52 #include <sys/sysevent_impl.h>
53 #include <sys/sysmacros.h>
54 #include <sys/disp.h>
55 #include <sys/atomic.h>
56 #include <sys/door.h>
57 #include <sys/zone.h>
58 #include <sys/sdt.h>
59 
/*
 * Back-off delay bounds, in clock ticks as passed to delay(), used while
 * retrying event delivery (see evch_delivery_thr() and evch_door_deliver()).
 */
#define	EVCH_MIN_PAUSE	8
#define	EVCH_MAX_PAUSE	128

/*
 * Convert a pointer to an event payload back into a pointer to the
 * evch_gevent_t that contains it. The argument is parenthesized so that
 * arbitrary pointer expressions can be passed safely.
 */
#define	GEVENT(ev)	((evch_gevent_t *)((char *)(ev) - \
			    offsetof(evch_gevent_t, ge_payload)))

/* Current and high water mark event counts of an event queue */
#define	EVCH_EVQ_EVCOUNT(x)	((&(x)->eq_eventq)->sq_count)
#define	EVCH_EVQ_HIGHWM(x)	((&(x)->eq_eventq)->sq_highwm)

/* Values stored in ch_holdpend by evch_chbind() */
#define	CH_HOLD_PEND		1
#define	CH_HOLD_PEND_INDEF	2
72 
/*
 * Per-zone event channel state. An instance is allocated by evch_zoneinit()
 * and released by evch_zonefree().
 */
struct evch_globals {
	/* list of the zone's channels; evch_chbind() links evch_chan_t here */
	evch_dlist_t evch_list;
	/* held by evch_chbind() and evch_zonefree() while using evch_list */
	kmutex_t evch_list_lock;
};
77 
/*
 * Variables used by event channel routines.
 *
 * evq_initcomplete	- set to 1 at the end of evch_chinitthr(); delivery
 *			  threads are only created directly once it is set
 * evch_zone_key	- zone key created in evch_chinit()
 * evch_channels_max	- channel limit computed in evch_chinit()
 * evch_bindings_max	- copied into ch_maxbinds of each new channel
 * evch_events_max	- evch_channels_max << 8, computed in evch_chinit()
 */
static int		evq_initcomplete = 0;
static zone_key_t	evch_zone_key;
static uint32_t		evch_channels_max;
static uint32_t		evch_bindings_max = EVCH_MAX_BINDS_PER_CHANNEL;
static uint32_t		evch_events_max;

/* Forward declarations: evch_zonefree() uses both before their definition */
static void evch_evq_unsub(evch_eventq_t *, evch_evqsub_t *);
static void evch_evq_destroy(evch_eventq_t *);
87 
88 /*
89  * List handling. These functions handle a doubly linked list. The list has
90  * to be protected by the calling functions. evch_dlist_t is the list head.
91  * Every node of the list has to put a evch_dlelem_t data type in its data
92  * structure as its first element.
93  *
94  * evch_dl_init		- Initialize list head
95  * evch_dl_fini		- Terminate list handling
96  * evch_dl_is_init	- Returns one if list is initialized
97  * evch_dl_add		- Add element to end of list
98  * evch_dl_del		- Remove given element from list
99  * evch_dl_search	- Lookup element in list
100  * evch_dl_getnum	- Get number of elements in list
 * evch_dl_next		- Get next element of list
102  */
103 
104 static void
105 evch_dl_init(evch_dlist_t *hp)
106 {
107 	hp->dh_head.dl_prev = hp->dh_head.dl_next = &hp->dh_head;
108 	hp->dh_count = 0;
109 }
110 
111 /*
112  * Assumes that list is empty.
113  */
114 static void
115 evch_dl_fini(evch_dlist_t *hp)
116 {
117 	hp->dh_head.dl_prev = hp->dh_head.dl_next = NULL;
118 }
119 
120 static int
121 evch_dl_is_init(evch_dlist_t *hp)
122 {
123 	return (hp->dh_head.dl_next != NULL ? 1 : 0);
124 }
125 
126 /*
127  * Add an element at the end of the list.
128  */
129 static void
130 evch_dl_add(evch_dlist_t *hp, evch_dlelem_t *el)
131 {
132 	evch_dlelem_t	*x = hp->dh_head.dl_prev;
133 	evch_dlelem_t	*y = &hp->dh_head;
134 
135 	x->dl_next = el;
136 	y->dl_prev = el;
137 	el->dl_next = y;
138 	el->dl_prev = x;
139 	hp->dh_count++;
140 }
141 
142 /*
143  * Remove arbitrary element out of dlist.
144  */
145 static void
146 evch_dl_del(evch_dlist_t *hp, evch_dlelem_t *p)
147 {
148 	ASSERT(hp->dh_count > 0 && p != &hp->dh_head);
149 	p->dl_prev->dl_next = p->dl_next;
150 	p->dl_next->dl_prev = p->dl_prev;
151 	p->dl_prev = NULL;
152 	p->dl_next = NULL;
153 	hp->dh_count--;
154 }
155 
156 /*
157  * Search an element in a list. Caller provides comparison callback function.
158  */
159 static evch_dlelem_t *
160 evch_dl_search(evch_dlist_t *hp, int (*cmp)(evch_dlelem_t *, char *), char *s)
161 {
162 	evch_dlelem_t *p;
163 
164 	for (p = hp->dh_head.dl_next; p != &hp->dh_head; p = p->dl_next) {
165 		if (cmp(p, s) == 0) {
166 			return (p);
167 		}
168 	}
169 	return (NULL);
170 }
171 
172 /*
173  * Return number of elements in the list.
174  */
175 static int
176 evch_dl_getnum(evch_dlist_t *hp)
177 {
178 	return (hp->dh_count);
179 }
180 
181 /*
182  * Find next element of a evch_dlist_t list. Find first element if el == NULL.
183  * Returns NULL if end of list is reached.
184  */
185 static void *
186 evch_dl_next(evch_dlist_t *hp, void *el)
187 {
188 	evch_dlelem_t *ep = (evch_dlelem_t *)el;
189 
190 	if (hp->dh_count == 0) {
191 		return (NULL);
192 	}
193 	if (ep == NULL) {
194 		return (hp->dh_head.dl_next);
195 	}
196 	if ((ep = ep->dl_next) == (evch_dlelem_t *)hp) {
197 		return (NULL);
198 	}
199 	return ((void *)ep);
200 }
201 
202 /*
203  * Queue handling routines. Mutexes have to be entered previously.
204  *
205  * evch_q_init	- Initialize queue head
206  * evch_q_in	- Put element into queue
207  * evch_q_out	- Get element out of queue
208  * evch_q_next	- Iterate over the elements of a queue
209  */
210 static void
211 evch_q_init(evch_squeue_t *q)
212 {
213 	q->sq_head = NULL;
214 	q->sq_tail = (evch_qelem_t *)q;
215 	q->sq_count = 0;
216 	q->sq_highwm = 0;
217 }
218 
219 /*
220  * Put element into the queue q
221  */
222 static void
223 evch_q_in(evch_squeue_t *q, evch_qelem_t *el)
224 {
225 	q->sq_tail->q_next = el;
226 	el->q_next = NULL;
227 	q->sq_tail = el;
228 	q->sq_count++;
229 	if (q->sq_count > q->sq_highwm) {
230 		q->sq_highwm = q->sq_count;
231 	}
232 }
233 
234 /*
235  * Returns NULL if queue is empty.
236  */
237 static evch_qelem_t *
238 evch_q_out(evch_squeue_t *q)
239 {
240 	evch_qelem_t *el;
241 
242 	if ((el = q->sq_head) != NULL) {
243 		q->sq_head = el->q_next;
244 		q->sq_count--;
245 		if (q->sq_head == NULL) {
246 			q->sq_tail = (evch_qelem_t *)q;
247 		}
248 	}
249 	return (el);
250 }
251 
252 /*
253  * Returns element after *el or first if el == NULL. NULL is returned
254  * if queue is empty or *el points to the last element in the queue.
255  */
256 static evch_qelem_t *
257 evch_q_next(evch_squeue_t *q, evch_qelem_t *el)
258 {
259 	if (el == NULL)
260 		return (q->sq_head);
261 	return (el->q_next);
262 }
263 
264 /*
265  * Event queue handling functions. An event queue is the basic building block
266  * of an event channel. One event queue makes up the publisher-side event queue.
267  * Further event queues build the per-subscriber queues of an event channel.
268  * Each queue is associated an event delivery thread.
269  * These functions support a two-step initialization. First step, when kernel
270  * memory is ready and second when threads are ready.
271  * Events consist of an administrating evch_gevent_t structure with the event
272  * data appended as variable length payload.
273  * The internal interface functions for the event queue handling are:
274  *
275  * evch_evq_create	- create an event queue
276  * evch_evq_thrcreate	- create thread for an event queue.
277  * evch_evq_destroy	- delete an event queue
278  * evch_evq_sub		- Subscribe to event delivery from an event queue
279  * evch_evq_unsub	- Unsubscribe
280  * evch_evq_pub		- Post an event into an event queue
281  * evch_evq_stop	- Put delivery thread on hold
282  * evch_evq_continue	- Resume event delivery thread
283  * evch_evq_status	- Return status of delivery thread, running or on hold
284  * evch_evq_evzalloc	- Allocate an event structure
285  * evch_evq_evfree	- Free an event structure
286  * evch_evq_evadd_dest	- Add a destructor function to an event structure
287  * evch_evq_evnext	- Iterate over events non-destructive
288  */
289 
290 /*ARGSUSED*/
291 static void *
292 evch_zoneinit(zoneid_t zoneid)
293 {
294 	struct evch_globals *eg;
295 
296 	eg = kmem_zalloc(sizeof (*eg), KM_SLEEP);
297 	evch_dl_init(&eg->evch_list);
298 	return (eg);
299 }
300 
301 /*ARGSUSED*/
302 static void
303 evch_zonefree(zoneid_t zoneid, void *arg)
304 {
305 	struct evch_globals *eg = arg;
306 	evch_chan_t *chp;
307 	evch_subd_t *sdp;
308 
309 	mutex_enter(&eg->evch_list_lock);
310 
311 	/*
312 	 * Keep picking the head element off the list until there are no
313 	 * more.
314 	 */
315 	while ((chp = evch_dl_next(&eg->evch_list, NULL)) != NULL) {
316 
317 		/*
318 		 * Since all processes are gone, all bindings should be gone,
319 		 * and only channels with SUB_KEEP subscribers should remain.
320 		 */
321 		mutex_enter(&chp->ch_mutex);
322 		ASSERT(chp->ch_bindings == 0);
323 		ASSERT(evch_dl_getnum(&chp->ch_subscr) != 0 ||
324 		    chp->ch_holdpend == CH_HOLD_PEND_INDEF);
325 
326 		/* Forcibly unsubscribe each remaining subscription */
327 		while ((sdp = evch_dl_next(&chp->ch_subscr, NULL)) != NULL) {
328 			/*
329 			 * We should only be tearing down persistent
330 			 * subscribers at this point, since all processes
331 			 * from this zone are gone.
332 			 */
333 			ASSERT(sdp->sd_active == 0);
334 			ASSERT((sdp->sd_persist & EVCH_SUB_KEEP) != 0);
335 			/*
336 			 * Disconnect subscriber queue from main event queue.
337 			 */
338 			evch_evq_unsub(chp->ch_queue, sdp->sd_msub);
339 
340 			/* Destruct per subscriber queue */
341 			evch_evq_unsub(sdp->sd_queue, sdp->sd_ssub);
342 			evch_evq_destroy(sdp->sd_queue);
343 			/*
344 			 * Eliminate the subscriber data from channel list.
345 			 */
346 			evch_dl_del(&chp->ch_subscr, &sdp->sd_link);
347 			kmem_free(sdp->sd_classname, sdp->sd_clnsize);
348 			kmem_free(sdp->sd_ident, strlen(sdp->sd_ident) + 1);
349 			kmem_free(sdp, sizeof (evch_subd_t));
350 		}
351 
352 		/* Channel must now have no subscribers */
353 		ASSERT(evch_dl_getnum(&chp->ch_subscr) == 0);
354 
355 		/* Just like unbind */
356 		mutex_exit(&chp->ch_mutex);
357 		evch_dl_del(&eg->evch_list, &chp->ch_link);
358 		evch_evq_destroy(chp->ch_queue);
359 		mutex_destroy(&chp->ch_mutex);
360 		mutex_destroy(&chp->ch_pubmx);
361 		cv_destroy(&chp->ch_pubcv);
362 		kmem_free(chp->ch_name, chp->ch_namelen);
363 		kmem_free(chp, sizeof (evch_chan_t));
364 	}
365 
366 	mutex_exit(&eg->evch_list_lock);
367 	/* all channels should now be gone */
368 	ASSERT(evch_dl_getnum(&eg->evch_list) == 0);
369 	kmem_free(eg, sizeof (*eg));
370 }
371 
372 /*
373  * Frees evch_gevent_t structure including the payload, if the reference count
374  * drops to or below zero. Below zero happens when the event is freed
375  * without beeing queued into a queue.
376  */
377 static void
378 evch_gevent_free(evch_gevent_t *evp)
379 {
380 	int32_t refcnt;
381 
382 	refcnt = (int32_t)atomic_add_32_nv(&evp->ge_refcount, -1);
383 	if (refcnt <= 0) {
384 		if (evp->ge_destruct != NULL) {
385 			evp->ge_destruct((void *)&(evp->ge_payload),
386 			    evp->ge_dstcookie);
387 		}
388 		kmem_free(evp, evp->ge_size);
389 	}
390 }
391 
392 /*
393  * Deliver is called for every subscription to the current event
394  * It calls the registered filter function and then the registered delivery
395  * callback routine. Returns 0 on success. The callback routine returns
396  * EVQ_AGAIN or EVQ_SLEEP in case the event could not be delivered.
397  */
398 static int
399 evch_deliver(evch_evqsub_t *sp, evch_gevent_t *ep)
400 {
401 	void		*uep = &ep->ge_payload;
402 	int		res = EVQ_DELIVER;
403 
404 	if (sp->su_filter != NULL) {
405 		res = sp->su_filter(uep, sp->su_fcookie);
406 	}
407 	if (res == EVQ_DELIVER) {
408 		return (sp->su_callb(uep, sp->su_cbcookie));
409 	}
410 	return (0);
411 }
412 
413 /*
414  * Holds event delivery in case of eq_holdmode set or in case the
415  * event queue is empty. Mutex must be held when called.
416  * Wakes up a thread waiting for the delivery thread reaching the hold mode.
417  */
418 static void
419 evch_delivery_hold(evch_eventq_t *eqp, callb_cpr_t *cpip)
420 {
421 	if (eqp->eq_tabortflag == 0) {
422 		do {
423 			if (eqp->eq_holdmode) {
424 				cv_signal(&eqp->eq_onholdcv);
425 			}
426 			CALLB_CPR_SAFE_BEGIN(cpip);
427 			cv_wait(&eqp->eq_thrsleepcv, &eqp->eq_queuemx);
428 			CALLB_CPR_SAFE_END(cpip, &eqp->eq_queuemx);
429 		} while (eqp->eq_holdmode);
430 	}
431 }
432 
433 /*
434  * Event delivery thread. Enumerates all subscribers and calls evch_deliver()
435  * for each one.
436  */
437 static void
438 evch_delivery_thr(evch_eventq_t *eqp)
439 {
440 	evch_qelem_t	*qep;
441 	callb_cpr_t	cprinfo;
442 	int		res;
443 	evch_evqsub_t	*sub;
444 	int		deltime;
445 	int		repeatcount;
446 	char		thnam[32];
447 
448 	(void) snprintf(thnam, sizeof (thnam), "sysevent_chan-%d",
449 	    (int)eqp->eq_thrid);
450 	CALLB_CPR_INIT(&cprinfo, &eqp->eq_queuemx, callb_generic_cpr, thnam);
451 	mutex_enter(&eqp->eq_queuemx);
452 	while (eqp->eq_tabortflag == 0) {
453 		while (eqp->eq_holdmode == 0 && eqp->eq_tabortflag == 0 &&
454 		    (qep = evch_q_out(&eqp->eq_eventq)) != NULL) {
455 
456 			/* Filter and deliver event to all subscribers */
457 			deltime = EVCH_MIN_PAUSE;
458 			repeatcount = EVCH_MAX_TRY_DELIVERY;
459 			eqp->eq_curevent = qep->q_objref;
460 			sub = evch_dl_next(&eqp->eq_subscr, NULL);
461 			while (sub != NULL) {
462 				eqp->eq_dactive = 1;
463 				mutex_exit(&eqp->eq_queuemx);
464 				res = evch_deliver(sub, qep->q_objref);
465 				mutex_enter(&eqp->eq_queuemx);
466 				eqp->eq_dactive = 0;
467 				cv_signal(&eqp->eq_dactivecv);
468 				switch (res) {
469 				case EVQ_SLEEP:
470 					/*
471 					 * Wait for subscriber to return.
472 					 */
473 					eqp->eq_holdmode = 1;
474 					evch_delivery_hold(eqp, &cprinfo);
475 					if (eqp->eq_tabortflag) {
476 						break;
477 					}
478 					continue;
479 				case EVQ_AGAIN:
480 					CALLB_CPR_SAFE_BEGIN(&cprinfo);
481 					mutex_exit(&eqp->eq_queuemx);
482 					delay(deltime);
483 					deltime =
484 					    deltime > EVCH_MAX_PAUSE ?
485 					    deltime : deltime << 1;
486 					mutex_enter(&eqp->eq_queuemx);
487 					CALLB_CPR_SAFE_END(&cprinfo,
488 					    &eqp->eq_queuemx);
489 					if (repeatcount-- > 0) {
490 						continue;
491 					}
492 					break;
493 				}
494 				if (eqp->eq_tabortflag) {
495 					break;
496 				}
497 				sub = evch_dl_next(&eqp->eq_subscr, sub);
498 				repeatcount = EVCH_MAX_TRY_DELIVERY;
499 			}
500 			eqp->eq_curevent = NULL;
501 
502 			/* Free event data and queue element */
503 			evch_gevent_free((evch_gevent_t *)qep->q_objref);
504 			kmem_free(qep, qep->q_objsize);
505 		}
506 
507 		/* Wait for next event or end of hold mode if set */
508 		evch_delivery_hold(eqp, &cprinfo);
509 	}
510 	CALLB_CPR_EXIT(&cprinfo);	/* Does mutex_exit of eqp->eq_queuemx */
511 	thread_exit();
512 }
513 
514 /*
515  * Create the event delivery thread for an existing event queue.
516  */
517 static void
518 evch_evq_thrcreate(evch_eventq_t *eqp)
519 {
520 	kthread_t *thp;
521 
522 	thp = thread_create(NULL, 0, evch_delivery_thr, (char *)eqp, 0, &p0,
523 	    TS_RUN, minclsyspri);
524 	eqp->eq_thrid = thp->t_did;
525 }
526 
527 /*
528  * Create event queue.
529  */
530 static evch_eventq_t *
531 evch_evq_create()
532 {
533 	evch_eventq_t *p;
534 
535 	/* Allocate and initialize event queue descriptor */
536 	p = kmem_zalloc(sizeof (evch_eventq_t), KM_SLEEP);
537 	mutex_init(&p->eq_queuemx, NULL, MUTEX_DEFAULT, NULL);
538 	cv_init(&p->eq_thrsleepcv, NULL, CV_DEFAULT, NULL);
539 	evch_q_init(&p->eq_eventq);
540 	evch_dl_init(&p->eq_subscr);
541 	cv_init(&p->eq_dactivecv, NULL, CV_DEFAULT, NULL);
542 	cv_init(&p->eq_onholdcv, NULL, CV_DEFAULT, NULL);
543 
544 	/* Create delivery thread */
545 	if (evq_initcomplete) {
546 		evch_evq_thrcreate(p);
547 	}
548 	return (p);
549 }
550 
551 /*
552  * Destroy an event queue. All subscribers have to be unsubscribed prior to
553  * this call.
554  */
555 static void
556 evch_evq_destroy(evch_eventq_t *eqp)
557 {
558 	evch_qelem_t *qep;
559 
560 	ASSERT(evch_dl_getnum(&eqp->eq_subscr) == 0);
561 	/* Kill delivery thread */
562 	if (eqp->eq_thrid != NULL) {
563 		mutex_enter(&eqp->eq_queuemx);
564 		eqp->eq_tabortflag = 1;
565 		eqp->eq_holdmode = 0;
566 		cv_signal(&eqp->eq_thrsleepcv);
567 		mutex_exit(&eqp->eq_queuemx);
568 		thread_join(eqp->eq_thrid);
569 	}
570 
571 	/* Get rid of stale events in the event queue */
572 	while ((qep = (evch_qelem_t *)evch_q_out(&eqp->eq_eventq)) != NULL) {
573 		evch_gevent_free((evch_gevent_t *)qep->q_objref);
574 		kmem_free(qep, qep->q_objsize);
575 	}
576 
577 	/* Wrap up event queue structure */
578 	cv_destroy(&eqp->eq_onholdcv);
579 	cv_destroy(&eqp->eq_dactivecv);
580 	cv_destroy(&eqp->eq_thrsleepcv);
581 	evch_dl_fini(&eqp->eq_subscr);
582 	mutex_destroy(&eqp->eq_queuemx);
583 
584 	/* Free descriptor structure */
585 	kmem_free(eqp, sizeof (evch_eventq_t));
586 }
587 
588 /*
589  * Subscribe to an event queue. Every subscriber provides a filter callback
590  * routine and an event delivery callback routine.
591  */
592 static evch_evqsub_t *
593 evch_evq_sub(evch_eventq_t *eqp, filter_f filter, void *fcookie,
594     deliver_f callb, void *cbcookie)
595 {
596 	evch_evqsub_t *sp = kmem_zalloc(sizeof (evch_evqsub_t), KM_SLEEP);
597 
598 	/* Initialize subscriber structure */
599 	sp->su_filter = filter;
600 	sp->su_fcookie = fcookie;
601 	sp->su_callb = callb;
602 	sp->su_cbcookie = cbcookie;
603 
604 	/* Add subscription to queue */
605 	mutex_enter(&eqp->eq_queuemx);
606 	evch_dl_add(&eqp->eq_subscr, &sp->su_link);
607 	mutex_exit(&eqp->eq_queuemx);
608 	return (sp);
609 }
610 
611 /*
612  * Unsubscribe from an event queue.
613  */
614 static void
615 evch_evq_unsub(evch_eventq_t *eqp, evch_evqsub_t *sp)
616 {
617 	mutex_enter(&eqp->eq_queuemx);
618 
619 	/* Wait if delivery is just in progress */
620 	if (eqp->eq_dactive) {
621 		cv_wait(&eqp->eq_dactivecv, &eqp->eq_queuemx);
622 	}
623 	evch_dl_del(&eqp->eq_subscr, &sp->su_link);
624 	mutex_exit(&eqp->eq_queuemx);
625 	kmem_free(sp, sizeof (evch_evqsub_t));
626 }
627 
628 /*
629  * Publish an event. Returns 0 on success and -1 if memory alloc failed.
630  */
631 static int
632 evch_evq_pub(evch_eventq_t *eqp, void *ev, int flags)
633 {
634 	size_t size;
635 	evch_qelem_t	*qep;
636 	evch_gevent_t	*evp = GEVENT(ev);
637 
638 	size = sizeof (evch_qelem_t);
639 	if (flags & EVCH_TRYHARD) {
640 		qep = kmem_alloc_tryhard(size, &size, KM_NOSLEEP);
641 	} else {
642 		qep = kmem_alloc(size, flags & EVCH_NOSLEEP ?
643 		    KM_NOSLEEP : KM_SLEEP);
644 	}
645 	if (qep == NULL) {
646 		return (-1);
647 	}
648 	qep->q_objref = (void *)evp;
649 	qep->q_objsize = size;
650 	atomic_add_32(&evp->ge_refcount, 1);
651 	mutex_enter(&eqp->eq_queuemx);
652 	evch_q_in(&eqp->eq_eventq, qep);
653 
654 	/* Wakeup delivery thread */
655 	cv_signal(&eqp->eq_thrsleepcv);
656 	mutex_exit(&eqp->eq_queuemx);
657 	return (0);
658 }
659 
660 /*
661  * Enter hold mode of an event queue. Event delivery thread stops event
662  * handling after delivery of current event (if any).
663  */
664 static void
665 evch_evq_stop(evch_eventq_t *eqp)
666 {
667 	mutex_enter(&eqp->eq_queuemx);
668 	eqp->eq_holdmode = 1;
669 	if (evq_initcomplete) {
670 		cv_signal(&eqp->eq_thrsleepcv);
671 		cv_wait(&eqp->eq_onholdcv, &eqp->eq_queuemx);
672 	}
673 	mutex_exit(&eqp->eq_queuemx);
674 }
675 
676 /*
677  * Continue event delivery.
678  */
679 static void
680 evch_evq_continue(evch_eventq_t *eqp)
681 {
682 	mutex_enter(&eqp->eq_queuemx);
683 	eqp->eq_holdmode = 0;
684 	cv_signal(&eqp->eq_thrsleepcv);
685 	mutex_exit(&eqp->eq_queuemx);
686 }
687 
688 /*
689  * Returns status of delivery thread. 0 if running and 1 if on hold.
690  */
691 static int
692 evch_evq_status(evch_eventq_t *eqp)
693 {
694 	return (eqp->eq_holdmode);
695 }
696 
697 /*
698  * Add a destructor function to an event structure.
699  */
700 static void
701 evch_evq_evadd_dest(void *ev, destr_f destructor, void *cookie)
702 {
703 	evch_gevent_t *evp = GEVENT(ev);
704 
705 	evp->ge_destruct = destructor;
706 	evp->ge_dstcookie = cookie;
707 }
708 
709 /*
710  * Allocate evch_gevent_t structure. Return address of payload offset of
711  * evch_gevent_t.  If EVCH_TRYHARD allocation is requested, we use
712  * kmem_alloc_tryhard to alloc memory of at least paylsize bytes.
713  *
714  * If either memory allocation is unsuccessful, we return NULL.
715  */
716 static void *
717 evch_evq_evzalloc(size_t paylsize, int flag)
718 {
719 	evch_gevent_t	*evp;
720 	size_t		rsize, evsize, ge_size;
721 
722 	rsize = offsetof(evch_gevent_t, ge_payload) + paylsize;
723 	if (flag & EVCH_TRYHARD) {
724 		evp = kmem_alloc_tryhard(rsize, &evsize, KM_NOSLEEP);
725 		ge_size = evsize;
726 	} else {
727 		evp = kmem_alloc(rsize, flag & EVCH_NOSLEEP ? KM_NOSLEEP :
728 		    KM_SLEEP);
729 		ge_size = rsize;
730 	}
731 
732 	if (evp) {
733 		bzero(evp, rsize);
734 		evp->ge_size = ge_size;
735 		return (&evp->ge_payload);
736 	}
737 	return (evp);
738 }
739 
740 /*
741  * Free event structure. Argument ev is address of payload offset.
742  */
743 static void
744 evch_evq_evfree(void *ev)
745 {
746 	evch_gevent_free(GEVENT(ev));
747 }
748 
749 /*
750  * Iterate over all events in the event queue. Begin with an event
751  * which is currently being delivered. No mutexes are grabbed and no
752  * resources allocated so that this function can be called in panic
753  * context too. This function has to be called with ev == NULL initially.
754  * Actually argument ev is only a flag. Internally the member eq_nextev
755  * is used to determine the next event. But ev allows for the convenient
756  * use like
757  *	ev = NULL;
758  *	while ((ev = evch_evq_evnext(evp, ev)) != NULL) ...
759  */
760 static void *
761 evch_evq_evnext(evch_eventq_t *evq, void *ev)
762 {
763 	if (ev == NULL) {
764 		evq->eq_nextev = NULL;
765 		if (evq->eq_curevent != NULL)
766 			return (&evq->eq_curevent->ge_payload);
767 	}
768 	evq->eq_nextev = evch_q_next(&evq->eq_eventq, evq->eq_nextev);
769 	if (evq->eq_nextev == NULL)
770 		return (NULL);
771 	return (&((evch_gevent_t *)evq->eq_nextev->q_objref)->ge_payload);
772 }
773 
774 /*
775  * Channel handling functions. First some support functions. Functions belonging
776  * to the channel handling interface start with evch_ch. The following functions
777  * make up the channel handling internal interfaces:
778  *
779  * evch_chinit		- Initialize channel handling
780  * evch_chinitthr	- Second step init: initialize threads
781  * evch_chbind		- Bind to a channel
782  * evch_chunbind	- Unbind from a channel
783  * evch_chsubscribe	- Subscribe to a sysevent class
784  * evch_chunsubscribe	- Unsubscribe
785  * evch_chpublish	- Publish an event
786  * evch_chgetnames	- Get names of all channels
787  * evch_chgetchdata	- Get data of a channel
788  * evch_chrdevent_init  - Init event q traversal
789  * evch_chgetnextev	- Read out events queued for a subscriber
790  * evch_chrdevent_fini  - Finish event q traversal
791  */
792 
793 /*
794  * Compare channel name. Used for evch_dl_search to find a channel with the
795  * name s.
796  */
797 static int
798 evch_namecmp(evch_dlelem_t *ep, char *s)
799 {
800 	return (strcmp(((evch_chan_t *)ep)->ch_name, s));
801 }
802 
/*
 * Match event class string 'class' against subscription pattern 'pat',
 * where '*' in the pattern stands for any (possibly empty) sequence of
 * characters. Returns 1 on a match and 0 otherwise. The function only
 * recurses when the pattern contains a wildcard followed by further
 * pattern text; without wildcards it is a plain string comparison.
 */
static int
evch_clsmatch(char *class, const char *pat)
{
	for (;;) {
		char pc = *pat++;

		/* Pattern used up: match only if class is used up too */
		if (pc == '\0')
			return (*class == '\0');

		if (pc != '*') {
			/* Literal character has to match exactly */
			if (pc != *class++)
				return (0);
			continue;
		}

		/* A run of consecutive *'s is the same as a single one */
		while (*pat == '*')
			pat++;

		/* Trailing wildcard matches whatever is left */
		if (*pat == '\0')
			return (1);

		/* Try the rest of the pattern at every remaining position */
		for (; *class != '\0'; class++) {
			if (evch_clsmatch(class, pat) != 0)
				return (1);
		}
		return (0);
	}
}
835 
836 /*
837  * Sysevent filter callback routine. Enables event delivery only if it matches
838  * the event class pattern string given by parameter cookie.
839  */
840 static int
841 evch_class_filter(void *ev, void *cookie)
842 {
843 	const char *pat = (const char *)cookie;
844 
845 	if (pat == NULL || evch_clsmatch(SE_CLASS_NAME(ev), pat))
846 		return (EVQ_DELIVER);
847 
848 	return (EVQ_IGNORE);
849 }
850 
851 /*
852  * Callback routine to propagate the event into a per subscriber queue.
853  */
854 static int
855 evch_subq_deliver(void *evp, void *cookie)
856 {
857 	evch_subd_t *p = (evch_subd_t *)cookie;
858 
859 	(void) evch_evq_pub(p->sd_queue, evp, EVCH_SLEEP);
860 	return (EVQ_CONT);
861 }
862 
863 /*
864  * Call kernel callback routine for sysevent kernel delivery.
865  */
866 static int
867 evch_kern_deliver(void *evp, void *cookie)
868 {
869 	sysevent_impl_t	*ev = (sysevent_impl_t *)evp;
870 	evch_subd_t	*sdp = (evch_subd_t *)cookie;
871 
872 	return (sdp->sd_callback(ev, sdp->sd_cbcookie));
873 }
874 
875 /*
876  * Door upcall for user land sysevent delivery.
877  */
878 static int
879 evch_door_deliver(void *evp, void *cookie)
880 {
881 	int		error;
882 	size_t		size;
883 	sysevent_impl_t	*ev = (sysevent_impl_t *)evp;
884 	door_arg_t	darg;
885 	evch_subd_t	*sdp = (evch_subd_t *)cookie;
886 	int		nticks = EVCH_MIN_PAUSE;
887 	uint32_t	retval;
888 	int		retry = 20;
889 
890 	/* Initialize door args */
891 	size = sizeof (sysevent_impl_t) + SE_PAYLOAD_SZ(ev);
892 
893 	darg.rbuf = (char *)&retval;
894 	darg.rsize = sizeof (retval);
895 	darg.data_ptr = (char *)ev;
896 	darg.data_size = size;
897 	darg.desc_ptr = NULL;
898 	darg.desc_num = 0;
899 
900 	for (;;) {
901 		if ((error = door_ki_upcall_limited(sdp->sd_door, &darg,
902 		    NULL, SIZE_MAX, 0)) == 0) {
903 			break;
904 		}
905 		switch (error) {
906 		case EAGAIN:
907 			/* Cannot deliver event - process may be forking */
908 			delay(nticks);
909 			nticks <<= 1;
910 			if (nticks > EVCH_MAX_PAUSE) {
911 				nticks = EVCH_MAX_PAUSE;
912 			}
913 			if (retry-- <= 0) {
914 				cmn_err(CE_CONT, "event delivery thread: "
915 				    "door_ki_upcall error EAGAIN\n");
916 				return (EVQ_CONT);
917 			}
918 			break;
919 		case EINTR:
920 		case EBADF:
921 			/* Process died */
922 			return (EVQ_SLEEP);
923 		default:
924 			cmn_err(CE_CONT,
925 			    "event delivery thread: door_ki_upcall error %d\n",
926 			    error);
927 			return (EVQ_CONT);
928 		}
929 	}
930 	if (retval == EAGAIN) {
931 		return (EVQ_AGAIN);
932 	}
933 	return (EVQ_CONT);
934 }
935 
936 /*
937  * Callback routine for evch_dl_search() to compare subscriber id's. Used by
938  * evch_subscribe() and evch_chrdevent_init().
939  */
940 static int
941 evch_subidcmp(evch_dlelem_t *ep, char *s)
942 {
943 	return (strcmp(((evch_subd_t *)ep)->sd_ident, s));
944 }
945 
946 /*
947  * Callback routine for evch_dl_search() to find a subscriber with EVCH_SUB_DUMP
948  * set (indicated by sub->sd_dump != 0). Used by evch_chrdevent_init() and
949  * evch_subscribe(). Needs to returns 0 if subscriber with sd_dump set is
950  * found.
951  */
952 /*ARGSUSED1*/
953 static int
954 evch_dumpflgcmp(evch_dlelem_t *ep, char *s)
955 {
956 	return (((evch_subd_t *)ep)->sd_dump ? 0 : 1);
957 }
958 
959 /*
960  * Event destructor function. Used to maintain the number of events per channel.
961  */
962 /*ARGSUSED*/
963 static void
964 evch_destr_event(void *ev, void *ch)
965 {
966 	evch_chan_t *chp = (evch_chan_t *)ch;
967 
968 	mutex_enter(&chp->ch_pubmx);
969 	chp->ch_nevents--;
970 	cv_signal(&chp->ch_pubcv);
971 	mutex_exit(&chp->ch_pubmx);
972 }
973 
/*
 * Integer square root according to Newton's iteration.
 * Returns floor(sqrt(n)).
 *
 * The iteration starts from n / 2, which is not below sqrt(n) for n >= 4,
 * and from such a start the sequence decreases monotonically until it
 * reaches floor(sqrt(n)). The first iterate that fails to decrease ends
 * the loop and the last decreasing value, x, is the result. Values of n
 * below 4 are served from a small table.
 */
static uint32_t
evch_isqrt(uint64_t n)
{
	static const uint32_t	lowval[] = { 0, 1, 1, 1 };
	uint64_t		x;
	uint64_t		xn;

	if (n < 4) {
		return (lowval[n]);
	}
	x = n >> 1;
	xn = (x + n / x) / 2;
	while (xn < x) {
		x = xn;
		xn = (x + n / x) / 2;
	}
	return ((uint32_t)x);
}
993 
994 /*
995  * First step sysevent channel initialization. Called when kernel memory
996  * allocator is initialized.
997  */
998 static void
999 evch_chinit()
1000 {
1001 	size_t k;
1002 
1003 	/*
1004 	 * Calculate limits: max no of channels and max no of events per
1005 	 * channel. The smallest machine with 128 MByte will allow for
1006 	 * >= 8 channels and an upper limit of 2048 events per channel.
1007 	 * The event limit is the number of channels times 256 (hence
1008 	 * the shift factor of 8). These number where selected arbitrarily.
1009 	 */
1010 	k = kmem_maxavail() >> 20;
1011 	evch_channels_max = min(evch_isqrt(k), EVCH_MAX_CHANNELS);
1012 	evch_events_max = evch_channels_max << 8;
1013 
1014 	/*
1015 	 * Will trigger creation of the global zone's evch state.
1016 	 */
1017 	zone_key_create(&evch_zone_key, evch_zoneinit, NULL, evch_zonefree);
1018 }
1019 
1020 /*
1021  * Second step sysevent channel initialization. Called when threads are ready.
1022  */
1023 static void
1024 evch_chinitthr()
1025 {
1026 	struct evch_globals *eg;
1027 	evch_chan_t	*chp;
1028 	evch_subd_t	*sdp;
1029 
1030 	/*
1031 	 * We're early enough in boot that we know that only the global
1032 	 * zone exists; we only need to initialize its threads.
1033 	 */
1034 	eg = zone_getspecific(evch_zone_key, global_zone);
1035 	ASSERT(eg != NULL);
1036 
1037 	for (chp = evch_dl_next(&eg->evch_list, NULL); chp != NULL;
1038 	    chp = evch_dl_next(&eg->evch_list, chp)) {
1039 		for (sdp = evch_dl_next(&chp->ch_subscr, NULL); sdp;
1040 		    sdp = evch_dl_next(&chp->ch_subscr, sdp)) {
1041 			evch_evq_thrcreate(sdp->sd_queue);
1042 		}
1043 		evch_evq_thrcreate(chp->ch_queue);
1044 	}
1045 	evq_initcomplete = 1;
1046 }
1047 
1048 /*
1049  * Sysevent channel bind. Create channel and allocate binding structure.
1050  */
1051 static int
1052 evch_chbind(const char *chnam, evch_bind_t **scpp, uint32_t flags)
1053 {
1054 	struct evch_globals *eg;
1055 	evch_bind_t	*bp;
1056 	evch_chan_t	*p;
1057 	char		*chn;
1058 	size_t		namlen;
1059 	int		rv;
1060 
1061 	eg = zone_getspecific(evch_zone_key, curproc->p_zone);
1062 	ASSERT(eg != NULL);
1063 
1064 	/* Create channel if it does not exist */
1065 	ASSERT(evch_dl_is_init(&eg->evch_list));
1066 	if ((namlen = strlen(chnam) + 1) > MAX_CHNAME_LEN) {
1067 		return (EINVAL);
1068 	}
1069 	mutex_enter(&eg->evch_list_lock);
1070 	if ((p = (evch_chan_t *)evch_dl_search(&eg->evch_list, evch_namecmp,
1071 	    (char *)chnam)) == NULL) {
1072 		if (flags & EVCH_CREAT) {
1073 			if (evch_dl_getnum(&eg->evch_list) >=
1074 			    evch_channels_max) {
1075 				mutex_exit(&eg->evch_list_lock);
1076 				return (ENOMEM);
1077 			}
1078 			chn = kmem_alloc(namlen, KM_SLEEP);
1079 			bcopy(chnam, chn, namlen);
1080 
1081 			/* Allocate and initialize channel descriptor */
1082 			p = kmem_zalloc(sizeof (evch_chan_t), KM_SLEEP);
1083 			p->ch_name = chn;
1084 			p->ch_namelen = namlen;
1085 			mutex_init(&p->ch_mutex, NULL, MUTEX_DEFAULT, NULL);
1086 			p->ch_queue = evch_evq_create();
1087 			evch_dl_init(&p->ch_subscr);
1088 			if (evq_initcomplete) {
1089 				p->ch_uid = crgetuid(curthread->t_cred);
1090 				p->ch_gid = crgetgid(curthread->t_cred);
1091 			}
1092 			cv_init(&p->ch_pubcv, NULL, CV_DEFAULT, NULL);
1093 			mutex_init(&p->ch_pubmx, NULL, MUTEX_DEFAULT, NULL);
1094 			p->ch_maxev = min(EVCH_DEFAULT_EVENTS, evch_events_max);
1095 			p->ch_maxsubscr = EVCH_MAX_SUBSCRIPTIONS;
1096 			p->ch_maxbinds = evch_bindings_max;
1097 			p->ch_ctime = gethrestime_sec();
1098 
1099 			if (flags & (EVCH_HOLD_PEND | EVCH_HOLD_PEND_INDEF)) {
1100 				if (flags & EVCH_HOLD_PEND_INDEF)
1101 					p->ch_holdpend = CH_HOLD_PEND_INDEF;
1102 				else
1103 					p->ch_holdpend = CH_HOLD_PEND;
1104 
1105 				evch_evq_stop(p->ch_queue);
1106 			}
1107 
1108 			/* Put new descriptor into channel list */
1109 			evch_dl_add(&eg->evch_list, (evch_dlelem_t *)p);
1110 		} else {
1111 			mutex_exit(&eg->evch_list_lock);
1112 			return (ENOENT);
1113 		}
1114 	}
1115 
1116 	/* Check for max binds and create binding */
1117 	mutex_enter(&p->ch_mutex);
1118 	if (p->ch_bindings >= p->ch_maxbinds) {
1119 		rv = ENOMEM;
1120 		/*
1121 		 * No need to destroy the channel because this call did not
1122 		 * create it. Other bindings will be present if ch_maxbinds
1123 		 * is exceeded.
1124 		 */
1125 		goto errorexit;
1126 	}
1127 	bp = kmem_alloc(sizeof (evch_bind_t), KM_SLEEP);
1128 	bp->bd_channel = p;
1129 	bp->bd_sublst = NULL;
1130 	p->ch_bindings++;
1131 	rv = 0;
1132 	*scpp = bp;
1133 errorexit:
1134 	mutex_exit(&p->ch_mutex);
1135 	mutex_exit(&eg->evch_list_lock);
1136 	return (rv);
1137 }
1138 
1139 /*
1140  * Unbind: Free bind structure. Remove channel if last binding was freed.
1141  */
1142 static void
1143 evch_chunbind(evch_bind_t *bp)
1144 {
1145 	struct evch_globals *eg;
1146 	evch_chan_t *chp = bp->bd_channel;
1147 
1148 	eg = zone_getspecific(evch_zone_key, curproc->p_zone);
1149 	ASSERT(eg != NULL);
1150 
1151 	mutex_enter(&eg->evch_list_lock);
1152 	mutex_enter(&chp->ch_mutex);
1153 	ASSERT(chp->ch_bindings > 0);
1154 	chp->ch_bindings--;
1155 	kmem_free(bp, sizeof (evch_bind_t));
1156 	if (chp->ch_bindings == 0 && evch_dl_getnum(&chp->ch_subscr) == 0 &&
1157 	    (chp->ch_nevents == 0 || chp->ch_holdpend != CH_HOLD_PEND_INDEF)) {
1158 		/*
1159 		 * No more bindings and no persistent subscriber(s).  If there
1160 		 * are no events in the channel then destroy the channel;
1161 		 * otherwise destroy the channel only if we're not holding
1162 		 * pending events indefinitely.
1163 		 */
1164 		mutex_exit(&chp->ch_mutex);
1165 		evch_dl_del(&eg->evch_list, &chp->ch_link);
1166 		evch_evq_destroy(chp->ch_queue);
1167 		if (chp->ch_propnvl)
1168 			nvlist_free(chp->ch_propnvl);
1169 		mutex_destroy(&chp->ch_mutex);
1170 		mutex_destroy(&chp->ch_pubmx);
1171 		cv_destroy(&chp->ch_pubcv);
1172 		kmem_free(chp->ch_name, chp->ch_namelen);
1173 		kmem_free(chp, sizeof (evch_chan_t));
1174 	} else
1175 		mutex_exit(&chp->ch_mutex);
1176 	mutex_exit(&eg->evch_list_lock);
1177 }
1178 
/*
 * Return the number of '*' characters in the class string; a NULL class
 * counts as zero wildcards.
 */
static int
wildcard_count(const char *class)
{
	const char *p;
	int stars = 0;

	if (class == NULL)
		return (0);

	for (p = class; *p != '\0'; p++) {
		if (*p == '*')
			stars++;
	}

	return (stars);
}
1195 
1196 /*
1197  * Subscribe to a channel. dtype is either EVCH_DELKERN for kernel callbacks
1198  * or EVCH_DELDOOR for door upcall delivery to user land. Depending on dtype
1199  * dinfo gives the call back routine address or the door handle.
1200  */
1201 static int
1202 evch_chsubscribe(evch_bind_t *bp, int dtype, const char *sid, const char *class,
1203     void *dinfo, void *cookie, int flags, pid_t pid)
1204 {
1205 	evch_chan_t	*chp = bp->bd_channel;
1206 	evch_eventq_t	*eqp = chp->ch_queue;
1207 	evch_subd_t	*sdp;
1208 	evch_subd_t	*esp;
1209 	int		(*delivfkt)();
1210 	char		*clb = NULL;
1211 	int		clblen = 0;
1212 	char		*subid;
1213 	int		subidblen;
1214 
1215 	/*
1216 	 * Check if only known flags are set.
1217 	 */
1218 	if (flags & ~(EVCH_SUB_KEEP | EVCH_SUB_DUMP))
1219 		return (EINVAL);
1220 
1221 	/*
1222 	 * Enforce a limit on the number of wildcards allowed in the class
1223 	 * subscription string (limits recursion in pattern matching).
1224 	 */
1225 	if (wildcard_count(class) > EVCH_WILDCARD_MAX)
1226 		return (EINVAL);
1227 
1228 	/*
1229 	 * Check if we have already a subscription with that name and if we
1230 	 * have to reconnect the subscriber to a persistent subscription.
1231 	 */
1232 	mutex_enter(&chp->ch_mutex);
1233 	if ((esp = (evch_subd_t *)evch_dl_search(&chp->ch_subscr,
1234 	    evch_subidcmp, (char *)sid)) != NULL) {
1235 		int error = 0;
1236 		if ((flags & EVCH_SUB_KEEP) && (esp->sd_active == 0)) {
1237 			/*
1238 			 * Subscription with the name on hold, reconnect to
1239 			 * existing queue.
1240 			 */
1241 			ASSERT(dtype == EVCH_DELDOOR);
1242 			esp->sd_subnxt = bp->bd_sublst;
1243 			bp->bd_sublst = esp;
1244 			esp->sd_pid = pid;
1245 			esp->sd_door = (door_handle_t)dinfo;
1246 			esp->sd_active++;
1247 			evch_evq_continue(esp->sd_queue);
1248 		} else {
1249 			/* Subscriber with given name already exists */
1250 			error = EEXIST;
1251 		}
1252 		mutex_exit(&chp->ch_mutex);
1253 		return (error);
1254 	}
1255 
1256 	if (evch_dl_getnum(&chp->ch_subscr) >= chp->ch_maxsubscr) {
1257 		mutex_exit(&chp->ch_mutex);
1258 		return (ENOMEM);
1259 	}
1260 
1261 	if (flags & EVCH_SUB_DUMP && evch_dl_search(&chp->ch_subscr,
1262 	    evch_dumpflgcmp, NULL) != NULL) {
1263 		/*
1264 		 * Subscription with EVCH_SUB_DUMP flagged already exists.
1265 		 * Only one subscription with EVCH_SUB_DUMP possible. Return
1266 		 * error.
1267 		 */
1268 		mutex_exit(&chp->ch_mutex);
1269 		return (EINVAL);
1270 	}
1271 
1272 	if (class != NULL) {
1273 		clblen = strlen(class) + 1;
1274 		clb = kmem_alloc(clblen, KM_SLEEP);
1275 		bcopy(class, clb, clblen);
1276 	}
1277 
1278 	subidblen = strlen(sid) + 1;
1279 	subid = kmem_alloc(subidblen, KM_SLEEP);
1280 	bcopy(sid, subid, subidblen);
1281 
1282 	/* Create per subscriber queue */
1283 	sdp = kmem_zalloc(sizeof (evch_subd_t), KM_SLEEP);
1284 	sdp->sd_queue = evch_evq_create();
1285 
1286 	/* Subscribe to subscriber queue */
1287 	sdp->sd_persist = flags & EVCH_SUB_KEEP ? 1 : 0;
1288 	sdp->sd_dump = flags & EVCH_SUB_DUMP ? 1 : 0;
1289 	sdp->sd_type = dtype;
1290 	sdp->sd_cbcookie = cookie;
1291 	sdp->sd_ident = subid;
1292 	if (dtype == EVCH_DELKERN) {
1293 		sdp->sd_callback = (kerndlv_f)dinfo;
1294 		delivfkt = evch_kern_deliver;
1295 	} else {
1296 		sdp->sd_door = (door_handle_t)dinfo;
1297 		delivfkt = evch_door_deliver;
1298 	}
1299 	sdp->sd_ssub =
1300 	    evch_evq_sub(sdp->sd_queue, NULL, NULL, delivfkt, (void *)sdp);
1301 
1302 	/* Connect per subscriber queue to main event queue */
1303 	sdp->sd_msub = evch_evq_sub(eqp, evch_class_filter, clb,
1304 	    evch_subq_deliver, (void *)sdp);
1305 	sdp->sd_classname = clb;
1306 	sdp->sd_clnsize = clblen;
1307 	sdp->sd_pid = pid;
1308 	sdp->sd_active++;
1309 
1310 	/* Add subscription to binding */
1311 	sdp->sd_subnxt = bp->bd_sublst;
1312 	bp->bd_sublst = sdp;
1313 
1314 	/* Add subscription to channel */
1315 	evch_dl_add(&chp->ch_subscr, &sdp->sd_link);
1316 	if (chp->ch_holdpend && evch_dl_getnum(&chp->ch_subscr) == 1) {
1317 
1318 		/* Let main event queue run in case of HOLDPEND */
1319 		evch_evq_continue(eqp);
1320 	}
1321 	mutex_exit(&chp->ch_mutex);
1322 
1323 	return (0);
1324 }
1325 
1326 /*
1327  * If flag == EVCH_SUB_KEEP only non-persistent subscriptions are deleted.
1328  * When sid == NULL all subscriptions except the ones with EVCH_SUB_KEEP set
1329  * are removed.
1330  */
1331 static void
1332 evch_chunsubscribe(evch_bind_t *bp, const char *sid, uint32_t flags)
1333 {
1334 	evch_subd_t	*sdp;
1335 	evch_subd_t	*next;
1336 	evch_subd_t	*prev;
1337 	evch_chan_t	*chp = bp->bd_channel;
1338 
1339 	mutex_enter(&chp->ch_mutex);
1340 	if (chp->ch_holdpend) {
1341 		evch_evq_stop(chp->ch_queue);	/* Hold main event queue */
1342 	}
1343 	prev = NULL;
1344 	for (sdp = bp->bd_sublst; sdp; sdp = next) {
1345 		if (sid == NULL || strcmp(sid, sdp->sd_ident) == 0) {
1346 			if (flags == 0 || sdp->sd_persist == 0) {
1347 				/*
1348 				 * Disconnect subscriber queue from main event
1349 				 * queue.
1350 				 */
1351 				evch_evq_unsub(chp->ch_queue, sdp->sd_msub);
1352 
1353 				/* Destruct per subscriber queue */
1354 				evch_evq_unsub(sdp->sd_queue, sdp->sd_ssub);
1355 				evch_evq_destroy(sdp->sd_queue);
1356 				/*
1357 				 * Eliminate the subscriber data from channel
1358 				 * list.
1359 				 */
1360 				evch_dl_del(&chp->ch_subscr, &sdp->sd_link);
1361 				kmem_free(sdp->sd_classname, sdp->sd_clnsize);
1362 				if (sdp->sd_type == EVCH_DELDOOR) {
1363 					door_ki_rele(sdp->sd_door);
1364 				}
1365 				next = sdp->sd_subnxt;
1366 				if (prev) {
1367 					prev->sd_subnxt = next;
1368 				} else {
1369 					bp->bd_sublst = next;
1370 				}
1371 				kmem_free(sdp->sd_ident,
1372 				    strlen(sdp->sd_ident) + 1);
1373 				kmem_free(sdp, sizeof (evch_subd_t));
1374 			} else {
1375 				/*
1376 				 * EVCH_SUB_KEEP case
1377 				 */
1378 				evch_evq_stop(sdp->sd_queue);
1379 				if (sdp->sd_type == EVCH_DELDOOR) {
1380 					door_ki_rele(sdp->sd_door);
1381 				}
1382 				sdp->sd_active--;
1383 				ASSERT(sdp->sd_active == 0);
1384 				next = sdp->sd_subnxt;
1385 				prev = sdp;
1386 			}
1387 			if (sid != NULL) {
1388 				break;
1389 			}
1390 		} else {
1391 			next = sdp->sd_subnxt;
1392 			prev = sdp;
1393 		}
1394 	}
1395 	if (!(chp->ch_holdpend && evch_dl_getnum(&chp->ch_subscr) == 0)) {
1396 		/*
1397 		 * Continue dispatch thread except if no subscribers are present
1398 		 * in HOLDPEND mode.
1399 		 */
1400 		evch_evq_continue(chp->ch_queue);
1401 	}
1402 	mutex_exit(&chp->ch_mutex);
1403 }
1404 
1405 /*
1406  * Publish an event. Returns zero on success and an error code else.
1407  */
1408 static int
1409 evch_chpublish(evch_bind_t *bp, sysevent_impl_t *ev, int flags)
1410 {
1411 	evch_chan_t *chp = bp->bd_channel;
1412 
1413 	DTRACE_SYSEVENT2(post, evch_bind_t *, bp, sysevent_impl_t *, ev);
1414 
1415 	mutex_enter(&chp->ch_pubmx);
1416 	if (chp->ch_nevents >= chp->ch_maxev) {
1417 		if (!(flags & EVCH_QWAIT)) {
1418 			evch_evq_evfree(ev);
1419 			mutex_exit(&chp->ch_pubmx);
1420 			return (EAGAIN);
1421 		} else {
1422 			while (chp->ch_nevents >= chp->ch_maxev) {
1423 				if (cv_wait_sig(&chp->ch_pubcv,
1424 				    &chp->ch_pubmx) == 0) {
1425 
1426 					/* Got Signal, return EINTR */
1427 					evch_evq_evfree(ev);
1428 					mutex_exit(&chp->ch_pubmx);
1429 					return (EINTR);
1430 				}
1431 			}
1432 		}
1433 	}
1434 	chp->ch_nevents++;
1435 	mutex_exit(&chp->ch_pubmx);
1436 	SE_TIME(ev) = gethrtime();
1437 	SE_SEQ(ev) = log_sysevent_new_id();
1438 	/*
1439 	 * Add the destructor function to the event structure, now that the
1440 	 * event is accounted for. The only task of the descructor is to
1441 	 * decrement the channel event count. The evq_*() routines (including
1442 	 * the event delivery thread) do not have knowledge of the channel
1443 	 * data. So the anonymous destructor handles the channel data for it.
1444 	 */
1445 	evch_evq_evadd_dest(ev, evch_destr_event, (void *)chp);
1446 	return (evch_evq_pub(chp->ch_queue, ev, flags) == 0 ? 0 : EAGAIN);
1447 }
1448 
1449 /*
1450  * Fills a buffer consecutive with the names of all available channels.
1451  * Returns the length of all name strings or -1 if buffer size was unsufficient.
1452  */
1453 static int
1454 evch_chgetnames(char *buf, size_t size)
1455 {
1456 	struct evch_globals *eg;
1457 	int		len = 0;
1458 	char		*addr = buf;
1459 	int		max = size;
1460 	evch_chan_t	*chp;
1461 
1462 	eg = zone_getspecific(evch_zone_key, curproc->p_zone);
1463 	ASSERT(eg != NULL);
1464 
1465 	mutex_enter(&eg->evch_list_lock);
1466 	for (chp = evch_dl_next(&eg->evch_list, NULL); chp != NULL;
1467 	    chp = evch_dl_next(&eg->evch_list, chp)) {
1468 		len += chp->ch_namelen;
1469 		if (len >= max) {
1470 			mutex_exit(&eg->evch_list_lock);
1471 			return (-1);
1472 		}
1473 		bcopy(chp->ch_name, addr, chp->ch_namelen);
1474 		addr += chp->ch_namelen;
1475 	}
1476 	mutex_exit(&eg->evch_list_lock);
1477 	addr[0] = 0;
1478 	return (len + 1);
1479 }
1480 
1481 /*
1482  * Fills the data of one channel and all subscribers of that channel into
1483  * a buffer. Returns -1 if the channel name is invalid and 0 on buffer overflow.
1484  */
1485 static int
1486 evch_chgetchdata(char *chname, void *buf, size_t size)
1487 {
1488 	struct evch_globals *eg;
1489 	char		*cpaddr;
1490 	int		bufmax;
1491 	int		buflen;
1492 	evch_chan_t	*chp;
1493 	sev_chinfo_t	*p = (sev_chinfo_t *)buf;
1494 	int		chdlen;
1495 	evch_subd_t	*sdp;
1496 	sev_subinfo_t	*subp;
1497 	int		idlen;
1498 	int		len;
1499 
1500 	eg = zone_getspecific(evch_zone_key, curproc->p_zone);
1501 	ASSERT(eg != NULL);
1502 
1503 	mutex_enter(&eg->evch_list_lock);
1504 	chp = (evch_chan_t *)evch_dl_search(&eg->evch_list, evch_namecmp,
1505 	    chname);
1506 	if (chp == NULL) {
1507 		mutex_exit(&eg->evch_list_lock);
1508 		return (-1);
1509 	}
1510 	chdlen = offsetof(sev_chinfo_t, cd_subinfo);
1511 	if (size < chdlen) {
1512 		mutex_exit(&eg->evch_list_lock);
1513 		return (0);
1514 	}
1515 	p->cd_version = 0;
1516 	p->cd_suboffs = chdlen;
1517 	p->cd_uid = chp->ch_uid;
1518 	p->cd_gid = chp->ch_gid;
1519 	p->cd_perms = 0;
1520 	p->cd_ctime = chp->ch_ctime;
1521 	p->cd_maxev = chp->ch_maxev;
1522 	p->cd_evhwm = EVCH_EVQ_HIGHWM(chp->ch_queue);
1523 	p->cd_nevents = EVCH_EVQ_EVCOUNT(chp->ch_queue);
1524 	p->cd_maxsub = chp->ch_maxsubscr;
1525 	p->cd_nsub = evch_dl_getnum(&chp->ch_subscr);
1526 	p->cd_maxbinds = chp->ch_maxbinds;
1527 	p->cd_nbinds = chp->ch_bindings;
1528 	p->cd_holdpend = chp->ch_holdpend;
1529 	p->cd_limev = evch_events_max;
1530 	cpaddr = (char *)p + chdlen;
1531 	bufmax = size - chdlen;
1532 	buflen = 0;
1533 
1534 	for (sdp = evch_dl_next(&chp->ch_subscr, NULL); sdp != NULL;
1535 	    sdp = evch_dl_next(&chp->ch_subscr, sdp)) {
1536 		idlen = strlen(sdp->sd_ident) + 1;
1537 		len = SE_ALIGN(offsetof(sev_subinfo_t, sb_strings) + idlen +
1538 		    sdp->sd_clnsize);
1539 		buflen += len;
1540 		if (buflen >= bufmax) {
1541 			mutex_exit(&eg->evch_list_lock);
1542 			return (0);
1543 		}
1544 		subp = (sev_subinfo_t *)cpaddr;
1545 		subp->sb_nextoff = len;
1546 		subp->sb_stroff = offsetof(sev_subinfo_t, sb_strings);
1547 		if (sdp->sd_classname) {
1548 			bcopy(sdp->sd_classname, subp->sb_strings + idlen,
1549 			    sdp->sd_clnsize);
1550 			subp->sb_clnamoff = idlen;
1551 		} else {
1552 			subp->sb_clnamoff = idlen - 1;
1553 		}
1554 		subp->sb_pid = sdp->sd_pid;
1555 		subp->sb_nevents = EVCH_EVQ_EVCOUNT(sdp->sd_queue);
1556 		subp->sb_evhwm = EVCH_EVQ_HIGHWM(sdp->sd_queue);
1557 		subp->sb_persist = sdp->sd_persist;
1558 		subp->sb_status = evch_evq_status(sdp->sd_queue);
1559 		subp->sb_active = sdp->sd_active;
1560 		subp->sb_dump = sdp->sd_dump;
1561 		bcopy(sdp->sd_ident, subp->sb_strings, idlen);
1562 		cpaddr += len;
1563 	}
1564 	mutex_exit(&eg->evch_list_lock);
1565 	return (chdlen + buflen);
1566 }
1567 
1568 static void
1569 evch_chsetpropnvl(evch_bind_t *bp, nvlist_t *nvl)
1570 {
1571 	evch_chan_t *chp = bp->bd_channel;
1572 
1573 	mutex_enter(&chp->ch_mutex);
1574 
1575 	if (chp->ch_propnvl)
1576 		nvlist_free(chp->ch_propnvl);
1577 
1578 	chp->ch_propnvl = nvl;
1579 	chp->ch_propnvlgen++;
1580 
1581 	mutex_exit(&chp->ch_mutex);
1582 }
1583 
1584 static int
1585 evch_chgetpropnvl(evch_bind_t *bp, nvlist_t **nvlp, int64_t *genp)
1586 {
1587 	evch_chan_t *chp = bp->bd_channel;
1588 	int rc = 0;
1589 
1590 	mutex_enter(&chp->ch_mutex);
1591 
1592 	if (chp->ch_propnvl != NULL)
1593 		rc = (nvlist_dup(chp->ch_propnvl, nvlp, 0) == 0) ? 0 : ENOMEM;
1594 	else
1595 		*nvlp = NULL;	/* rc still 0 */
1596 
1597 	if (genp)
1598 		*genp = chp->ch_propnvlgen;
1599 
1600 	mutex_exit(&chp->ch_mutex);
1601 
1602 	if (rc != 0)
1603 		*nvlp = NULL;
1604 
1605 	return (rc);
1606 
1607 }
1608 
1609 /*
1610  * Init iteration of all events of a channel. This function creates a new
1611  * event queue and puts all events from the channel into that queue.
1612  * Subsequent calls to evch_chgetnextev will deliver the events from that
1613  * queue. Only one thread per channel is allowed to read through the events.
1614  * Returns 0 on success and 1 if there is already someone reading the
1615  * events.
1616  * If argument subid == NULL, we look for a subscriber which has
1617  * flag EVCH_SUB_DUMP set.
1618  */
1619 /*
1620  * Static variables that are used to traverse events of a channel in panic case.
1621  */
1622 static evch_chan_t	*evch_chan;
1623 static evch_eventq_t	*evch_subq;
1624 static sysevent_impl_t	*evch_curev;
1625 
1626 static evchanq_t *
1627 evch_chrdevent_init(evch_chan_t *chp, char *subid)
1628 {
1629 	evch_subd_t	*sdp;
1630 	void		*ev;
1631 	int		pmqstat;	/* Prev status of main queue */
1632 	int		psqstat;	/* Prev status of subscriber queue */
1633 	evchanq_t	*snp;		/* Pointer to q with snapshot of ev */
1634 	compare_f	compfunc;
1635 
1636 	compfunc = subid == NULL ? evch_dumpflgcmp : evch_subidcmp;
1637 	if (panicstr != NULL) {
1638 		evch_chan = chp;
1639 		evch_subq = NULL;
1640 		evch_curev = NULL;
1641 		if ((sdp = (evch_subd_t *)evch_dl_search(&chp->ch_subscr,
1642 		    compfunc, subid)) != NULL) {
1643 			evch_subq = sdp->sd_queue;
1644 		}
1645 		return (NULL);
1646 	}
1647 	mutex_enter(&chp->ch_mutex);
1648 	sdp = (evch_subd_t *)evch_dl_search(&chp->ch_subscr, compfunc, subid);
1649 	/*
1650 	 * Stop main event queue and subscriber queue if not already
1651 	 * in stop mode.
1652 	 */
1653 	pmqstat = evch_evq_status(chp->ch_queue);
1654 	if (pmqstat == 0)
1655 		evch_evq_stop(chp->ch_queue);
1656 	if (sdp != NULL) {
1657 		psqstat = evch_evq_status(sdp->sd_queue);
1658 		if (psqstat == 0)
1659 			evch_evq_stop(sdp->sd_queue);
1660 	}
1661 	/*
1662 	 * Create event queue to make a snapshot of all events in the
1663 	 * channel.
1664 	 */
1665 	snp = kmem_alloc(sizeof (evchanq_t), KM_SLEEP);
1666 	snp->sn_queue = evch_evq_create();
1667 	evch_evq_stop(snp->sn_queue);
1668 	/*
1669 	 * Make a snapshot of the subscriber queue and the main event queue.
1670 	 */
1671 	if (sdp != NULL) {
1672 		ev = NULL;
1673 		while ((ev = evch_evq_evnext(sdp->sd_queue, ev)) != NULL) {
1674 			(void) evch_evq_pub(snp->sn_queue, ev, EVCH_SLEEP);
1675 		}
1676 	}
1677 	ev = NULL;
1678 	while ((ev = evch_evq_evnext(chp->ch_queue, ev)) != NULL) {
1679 		(void) evch_evq_pub(snp->sn_queue, ev, EVCH_SLEEP);
1680 	}
1681 	snp->sn_nxtev = NULL;
1682 	/*
1683 	 * Restart main and subscriber queue if previously stopped
1684 	 */
1685 	if (sdp != NULL && psqstat == 0)
1686 		evch_evq_continue(sdp->sd_queue);
1687 	if (pmqstat == 0)
1688 		evch_evq_continue(chp->ch_queue);
1689 	mutex_exit(&chp->ch_mutex);
1690 	return (snp);
1691 }
1692 
1693 /*
1694  * Free all resources of the event queue snapshot. In case of panic
1695  * context snp must be NULL and no resources need to be free'ed.
1696  */
1697 static void
1698 evch_chrdevent_fini(evchanq_t *snp)
1699 {
1700 	if (snp != NULL) {
1701 		evch_evq_destroy(snp->sn_queue);
1702 		kmem_free(snp, sizeof (evchanq_t));
1703 	}
1704 }
1705 
1706 /*
1707  * Get address of next event from an event channel.
1708  * This function might be called in a panic context. In that case
1709  * no resources will be allocated and no locks grabbed.
1710  * In normal operation context a snapshot of the event queues of the
1711  * specified event channel will be taken.
1712  */
1713 static sysevent_impl_t *
1714 evch_chgetnextev(evchanq_t *snp)
1715 {
1716 	if (panicstr != NULL) {
1717 		if (evch_chan == NULL)
1718 			return (NULL);
1719 		if (evch_subq != NULL) {
1720 			/*
1721 			 * We have a subscriber queue. Traverse this queue
1722 			 * first.
1723 			 */
1724 			if ((evch_curev = (sysevent_impl_t *)
1725 			    evch_evq_evnext(evch_subq, evch_curev)) != NULL) {
1726 				return (evch_curev);
1727 			} else {
1728 				/*
1729 				 * All subscriber events traversed. evch_subq
1730 				 * == NULL indicates to take the main event
1731 				 * queue now.
1732 				 */
1733 				evch_subq = NULL;
1734 			}
1735 		}
1736 		/*
1737 		 * Traverse the main event queue.
1738 		 */
1739 		if ((evch_curev = (sysevent_impl_t *)
1740 		    evch_evq_evnext(evch_chan->ch_queue, evch_curev)) ==
1741 		    NULL) {
1742 			evch_chan = NULL;
1743 		}
1744 		return (evch_curev);
1745 	}
1746 	ASSERT(snp != NULL);
1747 	snp->sn_nxtev = (sysevent_impl_t *)evch_evq_evnext(snp->sn_queue,
1748 	    snp->sn_nxtev);
1749 	return (snp->sn_nxtev);
1750 }
1751 
1752 /*
1753  * The functions below build up the interface for the kernel to bind/unbind,
1754  * subscribe/unsubscribe and publish to event channels. It consists of the
1755  * following functions:
1756  *
1757  * sysevent_evc_bind	    - Bind to a channel. Create a channel if required
1758  * sysevent_evc_unbind	    - Unbind from a channel. Destroy ch. if last unbind
1759  * sysevent_evc_subscribe   - Subscribe to events from a channel
1760  * sysevent_evc_unsubscribe - Unsubscribe from an event class
1761  * sysevent_evc_publish	    - Publish an event to an event channel
1762  * sysevent_evc_control	    - Various control operation on event channel
1763  * sysevent_evc_setpropnvl  - Set channel property nvlist
1764  * sysevent_evc_getpropnvl  - Get channel property nvlist
1765  *
 * The functions below are for evaluating a sysevent:
1767  *
1768  * sysevent_get_class_name  - Get pointer to event class string
1769  * sysevent_get_subclass_name - Get pointer to event subclass string
1770  * sysevent_get_seq	    - Get unique event sequence number
1771  * sysevent_get_time	    - Get hrestime of event publish
1772  * sysevent_get_size	    - Get size of event structure
1773  * sysevent_get_pub	    - Get publisher string
1774  * sysevent_get_attr_list   - Get copy of attribute list
1775  *
 * The following interfaces have stability level Project Private and
 * allow the events of an event channel to be saved even in a panic case.
1778  *
1779  * sysevent_evc_walk_init   - Take a snapshot of the events in a channel
1780  * sysevent_evc_walk_step   - Read next event from snapshot
1781  * sysevent_evc_walk_fini   - Free resources from event channel snapshot
1782  * sysevent_evc_event_attr  - Get event payload address and size
1783  */
1784 /*
1785  * allocate sysevent structure with optional space for attributes
1786  */
1787 static sysevent_impl_t *
1788 sysevent_evc_alloc(const char *class, const char *subclass, const char *pub,
1789     size_t pub_sz, size_t atsz, uint32_t flag)
1790 {
1791 	int		payload_sz;
1792 	int		class_sz, subclass_sz;
1793 	int 		aligned_class_sz, aligned_subclass_sz, aligned_pub_sz;
1794 	sysevent_impl_t	*ev;
1795 
1796 	/*
1797 	 * Calculate and reserve space for the class, subclass and
1798 	 * publisher strings in the event buffer
1799 	 */
1800 	class_sz = strlen(class) + 1;
1801 	subclass_sz = strlen(subclass) + 1;
1802 
1803 	ASSERT((class_sz <= MAX_CLASS_LEN) && (subclass_sz <=
1804 	    MAX_SUBCLASS_LEN) && (pub_sz <= MAX_PUB_LEN));
1805 
1806 	/* String sizes must be 64-bit aligned in the event buffer */
1807 	aligned_class_sz = SE_ALIGN(class_sz);
1808 	aligned_subclass_sz = SE_ALIGN(subclass_sz);
1809 	aligned_pub_sz = SE_ALIGN(pub_sz);
1810 
1811 	/*
1812 	 * Calculate payload size. Consider the space needed for alignment
1813 	 * and subtract the size of the uint64_t placeholder variables of
1814 	 * sysevent_impl_t.
1815 	 */
1816 	payload_sz = (aligned_class_sz - sizeof (uint64_t)) +
1817 	    (aligned_subclass_sz - sizeof (uint64_t)) +
1818 	    (aligned_pub_sz - sizeof (uint64_t)) - sizeof (uint64_t) +
1819 	    atsz;
1820 
1821 	/*
1822 	 * Allocate event buffer plus additional payload overhead
1823 	 */
1824 	if ((ev = evch_evq_evzalloc(sizeof (sysevent_impl_t) +
1825 	    payload_sz, flag)) == NULL) {
1826 		return (NULL);
1827 	}
1828 
1829 	/* Initialize the event buffer data */
1830 	SE_VERSION(ev) = SYS_EVENT_VERSION;
1831 	bcopy(class, SE_CLASS_NAME(ev), class_sz);
1832 
1833 	SE_SUBCLASS_OFF(ev) = SE_ALIGN(offsetof(sysevent_impl_t,
1834 	    se_class_name)) + aligned_class_sz;
1835 	bcopy(subclass, SE_SUBCLASS_NAME(ev), subclass_sz);
1836 
1837 	SE_PUB_OFF(ev) = SE_SUBCLASS_OFF(ev) + aligned_subclass_sz;
1838 	bcopy(pub, SE_PUB_NAME(ev), pub_sz);
1839 
1840 	SE_ATTR_PTR(ev) = (uint64_t)0;
1841 	SE_PAYLOAD_SZ(ev) = payload_sz;
1842 
1843 	return (ev);
1844 }
1845 
/*
 * First initialization step of the event channel framework; simply
 * forwards to evch_chinit().
 */
void
sysevent_evc_init()
{
	evch_chinit();
}
1854 
/*
 * Second initialization step: forwards to evch_chinitthr(), which sets up
 * the queue threads for event channels that have already been created.
 */
void
sysevent_evc_thrinit()
{
	evch_chinitthr();
}
1864 
1865 int
1866 sysevent_evc_bind(const char *ch_name, evchan_t **scpp, uint32_t flags)
1867 {
1868 	ASSERT(ch_name != NULL && scpp != NULL);
1869 	ASSERT((flags & ~EVCH_B_FLAGS) == 0);
1870 	return (evch_chbind(ch_name, (evch_bind_t **)scpp, flags));
1871 }
1872 
1873 int
1874 sysevent_evc_unbind(evchan_t *scp)
1875 {
1876 	evch_bind_t *bp = (evch_bind_t *)scp;
1877 
1878 	ASSERT(scp != NULL);
1879 	evch_chunsubscribe(bp, NULL, 0);
1880 	evch_chunbind(bp);
1881 
1882 	return (0);
1883 }
1884 
1885 int
1886 sysevent_evc_subscribe(evchan_t *scp, const char *sid, const char *class,
1887     int (*callb)(sysevent_t *ev, void *cookie),
1888     void *cookie, uint32_t flags)
1889 {
1890 	ASSERT(scp != NULL && sid != NULL && class != NULL && callb != NULL);
1891 	ASSERT(flags == 0);
1892 	if (strlen(sid) > MAX_SUBID_LEN) {
1893 		return (EINVAL);
1894 	}
1895 	if (strcmp(class, EC_ALL) == 0) {
1896 		class = NULL;
1897 	}
1898 	return (evch_chsubscribe((evch_bind_t *)scp, EVCH_DELKERN, sid, class,
1899 	    (void *)callb, cookie, 0, 0));
1900 }
1901 
1902 int
1903 sysevent_evc_unsubscribe(evchan_t *scp, const char *sid)
1904 {
1905 	ASSERT(scp != NULL && sid != NULL);
1906 	if (strcmp(sid, EVCH_ALLSUB) == 0) {
1907 		sid = NULL;
1908 	}
1909 	evch_chunsubscribe((evch_bind_t *)scp, sid, 0);
1910 
1911 	return (0);
1912 }
1913 
1914 /*
1915  * Publish kernel event. Returns 0 on success, error code else.
1916  * Optional attribute data is packed into the event structure.
1917  */
1918 int
1919 sysevent_evc_publish(evchan_t *scp, const char *class, const char *subclass,
1920     const char *vendor, const char *pubs, nvlist_t *attr, uint32_t flags)
1921 {
1922 	sysevent_impl_t	*evp;
1923 	char		pub[MAX_PUB_LEN];
1924 	int		pub_sz;		/* includes terminating 0 */
1925 	int		km_flags;
1926 	size_t		asz = 0;
1927 	uint64_t	attr_offset;
1928 	caddr_t		patt;
1929 	int		err;
1930 
1931 	ASSERT(scp != NULL && class != NULL && subclass != NULL &&
1932 	    vendor != NULL && pubs != NULL);
1933 
1934 	ASSERT((flags & ~(EVCH_SLEEP | EVCH_NOSLEEP | EVCH_TRYHARD |
1935 	    EVCH_QWAIT)) == 0);
1936 
1937 	km_flags = flags & (EVCH_SLEEP | EVCH_NOSLEEP | EVCH_TRYHARD);
1938 	ASSERT(km_flags == EVCH_SLEEP || km_flags == EVCH_NOSLEEP ||
1939 	    km_flags == EVCH_TRYHARD);
1940 
1941 	pub_sz = snprintf(pub, MAX_PUB_LEN, "%s:kern:%s", vendor, pubs) + 1;
1942 	if (pub_sz > MAX_PUB_LEN)
1943 		return (EINVAL);
1944 
1945 	if (attr != NULL) {
1946 		if ((err = nvlist_size(attr, &asz, NV_ENCODE_NATIVE)) != 0) {
1947 			return (err);
1948 		}
1949 	}
1950 	evp = sysevent_evc_alloc(class, subclass, pub, pub_sz, asz, km_flags);
1951 	if (evp == NULL) {
1952 		return (ENOMEM);
1953 	}
1954 	if (attr != NULL) {
1955 		/*
1956 		 * Pack attributes into event buffer. Event buffer already
1957 		 * has enough room for the packed nvlist.
1958 		 */
1959 		attr_offset = SE_ATTR_OFF(evp);
1960 		patt = (caddr_t)evp + attr_offset;
1961 
1962 		err = nvlist_pack(attr, &patt, &asz, NV_ENCODE_NATIVE,
1963 		    km_flags & EVCH_SLEEP ? KM_SLEEP : KM_NOSLEEP);
1964 
1965 		ASSERT(err != ENOMEM);
1966 
1967 		if (err != 0) {
1968 			return (EINVAL);
1969 		}
1970 
1971 		evp->seh_attr_off = attr_offset;
1972 		SE_FLAG(evp) = SE_PACKED_BUF;
1973 	}
1974 	return (evch_chpublish((evch_bind_t *)scp, evp, flags));
1975 }
1976 
1977 int
1978 sysevent_evc_control(evchan_t *scp, int cmd, ...)
1979 {
1980 	va_list		ap;
1981 	evch_chan_t	*chp = ((evch_bind_t *)scp)->bd_channel;
1982 	uint32_t	*chlenp;
1983 	uint32_t	chlen;
1984 	uint32_t	ochlen;
1985 	int		rc = 0;
1986 
1987 	if (scp == NULL) {
1988 		return (EINVAL);
1989 	}
1990 
1991 	va_start(ap, cmd);
1992 	mutex_enter(&chp->ch_mutex);
1993 	switch (cmd) {
1994 	case EVCH_GET_CHAN_LEN:
1995 		chlenp = va_arg(ap, uint32_t *);
1996 		*chlenp = chp->ch_maxev;
1997 		break;
1998 	case EVCH_SET_CHAN_LEN:
1999 		chlen = va_arg(ap, uint32_t);
2000 		ochlen = chp->ch_maxev;
2001 		chp->ch_maxev = min(chlen, evch_events_max);
2002 		if (ochlen < chp->ch_maxev) {
2003 			cv_signal(&chp->ch_pubcv);
2004 		}
2005 		break;
2006 	case EVCH_GET_CHAN_LEN_MAX:
2007 		*va_arg(ap, uint32_t *) = evch_events_max;
2008 		break;
2009 	default:
2010 		rc = EINVAL;
2011 	}
2012 
2013 	mutex_exit(&chp->ch_mutex);
2014 	va_end(ap);
2015 	return (rc);
2016 }
2017 
2018 int
2019 sysevent_evc_setpropnvl(evchan_t *scp, nvlist_t *nvl)
2020 {
2021 	nvlist_t *nvlcp = nvl;
2022 
2023 	if (nvl != NULL && nvlist_dup(nvl, &nvlcp, 0) != 0)
2024 		return (ENOMEM);
2025 
2026 	evch_chsetpropnvl((evch_bind_t *)scp, nvlcp);
2027 
2028 	return (0);
2029 }
2030 
2031 int
2032 sysevent_evc_getpropnvl(evchan_t *scp, nvlist_t **nvlp)
2033 {
2034 	return (evch_chgetpropnvl((evch_bind_t *)scp, nvlp, NULL));
2035 }
2036 
2037 /*
2038  * Project private interface to take a snapshot of all events of the
2039  * specified event channel. Argument subscr may be a subscriber id, the empty
2040  * string "", or NULL. The empty string indicates that no subscriber is
2041  * selected, for example if a previous subscriber died. sysevent_evc_walk_next()
2042  * will deliver events from the main event queue in this case. If subscr is
2043  * NULL, the subscriber with the EVCH_SUB_DUMP flag set (subd->sd_dump != 0)
2044  * will be selected.
2045  *
2046  * In panic case this function returns NULL. This is legal. The NULL has
2047  * to be delivered to sysevent_evc_walk_step() and sysevent_evc_walk_fini().
2048  */
2049 evchanq_t *
2050 sysevent_evc_walk_init(evchan_t *scp, char *subscr)
2051 {
2052 	if (panicstr != NULL && scp == NULL)
2053 		return (NULL);
2054 	ASSERT(scp != NULL);
2055 	return (evch_chrdevent_init(((evch_bind_t *)scp)->bd_channel, subscr));
2056 }
2057 
2058 /*
2059  * Project private interface to read events from a previously taken
2060  * snapshot (with sysevent_evc_walk_init). In case of panic events
2061  * are retrieved directly from the channel data structures. No resources
2062  * are allocated and no mutexes are grabbed in panic context.
2063  */
2064 sysevent_t *
2065 sysevent_evc_walk_step(evchanq_t *evcq)
2066 {
2067 	return ((sysevent_t *)evch_chgetnextev(evcq));
2068 }
2069 
2070 /*
2071  * Project private interface to free a previously taken snapshot.
2072  */
2073 void
2074 sysevent_evc_walk_fini(evchanq_t *evcq)
2075 {
2076 	evch_chrdevent_fini(evcq);
2077 }
2078 
2079 /*
2080  * Get address and size of an event payload. Returns NULL when no
2081  * payload present.
2082  */
2083 char *
2084 sysevent_evc_event_attr(sysevent_t *ev, size_t *plsize)
2085 {
2086 	char	*attrp;
2087 	size_t	aoff;
2088 	size_t	asz;
2089 
2090 	aoff = SE_ATTR_OFF(ev);
2091 	attrp = (char *)ev + aoff;
2092 	asz = *plsize = SE_SIZE(ev) - aoff;
2093 	return (asz ? attrp : NULL);
2094 }
2095 
2096 /*
2097  * sysevent_get_class_name - Get class name string
2098  */
2099 char *
2100 sysevent_get_class_name(sysevent_t *ev)
2101 {
2102 	return (SE_CLASS_NAME(ev));
2103 }
2104 
2105 /*
2106  * sysevent_get_subclass_name - Get subclass name string
2107  */
2108 char *
2109 sysevent_get_subclass_name(sysevent_t *ev)
2110 {
2111 	return (SE_SUBCLASS_NAME(ev));
2112 }
2113 
2114 /*
2115  * sysevent_get_seq - Get event sequence id
2116  */
2117 uint64_t
2118 sysevent_get_seq(sysevent_t *ev)
2119 {
2120 	return (SE_SEQ(ev));
2121 }
2122 
2123 /*
2124  * sysevent_get_time - Get event timestamp
2125  */
2126 void
2127 sysevent_get_time(sysevent_t *ev, hrtime_t *etime)
2128 {
2129 	*etime = SE_TIME(ev);
2130 }
2131 
2132 /*
2133  * sysevent_get_size - Get event buffer size
2134  */
2135 size_t
2136 sysevent_get_size(sysevent_t *ev)
2137 {
2138 	return ((size_t)SE_SIZE(ev));
2139 }
2140 
2141 /*
2142  * sysevent_get_pub - Get publisher name string
2143  */
2144 char *
2145 sysevent_get_pub(sysevent_t *ev)
2146 {
2147 	return (SE_PUB_NAME(ev));
2148 }
2149 
2150 /*
2151  * sysevent_get_attr_list - stores address of a copy of the attribute list
2152  * associated with the given sysevent buffer. The list must be freed by the
2153  * caller.
2154  */
2155 int
2156 sysevent_get_attr_list(sysevent_t *ev, nvlist_t **nvlist)
2157 {
2158 	int		error;
2159 	caddr_t		attr;
2160 	size_t		attr_len;
2161 	uint64_t	attr_offset;
2162 
2163 	*nvlist = NULL;
2164 	if (SE_FLAG(ev) != SE_PACKED_BUF) {
2165 		return (EINVAL);
2166 	}
2167 	attr_offset = SE_ATTR_OFF(ev);
2168 	if (SE_SIZE(ev) == attr_offset) {
2169 		return (EINVAL);
2170 	}
2171 
2172 	/* unpack nvlist */
2173 	attr = (caddr_t)ev + attr_offset;
2174 	attr_len = SE_SIZE(ev) - attr_offset;
2175 	if ((error = nvlist_unpack(attr, attr_len, nvlist, 0)) != 0) {
2176 		error = error != ENOMEM ? EINVAL : error;
2177 		return (error);
2178 	}
2179 	return (0);
2180 }
2181 
2182 /*
2183  * Functions called by the sysevent driver for general purpose event channels
2184  *
2185  * evch_usrchanopen	- Create/Bind to an event channel
2186  * evch_usrchanclose	- Unbind/Destroy event channel
2187  * evch_usrallocev	- Allocate event data structure
2188  * evch_usrfreeev	- Free event data structure
2189  * evch_usrpostevent	- Publish event
2190  * evch_usrsubscribe	- Subscribe (register callback function)
2191  * evch_usrunsubscribe	- Unsubscribe
2192  * evch_usrcontrol_set	- Set channel properties
2193  * evch_usrcontrol_get	- Get channel properties
2194  * evch_usrgetchnames	- Get list of channel names
2195  * evch_usrgetchdata	- Get data of an event channel
2196  * evch_usrsetpropnvl	- Set channel properties nvlist
2197  * evch_usrgetpropnvl	- Get channel properties nvlist
2198  */
2199 evchan_t *
2200 evch_usrchanopen(const char *name, uint32_t flags, int *err)
2201 {
2202 	evch_bind_t *bp = NULL;
2203 
2204 	*err = evch_chbind(name, &bp, flags);
2205 	return ((evchan_t *)bp);
2206 }
2207 
2208 /*
2209  * Unbind from the channel.
2210  */
2211 void
2212 evch_usrchanclose(evchan_t *cbp)
2213 {
2214 	evch_chunbind((evch_bind_t *)cbp);
2215 }
2216 
2217 /*
2218  * Allocates log_evch_eventq_t structure but returns the pointer of the embedded
2219  * sysevent_impl_t structure as the opaque sysevent_t * data type
2220  */
2221 sysevent_impl_t *
2222 evch_usrallocev(size_t evsize, uint32_t flags)
2223 {
2224 	return ((sysevent_impl_t *)evch_evq_evzalloc(evsize, flags));
2225 }
2226 
2227 /*
2228  * Free evch_eventq_t structure
2229  */
2230 void
2231 evch_usrfreeev(sysevent_impl_t *ev)
2232 {
2233 	evch_evq_evfree((void *)ev);
2234 }
2235 
2236 /*
2237  * Posts an event to the given channel. The event structure has to be
2238  * allocated by evch_usrallocev(). Returns zero on success and an error
2239  * code else. Attributes have to be packed and included in the event structure.
2240  *
2241  */
2242 int
2243 evch_usrpostevent(evchan_t *bp, sysevent_impl_t *ev, uint32_t flags)
2244 {
2245 	return (evch_chpublish((evch_bind_t *)bp, ev, flags));
2246 }
2247 
2248 /*
2249  * Subscribe function for user land subscriptions
2250  */
2251 int
2252 evch_usrsubscribe(evchan_t *bp, const char *sid, const char *class,
2253     int d, uint32_t flags)
2254 {
2255 	door_handle_t	dh = door_ki_lookup(d);
2256 	int		rv;
2257 
2258 	if (dh == NULL) {
2259 		return (EINVAL);
2260 	}
2261 	if ((rv = evch_chsubscribe((evch_bind_t *)bp, EVCH_DELDOOR, sid, class,
2262 	    (void *)dh, NULL, flags, curproc->p_pid)) != 0) {
2263 		door_ki_rele(dh);
2264 	}
2265 	return (rv);
2266 }
2267 
2268 /*
2269  * Flag can be EVCH_SUB_KEEP or 0. EVCH_SUB_KEEP preserves persistent
2270  * subscribers
2271  */
2272 void
2273 evch_usrunsubscribe(evchan_t *bp, const char *subid, uint32_t flags)
2274 {
2275 	evch_chunsubscribe((evch_bind_t *)bp, subid, flags);
2276 }
2277 
2278 /*ARGSUSED*/
2279 int
2280 evch_usrcontrol_set(evchan_t *bp, int cmd, uint32_t value)
2281 {
2282 	evch_chan_t	*chp = ((evch_bind_t *)bp)->bd_channel;
2283 	uid_t		uid = crgetuid(curthread->t_cred);
2284 	int		rc = 0;
2285 
2286 	mutex_enter(&chp->ch_mutex);
2287 	switch (cmd) {
2288 	case EVCH_SET_CHAN_LEN:
2289 		if (uid && uid != chp->ch_uid) {
2290 			rc = EACCES;
2291 			break;
2292 		}
2293 		chp->ch_maxev = min(value, evch_events_max);
2294 		break;
2295 	default:
2296 		rc = EINVAL;
2297 	}
2298 	mutex_exit(&chp->ch_mutex);
2299 	return (rc);
2300 }
2301 
2302 /*ARGSUSED*/
2303 int
2304 evch_usrcontrol_get(evchan_t *bp, int cmd, uint32_t *value)
2305 {
2306 	evch_chan_t	*chp = ((evch_bind_t *)bp)->bd_channel;
2307 	int		rc = 0;
2308 
2309 	mutex_enter(&chp->ch_mutex);
2310 	switch (cmd) {
2311 	case EVCH_GET_CHAN_LEN:
2312 		*value = chp->ch_maxev;
2313 		break;
2314 	case EVCH_GET_CHAN_LEN_MAX:
2315 		*value = evch_events_max;
2316 		break;
2317 	default:
2318 		rc = EINVAL;
2319 	}
2320 	mutex_exit(&chp->ch_mutex);
2321 	return (rc);
2322 }
2323 
2324 int
2325 evch_usrgetchnames(char *buf, size_t size)
2326 {
2327 	return (evch_chgetnames(buf, size));
2328 }
2329 
2330 int
2331 evch_usrgetchdata(char *chname, void *buf, size_t size)
2332 {
2333 	return (evch_chgetchdata(chname, buf, size));
2334 }
2335 
2336 void
2337 evch_usrsetpropnvl(evchan_t *bp, nvlist_t *nvl)
2338 {
2339 	evch_chsetpropnvl((evch_bind_t *)bp, nvl);
2340 }
2341 
2342 int
2343 evch_usrgetpropnvl(evchan_t *bp, nvlist_t **nvlp, int64_t *genp)
2344 {
2345 	return (evch_chgetpropnvl((evch_bind_t *)bp, nvlp, genp));
2346 }
2347