xref: /titanic_50/usr/src/uts/common/os/log_sysevent.c (revision 6d6fcbbb256ba9a8fe3e6076ae8a1f493b1f1a99)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/errno.h>
28 #include <sys/stropts.h>
29 #include <sys/debug.h>
30 #include <sys/ddi.h>
31 #include <sys/sunddi.h>
32 #include <sys/vmem.h>
33 #include <sys/cmn_err.h>
34 #include <sys/callb.h>
35 #include <sys/sysevent.h>
36 #include <sys/sysevent_impl.h>
37 #include <sys/modctl.h>
38 #include <sys/sysmacros.h>
39 #include <sys/disp.h>
40 #include <sys/autoconf.h>
41 #include <sys/atomic.h>
42 #include <sys/sdt.h>
43 
44 /* for doors */
45 #include <sys/pathname.h>
46 #include <sys/door.h>
47 #include <sys/kmem.h>
48 #include <sys/cpuvar.h>
49 #include <sys/fs/snode.h>
50 
51 /*
52  * log_sysevent.c - Provides the interfaces for kernel event publication
53  *			to the sysevent event daemon (syseventd).
54  */
55 
/*
 * Debug stuff
 *
 * log_event_debug may be patched non-zero (e.g. via mdb/kmdb) to enable
 * LOG_DEBUG tracing; values > 1 also enable the more verbose LOG_DEBUG1
 * tracing, which is compiled out entirely on non-DEBUG kernels.
 */
static int log_event_debug = 0;
/*
 * NOTE(review): these macros expand to an unbraced 'if'; call sites must
 * not place them where a dangling 'else' could attach.
 */
#define	LOG_DEBUG(args)  if (log_event_debug) cmn_err args
#ifdef DEBUG
#define	LOG_DEBUG1(args)  if (log_event_debug > 1) cmn_err args
#else
#define	LOG_DEBUG1(args)
#endif
66 
/*
 * Local static vars
 */
/* queue of event buffers already delivered to syseventd (kept for replay) */
static log_eventq_t *log_eventq_sent = NULL;

/*
 * Count of event buffers in the pending (not-yet-delivered) queue
 */
int log_eventq_cnt = 0;

/* queue of event buffers awaiting delivery to syseventd */
static log_eventq_t *log_eventq_head = NULL;
static log_eventq_t *log_eventq_tail = NULL;
static uint64_t kernel_event_id = 0;	/* sequence id for kernel events */
static int encoding = NV_ENCODE_NATIVE;	/* nvlist encoding for attributes */

/* log event delivery flag */
#define	LOGEVENT_DELIVERY_OK	0	/* OK to deliver event buffers */
#define	LOGEVENT_DELIVERY_CONT	1	/* Continue to deliver event buffers */
#define	LOGEVENT_DELIVERY_HOLD	2	/* Hold delivering of event buffers */

/*
 * Tunable maximum event buffer queue size. Size depends on how many events
 * the queue must hold when syseventd is not available, for example during
 * system startup. Experience showed that more than 2000 events could be posted
 * due to correctable memory errors.
 */
int logevent_max_q_sz = 5000;


static int log_event_delivery = LOGEVENT_DELIVERY_HOLD;
static char logevent_door_upcall_filename[MAXPATHLEN];

static door_handle_t event_door = NULL;		/* Door for upcalls */
static kmutex_t event_door_mutex;		/* To protect event_door */
103 
/*
 * async thread-related variables
 *
 * eventq_head_mutex - synchronizes access to the kernel event queue
 *
 * eventq_sent_mutex - synchronizes access to the queue of events sent to
 *			userlevel
 *
 * log_event_cv - condition variable signaled when an event has arrived or
 *			userlevel is ready to process event buffers
 *
 * async_thread - asynchronous event delivery thread to userlevel daemon.
 *
 * sysevent_upcall_status - status of the door upcall link
 */
static kmutex_t eventq_head_mutex;
static kmutex_t eventq_sent_mutex;
static kcondvar_t log_event_cv;
static kthread_id_t async_thread = NULL;

/* throttling state for publishers blocked on a full event queue */
static kmutex_t event_qfull_mutex;
static kcondvar_t event_qfull_cv;
static int event_qfull_blocked = 0;

static int sysevent_upcall_status = -1;
static kmutex_t registered_channel_mutex;	/* guards channel database */

/*
 * Indicates the syseventd daemon has begun taking events
 */
int sysevent_daemon_init = 0;

/*
 * Back-off delay when door_ki_upcall returns EAGAIN.  Typically
 * caused by the server process doing a forkall().  Since all threads
 * but the thread actually doing the forkall() need to be quiesced,
 * the fork may take some time.  The min/max pause are in units
 * of clock ticks.
 */
#define	LOG_EVENT_MIN_PAUSE	8
#define	LOG_EVENT_MAX_PAUSE	128

static kmutex_t	event_pause_mutex;
static kcondvar_t event_pause_cv;
static int event_pause_state = 0;	/* 1 while a pause is in progress */
149 
/*
 * log_event_busy_timeout - timeout(9F) callback armed by log_event_pause().
 * Clears the pause flag under event_pause_mutex and wakes the thread
 * blocked in log_event_pause().
 */
/*ARGSUSED*/
static void
log_event_busy_timeout(void *arg)
{
	mutex_enter(&event_pause_mutex);
	event_pause_state = 0;
	cv_signal(&event_pause_cv);
	mutex_exit(&event_pause_mutex);
}
159 
/*
 * log_event_pause - Block the caller for nticks clock ticks.  Used by the
 * event delivery thread to back off between door upcall retries.  Only a
 * single thread may use this at a time (see ASSERT below).
 */
static void
log_event_pause(int nticks)
{
	timeout_id_t id;

	/*
	 * Only one use of log_event_pause at a time
	 */
	ASSERT(event_pause_state == 0);

	/*
	 * Set the flag before arming the timeout; the callback clears it
	 * under event_pause_mutex and signals event_pause_cv.
	 */
	event_pause_state = 1;
	id = timeout(log_event_busy_timeout, NULL, nticks);
	if (id != 0) {
		mutex_enter(&event_pause_mutex);
		while (event_pause_state)
			cv_wait(&event_pause_cv, &event_pause_mutex);
		mutex_exit(&event_pause_mutex);
	}
	/* ensure the flag is clear even if the timeout could not be armed */
	event_pause_state = 0;
}
180 
181 
/*
 * log_event_upcall - Perform the door upcall to syseventd for event buffer
 *			delivery.  Retries transparently on EINTR and EAGAIN
 *			with exponential back-off, and detects the server
 *			dying (EBADF).  The event buffer is reused by the
 *			syseventd door_return to hold the result code.
 */
static int
log_event_upcall(log_event_upcall_arg_t *arg)
{
	int error;
	size_t size;
	sysevent_t *ev;
	door_arg_t darg, save_arg;
	int retry;
	int neagain = 0;	/* EAGAIN retry count, for debug output */
	int neintr = 0;		/* EINTR retry count, for debug output */
	int nticks = LOG_EVENT_MIN_PAUSE;

	/* Initialize door args */
	ev = (sysevent_t *)&arg->buf;
	size = sizeof (log_event_upcall_arg_t) + SE_PAYLOAD_SZ(ev);

	darg.rbuf = (char *)arg;
	darg.data_ptr = (char *)arg;
	darg.rsize = size;
	darg.data_size = size;
	darg.desc_ptr = NULL;
	darg.desc_num = 0;

	LOG_DEBUG1((CE_CONT, "log_event_upcall: 0x%llx\n",
	    (longlong_t)SE_SEQ((sysevent_t *)&arg->buf)));

	/*
	 * The door call may modify darg; keep a pristine copy so the
	 * arguments can be restored before each retry.
	 */
	save_arg = darg;
	for (retry = 0; ; retry++) {

		/*
		 * event_door can be released/replaced as the daemon
		 * (re)binds; it must be sampled under event_door_mutex.
		 */
		mutex_enter(&event_door_mutex);
		if (event_door == NULL) {
			mutex_exit(&event_door_mutex);

			return (EBADF);
		}

		if ((error = door_ki_upcall_limited(event_door, &darg, NULL,
		    SIZE_MAX, 0)) == 0) {
			mutex_exit(&event_door_mutex);
			break;
		}

		/*
		 * EBADF is handled outside the switch below because we need to
		 * hold event_door_mutex a bit longer
		 */
		if (error == EBADF) {
			/* Server died */
			door_ki_rele(event_door);
			event_door = NULL;

			mutex_exit(&event_door_mutex);
			return (error);
		}

		mutex_exit(&event_door_mutex);

		/*
		 * The EBADF case is already handled above with event_door_mutex
		 * held
		 */
		switch (error) {
		case EINTR:
			neintr++;
			log_event_pause(2);
			darg = save_arg;
			break;
		case EAGAIN:
			/* cannot deliver upcall - process may be forking */
			neagain++;
			log_event_pause(nticks);
			/* exponential back-off, capped at LOG_EVENT_MAX_PAUSE */
			nticks <<= 1;
			if (nticks > LOG_EVENT_MAX_PAUSE)
				nticks = LOG_EVENT_MAX_PAUSE;
			darg = save_arg;
			break;
		default:
			/* any other error is fatal for this delivery */
			cmn_err(CE_CONT,
			    "log_event_upcall: door_ki_upcall error %d\n",
			    error);
			return (error);
		}
	}

	if (neagain > 0 || neintr > 0) {
		LOG_DEBUG((CE_CONT, "upcall: eagain=%d eintr=%d nticks=%d\n",
		    neagain, neintr, nticks));
	}

	LOG_DEBUG1((CE_CONT, "log_event_upcall:\n\t"
	    "error=%d rptr1=%p rptr2=%p dptr2=%p ret1=%x ret2=%x\n",
	    error, (void *)arg, (void *)darg.rbuf,
	    (void *)darg.data_ptr,
	    *((int *)(darg.rbuf)), *((int *)(darg.data_ptr))));

	if (!error) {
		/*
		 * upcall was successfully executed. Check return code:
		 * syseventd places its status at the start of the return
		 * buffer, which aliases the event buffer itself.
		 */
		error = *((int *)(darg.rbuf));
	}

	return (error);
}
292 
/*
 * log_event_deliver - event delivery thread
 *			Deliver all events on the event queue to syseventd.
 *			If the daemon can not process events, stop event
 *			delivery and wait for an indication from the
 *			daemon to resume delivery.
 *
 *			Once all event buffers have been delivered, wait
 *			until there are more to deliver.
 */
static void
log_event_deliver()
{
	log_eventq_t *q;
	int upcall_err;
	callb_cpr_t cprinfo;

	/* register with the CPR (suspend/resume) framework */
	CALLB_CPR_INIT(&cprinfo, &eventq_head_mutex, callb_generic_cpr,
	    "logevent");

	/*
	 * eventq_head_mutex is exited (released) when there are no more
	 * events to process from the eventq in cv_wait().
	 */
	mutex_enter(&eventq_head_mutex);

	for (;;) {
		LOG_DEBUG1((CE_CONT, "log_event_deliver: head = %p\n",
		    (void *)log_eventq_head));

		upcall_err = 0;
		q = log_eventq_head;

		while (q) {
			/* honor a delivery hold requested by the daemon */
			if (log_event_delivery == LOGEVENT_DELIVERY_HOLD) {
				upcall_err = EAGAIN;
				break;
			}

			log_event_delivery = LOGEVENT_DELIVERY_OK;

			/*
			 * Release event queue lock during upcall to
			 * syseventd
			 */
			mutex_exit(&eventq_head_mutex);
			if ((upcall_err = log_event_upcall(&q->arg)) != 0) {
				mutex_enter(&eventq_head_mutex);
				break;
			}

			/*
			 * We may be able to add entries to
			 * the queue now.
			 */
			if (event_qfull_blocked > 0 &&
			    log_eventq_cnt < logevent_max_q_sz) {
				mutex_enter(&event_qfull_mutex);
				if (event_qfull_blocked > 0) {
					cv_signal(&event_qfull_cv);
				}
				mutex_exit(&event_qfull_mutex);
			}

			mutex_enter(&eventq_head_mutex);

			/*
			 * Daemon restart can cause entries to be moved from
			 * the sent queue and put back on the event queue.
			 * If this has occurred, replay event queue
			 * processing from the new queue head.
			 */
			if (q != log_eventq_head) {
				q = log_eventq_head;
				LOG_DEBUG((CE_CONT, "log_event_deliver: "
				    "door upcall/daemon restart race\n"));
			} else {
				log_eventq_t *next;

				/*
				 * Move the event to the sent queue when a
				 * successful delivery has been made.
				 */
				mutex_enter(&eventq_sent_mutex);
				next = q->next;
				q->next = log_eventq_sent;
				log_eventq_sent = q;
				q = next;
				log_eventq_head = q;
				log_eventq_cnt--;
				if (q == NULL) {
					ASSERT(log_eventq_cnt == 0);
					log_eventq_tail = NULL;
				}
				mutex_exit(&eventq_sent_mutex);
			}
		}

		switch (upcall_err) {
		case 0:
			/*
			 * Success. The queue is empty.
			 */
			sysevent_upcall_status = 0;
			break;
		case EAGAIN:
			/*
			 * Delivery is on hold (but functional).
			 */
			sysevent_upcall_status = 0;
			/*
			 * If the user has already signaled for delivery
			 * resumption, continue.  Otherwise, we wait until
			 * we are signaled to continue.
			 */
			if (log_event_delivery == LOGEVENT_DELIVERY_CONT)
				continue;
			log_event_delivery = LOGEVENT_DELIVERY_HOLD;

			LOG_DEBUG1((CE_CONT, "log_event_deliver: EAGAIN\n"));
			break;
		default:
			LOG_DEBUG((CE_CONT, "log_event_deliver: "
			    "upcall err %d\n", upcall_err));
			sysevent_upcall_status = upcall_err;
			/*
			 * Signal everyone waiting that transport is down
			 */
			if (event_qfull_blocked > 0) {
				mutex_enter(&event_qfull_mutex);
				if (event_qfull_blocked > 0) {
					cv_broadcast(&event_qfull_cv);
				}
				mutex_exit(&event_qfull_mutex);
			}
			break;
		}

		/* sleep (CPR-safe) until new events arrive or state changes */
		CALLB_CPR_SAFE_BEGIN(&cprinfo);
		cv_wait(&log_event_cv, &eventq_head_mutex);
		CALLB_CPR_SAFE_END(&cprinfo, &eventq_head_mutex);
	}
	/* NOTREACHED */
}
437 
/*
 * log_event_init - Allocate and initialize log_event data structures:
 * all the mutexes and condition variables above, then the sysevent
 * channel subsystem.
 */
void
log_event_init()
{
	/* door upcall handle protection */
	mutex_init(&event_door_mutex, NULL, MUTEX_DEFAULT, NULL);

	/* event queues and delivery-thread wakeup */
	mutex_init(&eventq_head_mutex, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&eventq_sent_mutex, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&log_event_cv, NULL, CV_DEFAULT, NULL);

	/* queue-full publisher throttling */
	mutex_init(&event_qfull_mutex, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&event_qfull_cv, NULL, CV_DEFAULT, NULL);

	/* upcall retry back-off */
	mutex_init(&event_pause_mutex, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&event_pause_cv, NULL, CV_DEFAULT, NULL);

	/* channel registration database */
	mutex_init(&registered_channel_mutex, NULL, MUTEX_DEFAULT, NULL);
	sysevent_evc_init();
}
459 
/*
 * The following routines are used by kernel event publishers to
 * allocate, append and free event buffers
 */
/*
 * sysevent_alloc - Allocate new eventq struct.  This element contains
 *			an event buffer that will be used in a subsequent
 *			call to log_sysevent.  Returns NULL only if the
 *			kmem_zalloc() below fails (e.g. KM_NOSLEEP flag).
 */
sysevent_t *
sysevent_alloc(char *class, char *subclass, char *pub, int flag)
{
	int payload_sz;
	int class_sz, subclass_sz, pub_sz;
	int aligned_class_sz, aligned_subclass_sz, aligned_pub_sz;
	sysevent_t *ev;
	log_eventq_t *q;

	ASSERT(class != NULL);
	ASSERT(subclass != NULL);
	ASSERT(pub != NULL);

	/*
	 * Calculate and reserve space for the class, subclass and
	 * publisher strings in the event buffer
	 */
	class_sz = strlen(class) + 1;
	subclass_sz = strlen(subclass) + 1;
	pub_sz = strlen(pub) + 1;

	ASSERT((class_sz <= MAX_CLASS_LEN) && (subclass_sz
	    <= MAX_SUBCLASS_LEN) && (pub_sz <= MAX_PUB_LEN));

	/* String sizes must be 64-bit aligned in the event buffer */
	aligned_class_sz = SE_ALIGN(class_sz);
	aligned_subclass_sz = SE_ALIGN(subclass_sz);
	aligned_pub_sz = SE_ALIGN(pub_sz);

	/*
	 * NOTE(review): one uint64_t per string plus one extra is deducted
	 * here — presumably because the header (sysevent_impl_t) already
	 * reserves that space; confirm against sys/sysevent_impl.h.
	 */
	payload_sz = (aligned_class_sz - sizeof (uint64_t)) +
	    (aligned_subclass_sz - sizeof (uint64_t)) +
	    (aligned_pub_sz - sizeof (uint64_t)) - sizeof (uint64_t);

	/*
	 * Allocate event buffer plus additional sysevent queue
	 * and payload overhead.
	 */
	q = kmem_zalloc(sizeof (log_eventq_t) + payload_sz, flag);
	if (q == NULL) {
		return (NULL);
	}

	/* Initialize the event buffer data */
	ev = (sysevent_t *)&q->arg.buf;
	SE_VERSION(ev) = SYS_EVENT_VERSION;
	bcopy(class, SE_CLASS_NAME(ev), class_sz);

	/* subclass and publisher strings follow the class, 64-bit aligned */
	SE_SUBCLASS_OFF(ev) = SE_ALIGN(offsetof(sysevent_impl_t, se_class_name))
		+ aligned_class_sz;
	bcopy(subclass, SE_SUBCLASS_NAME(ev), subclass_sz);

	SE_PUB_OFF(ev) = SE_SUBCLASS_OFF(ev) + aligned_subclass_sz;
	bcopy(pub, SE_PUB_NAME(ev), pub_sz);

	SE_ATTR_PTR(ev) = UINT64_C(0);	/* no attribute list attached yet */
	SE_PAYLOAD_SZ(ev) = payload_sz;

	return (ev);
}
528 
/*
 * sysevent_free - Free event buffer and any attribute data.
 */
void
sysevent_free(sysevent_t *ev)
{
	log_eventq_t *q;
	nvlist_t *nvl;

	ASSERT(ev != NULL);
	/* recover the enclosing queue element the event buffer is embedded in */
	q = (log_eventq_t *)((caddr_t)ev - offsetof(log_eventq_t, arg.buf));
	nvl = (nvlist_t *)(uintptr_t)SE_ATTR_PTR(ev);

	if (nvl != NULL) {
		size_t size = 0;
		/*
		 * Remove the attribute list's encoded size from the payload
		 * so the kmem_free() below matches the size allocated by
		 * sysevent_alloc().
		 */
		(void) nvlist_size(nvl, &size, encoding);
		SE_PAYLOAD_SZ(ev) -= size;
		nvlist_free(nvl);
	}
	kmem_free(q, sizeof (log_eventq_t) + SE_PAYLOAD_SZ(ev));
}
550 
551 /*
552  * free_packed_event - Free packed event buffer
553  */
554 static void
555 free_packed_event(sysevent_t *ev)
556 {
557 	log_eventq_t *q;
558 
559 	ASSERT(ev != NULL);
560 	q = (log_eventq_t *)((caddr_t)ev - offsetof(log_eventq_t, arg.buf));
561 
562 	kmem_free(q, sizeof (log_eventq_t) + SE_PAYLOAD_SZ(ev));
563 }
564 
/*
 * sysevent_add_attr - Add new attribute element to an event attribute list
 *			If attribute list is NULL, start a new list.
 *			Returns 0 on success, SE_EINVAL for bad arguments or
 *			over-limit values, SE_ENOMEM on allocation failure.
 */
int
sysevent_add_attr(sysevent_attr_list_t **ev_attr_list, char *name,
	sysevent_value_t *se_value, int flag)
{
	int error;
	nvlist_t **nvlp = (nvlist_t **)ev_attr_list;

	if (nvlp == NULL || se_value == NULL) {
		return (SE_EINVAL);
	}

	/*
	 * attr_sz is composed of the value data size + the name data size +
	 * any header data.  64-bit aligned.
	 */
	if (strlen(name) >= MAX_ATTR_NAME) {
		return (SE_EINVAL);
	}

	/*
	 * Allocate nvlist
	 */
	if ((*nvlp == NULL) &&
	    (nvlist_alloc(nvlp, NV_UNIQUE_NAME_TYPE, flag) != 0))
		return (SE_ENOMEM);

	/* add the attribute */
	switch (se_value->value_type) {
	case SE_DATA_TYPE_BYTE:
		error = nvlist_add_byte(*ev_attr_list, name,
		    se_value->value.sv_byte);
		break;
	case SE_DATA_TYPE_INT16:
		error = nvlist_add_int16(*ev_attr_list, name,
		    se_value->value.sv_int16);
		break;
	case SE_DATA_TYPE_UINT16:
		error = nvlist_add_uint16(*ev_attr_list, name,
		    se_value->value.sv_uint16);
		break;
	case SE_DATA_TYPE_INT32:
		error = nvlist_add_int32(*ev_attr_list, name,
		    se_value->value.sv_int32);
		break;
	case SE_DATA_TYPE_UINT32:
		error = nvlist_add_uint32(*ev_attr_list, name,
		    se_value->value.sv_uint32);
		break;
	case SE_DATA_TYPE_INT64:
		error = nvlist_add_int64(*ev_attr_list, name,
		    se_value->value.sv_int64);
		break;
	case SE_DATA_TYPE_UINT64:
		error = nvlist_add_uint64(*ev_attr_list, name,
		    se_value->value.sv_uint64);
		break;
	case SE_DATA_TYPE_STRING:
		/* reject oversize strings before handing to nvlist */
		if (strlen((char *)se_value->value.sv_string) >= MAX_STRING_SZ)
			return (SE_EINVAL);
		error = nvlist_add_string(*ev_attr_list, name,
		    se_value->value.sv_string);
		break;
	case SE_DATA_TYPE_BYTES:
		/* reject oversize byte arrays before handing to nvlist */
		if (se_value->value.sv_bytes.size > MAX_BYTE_ARRAY)
			return (SE_EINVAL);
		error = nvlist_add_byte_array(*ev_attr_list, name,
		    se_value->value.sv_bytes.data,
		    se_value->value.sv_bytes.size);
		break;
	case SE_DATA_TYPE_TIME:
		error = nvlist_add_hrtime(*ev_attr_list, name,
		    se_value->value.sv_time);
		break;
	default:
		return (SE_EINVAL);
	}

	/* any nvlist_add_* failure is reported as SE_ENOMEM */
	return (error ? SE_ENOMEM : 0);
}
648 
/*
 * sysevent_free_attr - Free an attribute list not associated with an
 *			event buffer.  (Once attached via
 *			sysevent_attach_attributes(), the list is freed by
 *			sysevent_free() instead.)
 */
void
sysevent_free_attr(sysevent_attr_list_t *ev_attr_list)
{
	nvlist_free((nvlist_t *)ev_attr_list);
}
658 
/*
 * sysevent_attach_attributes - Attach an attribute list to an event buffer.
 *
 *	This data will be re-packed into contiguous memory when the event
 *	buffer is posted to log_sysevent.
 *
 *	Returns SE_EINVAL if the event already has an attribute list.
 */
int
sysevent_attach_attributes(sysevent_t *ev, sysevent_attr_list_t *ev_attr_list)
{
	size_t size = 0;

	if (SE_ATTR_PTR(ev) != UINT64_C(0)) {
		return (SE_EINVAL);
	}

	SE_ATTR_PTR(ev) = (uintptr_t)ev_attr_list;
	/* account for the encoded nvlist size in the event payload */
	(void) nvlist_size((nvlist_t *)ev_attr_list, &size, encoding);
	SE_PAYLOAD_SZ(ev) += size;
	SE_FLAG(ev) = 0;	/* not a packed buffer (see se_repack()) */

	return (0);
}
681 
/*
 * sysevent_detach_attributes - Detach but don't free attribute list from the
 *				event buffer.  Caller takes back ownership of
 *				the nvlist.
 */
void
sysevent_detach_attributes(sysevent_t *ev)
{
	size_t size = 0;
	nvlist_t *nvl;

	if ((nvl = (nvlist_t *)(uintptr_t)SE_ATTR_PTR(ev)) == NULL) {
		return;
	}

	SE_ATTR_PTR(ev) = UINT64_C(0);
	/* undo the payload accounting done in sysevent_attach_attributes() */
	(void) nvlist_size(nvl, &size, encoding);
	SE_PAYLOAD_SZ(ev) -= size;
	ASSERT(SE_PAYLOAD_SZ(ev) >= 0);
}
701 
702 /*
703  * sysevent_attr_name - Get name of attribute
704  */
705 char *
706 sysevent_attr_name(sysevent_attr_t *attr)
707 {
708 	if (attr == NULL) {
709 		return (NULL);
710 	}
711 
712 	return (nvpair_name(attr));
713 }
714 
/*
 * sysevent_attr_type - Get type of attribute
 *
 * NOTE(review): unlike sysevent_attr_name(), there is no NULL guard here;
 * callers are presumably expected to pass a valid attribute — confirm.
 */
int
sysevent_attr_type(sysevent_attr_t *attr)
{
	/*
	 * The SE_DATA_TYPE_* are typedef'ed to be the
	 * same value as DATA_TYPE_*
	 */
	return (nvpair_type((nvpair_t *)attr));
}
727 
/*
 * Repack event buffer into contiguous memory: the attached nvlist (if any)
 * is flattened with nvlist_pack() directly after the event header in a
 * newly allocated copy.  Returns the packed copy, or NULL on allocation or
 * pack failure.  The original event buffer is left untouched.
 */
static sysevent_t *
se_repack(sysevent_t *ev, int flag)
{
	size_t copy_len;
	caddr_t attr;
	size_t size;
	uint64_t attr_offset;
	sysevent_t *copy;
	log_eventq_t *qcopy;
	sysevent_attr_list_t *nvl;

	copy_len = sizeof (log_eventq_t) + SE_PAYLOAD_SZ(ev);
	qcopy = kmem_zalloc(copy_len, flag);
	if (qcopy == NULL) {
		return (NULL);
	}
	copy = (sysevent_t *)&qcopy->arg.buf;

	/*
	 * Copy event header, class, subclass and publisher names
	 * Set the attribute offset (in number of bytes) to contiguous
	 * memory after the header.
	 */

	attr_offset = SE_ATTR_OFF(ev);

	ASSERT((caddr_t)copy + attr_offset <= (caddr_t)copy + copy_len);

	bcopy(ev, copy, attr_offset);

	/* Check if attribute list exists */
	if ((nvl = (nvlist_t *)(uintptr_t)SE_ATTR_PTR(ev)) == NULL) {
		return (copy);
	}

	/*
	 * Copy attribute data to contiguous memory
	 */
	attr = (char *)copy + attr_offset;
	(void) nvlist_size(nvl, &size, encoding);
	if (nvlist_pack(nvl, &attr, &size, encoding, flag) != 0) {
		kmem_free(qcopy, copy_len);
		return (NULL);
	}
	/* the packed copy carries no nvlist pointer; mark it as packed */
	SE_ATTR_PTR(copy) = UINT64_C(0);
	SE_FLAG(copy) = SE_PACKED_BUF;

	return (copy);
}
780 
781 /*
782  * The sysevent registration provides a persistent and reliable database
783  * for channel information for sysevent channel publishers and
784  * subscribers.
785  *
786  * A channel is created and maintained by the kernel upon the first
787  * SE_OPEN_REGISTRATION operation to log_sysevent_register().  Channel
788  * event subscription information is updated as publishers or subscribers
789  * perform subsequent operations (SE_BIND_REGISTRATION, SE_REGISTER,
790  * SE_UNREGISTER and SE_UNBIND_REGISTRATION).
791  *
 * For consistency, ids are assigned for every publisher or subscriber
 * bound to a particular channel.  The ids are used to constrain resources
794  * and perform subscription lookup.
795  *
796  * Associated with each channel is a hashed list of the current subscriptions
797  * based upon event class and subclasses.  A subscription contains a class name,
798  * list of possible subclasses and an array of subscriber ids.  Subscriptions
799  * are updated for every SE_REGISTER or SE_UNREGISTER operation.
800  *
801  * Channels are closed once the last subscriber or publisher performs a
802  * SE_CLOSE_REGISTRATION operation.  All resources associated with the named
803  * channel are freed upon last close.
804  *
805  * Locking:
806  *	Every operation to log_sysevent() is protected by a single lock,
807  *	registered_channel_mutex.  It is expected that the granularity of
808  *	a single lock is sufficient given the frequency that updates will
809  *	occur.
810  *
811  *	If this locking strategy proves to be too contentious, a per-hash
812  *	or per-channel locking strategy may be implemented.
813  */
814 
815 
/*
 * CHANN_HASH maps a channel name to a bucket of registered_channels[]
 * using the same string hash as class lookup.
 */
#define	CHANN_HASH(channel_name)	(hash_func(channel_name) \
					% CHAN_HASH_SZ)

/* hash table of open channels; protected by registered_channel_mutex */
sysevent_channel_descriptor_t *registered_channels[CHAN_HASH_SZ];
static int channel_cnt;		/* number of currently open channels */
static void remove_all_class(sysevent_channel_descriptor_t *chan,
	uint32_t sub_id);
823 
/*
 * hash_func - Simple shift-and-xor string hash (a variant of the classic
 * ELF/PJW hash) used to index the channel and class hash tables.
 */
static uint32_t
hash_func(const char *s)
{
	uint32_t h = 0;
	unsigned int hibits;

	for (; *s != '\0'; s++) {
		h = (h << 4) + (uint32_t)*s;
		hibits = h & 0xf0000000;
		if (hibits != 0) {
			h ^= hibits >> 24;
			h ^= hibits;
		}
	}

	return (h);
}
842 
843 static sysevent_channel_descriptor_t *
844 get_channel(char *channel_name)
845 {
846 	int hash_index;
847 	sysevent_channel_descriptor_t *chan_list;
848 
849 	if (channel_name == NULL)
850 		return (NULL);
851 
852 	/* Find channel descriptor */
853 	hash_index = CHANN_HASH(channel_name);
854 	chan_list = registered_channels[hash_index];
855 	while (chan_list != NULL) {
856 		if (strcmp(chan_list->scd_channel_name, channel_name) == 0) {
857 			break;
858 		} else {
859 			chan_list = chan_list->scd_next;
860 		}
861 	}
862 
863 	return (chan_list);
864 }
865 
866 static class_lst_t *
867 create_channel_registration(sysevent_channel_descriptor_t *chan,
868     char *event_class, int index)
869 {
870 	size_t class_len;
871 	class_lst_t *c_list;
872 
873 	class_len = strlen(event_class) + 1;
874 	c_list = kmem_zalloc(sizeof (class_lst_t), KM_SLEEP);
875 	c_list->cl_name = kmem_zalloc(class_len, KM_SLEEP);
876 	bcopy(event_class, c_list->cl_name, class_len);
877 
878 	c_list->cl_subclass_list =
879 	    kmem_zalloc(sizeof (subclass_lst_t), KM_SLEEP);
880 	c_list->cl_subclass_list->sl_name =
881 	    kmem_zalloc(sizeof (EC_SUB_ALL), KM_SLEEP);
882 	bcopy(EC_SUB_ALL, c_list->cl_subclass_list->sl_name,
883 	    sizeof (EC_SUB_ALL));
884 
885 	c_list->cl_next = chan->scd_class_list_tbl[index];
886 	chan->scd_class_list_tbl[index] = c_list;
887 
888 	return (c_list);
889 }
890 
/*
 * free_channel_registration - Free all class and subclass subscription
 * state attached to a channel.  Called from close_channel() when the last
 * reference is dropped.
 */
static void
free_channel_registration(sysevent_channel_descriptor_t *chan)
{
	int i;
	class_lst_t *clist, *next_clist;
	subclass_lst_t *sclist, *next_sc;

	/*
	 * Inclusive bound: slot 0 holds the EC_ALL registration created in
	 * open_channel(), so the table spans CLASS_HASH_SZ + 1 slots.
	 */
	for (i = 0; i <= CLASS_HASH_SZ; ++i) {

		clist = chan->scd_class_list_tbl[i];
		while (clist != NULL) {
			sclist = clist->cl_subclass_list;
			while (sclist != NULL) {
				kmem_free(sclist->sl_name,
				    strlen(sclist->sl_name) + 1);
				next_sc = sclist->sl_next;
				kmem_free(sclist, sizeof (subclass_lst_t));
				sclist = next_sc;
			}
			kmem_free(clist->cl_name,
			    strlen(clist->cl_name) + 1);
			next_clist = clist->cl_next;
			kmem_free(clist, sizeof (class_lst_t));
			clist = next_clist;
		}
	}
	/*
	 * NOTE(review): only slot 0 is reset; the other slots are left
	 * pointing at freed memory.  Harmless today because close_channel()
	 * frees the descriptor immediately afterwards — confirm before
	 * reusing this function anywhere else.
	 */
	chan->scd_class_list_tbl[0] = NULL;
}
919 
/*
 * open_channel - Look up or create the named channel, taking a reference.
 *
 * Ownership of channel_name: if the channel already exists, the string is
 * freed here; otherwise it is retained as scd_channel_name and freed later
 * by close_channel().  Returns 0 on success, -1 on failure.
 */
static int
open_channel(char *channel_name)
{
	int hash_index;
	sysevent_channel_descriptor_t *chan, *chan_list;


	/*
	 * NOTE(review): '>' lets channel_cnt reach MAX_CHAN + 1; confirm
	 * whether '>=' was intended.
	 */
	if (channel_cnt > MAX_CHAN) {
		return (-1);
	}

	/* Find channel descriptor */
	hash_index = CHANN_HASH(channel_name);
	chan_list = registered_channels[hash_index];
	while (chan_list != NULL) {
		if (strcmp(chan_list->scd_channel_name, channel_name) == 0) {
			/* existing channel: bump refcount, consume the name */
			chan_list->scd_ref_cnt++;
			kmem_free(channel_name, strlen(channel_name) + 1);
			return (0);
		} else {
			chan_list = chan_list->scd_next;
		}
	}


	/* New channel descriptor */
	chan = kmem_zalloc(sizeof (sysevent_channel_descriptor_t), KM_SLEEP);
	chan->scd_channel_name = channel_name;

	/*
	 * Create subscriber ids in the range [1, MAX_SUBSCRIBERS).
	 * Subscriber id 0 is never allocated, but is used as a reserved id
	 * by libsysevent
	 */
	if ((chan->scd_subscriber_cache = vmem_create(channel_name, (void *)1,
	    MAX_SUBSCRIBERS + 1, 1, NULL, NULL, NULL, 0,
	    VM_NOSLEEP | VMC_IDENTIFIER)) == NULL) {
		kmem_free(chan, sizeof (sysevent_channel_descriptor_t));
		return (-1);
	}
	if ((chan->scd_publisher_cache = vmem_create(channel_name, (void *)1,
	    MAX_PUBLISHERS + 1, 1, NULL, NULL, NULL, 0,
	    VM_NOSLEEP | VMC_IDENTIFIER)) == NULL) {
		vmem_destroy(chan->scd_subscriber_cache);
		kmem_free(chan, sizeof (sysevent_channel_descriptor_t));
		return (-1);
	}

	chan->scd_ref_cnt = 1;

	/* slot 0 of the class table always carries the EC_ALL registration */
	(void) create_channel_registration(chan, EC_ALL, 0);

	if (registered_channels[hash_index] != NULL)
		chan->scd_next = registered_channels[hash_index];

	registered_channels[hash_index] = chan;

	++channel_cnt;

	return (0);
}
981 
/*
 * close_channel - Drop a reference on the named channel.  On last
 * reference: free all registration state, both id caches, the channel
 * name and the descriptor, and unlink it from the hash chain.
 * NOTE(review): presumably called with registered_channel_mutex held —
 * confirm against log_sysevent_register().
 */
static void
close_channel(char *channel_name)
{
	int hash_index;
	sysevent_channel_descriptor_t *chan, *prev_chan;

	/* Find channel descriptor */
	hash_index = CHANN_HASH(channel_name);
	prev_chan = chan = registered_channels[hash_index];

	while (chan != NULL) {
		if (strcmp(chan->scd_channel_name, channel_name) == 0) {
			break;
		} else {
			prev_chan = chan;
			chan = chan->scd_next;
		}
	}

	if (chan == NULL)
		return;

	/* drop one reference; keep the channel while others still hold it */
	chan->scd_ref_cnt--;
	if (chan->scd_ref_cnt > 0)
		return;

	/* last reference gone: tear everything down */
	free_channel_registration(chan);
	vmem_destroy(chan->scd_subscriber_cache);
	vmem_destroy(chan->scd_publisher_cache);
	kmem_free(chan->scd_channel_name,
	    strlen(chan->scd_channel_name) + 1);
	if (registered_channels[hash_index] == chan)
		registered_channels[hash_index] = chan->scd_next;
	else
		prev_chan->scd_next = chan->scd_next;
	kmem_free(chan, sizeof (sysevent_channel_descriptor_t));
	--channel_cnt;
}
1020 
1021 static id_t
1022 bind_common(sysevent_channel_descriptor_t *chan, int type)
1023 {
1024 	id_t id;
1025 
1026 	if (type == SUBSCRIBER) {
1027 		id = (id_t)(uintptr_t)vmem_alloc(chan->scd_subscriber_cache, 1,
1028 		    VM_NOSLEEP | VM_NEXTFIT);
1029 		if (id <= 0 || id > MAX_SUBSCRIBERS)
1030 			return (0);
1031 		chan->scd_subscriber_ids[id] = 1;
1032 	} else {
1033 		id = (id_t)(uintptr_t)vmem_alloc(chan->scd_publisher_cache, 1,
1034 		    VM_NOSLEEP | VM_NEXTFIT);
1035 		if (id <= 0 || id > MAX_PUBLISHERS)
1036 			return (0);
1037 		chan->scd_publisher_ids[id] = 1;
1038 	}
1039 
1040 	return (id);
1041 }
1042 
1043 static int
1044 unbind_common(sysevent_channel_descriptor_t *chan, int type, id_t id)
1045 {
1046 	if (type == SUBSCRIBER) {
1047 		if (id <= 0 || id > MAX_SUBSCRIBERS)
1048 			return (0);
1049 		if (chan->scd_subscriber_ids[id] == 0)
1050 			return (0);
1051 		(void) remove_all_class(chan, id);
1052 		chan->scd_subscriber_ids[id] = 0;
1053 		vmem_free(chan->scd_subscriber_cache, (void *)(uintptr_t)id, 1);
1054 	} else {
1055 		if (id <= 0 || id > MAX_PUBLISHERS)
1056 			return (0);
1057 		if (chan->scd_publisher_ids[id] == 0)
1058 			return (0);
1059 		chan->scd_publisher_ids[id] = 0;
1060 		vmem_free(chan->scd_publisher_cache, (void *)(uintptr_t)id, 1);
1061 	}
1062 
1063 	return (1);
1064 }
1065 
/*
 * release_id - Unbind a subscriber/publisher id and, if the unbind
 * succeeded, drop the channel reference taken when the id was bound
 * (which closes the channel on last release).
 */
static void
release_id(sysevent_channel_descriptor_t *chan, int type, id_t id)
{
	if (unbind_common(chan, type, id))
		close_channel(chan->scd_channel_name);
}
1072 
1073 static subclass_lst_t *
1074 find_subclass(class_lst_t *c_list, char *subclass)
1075 {
1076 	subclass_lst_t *sc_list;
1077 
1078 	if (c_list == NULL)
1079 		return (NULL);
1080 
1081 	sc_list = c_list->cl_subclass_list;
1082 
1083 	while (sc_list != NULL) {
1084 		if (strcmp(sc_list->sl_name, subclass) == 0) {
1085 			return (sc_list);
1086 		}
1087 		sc_list = sc_list->sl_next;
1088 	}
1089 
1090 	return (NULL);
1091 }
1092 
1093 static void
1094 insert_subclass(class_lst_t *c_list, char **subclass_names,
1095 	int subclass_num, uint32_t sub_id)
1096 {
1097 	int i, subclass_sz;
1098 	subclass_lst_t *sc_list;
1099 
1100 	for (i = 0; i < subclass_num; ++i) {
1101 		if ((sc_list = find_subclass(c_list, subclass_names[i]))
1102 		    != NULL) {
1103 			sc_list->sl_num[sub_id] = 1;
1104 		} else {
1105 
1106 			sc_list = kmem_zalloc(sizeof (subclass_lst_t),
1107 			    KM_SLEEP);
1108 			subclass_sz = strlen(subclass_names[i]) + 1;
1109 			sc_list->sl_name = kmem_zalloc(subclass_sz, KM_SLEEP);
1110 			bcopy(subclass_names[i], sc_list->sl_name,
1111 			    subclass_sz);
1112 
1113 			sc_list->sl_num[sub_id] = 1;
1114 
1115 			sc_list->sl_next = c_list->cl_subclass_list;
1116 			c_list->cl_subclass_list = sc_list;
1117 		}
1118 	}
1119 }
1120 
1121 static class_lst_t *
1122 find_class(sysevent_channel_descriptor_t *chan, char *class_name)
1123 {
1124 	class_lst_t *c_list;
1125 
1126 	c_list = chan->scd_class_list_tbl[CLASS_HASH(class_name)];
1127 	while (c_list != NULL) {
1128 		if (strcmp(class_name, c_list->cl_name) == 0)
1129 			break;
1130 		c_list = c_list->cl_next;
1131 	}
1132 
1133 	return (c_list);
1134 }
1135 
1136 static void
1137 remove_all_class(sysevent_channel_descriptor_t *chan, uint32_t sub_id)
1138 {
1139 	int i;
1140 	class_lst_t *c_list;
1141 	subclass_lst_t *sc_list;
1142 
1143 	for (i = 0; i <= CLASS_HASH_SZ; ++i) {
1144 
1145 		c_list = chan->scd_class_list_tbl[i];
1146 		while (c_list != NULL) {
1147 			sc_list = c_list->cl_subclass_list;
1148 			while (sc_list != NULL) {
1149 				sc_list->sl_num[sub_id] = 0;
1150 				sc_list = sc_list->sl_next;
1151 			}
1152 			c_list = c_list->cl_next;
1153 		}
1154 	}
1155 }
1156 
1157 static void
1158 remove_class(sysevent_channel_descriptor_t *chan, uint32_t sub_id,
1159 	char *class_name)
1160 {
1161 	class_lst_t *c_list;
1162 	subclass_lst_t *sc_list;
1163 
1164 	if (strcmp(class_name, EC_ALL) == 0) {
1165 		remove_all_class(chan, sub_id);
1166 		return;
1167 	}
1168 
1169 	if ((c_list = find_class(chan, class_name)) == NULL) {
1170 		return;
1171 	}
1172 
1173 	sc_list = c_list->cl_subclass_list;
1174 	while (sc_list != NULL) {
1175 		sc_list->sl_num[sub_id] = 0;
1176 		sc_list = sc_list->sl_next;
1177 	}
1178 }
1179 
/*
 * insert_class - Register subscriber sub_id's interest in the given
 *	event class and subclass list, creating the class entry in the
 *	channel's registration cache if necessary.  Returns 0 on
 *	success, -1 if the class name exceeds MAX_CLASS_LEN.
 */
static int
insert_class(sysevent_channel_descriptor_t *chan, char *event_class,
	char **event_subclass_lst, int subclass_num, uint32_t sub_id)
{
	class_lst_t *c_list;

	if (strcmp(event_class, EC_ALL) == 0) {
		/*
		 * EC_ALL registrations live in hash bucket 0.  Only one
		 * subclass entry is recorded here regardless of
		 * subclass_num -- presumably callers pass a one-element
		 * list (EC_SUB_ALL) for this case; NOTE(review): confirm
		 * against the userland registration path.
		 */
		insert_subclass(chan->scd_class_list_tbl[0],
		    event_subclass_lst, 1, sub_id);
		return (0);
	}

	if (strlen(event_class) + 1 > MAX_CLASS_LEN)
		return (-1);

	/* New class, add to the registration cache */
	if ((c_list = find_class(chan, event_class)) == NULL) {
		c_list = create_channel_registration(chan, event_class,
		    CLASS_HASH(event_class));
	}

	/* Update the subclass list */
	insert_subclass(c_list, event_subclass_lst, subclass_num, sub_id);

	return (0);
}
1206 
1207 static int
1208 add_registration(sysevent_channel_descriptor_t *chan, uint32_t sub_id,
1209 	char *nvlbuf, size_t nvlsize)
1210 {
1211 	uint_t num_elem;
1212 	char *event_class;
1213 	char **event_list;
1214 	nvlist_t *nvl;
1215 	nvpair_t *nvpair = NULL;
1216 
1217 	if (nvlist_unpack(nvlbuf, nvlsize, &nvl, KM_SLEEP) != 0)
1218 		return (-1);
1219 
1220 	if ((nvpair = nvlist_next_nvpair(nvl, nvpair)) == NULL) {
1221 		nvlist_free(nvl);
1222 		return (-1);
1223 	}
1224 
1225 	if ((event_class = nvpair_name(nvpair)) == NULL) {
1226 		nvlist_free(nvl);
1227 		return (-1);
1228 	}
1229 	if (nvpair_value_string_array(nvpair, &event_list,
1230 	    &num_elem) != 0) {
1231 		nvlist_free(nvl);
1232 		return (-1);
1233 	}
1234 
1235 	if (insert_class(chan, event_class, event_list, num_elem, sub_id) < 0) {
1236 		nvlist_free(nvl);
1237 		return (-1);
1238 	}
1239 
1240 	nvlist_free(nvl);
1241 
1242 	return (0);
1243 }
1244 
1245 /*
1246  * get_registration - Return the requested class hash chain
1247  */
1248 static int
1249 get_registration(sysevent_channel_descriptor_t *chan, char *databuf,
1250 	uint32_t *bufsz, uint32_t class_index)
1251 {
1252 	int num_classes = 0;
1253 	char *nvlbuf = NULL;
1254 	size_t nvlsize;
1255 	nvlist_t *nvl;
1256 	class_lst_t *clist;
1257 	subclass_lst_t *sc_list;
1258 
1259 	if (class_index < 0 || class_index > CLASS_HASH_SZ)
1260 		return (EINVAL);
1261 
1262 	if ((clist = chan->scd_class_list_tbl[class_index]) == NULL) {
1263 		return (ENOENT);
1264 	}
1265 
1266 	if (nvlist_alloc(&nvl, 0, 0) != 0) {
1267 		return (EFAULT);
1268 	}
1269 
1270 	while (clist != NULL) {
1271 		if (nvlist_add_string(nvl, CLASS_NAME, clist->cl_name)
1272 		    != 0) {
1273 			nvlist_free(nvl);
1274 			return (EFAULT);
1275 		}
1276 
1277 		sc_list = clist->cl_subclass_list;
1278 		while (sc_list != NULL) {
1279 			if (nvlist_add_byte_array(nvl, sc_list->sl_name,
1280 			    sc_list->sl_num, MAX_SUBSCRIBERS) != 0) {
1281 				nvlist_free(nvl);
1282 				return (EFAULT);
1283 			}
1284 			sc_list = sc_list->sl_next;
1285 		}
1286 		num_classes++;
1287 		clist = clist->cl_next;
1288 	}
1289 
1290 	if (num_classes == 0) {
1291 		nvlist_free(nvl);
1292 		return (ENOENT);
1293 	}
1294 
1295 	if (nvlist_pack(nvl, &nvlbuf, &nvlsize, NV_ENCODE_NATIVE,
1296 	    KM_SLEEP)
1297 	    != 0) {
1298 		nvlist_free(nvl);
1299 		return (EFAULT);
1300 	}
1301 
1302 	nvlist_free(nvl);
1303 
1304 	if (nvlsize > *bufsz) {
1305 		kmem_free(nvlbuf, nvlsize);
1306 		*bufsz = nvlsize;
1307 		return (EAGAIN);
1308 	}
1309 
1310 	bcopy(nvlbuf, databuf, nvlsize);
1311 	kmem_free(nvlbuf, nvlsize);
1312 
1313 	return (0);
1314 }
1315 
1316 /*
1317  * log_sysevent_register - Register event subscriber for a particular
1318  *		event channel.
1319  */
1320 int
1321 log_sysevent_register(char *channel_name, char *udatabuf, se_pubsub_t *udata)
1322 {
1323 	int error = 0;
1324 	char *kchannel, *databuf = NULL;
1325 	size_t bufsz;
1326 	se_pubsub_t kdata;
1327 	sysevent_channel_descriptor_t *chan;
1328 
1329 	if (copyin(udata, &kdata, sizeof (se_pubsub_t)) == -1) {
1330 		return (EFAULT);
1331 	}
1332 	if (kdata.ps_channel_name_len == 0) {
1333 		return (EINVAL);
1334 	}
1335 	kchannel = kmem_alloc(kdata.ps_channel_name_len, KM_SLEEP);
1336 	if (copyin(channel_name, kchannel, kdata.ps_channel_name_len) == -1) {
1337 		kmem_free(kchannel, kdata.ps_channel_name_len);
1338 		return (EFAULT);
1339 	}
1340 	bufsz = kdata.ps_buflen;
1341 	if (bufsz > 0) {
1342 		databuf = kmem_alloc(bufsz, KM_SLEEP);
1343 		if (copyin(udatabuf, databuf, bufsz) == -1) {
1344 			kmem_free(kchannel, kdata.ps_channel_name_len);
1345 			kmem_free(databuf, bufsz);
1346 			return (EFAULT);
1347 		}
1348 	}
1349 
1350 	mutex_enter(&registered_channel_mutex);
1351 	if (kdata.ps_op != SE_OPEN_REGISTRATION &&
1352 	    kdata.ps_op != SE_CLOSE_REGISTRATION) {
1353 		chan = get_channel(kchannel);
1354 		if (chan == NULL) {
1355 			mutex_exit(&registered_channel_mutex);
1356 			kmem_free(kchannel, kdata.ps_channel_name_len);
1357 			if (bufsz > 0)
1358 				kmem_free(databuf, bufsz);
1359 			return (ENOENT);
1360 		}
1361 	}
1362 
1363 	switch (kdata.ps_op) {
1364 	case SE_OPEN_REGISTRATION:
1365 		if (open_channel(kchannel) != 0) {
1366 			error = ENOMEM;
1367 			if (bufsz > 0)
1368 				kmem_free(databuf, bufsz);
1369 			kmem_free(kchannel, kdata.ps_channel_name_len);
1370 		}
1371 
1372 		mutex_exit(&registered_channel_mutex);
1373 		return (error);
1374 	case SE_CLOSE_REGISTRATION:
1375 		close_channel(kchannel);
1376 		break;
1377 	case SE_BIND_REGISTRATION:
1378 		if ((kdata.ps_id = bind_common(chan, kdata.ps_type)) <= 0)
1379 			error = EBUSY;
1380 		break;
1381 	case SE_UNBIND_REGISTRATION:
1382 		(void) unbind_common(chan, kdata.ps_type, (id_t)kdata.ps_id);
1383 		break;
1384 	case SE_REGISTER:
1385 		if (bufsz == 0) {
1386 			error = EINVAL;
1387 			break;
1388 		}
1389 		if (add_registration(chan, kdata.ps_id, databuf, bufsz) == -1)
1390 			error = EINVAL;
1391 		break;
1392 	case SE_UNREGISTER:
1393 		if (bufsz == 0) {
1394 			error = EINVAL;
1395 			break;
1396 		}
1397 		remove_class(chan, kdata.ps_id, databuf);
1398 		break;
1399 	case SE_CLEANUP:
1400 		/* Cleanup the indicated subscriber or publisher */
1401 		release_id(chan, kdata.ps_type, kdata.ps_id);
1402 		break;
1403 	case SE_GET_REGISTRATION:
1404 		error = get_registration(chan, databuf,
1405 		    &kdata.ps_buflen, kdata.ps_id);
1406 		break;
1407 	default:
1408 		error = ENOTSUP;
1409 	}
1410 
1411 	mutex_exit(&registered_channel_mutex);
1412 
1413 	kmem_free(kchannel, kdata.ps_channel_name_len);
1414 
1415 	if (bufsz > 0) {
1416 		if (copyout(databuf, udatabuf, bufsz) == -1)
1417 			error = EFAULT;
1418 		kmem_free(databuf, bufsz);
1419 	}
1420 
1421 	if (copyout(&kdata, udata, sizeof (se_pubsub_t)) == -1)
1422 		return (EFAULT);
1423 
1424 	return (error);
1425 }
1426 
1427 /*
1428  * log_sysevent_copyout_data - Copyout event data to userland.
1429  *			This is called from modctl(MODEVENTS, MODEVENTS_GETDATA)
1430  *			The buffer size is always sufficient.
1431  */
1432 int
1433 log_sysevent_copyout_data(sysevent_id_t *eid, size_t ubuflen, caddr_t ubuf)
1434 {
1435 	int error = ENOENT;
1436 	log_eventq_t *q;
1437 	sysevent_t *ev;
1438 	sysevent_id_t eid_copy;
1439 
1440 	/*
1441 	 * Copy eid
1442 	 */
1443 	if (copyin(eid, &eid_copy, sizeof (sysevent_id_t)) == -1) {
1444 		return (EFAULT);
1445 	}
1446 
1447 	mutex_enter(&eventq_sent_mutex);
1448 	q = log_eventq_sent;
1449 
1450 	/*
1451 	 * Search for event buffer on the sent queue with matching
1452 	 * event identifier
1453 	 */
1454 	while (q) {
1455 		ev = (sysevent_t *)&q->arg.buf;
1456 
1457 		if (SE_TIME(ev) != eid_copy.eid_ts ||
1458 		    SE_SEQ(ev) != eid_copy.eid_seq) {
1459 			q = q->next;
1460 			continue;
1461 		}
1462 
1463 		if (ubuflen < SE_SIZE(ev)) {
1464 			error = EFAULT;
1465 			break;
1466 		}
1467 		if (copyout(ev, ubuf, SE_SIZE(ev)) != 0) {
1468 			error = EFAULT;
1469 			LOG_DEBUG((CE_NOTE, "Unable to retrieve system event "
1470 			    "0x%" PRIx64 " from queue: EFAULT\n",
1471 			    eid->eid_seq));
1472 		} else {
1473 			error = 0;
1474 		}
1475 		break;
1476 	}
1477 
1478 	mutex_exit(&eventq_sent_mutex);
1479 
1480 	return (error);
1481 }
1482 
1483 /*
1484  * log_sysevent_free_data - Free kernel copy of the event buffer identified
1485  *			by eid (must have already been sent).  Called from
1486  *			modctl(MODEVENTS, MODEVENTS_FREEDATA).
1487  */
1488 int
1489 log_sysevent_free_data(sysevent_id_t *eid)
1490 {
1491 	int error = ENOENT;
1492 	sysevent_t *ev;
1493 	log_eventq_t *q, *prev = NULL;
1494 	sysevent_id_t eid_copy;
1495 
1496 	/*
1497 	 * Copy eid
1498 	 */
1499 	if (copyin(eid, &eid_copy, sizeof (sysevent_id_t)) == -1) {
1500 		return (EFAULT);
1501 	}
1502 
1503 	mutex_enter(&eventq_sent_mutex);
1504 	q = log_eventq_sent;
1505 
1506 	/*
1507 	 * Look for the event to be freed on the sent queue.  Due to delayed
1508 	 * processing of the event, it may not be on the sent queue yet.
1509 	 * It is up to the user to retry the free operation to ensure that the
1510 	 * event is properly freed.
1511 	 */
1512 	while (q) {
1513 		ev = (sysevent_t *)&q->arg.buf;
1514 
1515 		if (SE_TIME(ev) != eid_copy.eid_ts ||
1516 		    SE_SEQ(ev) != eid_copy.eid_seq) {
1517 			prev = q;
1518 			q = q->next;
1519 			continue;
1520 		}
1521 		/*
1522 		 * Take it out of log_eventq_sent and free it
1523 		 */
1524 		if (prev) {
1525 			prev->next = q->next;
1526 		} else {
1527 			log_eventq_sent = q->next;
1528 		}
1529 		free_packed_event(ev);
1530 		error = 0;
1531 		break;
1532 	}
1533 
1534 	mutex_exit(&eventq_sent_mutex);
1535 
1536 	return (error);
1537 }
1538 
1539 /*
1540  * log_sysevent_flushq - Begin or resume event buffer delivery.  If neccessary,
1541  *			create log_event_deliver thread or wake it up
1542  */
1543 /*ARGSUSED*/
1544 void
1545 log_sysevent_flushq(int cmd, uint_t flag)
1546 {
1547 	mutex_enter(&eventq_head_mutex);
1548 
1549 	/*
1550 	 * Start the event delivery thread
1551 	 * Mark the upcall status as active since we should
1552 	 * now be able to begin emptying the queue normally.
1553 	 */
1554 	if (!async_thread) {
1555 		sysevent_upcall_status = 0;
1556 		sysevent_daemon_init = 1;
1557 		setup_ddi_poststartup();
1558 		async_thread = thread_create(NULL, 0, log_event_deliver,
1559 		    NULL, 0, &p0, TS_RUN, minclsyspri);
1560 	}
1561 
1562 	log_event_delivery = LOGEVENT_DELIVERY_CONT;
1563 	cv_signal(&log_event_cv);
1564 	mutex_exit(&eventq_head_mutex);
1565 }
1566 
1567 /*
1568  * log_sysevent_filename - Called by syseventd via
1569  *			modctl(MODEVENTS, MODEVENTS_SET_DOOR_UPCALL_FILENAME)
1570  *			to subsequently bind the event_door.
1571  *
1572  *			This routine is called everytime syseventd (re)starts
1573  *			and must therefore replay any events buffers that have
1574  *			been sent but not freed.
1575  *
1576  *			Event buffer delivery begins after a call to
1577  *			log_sysevent_flushq().
1578  */
1579 int
1580 log_sysevent_filename(char *file)
1581 {
1582 	mutex_enter(&event_door_mutex);
1583 
1584 	(void) strlcpy(logevent_door_upcall_filename, file,
1585 	    sizeof (logevent_door_upcall_filename));
1586 
1587 	/* Unbind old event door */
1588 	if (event_door != NULL)
1589 		door_ki_rele(event_door);
1590 	/* Establish door connection with user event daemon (syseventd) */
1591 	if (door_ki_open(logevent_door_upcall_filename, &event_door) != 0)
1592 		event_door = NULL;
1593 
1594 	mutex_exit(&event_door_mutex);
1595 
1596 	/*
1597 	 * We are called when syseventd restarts. Move all sent, but
1598 	 * not committed events from log_eventq_sent to log_eventq_head.
1599 	 * Do it in proper order to maintain increasing event id.
1600 	 */
1601 	mutex_enter(&eventq_head_mutex);
1602 
1603 	mutex_enter(&eventq_sent_mutex);
1604 	while (log_eventq_sent) {
1605 		log_eventq_t *tmp = log_eventq_sent->next;
1606 		log_eventq_sent->next = log_eventq_head;
1607 		if (log_eventq_head == NULL) {
1608 			ASSERT(log_eventq_cnt == 0);
1609 			log_eventq_tail = log_eventq_sent;
1610 			log_eventq_tail->next = NULL;
1611 		} else if (log_eventq_head == log_eventq_tail) {
1612 			ASSERT(log_eventq_cnt == 1);
1613 			ASSERT(log_eventq_head->next == NULL);
1614 			ASSERT(log_eventq_tail->next == NULL);
1615 		}
1616 		log_eventq_head = log_eventq_sent;
1617 		log_eventq_sent = tmp;
1618 		log_eventq_cnt++;
1619 	}
1620 	mutex_exit(&eventq_sent_mutex);
1621 	mutex_exit(&eventq_head_mutex);
1622 
1623 	return (0);
1624 }
1625 
1626 /*
1627  * queue_sysevent - queue an event buffer
1628  */
1629 static int
1630 queue_sysevent(sysevent_t *ev, sysevent_id_t *eid, int flag)
1631 {
1632 	log_eventq_t *q;
1633 
1634 	ASSERT(flag == SE_SLEEP || flag == SE_NOSLEEP);
1635 
1636 	DTRACE_SYSEVENT2(post, evch_bind_t *, NULL, sysevent_impl_t *, ev);
1637 
1638 restart:
1639 
1640 	/* Max Q size exceeded */
1641 	mutex_enter(&event_qfull_mutex);
1642 	if (sysevent_daemon_init && log_eventq_cnt >= logevent_max_q_sz) {
1643 		/*
1644 		 * If queue full and transport down, return no transport
1645 		 */
1646 		if (sysevent_upcall_status != 0) {
1647 			mutex_exit(&event_qfull_mutex);
1648 			free_packed_event(ev);
1649 			eid->eid_seq = UINT64_C(0);
1650 			eid->eid_ts = INT64_C(0);
1651 			return (SE_NO_TRANSPORT);
1652 		}
1653 		if (flag == SE_NOSLEEP) {
1654 			mutex_exit(&event_qfull_mutex);
1655 			free_packed_event(ev);
1656 			eid->eid_seq = UINT64_C(0);
1657 			eid->eid_ts = INT64_C(0);
1658 			return (SE_EQSIZE);
1659 		}
1660 		event_qfull_blocked++;
1661 		cv_wait(&event_qfull_cv, &event_qfull_mutex);
1662 		event_qfull_blocked--;
1663 		mutex_exit(&event_qfull_mutex);
1664 		goto restart;
1665 	}
1666 	mutex_exit(&event_qfull_mutex);
1667 
1668 	mutex_enter(&eventq_head_mutex);
1669 
1670 	/* Time stamp and assign ID */
1671 	SE_SEQ(ev) = eid->eid_seq = atomic_add_64_nv(&kernel_event_id,
1672 	    (uint64_t)1);
1673 	SE_TIME(ev) = eid->eid_ts = gethrtime();
1674 
1675 	LOG_DEBUG1((CE_CONT, "log_sysevent: class=%d type=%d id=0x%llx\n",
1676 	    SE_CLASS(ev), SE_SUBCLASS(ev), (longlong_t)SE_SEQ(ev)));
1677 
1678 	/*
1679 	 * Put event on eventq
1680 	 */
1681 	q = (log_eventq_t *)((caddr_t)ev - offsetof(log_eventq_t, arg.buf));
1682 	q->next = NULL;
1683 	if (log_eventq_head == NULL) {
1684 		ASSERT(log_eventq_cnt == 0);
1685 		log_eventq_head = q;
1686 		log_eventq_tail = q;
1687 	} else {
1688 		if (log_eventq_head == log_eventq_tail) {
1689 			ASSERT(log_eventq_cnt == 1);
1690 			ASSERT(log_eventq_head->next == NULL);
1691 			ASSERT(log_eventq_tail->next == NULL);
1692 		}
1693 		log_eventq_tail->next = q;
1694 		log_eventq_tail = q;
1695 	}
1696 	log_eventq_cnt++;
1697 
1698 	/* Signal event delivery thread */
1699 	if (log_eventq_cnt == 1) {
1700 		cv_signal(&log_event_cv);
1701 	}
1702 	mutex_exit(&eventq_head_mutex);
1703 
1704 	return (0);
1705 }
1706 
1707 /*
1708  * log_sysevent - kernel system event logger.
1709  *
1710  * Returns SE_ENOMEM if buf allocation failed or SE_EQSIZE if the
1711  * maximum event queue size will be exceeded
1712  * Returns 0 for successfully queued event buffer
1713  */
1714 int
1715 log_sysevent(sysevent_t *ev, int flag, sysevent_id_t *eid)
1716 {
1717 	sysevent_t *ev_copy;
1718 	int rval;
1719 
1720 	ASSERT(flag == SE_SLEEP || flag == SE_NOSLEEP);
1721 	ASSERT(!(flag == SE_SLEEP && servicing_interrupt()));
1722 
1723 	ev_copy = se_repack(ev, flag);
1724 	if (ev_copy == NULL) {
1725 		ASSERT(flag == SE_NOSLEEP);
1726 		return (SE_ENOMEM);
1727 	}
1728 	rval = queue_sysevent(ev_copy, eid, flag);
1729 	ASSERT(rval == 0 || rval == SE_ENOMEM || rval == SE_EQSIZE ||
1730 	    rval == SE_NO_TRANSPORT);
1731 	ASSERT(!(flag == SE_SLEEP && (rval == SE_EQSIZE || rval == SE_ENOMEM)));
1732 	return (rval);
1733 }
1734 
1735 /*
1736  * log_usr_sysevent - user system event logger
1737  *			Private to devfsadm and accessible only via
1738  *			modctl(MODEVENTS, MODEVENTS_POST_EVENT)
1739  */
1740 int
1741 log_usr_sysevent(sysevent_t *ev, int ev_size, sysevent_id_t *eid)
1742 {
1743 	int ret, copy_sz;
1744 	sysevent_t *ev_copy;
1745 	sysevent_id_t new_eid;
1746 	log_eventq_t *qcopy;
1747 
1748 	copy_sz = ev_size + offsetof(log_eventq_t, arg) +
1749 	    offsetof(log_event_upcall_arg_t, buf);
1750 	qcopy = kmem_zalloc(copy_sz, KM_SLEEP);
1751 	ev_copy = (sysevent_t *)&qcopy->arg.buf;
1752 
1753 	/*
1754 	 * Copy event
1755 	 */
1756 	if (copyin(ev, ev_copy, ev_size) == -1) {
1757 		kmem_free(qcopy, copy_sz);
1758 		return (EFAULT);
1759 	}
1760 
1761 	if ((ret = queue_sysevent(ev_copy, &new_eid, SE_NOSLEEP)) != 0) {
1762 		if (ret == SE_ENOMEM || ret == SE_EQSIZE)
1763 			return (EAGAIN);
1764 		else
1765 			return (EIO);
1766 	}
1767 
1768 	if (copyout(&new_eid, eid, sizeof (sysevent_id_t)) == -1) {
1769 		return (EFAULT);
1770 	}
1771 
1772 	return (0);
1773 }
1774 
1775 
1776 
1777 int
1778 ddi_log_sysevent(
1779 	dev_info_t		*dip,
1780 	char			*vendor,
1781 	char			*class,
1782 	char			*subclass,
1783 	nvlist_t		*attr_list,
1784 	sysevent_id_t		*eidp,
1785 	int			sleep_flag)
1786 {
1787 	sysevent_attr_list_t	*list = (sysevent_attr_list_t *)attr_list;
1788 	char			pubstr[32];
1789 	sysevent_t		*event;
1790 	sysevent_id_t		eid;
1791 	const char		*drvname;
1792 	char			*publisher;
1793 	int			se_flag;
1794 	int			rval;
1795 	int			n;
1796 
1797 	if (sleep_flag == DDI_SLEEP && servicing_interrupt()) {
1798 		cmn_err(CE_NOTE, "!ddi_log_syevent: driver %s%d - cannot queue "
1799 		    "event from interrupt context with sleep semantics\n",
1800 		    ddi_driver_name(dip), ddi_get_instance(dip));
1801 		return (DDI_ECONTEXT);
1802 	}
1803 
1804 	drvname = ddi_driver_name(dip);
1805 	n = strlen(vendor) + strlen(drvname) + 7;
1806 	if (n < sizeof (pubstr)) {
1807 		publisher = pubstr;
1808 	} else {
1809 		publisher = kmem_alloc(n,
1810 		    (sleep_flag == DDI_SLEEP) ? KM_SLEEP : KM_NOSLEEP);
1811 		if (publisher == NULL) {
1812 			return (DDI_ENOMEM);
1813 		}
1814 	}
1815 	(void) strcpy(publisher, vendor);
1816 	(void) strcat(publisher, ":kern:");
1817 	(void) strcat(publisher, drvname);
1818 
1819 	se_flag = (sleep_flag == DDI_SLEEP) ? SE_SLEEP : SE_NOSLEEP;
1820 	event = sysevent_alloc(class, subclass, publisher, se_flag);
1821 
1822 	if (publisher != pubstr) {
1823 		kmem_free(publisher, n);
1824 	}
1825 
1826 	if (event == NULL) {
1827 		return (DDI_ENOMEM);
1828 	}
1829 
1830 	if (list) {
1831 		(void) sysevent_attach_attributes(event, list);
1832 	}
1833 
1834 	rval = log_sysevent(event, se_flag, &eid);
1835 	if (list) {
1836 		sysevent_detach_attributes(event);
1837 	}
1838 	sysevent_free(event);
1839 	if (rval == 0) {
1840 		if (eidp) {
1841 			eidp->eid_seq = eid.eid_seq;
1842 			eidp->eid_ts = eid.eid_ts;
1843 		}
1844 		return (DDI_SUCCESS);
1845 	}
1846 	if (rval == SE_NO_TRANSPORT)
1847 		return (DDI_ETRANSPORT);
1848 
1849 	ASSERT(rval == SE_ENOMEM || rval == SE_EQSIZE);
1850 	return ((rval == SE_ENOMEM) ? DDI_ENOMEM : DDI_EBUSY);
1851 }
1852 
/*
 * log_sysevent_new_id - Allocate and return the next kernel-wide event
 *	sequence number (shared counter with queue_sysevent's id
 *	assignment).
 */
uint64_t
log_sysevent_new_id(void)
{
	return (atomic_add_64_nv(&kernel_event_id, (uint64_t)1));
}
1858