xref: /titanic_52/usr/src/uts/common/os/log_sysevent.c (revision a71a9b4041b747842ca055046d2e2b9c3564a4a8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
24  * Copyright 2016 Toomas Soome <tsoome@me.com>
25  */
26 
27 #include <sys/types.h>
28 #include <sys/errno.h>
29 #include <sys/stropts.h>
30 #include <sys/debug.h>
31 #include <sys/ddi.h>
32 #include <sys/sunddi.h>
33 #include <sys/vmem.h>
34 #include <sys/cmn_err.h>
35 #include <sys/callb.h>
36 #include <sys/sysevent.h>
37 #include <sys/sysevent_impl.h>
38 #include <sys/sysevent/dev.h>
39 #include <sys/modctl.h>
40 #include <sys/sysmacros.h>
41 #include <sys/disp.h>
42 #include <sys/autoconf.h>
43 #include <sys/atomic.h>
44 #include <sys/sdt.h>
45 
46 /* for doors */
47 #include <sys/pathname.h>
48 #include <sys/door.h>
49 #include <sys/kmem.h>
50 #include <sys/cpuvar.h>
51 #include <sys/fs/snode.h>
52 
53 /*
54  * log_sysevent.c - Provides the interfaces for kernel event publication
55  *			to the sysevent event daemon (syseventd).
56  */
57 
/*
 * Debug stuff
 *
 * log_event_debug gates the debug output below: LOG_DEBUG prints when it is
 * non-zero; LOG_DEBUG1 (compiled in only when DEBUG is defined) prints when
 * it is greater than 1.  Both macros take a fully parenthesized cmn_err()
 * argument list, e.g. LOG_DEBUG((CE_CONT, "msg\n")).
 *
 * NOTE(review): both macros expand to a bare "if" statement, so using one
 * as the unbraced body of an if/else would capture the following "else".
 */
static int log_event_debug = 0;
#define	LOG_DEBUG(args)  if (log_event_debug) cmn_err args
#ifdef DEBUG
#define	LOG_DEBUG1(args)  if (log_event_debug > 1) cmn_err args
#else
#define	LOG_DEBUG1(args)
#endif
68 
/*
 * Local static vars
 */
/*
 * queue of event buffers sent to syseventd (singly linked through
 * log_eventq_t.next; the delivery thread pushes delivered buffers here)
 */
static log_eventq_t *log_eventq_sent = NULL;

/*
 * Count of event buffers in the queue awaiting delivery
 * (decremented by the delivery thread as buffers move to the sent queue)
 */
int log_eventq_cnt = 0;

/* queue of event buffers awaiting delivery to syseventd */
static log_eventq_t *log_eventq_head = NULL;
static log_eventq_t *log_eventq_tail = NULL;
/* NOTE(review): presumably the next kernel event sequence id - confirm */
static uint64_t kernel_event_id = 0;
/* nvlist encoding passed to nvlist_size()/nvlist_pack() for attribute data */
static int encoding = NV_ENCODE_NATIVE;
85 
/* log event delivery flag (values held in log_event_delivery below) */
#define	LOGEVENT_DELIVERY_OK	0	/* OK to deliver event buffers */
#define	LOGEVENT_DELIVERY_CONT	1	/* Continue to deliver event buffers */
#define	LOGEVENT_DELIVERY_HOLD	2	/* Hold delivering of event buffers */

/*
 * Tunable maximum event buffer queue size. Size depends on how many events
 * the queue must hold when syseventd is not available, for example during
 * system startup. Experience showed that more than 2000 events could be posted
 * due to correctable memory errors.
 */
int logevent_max_q_sz = 5000;


/* Delivery starts out on hold; one of the LOGEVENT_DELIVERY_* values */
static int log_event_delivery = LOGEVENT_DELIVERY_HOLD;
/* NOTE(review): presumably the path of the syseventd upcall door - confirm */
static char logevent_door_upcall_filename[MAXPATHLEN];

static door_handle_t event_door = NULL;		/* Door for upcalls */
static kmutex_t event_door_mutex;		/* To protect event_door */
105 
/*
 * async thread-related variables
 *
 * eventq_head_mutex - synchronizes access to the kernel event queue
 *
 * eventq_sent_mutex - synchronizes access to the queue of events sent to
 *			userlevel
 *
 * log_event_cv - condition variable signaled when an event has arrived or
 *			userlevel ready to process event buffers
 *
 * async_thread - asynchronous event delivery thread to userlevel daemon.
 *
 * event_qfull_mutex, event_qfull_cv, event_qfull_blocked - the delivery
 *			thread signals event_qfull_cv (under event_qfull_mutex)
 *			when event_qfull_blocked is non-zero and either queue
 *			space has become available or the transport is down
 *
 * sysevent_upcall_status - status of the door upcall link
 *
 * registered_channel_mutex - single lock protecting the channel
 *			registration data
 */
static kmutex_t eventq_head_mutex;
static kmutex_t eventq_sent_mutex;
static kcondvar_t log_event_cv;
static kthread_id_t async_thread = NULL;

static kmutex_t event_qfull_mutex;
static kcondvar_t event_qfull_cv;
static int event_qfull_blocked = 0;

static int sysevent_upcall_status = -1;
static kmutex_t registered_channel_mutex;

/*
 * Indicates the syseventd daemon has begun taking events
 */
int sysevent_daemon_init = 0;
137 
/*
 * Back-off delay when door_ki_upcall returns EAGAIN.  Typically
 * caused by the server process doing a forkall().  Since all threads
 * but the thread actually doing the forkall() need to be quiesced,
 * the fork may take some time.  The min/max pause are in units
 * of clock ticks.
 */
#define	LOG_EVENT_MIN_PAUSE	8
#define	LOG_EVENT_MAX_PAUSE	128

/*
 * State for log_event_pause(): event_pause_state is non-zero while a pause
 * is in progress; the timeout callback clears it and signals event_pause_cv
 * with event_pause_mutex held.
 */
static kmutex_t	event_pause_mutex;
static kcondvar_t event_pause_cv;
static int event_pause_state = 0;
151 
/*
 * log_event_busy_timeout - timeout() callback armed by log_event_pause().
 *			Clears event_pause_state and wakes the waiter on
 *			event_pause_cv.  The argument is unused.
 */
/*ARGSUSED*/
static void
log_event_busy_timeout(void *arg)
{
	mutex_enter(&event_pause_mutex);
	event_pause_state = 0;
	cv_signal(&event_pause_cv);
	mutex_exit(&event_pause_mutex);
}
161 
/*
 * log_event_pause - Block the calling thread for nticks clock ticks.
 *
 *	Arms a timeout for log_event_busy_timeout() and waits on
 *	event_pause_cv until the callback has cleared event_pause_state.
 *	If timeout() returns 0 no wait is performed and the function
 *	returns immediately.
 */
static void
log_event_pause(int nticks)
{
	timeout_id_t id;

	/*
	 * Only one use of log_event_pause at a time
	 */
	ASSERT(event_pause_state == 0);

	/*
	 * The state is set before the timeout is armed so that a callback
	 * which fires before we take the mutex below is still observed:
	 * the wait loop then finds event_pause_state == 0 and falls through.
	 * Note that this store is done without event_pause_mutex held.
	 */
	event_pause_state = 1;
	id = timeout(log_event_busy_timeout, NULL, nticks);
	if (id != 0) {
		mutex_enter(&event_pause_mutex);
		while (event_pause_state)
			cv_wait(&event_pause_cv, &event_pause_mutex);
		mutex_exit(&event_pause_mutex);
	}
	/* Reset for the next caller, including the id == 0 case */
	event_pause_state = 0;
}
182 
183 
/*
 * log_event_upcall - Perform the upcall to syseventd for event buffer delivery.
 *			Check for rebinding errors.
 *			The argument buffer is reused by the syseventd
 *			door_return to hold the result code.
 *
 *	arg - upcall argument whose buf member holds the event buffer; it is
 *	      used as both the door data buffer and the door result buffer.
 *
 *	Returns EBADF if there is no door or the door upcall reports EBADF
 *	(in which case the door handle is released and cleared), any other
 *	non-retryable door upcall error, or - when the upcall itself
 *	succeeds - the int found at the start of the result buffer.
 *	EINTR and EAGAIN from the door upcall are retried indefinitely
 *	after a pause.
 */
static int
log_event_upcall(log_event_upcall_arg_t *arg)
{
	int error;
	size_t size;
	sysevent_t *ev;
	door_arg_t darg, save_arg;
	int retry;		/* incremented per attempt; never read */
	int neagain = 0;	/* number of EAGAIN retries, for debug output */
	int neintr = 0;		/* number of EINTR retries, for debug output */
	int nticks = LOG_EVENT_MIN_PAUSE;	/* current EAGAIN back-off */

	/* Initialize door args */
	ev = (sysevent_t *)&arg->buf;
	size = sizeof (log_event_upcall_arg_t) + SE_PAYLOAD_SZ(ev);

	/* Data and result share the same buffer */
	darg.rbuf = (char *)arg;
	darg.data_ptr = (char *)arg;
	darg.rsize = size;
	darg.data_size = size;
	darg.desc_ptr = NULL;
	darg.desc_num = 0;

	LOG_DEBUG1((CE_CONT, "log_event_upcall: 0x%llx\n",
	    (longlong_t)SE_SEQ((sysevent_t *)&arg->buf)));

	/* Keep a pristine copy so each retry starts from the same arguments */
	save_arg = darg;
	for (retry = 0; ; retry++) {

		mutex_enter(&event_door_mutex);
		if (event_door == NULL) {
			mutex_exit(&event_door_mutex);

			return (EBADF);
		}

		if ((error = door_ki_upcall_limited(event_door, &darg, NULL,
		    SIZE_MAX, 0)) == 0) {
			mutex_exit(&event_door_mutex);
			break;
		}

		/*
		 * EBADF is handled outside the switch below because we need to
		 * hold event_door_mutex a bit longer
		 */
		if (error == EBADF) {
			/* Server died */
			door_ki_rele(event_door);
			event_door = NULL;

			mutex_exit(&event_door_mutex);
			return (error);
		}

		mutex_exit(&event_door_mutex);

		/*
		 * The EBADF case is already handled above with event_door_mutex
		 * held
		 */
		switch (error) {
		case EINTR:
			/* interrupted upcall - short fixed pause, then retry */
			neintr++;
			log_event_pause(2);
			darg = save_arg;
			break;
		case EAGAIN:
			/* cannot deliver upcall - process may be forking */
			neagain++;
			log_event_pause(nticks);
			/* double the pause, capped at LOG_EVENT_MAX_PAUSE */
			nticks <<= 1;
			if (nticks > LOG_EVENT_MAX_PAUSE)
				nticks = LOG_EVENT_MAX_PAUSE;
			darg = save_arg;
			break;
		default:
			cmn_err(CE_CONT,
			    "log_event_upcall: door_ki_upcall error %d\n",
			    error);
			return (error);
		}
	}

	if (neagain > 0 || neintr > 0) {
		LOG_DEBUG((CE_CONT, "upcall: eagain=%d eintr=%d nticks=%d\n",
		    neagain, neintr, nticks));
	}

	LOG_DEBUG1((CE_CONT, "log_event_upcall:\n\t"
	    "error=%d rptr1=%p rptr2=%p dptr2=%p ret1=%x ret2=%x\n",
	    error, (void *)arg, (void *)darg.rbuf,
	    (void *)darg.data_ptr,
	    *((int *)(darg.rbuf)), *((int *)(darg.data_ptr))));

	/* The loop is only left via break with error == 0 */
	if (!error) {
		/*
		 * upcall was successfully executed. Check return code.
		 */
		error = *((int *)(darg.rbuf));
	}

	return (error);
}
294 
/*
 * log_event_deliver - event delivery thread
 *			Deliver all events on the event queue to syseventd.
 *			If the daemon can not process events, stop event
 *			delivery and wait for an indication from the
 *			daemon to resume delivery.
 *
 *			Once all event buffers have been delivered, wait
 *			until there are more to deliver.
 *
 *	This function never returns.  eventq_head_mutex is held except
 *	across the upcall to syseventd and while blocked in cv_wait().
 */
static void
log_event_deliver()
{
	log_eventq_t *q;
	int upcall_err;
	callb_cpr_t cprinfo;

	CALLB_CPR_INIT(&cprinfo, &eventq_head_mutex, callb_generic_cpr,
	    "logevent");

	/*
	 * eventq_head_mutex is exited (released) when there are no more
	 * events to process from the eventq in cv_wait().
	 */
	mutex_enter(&eventq_head_mutex);

	for (;;) {
		LOG_DEBUG1((CE_CONT, "log_event_deliver: head = %p\n",
		    (void *)log_eventq_head));

		upcall_err = 0;
		q = log_eventq_head;

		while (q) {
			/* Delivery on hold: report EAGAIN without an upcall */
			if (log_event_delivery == LOGEVENT_DELIVERY_HOLD) {
				upcall_err = EAGAIN;
				break;
			}

			log_event_delivery = LOGEVENT_DELIVERY_OK;

			/*
			 * Release event queue lock during upcall to
			 * syseventd
			 */
			mutex_exit(&eventq_head_mutex);
			if ((upcall_err = log_event_upcall(&q->arg)) != 0) {
				/* re-take the lock; handled in switch below */
				mutex_enter(&eventq_head_mutex);
				break;
			}

			/*
			 * We may be able to add entries to
			 * the queue now.
			 *
			 * The first test is made without event_qfull_mutex
			 * (and without eventq_head_mutex); event_qfull_blocked
			 * is re-checked once event_qfull_mutex is held.
			 */
			if (event_qfull_blocked > 0 &&
			    log_eventq_cnt < logevent_max_q_sz) {
				mutex_enter(&event_qfull_mutex);
				if (event_qfull_blocked > 0) {
					cv_signal(&event_qfull_cv);
				}
				mutex_exit(&event_qfull_mutex);
			}

			mutex_enter(&eventq_head_mutex);

			/*
			 * Daemon restart can cause entries to be moved from
			 * the sent queue and put back on the event queue.
			 * If this has occurred, replay event queue
			 * processing from the new queue head.
			 */
			if (q != log_eventq_head) {
				q = log_eventq_head;
				LOG_DEBUG((CE_CONT, "log_event_deliver: "
				    "door upcall/daemon restart race\n"));
			} else {
				log_eventq_t *next;

				/*
				 * Move the event to the sent queue when a
				 * successful delivery has been made.
				 */
				mutex_enter(&eventq_sent_mutex);
				next = q->next;
				/* push onto the front of the sent queue */
				q->next = log_eventq_sent;
				log_eventq_sent = q;
				/* advance the pending queue */
				q = next;
				log_eventq_head = q;
				log_eventq_cnt--;
				if (q == NULL) {
					ASSERT(log_eventq_cnt == 0);
					log_eventq_tail = NULL;
				}
				mutex_exit(&eventq_sent_mutex);
			}
		}

		switch (upcall_err) {
		case 0:
			/*
			 * Success. The queue is empty.
			 */
			sysevent_upcall_status = 0;
			break;
		case EAGAIN:
			/*
			 * Delivery is on hold (but functional).
			 */
			sysevent_upcall_status = 0;
			/*
			 * If the user has already signaled for delivery
			 * resumption, continue.  Otherwise, we wait until
			 * we are signaled to continue.
			 */
			if (log_event_delivery == LOGEVENT_DELIVERY_CONT)
				continue;
			log_event_delivery = LOGEVENT_DELIVERY_HOLD;

			LOG_DEBUG1((CE_CONT, "log_event_deliver: EAGAIN\n"));
			break;
		default:
			LOG_DEBUG((CE_CONT, "log_event_deliver: "
			    "upcall err %d\n", upcall_err));
			sysevent_upcall_status = upcall_err;
			/*
			 * Signal everyone waiting that transport is down
			 */
			if (event_qfull_blocked > 0) {
				mutex_enter(&event_qfull_mutex);
				if (event_qfull_blocked > 0) {
					cv_broadcast(&event_qfull_cv);
				}
				mutex_exit(&event_qfull_mutex);
			}
			break;
		}

		/* Sleep until log_event_cv is signaled, then rescan the queue */
		CALLB_CPR_SAFE_BEGIN(&cprinfo);
		cv_wait(&log_event_cv, &eventq_head_mutex);
		CALLB_CPR_SAFE_END(&cprinfo, &eventq_head_mutex);
	}
	/* NOTREACHED */
}
439 
440 /*
441  * log_event_init - Allocate and initialize log_event data structures.
442  */
443 void
444 log_event_init()
445 {
446 	mutex_init(&event_door_mutex, NULL, MUTEX_DEFAULT, NULL);
447 
448 	mutex_init(&eventq_head_mutex, NULL, MUTEX_DEFAULT, NULL);
449 	mutex_init(&eventq_sent_mutex, NULL, MUTEX_DEFAULT, NULL);
450 	cv_init(&log_event_cv, NULL, CV_DEFAULT, NULL);
451 
452 	mutex_init(&event_qfull_mutex, NULL, MUTEX_DEFAULT, NULL);
453 	cv_init(&event_qfull_cv, NULL, CV_DEFAULT, NULL);
454 
455 	mutex_init(&event_pause_mutex, NULL, MUTEX_DEFAULT, NULL);
456 	cv_init(&event_pause_cv, NULL, CV_DEFAULT, NULL);
457 
458 	mutex_init(&registered_channel_mutex, NULL, MUTEX_DEFAULT, NULL);
459 	sysevent_evc_init();
460 }
461 
/*
 * The following routines are used by kernel event publishers to
 * allocate, append and free event buffers
 */
/*
 * sysevent_alloc - Allocate new eventq struct.  This element contains
 *			an event buffer that will be used in a subsequent
 *			call to log_sysevent.
 *
 *	class, subclass, pub - NUL-terminated strings copied into the event
 *			buffer; must not be NULL (ASSERTed).
 *	flag - allocation flag passed to kmem_zalloc().
 *
 *	Returns a pointer to the event buffer embedded in the new
 *	log_eventq_t, or NULL if the allocation returned NULL.
 */
sysevent_t *
sysevent_alloc(char *class, char *subclass, char *pub, int flag)
{
	int payload_sz;
	int class_sz, subclass_sz, pub_sz;
	int aligned_class_sz, aligned_subclass_sz, aligned_pub_sz;
	sysevent_t *ev;
	log_eventq_t *q;

	ASSERT(class != NULL);
	ASSERT(subclass != NULL);
	ASSERT(pub != NULL);

	/*
	 * Calculate and reserve space for the class, subclass and
	 * publisher strings in the event buffer
	 */
	class_sz = strlen(class) + 1;
	subclass_sz = strlen(subclass) + 1;
	pub_sz = strlen(pub) + 1;

	/* The length limits are only enforced by this ASSERT */
	ASSERT((class_sz <= MAX_CLASS_LEN) && (subclass_sz
	    <= MAX_SUBCLASS_LEN) && (pub_sz <= MAX_PUB_LEN));

	/* String sizes must be 64-bit aligned in the event buffer */
	aligned_class_sz = SE_ALIGN(class_sz);
	aligned_subclass_sz = SE_ALIGN(subclass_sz);
	aligned_pub_sz = SE_ALIGN(pub_sz);

	/*
	 * NOTE(review): the sizeof (uint64_t) terms presumably discount
	 * space already reserved inside log_eventq_t - confirm against
	 * the structure definitions.
	 */
	payload_sz = (aligned_class_sz - sizeof (uint64_t)) +
	    (aligned_subclass_sz - sizeof (uint64_t)) +
	    (aligned_pub_sz - sizeof (uint64_t)) - sizeof (uint64_t);

	/*
	 * Allocate event buffer plus additional sysevent queue
	 * and payload overhead.
	 */
	q = kmem_zalloc(sizeof (log_eventq_t) + payload_sz, flag);
	if (q == NULL) {
		return (NULL);
	}

	/* Initialize the event buffer data */
	ev = (sysevent_t *)&q->arg.buf;
	SE_VERSION(ev) = SYS_EVENT_VERSION;
	bcopy(class, SE_CLASS_NAME(ev), class_sz);

	/* The subclass name follows the aligned class name */
	SE_SUBCLASS_OFF(ev) = SE_ALIGN(offsetof(sysevent_impl_t, se_class_name))
		+ aligned_class_sz;
	bcopy(subclass, SE_SUBCLASS_NAME(ev), subclass_sz);

	/* The publisher name follows the aligned subclass name */
	SE_PUB_OFF(ev) = SE_SUBCLASS_OFF(ev) + aligned_subclass_sz;
	bcopy(pub, SE_PUB_NAME(ev), pub_sz);

	/* No attribute list attached yet */
	SE_ATTR_PTR(ev) = UINT64_C(0);
	SE_PAYLOAD_SZ(ev) = payload_sz;

	return (ev);
}
530 
531 /*
532  * sysevent_free - Free event buffer and any attribute data.
533  */
534 void
535 sysevent_free(sysevent_t *ev)
536 {
537 	log_eventq_t *q;
538 	nvlist_t *nvl;
539 
540 	ASSERT(ev != NULL);
541 	q = (log_eventq_t *)((caddr_t)ev - offsetof(log_eventq_t, arg.buf));
542 	nvl = (nvlist_t *)(uintptr_t)SE_ATTR_PTR(ev);
543 
544 	if (nvl != NULL) {
545 		size_t size = 0;
546 		(void) nvlist_size(nvl, &size, encoding);
547 		SE_PAYLOAD_SZ(ev) -= size;
548 		nvlist_free(nvl);
549 	}
550 	kmem_free(q, sizeof (log_eventq_t) + SE_PAYLOAD_SZ(ev));
551 }
552 
553 /*
554  * free_packed_event - Free packed event buffer
555  */
556 static void
557 free_packed_event(sysevent_t *ev)
558 {
559 	log_eventq_t *q;
560 
561 	ASSERT(ev != NULL);
562 	q = (log_eventq_t *)((caddr_t)ev - offsetof(log_eventq_t, arg.buf));
563 
564 	kmem_free(q, sizeof (log_eventq_t) + SE_PAYLOAD_SZ(ev));
565 }
566 
567 /*
568  * sysevent_add_attr - Add new attribute element to an event attribute list
569  *			If attribute list is NULL, start a new list.
570  */
571 int
572 sysevent_add_attr(sysevent_attr_list_t **ev_attr_list, char *name,
573     sysevent_value_t *se_value, int flag)
574 {
575 	int error;
576 	nvlist_t **nvlp = (nvlist_t **)ev_attr_list;
577 
578 	if (nvlp == NULL || se_value == NULL) {
579 		return (SE_EINVAL);
580 	}
581 
582 	/*
583 	 * attr_sz is composed of the value data size + the name data size +
584 	 * any header data.  64-bit aligned.
585 	 */
586 	if (strlen(name) >= MAX_ATTR_NAME) {
587 		return (SE_EINVAL);
588 	}
589 
590 	/*
591 	 * Allocate nvlist
592 	 */
593 	if ((*nvlp == NULL) &&
594 	    (nvlist_alloc(nvlp, NV_UNIQUE_NAME_TYPE, flag) != 0))
595 		return (SE_ENOMEM);
596 
597 	/* add the attribute */
598 	switch (se_value->value_type) {
599 	case SE_DATA_TYPE_BYTE:
600 		error = nvlist_add_byte(*ev_attr_list, name,
601 		    se_value->value.sv_byte);
602 		break;
603 	case SE_DATA_TYPE_INT16:
604 		error = nvlist_add_int16(*ev_attr_list, name,
605 		    se_value->value.sv_int16);
606 		break;
607 	case SE_DATA_TYPE_UINT16:
608 		error = nvlist_add_uint16(*ev_attr_list, name,
609 		    se_value->value.sv_uint16);
610 		break;
611 	case SE_DATA_TYPE_INT32:
612 		error = nvlist_add_int32(*ev_attr_list, name,
613 		    se_value->value.sv_int32);
614 		break;
615 	case SE_DATA_TYPE_UINT32:
616 		error = nvlist_add_uint32(*ev_attr_list, name,
617 		    se_value->value.sv_uint32);
618 		break;
619 	case SE_DATA_TYPE_INT64:
620 		error = nvlist_add_int64(*ev_attr_list, name,
621 		    se_value->value.sv_int64);
622 		break;
623 	case SE_DATA_TYPE_UINT64:
624 		error = nvlist_add_uint64(*ev_attr_list, name,
625 		    se_value->value.sv_uint64);
626 		break;
627 	case SE_DATA_TYPE_STRING:
628 		if (strlen((char *)se_value->value.sv_string) >= MAX_STRING_SZ)
629 			return (SE_EINVAL);
630 		error = nvlist_add_string(*ev_attr_list, name,
631 		    se_value->value.sv_string);
632 		break;
633 	case SE_DATA_TYPE_BYTES:
634 		if (se_value->value.sv_bytes.size > MAX_BYTE_ARRAY)
635 			return (SE_EINVAL);
636 		error = nvlist_add_byte_array(*ev_attr_list, name,
637 		    se_value->value.sv_bytes.data,
638 		    se_value->value.sv_bytes.size);
639 		break;
640 	case SE_DATA_TYPE_TIME:
641 		error = nvlist_add_hrtime(*ev_attr_list, name,
642 		    se_value->value.sv_time);
643 		break;
644 	default:
645 		return (SE_EINVAL);
646 	}
647 
648 	return (error ? SE_ENOMEM : 0);
649 }
650 
/*
 * sysevent_free_attr - Free an attribute list not associated with an
 *			event buffer.
 *
 *	The list is handed directly to nvlist_free().
 */
void
sysevent_free_attr(sysevent_attr_list_t *ev_attr_list)
{
	nvlist_free((nvlist_t *)ev_attr_list);
}
660 
/*
 * sysevent_attach_attributes - Attach an attribute list to an event buffer.
 *
 *	This data will be re-packed into contiguous memory when the event
 *	buffer is posted to log_sysevent.
 *
 *	Returns SE_EINVAL if the event already has an attribute list
 *	attached, otherwise 0.  On success the payload size of the event
 *	grows by the encoded size of the list and the event flag is cleared.
 */
int
sysevent_attach_attributes(sysevent_t *ev, sysevent_attr_list_t *ev_attr_list)
{
	size_t size = 0;

	if (SE_ATTR_PTR(ev) != UINT64_C(0)) {
		return (SE_EINVAL);
	}

	SE_ATTR_PTR(ev) = (uintptr_t)ev_attr_list;
	/* size stays 0 if nvlist_size() fails; its result is ignored */
	(void) nvlist_size((nvlist_t *)ev_attr_list, &size, encoding);
	SE_PAYLOAD_SZ(ev) += size;
	SE_FLAG(ev) = 0;

	return (0);
}
683 
/*
 * sysevent_detach_attributes - Detach but don't free attribute list from the
 *				event buffer.
 *
 *	Does nothing when no list is attached.  Otherwise the attribute
 *	pointer is cleared and the encoded size of the list is subtracted
 *	from the payload size; the list itself is left untouched.
 */
void
sysevent_detach_attributes(sysevent_t *ev)
{
	size_t size = 0;
	nvlist_t *nvl;

	if ((nvl = (nvlist_t *)(uintptr_t)SE_ATTR_PTR(ev)) == NULL) {
		return;
	}

	SE_ATTR_PTR(ev) = UINT64_C(0);
	(void) nvlist_size(nvl, &size, encoding);
	SE_PAYLOAD_SZ(ev) -= size;
	/*
	 * NOTE(review): this check is only meaningful if the payload size
	 * field is a signed type - confirm.
	 */
	ASSERT(SE_PAYLOAD_SZ(ev) >= 0);
}
703 
704 /*
705  * sysevent_attr_name - Get name of attribute
706  */
707 char *
708 sysevent_attr_name(sysevent_attr_t *attr)
709 {
710 	if (attr == NULL) {
711 		return (NULL);
712 	}
713 
714 	return (nvpair_name(attr));
715 }
716 
/*
 * sysevent_attr_type - Get type of attribute
 *
 *	Returns the value of nvpair_type() for the attribute.  Unlike
 *	sysevent_attr_name(), attr is not checked for NULL here.
 */
int
sysevent_attr_type(sysevent_attr_t *attr)
{
	/*
	 * The SE_DATA_TYPE_* values are defined to be the
	 * same as the corresponding DATA_TYPE_* values
	 */
	return (nvpair_type((nvpair_t *)attr));
}
729 
/*
 * Repack event buffer into contiguous memory
 *
 *	ev - event buffer, optionally with an attribute list attached.
 *	flag - allocation flag passed to kmem_zalloc() and nvlist_pack().
 *
 *	Returns a newly allocated copy of the event, or NULL if the
 *	allocation or the attribute packing fails.  The original event is
 *	not modified or freed.  When an attribute list is attached, it is
 *	packed directly after the copied header, the attribute pointer of
 *	the copy is cleared and the copy is flagged SE_PACKED_BUF; without
 *	an attribute list the copy is returned as-is after the header copy.
 */
static sysevent_t *
se_repack(sysevent_t *ev, int flag)
{
	size_t copy_len;
	caddr_t attr;
	size_t size;
	uint64_t attr_offset;
	sysevent_t *copy;
	log_eventq_t *qcopy;
	sysevent_attr_list_t *nvl;

	copy_len = sizeof (log_eventq_t) + SE_PAYLOAD_SZ(ev);
	qcopy = kmem_zalloc(copy_len, flag);
	if (qcopy == NULL) {
		return (NULL);
	}
	copy = (sysevent_t *)&qcopy->arg.buf;

	/*
	 * Copy event header, class, subclass and publisher names
	 * Set the attribute offset (in number of bytes) to contiguous
	 * memory after the header.
	 */

	attr_offset = SE_ATTR_OFF(ev);

	/*
	 * NOTE(review): copy_len is the size of the whole qcopy allocation
	 * while the bound below is measured from copy; if arg.buf is not
	 * at offset 0 of log_eventq_t this check is looser than the real
	 * end of the buffer.
	 */
	ASSERT((caddr_t)copy + attr_offset <= (caddr_t)copy + copy_len);

	bcopy(ev, copy, attr_offset);

	/* Check if attribute list exists */
	if ((nvl = (nvlist_t *)(uintptr_t)SE_ATTR_PTR(ev)) == NULL) {
		return (copy);
	}

	/*
	 * Copy attribute data to contiguous memory
	 */
	attr = (char *)copy + attr_offset;
	/* size is only set if nvlist_size() succeeds; result is ignored */
	(void) nvlist_size(nvl, &size, encoding);
	if (nvlist_pack(nvl, &attr, &size, encoding, flag) != 0) {
		kmem_free(qcopy, copy_len);
		return (NULL);
	}
	SE_ATTR_PTR(copy) = UINT64_C(0);
	SE_FLAG(copy) = SE_PACKED_BUF;

	return (copy);
}
782 
783 /*
784  * The sysevent registration provides a persistent and reliable database
785  * for channel information for sysevent channel publishers and
786  * subscribers.
787  *
788  * A channel is created and maintained by the kernel upon the first
789  * SE_OPEN_REGISTRATION operation to log_sysevent_register().  Channel
790  * event subscription information is updated as publishers or subscribers
791  * perform subsequent operations (SE_BIND_REGISTRATION, SE_REGISTER,
792  * SE_UNREGISTER and SE_UNBIND_REGISTRATION).
793  *
794  * For consistency, id's are assigned for every publisher or subscriber
795  * bound to a particular channel.  The id's are used to constrain resources
796  * and perform subscription lookup.
797  *
798  * Associated with each channel is a hashed list of the current subscriptions
799  * based upon event class and subclasses.  A subscription contains a class name,
800  * list of possible subclasses and an array of subscriber ids.  Subscriptions
801  * are updated for every SE_REGISTER or SE_UNREGISTER operation.
802  *
803  * Channels are closed once the last subscriber or publisher performs a
804  * SE_CLOSE_REGISTRATION operation.  All resources associated with the named
805  * channel are freed upon last close.
806  *
807  * Locking:
808  *	Every operation to log_sysevent() is protected by a single lock,
809  *	registered_channel_mutex.  It is expected that the granularity of
810  *	a single lock is sufficient given the frequency that updates will
811  *	occur.
812  *
813  *	If this locking strategy proves to be too contentious, a per-hash
814  *	or per-channel locking strategy may be implemented.
815  */
816 
817 
/* Map a channel name to a bucket index in registered_channels[] */
#define	CHANN_HASH(channel_name)	(hash_func(channel_name) \
					% CHAN_HASH_SZ)

/* Hash table of channel descriptors, chained through scd_next */
sysevent_channel_descriptor_t *registered_channels[CHAN_HASH_SZ];
/* Number of channel descriptors currently in registered_channels[] */
static int channel_cnt;
static void remove_all_class(sysevent_channel_descriptor_t *chan,
	uint32_t sub_id);
825 
/*
 * hash_func - Compute a 32-bit hash of a NUL-terminated string.
 *
 *	For every character the running value is shifted left by four bits
 *	and the character is added; whenever any of the top four bits end
 *	up set, they are folded back into bits 4-7 and then cleared.
 *	An empty string hashes to 0.
 */
static uint32_t
hash_func(const char *s)
{
	uint32_t hash = 0;
	const char *p;

	for (p = s; *p != '\0'; p++) {
		uint32_t top;

		hash = (hash << 4) + (uint32_t)*p;
		top = hash & 0xf0000000;
		if (top != 0) {
			/* the two masks do not overlap, so one xor suffices */
			hash ^= (top >> 24) ^ top;
		}
	}

	return (hash);
}
844 
845 static sysevent_channel_descriptor_t *
846 get_channel(char *channel_name)
847 {
848 	int hash_index;
849 	sysevent_channel_descriptor_t *chan_list;
850 
851 	if (channel_name == NULL)
852 		return (NULL);
853 
854 	/* Find channel descriptor */
855 	hash_index = CHANN_HASH(channel_name);
856 	chan_list = registered_channels[hash_index];
857 	while (chan_list != NULL) {
858 		if (strcmp(chan_list->scd_channel_name, channel_name) == 0) {
859 			break;
860 		} else {
861 			chan_list = chan_list->scd_next;
862 		}
863 	}
864 
865 	return (chan_list);
866 }
867 
868 static class_lst_t *
869 create_channel_registration(sysevent_channel_descriptor_t *chan,
870     char *event_class, int index)
871 {
872 	size_t class_len;
873 	class_lst_t *c_list;
874 
875 	class_len = strlen(event_class) + 1;
876 	c_list = kmem_zalloc(sizeof (class_lst_t), KM_SLEEP);
877 	c_list->cl_name = kmem_zalloc(class_len, KM_SLEEP);
878 	bcopy(event_class, c_list->cl_name, class_len);
879 
880 	c_list->cl_subclass_list =
881 	    kmem_zalloc(sizeof (subclass_lst_t), KM_SLEEP);
882 	c_list->cl_subclass_list->sl_name =
883 	    kmem_zalloc(sizeof (EC_SUB_ALL), KM_SLEEP);
884 	bcopy(EC_SUB_ALL, c_list->cl_subclass_list->sl_name,
885 	    sizeof (EC_SUB_ALL));
886 
887 	c_list->cl_next = chan->scd_class_list_tbl[index];
888 	chan->scd_class_list_tbl[index] = c_list;
889 
890 	return (c_list);
891 }
892 
893 static void
894 free_channel_registration(sysevent_channel_descriptor_t *chan)
895 {
896 	int i;
897 	class_lst_t *clist, *next_clist;
898 	subclass_lst_t *sclist, *next_sc;
899 
900 	for (i = 0; i <= CLASS_HASH_SZ; ++i) {
901 
902 		clist = chan->scd_class_list_tbl[i];
903 		while (clist != NULL) {
904 			sclist = clist->cl_subclass_list;
905 			while (sclist != NULL) {
906 				kmem_free(sclist->sl_name,
907 				    strlen(sclist->sl_name) + 1);
908 				next_sc = sclist->sl_next;
909 				kmem_free(sclist, sizeof (subclass_lst_t));
910 				sclist = next_sc;
911 			}
912 			kmem_free(clist->cl_name,
913 			    strlen(clist->cl_name) + 1);
914 			next_clist = clist->cl_next;
915 			kmem_free(clist, sizeof (class_lst_t));
916 			clist = next_clist;
917 		}
918 	}
919 	chan->scd_class_list_tbl[0] = NULL;
920 }
921 
922 static int
923 open_channel(char *channel_name)
924 {
925 	int hash_index;
926 	sysevent_channel_descriptor_t *chan, *chan_list;
927 
928 
929 	if (channel_cnt > MAX_CHAN) {
930 		return (-1);
931 	}
932 
933 	/* Find channel descriptor */
934 	hash_index = CHANN_HASH(channel_name);
935 	chan_list = registered_channels[hash_index];
936 	while (chan_list != NULL) {
937 		if (strcmp(chan_list->scd_channel_name, channel_name) == 0) {
938 			chan_list->scd_ref_cnt++;
939 			kmem_free(channel_name, strlen(channel_name) + 1);
940 			return (0);
941 		} else {
942 			chan_list = chan_list->scd_next;
943 		}
944 	}
945 
946 
947 	/* New channel descriptor */
948 	chan = kmem_zalloc(sizeof (sysevent_channel_descriptor_t), KM_SLEEP);
949 	chan->scd_channel_name = channel_name;
950 
951 	/*
952 	 * Create subscriber ids in the range [1, MAX_SUBSCRIBERS).
953 	 * Subscriber id 0 is never allocated, but is used as a reserved id
954 	 * by libsysevent
955 	 */
956 	if ((chan->scd_subscriber_cache = vmem_create(channel_name, (void *)1,
957 	    MAX_SUBSCRIBERS + 1, 1, NULL, NULL, NULL, 0,
958 	    VM_NOSLEEP | VMC_IDENTIFIER)) == NULL) {
959 		kmem_free(chan, sizeof (sysevent_channel_descriptor_t));
960 		return (-1);
961 	}
962 	if ((chan->scd_publisher_cache = vmem_create(channel_name, (void *)1,
963 	    MAX_PUBLISHERS + 1, 1, NULL, NULL, NULL, 0,
964 	    VM_NOSLEEP | VMC_IDENTIFIER)) == NULL) {
965 		vmem_destroy(chan->scd_subscriber_cache);
966 		kmem_free(chan, sizeof (sysevent_channel_descriptor_t));
967 		return (-1);
968 	}
969 
970 	chan->scd_ref_cnt = 1;
971 
972 	(void) create_channel_registration(chan, EC_ALL, 0);
973 
974 	if (registered_channels[hash_index] != NULL)
975 		chan->scd_next = registered_channels[hash_index];
976 
977 	registered_channels[hash_index] = chan;
978 
979 	++channel_cnt;
980 
981 	return (0);
982 }
983 
984 static void
985 close_channel(char *channel_name)
986 {
987 	int hash_index;
988 	sysevent_channel_descriptor_t *chan, *prev_chan;
989 
990 	/* Find channel descriptor */
991 	hash_index = CHANN_HASH(channel_name);
992 	prev_chan = chan = registered_channels[hash_index];
993 
994 	while (chan != NULL) {
995 		if (strcmp(chan->scd_channel_name, channel_name) == 0) {
996 			break;
997 		} else {
998 			prev_chan = chan;
999 			chan = chan->scd_next;
1000 		}
1001 	}
1002 
1003 	if (chan == NULL)
1004 		return;
1005 
1006 	chan->scd_ref_cnt--;
1007 	if (chan->scd_ref_cnt > 0)
1008 		return;
1009 
1010 	free_channel_registration(chan);
1011 	vmem_destroy(chan->scd_subscriber_cache);
1012 	vmem_destroy(chan->scd_publisher_cache);
1013 	kmem_free(chan->scd_channel_name,
1014 	    strlen(chan->scd_channel_name) + 1);
1015 	if (registered_channels[hash_index] == chan)
1016 		registered_channels[hash_index] = chan->scd_next;
1017 	else
1018 		prev_chan->scd_next = chan->scd_next;
1019 	kmem_free(chan, sizeof (sysevent_channel_descriptor_t));
1020 	--channel_cnt;
1021 }
1022 
1023 static id_t
1024 bind_common(sysevent_channel_descriptor_t *chan, int type)
1025 {
1026 	id_t id;
1027 
1028 	if (type == SUBSCRIBER) {
1029 		id = (id_t)(uintptr_t)vmem_alloc(chan->scd_subscriber_cache, 1,
1030 		    VM_NOSLEEP | VM_NEXTFIT);
1031 		if (id <= 0 || id > MAX_SUBSCRIBERS)
1032 			return (0);
1033 		chan->scd_subscriber_ids[id] = 1;
1034 	} else {
1035 		id = (id_t)(uintptr_t)vmem_alloc(chan->scd_publisher_cache, 1,
1036 		    VM_NOSLEEP | VM_NEXTFIT);
1037 		if (id <= 0 || id > MAX_PUBLISHERS)
1038 			return (0);
1039 		chan->scd_publisher_ids[id] = 1;
1040 	}
1041 
1042 	return (id);
1043 }
1044 
1045 static int
1046 unbind_common(sysevent_channel_descriptor_t *chan, int type, id_t id)
1047 {
1048 	if (type == SUBSCRIBER) {
1049 		if (id <= 0 || id > MAX_SUBSCRIBERS)
1050 			return (0);
1051 		if (chan->scd_subscriber_ids[id] == 0)
1052 			return (0);
1053 		(void) remove_all_class(chan, id);
1054 		chan->scd_subscriber_ids[id] = 0;
1055 		vmem_free(chan->scd_subscriber_cache, (void *)(uintptr_t)id, 1);
1056 	} else {
1057 		if (id <= 0 || id > MAX_PUBLISHERS)
1058 			return (0);
1059 		if (chan->scd_publisher_ids[id] == 0)
1060 			return (0);
1061 		chan->scd_publisher_ids[id] = 0;
1062 		vmem_free(chan->scd_publisher_cache, (void *)(uintptr_t)id, 1);
1063 	}
1064 
1065 	return (1);
1066 }
1067 
/*
 * release_id - Unbind the given id from the channel and, if the id was
 *		actually bound, drop one reference on the channel.
 *
 *	close_channel() may free chan; chan must not be used by the caller
 *	after this returns.
 */
static void
release_id(sysevent_channel_descriptor_t *chan, int type, id_t id)
{
	if (unbind_common(chan, type, id))
		close_channel(chan->scd_channel_name);
}
1074 
1075 static subclass_lst_t *
1076 find_subclass(class_lst_t *c_list, char *subclass)
1077 {
1078 	subclass_lst_t *sc_list;
1079 
1080 	if (c_list == NULL)
1081 		return (NULL);
1082 
1083 	sc_list = c_list->cl_subclass_list;
1084 
1085 	while (sc_list != NULL) {
1086 		if (strcmp(sc_list->sl_name, subclass) == 0) {
1087 			return (sc_list);
1088 		}
1089 		sc_list = sc_list->sl_next;
1090 	}
1091 
1092 	return (NULL);
1093 }
1094 
1095 static void
1096 insert_subclass(class_lst_t *c_list, char **subclass_names,
1097     int subclass_num, uint32_t sub_id)
1098 {
1099 	int i, subclass_sz;
1100 	subclass_lst_t *sc_list;
1101 
1102 	for (i = 0; i < subclass_num; ++i) {
1103 		if ((sc_list = find_subclass(c_list, subclass_names[i]))
1104 		    != NULL) {
1105 			sc_list->sl_num[sub_id] = 1;
1106 		} else {
1107 
1108 			sc_list = kmem_zalloc(sizeof (subclass_lst_t),
1109 			    KM_SLEEP);
1110 			subclass_sz = strlen(subclass_names[i]) + 1;
1111 			sc_list->sl_name = kmem_zalloc(subclass_sz, KM_SLEEP);
1112 			bcopy(subclass_names[i], sc_list->sl_name,
1113 			    subclass_sz);
1114 
1115 			sc_list->sl_num[sub_id] = 1;
1116 
1117 			sc_list->sl_next = c_list->cl_subclass_list;
1118 			c_list->cl_subclass_list = sc_list;
1119 		}
1120 	}
1121 }
1122 
1123 static class_lst_t *
1124 find_class(sysevent_channel_descriptor_t *chan, char *class_name)
1125 {
1126 	class_lst_t *c_list;
1127 
1128 	c_list = chan->scd_class_list_tbl[CLASS_HASH(class_name)];
1129 	while (c_list != NULL) {
1130 		if (strcmp(class_name, c_list->cl_name) == 0)
1131 			break;
1132 		c_list = c_list->cl_next;
1133 	}
1134 
1135 	return (c_list);
1136 }
1137 
1138 static void
1139 remove_all_class(sysevent_channel_descriptor_t *chan, uint32_t sub_id)
1140 {
1141 	int i;
1142 	class_lst_t *c_list;
1143 	subclass_lst_t *sc_list;
1144 
1145 	for (i = 0; i <= CLASS_HASH_SZ; ++i) {
1146 
1147 		c_list = chan->scd_class_list_tbl[i];
1148 		while (c_list != NULL) {
1149 			sc_list = c_list->cl_subclass_list;
1150 			while (sc_list != NULL) {
1151 				sc_list->sl_num[sub_id] = 0;
1152 				sc_list = sc_list->sl_next;
1153 			}
1154 			c_list = c_list->cl_next;
1155 		}
1156 	}
1157 }
1158 
1159 static void
1160 remove_class(sysevent_channel_descriptor_t *chan, uint32_t sub_id,
1161     char *class_name)
1162 {
1163 	class_lst_t *c_list;
1164 	subclass_lst_t *sc_list;
1165 
1166 	if (strcmp(class_name, EC_ALL) == 0) {
1167 		remove_all_class(chan, sub_id);
1168 		return;
1169 	}
1170 
1171 	if ((c_list = find_class(chan, class_name)) == NULL) {
1172 		return;
1173 	}
1174 
1175 	sc_list = c_list->cl_subclass_list;
1176 	while (sc_list != NULL) {
1177 		sc_list->sl_num[sub_id] = 0;
1178 		sc_list = sc_list->sl_next;
1179 	}
1180 }
1181 
1182 static int
1183 insert_class(sysevent_channel_descriptor_t *chan, char *event_class,
1184     char **event_subclass_lst, int subclass_num, uint32_t sub_id)
1185 {
1186 	class_lst_t *c_list;
1187 
1188 	if (strcmp(event_class, EC_ALL) == 0) {
1189 		insert_subclass(chan->scd_class_list_tbl[0],
1190 		    event_subclass_lst, 1, sub_id);
1191 		return (0);
1192 	}
1193 
1194 	if (strlen(event_class) + 1 > MAX_CLASS_LEN)
1195 		return (-1);
1196 
1197 	/* New class, add to the registration cache */
1198 	if ((c_list = find_class(chan, event_class)) == NULL) {
1199 		c_list = create_channel_registration(chan, event_class,
1200 		    CLASS_HASH(event_class));
1201 	}
1202 
1203 	/* Update the subclass list */
1204 	insert_subclass(c_list, event_subclass_lst, subclass_num, sub_id);
1205 
1206 	return (0);
1207 }
1208 
1209 static int
1210 add_registration(sysevent_channel_descriptor_t *chan, uint32_t sub_id,
1211     char *nvlbuf, size_t nvlsize)
1212 {
1213 	uint_t num_elem;
1214 	char *event_class;
1215 	char **event_list;
1216 	nvlist_t *nvl;
1217 	nvpair_t *nvpair = NULL;
1218 
1219 	if (nvlist_unpack(nvlbuf, nvlsize, &nvl, KM_SLEEP) != 0)
1220 		return (-1);
1221 
1222 	if ((nvpair = nvlist_next_nvpair(nvl, nvpair)) == NULL) {
1223 		nvlist_free(nvl);
1224 		return (-1);
1225 	}
1226 
1227 	if ((event_class = nvpair_name(nvpair)) == NULL) {
1228 		nvlist_free(nvl);
1229 		return (-1);
1230 	}
1231 	if (nvpair_value_string_array(nvpair, &event_list,
1232 	    &num_elem) != 0) {
1233 		nvlist_free(nvl);
1234 		return (-1);
1235 	}
1236 
1237 	if (insert_class(chan, event_class, event_list, num_elem, sub_id) < 0) {
1238 		nvlist_free(nvl);
1239 		return (-1);
1240 	}
1241 
1242 	nvlist_free(nvl);
1243 
1244 	return (0);
1245 }
1246 
1247 /*
1248  * get_registration - Return the requested class hash chain
1249  */
1250 static int
1251 get_registration(sysevent_channel_descriptor_t *chan, char *databuf,
1252     uint32_t *bufsz, uint32_t class_index)
1253 {
1254 	int num_classes = 0;
1255 	char *nvlbuf = NULL;
1256 	size_t nvlsize;
1257 	nvlist_t *nvl;
1258 	class_lst_t *clist;
1259 	subclass_lst_t *sc_list;
1260 
1261 	if (class_index < 0 || class_index > CLASS_HASH_SZ)
1262 		return (EINVAL);
1263 
1264 	if ((clist = chan->scd_class_list_tbl[class_index]) == NULL) {
1265 		return (ENOENT);
1266 	}
1267 
1268 	if (nvlist_alloc(&nvl, 0, 0) != 0) {
1269 		return (EFAULT);
1270 	}
1271 
1272 	while (clist != NULL) {
1273 		if (nvlist_add_string(nvl, CLASS_NAME, clist->cl_name)
1274 		    != 0) {
1275 			nvlist_free(nvl);
1276 			return (EFAULT);
1277 		}
1278 
1279 		sc_list = clist->cl_subclass_list;
1280 		while (sc_list != NULL) {
1281 			if (nvlist_add_byte_array(nvl, sc_list->sl_name,
1282 			    sc_list->sl_num, MAX_SUBSCRIBERS) != 0) {
1283 				nvlist_free(nvl);
1284 				return (EFAULT);
1285 			}
1286 			sc_list = sc_list->sl_next;
1287 		}
1288 		num_classes++;
1289 		clist = clist->cl_next;
1290 	}
1291 
1292 	if (num_classes == 0) {
1293 		nvlist_free(nvl);
1294 		return (ENOENT);
1295 	}
1296 
1297 	if (nvlist_pack(nvl, &nvlbuf, &nvlsize, NV_ENCODE_NATIVE,
1298 	    KM_SLEEP)
1299 	    != 0) {
1300 		nvlist_free(nvl);
1301 		return (EFAULT);
1302 	}
1303 
1304 	nvlist_free(nvl);
1305 
1306 	if (nvlsize > *bufsz) {
1307 		kmem_free(nvlbuf, nvlsize);
1308 		*bufsz = nvlsize;
1309 		return (EAGAIN);
1310 	}
1311 
1312 	bcopy(nvlbuf, databuf, nvlsize);
1313 	kmem_free(nvlbuf, nvlsize);
1314 
1315 	return (0);
1316 }
1317 
1318 /*
1319  * log_sysevent_register - Register event subscriber for a particular
1320  *		event channel.
1321  */
1322 int
1323 log_sysevent_register(char *channel_name, char *udatabuf, se_pubsub_t *udata)
1324 {
1325 	int error = 0;
1326 	char *kchannel, *databuf = NULL;
1327 	size_t bufsz;
1328 	se_pubsub_t kdata;
1329 	sysevent_channel_descriptor_t *chan;
1330 
1331 	if (copyin(udata, &kdata, sizeof (se_pubsub_t)) == -1) {
1332 		return (EFAULT);
1333 	}
1334 	if (kdata.ps_channel_name_len == 0) {
1335 		return (EINVAL);
1336 	}
1337 	kchannel = kmem_alloc(kdata.ps_channel_name_len, KM_SLEEP);
1338 	if (copyin(channel_name, kchannel, kdata.ps_channel_name_len) == -1) {
1339 		kmem_free(kchannel, kdata.ps_channel_name_len);
1340 		return (EFAULT);
1341 	}
1342 	bufsz = kdata.ps_buflen;
1343 	if (bufsz > 0) {
1344 		databuf = kmem_alloc(bufsz, KM_SLEEP);
1345 		if (copyin(udatabuf, databuf, bufsz) == -1) {
1346 			kmem_free(kchannel, kdata.ps_channel_name_len);
1347 			kmem_free(databuf, bufsz);
1348 			return (EFAULT);
1349 		}
1350 	}
1351 
1352 	mutex_enter(&registered_channel_mutex);
1353 	if (kdata.ps_op != SE_OPEN_REGISTRATION &&
1354 	    kdata.ps_op != SE_CLOSE_REGISTRATION) {
1355 		chan = get_channel(kchannel);
1356 		if (chan == NULL) {
1357 			mutex_exit(&registered_channel_mutex);
1358 			kmem_free(kchannel, kdata.ps_channel_name_len);
1359 			if (bufsz > 0)
1360 				kmem_free(databuf, bufsz);
1361 			return (ENOENT);
1362 		}
1363 	}
1364 
1365 	switch (kdata.ps_op) {
1366 	case SE_OPEN_REGISTRATION:
1367 		if (open_channel(kchannel) != 0) {
1368 			error = ENOMEM;
1369 			if (bufsz > 0)
1370 				kmem_free(databuf, bufsz);
1371 			kmem_free(kchannel, kdata.ps_channel_name_len);
1372 		}
1373 
1374 		mutex_exit(&registered_channel_mutex);
1375 		return (error);
1376 	case SE_CLOSE_REGISTRATION:
1377 		close_channel(kchannel);
1378 		break;
1379 	case SE_BIND_REGISTRATION:
1380 		if ((kdata.ps_id = bind_common(chan, kdata.ps_type)) <= 0)
1381 			error = EBUSY;
1382 		break;
1383 	case SE_UNBIND_REGISTRATION:
1384 		(void) unbind_common(chan, kdata.ps_type, (id_t)kdata.ps_id);
1385 		break;
1386 	case SE_REGISTER:
1387 		if (bufsz == 0) {
1388 			error = EINVAL;
1389 			break;
1390 		}
1391 		if (add_registration(chan, kdata.ps_id, databuf, bufsz) == -1)
1392 			error = EINVAL;
1393 		break;
1394 	case SE_UNREGISTER:
1395 		if (bufsz == 0) {
1396 			error = EINVAL;
1397 			break;
1398 		}
1399 		remove_class(chan, kdata.ps_id, databuf);
1400 		break;
1401 	case SE_CLEANUP:
1402 		/* Cleanup the indicated subscriber or publisher */
1403 		release_id(chan, kdata.ps_type, kdata.ps_id);
1404 		break;
1405 	case SE_GET_REGISTRATION:
1406 		error = get_registration(chan, databuf,
1407 		    &kdata.ps_buflen, kdata.ps_id);
1408 		break;
1409 	default:
1410 		error = ENOTSUP;
1411 	}
1412 
1413 	mutex_exit(&registered_channel_mutex);
1414 
1415 	kmem_free(kchannel, kdata.ps_channel_name_len);
1416 
1417 	if (bufsz > 0) {
1418 		if (copyout(databuf, udatabuf, bufsz) == -1)
1419 			error = EFAULT;
1420 		kmem_free(databuf, bufsz);
1421 	}
1422 
1423 	if (copyout(&kdata, udata, sizeof (se_pubsub_t)) == -1)
1424 		return (EFAULT);
1425 
1426 	return (error);
1427 }
1428 
1429 /*
1430  * log_sysevent_copyout_data - Copyout event data to userland.
1431  *			This is called from modctl(MODEVENTS, MODEVENTS_GETDATA)
1432  *			The buffer size is always sufficient.
1433  */
1434 int
1435 log_sysevent_copyout_data(sysevent_id_t *eid, size_t ubuflen, caddr_t ubuf)
1436 {
1437 	int error = ENOENT;
1438 	log_eventq_t *q;
1439 	sysevent_t *ev;
1440 	sysevent_id_t eid_copy;
1441 
1442 	/*
1443 	 * Copy eid
1444 	 */
1445 	if (copyin(eid, &eid_copy, sizeof (sysevent_id_t)) == -1) {
1446 		return (EFAULT);
1447 	}
1448 
1449 	mutex_enter(&eventq_sent_mutex);
1450 	q = log_eventq_sent;
1451 
1452 	/*
1453 	 * Search for event buffer on the sent queue with matching
1454 	 * event identifier
1455 	 */
1456 	while (q) {
1457 		ev = (sysevent_t *)&q->arg.buf;
1458 
1459 		if (SE_TIME(ev) != eid_copy.eid_ts ||
1460 		    SE_SEQ(ev) != eid_copy.eid_seq) {
1461 			q = q->next;
1462 			continue;
1463 		}
1464 
1465 		if (ubuflen < SE_SIZE(ev)) {
1466 			error = EFAULT;
1467 			break;
1468 		}
1469 		if (copyout(ev, ubuf, SE_SIZE(ev)) != 0) {
1470 			error = EFAULT;
1471 			LOG_DEBUG((CE_NOTE, "Unable to retrieve system event "
1472 			    "0x%" PRIx64 " from queue: EFAULT\n",
1473 			    eid->eid_seq));
1474 		} else {
1475 			error = 0;
1476 		}
1477 		break;
1478 	}
1479 
1480 	mutex_exit(&eventq_sent_mutex);
1481 
1482 	return (error);
1483 }
1484 
1485 /*
1486  * log_sysevent_free_data - Free kernel copy of the event buffer identified
1487  *			by eid (must have already been sent).  Called from
1488  *			modctl(MODEVENTS, MODEVENTS_FREEDATA).
1489  */
1490 int
1491 log_sysevent_free_data(sysevent_id_t *eid)
1492 {
1493 	int error = ENOENT;
1494 	sysevent_t *ev;
1495 	log_eventq_t *q, *prev = NULL;
1496 	sysevent_id_t eid_copy;
1497 
1498 	/*
1499 	 * Copy eid
1500 	 */
1501 	if (copyin(eid, &eid_copy, sizeof (sysevent_id_t)) == -1) {
1502 		return (EFAULT);
1503 	}
1504 
1505 	mutex_enter(&eventq_sent_mutex);
1506 	q = log_eventq_sent;
1507 
1508 	/*
1509 	 * Look for the event to be freed on the sent queue.  Due to delayed
1510 	 * processing of the event, it may not be on the sent queue yet.
1511 	 * It is up to the user to retry the free operation to ensure that the
1512 	 * event is properly freed.
1513 	 */
1514 	while (q) {
1515 		ev = (sysevent_t *)&q->arg.buf;
1516 
1517 		if (SE_TIME(ev) != eid_copy.eid_ts ||
1518 		    SE_SEQ(ev) != eid_copy.eid_seq) {
1519 			prev = q;
1520 			q = q->next;
1521 			continue;
1522 		}
1523 		/*
1524 		 * Take it out of log_eventq_sent and free it
1525 		 */
1526 		if (prev) {
1527 			prev->next = q->next;
1528 		} else {
1529 			log_eventq_sent = q->next;
1530 		}
1531 		free_packed_event(ev);
1532 		error = 0;
1533 		break;
1534 	}
1535 
1536 	mutex_exit(&eventq_sent_mutex);
1537 
1538 	return (error);
1539 }
1540 
1541 /*
1542  * log_sysevent_flushq - Begin or resume event buffer delivery.  If neccessary,
1543  *			create log_event_deliver thread or wake it up
1544  */
1545 /*ARGSUSED*/
1546 void
1547 log_sysevent_flushq(int cmd, uint_t flag)
1548 {
1549 	mutex_enter(&eventq_head_mutex);
1550 
1551 	/*
1552 	 * Start the event delivery thread
1553 	 * Mark the upcall status as active since we should
1554 	 * now be able to begin emptying the queue normally.
1555 	 */
1556 	if (!async_thread) {
1557 		sysevent_upcall_status = 0;
1558 		sysevent_daemon_init = 1;
1559 		setup_ddi_poststartup();
1560 		async_thread = thread_create(NULL, 0, log_event_deliver,
1561 		    NULL, 0, &p0, TS_RUN, minclsyspri);
1562 	}
1563 
1564 	log_event_delivery = LOGEVENT_DELIVERY_CONT;
1565 	cv_signal(&log_event_cv);
1566 	mutex_exit(&eventq_head_mutex);
1567 }
1568 
1569 /*
1570  * log_sysevent_filename - Called by syseventd via
1571  *			modctl(MODEVENTS, MODEVENTS_SET_DOOR_UPCALL_FILENAME)
1572  *			to subsequently bind the event_door.
1573  *
1574  *			This routine is called everytime syseventd (re)starts
1575  *			and must therefore replay any events buffers that have
1576  *			been sent but not freed.
1577  *
1578  *			Event buffer delivery begins after a call to
1579  *			log_sysevent_flushq().
1580  */
1581 int
1582 log_sysevent_filename(char *file)
1583 {
1584 	mutex_enter(&event_door_mutex);
1585 
1586 	(void) strlcpy(logevent_door_upcall_filename, file,
1587 	    sizeof (logevent_door_upcall_filename));
1588 
1589 	/* Unbind old event door */
1590 	if (event_door != NULL)
1591 		door_ki_rele(event_door);
1592 	/* Establish door connection with user event daemon (syseventd) */
1593 	if (door_ki_open(logevent_door_upcall_filename, &event_door) != 0)
1594 		event_door = NULL;
1595 
1596 	mutex_exit(&event_door_mutex);
1597 
1598 	/*
1599 	 * We are called when syseventd restarts. Move all sent, but
1600 	 * not committed events from log_eventq_sent to log_eventq_head.
1601 	 * Do it in proper order to maintain increasing event id.
1602 	 */
1603 	mutex_enter(&eventq_head_mutex);
1604 
1605 	mutex_enter(&eventq_sent_mutex);
1606 	while (log_eventq_sent) {
1607 		log_eventq_t *tmp = log_eventq_sent->next;
1608 		log_eventq_sent->next = log_eventq_head;
1609 		if (log_eventq_head == NULL) {
1610 			ASSERT(log_eventq_cnt == 0);
1611 			log_eventq_tail = log_eventq_sent;
1612 			log_eventq_tail->next = NULL;
1613 		} else if (log_eventq_head == log_eventq_tail) {
1614 			ASSERT(log_eventq_cnt == 1);
1615 			ASSERT(log_eventq_head->next == NULL);
1616 			ASSERT(log_eventq_tail->next == NULL);
1617 		}
1618 		log_eventq_head = log_eventq_sent;
1619 		log_eventq_sent = tmp;
1620 		log_eventq_cnt++;
1621 	}
1622 	mutex_exit(&eventq_sent_mutex);
1623 	mutex_exit(&eventq_head_mutex);
1624 
1625 	return (0);
1626 }
1627 
1628 /*
1629  * queue_sysevent - queue an event buffer
1630  */
1631 static int
1632 queue_sysevent(sysevent_t *ev, sysevent_id_t *eid, int flag)
1633 {
1634 	log_eventq_t *q;
1635 
1636 	ASSERT(flag == SE_SLEEP || flag == SE_NOSLEEP);
1637 
1638 	DTRACE_SYSEVENT2(post, evch_bind_t *, NULL, sysevent_impl_t *, ev);
1639 
1640 restart:
1641 
1642 	/* Max Q size exceeded */
1643 	mutex_enter(&event_qfull_mutex);
1644 	if (sysevent_daemon_init && log_eventq_cnt >= logevent_max_q_sz) {
1645 		/*
1646 		 * If queue full and transport down, return no transport
1647 		 */
1648 		if (sysevent_upcall_status != 0) {
1649 			mutex_exit(&event_qfull_mutex);
1650 			free_packed_event(ev);
1651 			eid->eid_seq = UINT64_C(0);
1652 			eid->eid_ts = INT64_C(0);
1653 			return (SE_NO_TRANSPORT);
1654 		}
1655 		if (flag == SE_NOSLEEP) {
1656 			mutex_exit(&event_qfull_mutex);
1657 			free_packed_event(ev);
1658 			eid->eid_seq = UINT64_C(0);
1659 			eid->eid_ts = INT64_C(0);
1660 			return (SE_EQSIZE);
1661 		}
1662 		event_qfull_blocked++;
1663 		cv_wait(&event_qfull_cv, &event_qfull_mutex);
1664 		event_qfull_blocked--;
1665 		mutex_exit(&event_qfull_mutex);
1666 		goto restart;
1667 	}
1668 	mutex_exit(&event_qfull_mutex);
1669 
1670 	mutex_enter(&eventq_head_mutex);
1671 
1672 	/* Time stamp and assign ID */
1673 	SE_SEQ(ev) = eid->eid_seq = atomic_add_64_nv(&kernel_event_id,
1674 	    (uint64_t)1);
1675 	SE_TIME(ev) = eid->eid_ts = gethrtime();
1676 
1677 	LOG_DEBUG1((CE_CONT, "log_sysevent: class=%d type=%d id=0x%llx\n",
1678 	    SE_CLASS(ev), SE_SUBCLASS(ev), (longlong_t)SE_SEQ(ev)));
1679 
1680 	/*
1681 	 * Put event on eventq
1682 	 */
1683 	q = (log_eventq_t *)((caddr_t)ev - offsetof(log_eventq_t, arg.buf));
1684 	q->next = NULL;
1685 	if (log_eventq_head == NULL) {
1686 		ASSERT(log_eventq_cnt == 0);
1687 		log_eventq_head = q;
1688 		log_eventq_tail = q;
1689 	} else {
1690 		if (log_eventq_head == log_eventq_tail) {
1691 			ASSERT(log_eventq_cnt == 1);
1692 			ASSERT(log_eventq_head->next == NULL);
1693 			ASSERT(log_eventq_tail->next == NULL);
1694 		}
1695 		log_eventq_tail->next = q;
1696 		log_eventq_tail = q;
1697 	}
1698 	log_eventq_cnt++;
1699 
1700 	/* Signal event delivery thread */
1701 	if (log_eventq_cnt == 1) {
1702 		cv_signal(&log_event_cv);
1703 	}
1704 	mutex_exit(&eventq_head_mutex);
1705 
1706 	return (0);
1707 }
1708 
1709 /*
1710  * log_sysevent - kernel system event logger.
1711  *
1712  * Returns SE_ENOMEM if buf allocation failed or SE_EQSIZE if the
1713  * maximum event queue size will be exceeded
1714  * Returns 0 for successfully queued event buffer
1715  */
1716 int
1717 log_sysevent(sysevent_t *ev, int flag, sysevent_id_t *eid)
1718 {
1719 	sysevent_t *ev_copy;
1720 	int rval;
1721 
1722 	ASSERT(flag == SE_SLEEP || flag == SE_NOSLEEP);
1723 	ASSERT(!(flag == SE_SLEEP && servicing_interrupt()));
1724 
1725 	ev_copy = se_repack(ev, flag);
1726 	if (ev_copy == NULL) {
1727 		ASSERT(flag == SE_NOSLEEP);
1728 		return (SE_ENOMEM);
1729 	}
1730 	rval = queue_sysevent(ev_copy, eid, flag);
1731 	ASSERT(rval == 0 || rval == SE_ENOMEM || rval == SE_EQSIZE ||
1732 	    rval == SE_NO_TRANSPORT);
1733 	ASSERT(!(flag == SE_SLEEP && (rval == SE_EQSIZE || rval == SE_ENOMEM)));
1734 	return (rval);
1735 }
1736 
1737 /*
1738  * Publish EC_DEV_ADD and EC_DEV_REMOVE events from devfsadm to lofi.
1739  * This interface is needed to pass device link names to the lofi driver,
1740  * to be returned via ioctl() to the lofiadm command.
1741  * The problem is, if lofiadm is executed in local zone, there is no
1742  * mechanism to announce the device name from the /dev tree back to lofiadm,
1743  * as sysevents are not accessible from local zone and devfsadmd is only
1744  * running in global zone.
1745  *
1746  * Delayed/missed events are not fatal for lofi, as the device name returned
1747  * to lofiadm is for information and can be re-queried with listing
1748  * mappings with lofiadm command.
1749  *
1750  * Once we have a better method, this interface should be reworked.
1751  */
1752 static void
1753 notify_lofi(sysevent_t *ev)
1754 {
1755 	static evchan_t *devfs_chan = NULL;
1756 	nvlist_t *nvlist;
1757 	int ret;
1758 
1759 	if ((strcmp(EC_DEV_ADD, sysevent_get_class_name(ev)) != 0) &&
1760 	    (strcmp(EC_DEV_REMOVE, sysevent_get_class_name(ev)) != 0))
1761 		return;
1762 
1763 	/* only bind once to avoid bind/unbind storm on busy system */
1764 	if (devfs_chan == NULL) {
1765 		if ((ret = sysevent_evc_bind("devfsadm_event_channel",
1766 		    &devfs_chan, EVCH_CREAT | EVCH_HOLD_PEND)) != 0) {
1767 			cmn_err(CE_CONT, "sysevent_evc_bind failed: %d\n", ret);
1768 			return;
1769 		}
1770 	}
1771 
1772 	(void) sysevent_get_attr_list(ev, &nvlist);
1773 	(void) sysevent_evc_publish(devfs_chan, sysevent_get_class_name(ev),
1774 	    sysevent_get_subclass_name(ev), "illumos", EC_DEVFS, nvlist,
1775 	    EVCH_SLEEP);
1776 
1777 	nvlist_free(nvlist);
1778 }
1779 
1780 /*
1781  * log_usr_sysevent - user system event logger
1782  *			Private to devfsadm and accessible only via
1783  *			modctl(MODEVENTS, MODEVENTS_POST_EVENT)
1784  */
1785 int
1786 log_usr_sysevent(sysevent_t *ev, int ev_size, sysevent_id_t *eid)
1787 {
1788 	int ret, copy_sz;
1789 	sysevent_t *ev_copy;
1790 	sysevent_id_t new_eid;
1791 	log_eventq_t *qcopy;
1792 
1793 	copy_sz = ev_size + offsetof(log_eventq_t, arg) +
1794 	    offsetof(log_event_upcall_arg_t, buf);
1795 	qcopy = kmem_zalloc(copy_sz, KM_SLEEP);
1796 	ev_copy = (sysevent_t *)&qcopy->arg.buf;
1797 
1798 	/*
1799 	 * Copy event
1800 	 */
1801 	if (copyin(ev, ev_copy, ev_size) == -1) {
1802 		kmem_free(qcopy, copy_sz);
1803 		return (EFAULT);
1804 	}
1805 
1806 	notify_lofi(ev_copy);
1807 
1808 	if ((ret = queue_sysevent(ev_copy, &new_eid, SE_NOSLEEP)) != 0) {
1809 		if (ret == SE_ENOMEM || ret == SE_EQSIZE)
1810 			return (EAGAIN);
1811 		else
1812 			return (EIO);
1813 	}
1814 
1815 	if (copyout(&new_eid, eid, sizeof (sysevent_id_t)) == -1) {
1816 		return (EFAULT);
1817 	}
1818 
1819 	return (0);
1820 }
1821 
1822 
1823 
1824 int
1825 ddi_log_sysevent(
1826 	dev_info_t		*dip,
1827 	char			*vendor,
1828 	char			*class,
1829 	char			*subclass,
1830 	nvlist_t		*attr_list,
1831 	sysevent_id_t		*eidp,
1832 	int			sleep_flag)
1833 {
1834 	sysevent_attr_list_t	*list = (sysevent_attr_list_t *)attr_list;
1835 	char			pubstr[32];
1836 	sysevent_t		*event;
1837 	sysevent_id_t		eid;
1838 	const char		*drvname;
1839 	char			*publisher;
1840 	int			se_flag;
1841 	int			rval;
1842 	int			n;
1843 
1844 	if (sleep_flag == DDI_SLEEP && servicing_interrupt()) {
1845 		cmn_err(CE_NOTE, "!ddi_log_syevent: driver %s%d - cannot queue "
1846 		    "event from interrupt context with sleep semantics\n",
1847 		    ddi_driver_name(dip), ddi_get_instance(dip));
1848 		return (DDI_ECONTEXT);
1849 	}
1850 
1851 	drvname = ddi_driver_name(dip);
1852 	n = strlen(vendor) + strlen(drvname) + 7;
1853 	if (n < sizeof (pubstr)) {
1854 		publisher = pubstr;
1855 	} else {
1856 		publisher = kmem_alloc(n,
1857 		    (sleep_flag == DDI_SLEEP) ? KM_SLEEP : KM_NOSLEEP);
1858 		if (publisher == NULL) {
1859 			return (DDI_ENOMEM);
1860 		}
1861 	}
1862 	(void) strcpy(publisher, vendor);
1863 	(void) strcat(publisher, ":kern:");
1864 	(void) strcat(publisher, drvname);
1865 
1866 	se_flag = (sleep_flag == DDI_SLEEP) ? SE_SLEEP : SE_NOSLEEP;
1867 	event = sysevent_alloc(class, subclass, publisher, se_flag);
1868 
1869 	if (publisher != pubstr) {
1870 		kmem_free(publisher, n);
1871 	}
1872 
1873 	if (event == NULL) {
1874 		return (DDI_ENOMEM);
1875 	}
1876 
1877 	if (list) {
1878 		(void) sysevent_attach_attributes(event, list);
1879 	}
1880 
1881 	rval = log_sysevent(event, se_flag, &eid);
1882 	if (list) {
1883 		sysevent_detach_attributes(event);
1884 	}
1885 	sysevent_free(event);
1886 	if (rval == 0) {
1887 		if (eidp) {
1888 			eidp->eid_seq = eid.eid_seq;
1889 			eidp->eid_ts = eid.eid_ts;
1890 		}
1891 		return (DDI_SUCCESS);
1892 	}
1893 	if (rval == SE_NO_TRANSPORT)
1894 		return (DDI_ETRANSPORT);
1895 
1896 	ASSERT(rval == SE_ENOMEM || rval == SE_EQSIZE);
1897 	return ((rval == SE_ENOMEM) ? DDI_ENOMEM : DDI_EBUSY);
1898 }
1899 
1900 uint64_t
1901 log_sysevent_new_id(void)
1902 {
1903 	return (atomic_add_64_nv(&kernel_event_id, (uint64_t)1));
1904 }
1905