xref: /illumos-gate/usr/src/uts/common/os/log_sysevent.c (revision 3bf53144544c5875536acef3d1214ec06c34adad)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
24  * Copyright 2016 Toomas Soome <tsoome@me.com>
25  */
26 
27 #include <sys/types.h>
28 #include <sys/errno.h>
29 #include <sys/stropts.h>
30 #include <sys/debug.h>
31 #include <sys/ddi.h>
32 #include <sys/sunddi.h>
33 #include <sys/vmem.h>
34 #include <sys/cmn_err.h>
35 #include <sys/callb.h>
36 #include <sys/sysevent.h>
37 #include <sys/sysevent_impl.h>
38 #include <sys/sysevent/dev.h>
39 #include <sys/modctl.h>
40 #include <sys/lofi_impl.h>
41 #include <sys/sysmacros.h>
42 #include <sys/disp.h>
43 #include <sys/autoconf.h>
44 #include <sys/atomic.h>
45 #include <sys/sdt.h>
46 
47 /* for doors */
48 #include <sys/pathname.h>
49 #include <sys/door.h>
50 #include <sys/kmem.h>
51 #include <sys/cpuvar.h>
52 #include <sys/fs/snode.h>
53 
54 /*
55  * log_sysevent.c - Provides the interfaces for kernel event publication
56  *			to the sysevent event daemon (syseventd).
57  */
58 
/*
 * Debug stuff
 *
 * log_event_debug is a run-time verbosity knob: any non-zero value enables
 * LOG_DEBUG() output, and values greater than 1 additionally enable
 * LOG_DEBUG1() output on DEBUG kernels.  Both macros take one complete,
 * parenthesized cmn_err() argument list, e.g.
 *
 *	LOG_DEBUG((CE_CONT, "value = %d\n", value));
 *
 * Each macro is wrapped in do { } while (0) so that it always expands to
 * exactly one statement and can never capture the "else" of an enclosing
 * if statement.
 */
static int log_event_debug = 0;
#define	LOG_DEBUG(args) \
	do { if (log_event_debug) cmn_err args; } while (0)
#ifdef DEBUG
#define	LOG_DEBUG1(args) \
	do { if (log_event_debug > 1) cmn_err args; } while (0)
#else
#define	LOG_DEBUG1(args)
#endif
69 
/*
 * Local static vars
 */
/*
 * queue of event buffers sent to syseventd.  log_event_deliver() pushes each
 * successfully delivered buffer onto the front of this list while holding
 * eventq_sent_mutex.
 */
static log_eventq_t *log_eventq_sent = NULL;

/*
 * Count of event buffers in the queue awaiting delivery (the list headed by
 * log_eventq_head).  log_event_deliver() decrements it each time a buffer is
 * moved to the sent queue and asserts it is zero once the list is empty.
 */
int log_eventq_cnt = 0;

/* queue of event buffers awaiting delivery to syseventd */
static log_eventq_t *log_eventq_head = NULL;
static log_eventq_t *log_eventq_tail = NULL;
/* NOTE(review): presumably the source of event sequence numbers - confirm */
static uint64_t kernel_event_id = 0;
/* nvlist encoding used when sizing and packing event attribute lists */
static int encoding = NV_ENCODE_NATIVE;
86 
/*
 * log event delivery flag - the values taken by log_event_delivery below.
 * log_event_deliver() stops draining the queue while the flag is HOLD and
 * loops again without sleeping when it finds the flag set to CONT.
 */
#define	LOGEVENT_DELIVERY_OK	0	/* OK to deliver event buffers */
#define	LOGEVENT_DELIVERY_CONT	1	/* Continue to deliver event buffers */
#define	LOGEVENT_DELIVERY_HOLD	2	/* Hold delivering of event buffers */

/*
 * Tunable maximum event buffer queue size. Size depends on how many events
 * the queue must hold when syseventd is not available, for example during
 * system startup. Experience showed that more than 2000 events could be posted
 * due to correctable memory errors.
 */
int logevent_max_q_sz = 5000;


/* Current delivery state; delivery starts out on hold. */
static int log_event_delivery = LOGEVENT_DELIVERY_HOLD;
/* NOTE(review): presumably the path of the syseventd upcall door - confirm */
static char logevent_door_upcall_filename[MAXPATHLEN];

static door_handle_t event_door = NULL;		/* Door for upcalls */
static kmutex_t event_door_mutex;		/* To protect event_door */
106 
/*
 * async thread-related variables
 *
 * eventq_head_mutex - synchronizes access to the kernel event queue
 *
 * eventq_sent_mutex - synchronizes access to the queue of events sent to
 *			userlevel
 *
 * log_event_cv - condition variable signaled when an event has arrived or
 *			userlevel ready to process event buffers
 *
 * async_thread - asynchronous event delivery thread to userlevel daemon.
 *
 * event_qfull_mutex, event_qfull_cv - log_event_deliver() signals this
 *			condition variable when event_qfull_blocked is
 *			non-zero and either queue space has become available
 *			or the upcall transport has failed
 *
 * event_qfull_blocked - non-zero while there are waiters on event_qfull_cv
 *			(NOTE(review): presumably a count of blocked
 *			publishers - confirm against the posting path)
 *
 * sysevent_upcall_status - status of the door upcall link; set to 0 by
 *			log_event_deliver() when delivery is functional and
 *			to the upcall error otherwise
 *
 * registered_channel_mutex - lock for the channel registration database
 */
static kmutex_t eventq_head_mutex;
static kmutex_t eventq_sent_mutex;
static kcondvar_t log_event_cv;
static kthread_id_t async_thread = NULL;

static kmutex_t event_qfull_mutex;
static kcondvar_t event_qfull_cv;
static int event_qfull_blocked = 0;

static int sysevent_upcall_status = -1;
static kmutex_t registered_channel_mutex;

/*
 * Indicates the syseventd daemon has begun taking events
 */
int sysevent_daemon_init = 0;
138 
/*
 * Back-off delay when door_ki_upcall returns EAGAIN.  Typically
 * caused by the server process doing a forkall().  Since all threads
 * but the thread actually doing the forkall() need to be quiesced,
 * the fork may take some time.  The min/max pause are in units
 * of clock ticks.
 *
 * log_event_upcall() starts at LOG_EVENT_MIN_PAUSE and doubles the pause
 * after every EAGAIN, capping it at LOG_EVENT_MAX_PAUSE.
 */
#define	LOG_EVENT_MIN_PAUSE	8
#define	LOG_EVENT_MAX_PAUSE	128

/*
 * State for log_event_pause(): event_pause_state is 1 while a pause is in
 * progress and is cleared by log_event_busy_timeout(), which also signals
 * event_pause_cv under event_pause_mutex.
 */
static kmutex_t	event_pause_mutex;
static kcondvar_t event_pause_cv;
static int event_pause_state = 0;

/* Cached device links for lofi. */
lofi_nvl_t lofi_devlink_cache;
155 
/*
 * log_event_busy_timeout - timeout callback armed by log_event_pause().
 *
 * Clears event_pause_state and wakes the thread sleeping on event_pause_cv.
 * Both steps are done under event_pause_mutex so the waiter cannot miss the
 * wakeup between testing the state and going to sleep.  The argument is
 * unused.
 */
/*ARGSUSED*/
static void
log_event_busy_timeout(void *arg)
{
	mutex_enter(&event_pause_mutex);
	event_pause_state = 0;
	cv_signal(&event_pause_cv);
	mutex_exit(&event_pause_mutex);
}
165 
/*
 * log_event_pause - Block the calling thread until a timeout of nticks
 *			clock ticks has fired.
 *
 * Arms a timeout that runs log_event_busy_timeout() and then sleeps on
 * event_pause_cv until that callback has cleared event_pause_state.  If
 * timeout() returns 0 the wait is skipped and the function returns at once.
 */
static void
log_event_pause(int nticks)
{
	timeout_id_t id;

	/*
	 * Only one use of log_event_pause at a time
	 */
	ASSERT(event_pause_state == 0);

	/* Mark the pause active before arming the timer that clears it. */
	event_pause_state = 1;
	id = timeout(log_event_busy_timeout, NULL, nticks);
	if (id != 0) {
		mutex_enter(&event_pause_mutex);
		/* Loop to tolerate wakeups that arrive before the callback */
		while (event_pause_state)
			cv_wait(&event_pause_cv, &event_pause_mutex);
		mutex_exit(&event_pause_mutex);
	}
	/* Reset the state for the case where no timeout was armed. */
	event_pause_state = 0;
}
186 
187 
188 /*
189  * log_event_upcall - Perform the upcall to syseventd for event buffer delivery.
190  * 			Check for rebinding errors
191  * 			This buffer is reused to by the syseventd door_return
192  *			to hold the result code
193  */
194 static int
195 log_event_upcall(log_event_upcall_arg_t *arg)
196 {
197 	int error;
198 	size_t size;
199 	sysevent_t *ev;
200 	door_arg_t darg, save_arg;
201 	int retry;
202 	int neagain = 0;
203 	int neintr = 0;
204 	int nticks = LOG_EVENT_MIN_PAUSE;
205 
206 	/* Initialize door args */
207 	ev = (sysevent_t *)&arg->buf;
208 	size = sizeof (log_event_upcall_arg_t) + SE_PAYLOAD_SZ(ev);
209 
210 	darg.rbuf = (char *)arg;
211 	darg.data_ptr = (char *)arg;
212 	darg.rsize = size;
213 	darg.data_size = size;
214 	darg.desc_ptr = NULL;
215 	darg.desc_num = 0;
216 
217 	LOG_DEBUG1((CE_CONT, "log_event_upcall: 0x%llx\n",
218 	    (longlong_t)SE_SEQ((sysevent_t *)&arg->buf)));
219 
220 	save_arg = darg;
221 	for (retry = 0; ; retry++) {
222 
223 		mutex_enter(&event_door_mutex);
224 		if (event_door == NULL) {
225 			mutex_exit(&event_door_mutex);
226 
227 			return (EBADF);
228 		}
229 
230 		if ((error = door_ki_upcall_limited(event_door, &darg, NULL,
231 		    SIZE_MAX, 0)) == 0) {
232 			mutex_exit(&event_door_mutex);
233 			break;
234 		}
235 
236 		/*
237 		 * EBADF is handled outside the switch below because we need to
238 		 * hold event_door_mutex a bit longer
239 		 */
240 		if (error == EBADF) {
241 			/* Server died */
242 			door_ki_rele(event_door);
243 			event_door = NULL;
244 
245 			mutex_exit(&event_door_mutex);
246 			return (error);
247 		}
248 
249 		mutex_exit(&event_door_mutex);
250 
251 		/*
252 		 * The EBADF case is already handled above with event_door_mutex
253 		 * held
254 		 */
255 		switch (error) {
256 		case EINTR:
257 			neintr++;
258 			log_event_pause(2);
259 			darg = save_arg;
260 			break;
261 		case EAGAIN:
262 			/* cannot deliver upcall - process may be forking */
263 			neagain++;
264 			log_event_pause(nticks);
265 			nticks <<= 1;
266 			if (nticks > LOG_EVENT_MAX_PAUSE)
267 				nticks = LOG_EVENT_MAX_PAUSE;
268 			darg = save_arg;
269 			break;
270 		default:
271 			cmn_err(CE_CONT,
272 			    "log_event_upcall: door_ki_upcall error %d\n",
273 			    error);
274 			return (error);
275 		}
276 	}
277 
278 	if (neagain > 0 || neintr > 0) {
279 		LOG_DEBUG((CE_CONT, "upcall: eagain=%d eintr=%d nticks=%d\n",
280 		    neagain, neintr, nticks));
281 	}
282 
283 	LOG_DEBUG1((CE_CONT, "log_event_upcall:\n\t"
284 	    "error=%d rptr1=%p rptr2=%p dptr2=%p ret1=%x ret2=%x\n",
285 	    error, (void *)arg, (void *)darg.rbuf,
286 	    (void *)darg.data_ptr,
287 	    *((int *)(darg.rbuf)), *((int *)(darg.data_ptr))));
288 
289 	if (!error) {
290 		/*
291 		 * upcall was successfully executed. Check return code.
292 		 */
293 		error = *((int *)(darg.rbuf));
294 	}
295 
296 	return (error);
297 }
298 
/*
 * log_event_deliver - event delivery thread
 *			Deliver all events on the event queue to syseventd.
 *			If the daemon can not process events, stop event
 *			delivery and wait for an indication from the
 *			daemon to resume delivery.
 *
 *			Once all event buffers have been delivered, wait
 *			until there are more to deliver.
 *
 * The function never returns.  eventq_head_mutex is held except while an
 * upcall is in progress and while sleeping on log_event_cv.  Each pass of
 * the outer loop drains the pending queue, records the outcome in
 * sysevent_upcall_status, and then sleeps until log_event_cv is signaled.
 */
static void
log_event_deliver()
{
	log_eventq_t *q;
	int upcall_err;
	callb_cpr_t cprinfo;

	CALLB_CPR_INIT(&cprinfo, &eventq_head_mutex, callb_generic_cpr,
	    "logevent");

	/*
	 * eventq_head_mutex is exited (released) when there are no more
	 * events to process from the eventq in cv_wait().
	 */
	mutex_enter(&eventq_head_mutex);

	for (;;) {
		LOG_DEBUG1((CE_CONT, "log_event_deliver: head = %p\n",
		    (void *)log_eventq_head));

		upcall_err = 0;
		q = log_eventq_head;

		while (q) {
			/* Delivery on hold is reported to the switch as EAGAIN */
			if (log_event_delivery == LOGEVENT_DELIVERY_HOLD) {
				upcall_err = EAGAIN;
				break;
			}

			log_event_delivery = LOGEVENT_DELIVERY_OK;

			/*
			 * Release event queue lock during upcall to
			 * syseventd
			 */
			mutex_exit(&eventq_head_mutex);
			if ((upcall_err = log_event_upcall(&q->arg)) != 0) {
				/* Re-take the lock before leaving the loop */
				mutex_enter(&eventq_head_mutex);
				break;
			}

			/*
			 * We may be able to add entries to
			 * the queue now.
			 *
			 * The first test is made without any lock held;
			 * event_qfull_blocked is re-checked under
			 * event_qfull_mutex before signaling.
			 */
			if (event_qfull_blocked > 0 &&
			    log_eventq_cnt < logevent_max_q_sz) {
				mutex_enter(&event_qfull_mutex);
				if (event_qfull_blocked > 0) {
					cv_signal(&event_qfull_cv);
				}
				mutex_exit(&event_qfull_mutex);
			}

			mutex_enter(&eventq_head_mutex);

			/*
			 * Daemon restart can cause entries to be moved from
			 * the sent queue and put back on the event queue.
			 * If this has occurred, replay event queue
			 * processing from the new queue head.
			 */
			if (q != log_eventq_head) {
				q = log_eventq_head;
				LOG_DEBUG((CE_CONT, "log_event_deliver: "
				    "door upcall/daemon restart race\n"));
			} else {
				log_eventq_t *next;

				/*
				 * Move the event to the sent queue when a
				 * successful delivery has been made.
				 */
				mutex_enter(&eventq_sent_mutex);
				next = q->next;
				q->next = log_eventq_sent;
				log_eventq_sent = q;
				q = next;
				log_eventq_head = q;
				log_eventq_cnt--;
				if (q == NULL) {
					ASSERT(log_eventq_cnt == 0);
					log_eventq_tail = NULL;
				}
				mutex_exit(&eventq_sent_mutex);
			}
		}

		switch (upcall_err) {
		case 0:
			/*
			 * Success. The queue is empty.
			 */
			sysevent_upcall_status = 0;
			break;
		case EAGAIN:
			/*
			 * Delivery is on hold (but functional).
			 */
			sysevent_upcall_status = 0;
			/*
			 * If the user has already signaled for delivery
			 * resumption, continue.  Otherwise, we wait until
			 * we are signaled to continue.
			 */
			if (log_event_delivery == LOGEVENT_DELIVERY_CONT)
				continue;
			log_event_delivery = LOGEVENT_DELIVERY_HOLD;

			LOG_DEBUG1((CE_CONT, "log_event_deliver: EAGAIN\n"));
			break;
		default:
			LOG_DEBUG((CE_CONT, "log_event_deliver: "
			    "upcall err %d\n", upcall_err));
			sysevent_upcall_status = upcall_err;
			/*
			 * Signal everyone waiting that transport is down
			 */
			if (event_qfull_blocked > 0) {
				mutex_enter(&event_qfull_mutex);
				if (event_qfull_blocked > 0) {
					cv_broadcast(&event_qfull_cv);
				}
				mutex_exit(&event_qfull_mutex);
			}
			break;
		}

		/* Sleep (CPR-safe) until new work or a resume is signaled */
		CALLB_CPR_SAFE_BEGIN(&cprinfo);
		cv_wait(&log_event_cv, &eventq_head_mutex);
		CALLB_CPR_SAFE_END(&cprinfo, &eventq_head_mutex);
	}
	/* NOTREACHED */
}
443 
/*
 * Set up the nvlist based data cache. Used by lofi to find
 * device name for mapped file.
 *
 * Initializes the cache's lock and condition variable and allocates an
 * empty, unique-name nvlist in cache->ln_data.  The nvlist_alloc() result is
 * deliberately ignored; the allocation is made with KM_SLEEP.
 */
static void
lofi_nvl_init(lofi_nvl_t *cache)
{
	mutex_init(&cache->ln_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&cache->ln_cv, NULL, CV_DRIVER, NULL);
	(void) nvlist_alloc(&cache->ln_data, NV_UNIQUE_NAME, KM_SLEEP);
}
455 
/*
 * log_event_init - Allocate and initialize log_event data structures.
 *
 * Sets up the lofi devlink cache, every mutex and condition variable used by
 * the event queue, door upcall, queue-full and pause machinery, the channel
 * registration lock, and finally calls sysevent_evc_init().
 */
void
log_event_init()
{
	/* Set up devlink cache for lofi. */
	lofi_nvl_init(&lofi_devlink_cache);

	/* Protects event_door */
	mutex_init(&event_door_mutex, NULL, MUTEX_DEFAULT, NULL);

	/* Pending and sent event queues, and the delivery thread wakeup */
	mutex_init(&eventq_head_mutex, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&eventq_sent_mutex, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&log_event_cv, NULL, CV_DEFAULT, NULL);

	/* Queue-full waiters */
	mutex_init(&event_qfull_mutex, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&event_qfull_cv, NULL, CV_DEFAULT, NULL);

	/* log_event_pause() / log_event_busy_timeout() handshake */
	mutex_init(&event_pause_mutex, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&event_pause_cv, NULL, CV_DEFAULT, NULL);

	mutex_init(&registered_channel_mutex, NULL, MUTEX_DEFAULT, NULL);
	sysevent_evc_init();
}
480 
481 /*
482  * The following routines are used by kernel event publishers to
483  * allocate, append and free event buffers
484  */
/*
 * sysevent_alloc - Allocate new eventq struct.  This element contains
 *			an event buffer that will be used in a subsequent
 *			call to log_sysevent.
 *
 * class, subclass and pub are NUL-terminated strings that are copied into
 * the new buffer; none may be NULL (asserted).  flag is passed unchanged to
 * kmem_zalloc().
 *
 * Returns a pointer to the event buffer embedded in a freshly allocated
 * log_eventq_t, or NULL if the allocation fails.  The buffer starts with no
 * attribute list and with its payload size set to the space taken by the
 * three strings.
 */
sysevent_t *
sysevent_alloc(char *class, char *subclass, char *pub, int flag)
{
	int payload_sz;
	int class_sz, subclass_sz, pub_sz;
	int aligned_class_sz, aligned_subclass_sz, aligned_pub_sz;
	sysevent_t *ev;
	log_eventq_t *q;

	ASSERT(class != NULL);
	ASSERT(subclass != NULL);
	ASSERT(pub != NULL);

	/*
	 * Calculate and reserve space for the class, subclass and
	 * publisher strings in the event buffer
	 */
	class_sz = strlen(class) + 1;
	subclass_sz = strlen(subclass) + 1;
	pub_sz = strlen(pub) + 1;

	ASSERT((class_sz <= MAX_CLASS_LEN) && (subclass_sz
	    <= MAX_SUBCLASS_LEN) && (pub_sz <= MAX_PUB_LEN));

	/* String sizes must be 64-bit aligned in the event buffer */
	aligned_class_sz = SE_ALIGN(class_sz);
	aligned_subclass_sz = SE_ALIGN(subclass_sz);
	aligned_pub_sz = SE_ALIGN(pub_sz);

	/*
	 * NOTE(review): one uint64_t is deducted per string plus one more
	 * overall; presumably this is space already reserved inside
	 * sysevent_impl_t - confirm against sys/sysevent_impl.h.
	 */
	payload_sz = (aligned_class_sz - sizeof (uint64_t)) +
	    (aligned_subclass_sz - sizeof (uint64_t)) +
	    (aligned_pub_sz - sizeof (uint64_t)) - sizeof (uint64_t);

	/*
	 * Allocate event buffer plus additional sysevent queue
	 * and payload overhead.
	 */
	q = kmem_zalloc(sizeof (log_eventq_t) + payload_sz, flag);
	if (q == NULL) {
		return (NULL);
	}

	/* Initialize the event buffer data */
	ev = (sysevent_t *)&q->arg.buf;
	SE_VERSION(ev) = SYS_EVENT_VERSION;
	bcopy(class, SE_CLASS_NAME(ev), class_sz);

	/* The subclass string follows the aligned class string */
	SE_SUBCLASS_OFF(ev) = SE_ALIGN(offsetof(sysevent_impl_t, se_class_name))
		+ aligned_class_sz;
	bcopy(subclass, SE_SUBCLASS_NAME(ev), subclass_sz);

	/* The publisher string follows the aligned subclass string */
	SE_PUB_OFF(ev) = SE_SUBCLASS_OFF(ev) + aligned_subclass_sz;
	bcopy(pub, SE_PUB_NAME(ev), pub_sz);

	SE_ATTR_PTR(ev) = UINT64_C(0);
	SE_PAYLOAD_SZ(ev) = payload_sz;

	return (ev);
}
549 
550 /*
551  * sysevent_free - Free event buffer and any attribute data.
552  */
553 void
554 sysevent_free(sysevent_t *ev)
555 {
556 	log_eventq_t *q;
557 	nvlist_t *nvl;
558 
559 	ASSERT(ev != NULL);
560 	q = (log_eventq_t *)((caddr_t)ev - offsetof(log_eventq_t, arg.buf));
561 	nvl = (nvlist_t *)(uintptr_t)SE_ATTR_PTR(ev);
562 
563 	if (nvl != NULL) {
564 		size_t size = 0;
565 		(void) nvlist_size(nvl, &size, encoding);
566 		SE_PAYLOAD_SZ(ev) -= size;
567 		nvlist_free(nvl);
568 	}
569 	kmem_free(q, sizeof (log_eventq_t) + SE_PAYLOAD_SZ(ev));
570 }
571 
572 /*
573  * free_packed_event - Free packed event buffer
574  */
575 static void
576 free_packed_event(sysevent_t *ev)
577 {
578 	log_eventq_t *q;
579 
580 	ASSERT(ev != NULL);
581 	q = (log_eventq_t *)((caddr_t)ev - offsetof(log_eventq_t, arg.buf));
582 
583 	kmem_free(q, sizeof (log_eventq_t) + SE_PAYLOAD_SZ(ev));
584 }
585 
586 /*
587  * sysevent_add_attr - Add new attribute element to an event attribute list
588  *			If attribute list is NULL, start a new list.
589  */
590 int
591 sysevent_add_attr(sysevent_attr_list_t **ev_attr_list, char *name,
592     sysevent_value_t *se_value, int flag)
593 {
594 	int error;
595 	nvlist_t **nvlp = (nvlist_t **)ev_attr_list;
596 
597 	if (nvlp == NULL || se_value == NULL) {
598 		return (SE_EINVAL);
599 	}
600 
601 	/*
602 	 * attr_sz is composed of the value data size + the name data size +
603 	 * any header data.  64-bit aligned.
604 	 */
605 	if (strlen(name) >= MAX_ATTR_NAME) {
606 		return (SE_EINVAL);
607 	}
608 
609 	/*
610 	 * Allocate nvlist
611 	 */
612 	if ((*nvlp == NULL) &&
613 	    (nvlist_alloc(nvlp, NV_UNIQUE_NAME_TYPE, flag) != 0))
614 		return (SE_ENOMEM);
615 
616 	/* add the attribute */
617 	switch (se_value->value_type) {
618 	case SE_DATA_TYPE_BYTE:
619 		error = nvlist_add_byte(*ev_attr_list, name,
620 		    se_value->value.sv_byte);
621 		break;
622 	case SE_DATA_TYPE_INT16:
623 		error = nvlist_add_int16(*ev_attr_list, name,
624 		    se_value->value.sv_int16);
625 		break;
626 	case SE_DATA_TYPE_UINT16:
627 		error = nvlist_add_uint16(*ev_attr_list, name,
628 		    se_value->value.sv_uint16);
629 		break;
630 	case SE_DATA_TYPE_INT32:
631 		error = nvlist_add_int32(*ev_attr_list, name,
632 		    se_value->value.sv_int32);
633 		break;
634 	case SE_DATA_TYPE_UINT32:
635 		error = nvlist_add_uint32(*ev_attr_list, name,
636 		    se_value->value.sv_uint32);
637 		break;
638 	case SE_DATA_TYPE_INT64:
639 		error = nvlist_add_int64(*ev_attr_list, name,
640 		    se_value->value.sv_int64);
641 		break;
642 	case SE_DATA_TYPE_UINT64:
643 		error = nvlist_add_uint64(*ev_attr_list, name,
644 		    se_value->value.sv_uint64);
645 		break;
646 	case SE_DATA_TYPE_STRING:
647 		if (strlen((char *)se_value->value.sv_string) >= MAX_STRING_SZ)
648 			return (SE_EINVAL);
649 		error = nvlist_add_string(*ev_attr_list, name,
650 		    se_value->value.sv_string);
651 		break;
652 	case SE_DATA_TYPE_BYTES:
653 		if (se_value->value.sv_bytes.size > MAX_BYTE_ARRAY)
654 			return (SE_EINVAL);
655 		error = nvlist_add_byte_array(*ev_attr_list, name,
656 		    se_value->value.sv_bytes.data,
657 		    se_value->value.sv_bytes.size);
658 		break;
659 	case SE_DATA_TYPE_TIME:
660 		error = nvlist_add_hrtime(*ev_attr_list, name,
661 		    se_value->value.sv_time);
662 		break;
663 	default:
664 		return (SE_EINVAL);
665 	}
666 
667 	return (error ? SE_ENOMEM : 0);
668 }
669 
670 /*
671  * sysevent_free_attr - Free an attribute list not associated with an
672  *			event buffer.
673  */
674 void
675 sysevent_free_attr(sysevent_attr_list_t *ev_attr_list)
676 {
677 	nvlist_free((nvlist_t *)ev_attr_list);
678 }
679 
680 /*
681  * sysevent_attach_attributes - Attach an attribute list to an event buffer.
682  *
683  *	This data will be re-packed into contiguous memory when the event
684  *	buffer is posted to log_sysevent.
685  */
686 int
687 sysevent_attach_attributes(sysevent_t *ev, sysevent_attr_list_t *ev_attr_list)
688 {
689 	size_t size = 0;
690 
691 	if (SE_ATTR_PTR(ev) != UINT64_C(0)) {
692 		return (SE_EINVAL);
693 	}
694 
695 	SE_ATTR_PTR(ev) = (uintptr_t)ev_attr_list;
696 	(void) nvlist_size((nvlist_t *)ev_attr_list, &size, encoding);
697 	SE_PAYLOAD_SZ(ev) += size;
698 	SE_FLAG(ev) = 0;
699 
700 	return (0);
701 }
702 
703 /*
704  * sysevent_detach_attributes - Detach but don't free attribute list from the
705  *				event buffer.
706  */
707 void
708 sysevent_detach_attributes(sysevent_t *ev)
709 {
710 	size_t size = 0;
711 	nvlist_t *nvl;
712 
713 	if ((nvl = (nvlist_t *)(uintptr_t)SE_ATTR_PTR(ev)) == NULL) {
714 		return;
715 	}
716 
717 	SE_ATTR_PTR(ev) = UINT64_C(0);
718 	(void) nvlist_size(nvl, &size, encoding);
719 	SE_PAYLOAD_SZ(ev) -= size;
720 	ASSERT(SE_PAYLOAD_SZ(ev) >= 0);
721 }
722 
723 /*
724  * sysevent_attr_name - Get name of attribute
725  */
726 char *
727 sysevent_attr_name(sysevent_attr_t *attr)
728 {
729 	if (attr == NULL) {
730 		return (NULL);
731 	}
732 
733 	return (nvpair_name(attr));
734 }
735 
736 /*
737  * sysevent_attr_type - Get type of attribute
738  */
739 int
740 sysevent_attr_type(sysevent_attr_t *attr)
741 {
742 	/*
743 	 * The SE_DATA_TYPE_* are typedef'ed to be the
744 	 * same value as DATA_TYPE_*
745 	 */
746 	return (nvpair_type((nvpair_t *)attr));
747 }
748 
749 /*
750  * Repack event buffer into contiguous memory
751  */
752 static sysevent_t *
753 se_repack(sysevent_t *ev, int flag)
754 {
755 	size_t copy_len;
756 	caddr_t attr;
757 	size_t size;
758 	uint64_t attr_offset;
759 	sysevent_t *copy;
760 	log_eventq_t *qcopy;
761 	sysevent_attr_list_t *nvl;
762 
763 	copy_len = sizeof (log_eventq_t) + SE_PAYLOAD_SZ(ev);
764 	qcopy = kmem_zalloc(copy_len, flag);
765 	if (qcopy == NULL) {
766 		return (NULL);
767 	}
768 	copy = (sysevent_t *)&qcopy->arg.buf;
769 
770 	/*
771 	 * Copy event header, class, subclass and publisher names
772 	 * Set the attribute offset (in number of bytes) to contiguous
773 	 * memory after the header.
774 	 */
775 
776 	attr_offset = SE_ATTR_OFF(ev);
777 
778 	ASSERT((caddr_t)copy + attr_offset <= (caddr_t)copy + copy_len);
779 
780 	bcopy(ev, copy, attr_offset);
781 
782 	/* Check if attribute list exists */
783 	if ((nvl = (nvlist_t *)(uintptr_t)SE_ATTR_PTR(ev)) == NULL) {
784 		return (copy);
785 	}
786 
787 	/*
788 	 * Copy attribute data to contiguous memory
789 	 */
790 	attr = (char *)copy + attr_offset;
791 	(void) nvlist_size(nvl, &size, encoding);
792 	if (nvlist_pack(nvl, &attr, &size, encoding, flag) != 0) {
793 		kmem_free(qcopy, copy_len);
794 		return (NULL);
795 	}
796 	SE_ATTR_PTR(copy) = UINT64_C(0);
797 	SE_FLAG(copy) = SE_PACKED_BUF;
798 
799 	return (copy);
800 }
801 
802 /*
803  * The sysevent registration provides a persistent and reliable database
804  * for channel information for sysevent channel publishers and
805  * subscribers.
806  *
807  * A channel is created and maintained by the kernel upon the first
808  * SE_OPEN_REGISTRATION operation to log_sysevent_register().  Channel
809  * event subscription information is updated as publishers or subscribers
810  * perform subsequent operations (SE_BIND_REGISTRATION, SE_REGISTER,
811  * SE_UNREGISTER and SE_UNBIND_REGISTRATION).
812  *
813  * For consistency, id's are assigned for every publisher or subscriber
814  * bound to a particular channel.  The id's are used to constrain resources
815  * and perform subscription lookup.
816  *
817  * Associated with each channel is a hashed list of the current subscriptions
818  * based upon event class and subclasses.  A subscription contains a class name,
819  * list of possible subclasses and an array of subscriber ids.  Subscriptions
820  * are updated for every SE_REGISTER or SE_UNREGISTER operation.
821  *
822  * Channels are closed once the last subscriber or publisher performs a
823  * SE_CLOSE_REGISTRATION operation.  All resources associated with the named
824  * channel are freed upon last close.
825  *
826  * Locking:
827  *	Every operation to log_sysevent() is protected by a single lock,
828  *	registered_channel_mutex.  It is expected that the granularity of
829  *	a single lock is sufficient given the frequency that updates will
830  *	occur.
831  *
832  *	If this locking strategy proves to be too contentious, a per-hash
833  *	or per-channel locking strategy may be implemented.
834  */
835 
836 
/* Map a channel name to its bucket index in registered_channels[] */
#define	CHANN_HASH(channel_name)	(hash_func(channel_name) \
					% CHAN_HASH_SZ)

/* Hash table of channel descriptors; each bucket is chained via scd_next */
sysevent_channel_descriptor_t *registered_channels[CHAN_HASH_SZ];
/* Number of channel descriptors currently in registered_channels[] */
static int channel_cnt;
/* Forward declaration; called by unbind_common() */
static void remove_all_class(sysevent_channel_descriptor_t *chan,
	uint32_t sub_id);
844 
/*
 * hash_func - Compute a 32-bit hash of a NUL-terminated string.
 *
 * For every character the running hash is shifted left by four bits and the
 * character is added.  Whenever any of the top four bits become set, they
 * are folded back into the hash (shifted right by 24) and then cleared, so
 * the returned value never has its top nibble set.  An empty string hashes
 * to 0.
 */
static uint32_t
hash_func(const char *s)
{
	uint32_t hash = 0;
	const char *cp;

	for (cp = s; *cp != '\0'; cp++) {
		uint32_t top;

		hash = (hash << 4) + (uint32_t)*cp;
		top = hash & 0xf0000000;
		if (top == 0)
			continue;

		hash ^= top >> 24;
		hash ^= top;
	}

	return (hash);
}
863 
864 static sysevent_channel_descriptor_t *
865 get_channel(char *channel_name)
866 {
867 	int hash_index;
868 	sysevent_channel_descriptor_t *chan_list;
869 
870 	if (channel_name == NULL)
871 		return (NULL);
872 
873 	/* Find channel descriptor */
874 	hash_index = CHANN_HASH(channel_name);
875 	chan_list = registered_channels[hash_index];
876 	while (chan_list != NULL) {
877 		if (strcmp(chan_list->scd_channel_name, channel_name) == 0) {
878 			break;
879 		} else {
880 			chan_list = chan_list->scd_next;
881 		}
882 	}
883 
884 	return (chan_list);
885 }
886 
887 static class_lst_t *
888 create_channel_registration(sysevent_channel_descriptor_t *chan,
889     char *event_class, int index)
890 {
891 	size_t class_len;
892 	class_lst_t *c_list;
893 
894 	class_len = strlen(event_class) + 1;
895 	c_list = kmem_zalloc(sizeof (class_lst_t), KM_SLEEP);
896 	c_list->cl_name = kmem_zalloc(class_len, KM_SLEEP);
897 	bcopy(event_class, c_list->cl_name, class_len);
898 
899 	c_list->cl_subclass_list =
900 	    kmem_zalloc(sizeof (subclass_lst_t), KM_SLEEP);
901 	c_list->cl_subclass_list->sl_name =
902 	    kmem_zalloc(sizeof (EC_SUB_ALL), KM_SLEEP);
903 	bcopy(EC_SUB_ALL, c_list->cl_subclass_list->sl_name,
904 	    sizeof (EC_SUB_ALL));
905 
906 	c_list->cl_next = chan->scd_class_list_tbl[index];
907 	chan->scd_class_list_tbl[index] = c_list;
908 
909 	return (c_list);
910 }
911 
912 static void
913 free_channel_registration(sysevent_channel_descriptor_t *chan)
914 {
915 	int i;
916 	class_lst_t *clist, *next_clist;
917 	subclass_lst_t *sclist, *next_sc;
918 
919 	for (i = 0; i <= CLASS_HASH_SZ; ++i) {
920 
921 		clist = chan->scd_class_list_tbl[i];
922 		while (clist != NULL) {
923 			sclist = clist->cl_subclass_list;
924 			while (sclist != NULL) {
925 				kmem_free(sclist->sl_name,
926 				    strlen(sclist->sl_name) + 1);
927 				next_sc = sclist->sl_next;
928 				kmem_free(sclist, sizeof (subclass_lst_t));
929 				sclist = next_sc;
930 			}
931 			kmem_free(clist->cl_name,
932 			    strlen(clist->cl_name) + 1);
933 			next_clist = clist->cl_next;
934 			kmem_free(clist, sizeof (class_lst_t));
935 			clist = next_clist;
936 		}
937 	}
938 	chan->scd_class_list_tbl[0] = NULL;
939 }
940 
941 static int
942 open_channel(char *channel_name)
943 {
944 	int hash_index;
945 	sysevent_channel_descriptor_t *chan, *chan_list;
946 
947 
948 	if (channel_cnt > MAX_CHAN) {
949 		return (-1);
950 	}
951 
952 	/* Find channel descriptor */
953 	hash_index = CHANN_HASH(channel_name);
954 	chan_list = registered_channels[hash_index];
955 	while (chan_list != NULL) {
956 		if (strcmp(chan_list->scd_channel_name, channel_name) == 0) {
957 			chan_list->scd_ref_cnt++;
958 			kmem_free(channel_name, strlen(channel_name) + 1);
959 			return (0);
960 		} else {
961 			chan_list = chan_list->scd_next;
962 		}
963 	}
964 
965 
966 	/* New channel descriptor */
967 	chan = kmem_zalloc(sizeof (sysevent_channel_descriptor_t), KM_SLEEP);
968 	chan->scd_channel_name = channel_name;
969 
970 	/*
971 	 * Create subscriber ids in the range [1, MAX_SUBSCRIBERS).
972 	 * Subscriber id 0 is never allocated, but is used as a reserved id
973 	 * by libsysevent
974 	 */
975 	if ((chan->scd_subscriber_cache = vmem_create(channel_name, (void *)1,
976 	    MAX_SUBSCRIBERS + 1, 1, NULL, NULL, NULL, 0,
977 	    VM_NOSLEEP | VMC_IDENTIFIER)) == NULL) {
978 		kmem_free(chan, sizeof (sysevent_channel_descriptor_t));
979 		return (-1);
980 	}
981 	if ((chan->scd_publisher_cache = vmem_create(channel_name, (void *)1,
982 	    MAX_PUBLISHERS + 1, 1, NULL, NULL, NULL, 0,
983 	    VM_NOSLEEP | VMC_IDENTIFIER)) == NULL) {
984 		vmem_destroy(chan->scd_subscriber_cache);
985 		kmem_free(chan, sizeof (sysevent_channel_descriptor_t));
986 		return (-1);
987 	}
988 
989 	chan->scd_ref_cnt = 1;
990 
991 	(void) create_channel_registration(chan, EC_ALL, 0);
992 
993 	if (registered_channels[hash_index] != NULL)
994 		chan->scd_next = registered_channels[hash_index];
995 
996 	registered_channels[hash_index] = chan;
997 
998 	++channel_cnt;
999 
1000 	return (0);
1001 }
1002 
1003 static void
1004 close_channel(char *channel_name)
1005 {
1006 	int hash_index;
1007 	sysevent_channel_descriptor_t *chan, *prev_chan;
1008 
1009 	/* Find channel descriptor */
1010 	hash_index = CHANN_HASH(channel_name);
1011 	prev_chan = chan = registered_channels[hash_index];
1012 
1013 	while (chan != NULL) {
1014 		if (strcmp(chan->scd_channel_name, channel_name) == 0) {
1015 			break;
1016 		} else {
1017 			prev_chan = chan;
1018 			chan = chan->scd_next;
1019 		}
1020 	}
1021 
1022 	if (chan == NULL)
1023 		return;
1024 
1025 	chan->scd_ref_cnt--;
1026 	if (chan->scd_ref_cnt > 0)
1027 		return;
1028 
1029 	free_channel_registration(chan);
1030 	vmem_destroy(chan->scd_subscriber_cache);
1031 	vmem_destroy(chan->scd_publisher_cache);
1032 	kmem_free(chan->scd_channel_name,
1033 	    strlen(chan->scd_channel_name) + 1);
1034 	if (registered_channels[hash_index] == chan)
1035 		registered_channels[hash_index] = chan->scd_next;
1036 	else
1037 		prev_chan->scd_next = chan->scd_next;
1038 	kmem_free(chan, sizeof (sysevent_channel_descriptor_t));
1039 	--channel_cnt;
1040 }
1041 
1042 static id_t
1043 bind_common(sysevent_channel_descriptor_t *chan, int type)
1044 {
1045 	id_t id;
1046 
1047 	if (type == SUBSCRIBER) {
1048 		id = (id_t)(uintptr_t)vmem_alloc(chan->scd_subscriber_cache, 1,
1049 		    VM_NOSLEEP | VM_NEXTFIT);
1050 		if (id <= 0 || id > MAX_SUBSCRIBERS)
1051 			return (0);
1052 		chan->scd_subscriber_ids[id] = 1;
1053 	} else {
1054 		id = (id_t)(uintptr_t)vmem_alloc(chan->scd_publisher_cache, 1,
1055 		    VM_NOSLEEP | VM_NEXTFIT);
1056 		if (id <= 0 || id > MAX_PUBLISHERS)
1057 			return (0);
1058 		chan->scd_publisher_ids[id] = 1;
1059 	}
1060 
1061 	return (id);
1062 }
1063 
1064 static int
1065 unbind_common(sysevent_channel_descriptor_t *chan, int type, id_t id)
1066 {
1067 	if (type == SUBSCRIBER) {
1068 		if (id <= 0 || id > MAX_SUBSCRIBERS)
1069 			return (0);
1070 		if (chan->scd_subscriber_ids[id] == 0)
1071 			return (0);
1072 		(void) remove_all_class(chan, id);
1073 		chan->scd_subscriber_ids[id] = 0;
1074 		vmem_free(chan->scd_subscriber_cache, (void *)(uintptr_t)id, 1);
1075 	} else {
1076 		if (id <= 0 || id > MAX_PUBLISHERS)
1077 			return (0);
1078 		if (chan->scd_publisher_ids[id] == 0)
1079 			return (0);
1080 		chan->scd_publisher_ids[id] = 0;
1081 		vmem_free(chan->scd_publisher_cache, (void *)(uintptr_t)id, 1);
1082 	}
1083 
1084 	return (1);
1085 }
1086 
1087 static void
1088 release_id(sysevent_channel_descriptor_t *chan, int type, id_t id)
1089 {
1090 	if (unbind_common(chan, type, id))
1091 		close_channel(chan->scd_channel_name);
1092 }
1093 
1094 static subclass_lst_t *
1095 find_subclass(class_lst_t *c_list, char *subclass)
1096 {
1097 	subclass_lst_t *sc_list;
1098 
1099 	if (c_list == NULL)
1100 		return (NULL);
1101 
1102 	sc_list = c_list->cl_subclass_list;
1103 
1104 	while (sc_list != NULL) {
1105 		if (strcmp(sc_list->sl_name, subclass) == 0) {
1106 			return (sc_list);
1107 		}
1108 		sc_list = sc_list->sl_next;
1109 	}
1110 
1111 	return (NULL);
1112 }
1113 
1114 static void
1115 insert_subclass(class_lst_t *c_list, char **subclass_names,
1116     int subclass_num, uint32_t sub_id)
1117 {
1118 	int i, subclass_sz;
1119 	subclass_lst_t *sc_list;
1120 
1121 	for (i = 0; i < subclass_num; ++i) {
1122 		if ((sc_list = find_subclass(c_list, subclass_names[i]))
1123 		    != NULL) {
1124 			sc_list->sl_num[sub_id] = 1;
1125 		} else {
1126 
1127 			sc_list = kmem_zalloc(sizeof (subclass_lst_t),
1128 			    KM_SLEEP);
1129 			subclass_sz = strlen(subclass_names[i]) + 1;
1130 			sc_list->sl_name = kmem_zalloc(subclass_sz, KM_SLEEP);
1131 			bcopy(subclass_names[i], sc_list->sl_name,
1132 			    subclass_sz);
1133 
1134 			sc_list->sl_num[sub_id] = 1;
1135 
1136 			sc_list->sl_next = c_list->cl_subclass_list;
1137 			c_list->cl_subclass_list = sc_list;
1138 		}
1139 	}
1140 }
1141 
1142 static class_lst_t *
1143 find_class(sysevent_channel_descriptor_t *chan, char *class_name)
1144 {
1145 	class_lst_t *c_list;
1146 
1147 	c_list = chan->scd_class_list_tbl[CLASS_HASH(class_name)];
1148 	while (c_list != NULL) {
1149 		if (strcmp(class_name, c_list->cl_name) == 0)
1150 			break;
1151 		c_list = c_list->cl_next;
1152 	}
1153 
1154 	return (c_list);
1155 }
1156 
1157 static void
1158 remove_all_class(sysevent_channel_descriptor_t *chan, uint32_t sub_id)
1159 {
1160 	int i;
1161 	class_lst_t *c_list;
1162 	subclass_lst_t *sc_list;
1163 
1164 	for (i = 0; i <= CLASS_HASH_SZ; ++i) {
1165 
1166 		c_list = chan->scd_class_list_tbl[i];
1167 		while (c_list != NULL) {
1168 			sc_list = c_list->cl_subclass_list;
1169 			while (sc_list != NULL) {
1170 				sc_list->sl_num[sub_id] = 0;
1171 				sc_list = sc_list->sl_next;
1172 			}
1173 			c_list = c_list->cl_next;
1174 		}
1175 	}
1176 }
1177 
1178 static void
1179 remove_class(sysevent_channel_descriptor_t *chan, uint32_t sub_id,
1180     char *class_name)
1181 {
1182 	class_lst_t *c_list;
1183 	subclass_lst_t *sc_list;
1184 
1185 	if (strcmp(class_name, EC_ALL) == 0) {
1186 		remove_all_class(chan, sub_id);
1187 		return;
1188 	}
1189 
1190 	if ((c_list = find_class(chan, class_name)) == NULL) {
1191 		return;
1192 	}
1193 
1194 	sc_list = c_list->cl_subclass_list;
1195 	while (sc_list != NULL) {
1196 		sc_list->sl_num[sub_id] = 0;
1197 		sc_list = sc_list->sl_next;
1198 	}
1199 }
1200 
1201 static int
1202 insert_class(sysevent_channel_descriptor_t *chan, char *event_class,
1203     char **event_subclass_lst, int subclass_num, uint32_t sub_id)
1204 {
1205 	class_lst_t *c_list;
1206 
1207 	if (strcmp(event_class, EC_ALL) == 0) {
1208 		insert_subclass(chan->scd_class_list_tbl[0],
1209 		    event_subclass_lst, 1, sub_id);
1210 		return (0);
1211 	}
1212 
1213 	if (strlen(event_class) + 1 > MAX_CLASS_LEN)
1214 		return (-1);
1215 
1216 	/* New class, add to the registration cache */
1217 	if ((c_list = find_class(chan, event_class)) == NULL) {
1218 		c_list = create_channel_registration(chan, event_class,
1219 		    CLASS_HASH(event_class));
1220 	}
1221 
1222 	/* Update the subclass list */
1223 	insert_subclass(c_list, event_subclass_lst, subclass_num, sub_id);
1224 
1225 	return (0);
1226 }
1227 
1228 static int
1229 add_registration(sysevent_channel_descriptor_t *chan, uint32_t sub_id,
1230     char *nvlbuf, size_t nvlsize)
1231 {
1232 	uint_t num_elem;
1233 	char *event_class;
1234 	char **event_list;
1235 	nvlist_t *nvl;
1236 	nvpair_t *nvpair = NULL;
1237 
1238 	if (nvlist_unpack(nvlbuf, nvlsize, &nvl, KM_SLEEP) != 0)
1239 		return (-1);
1240 
1241 	if ((nvpair = nvlist_next_nvpair(nvl, nvpair)) == NULL) {
1242 		nvlist_free(nvl);
1243 		return (-1);
1244 	}
1245 
1246 	if ((event_class = nvpair_name(nvpair)) == NULL) {
1247 		nvlist_free(nvl);
1248 		return (-1);
1249 	}
1250 	if (nvpair_value_string_array(nvpair, &event_list,
1251 	    &num_elem) != 0) {
1252 		nvlist_free(nvl);
1253 		return (-1);
1254 	}
1255 
1256 	if (insert_class(chan, event_class, event_list, num_elem, sub_id) < 0) {
1257 		nvlist_free(nvl);
1258 		return (-1);
1259 	}
1260 
1261 	nvlist_free(nvl);
1262 
1263 	return (0);
1264 }
1265 
1266 /*
1267  * get_registration - Return the requested class hash chain
1268  */
1269 static int
1270 get_registration(sysevent_channel_descriptor_t *chan, char *databuf,
1271     uint32_t *bufsz, uint32_t class_index)
1272 {
1273 	int num_classes = 0;
1274 	char *nvlbuf = NULL;
1275 	size_t nvlsize;
1276 	nvlist_t *nvl;
1277 	class_lst_t *clist;
1278 	subclass_lst_t *sc_list;
1279 
1280 	if (class_index < 0 || class_index > CLASS_HASH_SZ)
1281 		return (EINVAL);
1282 
1283 	if ((clist = chan->scd_class_list_tbl[class_index]) == NULL) {
1284 		return (ENOENT);
1285 	}
1286 
1287 	if (nvlist_alloc(&nvl, 0, 0) != 0) {
1288 		return (EFAULT);
1289 	}
1290 
1291 	while (clist != NULL) {
1292 		if (nvlist_add_string(nvl, CLASS_NAME, clist->cl_name)
1293 		    != 0) {
1294 			nvlist_free(nvl);
1295 			return (EFAULT);
1296 		}
1297 
1298 		sc_list = clist->cl_subclass_list;
1299 		while (sc_list != NULL) {
1300 			if (nvlist_add_byte_array(nvl, sc_list->sl_name,
1301 			    sc_list->sl_num, MAX_SUBSCRIBERS) != 0) {
1302 				nvlist_free(nvl);
1303 				return (EFAULT);
1304 			}
1305 			sc_list = sc_list->sl_next;
1306 		}
1307 		num_classes++;
1308 		clist = clist->cl_next;
1309 	}
1310 
1311 	if (num_classes == 0) {
1312 		nvlist_free(nvl);
1313 		return (ENOENT);
1314 	}
1315 
1316 	if (nvlist_pack(nvl, &nvlbuf, &nvlsize, NV_ENCODE_NATIVE,
1317 	    KM_SLEEP)
1318 	    != 0) {
1319 		nvlist_free(nvl);
1320 		return (EFAULT);
1321 	}
1322 
1323 	nvlist_free(nvl);
1324 
1325 	if (nvlsize > *bufsz) {
1326 		kmem_free(nvlbuf, nvlsize);
1327 		*bufsz = nvlsize;
1328 		return (EAGAIN);
1329 	}
1330 
1331 	bcopy(nvlbuf, databuf, nvlsize);
1332 	kmem_free(nvlbuf, nvlsize);
1333 
1334 	return (0);
1335 }
1336 
1337 /*
1338  * log_sysevent_register - Register event subscriber for a particular
1339  *		event channel.
1340  */
1341 int
1342 log_sysevent_register(char *channel_name, char *udatabuf, se_pubsub_t *udata)
1343 {
1344 	int error = 0;
1345 	char *kchannel, *databuf = NULL;
1346 	size_t bufsz;
1347 	se_pubsub_t kdata;
1348 	sysevent_channel_descriptor_t *chan;
1349 
1350 	if (copyin(udata, &kdata, sizeof (se_pubsub_t)) == -1) {
1351 		return (EFAULT);
1352 	}
1353 	if (kdata.ps_channel_name_len == 0) {
1354 		return (EINVAL);
1355 	}
1356 	kchannel = kmem_alloc(kdata.ps_channel_name_len, KM_SLEEP);
1357 	if (copyin(channel_name, kchannel, kdata.ps_channel_name_len) == -1) {
1358 		kmem_free(kchannel, kdata.ps_channel_name_len);
1359 		return (EFAULT);
1360 	}
1361 	bufsz = kdata.ps_buflen;
1362 	if (bufsz > 0) {
1363 		databuf = kmem_alloc(bufsz, KM_SLEEP);
1364 		if (copyin(udatabuf, databuf, bufsz) == -1) {
1365 			kmem_free(kchannel, kdata.ps_channel_name_len);
1366 			kmem_free(databuf, bufsz);
1367 			return (EFAULT);
1368 		}
1369 	}
1370 
1371 	mutex_enter(&registered_channel_mutex);
1372 	if (kdata.ps_op != SE_OPEN_REGISTRATION &&
1373 	    kdata.ps_op != SE_CLOSE_REGISTRATION) {
1374 		chan = get_channel(kchannel);
1375 		if (chan == NULL) {
1376 			mutex_exit(&registered_channel_mutex);
1377 			kmem_free(kchannel, kdata.ps_channel_name_len);
1378 			if (bufsz > 0)
1379 				kmem_free(databuf, bufsz);
1380 			return (ENOENT);
1381 		}
1382 	}
1383 
1384 	switch (kdata.ps_op) {
1385 	case SE_OPEN_REGISTRATION:
1386 		if (open_channel(kchannel) != 0) {
1387 			error = ENOMEM;
1388 			if (bufsz > 0)
1389 				kmem_free(databuf, bufsz);
1390 			kmem_free(kchannel, kdata.ps_channel_name_len);
1391 		}
1392 
1393 		mutex_exit(&registered_channel_mutex);
1394 		return (error);
1395 	case SE_CLOSE_REGISTRATION:
1396 		close_channel(kchannel);
1397 		break;
1398 	case SE_BIND_REGISTRATION:
1399 		if ((kdata.ps_id = bind_common(chan, kdata.ps_type)) <= 0)
1400 			error = EBUSY;
1401 		break;
1402 	case SE_UNBIND_REGISTRATION:
1403 		(void) unbind_common(chan, kdata.ps_type, (id_t)kdata.ps_id);
1404 		break;
1405 	case SE_REGISTER:
1406 		if (bufsz == 0) {
1407 			error = EINVAL;
1408 			break;
1409 		}
1410 		if (add_registration(chan, kdata.ps_id, databuf, bufsz) == -1)
1411 			error = EINVAL;
1412 		break;
1413 	case SE_UNREGISTER:
1414 		if (bufsz == 0) {
1415 			error = EINVAL;
1416 			break;
1417 		}
1418 		remove_class(chan, kdata.ps_id, databuf);
1419 		break;
1420 	case SE_CLEANUP:
1421 		/* Cleanup the indicated subscriber or publisher */
1422 		release_id(chan, kdata.ps_type, kdata.ps_id);
1423 		break;
1424 	case SE_GET_REGISTRATION:
1425 		error = get_registration(chan, databuf,
1426 		    &kdata.ps_buflen, kdata.ps_id);
1427 		break;
1428 	default:
1429 		error = ENOTSUP;
1430 	}
1431 
1432 	mutex_exit(&registered_channel_mutex);
1433 
1434 	kmem_free(kchannel, kdata.ps_channel_name_len);
1435 
1436 	if (bufsz > 0) {
1437 		if (copyout(databuf, udatabuf, bufsz) == -1)
1438 			error = EFAULT;
1439 		kmem_free(databuf, bufsz);
1440 	}
1441 
1442 	if (copyout(&kdata, udata, sizeof (se_pubsub_t)) == -1)
1443 		return (EFAULT);
1444 
1445 	return (error);
1446 }
1447 
1448 /*
1449  * log_sysevent_copyout_data - Copyout event data to userland.
1450  *			This is called from modctl(MODEVENTS, MODEVENTS_GETDATA)
1451  *			The buffer size is always sufficient.
1452  */
1453 int
1454 log_sysevent_copyout_data(sysevent_id_t *eid, size_t ubuflen, caddr_t ubuf)
1455 {
1456 	int error = ENOENT;
1457 	log_eventq_t *q;
1458 	sysevent_t *ev;
1459 	sysevent_id_t eid_copy;
1460 
1461 	/*
1462 	 * Copy eid
1463 	 */
1464 	if (copyin(eid, &eid_copy, sizeof (sysevent_id_t)) == -1) {
1465 		return (EFAULT);
1466 	}
1467 
1468 	mutex_enter(&eventq_sent_mutex);
1469 	q = log_eventq_sent;
1470 
1471 	/*
1472 	 * Search for event buffer on the sent queue with matching
1473 	 * event identifier
1474 	 */
1475 	while (q) {
1476 		ev = (sysevent_t *)&q->arg.buf;
1477 
1478 		if (SE_TIME(ev) != eid_copy.eid_ts ||
1479 		    SE_SEQ(ev) != eid_copy.eid_seq) {
1480 			q = q->next;
1481 			continue;
1482 		}
1483 
1484 		if (ubuflen < SE_SIZE(ev)) {
1485 			error = EFAULT;
1486 			break;
1487 		}
1488 		if (copyout(ev, ubuf, SE_SIZE(ev)) != 0) {
1489 			error = EFAULT;
1490 			LOG_DEBUG((CE_NOTE, "Unable to retrieve system event "
1491 			    "0x%" PRIx64 " from queue: EFAULT\n",
1492 			    eid->eid_seq));
1493 		} else {
1494 			error = 0;
1495 		}
1496 		break;
1497 	}
1498 
1499 	mutex_exit(&eventq_sent_mutex);
1500 
1501 	return (error);
1502 }
1503 
1504 /*
1505  * log_sysevent_free_data - Free kernel copy of the event buffer identified
1506  *			by eid (must have already been sent).  Called from
1507  *			modctl(MODEVENTS, MODEVENTS_FREEDATA).
1508  */
1509 int
1510 log_sysevent_free_data(sysevent_id_t *eid)
1511 {
1512 	int error = ENOENT;
1513 	sysevent_t *ev;
1514 	log_eventq_t *q, *prev = NULL;
1515 	sysevent_id_t eid_copy;
1516 
1517 	/*
1518 	 * Copy eid
1519 	 */
1520 	if (copyin(eid, &eid_copy, sizeof (sysevent_id_t)) == -1) {
1521 		return (EFAULT);
1522 	}
1523 
1524 	mutex_enter(&eventq_sent_mutex);
1525 	q = log_eventq_sent;
1526 
1527 	/*
1528 	 * Look for the event to be freed on the sent queue.  Due to delayed
1529 	 * processing of the event, it may not be on the sent queue yet.
1530 	 * It is up to the user to retry the free operation to ensure that the
1531 	 * event is properly freed.
1532 	 */
1533 	while (q) {
1534 		ev = (sysevent_t *)&q->arg.buf;
1535 
1536 		if (SE_TIME(ev) != eid_copy.eid_ts ||
1537 		    SE_SEQ(ev) != eid_copy.eid_seq) {
1538 			prev = q;
1539 			q = q->next;
1540 			continue;
1541 		}
1542 		/*
1543 		 * Take it out of log_eventq_sent and free it
1544 		 */
1545 		if (prev) {
1546 			prev->next = q->next;
1547 		} else {
1548 			log_eventq_sent = q->next;
1549 		}
1550 		free_packed_event(ev);
1551 		error = 0;
1552 		break;
1553 	}
1554 
1555 	mutex_exit(&eventq_sent_mutex);
1556 
1557 	return (error);
1558 }
1559 
1560 /*
1561  * log_sysevent_flushq - Begin or resume event buffer delivery.  If neccessary,
1562  *			create log_event_deliver thread or wake it up
1563  */
1564 /*ARGSUSED*/
1565 void
1566 log_sysevent_flushq(int cmd, uint_t flag)
1567 {
1568 	mutex_enter(&eventq_head_mutex);
1569 
1570 	/*
1571 	 * Start the event delivery thread
1572 	 * Mark the upcall status as active since we should
1573 	 * now be able to begin emptying the queue normally.
1574 	 */
1575 	if (!async_thread) {
1576 		sysevent_upcall_status = 0;
1577 		sysevent_daemon_init = 1;
1578 		setup_ddi_poststartup();
1579 		async_thread = thread_create(NULL, 0, log_event_deliver,
1580 		    NULL, 0, &p0, TS_RUN, minclsyspri);
1581 	}
1582 
1583 	log_event_delivery = LOGEVENT_DELIVERY_CONT;
1584 	cv_signal(&log_event_cv);
1585 	mutex_exit(&eventq_head_mutex);
1586 }
1587 
1588 /*
1589  * log_sysevent_filename - Called by syseventd via
1590  *			modctl(MODEVENTS, MODEVENTS_SET_DOOR_UPCALL_FILENAME)
1591  *			to subsequently bind the event_door.
1592  *
1593  *			This routine is called everytime syseventd (re)starts
1594  *			and must therefore replay any events buffers that have
1595  *			been sent but not freed.
1596  *
1597  *			Event buffer delivery begins after a call to
1598  *			log_sysevent_flushq().
1599  */
1600 int
1601 log_sysevent_filename(char *file)
1602 {
1603 	mutex_enter(&event_door_mutex);
1604 
1605 	(void) strlcpy(logevent_door_upcall_filename, file,
1606 	    sizeof (logevent_door_upcall_filename));
1607 
1608 	/* Unbind old event door */
1609 	if (event_door != NULL)
1610 		door_ki_rele(event_door);
1611 	/* Establish door connection with user event daemon (syseventd) */
1612 	if (door_ki_open(logevent_door_upcall_filename, &event_door) != 0)
1613 		event_door = NULL;
1614 
1615 	mutex_exit(&event_door_mutex);
1616 
1617 	/*
1618 	 * We are called when syseventd restarts. Move all sent, but
1619 	 * not committed events from log_eventq_sent to log_eventq_head.
1620 	 * Do it in proper order to maintain increasing event id.
1621 	 */
1622 	mutex_enter(&eventq_head_mutex);
1623 
1624 	mutex_enter(&eventq_sent_mutex);
1625 	while (log_eventq_sent) {
1626 		log_eventq_t *tmp = log_eventq_sent->next;
1627 		log_eventq_sent->next = log_eventq_head;
1628 		if (log_eventq_head == NULL) {
1629 			ASSERT(log_eventq_cnt == 0);
1630 			log_eventq_tail = log_eventq_sent;
1631 			log_eventq_tail->next = NULL;
1632 		} else if (log_eventq_head == log_eventq_tail) {
1633 			ASSERT(log_eventq_cnt == 1);
1634 			ASSERT(log_eventq_head->next == NULL);
1635 			ASSERT(log_eventq_tail->next == NULL);
1636 		}
1637 		log_eventq_head = log_eventq_sent;
1638 		log_eventq_sent = tmp;
1639 		log_eventq_cnt++;
1640 	}
1641 	mutex_exit(&eventq_sent_mutex);
1642 	mutex_exit(&eventq_head_mutex);
1643 
1644 	return (0);
1645 }
1646 
1647 /*
1648  * queue_sysevent - queue an event buffer
1649  */
1650 static int
1651 queue_sysevent(sysevent_t *ev, sysevent_id_t *eid, int flag)
1652 {
1653 	log_eventq_t *q;
1654 
1655 	ASSERT(flag == SE_SLEEP || flag == SE_NOSLEEP);
1656 
1657 	DTRACE_SYSEVENT2(post, evch_bind_t *, NULL, sysevent_impl_t *, ev);
1658 
1659 restart:
1660 
1661 	/* Max Q size exceeded */
1662 	mutex_enter(&event_qfull_mutex);
1663 	if (sysevent_daemon_init && log_eventq_cnt >= logevent_max_q_sz) {
1664 		/*
1665 		 * If queue full and transport down, return no transport
1666 		 */
1667 		if (sysevent_upcall_status != 0) {
1668 			mutex_exit(&event_qfull_mutex);
1669 			free_packed_event(ev);
1670 			eid->eid_seq = UINT64_C(0);
1671 			eid->eid_ts = INT64_C(0);
1672 			return (SE_NO_TRANSPORT);
1673 		}
1674 		if (flag == SE_NOSLEEP) {
1675 			mutex_exit(&event_qfull_mutex);
1676 			free_packed_event(ev);
1677 			eid->eid_seq = UINT64_C(0);
1678 			eid->eid_ts = INT64_C(0);
1679 			return (SE_EQSIZE);
1680 		}
1681 		event_qfull_blocked++;
1682 		cv_wait(&event_qfull_cv, &event_qfull_mutex);
1683 		event_qfull_blocked--;
1684 		mutex_exit(&event_qfull_mutex);
1685 		goto restart;
1686 	}
1687 	mutex_exit(&event_qfull_mutex);
1688 
1689 	mutex_enter(&eventq_head_mutex);
1690 
1691 	/* Time stamp and assign ID */
1692 	SE_SEQ(ev) = eid->eid_seq = atomic_add_64_nv(&kernel_event_id,
1693 	    (uint64_t)1);
1694 	SE_TIME(ev) = eid->eid_ts = gethrtime();
1695 
1696 	LOG_DEBUG1((CE_CONT, "log_sysevent: class=%d type=%d id=0x%llx\n",
1697 	    SE_CLASS(ev), SE_SUBCLASS(ev), (longlong_t)SE_SEQ(ev)));
1698 
1699 	/*
1700 	 * Put event on eventq
1701 	 */
1702 	q = (log_eventq_t *)((caddr_t)ev - offsetof(log_eventq_t, arg.buf));
1703 	q->next = NULL;
1704 	if (log_eventq_head == NULL) {
1705 		ASSERT(log_eventq_cnt == 0);
1706 		log_eventq_head = q;
1707 		log_eventq_tail = q;
1708 	} else {
1709 		if (log_eventq_head == log_eventq_tail) {
1710 			ASSERT(log_eventq_cnt == 1);
1711 			ASSERT(log_eventq_head->next == NULL);
1712 			ASSERT(log_eventq_tail->next == NULL);
1713 		}
1714 		log_eventq_tail->next = q;
1715 		log_eventq_tail = q;
1716 	}
1717 	log_eventq_cnt++;
1718 
1719 	/* Signal event delivery thread */
1720 	if (log_eventq_cnt == 1) {
1721 		cv_signal(&log_event_cv);
1722 	}
1723 	mutex_exit(&eventq_head_mutex);
1724 
1725 	return (0);
1726 }
1727 
1728 /*
1729  * log_sysevent - kernel system event logger.
1730  *
1731  * Returns SE_ENOMEM if buf allocation failed or SE_EQSIZE if the
1732  * maximum event queue size will be exceeded
1733  * Returns 0 for successfully queued event buffer
1734  */
1735 int
1736 log_sysevent(sysevent_t *ev, int flag, sysevent_id_t *eid)
1737 {
1738 	sysevent_t *ev_copy;
1739 	int rval;
1740 
1741 	ASSERT(flag == SE_SLEEP || flag == SE_NOSLEEP);
1742 	ASSERT(!(flag == SE_SLEEP && servicing_interrupt()));
1743 
1744 	ev_copy = se_repack(ev, flag);
1745 	if (ev_copy == NULL) {
1746 		ASSERT(flag == SE_NOSLEEP);
1747 		return (SE_ENOMEM);
1748 	}
1749 	rval = queue_sysevent(ev_copy, eid, flag);
1750 	ASSERT(rval == 0 || rval == SE_ENOMEM || rval == SE_EQSIZE ||
1751 	    rval == SE_NO_TRANSPORT);
1752 	ASSERT(!(flag == SE_SLEEP && (rval == SE_EQSIZE || rval == SE_ENOMEM)));
1753 	return (rval);
1754 }
1755 
1756 /*
1757  * Publish EC_DEV_ADD and EC_DEV_REMOVE events from devfsadm to lofi.
1758  * This interface is needed to pass device link names to the lofi driver,
1759  * to be returned via ioctl() to the lofiadm command.
1760  * The problem is, if lofiadm is executed in local zone, there is no
1761  * mechanism to announce the device name from the /dev tree back to lofiadm,
1762  * as sysevents are not accessible from local zone and devfsadmd is only
1763  * running in global zone.
1764  *
1765  * Delayed/missed events are not fatal for lofi, as the device name returned
1766  * to lofiadm is for information and can be re-queried with listing
1767  * mappings with lofiadm command.
1768  *
1769  * Once we have a better method, this interface should be reworked.
1770  */
1771 static void
1772 notify_lofi(sysevent_t *ev)
1773 {
1774 	nvlist_t *nvlist;
1775 	char name[10], *class, *driver;
1776 	int32_t instance;
1777 
1778 	class = sysevent_get_class_name(ev);
1779 	if ((strcmp(EC_DEV_ADD, class) != 0) &&
1780 	    (strcmp(EC_DEV_REMOVE, class) != 0)) {
1781 		return;
1782 	}
1783 
1784 	(void) sysevent_get_attr_list(ev, &nvlist);
1785 	driver = fnvlist_lookup_string(nvlist, DEV_DRIVER_NAME);
1786 	instance = fnvlist_lookup_int32(nvlist, DEV_INSTANCE);
1787 
1788 	/* We are only interested about lofi. */
1789 	if (strcmp(driver, "lofi") != 0) {
1790 		fnvlist_free(nvlist);
1791 		return;
1792 	}
1793 
1794 	/*
1795 	 * insert or remove device info, then announce the change
1796 	 * via cv_broadcast.
1797 	 */
1798 	(void) snprintf(name, sizeof (name), "%d", instance);
1799 	mutex_enter(&lofi_devlink_cache.ln_lock);
1800 	if (strcmp(class, EC_DEV_ADD) == 0) {
1801 		fnvlist_add_nvlist(lofi_devlink_cache.ln_data, name, nvlist);
1802 	} else {
1803 		/* Can not use fnvlist_remove() as we can get ENOENT. */
1804 		(void) nvlist_remove_all(lofi_devlink_cache.ln_data, name);
1805 	}
1806 	cv_broadcast(&lofi_devlink_cache.ln_cv);
1807 	mutex_exit(&lofi_devlink_cache.ln_lock);
1808 
1809 	fnvlist_free(nvlist);
1810 }
1811 
1812 /*
1813  * log_usr_sysevent - user system event logger
1814  *			Private to devfsadm and accessible only via
1815  *			modctl(MODEVENTS, MODEVENTS_POST_EVENT)
1816  */
1817 int
1818 log_usr_sysevent(sysevent_t *ev, int ev_size, sysevent_id_t *eid)
1819 {
1820 	int ret, copy_sz;
1821 	sysevent_t *ev_copy;
1822 	sysevent_id_t new_eid;
1823 	log_eventq_t *qcopy;
1824 
1825 	copy_sz = ev_size + offsetof(log_eventq_t, arg) +
1826 	    offsetof(log_event_upcall_arg_t, buf);
1827 	qcopy = kmem_zalloc(copy_sz, KM_SLEEP);
1828 	ev_copy = (sysevent_t *)&qcopy->arg.buf;
1829 
1830 	/*
1831 	 * Copy event
1832 	 */
1833 	if (copyin(ev, ev_copy, ev_size) == -1) {
1834 		kmem_free(qcopy, copy_sz);
1835 		return (EFAULT);
1836 	}
1837 
1838 	notify_lofi(ev_copy);
1839 
1840 	if ((ret = queue_sysevent(ev_copy, &new_eid, SE_NOSLEEP)) != 0) {
1841 		if (ret == SE_ENOMEM || ret == SE_EQSIZE)
1842 			return (EAGAIN);
1843 		else
1844 			return (EIO);
1845 	}
1846 
1847 	if (copyout(&new_eid, eid, sizeof (sysevent_id_t)) == -1) {
1848 		return (EFAULT);
1849 	}
1850 
1851 	return (0);
1852 }
1853 
1854 
1855 
1856 int
1857 ddi_log_sysevent(
1858 	dev_info_t		*dip,
1859 	char			*vendor,
1860 	char			*class,
1861 	char			*subclass,
1862 	nvlist_t		*attr_list,
1863 	sysevent_id_t		*eidp,
1864 	int			sleep_flag)
1865 {
1866 	sysevent_attr_list_t	*list = (sysevent_attr_list_t *)attr_list;
1867 	char			pubstr[32];
1868 	sysevent_t		*event;
1869 	sysevent_id_t		eid;
1870 	const char		*drvname;
1871 	char			*publisher;
1872 	int			se_flag;
1873 	int			rval;
1874 	int			n;
1875 
1876 	if (sleep_flag == DDI_SLEEP && servicing_interrupt()) {
1877 		cmn_err(CE_NOTE, "!ddi_log_syevent: driver %s%d - cannot queue "
1878 		    "event from interrupt context with sleep semantics\n",
1879 		    ddi_driver_name(dip), ddi_get_instance(dip));
1880 		return (DDI_ECONTEXT);
1881 	}
1882 
1883 	drvname = ddi_driver_name(dip);
1884 	n = strlen(vendor) + strlen(drvname) + 7;
1885 	if (n < sizeof (pubstr)) {
1886 		publisher = pubstr;
1887 	} else {
1888 		publisher = kmem_alloc(n,
1889 		    (sleep_flag == DDI_SLEEP) ? KM_SLEEP : KM_NOSLEEP);
1890 		if (publisher == NULL) {
1891 			return (DDI_ENOMEM);
1892 		}
1893 	}
1894 	(void) strcpy(publisher, vendor);
1895 	(void) strcat(publisher, ":kern:");
1896 	(void) strcat(publisher, drvname);
1897 
1898 	se_flag = (sleep_flag == DDI_SLEEP) ? SE_SLEEP : SE_NOSLEEP;
1899 	event = sysevent_alloc(class, subclass, publisher, se_flag);
1900 
1901 	if (publisher != pubstr) {
1902 		kmem_free(publisher, n);
1903 	}
1904 
1905 	if (event == NULL) {
1906 		return (DDI_ENOMEM);
1907 	}
1908 
1909 	if (list) {
1910 		(void) sysevent_attach_attributes(event, list);
1911 	}
1912 
1913 	rval = log_sysevent(event, se_flag, &eid);
1914 	if (list) {
1915 		sysevent_detach_attributes(event);
1916 	}
1917 	sysevent_free(event);
1918 	if (rval == 0) {
1919 		if (eidp) {
1920 			eidp->eid_seq = eid.eid_seq;
1921 			eidp->eid_ts = eid.eid_ts;
1922 		}
1923 		return (DDI_SUCCESS);
1924 	}
1925 	if (rval == SE_NO_TRANSPORT)
1926 		return (DDI_ETRANSPORT);
1927 
1928 	ASSERT(rval == SE_ENOMEM || rval == SE_EQSIZE);
1929 	return ((rval == SE_ENOMEM) ? DDI_ENOMEM : DDI_EBUSY);
1930 }
1931 
1932 uint64_t
1933 log_sysevent_new_id(void)
1934 {
1935 	return (atomic_add_64_nv(&kernel_event_id, (uint64_t)1));
1936 }
1937