xref: /titanic_44/usr/src/uts/common/os/log_sysevent.c (revision fff7ec1d8ce71b3d8a998ac4391a99860ce07180)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 #include <sys/types.h>
26 #include <sys/errno.h>
27 #include <sys/stropts.h>
28 #include <sys/debug.h>
29 #include <sys/ddi.h>
30 #include <sys/sunddi.h>
31 #include <sys/vmem.h>
32 #include <sys/cmn_err.h>
33 #include <sys/callb.h>
34 #include <sys/sysevent.h>
35 #include <sys/sysevent_impl.h>
36 #include <sys/modctl.h>
37 #include <sys/sysmacros.h>
38 #include <sys/disp.h>
39 #include <sys/autoconf.h>
40 #include <sys/atomic.h>
41 #include <sys/sdt.h>
42 
43 /* for doors */
44 #include <sys/pathname.h>
45 #include <sys/door.h>
46 #include <sys/kmem.h>
47 #include <sys/cpuvar.h>
48 #include <sys/fs/snode.h>
49 
50 /*
51  * log_sysevent.c - Provides the interfaces for kernel event publication
52  *			to the sysevent event daemon (syseventd).
53  */
54 
/*
 * Debug stuff
 *
 * LOG_DEBUG(args) calls cmn_err with the parenthesized argument list `args'
 * whenever log_event_debug is non-zero.  LOG_DEBUG1(args) does the same only
 * when log_event_debug is greater than 1, and expands to nothing unless
 * DEBUG is defined.
 *
 * Both macros expand to an unbraced `if' statement: the caller supplies the
 * trailing semicolon and must not put an `else' directly after an
 * invocation, or it would bind to the macro's `if'.
 */
static int log_event_debug = 0;
#define	LOG_DEBUG(args)  if (log_event_debug) cmn_err args
#ifdef DEBUG
#define	LOG_DEBUG1(args)  if (log_event_debug > 1) cmn_err args
#else
#define	LOG_DEBUG1(args)
#endif
65 
/*
 * Local static vars
 */
/* queue of event buffers sent to syseventd */
static log_eventq_t *log_eventq_sent = NULL;

/*
 * Count of event buffers on the delivery queue (log_eventq_head).
 * The delivery thread decrements it each time a buffer is moved from
 * that queue to the sent queue.
 */
int log_eventq_cnt = 0;

/* queue of event buffers awaiting delivery to syseventd */
static log_eventq_t *log_eventq_head = NULL;
static log_eventq_t *log_eventq_tail = NULL;
static uint64_t kernel_event_id = 0;
/* nvlist encoding used when sizing and packing event attribute lists */
static int encoding = NV_ENCODE_NATIVE;

/* log event delivery flag */
#define	LOGEVENT_DELIVERY_OK	0	/* OK to deliver event buffers */
#define	LOGEVENT_DELIVERY_CONT	1	/* Continue to deliver event buffers */
#define	LOGEVENT_DELIVERY_HOLD	2	/* Hold delivering of event buffers */

/*
 * Tunable maximum event buffer queue size. Size depends on how many events
 * the queue must hold when syseventd is not available, for example during
 * system startup. Experience showed that more than 2000 events could be posted
 * due to correctable memory errors.
 */
int logevent_max_q_sz = 5000;


/* Current delivery state; one of the LOGEVENT_DELIVERY_* values above */
static int log_event_delivery = LOGEVENT_DELIVERY_HOLD;
/* Path handed to door_ki_open() when (re)binding the upcall door */
static char *logevent_door_upcall_filename = NULL;
static int logevent_door_upcall_filename_size;

static door_handle_t event_door = NULL;		/* Door for upcalls */

/*
 * async thread-related variables
 *
 * eventq_head_mutex - synchronizes access to the kernel event queue
 *
 * eventq_sent_mutex - synchronizes access to the queue of events sent to
 *			userlevel
 *
 * log_event_cv - condition variable signaled when an event has arrived or
 *			userlevel ready to process event buffers
 *
 * async_thread - asynchronous event delivery thread to userlevel daemon.
 *
 * event_qfull_mutex, event_qfull_cv, event_qfull_blocked - the delivery
 *			thread tests event_qfull_blocked and, when it is
 *			positive, signals or broadcasts event_qfull_cv while
 *			holding event_qfull_mutex.
 *			NOTE(review): the code that waits on the cv and
 *			updates the counter is not in this part of the file.
 *
 * sysevent_upcall_status - status of the door upcall link; the delivery
 *			thread stores 0 or the failing upcall error here.
 */
static kmutex_t eventq_head_mutex;
static kmutex_t eventq_sent_mutex;
static kcondvar_t log_event_cv;
static kthread_id_t async_thread = NULL;

static kmutex_t event_qfull_mutex;
static kcondvar_t event_qfull_cv;
static int event_qfull_blocked = 0;

static int sysevent_upcall_status = -1;
static kmutex_t registered_channel_mutex;

/*
 * Indicates the syseventd daemon has begun taking events
 */
int sysevent_daemon_init = 0;

/*
 * Back-off delay when door_ki_upcall returns EAGAIN.  Typically
 * caused by the server process doing a forkall().  Since all threads
 * but the thread actually doing the forkall() need to be quiesced,
 * the fork may take some time.  The min/max pause are in units
 * of clock ticks.
 */
#define	LOG_EVENT_MIN_PAUSE	8
#define	LOG_EVENT_MAX_PAUSE	128

/*
 * State for log_event_pause(): event_pause_state is raised by
 * log_event_pause() and lowered by log_event_busy_timeout(), which also
 * signals event_pause_cv under event_pause_mutex.
 */
static kmutex_t	event_pause_mutex;
static kcondvar_t event_pause_cv;
static int event_pause_state = 0;
148 
149 /*
150  * log_event_upcall_lookup - Establish door connection with user event
151  *				daemon (syseventd)
152  */
153 static int
154 log_event_upcall_lookup()
155 {
156 	int	error;
157 
158 	if (event_door) {	/* Release our previous hold (if any) */
159 		door_ki_rele(event_door);
160 	}
161 
162 	event_door = NULL;
163 
164 	/*
165 	 * Locate the door used for upcalls
166 	 */
167 	if ((error =
168 	    door_ki_open(logevent_door_upcall_filename, &event_door)) != 0) {
169 		return (error);
170 	}
171 
172 	return (0);
173 }
174 
175 
/*
 * log_event_busy_timeout - timeout() callback armed by log_event_pause().
 *			Clears event_pause_state and signals event_pause_cv,
 *			both while holding event_pause_mutex, which lets the
 *			waiter in log_event_pause() leave its wait loop.
 *			The argument is unused.
 */
/*ARGSUSED*/
static void
log_event_busy_timeout(void *arg)
{
	mutex_enter(&event_pause_mutex);
	event_pause_state = 0;
	cv_signal(&event_pause_cv);
	mutex_exit(&event_pause_mutex);
}
185 
/*
 * log_event_pause - Block the caller until a timeout of nticks clock ticks
 *			has fired.
 *
 *	Arms timeout() with log_event_busy_timeout() and then waits on
 *	event_pause_cv until that callback has cleared event_pause_state.
 *	If timeout() returns 0 no wait is performed.
 */
static void
log_event_pause(int nticks)
{
	timeout_id_t id;

	/*
	 * Only one use of log_event_pause at a time
	 */
	ASSERT(event_pause_state == 0);

	/* The flag is raised before the timeout is armed; the callback lowers it */
	event_pause_state = 1;
	id = timeout(log_event_busy_timeout, NULL, nticks);
	if (id != 0) {
		mutex_enter(&event_pause_mutex);
		while (event_pause_state)
			cv_wait(&event_pause_cv, &event_pause_mutex);
		mutex_exit(&event_pause_mutex);
	}
	/* Also resets the flag on the id == 0 path, where no wait took place */
	event_pause_state = 0;
}
206 
207 
/*
 * log_event_upcall - Perform the upcall to syseventd for event buffer delivery.
 *			Check for rebinding errors.
 *			The buffer passed in is reused by the syseventd
 *			door_return to hold the result code.
 *
 *	arg is handed to the door call as both the argument and the result
 *	buffer.  A non-zero return is either the error from looking up or
 *	calling the door, or the int read from the start of the result
 *	buffer after a successful call.
 *
 *	EINTR and EAGAIN from the upcall are retried without limit after a
 *	pause.  EBADF triggers a fresh door lookup and is retried unless the
 *	lookup fails or the loop counter has already exceeded 4.
 */
static int
log_event_upcall(log_event_upcall_arg_t *arg)
{
	int error;
	size_t size;
	sysevent_t *ev;
	door_arg_t darg, save_arg;
	int retry;
	int neagain = 0;	/* number of EAGAIN results, for LOG_DEBUG */
	int neintr = 0;		/* number of EINTR results, for LOG_DEBUG */
	int nticks = LOG_EVENT_MIN_PAUSE;	/* current EAGAIN back-off */

	/* Initialize door args */
	ev = (sysevent_t *)&arg->buf;
	size = sizeof (log_event_upcall_arg_t) + SE_PAYLOAD_SZ(ev);

	/* The same buffer carries the request out and the reply back */
	darg.rbuf = (char *)arg;
	darg.data_ptr = (char *)arg;
	darg.rsize = size;
	darg.data_size = size;
	darg.desc_ptr = NULL;
	darg.desc_num = 0;

	/* Bind to the daemon's door on first use */
	if ((event_door == NULL) &&
	    ((error = log_event_upcall_lookup()) != 0)) {
		LOG_DEBUG((CE_CONT,
		    "log_event_upcall: event_door error (%d)\n", error));

		return (error);
	}

	LOG_DEBUG1((CE_CONT, "log_event_upcall: 0x%llx\n",
	    (longlong_t)SE_SEQ((sysevent_t *)&arg->buf)));

	/* Keep a pristine copy; darg is restored from it before every retry */
	save_arg = darg;
	for (retry = 0; ; retry++) {
		if ((error = door_ki_upcall_limited(event_door, &darg, NULL,
		    SIZE_MAX, 0)) == 0) {
			break;
		}
		switch (error) {
		case EINTR:
			/* Interrupted: short fixed pause, then try again */
			neintr++;
			log_event_pause(2);
			darg = save_arg;
			break;
		case EAGAIN:
			/* cannot deliver upcall - process may be forking */
			neagain++;
			log_event_pause(nticks);
			/* Double the pause each time, capped at the maximum */
			nticks <<= 1;
			if (nticks > LOG_EVENT_MAX_PAUSE)
				nticks = LOG_EVENT_MAX_PAUSE;
			darg = save_arg;
			break;
		case EBADF:
			LOG_DEBUG((CE_CONT, "log_event_upcall: rebinding\n"));
			/* Server may have died. Try rebinding */
			if ((error = log_event_upcall_lookup()) != 0) {
				LOG_DEBUG((CE_CONT,
				    "log_event_upcall: lookup error %d\n",
				    error));
				return (EBADF);
			}
			/*
			 * retry counts every trip round the loop, including
			 * those taken for EINTR and EAGAIN.
			 */
			if (retry > 4) {
				LOG_DEBUG((CE_CONT,
				    "log_event_upcall: ebadf\n"));
				return (EBADF);
			}
			LOG_DEBUG((CE_CONT, "log_event_upcall: "
			    "retrying upcall after lookup\n"));
			darg = save_arg;
			break;
		default:
			cmn_err(CE_CONT,
			    "log_event_upcall: door_ki_upcall error %d\n",
			    error);
			return (error);
		}
	}

	if (neagain > 0 || neintr > 0) {
		LOG_DEBUG((CE_CONT, "upcall: eagain=%d eintr=%d nticks=%d\n",
		    neagain, neintr, nticks));
	}

	LOG_DEBUG1((CE_CONT, "log_event_upcall:\n\t"
	    "error=%d rptr1=%p rptr2=%p dptr2=%p ret1=%x ret2=%x\n",
	    error, (void *)arg, (void *)darg.rbuf,
	    (void *)darg.data_ptr,
	    *((int *)(darg.rbuf)), *((int *)(darg.data_ptr))));

	/*
	 * The loop above is only left through its break, i.e. with
	 * error == 0, so this branch is always taken.
	 */
	if (!error) {
		/*
		 * upcall was successfully executed. Check return code.
		 */
		error = *((int *)(darg.rbuf));
	}

	return (error);
}
315 
/*
 * log_event_deliver - event delivery thread
 *			Deliver all events on the event queue to syseventd.
 *			If the daemon can not process events, stop event
 *			delivery and wait for an indication from the
 *			daemon to resume delivery.
 *
 *			Once all event buffers have been delivered, wait
 *			until there are more to deliver.
 *
 *	Never returns.  eventq_head_mutex is held for the whole loop except
 *	across each upcall and while blocked in cv_wait() on log_event_cv.
 */
static void
log_event_deliver()
{
	log_eventq_t *q;
	int upcall_err;
	callb_cpr_t cprinfo;

	CALLB_CPR_INIT(&cprinfo, &eventq_head_mutex, callb_generic_cpr,
	    "logevent");

	/*
	 * eventq_head_mutex is exited (released) when there are no more
	 * events to process from the eventq in cv_wait().
	 */
	mutex_enter(&eventq_head_mutex);

	for (;;) {
		LOG_DEBUG1((CE_CONT, "log_event_deliver: head = %p\n",
		    (void *)log_eventq_head));

		upcall_err = 0;
		q = log_eventq_head;

		while (q) {
			log_eventq_t *next;

			/*
			 * Delivery is on hold: stop without making an
			 * upcall.  The queue lock is still held here.
			 */
			if (log_event_delivery == LOGEVENT_DELIVERY_HOLD) {
				upcall_err = EAGAIN;
				break;
			}

			/*
			 * Release event queue lock during upcall to
			 * syseventd.  On failure the lock is retaken
			 * before leaving the loop.
			 */
			mutex_exit(&eventq_head_mutex);
			if ((upcall_err = log_event_upcall(&q->arg)) != 0) {
				mutex_enter(&eventq_head_mutex);
				break;
			}

			/*
			 * We may be able to add entries to
			 * the queue now.  event_qfull_blocked is peeked at
			 * without the lock and checked again once
			 * event_qfull_mutex is held.
			 */
			if (event_qfull_blocked > 0 &&
			    log_eventq_cnt < logevent_max_q_sz) {
				mutex_enter(&event_qfull_mutex);
				if (event_qfull_blocked > 0) {
					cv_signal(&event_qfull_cv);
				}
				mutex_exit(&event_qfull_mutex);
			}

			mutex_enter(&eventq_head_mutex);

			/*
			 * Daemon restart can cause entries to be moved from
			 * the sent queue and put back on the event queue.
			 * If this has occurred, replay event queue
			 * processing from the new queue head.
			 */
			if (q != log_eventq_head) {
				q = log_eventq_head;
				LOG_DEBUG((CE_CONT, "log_event_deliver: "
				    "door upcall/daemon restart race\n"));
			} else {
				/*
				 * Move the event to the sent queue when a
				 * successful delivery has been made.  The
				 * buffer is pushed on the front of the sent
				 * queue and the head queue advances.
				 */
				mutex_enter(&eventq_sent_mutex);
				next = q->next;
				q->next = log_eventq_sent;
				log_eventq_sent = q;
				q = next;
				log_eventq_head = q;
				log_eventq_cnt--;
				if (q == NULL) {
					ASSERT(log_eventq_cnt == 0);
					log_eventq_tail = NULL;
				}
				mutex_exit(&eventq_sent_mutex);
			}
		}

		switch (upcall_err) {
		case 0:
			/*
			 * Success. The queue is empty.
			 */
			sysevent_upcall_status = 0;
			break;
		case EAGAIN:
			/*
			 * Delivery is on hold (but functional).
			 */
			sysevent_upcall_status = 0;
			/*
			 * If the user has already signaled for delivery
			 * resumption, continue.  Otherwise, we wait until
			 * we are signaled to continue.
			 */
			if (log_event_delivery == LOGEVENT_DELIVERY_CONT) {
				log_event_delivery = LOGEVENT_DELIVERY_OK;
				continue;
			} else {
				log_event_delivery = LOGEVENT_DELIVERY_HOLD;
			}

			LOG_DEBUG1((CE_CONT, "log_event_deliver: EAGAIN\n"));
			break;
		default:
			LOG_DEBUG((CE_CONT, "log_event_deliver: "
			    "upcall err %d\n", upcall_err));
			sysevent_upcall_status = upcall_err;
			/*
			 * Signal everyone waiting that transport is down
			 */
			if (event_qfull_blocked > 0) {
				mutex_enter(&event_qfull_mutex);
				if (event_qfull_blocked > 0) {
					cv_broadcast(&event_qfull_cv);
				}
				mutex_exit(&event_qfull_mutex);
			}
			break;
		}

		/* Sleep until log_event_cv is signaled, then rescan the queue */
		CALLB_CPR_SAFE_BEGIN(&cprinfo);
		cv_wait(&log_event_cv, &eventq_head_mutex);
		CALLB_CPR_SAFE_END(&cprinfo, &eventq_head_mutex);
	}
	/* NOTREACHED */
}
461 
462 /*
463  * log_event_init - Allocate and initialize log_event data structures.
464  */
465 void
466 log_event_init()
467 {
468 	mutex_init(&eventq_head_mutex, NULL, MUTEX_DEFAULT, NULL);
469 	mutex_init(&eventq_sent_mutex, NULL, MUTEX_DEFAULT, NULL);
470 	cv_init(&log_event_cv, NULL, CV_DEFAULT, NULL);
471 
472 	mutex_init(&event_qfull_mutex, NULL, MUTEX_DEFAULT, NULL);
473 	cv_init(&event_qfull_cv, NULL, CV_DEFAULT, NULL);
474 
475 	mutex_init(&event_pause_mutex, NULL, MUTEX_DEFAULT, NULL);
476 	cv_init(&event_pause_cv, NULL, CV_DEFAULT, NULL);
477 
478 	mutex_init(&registered_channel_mutex, NULL, MUTEX_DEFAULT, NULL);
479 	sysevent_evc_init();
480 }
481 
482 /*
483  * The following routines are used by kernel event publishers to
484  * allocate, append and free event buffers
485  */
486 /*
487  * sysevent_alloc - Allocate new eventq struct.  This element contains
488  *			an event buffer that will be used in a subsequent
489  *			call to log_sysevent.
490  */
491 sysevent_t *
492 sysevent_alloc(char *class, char *subclass, char *pub, int flag)
493 {
494 	int payload_sz;
495 	int class_sz, subclass_sz, pub_sz;
496 	int aligned_class_sz, aligned_subclass_sz, aligned_pub_sz;
497 	sysevent_t *ev;
498 	log_eventq_t *q;
499 
500 	ASSERT(class != NULL);
501 	ASSERT(subclass != NULL);
502 	ASSERT(pub != NULL);
503 
504 	/*
505 	 * Calculate and reserve space for the class, subclass and
506 	 * publisher strings in the event buffer
507 	 */
508 	class_sz = strlen(class) + 1;
509 	subclass_sz = strlen(subclass) + 1;
510 	pub_sz = strlen(pub) + 1;
511 
512 	ASSERT((class_sz <= MAX_CLASS_LEN) && (subclass_sz
513 	    <= MAX_SUBCLASS_LEN) && (pub_sz <= MAX_PUB_LEN));
514 
515 	/* String sizes must be 64-bit aligned in the event buffer */
516 	aligned_class_sz = SE_ALIGN(class_sz);
517 	aligned_subclass_sz = SE_ALIGN(subclass_sz);
518 	aligned_pub_sz = SE_ALIGN(pub_sz);
519 
520 	payload_sz = (aligned_class_sz - sizeof (uint64_t)) +
521 	    (aligned_subclass_sz - sizeof (uint64_t)) +
522 	    (aligned_pub_sz - sizeof (uint64_t)) - sizeof (uint64_t);
523 
524 	/*
525 	 * Allocate event buffer plus additional sysevent queue
526 	 * and payload overhead.
527 	 */
528 	q = kmem_zalloc(sizeof (log_eventq_t) + payload_sz, flag);
529 	if (q == NULL) {
530 		return (NULL);
531 	}
532 
533 	/* Initialize the event buffer data */
534 	ev = (sysevent_t *)&q->arg.buf;
535 	SE_VERSION(ev) = SYS_EVENT_VERSION;
536 	bcopy(class, SE_CLASS_NAME(ev), class_sz);
537 
538 	SE_SUBCLASS_OFF(ev) = SE_ALIGN(offsetof(sysevent_impl_t, se_class_name))
539 		+ aligned_class_sz;
540 	bcopy(subclass, SE_SUBCLASS_NAME(ev), subclass_sz);
541 
542 	SE_PUB_OFF(ev) = SE_SUBCLASS_OFF(ev) + aligned_subclass_sz;
543 	bcopy(pub, SE_PUB_NAME(ev), pub_sz);
544 
545 	SE_ATTR_PTR(ev) = UINT64_C(0);
546 	SE_PAYLOAD_SZ(ev) = payload_sz;
547 
548 	return (ev);
549 }
550 
551 /*
552  * sysevent_free - Free event buffer and any attribute data.
553  */
554 void
555 sysevent_free(sysevent_t *ev)
556 {
557 	log_eventq_t *q;
558 	nvlist_t *nvl;
559 
560 	ASSERT(ev != NULL);
561 	q = (log_eventq_t *)((caddr_t)ev - offsetof(log_eventq_t, arg.buf));
562 	nvl = (nvlist_t *)(uintptr_t)SE_ATTR_PTR(ev);
563 
564 	if (nvl != NULL) {
565 		size_t size = 0;
566 		(void) nvlist_size(nvl, &size, encoding);
567 		SE_PAYLOAD_SZ(ev) -= size;
568 		nvlist_free(nvl);
569 	}
570 	kmem_free(q, sizeof (log_eventq_t) + SE_PAYLOAD_SZ(ev));
571 }
572 
573 /*
574  * free_packed_event - Free packed event buffer
575  */
576 static void
577 free_packed_event(sysevent_t *ev)
578 {
579 	log_eventq_t *q;
580 
581 	ASSERT(ev != NULL);
582 	q = (log_eventq_t *)((caddr_t)ev - offsetof(log_eventq_t, arg.buf));
583 
584 	kmem_free(q, sizeof (log_eventq_t) + SE_PAYLOAD_SZ(ev));
585 }
586 
587 /*
588  * sysevent_add_attr - Add new attribute element to an event attribute list
589  *			If attribute list is NULL, start a new list.
590  */
591 int
592 sysevent_add_attr(sysevent_attr_list_t **ev_attr_list, char *name,
593 	sysevent_value_t *se_value, int flag)
594 {
595 	int error;
596 	nvlist_t **nvlp = (nvlist_t **)ev_attr_list;
597 
598 	if (nvlp == NULL || se_value == NULL) {
599 		return (SE_EINVAL);
600 	}
601 
602 	/*
603 	 * attr_sz is composed of the value data size + the name data size +
604 	 * any header data.  64-bit aligned.
605 	 */
606 	if (strlen(name) >= MAX_ATTR_NAME) {
607 		return (SE_EINVAL);
608 	}
609 
610 	/*
611 	 * Allocate nvlist
612 	 */
613 	if ((*nvlp == NULL) &&
614 	    (nvlist_alloc(nvlp, NV_UNIQUE_NAME_TYPE, flag) != 0))
615 		return (SE_ENOMEM);
616 
617 	/* add the attribute */
618 	switch (se_value->value_type) {
619 	case SE_DATA_TYPE_BYTE:
620 		error = nvlist_add_byte(*ev_attr_list, name,
621 		    se_value->value.sv_byte);
622 		break;
623 	case SE_DATA_TYPE_INT16:
624 		error = nvlist_add_int16(*ev_attr_list, name,
625 		    se_value->value.sv_int16);
626 		break;
627 	case SE_DATA_TYPE_UINT16:
628 		error = nvlist_add_uint16(*ev_attr_list, name,
629 		    se_value->value.sv_uint16);
630 		break;
631 	case SE_DATA_TYPE_INT32:
632 		error = nvlist_add_int32(*ev_attr_list, name,
633 		    se_value->value.sv_int32);
634 		break;
635 	case SE_DATA_TYPE_UINT32:
636 		error = nvlist_add_uint32(*ev_attr_list, name,
637 		    se_value->value.sv_uint32);
638 		break;
639 	case SE_DATA_TYPE_INT64:
640 		error = nvlist_add_int64(*ev_attr_list, name,
641 		    se_value->value.sv_int64);
642 		break;
643 	case SE_DATA_TYPE_UINT64:
644 		error = nvlist_add_uint64(*ev_attr_list, name,
645 		    se_value->value.sv_uint64);
646 		break;
647 	case SE_DATA_TYPE_STRING:
648 		if (strlen((char *)se_value->value.sv_string) >= MAX_STRING_SZ)
649 			return (SE_EINVAL);
650 		error = nvlist_add_string(*ev_attr_list, name,
651 		    se_value->value.sv_string);
652 		break;
653 	case SE_DATA_TYPE_BYTES:
654 		if (se_value->value.sv_bytes.size > MAX_BYTE_ARRAY)
655 			return (SE_EINVAL);
656 		error = nvlist_add_byte_array(*ev_attr_list, name,
657 		    se_value->value.sv_bytes.data,
658 		    se_value->value.sv_bytes.size);
659 		break;
660 	case SE_DATA_TYPE_TIME:
661 		error = nvlist_add_hrtime(*ev_attr_list, name,
662 		    se_value->value.sv_time);
663 		break;
664 	default:
665 		return (SE_EINVAL);
666 	}
667 
668 	return (error ? SE_ENOMEM : 0);
669 }
670 
671 /*
672  * sysevent_free_attr - Free an attribute list not associated with an
673  *			event buffer.
674  */
675 void
676 sysevent_free_attr(sysevent_attr_list_t *ev_attr_list)
677 {
678 	nvlist_free((nvlist_t *)ev_attr_list);
679 }
680 
681 /*
682  * sysevent_attach_attributes - Attach an attribute list to an event buffer.
683  *
684  *	This data will be re-packed into contiguous memory when the event
685  *	buffer is posted to log_sysevent.
686  */
687 int
688 sysevent_attach_attributes(sysevent_t *ev, sysevent_attr_list_t *ev_attr_list)
689 {
690 	size_t size = 0;
691 
692 	if (SE_ATTR_PTR(ev) != UINT64_C(0)) {
693 		return (SE_EINVAL);
694 	}
695 
696 	SE_ATTR_PTR(ev) = (uintptr_t)ev_attr_list;
697 	(void) nvlist_size((nvlist_t *)ev_attr_list, &size, encoding);
698 	SE_PAYLOAD_SZ(ev) += size;
699 	SE_FLAG(ev) = 0;
700 
701 	return (0);
702 }
703 
704 /*
705  * sysevent_detach_attributes - Detach but don't free attribute list from the
706  *				event buffer.
707  */
708 void
709 sysevent_detach_attributes(sysevent_t *ev)
710 {
711 	size_t size = 0;
712 	nvlist_t *nvl;
713 
714 	if ((nvl = (nvlist_t *)(uintptr_t)SE_ATTR_PTR(ev)) == NULL) {
715 		return;
716 	}
717 
718 	SE_ATTR_PTR(ev) = UINT64_C(0);
719 	(void) nvlist_size(nvl, &size, encoding);
720 	SE_PAYLOAD_SZ(ev) -= size;
721 	ASSERT(SE_PAYLOAD_SZ(ev) >= 0);
722 }
723 
724 /*
725  * sysevent_attr_name - Get name of attribute
726  */
727 char *
728 sysevent_attr_name(sysevent_attr_t *attr)
729 {
730 	if (attr == NULL) {
731 		return (NULL);
732 	}
733 
734 	return (nvpair_name(attr));
735 }
736 
737 /*
738  * sysevent_attr_type - Get type of attribute
739  */
740 int
741 sysevent_attr_type(sysevent_attr_t *attr)
742 {
743 	/*
744 	 * The SE_DATA_TYPE_* are typedef'ed to be the
745 	 * same value as DATA_TYPE_*
746 	 */
747 	return (nvpair_type((nvpair_t *)attr));
748 }
749 
750 /*
751  * Repack event buffer into contiguous memory
752  */
753 static sysevent_t *
754 se_repack(sysevent_t *ev, int flag)
755 {
756 	size_t copy_len;
757 	caddr_t attr;
758 	size_t size;
759 	uint64_t attr_offset;
760 	sysevent_t *copy;
761 	log_eventq_t *qcopy;
762 	sysevent_attr_list_t *nvl;
763 
764 	copy_len = sizeof (log_eventq_t) + SE_PAYLOAD_SZ(ev);
765 	qcopy = kmem_zalloc(copy_len, flag);
766 	if (qcopy == NULL) {
767 		return (NULL);
768 	}
769 	copy = (sysevent_t *)&qcopy->arg.buf;
770 
771 	/*
772 	 * Copy event header, class, subclass and publisher names
773 	 * Set the attribute offset (in number of bytes) to contiguous
774 	 * memory after the header.
775 	 */
776 
777 	attr_offset = SE_ATTR_OFF(ev);
778 
779 	ASSERT((caddr_t)copy + attr_offset <= (caddr_t)copy + copy_len);
780 
781 	bcopy(ev, copy, attr_offset);
782 
783 	/* Check if attribute list exists */
784 	if ((nvl = (nvlist_t *)(uintptr_t)SE_ATTR_PTR(ev)) == NULL) {
785 		return (copy);
786 	}
787 
788 	/*
789 	 * Copy attribute data to contiguous memory
790 	 */
791 	attr = (char *)copy + attr_offset;
792 	(void) nvlist_size(nvl, &size, encoding);
793 	if (nvlist_pack(nvl, &attr, &size, encoding, flag) != 0) {
794 		kmem_free(qcopy, copy_len);
795 		return (NULL);
796 	}
797 	SE_ATTR_PTR(copy) = UINT64_C(0);
798 	SE_FLAG(copy) = SE_PACKED_BUF;
799 
800 	return (copy);
801 }
802 
803 /*
804  * The sysevent registration provides a persistent and reliable database
805  * for channel information for sysevent channel publishers and
806  * subscribers.
807  *
808  * A channel is created and maintained by the kernel upon the first
809  * SE_OPEN_REGISTRATION operation to log_sysevent_register().  Channel
810  * event subscription information is updated as publishers or subscribers
811  * perform subsequent operations (SE_BIND_REGISTRATION, SE_REGISTER,
812  * SE_UNREGISTER and SE_UNBIND_REGISTRATION).
813  *
814  * For consistency, id's are assigned for every publisher or subscriber
815  * bound to a particular channel.  The id's are used to constrain resources
816  * and perform subscription lookup.
817  *
818  * Associated with each channel is a hashed list of the current subscriptions
819  * based upon event class and subclasses.  A subscription contains a class name,
820  * list of possible subclasses and an array of subscriber ids.  Subscriptions
821  * are updated for every SE_REGISTER or SE_UNREGISTER operation.
822  *
823  * Channels are closed once the last subscriber or publisher performs a
824  * SE_CLOSE_REGISTRATION operation.  All resources associated with the named
825  * channel are freed upon last close.
826  *
827  * Locking:
828  *	Every operation to log_sysevent() is protected by a single lock,
829  *	registered_channel_mutex.  It is expected that the granularity of
830  *	a single lock is sufficient given the frequency that updates will
831  *	occur.
832  *
833  *	If this locking strategy proves to be too contentious, a per-hash
834  *	or per-channel locking strategy may be implemented.
835  */
836 
837 
/* Bucket index into registered_channels[] for a channel name */
#define	CHANN_HASH(channel_name)	(hash_func(channel_name) \
					% CHAN_HASH_SZ)

/* Hash table of channel descriptors, chained through scd_next */
sysevent_channel_descriptor_t *registered_channels[CHAN_HASH_SZ];
/* Number of descriptors currently linked into registered_channels[] */
static int channel_cnt;
/* Forward declaration: used by unbind_common() ahead of its definition */
static void remove_all_class(sysevent_channel_descriptor_t *chan,
	uint32_t sub_id);
845 
/*
 * hash_func - Hash a NUL-terminated string to a 32-bit value.
 *
 *	Each byte is shifted into the low end of the accumulator four bits
 *	at a time; whenever the top nibble becomes non-zero it is folded
 *	back into the low bits and cleared, so the result always has its
 *	top four bits clear.
 *
 *	Bytes are read as unsigned char so that values of 0x80 and above
 *	are not sign-extended on platforms where plain char is signed.
 *	An empty string hashes to 0.
 */
static uint32_t
hash_func(const char *s)
{
	uint32_t result = 0;
	uint32_t g;

	while (*s != '\0') {
		result <<= 4;
		result += (uint32_t)(unsigned char)*s++;
		g = result & 0xf0000000;
		if (g != 0) {
			result ^= g >> 24;
			result ^= g;
		}
	}

	return (result);
}
864 
865 static sysevent_channel_descriptor_t *
866 get_channel(char *channel_name)
867 {
868 	int hash_index;
869 	sysevent_channel_descriptor_t *chan_list;
870 
871 	if (channel_name == NULL)
872 		return (NULL);
873 
874 	/* Find channel descriptor */
875 	hash_index = CHANN_HASH(channel_name);
876 	chan_list = registered_channels[hash_index];
877 	while (chan_list != NULL) {
878 		if (strcmp(chan_list->scd_channel_name, channel_name) == 0) {
879 			break;
880 		} else {
881 			chan_list = chan_list->scd_next;
882 		}
883 	}
884 
885 	return (chan_list);
886 }
887 
888 static class_lst_t *
889 create_channel_registration(sysevent_channel_descriptor_t *chan,
890     char *event_class, int index)
891 {
892 	size_t class_len;
893 	class_lst_t *c_list;
894 
895 	class_len = strlen(event_class) + 1;
896 	c_list = kmem_zalloc(sizeof (class_lst_t), KM_SLEEP);
897 	c_list->cl_name = kmem_zalloc(class_len, KM_SLEEP);
898 	bcopy(event_class, c_list->cl_name, class_len);
899 
900 	c_list->cl_subclass_list =
901 	    kmem_zalloc(sizeof (subclass_lst_t), KM_SLEEP);
902 	c_list->cl_subclass_list->sl_name =
903 	    kmem_zalloc(sizeof (EC_SUB_ALL), KM_SLEEP);
904 	bcopy(EC_SUB_ALL, c_list->cl_subclass_list->sl_name,
905 	    sizeof (EC_SUB_ALL));
906 
907 	c_list->cl_next = chan->scd_class_list_tbl[index];
908 	chan->scd_class_list_tbl[index] = c_list;
909 
910 	return (c_list);
911 }
912 
913 static void
914 free_channel_registration(sysevent_channel_descriptor_t *chan)
915 {
916 	int i;
917 	class_lst_t *clist, *next_clist;
918 	subclass_lst_t *sclist, *next_sc;
919 
920 	for (i = 0; i <= CLASS_HASH_SZ; ++i) {
921 
922 		clist = chan->scd_class_list_tbl[i];
923 		while (clist != NULL) {
924 			sclist = clist->cl_subclass_list;
925 			while (sclist != NULL) {
926 				kmem_free(sclist->sl_name,
927 				    strlen(sclist->sl_name) + 1);
928 				next_sc = sclist->sl_next;
929 				kmem_free(sclist, sizeof (subclass_lst_t));
930 				sclist = next_sc;
931 			}
932 			kmem_free(clist->cl_name,
933 			    strlen(clist->cl_name) + 1);
934 			next_clist = clist->cl_next;
935 			kmem_free(clist, sizeof (class_lst_t));
936 			clist = next_clist;
937 		}
938 	}
939 	chan->scd_class_list_tbl[0] = NULL;
940 }
941 
942 static int
943 open_channel(char *channel_name)
944 {
945 	int hash_index;
946 	sysevent_channel_descriptor_t *chan, *chan_list;
947 
948 
949 	if (channel_cnt > MAX_CHAN) {
950 		return (-1);
951 	}
952 
953 	/* Find channel descriptor */
954 	hash_index = CHANN_HASH(channel_name);
955 	chan_list = registered_channels[hash_index];
956 	while (chan_list != NULL) {
957 		if (strcmp(chan_list->scd_channel_name, channel_name) == 0) {
958 			chan_list->scd_ref_cnt++;
959 			kmem_free(channel_name, strlen(channel_name) + 1);
960 			return (0);
961 		} else {
962 			chan_list = chan_list->scd_next;
963 		}
964 	}
965 
966 
967 	/* New channel descriptor */
968 	chan = kmem_zalloc(sizeof (sysevent_channel_descriptor_t), KM_SLEEP);
969 	chan->scd_channel_name = channel_name;
970 
971 	/*
972 	 * Create subscriber ids in the range [1, MAX_SUBSCRIBERS).
973 	 * Subscriber id 0 is never allocated, but is used as a reserved id
974 	 * by libsysevent
975 	 */
976 	if ((chan->scd_subscriber_cache = vmem_create(channel_name, (void *)1,
977 	    MAX_SUBSCRIBERS + 1, 1, NULL, NULL, NULL, 0,
978 	    VM_NOSLEEP | VMC_IDENTIFIER)) == NULL) {
979 		kmem_free(chan, sizeof (sysevent_channel_descriptor_t));
980 		return (-1);
981 	}
982 	if ((chan->scd_publisher_cache = vmem_create(channel_name, (void *)1,
983 	    MAX_PUBLISHERS + 1, 1, NULL, NULL, NULL, 0,
984 	    VM_NOSLEEP | VMC_IDENTIFIER)) == NULL) {
985 		vmem_destroy(chan->scd_subscriber_cache);
986 		kmem_free(chan, sizeof (sysevent_channel_descriptor_t));
987 		return (-1);
988 	}
989 
990 	chan->scd_ref_cnt = 1;
991 
992 	(void) create_channel_registration(chan, EC_ALL, 0);
993 
994 	if (registered_channels[hash_index] != NULL)
995 		chan->scd_next = registered_channels[hash_index];
996 
997 	registered_channels[hash_index] = chan;
998 
999 	++channel_cnt;
1000 
1001 	return (0);
1002 }
1003 
1004 static void
1005 close_channel(char *channel_name)
1006 {
1007 	int hash_index;
1008 	sysevent_channel_descriptor_t *chan, *prev_chan;
1009 
1010 	/* Find channel descriptor */
1011 	hash_index = CHANN_HASH(channel_name);
1012 	prev_chan = chan = registered_channels[hash_index];
1013 
1014 	while (chan != NULL) {
1015 		if (strcmp(chan->scd_channel_name, channel_name) == 0) {
1016 			break;
1017 		} else {
1018 			prev_chan = chan;
1019 			chan = chan->scd_next;
1020 		}
1021 	}
1022 
1023 	if (chan == NULL)
1024 		return;
1025 
1026 	chan->scd_ref_cnt--;
1027 	if (chan->scd_ref_cnt > 0)
1028 		return;
1029 
1030 	free_channel_registration(chan);
1031 	vmem_destroy(chan->scd_subscriber_cache);
1032 	vmem_destroy(chan->scd_publisher_cache);
1033 	kmem_free(chan->scd_channel_name,
1034 	    strlen(chan->scd_channel_name) + 1);
1035 	if (registered_channels[hash_index] == chan)
1036 		registered_channels[hash_index] = chan->scd_next;
1037 	else
1038 		prev_chan->scd_next = chan->scd_next;
1039 	kmem_free(chan, sizeof (sysevent_channel_descriptor_t));
1040 	--channel_cnt;
1041 }
1042 
1043 static id_t
1044 bind_common(sysevent_channel_descriptor_t *chan, int type)
1045 {
1046 	id_t id;
1047 
1048 	if (type == SUBSCRIBER) {
1049 		id = (id_t)(uintptr_t)vmem_alloc(chan->scd_subscriber_cache, 1,
1050 		    VM_NOSLEEP | VM_NEXTFIT);
1051 		if (id <= 0 || id > MAX_SUBSCRIBERS)
1052 			return (0);
1053 		chan->scd_subscriber_ids[id] = 1;
1054 	} else {
1055 		id = (id_t)(uintptr_t)vmem_alloc(chan->scd_publisher_cache, 1,
1056 		    VM_NOSLEEP | VM_NEXTFIT);
1057 		if (id <= 0 || id > MAX_PUBLISHERS)
1058 			return (0);
1059 		chan->scd_publisher_ids[id] = 1;
1060 	}
1061 
1062 	return (id);
1063 }
1064 
1065 static int
1066 unbind_common(sysevent_channel_descriptor_t *chan, int type, id_t id)
1067 {
1068 	if (type == SUBSCRIBER) {
1069 		if (id <= 0 || id > MAX_SUBSCRIBERS)
1070 			return (0);
1071 		if (chan->scd_subscriber_ids[id] == 0)
1072 			return (0);
1073 		(void) remove_all_class(chan, id);
1074 		chan->scd_subscriber_ids[id] = 0;
1075 		vmem_free(chan->scd_subscriber_cache, (void *)(uintptr_t)id, 1);
1076 	} else {
1077 		if (id <= 0 || id > MAX_PUBLISHERS)
1078 			return (0);
1079 		if (chan->scd_publisher_ids[id] == 0)
1080 			return (0);
1081 		chan->scd_publisher_ids[id] = 0;
1082 		vmem_free(chan->scd_publisher_cache, (void *)(uintptr_t)id, 1);
1083 	}
1084 
1085 	return (1);
1086 }
1087 
1088 static void
1089 release_id(sysevent_channel_descriptor_t *chan, int type, id_t id)
1090 {
1091 	if (unbind_common(chan, type, id))
1092 		close_channel(chan->scd_channel_name);
1093 }
1094 
1095 static subclass_lst_t *
1096 find_subclass(class_lst_t *c_list, char *subclass)
1097 {
1098 	subclass_lst_t *sc_list;
1099 
1100 	if (c_list == NULL)
1101 		return (NULL);
1102 
1103 	sc_list = c_list->cl_subclass_list;
1104 
1105 	while (sc_list != NULL) {
1106 		if (strcmp(sc_list->sl_name, subclass) == 0) {
1107 			return (sc_list);
1108 		}
1109 		sc_list = sc_list->sl_next;
1110 	}
1111 
1112 	return (NULL);
1113 }
1114 
1115 static void
1116 insert_subclass(class_lst_t *c_list, char **subclass_names,
1117 	int subclass_num, uint32_t sub_id)
1118 {
1119 	int i, subclass_sz;
1120 	subclass_lst_t *sc_list;
1121 
1122 	for (i = 0; i < subclass_num; ++i) {
1123 		if ((sc_list = find_subclass(c_list, subclass_names[i]))
1124 		    != NULL) {
1125 			sc_list->sl_num[sub_id] = 1;
1126 		} else {
1127 
1128 			sc_list = kmem_zalloc(sizeof (subclass_lst_t),
1129 			    KM_SLEEP);
1130 			subclass_sz = strlen(subclass_names[i]) + 1;
1131 			sc_list->sl_name = kmem_zalloc(subclass_sz, KM_SLEEP);
1132 			bcopy(subclass_names[i], sc_list->sl_name,
1133 			    subclass_sz);
1134 
1135 			sc_list->sl_num[sub_id] = 1;
1136 
1137 			sc_list->sl_next = c_list->cl_subclass_list;
1138 			c_list->cl_subclass_list = sc_list;
1139 		}
1140 	}
1141 }
1142 
1143 static class_lst_t *
1144 find_class(sysevent_channel_descriptor_t *chan, char *class_name)
1145 {
1146 	class_lst_t *c_list;
1147 
1148 	c_list = chan->scd_class_list_tbl[CLASS_HASH(class_name)];
1149 	while (c_list != NULL) {
1150 		if (strcmp(class_name, c_list->cl_name) == 0)
1151 			break;
1152 		c_list = c_list->cl_next;
1153 	}
1154 
1155 	return (c_list);
1156 }
1157 
1158 static void
1159 remove_all_class(sysevent_channel_descriptor_t *chan, uint32_t sub_id)
1160 {
1161 	int i;
1162 	class_lst_t *c_list;
1163 	subclass_lst_t *sc_list;
1164 
1165 	for (i = 0; i <= CLASS_HASH_SZ; ++i) {
1166 
1167 		c_list = chan->scd_class_list_tbl[i];
1168 		while (c_list != NULL) {
1169 			sc_list = c_list->cl_subclass_list;
1170 			while (sc_list != NULL) {
1171 				sc_list->sl_num[sub_id] = 0;
1172 				sc_list = sc_list->sl_next;
1173 			}
1174 			c_list = c_list->cl_next;
1175 		}
1176 	}
1177 }
1178 
1179 static void
1180 remove_class(sysevent_channel_descriptor_t *chan, uint32_t sub_id,
1181 	char *class_name)
1182 {
1183 	class_lst_t *c_list;
1184 	subclass_lst_t *sc_list;
1185 
1186 	if (strcmp(class_name, EC_ALL) == 0) {
1187 		remove_all_class(chan, sub_id);
1188 		return;
1189 	}
1190 
1191 	if ((c_list = find_class(chan, class_name)) == NULL) {
1192 		return;
1193 	}
1194 
1195 	sc_list = c_list->cl_subclass_list;
1196 	while (sc_list != NULL) {
1197 		sc_list->sl_num[sub_id] = 0;
1198 		sc_list = sc_list->sl_next;
1199 	}
1200 }
1201 
1202 static int
1203 insert_class(sysevent_channel_descriptor_t *chan, char *event_class,
1204 	char **event_subclass_lst, int subclass_num, uint32_t sub_id)
1205 {
1206 	class_lst_t *c_list;
1207 
1208 	if (strcmp(event_class, EC_ALL) == 0) {
1209 		insert_subclass(chan->scd_class_list_tbl[0],
1210 		    event_subclass_lst, 1, sub_id);
1211 		return (0);
1212 	}
1213 
1214 	if (strlen(event_class) + 1 > MAX_CLASS_LEN)
1215 		return (-1);
1216 
1217 	/* New class, add to the registration cache */
1218 	if ((c_list = find_class(chan, event_class)) == NULL) {
1219 		c_list = create_channel_registration(chan, event_class,
1220 		    CLASS_HASH(event_class));
1221 	}
1222 
1223 	/* Update the subclass list */
1224 	insert_subclass(c_list, event_subclass_lst, subclass_num, sub_id);
1225 
1226 	return (0);
1227 }
1228 
1229 static int
1230 add_registration(sysevent_channel_descriptor_t *chan, uint32_t sub_id,
1231 	char *nvlbuf, size_t nvlsize)
1232 {
1233 	uint_t num_elem;
1234 	char *event_class;
1235 	char **event_list;
1236 	nvlist_t *nvl;
1237 	nvpair_t *nvpair = NULL;
1238 
1239 	if (nvlist_unpack(nvlbuf, nvlsize, &nvl, KM_SLEEP) != 0)
1240 		return (-1);
1241 
1242 	if ((nvpair = nvlist_next_nvpair(nvl, nvpair)) == NULL) {
1243 		nvlist_free(nvl);
1244 		return (-1);
1245 	}
1246 
1247 	if ((event_class = nvpair_name(nvpair)) == NULL) {
1248 		nvlist_free(nvl);
1249 		return (-1);
1250 	}
1251 	if (nvpair_value_string_array(nvpair, &event_list,
1252 	    &num_elem) != 0) {
1253 		nvlist_free(nvl);
1254 		return (-1);
1255 	}
1256 
1257 	if (insert_class(chan, event_class, event_list, num_elem, sub_id) < 0) {
1258 		nvlist_free(nvl);
1259 		return (-1);
1260 	}
1261 
1262 	nvlist_free(nvl);
1263 
1264 	return (0);
1265 }
1266 
1267 /*
1268  * get_registration - Return the requested class hash chain
1269  */
1270 static int
1271 get_registration(sysevent_channel_descriptor_t *chan, char *databuf,
1272 	uint32_t *bufsz, uint32_t class_index)
1273 {
1274 	int num_classes = 0;
1275 	char *nvlbuf = NULL;
1276 	size_t nvlsize;
1277 	nvlist_t *nvl;
1278 	class_lst_t *clist;
1279 	subclass_lst_t *sc_list;
1280 
1281 	if (class_index < 0 || class_index > CLASS_HASH_SZ)
1282 		return (EINVAL);
1283 
1284 	if ((clist = chan->scd_class_list_tbl[class_index]) == NULL) {
1285 		return (ENOENT);
1286 	}
1287 
1288 	if (nvlist_alloc(&nvl, 0, 0) != 0) {
1289 		return (EFAULT);
1290 	}
1291 
1292 	while (clist != NULL) {
1293 		if (nvlist_add_string(nvl, CLASS_NAME, clist->cl_name)
1294 		    != 0) {
1295 			nvlist_free(nvl);
1296 			return (EFAULT);
1297 		}
1298 
1299 		sc_list = clist->cl_subclass_list;
1300 		while (sc_list != NULL) {
1301 			if (nvlist_add_byte_array(nvl, sc_list->sl_name,
1302 			    sc_list->sl_num, MAX_SUBSCRIBERS) != 0) {
1303 				nvlist_free(nvl);
1304 				return (EFAULT);
1305 			}
1306 			sc_list = sc_list->sl_next;
1307 		}
1308 		num_classes++;
1309 		clist = clist->cl_next;
1310 	}
1311 
1312 	if (num_classes == 0) {
1313 		nvlist_free(nvl);
1314 		return (ENOENT);
1315 	}
1316 
1317 	if (nvlist_pack(nvl, &nvlbuf, &nvlsize, NV_ENCODE_NATIVE,
1318 	    KM_SLEEP)
1319 	    != 0) {
1320 		nvlist_free(nvl);
1321 		return (EFAULT);
1322 	}
1323 
1324 	nvlist_free(nvl);
1325 
1326 	if (nvlsize > *bufsz) {
1327 		kmem_free(nvlbuf, nvlsize);
1328 		*bufsz = nvlsize;
1329 		return (EAGAIN);
1330 	}
1331 
1332 	bcopy(nvlbuf, databuf, nvlsize);
1333 	kmem_free(nvlbuf, nvlsize);
1334 
1335 	return (0);
1336 }
1337 
1338 /*
1339  * log_sysevent_register - Register event subscriber for a particular
1340  *		event channel.
1341  */
1342 int
1343 log_sysevent_register(char *channel_name, char *udatabuf, se_pubsub_t *udata)
1344 {
1345 	int error = 0;
1346 	char *kchannel, *databuf = NULL;
1347 	size_t bufsz;
1348 	se_pubsub_t kdata;
1349 	sysevent_channel_descriptor_t *chan;
1350 
1351 	if (copyin(udata, &kdata, sizeof (se_pubsub_t)) == -1) {
1352 		return (EFAULT);
1353 	}
1354 	if (kdata.ps_channel_name_len == 0) {
1355 		return (EINVAL);
1356 	}
1357 	kchannel = kmem_alloc(kdata.ps_channel_name_len, KM_SLEEP);
1358 	if (copyin(channel_name, kchannel, kdata.ps_channel_name_len) == -1) {
1359 		kmem_free(kchannel, kdata.ps_channel_name_len);
1360 		return (EFAULT);
1361 	}
1362 	bufsz = kdata.ps_buflen;
1363 	if (bufsz > 0) {
1364 		databuf = kmem_alloc(bufsz, KM_SLEEP);
1365 		if (copyin(udatabuf, databuf, bufsz) == -1) {
1366 			kmem_free(kchannel, kdata.ps_channel_name_len);
1367 			kmem_free(databuf, bufsz);
1368 			return (EFAULT);
1369 		}
1370 	}
1371 
1372 	mutex_enter(&registered_channel_mutex);
1373 	if (kdata.ps_op != SE_OPEN_REGISTRATION &&
1374 	    kdata.ps_op != SE_CLOSE_REGISTRATION) {
1375 		chan = get_channel(kchannel);
1376 		if (chan == NULL) {
1377 			mutex_exit(&registered_channel_mutex);
1378 			kmem_free(kchannel, kdata.ps_channel_name_len);
1379 			if (bufsz > 0)
1380 				kmem_free(databuf, bufsz);
1381 			return (ENOENT);
1382 		}
1383 	}
1384 
1385 	switch (kdata.ps_op) {
1386 	case SE_OPEN_REGISTRATION:
1387 		if (open_channel(kchannel) != 0) {
1388 			error = ENOMEM;
1389 			if (bufsz > 0)
1390 				kmem_free(databuf, bufsz);
1391 			kmem_free(kchannel, kdata.ps_channel_name_len);
1392 		}
1393 
1394 		mutex_exit(&registered_channel_mutex);
1395 		return (error);
1396 	case SE_CLOSE_REGISTRATION:
1397 		close_channel(kchannel);
1398 		break;
1399 	case SE_BIND_REGISTRATION:
1400 		if ((kdata.ps_id = bind_common(chan, kdata.ps_type)) <= 0)
1401 			error = EBUSY;
1402 		break;
1403 	case SE_UNBIND_REGISTRATION:
1404 		(void) unbind_common(chan, kdata.ps_type, (id_t)kdata.ps_id);
1405 		break;
1406 	case SE_REGISTER:
1407 		if (bufsz == 0) {
1408 			error = EINVAL;
1409 			break;
1410 		}
1411 		if (add_registration(chan, kdata.ps_id, databuf, bufsz) == -1)
1412 			error = EINVAL;
1413 		break;
1414 	case SE_UNREGISTER:
1415 		if (bufsz == 0) {
1416 			error = EINVAL;
1417 			break;
1418 		}
1419 		remove_class(chan, kdata.ps_id, databuf);
1420 		break;
1421 	case SE_CLEANUP:
1422 		/* Cleanup the indicated subscriber or publisher */
1423 		release_id(chan, kdata.ps_type, kdata.ps_id);
1424 		break;
1425 	case SE_GET_REGISTRATION:
1426 		error = get_registration(chan, databuf,
1427 		    &kdata.ps_buflen, kdata.ps_id);
1428 		break;
1429 	default:
1430 		error = ENOTSUP;
1431 	}
1432 
1433 	mutex_exit(&registered_channel_mutex);
1434 
1435 	kmem_free(kchannel, kdata.ps_channel_name_len);
1436 
1437 	if (bufsz > 0) {
1438 		if (copyout(databuf, udatabuf, bufsz) == -1)
1439 			error = EFAULT;
1440 		kmem_free(databuf, bufsz);
1441 	}
1442 
1443 	if (copyout(&kdata, udata, sizeof (se_pubsub_t)) == -1)
1444 		return (EFAULT);
1445 
1446 	return (error);
1447 }
1448 
1449 /*
1450  * log_sysevent_copyout_data - Copyout event data to userland.
1451  *			This is called from modctl(MODEVENTS, MODEVENTS_GETDATA)
1452  *			The buffer size is always sufficient.
1453  */
1454 int
1455 log_sysevent_copyout_data(sysevent_id_t *eid, size_t ubuflen, caddr_t ubuf)
1456 {
1457 	int error = ENOENT;
1458 	log_eventq_t *q;
1459 	sysevent_t *ev;
1460 	sysevent_id_t eid_copy;
1461 
1462 	/*
1463 	 * Copy eid
1464 	 */
1465 	if (copyin(eid, &eid_copy, sizeof (sysevent_id_t)) == -1) {
1466 		return (EFAULT);
1467 	}
1468 
1469 	mutex_enter(&eventq_sent_mutex);
1470 	q = log_eventq_sent;
1471 
1472 	/*
1473 	 * Search for event buffer on the sent queue with matching
1474 	 * event identifier
1475 	 */
1476 	while (q) {
1477 		ev = (sysevent_t *)&q->arg.buf;
1478 
1479 		if (SE_TIME(ev) != eid_copy.eid_ts ||
1480 		    SE_SEQ(ev) != eid_copy.eid_seq) {
1481 			q = q->next;
1482 			continue;
1483 		}
1484 
1485 		if (ubuflen < SE_SIZE(ev)) {
1486 			error = EFAULT;
1487 			break;
1488 		}
1489 		if (copyout(ev, ubuf, SE_SIZE(ev)) != 0) {
1490 			error = EFAULT;
1491 			LOG_DEBUG((CE_NOTE, "Unable to retrieve system event "
1492 			    "0x%" PRIx64 " from queue: EFAULT\n",
1493 			    eid->eid_seq));
1494 		} else {
1495 			error = 0;
1496 		}
1497 		break;
1498 	}
1499 
1500 	mutex_exit(&eventq_sent_mutex);
1501 
1502 	return (error);
1503 }
1504 
1505 /*
1506  * log_sysevent_free_data - Free kernel copy of the event buffer identified
1507  *			by eid (must have already been sent).  Called from
1508  *			modctl(MODEVENTS, MODEVENTS_FREEDATA).
1509  */
1510 int
1511 log_sysevent_free_data(sysevent_id_t *eid)
1512 {
1513 	int error = ENOENT;
1514 	sysevent_t *ev;
1515 	log_eventq_t *q, *prev = NULL;
1516 	sysevent_id_t eid_copy;
1517 
1518 	/*
1519 	 * Copy eid
1520 	 */
1521 	if (copyin(eid, &eid_copy, sizeof (sysevent_id_t)) == -1) {
1522 		return (EFAULT);
1523 	}
1524 
1525 	mutex_enter(&eventq_sent_mutex);
1526 	q = log_eventq_sent;
1527 
1528 	/*
1529 	 * Look for the event to be freed on the sent queue.  Due to delayed
1530 	 * processing of the event, it may not be on the sent queue yet.
1531 	 * It is up to the user to retry the free operation to ensure that the
1532 	 * event is properly freed.
1533 	 */
1534 	while (q) {
1535 		ev = (sysevent_t *)&q->arg.buf;
1536 
1537 		if (SE_TIME(ev) != eid_copy.eid_ts ||
1538 		    SE_SEQ(ev) != eid_copy.eid_seq) {
1539 			prev = q;
1540 			q = q->next;
1541 			continue;
1542 		}
1543 		/*
1544 		 * Take it out of log_eventq_sent and free it
1545 		 */
1546 		if (prev) {
1547 			prev->next = q->next;
1548 		} else {
1549 			log_eventq_sent = q->next;
1550 		}
1551 		free_packed_event(ev);
1552 		error = 0;
1553 		break;
1554 	}
1555 
1556 	mutex_exit(&eventq_sent_mutex);
1557 
1558 	return (error);
1559 }
1560 
/*
 * log_sysevent_flushq - Begin or resume event buffer delivery.  If necessary,
 *			create log_event_deliver thread or wake it up
 *
 *			Neither cmd nor flag is examined.
 */
/*ARGSUSED*/
void
log_sysevent_flushq(int cmd, uint_t flag)
{
	mutex_enter(&eventq_head_mutex);

	/*
	 * Start the event delivery thread
	 * Mark the upcall status as active since we should
	 * now be able to begin emptying the queue normally.
	 *
	 * This is done only while async_thread is unset, i.e. the first
	 * time through.
	 */
	if (!async_thread) {
		sysevent_upcall_status = 0;
		sysevent_daemon_init = 1;
		setup_ddi_poststartup();
		async_thread = thread_create(NULL, 0, log_event_deliver,
		    NULL, 0, &p0, TS_RUN, minclsyspri);
	}

	/* Set the delivery state to "continue" and signal log_event_cv */
	log_event_delivery = LOGEVENT_DELIVERY_CONT;
	cv_signal(&log_event_cv);
	mutex_exit(&eventq_head_mutex);
}
1588 
1589 /*
1590  * log_sysevent_filename - Called by syseventd via
1591  *			modctl(MODEVENTS, MODEVENTS_SET_DOOR_UPCALL_FILENAME)
1592  *			to subsequently bind the event_door.
1593  *
1594  *			This routine is called everytime syseventd (re)starts
1595  *			and must therefore replay any events buffers that have
1596  *			been sent but not freed.
1597  *
1598  *			Event buffer delivery begins after a call to
1599  *			log_sysevent_flushq().
1600  */
1601 int
1602 log_sysevent_filename(char *file)
1603 {
1604 	/*
1605 	 * Called serially by syseventd init code, no need to protect door
1606 	 * data.
1607 	 */
1608 	/* Unbind old event door */
1609 	if (logevent_door_upcall_filename) {
1610 		kmem_free(logevent_door_upcall_filename,
1611 		    logevent_door_upcall_filename_size);
1612 		if (event_door) {
1613 			door_ki_rele(event_door);
1614 			event_door = NULL;
1615 		}
1616 	}
1617 	logevent_door_upcall_filename_size = strlen(file) + 1;
1618 	logevent_door_upcall_filename = kmem_alloc(
1619 	    logevent_door_upcall_filename_size, KM_SLEEP);
1620 	(void) strcpy(logevent_door_upcall_filename, file);
1621 
1622 	/*
1623 	 * We are called when syseventd restarts. Move all sent, but
1624 	 * not committed events from log_eventq_sent to log_eventq_head.
1625 	 * Do it in proper order to maintain increasing event id.
1626 	 */
1627 	mutex_enter(&eventq_head_mutex);
1628 
1629 	mutex_enter(&eventq_sent_mutex);
1630 	while (log_eventq_sent) {
1631 		log_eventq_t *tmp = log_eventq_sent->next;
1632 		log_eventq_sent->next = log_eventq_head;
1633 		if (log_eventq_head == NULL) {
1634 			ASSERT(log_eventq_cnt == 0);
1635 			log_eventq_tail = log_eventq_sent;
1636 			log_eventq_tail->next = NULL;
1637 		} else if (log_eventq_head == log_eventq_tail) {
1638 			ASSERT(log_eventq_cnt == 1);
1639 			ASSERT(log_eventq_head->next == NULL);
1640 			ASSERT(log_eventq_tail->next == NULL);
1641 		}
1642 		log_eventq_head = log_eventq_sent;
1643 		log_eventq_sent = tmp;
1644 		log_eventq_cnt++;
1645 	}
1646 	mutex_exit(&eventq_sent_mutex);
1647 	mutex_exit(&eventq_head_mutex);
1648 
1649 	return (0);
1650 }
1651 
1652 /*
1653  * queue_sysevent - queue an event buffer
1654  */
1655 static int
1656 queue_sysevent(sysevent_t *ev, sysevent_id_t *eid, int flag)
1657 {
1658 	log_eventq_t *q;
1659 
1660 	ASSERT(flag == SE_SLEEP || flag == SE_NOSLEEP);
1661 
1662 	DTRACE_SYSEVENT2(post, evch_bind_t *, NULL, sysevent_impl_t *, ev);
1663 
1664 restart:
1665 
1666 	/* Max Q size exceeded */
1667 	mutex_enter(&event_qfull_mutex);
1668 	if (sysevent_daemon_init && log_eventq_cnt >= logevent_max_q_sz) {
1669 		/*
1670 		 * If queue full and transport down, return no transport
1671 		 */
1672 		if (sysevent_upcall_status != 0) {
1673 			mutex_exit(&event_qfull_mutex);
1674 			free_packed_event(ev);
1675 			eid->eid_seq = UINT64_C(0);
1676 			eid->eid_ts = INT64_C(0);
1677 			return (SE_NO_TRANSPORT);
1678 		}
1679 		if (flag == SE_NOSLEEP) {
1680 			mutex_exit(&event_qfull_mutex);
1681 			free_packed_event(ev);
1682 			eid->eid_seq = UINT64_C(0);
1683 			eid->eid_ts = INT64_C(0);
1684 			return (SE_EQSIZE);
1685 		}
1686 		event_qfull_blocked++;
1687 		cv_wait(&event_qfull_cv, &event_qfull_mutex);
1688 		event_qfull_blocked--;
1689 		mutex_exit(&event_qfull_mutex);
1690 		goto restart;
1691 	}
1692 	mutex_exit(&event_qfull_mutex);
1693 
1694 	mutex_enter(&eventq_head_mutex);
1695 
1696 	/* Time stamp and assign ID */
1697 	SE_SEQ(ev) = eid->eid_seq = atomic_add_64_nv(&kernel_event_id,
1698 	    (uint64_t)1);
1699 	SE_TIME(ev) = eid->eid_ts = gethrtime();
1700 
1701 	LOG_DEBUG1((CE_CONT, "log_sysevent: class=%d type=%d id=0x%llx\n",
1702 	    SE_CLASS(ev), SE_SUBCLASS(ev), (longlong_t)SE_SEQ(ev)));
1703 
1704 	/*
1705 	 * Put event on eventq
1706 	 */
1707 	q = (log_eventq_t *)((caddr_t)ev - offsetof(log_eventq_t, arg.buf));
1708 	q->next = NULL;
1709 	if (log_eventq_head == NULL) {
1710 		ASSERT(log_eventq_cnt == 0);
1711 		log_eventq_head = q;
1712 		log_eventq_tail = q;
1713 	} else {
1714 		if (log_eventq_head == log_eventq_tail) {
1715 			ASSERT(log_eventq_cnt == 1);
1716 			ASSERT(log_eventq_head->next == NULL);
1717 			ASSERT(log_eventq_tail->next == NULL);
1718 		}
1719 		log_eventq_tail->next = q;
1720 		log_eventq_tail = q;
1721 	}
1722 	log_eventq_cnt++;
1723 
1724 	/* Signal event delivery thread */
1725 	if (log_eventq_cnt == 1) {
1726 		cv_signal(&log_event_cv);
1727 	}
1728 	mutex_exit(&eventq_head_mutex);
1729 
1730 	return (0);
1731 }
1732 
1733 /*
1734  * log_sysevent - kernel system event logger.
1735  *
1736  * Returns SE_ENOMEM if buf allocation failed or SE_EQSIZE if the
1737  * maximum event queue size will be exceeded
1738  * Returns 0 for successfully queued event buffer
1739  */
1740 int
1741 log_sysevent(sysevent_t *ev, int flag, sysevent_id_t *eid)
1742 {
1743 	sysevent_t *ev_copy;
1744 	int rval;
1745 
1746 	ASSERT(flag == SE_SLEEP || flag == SE_NOSLEEP);
1747 	ASSERT(!(flag == SE_SLEEP && servicing_interrupt()));
1748 
1749 	ev_copy = se_repack(ev, flag);
1750 	if (ev_copy == NULL) {
1751 		ASSERT(flag == SE_NOSLEEP);
1752 		return (SE_ENOMEM);
1753 	}
1754 	rval = queue_sysevent(ev_copy, eid, flag);
1755 	ASSERT(rval == 0 || rval == SE_ENOMEM || rval == SE_EQSIZE ||
1756 	    rval == SE_NO_TRANSPORT);
1757 	ASSERT(!(flag == SE_SLEEP && (rval == SE_EQSIZE || rval == SE_ENOMEM)));
1758 	return (rval);
1759 }
1760 
1761 /*
1762  * log_usr_sysevent - user system event logger
1763  *			Private to devfsadm and accessible only via
1764  *			modctl(MODEVENTS, MODEVENTS_POST_EVENT)
1765  */
1766 int
1767 log_usr_sysevent(sysevent_t *ev, int ev_size, sysevent_id_t *eid)
1768 {
1769 	int ret, copy_sz;
1770 	sysevent_t *ev_copy;
1771 	sysevent_id_t new_eid;
1772 	log_eventq_t *qcopy;
1773 
1774 	copy_sz = ev_size + offsetof(log_eventq_t, arg) +
1775 	    offsetof(log_event_upcall_arg_t, buf);
1776 	qcopy = kmem_zalloc(copy_sz, KM_SLEEP);
1777 	ev_copy = (sysevent_t *)&qcopy->arg.buf;
1778 
1779 	/*
1780 	 * Copy event
1781 	 */
1782 	if (copyin(ev, ev_copy, ev_size) == -1) {
1783 		kmem_free(qcopy, copy_sz);
1784 		return (EFAULT);
1785 	}
1786 
1787 	if ((ret = queue_sysevent(ev_copy, &new_eid, SE_NOSLEEP)) != 0) {
1788 		if (ret == SE_ENOMEM || ret == SE_EQSIZE)
1789 			return (EAGAIN);
1790 		else
1791 			return (EIO);
1792 	}
1793 
1794 	if (copyout(&new_eid, eid, sizeof (sysevent_id_t)) == -1) {
1795 		return (EFAULT);
1796 	}
1797 
1798 	return (0);
1799 }
1800 
1801 
1802 
1803 int
1804 ddi_log_sysevent(
1805 	dev_info_t		*dip,
1806 	char			*vendor,
1807 	char			*class,
1808 	char			*subclass,
1809 	nvlist_t		*attr_list,
1810 	sysevent_id_t		*eidp,
1811 	int			sleep_flag)
1812 {
1813 	sysevent_attr_list_t	*list = (sysevent_attr_list_t *)attr_list;
1814 	char			pubstr[32];
1815 	sysevent_t		*event;
1816 	sysevent_id_t		eid;
1817 	const char		*drvname;
1818 	char			*publisher;
1819 	int			se_flag;
1820 	int			rval;
1821 	int			n;
1822 
1823 	if (sleep_flag == DDI_SLEEP && servicing_interrupt()) {
1824 		cmn_err(CE_NOTE, "!ddi_log_syevent: driver %s%d - cannot queue "
1825 		    "event from interrupt context with sleep semantics\n",
1826 		    ddi_driver_name(dip), ddi_get_instance(dip));
1827 		return (DDI_ECONTEXT);
1828 	}
1829 
1830 	drvname = ddi_driver_name(dip);
1831 	n = strlen(vendor) + strlen(drvname) + 7;
1832 	if (n < sizeof (pubstr)) {
1833 		publisher = pubstr;
1834 	} else {
1835 		publisher = kmem_alloc(n,
1836 		    (sleep_flag == DDI_SLEEP) ? KM_SLEEP : KM_NOSLEEP);
1837 		if (publisher == NULL) {
1838 			return (DDI_ENOMEM);
1839 		}
1840 	}
1841 	(void) strcpy(publisher, vendor);
1842 	(void) strcat(publisher, ":kern:");
1843 	(void) strcat(publisher, drvname);
1844 
1845 	se_flag = (sleep_flag == DDI_SLEEP) ? SE_SLEEP : SE_NOSLEEP;
1846 	event = sysevent_alloc(class, subclass, publisher, se_flag);
1847 
1848 	if (publisher != pubstr) {
1849 		kmem_free(publisher, n);
1850 	}
1851 
1852 	if (event == NULL) {
1853 		return (DDI_ENOMEM);
1854 	}
1855 
1856 	if (list) {
1857 		(void) sysevent_attach_attributes(event, list);
1858 	}
1859 
1860 	rval = log_sysevent(event, se_flag, &eid);
1861 	if (list) {
1862 		sysevent_detach_attributes(event);
1863 	}
1864 	sysevent_free(event);
1865 	if (rval == 0) {
1866 		if (eidp) {
1867 			eidp->eid_seq = eid.eid_seq;
1868 			eidp->eid_ts = eid.eid_ts;
1869 		}
1870 		return (DDI_SUCCESS);
1871 	}
1872 	if (rval == SE_NO_TRANSPORT)
1873 		return (DDI_ETRANSPORT);
1874 
1875 	ASSERT(rval == SE_ENOMEM || rval == SE_EQSIZE);
1876 	return ((rval == SE_ENOMEM) ? DDI_ENOMEM : DDI_EBUSY);
1877 }
1878 
1879 uint64_t
1880 log_sysevent_new_id(void)
1881 {
1882 	return (atomic_add_64_nv(&kernel_event_id, (uint64_t)1));
1883 }
1884