xref: /titanic_41/usr/src/uts/common/os/log_sysevent.c (revision 200c5a5a428f15c16e2a526ed69d462af62e8e1a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/errno.h>
30 #include <sys/stropts.h>
31 #include <sys/debug.h>
32 #include <sys/ddi.h>
33 #include <sys/sunddi.h>
34 #include <sys/vmem.h>
35 #include <sys/cmn_err.h>
36 #include <sys/callb.h>
37 #include <sys/sysevent.h>
38 #include <sys/sysevent_impl.h>
39 #include <sys/modctl.h>
40 #include <sys/sysmacros.h>
41 #include <sys/disp.h>
42 #include <sys/autoconf.h>
43 #include <sys/atomic.h>
44 #include <sys/sdt.h>
45 
46 /* for doors */
47 #include <sys/pathname.h>
48 #include <sys/door.h>
49 #include <sys/kmem.h>
50 #include <sys/cpuvar.h>
51 #include <sys/fs/snode.h>
52 
53 /*
54  * log_sysevent.c - Provides the interfaces for kernel event publication
55  *			to the sysevent event daemon (syseventd).
56  */
57 
/*
 * Debug stuff
 *
 * LOG_DEBUG calls cmn_err() when log_event_debug is non-zero.  LOG_DEBUG1
 * calls cmn_err() when log_event_debug is greater than 1, and compiles to
 * nothing unless DEBUG is defined.
 *
 * `args' is the complete, parenthesized cmn_err() argument list, e.g.
 *	LOG_DEBUG((CE_CONT, "value %d\n", v));
 *
 * The do/while (0) wrapper turns each macro into a single statement, so an
 * unbraced "if (x) LOG_DEBUG((...)); else ..." keeps its else attached to
 * the outer if rather than to the test hidden inside the macro.
 */
static int log_event_debug = 0;
#define	LOG_DEBUG(args)	\
	do { if (log_event_debug) cmn_err args; } while (0)
#ifdef DEBUG
#define	LOG_DEBUG1(args)	\
	do { if (log_event_debug > 1) cmn_err args; } while (0)
#else
#define	LOG_DEBUG1(args)
#endif
68 
/*
 * Local static vars
 */
/* queue of event buffers already sent to syseventd */
static log_eventq_t *log_eventq_sent = NULL;

/*
 * Count of event buffers on the queue awaiting delivery (the list headed
 * by log_eventq_head).  It is decremented as a buffer is moved to the
 * sent queue.
 */
int log_eventq_cnt = 0;

/* queue of event buffers awaiting delivery to syseventd */
static log_eventq_t *log_eventq_head = NULL;
static log_eventq_t *log_eventq_tail = NULL;
static uint64_t kernel_event_id = 0;
/* nvlist encoding used when sizing and packing event attribute lists */
static int encoding = NV_ENCODE_NATIVE;

/* log event delivery flag */
#define	LOGEVENT_DELIVERY_OK	0	/* OK to deliver event buffers */
#define	LOGEVENT_DELIVERY_CONT	1	/* Continue to deliver event buffers */
#define	LOGEVENT_DELIVERY_HOLD	2	/* Hold delivering of event buffers */

/*
 * Tunable maximum event buffer queue size. Size depends on how many events
 * the queue must hold when syseventd is not available, for example during
 * system startup. Experience showed that more than 2000 events could be posted
 * due to correctable memory errors.
 */
int logevent_max_q_sz = 5000;


/* Delivery starts out on hold; one of the LOGEVENT_DELIVERY_* values */
static int log_event_delivery = LOGEVENT_DELIVERY_HOLD;
/* Path handed to door_ki_open() to locate the syseventd upcall door */
static char *logevent_door_upcall_filename = NULL;
static int logevent_door_upcall_filename_size;

static door_handle_t event_door = NULL;		/* Door for upcalls */

/*
 * async thread-related variables
 *
 * eventq_head_mutex - synchronizes access to the kernel event queue
 *
 * eventq_sent_mutex - synchronizes access to the queue of events sent to
 *			userlevel
 *
 * log_event_cv - condition variable signaled when an event has arrived or
 *			userlevel ready to process event buffers
 *
 * async_thread - asynchronous event delivery thread to userlevel daemon.
 *
 * sysevent_upcall_status - status of the door upcall link
 */
static kmutex_t eventq_head_mutex;
static kmutex_t eventq_sent_mutex;
static kcondvar_t log_event_cv;
static kthread_id_t async_thread = NULL;

/*
 * event_qfull_cv is signaled by the delivery thread when the queue has
 * room again, and broadcast when the transport is down; both are done
 * under event_qfull_mutex and only while event_qfull_blocked is > 0.
 * NOTE(review): event_qfull_blocked is presumably the number of publishers
 * blocked on a full queue -- confirm against the code that increments it.
 */
static kmutex_t event_qfull_mutex;
static kcondvar_t event_qfull_cv;
static int event_qfull_blocked = 0;

/* 0 while the upcall link is working, otherwise the last upcall error */
static int sysevent_upcall_status = -1;
static kmutex_t registered_channel_mutex;

/*
 * Indicates the syseventd daemon has begun taking events
 */
int sysevent_daemon_init = 0;

/*
 * Back-off delay when door_ki_upcall returns EAGAIN.  Typically
 * caused by the server process doing a forkall().  Since all threads
 * but the thread actually doing the forkall() need to be quiesced,
 * the fork may take some time.  The min/max pause are in units
 * of clock ticks.
 */
#define	LOG_EVENT_MIN_PAUSE	8
#define	LOG_EVENT_MAX_PAUSE	128

/*
 * State for log_event_pause(): event_pause_state is non-zero while a pause
 * is in progress and is cleared by the timeout callback, which also
 * signals event_pause_cv under event_pause_mutex.
 */
static kmutex_t	event_pause_mutex;
static kcondvar_t event_pause_cv;
static int event_pause_state = 0;
151 
152 /*
153  * log_event_upcall_lookup - Establish door connection with user event
154  *				daemon (syseventd)
155  */
156 static int
157 log_event_upcall_lookup()
158 {
159 	int	error;
160 
161 	if (event_door) {	/* Release our previous hold (if any) */
162 		door_ki_rele(event_door);
163 	}
164 
165 	event_door = NULL;
166 
167 	/*
168 	 * Locate the door used for upcalls
169 	 */
170 	if ((error =
171 	    door_ki_open(logevent_door_upcall_filename, &event_door)) != 0) {
172 		return (error);
173 	}
174 
175 	return (0);
176 }
177 
178 
/*
 * log_event_busy_timeout - timeout() callback scheduled by
 *				log_event_pause().
 *
 * Clears event_pause_state and signals event_pause_cv, both under
 * event_pause_mutex, so the thread waiting in log_event_pause() resumes.
 * The argument is unused.
 */
/*ARGSUSED*/
static void
log_event_busy_timeout(void *arg)
{
	mutex_enter(&event_pause_mutex);
	event_pause_state = 0;
	cv_signal(&event_pause_cv);
	mutex_exit(&event_pause_mutex);
}
188 
189 static void
190 log_event_pause(int nticks)
191 {
192 	timeout_id_t id;
193 
194 	/*
195 	 * Only one use of log_event_pause at a time
196 	 */
197 	ASSERT(event_pause_state == 0);
198 
199 	event_pause_state = 1;
200 	id = timeout(log_event_busy_timeout, NULL, nticks);
201 	if (id != 0) {
202 		mutex_enter(&event_pause_mutex);
203 		while (event_pause_state)
204 			cv_wait(&event_pause_cv, &event_pause_mutex);
205 		mutex_exit(&event_pause_mutex);
206 	}
207 	event_pause_state = 0;
208 }
209 
210 
211 /*
212  * log_event_upcall - Perform the upcall to syseventd for event buffer delivery.
213  * 			Check for rebinding errors
214  * 			This buffer is reused to by the syseventd door_return
215  *			to hold the result code
216  */
217 static int
218 log_event_upcall(log_event_upcall_arg_t *arg)
219 {
220 	int error;
221 	size_t size;
222 	sysevent_t *ev;
223 	door_arg_t darg, save_arg;
224 	int retry;
225 	int neagain = 0;
226 	int neintr = 0;
227 	int nticks = LOG_EVENT_MIN_PAUSE;
228 
229 	/* Initialize door args */
230 	ev = (sysevent_t *)&arg->buf;
231 	size = sizeof (log_event_upcall_arg_t) + SE_PAYLOAD_SZ(ev);
232 
233 	darg.rbuf = (char *)arg;
234 	darg.data_ptr = (char *)arg;
235 	darg.rsize = size;
236 	darg.data_size = size;
237 	darg.desc_ptr = NULL;
238 	darg.desc_num = 0;
239 
240 	if ((event_door == NULL) &&
241 	    ((error = log_event_upcall_lookup()) != 0)) {
242 		LOG_DEBUG((CE_CONT,
243 		    "log_event_upcall: event_door error (%d)\n", error));
244 
245 		return (error);
246 	}
247 
248 	LOG_DEBUG1((CE_CONT, "log_event_upcall: 0x%llx\n",
249 	    (longlong_t)SE_SEQ((sysevent_t *)&arg->buf)));
250 
251 	save_arg = darg;
252 	for (retry = 0; ; retry++) {
253 		if ((error = door_ki_upcall(event_door, &darg)) == 0) {
254 			break;
255 		}
256 		switch (error) {
257 		case EINTR:
258 			neintr++;
259 			log_event_pause(2);
260 			darg = save_arg;
261 			break;
262 		case EAGAIN:
263 			/* cannot deliver upcall - process may be forking */
264 			neagain++;
265 			log_event_pause(nticks);
266 			nticks <<= 1;
267 			if (nticks > LOG_EVENT_MAX_PAUSE)
268 				nticks = LOG_EVENT_MAX_PAUSE;
269 			darg = save_arg;
270 			break;
271 		case EBADF:
272 			LOG_DEBUG((CE_CONT, "log_event_upcall: rebinding\n"));
273 			/* Server may have died. Try rebinding */
274 			if ((error = log_event_upcall_lookup()) != 0) {
275 				LOG_DEBUG((CE_CONT,
276 				    "log_event_upcall: lookup error %d\n",
277 				    error));
278 				return (EBADF);
279 			}
280 			if (retry > 4) {
281 				LOG_DEBUG((CE_CONT,
282 					"log_event_upcall: ebadf\n"));
283 				return (EBADF);
284 			}
285 			LOG_DEBUG((CE_CONT, "log_event_upcall: "
286 				"retrying upcall after lookup\n"));
287 			darg = save_arg;
288 			break;
289 		default:
290 			cmn_err(CE_CONT,
291 			    "log_event_upcall: door_ki_upcall error %d\n",
292 			    error);
293 			return (error);
294 		}
295 	}
296 
297 	if (neagain > 0 || neintr > 0) {
298 		LOG_DEBUG((CE_CONT, "upcall: eagain=%d eintr=%d nticks=%d\n",
299 			neagain, neintr, nticks));
300 	}
301 
302 	LOG_DEBUG1((CE_CONT, "log_event_upcall:\n\t"
303 		"error=%d rptr1=%p rptr2=%p dptr2=%p ret1=%x ret2=%x\n",
304 		error, (void *)arg, (void *)darg.rbuf,
305 		(void *)darg.data_ptr,
306 		*((int *)(darg.rbuf)), *((int *)(darg.data_ptr))));
307 
308 	if (!error) {
309 		/*
310 		 * upcall was successfully executed. Check return code.
311 		 */
312 		error = *((int *)(darg.rbuf));
313 	}
314 
315 	return (error);
316 }
317 
/*
 * log_event_deliver - event delivery thread
 *			Deliver all events on the event queue to syseventd.
 *			If the daemon can not process events, stop event
 *			delivery and wait for an indication from the
 *			daemon to resume delivery.
 *
 *			Once all event buffers have been delivered, wait
 *			until there are more to deliver.
 *
 *			eventq_head_mutex is held except across each upcall
 *			and while blocked in cv_wait() on log_event_cv.
 *			This function never returns.
 */
static void
log_event_deliver()
{
	log_eventq_t *q;
	int upcall_err;
	callb_cpr_t cprinfo;

	CALLB_CPR_INIT(&cprinfo, &eventq_head_mutex, callb_generic_cpr,
				"logevent");

	/*
	 * eventq_head_mutex is exited (released) when there are no more
	 * events to process from the eventq in cv_wait().
	 */
	mutex_enter(&eventq_head_mutex);

	for (;;) {
		LOG_DEBUG1((CE_CONT, "log_event_deliver: head = %p\n",
		    (void *)log_eventq_head));

		upcall_err = 0;
		q = log_eventq_head;

		while (q) {
			log_eventq_t *next;

			/*
			 * Delivery is on hold: stop here and report EAGAIN
			 * to the switch below (the lock is still held).
			 */
			if (log_event_delivery == LOGEVENT_DELIVERY_HOLD) {
				upcall_err = EAGAIN;
				break;
			}

			/*
			 * Release event queue lock during upcall to
			 * syseventd; re-take it before leaving the loop
			 * on failure.
			 */
			mutex_exit(&eventq_head_mutex);
			if ((upcall_err = log_event_upcall(&q->arg)) != 0) {
				mutex_enter(&eventq_head_mutex);
				break;
			}

			/*
			 * We may be able to add entries to
			 * the queue now.  The first test is made without
			 * event_qfull_mutex and repeated once it is held.
			 */
			if (event_qfull_blocked > 0 &&
			    log_eventq_cnt < logevent_max_q_sz) {
				mutex_enter(&event_qfull_mutex);
				if (event_qfull_blocked > 0) {
					cv_signal(&event_qfull_cv);
				}
				mutex_exit(&event_qfull_mutex);
			}

			mutex_enter(&eventq_head_mutex);

			/*
			 * Daemon restart can cause entries to be moved from
			 * the sent queue and put back on the event queue.
			 * If this has occurred, replay event queue
			 * processing from the new queue head.
			 */
			if (q != log_eventq_head) {
				q = log_eventq_head;
				LOG_DEBUG((CE_CONT, "log_event_deliver: "
				    "door upcall/daemon restart race\n"));
			} else {
				/*
				 * Move the event to the sent queue when a
				 * successful delivery has been made.  The
				 * buffer is pushed on the front of the sent
				 * list and the head advances to its successor.
				 */
				mutex_enter(&eventq_sent_mutex);
				next = q->next;
				q->next = log_eventq_sent;
				log_eventq_sent = q;
				q = next;
				log_eventq_head = q;
				log_eventq_cnt--;
				if (q == NULL) {
					/* Queue drained: no tail either */
					ASSERT(log_eventq_cnt == 0);
					log_eventq_tail = NULL;
				}
				mutex_exit(&eventq_sent_mutex);
			}
		}

		switch (upcall_err) {
		case 0:
			/*
			 * Success. The queue is empty.
			 */
			sysevent_upcall_status = 0;
			break;
		case EAGAIN:
			/*
			 * Delivery is on hold (but functional).
			 */
			sysevent_upcall_status = 0;
			/*
			 * If the user has already signaled for delivery
			 * resumption, continue.  Otherwise, we wait until
			 * we are signaled to continue.
			 */
			if (log_event_delivery == LOGEVENT_DELIVERY_CONT) {
				log_event_delivery = LOGEVENT_DELIVERY_OK;
				continue;
			} else {
				log_event_delivery = LOGEVENT_DELIVERY_HOLD;
			}

			LOG_DEBUG1((CE_CONT, "log_event_deliver: EAGAIN\n"));
			break;
		default:
			LOG_DEBUG((CE_CONT, "log_event_deliver: "
				"upcall err %d\n", upcall_err));
			/* Record the failure as the upcall link status */
			sysevent_upcall_status = upcall_err;
			/*
			 * Signal everyone waiting that transport is down
			 */
			if (event_qfull_blocked > 0) {
				mutex_enter(&event_qfull_mutex);
				if (event_qfull_blocked > 0) {
					cv_broadcast(&event_qfull_cv);
				}
				mutex_exit(&event_qfull_mutex);
			}
			break;
		}

		/*
		 * Sleep until log_event_cv is signaled.  cv_wait() drops
		 * eventq_head_mutex while blocked and holds it again on
		 * return; the wait is bracketed by the CPR-safe markers.
		 */
		CALLB_CPR_SAFE_BEGIN(&cprinfo);
		cv_wait(&log_event_cv, &eventq_head_mutex);
		CALLB_CPR_SAFE_END(&cprinfo, &eventq_head_mutex);
	}
	/* NOTREACHED */
}
463 
464 /*
465  * log_event_init - Allocate and initialize log_event data structures.
466  */
467 void
468 log_event_init()
469 {
470 	mutex_init(&eventq_head_mutex, NULL, MUTEX_DEFAULT, NULL);
471 	mutex_init(&eventq_sent_mutex, NULL, MUTEX_DEFAULT, NULL);
472 	cv_init(&log_event_cv, NULL, CV_DEFAULT, NULL);
473 
474 	mutex_init(&event_qfull_mutex, NULL, MUTEX_DEFAULT, NULL);
475 	cv_init(&event_qfull_cv, NULL, CV_DEFAULT, NULL);
476 
477 	mutex_init(&event_pause_mutex, NULL, MUTEX_DEFAULT, NULL);
478 	cv_init(&event_pause_cv, NULL, CV_DEFAULT, NULL);
479 
480 	mutex_init(&registered_channel_mutex, NULL, MUTEX_DEFAULT, NULL);
481 	sysevent_evc_init();
482 }
483 
484 /*
485  * The following routines are used by kernel event publishers to
486  * allocate, append and free event buffers
487  */
488 /*
489  * sysevent_alloc - Allocate new eventq struct.  This element contains
490  *			an event buffer that will be used in a subsequent
491  *			call to log_sysevent.
492  */
493 sysevent_t *
494 sysevent_alloc(char *class, char *subclass, char *pub, int flag)
495 {
496 	int payload_sz;
497 	int class_sz, subclass_sz, pub_sz;
498 	int aligned_class_sz, aligned_subclass_sz, aligned_pub_sz;
499 	sysevent_t *ev;
500 	log_eventq_t *q;
501 
502 	ASSERT(class != NULL);
503 	ASSERT(subclass != NULL);
504 	ASSERT(pub != NULL);
505 
506 	/*
507 	 * Calculate and reserve space for the class, subclass and
508 	 * publisher strings in the event buffer
509 	 */
510 	class_sz = strlen(class) + 1;
511 	subclass_sz = strlen(subclass) + 1;
512 	pub_sz = strlen(pub) + 1;
513 
514 	ASSERT((class_sz <= MAX_CLASS_LEN) && (subclass_sz
515 	    <= MAX_SUBCLASS_LEN) && (pub_sz <= MAX_PUB_LEN));
516 
517 	/* String sizes must be 64-bit aligned in the event buffer */
518 	aligned_class_sz = SE_ALIGN(class_sz);
519 	aligned_subclass_sz = SE_ALIGN(subclass_sz);
520 	aligned_pub_sz = SE_ALIGN(pub_sz);
521 
522 	payload_sz = (aligned_class_sz - sizeof (uint64_t)) +
523 		(aligned_subclass_sz - sizeof (uint64_t)) +
524 		(aligned_pub_sz - sizeof (uint64_t)) - sizeof (uint64_t);
525 
526 	/*
527 	 * Allocate event buffer plus additional sysevent queue
528 	 * and payload overhead.
529 	 */
530 	q = kmem_zalloc(sizeof (log_eventq_t) + payload_sz, flag);
531 	if (q == NULL) {
532 		return (NULL);
533 	}
534 
535 	/* Initialize the event buffer data */
536 	ev = (sysevent_t *)&q->arg.buf;
537 	SE_VERSION(ev) = SYS_EVENT_VERSION;
538 	bcopy(class, SE_CLASS_NAME(ev), class_sz);
539 
540 	SE_SUBCLASS_OFF(ev) = SE_ALIGN(offsetof(sysevent_impl_t, se_class_name))
541 		+ aligned_class_sz;
542 	bcopy(subclass, SE_SUBCLASS_NAME(ev), subclass_sz);
543 
544 	SE_PUB_OFF(ev) = SE_SUBCLASS_OFF(ev) + aligned_subclass_sz;
545 	bcopy(pub, SE_PUB_NAME(ev), pub_sz);
546 
547 	SE_ATTR_PTR(ev) = UINT64_C(0);
548 	SE_PAYLOAD_SZ(ev) = payload_sz;
549 
550 	return (ev);
551 }
552 
553 /*
554  * sysevent_free - Free event buffer and any attribute data.
555  */
556 void
557 sysevent_free(sysevent_t *ev)
558 {
559 	log_eventq_t *q;
560 	nvlist_t *nvl;
561 
562 	ASSERT(ev != NULL);
563 	q = (log_eventq_t *)((caddr_t)ev - offsetof(log_eventq_t, arg.buf));
564 	nvl = (nvlist_t *)(uintptr_t)SE_ATTR_PTR(ev);
565 
566 	if (nvl != NULL) {
567 		size_t size = 0;
568 		(void) nvlist_size(nvl, &size, encoding);
569 		SE_PAYLOAD_SZ(ev) -= size;
570 		nvlist_free(nvl);
571 	}
572 	kmem_free(q, sizeof (log_eventq_t) + SE_PAYLOAD_SZ(ev));
573 }
574 
575 /*
576  * free_packed_event - Free packed event buffer
577  */
578 static void
579 free_packed_event(sysevent_t *ev)
580 {
581 	log_eventq_t *q;
582 
583 	ASSERT(ev != NULL);
584 	q = (log_eventq_t *)((caddr_t)ev - offsetof(log_eventq_t, arg.buf));
585 
586 	kmem_free(q, sizeof (log_eventq_t) + SE_PAYLOAD_SZ(ev));
587 }
588 
589 /*
590  * sysevent_add_attr - Add new attribute element to an event attribute list
591  *			If attribute list is NULL, start a new list.
592  */
593 int
594 sysevent_add_attr(sysevent_attr_list_t **ev_attr_list, char *name,
595 	sysevent_value_t *se_value, int flag)
596 {
597 	int error;
598 	nvlist_t **nvlp = (nvlist_t **)ev_attr_list;
599 
600 	if (nvlp == NULL || se_value == NULL) {
601 		return (SE_EINVAL);
602 	}
603 
604 	/*
605 	 * attr_sz is composed of the value data size + the name data size +
606 	 * any header data.  64-bit aligned.
607 	 */
608 	if (strlen(name) >= MAX_ATTR_NAME) {
609 		return (SE_EINVAL);
610 	}
611 
612 	/*
613 	 * Allocate nvlist
614 	 */
615 	if ((*nvlp == NULL) &&
616 	    (nvlist_alloc(nvlp, NV_UNIQUE_NAME_TYPE, flag) != 0))
617 		return (SE_ENOMEM);
618 
619 	/* add the attribute */
620 	switch (se_value->value_type) {
621 	case SE_DATA_TYPE_BYTE:
622 		error = nvlist_add_byte(*ev_attr_list, name,
623 		    se_value->value.sv_byte);
624 		break;
625 	case SE_DATA_TYPE_INT16:
626 		error = nvlist_add_int16(*ev_attr_list, name,
627 		    se_value->value.sv_int16);
628 		break;
629 	case SE_DATA_TYPE_UINT16:
630 		error = nvlist_add_uint16(*ev_attr_list, name,
631 		    se_value->value.sv_uint16);
632 		break;
633 	case SE_DATA_TYPE_INT32:
634 		error = nvlist_add_int32(*ev_attr_list, name,
635 		    se_value->value.sv_int32);
636 		break;
637 	case SE_DATA_TYPE_UINT32:
638 		error = nvlist_add_uint32(*ev_attr_list, name,
639 		    se_value->value.sv_uint32);
640 		break;
641 	case SE_DATA_TYPE_INT64:
642 		error = nvlist_add_int64(*ev_attr_list, name,
643 		    se_value->value.sv_int64);
644 		break;
645 	case SE_DATA_TYPE_UINT64:
646 		error = nvlist_add_uint64(*ev_attr_list, name,
647 		    se_value->value.sv_uint64);
648 		break;
649 	case SE_DATA_TYPE_STRING:
650 		if (strlen((char *)se_value->value.sv_string) >= MAX_STRING_SZ)
651 			return (SE_EINVAL);
652 		error = nvlist_add_string(*ev_attr_list, name,
653 		    se_value->value.sv_string);
654 		break;
655 	case SE_DATA_TYPE_BYTES:
656 		if (se_value->value.sv_bytes.size > MAX_BYTE_ARRAY)
657 			return (SE_EINVAL);
658 		error = nvlist_add_byte_array(*ev_attr_list, name,
659 		    se_value->value.sv_bytes.data,
660 		    se_value->value.sv_bytes.size);
661 		break;
662 	case SE_DATA_TYPE_TIME:
663 		error = nvlist_add_hrtime(*ev_attr_list, name,
664 		    se_value->value.sv_time);
665 		break;
666 	default:
667 		return (SE_EINVAL);
668 	}
669 
670 	return (error ? SE_ENOMEM : 0);
671 }
672 
673 /*
674  * sysevent_free_attr - Free an attribute list not associated with an
675  *			event buffer.
676  */
677 void
678 sysevent_free_attr(sysevent_attr_list_t *ev_attr_list)
679 {
680 	nvlist_free((nvlist_t *)ev_attr_list);
681 }
682 
683 /*
684  * sysevent_attach_attributes - Attach an attribute list to an event buffer.
685  *
686  *	This data will be re-packed into contiguous memory when the event
687  *	buffer is posted to log_sysevent.
688  */
689 int
690 sysevent_attach_attributes(sysevent_t *ev, sysevent_attr_list_t *ev_attr_list)
691 {
692 	size_t size = 0;
693 
694 	if (SE_ATTR_PTR(ev) != UINT64_C(0)) {
695 		return (SE_EINVAL);
696 	}
697 
698 	SE_ATTR_PTR(ev) = (uintptr_t)ev_attr_list;
699 	(void) nvlist_size((nvlist_t *)ev_attr_list, &size, encoding);
700 	SE_PAYLOAD_SZ(ev) += size;
701 	SE_FLAG(ev) = 0;
702 
703 	return (0);
704 }
705 
706 /*
707  * sysevent_detach_attributes - Detach but don't free attribute list from the
708  *				event buffer.
709  */
710 void
711 sysevent_detach_attributes(sysevent_t *ev)
712 {
713 	size_t size = 0;
714 	nvlist_t *nvl;
715 
716 	if ((nvl = (nvlist_t *)(uintptr_t)SE_ATTR_PTR(ev)) == NULL) {
717 		return;
718 	}
719 
720 	SE_ATTR_PTR(ev) = UINT64_C(0);
721 	(void) nvlist_size(nvl, &size, encoding);
722 	SE_PAYLOAD_SZ(ev) -= size;
723 	ASSERT(SE_PAYLOAD_SZ(ev) >= 0);
724 }
725 
726 /*
727  * sysevent_attr_name - Get name of attribute
728  */
729 char *
730 sysevent_attr_name(sysevent_attr_t *attr)
731 {
732 	if (attr == NULL) {
733 		return (NULL);
734 	}
735 
736 	return (nvpair_name(attr));
737 }
738 
739 /*
740  * sysevent_attr_type - Get type of attribute
741  */
742 int
743 sysevent_attr_type(sysevent_attr_t *attr)
744 {
745 	/*
746 	 * The SE_DATA_TYPE_* are typedef'ed to be the
747 	 * same value as DATA_TYPE_*
748 	 */
749 	return (nvpair_type((nvpair_t *)attr));
750 }
751 
752 /*
753  * Repack event buffer into contiguous memory
754  */
755 static sysevent_t *
756 se_repack(sysevent_t *ev, int flag)
757 {
758 	size_t copy_len;
759 	caddr_t attr;
760 	size_t size;
761 	uint64_t attr_offset;
762 	sysevent_t *copy;
763 	log_eventq_t *qcopy;
764 	sysevent_attr_list_t *nvl;
765 
766 	copy_len = sizeof (log_eventq_t) + SE_PAYLOAD_SZ(ev);
767 	qcopy = kmem_zalloc(copy_len, flag);
768 	if (qcopy == NULL) {
769 		return (NULL);
770 	}
771 	copy = (sysevent_t *)&qcopy->arg.buf;
772 
773 	/*
774 	 * Copy event header, class, subclass and publisher names
775 	 * Set the attribute offset (in number of bytes) to contiguous
776 	 * memory after the header.
777 	 */
778 
779 	attr_offset = SE_ATTR_OFF(ev);
780 
781 	ASSERT((caddr_t)copy + attr_offset <= (caddr_t)copy + copy_len);
782 
783 	bcopy(ev, copy, attr_offset);
784 
785 	/* Check if attribute list exists */
786 	if ((nvl = (nvlist_t *)(uintptr_t)SE_ATTR_PTR(ev)) == NULL) {
787 		return (copy);
788 	}
789 
790 	/*
791 	 * Copy attribute data to contiguous memory
792 	 */
793 	attr = (char *)copy + attr_offset;
794 	(void) nvlist_size(nvl, &size, encoding);
795 	if (nvlist_pack(nvl, &attr, &size, encoding, flag) != 0) {
796 		kmem_free(qcopy, copy_len);
797 		return (NULL);
798 	}
799 	SE_ATTR_PTR(copy) = UINT64_C(0);
800 	SE_FLAG(copy) = SE_PACKED_BUF;
801 
802 	return (copy);
803 }
804 
805 /*
806  * The sysevent registration provides a persistent and reliable database
807  * for channel information for sysevent channel publishers and
808  * subscribers.
809  *
810  * A channel is created and maintained by the kernel upon the first
811  * SE_OPEN_REGISTRATION operation to log_sysevent_register().  Channel
812  * event subscription information is updated as publishers or subscribers
813  * perform subsequent operations (SE_BIND_REGISTRATION, SE_REGISTER,
814  * SE_UNREGISTER and SE_UNBIND_REGISTRATION).
815  *
816  * For consistency, id's are assigned for every publisher or subscriber
817  * bound to a particular channel.  The id's are used to constrain resources
818  * and perform subscription lookup.
819  *
820  * Associated with each channel is a hashed list of the current subscriptions
821  * based upon event class and subclasses.  A subscription contains a class name,
822  * list of possible subclasses and an array of subscriber ids.  Subscriptions
823  * are updated for every SE_REGISTER or SE_UNREGISTER operation.
824  *
825  * Channels are closed once the last subscriber or publisher performs a
826  * SE_CLOSE_REGISTRATION operation.  All resources associated with the named
827  * channel are freed upon last close.
828  *
829  * Locking:
830  *	Every operation to log_sysevent() is protected by a single lock,
831  *	registered_channel_mutex.  It is expected that the granularity of
832  *	a single lock is sufficient given the frequency that updates will
833  *	occur.
834  *
835  *	If this locking strategy proves to be too contentious, a per-hash
836  *	or per-channel locking strategy may be implemented.
837  */
838 
839 
/* Map a channel name to its bucket index in registered_channels[] */
#define	CHANN_HASH(channel_name)	(hash_func(channel_name) \
					% CHAN_HASH_SZ)

/* Hash table of open channels; each bucket is chained through scd_next */
sysevent_channel_descriptor_t *registered_channels[CHAN_HASH_SZ];
/* Number of channel descriptors currently in the table */
static int channel_cnt;
/* Forward declaration: defined further down, used by unbind_common() */
static void remove_all_class(sysevent_channel_descriptor_t *chan,
	uint32_t sub_id);
847 
/*
 * hash_func - Hash a NUL-terminated string to a 32-bit value.
 *
 * Each character shifts the running value left by four bits and is added
 * in.  Whenever any of the top four bits become set they are folded back
 * into bits 4-7 and then cleared, so the result never has its top four
 * bits set.  The empty string hashes to 0.
 */
static uint32_t
hash_func(const char *s)
{
	uint32_t hash = 0;
	uint32_t top;
	const char *p;

	for (p = s; *p != '\0'; p++) {
		hash = (hash << 4) + (uint32_t)*p;
		top = hash & 0xf0000000;
		if (top != 0)
			hash ^= (top >> 24) ^ top;
	}

	return (hash);
}
866 
867 static sysevent_channel_descriptor_t *
868 get_channel(char *channel_name)
869 {
870 	int hash_index;
871 	sysevent_channel_descriptor_t *chan_list;
872 
873 	if (channel_name == NULL)
874 		return (NULL);
875 
876 	/* Find channel descriptor */
877 	hash_index = CHANN_HASH(channel_name);
878 	chan_list = registered_channels[hash_index];
879 	while (chan_list != NULL) {
880 		if (strcmp(chan_list->scd_channel_name, channel_name) == 0) {
881 			break;
882 		} else {
883 			chan_list = chan_list->scd_next;
884 		}
885 	}
886 
887 	return (chan_list);
888 }
889 
890 static class_lst_t *
891 create_channel_registration(sysevent_channel_descriptor_t *chan,
892     char *event_class, int index)
893 {
894 	size_t class_len;
895 	class_lst_t *c_list;
896 
897 	class_len = strlen(event_class) + 1;
898 	c_list = kmem_zalloc(sizeof (class_lst_t), KM_SLEEP);
899 	c_list->cl_name = kmem_zalloc(class_len, KM_SLEEP);
900 	bcopy(event_class, c_list->cl_name, class_len);
901 
902 	c_list->cl_subclass_list =
903 	    kmem_zalloc(sizeof (subclass_lst_t), KM_SLEEP);
904 	c_list->cl_subclass_list->sl_name =
905 	    kmem_zalloc(sizeof (EC_SUB_ALL), KM_SLEEP);
906 	bcopy(EC_SUB_ALL, c_list->cl_subclass_list->sl_name,
907 	    sizeof (EC_SUB_ALL));
908 
909 	c_list->cl_next = chan->scd_class_list_tbl[index];
910 	chan->scd_class_list_tbl[index] = c_list;
911 
912 	return (c_list);
913 }
914 
915 static void
916 free_channel_registration(sysevent_channel_descriptor_t *chan)
917 {
918 	int i;
919 	class_lst_t *clist, *next_clist;
920 	subclass_lst_t *sclist, *next_sc;
921 
922 	for (i = 0; i <= CLASS_HASH_SZ; ++i) {
923 
924 		clist = chan->scd_class_list_tbl[i];
925 		while (clist != NULL) {
926 			sclist = clist->cl_subclass_list;
927 			while (sclist != NULL) {
928 				kmem_free(sclist->sl_name,
929 				    strlen(sclist->sl_name) + 1);
930 				next_sc = sclist->sl_next;
931 				kmem_free(sclist, sizeof (subclass_lst_t));
932 				sclist = next_sc;
933 			}
934 			kmem_free(clist->cl_name,
935 			    strlen(clist->cl_name) + 1);
936 			next_clist = clist->cl_next;
937 			kmem_free(clist, sizeof (class_lst_t));
938 			clist = next_clist;
939 		}
940 	}
941 	chan->scd_class_list_tbl[0] = NULL;
942 }
943 
944 static int
945 open_channel(char *channel_name)
946 {
947 	int hash_index;
948 	sysevent_channel_descriptor_t *chan, *chan_list;
949 
950 
951 	if (channel_cnt > MAX_CHAN) {
952 		return (-1);
953 	}
954 
955 	/* Find channel descriptor */
956 	hash_index = CHANN_HASH(channel_name);
957 	chan_list = registered_channels[hash_index];
958 	while (chan_list != NULL) {
959 		if (strcmp(chan_list->scd_channel_name, channel_name) == 0) {
960 			chan_list->scd_ref_cnt++;
961 			kmem_free(channel_name, strlen(channel_name) + 1);
962 			return (0);
963 		} else {
964 			chan_list = chan_list->scd_next;
965 		}
966 	}
967 
968 
969 	/* New channel descriptor */
970 	chan = kmem_zalloc(sizeof (sysevent_channel_descriptor_t), KM_SLEEP);
971 	chan->scd_channel_name = channel_name;
972 
973 	/*
974 	 * Create subscriber ids in the range [1, MAX_SUBSCRIBERS).
975 	 * Subscriber id 0 is never allocated, but is used as a reserved id
976 	 * by libsysevent
977 	 */
978 	if ((chan->scd_subscriber_cache = vmem_create(channel_name, (void *)1,
979 	    MAX_SUBSCRIBERS + 1, 1, NULL, NULL, NULL, 0,
980 	    VM_NOSLEEP | VMC_IDENTIFIER)) == NULL) {
981 		kmem_free(chan, sizeof (sysevent_channel_descriptor_t));
982 		return (-1);
983 	}
984 	if ((chan->scd_publisher_cache = vmem_create(channel_name, (void *)1,
985 	    MAX_PUBLISHERS + 1, 1, NULL, NULL, NULL, 0,
986 	    VM_NOSLEEP | VMC_IDENTIFIER)) == NULL) {
987 		vmem_destroy(chan->scd_subscriber_cache);
988 		kmem_free(chan, sizeof (sysevent_channel_descriptor_t));
989 		return (-1);
990 	}
991 
992 	chan->scd_ref_cnt = 1;
993 
994 	(void) create_channel_registration(chan, EC_ALL, 0);
995 
996 	if (registered_channels[hash_index] != NULL)
997 		chan->scd_next = registered_channels[hash_index];
998 
999 	registered_channels[hash_index] = chan;
1000 
1001 	++channel_cnt;
1002 
1003 	return (0);
1004 }
1005 
1006 static void
1007 close_channel(char *channel_name)
1008 {
1009 	int hash_index;
1010 	sysevent_channel_descriptor_t *chan, *prev_chan;
1011 
1012 	/* Find channel descriptor */
1013 	hash_index = CHANN_HASH(channel_name);
1014 	prev_chan = chan = registered_channels[hash_index];
1015 
1016 	while (chan != NULL) {
1017 		if (strcmp(chan->scd_channel_name, channel_name) == 0) {
1018 			break;
1019 		} else {
1020 			prev_chan = chan;
1021 			chan = chan->scd_next;
1022 		}
1023 	}
1024 
1025 	if (chan == NULL)
1026 		return;
1027 
1028 	chan->scd_ref_cnt--;
1029 	if (chan->scd_ref_cnt > 0)
1030 		return;
1031 
1032 	free_channel_registration(chan);
1033 	vmem_destroy(chan->scd_subscriber_cache);
1034 	vmem_destroy(chan->scd_publisher_cache);
1035 	kmem_free(chan->scd_channel_name,
1036 	    strlen(chan->scd_channel_name) + 1);
1037 	if (registered_channels[hash_index] == chan)
1038 		registered_channels[hash_index] = chan->scd_next;
1039 	else
1040 		prev_chan->scd_next = chan->scd_next;
1041 	kmem_free(chan, sizeof (sysevent_channel_descriptor_t));
1042 	--channel_cnt;
1043 }
1044 
1045 static id_t
1046 bind_common(sysevent_channel_descriptor_t *chan, int type)
1047 {
1048 	id_t id;
1049 
1050 	if (type == SUBSCRIBER) {
1051 		id = (id_t)(uintptr_t)vmem_alloc(chan->scd_subscriber_cache, 1,
1052 		    VM_NOSLEEP | VM_NEXTFIT);
1053 		if (id <= 0 || id > MAX_SUBSCRIBERS)
1054 			return (0);
1055 		chan->scd_subscriber_ids[id] = 1;
1056 	} else {
1057 		id = (id_t)(uintptr_t)vmem_alloc(chan->scd_publisher_cache, 1,
1058 		    VM_NOSLEEP | VM_NEXTFIT);
1059 		if (id <= 0 || id > MAX_PUBLISHERS)
1060 			return (0);
1061 		chan->scd_publisher_ids[id] = 1;
1062 	}
1063 
1064 	return (id);
1065 }
1066 
1067 static int
1068 unbind_common(sysevent_channel_descriptor_t *chan, int type, id_t id)
1069 {
1070 	if (type == SUBSCRIBER) {
1071 		if (id <= 0 || id > MAX_SUBSCRIBERS)
1072 			return (0);
1073 		if (chan->scd_subscriber_ids[id] == 0)
1074 			return (0);
1075 		(void) remove_all_class(chan, id);
1076 		chan->scd_subscriber_ids[id] = 0;
1077 		vmem_free(chan->scd_subscriber_cache, (void *)(uintptr_t)id, 1);
1078 	} else {
1079 		if (id <= 0 || id > MAX_PUBLISHERS)
1080 			return (0);
1081 		if (chan->scd_publisher_ids[id] == 0)
1082 			return (0);
1083 		chan->scd_publisher_ids[id] = 0;
1084 		vmem_free(chan->scd_publisher_cache, (void *)(uintptr_t)id, 1);
1085 	}
1086 
1087 	return (1);
1088 }
1089 
1090 static void
1091 release_id(sysevent_channel_descriptor_t *chan, int type, id_t id)
1092 {
1093 	if (unbind_common(chan, type, id))
1094 		close_channel(chan->scd_channel_name);
1095 }
1096 
1097 static subclass_lst_t *
1098 find_subclass(class_lst_t *c_list, char *subclass)
1099 {
1100 	subclass_lst_t *sc_list;
1101 
1102 	if (c_list == NULL)
1103 		return (NULL);
1104 
1105 	sc_list = c_list->cl_subclass_list;
1106 
1107 	while (sc_list != NULL) {
1108 		if (strcmp(sc_list->sl_name, subclass) == 0) {
1109 			return (sc_list);
1110 		}
1111 		sc_list = sc_list->sl_next;
1112 	}
1113 
1114 	return (NULL);
1115 }
1116 
1117 static void
1118 insert_subclass(class_lst_t *c_list, char **subclass_names,
1119 	int subclass_num, uint32_t sub_id)
1120 {
1121 	int i, subclass_sz;
1122 	subclass_lst_t *sc_list;
1123 
1124 	for (i = 0; i < subclass_num; ++i) {
1125 		if ((sc_list = find_subclass(c_list, subclass_names[i]))
1126 		    != NULL) {
1127 			sc_list->sl_num[sub_id] = 1;
1128 		} else {
1129 
1130 			sc_list = kmem_zalloc(sizeof (subclass_lst_t),
1131 			    KM_SLEEP);
1132 			subclass_sz = strlen(subclass_names[i]) + 1;
1133 			sc_list->sl_name = kmem_zalloc(subclass_sz, KM_SLEEP);
1134 			bcopy(subclass_names[i], sc_list->sl_name,
1135 			    subclass_sz);
1136 
1137 			sc_list->sl_num[sub_id] = 1;
1138 
1139 			sc_list->sl_next = c_list->cl_subclass_list;
1140 			c_list->cl_subclass_list = sc_list;
1141 		}
1142 	}
1143 }
1144 
1145 static class_lst_t *
1146 find_class(sysevent_channel_descriptor_t *chan, char *class_name)
1147 {
1148 	class_lst_t *c_list;
1149 
1150 	c_list = chan->scd_class_list_tbl[CLASS_HASH(class_name)];
1151 	while (c_list != NULL) {
1152 		if (strcmp(class_name, c_list->cl_name) == 0)
1153 			break;
1154 		c_list = c_list->cl_next;
1155 	}
1156 
1157 	return (c_list);
1158 }
1159 
1160 static void
1161 remove_all_class(sysevent_channel_descriptor_t *chan, uint32_t sub_id)
1162 {
1163 	int i;
1164 	class_lst_t *c_list;
1165 	subclass_lst_t *sc_list;
1166 
1167 	for (i = 0; i <= CLASS_HASH_SZ; ++i) {
1168 
1169 		c_list = chan->scd_class_list_tbl[i];
1170 		while (c_list != NULL) {
1171 			sc_list = c_list->cl_subclass_list;
1172 			while (sc_list != NULL) {
1173 				sc_list->sl_num[sub_id] = 0;
1174 				sc_list = sc_list->sl_next;
1175 			}
1176 			c_list = c_list->cl_next;
1177 		}
1178 	}
1179 }
1180 
1181 static void
1182 remove_class(sysevent_channel_descriptor_t *chan, uint32_t sub_id,
1183 	char *class_name)
1184 {
1185 	class_lst_t *c_list;
1186 	subclass_lst_t *sc_list;
1187 
1188 	if (strcmp(class_name, EC_ALL) == 0) {
1189 		remove_all_class(chan, sub_id);
1190 		return;
1191 	}
1192 
1193 	if ((c_list = find_class(chan, class_name)) == NULL) {
1194 		return;
1195 	}
1196 
1197 	sc_list = c_list->cl_subclass_list;
1198 	while (sc_list != NULL) {
1199 		sc_list->sl_num[sub_id] = 0;
1200 		sc_list = sc_list->sl_next;
1201 	}
1202 }
1203 
1204 static int
1205 insert_class(sysevent_channel_descriptor_t *chan, char *event_class,
1206 	char **event_subclass_lst, int subclass_num, uint32_t sub_id)
1207 {
1208 	class_lst_t *c_list;
1209 
1210 	if (strcmp(event_class, EC_ALL) == 0) {
1211 		insert_subclass(chan->scd_class_list_tbl[0],
1212 		    event_subclass_lst, 1, sub_id);
1213 		return (0);
1214 	}
1215 
1216 	if (strlen(event_class) + 1 > MAX_CLASS_LEN)
1217 		return (-1);
1218 
1219 	/* New class, add to the registration cache */
1220 	if ((c_list = find_class(chan, event_class)) == NULL) {
1221 		c_list = create_channel_registration(chan, event_class,
1222 		    CLASS_HASH(event_class));
1223 	}
1224 
1225 	/* Update the subclass list */
1226 	insert_subclass(c_list, event_subclass_lst, subclass_num, sub_id);
1227 
1228 	return (0);
1229 }
1230 
1231 static int
1232 add_registration(sysevent_channel_descriptor_t *chan, uint32_t sub_id,
1233 	char *nvlbuf, size_t nvlsize)
1234 {
1235 	uint_t num_elem;
1236 	char *event_class;
1237 	char **event_list;
1238 	nvlist_t *nvl;
1239 	nvpair_t *nvpair = NULL;
1240 
1241 	if (nvlist_unpack(nvlbuf, nvlsize, &nvl, KM_SLEEP) != 0)
1242 		return (-1);
1243 
1244 	if ((nvpair = nvlist_next_nvpair(nvl, nvpair)) == NULL) {
1245 		nvlist_free(nvl);
1246 		return (-1);
1247 	}
1248 
1249 	if ((event_class = nvpair_name(nvpair)) == NULL) {
1250 		nvlist_free(nvl);
1251 		return (-1);
1252 	}
1253 	if (nvpair_value_string_array(nvpair, &event_list,
1254 	    &num_elem) != 0) {
1255 		nvlist_free(nvl);
1256 		return (-1);
1257 	}
1258 
1259 	if (insert_class(chan, event_class, event_list, num_elem, sub_id) < 0) {
1260 		nvlist_free(nvl);
1261 		return (-1);
1262 	}
1263 
1264 	nvlist_free(nvl);
1265 
1266 	return (0);
1267 }
1268 
1269 /*
1270  * get_registration - Return the requested class hash chain
1271  */
1272 static int
1273 get_registration(sysevent_channel_descriptor_t *chan, char *databuf,
1274 	uint32_t *bufsz, uint32_t class_index)
1275 {
1276 	int num_classes = 0;
1277 	char *nvlbuf = NULL;
1278 	size_t nvlsize;
1279 	nvlist_t *nvl;
1280 	class_lst_t *clist;
1281 	subclass_lst_t *sc_list;
1282 
1283 	if (class_index < 0 || class_index > CLASS_HASH_SZ)
1284 		return (EINVAL);
1285 
1286 	if ((clist = chan->scd_class_list_tbl[class_index]) == NULL) {
1287 		return (ENOENT);
1288 	}
1289 
1290 	if (nvlist_alloc(&nvl, 0, 0) != 0) {
1291 		return (EFAULT);
1292 	}
1293 
1294 	while (clist != NULL) {
1295 		if (nvlist_add_string(nvl, CLASS_NAME, clist->cl_name)
1296 		    != 0) {
1297 			nvlist_free(nvl);
1298 			return (EFAULT);
1299 		}
1300 
1301 		sc_list = clist->cl_subclass_list;
1302 		while (sc_list != NULL) {
1303 			if (nvlist_add_byte_array(nvl, sc_list->sl_name,
1304 			    sc_list->sl_num, MAX_SUBSCRIBERS) != 0) {
1305 				nvlist_free(nvl);
1306 				return (EFAULT);
1307 			}
1308 			sc_list = sc_list->sl_next;
1309 		}
1310 		num_classes++;
1311 		clist = clist->cl_next;
1312 	}
1313 
1314 	if (num_classes == 0) {
1315 		nvlist_free(nvl);
1316 		return (ENOENT);
1317 	}
1318 
1319 	if (nvlist_pack(nvl, &nvlbuf, &nvlsize, NV_ENCODE_NATIVE,
1320 	    KM_SLEEP)
1321 	    != 0) {
1322 		nvlist_free(nvl);
1323 		return (EFAULT);
1324 	}
1325 
1326 	nvlist_free(nvl);
1327 
1328 	if (nvlsize > *bufsz) {
1329 		kmem_free(nvlbuf, nvlsize);
1330 		*bufsz = nvlsize;
1331 		return (EAGAIN);
1332 	}
1333 
1334 	bcopy(nvlbuf, databuf, nvlsize);
1335 	kmem_free(nvlbuf, nvlsize);
1336 
1337 	return (0);
1338 }
1339 
1340 /*
1341  * log_sysevent_register - Register event subscriber for a particular
1342  *		event channel.
1343  */
1344 int
1345 log_sysevent_register(char *channel_name, char *udatabuf, se_pubsub_t *udata)
1346 {
1347 	int error = 0;
1348 	char *kchannel, *databuf = NULL;
1349 	size_t bufsz;
1350 	se_pubsub_t kdata;
1351 	sysevent_channel_descriptor_t *chan;
1352 
1353 	if (copyin(udata, &kdata, sizeof (se_pubsub_t)) == -1) {
1354 		return (EFAULT);
1355 	}
1356 	if (kdata.ps_channel_name_len == 0) {
1357 		return (EINVAL);
1358 	}
1359 	kchannel = kmem_alloc(kdata.ps_channel_name_len, KM_SLEEP);
1360 	if (copyin(channel_name, kchannel, kdata.ps_channel_name_len) == -1) {
1361 		kmem_free(kchannel, kdata.ps_channel_name_len);
1362 		return (EFAULT);
1363 	}
1364 	bufsz = kdata.ps_buflen;
1365 	if (bufsz > 0) {
1366 		databuf = kmem_alloc(bufsz, KM_SLEEP);
1367 		if (copyin(udatabuf, databuf, bufsz) == -1) {
1368 			kmem_free(kchannel, kdata.ps_channel_name_len);
1369 			kmem_free(databuf, bufsz);
1370 			return (EFAULT);
1371 		}
1372 	}
1373 
1374 	mutex_enter(&registered_channel_mutex);
1375 	if (kdata.ps_op != SE_OPEN_REGISTRATION &&
1376 	    kdata.ps_op != SE_CLOSE_REGISTRATION) {
1377 		chan = get_channel(kchannel);
1378 		if (chan == NULL) {
1379 			mutex_exit(&registered_channel_mutex);
1380 			kmem_free(kchannel, kdata.ps_channel_name_len);
1381 			if (bufsz > 0)
1382 				kmem_free(databuf, bufsz);
1383 			return (ENOENT);
1384 		}
1385 	}
1386 
1387 	switch (kdata.ps_op) {
1388 	case SE_OPEN_REGISTRATION:
1389 		if (open_channel(kchannel) != 0) {
1390 			error = ENOMEM;
1391 			if (bufsz > 0)
1392 				kmem_free(databuf, bufsz);
1393 			kmem_free(kchannel, kdata.ps_channel_name_len);
1394 		}
1395 
1396 		mutex_exit(&registered_channel_mutex);
1397 		return (error);
1398 	case SE_CLOSE_REGISTRATION:
1399 		close_channel(kchannel);
1400 		break;
1401 	case SE_BIND_REGISTRATION:
1402 		if ((kdata.ps_id = bind_common(chan, kdata.ps_type)) <= 0)
1403 			error = EBUSY;
1404 		break;
1405 	case SE_UNBIND_REGISTRATION:
1406 		(void) unbind_common(chan, kdata.ps_type, (id_t)kdata.ps_id);
1407 		break;
1408 	case SE_REGISTER:
1409 		if (bufsz == 0) {
1410 			error = EINVAL;
1411 			break;
1412 		}
1413 		if (add_registration(chan, kdata.ps_id, databuf, bufsz) == -1)
1414 			error = EINVAL;
1415 		break;
1416 	case SE_UNREGISTER:
1417 		if (bufsz == 0) {
1418 			error = EINVAL;
1419 			break;
1420 		}
1421 		remove_class(chan, kdata.ps_id, databuf);
1422 		break;
1423 	case SE_CLEANUP:
1424 		/* Cleanup the indicated subscriber or publisher */
1425 		release_id(chan, kdata.ps_type, kdata.ps_id);
1426 		break;
1427 	case SE_GET_REGISTRATION:
1428 		error = get_registration(chan, databuf,
1429 		    &kdata.ps_buflen, kdata.ps_id);
1430 		break;
1431 	default:
1432 		error = ENOTSUP;
1433 	}
1434 
1435 	mutex_exit(&registered_channel_mutex);
1436 
1437 	kmem_free(kchannel, kdata.ps_channel_name_len);
1438 
1439 	if (bufsz > 0) {
1440 		if (copyout(databuf, udatabuf, bufsz) == -1)
1441 			error = EFAULT;
1442 		kmem_free(databuf, bufsz);
1443 	}
1444 
1445 	if (copyout(&kdata, udata, sizeof (se_pubsub_t)) == -1)
1446 		return (EFAULT);
1447 
1448 	return (error);
1449 }
1450 
1451 /*
1452  * log_sysevent_copyout_data - Copyout event data to userland.
1453  *			This is called from modctl(MODEVENTS, MODEVENTS_GETDATA)
1454  *			The buffer size is always sufficient.
1455  */
1456 int
1457 log_sysevent_copyout_data(sysevent_id_t *eid, size_t ubuflen, caddr_t ubuf)
1458 {
1459 	int error = ENOENT;
1460 	log_eventq_t *q;
1461 	sysevent_t *ev;
1462 	sysevent_id_t eid_copy;
1463 
1464 	/*
1465 	 * Copy eid
1466 	 */
1467 	if (copyin(eid, &eid_copy, sizeof (sysevent_id_t)) == -1) {
1468 		return (EFAULT);
1469 	}
1470 
1471 	mutex_enter(&eventq_sent_mutex);
1472 	q = log_eventq_sent;
1473 
1474 	/*
1475 	 * Search for event buffer on the sent queue with matching
1476 	 * event identifier
1477 	 */
1478 	while (q) {
1479 		ev = (sysevent_t *)&q->arg.buf;
1480 
1481 		if (SE_TIME(ev) != eid_copy.eid_ts ||
1482 		    SE_SEQ(ev) != eid_copy.eid_seq) {
1483 			q = q->next;
1484 			continue;
1485 		}
1486 
1487 		if (ubuflen < SE_SIZE(ev)) {
1488 			error = EFAULT;
1489 			break;
1490 		}
1491 		if (copyout(ev, ubuf, SE_SIZE(ev)) != 0) {
1492 			error = EFAULT;
1493 			LOG_DEBUG((CE_NOTE, "Unable to retrieve system event "
1494 			    "0x%" PRIx64 " from queue: EFAULT\n",
1495 			    eid->eid_seq));
1496 		} else {
1497 			error = 0;
1498 		}
1499 		break;
1500 	}
1501 
1502 	mutex_exit(&eventq_sent_mutex);
1503 
1504 	return (error);
1505 }
1506 
1507 /*
1508  * log_sysevent_free_data - Free kernel copy of the event buffer identified
1509  *			by eid (must have already been sent).  Called from
1510  *			modctl(MODEVENTS, MODEVENTS_FREEDATA).
1511  */
1512 int
1513 log_sysevent_free_data(sysevent_id_t *eid)
1514 {
1515 	int error = ENOENT;
1516 	sysevent_t *ev;
1517 	log_eventq_t *q, *prev = NULL;
1518 	sysevent_id_t eid_copy;
1519 
1520 	/*
1521 	 * Copy eid
1522 	 */
1523 	if (copyin(eid, &eid_copy, sizeof (sysevent_id_t)) == -1) {
1524 		return (EFAULT);
1525 	}
1526 
1527 	mutex_enter(&eventq_sent_mutex);
1528 	q = log_eventq_sent;
1529 
1530 	/*
1531 	 * Look for the event to be freed on the sent queue.  Due to delayed
1532 	 * processing of the event, it may not be on the sent queue yet.
1533 	 * It is up to the user to retry the free operation to ensure that the
1534 	 * event is properly freed.
1535 	 */
1536 	while (q) {
1537 		ev = (sysevent_t *)&q->arg.buf;
1538 
1539 		if (SE_TIME(ev) != eid_copy.eid_ts ||
1540 		    SE_SEQ(ev) != eid_copy.eid_seq) {
1541 			prev = q;
1542 			q = q->next;
1543 			continue;
1544 		}
1545 		/*
1546 		 * Take it out of log_eventq_sent and free it
1547 		 */
1548 		if (prev) {
1549 			prev->next = q->next;
1550 		} else {
1551 			log_eventq_sent = q->next;
1552 		}
1553 		free_packed_event(ev);
1554 		error = 0;
1555 		break;
1556 	}
1557 
1558 	mutex_exit(&eventq_sent_mutex);
1559 
1560 	return (error);
1561 }
1562 
1563 /*
1564  * log_sysevent_flushq - Begin or resume event buffer delivery.  If neccessary,
1565  *			create log_event_deliver thread or wake it up
1566  */
1567 /*ARGSUSED*/
1568 void
1569 log_sysevent_flushq(int cmd, uint_t flag)
1570 {
1571 	mutex_enter(&eventq_head_mutex);
1572 
1573 	/*
1574 	 * Start the event delivery thread
1575 	 * Mark the upcall status as active since we should
1576 	 * now be able to begin emptying the queue normally.
1577 	 */
1578 	if (!async_thread) {
1579 		sysevent_upcall_status = 0;
1580 		sysevent_daemon_init = 1;
1581 		setup_ddi_poststartup();
1582 		async_thread = thread_create(NULL, 0, log_event_deliver,
1583 		    NULL, 0, &p0, TS_RUN, minclsyspri);
1584 	}
1585 
1586 	log_event_delivery = LOGEVENT_DELIVERY_CONT;
1587 	cv_signal(&log_event_cv);
1588 	mutex_exit(&eventq_head_mutex);
1589 }
1590 
1591 /*
1592  * log_sysevent_filename - Called by syseventd via
1593  *			modctl(MODEVENTS, MODEVENTS_SET_DOOR_UPCALL_FILENAME)
1594  *			to subsequently bind the event_door.
1595  *
1596  *			This routine is called everytime syseventd (re)starts
1597  *			and must therefore replay any events buffers that have
1598  *			been sent but not freed.
1599  *
1600  *			Event buffer delivery begins after a call to
1601  *			log_sysevent_flushq().
1602  */
1603 int
1604 log_sysevent_filename(char *file)
1605 {
1606 	/*
1607 	 * Called serially by syseventd init code, no need to protect door
1608 	 * data.
1609 	 */
1610 	/* Unbind old event door */
1611 	if (logevent_door_upcall_filename) {
1612 		kmem_free(logevent_door_upcall_filename,
1613 			logevent_door_upcall_filename_size);
1614 		if (event_door) {
1615 			door_ki_rele(event_door);
1616 			event_door = NULL;
1617 		}
1618 	}
1619 	logevent_door_upcall_filename_size = strlen(file) + 1;
1620 	logevent_door_upcall_filename = kmem_alloc(
1621 		logevent_door_upcall_filename_size, KM_SLEEP);
1622 	(void) strcpy(logevent_door_upcall_filename, file);
1623 
1624 	/*
1625 	 * We are called when syseventd restarts. Move all sent, but
1626 	 * not committed events from log_eventq_sent to log_eventq_head.
1627 	 * Do it in proper order to maintain increasing event id.
1628 	 */
1629 	mutex_enter(&eventq_head_mutex);
1630 
1631 	mutex_enter(&eventq_sent_mutex);
1632 	while (log_eventq_sent) {
1633 		log_eventq_t *tmp = log_eventq_sent->next;
1634 		log_eventq_sent->next = log_eventq_head;
1635 		if (log_eventq_head == NULL) {
1636 			ASSERT(log_eventq_cnt == 0);
1637 			log_eventq_tail = log_eventq_sent;
1638 			log_eventq_tail->next = NULL;
1639 		} else if (log_eventq_head == log_eventq_tail) {
1640 			ASSERT(log_eventq_cnt == 1);
1641 			ASSERT(log_eventq_head->next == NULL);
1642 			ASSERT(log_eventq_tail->next == NULL);
1643 		}
1644 		log_eventq_head = log_eventq_sent;
1645 		log_eventq_sent = tmp;
1646 		log_eventq_cnt++;
1647 	}
1648 	mutex_exit(&eventq_sent_mutex);
1649 	mutex_exit(&eventq_head_mutex);
1650 
1651 	return (0);
1652 }
1653 
1654 /*
1655  * queue_sysevent - queue an event buffer
1656  */
1657 static int
1658 queue_sysevent(sysevent_t *ev, sysevent_id_t *eid, int flag)
1659 {
1660 	log_eventq_t *q;
1661 
1662 	ASSERT(flag == SE_SLEEP || flag == SE_NOSLEEP);
1663 
1664 	DTRACE_SYSEVENT2(post, evch_bind_t *, NULL, sysevent_impl_t *, ev);
1665 
1666 restart:
1667 
1668 	/* Max Q size exceeded */
1669 	mutex_enter(&event_qfull_mutex);
1670 	if (sysevent_daemon_init && log_eventq_cnt >= logevent_max_q_sz) {
1671 		/*
1672 		 * If queue full and transport down, return no transport
1673 		 */
1674 		if (sysevent_upcall_status != 0) {
1675 			mutex_exit(&event_qfull_mutex);
1676 			free_packed_event(ev);
1677 			eid->eid_seq = UINT64_C(0);
1678 			eid->eid_ts = INT64_C(0);
1679 			return (SE_NO_TRANSPORT);
1680 		}
1681 		if (flag == SE_NOSLEEP) {
1682 			mutex_exit(&event_qfull_mutex);
1683 			free_packed_event(ev);
1684 			eid->eid_seq = UINT64_C(0);
1685 			eid->eid_ts = INT64_C(0);
1686 			return (SE_EQSIZE);
1687 		}
1688 		event_qfull_blocked++;
1689 		cv_wait(&event_qfull_cv, &event_qfull_mutex);
1690 		event_qfull_blocked--;
1691 		mutex_exit(&event_qfull_mutex);
1692 		goto restart;
1693 	}
1694 	mutex_exit(&event_qfull_mutex);
1695 
1696 	mutex_enter(&eventq_head_mutex);
1697 
1698 	/* Time stamp and assign ID */
1699 	SE_SEQ(ev) = eid->eid_seq = atomic_add_64_nv(&kernel_event_id,
1700 		(uint64_t)1);
1701 	SE_TIME(ev) = eid->eid_ts = gethrtime();
1702 
1703 	LOG_DEBUG1((CE_CONT, "log_sysevent: class=%d type=%d id=0x%llx\n",
1704 	    SE_CLASS(ev), SE_SUBCLASS(ev), (longlong_t)SE_SEQ(ev)));
1705 
1706 	/*
1707 	 * Put event on eventq
1708 	 */
1709 	q = (log_eventq_t *)((caddr_t)ev - offsetof(log_eventq_t, arg.buf));
1710 	q->next = NULL;
1711 	if (log_eventq_head == NULL) {
1712 		ASSERT(log_eventq_cnt == 0);
1713 		log_eventq_head = q;
1714 		log_eventq_tail = q;
1715 	} else {
1716 		if (log_eventq_head == log_eventq_tail) {
1717 			ASSERT(log_eventq_cnt == 1);
1718 			ASSERT(log_eventq_head->next == NULL);
1719 			ASSERT(log_eventq_tail->next == NULL);
1720 		}
1721 		log_eventq_tail->next = q;
1722 		log_eventq_tail = q;
1723 	}
1724 	log_eventq_cnt++;
1725 
1726 	/* Signal event delivery thread */
1727 	if (log_eventq_cnt == 1) {
1728 		cv_signal(&log_event_cv);
1729 	}
1730 	mutex_exit(&eventq_head_mutex);
1731 
1732 	return (0);
1733 }
1734 
1735 /*
1736  * log_sysevent - kernel system event logger.
1737  *
1738  * Returns SE_ENOMEM if buf allocation failed or SE_EQSIZE if the
1739  * maximum event queue size will be exceeded
1740  * Returns 0 for successfully queued event buffer
1741  */
1742 int
1743 log_sysevent(sysevent_t *ev, int flag, sysevent_id_t *eid)
1744 {
1745 	sysevent_t *ev_copy;
1746 	int rval;
1747 
1748 	ASSERT(flag == SE_SLEEP || flag == SE_NOSLEEP);
1749 	ASSERT(!(flag == SE_SLEEP && servicing_interrupt()));
1750 
1751 	ev_copy = se_repack(ev, flag);
1752 	if (ev_copy == NULL) {
1753 		ASSERT(flag == SE_NOSLEEP);
1754 		return (SE_ENOMEM);
1755 	}
1756 	rval = queue_sysevent(ev_copy, eid, flag);
1757 	ASSERT(rval == 0 || rval == SE_ENOMEM || rval == SE_EQSIZE ||
1758 		rval == SE_NO_TRANSPORT);
1759 	ASSERT(!(flag == SE_SLEEP && (rval == SE_EQSIZE || rval == SE_ENOMEM)));
1760 	return (rval);
1761 }
1762 
1763 /*
1764  * log_usr_sysevent - user system event logger
1765  *			Private to devfsadm and accessible only via
1766  *			modctl(MODEVENTS, MODEVENTS_POST_EVENT)
1767  */
1768 int
1769 log_usr_sysevent(sysevent_t *ev, int ev_size, sysevent_id_t *eid)
1770 {
1771 	int ret, copy_sz;
1772 	sysevent_t *ev_copy;
1773 	sysevent_id_t new_eid;
1774 	log_eventq_t *qcopy;
1775 
1776 	copy_sz = ev_size + offsetof(log_eventq_t, arg) +
1777 		offsetof(log_event_upcall_arg_t, buf);
1778 	qcopy = kmem_zalloc(copy_sz, KM_SLEEP);
1779 	ev_copy = (sysevent_t *)&qcopy->arg.buf;
1780 
1781 	/*
1782 	 * Copy event
1783 	 */
1784 	if (copyin(ev, ev_copy, ev_size) == -1) {
1785 		kmem_free(qcopy, copy_sz);
1786 		return (EFAULT);
1787 	}
1788 
1789 	if ((ret = queue_sysevent(ev_copy, &new_eid, SE_NOSLEEP)) != 0) {
1790 		if (ret == SE_ENOMEM || ret == SE_EQSIZE)
1791 			return (EAGAIN);
1792 		else
1793 			return (EIO);
1794 	}
1795 
1796 	if (copyout(&new_eid, eid, sizeof (sysevent_id_t)) == -1) {
1797 		return (EFAULT);
1798 	}
1799 
1800 	return (0);
1801 }
1802 
1803 
1804 
1805 int
1806 ddi_log_sysevent(
1807 	dev_info_t		*dip,
1808 	char			*vendor,
1809 	char			*class,
1810 	char			*subclass,
1811 	nvlist_t		*attr_list,
1812 	sysevent_id_t		*eidp,
1813 	int			sleep_flag)
1814 {
1815 	sysevent_attr_list_t	*list = (sysevent_attr_list_t *)attr_list;
1816 	char			pubstr[32];
1817 	sysevent_t		*event;
1818 	sysevent_id_t		eid;
1819 	const char		*drvname;
1820 	char			*publisher;
1821 	int			se_flag;
1822 	int			rval;
1823 	int			n;
1824 
1825 	if (sleep_flag == DDI_SLEEP && servicing_interrupt()) {
1826 		cmn_err(CE_NOTE, "!ddi_log_syevent: driver %s%d - cannot queue "
1827 			"event from interrupt context with sleep semantics\n",
1828 			ddi_driver_name(dip), ddi_get_instance(dip));
1829 		return (DDI_ECONTEXT);
1830 	}
1831 
1832 	drvname = ddi_driver_name(dip);
1833 	n = strlen(vendor) + strlen(drvname) + 7;
1834 	if (n < sizeof (pubstr)) {
1835 		publisher = pubstr;
1836 	} else {
1837 		publisher = kmem_alloc(n,
1838 			(sleep_flag == DDI_SLEEP) ? KM_SLEEP : KM_NOSLEEP);
1839 		if (publisher == NULL) {
1840 			return (DDI_ENOMEM);
1841 		}
1842 	}
1843 	(void) strcpy(publisher, vendor);
1844 	(void) strcat(publisher, ":kern:");
1845 	(void) strcat(publisher, drvname);
1846 
1847 	se_flag = (sleep_flag == DDI_SLEEP) ? SE_SLEEP : SE_NOSLEEP;
1848 	event = sysevent_alloc(class, subclass, publisher, se_flag);
1849 
1850 	if (publisher != pubstr) {
1851 		kmem_free(publisher, n);
1852 	}
1853 
1854 	if (event == NULL) {
1855 		return (DDI_ENOMEM);
1856 	}
1857 
1858 	if (list) {
1859 		(void) sysevent_attach_attributes(event, list);
1860 	}
1861 
1862 	rval = log_sysevent(event, se_flag, &eid);
1863 	if (list) {
1864 		sysevent_detach_attributes(event);
1865 	}
1866 	sysevent_free(event);
1867 	if (rval == 0) {
1868 		if (eidp) {
1869 			eidp->eid_seq = eid.eid_seq;
1870 			eidp->eid_ts = eid.eid_ts;
1871 		}
1872 		return (DDI_SUCCESS);
1873 	}
1874 	if (rval == SE_NO_TRANSPORT)
1875 		return (DDI_ETRANSPORT);
1876 
1877 	ASSERT(rval == SE_ENOMEM || rval == SE_EQSIZE);
1878 	return ((rval == SE_ENOMEM) ? DDI_ENOMEM : DDI_EBUSY);
1879 }
1880 
1881 uint64_t
1882 log_sysevent_new_id()
1883 {
1884 	return (atomic_add_64_nv(&kernel_event_id, (uint64_t)1));
1885 }
1886