1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
24 * Copyright 2016 Toomas Soome <tsoome@me.com>
25 */
26
27 #include <sys/types.h>
28 #include <sys/errno.h>
29 #include <sys/stropts.h>
30 #include <sys/debug.h>
31 #include <sys/ddi.h>
32 #include <sys/sunddi.h>
33 #include <sys/vmem.h>
34 #include <sys/cmn_err.h>
35 #include <sys/callb.h>
36 #include <sys/sysevent.h>
37 #include <sys/sysevent_impl.h>
38 #include <sys/sysevent/dev.h>
39 #include <sys/modctl.h>
40 #include <sys/lofi_impl.h>
41 #include <sys/sysmacros.h>
42 #include <sys/disp.h>
43 #include <sys/autoconf.h>
44 #include <sys/atomic.h>
45 #include <sys/sdt.h>
46
47 /* for doors */
48 #include <sys/pathname.h>
49 #include <sys/door.h>
50 #include <sys/kmem.h>
51 #include <sys/cpuvar.h>
52 #include <sys/fs/snode.h>
53
54 /*
55 * log_sysevent.c - Provides the interfaces for kernel event publication
56 * to the sysevent event daemon (syseventd).
57 */
58
/*
 * Debug stuff
 *
 * log_event_debug is the run-time debug level: LOG_DEBUG() messages are
 * passed to cmn_err() when it is non-zero, LOG_DEBUG1() messages (DEBUG
 * builds only) when it is greater than 1.
 *
 * Both macros take one fully parenthesized cmn_err() argument list, e.g.
 *	LOG_DEBUG((CE_CONT, "value %d\n", val));
 *
 * The expansions are wrapped in do { } while (0) so that an invocation is
 * always exactly one statement.  A bare "if" expansion would silently
 * capture the "else" of an enclosing unbraced if/else.
 */
static int log_event_debug = 0;
#define	LOG_DEBUG(args)	\
	do { if (log_event_debug) cmn_err args; } while (0)
#ifdef DEBUG
#define	LOG_DEBUG1(args)	\
	do { if (log_event_debug > 1) cmn_err args; } while (0)
#else
#define	LOG_DEBUG1(args)
#endif
69
/*
 * Local static vars
 */
/*
 * queue of event buffers sent to syseventd; log_event_deliver() pushes
 * each successfully delivered buffer onto the front of this list
 */
static log_eventq_t *log_eventq_sent = NULL;

/*
 * Count of event buffers in the queue awaiting delivery (the list headed
 * by log_eventq_head); decremented as entries move to the sent queue.
 */
int log_eventq_cnt = 0;

/* queue of event buffers awaiting delivery to syseventd */
static log_eventq_t *log_eventq_head = NULL;
static log_eventq_t *log_eventq_tail = NULL;
/* NOTE(review): presumably the id/sequence source for posted events */
static uint64_t kernel_event_id = 0;
/* nvlist encoding handed to nvlist_size() and nvlist_pack() */
static int encoding = NV_ENCODE_NATIVE;

/* log event delivery flag */
#define	LOGEVENT_DELIVERY_OK	0	/* OK to deliver event buffers */
#define	LOGEVENT_DELIVERY_CONT	1	/* Continue to deliver event buffers */
#define	LOGEVENT_DELIVERY_HOLD	2	/* Hold delivering of event buffers */

/*
 * Tunable maximum event buffer queue size. Size depends on how many events
 * the queue must hold when syseventd is not available, for example during
 * system startup. Experience showed that more than 2000 events could be posted
 * due to correctable memory errors.
 */
int logevent_max_q_sz = 5000;


/* Current delivery state (LOGEVENT_DELIVERY_*); delivery starts on hold */
static int log_event_delivery = LOGEVENT_DELIVERY_HOLD;
static char logevent_door_upcall_filename[MAXPATHLEN];

static door_handle_t event_door = NULL;	/* Door for upcalls */
static kmutex_t event_door_mutex;	/* To protect event_door */

/*
 * async thread-related variables
 *
 * eventq_head_mutex - synchronizes access to the kernel event queue
 *
 * eventq_sent_mutex - synchronizes access to the queue of events sent to
 *			userlevel
 *
 * log_event_cv - condition variable signaled when an event has arrived or
 *			userlevel ready to process event buffers
 *
 * async_thread - asynchronous event delivery thread to userlevel daemon.
 *
 * sysevent_upcall_status - status of the door upcall link
 */
static kmutex_t eventq_head_mutex;
static kmutex_t eventq_sent_mutex;
static kcondvar_t log_event_cv;
static kthread_id_t async_thread = NULL;

/*
 * Queue-full handshake: log_event_deliver() signals event_qfull_cv under
 * event_qfull_mutex when event_qfull_blocked is non-zero.
 * NOTE(review): event_qfull_blocked presumably counts publishers blocked
 * on a full queue -- confirm against the posting path.
 */
static kmutex_t event_qfull_mutex;
static kcondvar_t event_qfull_cv;
static int event_qfull_blocked = 0;

/*
 * Set by log_event_deliver(): 0 after a successful or held delivery pass,
 * otherwise the upcall error.  Starts at -1.
 */
static int sysevent_upcall_status = -1;
/* Initialized in log_event_init(); see the registration comment below */
static kmutex_t registered_channel_mutex;

/*
 * Indicates the syseventd daemon has begun taking events
 */
int sysevent_daemon_init = 0;

/*
 * Back-off delay when door_ki_upcall returns EAGAIN.  Typically
 * caused by the server process doing a forkall().  Since all threads
 * but the thread actually doing the forkall() need to be quiesced,
 * the fork may take some time.  The min/max pause are in units
 * of clock ticks.
 */
#define	LOG_EVENT_MIN_PAUSE	8
#define	LOG_EVENT_MAX_PAUSE	128

/* State shared by log_event_pause() and its timeout callback */
static kmutex_t event_pause_mutex;
static kcondvar_t event_pause_cv;
static int event_pause_state = 0;

/* Cached device links for lofi. */
lofi_nvl_t lofi_devlink_cache;
155
/*
 * log_event_busy_timeout - timeout callback armed by log_event_pause().
 *	Clears event_pause_state and signals event_pause_cv, both under
 *	event_pause_mutex, so the paused thread can resume.  The argument
 *	is not used.
 */
/*ARGSUSED*/
static void
log_event_busy_timeout(void *arg)
{
	mutex_enter(&event_pause_mutex);
	event_pause_state = 0;
	cv_signal(&event_pause_cv);
	mutex_exit(&event_pause_mutex);
}
165
/*
 * log_event_pause - block the calling thread for about nticks clock ticks.
 *	Arms log_event_busy_timeout() via timeout() and then sleeps on
 *	event_pause_cv until the callback clears event_pause_state.
 *	If timeout() returns 0 no wait is performed.
 */
static void
log_event_pause(int nticks)
{
	timeout_id_t id;

	/*
	 * Only one use of log_event_pause at a time
	 */
	ASSERT(event_pause_state == 0);

	/* Mark the pause active before the callback can possibly run */
	event_pause_state = 1;
	id = timeout(log_event_busy_timeout, NULL, nticks);
	if (id != 0) {
		mutex_enter(&event_pause_mutex);
		/* Re-check the state on every wakeup */
		while (event_pause_state)
			cv_wait(&event_pause_cv, &event_pause_mutex);
		mutex_exit(&event_pause_mutex);
	}
	event_pause_state = 0;
}
186
187
/*
 * log_event_upcall - Perform the upcall to syseventd for event buffer delivery.
 *	Check for rebinding errors.
 *	The upcall buffer is reused by the syseventd door_return
 *	to hold the result code.
 *
 *	arg is handed to the door both as the data buffer and as the
 *	result buffer.  EINTR and EAGAIN from the door call are retried
 *	after a pause; EBADF drops the cached door handle.
 *
 *	Returns EBADF when no door is available or the door call reports
 *	EBADF, any other door call error as-is, and otherwise the int
 *	read back from the start of the result buffer.
 */
static int
log_event_upcall(log_event_upcall_arg_t *arg)
{
	int error;
	size_t size;
	sysevent_t *ev;
	door_arg_t darg, save_arg;
	int retry;
	int neagain = 0;
	int neintr = 0;
	int nticks = LOG_EVENT_MIN_PAUSE;

	/* Initialize door args */
	ev = (sysevent_t *)&arg->buf;
	size = sizeof (log_event_upcall_arg_t) + SE_PAYLOAD_SZ(ev);

	/* Request and reply share the same buffer */
	darg.rbuf = (char *)arg;
	darg.data_ptr = (char *)arg;
	darg.rsize = size;
	darg.data_size = size;
	darg.desc_ptr = NULL;
	darg.desc_num = 0;

	LOG_DEBUG1((CE_CONT, "log_event_upcall: 0x%llx\n",
	    (longlong_t)SE_SEQ((sysevent_t *)&arg->buf)));

	/* Keep a pristine copy so darg can be reset before each retry */
	save_arg = darg;
	for (retry = 0; ; retry++) {

		mutex_enter(&event_door_mutex);
		if (event_door == NULL) {
			mutex_exit(&event_door_mutex);

			return (EBADF);
		}

		if ((error = door_ki_upcall_limited(event_door, &darg, NULL,
		    SIZE_MAX, 0)) == 0) {
			mutex_exit(&event_door_mutex);
			break;
		}

		/*
		 * EBADF is handled outside the switch below because we need to
		 * hold event_door_mutex a bit longer
		 */
		if (error == EBADF) {
			/* Server died */
			door_ki_rele(event_door);
			event_door = NULL;

			mutex_exit(&event_door_mutex);
			return (error);
		}

		mutex_exit(&event_door_mutex);

		/*
		 * The EBADF case is already handled above with event_door_mutex
		 * held
		 */
		switch (error) {
		case EINTR:
			/* Interrupted: short fixed pause, then retry */
			neintr++;
			log_event_pause(2);
			darg = save_arg;
			break;
		case EAGAIN:
			/* cannot deliver upcall - process may be forking */
			neagain++;
			log_event_pause(nticks);
			/* Double the back-off, capped at the maximum */
			nticks <<= 1;
			if (nticks > LOG_EVENT_MAX_PAUSE)
				nticks = LOG_EVENT_MAX_PAUSE;
			darg = save_arg;
			break;
		default:
			cmn_err(CE_CONT,
			    "log_event_upcall: door_ki_upcall error %d\n",
			    error);
			return (error);
		}
	}

	if (neagain > 0 || neintr > 0) {
		LOG_DEBUG((CE_CONT, "upcall: eagain=%d eintr=%d nticks=%d\n",
		    neagain, neintr, nticks));
	}

	LOG_DEBUG1((CE_CONT, "log_event_upcall:\n\t"
	    "error=%d rptr1=%p rptr2=%p dptr2=%p ret1=%x ret2=%x\n",
	    error, (void *)arg, (void *)darg.rbuf,
	    (void *)darg.data_ptr,
	    *((int *)(darg.rbuf)), *((int *)(darg.data_ptr))));

	if (!error) {
		/*
		 * upcall was successfully executed. Check return code.
		 */
		error = *((int *)(darg.rbuf));
	}

	return (error);
}
298
/*
 * log_event_deliver - event delivery thread
 *	Deliver all events on the event queue to syseventd.
 *	If the daemon can not process events, stop event
 *	delivery and wait for an indication from the
 *	daemon to resume delivery.
 *
 *	Once all event buffers have been delivered, wait
 *	until there are more to deliver.
 *
 *	The loop never exits.  eventq_head_mutex is held except
 *	around the upcall itself and while blocked in cv_wait().
 */
static void
log_event_deliver()
{
	log_eventq_t *q;
	int upcall_err;
	callb_cpr_t cprinfo;

	CALLB_CPR_INIT(&cprinfo, &eventq_head_mutex, callb_generic_cpr,
	    "logevent");

	/*
	 * eventq_head_mutex is exited (released) when there are no more
	 * events to process from the eventq in cv_wait().
	 */
	mutex_enter(&eventq_head_mutex);

	for (;;) {
		LOG_DEBUG1((CE_CONT, "log_event_deliver: head = %p\n",
		    (void *)log_eventq_head));

		upcall_err = 0;
		q = log_eventq_head;

		while (q) {
			/* Delivery on hold: report it as EAGAIN below */
			if (log_event_delivery == LOGEVENT_DELIVERY_HOLD) {
				upcall_err = EAGAIN;
				break;
			}

			log_event_delivery = LOGEVENT_DELIVERY_OK;

			/*
			 * Release event queue lock during upcall to
			 * syseventd
			 */
			mutex_exit(&eventq_head_mutex);
			if ((upcall_err = log_event_upcall(&q->arg)) != 0) {
				/* Retake the lock before leaving the loop */
				mutex_enter(&eventq_head_mutex);
				break;
			}

			/*
			 * We may be able to add entries to
			 * the queue now.
			 */
			if (event_qfull_blocked > 0 &&
			    log_eventq_cnt < logevent_max_q_sz) {
				mutex_enter(&event_qfull_mutex);
				/* Re-check now that the lock is held */
				if (event_qfull_blocked > 0) {
					cv_signal(&event_qfull_cv);
				}
				mutex_exit(&event_qfull_mutex);
			}

			mutex_enter(&eventq_head_mutex);

			/*
			 * Daemon restart can cause entries to be moved from
			 * the sent queue and put back on the event queue.
			 * If this has occurred, replay event queue
			 * processing from the new queue head.
			 */
			if (q != log_eventq_head) {
				q = log_eventq_head;
				LOG_DEBUG((CE_CONT, "log_event_deliver: "
				    "door upcall/daemon restart race\n"));
			} else {
				log_eventq_t *next;

				/*
				 * Move the event to the sent queue when a
				 * successful delivery has been made.
				 */
				mutex_enter(&eventq_sent_mutex);
				next = q->next;
				q->next = log_eventq_sent;
				log_eventq_sent = q;
				q = next;
				log_eventq_head = q;
				log_eventq_cnt--;
				if (q == NULL) {
					ASSERT(log_eventq_cnt == 0);
					log_eventq_tail = NULL;
				}
				mutex_exit(&eventq_sent_mutex);
			}
		}

		switch (upcall_err) {
		case 0:
			/*
			 * Success. The queue is empty.
			 */
			sysevent_upcall_status = 0;
			break;
		case EAGAIN:
			/*
			 * Delivery is on hold (but functional).
			 */
			sysevent_upcall_status = 0;
			/*
			 * If the user has already signaled for delivery
			 * resumption, continue.  Otherwise, we wait until
			 * we are signaled to continue.
			 */
			if (log_event_delivery == LOGEVENT_DELIVERY_CONT)
				continue;
			log_event_delivery = LOGEVENT_DELIVERY_HOLD;

			LOG_DEBUG1((CE_CONT, "log_event_deliver: EAGAIN\n"));
			break;
		default:
			LOG_DEBUG((CE_CONT, "log_event_deliver: "
			    "upcall err %d\n", upcall_err));
			sysevent_upcall_status = upcall_err;
			/*
			 * Signal everyone waiting that transport is down
			 */
			if (event_qfull_blocked > 0) {
				mutex_enter(&event_qfull_mutex);
				if (event_qfull_blocked > 0) {
					cv_broadcast(&event_qfull_cv);
				}
				mutex_exit(&event_qfull_mutex);
			}
			break;
		}

		/* Sleep until signaled; CPR-safe while blocked */
		CALLB_CPR_SAFE_BEGIN(&cprinfo);
		cv_wait(&log_event_cv, &eventq_head_mutex);
		CALLB_CPR_SAFE_END(&cprinfo, &eventq_head_mutex);
	}
	/* NOTREACHED */
}
443
/*
 * Set up the nvlist based data cache. Used by lofi to find
 * device name for mapped file.
 *
 * Initializes the cache's lock and condition variable and allocates an
 * empty NV_UNIQUE_NAME nvlist for ln_data.  The nvlist_alloc() result is
 * deliberately ignored; the allocation is requested with KM_SLEEP.
 */
static void
lofi_nvl_init(lofi_nvl_t *cache)
{
	mutex_init(&cache->ln_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&cache->ln_cv, NULL, CV_DRIVER, NULL);
	(void) nvlist_alloc(&cache->ln_data, NV_UNIQUE_NAME, KM_SLEEP);
}
455
456 /*
457 * log_event_init - Allocate and initialize log_event data structures.
458 */
459 void
log_event_init()460 log_event_init()
461 {
462 /* Set up devlink cache for lofi. */
463 lofi_nvl_init(&lofi_devlink_cache);
464
465 mutex_init(&event_door_mutex, NULL, MUTEX_DEFAULT, NULL);
466
467 mutex_init(&eventq_head_mutex, NULL, MUTEX_DEFAULT, NULL);
468 mutex_init(&eventq_sent_mutex, NULL, MUTEX_DEFAULT, NULL);
469 cv_init(&log_event_cv, NULL, CV_DEFAULT, NULL);
470
471 mutex_init(&event_qfull_mutex, NULL, MUTEX_DEFAULT, NULL);
472 cv_init(&event_qfull_cv, NULL, CV_DEFAULT, NULL);
473
474 mutex_init(&event_pause_mutex, NULL, MUTEX_DEFAULT, NULL);
475 cv_init(&event_pause_cv, NULL, CV_DEFAULT, NULL);
476
477 mutex_init(®istered_channel_mutex, NULL, MUTEX_DEFAULT, NULL);
478 sysevent_evc_init();
479 }
480
481 /*
482 * The following routines are used by kernel event publishers to
483 * allocate, append and free event buffers
484 */
485 /*
486 * sysevent_alloc - Allocate new eventq struct. This element contains
487 * an event buffer that will be used in a subsequent
488 * call to log_sysevent.
489 */
490 sysevent_t *
sysevent_alloc(char * class,char * subclass,char * pub,int flag)491 sysevent_alloc(char *class, char *subclass, char *pub, int flag)
492 {
493 int payload_sz;
494 int class_sz, subclass_sz, pub_sz;
495 int aligned_class_sz, aligned_subclass_sz, aligned_pub_sz;
496 sysevent_t *ev;
497 log_eventq_t *q;
498
499 ASSERT(class != NULL);
500 ASSERT(subclass != NULL);
501 ASSERT(pub != NULL);
502
503 /*
504 * Calculate and reserve space for the class, subclass and
505 * publisher strings in the event buffer
506 */
507 class_sz = strlen(class) + 1;
508 subclass_sz = strlen(subclass) + 1;
509 pub_sz = strlen(pub) + 1;
510
511 ASSERT((class_sz <= MAX_CLASS_LEN) && (subclass_sz
512 <= MAX_SUBCLASS_LEN) && (pub_sz <= MAX_PUB_LEN));
513
514 /* String sizes must be 64-bit aligned in the event buffer */
515 aligned_class_sz = SE_ALIGN(class_sz);
516 aligned_subclass_sz = SE_ALIGN(subclass_sz);
517 aligned_pub_sz = SE_ALIGN(pub_sz);
518
519 payload_sz = (aligned_class_sz - sizeof (uint64_t)) +
520 (aligned_subclass_sz - sizeof (uint64_t)) +
521 (aligned_pub_sz - sizeof (uint64_t)) - sizeof (uint64_t);
522
523 /*
524 * Allocate event buffer plus additional sysevent queue
525 * and payload overhead.
526 */
527 q = kmem_zalloc(sizeof (log_eventq_t) + payload_sz, flag);
528 if (q == NULL) {
529 return (NULL);
530 }
531
532 /* Initialize the event buffer data */
533 ev = (sysevent_t *)&q->arg.buf;
534 SE_VERSION(ev) = SYS_EVENT_VERSION;
535 bcopy(class, SE_CLASS_NAME(ev), class_sz);
536
537 SE_SUBCLASS_OFF(ev) = SE_ALIGN(offsetof(sysevent_impl_t, se_class_name))
538 + aligned_class_sz;
539 bcopy(subclass, SE_SUBCLASS_NAME(ev), subclass_sz);
540
541 SE_PUB_OFF(ev) = SE_SUBCLASS_OFF(ev) + aligned_subclass_sz;
542 bcopy(pub, SE_PUB_NAME(ev), pub_sz);
543
544 SE_ATTR_PTR(ev) = UINT64_C(0);
545 SE_PAYLOAD_SZ(ev) = payload_sz;
546
547 return (ev);
548 }
549
550 /*
551 * sysevent_free - Free event buffer and any attribute data.
552 */
553 void
sysevent_free(sysevent_t * ev)554 sysevent_free(sysevent_t *ev)
555 {
556 log_eventq_t *q;
557 nvlist_t *nvl;
558
559 ASSERT(ev != NULL);
560 q = (log_eventq_t *)((caddr_t)ev - offsetof(log_eventq_t, arg.buf));
561 nvl = (nvlist_t *)(uintptr_t)SE_ATTR_PTR(ev);
562
563 if (nvl != NULL) {
564 size_t size = 0;
565 (void) nvlist_size(nvl, &size, encoding);
566 SE_PAYLOAD_SZ(ev) -= size;
567 nvlist_free(nvl);
568 }
569 kmem_free(q, sizeof (log_eventq_t) + SE_PAYLOAD_SZ(ev));
570 }
571
572 /*
573 * free_packed_event - Free packed event buffer
574 */
575 static void
free_packed_event(sysevent_t * ev)576 free_packed_event(sysevent_t *ev)
577 {
578 log_eventq_t *q;
579
580 ASSERT(ev != NULL);
581 q = (log_eventq_t *)((caddr_t)ev - offsetof(log_eventq_t, arg.buf));
582
583 kmem_free(q, sizeof (log_eventq_t) + SE_PAYLOAD_SZ(ev));
584 }
585
586 /*
587 * sysevent_add_attr - Add new attribute element to an event attribute list
588 * If attribute list is NULL, start a new list.
589 */
590 int
sysevent_add_attr(sysevent_attr_list_t ** ev_attr_list,char * name,sysevent_value_t * se_value,int flag)591 sysevent_add_attr(sysevent_attr_list_t **ev_attr_list, char *name,
592 sysevent_value_t *se_value, int flag)
593 {
594 int error;
595 nvlist_t **nvlp = (nvlist_t **)ev_attr_list;
596
597 if (nvlp == NULL || se_value == NULL) {
598 return (SE_EINVAL);
599 }
600
601 /*
602 * attr_sz is composed of the value data size + the name data size +
603 * any header data. 64-bit aligned.
604 */
605 if (strlen(name) >= MAX_ATTR_NAME) {
606 return (SE_EINVAL);
607 }
608
609 /*
610 * Allocate nvlist
611 */
612 if ((*nvlp == NULL) &&
613 (nvlist_alloc(nvlp, NV_UNIQUE_NAME_TYPE, flag) != 0))
614 return (SE_ENOMEM);
615
616 /* add the attribute */
617 switch (se_value->value_type) {
618 case SE_DATA_TYPE_BYTE:
619 error = nvlist_add_byte(*ev_attr_list, name,
620 se_value->value.sv_byte);
621 break;
622 case SE_DATA_TYPE_INT16:
623 error = nvlist_add_int16(*ev_attr_list, name,
624 se_value->value.sv_int16);
625 break;
626 case SE_DATA_TYPE_UINT16:
627 error = nvlist_add_uint16(*ev_attr_list, name,
628 se_value->value.sv_uint16);
629 break;
630 case SE_DATA_TYPE_INT32:
631 error = nvlist_add_int32(*ev_attr_list, name,
632 se_value->value.sv_int32);
633 break;
634 case SE_DATA_TYPE_UINT32:
635 error = nvlist_add_uint32(*ev_attr_list, name,
636 se_value->value.sv_uint32);
637 break;
638 case SE_DATA_TYPE_INT64:
639 error = nvlist_add_int64(*ev_attr_list, name,
640 se_value->value.sv_int64);
641 break;
642 case SE_DATA_TYPE_UINT64:
643 error = nvlist_add_uint64(*ev_attr_list, name,
644 se_value->value.sv_uint64);
645 break;
646 case SE_DATA_TYPE_STRING:
647 if (strlen((char *)se_value->value.sv_string) >= MAX_STRING_SZ)
648 return (SE_EINVAL);
649 error = nvlist_add_string(*ev_attr_list, name,
650 se_value->value.sv_string);
651 break;
652 case SE_DATA_TYPE_BYTES:
653 if (se_value->value.sv_bytes.size > MAX_BYTE_ARRAY)
654 return (SE_EINVAL);
655 error = nvlist_add_byte_array(*ev_attr_list, name,
656 se_value->value.sv_bytes.data,
657 se_value->value.sv_bytes.size);
658 break;
659 case SE_DATA_TYPE_TIME:
660 error = nvlist_add_hrtime(*ev_attr_list, name,
661 se_value->value.sv_time);
662 break;
663 default:
664 return (SE_EINVAL);
665 }
666
667 return (error ? SE_ENOMEM : 0);
668 }
669
/*
 * sysevent_free_attr - Free an attribute list not associated with an
 *			event buffer.
 *
 *	The list is released with nvlist_free().
 */
void
sysevent_free_attr(sysevent_attr_list_t *ev_attr_list)
{
	nvlist_free((nvlist_t *)ev_attr_list);
}
679
680 /*
681 * sysevent_attach_attributes - Attach an attribute list to an event buffer.
682 *
683 * This data will be re-packed into contiguous memory when the event
684 * buffer is posted to log_sysevent.
685 */
686 int
sysevent_attach_attributes(sysevent_t * ev,sysevent_attr_list_t * ev_attr_list)687 sysevent_attach_attributes(sysevent_t *ev, sysevent_attr_list_t *ev_attr_list)
688 {
689 size_t size = 0;
690
691 if (SE_ATTR_PTR(ev) != UINT64_C(0)) {
692 return (SE_EINVAL);
693 }
694
695 SE_ATTR_PTR(ev) = (uintptr_t)ev_attr_list;
696 (void) nvlist_size((nvlist_t *)ev_attr_list, &size, encoding);
697 SE_PAYLOAD_SZ(ev) += size;
698 SE_FLAG(ev) = 0;
699
700 return (0);
701 }
702
703 /*
704 * sysevent_detach_attributes - Detach but don't free attribute list from the
705 * event buffer.
706 */
707 void
sysevent_detach_attributes(sysevent_t * ev)708 sysevent_detach_attributes(sysevent_t *ev)
709 {
710 size_t size = 0;
711 nvlist_t *nvl;
712
713 if ((nvl = (nvlist_t *)(uintptr_t)SE_ATTR_PTR(ev)) == NULL) {
714 return;
715 }
716
717 SE_ATTR_PTR(ev) = UINT64_C(0);
718 (void) nvlist_size(nvl, &size, encoding);
719 SE_PAYLOAD_SZ(ev) -= size;
720 ASSERT(SE_PAYLOAD_SZ(ev) >= 0);
721 }
722
723 /*
724 * sysevent_attr_name - Get name of attribute
725 */
726 char *
sysevent_attr_name(sysevent_attr_t * attr)727 sysevent_attr_name(sysevent_attr_t *attr)
728 {
729 if (attr == NULL) {
730 return (NULL);
731 }
732
733 return (nvpair_name(attr));
734 }
735
736 /*
737 * sysevent_attr_type - Get type of attribute
738 */
739 int
sysevent_attr_type(sysevent_attr_t * attr)740 sysevent_attr_type(sysevent_attr_t *attr)
741 {
742 /*
743 * The SE_DATA_TYPE_* are typedef'ed to be the
744 * same value as DATA_TYPE_*
745 */
746 return (nvpair_type((nvpair_t *)attr));
747 }
748
749 /*
750 * Repack event buffer into contiguous memory
751 */
752 static sysevent_t *
se_repack(sysevent_t * ev,int flag)753 se_repack(sysevent_t *ev, int flag)
754 {
755 size_t copy_len;
756 caddr_t attr;
757 size_t size;
758 uint64_t attr_offset;
759 sysevent_t *copy;
760 log_eventq_t *qcopy;
761 sysevent_attr_list_t *nvl;
762
763 copy_len = sizeof (log_eventq_t) + SE_PAYLOAD_SZ(ev);
764 qcopy = kmem_zalloc(copy_len, flag);
765 if (qcopy == NULL) {
766 return (NULL);
767 }
768 copy = (sysevent_t *)&qcopy->arg.buf;
769
770 /*
771 * Copy event header, class, subclass and publisher names
772 * Set the attribute offset (in number of bytes) to contiguous
773 * memory after the header.
774 */
775
776 attr_offset = SE_ATTR_OFF(ev);
777
778 ASSERT((caddr_t)copy + attr_offset <= (caddr_t)copy + copy_len);
779
780 bcopy(ev, copy, attr_offset);
781
782 /* Check if attribute list exists */
783 if ((nvl = (nvlist_t *)(uintptr_t)SE_ATTR_PTR(ev)) == NULL) {
784 return (copy);
785 }
786
787 /*
788 * Copy attribute data to contiguous memory
789 */
790 attr = (char *)copy + attr_offset;
791 (void) nvlist_size(nvl, &size, encoding);
792 if (nvlist_pack(nvl, &attr, &size, encoding, flag) != 0) {
793 kmem_free(qcopy, copy_len);
794 return (NULL);
795 }
796 SE_ATTR_PTR(copy) = UINT64_C(0);
797 SE_FLAG(copy) = SE_PACKED_BUF;
798
799 return (copy);
800 }
801
802 /*
803 * The sysevent registration provides a persistent and reliable database
804 * for channel information for sysevent channel publishers and
805 * subscribers.
806 *
807 * A channel is created and maintained by the kernel upon the first
808 * SE_OPEN_REGISTRATION operation to log_sysevent_register(). Channel
809 * event subscription information is updated as publishers or subscribers
810 * perform subsequent operations (SE_BIND_REGISTRATION, SE_REGISTER,
811 * SE_UNREGISTER and SE_UNBIND_REGISTRATION).
812 *
813 * For consistency, id's are assigned for every publisher or subscriber
814 * bound to a particular channel. The id's are used to constrain resources
815 * and perform subscription lookup.
816 *
817 * Associated with each channel is a hashed list of the current subscriptions
818 * based upon event class and subclasses. A subscription contains a class name,
819 * list of possible subclasses and an array of subscriber ids. Subscriptions
820 * are updated for every SE_REGISTER or SE_UNREGISTER operation.
821 *
822 * Channels are closed once the last subscriber or publisher performs a
823 * SE_CLOSE_REGISTRATION operation. All resources associated with the named
824 * channel are freed upon last close.
825 *
826 * Locking:
827 * Every operation to log_sysevent() is protected by a single lock,
828 * registered_channel_mutex. It is expected that the granularity of
829 * a single lock is sufficient given the frequency that updates will
830 * occur.
831 *
832 * If this locking strategy proves to be too contentious, a per-hash
833 * or per-channel locking strategy may be implemented.
834 */
835
836
/* Map a channel name to its bucket index in registered_channels[] */
#define	CHANN_HASH(channel_name)	(hash_func(channel_name) \
					% CHAN_HASH_SZ)

/* Hash table of channel descriptors, chained through scd_next */
sysevent_channel_descriptor_t *registered_channels[CHAN_HASH_SZ];
/* Number of channels currently linked into registered_channels[] */
static int channel_cnt;
static void remove_all_class(sysevent_channel_descriptor_t *chan,
	uint32_t sub_id);
844
/*
 * hash_func - Hash a NUL-terminated string to a 32-bit value.
 *
 *	For each character the running hash is shifted left by four bits
 *	and the character added.  Whenever any of the top four bits end
 *	up set, they are folded back in (shifted right by 24) and cleared.
 */
static uint32_t
hash_func(const char *s)
{
	uint32_t hash = 0;
	const char *cp;

	for (cp = s; *cp != '\0'; cp++) {
		uint32_t top;

		hash = (hash << 4) + (uint32_t)*cp;
		top = hash & 0xf0000000;
		if (top != 0)
			hash ^= (top >> 24) ^ top;
	}

	return (hash);
}
863
864 static sysevent_channel_descriptor_t *
get_channel(char * channel_name)865 get_channel(char *channel_name)
866 {
867 int hash_index;
868 sysevent_channel_descriptor_t *chan_list;
869
870 if (channel_name == NULL)
871 return (NULL);
872
873 /* Find channel descriptor */
874 hash_index = CHANN_HASH(channel_name);
875 chan_list = registered_channels[hash_index];
876 while (chan_list != NULL) {
877 if (strcmp(chan_list->scd_channel_name, channel_name) == 0) {
878 break;
879 } else {
880 chan_list = chan_list->scd_next;
881 }
882 }
883
884 return (chan_list);
885 }
886
887 static class_lst_t *
create_channel_registration(sysevent_channel_descriptor_t * chan,char * event_class,int index)888 create_channel_registration(sysevent_channel_descriptor_t *chan,
889 char *event_class, int index)
890 {
891 size_t class_len;
892 class_lst_t *c_list;
893
894 class_len = strlen(event_class) + 1;
895 c_list = kmem_zalloc(sizeof (class_lst_t), KM_SLEEP);
896 c_list->cl_name = kmem_zalloc(class_len, KM_SLEEP);
897 bcopy(event_class, c_list->cl_name, class_len);
898
899 c_list->cl_subclass_list =
900 kmem_zalloc(sizeof (subclass_lst_t), KM_SLEEP);
901 c_list->cl_subclass_list->sl_name =
902 kmem_zalloc(sizeof (EC_SUB_ALL), KM_SLEEP);
903 bcopy(EC_SUB_ALL, c_list->cl_subclass_list->sl_name,
904 sizeof (EC_SUB_ALL));
905
906 c_list->cl_next = chan->scd_class_list_tbl[index];
907 chan->scd_class_list_tbl[index] = c_list;
908
909 return (c_list);
910 }
911
912 static void
free_channel_registration(sysevent_channel_descriptor_t * chan)913 free_channel_registration(sysevent_channel_descriptor_t *chan)
914 {
915 int i;
916 class_lst_t *clist, *next_clist;
917 subclass_lst_t *sclist, *next_sc;
918
919 for (i = 0; i <= CLASS_HASH_SZ; ++i) {
920
921 clist = chan->scd_class_list_tbl[i];
922 while (clist != NULL) {
923 sclist = clist->cl_subclass_list;
924 while (sclist != NULL) {
925 kmem_free(sclist->sl_name,
926 strlen(sclist->sl_name) + 1);
927 next_sc = sclist->sl_next;
928 kmem_free(sclist, sizeof (subclass_lst_t));
929 sclist = next_sc;
930 }
931 kmem_free(clist->cl_name,
932 strlen(clist->cl_name) + 1);
933 next_clist = clist->cl_next;
934 kmem_free(clist, sizeof (class_lst_t));
935 clist = next_clist;
936 }
937 }
938 chan->scd_class_list_tbl[0] = NULL;
939 }
940
941 static int
open_channel(char * channel_name)942 open_channel(char *channel_name)
943 {
944 int hash_index;
945 sysevent_channel_descriptor_t *chan, *chan_list;
946
947
948 if (channel_cnt > MAX_CHAN) {
949 return (-1);
950 }
951
952 /* Find channel descriptor */
953 hash_index = CHANN_HASH(channel_name);
954 chan_list = registered_channels[hash_index];
955 while (chan_list != NULL) {
956 if (strcmp(chan_list->scd_channel_name, channel_name) == 0) {
957 chan_list->scd_ref_cnt++;
958 kmem_free(channel_name, strlen(channel_name) + 1);
959 return (0);
960 } else {
961 chan_list = chan_list->scd_next;
962 }
963 }
964
965
966 /* New channel descriptor */
967 chan = kmem_zalloc(sizeof (sysevent_channel_descriptor_t), KM_SLEEP);
968 chan->scd_channel_name = channel_name;
969
970 /*
971 * Create subscriber ids in the range [1, MAX_SUBSCRIBERS).
972 * Subscriber id 0 is never allocated, but is used as a reserved id
973 * by libsysevent
974 */
975 if ((chan->scd_subscriber_cache = vmem_create(channel_name, (void *)1,
976 MAX_SUBSCRIBERS + 1, 1, NULL, NULL, NULL, 0,
977 VM_NOSLEEP | VMC_IDENTIFIER)) == NULL) {
978 kmem_free(chan, sizeof (sysevent_channel_descriptor_t));
979 return (-1);
980 }
981 if ((chan->scd_publisher_cache = vmem_create(channel_name, (void *)1,
982 MAX_PUBLISHERS + 1, 1, NULL, NULL, NULL, 0,
983 VM_NOSLEEP | VMC_IDENTIFIER)) == NULL) {
984 vmem_destroy(chan->scd_subscriber_cache);
985 kmem_free(chan, sizeof (sysevent_channel_descriptor_t));
986 return (-1);
987 }
988
989 chan->scd_ref_cnt = 1;
990
991 (void) create_channel_registration(chan, EC_ALL, 0);
992
993 if (registered_channels[hash_index] != NULL)
994 chan->scd_next = registered_channels[hash_index];
995
996 registered_channels[hash_index] = chan;
997
998 ++channel_cnt;
999
1000 return (0);
1001 }
1002
1003 static void
close_channel(char * channel_name)1004 close_channel(char *channel_name)
1005 {
1006 int hash_index;
1007 sysevent_channel_descriptor_t *chan, *prev_chan;
1008
1009 /* Find channel descriptor */
1010 hash_index = CHANN_HASH(channel_name);
1011 prev_chan = chan = registered_channels[hash_index];
1012
1013 while (chan != NULL) {
1014 if (strcmp(chan->scd_channel_name, channel_name) == 0) {
1015 break;
1016 } else {
1017 prev_chan = chan;
1018 chan = chan->scd_next;
1019 }
1020 }
1021
1022 if (chan == NULL)
1023 return;
1024
1025 chan->scd_ref_cnt--;
1026 if (chan->scd_ref_cnt > 0)
1027 return;
1028
1029 free_channel_registration(chan);
1030 vmem_destroy(chan->scd_subscriber_cache);
1031 vmem_destroy(chan->scd_publisher_cache);
1032 kmem_free(chan->scd_channel_name,
1033 strlen(chan->scd_channel_name) + 1);
1034 if (registered_channels[hash_index] == chan)
1035 registered_channels[hash_index] = chan->scd_next;
1036 else
1037 prev_chan->scd_next = chan->scd_next;
1038 kmem_free(chan, sizeof (sysevent_channel_descriptor_t));
1039 --channel_cnt;
1040 }
1041
1042 static id_t
bind_common(sysevent_channel_descriptor_t * chan,int type)1043 bind_common(sysevent_channel_descriptor_t *chan, int type)
1044 {
1045 id_t id;
1046
1047 if (type == SUBSCRIBER) {
1048 id = (id_t)(uintptr_t)vmem_alloc(chan->scd_subscriber_cache, 1,
1049 VM_NOSLEEP | VM_NEXTFIT);
1050 if (id <= 0 || id > MAX_SUBSCRIBERS)
1051 return (0);
1052 chan->scd_subscriber_ids[id] = 1;
1053 } else {
1054 id = (id_t)(uintptr_t)vmem_alloc(chan->scd_publisher_cache, 1,
1055 VM_NOSLEEP | VM_NEXTFIT);
1056 if (id <= 0 || id > MAX_PUBLISHERS)
1057 return (0);
1058 chan->scd_publisher_ids[id] = 1;
1059 }
1060
1061 return (id);
1062 }
1063
1064 static int
unbind_common(sysevent_channel_descriptor_t * chan,int type,id_t id)1065 unbind_common(sysevent_channel_descriptor_t *chan, int type, id_t id)
1066 {
1067 if (type == SUBSCRIBER) {
1068 if (id <= 0 || id > MAX_SUBSCRIBERS)
1069 return (0);
1070 if (chan->scd_subscriber_ids[id] == 0)
1071 return (0);
1072 (void) remove_all_class(chan, id);
1073 chan->scd_subscriber_ids[id] = 0;
1074 vmem_free(chan->scd_subscriber_cache, (void *)(uintptr_t)id, 1);
1075 } else {
1076 if (id <= 0 || id > MAX_PUBLISHERS)
1077 return (0);
1078 if (chan->scd_publisher_ids[id] == 0)
1079 return (0);
1080 chan->scd_publisher_ids[id] = 0;
1081 vmem_free(chan->scd_publisher_cache, (void *)(uintptr_t)id, 1);
1082 }
1083
1084 return (1);
1085 }
1086
/*
 * release_id - Unbind an id from a channel and, if the id was actually
 *		bound, drop one channel reference via close_channel().
 */
static void
release_id(sysevent_channel_descriptor_t *chan, int type, id_t id)
{
	if (unbind_common(chan, type, id))
		close_channel(chan->scd_channel_name);
}
1093
1094 static subclass_lst_t *
find_subclass(class_lst_t * c_list,char * subclass)1095 find_subclass(class_lst_t *c_list, char *subclass)
1096 {
1097 subclass_lst_t *sc_list;
1098
1099 if (c_list == NULL)
1100 return (NULL);
1101
1102 sc_list = c_list->cl_subclass_list;
1103
1104 while (sc_list != NULL) {
1105 if (strcmp(sc_list->sl_name, subclass) == 0) {
1106 return (sc_list);
1107 }
1108 sc_list = sc_list->sl_next;
1109 }
1110
1111 return (NULL);
1112 }
1113
1114 static void
insert_subclass(class_lst_t * c_list,char ** subclass_names,int subclass_num,uint32_t sub_id)1115 insert_subclass(class_lst_t *c_list, char **subclass_names,
1116 int subclass_num, uint32_t sub_id)
1117 {
1118 int i, subclass_sz;
1119 subclass_lst_t *sc_list;
1120
1121 for (i = 0; i < subclass_num; ++i) {
1122 if ((sc_list = find_subclass(c_list, subclass_names[i]))
1123 != NULL) {
1124 sc_list->sl_num[sub_id] = 1;
1125 } else {
1126
1127 sc_list = kmem_zalloc(sizeof (subclass_lst_t),
1128 KM_SLEEP);
1129 subclass_sz = strlen(subclass_names[i]) + 1;
1130 sc_list->sl_name = kmem_zalloc(subclass_sz, KM_SLEEP);
1131 bcopy(subclass_names[i], sc_list->sl_name,
1132 subclass_sz);
1133
1134 sc_list->sl_num[sub_id] = 1;
1135
1136 sc_list->sl_next = c_list->cl_subclass_list;
1137 c_list->cl_subclass_list = sc_list;
1138 }
1139 }
1140 }
1141
1142 static class_lst_t *
find_class(sysevent_channel_descriptor_t * chan,char * class_name)1143 find_class(sysevent_channel_descriptor_t *chan, char *class_name)
1144 {
1145 class_lst_t *c_list;
1146
1147 c_list = chan->scd_class_list_tbl[CLASS_HASH(class_name)];
1148 while (c_list != NULL) {
1149 if (strcmp(class_name, c_list->cl_name) == 0)
1150 break;
1151 c_list = c_list->cl_next;
1152 }
1153
1154 return (c_list);
1155 }
1156
1157 static void
remove_all_class(sysevent_channel_descriptor_t * chan,uint32_t sub_id)1158 remove_all_class(sysevent_channel_descriptor_t *chan, uint32_t sub_id)
1159 {
1160 int i;
1161 class_lst_t *c_list;
1162 subclass_lst_t *sc_list;
1163
1164 for (i = 0; i <= CLASS_HASH_SZ; ++i) {
1165
1166 c_list = chan->scd_class_list_tbl[i];
1167 while (c_list != NULL) {
1168 sc_list = c_list->cl_subclass_list;
1169 while (sc_list != NULL) {
1170 sc_list->sl_num[sub_id] = 0;
1171 sc_list = sc_list->sl_next;
1172 }
1173 c_list = c_list->cl_next;
1174 }
1175 }
1176 }
1177
1178 static void
remove_class(sysevent_channel_descriptor_t * chan,uint32_t sub_id,char * class_name)1179 remove_class(sysevent_channel_descriptor_t *chan, uint32_t sub_id,
1180 char *class_name)
1181 {
1182 class_lst_t *c_list;
1183 subclass_lst_t *sc_list;
1184
1185 if (strcmp(class_name, EC_ALL) == 0) {
1186 remove_all_class(chan, sub_id);
1187 return;
1188 }
1189
1190 if ((c_list = find_class(chan, class_name)) == NULL) {
1191 return;
1192 }
1193
1194 sc_list = c_list->cl_subclass_list;
1195 while (sc_list != NULL) {
1196 sc_list->sl_num[sub_id] = 0;
1197 sc_list = sc_list->sl_next;
1198 }
1199 }
1200
1201 static int
insert_class(sysevent_channel_descriptor_t * chan,char * event_class,char ** event_subclass_lst,int subclass_num,uint32_t sub_id)1202 insert_class(sysevent_channel_descriptor_t *chan, char *event_class,
1203 char **event_subclass_lst, int subclass_num, uint32_t sub_id)
1204 {
1205 class_lst_t *c_list;
1206
1207 if (strcmp(event_class, EC_ALL) == 0) {
1208 insert_subclass(chan->scd_class_list_tbl[0],
1209 event_subclass_lst, 1, sub_id);
1210 return (0);
1211 }
1212
1213 if (strlen(event_class) + 1 > MAX_CLASS_LEN)
1214 return (-1);
1215
1216 /* New class, add to the registration cache */
1217 if ((c_list = find_class(chan, event_class)) == NULL) {
1218 c_list = create_channel_registration(chan, event_class,
1219 CLASS_HASH(event_class));
1220 }
1221
1222 /* Update the subclass list */
1223 insert_subclass(c_list, event_subclass_lst, subclass_num, sub_id);
1224
1225 return (0);
1226 }
1227
1228 static int
add_registration(sysevent_channel_descriptor_t * chan,uint32_t sub_id,char * nvlbuf,size_t nvlsize)1229 add_registration(sysevent_channel_descriptor_t *chan, uint32_t sub_id,
1230 char *nvlbuf, size_t nvlsize)
1231 {
1232 uint_t num_elem;
1233 char *event_class;
1234 char **event_list;
1235 nvlist_t *nvl;
1236 nvpair_t *nvpair = NULL;
1237
1238 if (nvlist_unpack(nvlbuf, nvlsize, &nvl, KM_SLEEP) != 0)
1239 return (-1);
1240
1241 if ((nvpair = nvlist_next_nvpair(nvl, nvpair)) == NULL) {
1242 nvlist_free(nvl);
1243 return (-1);
1244 }
1245
1246 if ((event_class = nvpair_name(nvpair)) == NULL) {
1247 nvlist_free(nvl);
1248 return (-1);
1249 }
1250 if (nvpair_value_string_array(nvpair, &event_list,
1251 &num_elem) != 0) {
1252 nvlist_free(nvl);
1253 return (-1);
1254 }
1255
1256 if (insert_class(chan, event_class, event_list, num_elem, sub_id) < 0) {
1257 nvlist_free(nvl);
1258 return (-1);
1259 }
1260
1261 nvlist_free(nvl);
1262
1263 return (0);
1264 }
1265
1266 /*
1267 * get_registration - Return the requested class hash chain
1268 */
1269 static int
get_registration(sysevent_channel_descriptor_t * chan,char * databuf,uint32_t * bufsz,uint32_t class_index)1270 get_registration(sysevent_channel_descriptor_t *chan, char *databuf,
1271 uint32_t *bufsz, uint32_t class_index)
1272 {
1273 int num_classes = 0;
1274 char *nvlbuf = NULL;
1275 size_t nvlsize;
1276 nvlist_t *nvl;
1277 class_lst_t *clist;
1278 subclass_lst_t *sc_list;
1279
1280 if (class_index > CLASS_HASH_SZ)
1281 return (EINVAL);
1282
1283 if ((clist = chan->scd_class_list_tbl[class_index]) == NULL) {
1284 return (ENOENT);
1285 }
1286
1287 if (nvlist_alloc(&nvl, 0, 0) != 0) {
1288 return (EFAULT);
1289 }
1290
1291 while (clist != NULL) {
1292 if (nvlist_add_string(nvl, CLASS_NAME, clist->cl_name)
1293 != 0) {
1294 nvlist_free(nvl);
1295 return (EFAULT);
1296 }
1297
1298 sc_list = clist->cl_subclass_list;
1299 while (sc_list != NULL) {
1300 if (nvlist_add_byte_array(nvl, sc_list->sl_name,
1301 sc_list->sl_num, MAX_SUBSCRIBERS) != 0) {
1302 nvlist_free(nvl);
1303 return (EFAULT);
1304 }
1305 sc_list = sc_list->sl_next;
1306 }
1307 num_classes++;
1308 clist = clist->cl_next;
1309 }
1310
1311 if (num_classes == 0) {
1312 nvlist_free(nvl);
1313 return (ENOENT);
1314 }
1315
1316 if (nvlist_pack(nvl, &nvlbuf, &nvlsize, NV_ENCODE_NATIVE,
1317 KM_SLEEP)
1318 != 0) {
1319 nvlist_free(nvl);
1320 return (EFAULT);
1321 }
1322
1323 nvlist_free(nvl);
1324
1325 if (nvlsize > *bufsz) {
1326 kmem_free(nvlbuf, nvlsize);
1327 *bufsz = nvlsize;
1328 return (EAGAIN);
1329 }
1330
1331 bcopy(nvlbuf, databuf, nvlsize);
1332 kmem_free(nvlbuf, nvlsize);
1333
1334 return (0);
1335 }
1336
1337 /*
1338 * log_sysevent_register - Register event subscriber for a particular
1339 * event channel.
1340 */
1341 int
log_sysevent_register(char * channel_name,char * udatabuf,se_pubsub_t * udata)1342 log_sysevent_register(char *channel_name, char *udatabuf, se_pubsub_t *udata)
1343 {
1344 int error = 0;
1345 char *kchannel, *databuf = NULL;
1346 size_t bufsz;
1347 se_pubsub_t kdata;
1348 sysevent_channel_descriptor_t *chan = NULL;
1349
1350 if (copyin(udata, &kdata, sizeof (se_pubsub_t)) == -1) {
1351 return (EFAULT);
1352 }
1353 if (kdata.ps_channel_name_len == 0) {
1354 return (EINVAL);
1355 }
1356 kchannel = kmem_alloc(kdata.ps_channel_name_len, KM_SLEEP);
1357 if (copyin(channel_name, kchannel, kdata.ps_channel_name_len) == -1) {
1358 kmem_free(kchannel, kdata.ps_channel_name_len);
1359 return (EFAULT);
1360 }
1361 bufsz = kdata.ps_buflen;
1362 if (bufsz > 0) {
1363 databuf = kmem_alloc(bufsz, KM_SLEEP);
1364 if (copyin(udatabuf, databuf, bufsz) == -1) {
1365 kmem_free(kchannel, kdata.ps_channel_name_len);
1366 kmem_free(databuf, bufsz);
1367 return (EFAULT);
1368 }
1369 }
1370
1371 mutex_enter(®istered_channel_mutex);
1372 if (kdata.ps_op != SE_OPEN_REGISTRATION &&
1373 kdata.ps_op != SE_CLOSE_REGISTRATION) {
1374 chan = get_channel(kchannel);
1375 if (chan == NULL) {
1376 mutex_exit(®istered_channel_mutex);
1377 kmem_free(kchannel, kdata.ps_channel_name_len);
1378 if (bufsz > 0)
1379 kmem_free(databuf, bufsz);
1380 return (ENOENT);
1381 }
1382 }
1383
1384 switch (kdata.ps_op) {
1385 case SE_OPEN_REGISTRATION:
1386 if (open_channel(kchannel) != 0) {
1387 error = ENOMEM;
1388 if (bufsz > 0)
1389 kmem_free(databuf, bufsz);
1390 kmem_free(kchannel, kdata.ps_channel_name_len);
1391 }
1392
1393 mutex_exit(®istered_channel_mutex);
1394 return (error);
1395 case SE_CLOSE_REGISTRATION:
1396 close_channel(kchannel);
1397 break;
1398 case SE_BIND_REGISTRATION: {
1399 id_t id;
1400
1401 id = bind_common(chan, kdata.ps_type);
1402 kdata.ps_id = (uint32_t)id;
1403 if (id <= 0)
1404 error = EBUSY;
1405 break;
1406 }
1407 case SE_UNBIND_REGISTRATION:
1408 (void) unbind_common(chan, kdata.ps_type, (id_t)kdata.ps_id);
1409 break;
1410 case SE_REGISTER:
1411 if (bufsz == 0) {
1412 error = EINVAL;
1413 break;
1414 }
1415 if (add_registration(chan, kdata.ps_id, databuf, bufsz) == -1)
1416 error = EINVAL;
1417 break;
1418 case SE_UNREGISTER:
1419 if (bufsz == 0) {
1420 error = EINVAL;
1421 break;
1422 }
1423 remove_class(chan, kdata.ps_id, databuf);
1424 break;
1425 case SE_CLEANUP:
1426 /* Cleanup the indicated subscriber or publisher */
1427 release_id(chan, kdata.ps_type, kdata.ps_id);
1428 break;
1429 case SE_GET_REGISTRATION:
1430 error = get_registration(chan, databuf,
1431 &kdata.ps_buflen, kdata.ps_id);
1432 break;
1433 default:
1434 error = ENOTSUP;
1435 }
1436
1437 mutex_exit(®istered_channel_mutex);
1438
1439 kmem_free(kchannel, kdata.ps_channel_name_len);
1440
1441 if (bufsz > 0) {
1442 if (copyout(databuf, udatabuf, bufsz) == -1)
1443 error = EFAULT;
1444 kmem_free(databuf, bufsz);
1445 }
1446
1447 if (copyout(&kdata, udata, sizeof (se_pubsub_t)) == -1)
1448 return (EFAULT);
1449
1450 return (error);
1451 }
1452
1453 /*
1454 * log_sysevent_copyout_data - Copyout event data to userland.
1455 * This is called from modctl(MODEVENTS, MODEVENTS_GETDATA)
1456 * The buffer size is always sufficient.
1457 */
1458 int
log_sysevent_copyout_data(sysevent_id_t * eid,size_t ubuflen,caddr_t ubuf)1459 log_sysevent_copyout_data(sysevent_id_t *eid, size_t ubuflen, caddr_t ubuf)
1460 {
1461 int error = ENOENT;
1462 log_eventq_t *q;
1463 sysevent_t *ev;
1464 sysevent_id_t eid_copy;
1465
1466 /*
1467 * Copy eid
1468 */
1469 if (copyin(eid, &eid_copy, sizeof (sysevent_id_t)) == -1) {
1470 return (EFAULT);
1471 }
1472
1473 mutex_enter(&eventq_sent_mutex);
1474 q = log_eventq_sent;
1475
1476 /*
1477 * Search for event buffer on the sent queue with matching
1478 * event identifier
1479 */
1480 while (q) {
1481 ev = (sysevent_t *)&q->arg.buf;
1482
1483 if (SE_TIME(ev) != eid_copy.eid_ts ||
1484 SE_SEQ(ev) != eid_copy.eid_seq) {
1485 q = q->next;
1486 continue;
1487 }
1488
1489 if (ubuflen < SE_SIZE(ev)) {
1490 error = EFAULT;
1491 break;
1492 }
1493 if (copyout(ev, ubuf, SE_SIZE(ev)) != 0) {
1494 error = EFAULT;
1495 LOG_DEBUG((CE_NOTE, "Unable to retrieve system event "
1496 "0x%" PRIx64 " from queue: EFAULT\n",
1497 eid->eid_seq));
1498 } else {
1499 error = 0;
1500 }
1501 break;
1502 }
1503
1504 mutex_exit(&eventq_sent_mutex);
1505
1506 return (error);
1507 }
1508
1509 /*
1510 * log_sysevent_free_data - Free kernel copy of the event buffer identified
1511 * by eid (must have already been sent). Called from
1512 * modctl(MODEVENTS, MODEVENTS_FREEDATA).
1513 */
1514 int
log_sysevent_free_data(sysevent_id_t * eid)1515 log_sysevent_free_data(sysevent_id_t *eid)
1516 {
1517 int error = ENOENT;
1518 sysevent_t *ev;
1519 log_eventq_t *q, *prev = NULL;
1520 sysevent_id_t eid_copy;
1521
1522 /*
1523 * Copy eid
1524 */
1525 if (copyin(eid, &eid_copy, sizeof (sysevent_id_t)) == -1) {
1526 return (EFAULT);
1527 }
1528
1529 mutex_enter(&eventq_sent_mutex);
1530 q = log_eventq_sent;
1531
1532 /*
1533 * Look for the event to be freed on the sent queue. Due to delayed
1534 * processing of the event, it may not be on the sent queue yet.
1535 * It is up to the user to retry the free operation to ensure that the
1536 * event is properly freed.
1537 */
1538 while (q) {
1539 ev = (sysevent_t *)&q->arg.buf;
1540
1541 if (SE_TIME(ev) != eid_copy.eid_ts ||
1542 SE_SEQ(ev) != eid_copy.eid_seq) {
1543 prev = q;
1544 q = q->next;
1545 continue;
1546 }
1547 /*
1548 * Take it out of log_eventq_sent and free it
1549 */
1550 if (prev) {
1551 prev->next = q->next;
1552 } else {
1553 log_eventq_sent = q->next;
1554 }
1555 free_packed_event(ev);
1556 error = 0;
1557 break;
1558 }
1559
1560 mutex_exit(&eventq_sent_mutex);
1561
1562 return (error);
1563 }
1564
1565 /*
1566 * log_sysevent_flushq - Begin or resume event buffer delivery. If neccessary,
1567 * create log_event_deliver thread or wake it up
1568 */
1569 /*ARGSUSED*/
1570 void
log_sysevent_flushq(int cmd,uint_t flag)1571 log_sysevent_flushq(int cmd, uint_t flag)
1572 {
1573 mutex_enter(&eventq_head_mutex);
1574
1575 /*
1576 * Start the event delivery thread
1577 * Mark the upcall status as active since we should
1578 * now be able to begin emptying the queue normally.
1579 */
1580 if (!async_thread) {
1581 sysevent_upcall_status = 0;
1582 sysevent_daemon_init = 1;
1583 setup_ddi_poststartup();
1584 async_thread = thread_create(NULL, 0, log_event_deliver,
1585 NULL, 0, &p0, TS_RUN, minclsyspri);
1586 }
1587
1588 log_event_delivery = LOGEVENT_DELIVERY_CONT;
1589 cv_signal(&log_event_cv);
1590 mutex_exit(&eventq_head_mutex);
1591 }
1592
1593 /*
1594 * log_sysevent_filename - Called by syseventd via
1595 * modctl(MODEVENTS, MODEVENTS_SET_DOOR_UPCALL_FILENAME)
1596 * to subsequently bind the event_door.
1597 *
1598 * This routine is called everytime syseventd (re)starts
1599 * and must therefore replay any events buffers that have
1600 * been sent but not freed.
1601 *
1602 * Event buffer delivery begins after a call to
1603 * log_sysevent_flushq().
1604 */
1605 int
log_sysevent_filename(char * file)1606 log_sysevent_filename(char *file)
1607 {
1608 mutex_enter(&event_door_mutex);
1609
1610 (void) strlcpy(logevent_door_upcall_filename, file,
1611 sizeof (logevent_door_upcall_filename));
1612
1613 /* Unbind old event door */
1614 if (event_door != NULL)
1615 door_ki_rele(event_door);
1616 /* Establish door connection with user event daemon (syseventd) */
1617 if (door_ki_open(logevent_door_upcall_filename, &event_door) != 0)
1618 event_door = NULL;
1619
1620 mutex_exit(&event_door_mutex);
1621
1622 /*
1623 * We are called when syseventd restarts. Move all sent, but
1624 * not committed events from log_eventq_sent to log_eventq_head.
1625 * Do it in proper order to maintain increasing event id.
1626 */
1627 mutex_enter(&eventq_head_mutex);
1628
1629 mutex_enter(&eventq_sent_mutex);
1630 while (log_eventq_sent) {
1631 log_eventq_t *tmp = log_eventq_sent->next;
1632 log_eventq_sent->next = log_eventq_head;
1633 if (log_eventq_head == NULL) {
1634 ASSERT(log_eventq_cnt == 0);
1635 log_eventq_tail = log_eventq_sent;
1636 log_eventq_tail->next = NULL;
1637 } else if (log_eventq_head == log_eventq_tail) {
1638 ASSERT(log_eventq_cnt == 1);
1639 ASSERT(log_eventq_head->next == NULL);
1640 ASSERT(log_eventq_tail->next == NULL);
1641 }
1642 log_eventq_head = log_eventq_sent;
1643 log_eventq_sent = tmp;
1644 log_eventq_cnt++;
1645 }
1646 mutex_exit(&eventq_sent_mutex);
1647 mutex_exit(&eventq_head_mutex);
1648
1649 return (0);
1650 }
1651
1652 /*
1653 * queue_sysevent - queue an event buffer
1654 */
1655 static int
queue_sysevent(sysevent_t * ev,sysevent_id_t * eid,int flag)1656 queue_sysevent(sysevent_t *ev, sysevent_id_t *eid, int flag)
1657 {
1658 log_eventq_t *q;
1659
1660 ASSERT(flag == SE_SLEEP || flag == SE_NOSLEEP);
1661
1662 DTRACE_SYSEVENT2(post, evch_bind_t *, NULL, sysevent_impl_t *, ev);
1663
1664 restart:
1665
1666 /* Max Q size exceeded */
1667 mutex_enter(&event_qfull_mutex);
1668 if (sysevent_daemon_init && log_eventq_cnt >= logevent_max_q_sz) {
1669 /*
1670 * If queue full and transport down, return no transport
1671 */
1672 if (sysevent_upcall_status != 0) {
1673 mutex_exit(&event_qfull_mutex);
1674 free_packed_event(ev);
1675 eid->eid_seq = UINT64_C(0);
1676 eid->eid_ts = INT64_C(0);
1677 return (SE_NO_TRANSPORT);
1678 }
1679 if (flag == SE_NOSLEEP) {
1680 mutex_exit(&event_qfull_mutex);
1681 free_packed_event(ev);
1682 eid->eid_seq = UINT64_C(0);
1683 eid->eid_ts = INT64_C(0);
1684 return (SE_EQSIZE);
1685 }
1686 event_qfull_blocked++;
1687 cv_wait(&event_qfull_cv, &event_qfull_mutex);
1688 event_qfull_blocked--;
1689 mutex_exit(&event_qfull_mutex);
1690 goto restart;
1691 }
1692 mutex_exit(&event_qfull_mutex);
1693
1694 mutex_enter(&eventq_head_mutex);
1695
1696 /* Time stamp and assign ID */
1697 SE_SEQ(ev) = eid->eid_seq = atomic_add_64_nv(&kernel_event_id,
1698 (uint64_t)1);
1699 SE_TIME(ev) = eid->eid_ts = gethrtime();
1700
1701 LOG_DEBUG1((CE_CONT, "log_sysevent: class=%d type=%d id=0x%llx\n",
1702 SE_CLASS(ev), SE_SUBCLASS(ev), (longlong_t)SE_SEQ(ev)));
1703
1704 /*
1705 * Put event on eventq
1706 */
1707 q = (log_eventq_t *)((caddr_t)ev - offsetof(log_eventq_t, arg.buf));
1708 q->next = NULL;
1709 if (log_eventq_head == NULL) {
1710 ASSERT(log_eventq_cnt == 0);
1711 log_eventq_head = q;
1712 log_eventq_tail = q;
1713 } else {
1714 if (log_eventq_head == log_eventq_tail) {
1715 ASSERT(log_eventq_cnt == 1);
1716 ASSERT(log_eventq_head->next == NULL);
1717 ASSERT(log_eventq_tail->next == NULL);
1718 }
1719 log_eventq_tail->next = q;
1720 log_eventq_tail = q;
1721 }
1722 log_eventq_cnt++;
1723
1724 /* Signal event delivery thread */
1725 if (log_eventq_cnt == 1) {
1726 cv_signal(&log_event_cv);
1727 }
1728 mutex_exit(&eventq_head_mutex);
1729
1730 return (0);
1731 }
1732
1733 /*
1734 * log_sysevent - kernel system event logger.
1735 *
1736 * Returns SE_ENOMEM if buf allocation failed or SE_EQSIZE if the
1737 * maximum event queue size will be exceeded
1738 * Returns 0 for successfully queued event buffer
1739 */
1740 int
log_sysevent(sysevent_t * ev,int flag,sysevent_id_t * eid)1741 log_sysevent(sysevent_t *ev, int flag, sysevent_id_t *eid)
1742 {
1743 sysevent_t *ev_copy;
1744 int rval;
1745
1746 ASSERT(flag == SE_SLEEP || flag == SE_NOSLEEP);
1747 ASSERT(!(flag == SE_SLEEP && servicing_interrupt()));
1748
1749 ev_copy = se_repack(ev, flag);
1750 if (ev_copy == NULL) {
1751 ASSERT(flag == SE_NOSLEEP);
1752 return (SE_ENOMEM);
1753 }
1754 rval = queue_sysevent(ev_copy, eid, flag);
1755 ASSERT(rval == 0 || rval == SE_ENOMEM || rval == SE_EQSIZE ||
1756 rval == SE_NO_TRANSPORT);
1757 ASSERT(!(flag == SE_SLEEP && (rval == SE_EQSIZE || rval == SE_ENOMEM)));
1758 return (rval);
1759 }
1760
1761 /*
1762 * Publish EC_DEV_ADD and EC_DEV_REMOVE events from devfsadm to lofi.
1763 * This interface is needed to pass device link names to the lofi driver,
1764 * to be returned via ioctl() to the lofiadm command.
1765 * The problem is, if lofiadm is executed in local zone, there is no
1766 * mechanism to announce the device name from the /dev tree back to lofiadm,
1767 * as sysevents are not accessible from local zone and devfsadmd is only
1768 * running in global zone.
1769 *
1770 * Delayed/missed events are not fatal for lofi, as the device name returned
1771 * to lofiadm is for information and can be re-queried with listing
1772 * mappings with lofiadm command.
1773 *
1774 * Once we have a better method, this interface should be reworked.
1775 */
1776 static void
notify_lofi(sysevent_t * ev)1777 notify_lofi(sysevent_t *ev)
1778 {
1779 nvlist_t *nvlist;
1780 char name[10], *class, *driver;
1781 int32_t instance;
1782
1783 class = sysevent_get_class_name(ev);
1784 if ((strcmp(EC_DEV_ADD, class) != 0) &&
1785 (strcmp(EC_DEV_REMOVE, class) != 0)) {
1786 return;
1787 }
1788
1789 (void) sysevent_get_attr_list(ev, &nvlist);
1790 driver = fnvlist_lookup_string(nvlist, DEV_DRIVER_NAME);
1791 instance = fnvlist_lookup_int32(nvlist, DEV_INSTANCE);
1792
1793 /* We are only interested about lofi. */
1794 if (strcmp(driver, "lofi") != 0) {
1795 fnvlist_free(nvlist);
1796 return;
1797 }
1798
1799 /*
1800 * insert or remove device info, then announce the change
1801 * via cv_broadcast.
1802 */
1803 (void) snprintf(name, sizeof (name), "%d", instance);
1804 mutex_enter(&lofi_devlink_cache.ln_lock);
1805 if (strcmp(class, EC_DEV_ADD) == 0) {
1806 fnvlist_add_nvlist(lofi_devlink_cache.ln_data, name, nvlist);
1807 } else {
1808 /* Can not use fnvlist_remove() as we can get ENOENT. */
1809 (void) nvlist_remove_all(lofi_devlink_cache.ln_data, name);
1810 }
1811 cv_broadcast(&lofi_devlink_cache.ln_cv);
1812 mutex_exit(&lofi_devlink_cache.ln_lock);
1813
1814 fnvlist_free(nvlist);
1815 }
1816
1817 /*
1818 * log_usr_sysevent - user system event logger
1819 * Private to devfsadm and accessible only via
1820 * modctl(MODEVENTS, MODEVENTS_POST_EVENT)
1821 */
1822 int
log_usr_sysevent(sysevent_t * ev,int ev_size,sysevent_id_t * eid)1823 log_usr_sysevent(sysevent_t *ev, int ev_size, sysevent_id_t *eid)
1824 {
1825 int ret, copy_sz;
1826 sysevent_t *ev_copy;
1827 sysevent_id_t new_eid;
1828 log_eventq_t *qcopy;
1829
1830 copy_sz = ev_size + offsetof(log_eventq_t, arg) +
1831 offsetof(log_event_upcall_arg_t, buf);
1832 qcopy = kmem_zalloc(copy_sz, KM_SLEEP);
1833 ev_copy = (sysevent_t *)&qcopy->arg.buf;
1834
1835 /*
1836 * Copy event
1837 */
1838 if (copyin(ev, ev_copy, ev_size) == -1) {
1839 kmem_free(qcopy, copy_sz);
1840 return (EFAULT);
1841 }
1842
1843 notify_lofi(ev_copy);
1844
1845 if ((ret = queue_sysevent(ev_copy, &new_eid, SE_NOSLEEP)) != 0) {
1846 if (ret == SE_ENOMEM || ret == SE_EQSIZE)
1847 return (EAGAIN);
1848 else
1849 return (EIO);
1850 }
1851
1852 if (copyout(&new_eid, eid, sizeof (sysevent_id_t)) == -1) {
1853 return (EFAULT);
1854 }
1855
1856 return (0);
1857 }
1858
1859
1860
1861 int
ddi_log_sysevent(dev_info_t * dip,char * vendor,char * class,char * subclass,nvlist_t * attr_list,sysevent_id_t * eidp,int sleep_flag)1862 ddi_log_sysevent(
1863 dev_info_t *dip,
1864 char *vendor,
1865 char *class,
1866 char *subclass,
1867 nvlist_t *attr_list,
1868 sysevent_id_t *eidp,
1869 int sleep_flag)
1870 {
1871 sysevent_attr_list_t *list = (sysevent_attr_list_t *)attr_list;
1872 char pubstr[32];
1873 sysevent_t *event;
1874 sysevent_id_t eid;
1875 const char *drvname;
1876 char *publisher;
1877 int se_flag;
1878 int rval;
1879 int n;
1880
1881 if (sleep_flag == DDI_SLEEP && servicing_interrupt()) {
1882 cmn_err(CE_NOTE, "!ddi_log_syevent: driver %s%d - cannot queue "
1883 "event from interrupt context with sleep semantics\n",
1884 ddi_driver_name(dip), ddi_get_instance(dip));
1885 return (DDI_ECONTEXT);
1886 }
1887
1888 drvname = ddi_driver_name(dip);
1889 n = strlen(vendor) + strlen(drvname) + 7;
1890 if (n < sizeof (pubstr)) {
1891 publisher = pubstr;
1892 } else {
1893 publisher = kmem_alloc(n,
1894 (sleep_flag == DDI_SLEEP) ? KM_SLEEP : KM_NOSLEEP);
1895 if (publisher == NULL) {
1896 return (DDI_ENOMEM);
1897 }
1898 }
1899 (void) strcpy(publisher, vendor);
1900 (void) strcat(publisher, ":kern:");
1901 (void) strcat(publisher, drvname);
1902
1903 se_flag = (sleep_flag == DDI_SLEEP) ? SE_SLEEP : SE_NOSLEEP;
1904 event = sysevent_alloc(class, subclass, publisher, se_flag);
1905
1906 if (publisher != pubstr) {
1907 kmem_free(publisher, n);
1908 }
1909
1910 if (event == NULL) {
1911 return (DDI_ENOMEM);
1912 }
1913
1914 if (list) {
1915 (void) sysevent_attach_attributes(event, list);
1916 }
1917
1918 rval = log_sysevent(event, se_flag, &eid);
1919 if (list) {
1920 sysevent_detach_attributes(event);
1921 }
1922 sysevent_free(event);
1923 if (rval == 0) {
1924 if (eidp) {
1925 eidp->eid_seq = eid.eid_seq;
1926 eidp->eid_ts = eid.eid_ts;
1927 }
1928 return (DDI_SUCCESS);
1929 }
1930 if (rval == SE_NO_TRANSPORT)
1931 return (DDI_ETRANSPORT);
1932
1933 ASSERT(rval == SE_ENOMEM || rval == SE_EQSIZE);
1934 return ((rval == SE_ENOMEM) ? DDI_ENOMEM : DDI_EBUSY);
1935 }
1936
1937 uint64_t
log_sysevent_new_id(void)1938 log_sysevent_new_id(void)
1939 {
1940 return (atomic_add_64_nv(&kernel_event_id, (uint64_t)1));
1941 }
1942