1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright 2013 Nexenta Systems, Inc. All rights reserved. 24 * Copyright 2016 Toomas Soome <tsoome@me.com> 25 */ 26 27 #include <sys/types.h> 28 #include <sys/errno.h> 29 #include <sys/stropts.h> 30 #include <sys/debug.h> 31 #include <sys/ddi.h> 32 #include <sys/sunddi.h> 33 #include <sys/vmem.h> 34 #include <sys/cmn_err.h> 35 #include <sys/callb.h> 36 #include <sys/sysevent.h> 37 #include <sys/sysevent_impl.h> 38 #include <sys/sysevent/dev.h> 39 #include <sys/modctl.h> 40 #include <sys/lofi_impl.h> 41 #include <sys/sysmacros.h> 42 #include <sys/disp.h> 43 #include <sys/autoconf.h> 44 #include <sys/atomic.h> 45 #include <sys/sdt.h> 46 47 /* for doors */ 48 #include <sys/pathname.h> 49 #include <sys/door.h> 50 #include <sys/kmem.h> 51 #include <sys/cpuvar.h> 52 #include <sys/fs/snode.h> 53 54 /* 55 * log_sysevent.c - Provides the interfaces for kernel event publication 56 * to the sysevent event daemon (syseventd). 57 */ 58 59 /* 60 * Debug stuff 61 */ 62 static int log_event_debug = 0; 63 #define LOG_DEBUG(args) if (log_event_debug) cmn_err args 64 #ifdef DEBUG 65 #define LOG_DEBUG1(args) if (log_event_debug > 1) cmn_err args 66 #else 67 #define LOG_DEBUG1(args) 68 #endif 69 70 /* 71 * Local static vars 72 */ 73 /* queue of event buffers sent to syseventd */ 74 static log_eventq_t *log_eventq_sent = NULL; 75 76 /* 77 * Count of event buffers in the queue 78 */ 79 int log_eventq_cnt = 0; 80 81 /* queue of event buffers awaiting delivery to syseventd */ 82 static log_eventq_t *log_eventq_head = NULL; 83 static log_eventq_t *log_eventq_tail = NULL; 84 static uint64_t kernel_event_id = 0; 85 static int encoding = NV_ENCODE_NATIVE; 86 87 /* log event delivery flag */ 88 #define LOGEVENT_DELIVERY_OK 0 /* OK to deliver event buffers */ 89 #define LOGEVENT_DELIVERY_CONT 1 /* Continue to deliver event buffers */ 90 #define LOGEVENT_DELIVERY_HOLD 2 /* Hold delivering of event buffers */ 91 92 /* 93 * Tunable maximum event buffer queue size. Size depends on how many events 94 * the queue must hold when syseventd is not available, for example during 95 * system startup. Experience showed that more than 2000 events could be posted 96 * due to correctable memory errors. 97 */ 98 int logevent_max_q_sz = 5000; 99 100 101 static int log_event_delivery = LOGEVENT_DELIVERY_HOLD; 102 static char logevent_door_upcall_filename[MAXPATHLEN]; 103 104 static door_handle_t event_door = NULL; /* Door for upcalls */ 105 static kmutex_t event_door_mutex; /* To protect event_door */ 106 107 /* 108 * async thread-related variables 109 * 110 * eventq_head_mutex - synchronizes access to the kernel event queue 111 * 112 * eventq_sent_mutex - synchronizes access to the queue of event sents to 113 * userlevel 114 * 115 * log_event_cv - condition variable signaled when an event has arrived or 116 * userlevel ready to process event buffers 117 * 118 * async_thread - asynchronous event delivery thread to userlevel daemon. 119 * 120 * sysevent_upcall_status - status of the door upcall link 121 */ 122 static kmutex_t eventq_head_mutex; 123 static kmutex_t eventq_sent_mutex; 124 static kcondvar_t log_event_cv; 125 static kthread_id_t async_thread = NULL; 126 127 static kmutex_t event_qfull_mutex; 128 static kcondvar_t event_qfull_cv; 129 static int event_qfull_blocked = 0; 130 131 static int sysevent_upcall_status = -1; 132 static kmutex_t registered_channel_mutex; 133 134 /* 135 * Indicates the syseventd daemon has begun taking events 136 */ 137 int sysevent_daemon_init = 0; 138 139 /* 140 * Back-off delay when door_ki_upcall returns EAGAIN. Typically 141 * caused by the server process doing a forkall(). Since all threads 142 * but the thread actually doing the forkall() need to be quiesced, 143 * the fork may take some time. The min/max pause are in units 144 * of clock ticks. 145 */ 146 #define LOG_EVENT_MIN_PAUSE 8 147 #define LOG_EVENT_MAX_PAUSE 128 148 149 static kmutex_t event_pause_mutex; 150 static kcondvar_t event_pause_cv; 151 static int event_pause_state = 0; 152 153 /* Cached device links for lofi. */ 154 lofi_nvl_t lofi_devlink_cache; 155 156 /*ARGSUSED*/ 157 static void 158 log_event_busy_timeout(void *arg) 159 { 160 mutex_enter(&event_pause_mutex); 161 event_pause_state = 0; 162 cv_signal(&event_pause_cv); 163 mutex_exit(&event_pause_mutex); 164 } 165 166 static void 167 log_event_pause(int nticks) 168 { 169 timeout_id_t id; 170 171 /* 172 * Only one use of log_event_pause at a time 173 */ 174 ASSERT(event_pause_state == 0); 175 176 event_pause_state = 1; 177 id = timeout(log_event_busy_timeout, NULL, nticks); 178 if (id != 0) { 179 mutex_enter(&event_pause_mutex); 180 while (event_pause_state) 181 cv_wait(&event_pause_cv, &event_pause_mutex); 182 mutex_exit(&event_pause_mutex); 183 } 184 event_pause_state = 0; 185 } 186 187 188 /* 189 * log_event_upcall - Perform the upcall to syseventd for event buffer delivery. 190 * Check for rebinding errors 191 * This buffer is reused to by the syseventd door_return 192 * to hold the result code 193 */ 194 static int 195 log_event_upcall(log_event_upcall_arg_t *arg) 196 { 197 int error; 198 size_t size; 199 sysevent_t *ev; 200 door_arg_t darg, save_arg; 201 int retry; 202 int neagain = 0; 203 int neintr = 0; 204 int nticks = LOG_EVENT_MIN_PAUSE; 205 206 /* Initialize door args */ 207 ev = (sysevent_t *)&arg->buf; 208 size = sizeof (log_event_upcall_arg_t) + SE_PAYLOAD_SZ(ev); 209 210 darg.rbuf = (char *)arg; 211 darg.data_ptr = (char *)arg; 212 darg.rsize = size; 213 darg.data_size = size; 214 darg.desc_ptr = NULL; 215 darg.desc_num = 0; 216 217 LOG_DEBUG1((CE_CONT, "log_event_upcall: 0x%llx\n", 218 (longlong_t)SE_SEQ((sysevent_t *)&arg->buf))); 219 220 save_arg = darg; 221 for (retry = 0; ; retry++) { 222 223 mutex_enter(&event_door_mutex); 224 if (event_door == NULL) { 225 mutex_exit(&event_door_mutex); 226 227 return (EBADF); 228 } 229 230 if ((error = door_ki_upcall_limited(event_door, &darg, NULL, 231 SIZE_MAX, 0)) == 0) { 232 mutex_exit(&event_door_mutex); 233 break; 234 } 235 236 /* 237 * EBADF is handled outside the switch below because we need to 238 * hold event_door_mutex a bit longer 239 */ 240 if (error == EBADF) { 241 /* Server died */ 242 door_ki_rele(event_door); 243 event_door = NULL; 244 245 mutex_exit(&event_door_mutex); 246 return (error); 247 } 248 249 mutex_exit(&event_door_mutex); 250 251 /* 252 * The EBADF case is already handled above with event_door_mutex 253 * held 254 */ 255 switch (error) { 256 case EINTR: 257 neintr++; 258 log_event_pause(2); 259 darg = save_arg; 260 break; 261 case EAGAIN: 262 /* cannot deliver upcall - process may be forking */ 263 neagain++; 264 log_event_pause(nticks); 265 nticks <<= 1; 266 if (nticks > LOG_EVENT_MAX_PAUSE) 267 nticks = LOG_EVENT_MAX_PAUSE; 268 darg = save_arg; 269 break; 270 default: 271 cmn_err(CE_CONT, 272 "log_event_upcall: door_ki_upcall error %d\n", 273 error); 274 return (error); 275 } 276 } 277 278 if (neagain > 0 || neintr > 0) { 279 LOG_DEBUG((CE_CONT, "upcall: eagain=%d eintr=%d nticks=%d\n", 280 neagain, neintr, nticks)); 281 } 282 283 LOG_DEBUG1((CE_CONT, "log_event_upcall:\n\t" 284 "error=%d rptr1=%p rptr2=%p dptr2=%p ret1=%x ret2=%x\n", 285 error, (void *)arg, (void *)darg.rbuf, 286 (void *)darg.data_ptr, 287 *((int *)(darg.rbuf)), *((int *)(darg.data_ptr)))); 288 289 if (!error) { 290 /* 291 * upcall was successfully executed. Check return code. 292 */ 293 error = *((int *)(darg.rbuf)); 294 } 295 296 return (error); 297 } 298 299 /* 300 * log_event_deliver - event delivery thread 301 * Deliver all events on the event queue to syseventd. 302 * If the daemon can not process events, stop event 303 * delivery and wait for an indication from the 304 * daemon to resume delivery. 305 * 306 * Once all event buffers have been delivered, wait 307 * until there are more to deliver. 308 */ 309 static void 310 log_event_deliver() 311 { 312 log_eventq_t *q; 313 int upcall_err; 314 callb_cpr_t cprinfo; 315 316 CALLB_CPR_INIT(&cprinfo, &eventq_head_mutex, callb_generic_cpr, 317 "logevent"); 318 319 /* 320 * eventq_head_mutex is exited (released) when there are no more 321 * events to process from the eventq in cv_wait(). 322 */ 323 mutex_enter(&eventq_head_mutex); 324 325 for (;;) { 326 LOG_DEBUG1((CE_CONT, "log_event_deliver: head = %p\n", 327 (void *)log_eventq_head)); 328 329 upcall_err = 0; 330 q = log_eventq_head; 331 332 while (q) { 333 if (log_event_delivery == LOGEVENT_DELIVERY_HOLD) { 334 upcall_err = EAGAIN; 335 break; 336 } 337 338 log_event_delivery = LOGEVENT_DELIVERY_OK; 339 340 /* 341 * Release event queue lock during upcall to 342 * syseventd 343 */ 344 mutex_exit(&eventq_head_mutex); 345 if ((upcall_err = log_event_upcall(&q->arg)) != 0) { 346 mutex_enter(&eventq_head_mutex); 347 break; 348 } 349 350 /* 351 * We may be able to add entries to 352 * the queue now. 353 */ 354 if (event_qfull_blocked > 0 && 355 log_eventq_cnt < logevent_max_q_sz) { 356 mutex_enter(&event_qfull_mutex); 357 if (event_qfull_blocked > 0) { 358 cv_signal(&event_qfull_cv); 359 } 360 mutex_exit(&event_qfull_mutex); 361 } 362 363 mutex_enter(&eventq_head_mutex); 364 365 /* 366 * Daemon restart can cause entries to be moved from 367 * the sent queue and put back on the event queue. 368 * If this has occurred, replay event queue 369 * processing from the new queue head. 370 */ 371 if (q != log_eventq_head) { 372 q = log_eventq_head; 373 LOG_DEBUG((CE_CONT, "log_event_deliver: " 374 "door upcall/daemon restart race\n")); 375 } else { 376 log_eventq_t *next; 377 378 /* 379 * Move the event to the sent queue when a 380 * successful delivery has been made. 381 */ 382 mutex_enter(&eventq_sent_mutex); 383 next = q->next; 384 q->next = log_eventq_sent; 385 log_eventq_sent = q; 386 q = next; 387 log_eventq_head = q; 388 log_eventq_cnt--; 389 if (q == NULL) { 390 ASSERT(log_eventq_cnt == 0); 391 log_eventq_tail = NULL; 392 } 393 mutex_exit(&eventq_sent_mutex); 394 } 395 } 396 397 switch (upcall_err) { 398 case 0: 399 /* 400 * Success. The queue is empty. 401 */ 402 sysevent_upcall_status = 0; 403 break; 404 case EAGAIN: 405 /* 406 * Delivery is on hold (but functional). 407 */ 408 sysevent_upcall_status = 0; 409 /* 410 * If the user has already signaled for delivery 411 * resumption, continue. Otherwise, we wait until 412 * we are signaled to continue. 413 */ 414 if (log_event_delivery == LOGEVENT_DELIVERY_CONT) 415 continue; 416 log_event_delivery = LOGEVENT_DELIVERY_HOLD; 417 418 LOG_DEBUG1((CE_CONT, "log_event_deliver: EAGAIN\n")); 419 break; 420 default: 421 LOG_DEBUG((CE_CONT, "log_event_deliver: " 422 "upcall err %d\n", upcall_err)); 423 sysevent_upcall_status = upcall_err; 424 /* 425 * Signal everyone waiting that transport is down 426 */ 427 if (event_qfull_blocked > 0) { 428 mutex_enter(&event_qfull_mutex); 429 if (event_qfull_blocked > 0) { 430 cv_broadcast(&event_qfull_cv); 431 } 432 mutex_exit(&event_qfull_mutex); 433 } 434 break; 435 } 436 437 CALLB_CPR_SAFE_BEGIN(&cprinfo); 438 cv_wait(&log_event_cv, &eventq_head_mutex); 439 CALLB_CPR_SAFE_END(&cprinfo, &eventq_head_mutex); 440 } 441 /* NOTREACHED */ 442 } 443 444 /* 445 * Set up the nvlist based data cache. User by lofi to find 446 * device name for mapped file. 447 */ 448 static void 449 lofi_nvl_init(lofi_nvl_t *cache) 450 { 451 mutex_init(&cache->ln_lock, NULL, MUTEX_DRIVER, NULL); 452 cv_init(&cache->ln_cv, NULL, CV_DRIVER, NULL); 453 (void) nvlist_alloc(&cache->ln_data, NV_UNIQUE_NAME, KM_SLEEP); 454 } 455 456 /* 457 * log_event_init - Allocate and initialize log_event data structures. 458 */ 459 void 460 log_event_init() 461 { 462 /* Set up devlink cache for lofi. */ 463 lofi_nvl_init(&lofi_devlink_cache); 464 465 mutex_init(&event_door_mutex, NULL, MUTEX_DEFAULT, NULL); 466 467 mutex_init(&eventq_head_mutex, NULL, MUTEX_DEFAULT, NULL); 468 mutex_init(&eventq_sent_mutex, NULL, MUTEX_DEFAULT, NULL); 469 cv_init(&log_event_cv, NULL, CV_DEFAULT, NULL); 470 471 mutex_init(&event_qfull_mutex, NULL, MUTEX_DEFAULT, NULL); 472 cv_init(&event_qfull_cv, NULL, CV_DEFAULT, NULL); 473 474 mutex_init(&event_pause_mutex, NULL, MUTEX_DEFAULT, NULL); 475 cv_init(&event_pause_cv, NULL, CV_DEFAULT, NULL); 476 477 mutex_init(®istered_channel_mutex, NULL, MUTEX_DEFAULT, NULL); 478 sysevent_evc_init(); 479 } 480 481 /* 482 * The following routines are used by kernel event publishers to 483 * allocate, append and free event buffers 484 */ 485 /* 486 * sysevent_alloc - Allocate new eventq struct. This element contains 487 * an event buffer that will be used in a subsequent 488 * call to log_sysevent. 489 */ 490 sysevent_t * 491 sysevent_alloc(char *class, char *subclass, char *pub, int flag) 492 { 493 int payload_sz; 494 int class_sz, subclass_sz, pub_sz; 495 int aligned_class_sz, aligned_subclass_sz, aligned_pub_sz; 496 sysevent_t *ev; 497 log_eventq_t *q; 498 499 ASSERT(class != NULL); 500 ASSERT(subclass != NULL); 501 ASSERT(pub != NULL); 502 503 /* 504 * Calculate and reserve space for the class, subclass and 505 * publisher strings in the event buffer 506 */ 507 class_sz = strlen(class) + 1; 508 subclass_sz = strlen(subclass) + 1; 509 pub_sz = strlen(pub) + 1; 510 511 ASSERT((class_sz <= MAX_CLASS_LEN) && (subclass_sz 512 <= MAX_SUBCLASS_LEN) && (pub_sz <= MAX_PUB_LEN)); 513 514 /* String sizes must be 64-bit aligned in the event buffer */ 515 aligned_class_sz = SE_ALIGN(class_sz); 516 aligned_subclass_sz = SE_ALIGN(subclass_sz); 517 aligned_pub_sz = SE_ALIGN(pub_sz); 518 519 payload_sz = (aligned_class_sz - sizeof (uint64_t)) + 520 (aligned_subclass_sz - sizeof (uint64_t)) + 521 (aligned_pub_sz - sizeof (uint64_t)) - sizeof (uint64_t); 522 523 /* 524 * Allocate event buffer plus additional sysevent queue 525 * and payload overhead. 526 */ 527 q = kmem_zalloc(sizeof (log_eventq_t) + payload_sz, flag); 528 if (q == NULL) { 529 return (NULL); 530 } 531 532 /* Initialize the event buffer data */ 533 ev = (sysevent_t *)&q->arg.buf; 534 SE_VERSION(ev) = SYS_EVENT_VERSION; 535 bcopy(class, SE_CLASS_NAME(ev), class_sz); 536 537 SE_SUBCLASS_OFF(ev) = SE_ALIGN(offsetof(sysevent_impl_t, se_class_name)) 538 + aligned_class_sz; 539 bcopy(subclass, SE_SUBCLASS_NAME(ev), subclass_sz); 540 541 SE_PUB_OFF(ev) = SE_SUBCLASS_OFF(ev) + aligned_subclass_sz; 542 bcopy(pub, SE_PUB_NAME(ev), pub_sz); 543 544 SE_ATTR_PTR(ev) = UINT64_C(0); 545 SE_PAYLOAD_SZ(ev) = payload_sz; 546 547 return (ev); 548 } 549 550 /* 551 * sysevent_free - Free event buffer and any attribute data. 552 */ 553 void 554 sysevent_free(sysevent_t *ev) 555 { 556 log_eventq_t *q; 557 nvlist_t *nvl; 558 559 ASSERT(ev != NULL); 560 q = (log_eventq_t *)((caddr_t)ev - offsetof(log_eventq_t, arg.buf)); 561 nvl = (nvlist_t *)(uintptr_t)SE_ATTR_PTR(ev); 562 563 if (nvl != NULL) { 564 size_t size = 0; 565 (void) nvlist_size(nvl, &size, encoding); 566 SE_PAYLOAD_SZ(ev) -= size; 567 nvlist_free(nvl); 568 } 569 kmem_free(q, sizeof (log_eventq_t) + SE_PAYLOAD_SZ(ev)); 570 } 571 572 /* 573 * free_packed_event - Free packed event buffer 574 */ 575 static void 576 free_packed_event(sysevent_t *ev) 577 { 578 log_eventq_t *q; 579 580 ASSERT(ev != NULL); 581 q = (log_eventq_t *)((caddr_t)ev - offsetof(log_eventq_t, arg.buf)); 582 583 kmem_free(q, sizeof (log_eventq_t) + SE_PAYLOAD_SZ(ev)); 584 } 585 586 /* 587 * sysevent_add_attr - Add new attribute element to an event attribute list 588 * If attribute list is NULL, start a new list. 589 */ 590 int 591 sysevent_add_attr(sysevent_attr_list_t **ev_attr_list, char *name, 592 sysevent_value_t *se_value, int flag) 593 { 594 int error; 595 nvlist_t **nvlp = (nvlist_t **)ev_attr_list; 596 597 if (nvlp == NULL || se_value == NULL) { 598 return (SE_EINVAL); 599 } 600 601 /* 602 * attr_sz is composed of the value data size + the name data size + 603 * any header data. 64-bit aligned. 604 */ 605 if (strlen(name) >= MAX_ATTR_NAME) { 606 return (SE_EINVAL); 607 } 608 609 /* 610 * Allocate nvlist 611 */ 612 if ((*nvlp == NULL) && 613 (nvlist_alloc(nvlp, NV_UNIQUE_NAME_TYPE, flag) != 0)) 614 return (SE_ENOMEM); 615 616 /* add the attribute */ 617 switch (se_value->value_type) { 618 case SE_DATA_TYPE_BYTE: 619 error = nvlist_add_byte(*ev_attr_list, name, 620 se_value->value.sv_byte); 621 break; 622 case SE_DATA_TYPE_INT16: 623 error = nvlist_add_int16(*ev_attr_list, name, 624 se_value->value.sv_int16); 625 break; 626 case SE_DATA_TYPE_UINT16: 627 error = nvlist_add_uint16(*ev_attr_list, name, 628 se_value->value.sv_uint16); 629 break; 630 case SE_DATA_TYPE_INT32: 631 error = nvlist_add_int32(*ev_attr_list, name, 632 se_value->value.sv_int32); 633 break; 634 case SE_DATA_TYPE_UINT32: 635 error = nvlist_add_uint32(*ev_attr_list, name, 636 se_value->value.sv_uint32); 637 break; 638 case SE_DATA_TYPE_INT64: 639 error = nvlist_add_int64(*ev_attr_list, name, 640 se_value->value.sv_int64); 641 break; 642 case SE_DATA_TYPE_UINT64: 643 error = nvlist_add_uint64(*ev_attr_list, name, 644 se_value->value.sv_uint64); 645 break; 646 case SE_DATA_TYPE_STRING: 647 if (strlen((char *)se_value->value.sv_string) >= MAX_STRING_SZ) 648 return (SE_EINVAL); 649 error = nvlist_add_string(*ev_attr_list, name, 650 se_value->value.sv_string); 651 break; 652 case SE_DATA_TYPE_BYTES: 653 if (se_value->value.sv_bytes.size > MAX_BYTE_ARRAY) 654 return (SE_EINVAL); 655 error = nvlist_add_byte_array(*ev_attr_list, name, 656 se_value->value.sv_bytes.data, 657 se_value->value.sv_bytes.size); 658 break; 659 case SE_DATA_TYPE_TIME: 660 error = nvlist_add_hrtime(*ev_attr_list, name, 661 se_value->value.sv_time); 662 break; 663 default: 664 return (SE_EINVAL); 665 } 666 667 return (error ? SE_ENOMEM : 0); 668 } 669 670 /* 671 * sysevent_free_attr - Free an attribute list not associated with an 672 * event buffer. 673 */ 674 void 675 sysevent_free_attr(sysevent_attr_list_t *ev_attr_list) 676 { 677 nvlist_free((nvlist_t *)ev_attr_list); 678 } 679 680 /* 681 * sysevent_attach_attributes - Attach an attribute list to an event buffer. 682 * 683 * This data will be re-packed into contiguous memory when the event 684 * buffer is posted to log_sysevent. 685 */ 686 int 687 sysevent_attach_attributes(sysevent_t *ev, sysevent_attr_list_t *ev_attr_list) 688 { 689 size_t size = 0; 690 691 if (SE_ATTR_PTR(ev) != UINT64_C(0)) { 692 return (SE_EINVAL); 693 } 694 695 SE_ATTR_PTR(ev) = (uintptr_t)ev_attr_list; 696 (void) nvlist_size((nvlist_t *)ev_attr_list, &size, encoding); 697 SE_PAYLOAD_SZ(ev) += size; 698 SE_FLAG(ev) = 0; 699 700 return (0); 701 } 702 703 /* 704 * sysevent_detach_attributes - Detach but don't free attribute list from the 705 * event buffer. 706 */ 707 void 708 sysevent_detach_attributes(sysevent_t *ev) 709 { 710 size_t size = 0; 711 nvlist_t *nvl; 712 713 if ((nvl = (nvlist_t *)(uintptr_t)SE_ATTR_PTR(ev)) == NULL) { 714 return; 715 } 716 717 SE_ATTR_PTR(ev) = UINT64_C(0); 718 (void) nvlist_size(nvl, &size, encoding); 719 SE_PAYLOAD_SZ(ev) -= size; 720 ASSERT(SE_PAYLOAD_SZ(ev) >= 0); 721 } 722 723 /* 724 * sysevent_attr_name - Get name of attribute 725 */ 726 char * 727 sysevent_attr_name(sysevent_attr_t *attr) 728 { 729 if (attr == NULL) { 730 return (NULL); 731 } 732 733 return (nvpair_name(attr)); 734 } 735 736 /* 737 * sysevent_attr_type - Get type of attribute 738 */ 739 int 740 sysevent_attr_type(sysevent_attr_t *attr) 741 { 742 /* 743 * The SE_DATA_TYPE_* are typedef'ed to be the 744 * same value as DATA_TYPE_* 745 */ 746 return (nvpair_type((nvpair_t *)attr)); 747 } 748 749 /* 750 * Repack event buffer into contiguous memory 751 */ 752 static sysevent_t * 753 se_repack(sysevent_t *ev, int flag) 754 { 755 size_t copy_len; 756 caddr_t attr; 757 size_t size; 758 uint64_t attr_offset; 759 sysevent_t *copy; 760 log_eventq_t *qcopy; 761 sysevent_attr_list_t *nvl; 762 763 copy_len = sizeof (log_eventq_t) + SE_PAYLOAD_SZ(ev); 764 qcopy = kmem_zalloc(copy_len, flag); 765 if (qcopy == NULL) { 766 return (NULL); 767 } 768 copy = (sysevent_t *)&qcopy->arg.buf; 769 770 /* 771 * Copy event header, class, subclass and publisher names 772 * Set the attribute offset (in number of bytes) to contiguous 773 * memory after the header. 774 */ 775 776 attr_offset = SE_ATTR_OFF(ev); 777 778 ASSERT((caddr_t)copy + attr_offset <= (caddr_t)copy + copy_len); 779 780 bcopy(ev, copy, attr_offset); 781 782 /* Check if attribute list exists */ 783 if ((nvl = (nvlist_t *)(uintptr_t)SE_ATTR_PTR(ev)) == NULL) { 784 return (copy); 785 } 786 787 /* 788 * Copy attribute data to contiguous memory 789 */ 790 attr = (char *)copy + attr_offset; 791 (void) nvlist_size(nvl, &size, encoding); 792 if (nvlist_pack(nvl, &attr, &size, encoding, flag) != 0) { 793 kmem_free(qcopy, copy_len); 794 return (NULL); 795 } 796 SE_ATTR_PTR(copy) = UINT64_C(0); 797 SE_FLAG(copy) = SE_PACKED_BUF; 798 799 return (copy); 800 } 801 802 /* 803 * The sysevent registration provides a persistent and reliable database 804 * for channel information for sysevent channel publishers and 805 * subscribers. 806 * 807 * A channel is created and maintained by the kernel upon the first 808 * SE_OPEN_REGISTRATION operation to log_sysevent_register(). Channel 809 * event subscription information is updated as publishers or subscribers 810 * perform subsequent operations (SE_BIND_REGISTRATION, SE_REGISTER, 811 * SE_UNREGISTER and SE_UNBIND_REGISTRATION). 812 * 813 * For consistency, id's are assigned for every publisher or subscriber 814 * bound to a particular channel. The id's are used to constrain resources 815 * and perform subscription lookup. 816 * 817 * Associated with each channel is a hashed list of the current subscriptions 818 * based upon event class and subclasses. A subscription contains a class name, 819 * list of possible subclasses and an array of subscriber ids. Subscriptions 820 * are updated for every SE_REGISTER or SE_UNREGISTER operation. 821 * 822 * Channels are closed once the last subscriber or publisher performs a 823 * SE_CLOSE_REGISTRATION operation. All resources associated with the named 824 * channel are freed upon last close. 825 * 826 * Locking: 827 * Every operation to log_sysevent() is protected by a single lock, 828 * registered_channel_mutex. It is expected that the granularity of 829 * a single lock is sufficient given the frequency that updates will 830 * occur. 831 * 832 * If this locking strategy proves to be too contentious, a per-hash 833 * or per-channel locking strategy may be implemented. 834 */ 835 836 837 #define CHANN_HASH(channel_name) (hash_func(channel_name) \ 838 % CHAN_HASH_SZ) 839 840 sysevent_channel_descriptor_t *registered_channels[CHAN_HASH_SZ]; 841 static int channel_cnt; 842 static void remove_all_class(sysevent_channel_descriptor_t *chan, 843 uint32_t sub_id); 844 845 static uint32_t 846 hash_func(const char *s) 847 { 848 uint32_t result = 0; 849 uint_t g; 850 851 while (*s != '\0') { 852 result <<= 4; 853 result += (uint32_t)*s++; 854 g = result & 0xf0000000; 855 if (g != 0) { 856 result ^= g >> 24; 857 result ^= g; 858 } 859 } 860 861 return (result); 862 } 863 864 static sysevent_channel_descriptor_t * 865 get_channel(char *channel_name) 866 { 867 int hash_index; 868 sysevent_channel_descriptor_t *chan_list; 869 870 if (channel_name == NULL) 871 return (NULL); 872 873 /* Find channel descriptor */ 874 hash_index = CHANN_HASH(channel_name); 875 chan_list = registered_channels[hash_index]; 876 while (chan_list != NULL) { 877 if (strcmp(chan_list->scd_channel_name, channel_name) == 0) { 878 break; 879 } else { 880 chan_list = chan_list->scd_next; 881 } 882 } 883 884 return (chan_list); 885 } 886 887 static class_lst_t * 888 create_channel_registration(sysevent_channel_descriptor_t *chan, 889 char *event_class, int index) 890 { 891 size_t class_len; 892 class_lst_t *c_list; 893 894 class_len = strlen(event_class) + 1; 895 c_list = kmem_zalloc(sizeof (class_lst_t), KM_SLEEP); 896 c_list->cl_name = kmem_zalloc(class_len, KM_SLEEP); 897 bcopy(event_class, c_list->cl_name, class_len); 898 899 c_list->cl_subclass_list = 900 kmem_zalloc(sizeof (subclass_lst_t), KM_SLEEP); 901 c_list->cl_subclass_list->sl_name = 902 kmem_zalloc(sizeof (EC_SUB_ALL), KM_SLEEP); 903 bcopy(EC_SUB_ALL, c_list->cl_subclass_list->sl_name, 904 sizeof (EC_SUB_ALL)); 905 906 c_list->cl_next = chan->scd_class_list_tbl[index]; 907 chan->scd_class_list_tbl[index] = c_list; 908 909 return (c_list); 910 } 911 912 static void 913 free_channel_registration(sysevent_channel_descriptor_t *chan) 914 { 915 int i; 916 class_lst_t *clist, *next_clist; 917 subclass_lst_t *sclist, *next_sc; 918 919 for (i = 0; i <= CLASS_HASH_SZ; ++i) { 920 921 clist = chan->scd_class_list_tbl[i]; 922 while (clist != NULL) { 923 sclist = clist->cl_subclass_list; 924 while (sclist != NULL) { 925 kmem_free(sclist->sl_name, 926 strlen(sclist->sl_name) + 1); 927 next_sc = sclist->sl_next; 928 kmem_free(sclist, sizeof (subclass_lst_t)); 929 sclist = next_sc; 930 } 931 kmem_free(clist->cl_name, 932 strlen(clist->cl_name) + 1); 933 next_clist = clist->cl_next; 934 kmem_free(clist, sizeof (class_lst_t)); 935 clist = next_clist; 936 } 937 } 938 chan->scd_class_list_tbl[0] = NULL; 939 } 940 941 static int 942 open_channel(char *channel_name) 943 { 944 int hash_index; 945 sysevent_channel_descriptor_t *chan, *chan_list; 946 947 948 if (channel_cnt > MAX_CHAN) { 949 return (-1); 950 } 951 952 /* Find channel descriptor */ 953 hash_index = CHANN_HASH(channel_name); 954 chan_list = registered_channels[hash_index]; 955 while (chan_list != NULL) { 956 if (strcmp(chan_list->scd_channel_name, channel_name) == 0) { 957 chan_list->scd_ref_cnt++; 958 kmem_free(channel_name, strlen(channel_name) + 1); 959 return (0); 960 } else { 961 chan_list = chan_list->scd_next; 962 } 963 } 964 965 966 /* New channel descriptor */ 967 chan = kmem_zalloc(sizeof (sysevent_channel_descriptor_t), KM_SLEEP); 968 chan->scd_channel_name = channel_name; 969 970 /* 971 * Create subscriber ids in the range [1, MAX_SUBSCRIBERS). 972 * Subscriber id 0 is never allocated, but is used as a reserved id 973 * by libsysevent 974 */ 975 if ((chan->scd_subscriber_cache = vmem_create(channel_name, (void *)1, 976 MAX_SUBSCRIBERS + 1, 1, NULL, NULL, NULL, 0, 977 VM_NOSLEEP | VMC_IDENTIFIER)) == NULL) { 978 kmem_free(chan, sizeof (sysevent_channel_descriptor_t)); 979 return (-1); 980 } 981 if ((chan->scd_publisher_cache = vmem_create(channel_name, (void *)1, 982 MAX_PUBLISHERS + 1, 1, NULL, NULL, NULL, 0, 983 VM_NOSLEEP | VMC_IDENTIFIER)) == NULL) { 984 vmem_destroy(chan->scd_subscriber_cache); 985 kmem_free(chan, sizeof (sysevent_channel_descriptor_t)); 986 return (-1); 987 } 988 989 chan->scd_ref_cnt = 1; 990 991 (void) create_channel_registration(chan, EC_ALL, 0); 992 993 if (registered_channels[hash_index] != NULL) 994 chan->scd_next = registered_channels[hash_index]; 995 996 registered_channels[hash_index] = chan; 997 998 ++channel_cnt; 999 1000 return (0); 1001 } 1002 1003 static void 1004 close_channel(char *channel_name) 1005 { 1006 int hash_index; 1007 sysevent_channel_descriptor_t *chan, *prev_chan; 1008 1009 /* Find channel descriptor */ 1010 hash_index = CHANN_HASH(channel_name); 1011 prev_chan = chan = registered_channels[hash_index]; 1012 1013 while (chan != NULL) { 1014 if (strcmp(chan->scd_channel_name, channel_name) == 0) { 1015 break; 1016 } else { 1017 prev_chan = chan; 1018 chan = chan->scd_next; 1019 } 1020 } 1021 1022 if (chan == NULL) 1023 return; 1024 1025 chan->scd_ref_cnt--; 1026 if (chan->scd_ref_cnt > 0) 1027 return; 1028 1029 free_channel_registration(chan); 1030 vmem_destroy(chan->scd_subscriber_cache); 1031 vmem_destroy(chan->scd_publisher_cache); 1032 kmem_free(chan->scd_channel_name, 1033 strlen(chan->scd_channel_name) + 1); 1034 if (registered_channels[hash_index] == chan) 1035 registered_channels[hash_index] = chan->scd_next; 1036 else 1037 prev_chan->scd_next = chan->scd_next; 1038 kmem_free(chan, sizeof (sysevent_channel_descriptor_t)); 1039 --channel_cnt; 1040 } 1041 1042 static id_t 1043 bind_common(sysevent_channel_descriptor_t *chan, int type) 1044 { 1045 id_t id; 1046 1047 if (type == SUBSCRIBER) { 1048 id = (id_t)(uintptr_t)vmem_alloc(chan->scd_subscriber_cache, 1, 1049 VM_NOSLEEP | VM_NEXTFIT); 1050 if (id <= 0 || id > MAX_SUBSCRIBERS) 1051 return (0); 1052 chan->scd_subscriber_ids[id] = 1; 1053 } else { 1054 id = (id_t)(uintptr_t)vmem_alloc(chan->scd_publisher_cache, 1, 1055 VM_NOSLEEP | VM_NEXTFIT); 1056 if (id <= 0 || id > MAX_PUBLISHERS) 1057 return (0); 1058 chan->scd_publisher_ids[id] = 1; 1059 } 1060 1061 return (id); 1062 } 1063 1064 static int 1065 unbind_common(sysevent_channel_descriptor_t *chan, int type, id_t id) 1066 { 1067 if (type == SUBSCRIBER) { 1068 if (id <= 0 || id > MAX_SUBSCRIBERS) 1069 return (0); 1070 if (chan->scd_subscriber_ids[id] == 0) 1071 return (0); 1072 (void) remove_all_class(chan, id); 1073 chan->scd_subscriber_ids[id] = 0; 1074 vmem_free(chan->scd_subscriber_cache, (void *)(uintptr_t)id, 1); 1075 } else { 1076 if (id <= 0 || id > MAX_PUBLISHERS) 1077 return (0); 1078 if (chan->scd_publisher_ids[id] == 0) 1079 return (0); 1080 chan->scd_publisher_ids[id] = 0; 1081 vmem_free(chan->scd_publisher_cache, (void *)(uintptr_t)id, 1); 1082 } 1083 1084 return (1); 1085 } 1086 1087 static void 1088 release_id(sysevent_channel_descriptor_t *chan, int type, id_t id) 1089 { 1090 if (unbind_common(chan, type, id)) 1091 close_channel(chan->scd_channel_name); 1092 } 1093 1094 static subclass_lst_t * 1095 find_subclass(class_lst_t *c_list, char *subclass) 1096 { 1097 subclass_lst_t *sc_list; 1098 1099 if (c_list == NULL) 1100 return (NULL); 1101 1102 sc_list = c_list->cl_subclass_list; 1103 1104 while (sc_list != NULL) { 1105 if (strcmp(sc_list->sl_name, subclass) == 0) { 1106 return (sc_list); 1107 } 1108 sc_list = sc_list->sl_next; 1109 } 1110 1111 return (NULL); 1112 } 1113 1114 static void 1115 insert_subclass(class_lst_t *c_list, char **subclass_names, 1116 int subclass_num, uint32_t sub_id) 1117 { 1118 int i, subclass_sz; 1119 subclass_lst_t *sc_list; 1120 1121 for (i = 0; i < subclass_num; ++i) { 1122 if ((sc_list = find_subclass(c_list, subclass_names[i])) 1123 != NULL) { 1124 sc_list->sl_num[sub_id] = 1; 1125 } else { 1126 1127 sc_list = kmem_zalloc(sizeof (subclass_lst_t), 1128 KM_SLEEP); 1129 subclass_sz = strlen(subclass_names[i]) + 1; 1130 sc_list->sl_name = kmem_zalloc(subclass_sz, KM_SLEEP); 1131 bcopy(subclass_names[i], sc_list->sl_name, 1132 subclass_sz); 1133 1134 sc_list->sl_num[sub_id] = 1; 1135 1136 sc_list->sl_next = c_list->cl_subclass_list; 1137 c_list->cl_subclass_list = sc_list; 1138 } 1139 } 1140 } 1141 1142 static class_lst_t * 1143 find_class(sysevent_channel_descriptor_t *chan, char *class_name) 1144 { 1145 class_lst_t *c_list; 1146 1147 c_list = chan->scd_class_list_tbl[CLASS_HASH(class_name)]; 1148 while (c_list != NULL) { 1149 if (strcmp(class_name, c_list->cl_name) == 0) 1150 break; 1151 c_list = c_list->cl_next; 1152 } 1153 1154 return (c_list); 1155 } 1156 1157 static void 1158 remove_all_class(sysevent_channel_descriptor_t *chan, uint32_t sub_id) 1159 { 1160 int i; 1161 class_lst_t *c_list; 1162 subclass_lst_t *sc_list; 1163 1164 for (i = 0; i <= CLASS_HASH_SZ; ++i) { 1165 1166 c_list = chan->scd_class_list_tbl[i]; 1167 while (c_list != NULL) { 1168 sc_list = c_list->cl_subclass_list; 1169 while (sc_list != NULL) { 1170 sc_list->sl_num[sub_id] = 0; 1171 sc_list = sc_list->sl_next; 1172 } 1173 c_list = c_list->cl_next; 1174 } 1175 } 1176 } 1177 1178 static void 1179 remove_class(sysevent_channel_descriptor_t *chan, uint32_t sub_id, 1180 char *class_name) 1181 { 1182 class_lst_t *c_list; 1183 subclass_lst_t *sc_list; 1184 1185 if (strcmp(class_name, EC_ALL) == 0) { 1186 remove_all_class(chan, sub_id); 1187 return; 1188 } 1189 1190 if ((c_list = find_class(chan, class_name)) == NULL) { 1191 return; 1192 } 1193 1194 sc_list = c_list->cl_subclass_list; 1195 while (sc_list != NULL) { 1196 sc_list->sl_num[sub_id] = 0; 1197 sc_list = sc_list->sl_next; 1198 } 1199 } 1200 1201 static int 1202 insert_class(sysevent_channel_descriptor_t *chan, char *event_class, 1203 char **event_subclass_lst, int subclass_num, uint32_t sub_id) 1204 { 1205 class_lst_t *c_list; 1206 1207 if (strcmp(event_class, EC_ALL) == 0) { 1208 insert_subclass(chan->scd_class_list_tbl[0], 1209 event_subclass_lst, 1, sub_id); 1210 return (0); 1211 } 1212 1213 if (strlen(event_class) + 1 > MAX_CLASS_LEN) 1214 return (-1); 1215 1216 /* New class, add to the registration cache */ 1217 if ((c_list = find_class(chan, event_class)) == NULL) { 1218 c_list = create_channel_registration(chan, event_class, 1219 CLASS_HASH(event_class)); 1220 } 1221 1222 /* Update the subclass list */ 1223 insert_subclass(c_list, event_subclass_lst, subclass_num, sub_id); 1224 1225 return (0); 1226 } 1227 1228 static int 1229 add_registration(sysevent_channel_descriptor_t *chan, uint32_t sub_id, 1230 char *nvlbuf, size_t nvlsize) 1231 { 1232 uint_t num_elem; 1233 char *event_class; 1234 char **event_list; 1235 nvlist_t *nvl; 1236 nvpair_t *nvpair = NULL; 1237 1238 if (nvlist_unpack(nvlbuf, nvlsize, &nvl, KM_SLEEP) != 0) 1239 return (-1); 1240 1241 if ((nvpair = nvlist_next_nvpair(nvl, nvpair)) == NULL) { 1242 nvlist_free(nvl); 1243 return (-1); 1244 } 1245 1246 if ((event_class = nvpair_name(nvpair)) == NULL) { 1247 nvlist_free(nvl); 1248 return (-1); 1249 } 1250 if (nvpair_value_string_array(nvpair, &event_list, 1251 &num_elem) != 0) { 1252 nvlist_free(nvl); 1253 return (-1); 1254 } 1255 1256 if (insert_class(chan, event_class, event_list, num_elem, sub_id) < 0) { 1257 nvlist_free(nvl); 1258 return (-1); 1259 } 1260 1261 nvlist_free(nvl); 1262 1263 return (0); 1264 } 1265 1266 /* 1267 * get_registration - Return the requested class hash chain 1268 */ 1269 static int 1270 get_registration(sysevent_channel_descriptor_t *chan, char *databuf, 1271 uint32_t *bufsz, uint32_t class_index) 1272 { 1273 int num_classes = 0; 1274 char *nvlbuf = NULL; 1275 size_t nvlsize; 1276 nvlist_t *nvl; 1277 class_lst_t *clist; 1278 subclass_lst_t *sc_list; 1279 1280 if (class_index < 0 || class_index > CLASS_HASH_SZ) 1281 return (EINVAL); 1282 1283 if ((clist = chan->scd_class_list_tbl[class_index]) == NULL) { 1284 return (ENOENT); 1285 } 1286 1287 if (nvlist_alloc(&nvl, 0, 0) != 0) { 1288 return (EFAULT); 1289 } 1290 1291 while (clist != NULL) { 1292 if (nvlist_add_string(nvl, CLASS_NAME, clist->cl_name) 1293 != 0) { 1294 nvlist_free(nvl); 1295 return (EFAULT); 1296 } 1297 1298 sc_list = clist->cl_subclass_list; 1299 while (sc_list != NULL) { 1300 if (nvlist_add_byte_array(nvl, sc_list->sl_name, 1301 sc_list->sl_num, MAX_SUBSCRIBERS) != 0) { 1302 nvlist_free(nvl); 1303 return (EFAULT); 1304 } 1305 sc_list = sc_list->sl_next; 1306 } 1307 num_classes++; 1308 clist = clist->cl_next; 1309 } 1310 1311 if (num_classes == 0) { 1312 nvlist_free(nvl); 1313 return (ENOENT); 1314 } 1315 1316 if (nvlist_pack(nvl, &nvlbuf, &nvlsize, NV_ENCODE_NATIVE, 1317 KM_SLEEP) 1318 != 0) { 1319 nvlist_free(nvl); 1320 return (EFAULT); 1321 } 1322 1323 nvlist_free(nvl); 1324 1325 if (nvlsize > *bufsz) { 1326 kmem_free(nvlbuf, nvlsize); 1327 *bufsz = nvlsize; 1328 return (EAGAIN); 1329 } 1330 1331 bcopy(nvlbuf, databuf, nvlsize); 1332 kmem_free(nvlbuf, nvlsize); 1333 1334 return (0); 1335 } 1336 1337 /* 1338 * log_sysevent_register - Register event subscriber for a particular 1339 * event channel. 1340 */ 1341 int 1342 log_sysevent_register(char *channel_name, char *udatabuf, se_pubsub_t *udata) 1343 { 1344 int error = 0; 1345 char *kchannel, *databuf = NULL; 1346 size_t bufsz; 1347 se_pubsub_t kdata; 1348 sysevent_channel_descriptor_t *chan = NULL; 1349 1350 if (copyin(udata, &kdata, sizeof (se_pubsub_t)) == -1) { 1351 return (EFAULT); 1352 } 1353 if (kdata.ps_channel_name_len == 0) { 1354 return (EINVAL); 1355 } 1356 kchannel = kmem_alloc(kdata.ps_channel_name_len, KM_SLEEP); 1357 if (copyin(channel_name, kchannel, kdata.ps_channel_name_len) == -1) { 1358 kmem_free(kchannel, kdata.ps_channel_name_len); 1359 return (EFAULT); 1360 } 1361 bufsz = kdata.ps_buflen; 1362 if (bufsz > 0) { 1363 databuf = kmem_alloc(bufsz, KM_SLEEP); 1364 if (copyin(udatabuf, databuf, bufsz) == -1) { 1365 kmem_free(kchannel, kdata.ps_channel_name_len); 1366 kmem_free(databuf, bufsz); 1367 return (EFAULT); 1368 } 1369 } 1370 1371 mutex_enter(®istered_channel_mutex); 1372 if (kdata.ps_op != SE_OPEN_REGISTRATION && 1373 kdata.ps_op != SE_CLOSE_REGISTRATION) { 1374 chan = get_channel(kchannel); 1375 if (chan == NULL) { 1376 mutex_exit(®istered_channel_mutex); 1377 kmem_free(kchannel, kdata.ps_channel_name_len); 1378 if (bufsz > 0) 1379 kmem_free(databuf, bufsz); 1380 return (ENOENT); 1381 } 1382 } 1383 1384 switch (kdata.ps_op) { 1385 case SE_OPEN_REGISTRATION: 1386 if (open_channel(kchannel) != 0) { 1387 error = ENOMEM; 1388 if (bufsz > 0) 1389 kmem_free(databuf, bufsz); 1390 kmem_free(kchannel, kdata.ps_channel_name_len); 1391 } 1392 1393 mutex_exit(®istered_channel_mutex); 1394 return (error); 1395 case SE_CLOSE_REGISTRATION: 1396 close_channel(kchannel); 1397 break; 1398 case SE_BIND_REGISTRATION: 1399 if ((kdata.ps_id = bind_common(chan, kdata.ps_type)) <= 0) 1400 error = EBUSY; 1401 break; 1402 case SE_UNBIND_REGISTRATION: 1403 (void) unbind_common(chan, kdata.ps_type, (id_t)kdata.ps_id); 1404 break; 1405 case SE_REGISTER: 1406 if (bufsz == 0) { 1407 error = EINVAL; 1408 break; 1409 } 1410 if (add_registration(chan, kdata.ps_id, databuf, bufsz) == -1) 1411 error = EINVAL; 1412 break; 1413 case SE_UNREGISTER: 1414 if (bufsz == 0) { 1415 error = EINVAL; 1416 break; 1417 } 1418 remove_class(chan, kdata.ps_id, databuf); 1419 break; 1420 case SE_CLEANUP: 1421 /* Cleanup the indicated subscriber or publisher */ 1422 release_id(chan, kdata.ps_type, kdata.ps_id); 1423 break; 1424 case SE_GET_REGISTRATION: 1425 error = get_registration(chan, databuf, 1426 &kdata.ps_buflen, kdata.ps_id); 1427 break; 1428 default: 1429 error = ENOTSUP; 1430 } 1431 1432 mutex_exit(®istered_channel_mutex); 1433 1434 kmem_free(kchannel, kdata.ps_channel_name_len); 1435 1436 if (bufsz > 0) { 1437 if (copyout(databuf, udatabuf, bufsz) == -1) 1438 error = EFAULT; 1439 kmem_free(databuf, bufsz); 1440 } 1441 1442 if (copyout(&kdata, udata, sizeof (se_pubsub_t)) == -1) 1443 return (EFAULT); 1444 1445 return (error); 1446 } 1447 1448 /* 1449 * log_sysevent_copyout_data - Copyout event data to userland. 1450 * This is called from modctl(MODEVENTS, MODEVENTS_GETDATA) 1451 * The buffer size is always sufficient. 1452 */ 1453 int 1454 log_sysevent_copyout_data(sysevent_id_t *eid, size_t ubuflen, caddr_t ubuf) 1455 { 1456 int error = ENOENT; 1457 log_eventq_t *q; 1458 sysevent_t *ev; 1459 sysevent_id_t eid_copy; 1460 1461 /* 1462 * Copy eid 1463 */ 1464 if (copyin(eid, &eid_copy, sizeof (sysevent_id_t)) == -1) { 1465 return (EFAULT); 1466 } 1467 1468 mutex_enter(&eventq_sent_mutex); 1469 q = log_eventq_sent; 1470 1471 /* 1472 * Search for event buffer on the sent queue with matching 1473 * event identifier 1474 */ 1475 while (q) { 1476 ev = (sysevent_t *)&q->arg.buf; 1477 1478 if (SE_TIME(ev) != eid_copy.eid_ts || 1479 SE_SEQ(ev) != eid_copy.eid_seq) { 1480 q = q->next; 1481 continue; 1482 } 1483 1484 if (ubuflen < SE_SIZE(ev)) { 1485 error = EFAULT; 1486 break; 1487 } 1488 if (copyout(ev, ubuf, SE_SIZE(ev)) != 0) { 1489 error = EFAULT; 1490 LOG_DEBUG((CE_NOTE, "Unable to retrieve system event " 1491 "0x%" PRIx64 " from queue: EFAULT\n", 1492 eid->eid_seq)); 1493 } else { 1494 error = 0; 1495 } 1496 break; 1497 } 1498 1499 mutex_exit(&eventq_sent_mutex); 1500 1501 return (error); 1502 } 1503 1504 /* 1505 * log_sysevent_free_data - Free kernel copy of the event buffer identified 1506 * by eid (must have already been sent). Called from 1507 * modctl(MODEVENTS, MODEVENTS_FREEDATA). 1508 */ 1509 int 1510 log_sysevent_free_data(sysevent_id_t *eid) 1511 { 1512 int error = ENOENT; 1513 sysevent_t *ev; 1514 log_eventq_t *q, *prev = NULL; 1515 sysevent_id_t eid_copy; 1516 1517 /* 1518 * Copy eid 1519 */ 1520 if (copyin(eid, &eid_copy, sizeof (sysevent_id_t)) == -1) { 1521 return (EFAULT); 1522 } 1523 1524 mutex_enter(&eventq_sent_mutex); 1525 q = log_eventq_sent; 1526 1527 /* 1528 * Look for the event to be freed on the sent queue. Due to delayed 1529 * processing of the event, it may not be on the sent queue yet. 1530 * It is up to the user to retry the free operation to ensure that the 1531 * event is properly freed. 1532 */ 1533 while (q) { 1534 ev = (sysevent_t *)&q->arg.buf; 1535 1536 if (SE_TIME(ev) != eid_copy.eid_ts || 1537 SE_SEQ(ev) != eid_copy.eid_seq) { 1538 prev = q; 1539 q = q->next; 1540 continue; 1541 } 1542 /* 1543 * Take it out of log_eventq_sent and free it 1544 */ 1545 if (prev) { 1546 prev->next = q->next; 1547 } else { 1548 log_eventq_sent = q->next; 1549 } 1550 free_packed_event(ev); 1551 error = 0; 1552 break; 1553 } 1554 1555 mutex_exit(&eventq_sent_mutex); 1556 1557 return (error); 1558 } 1559 1560 /* 1561 * log_sysevent_flushq - Begin or resume event buffer delivery. If neccessary, 1562 * create log_event_deliver thread or wake it up 1563 */ 1564 /*ARGSUSED*/ 1565 void 1566 log_sysevent_flushq(int cmd, uint_t flag) 1567 { 1568 mutex_enter(&eventq_head_mutex); 1569 1570 /* 1571 * Start the event delivery thread 1572 * Mark the upcall status as active since we should 1573 * now be able to begin emptying the queue normally. 1574 */ 1575 if (!async_thread) { 1576 sysevent_upcall_status = 0; 1577 sysevent_daemon_init = 1; 1578 setup_ddi_poststartup(); 1579 async_thread = thread_create(NULL, 0, log_event_deliver, 1580 NULL, 0, &p0, TS_RUN, minclsyspri); 1581 } 1582 1583 log_event_delivery = LOGEVENT_DELIVERY_CONT; 1584 cv_signal(&log_event_cv); 1585 mutex_exit(&eventq_head_mutex); 1586 } 1587 1588 /* 1589 * log_sysevent_filename - Called by syseventd via 1590 * modctl(MODEVENTS, MODEVENTS_SET_DOOR_UPCALL_FILENAME) 1591 * to subsequently bind the event_door. 1592 * 1593 * This routine is called everytime syseventd (re)starts 1594 * and must therefore replay any events buffers that have 1595 * been sent but not freed. 1596 * 1597 * Event buffer delivery begins after a call to 1598 * log_sysevent_flushq(). 1599 */ 1600 int 1601 log_sysevent_filename(char *file) 1602 { 1603 mutex_enter(&event_door_mutex); 1604 1605 (void) strlcpy(logevent_door_upcall_filename, file, 1606 sizeof (logevent_door_upcall_filename)); 1607 1608 /* Unbind old event door */ 1609 if (event_door != NULL) 1610 door_ki_rele(event_door); 1611 /* Establish door connection with user event daemon (syseventd) */ 1612 if (door_ki_open(logevent_door_upcall_filename, &event_door) != 0) 1613 event_door = NULL; 1614 1615 mutex_exit(&event_door_mutex); 1616 1617 /* 1618 * We are called when syseventd restarts. Move all sent, but 1619 * not committed events from log_eventq_sent to log_eventq_head. 1620 * Do it in proper order to maintain increasing event id. 1621 */ 1622 mutex_enter(&eventq_head_mutex); 1623 1624 mutex_enter(&eventq_sent_mutex); 1625 while (log_eventq_sent) { 1626 log_eventq_t *tmp = log_eventq_sent->next; 1627 log_eventq_sent->next = log_eventq_head; 1628 if (log_eventq_head == NULL) { 1629 ASSERT(log_eventq_cnt == 0); 1630 log_eventq_tail = log_eventq_sent; 1631 log_eventq_tail->next = NULL; 1632 } else if (log_eventq_head == log_eventq_tail) { 1633 ASSERT(log_eventq_cnt == 1); 1634 ASSERT(log_eventq_head->next == NULL); 1635 ASSERT(log_eventq_tail->next == NULL); 1636 } 1637 log_eventq_head = log_eventq_sent; 1638 log_eventq_sent = tmp; 1639 log_eventq_cnt++; 1640 } 1641 mutex_exit(&eventq_sent_mutex); 1642 mutex_exit(&eventq_head_mutex); 1643 1644 return (0); 1645 } 1646 1647 /* 1648 * queue_sysevent - queue an event buffer 1649 */ 1650 static int 1651 queue_sysevent(sysevent_t *ev, sysevent_id_t *eid, int flag) 1652 { 1653 log_eventq_t *q; 1654 1655 ASSERT(flag == SE_SLEEP || flag == SE_NOSLEEP); 1656 1657 DTRACE_SYSEVENT2(post, evch_bind_t *, NULL, sysevent_impl_t *, ev); 1658 1659 restart: 1660 1661 /* Max Q size exceeded */ 1662 mutex_enter(&event_qfull_mutex); 1663 if (sysevent_daemon_init && log_eventq_cnt >= logevent_max_q_sz) { 1664 /* 1665 * If queue full and transport down, return no transport 1666 */ 1667 if (sysevent_upcall_status != 0) { 1668 mutex_exit(&event_qfull_mutex); 1669 free_packed_event(ev); 1670 eid->eid_seq = UINT64_C(0); 1671 eid->eid_ts = INT64_C(0); 1672 return (SE_NO_TRANSPORT); 1673 } 1674 if (flag == SE_NOSLEEP) { 1675 mutex_exit(&event_qfull_mutex); 1676 free_packed_event(ev); 1677 eid->eid_seq = UINT64_C(0); 1678 eid->eid_ts = INT64_C(0); 1679 return (SE_EQSIZE); 1680 } 1681 event_qfull_blocked++; 1682 cv_wait(&event_qfull_cv, &event_qfull_mutex); 1683 event_qfull_blocked--; 1684 mutex_exit(&event_qfull_mutex); 1685 goto restart; 1686 } 1687 mutex_exit(&event_qfull_mutex); 1688 1689 mutex_enter(&eventq_head_mutex); 1690 1691 /* Time stamp and assign ID */ 1692 SE_SEQ(ev) = eid->eid_seq = atomic_add_64_nv(&kernel_event_id, 1693 (uint64_t)1); 1694 SE_TIME(ev) = eid->eid_ts = gethrtime(); 1695 1696 LOG_DEBUG1((CE_CONT, "log_sysevent: class=%d type=%d id=0x%llx\n", 1697 SE_CLASS(ev), SE_SUBCLASS(ev), (longlong_t)SE_SEQ(ev))); 1698 1699 /* 1700 * Put event on eventq 1701 */ 1702 q = (log_eventq_t *)((caddr_t)ev - offsetof(log_eventq_t, arg.buf)); 1703 q->next = NULL; 1704 if (log_eventq_head == NULL) { 1705 ASSERT(log_eventq_cnt == 0); 1706 log_eventq_head = q; 1707 log_eventq_tail = q; 1708 } else { 1709 if (log_eventq_head == log_eventq_tail) { 1710 ASSERT(log_eventq_cnt == 1); 1711 ASSERT(log_eventq_head->next == NULL); 1712 ASSERT(log_eventq_tail->next == NULL); 1713 } 1714 log_eventq_tail->next = q; 1715 log_eventq_tail = q; 1716 } 1717 log_eventq_cnt++; 1718 1719 /* Signal event delivery thread */ 1720 if (log_eventq_cnt == 1) { 1721 cv_signal(&log_event_cv); 1722 } 1723 mutex_exit(&eventq_head_mutex); 1724 1725 return (0); 1726 } 1727 1728 /* 1729 * log_sysevent - kernel system event logger. 1730 * 1731 * Returns SE_ENOMEM if buf allocation failed or SE_EQSIZE if the 1732 * maximum event queue size will be exceeded 1733 * Returns 0 for successfully queued event buffer 1734 */ 1735 int 1736 log_sysevent(sysevent_t *ev, int flag, sysevent_id_t *eid) 1737 { 1738 sysevent_t *ev_copy; 1739 int rval; 1740 1741 ASSERT(flag == SE_SLEEP || flag == SE_NOSLEEP); 1742 ASSERT(!(flag == SE_SLEEP && servicing_interrupt())); 1743 1744 ev_copy = se_repack(ev, flag); 1745 if (ev_copy == NULL) { 1746 ASSERT(flag == SE_NOSLEEP); 1747 return (SE_ENOMEM); 1748 } 1749 rval = queue_sysevent(ev_copy, eid, flag); 1750 ASSERT(rval == 0 || rval == SE_ENOMEM || rval == SE_EQSIZE || 1751 rval == SE_NO_TRANSPORT); 1752 ASSERT(!(flag == SE_SLEEP && (rval == SE_EQSIZE || rval == SE_ENOMEM))); 1753 return (rval); 1754 } 1755 1756 /* 1757 * Publish EC_DEV_ADD and EC_DEV_REMOVE events from devfsadm to lofi. 1758 * This interface is needed to pass device link names to the lofi driver, 1759 * to be returned via ioctl() to the lofiadm command. 1760 * The problem is, if lofiadm is executed in local zone, there is no 1761 * mechanism to announce the device name from the /dev tree back to lofiadm, 1762 * as sysevents are not accessible from local zone and devfsadmd is only 1763 * running in global zone. 1764 * 1765 * Delayed/missed events are not fatal for lofi, as the device name returned 1766 * to lofiadm is for information and can be re-queried with listing 1767 * mappings with lofiadm command. 1768 * 1769 * Once we have a better method, this interface should be reworked. 1770 */ 1771 static void 1772 notify_lofi(sysevent_t *ev) 1773 { 1774 nvlist_t *nvlist; 1775 char name[10], *class, *driver; 1776 int32_t instance; 1777 1778 class = sysevent_get_class_name(ev); 1779 if ((strcmp(EC_DEV_ADD, class) != 0) && 1780 (strcmp(EC_DEV_REMOVE, class) != 0)) { 1781 return; 1782 } 1783 1784 (void) sysevent_get_attr_list(ev, &nvlist); 1785 driver = fnvlist_lookup_string(nvlist, DEV_DRIVER_NAME); 1786 instance = fnvlist_lookup_int32(nvlist, DEV_INSTANCE); 1787 1788 /* We are only interested about lofi. */ 1789 if (strcmp(driver, "lofi") != 0) { 1790 fnvlist_free(nvlist); 1791 return; 1792 } 1793 1794 /* 1795 * insert or remove device info, then announce the change 1796 * via cv_broadcast. 1797 */ 1798 (void) snprintf(name, sizeof (name), "%d", instance); 1799 mutex_enter(&lofi_devlink_cache.ln_lock); 1800 if (strcmp(class, EC_DEV_ADD) == 0) { 1801 fnvlist_add_nvlist(lofi_devlink_cache.ln_data, name, nvlist); 1802 } else { 1803 /* Can not use fnvlist_remove() as we can get ENOENT. */ 1804 (void) nvlist_remove_all(lofi_devlink_cache.ln_data, name); 1805 } 1806 cv_broadcast(&lofi_devlink_cache.ln_cv); 1807 mutex_exit(&lofi_devlink_cache.ln_lock); 1808 1809 fnvlist_free(nvlist); 1810 } 1811 1812 /* 1813 * log_usr_sysevent - user system event logger 1814 * Private to devfsadm and accessible only via 1815 * modctl(MODEVENTS, MODEVENTS_POST_EVENT) 1816 */ 1817 int 1818 log_usr_sysevent(sysevent_t *ev, int ev_size, sysevent_id_t *eid) 1819 { 1820 int ret, copy_sz; 1821 sysevent_t *ev_copy; 1822 sysevent_id_t new_eid; 1823 log_eventq_t *qcopy; 1824 1825 copy_sz = ev_size + offsetof(log_eventq_t, arg) + 1826 offsetof(log_event_upcall_arg_t, buf); 1827 qcopy = kmem_zalloc(copy_sz, KM_SLEEP); 1828 ev_copy = (sysevent_t *)&qcopy->arg.buf; 1829 1830 /* 1831 * Copy event 1832 */ 1833 if (copyin(ev, ev_copy, ev_size) == -1) { 1834 kmem_free(qcopy, copy_sz); 1835 return (EFAULT); 1836 } 1837 1838 notify_lofi(ev_copy); 1839 1840 if ((ret = queue_sysevent(ev_copy, &new_eid, SE_NOSLEEP)) != 0) { 1841 if (ret == SE_ENOMEM || ret == SE_EQSIZE) 1842 return (EAGAIN); 1843 else 1844 return (EIO); 1845 } 1846 1847 if (copyout(&new_eid, eid, sizeof (sysevent_id_t)) == -1) { 1848 return (EFAULT); 1849 } 1850 1851 return (0); 1852 } 1853 1854 1855 1856 int 1857 ddi_log_sysevent( 1858 dev_info_t *dip, 1859 char *vendor, 1860 char *class, 1861 char *subclass, 1862 nvlist_t *attr_list, 1863 sysevent_id_t *eidp, 1864 int sleep_flag) 1865 { 1866 sysevent_attr_list_t *list = (sysevent_attr_list_t *)attr_list; 1867 char pubstr[32]; 1868 sysevent_t *event; 1869 sysevent_id_t eid; 1870 const char *drvname; 1871 char *publisher; 1872 int se_flag; 1873 int rval; 1874 int n; 1875 1876 if (sleep_flag == DDI_SLEEP && servicing_interrupt()) { 1877 cmn_err(CE_NOTE, "!ddi_log_syevent: driver %s%d - cannot queue " 1878 "event from interrupt context with sleep semantics\n", 1879 ddi_driver_name(dip), ddi_get_instance(dip)); 1880 return (DDI_ECONTEXT); 1881 } 1882 1883 drvname = ddi_driver_name(dip); 1884 n = strlen(vendor) + strlen(drvname) + 7; 1885 if (n < sizeof (pubstr)) { 1886 publisher = pubstr; 1887 } else { 1888 publisher = kmem_alloc(n, 1889 (sleep_flag == DDI_SLEEP) ? KM_SLEEP : KM_NOSLEEP); 1890 if (publisher == NULL) { 1891 return (DDI_ENOMEM); 1892 } 1893 } 1894 (void) strcpy(publisher, vendor); 1895 (void) strcat(publisher, ":kern:"); 1896 (void) strcat(publisher, drvname); 1897 1898 se_flag = (sleep_flag == DDI_SLEEP) ? SE_SLEEP : SE_NOSLEEP; 1899 event = sysevent_alloc(class, subclass, publisher, se_flag); 1900 1901 if (publisher != pubstr) { 1902 kmem_free(publisher, n); 1903 } 1904 1905 if (event == NULL) { 1906 return (DDI_ENOMEM); 1907 } 1908 1909 if (list) { 1910 (void) sysevent_attach_attributes(event, list); 1911 } 1912 1913 rval = log_sysevent(event, se_flag, &eid); 1914 if (list) { 1915 sysevent_detach_attributes(event); 1916 } 1917 sysevent_free(event); 1918 if (rval == 0) { 1919 if (eidp) { 1920 eidp->eid_seq = eid.eid_seq; 1921 eidp->eid_ts = eid.eid_ts; 1922 } 1923 return (DDI_SUCCESS); 1924 } 1925 if (rval == SE_NO_TRANSPORT) 1926 return (DDI_ETRANSPORT); 1927 1928 ASSERT(rval == SE_ENOMEM || rval == SE_EQSIZE); 1929 return ((rval == SE_ENOMEM) ? DDI_ENOMEM : DDI_EBUSY); 1930 } 1931 1932 uint64_t 1933 log_sysevent_new_id(void) 1934 { 1935 return (atomic_add_64_nv(&kernel_event_id, (uint64_t)1)); 1936 } 1937