#include <sys/modctl.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/conf.h>
#include <sys/devops.h>
#include <sys/stat.h>
#include <sys/fs/zev.h>
#include <sys/zev_callbacks.h>
#include <sys/zev_checksums.h>
#include <sys/zfs_znode.h>
#include <sys/time.h>
#include <sys/sa.h>
#include <sys/zap.h>

#define	OFFSETOF(s, m)	((size_t)(&(((s *)0)->m)))

#define	ZEV_DEFAULT_QUEUE_NAME		"beaver"
#define	ZEV_CONTROL_DEVICE_MINOR	0
#define	ZEV_MINOR_MIN			(ZEV_CONTROL_DEVICE_MINOR + 1)
#define	ZEV_MINOR_MAX			(ZEV_MINOR_MIN + ZEV_MAX_QUEUES - 1)

typedef struct zev_queue {
	char			zq_name[ZEV_MAX_QUEUE_NAME_LEN+1];
	minor_t			zq_minor_number;
	dev_info_t		*zq_dip;
	struct pollhead		zq_pollhead;
	uint64_t		zq_bytes_read;
	uint64_t		zq_events_read;
	uint64_t		zq_bytes_discarded;
	uint64_t		zq_events_discarded;
	uint64_t		zq_bytes_total;
	uint64_t		zq_events_total;
	uint64_t		zq_wakeup_threshold;
	uint16_t		zq_flags;
	uint16_t		zq_need_wakeup;
	/* protected by zev_mutex */
	int			zq_refcnt;
	uint64_t		zq_queue_len;
	uint64_t		zq_queue_messages;
	uint64_t		zq_max_queue_len;
	zev_msg_t		*zq_oldest;
	boolean_t		zq_busy;
	boolean_t		zq_to_be_removed;
	zev_statistics_t	zq_statistics;
	kcondvar_t		zq_condvar;
} zev_queue_t;

static void		*statep;
struct pollhead		zev_pollhead;

kmutex_t		zev_mutex;
kcondvar_t		zev_condvar;
kmutex_t		zev_queue_msg_mutex;
krwlock_t		zev_pool_list_rwlock;
static zev_statistics_t	zev_statistics;
static boolean_t	zev_attached;
static kmutex_t		zev_mark_id_mutex;
static uint64_t		zev_mark_id = 0;

static uint64_t		zev_msg_sequence_number = 0;
static zev_queue_t	*zev_queues[ZEV_MAX_QUEUES];
static int		zev_queue_cnt = 0;
static int		zev_have_blocking_queues = 1;

uint64_t	zev_memory_allocated = 0;
uint64_t	zev_memory_freed = 0;

/*
 * The longest potential message is from zev_zfs_mount() and
 * contains the mountpoint, which might be close to MAXPATHLEN bytes long.
 *
 * Another candidate is zev_znode_rename_cb() and contains three inode
 * numbers and two filenames of up to MAXNAMELEN bytes each.
 */
#define	ZEV_MAX_MESSAGE_LEN	4096

static zev_msg_t *zev_queue_head = NULL;
static zev_msg_t *zev_queue_tail = NULL;
static uint64_t zev_queue_len = 0;

typedef struct zev_pool_list_entry {
	struct zev_pool_list_entry	*next;
	char				name[MAXPATHLEN];
} zev_pool_list_entry_t;

static zev_pool_list_entry_t *zev_muted_pools_head = NULL;

static volatile int zev_wakeup_thread_run = 1;
static kthread_t *zev_poll_wakeup_thread = NULL;

void *
zev_alloc(ssize_t sz)
{
	ZEV_MEM_ADD(sz);
	return kmem_alloc(sz, KM_SLEEP);
}

void *
zev_zalloc(ssize_t sz)
{
	ZEV_MEM_ADD(sz);
	return kmem_zalloc(sz, KM_SLEEP);
}

void
zev_free(void *ptr, ssize_t sz)
{
	ZEV_MEM_SUB(sz);
	kmem_free(ptr, sz);
}

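/*
 * Illustrative sketch (not driver code): the wrappers above account for
 * memory by byte count, so a free must pass the same size that was
 * allocated, or both the accounting and kmem_free() go wrong. Event
 * messages follow this pattern; the payload size is a made-up example.
 */
#if 0
	zev_msg_t *msg;
	int payload_size = 128;			/* hypothetical record size */

	msg = zev_alloc(sizeof(*msg) + payload_size);	/* ZEV_MEM_ADD() */
	msg->size = payload_size;
	/* ... fill in the event record that follows the header ... */
	zev_free(msg, sizeof(*msg) + msg->size);	/* ZEV_MEM_SUB() */
#endif
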
/* must be called with zev_mutex held */
static void
zev_update_blockflag(void)
{
	zev_queue_t *q;
	int had_blocking_queues;
	int i;

	had_blocking_queues = zev_have_blocking_queues;

	/* do we still have blocking queues? */
	zev_have_blocking_queues = 0;
	for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
		q = zev_queues[i - ZEV_MINOR_MIN];
		if (!q)
			continue;
		if (q->zq_flags & ZEV_FL_BLOCK_WHILE_QUEUE_FULL) {
			zev_have_blocking_queues = 1;
			break;
		}
	}
	/* no blocking queues */
	if (had_blocking_queues)
		cv_broadcast(&zev_condvar);
}

int
zev_queue_cmp(const void *a, const void *b)
{
	const zev_queue_t *qa = a;
	const zev_queue_t *qb = b;
	if (qa->zq_minor_number > qb->zq_minor_number)
		return 1;
	if (qa->zq_minor_number < qb->zq_minor_number)
		return -1;
	return 0;
}

/* must be called with zev_mutex held */
void
zev_queue_trim(void)
{
	zev_msg_t *m;
	uint64_t oldest_message;
	zev_queue_t *q;
	int i;

	if (!zev_queue_tail)
		return;

	oldest_message = zev_queue_tail->seq + 1; /* does not exist, yet. */
	for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
		q = zev_queues[i - ZEV_MINOR_MIN];
		if (q == NULL)
			continue;
		if (!q->zq_oldest)
			continue;
		if (oldest_message > q->zq_oldest->seq)
			oldest_message = q->zq_oldest->seq;
	}

	/* remove msgs between oldest_message and zev_queue_head */
	while (zev_queue_head && (oldest_message > zev_queue_head->seq)) {
		m = zev_queue_head;
		zev_queue_head = m->next;
		if (zev_queue_head == NULL) {
			zev_queue_tail = NULL;
		} else {
			zev_queue_head->prev = NULL;
		}
		if (m->read == 0) {
			zev_statistics.zev_bytes_discarded += m->size;
			zev_statistics.zev_cnt_discarded_events++;
		}
		zev_statistics.zev_queue_len -= m->size;
		zev_queue_len--;
		zev_free(m, sizeof(*m) + m->size);
	}
}

/* must be called with zev_mutex held */
static void
zev_queue_hold(zev_queue_t *q)
{
	q->zq_refcnt++;
}

/* must be called with zev_mutex held */
static void
zev_queue_release(zev_queue_t *q)
{
	q->zq_refcnt--;
	if (q->zq_refcnt > 0)
		return;

	ASSERT(q->zq_busy == B_FALSE);

	/* persistent queues will not be removed */
	if ((q->zq_flags & ZEV_FL_PERSISTENT) != 0)
		return;

	/* remove queue from queue list */
	zev_queues[q->zq_minor_number - ZEV_MINOR_MIN] = NULL;

	/* discard messages that no queue references anymore */
	zev_queue_trim();

	cv_destroy(&q->zq_condvar);
	ddi_remove_minor_node(q->zq_dip, q->zq_name);
	ddi_soft_state_free(statep, q->zq_minor_number);
	ZEV_MEM_SUB(sizeof(zev_queue_t));
	zev_queue_cnt--;
	zev_update_blockflag();
}

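/*
 * Illustrative sketch (not driver code) of the hold/release pattern the
 * functions above support: a caller that must drop zev_mutex while still
 * using a queue takes a hold first, so zev_queue_release() cannot free
 * the queue underneath it. zev_poll_wakeup() below uses exactly this.
 */
#if 0
	mutex_enter(&zev_mutex);
	zev_queue_hold(q);			/* pin q while unlocked */
	mutex_exit(&zev_mutex);
	pollwakeup(&q->zq_pollhead, POLLIN);	/* must run without locks */
	mutex_enter(&zev_mutex);
	zev_queue_release(q);			/* may free q if removed */
	mutex_exit(&zev_mutex);
#endif
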
int
zev_queue_new(zev_queue_t **queue,
              dev_info_t *dip,
              char *name,
              uint64_t max_queue_len,
              uint16_t flags)
{
	zev_queue_t *q;
	zev_queue_t *tmp;
	zev_msg_t *msg;
	int name_exists = 0;
	minor_t minor;
	char *p;
	int i;

	if (max_queue_len > ZEV_MAX_QUEUE_LEN)
		return EINVAL;
	if (max_queue_len == 0)
		max_queue_len = ZEV_MAX_QUEUE_LEN;
	if (!strcmp(name, ZEV_CONTROL_DEVICE_NAME))
		return EINVAL;
	for (p = name; *p; p++) {
		if (*p >= 'a' && *p <= 'z')
			continue;
		if (*p >= '0' && *p <= '9')
			continue;
		if (*p == '.')
			continue;
		return EINVAL;
	}

	mutex_enter(&zev_mutex);

	/* find a free minor number. */
	/* if this were a frequent operation we'd have a free-minor list */
	for (minor = ZEV_MINOR_MIN; minor <= ZEV_MINOR_MAX; minor++) {
		tmp = zev_queues[minor - ZEV_MINOR_MIN];
		if (tmp == NULL)
			break;
	}
	if (tmp) {
		mutex_exit(&zev_mutex);
		return ENOSPC;
	}

	if (ddi_soft_state_zalloc(statep, minor) != DDI_SUCCESS) {
		mutex_exit(&zev_mutex);
		return ENOSPC;
	}
	ZEV_MEM_ADD(sizeof(zev_queue_t));

	q = ddi_get_soft_state(statep, minor);
	memset(q, 0, sizeof(*q));
	strncpy(q->zq_name, name, ZEV_MAX_QUEUE_NAME_LEN);
	q->zq_name[ZEV_MAX_QUEUE_NAME_LEN] = '\0';
	q->zq_max_queue_len = max_queue_len;
	q->zq_wakeup_threshold = ZEV_DEFAULT_POLL_WAKEUP_QUEUE_LEN;
	q->zq_flags = flags;
	q->zq_refcnt = 1;
	q->zq_dip = dip;
	q->zq_minor_number = minor;
	cv_init(&q->zq_condvar, NULL, CV_DRIVER, NULL);

	/* insert into queue list */
	for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
		/* if this were a frequent operation we'd have a name tree */
		if (zev_queues[i - ZEV_MINOR_MIN] == NULL)
			continue;
		if (!strcmp(q->zq_name, zev_queues[i-ZEV_MINOR_MIN]->zq_name)) {
			name_exists = 1;
			break;
		}
	}
	if (name_exists) {
		ddi_soft_state_free(statep, minor);
		ZEV_MEM_SUB(sizeof(zev_queue_t));
		mutex_exit(&zev_mutex);
		return EEXIST;
	}
	zev_queues[minor - ZEV_MINOR_MIN] = q;
	zev_queue_cnt++;

	/* calculate current queue len and find head and tail */
	if (!(q->zq_flags & ZEV_FL_INITIALLY_EMPTY)) {
		q->zq_oldest = zev_queue_tail;
		msg = zev_queue_tail;
		while ((msg) && (q->zq_queue_len < q->zq_max_queue_len)) {
			q->zq_queue_len += msg->size;
			q->zq_queue_messages++;
			q->zq_oldest = msg;
			msg = msg->prev;
		}
	}

	mutex_exit(&zev_mutex);

	if (ddi_create_minor_node(dip, name,
	    S_IFCHR, minor, DDI_PSEUDO, 0) == DDI_FAILURE) {
		mutex_enter(&zev_mutex);
		zev_queues[minor - ZEV_MINOR_MIN] = NULL;
		zev_queue_cnt--;
		ddi_soft_state_free(statep, minor);
		ZEV_MEM_SUB(sizeof(zev_queue_t));
		mutex_exit(&zev_mutex);
		return EFAULT;
	}

	zev_update_blockflag();

	*queue = q;
	return 0;
}

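/*
 * Usage sketch (hypothetical caller, not driver code): create an extra
 * event queue named "backup.0". Queue names may only contain [a-z0-9.],
 * and a max_queue_len of 0 selects the ZEV_MAX_QUEUE_LEN default.
 */
#if 0
	zev_queue_t *q;
	int error;

	error = zev_queue_new(&q, dip, "backup.0", 0 /* default length */,
	    ZEV_FL_INITIALLY_EMPTY);
	if (error)
		cmn_err(CE_WARN, "zev: queue creation failed: %d", error);
#endif
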
/*
 * poll() wakeup thread. Used to check periodically whether we have
 * bytes left in the queue that have not yet been made into a
 * pollwakeup() call. This is meant to ensure a maximum waiting
 * time until an event is presented as a poll wakeup, while at
 * the same time not making every single event into a poll wakeup
 * of its own.
 */

static void
zev_poll_wakeup(boolean_t flush_all)
{
	zev_queue_t *q;
	int i;

	/*
	 * This loop works with hold() and release() because
	 * pollwakeup() requires us to release our locks before calling it.
	 *
	 * from pollwakeup(9F):
	 *
	 *   "Driver defined locks should not be held across calls
	 *   to this function."
	 */

	/* wake up threads for each individual queue */
	mutex_enter(&zev_mutex);
	for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
		q = zev_queues[i - ZEV_MINOR_MIN];
		if (q == NULL)
			continue;
		if (!q->zq_busy)
			continue;
		if (!q->zq_queue_len)
			continue;
		if ((flush_all) ||
		    (q->zq_queue_len > q->zq_wakeup_threshold)) {
			zev_queue_hold(q);
			mutex_exit(&zev_mutex);
			pollwakeup(&q->zq_pollhead, POLLIN);
			mutex_enter(&zev_mutex);
			zev_queue_release(q);
		}
	}
	mutex_exit(&zev_mutex);
}

static void
zev_poll_wakeup_thread_main(void)
{
	while (zev_wakeup_thread_run) {
		delay(drv_usectohz(100 * 1000)); /* sleep 100ms */

		zev_poll_wakeup(B_TRUE);
	}
	thread_exit();
}

static int
zev_ioc_mute_pool(char *poolname)
{
	zev_pool_list_entry_t *pe;
	rw_enter(&zev_pool_list_rwlock, RW_WRITER);
	/* pool already muted? */
	for (pe = zev_muted_pools_head; pe; pe = pe->next) {
		if (!strcmp(pe->name, poolname)) {
			rw_exit(&zev_pool_list_rwlock);
			return EEXIST;
		}
	}
	pe = zev_zalloc(sizeof(*pe));
	if (!pe) {
		rw_exit(&zev_pool_list_rwlock);
		return ENOMEM;
	}
	(void) strncpy(pe->name, poolname, sizeof(pe->name));
	pe->next = zev_muted_pools_head;
	zev_muted_pools_head = pe;
	rw_exit(&zev_pool_list_rwlock);
	return (0);
}

static int
zev_ioc_unmute_pool(char *poolname)
{
	zev_pool_list_entry_t *pe, *peprev;

	rw_enter(&zev_pool_list_rwlock, RW_WRITER);
	/* pool muted? */
	peprev = NULL;
	for (pe = zev_muted_pools_head; pe; pe = pe->next) {
		if (!strcmp(pe->name, poolname))
			break;
		peprev = pe;
	}
	if (!pe) {
		rw_exit(&zev_pool_list_rwlock);
		return ENOENT;
	}

	if (peprev != NULL) {
		peprev->next = pe->next;
	} else {
		zev_muted_pools_head = pe->next;
	}
	zev_free(pe, sizeof(*pe));
	rw_exit(&zev_pool_list_rwlock);
	return (0);
}

int
zev_skip_pool(objset_t *os)
{
	zev_pool_list_entry_t *pe;
	dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool;
	rw_enter(&zev_pool_list_rwlock, RW_READER);
	for (pe = zev_muted_pools_head; pe; pe = pe->next) {
		if (!strcmp(pe->name, dp->dp_spa->spa_name)) {
			rw_exit(&zev_pool_list_rwlock);
			return 1;
		}
	}
	rw_exit(&zev_pool_list_rwlock);
	return 0;
}

int
zev_skip_fs(zfsvfs_t *fs)
{
	dsl_dir_t *d = fs->z_os->os_dsl_dataset->ds_dir;
	dsl_dir_t *prev = NULL;

	while (d && d != prev) {
		if (strstr(d->dd_myname, "_root"))
			return 0;
		prev = d;
		d = d->dd_parent;
	}
	return 1;
}

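/*
 * Illustrative userspace sketch (assumed includes: <fcntl.h>, <string.h>,
 * <sys/ioctl.h>, <sys/fs/zev.h>): mute all events from one pool. The
 * pool name "tank" and the device path are example values; the actual
 * path depends on the /dev links created for the driver's minor nodes.
 */
#if 0
	zev_ioctl_poolarg_t pa;
	int fd = open("/devices/pseudo/zev@0:ctrl", O_RDONLY);

	memset(&pa, 0, sizeof(pa));
	pa.zev_poolname_len = strlen("tank");
	memcpy(pa.zev_poolname, "tank", pa.zev_poolname_len);
	if (ioctl(fd, ZEV_IOC_MUTE_POOL, &pa) != 0)
		perror("ZEV_IOC_MUTE_POOL");
#endif
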
static void
zev_update_statistics(int op, zev_statistics_t *stat)
{
	switch (op) {
	case ZEV_OP_ERROR:
		stat->zev_cnt_errors++;
		break;
	case ZEV_OP_MARK:
		stat->zev_cnt_marks++;
		break;
	case ZEV_OP_ZFS_MOUNT:
		stat->zev_cnt_zfs_mount++;
		break;
	case ZEV_OP_ZFS_UMOUNT:
		stat->zev_cnt_zfs_umount++;
		break;
	case ZEV_OP_ZVOL_WRITE:
		stat->zev_cnt_zvol_write++;
		break;
	case ZEV_OP_ZVOL_TRUNCATE:
		stat->zev_cnt_zvol_truncate++;
		break;
	case ZEV_OP_ZNODE_CLOSE_AFTER_UPDATE:
		stat->zev_cnt_znode_close_after_update++;
		break;
	case ZEV_OP_ZNODE_CREATE:
		stat->zev_cnt_znode_create++;
		break;
	case ZEV_OP_ZNODE_REMOVE:
		stat->zev_cnt_znode_remove++;
		break;
	case ZEV_OP_ZNODE_LINK:
		stat->zev_cnt_znode_link++;
		break;
	case ZEV_OP_ZNODE_SYMLINK:
		stat->zev_cnt_znode_symlink++;
		break;
	case ZEV_OP_ZNODE_RENAME:
		stat->zev_cnt_znode_rename++;
		break;
	case ZEV_OP_ZNODE_WRITE:
		stat->zev_cnt_znode_write++;
		break;
	case ZEV_OP_ZNODE_TRUNCATE:
		stat->zev_cnt_znode_truncate++;
		break;
	case ZEV_OP_ZNODE_SETATTR:
		stat->zev_cnt_znode_setattr++;
		break;
	case ZEV_OP_ZNODE_ACL:
		stat->zev_cnt_znode_acl++;
		break;
	}
}

void
zev_queue_message(int op, zev_msg_t *msg)
{
	zev_queue_t *q;
	int wakeup = 0;
	zev_msg_t *m;
	int i;

	msg->next = NULL;
	msg->prev = NULL;
	msg->read = 0;

	if (op < ZEV_OP_MIN || op > ZEV_OP_MAX) {
		zev_queue_error(op, "unknown op id encountered: %d", op);
		zev_free(msg, sizeof(*msg) + msg->size);
		return;
	}

	/*
	 * This mutex protects us against race conditions when several
	 * threads want to queue a message and one or more queues are
	 * full: we release zev_mutex to wait for the queues to become
	 * less-than-full, but we don't know in which order the waiting
	 * threads will be awoken. If it's not the same order in which
	 * they went to sleep we might mark different messages as "newest"
	 * in different queues, and so we might have dupes or even
	 * skip messages.
	 */
	mutex_enter(&zev_queue_msg_mutex);

	mutex_enter(&zev_mutex);

	/*
	 * When the module is loaded, the default behavior is to
	 * put all events into a queue and block if the queue is full.
	 * This is done even before the pseudo device is attached.
	 * This way, no events are lost.
	 *
	 * To discard events entirely the "beaver" queue,
	 * which never discards anything, has to be removed.
	 */

	if (zev_queue_cnt == 0) {
		mutex_exit(&zev_mutex);
		mutex_exit(&zev_queue_msg_mutex);
		return;
	}

	/* put message into global queue */
	msg->seq = zev_msg_sequence_number++;

	/* do we need to make room? */
	while (zev_statistics.zev_max_queue_len &&
	    zev_statistics.zev_queue_len > zev_statistics.zev_max_queue_len) {

		if (zev_have_blocking_queues) {
			/* queue full. block until it's been shrunk. */
			cv_wait(&zev_condvar, &zev_mutex);
			continue;
		}

		/* discard events until this message fits into all queues */

		for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
			q = zev_queues[i - ZEV_MINOR_MIN];
			if (!q)
				continue;
			/* discard msgs until queue is small enough */
			while (q->zq_queue_len &&
			    q->zq_queue_len > q->zq_max_queue_len) {
				m = q->zq_oldest;
				if (m == NULL)
					break;
				q->zq_events_discarded++;
				q->zq_bytes_discarded += m->size;
				q->zq_oldest = m->next;
				q->zq_queue_len -= m->size;
				q->zq_queue_messages--;
			}
		}

		zev_queue_trim();
		ASSERT(zev_statistics.zev_queue_len == 0 ||
		    zev_statistics.zev_queue_len <=
		    zev_statistics.zev_max_queue_len);
	}

	if (zev_queue_tail == NULL) {
		zev_queue_head = zev_queue_tail = msg;
	} else {
		zev_queue_tail->next = msg;
		msg->prev = zev_queue_tail;
		zev_queue_tail = msg;
	}
	zev_queue_len++;
	zev_statistics.zev_cnt_total_events++;
	zev_statistics.zev_queue_len += msg->size;

	/* update per-device queues */
	for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
		q = zev_queues[i - ZEV_MINOR_MIN];
		if (!q)
			continue;

		zev_queue_hold(q);

		/* make sure queue has enough room */
		while (q->zq_max_queue_len &&
		    q->zq_queue_len > q->zq_max_queue_len) {

			if (q->zq_flags & ZEV_FL_BLOCK_WHILE_QUEUE_FULL) {
				/* block until queue has been shrunk. */
				cv_wait(&zev_condvar, &zev_mutex);
			} else {
				/* discard msgs until queue is small enough */
				while (q->zq_queue_len > q->zq_max_queue_len) {
					m = q->zq_oldest;
					if (m == NULL)
						break;
					q->zq_events_discarded++;
					q->zq_bytes_discarded += m->size;
					q->zq_oldest = m->next;
					q->zq_queue_len -= m->size;
					q->zq_queue_messages--;
				}
			}
		}

		/* register new message at the end of the queue */
		q->zq_queue_len += msg->size;
		q->zq_queue_messages++;
		q->zq_bytes_total += msg->size;
		q->zq_events_total++;
		if (q->zq_oldest == NULL)
			q->zq_oldest = msg;

		zev_update_statistics(op, &q->zq_statistics);

		if (q->zq_queue_len > q->zq_wakeup_threshold)
			wakeup = 1;
		if (q->zq_queue_len == msg->size) /* queue was empty */
			cv_broadcast(&q->zq_condvar);

		zev_queue_release(q);
	}

	zev_queue_trim();

	zev_update_statistics(op, &zev_statistics);
	mutex_exit(&zev_mutex);
	mutex_exit(&zev_queue_msg_mutex);

	/* one or more queues need a pollwakeup() */
	if (op == ZEV_OP_MARK) {
		zev_poll_wakeup(B_TRUE);
	} else if (wakeup) {
		zev_poll_wakeup(B_FALSE);
	}

	return;
}

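/*
 * Producer sketch (illustrative, not driver code): callers build a
 * zev_msg_t with the record placed directly after the header, then hand
 * it to zev_queue_message(), which takes ownership and frees it if the
 * op is invalid. The mark record fields mirror the ZEV_IOC_MARK path in
 * zev_ioctl() below; the empty payload is an example value.
 */
#if 0
	zev_mark_t *rec;
	zev_msg_t *msg;
	int msg_size = sizeof(*rec) + 1;	/* record + '\0' terminator */

	msg = zev_alloc(sizeof(*msg) + msg_size);
	msg->size = msg_size;
	rec = (zev_mark_t *)(msg + 1);
	rec->record_len = msg_size;
	rec->op = ZEV_OP_MARK;
	rec->op_time = ddi_get_time();
	rec->guid = 0;
	rec->payload_len = 0;
	*(ZEV_PAYLOAD(rec) + rec->payload_len) = '\0';
	mutex_enter(&zev_mark_id_mutex);
	rec->mark_id = zev_mark_id++;
	mutex_exit(&zev_mark_id_mutex);
	zev_queue_message(ZEV_OP_MARK, msg);	/* queue takes ownership */
#endif
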
void
zev_queue_error(int op, char *fmt, ...)
{
	char buf[ZEV_MAX_MESSAGE_LEN];
	va_list ap;
	int len;
	zev_msg_t *msg = NULL;
	zev_error_t *rec;
	int msg_size;

	va_start(ap, fmt);
	len = vsnprintf(buf, sizeof(buf), fmt, ap);
	va_end(ap);
	if (len >= sizeof(buf)) {
		cmn_err(CE_WARN, "zev: can't report error - "
		    "dropping event entirely.");
		return;
	}

	msg_size = sizeof(*rec) + len + 1;
	msg = zev_alloc(sizeof(*msg) + msg_size);
	msg->size = msg_size;
	rec = (zev_error_t *)(msg + 1);
	rec->record_len = msg_size;
	rec->op = ZEV_OP_ERROR;
	rec->op_time = ddi_get_time();
	rec->guid = 0;
	rec->failed_op = op;
	rec->errstr_len = len;
	(void) memcpy(ZEV_ERRSTR(rec), buf, len + 1);

	zev_queue_message(ZEV_OP_ERROR, msg);
	return;
}

static int
zev_find_queue(zev_queue_t **out, zev_queue_t *req_q, zev_queue_name_t *name)
{
	char namebuf[ZEV_MAX_QUEUE_NAME_LEN+1];
	zev_queue_t *q;
	int i;

	*out = NULL;

	if (name->zev_namelen == 0) {
		if (req_q->zq_minor_number == ZEV_CONTROL_DEVICE_MINOR)
			return EINVAL;
		zev_queue_hold(req_q);
		*out = req_q;
		return 0;
	}

	if (name->zev_namelen > ZEV_MAX_QUEUE_NAME_LEN)
		return EINVAL;
	strncpy(namebuf, name->zev_name, name->zev_namelen);
	namebuf[name->zev_namelen] = '\0';

	mutex_enter(&zev_mutex);
	for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
		q = zev_queues[i - ZEV_MINOR_MIN];
		if (!q)
			continue;
		if (!strcmp(q->zq_name, namebuf)) {
			zev_queue_hold(q);
			mutex_exit(&zev_mutex);
			*out = q;
			return 0;
		}
	}
	mutex_exit(&zev_mutex);
	return ENOENT;
}

static int
zev_ioc_get_queue_statistics(zev_queue_t *req_q, intptr_t arg, int mode)
{
	zev_ioctl_get_queue_statistics_t gs;
	zev_queue_t *q;
	int ret;

	if (ddi_copyin((void *)arg, &gs, sizeof(gs), mode) != 0)
		return EFAULT;

	ret = zev_find_queue(&q, req_q, &gs.zev_queue_name);
	if (ret)
		return ret;

	/* ddi_copyout() can take a long time. Better make
	   a copy to be able to release the mutex faster. */
	mutex_enter(&zev_mutex);
	memcpy(&gs.zev_statistics, &q->zq_statistics,
	    sizeof(gs.zev_statistics));
	gs.zev_statistics.zev_queue_len = q->zq_queue_len;
	gs.zev_statistics.zev_bytes_read = q->zq_bytes_read;
	gs.zev_statistics.zev_bytes_discarded = q->zq_bytes_discarded;
	gs.zev_statistics.zev_max_queue_len = q->zq_max_queue_len;
	gs.zev_statistics.zev_cnt_discarded_events = q->zq_events_discarded;
	gs.zev_statistics.zev_cnt_total_events = q->zq_events_total;
	zev_queue_release(q);
	mutex_exit(&zev_mutex);

	if (ddi_copyout(&gs, (void *)arg, sizeof(gs), mode) != 0)
		return EFAULT;
	return 0;
}

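/*
 * Illustrative userspace sketch (assumed includes: <stdio.h>, <string.h>,
 * <sys/ioctl.h>, <sys/fs/zev.h>): fetch the statistics of the queue whose
 * device is open on fd. A zev_namelen of 0 selects the calling queue
 * itself, as implemented in zev_find_queue() above.
 */
#if 0
	zev_ioctl_get_queue_statistics_t gs;

	memset(&gs, 0, sizeof(gs));		/* namelen 0: this queue */
	if (ioctl(fd, ZEV_IOC_GET_QUEUE_STATISTICS, &gs) == 0)
		printf("queue length: %llu bytes\n",
		    (unsigned long long)gs.zev_statistics.zev_queue_len);
#endif
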
static int
zev_ioc_set_queue_properties(zev_queue_t *req_q, intptr_t arg, int mode)
{
	zev_ioctl_set_queue_properties_t qp;
	zev_queue_t *q;
	uint64_t old_max;
	uint64_t old_flags;
	int ret;

	if (ddi_copyin((void *)arg, &qp, sizeof(qp), mode) != 0)
		return EFAULT;
	if (qp.zev_max_queue_len > ZEV_MAX_QUEUE_LEN)
		return EINVAL;
	if (qp.zev_poll_wakeup_threshold > ZEV_MAX_POLL_WAKEUP_QUEUE_LEN)
		return EINVAL;

	ret = zev_find_queue(&q, req_q, &qp.zev_queue_name);
	if (ret)
		return ret;

	mutex_enter(&zev_mutex);

	/*
	 * Note: if the PERSISTENT flag is cleared, and the queue is not busy,
	 * the queue should be removed by zev_queue_release() in zev_ioctl().
	 */
	old_flags = q->zq_flags;
	q->zq_flags = qp.zev_flags;
	if ((old_flags & ZEV_FL_BLOCK_WHILE_QUEUE_FULL) &&
	    (!(qp.zev_flags & ZEV_FL_BLOCK_WHILE_QUEUE_FULL))) {
		/* queue is no longer blocking - wake blocked threads */
		cv_broadcast(&zev_condvar);
	}

	zev_update_blockflag();

	old_max = q->zq_max_queue_len;
	q->zq_max_queue_len = qp.zev_max_queue_len;
	if (q->zq_max_queue_len < old_max)
		zev_queue_trim();
	if (q->zq_max_queue_len > old_max)
		cv_broadcast(&zev_condvar);	/* threads may be waiting */

	if ((qp.zev_poll_wakeup_threshold < q->zq_wakeup_threshold) &&
	    (qp.zev_poll_wakeup_threshold <= q->zq_queue_len))
		pollwakeup(&q->zq_pollhead, POLLIN);
	q->zq_wakeup_threshold = qp.zev_poll_wakeup_threshold;

	zev_queue_release(q);
	mutex_exit(&zev_mutex);
	return 0;
}

static int
zev_ioc_get_queue_properties(zev_queue_t *req_q, intptr_t arg, int mode)
{
	zev_ioctl_get_queue_properties_t qp;
	zev_queue_t *q;
	int ret;

	if (ddi_copyin((void *)arg, &qp, sizeof(qp), mode) != 0)
		return EFAULT;

	ret = zev_find_queue(&q, req_q, &qp.zev_queue_name);
	if (ret)
		return ret;

	mutex_enter(&zev_mutex);
	qp.zev_max_queue_len = q->zq_max_queue_len;
	qp.zev_flags = q->zq_flags;
	qp.zev_poll_wakeup_threshold = q->zq_wakeup_threshold;
	zev_queue_release(q);
	mutex_exit(&zev_mutex);

	if (ddi_copyout(&qp, (void *)arg, sizeof(qp), mode) != 0)
		return EFAULT;
	return 0;
}

static int
zev_ioc_add_queue(zev_queue_t *req_q, intptr_t arg, int mode)
{
	zev_ioctl_add_queue_t aq;
	zev_queue_t *new_q;
	char name[ZEV_MAX_QUEUE_NAME_LEN+1];

	if (ddi_copyin((void *)arg, &aq, sizeof(aq), mode) != 0)
		return EFAULT;

	if (aq.zev_namelen > ZEV_MAX_QUEUE_NAME_LEN)
		return EINVAL;
	strncpy(name, aq.zev_name, aq.zev_namelen);
	name[aq.zev_namelen] = '\0';

	return zev_queue_new(&new_q, req_q->zq_dip, name,
	    aq.zev_max_queue_len, aq.zev_flags);
}

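/*
 * Illustrative userspace sketch (assumed includes as above): create a
 * new queue through the control device. ZEV_IOC_ADD_QUEUE is rejected
 * on ordinary queue devices; see the minor number check in zev_ioctl().
 * The name "backup" and ctrl_fd are example values.
 */
#if 0
	zev_ioctl_add_queue_t aq;

	memset(&aq, 0, sizeof(aq));
	aq.zev_namelen = strlen("backup");
	memcpy(aq.zev_name, "backup", aq.zev_namelen);
	aq.zev_max_queue_len = 0;		/* 0 = driver default */
	aq.zev_flags = ZEV_FL_PERSISTENT;
	if (ioctl(ctrl_fd, ZEV_IOC_ADD_QUEUE, &aq) != 0)
		perror("ZEV_IOC_ADD_QUEUE");
#endif
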
static int
zev_ioc_remove_queue(zev_queue_t *req_q, intptr_t arg, int mode)
{
	zev_ioctl_remove_queue_t rq;
	zev_queue_t *q;
	char name[ZEV_MAX_QUEUE_NAME_LEN+1];
	int found = 0;
	int i;

	if (ddi_copyin((void *)arg, &rq, sizeof(rq), mode) != 0)
		return EFAULT;

	if (rq.zev_queue_name.zev_namelen > ZEV_MAX_QUEUE_NAME_LEN)
		return EINVAL;
	strncpy(name, rq.zev_queue_name.zev_name,
	    rq.zev_queue_name.zev_namelen);
	name[rq.zev_queue_name.zev_namelen] = '\0';

	mutex_enter(&zev_mutex);
	for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
		q = zev_queues[i - ZEV_MINOR_MIN];
		if (!q)
			continue;
		if (!strcmp(q->zq_name, name)) {
			found = 1;
			break;
		}
	}
	if (!found) {
		mutex_exit(&zev_mutex);
		return ENOENT;
	}

	if (q->zq_busy) {
		mutex_exit(&zev_mutex);
		return EBUSY;
	}
	/*
	 * clear flags, so that persistent queues are removed as well
	 * and the queue becomes non-blocking.
	 */
	q->zq_flags = 0;
	if (q->zq_to_be_removed == B_FALSE) {
		q->zq_to_be_removed = B_TRUE;
		zev_queue_release(q);
	}
	/* some threads might be waiting for this queue to become writable */
	cv_broadcast(&zev_condvar);

	mutex_exit(&zev_mutex);
	return 0;
}

static int
zev_ioc_get_debug_info(zev_queue_t *req_q, intptr_t arg, int mode)
{
	zev_ioctl_debug_info_t di;
	uint64_t mem_allocated = atomic_add_64_nv(&zev_memory_allocated, 0);
	uint64_t mem_freed = atomic_add_64_nv(&zev_memory_freed, 0);

	zev_chksum_stats(&di.zev_chksum_cache_size,
	    &di.zev_chksum_cache_hits,
	    &di.zev_chksum_cache_misses);
	di.zev_memory_allocated = mem_allocated - mem_freed;
	if (ddi_copyout(&di, (void *)arg, sizeof(di), mode) != 0)
		return EFAULT;
	return 0;
}

static int
zev_ioc_get_queue_list(zev_queue_t *req_q, intptr_t arg, int mode)
{
	zev_ioctl_get_queue_list_t gql;
	zev_queue_t *q;
	int i = 0;
	int count = 0;

	memset(&gql, 0, sizeof(gql));

	mutex_enter(&zev_mutex);
	for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
		q = zev_queues[i - ZEV_MINOR_MIN];
		if (!q)
			continue;
		strncpy(gql.zev_queue_name[count].zev_name,
		    q->zq_name, ZEV_MAX_QUEUE_NAME_LEN);
		gql.zev_queue_name[count].zev_namelen = strlen(q->zq_name);
		count++;
	}
	gql.zev_n_queues = count;
	mutex_exit(&zev_mutex);

	if (ddi_copyout(&gql, (void *)arg, sizeof(gql), mode) != 0)
		return EFAULT;
	return 0;
}

static int
zev_ioc_set_max_queue_len(zev_queue_t *req_q, intptr_t arg, int mode)
{
	uint64_t len;
	int i;
	zev_queue_t *q;

	if (ddi_copyin((void *)arg, &len, sizeof(len), mode) != 0) {
		return EFAULT;
	}
	if (len > ZEV_MAX_QUEUE_LEN) {
		return EINVAL;
	}
	mutex_enter(&zev_mutex);
	zev_statistics.zev_max_queue_len = len;
	for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
		q = zev_queues[i - ZEV_MINOR_MIN];
		if (!q)
			continue;
		if (q->zq_max_queue_len <=
		    zev_statistics.zev_max_queue_len)
			continue;
		q->zq_max_queue_len = zev_statistics.zev_max_queue_len;
	}
	cv_broadcast(&zev_condvar);
	mutex_exit(&zev_mutex);
	return 0;
}

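/*
 * Illustrative userspace sketch: cap the global queue at 1 MB. Any
 * per-queue limit above the new cap is lowered to match, as implemented
 * in zev_ioc_set_max_queue_len() above. fd is an example descriptor.
 */
#if 0
	uint64_t len = 1024 * 1024;	/* 1 MB */

	if (ioctl(fd, ZEV_IOC_SET_MAX_QUEUE_LEN, &len) != 0)
		perror("ZEV_IOC_SET_MAX_QUEUE_LEN");
#endif
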
/* ARGSUSED */
static int
zev_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
{
	zev_statistics_t zs;
	zev_ioctl_poolarg_t pa;
	zev_ioctl_mark_t mark;
	zev_mark_t *rec;
	int msg_size;
	zev_msg_t *msg;
	uint64_t mark_id;
	minor_t minor;
	zev_queue_t *req_q;
	int ret = 0;

	minor = getminor(dev);
	mutex_enter(&zev_mutex);
	if ((req_q = ddi_get_soft_state(statep, minor)) == NULL) {
		mutex_exit(&zev_mutex);
		return (ENXIO);
	}
	zev_queue_hold(req_q);
	mutex_exit(&zev_mutex);
	/*
	 * all structures passed between kernel and userspace
	 * are now compatible between 64 and 32 bit. Model
	 * conversion can be ignored.
	 */
	switch (cmd) {
	case ZEV_IOC_GET_GLOBAL_STATISTICS:
		/* ddi_copyout() can take a long time. Better make
		   a copy to be able to release the mutex faster. */
		mutex_enter(&zev_mutex);
		(void) memcpy(&zs, &zev_statistics, sizeof(zs));
		mutex_exit(&zev_mutex);
		if (ddi_copyout(&zs, (void *)arg, sizeof(zs), mode) != 0)
			ret = EFAULT;
		break;
	case ZEV_IOC_GET_QUEUE_STATISTICS:
		ret = zev_ioc_get_queue_statistics(req_q, arg, mode);
		break;
	case ZEV_IOC_MUTE_POOL:
	case ZEV_IOC_UNMUTE_POOL:
		if (ddi_copyin((void *)arg, &pa, sizeof(pa), mode) != 0) {
			ret = EFAULT;
			break;
		}
		if (pa.zev_poolname_len >= MAXPATHLEN) {
			ret = EINVAL;
			break;
		}
		pa.zev_poolname[pa.zev_poolname_len] = '\0';
		if (cmd == ZEV_IOC_MUTE_POOL) {
			ret = zev_ioc_mute_pool(pa.zev_poolname);
		} else {
			ret = zev_ioc_unmute_pool(pa.zev_poolname);
		}
		break;
	case ZEV_IOC_SET_MAX_QUEUE_LEN:
		ret = zev_ioc_set_max_queue_len(req_q, arg, mode);
		break;
	case ZEV_IOC_GET_QUEUE_PROPERTIES:
		ret = zev_ioc_get_queue_properties(req_q, arg, mode);
		break;
	case ZEV_IOC_SET_QUEUE_PROPERTIES:
		ret = zev_ioc_set_queue_properties(req_q, arg, mode);
		break;
	case ZEV_IOC_MARK:
		if (ddi_copyin((void *)arg, &mark, sizeof(mark), mode) != 0) {
			ret = EFAULT;
			break;
		}
		/* prepare message */
		msg_size = sizeof(*rec) + mark.zev_payload_len + 1;
		msg = zev_alloc(sizeof(*msg) + msg_size);
		msg->size = msg_size;
		rec = (zev_mark_t *)(msg + 1);
		rec->record_len = msg_size;
		rec->op = ZEV_OP_MARK;
		rec->op_time = ddi_get_time();
		rec->guid = mark.zev_guid;
		rec->payload_len = mark.zev_payload_len;
		/* get payload */
		if (ddi_copyin(((char *)arg) + sizeof(mark),
		    ZEV_PAYLOAD(rec),
		    mark.zev_payload_len, mode) != 0) {
			zev_free(msg, sizeof(*msg) + msg_size);
			ret = EFAULT;
			break;
		}
		*(ZEV_PAYLOAD(rec) + mark.zev_payload_len) = '\0';
		/* get mark id and queue message */
		mutex_enter(&zev_mark_id_mutex);
		mark_id = zev_mark_id++;
		mutex_exit(&zev_mark_id_mutex);
		rec->mark_id = mark_id;
		zev_queue_message(ZEV_OP_MARK, msg);
		/* report mark id to userland, ignore errors */
		mark.zev_mark_id = mark_id;
		ddi_copyout(&mark, (void *)arg, sizeof(mark), mode);
		break;
	case ZEV_IOC_ADD_QUEUE:
		if (minor != ZEV_CONTROL_DEVICE_MINOR) {
			ret = EACCES;
			break;
		}
		ret = zev_ioc_add_queue(req_q, arg, mode);
		break;
	case ZEV_IOC_REMOVE_QUEUE:
		if (minor != ZEV_CONTROL_DEVICE_MINOR) {
			ret = EACCES;
			break;
		}
		ret = zev_ioc_remove_queue(req_q, arg, mode);
		break;
	case ZEV_IOC_GET_DEBUG_INFO:
		ret = zev_ioc_get_debug_info(req_q, arg, mode);
		break;
	case ZEV_IOC_GET_QUEUE_LIST:
		ret = zev_ioc_get_queue_list(req_q, arg, mode);
		break;
	case ZEV_IOC_GET_FILE_SIGNATURES:
		ret = zev_ioc_get_signatures(arg, mode);
		break;
	default:
		/* generic "ioctl unknown" error */
		ret = ENOTTY;
	}

	mutex_enter(&zev_mutex);
	zev_queue_release(req_q);
	mutex_exit(&zev_mutex);
	if (ret)
		ret = SET_ERROR(ret);
	return (ret);
}

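/*
 * Illustrative userspace sketch for ZEV_IOC_MARK (assumed includes as
 * above): the payload bytes follow the zev_ioctl_mark_t header in the
 * same buffer, and the driver writes the assigned mark id back into the
 * header. The payload "sync-point" and fd are example values.
 */
#if 0
	const char *payload = "sync-point";
	size_t plen = strlen(payload);
	char buf[sizeof(zev_ioctl_mark_t) + plen];
	zev_ioctl_mark_t *mark = (zev_ioctl_mark_t *)buf;

	memset(mark, 0, sizeof(*mark));
	mark->zev_guid = 0;			/* example value */
	mark->zev_payload_len = plen;
	memcpy(buf + sizeof(*mark), payload, plen);
	if (ioctl(fd, ZEV_IOC_MARK, mark) == 0)
		printf("mark id: %llu\n",
		    (unsigned long long)mark->zev_mark_id);
#endif
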
static int
zev_chpoll(dev_t dev, short events, int anyyet,
    short *reventsp, struct pollhead **phpp)
{
	int minor;
	short revent = 0;
	zev_queue_t *q;

	/* use minor-specific queue context and its pollhead */
	minor = getminor(dev);
	if (minor == ZEV_CONTROL_DEVICE_MINOR)
		return (EINVAL);
	mutex_enter(&zev_mutex);
	if ((q = ddi_get_soft_state(statep, minor)) == NULL) {
		mutex_exit(&zev_mutex);
		return (ENXIO);
	}
	revent = 0;
	if ((events & POLLIN)) {
		if (q->zq_oldest)
			revent |= POLLIN;
	}
	if (revent == 0) {
		if (!anyyet) {
			*phpp = &q->zq_pollhead;
		}
	}
	*reventsp = revent;
	mutex_exit(&zev_mutex);
	return (0);
}

/* ARGSUSED */
static int
zev_read(dev_t dev, struct uio *uio_p, cred_t *crep_p)
{
	minor_t minor;
	offset_t off;
	int ret = 0;
	zev_msg_t *msg;
	char *data;
	zev_queue_t *q;

	minor = getminor(dev);
	if (minor == ZEV_CONTROL_DEVICE_MINOR)
		return (EINVAL);

	mutex_enter(&zev_mutex);
	q = ddi_get_soft_state(statep, minor);
	if (q == NULL) {
		mutex_exit(&zev_mutex);
		return (ENXIO);
	}
	off = uio_p->uio_loffset;
	msg = q->zq_oldest;
	while (msg == NULL) {
		if (!ddi_can_receive_sig()) {
			/*
			 * read() shouldn't block because this thread
			 * can't receive signals. (e.g., it might be
			 * torn down by exit() right now.)
			 */
			mutex_exit(&zev_mutex);
			return 0;
		}
		if (cv_wait_sig(&q->zq_condvar, &zev_mutex) == 0) {
			/* signal received. */
			mutex_exit(&zev_mutex);
			return EINTR;
		}
		msg = q->zq_oldest;
	}
	if (msg->size > uio_p->uio_resid) {
		mutex_exit(&zev_mutex);
		return E2BIG;
	}
	while (msg && uio_p->uio_resid >= msg->size) {
		data = (char *)(msg + 1);
		ret = uiomove(data, msg->size, UIO_READ, uio_p);
		if (ret != 0) {
			mutex_exit(&zev_mutex);
			cmn_err(CE_WARN, "zev: uiomove failed; messages lost");
			uio_p->uio_loffset = off;
			return (ret);
		}
		q->zq_oldest = msg->next;
		q->zq_bytes_read += msg->size;
		q->zq_queue_len -= msg->size;
		q->zq_queue_messages--;
		msg->read++;
		msg = q->zq_oldest;
	}
	zev_queue_trim();
	cv_broadcast(&zev_condvar);
	mutex_exit(&zev_mutex);
	uio_p->uio_loffset = off;
	return 0;
}

/* ARGSUSED */
static int
zev_close(dev_t dev, int flag, int otyp, cred_t *crepd)
{
	zev_queue_t *q;
	int minor;

	minor = getminor(dev);
	if (otyp != OTYP_CHR)
		return (EINVAL);
	mutex_enter(&zev_mutex);
	if ((q = ddi_get_soft_state(statep, minor)) == NULL) {
		mutex_exit(&zev_mutex);
		return (ENXIO);
	}
	if (q->zq_busy != B_TRUE) {
		mutex_exit(&zev_mutex);
		return (EINVAL);
	}
	q->zq_busy = B_FALSE;
	if ((q->zq_flags & ZEV_FL_PERSISTENT) == 0)
		zev_queue_release(q);
	mutex_exit(&zev_mutex);
	return (0);
}

/* ARGSUSED */
static int
zev_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
	zev_queue_t *q;
	minor_t minor;

	minor = getminor(*devp);
	if (otyp != OTYP_CHR)
		return (EINVAL);
	if (drv_priv(credp) != 0)
		return (EPERM);
	mutex_enter(&zev_mutex);
	if ((q = ddi_get_soft_state(statep, minor)) == NULL) {
		mutex_exit(&zev_mutex);
		return (ENXIO);
	}
	if (minor == ZEV_CONTROL_DEVICE_MINOR) {
		/* control device may be used in parallel */
		q->zq_busy = B_TRUE;
		mutex_exit(&zev_mutex);
		return 0;
	}
	if (q->zq_busy == B_TRUE) {
		mutex_exit(&zev_mutex);
		return (EBUSY);
	}
	q->zq_busy = B_TRUE; /* can only be opened exclusively */
	mutex_exit(&zev_mutex);
	return (0);
}

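/*
 * Illustrative userspace consumer sketch (assumed includes: <poll.h>,
 * <unistd.h>, <sys/fs/zev.h>): read() only returns whole event records
 * and fails with E2BIG if the buffer is smaller than the next record, so
 * a buffer of ZEV_MAX_MESSAGE_LEN (4096) bytes is always sufficient.
 * fd is an assumed open queue device descriptor.
 */
#if 0
	struct pollfd pfd = { fd, POLLIN, 0 };
	char buf[ZEV_MAX_MESSAGE_LEN];
	ssize_t n;

	while (poll(&pfd, 1, -1) > 0) {
		n = read(fd, buf, sizeof(buf));
		if (n <= 0)
			continue;
		/* buf now holds one or more complete event records */
	}
#endif
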
static struct cb_ops zev_cb_ops = {
	zev_open,		/* open */
	zev_close,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	zev_read,		/* read */
	nodev,			/* write */
	zev_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	zev_chpoll,		/* chpoll */
	ddi_prop_op,		/* prop_op */
	NULL,			/* streamtab */
	D_MP | D_64BIT,		/* cb_flag */
	CB_REV,			/* cb_rev */
	nodev,			/* aread */
	nodev,			/* awrite */
};

static void
zev_free_instance(dev_info_t *dip)
{
	int instance;
	zev_queue_t *q;
	int i;

	instance = ddi_get_instance(dip);
	if (instance != 0) {
		cmn_err(CE_WARN, "zev: tried to free instance != 0 (%d)",
		    instance);
		return;
	}

	ddi_remove_minor_node(dip, NULL);

	/* stop pollwakeup thread */
	zev_wakeup_thread_run = 0;
	if (zev_poll_wakeup_thread != NULL) {
		thread_join(zev_poll_wakeup_thread->t_did);
		zev_poll_wakeup_thread = NULL;
	}

	mutex_enter(&zev_mutex);

	/* remove "ctrl" dummy queue */
	q = ddi_get_soft_state(statep, ZEV_CONTROL_DEVICE_MINOR);
	if (q) {
		ddi_soft_state_free(statep, ZEV_CONTROL_DEVICE_MINOR);
		ZEV_MEM_SUB(sizeof(zev_queue_t));
	}

	/* remove all other queues */
	for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
		q = zev_queues[i - ZEV_MINOR_MIN];
		if (!q)
			continue;
		ASSERT(q->zq_refcnt == 1);
		zev_queue_release(q);
	}
	zev_queue_trim();
	bzero(&zev_queues, sizeof(zev_queues));

	mutex_exit(&zev_mutex);
}

static int
zev_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int instance;
	zev_queue_t *q;

	/* called once per instance with DDI_DETACH,
	   may be called to suspend */
	switch (cmd) {
	case DDI_DETACH:
		/* instance busy? */
		instance = ddi_get_instance(dip);
		if (instance != 0) {	/* hardcoded in zev.conf */
			/* this module only supports one instance. */
			return (DDI_FAILURE);
		}

		mutex_enter(&zev_mutex);
		if (!zev_attached) {
			mutex_exit(&zev_mutex);
			return (DDI_FAILURE);
		}

		/* check "ctrl" queue to see if it is busy */
		q = ddi_get_soft_state(statep, ZEV_CONTROL_DEVICE_MINOR);
		if (q == NULL) {
			mutex_exit(&zev_mutex);
			return (DDI_FAILURE);
		}
		if (q->zq_busy) {
			mutex_exit(&zev_mutex);
			return (DDI_FAILURE);
		}
		/* are there any queues? */
		if (zev_queue_cnt > 0) {
			mutex_exit(&zev_mutex);
			return (DDI_FAILURE);
		}

		zev_attached = B_FALSE;
		mutex_exit(&zev_mutex);

		/* switch ZFS event callbacks back to default */
		rw_enter(&rz_zev_rwlock, RW_WRITER);
		rz_zev_callbacks = rz_zev_default_callbacks;
		rz_zev_set_active(B_FALSE);
		rw_exit(&rz_zev_rwlock);

		/* no thread is inside of the callbacks anymore. */

		/* free resources allocated for this instance */
		zev_free_instance(dip);
		zev_chksum_fini();
#if 0
		cmn_err(CE_WARN, "zev: allocated memory at detach: %" PRIu64,
		    zev_memory_allocated - zev_memory_freed);
#endif
		return (DDI_SUCCESS);
	case DDI_SUSPEND:
		/* kernel must not suspend zev devices while ZFS is running */
		return (DDI_FAILURE);
	default:
		return (DDI_FAILURE);
	}
}

static int
zev_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	/* called once per instance with DDI_ATTACH,
	   may be called to resume */
	int instance;
	int error;
	zev_queue_t *q;
	switch (cmd) {
	case DDI_ATTACH:
		/* create instance state */
		instance = ddi_get_instance(dip);
		if (instance != 0) {	/* hardcoded in zev.conf */
			/* this module only supports one instance. */
			return (DDI_FAILURE);
		}

		mutex_enter(&zev_mutex);
		if (zev_attached) {
			mutex_exit(&zev_mutex);
			return (DDI_FAILURE);
		}
		if (ddi_soft_state_zalloc(statep, ZEV_CONTROL_DEVICE_MINOR) !=
		    DDI_SUCCESS) {
			mutex_exit(&zev_mutex);
			return (DDI_FAILURE);
		}
		ZEV_MEM_ADD(sizeof(zev_queue_t));
		zev_attached = B_TRUE;

		/* init queue list */
		bzero(&zev_queues, sizeof(zev_queues));
		mutex_exit(&zev_mutex);

		/* create a dummy queue for management of "ctrl" */

		q = ddi_get_soft_state(statep, ZEV_CONTROL_DEVICE_MINOR);
		q->zq_dip = dip;
		q->zq_refcnt = 1;
		q->zq_busy = B_FALSE;
		q->zq_minor_number = ZEV_CONTROL_DEVICE_MINOR;
		q->zq_flags = ZEV_FL_PERSISTENT;
		strcpy(q->zq_name, ZEV_CONTROL_DEVICE_NAME);

		/* create device node for "ctrl" */
		if (ddi_create_minor_node(dip, ZEV_CONTROL_DEVICE_NAME,
		    S_IFCHR, ZEV_CONTROL_DEVICE_MINOR,
		    DDI_PSEUDO, 0) == DDI_FAILURE) {
			goto fail;
		}

		/* note: intentionally not adding ctrl queue to queue list. */

		/* default queue */
		error = zev_queue_new(&q, dip,
		    ZEV_DEFAULT_QUEUE_NAME,
		    ZEV_MAX_QUEUE_LEN,
		    ZEV_FL_BLOCK_WHILE_QUEUE_FULL |
		    ZEV_FL_PERSISTENT);
		if (error)
			goto fail;

		/* start pollwakeup thread */
		zev_wakeup_thread_run = 1;
		zev_poll_wakeup_thread = thread_create(NULL, 0,
		    zev_poll_wakeup_thread_main, NULL, 0, &p0,
		    TS_RUN, minclsyspri);

		ddi_report_dev(dip);

		zev_chksum_init();

		/* switch ZFS event callbacks to zev module callbacks */
		rw_enter(&rz_zev_rwlock, RW_WRITER);
		rz_zev_callbacks = &zev_callbacks;
		rz_zev_set_active(B_TRUE);
		rw_exit(&rz_zev_rwlock);

		return (DDI_SUCCESS);
	case DDI_RESUME:
		/* suspending zev devices should never happen */
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}
fail:
	cmn_err(CE_WARN, "zev: attach failed");
	zev_free_instance(dip);
	mutex_enter(&zev_mutex);
	zev_attached = B_FALSE;
	mutex_exit(&zev_mutex);
	return (DDI_FAILURE);
}

/* ARGSUSED */
static int
zev_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **resultp)
{
	minor_t minor;
	zev_queue_t *q;

	/* arg is dev_t */
	minor = getminor((dev_t)arg);
	mutex_enter(&zev_mutex);
	q = ddi_get_soft_state(statep, minor);
	if (q == NULL) {
		*resultp = NULL;
		mutex_exit(&zev_mutex);
		return (DDI_FAILURE);
	}

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*resultp = q->zq_dip;
		break;
	case DDI_INFO_DEVT2INSTANCE:
		*resultp = (void *)(uintptr_t)ddi_get_instance(q->zq_dip);
		break;
	default:
		mutex_exit(&zev_mutex);
		return (DDI_FAILURE);
	}
	mutex_exit(&zev_mutex);
	return (DDI_SUCCESS);
}

static struct dev_ops zev_dev_ops = {
	DEVO_REV,			/* driver build revision */
	0,				/* driver reference count */
	zev_getinfo,			/* getinfo */
	nulldev,			/* identify (obsolete) */
	nulldev,			/* probe (search for devices) */
	zev_attach,			/* attach */
	zev_detach,			/* detach */
	nodev,				/* reset (obsolete, use quiesce) */
	&zev_cb_ops,			/* character and block device ops */
	NULL,				/* bus driver ops */
	NULL,				/* power management, not needed */
	ddi_quiesce_not_needed,		/* quiesce */
};

static struct modldrv zev_modldrv = {
	&mod_driverops,			/* all loadable modules use this */
	"zev ZFS event provider, v1.0",	/* driver name and version info */
	&zev_dev_ops			/* ops method pointers */
};

static struct modlinkage zev_modlinkage = {
	MODREV_1,	/* fixed value */
	{
		&zev_modldrv,	/* driver linkage structure */
		NULL		/* list terminator */
	}
};

int
_init(void)
{
	int error;

	if ((error = ddi_soft_state_init(&statep, sizeof(zev_queue_t), 1)) != 0)
		return (error);
	zev_attached = B_FALSE;

	zev_queue_head = NULL;
	zev_queue_tail = NULL;
	zev_queue_len = 0;
	zev_muted_pools_head = NULL;
	zev_memory_allocated = 0;
	zev_memory_freed = 0;
	zev_queue_cnt = 0;
	zev_have_blocking_queues = 1;

	mutex_init(&zev_mutex, NULL, MUTEX_DRIVER, NULL);
	cv_init(&zev_condvar, NULL, CV_DRIVER, NULL);
	rw_init(&zev_pool_list_rwlock, NULL, RW_DRIVER, NULL);
	mutex_init(&zev_mark_id_mutex, NULL, MUTEX_DRIVER, NULL);
	zev_mark_id = gethrtime();
	mutex_init(&zev_queue_msg_mutex, NULL, MUTEX_DRIVER, NULL);
	zev_msg_sequence_number = gethrtime();
	bzero(&zev_statistics, sizeof(zev_statistics));
	bzero(&zev_pollhead, sizeof(zev_pollhead));
	bzero(&zev_queues, sizeof(zev_queues));
	zev_statistics.zev_max_queue_len = ZEV_MAX_QUEUE_LEN;
	if ((error = zev_ioc_mute_pool("zg0")) != 0) {
		cmn_err(CE_WARN, "zev: could not init mute list");
		goto FAIL;
	}

	if ((error = mod_install(&zev_modlinkage)) != 0) {
		cmn_err(CE_WARN, "zev: could not install module");
		goto FAIL;
	}

	return (0);
FAIL:
	/* free resources */
	cmn_err(CE_WARN, "zev: _init failed");
	mutex_destroy(&zev_mutex);
	ddi_soft_state_fini(&statep);
	return (error);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&zev_modlinkage, modinfop));
}

int
_fini(void)
{
	int error = 0;
	zev_msg_t *msg;
	zev_pool_list_entry_t *pe, *npe;

	mutex_enter(&zev_mutex);
	if (zev_attached == B_TRUE) {
		mutex_exit(&zev_mutex);
		return (SET_ERROR(EBUSY));
	}
	if (zev_queue_cnt != 0) {
		/* should never happen */
		mutex_exit(&zev_mutex);
		return (SET_ERROR(EBUSY));
	}

	/*
	 * avoid deadlock if event list is full: make sure threads currently
	 * blocking on the event list can append their event and then release
	 * rz_zev_rwlock. Since there should be no queues left when we
	 * reach this point we can simply empty the event list and then
	 * wake everybody.
	 */
	while (zev_queue_head) {
		msg = zev_queue_head;
		zev_queue_head = msg->next;
		zev_free(msg, sizeof(*msg) + msg->size);
	}
	cv_broadcast(&zev_condvar);
	mutex_exit(&zev_mutex);

	/* switch ZFS event callbacks back to default (again) */
	rw_enter(&rz_zev_rwlock, RW_WRITER);
	rz_zev_callbacks = rz_zev_default_callbacks;
	rz_zev_set_active(B_FALSE);
	rw_exit(&rz_zev_rwlock);

	/* no thread is inside of the callbacks anymore. Safe to remove. */

	/* unload module callbacks */
	if ((error = mod_remove(&zev_modlinkage)) != 0) {
		cmn_err(CE_WARN, "mod_remove failed: %d", error);
		return (error);
	}

	/* free resources */
	mutex_enter(&zev_mutex);
	while (zev_queue_head) {
		msg = zev_queue_head;
		zev_queue_head = msg->next;
		zev_free(msg, sizeof(*msg) + msg->size);
	}
	mutex_exit(&zev_mutex);
	rw_enter(&zev_pool_list_rwlock, RW_WRITER);
	pe = zev_muted_pools_head;
	while (pe) {
		npe = pe;
		pe = pe->next;
		zev_free(npe, sizeof(*npe));
	}
	rw_exit(&zev_pool_list_rwlock);
	ddi_soft_state_fini(&statep);
	rw_destroy(&zev_pool_list_rwlock);
	cv_destroy(&zev_condvar);
	mutex_destroy(&zev_mutex);
	mutex_destroy(&zev_mark_id_mutex);
	mutex_destroy(&zev_queue_msg_mutex);

	return (0);
}