#include <sys/modctl.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/conf.h>
#include <sys/devops.h>
#include <sys/stat.h>
#include <sys/fs/zev.h>
#include <sys/zev_callbacks.h>
#include <sys/zev_checksums.h>
#include <sys/zfs_znode.h>
#include <sys/time.h>
#include <sys/sa.h>
#include <sys/zap.h>
#include <sys/fs/dv_node.h>

#define	OFFSETOF(s, m)	((size_t)(&(((s *)0)->m)))

#define	XSTRING(x)	STRING(x)
#define	STRING(x)	#x

#define	ZEV_DEFAULT_QUEUE_NAME		"beaver"
#define	ZEV_CONTROL_DEVICE_MINOR	0
#define	ZEV_TMPQUEUE_DEVICE_MINOR	1
#define	ZEV_MINOR_MIN			(ZEV_TMPQUEUE_DEVICE_MINOR + 1)
#define	ZEV_MINOR_MAX			(ZEV_MINOR_MIN + ZEV_MAX_QUEUES - 1)

typedef struct zev_queue {
	char			zq_name[ZEV_MAX_QUEUE_NAME_LEN+1];
	minor_t			zq_minor_number;
	dev_info_t		*zq_dip;
	struct pollhead		zq_pollhead;
	uint64_t		zq_bytes_read;
	uint64_t		zq_events_read;
	uint64_t		zq_bytes_discarded;
	uint64_t		zq_events_discarded;
	uint64_t		zq_bytes_total;
	uint64_t		zq_events_total;
	uint64_t		zq_wakeup_threshold;
	uint16_t		zq_flags;
	uint16_t		zq_need_wakeup;
	/* protected by zev_mutex */
	int			zq_refcnt;
	uint64_t		zq_queue_len;
	uint64_t		zq_queue_messages;
	uint64_t		zq_max_queue_len;
	zev_msg_t		*zq_oldest;
	boolean_t		zq_busy;
	boolean_t		zq_to_be_removed;
	zev_statistics_t	zq_statistics;
	kcondvar_t		zq_condvar;
} zev_queue_t;

static void		*statep;
struct pollhead		zev_pollhead;

kmutex_t		zev_mutex;
kcondvar_t		zev_condvar;
kmutex_t		zev_queue_msg_mutex;
krwlock_t		zev_pool_list_rwlock;
static zev_statistics_t	zev_statistics;
static boolean_t	zev_attached;
static kmutex_t		zev_mark_id_mutex;
static uint64_t		zev_mark_id = 0;

static uint64_t		zev_msg_sequence_number = 0;
static zev_queue_t	*zev_queues[ZEV_MAX_QUEUES];
static int		zev_queue_cnt = 0;
static int		zev_have_blocking_queues = 1;
static int		zev_tmpqueue_num = 0;

uint64_t	zev_memory_allocated = 0;
uint64_t	zev_memory_freed = 0;

/*
 * The longest potential message comes from zev_zfs_mount() and
 * contains the mountpoint, which might be close to MAXPATHLEN bytes long.
 *
 * Another candidate comes from zev_znode_rename_cb() and contains three
 * inode numbers and two filenames of up to MAXNAMELEN bytes each.
 */
#define	ZEV_MAX_MESSAGE_LEN	4096

static zev_msg_t *zev_queue_head = NULL;
static zev_msg_t *zev_queue_tail = NULL;
static uint64_t zev_queue_len = 0;

typedef struct zev_pool_list_entry {
	struct zev_pool_list_entry	*next;
	char				name[MAXPATHLEN];
} zev_pool_list_entry_t;

static zev_pool_list_entry_t *zev_muted_pools_head = NULL;

static volatile int zev_wakeup_thread_run = 1;
static kthread_t *zev_poll_wakeup_thread = NULL;

void *
zev_alloc(ssize_t sz)
{
	ZEV_MEM_ADD(sz);
	return kmem_alloc(sz, KM_SLEEP);
}

void *
zev_zalloc(ssize_t sz)
{
	ZEV_MEM_ADD(sz);
	return kmem_zalloc(sz, KM_SLEEP);
}

void
zev_free(void *ptr, ssize_t sz)
{
	ZEV_MEM_SUB(sz);
	kmem_free(ptr, sz);
}

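/*
 * Convention used throughout this file: an event is a single allocation
 * consisting of a zev_msg_t header immediately followed by the record
 * payload, so one zev_alloc()/zev_free() pair covers both.  A minimal
 * sketch of the pattern (see zev_queue_error() for a real instance;
 * "payload_size" is just a placeholder name):
 *
 *	zev_msg_t *msg = zev_alloc(sizeof(*msg) + payload_size);
 *	msg->size = payload_size;
 *	(fill the record at (char *)(msg + 1), then hand it off:)
 *	zev_queue_message(op, msg);
 *
 * Note that zev_free() must be passed the same total size,
 * sizeof(*msg) + msg->size, to keep the ZEV_MEM_* accounting balanced.
 */
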
/* must be called with zev_mutex held */
static void
zev_update_blockflag(void)
{
	zev_queue_t *q;
	int had_blocking_queues;
	int i;

	had_blocking_queues = zev_have_blocking_queues;

	/* do we still have blocking queues? */
	zev_have_blocking_queues = 0;
	for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
		q = zev_queues[i - ZEV_MINOR_MIN];
		if (!q)
			continue;
		if (q->zq_flags & ZEV_FL_BLOCK_WHILE_QUEUE_FULL) {
			zev_have_blocking_queues = 1;
			break;
		}
	}
	/* blocking queues may have gone away; wake up any waiting threads */
	if (had_blocking_queues)
		cv_broadcast(&zev_condvar);
}

int
zev_queue_cmp(const void *a, const void *b)
{
	const zev_queue_t *qa = a;
	const zev_queue_t *qb = b;
	if (qa->zq_minor_number > qb->zq_minor_number)
		return 1;
	if (qa->zq_minor_number < qb->zq_minor_number)
		return -1;
	return 0;
}

/* must be called with zev_mutex held */
void
zev_queue_trim(void)
{
	zev_msg_t *m;
	uint64_t oldest_message;
	zev_queue_t *q;
	int i;

	if (!zev_queue_tail)
		return;

	oldest_message = zev_queue_tail->seq + 1;	/* does not exist, yet. */
	for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
		q = zev_queues[i - ZEV_MINOR_MIN];
		if (q == NULL)
			continue;
		if (!q->zq_oldest)
			continue;
		if (oldest_message > q->zq_oldest->seq)
			oldest_message = q->zq_oldest->seq;
	}

	/* remove msgs between oldest_message and zev_queue_head */
	while (zev_queue_head && (oldest_message > zev_queue_head->seq)) {
		m = zev_queue_head;
		zev_queue_head = m->next;
		if (zev_queue_head == NULL) {
			zev_queue_tail = NULL;
		} else {
			zev_queue_head->prev = NULL;
		}
		if (m->read == 0) {
			zev_statistics.zev_bytes_discarded += m->size;
			zev_statistics.zev_cnt_discarded_events++;
		}
		zev_statistics.zev_queue_len -= m->size;
		zev_queue_len--;
		zev_free(m, sizeof(*m) + m->size);
	}
}

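/*
 * A note on the structure zev_queue_trim() maintains: there is a single
 * global doubly-linked message list (zev_queue_head is the oldest
 * message, zev_queue_tail the newest).  Per-device queues do not copy
 * messages; each one just keeps a cursor, zq_oldest, pointing into the
 * shared list:
 *
 *	head -> m1 <-> m2 <-> m3 <-> m4 <- tail
 *	               ^             ^
 *	               |             |
 *	    queue A zq_oldest    queue B zq_oldest
 *
 * Trimming frees every message older than the oldest cursor of any
 * queue; messages that were never read count as discarded.
 */
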
/* must be called with zev_mutex held */
static void
zev_queue_hold(zev_queue_t *q)
{
	q->zq_refcnt++;
}

/* must be called with zev_mutex held */
static void
zev_queue_release(zev_queue_t *q)
{
	q->zq_refcnt--;
	if (q->zq_refcnt > 0)
		return;

	ASSERT(q->zq_busy == B_FALSE);

	/* persistent queues will not be removed */
	if ((q->zq_flags & ZEV_FL_PERSISTENT) != 0)
		return;

	/* remove queue from queue list */
	zev_queues[q->zq_minor_number - ZEV_MINOR_MIN] = NULL;

	/* discard messages that no queue references anymore */
	zev_queue_trim();

	cv_destroy(&q->zq_condvar);
	ddi_remove_minor_node(q->zq_dip, q->zq_name);
	devfs_clean(ddi_root_node() ? ddi_root_node() : q->zq_dip,
	    NULL, DV_CLEAN_FORCE);
	ddi_soft_state_free(statep, q->zq_minor_number);
	ZEV_MEM_SUB(sizeof(zev_queue_t));
	zev_queue_cnt--;
	zev_update_blockflag();
}

int
zev_queue_new(zev_queue_t **queue,
    dev_info_t *dip,
    char *name,
    uint64_t max_queue_len,
    uint16_t flags)
{
	zev_queue_t *q;
	zev_queue_t *tmp;
	zev_msg_t *msg;
	int name_exists = 0;
	minor_t minor;
	char *p;
	int i;

	if (max_queue_len > ZEV_MAX_QUEUE_LEN)
		return EINVAL;
	if (max_queue_len == 0)
		max_queue_len = ZEV_MAX_QUEUE_LEN;
	if (!strcmp(name, ZEV_CONTROL_DEVICE_NAME))
		return EINVAL;
	for (p = name; *p; p++) {
		if (*p >= 'a' && *p <= 'z')
			continue;
		if (*p >= '0' && *p <= '9')
			continue;
		if (*p == '.')
			continue;
		return EINVAL;
	}

	mutex_enter(&zev_mutex);

	/* find a free minor number. */
	/* if this were a frequent operation we'd have a free-minor list */
	for (minor = ZEV_MINOR_MIN; minor <= ZEV_MINOR_MAX; minor++) {
		tmp = zev_queues[minor - ZEV_MINOR_MIN];
		if (tmp == NULL)
			break;
	}
	if (tmp) {
		mutex_exit(&zev_mutex);
		return ENOSPC;
	}

	if (ddi_soft_state_zalloc(statep, minor) != DDI_SUCCESS) {
		mutex_exit(&zev_mutex);
		return ENOSPC;
	}
	ZEV_MEM_ADD(sizeof(zev_queue_t));

	q = ddi_get_soft_state(statep, minor);
	memset(q, 0, sizeof(*q));
	strncpy(q->zq_name, name, ZEV_MAX_QUEUE_NAME_LEN);
	q->zq_name[ZEV_MAX_QUEUE_NAME_LEN] = '\0';
	q->zq_max_queue_len = max_queue_len;
	q->zq_wakeup_threshold = ZEV_DEFAULT_POLL_WAKEUP_QUEUE_LEN;
	q->zq_flags = flags;
	q->zq_refcnt = 1;
	q->zq_dip = dip;
	q->zq_minor_number = minor;
	cv_init(&q->zq_condvar, NULL, CV_DRIVER, NULL);

	/* insert into queue list */
	for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
		/* if this were a frequent operation we'd have a name tree */
		if (zev_queues[i - ZEV_MINOR_MIN] == NULL)
			continue;
		if (!strcmp(q->zq_name,
		    zev_queues[i - ZEV_MINOR_MIN]->zq_name)) {
			name_exists = 1;
			break;
		}
	}
	if (name_exists) {
		ddi_soft_state_free(statep, minor);
		ZEV_MEM_SUB(sizeof(zev_queue_t));
		mutex_exit(&zev_mutex);
		return EEXIST;
	}
	zev_queues[minor - ZEV_MINOR_MIN] = q;
	zev_queue_cnt++;

	/* calculate current queue len and find head and tail */
	if (!(q->zq_flags & ZEV_FL_INITIALLY_EMPTY)) {
		q->zq_oldest = zev_queue_tail;
		msg = zev_queue_tail;
		while ((msg) && (q->zq_queue_len < q->zq_max_queue_len)) {
			q->zq_queue_len += msg->size;
			q->zq_queue_messages++;
			q->zq_oldest = msg;
			msg = msg->prev;
		}
	}

	zev_update_blockflag();

	mutex_exit(&zev_mutex);

	if (ddi_create_minor_node(dip, name,
	    S_IFCHR, minor, DDI_PSEUDO, 0) == DDI_FAILURE) {
		mutex_enter(&zev_mutex);
		zev_queues[minor - ZEV_MINOR_MIN] = NULL;
		zev_queue_cnt--;
		ddi_soft_state_free(statep, minor);
		ZEV_MEM_SUB(sizeof(zev_queue_t));
		zev_update_blockflag();
		mutex_exit(&zev_mutex);
		return EFAULT;
	}

	*queue = q;
	return 0;
}

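/*
 * A hedged sketch of creating a queue from driver context, as
 * zev_attach() does for the default "beaver" queue (the name "backup.1"
 * is only an example; names may contain lowercase letters, digits and
 * dots, and a max_queue_len of 0 selects ZEV_MAX_QUEUE_LEN):
 *
 *	zev_queue_t *q;
 *	int err = zev_queue_new(&q, dip, "backup.1", 0,
 *	    ZEV_FL_BLOCK_WHILE_QUEUE_FULL);
 *	if (err)
 *		cmn_err(CE_WARN, "zev: cannot create queue: %d", err);
 */
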
358 */ 359 360 static void 361 zev_poll_wakeup(boolean_t flush_all) 362 { 363 zev_queue_t *q; 364 int i; 365 366 /* 367 * This loop works with hold() and release() because 368 * pollwakeup() requires us to release our locks before calling it. 369 * 370 * from pollwakeup(9F): 371 * 372 * "Driver defined locks should not be held across calls 373 * to this function." 374 */ 375 376 /* wake up threads for each individual queue */ 377 mutex_enter(&zev_mutex); 378 for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) { 379 q = zev_queues[i - ZEV_MINOR_MIN]; 380 if (q == NULL) 381 continue; 382 if (!q->zq_busy) 383 continue; 384 if (!q->zq_queue_len) 385 continue; 386 if ((flush_all) || 387 (q->zq_queue_len > q->zq_wakeup_threshold)) { 388 zev_queue_hold(q); 389 mutex_exit(&zev_mutex); 390 pollwakeup(&q->zq_pollhead, POLLIN); 391 mutex_enter(&zev_mutex); 392 zev_queue_release(q); 393 } 394 } 395 mutex_exit(&zev_mutex); 396 } 397 398 static void 399 zev_poll_wakeup_thread_main(void) 400 { 401 while (zev_wakeup_thread_run) { 402 delay(drv_usectohz(100 * 1000)); /* sleep 100ms */ 403 404 zev_poll_wakeup(B_TRUE); 405 } 406 thread_exit(); 407 } 408 409 static int 410 zev_ioc_mute_pool(char *poolname) 411 { 412 zev_pool_list_entry_t *pe; 413 rw_enter(&zev_pool_list_rwlock, RW_WRITER); 414 /* pool already muted? */ 415 for (pe=zev_muted_pools_head; pe; pe=pe->next) { 416 if (!strcmp(pe->name, poolname)) { 417 rw_exit(&zev_pool_list_rwlock); 418 return EEXIST; 419 } 420 } 421 pe = zev_zalloc(sizeof(*pe)); 422 if (!pe) { 423 rw_exit(&zev_pool_list_rwlock); 424 return ENOMEM; 425 } 426 (void) strncpy(pe->name, poolname, sizeof(pe->name)); 427 pe->next = zev_muted_pools_head; 428 zev_muted_pools_head = pe; 429 rw_exit(&zev_pool_list_rwlock); 430 return (0); 431 } 432 433 static int 434 zev_ioc_unmute_pool(char *poolname) 435 { 436 zev_pool_list_entry_t *pe, *peprev; 437 438 rw_enter(&zev_pool_list_rwlock, RW_WRITER); 439 /* pool muted? 
static int
zev_ioc_mute_pool(char *poolname)
{
	zev_pool_list_entry_t *pe;
	rw_enter(&zev_pool_list_rwlock, RW_WRITER);
	/* pool already muted? */
	for (pe = zev_muted_pools_head; pe; pe = pe->next) {
		if (!strcmp(pe->name, poolname)) {
			rw_exit(&zev_pool_list_rwlock);
			return EEXIST;
		}
	}
	pe = zev_zalloc(sizeof(*pe));
	if (!pe) {
		rw_exit(&zev_pool_list_rwlock);
		return ENOMEM;
	}
	(void) strncpy(pe->name, poolname, sizeof(pe->name));
	pe->next = zev_muted_pools_head;
	zev_muted_pools_head = pe;
	rw_exit(&zev_pool_list_rwlock);
	return (0);
}

static int
zev_ioc_unmute_pool(char *poolname)
{
	zev_pool_list_entry_t *pe, *peprev;

	rw_enter(&zev_pool_list_rwlock, RW_WRITER);
	/* pool muted? */
	peprev = NULL;
	for (pe = zev_muted_pools_head; pe; pe = pe->next) {
		if (!strcmp(pe->name, poolname))
			break;
		peprev = pe;
	}
	if (!pe) {
		rw_exit(&zev_pool_list_rwlock);
		return ENOENT;
	}

	if (peprev != NULL) {
		peprev->next = pe->next;
	} else {
		zev_muted_pools_head = pe->next;
	}
	zev_free(pe, sizeof(*pe));
	rw_exit(&zev_pool_list_rwlock);
	return (0);
}

int
zev_skip_pool(objset_t *os)
{
	zev_pool_list_entry_t *pe;
	dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool;
	rw_enter(&zev_pool_list_rwlock, RW_READER);
	for (pe = zev_muted_pools_head; pe; pe = pe->next) {
		if (!strcmp(pe->name, dp->dp_spa->spa_name)) {
			rw_exit(&zev_pool_list_rwlock);
			return 1;
		}
	}
	rw_exit(&zev_pool_list_rwlock);
	return 0;
}

int
zev_skip_fs(zfsvfs_t *fs)
{
	dsl_dir_t *d = fs->z_os->os_dsl_dataset->ds_dir;
	dsl_dir_t *prev = NULL;

	while (d && d != prev) {
		if (strstr(d->dd_myname, "_root"))
			return 0;
		prev = d;
		d = d->dd_parent;
	}
	return 1;
}

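/*
 * zev_skip_pool() above consults the mute list; entries are managed
 * from userland through the control device.  A hedged sketch (the
 * control node path is illustrative; the fields match
 * zev_ioctl_poolarg_t as consumed by zev_ioctl()):
 *
 *	zev_ioctl_poolarg_t pa;
 *	int fd = open("/dev/zev/ctrl", O_RDWR);
 *	pa.zev_poolname_len = strlen("tank");
 *	strcpy(pa.zev_poolname, "tank");
 *	if (ioctl(fd, ZEV_IOC_MUTE_POOL, &pa) != 0)
 *		perror("ZEV_IOC_MUTE_POOL");
 */
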
static void
zev_update_statistics(int op, zev_statistics_t *stat)
{
	switch (op) {
	case ZEV_OP_ERROR:
		stat->zev_cnt_errors++;
		break;
	case ZEV_OP_MARK:
		stat->zev_cnt_marks++;
		break;
	case ZEV_OP_ZFS_MOUNT:
		stat->zev_cnt_zfs_mount++;
		break;
	case ZEV_OP_ZFS_UMOUNT:
		stat->zev_cnt_zfs_umount++;
		break;
	case ZEV_OP_ZVOL_WRITE:
		stat->zev_cnt_zvol_write++;
		break;
	case ZEV_OP_ZVOL_TRUNCATE:
		stat->zev_cnt_zvol_truncate++;
		break;
	case ZEV_OP_ZNODE_CLOSE_AFTER_UPDATE:
		stat->zev_cnt_znode_close_after_update++;
		break;
	case ZEV_OP_ZNODE_CREATE:
		stat->zev_cnt_znode_create++;
		break;
	case ZEV_OP_ZNODE_REMOVE:
		stat->zev_cnt_znode_remove++;
		break;
	case ZEV_OP_ZNODE_LINK:
		stat->zev_cnt_znode_link++;
		break;
	case ZEV_OP_ZNODE_SYMLINK:
		stat->zev_cnt_znode_symlink++;
		break;
	case ZEV_OP_ZNODE_RENAME:
		stat->zev_cnt_znode_rename++;
		break;
	case ZEV_OP_ZNODE_WRITE:
		stat->zev_cnt_znode_write++;
		break;
	case ZEV_OP_ZNODE_TRUNCATE:
		stat->zev_cnt_znode_truncate++;
		break;
	case ZEV_OP_ZNODE_SETATTR:
		stat->zev_cnt_znode_setattr++;
		break;
	case ZEV_OP_ZNODE_ACL:
		stat->zev_cnt_znode_acl++;
		break;
	}
}

void
zev_queue_message(int op, zev_msg_t *msg)
{
	zev_queue_t *q;
	int wakeup = 0;
	zev_msg_t *m;
	int i;

	msg->next = NULL;
	msg->prev = NULL;
	msg->read = 0;

	if (op < ZEV_OP_MIN || op > ZEV_OP_MAX) {
		zev_queue_error(op, "unknown op id encountered: %d", op);
		zev_free(msg, sizeof(*msg) + msg->size);
		return;
	}

	/*
	 * This mutex protects us against race conditions when several
	 * threads want to queue a message and one or more queues are
	 * full: we release zev_mutex to wait for the queues to become
	 * less-than-full, but we don't know in which order the waiting
	 * threads will be awoken.  If it's not the same order in which
	 * they went to sleep we might mark different messages as "newest"
	 * in different queues, and so we might have dupes or even
	 * skip messages.
	 */
	mutex_enter(&zev_queue_msg_mutex);

	mutex_enter(&zev_mutex);

	/*
	 * When the module is loaded, the default behavior is to
	 * put all events into a queue and block if the queue is full.
	 * This is done even before the pseudo device is attached.
	 * This way, no events are lost.
	 *
	 * To discard events entirely the "beaver" queue,
	 * which never discards anything, has to be removed.
	 */

	if (zev_queue_cnt == 0) {
		mutex_exit(&zev_mutex);
		mutex_exit(&zev_queue_msg_mutex);
		return;
	}

	/* put message into global queue */
	msg->seq = zev_msg_sequence_number++;

	/* do we need to make room? */
again:
	while (zev_statistics.zev_max_queue_len &&
	    zev_statistics.zev_queue_len > zev_statistics.zev_max_queue_len) {

		if (zev_have_blocking_queues) {
			/* so we have blocking queues.  are they full? */
			for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
				q = zev_queues[i - ZEV_MINOR_MIN];
				if (!q)
					continue;
				if ((q->zq_flags &
				    ZEV_FL_BLOCK_WHILE_QUEUE_FULL) == 0)
					continue;
				if (q->zq_queue_len &&
				    q->zq_queue_len > q->zq_max_queue_len) {
					/* block until queue's been shrunk. */
					cv_wait(&zev_condvar, &zev_mutex);
					goto again;
				}
			}
		}

		/* discard events until this message fits into all queues */

		for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
			q = zev_queues[i - ZEV_MINOR_MIN];
			if (!q)
				continue;
			/* discard msgs until queue is small enough */
			while (q->zq_queue_len &&
			    q->zq_queue_len > q->zq_max_queue_len) {
				m = q->zq_oldest;
				if (m == NULL)
					break;
				q->zq_events_discarded++;
				q->zq_bytes_discarded += m->size;
				q->zq_oldest = m->next;
				q->zq_queue_len -= m->size;
				q->zq_queue_messages--;
			}
		}

		zev_queue_trim();
		ASSERT(zev_statistics.zev_queue_len == 0 ||
		    zev_statistics.zev_queue_len <=
		    zev_statistics.zev_max_queue_len);
	}

	if (zev_queue_tail == NULL) {
		zev_queue_head = zev_queue_tail = msg;
	} else {
		zev_queue_tail->next = msg;
		msg->prev = zev_queue_tail;
		zev_queue_tail = msg;
	}
	zev_queue_len++;
	zev_statistics.zev_cnt_total_events++;
	zev_statistics.zev_queue_len += msg->size;

	/* update per-device queues */
	for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
		q = zev_queues[i - ZEV_MINOR_MIN];
		if (!q)
			continue;

		zev_queue_hold(q);

		/* make sure queue has enough room */
		while (q->zq_max_queue_len &&
		    q->zq_queue_len > q->zq_max_queue_len) {

			if (q->zq_flags & ZEV_FL_BLOCK_WHILE_QUEUE_FULL) {
				/* block until queue has been shrunk. */
				cv_wait(&zev_condvar, &zev_mutex);
			} else {
				/* discard msgs until queue is small enough */
				while (q->zq_queue_len > q->zq_max_queue_len) {
					m = q->zq_oldest;
					if (m == NULL)
						break;
					q->zq_events_discarded++;
					q->zq_bytes_discarded += m->size;
					q->zq_oldest = m->next;
					q->zq_queue_len -= m->size;
					q->zq_queue_messages--;
				}
			}
		}

		/* register new message at the end of the queue */
		q->zq_queue_len += msg->size;
		q->zq_queue_messages++;
		q->zq_bytes_total += msg->size;
		q->zq_events_total++;
		if (q->zq_oldest == NULL)
			q->zq_oldest = msg;

		zev_update_statistics(op, &q->zq_statistics);

		if (q->zq_queue_len > q->zq_wakeup_threshold)
			wakeup = 1;
		if (q->zq_queue_len == msg->size)	/* queue was empty */
			cv_broadcast(&q->zq_condvar);

		zev_queue_release(q);
	}

	zev_queue_trim();

	zev_update_statistics(op, &zev_statistics);
	mutex_exit(&zev_mutex);
	mutex_exit(&zev_queue_msg_mutex);

	/* one or more queues need a pollwakeup() */
	if (op == ZEV_OP_MARK) {
		zev_poll_wakeup(B_TRUE);
	} else if (wakeup) {
		zev_poll_wakeup(B_FALSE);
	}

	return;
}

void
zev_queue_error(int op, char *fmt, ...)
{
	char buf[ZEV_MAX_MESSAGE_LEN];
	va_list ap;
	int len;
	zev_msg_t *msg = NULL;
	zev_error_t *rec;
	int msg_size;

	va_start(ap, fmt);
	len = vsnprintf(buf, sizeof(buf), fmt, ap);
	va_end(ap);
	if (len >= sizeof(buf)) {
		cmn_err(CE_WARN, "zev: can't report error - "
		    "dropping event entirely.");
		return;
	}

	msg_size = sizeof(*rec) + len + 1;
	msg = zev_alloc(sizeof(*msg) + msg_size);
	msg->size = msg_size;
	rec = (zev_error_t *)(msg + 1);
	rec->record_len = msg_size;
	rec->op = ZEV_OP_ERROR;
	rec->op_time = ddi_get_time();
	rec->guid = 0;
	rec->failed_op = op;
	rec->errstr_len = len;
	(void) memcpy(ZEV_ERRSTR(rec), buf, len + 1);

	zev_queue_message(ZEV_OP_ERROR, msg);
	return;
}

static int
zev_find_queue(zev_queue_t **out, zev_queue_t *req_q, zev_queue_name_t *name)
{
	char namebuf[ZEV_MAX_QUEUE_NAME_LEN+1];
	zev_queue_t *q;
	int i;

	*out = NULL;

	if (name->zev_namelen == 0) {
		if (req_q->zq_minor_number == ZEV_CONTROL_DEVICE_MINOR)
			return EINVAL;
		mutex_enter(&zev_mutex);
		zev_queue_hold(req_q);
		mutex_exit(&zev_mutex);
		*out = req_q;
		return 0;
	}

	if (name->zev_namelen > ZEV_MAX_QUEUE_NAME_LEN)
		return EINVAL;
	strncpy(namebuf, name->zev_name, name->zev_namelen);
	namebuf[name->zev_namelen] = '\0';

	mutex_enter(&zev_mutex);
	for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
		q = zev_queues[i - ZEV_MINOR_MIN];
		if (!q)
			continue;
		if (!strcmp(q->zq_name, namebuf)) {
			zev_queue_hold(q);
			mutex_exit(&zev_mutex);
			*out = q;
			return 0;
		}
	}
	mutex_exit(&zev_mutex);
	return ENOENT;
}

static int
zev_ioc_get_queue_statistics(zev_queue_t *req_q, intptr_t arg, int mode)
{
	zev_ioctl_get_queue_statistics_t gs;
	zev_queue_t *q;
	int ret;

	if (ddi_copyin((void *)arg, &gs, sizeof(gs), mode) != 0)
		return EFAULT;

	ret = zev_find_queue(&q, req_q, &gs.zev_queue_name);
	if (ret)
		return ret;

	/*
	 * ddi_copyout() can take a long time.  Better to make a copy so
	 * the mutex can be released sooner.
	 */
	mutex_enter(&zev_mutex);
	memcpy(&gs.zev_statistics, &q->zq_statistics,
	    sizeof(gs.zev_statistics));
	gs.zev_statistics.zev_queue_len = q->zq_queue_len;
	gs.zev_statistics.zev_bytes_read = q->zq_bytes_read;
	gs.zev_statistics.zev_bytes_discarded = q->zq_bytes_discarded;
	gs.zev_statistics.zev_max_queue_len = q->zq_max_queue_len;
	gs.zev_statistics.zev_cnt_discarded_events = q->zq_events_discarded;
	gs.zev_statistics.zev_cnt_total_events = q->zq_events_total;
	zev_queue_release(q);
	mutex_exit(&zev_mutex);

	if (ddi_copyout(&gs, (void *)arg, sizeof(gs), mode) != 0)
		return EFAULT;
	return 0;
}

static int
zev_ioc_set_queue_properties(zev_queue_t *req_q, intptr_t arg, int mode)
{
	zev_ioctl_set_queue_properties_t qp;
	zev_queue_t *q;
	uint64_t old_max;
	uint64_t old_flags;
	int ret;

	if (ddi_copyin((void *)arg, &qp, sizeof(qp), mode) != 0)
		return EFAULT;
	if (qp.zev_max_queue_len > ZEV_MAX_QUEUE_LEN)
		return EINVAL;
	if (qp.zev_poll_wakeup_threshold > ZEV_MAX_POLL_WAKEUP_QUEUE_LEN)
		return EINVAL;

	ret = zev_find_queue(&q, req_q, &qp.zev_queue_name);
	if (ret)
		return ret;

	mutex_enter(&zev_mutex);

	/*
	 * Note: if the PERSISTENT flag is cleared, and the queue is not busy,
	 * the queue should be removed by zev_queue_release() in zev_ioctl().
	 */
	old_flags = q->zq_flags;
	q->zq_flags = qp.zev_flags;
	if ((old_flags & ZEV_FL_BLOCK_WHILE_QUEUE_FULL) &&
	    (!(qp.zev_flags & ZEV_FL_BLOCK_WHILE_QUEUE_FULL))) {
		/* queue is no longer blocking - wake blocked threads */
		cv_broadcast(&zev_condvar);
	}

	zev_update_blockflag();

	old_max = q->zq_max_queue_len;
	q->zq_max_queue_len = qp.zev_max_queue_len;
	if (q->zq_max_queue_len < old_max)
		zev_queue_trim();
	if (q->zq_max_queue_len > old_max)
		cv_broadcast(&zev_condvar);	/* threads may be waiting */

	if ((qp.zev_poll_wakeup_threshold < q->zq_wakeup_threshold) &&
	    (qp.zev_poll_wakeup_threshold <= q->zq_queue_len))
		pollwakeup(&q->zq_pollhead, POLLIN);
	q->zq_wakeup_threshold = qp.zev_poll_wakeup_threshold;

	zev_queue_release(q);
	mutex_exit(&zev_mutex);
	return 0;
}

static int
zev_ioc_get_queue_properties(zev_queue_t *req_q, intptr_t arg, int mode)
{
	zev_ioctl_get_queue_properties_t qp;
	zev_queue_t *q;
	int ret;

	if (ddi_copyin((void *)arg, &qp, sizeof(qp), mode) != 0)
		return EFAULT;

	ret = zev_find_queue(&q, req_q, &qp.zev_queue_name);
	if (ret)
		return ret;

	mutex_enter(&zev_mutex);
	qp.zev_max_queue_len = q->zq_max_queue_len;
	qp.zev_flags = q->zq_flags;
	qp.zev_poll_wakeup_threshold = q->zq_wakeup_threshold;
	zev_queue_release(q);
	mutex_exit(&zev_mutex);

	if (ddi_copyout(&qp, (void *)arg, sizeof(qp), mode) != 0)
		return EFAULT;
	return 0;
}

static int
zev_ioc_add_queue(zev_queue_t *req_q, intptr_t arg, int mode)
{
	zev_ioctl_add_queue_t aq;
	zev_queue_t *new_q;
	char name[ZEV_MAX_QUEUE_NAME_LEN+1];

	if (ddi_copyin((void *)arg, &aq, sizeof(aq), mode) != 0)
		return EFAULT;

	if (aq.zev_namelen > ZEV_MAX_QUEUE_NAME_LEN)
		return EINVAL;
	strncpy(name, aq.zev_name, aq.zev_namelen);
	name[aq.zev_namelen] = '\0';
	if (!strncmp(name, ZEV_TMPQUEUE_DEVICE_NAME,
	    strlen(ZEV_TMPQUEUE_DEVICE_NAME)))
		return EINVAL;

	return zev_queue_new(&new_q, req_q->zq_dip, name,
	    aq.zev_max_queue_len, aq.zev_flags);
}

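/*
 * A hedged userland sketch of adding a queue via the control device
 * (fields as consumed by zev_ioc_add_queue() above; "backup.1" is an
 * example name, and a max_queue_len of 0 selects ZEV_MAX_QUEUE_LEN):
 *
 *	zev_ioctl_add_queue_t aq;
 *	memset(&aq, 0, sizeof(aq));
 *	aq.zev_namelen = strlen("backup.1");
 *	memcpy(aq.zev_name, "backup.1", aq.zev_namelen);
 *	aq.zev_max_queue_len = 0;
 *	aq.zev_flags = ZEV_FL_PERSISTENT;
 *	if (ioctl(ctrl_fd, ZEV_IOC_ADD_QUEUE, &aq) != 0)
 *		perror("ZEV_IOC_ADD_QUEUE");
 */
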
static int
zev_ioc_remove_queue(zev_queue_t *req_q, intptr_t arg, int mode)
{
	zev_ioctl_remove_queue_t rq;
	zev_queue_t *q;
	char name[ZEV_MAX_QUEUE_NAME_LEN+1];
	int found = 0;
	int i;

	if (ddi_copyin((void *)arg, &rq, sizeof(rq), mode) != 0)
		return EFAULT;

	if (rq.zev_queue_name.zev_namelen > ZEV_MAX_QUEUE_NAME_LEN)
		return EINVAL;
	strncpy(name, rq.zev_queue_name.zev_name,
	    rq.zev_queue_name.zev_namelen);
	name[rq.zev_queue_name.zev_namelen] = '\0';

	mutex_enter(&zev_mutex);
	for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
		q = zev_queues[i - ZEV_MINOR_MIN];
		if (!q)
			continue;
		if (!strcmp(q->zq_name, name)) {
			found = 1;
			break;
		}
	}
	if (!found) {
		mutex_exit(&zev_mutex);
		return ENOENT;
	}

	if (q->zq_busy) {
		mutex_exit(&zev_mutex);
		return EBUSY;
	}
	/*
	 * clear flags, so that persistent queues are removed as well
	 * and the queue becomes non-blocking.
	 */
	q->zq_flags = 0;
	if (q->zq_to_be_removed == B_FALSE) {
		q->zq_to_be_removed = B_TRUE;
		zev_queue_release(q);
	}
	/* some threads might be waiting for this queue to become writable */
	cv_broadcast(&zev_condvar);

	mutex_exit(&zev_mutex);
	return 0;
}

static int
zev_ioc_get_debug_info(zev_queue_t *req_q, intptr_t arg, int mode)
{
	zev_ioctl_debug_info_t di;
	uint64_t mem_allocated = atomic_add_64_nv(&zev_memory_allocated, 0);
	uint64_t mem_freed = atomic_add_64_nv(&zev_memory_freed, 0);

	zev_chksum_stats(&di.zev_chksum_cache_size,
	    &di.zev_chksum_cache_hits,
	    &di.zev_chksum_cache_misses);
	di.zev_memory_allocated = mem_allocated - mem_freed;
	if (ddi_copyout(&di, (void *)arg, sizeof(di), mode) != 0)
		return EFAULT;
	return 0;
}

static int
zev_ioc_get_queue_list(zev_queue_t *req_q, intptr_t arg, int mode)
{
	zev_ioctl_get_queue_list_t gql;
	zev_queue_t *q;
	int i = 0;
	int count = 0;

	memset(&gql, 0, sizeof(gql));

	mutex_enter(&zev_mutex);
	for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
		q = zev_queues[i - ZEV_MINOR_MIN];
		if (!q)
			continue;
		strncpy(gql.zev_queue_name[count].zev_name,
		    q->zq_name, ZEV_MAX_QUEUE_NAME_LEN);
		gql.zev_queue_name[count].zev_namelen = strlen(q->zq_name);
		count++;
	}
	gql.zev_n_queues = count;
	mutex_exit(&zev_mutex);

	if (ddi_copyout(&gql, (void *)arg, sizeof(gql), mode) != 0)
		return EFAULT;
	return 0;
}

static int
zev_ioc_set_max_queue_len(zev_queue_t *req_q, intptr_t arg, int mode)
{
	uint64_t len;
	int i;
	zev_queue_t *q;

	if (ddi_copyin((void *)arg, &len, sizeof(len), mode) != 0) {
		return EFAULT;
	}
	if (len > ZEV_MAX_QUEUE_LEN) {
		return EINVAL;
	}
	mutex_enter(&zev_mutex);
	zev_statistics.zev_max_queue_len = len;
	for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
		q = zev_queues[i - ZEV_MINOR_MIN];
		if (!q)
			continue;
		if (q->zq_max_queue_len <=
		    zev_statistics.zev_max_queue_len)
			continue;
		q->zq_max_queue_len = zev_statistics.zev_max_queue_len;
	}
	cv_broadcast(&zev_condvar);
	mutex_exit(&zev_mutex);
	return 0;
}

static int
zev_ioc_get_zev_version(intptr_t arg, int mode)
{
	zev_ioctl_get_zev_version vi;
	vi.zev_major_version = ZEV_MAJOR_VERSION;
	vi.zev_minor_version = ZEV_MINOR_VERSION;
	if (ddi_copyout(&vi, (void *)arg, sizeof(vi), mode) != 0)
		return EFAULT;
	return 0;
}

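/*
 * Consumers can verify they agree with the module about record formats
 * before reading events.  A hedged sketch, using the fields filled in
 * by zev_ioc_get_zev_version() above:
 *
 *	zev_ioctl_get_zev_version vi;
 *	if (ioctl(fd, ZEV_IOC_GET_ZEV_VERSION, &vi) != 0 ||
 *	    vi.zev_major_version != ZEV_MAJOR_VERSION)
 *		errx(1, "zev version mismatch");
 */
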
/* ARGSUSED */
static int
zev_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
{
	zev_statistics_t zs;
	zev_ioctl_poolarg_t pa;
	zev_ioctl_mark_t mark;
	zev_mark_t *rec;
	int msg_size;
	zev_msg_t *msg;
	uint64_t mark_id;
	minor_t minor;
	zev_queue_t *req_q;
	int ret = 0;

	minor = getminor(dev);
	mutex_enter(&zev_mutex);
	if ((req_q = ddi_get_soft_state(statep, minor)) == NULL) {
		mutex_exit(&zev_mutex);
		return (ENXIO);
	}
	zev_queue_hold(req_q);
	mutex_exit(&zev_mutex);
	/*
	 * all structures passed between kernel and userspace
	 * are now compatible between 64 and 32 bit.  Model
	 * conversion can be ignored.
	 */
	switch (cmd) {
	case ZEV_IOC_GET_GLOBAL_STATISTICS:
		/*
		 * ddi_copyout() can take a long time.  Better to make a
		 * copy so the mutex can be released sooner.
		 */
		mutex_enter(&zev_mutex);
		(void) memcpy(&zs, &zev_statistics, sizeof(zs));
		mutex_exit(&zev_mutex);
		if (ddi_copyout(&zs, (void *)arg, sizeof(zs), mode) != 0)
			ret = EFAULT;
		break;
	case ZEV_IOC_GET_QUEUE_STATISTICS:
		ret = zev_ioc_get_queue_statistics(req_q, arg, mode);
		break;
	case ZEV_IOC_MUTE_POOL:
	case ZEV_IOC_UNMUTE_POOL:
		if (ddi_copyin((void *)arg, &pa, sizeof(pa), mode) != 0) {
			ret = EFAULT;
			break;
		}
		if (pa.zev_poolname_len >= MAXPATHLEN) {
			ret = EINVAL;
			break;
		}
		pa.zev_poolname[pa.zev_poolname_len] = '\0';
		if (cmd == ZEV_IOC_MUTE_POOL) {
			ret = zev_ioc_mute_pool(pa.zev_poolname);
		} else {
			ret = zev_ioc_unmute_pool(pa.zev_poolname);
		}
		break;
	case ZEV_IOC_SET_MAX_QUEUE_LEN:
		ret = zev_ioc_set_max_queue_len(req_q, arg, mode);
		break;
	case ZEV_IOC_GET_QUEUE_PROPERTIES:
		ret = zev_ioc_get_queue_properties(req_q, arg, mode);
		break;
	case ZEV_IOC_SET_QUEUE_PROPERTIES:
		ret = zev_ioc_set_queue_properties(req_q, arg, mode);
		break;
	case ZEV_IOC_MARK:
		if (ddi_copyin((void *)arg, &mark, sizeof(mark), mode) != 0) {
			ret = EFAULT;
			break;
		}
		/* prepare message */
		msg_size = sizeof(*rec) + mark.zev_payload_len + 1;
		msg = zev_alloc(sizeof(*msg) + msg_size);
		msg->size = msg_size;
		rec = (zev_mark_t *)(msg + 1);
		rec->record_len = msg_size;
		rec->op = ZEV_OP_MARK;
		rec->op_time = ddi_get_time();
		rec->guid = mark.zev_guid;
		rec->payload_len = mark.zev_payload_len;
		/* get payload */
		if (ddi_copyin(((char *)arg) + sizeof(mark),
		    ZEV_PAYLOAD(rec),
		    mark.zev_payload_len, mode) != 0) {
			zev_free(msg, sizeof(*msg) + msg_size);
			ret = EFAULT;
			break;
		}
		*(ZEV_PAYLOAD(rec) + mark.zev_payload_len) = '\0';
		/* get mark id and queue message */
		mutex_enter(&zev_mark_id_mutex);
		mark_id = zev_mark_id++;
		mutex_exit(&zev_mark_id_mutex);
		rec->mark_id = mark_id;
		zev_queue_message(ZEV_OP_MARK, msg);
		/* report mark id to userland, ignore errors */
		mark.zev_mark_id = mark_id;
		ddi_copyout(&mark, (void *)arg, sizeof(mark), mode);
		break;
	case ZEV_IOC_ADD_QUEUE:
		if (minor != ZEV_CONTROL_DEVICE_MINOR) {
			ret = EACCES;
			break;
		}
		ret = zev_ioc_add_queue(req_q, arg, mode);
		break;
	case ZEV_IOC_REMOVE_QUEUE:
		if (minor != ZEV_CONTROL_DEVICE_MINOR) {
			ret = EACCES;
			break;
		}
		ret = zev_ioc_remove_queue(req_q, arg, mode);
		break;
	case ZEV_IOC_GET_DEBUG_INFO:
		ret = zev_ioc_get_debug_info(req_q, arg, mode);
		break;
	case ZEV_IOC_GET_QUEUE_LIST:
		ret = zev_ioc_get_queue_list(req_q, arg, mode);
		break;
	case ZEV_IOC_GET_FILE_SIGNATURES:
		ret = zev_ioc_get_signatures(arg, mode);
		break;
	case ZEV_IOC_GET_ZEV_VERSION:
		ret = zev_ioc_get_zev_version(arg, mode);
		break;
	default:
		/* generic "ioctl unknown" error */
		ret = ENOTTY;
	}

	mutex_enter(&zev_mutex);
	zev_queue_release(req_q);
	mutex_exit(&zev_mutex);
	if (ret)
		return (SET_ERROR(ret));
	return (ret);
}

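/*
 * ZEV_IOC_MARK places a marker record into the event stream, which is
 * useful for finding a known synchronization point while draining a
 * queue.  A hedged userland sketch; as in zev_ioctl() above, the
 * payload bytes follow the zev_ioctl_mark_t struct in the same user
 * buffer:
 *
 *	struct {
 *		zev_ioctl_mark_t m;
 *		char payload[16];
 *	} arg;
 *	arg.m.zev_guid = 0;
 *	arg.m.zev_payload_len = sizeof(arg.payload);
 *	memcpy(arg.payload, "checkpoint-0001", sizeof(arg.payload));
 *	if (ioctl(fd, ZEV_IOC_MARK, &arg) == 0)
 *		printf("mark id %llu\n",
 *		    (unsigned long long)arg.m.zev_mark_id);
 */
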
static int
zev_chpoll(dev_t dev, short events, int anyyet,
    short *reventsp, struct pollhead **phpp)
{
	int minor;
	short revent = 0;
	zev_queue_t *q;

	/* use minor-specific queue context and its pollhead */
	minor = getminor(dev);
	if (minor == ZEV_CONTROL_DEVICE_MINOR)
		return (EINVAL);
	mutex_enter(&zev_mutex);
	if ((q = ddi_get_soft_state(statep, minor)) == NULL) {
		mutex_exit(&zev_mutex);
		return (ENXIO);
	}
	revent = 0;
	if ((events & POLLIN)) {
		if (q->zq_oldest)
			revent |= POLLIN;
	}
	if (revent == 0) {
		if (!anyyet) {
			*phpp = &q->zq_pollhead;
		}
	}
	*reventsp = revent;
	mutex_exit(&zev_mutex);
	return (0);
}

/* ARGSUSED */
static int
zev_read(dev_t dev, struct uio *uio_p, cred_t *crep_p)
{
	minor_t minor;
	offset_t off;
	int ret = 0;
	zev_msg_t *msg;
	char *data;
	zev_queue_t *q;

	minor = getminor(dev);
	if (minor == ZEV_CONTROL_DEVICE_MINOR)
		return (EINVAL);

	mutex_enter(&zev_mutex);
	q = ddi_get_soft_state(statep, minor);
	if (q == NULL) {
		mutex_exit(&zev_mutex);
		return (ENXIO);
	}
	off = uio_p->uio_loffset;
	msg = q->zq_oldest;
	while (msg == NULL) {
		if (!ddi_can_receive_sig()) {
			/*
			 * read() shouldn't block because this thread
			 * can't receive signals.  (e.g., it might be
			 * torn down by exit() right now.)
			 */
			mutex_exit(&zev_mutex);
			return 0;
		}
		if (cv_wait_sig(&q->zq_condvar, &zev_mutex) == 0) {
			/* signal received. */
			mutex_exit(&zev_mutex);
			return EINTR;
		}
		msg = q->zq_oldest;
	}
	if (msg->size > uio_p->uio_resid) {
		mutex_exit(&zev_mutex);
		return E2BIG;
	}
	while (msg && uio_p->uio_resid >= msg->size) {
		data = (char *)(msg + 1);
		ret = uiomove(data, msg->size, UIO_READ, uio_p);
		if (ret != 0) {
			mutex_exit(&zev_mutex);
			cmn_err(CE_WARN, "zev: uiomove failed; messages lost");
			uio_p->uio_loffset = off;
			return (ret);
		}
		q->zq_oldest = msg->next;
		q->zq_bytes_read += msg->size;
		q->zq_queue_len -= msg->size;
		q->zq_queue_messages--;
		msg->read++;
		msg = q->zq_oldest;
	}
	zev_queue_trim();
	cv_broadcast(&zev_condvar);
	mutex_exit(&zev_mutex);
	uio_p->uio_loffset = off;
	return 0;
}

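/*
 * zev_read() above returns only whole records and fails with E2BIG if
 * the next record does not fit, so a buffer of ZEV_MAX_MESSAGE_LEN is
 * always large enough.  A hedged sketch of splitting one read into
 * records; it assumes a common record header starting with record_len
 * and op, as all record types in this file do ("zev_header_t" is a
 * hypothetical name for that header):
 *
 *	char buf[ZEV_MAX_MESSAGE_LEN];
 *	ssize_t n = read(fd, buf, sizeof(buf));
 *	for (off = 0; off < n; off += rec->record_len) {
 *		rec = (zev_header_t *)(buf + off);
 *		handle_event(rec->op, rec);
 *	}
 */
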
/* ARGSUSED */
static int
zev_close(dev_t dev, int flag, int otyp, cred_t *crepd)
{
	zev_queue_t *q;
	int minor;

	minor = getminor(dev);
	if (otyp != OTYP_CHR)
		return (EINVAL);
	mutex_enter(&zev_mutex);
	if ((q = ddi_get_soft_state(statep, minor)) == NULL) {
		mutex_exit(&zev_mutex);
		return (ENXIO);
	}
	if (q->zq_busy != B_TRUE) {
		mutex_exit(&zev_mutex);
		return (EINVAL);
	}
	q->zq_busy = B_FALSE;
	if ((q->zq_flags & ZEV_FL_PERSISTENT) == 0)
		zev_queue_release(q);
	mutex_exit(&zev_mutex);
	return (0);
}

/* ARGSUSED */
static int
zev_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
	zev_queue_t *q;
	minor_t minor;
	char zq_name[ZEV_MAX_QUEUE_NAME_LEN];
	int ret;

	minor = getminor(*devp);
	if (otyp != OTYP_CHR)
		return (EINVAL);
	if (drv_priv(credp) != 0)
		return (EPERM);
	if (minor == ZEV_TMPQUEUE_DEVICE_MINOR) {
		/* get control queue soft state to have dip */
		if ((q = ddi_get_soft_state(statep,
		    ZEV_CONTROL_DEVICE_MINOR)) == NULL) {
			/* note: zev_mutex is not held here */
			return (ENXIO);
		}

		/* create new temporary queue and return it. */

		snprintf(zq_name, sizeof(zq_name),
		    ZEV_TMPQUEUE_DEVICE_NAME ".%d", zev_tmpqueue_num++);

		ret = zev_queue_new(&q, q->zq_dip, zq_name, 0,
		    ZEV_FL_INITIALLY_EMPTY);
		if (ret) {
			return ret;
		}

		q->zq_busy = B_TRUE;
		*devp = makedevice(getmajor(*devp), q->zq_minor_number);
		return 0;
	}
	mutex_enter(&zev_mutex);
	if ((q = ddi_get_soft_state(statep, minor)) == NULL) {
		mutex_exit(&zev_mutex);
		return (ENXIO);
	}
	if (minor == ZEV_CONTROL_DEVICE_MINOR) {
		/* control device may be used in parallel */
		q->zq_busy = B_TRUE;
		mutex_exit(&zev_mutex);
		return 0;
	}
	if (q->zq_busy == B_TRUE) {
		mutex_exit(&zev_mutex);
		return (EBUSY);
	}
	q->zq_busy = B_TRUE;	/* can only be opened exclusively */
	mutex_exit(&zev_mutex);
	return (0);
}

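/*
 * Note the clone-style open above: opening the "tmpqueue" minor never
 * opens that node itself.  zev_open() creates a fresh, initially empty
 * queue and rewrites *devp, so each open(2) of the tmpqueue device
 * yields a private, exclusively-opened queue (named "tmpqueue.0",
 * "tmpqueue.1", ... after ZEV_TMPQUEUE_DEVICE_NAME).
 */
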
static struct cb_ops zev_cb_ops = {
	zev_open,		/* open */
	zev_close,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	zev_read,		/* read */
	nodev,			/* write */
	zev_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	zev_chpoll,		/* chpoll */
	ddi_prop_op,		/* prop_op */
	NULL,			/* streamtab */
	D_MP | D_64BIT,		/* cb_flag */
	CB_REV,			/* cb_rev */
	nodev,			/* aread */
	nodev,			/* awrite */
};

static void
zev_free_instance(dev_info_t *dip)
{
	int instance;
	zev_queue_t *q;
	int i;

	instance = ddi_get_instance(dip);
	if (instance != 0) {
		cmn_err(CE_WARN, "zev: tried to free instance != 0 (%d)",
		    instance);
		return;
	}

	ddi_remove_minor_node(dip, NULL);
	devfs_clean(ddi_root_node() ? ddi_root_node() : dip,
	    NULL, DV_CLEAN_FORCE);

	/* stop pollwakeup thread */
	zev_wakeup_thread_run = 0;
	if (zev_poll_wakeup_thread != NULL) {
		thread_join(zev_poll_wakeup_thread->t_did);
		zev_poll_wakeup_thread = NULL;
	}

	mutex_enter(&zev_mutex);

	/* remove "ctrl" dummy queue */
	q = ddi_get_soft_state(statep, ZEV_CONTROL_DEVICE_MINOR);
	if (q) {
		ddi_soft_state_free(statep, ZEV_CONTROL_DEVICE_MINOR);
		ZEV_MEM_SUB(sizeof(zev_queue_t));
	}

	/* remove all other queues */
	for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
		q = zev_queues[i - ZEV_MINOR_MIN];
		if (!q)
			continue;
		ASSERT(q->zq_refcnt == 1);
		zev_queue_release(q);
	}
	zev_queue_trim();
	bzero(&zev_queues, sizeof(zev_queues));

	mutex_exit(&zev_mutex);
}

static int
zev_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int instance;
	zev_queue_t *q;

	/*
	 * called once per instance with DDI_DETACH,
	 * may be called to suspend
	 */
	switch (cmd) {
	case DDI_DETACH:
		/* instance busy? */
		instance = ddi_get_instance(dip);
		if (instance != 0) {	/* hardcoded in zev.conf */
			/* this module only supports one instance. */
			return (DDI_FAILURE);
		}

		mutex_enter(&zev_mutex);
		if (!zev_attached) {
			mutex_exit(&zev_mutex);
			return (DDI_FAILURE);
		}

		/* check "ctrl" queue to see if it is busy */
		q = ddi_get_soft_state(statep, ZEV_CONTROL_DEVICE_MINOR);
		if (q == NULL) {
			mutex_exit(&zev_mutex);
			return (DDI_FAILURE);
		}
		if (q->zq_busy) {
			mutex_exit(&zev_mutex);
			return (DDI_FAILURE);
		}
		/* are there any queues? */
		if (zev_queue_cnt > 0) {
			mutex_exit(&zev_mutex);
			return (DDI_FAILURE);
		}

		zev_attached = B_FALSE;
		mutex_exit(&zev_mutex);

		/* switch ZFS event callbacks back to default */
		rw_enter(&rz_zev_rwlock, RW_WRITER);
		rz_zev_callbacks = rz_zev_default_callbacks;
		rz_zev_set_active(B_FALSE);
		rw_exit(&rz_zev_rwlock);

		/* no thread is inside of the callbacks anymore. */

		/* free resources allocated for this instance */
		zev_free_instance(dip);
		zev_chksum_fini();
#if 0
		cmn_err(CE_WARN, "zev: allocated memory at detach: %" PRIu64,
		    zev_memory_allocated - zev_memory_freed);
#endif
		return (DDI_SUCCESS);
	case DDI_SUSPEND:
		/* kernel must not suspend zev devices while ZFS is running */
		return (DDI_FAILURE);
	default:
		return (DDI_FAILURE);
	}
}

static int
zev_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	/*
	 * called once per instance with DDI_ATTACH,
	 * may be called to resume
	 */
	int instance;
	int error;
	zev_queue_t *q;
	switch (cmd) {
	case DDI_ATTACH:
		/* create instance state */
		instance = ddi_get_instance(dip);
		if (instance != 0) {	/* hardcoded in zev.conf */
			/* this module only supports one instance. */
			return (DDI_FAILURE);
		}

		mutex_enter(&zev_mutex);
		if (zev_attached) {
			mutex_exit(&zev_mutex);
			return (DDI_FAILURE);
		}
		if (ddi_soft_state_zalloc(statep, ZEV_CONTROL_DEVICE_MINOR) !=
		    DDI_SUCCESS) {
			mutex_exit(&zev_mutex);
			return (DDI_FAILURE);
		}
		ZEV_MEM_ADD(sizeof(zev_queue_t));
		zev_attached = B_TRUE;

		/* init queue list */
		bzero(&zev_queues, sizeof(zev_queues));
		mutex_exit(&zev_mutex);

		/* create a dummy queue for management of "ctrl" */

		q = ddi_get_soft_state(statep, ZEV_CONTROL_DEVICE_MINOR);
		q->zq_dip = dip;
		q->zq_refcnt = 1;
		q->zq_busy = B_FALSE;
		q->zq_minor_number = ZEV_CONTROL_DEVICE_MINOR;
		q->zq_flags = ZEV_FL_PERSISTENT;
		strcpy(q->zq_name, ZEV_CONTROL_DEVICE_NAME);

		/* create device node for "ctrl" */
		if (ddi_create_minor_node(dip, ZEV_CONTROL_DEVICE_NAME,
		    S_IFCHR, ZEV_CONTROL_DEVICE_MINOR,
		    DDI_PSEUDO, 0) == DDI_FAILURE) {
			goto fail;
		}

		/* note: intentionally not adding ctrl queue to queue list. */

		/* create device node for "tmpqueue" */
		if (ddi_create_minor_node(dip, ZEV_TMPQUEUE_DEVICE_NAME,
		    S_IFCHR, ZEV_TMPQUEUE_DEVICE_MINOR,
		    DDI_PSEUDO, 0) == DDI_FAILURE) {
			goto fail;
		}

		/* default queue */
		error = zev_queue_new(&q, dip,
		    ZEV_DEFAULT_QUEUE_NAME,
		    ZEV_MAX_QUEUE_LEN,
		    ZEV_FL_BLOCK_WHILE_QUEUE_FULL |
		    ZEV_FL_PERSISTENT);
		if (error)
			goto fail;

		/* start pollwakeup thread */
		zev_wakeup_thread_run = 1;
		zev_poll_wakeup_thread = thread_create(NULL, 0,
		    zev_poll_wakeup_thread_main, NULL, 0, &p0,
		    TS_RUN, minclsyspri);

		ddi_report_dev(dip);

		zev_chksum_init();

		/* switch ZFS event callbacks to zev module callbacks */
		rw_enter(&rz_zev_rwlock, RW_WRITER);
		rz_zev_callbacks = &zev_callbacks;
		rz_zev_set_active(B_TRUE);
		rw_exit(&rz_zev_rwlock);

		return (DDI_SUCCESS);
	case DDI_RESUME:
		/* we never allow suspending, so resume is a no-op */
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}
fail:
	cmn_err(CE_WARN, "zev: attach failed");
	zev_free_instance(dip);
	mutex_enter(&zev_mutex);
	zev_attached = B_FALSE;
	mutex_exit(&zev_mutex);
	return (DDI_FAILURE);
}

/* ARGSUSED */
static int
zev_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **resultp)
{
	minor_t minor;
	zev_queue_t *q;

	/* arg is dev_t */
	minor = getminor((dev_t)arg);
	mutex_enter(&zev_mutex);
	q = ddi_get_soft_state(statep, minor);
	if (q == NULL) {
		*resultp = NULL;
		mutex_exit(&zev_mutex);
		return (DDI_FAILURE);
	}

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*resultp = q->zq_dip;
		break;
	case DDI_INFO_DEVT2INSTANCE:
		*resultp = (void *)(uintptr_t)ddi_get_instance(q->zq_dip);
		break;
	default:
		mutex_exit(&zev_mutex);
		return (DDI_FAILURE);
	}
	mutex_exit(&zev_mutex);
	return (DDI_SUCCESS);
}

static struct dev_ops zev_dev_ops = {
	DEVO_REV,			/* driver build revision */
	0,				/* driver reference count */
	zev_getinfo,			/* getinfo */
	nulldev,			/* identify (obsolete) */
	nulldev,			/* probe (search for devices) */
	zev_attach,			/* attach */
	zev_detach,			/* detach */
	nodev,				/* reset (obsolete, use quiesce) */
	&zev_cb_ops,			/* character and block device ops */
	NULL,				/* bus driver ops */
	NULL,				/* power management, not needed */
	ddi_quiesce_not_needed,		/* quiesce */
};

static struct modldrv zev_modldrv = {
	&mod_driverops,			/* all loadable modules use this */
	"ZFS event provider, v"
	    XSTRING(ZEV_MAJOR_VERSION) "." XSTRING(ZEV_MINOR_VERSION),
					/* driver name and version info */
	&zev_dev_ops			/* ops method pointers */
};

static struct modlinkage zev_modlinkage = {
	MODREV_1,		/* fixed value */
	{
		&zev_modldrv,	/* driver linkage structure */
		NULL		/* list terminator */
	}
};

int
_init(void)
{
	int error;

	if ((error = ddi_soft_state_init(&statep, sizeof(zev_queue_t), 1)) != 0)
		return (error);
	zev_attached = B_FALSE;

	zev_queue_head = NULL;
	zev_queue_tail = NULL;
	zev_queue_len = 0;
	zev_muted_pools_head = NULL;
	zev_memory_allocated = 0;
	zev_memory_freed = 0;
	zev_queue_cnt = 0;
	zev_have_blocking_queues = 1;

	mutex_init(&zev_mutex, NULL, MUTEX_DRIVER, NULL);
	cv_init(&zev_condvar, NULL, CV_DRIVER, NULL);
	rw_init(&zev_pool_list_rwlock, NULL, RW_DRIVER, NULL);
	mutex_init(&zev_mark_id_mutex, NULL, MUTEX_DRIVER, NULL);
	zev_mark_id = gethrtime();
	mutex_init(&zev_queue_msg_mutex, NULL, MUTEX_DRIVER, NULL);
	zev_msg_sequence_number = gethrtime();
	bzero(&zev_statistics, sizeof(zev_statistics));
	bzero(&zev_pollhead, sizeof(zev_pollhead));
	bzero(&zev_queues, sizeof(zev_queues));
	zev_statistics.zev_max_queue_len = ZEV_MAX_QUEUE_LEN;
	if (zev_ioc_mute_pool("zg0")) {
		cmn_err(CE_WARN, "zev: could not init mute list");
		goto FAIL;
	}

	if ((error = mod_install(&zev_modlinkage)) != 0) {
		cmn_err(CE_WARN, "zev: could not install module");
		goto FAIL;
	}

	return (0);
FAIL:
	/* free resources */
	cmn_err(CE_WARN, "zev: _init failed");
	mutex_destroy(&zev_mutex);
	ddi_soft_state_fini(&statep);
	return (error);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&zev_modlinkage, modinfop));
}

int
_fini(void)
{
	int error = 0;
	zev_msg_t *msg;
	zev_pool_list_entry_t *pe, *npe;

	mutex_enter(&zev_mutex);
	if (zev_attached == B_TRUE) {
		mutex_exit(&zev_mutex);
		return (SET_ERROR(EBUSY));
	}
	if (zev_queue_cnt != 0) {
		/* should never happen */
		mutex_exit(&zev_mutex);
		return (SET_ERROR(EBUSY));
	}

	/*
	 * avoid deadlock if event list is full: make sure threads currently
	 * blocking on the event list can append their event and then release
	 * rz_zev_rwlock.  Since there should be no queues left when we
	 * reach this point we can simply empty the event list and then
	 * wake everybody.
	 */
	while (zev_queue_head) {
		msg = zev_queue_head;
		zev_queue_head = msg->next;
		zev_free(msg, sizeof(*msg) + msg->size);
	}
	cv_broadcast(&zev_condvar);
	mutex_exit(&zev_mutex);

	/* switch ZFS event callbacks back to default (again) */
	rw_enter(&rz_zev_rwlock, RW_WRITER);
	rz_zev_callbacks = rz_zev_default_callbacks;
	rz_zev_set_active(B_FALSE);
	rw_exit(&rz_zev_rwlock);

	/* no thread is inside of the callbacks anymore.  Safe to remove. */

	/* unload module callbacks */
	if ((error = mod_remove(&zev_modlinkage)) != 0) {
		cmn_err(CE_WARN, "mod_remove failed: %d", error);
		return (error);
	}

	/* free resources */
	mutex_enter(&zev_mutex);
	while (zev_queue_head) {
		msg = zev_queue_head;
		zev_queue_head = msg->next;
		zev_free(msg, sizeof(*msg) + msg->size);
	}
	mutex_exit(&zev_mutex);
	rw_enter(&zev_pool_list_rwlock, RW_WRITER);
	pe = zev_muted_pools_head;
	while (pe) {
		npe = pe;
		pe = pe->next;
		zev_free(npe, sizeof(*npe));
	}
	rw_exit(&zev_pool_list_rwlock);
	ddi_soft_state_fini(&statep);
	rw_destroy(&zev_pool_list_rwlock);
	cv_destroy(&zev_condvar);
	mutex_destroy(&zev_mutex);
	mutex_destroy(&zev_mark_id_mutex);
	mutex_destroy(&zev_queue_msg_mutex);

	return (0);
}